diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,339207 +1,3 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.7043408452319513, - "eval_steps": 500, - "global_step": 48450, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 7.645698338972036e-05, - "grad_norm": 1.792826697055716e-05, - "learning_rate": 2e-05, - "loss": 46.0, - "step": 1 - }, - { - "epoch": 0.00015291396677944073, - "grad_norm": 7.698028639424592e-06, - "learning_rate": 4e-05, - "loss": 46.0, - "step": 2 - }, - { - "epoch": 0.00022937095016916108, - "grad_norm": 1.7271497199544683e-05, - "learning_rate": 6e-05, - "loss": 46.0, - "step": 3 - }, - { - "epoch": 0.00030582793355888146, - "grad_norm": 7.746815754217096e-06, - "learning_rate": 8e-05, - "loss": 46.0, - "step": 4 - }, - { - "epoch": 0.0003822849169486018, - "grad_norm": 3.2548108720220625e-05, - "learning_rate": 0.0001, - "loss": 46.0, - "step": 5 - }, - { - "epoch": 0.00045874190033832216, - "grad_norm": 2.1352847397793084e-05, - "learning_rate": 0.00012, - "loss": 46.0, - "step": 6 - }, - { - "epoch": 0.0005351988837280425, - "grad_norm": 1.3152564861229621e-05, - "learning_rate": 0.00014, - "loss": 46.0, - "step": 7 - }, - { - "epoch": 0.0006116558671177629, - "grad_norm": 1.4066746189200785e-05, - "learning_rate": 0.00016, - "loss": 46.0, - "step": 8 - }, - { - "epoch": 0.0006881128505074832, - "grad_norm": 2.2254904251894914e-05, - "learning_rate": 0.00018, - "loss": 46.0, - "step": 9 - }, - { - "epoch": 0.0007645698338972036, - "grad_norm": 4.437996176420711e-05, - "learning_rate": 0.0002, - "loss": 46.0, - "step": 10 - }, - { - "epoch": 0.0008410268172869239, - "grad_norm": 2.1450669009936973e-05, - "learning_rate": 0.00019999999999999714, - "loss": 46.0, - "step": 11 - }, - { - "epoch": 0.0009174838006766443, - "grad_norm": 3.5947781725553796e-05, - "learning_rate": 0.00019999999999998846, - "loss": 46.0, - "step": 12 - }, - { - "epoch": 0.0009939407840663646, - "grad_norm": 1.1451102182036266e-05, - "learning_rate": 0.00019999999999997404, - "loss": 46.0, - "step": 13 - }, - { - "epoch": 0.001070397767456085, - "grad_norm": 1.5752761100884527e-05, - "learning_rate": 0.00019999999999995388, - "loss": 46.0, - "step": 14 - }, - { - "epoch": 0.0011468547508458053, - "grad_norm": 1.1428691323089879e-05, - "learning_rate": 0.00019999999999992788, - "loss": 46.0, - "step": 15 - }, - { - "epoch": 0.0012233117342355258, - "grad_norm": 4.6794833906460553e-05, - "learning_rate": 0.00019999999999989617, - "loss": 46.0, - "step": 16 - }, - { - "epoch": 0.001299768717625246, - "grad_norm": 2.1383812054409645e-05, - "learning_rate": 0.00019999999999985866, - "loss": 46.0, - "step": 17 - }, - { - "epoch": 0.0013762257010149664, - "grad_norm": 2.292082535859663e-05, - "learning_rate": 0.0001999999999998154, - "loss": 46.0, - "step": 18 - }, - { - "epoch": 0.0014526826844046868, - "grad_norm": 1.3927367945143487e-05, - "learning_rate": 0.00019999999999976636, - "loss": 46.0, - "step": 19 - }, - { - "epoch": 0.0015291396677944071, - "grad_norm": 7.859431934775785e-05, - "learning_rate": 0.00019999999999971153, - "loss": 46.0, - "step": 20 - }, - { - "epoch": 0.0016055966511841276, - "grad_norm": 2.4063776436378248e-05, - "learning_rate": 0.00019999999999965095, - "loss": 46.0, - "step": 21 - }, - { - "epoch": 0.0016820536345738479, - "grad_norm": 2.7152964321430773e-05, - "learning_rate": 0.0001999999999995846, - "loss": 46.0, - "step": 22 - }, - { - "epoch": 0.0017585106179635681, - "grad_norm": 1.9019813407794572e-05, - "learning_rate": 0.0001999999999995125, - "loss": 46.0, - "step": 23 - }, - { - "epoch": 0.0018349676013532886, - "grad_norm": 1.2315812455199193e-05, - "learning_rate": 0.0001999999999994346, - "loss": 46.0, - "step": 24 - }, - { - "epoch": 0.001911424584743009, - "grad_norm": 2.5389064830960706e-05, - "learning_rate": 0.00019999999999935095, - "loss": 46.0, - "step": 25 - }, - { - "epoch": 0.001987881568132729, - "grad_norm": 1.290810450882418e-05, - "learning_rate": 0.00019999999999926153, - "loss": 46.0, - "step": 26 - }, - { - "epoch": 0.00206433855152245, - "grad_norm": 9.934929948940407e-06, - "learning_rate": 0.0001999999999991663, - "loss": 46.0, - "step": 27 - }, - { - "epoch": 0.00214079553491217, - "grad_norm": 1.172078827949008e-05, - "learning_rate": 0.00019999999999906535, - "loss": 46.0, - "step": 28 - }, - { - "epoch": 0.0022172525183018904, - "grad_norm": 3.653451130958274e-05, - "learning_rate": 0.00019999999999895863, - "loss": 46.0, - "step": 29 - }, - { - "epoch": 0.0022937095016916107, - "grad_norm": 2.3206204787129536e-05, - "learning_rate": 0.00019999999999884612, - "loss": 46.0, - "step": 30 - }, - { - "epoch": 0.002370166485081331, - "grad_norm": 1.5248630916175898e-05, - "learning_rate": 0.00019999999999872786, - "loss": 46.0, - "step": 31 - }, - { - "epoch": 0.0024466234684710516, - "grad_norm": 2.0085321011720225e-05, - "learning_rate": 0.0001999999999986038, - "loss": 46.0, - "step": 32 - }, - { - "epoch": 0.002523080451860772, - "grad_norm": 8.577127118769567e-06, - "learning_rate": 0.000199999999998474, - "loss": 46.0, - "step": 33 - }, - { - "epoch": 0.002599537435250492, - "grad_norm": 2.6271172828273848e-05, - "learning_rate": 0.00019999999999833842, - "loss": 46.0, - "step": 34 - }, - { - "epoch": 0.0026759944186402125, - "grad_norm": 1.3005247637920547e-05, - "learning_rate": 0.00019999999999819706, - "loss": 46.0, - "step": 35 - }, - { - "epoch": 0.0027524514020299327, - "grad_norm": 1.2495203009166289e-05, - "learning_rate": 0.00019999999999804994, - "loss": 46.0, - "step": 36 - }, - { - "epoch": 0.0028289083854196534, - "grad_norm": 2.2880592950969003e-05, - "learning_rate": 0.00019999999999789704, - "loss": 46.0, - "step": 37 - }, - { - "epoch": 0.0029053653688093737, - "grad_norm": 5.68693176319357e-05, - "learning_rate": 0.0001999999999977384, - "loss": 46.0, - "step": 38 - }, - { - "epoch": 0.002981822352199094, - "grad_norm": 2.655573553056456e-05, - "learning_rate": 0.00019999999999757394, - "loss": 46.0, - "step": 39 - }, - { - "epoch": 0.0030582793355888142, - "grad_norm": 2.349610258534085e-05, - "learning_rate": 0.00019999999999740375, - "loss": 46.0, - "step": 40 - }, - { - "epoch": 0.0031347363189785345, - "grad_norm": 2.5527391699142754e-05, - "learning_rate": 0.00019999999999722779, - "loss": 46.0, - "step": 41 - }, - { - "epoch": 0.003211193302368255, - "grad_norm": 1.3046727872279007e-05, - "learning_rate": 0.00019999999999704607, - "loss": 46.0, - "step": 42 - }, - { - "epoch": 0.0032876502857579755, - "grad_norm": 1.6746318578952923e-05, - "learning_rate": 0.00019999999999685853, - "loss": 46.0, - "step": 43 - }, - { - "epoch": 0.0033641072691476957, - "grad_norm": 2.166387639590539e-05, - "learning_rate": 0.00019999999999666525, - "loss": 46.0, - "step": 44 - }, - { - "epoch": 0.003440564252537416, - "grad_norm": 1.7299164028372616e-05, - "learning_rate": 0.00019999999999646622, - "loss": 46.0, - "step": 45 - }, - { - "epoch": 0.0035170212359271363, - "grad_norm": 3.4763092116918415e-05, - "learning_rate": 0.0001999999999962614, - "loss": 46.0, - "step": 46 - }, - { - "epoch": 0.003593478219316857, - "grad_norm": 2.022327134909574e-05, - "learning_rate": 0.0001999999999960508, - "loss": 46.0, - "step": 47 - }, - { - "epoch": 0.0036699352027065773, - "grad_norm": 1.1685785466397647e-05, - "learning_rate": 0.00019999999999583445, - "loss": 46.0, - "step": 48 - }, - { - "epoch": 0.0037463921860962975, - "grad_norm": 2.883524211938493e-05, - "learning_rate": 0.00019999999999561235, - "loss": 46.0, - "step": 49 - }, - { - "epoch": 0.003822849169486018, - "grad_norm": 1.3219233551353682e-05, - "learning_rate": 0.00019999999999538445, - "loss": 46.0, - "step": 50 - }, - { - "epoch": 0.0038993061528757385, - "grad_norm": 1.3463150935422163e-05, - "learning_rate": 0.00019999999999515078, - "loss": 46.0, - "step": 51 - }, - { - "epoch": 0.003975763136265458, - "grad_norm": 1.7962094716494903e-05, - "learning_rate": 0.00019999999999491133, - "loss": 46.0, - "step": 52 - }, - { - "epoch": 0.004052220119655179, - "grad_norm": 1.865141348389443e-05, - "learning_rate": 0.00019999999999466614, - "loss": 46.0, - "step": 53 - }, - { - "epoch": 0.0041286771030449, - "grad_norm": 3.395900421310216e-05, - "learning_rate": 0.00019999999999441518, - "loss": 46.0, - "step": 54 - }, - { - "epoch": 0.00420513408643462, - "grad_norm": 5.2411214710446075e-06, - "learning_rate": 0.00019999999999415844, - "loss": 46.0, - "step": 55 - }, - { - "epoch": 0.00428159106982434, - "grad_norm": 2.3695289200986736e-05, - "learning_rate": 0.00019999999999389592, - "loss": 46.0, - "step": 56 - }, - { - "epoch": 0.0043580480532140605, - "grad_norm": 4.788398291566409e-05, - "learning_rate": 0.00019999999999362764, - "loss": 46.0, - "step": 57 - }, - { - "epoch": 0.004434505036603781, - "grad_norm": 1.4658174222859088e-05, - "learning_rate": 0.0001999999999933536, - "loss": 46.0, - "step": 58 - }, - { - "epoch": 0.004510962019993501, - "grad_norm": 3.2516836654394865e-05, - "learning_rate": 0.00019999999999307377, - "loss": 46.0, - "step": 59 - }, - { - "epoch": 0.004587419003383221, - "grad_norm": 1.7249649317818694e-05, - "learning_rate": 0.00019999999999278817, - "loss": 46.0, - "step": 60 - }, - { - "epoch": 0.004663875986772942, - "grad_norm": 3.424560418352485e-05, - "learning_rate": 0.00019999999999249684, - "loss": 46.0, - "step": 61 - }, - { - "epoch": 0.004740332970162662, - "grad_norm": 1.6180661987164058e-05, - "learning_rate": 0.00019999999999219972, - "loss": 46.0, - "step": 62 - }, - { - "epoch": 0.004816789953552382, - "grad_norm": 2.7245410819887184e-05, - "learning_rate": 0.0001999999999918968, - "loss": 46.0, - "step": 63 - }, - { - "epoch": 0.004893246936942103, - "grad_norm": 1.8434240701026283e-05, - "learning_rate": 0.00019999999999158817, - "loss": 46.0, - "step": 64 - }, - { - "epoch": 0.0049697039203318236, - "grad_norm": 1.1324344995955471e-05, - "learning_rate": 0.0001999999999912737, - "loss": 46.0, - "step": 65 - }, - { - "epoch": 0.005046160903721544, - "grad_norm": 1.2631042409338988e-05, - "learning_rate": 0.0001999999999909535, - "loss": 46.0, - "step": 66 - }, - { - "epoch": 0.005122617887111264, - "grad_norm": 1.1797097613452934e-05, - "learning_rate": 0.00019999999999062752, - "loss": 46.0, - "step": 67 - }, - { - "epoch": 0.005199074870500984, - "grad_norm": 0.0001484593958593905, - "learning_rate": 0.00019999999999029578, - "loss": 46.0, - "step": 68 - }, - { - "epoch": 0.005275531853890705, - "grad_norm": 7.72525709180627e-06, - "learning_rate": 0.00019999999998995826, - "loss": 46.0, - "step": 69 - }, - { - "epoch": 0.005351988837280425, - "grad_norm": 2.2868665837449953e-05, - "learning_rate": 0.00019999999998961498, - "loss": 46.0, - "step": 70 - }, - { - "epoch": 0.005428445820670145, - "grad_norm": 2.722097997320816e-05, - "learning_rate": 0.00019999999998926592, - "loss": 46.0, - "step": 71 - }, - { - "epoch": 0.0055049028040598654, - "grad_norm": 2.157407652703114e-05, - "learning_rate": 0.00019999999998891112, - "loss": 46.0, - "step": 72 - }, - { - "epoch": 0.005581359787449587, - "grad_norm": 5.3770130762131885e-05, - "learning_rate": 0.00019999999998855054, - "loss": 46.0, - "step": 73 - }, - { - "epoch": 0.005657816770839307, - "grad_norm": 3.084981653955765e-05, - "learning_rate": 0.00019999999998818418, - "loss": 46.0, - "step": 74 - }, - { - "epoch": 0.005734273754229027, - "grad_norm": 2.2627700673183426e-05, - "learning_rate": 0.00019999999998781203, - "loss": 46.0, - "step": 75 - }, - { - "epoch": 0.005810730737618747, - "grad_norm": 4.124066617805511e-05, - "learning_rate": 0.00019999999998743416, - "loss": 46.0, - "step": 76 - }, - { - "epoch": 0.005887187721008468, - "grad_norm": 2.420853343210183e-05, - "learning_rate": 0.00019999999998705046, - "loss": 46.0, - "step": 77 - }, - { - "epoch": 0.005963644704398188, - "grad_norm": 4.235423693899065e-05, - "learning_rate": 0.00019999999998666104, - "loss": 46.0, - "step": 78 - }, - { - "epoch": 0.006040101687787908, - "grad_norm": 5.554194649448618e-05, - "learning_rate": 0.00019999999998626585, - "loss": 46.0, - "step": 79 - }, - { - "epoch": 0.0061165586711776285, - "grad_norm": 1.315249301114818e-05, - "learning_rate": 0.00019999999998586486, - "loss": 46.0, - "step": 80 - }, - { - "epoch": 0.006193015654567349, - "grad_norm": 1.062934552464867e-05, - "learning_rate": 0.0001999999999854581, - "loss": 46.0, - "step": 81 - }, - { - "epoch": 0.006269472637957069, - "grad_norm": 2.8291991839068942e-05, - "learning_rate": 0.00019999999998504558, - "loss": 46.0, - "step": 82 - }, - { - "epoch": 0.00634592962134679, - "grad_norm": 9.785777365323156e-06, - "learning_rate": 0.0001999999999846273, - "loss": 46.0, - "step": 83 - }, - { - "epoch": 0.00642238660473651, - "grad_norm": 3.804332664003596e-05, - "learning_rate": 0.00019999999998420324, - "loss": 46.0, - "step": 84 - }, - { - "epoch": 0.006498843588126231, - "grad_norm": 3.185340028721839e-05, - "learning_rate": 0.00019999999998377343, - "loss": 46.0, - "step": 85 - }, - { - "epoch": 0.006575300571515951, - "grad_norm": 2.0756384401465766e-05, - "learning_rate": 0.00019999999998333783, - "loss": 46.0, - "step": 86 - }, - { - "epoch": 0.006651757554905671, - "grad_norm": 1.887604594230652e-05, - "learning_rate": 0.00019999999998289645, - "loss": 46.0, - "step": 87 - }, - { - "epoch": 0.0067282145382953915, - "grad_norm": 1.6534835594939068e-05, - "learning_rate": 0.00019999999998244932, - "loss": 46.0, - "step": 88 - }, - { - "epoch": 0.006804671521685112, - "grad_norm": 1.6333686289726757e-05, - "learning_rate": 0.00019999999998199643, - "loss": 46.0, - "step": 89 - }, - { - "epoch": 0.006881128505074832, - "grad_norm": 3.246934647904709e-05, - "learning_rate": 0.00019999999998153775, - "loss": 46.0, - "step": 90 - }, - { - "epoch": 0.006957585488464552, - "grad_norm": 1.181766674562823e-05, - "learning_rate": 0.00019999999998107334, - "loss": 46.0, - "step": 91 - }, - { - "epoch": 0.007034042471854273, - "grad_norm": 1.2396643796819262e-05, - "learning_rate": 0.0001999999999806031, - "loss": 46.0, - "step": 92 - }, - { - "epoch": 0.007110499455243994, - "grad_norm": 4.238028486724943e-05, - "learning_rate": 0.00019999999998012713, - "loss": 46.0, - "step": 93 - }, - { - "epoch": 0.007186956438633714, - "grad_norm": 2.364716783631593e-05, - "learning_rate": 0.0001999999999796454, - "loss": 46.0, - "step": 94 - }, - { - "epoch": 0.007263413422023434, - "grad_norm": 4.005626396974549e-05, - "learning_rate": 0.00019999999997915785, - "loss": 46.0, - "step": 95 - }, - { - "epoch": 0.0073398704054131545, - "grad_norm": 2.0414901882759295e-05, - "learning_rate": 0.00019999999997866456, - "loss": 46.0, - "step": 96 - }, - { - "epoch": 0.007416327388802875, - "grad_norm": 2.954376395791769e-05, - "learning_rate": 0.00019999999997816553, - "loss": 46.0, - "step": 97 - }, - { - "epoch": 0.007492784372192595, - "grad_norm": 4.945532054989599e-05, - "learning_rate": 0.00019999999997766067, - "loss": 46.0, - "step": 98 - }, - { - "epoch": 0.007569241355582315, - "grad_norm": 1.6079820852610283e-05, - "learning_rate": 0.0001999999999771501, - "loss": 46.0, - "step": 99 - }, - { - "epoch": 0.007645698338972036, - "grad_norm": 1.7326456145383418e-05, - "learning_rate": 0.00019999999997663372, - "loss": 46.0, - "step": 100 - }, - { - "epoch": 0.007722155322361756, - "grad_norm": 2.4560122255934402e-05, - "learning_rate": 0.0001999999999761116, - "loss": 46.0, - "step": 101 - }, - { - "epoch": 0.007798612305751477, - "grad_norm": 2.7380283427191898e-05, - "learning_rate": 0.00019999999997558367, - "loss": 46.0, - "step": 102 - }, - { - "epoch": 0.007875069289141197, - "grad_norm": 4.690876085078344e-05, - "learning_rate": 0.00019999999997505002, - "loss": 46.0, - "step": 103 - }, - { - "epoch": 0.007951526272530917, - "grad_norm": 4.03676567657385e-05, - "learning_rate": 0.00019999999997451058, - "loss": 46.0, - "step": 104 - }, - { - "epoch": 0.008027983255920638, - "grad_norm": 3.087197183049284e-05, - "learning_rate": 0.00019999999997396533, - "loss": 46.0, - "step": 105 - }, - { - "epoch": 0.008104440239310357, - "grad_norm": 3.537006341503002e-05, - "learning_rate": 0.00019999999997341437, - "loss": 46.0, - "step": 106 - }, - { - "epoch": 0.008180897222700078, - "grad_norm": 2.691395093279425e-05, - "learning_rate": 0.0001999999999728576, - "loss": 46.0, - "step": 107 - }, - { - "epoch": 0.0082573542060898, - "grad_norm": 1.6602109099039808e-05, - "learning_rate": 0.0001999999999722951, - "loss": 46.0, - "step": 108 - }, - { - "epoch": 0.008333811189479519, - "grad_norm": 4.141966564930044e-05, - "learning_rate": 0.0001999999999717268, - "loss": 46.0, - "step": 109 - }, - { - "epoch": 0.00841026817286924, - "grad_norm": 5.193466495256871e-05, - "learning_rate": 0.00019999999997115272, - "loss": 46.0, - "step": 110 - }, - { - "epoch": 0.00848672515625896, - "grad_norm": 2.273494283144828e-05, - "learning_rate": 0.0001999999999705729, - "loss": 46.0, - "step": 111 - }, - { - "epoch": 0.00856318213964868, - "grad_norm": 1.8635138985700905e-05, - "learning_rate": 0.00019999999996998731, - "loss": 46.0, - "step": 112 - }, - { - "epoch": 0.0086396391230384, - "grad_norm": 5.76334678044077e-05, - "learning_rate": 0.00019999999996939594, - "loss": 46.0, - "step": 113 - }, - { - "epoch": 0.008716096106428121, - "grad_norm": 1.4108022696746048e-05, - "learning_rate": 0.00019999999996879879, - "loss": 46.0, - "step": 114 - }, - { - "epoch": 0.00879255308981784, - "grad_norm": 5.847453940077685e-05, - "learning_rate": 0.0001999999999681959, - "loss": 46.0, - "step": 115 - }, - { - "epoch": 0.008869010073207562, - "grad_norm": 2.4478789782733656e-05, - "learning_rate": 0.00019999999996758722, - "loss": 46.0, - "step": 116 - }, - { - "epoch": 0.008945467056597283, - "grad_norm": 1.6160238374141045e-05, - "learning_rate": 0.00019999999996697277, - "loss": 46.0, - "step": 117 - }, - { - "epoch": 0.009021924039987002, - "grad_norm": 1.1715706023096573e-05, - "learning_rate": 0.00019999999996635258, - "loss": 46.0, - "step": 118 - }, - { - "epoch": 0.009098381023376723, - "grad_norm": 2.351373404962942e-05, - "learning_rate": 0.0001999999999657266, - "loss": 46.0, - "step": 119 - }, - { - "epoch": 0.009174838006766443, - "grad_norm": 1.4249350897443946e-05, - "learning_rate": 0.00019999999996509483, - "loss": 46.0, - "step": 120 - }, - { - "epoch": 0.009251294990156164, - "grad_norm": 1.539502409286797e-05, - "learning_rate": 0.0001999999999644573, - "loss": 46.0, - "step": 121 - }, - { - "epoch": 0.009327751973545883, - "grad_norm": 1.3343144928512629e-05, - "learning_rate": 0.00019999999996381398, - "loss": 46.0, - "step": 122 - }, - { - "epoch": 0.009404208956935604, - "grad_norm": 2.421860881440807e-05, - "learning_rate": 0.00019999999996316495, - "loss": 46.0, - "step": 123 - }, - { - "epoch": 0.009480665940325324, - "grad_norm": 1.230876750923926e-05, - "learning_rate": 0.0001999999999625101, - "loss": 46.0, - "step": 124 - }, - { - "epoch": 0.009557122923715045, - "grad_norm": 1.5687432096456178e-05, - "learning_rate": 0.0001999999999618495, - "loss": 46.0, - "step": 125 - }, - { - "epoch": 0.009633579907104764, - "grad_norm": 2.405607665423304e-05, - "learning_rate": 0.00019999999996118313, - "loss": 46.0, - "step": 126 - }, - { - "epoch": 0.009710036890494485, - "grad_norm": 1.0055097845906857e-05, - "learning_rate": 0.00019999999996051098, - "loss": 46.0, - "step": 127 - }, - { - "epoch": 0.009786493873884207, - "grad_norm": 1.994884769374039e-05, - "learning_rate": 0.00019999999995983308, - "loss": 46.0, - "step": 128 - }, - { - "epoch": 0.009862950857273926, - "grad_norm": 2.4485007088514976e-05, - "learning_rate": 0.00019999999995914939, - "loss": 46.0, - "step": 129 - }, - { - "epoch": 0.009939407840663647, - "grad_norm": 3.35947479470633e-05, - "learning_rate": 0.00019999999995845994, - "loss": 46.0, - "step": 130 - }, - { - "epoch": 0.010015864824053367, - "grad_norm": 1.7626496628508903e-05, - "learning_rate": 0.00019999999995776472, - "loss": 46.0, - "step": 131 - }, - { - "epoch": 0.010092321807443088, - "grad_norm": 1.8997772713191807e-05, - "learning_rate": 0.00019999999995706373, - "loss": 46.0, - "step": 132 - }, - { - "epoch": 0.010168778790832807, - "grad_norm": 1.9099250494036824e-05, - "learning_rate": 0.00019999999995635697, - "loss": 46.0, - "step": 133 - }, - { - "epoch": 0.010245235774222528, - "grad_norm": 2.0239402147126384e-05, - "learning_rate": 0.00019999999995564446, - "loss": 46.0, - "step": 134 - }, - { - "epoch": 0.010321692757612248, - "grad_norm": 1.774134398146998e-05, - "learning_rate": 0.00019999999995492615, - "loss": 46.0, - "step": 135 - }, - { - "epoch": 0.010398149741001969, - "grad_norm": 1.6570445950492285e-05, - "learning_rate": 0.00019999999995420206, - "loss": 46.0, - "step": 136 - }, - { - "epoch": 0.01047460672439169, - "grad_norm": 1.1938095667574089e-05, - "learning_rate": 0.00019999999995347226, - "loss": 46.0, - "step": 137 - }, - { - "epoch": 0.01055106370778141, - "grad_norm": 1.392544527334394e-05, - "learning_rate": 0.00019999999995273663, - "loss": 46.0, - "step": 138 - }, - { - "epoch": 0.01062752069117113, - "grad_norm": 7.934186214697547e-06, - "learning_rate": 0.00019999999995199525, - "loss": 46.0, - "step": 139 - }, - { - "epoch": 0.01070397767456085, - "grad_norm": 3.369540718267672e-05, - "learning_rate": 0.00019999999995124813, - "loss": 46.0, - "step": 140 - }, - { - "epoch": 0.010780434657950571, - "grad_norm": 1.83624815690564e-05, - "learning_rate": 0.0001999999999504952, - "loss": 46.0, - "step": 141 - }, - { - "epoch": 0.01085689164134029, - "grad_norm": 4.4597763917408884e-05, - "learning_rate": 0.00019999999994973653, - "loss": 46.0, - "step": 142 - }, - { - "epoch": 0.010933348624730011, - "grad_norm": 1.9854931451845914e-05, - "learning_rate": 0.00019999999994897206, - "loss": 46.0, - "step": 143 - }, - { - "epoch": 0.011009805608119731, - "grad_norm": 1.549391890875995e-05, - "learning_rate": 0.00019999999994820184, - "loss": 46.0, - "step": 144 - }, - { - "epoch": 0.011086262591509452, - "grad_norm": 1.8510312656871974e-05, - "learning_rate": 0.00019999999994742585, - "loss": 46.0, - "step": 145 - }, - { - "epoch": 0.011162719574899173, - "grad_norm": 7.089629070833325e-05, - "learning_rate": 0.00019999999994664412, - "loss": 46.0, - "step": 146 - }, - { - "epoch": 0.011239176558288893, - "grad_norm": 1.4443186955759302e-05, - "learning_rate": 0.00019999999994585655, - "loss": 46.0, - "step": 147 - }, - { - "epoch": 0.011315633541678614, - "grad_norm": 1.778230034688022e-05, - "learning_rate": 0.00019999999994506327, - "loss": 46.0, - "step": 148 - }, - { - "epoch": 0.011392090525068333, - "grad_norm": 1.8103053662343882e-05, - "learning_rate": 0.00019999999994426418, - "loss": 46.0, - "step": 149 - }, - { - "epoch": 0.011468547508458054, - "grad_norm": 1.6613617844996043e-05, - "learning_rate": 0.00019999999994345938, - "loss": 46.0, - "step": 150 - }, - { - "epoch": 0.011545004491847774, - "grad_norm": 9.72727775661042e-06, - "learning_rate": 0.00019999999994264875, - "loss": 46.0, - "step": 151 - }, - { - "epoch": 0.011621461475237495, - "grad_norm": 4.356676072347909e-05, - "learning_rate": 0.00019999999994183237, - "loss": 46.0, - "step": 152 - }, - { - "epoch": 0.011697918458627214, - "grad_norm": 1.8201697457698174e-05, - "learning_rate": 0.00019999999994101022, - "loss": 46.0, - "step": 153 - }, - { - "epoch": 0.011774375442016935, - "grad_norm": 1.672066173341591e-05, - "learning_rate": 0.0001999999999401823, - "loss": 46.0, - "step": 154 - }, - { - "epoch": 0.011850832425406655, - "grad_norm": 1.5591931514791213e-05, - "learning_rate": 0.0001999999999393486, - "loss": 46.0, - "step": 155 - }, - { - "epoch": 0.011927289408796376, - "grad_norm": 1.747531132423319e-05, - "learning_rate": 0.00019999999993850916, - "loss": 46.0, - "step": 156 - }, - { - "epoch": 0.012003746392186097, - "grad_norm": 2.5923198336386122e-05, - "learning_rate": 0.00019999999993766394, - "loss": 46.0, - "step": 157 - }, - { - "epoch": 0.012080203375575816, - "grad_norm": 1.825248727982398e-05, - "learning_rate": 0.00019999999993681295, - "loss": 46.0, - "step": 158 - }, - { - "epoch": 0.012156660358965538, - "grad_norm": 1.7134279914898798e-05, - "learning_rate": 0.00019999999993595616, - "loss": 46.0, - "step": 159 - }, - { - "epoch": 0.012233117342355257, - "grad_norm": 1.23955860544811e-05, - "learning_rate": 0.00019999999993509365, - "loss": 46.0, - "step": 160 - }, - { - "epoch": 0.012309574325744978, - "grad_norm": 1.9497911125654355e-05, - "learning_rate": 0.00019999999993422534, - "loss": 46.0, - "step": 161 - }, - { - "epoch": 0.012386031309134697, - "grad_norm": 1.4672481484012678e-05, - "learning_rate": 0.00019999999993335128, - "loss": 46.0, - "step": 162 - }, - { - "epoch": 0.012462488292524419, - "grad_norm": 1.6997120837913826e-05, - "learning_rate": 0.00019999999993247145, - "loss": 46.0, - "step": 163 - }, - { - "epoch": 0.012538945275914138, - "grad_norm": 1.876752867246978e-05, - "learning_rate": 0.00019999999993158582, - "loss": 46.0, - "step": 164 - }, - { - "epoch": 0.01261540225930386, - "grad_norm": 2.1698650016332977e-05, - "learning_rate": 0.00019999999993069442, - "loss": 46.0, - "step": 165 - }, - { - "epoch": 0.01269185924269358, - "grad_norm": 7.307679334189743e-05, - "learning_rate": 0.00019999999992979727, - "loss": 46.0, - "step": 166 - }, - { - "epoch": 0.0127683162260833, - "grad_norm": 9.80331878963625e-06, - "learning_rate": 0.00019999999992889437, - "loss": 46.0, - "step": 167 - }, - { - "epoch": 0.01284477320947302, - "grad_norm": 1.5055651601869613e-05, - "learning_rate": 0.00019999999992798567, - "loss": 46.0, - "step": 168 - }, - { - "epoch": 0.01292123019286274, - "grad_norm": 3.526509317453019e-05, - "learning_rate": 0.00019999999992707123, - "loss": 46.0, - "step": 169 - }, - { - "epoch": 0.012997687176252461, - "grad_norm": 5.940972550888546e-05, - "learning_rate": 0.000199999999926151, - "loss": 46.0, - "step": 170 - }, - { - "epoch": 0.01307414415964218, - "grad_norm": 1.5164956494118087e-05, - "learning_rate": 0.000199999999925225, - "loss": 46.0, - "step": 171 - }, - { - "epoch": 0.013150601143031902, - "grad_norm": 8.361901564057916e-06, - "learning_rate": 0.00019999999992429323, - "loss": 46.0, - "step": 172 - }, - { - "epoch": 0.013227058126421621, - "grad_norm": 1.1857358913403004e-05, - "learning_rate": 0.0001999999999233557, - "loss": 46.0, - "step": 173 - }, - { - "epoch": 0.013303515109811342, - "grad_norm": 2.8307274988037534e-05, - "learning_rate": 0.0001999999999224124, - "loss": 46.0, - "step": 174 - }, - { - "epoch": 0.013379972093201064, - "grad_norm": 1.7267728253500536e-05, - "learning_rate": 0.00019999999992146333, - "loss": 46.0, - "step": 175 - }, - { - "epoch": 0.013456429076590783, - "grad_norm": 2.200157723564189e-05, - "learning_rate": 0.00019999999992050847, - "loss": 46.0, - "step": 176 - }, - { - "epoch": 0.013532886059980504, - "grad_norm": 2.0857438357779756e-05, - "learning_rate": 0.00019999999991954784, - "loss": 46.0, - "step": 177 - }, - { - "epoch": 0.013609343043370224, - "grad_norm": 3.0190241886884905e-05, - "learning_rate": 0.00019999999991858147, - "loss": 46.0, - "step": 178 - }, - { - "epoch": 0.013685800026759945, - "grad_norm": 1.7986943930736743e-05, - "learning_rate": 0.00019999999991760932, - "loss": 46.0, - "step": 179 - }, - { - "epoch": 0.013762257010149664, - "grad_norm": 1.136941045842832e-05, - "learning_rate": 0.00019999999991663142, - "loss": 46.0, - "step": 180 - }, - { - "epoch": 0.013838713993539385, - "grad_norm": 2.2450272808782756e-05, - "learning_rate": 0.0001999999999156477, - "loss": 46.0, - "step": 181 - }, - { - "epoch": 0.013915170976929105, - "grad_norm": 2.47952248173533e-05, - "learning_rate": 0.00019999999991465823, - "loss": 46.0, - "step": 182 - }, - { - "epoch": 0.013991627960318826, - "grad_norm": 2.660143763932865e-05, - "learning_rate": 0.00019999999991366304, - "loss": 46.0, - "step": 183 - }, - { - "epoch": 0.014068084943708545, - "grad_norm": 4.1635230445535854e-05, - "learning_rate": 0.000199999999912662, - "loss": 46.0, - "step": 184 - }, - { - "epoch": 0.014144541927098266, - "grad_norm": 2.161614793294575e-05, - "learning_rate": 0.00019999999991165526, - "loss": 46.0, - "step": 185 - }, - { - "epoch": 0.014220998910487987, - "grad_norm": 1.880426316347439e-05, - "learning_rate": 0.0001999999999106427, - "loss": 46.0, - "step": 186 - }, - { - "epoch": 0.014297455893877707, - "grad_norm": 2.4131679310812615e-05, - "learning_rate": 0.00019999999990962439, - "loss": 46.0, - "step": 187 - }, - { - "epoch": 0.014373912877267428, - "grad_norm": 3.642721276264638e-05, - "learning_rate": 0.00019999999990860033, - "loss": 46.0, - "step": 188 - }, - { - "epoch": 0.014450369860657147, - "grad_norm": 1.3916353054810315e-05, - "learning_rate": 0.00019999999990757047, - "loss": 46.0, - "step": 189 - }, - { - "epoch": 0.014526826844046868, - "grad_norm": 1.937287743203342e-05, - "learning_rate": 0.00019999999990653487, - "loss": 46.0, - "step": 190 - }, - { - "epoch": 0.014603283827436588, - "grad_norm": 2.7034817321691662e-05, - "learning_rate": 0.00019999999990549347, - "loss": 46.0, - "step": 191 - }, - { - "epoch": 0.014679740810826309, - "grad_norm": 3.415107858018018e-05, - "learning_rate": 0.00019999999990444632, - "loss": 46.0, - "step": 192 - }, - { - "epoch": 0.014756197794216028, - "grad_norm": 1.394677019561641e-05, - "learning_rate": 0.00019999999990339337, - "loss": 46.0, - "step": 193 - }, - { - "epoch": 0.01483265477760575, - "grad_norm": 1.77701331267599e-05, - "learning_rate": 0.0001999999999023347, - "loss": 46.0, - "step": 194 - }, - { - "epoch": 0.01490911176099547, - "grad_norm": 4.800179522135295e-05, - "learning_rate": 0.00019999999990127023, - "loss": 46.0, - "step": 195 - }, - { - "epoch": 0.01498556874438519, - "grad_norm": 3.9848117012297735e-05, - "learning_rate": 0.00019999999990019999, - "loss": 46.0, - "step": 196 - }, - { - "epoch": 0.015062025727774911, - "grad_norm": 4.420596087584272e-05, - "learning_rate": 0.000199999999899124, - "loss": 46.0, - "step": 197 - }, - { - "epoch": 0.01513848271116463, - "grad_norm": 3.029867002624087e-05, - "learning_rate": 0.0001999999998980422, - "loss": 46.0, - "step": 198 - }, - { - "epoch": 0.015214939694554352, - "grad_norm": 0.00012269854778423905, - "learning_rate": 0.00019999999989695467, - "loss": 46.0, - "step": 199 - }, - { - "epoch": 0.015291396677944071, - "grad_norm": 6.0171842051204294e-05, - "learning_rate": 0.00019999999989586136, - "loss": 46.0, - "step": 200 - }, - { - "epoch": 0.015367853661333792, - "grad_norm": 3.885979822371155e-05, - "learning_rate": 0.00019999999989476228, - "loss": 46.0, - "step": 201 - }, - { - "epoch": 0.015444310644723512, - "grad_norm": 6.002607915434055e-05, - "learning_rate": 0.00019999999989365742, - "loss": 46.0, - "step": 202 - }, - { - "epoch": 0.015520767628113233, - "grad_norm": 3.7406844057841226e-05, - "learning_rate": 0.00019999999989254682, - "loss": 46.0, - "step": 203 - }, - { - "epoch": 0.015597224611502954, - "grad_norm": 4.4123895349912345e-05, - "learning_rate": 0.00019999999989143042, - "loss": 46.0, - "step": 204 - }, - { - "epoch": 0.015673681594892675, - "grad_norm": 2.500749542377889e-05, - "learning_rate": 0.00019999999989030827, - "loss": 46.0, - "step": 205 - }, - { - "epoch": 0.015750138578282395, - "grad_norm": 4.2419942474225536e-05, - "learning_rate": 0.00019999999988918034, - "loss": 46.0, - "step": 206 - }, - { - "epoch": 0.015826595561672114, - "grad_norm": 2.4852703063515946e-05, - "learning_rate": 0.00019999999988804662, - "loss": 46.0, - "step": 207 - }, - { - "epoch": 0.015903052545061833, - "grad_norm": 1.3398740520642605e-05, - "learning_rate": 0.00019999999988690718, - "loss": 46.0, - "step": 208 - }, - { - "epoch": 0.015979509528451556, - "grad_norm": 3.970724719692953e-05, - "learning_rate": 0.00019999999988576194, - "loss": 46.0, - "step": 209 - }, - { - "epoch": 0.016055966511841276, - "grad_norm": 3.1210820452542976e-05, - "learning_rate": 0.00019999999988461092, - "loss": 46.0, - "step": 210 - }, - { - "epoch": 0.016132423495230995, - "grad_norm": 4.0842809539753944e-05, - "learning_rate": 0.00019999999988345416, - "loss": 46.0, - "step": 211 - }, - { - "epoch": 0.016208880478620714, - "grad_norm": 1.1598122910072561e-05, - "learning_rate": 0.00019999999988229162, - "loss": 46.0, - "step": 212 - }, - { - "epoch": 0.016285337462010437, - "grad_norm": 1.5735586202936247e-05, - "learning_rate": 0.00019999999988112329, - "loss": 46.0, - "step": 213 - }, - { - "epoch": 0.016361794445400157, - "grad_norm": 1.9531549696694128e-05, - "learning_rate": 0.0001999999998799492, - "loss": 46.0, - "step": 214 - }, - { - "epoch": 0.016438251428789876, - "grad_norm": 1.3231883713160641e-05, - "learning_rate": 0.00019999999987876935, - "loss": 46.0, - "step": 215 - }, - { - "epoch": 0.0165147084121796, - "grad_norm": 7.973035098984838e-05, - "learning_rate": 0.00019999999987758375, - "loss": 46.0, - "step": 216 - }, - { - "epoch": 0.01659116539556932, - "grad_norm": 3.848735286737792e-05, - "learning_rate": 0.00019999999987639234, - "loss": 46.0, - "step": 217 - }, - { - "epoch": 0.016667622378959038, - "grad_norm": 4.15618451370392e-05, - "learning_rate": 0.00019999999987519517, - "loss": 46.0, - "step": 218 - }, - { - "epoch": 0.016744079362348757, - "grad_norm": 3.883221870637499e-05, - "learning_rate": 0.00019999999987399224, - "loss": 46.0, - "step": 219 - }, - { - "epoch": 0.01682053634573848, - "grad_norm": 2.038398633885663e-05, - "learning_rate": 0.00019999999987278355, - "loss": 46.0, - "step": 220 - }, - { - "epoch": 0.0168969933291282, - "grad_norm": 1.29841910165851e-05, - "learning_rate": 0.00019999999987156908, - "loss": 46.0, - "step": 221 - }, - { - "epoch": 0.01697345031251792, - "grad_norm": 3.4105782106053084e-05, - "learning_rate": 0.00019999999987034884, - "loss": 46.0, - "step": 222 - }, - { - "epoch": 0.017049907295907638, - "grad_norm": 6.171867426019162e-05, - "learning_rate": 0.00019999999986912282, - "loss": 46.0, - "step": 223 - }, - { - "epoch": 0.01712636427929736, - "grad_norm": 1.5947302017593756e-05, - "learning_rate": 0.00019999999986789106, - "loss": 46.0, - "step": 224 - }, - { - "epoch": 0.01720282126268708, - "grad_norm": 1.7624932297621854e-05, - "learning_rate": 0.0001999999998666535, - "loss": 46.0, - "step": 225 - }, - { - "epoch": 0.0172792782460768, - "grad_norm": 1.3201419278630055e-05, - "learning_rate": 0.0001999999998654102, - "loss": 46.0, - "step": 226 - }, - { - "epoch": 0.017355735229466523, - "grad_norm": 2.5239411115762778e-05, - "learning_rate": 0.00019999999986416108, - "loss": 46.0, - "step": 227 - }, - { - "epoch": 0.017432192212856242, - "grad_norm": 3.751131225726567e-05, - "learning_rate": 0.00019999999986290625, - "loss": 46.0, - "step": 228 - }, - { - "epoch": 0.01750864919624596, - "grad_norm": 5.078049434814602e-05, - "learning_rate": 0.00019999999986164562, - "loss": 46.0, - "step": 229 - }, - { - "epoch": 0.01758510617963568, - "grad_norm": 1.581860487931408e-05, - "learning_rate": 0.00019999999986037922, - "loss": 46.0, - "step": 230 - }, - { - "epoch": 0.017661563163025404, - "grad_norm": 1.7339454643661156e-05, - "learning_rate": 0.00019999999985910707, - "loss": 46.0, - "step": 231 - }, - { - "epoch": 0.017738020146415123, - "grad_norm": 2.3564203729620203e-05, - "learning_rate": 0.00019999999985782912, - "loss": 46.0, - "step": 232 - }, - { - "epoch": 0.017814477129804843, - "grad_norm": 3.812616705545224e-05, - "learning_rate": 0.00019999999985654543, - "loss": 46.0, - "step": 233 - }, - { - "epoch": 0.017890934113194566, - "grad_norm": 6.810265040257946e-05, - "learning_rate": 0.00019999999985525596, - "loss": 46.0, - "step": 234 - }, - { - "epoch": 0.017967391096584285, - "grad_norm": 2.8255031793378294e-05, - "learning_rate": 0.0001999999998539607, - "loss": 46.0, - "step": 235 - }, - { - "epoch": 0.018043848079974004, - "grad_norm": 1.70758521562675e-05, - "learning_rate": 0.0001999999998526597, - "loss": 46.0, - "step": 236 - }, - { - "epoch": 0.018120305063363724, - "grad_norm": 3.266323619754985e-05, - "learning_rate": 0.0001999999998513529, - "loss": 46.0, - "step": 237 - }, - { - "epoch": 0.018196762046753447, - "grad_norm": 2.1402192942332476e-05, - "learning_rate": 0.00019999999985004038, - "loss": 46.0, - "step": 238 - }, - { - "epoch": 0.018273219030143166, - "grad_norm": 1.3219912034401204e-05, - "learning_rate": 0.00019999999984872204, - "loss": 46.0, - "step": 239 - }, - { - "epoch": 0.018349676013532885, - "grad_norm": 1.8829701730282977e-05, - "learning_rate": 0.00019999999984739796, - "loss": 46.0, - "step": 240 - }, - { - "epoch": 0.018426132996922605, - "grad_norm": 3.477720747468993e-05, - "learning_rate": 0.00019999999984606808, - "loss": 46.0, - "step": 241 - }, - { - "epoch": 0.018502589980312328, - "grad_norm": 2.4586177460150793e-05, - "learning_rate": 0.00019999999984473245, - "loss": 46.0, - "step": 242 - }, - { - "epoch": 0.018579046963702047, - "grad_norm": 2.229872734460514e-05, - "learning_rate": 0.00019999999984339107, - "loss": 46.0, - "step": 243 - }, - { - "epoch": 0.018655503947091766, - "grad_norm": 5.301136116031557e-05, - "learning_rate": 0.0001999999998420439, - "loss": 46.0, - "step": 244 - }, - { - "epoch": 0.01873196093048149, - "grad_norm": 6.441616278607398e-05, - "learning_rate": 0.00019999999984069095, - "loss": 46.0, - "step": 245 - }, - { - "epoch": 0.01880841791387121, - "grad_norm": 6.231950828805566e-05, - "learning_rate": 0.00019999999983933225, - "loss": 46.0, - "step": 246 - }, - { - "epoch": 0.018884874897260928, - "grad_norm": 2.3630893338122405e-05, - "learning_rate": 0.00019999999983796778, - "loss": 46.0, - "step": 247 - }, - { - "epoch": 0.018961331880650648, - "grad_norm": 4.4150590838398784e-05, - "learning_rate": 0.00019999999983659751, - "loss": 46.0, - "step": 248 - }, - { - "epoch": 0.01903778886404037, - "grad_norm": 4.186800288152881e-05, - "learning_rate": 0.0001999999998352215, - "loss": 46.0, - "step": 249 - }, - { - "epoch": 0.01911424584743009, - "grad_norm": 1.8623808500706218e-05, - "learning_rate": 0.0001999999998338397, - "loss": 46.0, - "step": 250 - }, - { - "epoch": 0.01919070283081981, - "grad_norm": 7.354863191721961e-05, - "learning_rate": 0.00019999999983245218, - "loss": 46.0, - "step": 251 - }, - { - "epoch": 0.01926715981420953, - "grad_norm": 2.2413632905227132e-05, - "learning_rate": 0.00019999999983105884, - "loss": 46.0, - "step": 252 - }, - { - "epoch": 0.01934361679759925, - "grad_norm": 3.723469126271084e-05, - "learning_rate": 0.00019999999982965976, - "loss": 46.0, - "step": 253 - }, - { - "epoch": 0.01942007378098897, - "grad_norm": 1.9338929632795043e-05, - "learning_rate": 0.0001999999998282549, - "loss": 46.0, - "step": 254 - }, - { - "epoch": 0.01949653076437869, - "grad_norm": 2.176074485760182e-05, - "learning_rate": 0.00019999999982684425, - "loss": 46.0, - "step": 255 - }, - { - "epoch": 0.019572987747768413, - "grad_norm": 5.1866609283024445e-05, - "learning_rate": 0.00019999999982542787, - "loss": 46.0, - "step": 256 - }, - { - "epoch": 0.019649444731158133, - "grad_norm": 4.26997612521518e-05, - "learning_rate": 0.00019999999982400567, - "loss": 46.0, - "step": 257 - }, - { - "epoch": 0.019725901714547852, - "grad_norm": 2.0059625967405736e-05, - "learning_rate": 0.00019999999982257775, - "loss": 46.0, - "step": 258 - }, - { - "epoch": 0.01980235869793757, - "grad_norm": 3.8806952943559736e-05, - "learning_rate": 0.00019999999982114406, - "loss": 46.0, - "step": 259 - }, - { - "epoch": 0.019878815681327294, - "grad_norm": 3.745260619325563e-05, - "learning_rate": 0.00019999999981970456, - "loss": 46.0, - "step": 260 - }, - { - "epoch": 0.019955272664717014, - "grad_norm": 4.402872218634002e-05, - "learning_rate": 0.00019999999981825932, - "loss": 46.0, - "step": 261 - }, - { - "epoch": 0.020031729648106733, - "grad_norm": 1.1846618690469768e-05, - "learning_rate": 0.0001999999998168083, - "loss": 46.0, - "step": 262 - }, - { - "epoch": 0.020108186631496456, - "grad_norm": 1.715353391773533e-05, - "learning_rate": 0.00019999999981535152, - "loss": 46.0, - "step": 263 - }, - { - "epoch": 0.020184643614886175, - "grad_norm": 3.253341856179759e-05, - "learning_rate": 0.00019999999981388898, - "loss": 46.0, - "step": 264 - }, - { - "epoch": 0.020261100598275895, - "grad_norm": 3.172983633703552e-05, - "learning_rate": 0.00019999999981242065, - "loss": 46.0, - "step": 265 - }, - { - "epoch": 0.020337557581665614, - "grad_norm": 5.8371944760438055e-05, - "learning_rate": 0.00019999999981094654, - "loss": 46.0, - "step": 266 - }, - { - "epoch": 0.020414014565055337, - "grad_norm": 2.696877345442772e-05, - "learning_rate": 0.00019999999980946668, - "loss": 46.0, - "step": 267 - }, - { - "epoch": 0.020490471548445056, - "grad_norm": 3.4904631320387125e-05, - "learning_rate": 0.00019999999980798103, - "loss": 46.0, - "step": 268 - }, - { - "epoch": 0.020566928531834776, - "grad_norm": 4.222767529427074e-05, - "learning_rate": 0.00019999999980648965, - "loss": 46.0, - "step": 269 - }, - { - "epoch": 0.020643385515224495, - "grad_norm": 2.7723754101316445e-05, - "learning_rate": 0.00019999999980499245, - "loss": 46.0, - "step": 270 - }, - { - "epoch": 0.020719842498614218, - "grad_norm": 7.65265867812559e-05, - "learning_rate": 0.0001999999998034895, - "loss": 46.0, - "step": 271 - }, - { - "epoch": 0.020796299482003937, - "grad_norm": 6.078218575567007e-05, - "learning_rate": 0.0001999999998019808, - "loss": 46.0, - "step": 272 - }, - { - "epoch": 0.020872756465393657, - "grad_norm": 2.5883078706101514e-05, - "learning_rate": 0.00019999999980046634, - "loss": 46.0, - "step": 273 - }, - { - "epoch": 0.02094921344878338, - "grad_norm": 3.141328488709405e-05, - "learning_rate": 0.00019999999979894607, - "loss": 46.0, - "step": 274 - }, - { - "epoch": 0.0210256704321731, - "grad_norm": 4.7406323574250564e-05, - "learning_rate": 0.00019999999979742006, - "loss": 46.0, - "step": 275 - }, - { - "epoch": 0.02110212741556282, - "grad_norm": 3.114222636213526e-05, - "learning_rate": 0.00019999999979588827, - "loss": 46.0, - "step": 276 - }, - { - "epoch": 0.021178584398952538, - "grad_norm": 3.695775012602098e-05, - "learning_rate": 0.00019999999979435068, - "loss": 46.0, - "step": 277 - }, - { - "epoch": 0.02125504138234226, - "grad_norm": 4.6277407818706706e-05, - "learning_rate": 0.00019999999979280738, - "loss": 46.0, - "step": 278 - }, - { - "epoch": 0.02133149836573198, - "grad_norm": 3.713286423590034e-05, - "learning_rate": 0.00019999999979125827, - "loss": 46.0, - "step": 279 - }, - { - "epoch": 0.0214079553491217, - "grad_norm": 5.980370769975707e-05, - "learning_rate": 0.0001999999997897034, - "loss": 46.0, - "step": 280 - }, - { - "epoch": 0.02148441233251142, - "grad_norm": 2.9063308829790913e-05, - "learning_rate": 0.00019999999978814276, - "loss": 46.0, - "step": 281 - }, - { - "epoch": 0.021560869315901142, - "grad_norm": 2.6975656510330737e-05, - "learning_rate": 0.00019999999978657638, - "loss": 46.0, - "step": 282 - }, - { - "epoch": 0.02163732629929086, - "grad_norm": 7.82805509516038e-05, - "learning_rate": 0.00019999999978500418, - "loss": 46.0, - "step": 283 - }, - { - "epoch": 0.02171378328268058, - "grad_norm": 3.2012809242587537e-05, - "learning_rate": 0.00019999999978342623, - "loss": 46.0, - "step": 284 - }, - { - "epoch": 0.021790240266070304, - "grad_norm": 8.559294656151906e-05, - "learning_rate": 0.0001999999997818425, - "loss": 46.0, - "step": 285 - }, - { - "epoch": 0.021866697249460023, - "grad_norm": 2.8340638891677372e-05, - "learning_rate": 0.00019999999978025305, - "loss": 46.0, - "step": 286 - }, - { - "epoch": 0.021943154232849742, - "grad_norm": 2.6537731173448265e-05, - "learning_rate": 0.00019999999977865778, - "loss": 46.0, - "step": 287 - }, - { - "epoch": 0.022019611216239462, - "grad_norm": 7.063262455631047e-05, - "learning_rate": 0.00019999999977705674, - "loss": 46.0, - "step": 288 - }, - { - "epoch": 0.022096068199629185, - "grad_norm": 0.00013856767327524722, - "learning_rate": 0.00019999999977544998, - "loss": 46.0, - "step": 289 - }, - { - "epoch": 0.022172525183018904, - "grad_norm": 4.319584695622325e-05, - "learning_rate": 0.00019999999977383742, - "loss": 46.0, - "step": 290 - }, - { - "epoch": 0.022248982166408623, - "grad_norm": 6.633839075220749e-05, - "learning_rate": 0.00019999999977221905, - "loss": 46.0, - "step": 291 - }, - { - "epoch": 0.022325439149798346, - "grad_norm": 2.470920117048081e-05, - "learning_rate": 0.00019999999977059497, - "loss": 46.0, - "step": 292 - }, - { - "epoch": 0.022401896133188066, - "grad_norm": 2.4265709726023488e-05, - "learning_rate": 0.00019999999976896512, - "loss": 46.0, - "step": 293 - }, - { - "epoch": 0.022478353116577785, - "grad_norm": 0.00011469124729046598, - "learning_rate": 0.00019999999976732946, - "loss": 46.0, - "step": 294 - }, - { - "epoch": 0.022554810099967505, - "grad_norm": 3.213988748029806e-05, - "learning_rate": 0.00019999999976568806, - "loss": 46.0, - "step": 295 - }, - { - "epoch": 0.022631267083357227, - "grad_norm": 1.801499820430763e-05, - "learning_rate": 0.00019999999976404086, - "loss": 46.0, - "step": 296 - }, - { - "epoch": 0.022707724066746947, - "grad_norm": 3.2129533792613074e-05, - "learning_rate": 0.00019999999976238791, - "loss": 46.0, - "step": 297 - }, - { - "epoch": 0.022784181050136666, - "grad_norm": 5.563296508626081e-05, - "learning_rate": 0.0001999999997607292, - "loss": 46.0, - "step": 298 - }, - { - "epoch": 0.022860638033526386, - "grad_norm": 5.471260010381229e-05, - "learning_rate": 0.00019999999975906473, - "loss": 46.0, - "step": 299 - }, - { - "epoch": 0.02293709501691611, - "grad_norm": 5.7839060900732875e-05, - "learning_rate": 0.00019999999975739449, - "loss": 46.0, - "step": 300 - }, - { - "epoch": 0.023013552000305828, - "grad_norm": 3.9000693504931405e-05, - "learning_rate": 0.00019999999975571845, - "loss": 46.0, - "step": 301 - }, - { - "epoch": 0.023090008983695547, - "grad_norm": 2.7543690521270037e-05, - "learning_rate": 0.00019999999975403663, - "loss": 46.0, - "step": 302 - }, - { - "epoch": 0.02316646596708527, - "grad_norm": 3.6833534977631643e-05, - "learning_rate": 0.0001999999997523491, - "loss": 46.0, - "step": 303 - }, - { - "epoch": 0.02324292295047499, - "grad_norm": 2.8821881642215885e-05, - "learning_rate": 0.00019999999975065574, - "loss": 46.0, - "step": 304 - }, - { - "epoch": 0.02331937993386471, - "grad_norm": 2.8563161322381347e-05, - "learning_rate": 0.00019999999974895666, - "loss": 46.0, - "step": 305 - }, - { - "epoch": 0.02339583691725443, - "grad_norm": 3.7597033951897174e-05, - "learning_rate": 0.00019999999974725177, - "loss": 46.0, - "step": 306 - }, - { - "epoch": 0.02347229390064415, - "grad_norm": 8.671709656482562e-05, - "learning_rate": 0.00019999999974554112, - "loss": 46.0, - "step": 307 - }, - { - "epoch": 0.02354875088403387, - "grad_norm": 3.126203591818921e-05, - "learning_rate": 0.0001999999997438247, - "loss": 46.0, - "step": 308 - }, - { - "epoch": 0.02362520786742359, - "grad_norm": 7.032913708826527e-05, - "learning_rate": 0.00019999999974210252, - "loss": 46.0, - "step": 309 - }, - { - "epoch": 0.02370166485081331, - "grad_norm": 3.444773028604686e-05, - "learning_rate": 0.00019999999974037457, - "loss": 46.0, - "step": 310 - }, - { - "epoch": 0.023778121834203032, - "grad_norm": 2.260547989862971e-05, - "learning_rate": 0.00019999999973864085, - "loss": 46.0, - "step": 311 - }, - { - "epoch": 0.02385457881759275, - "grad_norm": 4.152274050284177e-05, - "learning_rate": 0.00019999999973690136, - "loss": 46.0, - "step": 312 - }, - { - "epoch": 0.02393103580098247, - "grad_norm": 3.422985173529014e-05, - "learning_rate": 0.0001999999997351561, - "loss": 46.0, - "step": 313 - }, - { - "epoch": 0.024007492784372194, - "grad_norm": 5.12860351591371e-05, - "learning_rate": 0.00019999999973340508, - "loss": 46.0, - "step": 314 - }, - { - "epoch": 0.024083949767761913, - "grad_norm": 2.766688157862518e-05, - "learning_rate": 0.00019999999973164826, - "loss": 46.0, - "step": 315 - }, - { - "epoch": 0.024160406751151633, - "grad_norm": 3.869930515065789e-05, - "learning_rate": 0.0001999999997298857, - "loss": 46.0, - "step": 316 - }, - { - "epoch": 0.024236863734541352, - "grad_norm": 4.1635878005763516e-05, - "learning_rate": 0.00019999999972811737, - "loss": 46.0, - "step": 317 - }, - { - "epoch": 0.024313320717931075, - "grad_norm": 2.8059675969416276e-05, - "learning_rate": 0.00019999999972634326, - "loss": 46.0, - "step": 318 - }, - { - "epoch": 0.024389777701320794, - "grad_norm": 2.6571789931040257e-05, - "learning_rate": 0.00019999999972456338, - "loss": 46.0, - "step": 319 - }, - { - "epoch": 0.024466234684710514, - "grad_norm": 0.00010475471208337694, - "learning_rate": 0.00019999999972277775, - "loss": 46.0, - "step": 320 - }, - { - "epoch": 0.024542691668100237, - "grad_norm": 7.082799129420891e-05, - "learning_rate": 0.0001999999997209863, - "loss": 46.0, - "step": 321 - }, - { - "epoch": 0.024619148651489956, - "grad_norm": 3.336452573421411e-05, - "learning_rate": 0.00019999999971918913, - "loss": 46.0, - "step": 322 - }, - { - "epoch": 0.024695605634879676, - "grad_norm": 4.1005736420629546e-05, - "learning_rate": 0.00019999999971738618, - "loss": 46.0, - "step": 323 - }, - { - "epoch": 0.024772062618269395, - "grad_norm": 3.6239729524822906e-05, - "learning_rate": 0.00019999999971557746, - "loss": 46.0, - "step": 324 - }, - { - "epoch": 0.024848519601659118, - "grad_norm": 5.612520908471197e-05, - "learning_rate": 0.00019999999971376297, - "loss": 46.0, - "step": 325 - }, - { - "epoch": 0.024924976585048837, - "grad_norm": 4.01418365072459e-05, - "learning_rate": 0.0001999999997119427, - "loss": 46.0, - "step": 326 - }, - { - "epoch": 0.025001433568438557, - "grad_norm": 7.070326682878658e-05, - "learning_rate": 0.00019999999971011666, - "loss": 46.0, - "step": 327 - }, - { - "epoch": 0.025077890551828276, - "grad_norm": 2.9590713893412612e-05, - "learning_rate": 0.00019999999970828487, - "loss": 46.0, - "step": 328 - }, - { - "epoch": 0.025154347535218, - "grad_norm": 4.511591396294534e-05, - "learning_rate": 0.00019999999970644728, - "loss": 46.0, - "step": 329 - }, - { - "epoch": 0.02523080451860772, - "grad_norm": 5.778960985480808e-05, - "learning_rate": 0.00019999999970460395, - "loss": 46.0, - "step": 330 - }, - { - "epoch": 0.025307261501997438, - "grad_norm": 5.33730853931047e-05, - "learning_rate": 0.00019999999970275484, - "loss": 46.0, - "step": 331 - }, - { - "epoch": 0.02538371848538716, - "grad_norm": 5.5860473366919905e-05, - "learning_rate": 0.00019999999970089996, - "loss": 46.0, - "step": 332 - }, - { - "epoch": 0.02546017546877688, - "grad_norm": 3.790226764976978e-05, - "learning_rate": 0.00019999999969903934, - "loss": 46.0, - "step": 333 - }, - { - "epoch": 0.0255366324521666, - "grad_norm": 2.741688149399124e-05, - "learning_rate": 0.0001999999996971729, - "loss": 46.0, - "step": 334 - }, - { - "epoch": 0.02561308943555632, - "grad_norm": 5.0812501285690814e-05, - "learning_rate": 0.0001999999996953007, - "loss": 46.0, - "step": 335 - }, - { - "epoch": 0.02568954641894604, - "grad_norm": 0.000129085558000952, - "learning_rate": 0.00019999999969342276, - "loss": 46.0, - "step": 336 - }, - { - "epoch": 0.02576600340233576, - "grad_norm": 8.084265573415905e-05, - "learning_rate": 0.00019999999969153902, - "loss": 46.0, - "step": 337 - }, - { - "epoch": 0.02584246038572548, - "grad_norm": 3.692197424243204e-05, - "learning_rate": 0.00019999999968964952, - "loss": 46.0, - "step": 338 - }, - { - "epoch": 0.0259189173691152, - "grad_norm": 4.907184120384045e-05, - "learning_rate": 0.00019999999968775426, - "loss": 46.0, - "step": 339 - }, - { - "epoch": 0.025995374352504923, - "grad_norm": 8.835240441840142e-05, - "learning_rate": 0.00019999999968585324, - "loss": 46.0, - "step": 340 - }, - { - "epoch": 0.026071831335894642, - "grad_norm": 3.8036225305404514e-05, - "learning_rate": 0.00019999999968394643, - "loss": 46.0, - "step": 341 - }, - { - "epoch": 0.02614828831928436, - "grad_norm": 2.0803265215363353e-05, - "learning_rate": 0.00019999999968203387, - "loss": 46.0, - "step": 342 - }, - { - "epoch": 0.026224745302674084, - "grad_norm": 5.061951742391102e-05, - "learning_rate": 0.0001999999996801155, - "loss": 46.0, - "step": 343 - }, - { - "epoch": 0.026301202286063804, - "grad_norm": 2.6827849069377407e-05, - "learning_rate": 0.00019999999967819138, - "loss": 46.0, - "step": 344 - }, - { - "epoch": 0.026377659269453523, - "grad_norm": 0.00010376806312706321, - "learning_rate": 0.0001999999996762615, - "loss": 46.0, - "step": 345 - }, - { - "epoch": 0.026454116252843243, - "grad_norm": 3.130981349386275e-05, - "learning_rate": 0.00019999999967432584, - "loss": 46.0, - "step": 346 - }, - { - "epoch": 0.026530573236232965, - "grad_norm": 6.62563179503195e-05, - "learning_rate": 0.00019999999967238444, - "loss": 46.0, - "step": 347 - }, - { - "epoch": 0.026607030219622685, - "grad_norm": 5.259773388388567e-05, - "learning_rate": 0.00019999999967043724, - "loss": 46.0, - "step": 348 - }, - { - "epoch": 0.026683487203012404, - "grad_norm": 2.115760071319528e-05, - "learning_rate": 0.00019999999966848427, - "loss": 46.0, - "step": 349 - }, - { - "epoch": 0.026759944186402127, - "grad_norm": 8.141921716742218e-05, - "learning_rate": 0.00019999999966652555, - "loss": 46.0, - "step": 350 - }, - { - "epoch": 0.026836401169791847, - "grad_norm": 5.7557695981813595e-05, - "learning_rate": 0.00019999999966456106, - "loss": 46.0, - "step": 351 - }, - { - "epoch": 0.026912858153181566, - "grad_norm": 3.140844273730181e-05, - "learning_rate": 0.0001999999996625908, - "loss": 46.0, - "step": 352 - }, - { - "epoch": 0.026989315136571285, - "grad_norm": 0.0001673676015343517, - "learning_rate": 0.00019999999966061475, - "loss": 46.0, - "step": 353 - }, - { - "epoch": 0.027065772119961008, - "grad_norm": 2.577383020252455e-05, - "learning_rate": 0.00019999999965863294, - "loss": 46.0, - "step": 354 - }, - { - "epoch": 0.027142229103350728, - "grad_norm": 4.645596709451638e-05, - "learning_rate": 0.00019999999965664538, - "loss": 46.0, - "step": 355 - }, - { - "epoch": 0.027218686086740447, - "grad_norm": 4.3698026274796575e-05, - "learning_rate": 0.00019999999965465205, - "loss": 46.0, - "step": 356 - }, - { - "epoch": 0.027295143070130166, - "grad_norm": 5.712471102015115e-05, - "learning_rate": 0.0001999999996526529, - "loss": 46.0, - "step": 357 - }, - { - "epoch": 0.02737160005351989, - "grad_norm": 4.390337198856287e-05, - "learning_rate": 0.000199999999650648, - "loss": 46.0, - "step": 358 - }, - { - "epoch": 0.02744805703690961, - "grad_norm": 7.158845255617052e-05, - "learning_rate": 0.00019999999964863738, - "loss": 46.0, - "step": 359 - }, - { - "epoch": 0.027524514020299328, - "grad_norm": 4.132555841351859e-05, - "learning_rate": 0.00019999999964662095, - "loss": 46.0, - "step": 360 - }, - { - "epoch": 0.02760097100368905, - "grad_norm": 5.734768637921661e-05, - "learning_rate": 0.00019999999964459875, - "loss": 46.0, - "step": 361 - }, - { - "epoch": 0.02767742798707877, - "grad_norm": 5.01650501973927e-05, - "learning_rate": 0.00019999999964257078, - "loss": 46.0, - "step": 362 - }, - { - "epoch": 0.02775388497046849, - "grad_norm": 6.403920997399837e-05, - "learning_rate": 0.00019999999964053706, - "loss": 46.0, - "step": 363 - }, - { - "epoch": 0.02783034195385821, - "grad_norm": 9.236169717041776e-05, - "learning_rate": 0.00019999999963849754, - "loss": 46.0, - "step": 364 - }, - { - "epoch": 0.027906798937247932, - "grad_norm": 0.00011183096648892388, - "learning_rate": 0.00019999999963645228, - "loss": 46.0, - "step": 365 - }, - { - "epoch": 0.02798325592063765, - "grad_norm": 2.7487831175676547e-05, - "learning_rate": 0.00019999999963440124, - "loss": 46.0, - "step": 366 - }, - { - "epoch": 0.02805971290402737, - "grad_norm": 9.670599683886394e-05, - "learning_rate": 0.00019999999963234442, - "loss": 46.0, - "step": 367 - }, - { - "epoch": 0.02813616988741709, - "grad_norm": 9.182581561617553e-05, - "learning_rate": 0.00019999999963028184, - "loss": 46.0, - "step": 368 - }, - { - "epoch": 0.028212626870806813, - "grad_norm": 3.484496846795082e-05, - "learning_rate": 0.0001999999996282135, - "loss": 46.0, - "step": 369 - }, - { - "epoch": 0.028289083854196533, - "grad_norm": 2.1098958313814364e-05, - "learning_rate": 0.00019999999962613937, - "loss": 46.0, - "step": 370 - }, - { - "epoch": 0.028365540837586252, - "grad_norm": 3.1412764656124637e-05, - "learning_rate": 0.0001999999996240595, - "loss": 46.0, - "step": 371 - }, - { - "epoch": 0.028441997820975975, - "grad_norm": 7.061860378598794e-05, - "learning_rate": 0.0001999999996219738, - "loss": 46.0, - "step": 372 - }, - { - "epoch": 0.028518454804365694, - "grad_norm": 0.00010549635771894827, - "learning_rate": 0.00019999999961988241, - "loss": 46.0, - "step": 373 - }, - { - "epoch": 0.028594911787755414, - "grad_norm": 3.301572360214777e-05, - "learning_rate": 0.00019999999961778521, - "loss": 46.0, - "step": 374 - }, - { - "epoch": 0.028671368771145133, - "grad_norm": 3.148805626551621e-05, - "learning_rate": 0.00019999999961568224, - "loss": 46.0, - "step": 375 - }, - { - "epoch": 0.028747825754534856, - "grad_norm": 0.00010135354386875406, - "learning_rate": 0.00019999999961357352, - "loss": 46.0, - "step": 376 - }, - { - "epoch": 0.028824282737924575, - "grad_norm": 2.105231942550745e-05, - "learning_rate": 0.00019999999961145898, - "loss": 46.0, - "step": 377 - }, - { - "epoch": 0.028900739721314295, - "grad_norm": 3.620113420765847e-05, - "learning_rate": 0.00019999999960933874, - "loss": 46.0, - "step": 378 - }, - { - "epoch": 0.028977196704704018, - "grad_norm": 5.260032776277512e-05, - "learning_rate": 0.0001999999996072127, - "loss": 46.0, - "step": 379 - }, - { - "epoch": 0.029053653688093737, - "grad_norm": 9.694312029751018e-05, - "learning_rate": 0.00019999999960508086, - "loss": 46.0, - "step": 380 - }, - { - "epoch": 0.029130110671483456, - "grad_norm": 6.499604205600917e-05, - "learning_rate": 0.0001999999996029433, - "loss": 46.0, - "step": 381 - }, - { - "epoch": 0.029206567654873176, - "grad_norm": 2.0910307284793817e-05, - "learning_rate": 0.00019999999960079994, - "loss": 46.0, - "step": 382 - }, - { - "epoch": 0.0292830246382629, - "grad_norm": 7.609938620589674e-05, - "learning_rate": 0.0001999999995986508, - "loss": 46.0, - "step": 383 - }, - { - "epoch": 0.029359481621652618, - "grad_norm": 6.608160038013011e-05, - "learning_rate": 0.00019999999959649593, - "loss": 46.0, - "step": 384 - }, - { - "epoch": 0.029435938605042337, - "grad_norm": 4.28716630267445e-05, - "learning_rate": 0.00019999999959433525, - "loss": 46.0, - "step": 385 - }, - { - "epoch": 0.029512395588432057, - "grad_norm": 3.7647692806785926e-05, - "learning_rate": 0.00019999999959216882, - "loss": 46.0, - "step": 386 - }, - { - "epoch": 0.02958885257182178, - "grad_norm": 3.8156184018589556e-05, - "learning_rate": 0.00019999999958999665, - "loss": 46.0, - "step": 387 - }, - { - "epoch": 0.0296653095552115, - "grad_norm": 3.738639134098776e-05, - "learning_rate": 0.00019999999958781865, - "loss": 46.0, - "step": 388 - }, - { - "epoch": 0.02974176653860122, - "grad_norm": 6.814857624704018e-05, - "learning_rate": 0.00019999999958563493, - "loss": 46.0, - "step": 389 - }, - { - "epoch": 0.02981822352199094, - "grad_norm": 3.3049222111003473e-05, - "learning_rate": 0.00019999999958344541, - "loss": 46.0, - "step": 390 - }, - { - "epoch": 0.02989468050538066, - "grad_norm": 6.526375364046544e-05, - "learning_rate": 0.00019999999958125012, - "loss": 46.0, - "step": 391 - }, - { - "epoch": 0.02997113748877038, - "grad_norm": 5.8769906900124624e-05, - "learning_rate": 0.00019999999957904908, - "loss": 46.0, - "step": 392 - }, - { - "epoch": 0.0300475944721601, - "grad_norm": 0.00011813946912297979, - "learning_rate": 0.00019999999957684227, - "loss": 46.0, - "step": 393 - }, - { - "epoch": 0.030124051455549822, - "grad_norm": 5.27090705872979e-05, - "learning_rate": 0.00019999999957462969, - "loss": 46.0, - "step": 394 - }, - { - "epoch": 0.030200508438939542, - "grad_norm": 7.135706255212426e-05, - "learning_rate": 0.00019999999957241133, - "loss": 46.0, - "step": 395 - }, - { - "epoch": 0.03027696542232926, - "grad_norm": 0.00012790884647984058, - "learning_rate": 0.0001999999995701872, - "loss": 46.0, - "step": 396 - }, - { - "epoch": 0.03035342240571898, - "grad_norm": 0.00010968629794660956, - "learning_rate": 0.00019999999956795732, - "loss": 46.0, - "step": 397 - }, - { - "epoch": 0.030429879389108704, - "grad_norm": 7.167622970882803e-05, - "learning_rate": 0.00019999999956572166, - "loss": 46.0, - "step": 398 - }, - { - "epoch": 0.030506336372498423, - "grad_norm": 0.0002279918553540483, - "learning_rate": 0.0001999999995634802, - "loss": 46.0, - "step": 399 - }, - { - "epoch": 0.030582793355888142, - "grad_norm": 4.593819903675467e-05, - "learning_rate": 0.00019999999956123304, - "loss": 46.0, - "step": 400 - }, - { - "epoch": 0.030659250339277865, - "grad_norm": 5.459985914058052e-05, - "learning_rate": 0.00019999999955898004, - "loss": 46.0, - "step": 401 - }, - { - "epoch": 0.030735707322667585, - "grad_norm": 8.810993313090876e-05, - "learning_rate": 0.0001999999995567213, - "loss": 46.0, - "step": 402 - }, - { - "epoch": 0.030812164306057304, - "grad_norm": 4.9673231842461973e-05, - "learning_rate": 0.0001999999995544568, - "loss": 46.0, - "step": 403 - }, - { - "epoch": 0.030888621289447023, - "grad_norm": 0.000129247346194461, - "learning_rate": 0.0001999999995521865, - "loss": 46.0, - "step": 404 - }, - { - "epoch": 0.030965078272836746, - "grad_norm": 5.8459922001929954e-05, - "learning_rate": 0.00019999999954991047, - "loss": 46.0, - "step": 405 - }, - { - "epoch": 0.031041535256226466, - "grad_norm": 5.5456195696024224e-05, - "learning_rate": 0.00019999999954762863, - "loss": 46.0, - "step": 406 - }, - { - "epoch": 0.031117992239616185, - "grad_norm": 9.113064152188599e-05, - "learning_rate": 0.00019999999954534105, - "loss": 46.0, - "step": 407 - }, - { - "epoch": 0.031194449223005908, - "grad_norm": 9.12557152332738e-05, - "learning_rate": 0.00019999999954304772, - "loss": 46.0, - "step": 408 - }, - { - "epoch": 0.031270906206395624, - "grad_norm": 3.194018790964037e-05, - "learning_rate": 0.00019999999954074858, - "loss": 46.0, - "step": 409 - }, - { - "epoch": 0.03134736318978535, - "grad_norm": 5.113500446896069e-05, - "learning_rate": 0.00019999999953844365, - "loss": 46.0, - "step": 410 - }, - { - "epoch": 0.03142382017317507, - "grad_norm": 6.490101804956794e-05, - "learning_rate": 0.00019999999953613303, - "loss": 46.0, - "step": 411 - }, - { - "epoch": 0.03150027715656479, - "grad_norm": 8.563043957110494e-05, - "learning_rate": 0.00019999999953381658, - "loss": 46.0, - "step": 412 - }, - { - "epoch": 0.03157673413995451, - "grad_norm": 6.562795169884339e-05, - "learning_rate": 0.00019999999953149438, - "loss": 46.0, - "step": 413 - }, - { - "epoch": 0.03165319112334423, - "grad_norm": 4.411544796312228e-05, - "learning_rate": 0.0001999999995291664, - "loss": 46.0, - "step": 414 - }, - { - "epoch": 0.03172964810673395, - "grad_norm": 8.868756412994117e-05, - "learning_rate": 0.00019999999952683263, - "loss": 46.0, - "step": 415 - }, - { - "epoch": 0.03180610509012367, - "grad_norm": 0.0001218832258018665, - "learning_rate": 0.00019999999952449314, - "loss": 46.0, - "step": 416 - }, - { - "epoch": 0.03188256207351339, - "grad_norm": 8.545455057173967e-05, - "learning_rate": 0.00019999999952214785, - "loss": 46.0, - "step": 417 - }, - { - "epoch": 0.03195901905690311, - "grad_norm": 5.497948586707935e-05, - "learning_rate": 0.0001999999995197968, - "loss": 46.0, - "step": 418 - }, - { - "epoch": 0.03203547604029283, - "grad_norm": 5.394974505179562e-05, - "learning_rate": 0.00019999999951743997, - "loss": 46.0, - "step": 419 - }, - { - "epoch": 0.03211193302368255, - "grad_norm": 5.120428977534175e-05, - "learning_rate": 0.00019999999951507736, - "loss": 46.0, - "step": 420 - }, - { - "epoch": 0.03218839000707227, - "grad_norm": 4.472916043596342e-05, - "learning_rate": 0.00019999999951270903, - "loss": 46.0, - "step": 421 - }, - { - "epoch": 0.03226484699046199, - "grad_norm": 0.00014817029295954853, - "learning_rate": 0.0001999999995103349, - "loss": 46.0, - "step": 422 - }, - { - "epoch": 0.03234130397385171, - "grad_norm": 7.525101682404056e-05, - "learning_rate": 0.000199999999507955, - "loss": 46.0, - "step": 423 - }, - { - "epoch": 0.03241776095724143, - "grad_norm": 0.0001744249602779746, - "learning_rate": 0.00019999999950556934, - "loss": 46.0, - "step": 424 - }, - { - "epoch": 0.032494217940631155, - "grad_norm": 5.363115997170098e-05, - "learning_rate": 0.0001999999995031779, - "loss": 46.0, - "step": 425 - }, - { - "epoch": 0.032570674924020875, - "grad_norm": 9.20443344512023e-05, - "learning_rate": 0.00019999999950078066, - "loss": 46.0, - "step": 426 - }, - { - "epoch": 0.032647131907410594, - "grad_norm": 6.859291897853836e-05, - "learning_rate": 0.00019999999949837772, - "loss": 46.0, - "step": 427 - }, - { - "epoch": 0.03272358889080031, - "grad_norm": 3.25442997564096e-05, - "learning_rate": 0.00019999999949596898, - "loss": 46.0, - "step": 428 - }, - { - "epoch": 0.03280004587419003, - "grad_norm": 0.00013730718637816608, - "learning_rate": 0.00019999999949355443, - "loss": 46.0, - "step": 429 - }, - { - "epoch": 0.03287650285757975, - "grad_norm": 0.00010657977691153064, - "learning_rate": 0.00019999999949113414, - "loss": 46.0, - "step": 430 - }, - { - "epoch": 0.03295295984096947, - "grad_norm": 8.789327694103122e-05, - "learning_rate": 0.0001999999994887081, - "loss": 46.0, - "step": 431 - }, - { - "epoch": 0.0330294168243592, - "grad_norm": 3.960489993914962e-05, - "learning_rate": 0.0001999999994862763, - "loss": 46.0, - "step": 432 - }, - { - "epoch": 0.03310587380774892, - "grad_norm": 0.00012432123185135424, - "learning_rate": 0.00019999999948383868, - "loss": 46.0, - "step": 433 - }, - { - "epoch": 0.03318233079113864, - "grad_norm": 7.518427446484566e-05, - "learning_rate": 0.00019999999948139532, - "loss": 46.0, - "step": 434 - }, - { - "epoch": 0.033258787774528356, - "grad_norm": 5.9240293921902776e-05, - "learning_rate": 0.0001999999994789462, - "loss": 46.0, - "step": 435 - }, - { - "epoch": 0.033335244757918076, - "grad_norm": 7.50075196265243e-05, - "learning_rate": 0.00019999999947649126, - "loss": 46.0, - "step": 436 - }, - { - "epoch": 0.033411701741307795, - "grad_norm": 9.26538705243729e-05, - "learning_rate": 0.0001999999994740306, - "loss": 46.0, - "step": 437 - }, - { - "epoch": 0.033488158724697514, - "grad_norm": 8.948218601290137e-05, - "learning_rate": 0.00019999999947156415, - "loss": 46.0, - "step": 438 - }, - { - "epoch": 0.03356461570808724, - "grad_norm": 7.06083737895824e-05, - "learning_rate": 0.00019999999946909196, - "loss": 46.0, - "step": 439 - }, - { - "epoch": 0.03364107269147696, - "grad_norm": 6.116050644777715e-05, - "learning_rate": 0.00019999999946661396, - "loss": 46.0, - "step": 440 - }, - { - "epoch": 0.03371752967486668, - "grad_norm": 8.292579150293022e-05, - "learning_rate": 0.0001999999994641302, - "loss": 46.0, - "step": 441 - }, - { - "epoch": 0.0337939866582564, - "grad_norm": 0.0001200176848215051, - "learning_rate": 0.0001999999994616407, - "loss": 46.0, - "step": 442 - }, - { - "epoch": 0.03387044364164612, - "grad_norm": 6.51444643153809e-05, - "learning_rate": 0.0001999999994591454, - "loss": 46.0, - "step": 443 - }, - { - "epoch": 0.03394690062503584, - "grad_norm": 6.607238901779056e-05, - "learning_rate": 0.00019999999945664437, - "loss": 46.0, - "step": 444 - }, - { - "epoch": 0.03402335760842556, - "grad_norm": 7.727661431999877e-05, - "learning_rate": 0.00019999999945413755, - "loss": 46.0, - "step": 445 - }, - { - "epoch": 0.034099814591815276, - "grad_norm": 0.00010184628627030179, - "learning_rate": 0.00019999999945162494, - "loss": 46.0, - "step": 446 - }, - { - "epoch": 0.034176271575205, - "grad_norm": 7.788404764141887e-05, - "learning_rate": 0.00019999999944910656, - "loss": 46.0, - "step": 447 - }, - { - "epoch": 0.03425272855859472, - "grad_norm": 0.0001848977553891018, - "learning_rate": 0.00019999999944658243, - "loss": 46.0, - "step": 448 - }, - { - "epoch": 0.03432918554198444, - "grad_norm": 4.975163392373361e-05, - "learning_rate": 0.00019999999944405252, - "loss": 46.0, - "step": 449 - }, - { - "epoch": 0.03440564252537416, - "grad_norm": 6.76798154017888e-05, - "learning_rate": 0.00019999999944151685, - "loss": 46.0, - "step": 450 - }, - { - "epoch": 0.03448209950876388, - "grad_norm": 6.532653787871823e-05, - "learning_rate": 0.0001999999994389754, - "loss": 46.0, - "step": 451 - }, - { - "epoch": 0.0345585564921536, - "grad_norm": 6.454264075728133e-05, - "learning_rate": 0.0001999999994364282, - "loss": 46.0, - "step": 452 - }, - { - "epoch": 0.03463501347554332, - "grad_norm": 0.00014598850975744426, - "learning_rate": 0.00019999999943387523, - "loss": 46.0, - "step": 453 - }, - { - "epoch": 0.034711470458933046, - "grad_norm": 0.00027584374765865505, - "learning_rate": 0.00019999999943131645, - "loss": 46.0, - "step": 454 - }, - { - "epoch": 0.034787927442322765, - "grad_norm": 9.978777234209701e-05, - "learning_rate": 0.00019999999942875194, - "loss": 46.0, - "step": 455 - }, - { - "epoch": 0.034864384425712484, - "grad_norm": 7.483481749659404e-05, - "learning_rate": 0.00019999999942618165, - "loss": 46.0, - "step": 456 - }, - { - "epoch": 0.034940841409102204, - "grad_norm": 4.1194238292519e-05, - "learning_rate": 0.00019999999942360555, - "loss": 46.0, - "step": 457 - }, - { - "epoch": 0.03501729839249192, - "grad_norm": 0.00022903215722180903, - "learning_rate": 0.00019999999942102374, - "loss": 46.0, - "step": 458 - }, - { - "epoch": 0.03509375537588164, - "grad_norm": 0.0002285886148456484, - "learning_rate": 0.00019999999941843613, - "loss": 46.0, - "step": 459 - }, - { - "epoch": 0.03517021235927136, - "grad_norm": 9.704591502668336e-05, - "learning_rate": 0.00019999999941584278, - "loss": 46.0, - "step": 460 - }, - { - "epoch": 0.03524666934266109, - "grad_norm": 4.935009201290086e-05, - "learning_rate": 0.00019999999941324364, - "loss": 46.0, - "step": 461 - }, - { - "epoch": 0.03532312632605081, - "grad_norm": 6.34451862424612e-05, - "learning_rate": 0.00019999999941063874, - "loss": 46.0, - "step": 462 - }, - { - "epoch": 0.03539958330944053, - "grad_norm": 6.689593283226714e-05, - "learning_rate": 0.00019999999940802804, - "loss": 46.0, - "step": 463 - }, - { - "epoch": 0.035476040292830247, - "grad_norm": 0.00016250558837782592, - "learning_rate": 0.00019999999940541158, - "loss": 46.0, - "step": 464 - }, - { - "epoch": 0.035552497276219966, - "grad_norm": 7.079118222463876e-05, - "learning_rate": 0.00019999999940278936, - "loss": 46.0, - "step": 465 - }, - { - "epoch": 0.035628954259609685, - "grad_norm": 7.57503803470172e-05, - "learning_rate": 0.0001999999994001614, - "loss": 46.0, - "step": 466 - }, - { - "epoch": 0.035705411242999405, - "grad_norm": 9.18152800295502e-05, - "learning_rate": 0.00019999999939752765, - "loss": 46.0, - "step": 467 - }, - { - "epoch": 0.03578186822638913, - "grad_norm": 8.724846702534705e-05, - "learning_rate": 0.00019999999939488813, - "loss": 46.0, - "step": 468 - }, - { - "epoch": 0.03585832520977885, - "grad_norm": 0.00019557027553673834, - "learning_rate": 0.00019999999939224284, - "loss": 46.0, - "step": 469 - }, - { - "epoch": 0.03593478219316857, - "grad_norm": 0.00016852102999109775, - "learning_rate": 0.00019999999938959175, - "loss": 46.0, - "step": 470 - }, - { - "epoch": 0.03601123917655829, - "grad_norm": 9.384356235386804e-05, - "learning_rate": 0.0001999999993869349, - "loss": 46.0, - "step": 471 - }, - { - "epoch": 0.03608769615994801, - "grad_norm": 4.3559190089581534e-05, - "learning_rate": 0.00019999999938427233, - "loss": 46.0, - "step": 472 - }, - { - "epoch": 0.03616415314333773, - "grad_norm": 9.730083547765389e-05, - "learning_rate": 0.00019999999938160397, - "loss": 46.0, - "step": 473 - }, - { - "epoch": 0.03624061012672745, - "grad_norm": 9.731669706525281e-05, - "learning_rate": 0.00019999999937892978, - "loss": 46.0, - "step": 474 - }, - { - "epoch": 0.03631706711011717, - "grad_norm": 8.360798528883606e-05, - "learning_rate": 0.00019999999937624988, - "loss": 46.0, - "step": 475 - }, - { - "epoch": 0.03639352409350689, - "grad_norm": 8.222902397392318e-05, - "learning_rate": 0.00019999999937356423, - "loss": 46.0, - "step": 476 - }, - { - "epoch": 0.03646998107689661, - "grad_norm": 0.00017370429122820497, - "learning_rate": 0.00019999999937087278, - "loss": 46.0, - "step": 477 - }, - { - "epoch": 0.03654643806028633, - "grad_norm": 0.00026396807515993714, - "learning_rate": 0.00019999999936817556, - "loss": 46.0, - "step": 478 - }, - { - "epoch": 0.03662289504367605, - "grad_norm": 7.789378287270665e-05, - "learning_rate": 0.00019999999936547256, - "loss": 46.0, - "step": 479 - }, - { - "epoch": 0.03669935202706577, - "grad_norm": 0.00016577373025938869, - "learning_rate": 0.0001999999993627638, - "loss": 46.0, - "step": 480 - }, - { - "epoch": 0.03677580901045549, - "grad_norm": 0.00011259245366090909, - "learning_rate": 0.00019999999936004927, - "loss": 46.0, - "step": 481 - }, - { - "epoch": 0.03685226599384521, - "grad_norm": 0.0003629502316471189, - "learning_rate": 0.00019999999935732898, - "loss": 46.0, - "step": 482 - }, - { - "epoch": 0.036928722977234936, - "grad_norm": 0.00021626398665830493, - "learning_rate": 0.0001999999993546029, - "loss": 46.0, - "step": 483 - }, - { - "epoch": 0.037005179960624655, - "grad_norm": 0.00010009731340687722, - "learning_rate": 0.00019999999935187108, - "loss": 46.0, - "step": 484 - }, - { - "epoch": 0.037081636944014375, - "grad_norm": 8.321260975208133e-05, - "learning_rate": 0.00019999999934913344, - "loss": 46.0, - "step": 485 - }, - { - "epoch": 0.037158093927404094, - "grad_norm": 7.356579590123147e-05, - "learning_rate": 0.00019999999934639009, - "loss": 46.0, - "step": 486 - }, - { - "epoch": 0.037234550910793814, - "grad_norm": 9.992928971769288e-05, - "learning_rate": 0.00019999999934364096, - "loss": 46.0, - "step": 487 - }, - { - "epoch": 0.03731100789418353, - "grad_norm": 0.00011778913903981447, - "learning_rate": 0.00019999999934088602, - "loss": 46.0, - "step": 488 - }, - { - "epoch": 0.03738746487757325, - "grad_norm": 0.00021629378898069263, - "learning_rate": 0.00019999999933812535, - "loss": 46.0, - "step": 489 - }, - { - "epoch": 0.03746392186096298, - "grad_norm": 0.00011395724868634716, - "learning_rate": 0.0001999999993353589, - "loss": 46.0, - "step": 490 - }, - { - "epoch": 0.0375403788443527, - "grad_norm": 0.0001429607073077932, - "learning_rate": 0.00019999999933258667, - "loss": 46.0, - "step": 491 - }, - { - "epoch": 0.03761683582774242, - "grad_norm": 9.531518298899755e-05, - "learning_rate": 0.00019999999932980868, - "loss": 46.0, - "step": 492 - }, - { - "epoch": 0.03769329281113214, - "grad_norm": 7.5924486736767e-05, - "learning_rate": 0.0001999999993270249, - "loss": 46.0, - "step": 493 - }, - { - "epoch": 0.037769749794521856, - "grad_norm": 0.00014676700811833143, - "learning_rate": 0.0001999999993242354, - "loss": 46.0, - "step": 494 - }, - { - "epoch": 0.037846206777911576, - "grad_norm": 0.00011669647938106209, - "learning_rate": 0.0001999999993214401, - "loss": 46.0, - "step": 495 - }, - { - "epoch": 0.037922663761301295, - "grad_norm": 0.0001441106724087149, - "learning_rate": 0.000199999999318639, - "loss": 46.0, - "step": 496 - }, - { - "epoch": 0.03799912074469102, - "grad_norm": 8.739835175219923e-05, - "learning_rate": 0.00019999999931583217, - "loss": 46.0, - "step": 497 - }, - { - "epoch": 0.03807557772808074, - "grad_norm": 0.00025402920437045395, - "learning_rate": 0.00019999999931301956, - "loss": 46.0, - "step": 498 - }, - { - "epoch": 0.03815203471147046, - "grad_norm": 0.0001071511724148877, - "learning_rate": 0.00019999999931020118, - "loss": 46.0, - "step": 499 - }, - { - "epoch": 0.03822849169486018, - "grad_norm": 0.00023775412410032004, - "learning_rate": 0.00019999999930737702, - "loss": 46.0, - "step": 500 - }, - { - "epoch": 0.0383049486782499, - "grad_norm": 5.465760477818549e-05, - "learning_rate": 0.00019999999930454712, - "loss": 46.0, - "step": 501 - }, - { - "epoch": 0.03838140566163962, - "grad_norm": 0.00017646980995777994, - "learning_rate": 0.00019999999930171141, - "loss": 46.0, - "step": 502 - }, - { - "epoch": 0.03845786264502934, - "grad_norm": 7.21248215995729e-05, - "learning_rate": 0.00019999999929886996, - "loss": 46.0, - "step": 503 - }, - { - "epoch": 0.03853431962841906, - "grad_norm": 0.0001972862664842978, - "learning_rate": 0.00019999999929602277, - "loss": 46.0, - "step": 504 - }, - { - "epoch": 0.038610776611808784, - "grad_norm": 0.0003103954659309238, - "learning_rate": 0.00019999999929316977, - "loss": 46.0, - "step": 505 - }, - { - "epoch": 0.0386872335951985, - "grad_norm": 0.00019607577996794134, - "learning_rate": 0.000199999999290311, - "loss": 46.0, - "step": 506 - }, - { - "epoch": 0.03876369057858822, - "grad_norm": 0.00010769756772788242, - "learning_rate": 0.00019999999928744646, - "loss": 46.0, - "step": 507 - }, - { - "epoch": 0.03884014756197794, - "grad_norm": 0.0003284113190602511, - "learning_rate": 0.00019999999928457617, - "loss": 46.0, - "step": 508 - }, - { - "epoch": 0.03891660454536766, - "grad_norm": 0.0001277989213122055, - "learning_rate": 0.00019999999928170008, - "loss": 46.0, - "step": 509 - }, - { - "epoch": 0.03899306152875738, - "grad_norm": 0.00020971034246031195, - "learning_rate": 0.00019999999927881824, - "loss": 46.0, - "step": 510 - }, - { - "epoch": 0.0390695185121471, - "grad_norm": 6.318332452792674e-05, - "learning_rate": 0.00019999999927593063, - "loss": 46.0, - "step": 511 - }, - { - "epoch": 0.039145975495536826, - "grad_norm": 8.429765148321167e-05, - "learning_rate": 0.00019999999927303725, - "loss": 46.0, - "step": 512 - }, - { - "epoch": 0.039222432478926546, - "grad_norm": 0.0002804486430250108, - "learning_rate": 0.0001999999992701381, - "loss": 46.0, - "step": 513 - }, - { - "epoch": 0.039298889462316265, - "grad_norm": 9.464767936151475e-05, - "learning_rate": 0.0001999999992672332, - "loss": 46.0, - "step": 514 - }, - { - "epoch": 0.039375346445705985, - "grad_norm": 0.0003463878238108009, - "learning_rate": 0.00019999999926432249, - "loss": 46.0, - "step": 515 - }, - { - "epoch": 0.039451803429095704, - "grad_norm": 3.753870623768307e-05, - "learning_rate": 0.00019999999926140604, - "loss": 46.0, - "step": 516 - }, - { - "epoch": 0.03952826041248542, - "grad_norm": 0.00020038125512655824, - "learning_rate": 0.00019999999925848379, - "loss": 46.0, - "step": 517 - }, - { - "epoch": 0.03960471739587514, - "grad_norm": 0.00027526941266842186, - "learning_rate": 0.00019999999925555582, - "loss": 46.0, - "step": 518 - }, - { - "epoch": 0.03968117437926487, - "grad_norm": 0.000571784854400903, - "learning_rate": 0.00019999999925262202, - "loss": 46.0, - "step": 519 - }, - { - "epoch": 0.03975763136265459, - "grad_norm": 0.00020035584748256952, - "learning_rate": 0.0001999999992496825, - "loss": 46.0, - "step": 520 - }, - { - "epoch": 0.03983408834604431, - "grad_norm": 0.00019171522581018507, - "learning_rate": 0.0001999999992467372, - "loss": 46.0, - "step": 521 - }, - { - "epoch": 0.03991054532943403, - "grad_norm": 0.00016133245662786067, - "learning_rate": 0.00019999999924378612, - "loss": 46.0, - "step": 522 - }, - { - "epoch": 0.03998700231282375, - "grad_norm": 0.0001265967875951901, - "learning_rate": 0.0001999999992408293, - "loss": 46.0, - "step": 523 - }, - { - "epoch": 0.040063459296213466, - "grad_norm": 0.00018212999566458166, - "learning_rate": 0.00019999999923786665, - "loss": 46.0, - "step": 524 - }, - { - "epoch": 0.040139916279603186, - "grad_norm": 0.00015810677723493427, - "learning_rate": 0.00019999999923489827, - "loss": 46.0, - "step": 525 - }, - { - "epoch": 0.04021637326299291, - "grad_norm": 0.0003167934773955494, - "learning_rate": 0.0001999999992319241, - "loss": 46.0, - "step": 526 - }, - { - "epoch": 0.04029283024638263, - "grad_norm": 0.0001548193395137787, - "learning_rate": 0.00019999999922894418, - "loss": 46.0, - "step": 527 - }, - { - "epoch": 0.04036928722977235, - "grad_norm": 0.0002268975949846208, - "learning_rate": 0.0001999999992259585, - "loss": 46.0, - "step": 528 - }, - { - "epoch": 0.04044574421316207, - "grad_norm": 0.0004839841276407242, - "learning_rate": 0.00019999999922296703, - "loss": 46.0, - "step": 529 - }, - { - "epoch": 0.04052220119655179, - "grad_norm": 0.00027105200570076704, - "learning_rate": 0.00019999999921996978, - "loss": 46.0, - "step": 530 - }, - { - "epoch": 0.04059865817994151, - "grad_norm": 0.0003971451660618186, - "learning_rate": 0.0001999999992169668, - "loss": 46.0, - "step": 531 - }, - { - "epoch": 0.04067511516333123, - "grad_norm": 0.00019004048954229802, - "learning_rate": 0.00019999999921395802, - "loss": 46.0, - "step": 532 - }, - { - "epoch": 0.04075157214672095, - "grad_norm": 0.00024154713901225477, - "learning_rate": 0.0001999999992109435, - "loss": 46.0, - "step": 533 - }, - { - "epoch": 0.040828029130110674, - "grad_norm": 0.0004077182966284454, - "learning_rate": 0.00019999999920792318, - "loss": 46.0, - "step": 534 - }, - { - "epoch": 0.04090448611350039, - "grad_norm": 0.00010285509779350832, - "learning_rate": 0.0001999999992048971, - "loss": 46.0, - "step": 535 - }, - { - "epoch": 0.04098094309689011, - "grad_norm": 0.00016635113570373505, - "learning_rate": 0.00019999999920186526, - "loss": 46.0, - "step": 536 - }, - { - "epoch": 0.04105740008027983, - "grad_norm": 0.0001531173475086689, - "learning_rate": 0.00019999999919882765, - "loss": 46.0, - "step": 537 - }, - { - "epoch": 0.04113385706366955, - "grad_norm": 0.00016305070312228054, - "learning_rate": 0.00019999999919578424, - "loss": 46.0, - "step": 538 - }, - { - "epoch": 0.04121031404705927, - "grad_norm": 0.0001529300061520189, - "learning_rate": 0.00019999999919273508, - "loss": 46.0, - "step": 539 - }, - { - "epoch": 0.04128677103044899, - "grad_norm": 0.00027484921156428754, - "learning_rate": 0.00019999999918968018, - "loss": 46.0, - "step": 540 - }, - { - "epoch": 0.04136322801383872, - "grad_norm": 0.00030769151635468006, - "learning_rate": 0.00019999999918661948, - "loss": 46.0, - "step": 541 - }, - { - "epoch": 0.041439684997228436, - "grad_norm": 0.0004475169407669455, - "learning_rate": 0.000199999999183553, - "loss": 46.0, - "step": 542 - }, - { - "epoch": 0.041516141980618156, - "grad_norm": 0.0002054843498626724, - "learning_rate": 0.00019999999918048078, - "loss": 46.0, - "step": 543 - }, - { - "epoch": 0.041592598964007875, - "grad_norm": 0.00040626409463584423, - "learning_rate": 0.00019999999917740278, - "loss": 46.0, - "step": 544 - }, - { - "epoch": 0.041669055947397594, - "grad_norm": 0.0003186376125086099, - "learning_rate": 0.000199999999174319, - "loss": 46.0, - "step": 545 - }, - { - "epoch": 0.041745512930787314, - "grad_norm": 0.00023347382375504822, - "learning_rate": 0.00019999999917122945, - "loss": 46.0, - "step": 546 - }, - { - "epoch": 0.04182196991417703, - "grad_norm": 7.283046579686925e-05, - "learning_rate": 0.00019999999916813416, - "loss": 46.0, - "step": 547 - }, - { - "epoch": 0.04189842689756676, - "grad_norm": 0.0001351140672340989, - "learning_rate": 0.00019999999916503307, - "loss": 46.0, - "step": 548 - }, - { - "epoch": 0.04197488388095648, - "grad_norm": 0.00022846198407933116, - "learning_rate": 0.00019999999916192623, - "loss": 46.0, - "step": 549 - }, - { - "epoch": 0.0420513408643462, - "grad_norm": 0.00027839007088914514, - "learning_rate": 0.0001999999991588136, - "loss": 46.0, - "step": 550 - }, - { - "epoch": 0.04212779784773592, - "grad_norm": 0.0003140224434901029, - "learning_rate": 0.0001999999991556952, - "loss": 46.0, - "step": 551 - }, - { - "epoch": 0.04220425483112564, - "grad_norm": 0.0001425761729478836, - "learning_rate": 0.00019999999915257103, - "loss": 46.0, - "step": 552 - }, - { - "epoch": 0.04228071181451536, - "grad_norm": 8.557864202884957e-05, - "learning_rate": 0.00019999999914944113, - "loss": 46.0, - "step": 553 - }, - { - "epoch": 0.042357168797905076, - "grad_norm": 0.00045285187661647797, - "learning_rate": 0.00019999999914630543, - "loss": 46.0, - "step": 554 - }, - { - "epoch": 0.0424336257812948, - "grad_norm": 0.0001562376564834267, - "learning_rate": 0.00019999999914316395, - "loss": 46.0, - "step": 555 - }, - { - "epoch": 0.04251008276468452, - "grad_norm": 0.0002117129333782941, - "learning_rate": 0.00019999999914001673, - "loss": 46.0, - "step": 556 - }, - { - "epoch": 0.04258653974807424, - "grad_norm": 0.00028741630376316607, - "learning_rate": 0.00019999999913686368, - "loss": 46.0, - "step": 557 - }, - { - "epoch": 0.04266299673146396, - "grad_norm": 0.00020813473383896053, - "learning_rate": 0.00019999999913370494, - "loss": 46.0, - "step": 558 - }, - { - "epoch": 0.04273945371485368, - "grad_norm": 0.00019847205840051174, - "learning_rate": 0.0001999999991305404, - "loss": 46.0, - "step": 559 - }, - { - "epoch": 0.0428159106982434, - "grad_norm": 0.0001927387056639418, - "learning_rate": 0.00019999999912737005, - "loss": 46.0, - "step": 560 - }, - { - "epoch": 0.04289236768163312, - "grad_norm": 0.00036941171856597066, - "learning_rate": 0.000199999999124194, - "loss": 46.0, - "step": 561 - }, - { - "epoch": 0.04296882466502284, - "grad_norm": 0.0006141941412352026, - "learning_rate": 0.00019999999912101213, - "loss": 46.0, - "step": 562 - }, - { - "epoch": 0.043045281648412564, - "grad_norm": 0.0004593846097122878, - "learning_rate": 0.00019999999911782452, - "loss": 46.0, - "step": 563 - }, - { - "epoch": 0.043121738631802284, - "grad_norm": 0.0013217341620475054, - "learning_rate": 0.0001999999991146311, - "loss": 46.0, - "step": 564 - }, - { - "epoch": 0.043198195615192, - "grad_norm": 0.00018051821098197252, - "learning_rate": 0.00019999999911143195, - "loss": 46.0, - "step": 565 - }, - { - "epoch": 0.04327465259858172, - "grad_norm": 0.00019405983039177954, - "learning_rate": 0.00019999999910822703, - "loss": 46.0, - "step": 566 - }, - { - "epoch": 0.04335110958197144, - "grad_norm": 0.00010190376633545384, - "learning_rate": 0.00019999999910501632, - "loss": 46.0, - "step": 567 - }, - { - "epoch": 0.04342756656536116, - "grad_norm": 0.00011113641085103154, - "learning_rate": 0.00019999999910179985, - "loss": 46.0, - "step": 568 - }, - { - "epoch": 0.04350402354875088, - "grad_norm": 0.00010373206896474585, - "learning_rate": 0.0001999999990985776, - "loss": 46.0, - "step": 569 - }, - { - "epoch": 0.04358048053214061, - "grad_norm": 0.00019257872190792114, - "learning_rate": 0.0001999999990953496, - "loss": 46.0, - "step": 570 - }, - { - "epoch": 0.04365693751553033, - "grad_norm": 0.0003890043299179524, - "learning_rate": 0.0001999999990921158, - "loss": 46.0, - "step": 571 - }, - { - "epoch": 0.043733394498920046, - "grad_norm": 0.0002035276556853205, - "learning_rate": 0.00019999999908887627, - "loss": 46.0, - "step": 572 - }, - { - "epoch": 0.043809851482309765, - "grad_norm": 0.00011801308573922142, - "learning_rate": 0.00019999999908563095, - "loss": 46.0, - "step": 573 - }, - { - "epoch": 0.043886308465699485, - "grad_norm": 0.00014850600564386696, - "learning_rate": 0.00019999999908237986, - "loss": 46.0, - "step": 574 - }, - { - "epoch": 0.043962765449089204, - "grad_norm": 0.0006481784512288868, - "learning_rate": 0.000199999999079123, - "loss": 46.0, - "step": 575 - }, - { - "epoch": 0.044039222432478924, - "grad_norm": 0.0001285257749259472, - "learning_rate": 0.00019999999907586037, - "loss": 46.0, - "step": 576 - }, - { - "epoch": 0.04411567941586865, - "grad_norm": 0.0001004383884719573, - "learning_rate": 0.00019999999907259198, - "loss": 46.0, - "step": 577 - }, - { - "epoch": 0.04419213639925837, - "grad_norm": 0.00016313082596752793, - "learning_rate": 0.0001999999990693178, - "loss": 46.0, - "step": 578 - }, - { - "epoch": 0.04426859338264809, - "grad_norm": 0.00016484205843880773, - "learning_rate": 0.0001999999990660379, - "loss": 46.0, - "step": 579 - }, - { - "epoch": 0.04434505036603781, - "grad_norm": 0.00018597515008877963, - "learning_rate": 0.00019999999906275217, - "loss": 46.0, - "step": 580 - }, - { - "epoch": 0.04442150734942753, - "grad_norm": 0.00018805245053954422, - "learning_rate": 0.00019999999905946072, - "loss": 46.0, - "step": 581 - }, - { - "epoch": 0.04449796433281725, - "grad_norm": 0.0006113880081102252, - "learning_rate": 0.00019999999905616348, - "loss": 46.0, - "step": 582 - }, - { - "epoch": 0.044574421316206966, - "grad_norm": 0.00032206092146225274, - "learning_rate": 0.00019999999905286045, - "loss": 46.0, - "step": 583 - }, - { - "epoch": 0.04465087829959669, - "grad_norm": 0.00019632400653790683, - "learning_rate": 0.00019999999904955166, - "loss": 46.0, - "step": 584 - }, - { - "epoch": 0.04472733528298641, - "grad_norm": 0.00037773430813103914, - "learning_rate": 0.00019999999904623712, - "loss": 46.0, - "step": 585 - }, - { - "epoch": 0.04480379226637613, - "grad_norm": 0.00031085708178579807, - "learning_rate": 0.0001999999990429168, - "loss": 46.0, - "step": 586 - }, - { - "epoch": 0.04488024924976585, - "grad_norm": 0.00029227888444438577, - "learning_rate": 0.0001999999990395907, - "loss": 46.0, - "step": 587 - }, - { - "epoch": 0.04495670623315557, - "grad_norm": 0.0002536651154514402, - "learning_rate": 0.00019999999903625882, - "loss": 46.0, - "step": 588 - }, - { - "epoch": 0.04503316321654529, - "grad_norm": 0.00011346046085236594, - "learning_rate": 0.00019999999903292122, - "loss": 46.0, - "step": 589 - }, - { - "epoch": 0.04510962019993501, - "grad_norm": 0.000156095513375476, - "learning_rate": 0.00019999999902957783, - "loss": 46.0, - "step": 590 - }, - { - "epoch": 0.04518607718332473, - "grad_norm": 0.0003040801966562867, - "learning_rate": 0.00019999999902622865, - "loss": 46.0, - "step": 591 - }, - { - "epoch": 0.045262534166714455, - "grad_norm": 0.00018165202345699072, - "learning_rate": 0.00019999999902287372, - "loss": 46.0, - "step": 592 - }, - { - "epoch": 0.045338991150104174, - "grad_norm": 0.00017982885765377432, - "learning_rate": 0.000199999999019513, - "loss": 46.0, - "step": 593 - }, - { - "epoch": 0.045415448133493894, - "grad_norm": 0.00012296812201384455, - "learning_rate": 0.00019999999901614652, - "loss": 46.0, - "step": 594 - }, - { - "epoch": 0.04549190511688361, - "grad_norm": 0.00031843906617723405, - "learning_rate": 0.00019999999901277427, - "loss": 46.0, - "step": 595 - }, - { - "epoch": 0.04556836210027333, - "grad_norm": 0.00010394735727459192, - "learning_rate": 0.00019999999900939628, - "loss": 46.0, - "step": 596 - }, - { - "epoch": 0.04564481908366305, - "grad_norm": 0.00028753752121701837, - "learning_rate": 0.00019999999900601248, - "loss": 46.0, - "step": 597 - }, - { - "epoch": 0.04572127606705277, - "grad_norm": 0.0002125209430232644, - "learning_rate": 0.00019999999900262294, - "loss": 46.0, - "step": 598 - }, - { - "epoch": 0.0457977330504425, - "grad_norm": 0.0004885608796030283, - "learning_rate": 0.0001999999989992276, - "loss": 46.0, - "step": 599 - }, - { - "epoch": 0.04587419003383222, - "grad_norm": 0.0002425906277494505, - "learning_rate": 0.0001999999989958265, - "loss": 46.0, - "step": 600 - }, - { - "epoch": 0.045950647017221936, - "grad_norm": 0.0008075024816207588, - "learning_rate": 0.00019999999899241962, - "loss": 46.0, - "step": 601 - }, - { - "epoch": 0.046027104000611656, - "grad_norm": 0.0002986732288263738, - "learning_rate": 0.00019999999898900704, - "loss": 46.0, - "step": 602 - }, - { - "epoch": 0.046103560984001375, - "grad_norm": 0.0004090134461876005, - "learning_rate": 0.0001999999989855886, - "loss": 46.0, - "step": 603 - }, - { - "epoch": 0.046180017967391095, - "grad_norm": 0.0003536487347446382, - "learning_rate": 0.00019999999898216445, - "loss": 46.0, - "step": 604 - }, - { - "epoch": 0.046256474950780814, - "grad_norm": 0.00017800123896449804, - "learning_rate": 0.0001999999989787345, - "loss": 46.0, - "step": 605 - }, - { - "epoch": 0.04633293193417054, - "grad_norm": 0.0003169942938257009, - "learning_rate": 0.0001999999989752988, - "loss": 46.0, - "step": 606 - }, - { - "epoch": 0.04640938891756026, - "grad_norm": 0.0005397346685640514, - "learning_rate": 0.00019999999897185732, - "loss": 46.0, - "step": 607 - }, - { - "epoch": 0.04648584590094998, - "grad_norm": 0.00015565697685815394, - "learning_rate": 0.00019999999896841008, - "loss": 46.0, - "step": 608 - }, - { - "epoch": 0.0465623028843397, - "grad_norm": 0.00035490491427481174, - "learning_rate": 0.00019999999896495705, - "loss": 46.0, - "step": 609 - }, - { - "epoch": 0.04663875986772942, - "grad_norm": 0.00017223434406332672, - "learning_rate": 0.00019999999896149826, - "loss": 46.0, - "step": 610 - }, - { - "epoch": 0.04671521685111914, - "grad_norm": 0.00021444361482281238, - "learning_rate": 0.00019999999895803372, - "loss": 46.0, - "step": 611 - }, - { - "epoch": 0.04679167383450886, - "grad_norm": 0.00018947887292597443, - "learning_rate": 0.00019999999895456338, - "loss": 46.0, - "step": 612 - }, - { - "epoch": 0.04686813081789858, - "grad_norm": 0.0005515294033102691, - "learning_rate": 0.00019999999895108726, - "loss": 46.0, - "step": 613 - }, - { - "epoch": 0.0469445878012883, - "grad_norm": 0.0005505699082277715, - "learning_rate": 0.00019999999894760543, - "loss": 46.0, - "step": 614 - }, - { - "epoch": 0.04702104478467802, - "grad_norm": 0.00024242745712399483, - "learning_rate": 0.0001999999989441178, - "loss": 46.0, - "step": 615 - }, - { - "epoch": 0.04709750176806774, - "grad_norm": 0.0003676850174088031, - "learning_rate": 0.00019999999894062439, - "loss": 46.0, - "step": 616 - }, - { - "epoch": 0.04717395875145746, - "grad_norm": 0.00020813284208998084, - "learning_rate": 0.0001999999989371252, - "loss": 46.0, - "step": 617 - }, - { - "epoch": 0.04725041573484718, - "grad_norm": 0.0001982890535145998, - "learning_rate": 0.00019999999893362028, - "loss": 46.0, - "step": 618 - }, - { - "epoch": 0.0473268727182369, - "grad_norm": 0.0007860666955821216, - "learning_rate": 0.00019999999893010955, - "loss": 46.0, - "step": 619 - }, - { - "epoch": 0.04740332970162662, - "grad_norm": 0.00025393030955456197, - "learning_rate": 0.00019999999892659308, - "loss": 46.0, - "step": 620 - }, - { - "epoch": 0.047479786685016345, - "grad_norm": 0.00033899443224072456, - "learning_rate": 0.0001999999989230708, - "loss": 46.0, - "step": 621 - }, - { - "epoch": 0.047556243668406065, - "grad_norm": 0.00021985615603625774, - "learning_rate": 0.0001999999989195428, - "loss": 46.0, - "step": 622 - }, - { - "epoch": 0.047632700651795784, - "grad_norm": 0.00035814798320643604, - "learning_rate": 0.000199999998916009, - "loss": 46.0, - "step": 623 - }, - { - "epoch": 0.0477091576351855, - "grad_norm": 0.0002714464208111167, - "learning_rate": 0.00019999999891246944, - "loss": 46.0, - "step": 624 - }, - { - "epoch": 0.04778561461857522, - "grad_norm": 0.0002546482137404382, - "learning_rate": 0.0001999999989089241, - "loss": 46.0, - "step": 625 - }, - { - "epoch": 0.04786207160196494, - "grad_norm": 0.00033310535945929587, - "learning_rate": 0.000199999998905373, - "loss": 46.0, - "step": 626 - }, - { - "epoch": 0.04793852858535466, - "grad_norm": 0.0009725523414090276, - "learning_rate": 0.00019999999890181616, - "loss": 46.0, - "step": 627 - }, - { - "epoch": 0.04801498556874439, - "grad_norm": 0.00013420562027022243, - "learning_rate": 0.0001999999988982535, - "loss": 46.0, - "step": 628 - }, - { - "epoch": 0.04809144255213411, - "grad_norm": 0.0005744160735048354, - "learning_rate": 0.00019999999889468511, - "loss": 46.0, - "step": 629 - }, - { - "epoch": 0.04816789953552383, - "grad_norm": 0.00042676873272284865, - "learning_rate": 0.0001999999988911109, - "loss": 46.0, - "step": 630 - }, - { - "epoch": 0.048244356518913546, - "grad_norm": 0.0004222260322421789, - "learning_rate": 0.00019999999888753095, - "loss": 46.0, - "step": 631 - }, - { - "epoch": 0.048320813502303266, - "grad_norm": 0.00023509195307269692, - "learning_rate": 0.00019999999888394525, - "loss": 46.0, - "step": 632 - }, - { - "epoch": 0.048397270485692985, - "grad_norm": 0.0004956149496138096, - "learning_rate": 0.00019999999888035378, - "loss": 46.0, - "step": 633 - }, - { - "epoch": 0.048473727469082704, - "grad_norm": 0.000332842260831967, - "learning_rate": 0.0001999999988767565, - "loss": 46.0, - "step": 634 - }, - { - "epoch": 0.04855018445247243, - "grad_norm": 0.00015729085134807974, - "learning_rate": 0.00019999999887315348, - "loss": 46.0, - "step": 635 - }, - { - "epoch": 0.04862664143586215, - "grad_norm": 0.00039549326174892485, - "learning_rate": 0.00019999999886954472, - "loss": 46.0, - "step": 636 - }, - { - "epoch": 0.04870309841925187, - "grad_norm": 0.0005816948250867426, - "learning_rate": 0.00019999999886593015, - "loss": 46.0, - "step": 637 - }, - { - "epoch": 0.04877955540264159, - "grad_norm": 0.0006450708606280386, - "learning_rate": 0.0001999999988623098, - "loss": 46.0, - "step": 638 - }, - { - "epoch": 0.04885601238603131, - "grad_norm": 0.0005489220493473113, - "learning_rate": 0.0001999999988586837, - "loss": 46.0, - "step": 639 - }, - { - "epoch": 0.04893246936942103, - "grad_norm": 0.0005445321439765394, - "learning_rate": 0.00019999999885505183, - "loss": 46.0, - "step": 640 - }, - { - "epoch": 0.04900892635281075, - "grad_norm": 0.00037244780105538666, - "learning_rate": 0.0001999999988514142, - "loss": 46.0, - "step": 641 - }, - { - "epoch": 0.049085383336200474, - "grad_norm": 0.0006239353679120541, - "learning_rate": 0.0001999999988477708, - "loss": 46.0, - "step": 642 - }, - { - "epoch": 0.04916184031959019, - "grad_norm": 0.0004768453654833138, - "learning_rate": 0.00019999999884412158, - "loss": 46.0, - "step": 643 - }, - { - "epoch": 0.04923829730297991, - "grad_norm": 0.00021584770001936704, - "learning_rate": 0.00019999999884046666, - "loss": 46.0, - "step": 644 - }, - { - "epoch": 0.04931475428636963, - "grad_norm": 0.0004969947622157633, - "learning_rate": 0.00019999999883680593, - "loss": 46.0, - "step": 645 - }, - { - "epoch": 0.04939121126975935, - "grad_norm": 0.0006911872769705951, - "learning_rate": 0.00019999999883313946, - "loss": 46.0, - "step": 646 - }, - { - "epoch": 0.04946766825314907, - "grad_norm": 0.0009078672155737877, - "learning_rate": 0.00019999999882946718, - "loss": 46.0, - "step": 647 - }, - { - "epoch": 0.04954412523653879, - "grad_norm": 0.0010897921165451407, - "learning_rate": 0.00019999999882578913, - "loss": 46.0, - "step": 648 - }, - { - "epoch": 0.04962058221992851, - "grad_norm": 0.0005299266777001321, - "learning_rate": 0.00019999999882210537, - "loss": 46.0, - "step": 649 - }, - { - "epoch": 0.049697039203318236, - "grad_norm": 0.00026388245169073343, - "learning_rate": 0.0001999999988184158, - "loss": 46.0, - "step": 650 - }, - { - "epoch": 0.049773496186707955, - "grad_norm": 0.0002868471492547542, - "learning_rate": 0.00019999999881472046, - "loss": 46.0, - "step": 651 - }, - { - "epoch": 0.049849953170097674, - "grad_norm": 0.00046446488704532385, - "learning_rate": 0.00019999999881101937, - "loss": 46.0, - "step": 652 - }, - { - "epoch": 0.049926410153487394, - "grad_norm": 0.0015076535055413842, - "learning_rate": 0.00019999999880731249, - "loss": 46.0, - "step": 653 - }, - { - "epoch": 0.05000286713687711, - "grad_norm": 0.0005419623921625316, - "learning_rate": 0.00019999999880359983, - "loss": 46.0, - "step": 654 - }, - { - "epoch": 0.05007932412026683, - "grad_norm": 0.000653263065032661, - "learning_rate": 0.00019999999879988142, - "loss": 46.0, - "step": 655 - }, - { - "epoch": 0.05015578110365655, - "grad_norm": 0.00019291364878881723, - "learning_rate": 0.00019999999879615724, - "loss": 46.0, - "step": 656 - }, - { - "epoch": 0.05023223808704628, - "grad_norm": 0.0004329807707108557, - "learning_rate": 0.0001999999987924273, - "loss": 46.0, - "step": 657 - }, - { - "epoch": 0.050308695070436, - "grad_norm": 0.00031730011687614024, - "learning_rate": 0.00019999999878869159, - "loss": 46.0, - "step": 658 - }, - { - "epoch": 0.05038515205382572, - "grad_norm": 0.0005122473812662065, - "learning_rate": 0.00019999999878495008, - "loss": 46.0, - "step": 659 - }, - { - "epoch": 0.05046160903721544, - "grad_norm": 0.0005939305992797017, - "learning_rate": 0.0001999999987812028, - "loss": 46.0, - "step": 660 - }, - { - "epoch": 0.050538066020605156, - "grad_norm": 0.000692489673383534, - "learning_rate": 0.0001999999987774498, - "loss": 46.0, - "step": 661 - }, - { - "epoch": 0.050614523003994875, - "grad_norm": 0.00040229037404060364, - "learning_rate": 0.000199999998773691, - "loss": 46.0, - "step": 662 - }, - { - "epoch": 0.050690979987384595, - "grad_norm": 0.0014655481791123748, - "learning_rate": 0.0001999999987699264, - "loss": 46.0, - "step": 663 - }, - { - "epoch": 0.05076743697077432, - "grad_norm": 0.00015594468277413398, - "learning_rate": 0.0001999999987661561, - "loss": 46.0, - "step": 664 - }, - { - "epoch": 0.05084389395416404, - "grad_norm": 0.0004371341201476753, - "learning_rate": 0.00019999999876237998, - "loss": 46.0, - "step": 665 - }, - { - "epoch": 0.05092035093755376, - "grad_norm": 0.00044017413165420294, - "learning_rate": 0.0001999999987585981, - "loss": 46.0, - "step": 666 - }, - { - "epoch": 0.05099680792094348, - "grad_norm": 0.0006631204159930348, - "learning_rate": 0.00019999999875481046, - "loss": 46.0, - "step": 667 - }, - { - "epoch": 0.0510732649043332, - "grad_norm": 0.0005109034827910364, - "learning_rate": 0.00019999999875101705, - "loss": 46.0, - "step": 668 - }, - { - "epoch": 0.05114972188772292, - "grad_norm": 0.0002045494766207412, - "learning_rate": 0.00019999999874721787, - "loss": 46.0, - "step": 669 - }, - { - "epoch": 0.05122617887111264, - "grad_norm": 0.00024516473058611155, - "learning_rate": 0.00019999999874341292, - "loss": 46.0, - "step": 670 - }, - { - "epoch": 0.051302635854502364, - "grad_norm": 0.0008533840300515294, - "learning_rate": 0.0001999999987396022, - "loss": 46.0, - "step": 671 - }, - { - "epoch": 0.05137909283789208, - "grad_norm": 0.000275343976682052, - "learning_rate": 0.00019999999873578566, - "loss": 46.0, - "step": 672 - }, - { - "epoch": 0.0514555498212818, - "grad_norm": 0.001113425474613905, - "learning_rate": 0.0001999999987319634, - "loss": 46.0, - "step": 673 - }, - { - "epoch": 0.05153200680467152, - "grad_norm": 0.0005910403560847044, - "learning_rate": 0.0001999999987281354, - "loss": 46.0, - "step": 674 - }, - { - "epoch": 0.05160846378806124, - "grad_norm": 0.000977934803813696, - "learning_rate": 0.00019999999872430158, - "loss": 46.0, - "step": 675 - }, - { - "epoch": 0.05168492077145096, - "grad_norm": 0.0004100938094779849, - "learning_rate": 0.000199999998720462, - "loss": 46.0, - "step": 676 - }, - { - "epoch": 0.05176137775484068, - "grad_norm": 0.0031233434565365314, - "learning_rate": 0.00019999999871661664, - "loss": 46.0, - "step": 677 - }, - { - "epoch": 0.0518378347382304, - "grad_norm": 0.0007034446462057531, - "learning_rate": 0.00019999999871276556, - "loss": 46.0, - "step": 678 - }, - { - "epoch": 0.051914291721620126, - "grad_norm": 0.0006807809113524854, - "learning_rate": 0.0001999999987089087, - "loss": 46.0, - "step": 679 - }, - { - "epoch": 0.051990748705009845, - "grad_norm": 0.00047518391511403024, - "learning_rate": 0.000199999998705046, - "loss": 46.0, - "step": 680 - }, - { - "epoch": 0.052067205688399565, - "grad_norm": 0.0010379269951954484, - "learning_rate": 0.0001999999987011776, - "loss": 46.0, - "step": 681 - }, - { - "epoch": 0.052143662671789284, - "grad_norm": 0.0004005995288025588, - "learning_rate": 0.00019999999869730343, - "loss": 46.0, - "step": 682 - }, - { - "epoch": 0.052220119655179004, - "grad_norm": 0.0038646352477371693, - "learning_rate": 0.00019999999869342347, - "loss": 46.0, - "step": 683 - }, - { - "epoch": 0.05229657663856872, - "grad_norm": 0.0006725925486534834, - "learning_rate": 0.00019999999868953775, - "loss": 46.0, - "step": 684 - }, - { - "epoch": 0.05237303362195844, - "grad_norm": 0.0007942290976643562, - "learning_rate": 0.00019999999868564622, - "loss": 46.0, - "step": 685 - }, - { - "epoch": 0.05244949060534817, - "grad_norm": 0.0013885163934901357, - "learning_rate": 0.00019999999868174897, - "loss": 46.0, - "step": 686 - }, - { - "epoch": 0.05252594758873789, - "grad_norm": 0.0005410252488218248, - "learning_rate": 0.00019999999867784593, - "loss": 46.0, - "step": 687 - }, - { - "epoch": 0.05260240457212761, - "grad_norm": 0.0006587334210053086, - "learning_rate": 0.00019999999867393713, - "loss": 46.0, - "step": 688 - }, - { - "epoch": 0.05267886155551733, - "grad_norm": 0.0006875649560242891, - "learning_rate": 0.00019999999867002254, - "loss": 46.0, - "step": 689 - }, - { - "epoch": 0.052755318538907046, - "grad_norm": 0.0004901383072137833, - "learning_rate": 0.00019999999866610223, - "loss": 46.0, - "step": 690 - }, - { - "epoch": 0.052831775522296766, - "grad_norm": 0.0004494905879255384, - "learning_rate": 0.0001999999986621761, - "loss": 46.0, - "step": 691 - }, - { - "epoch": 0.052908232505686485, - "grad_norm": 0.001209255657158792, - "learning_rate": 0.0001999999986582442, - "loss": 46.0, - "step": 692 - }, - { - "epoch": 0.05298468948907621, - "grad_norm": 0.0002568756463006139, - "learning_rate": 0.00019999999865430657, - "loss": 46.0, - "step": 693 - }, - { - "epoch": 0.05306114647246593, - "grad_norm": 0.0006126717198640108, - "learning_rate": 0.00019999999865036314, - "loss": 46.0, - "step": 694 - }, - { - "epoch": 0.05313760345585565, - "grad_norm": 0.0024391694460064173, - "learning_rate": 0.00019999999864641396, - "loss": 46.0, - "step": 695 - }, - { - "epoch": 0.05321406043924537, - "grad_norm": 0.0005382664385251701, - "learning_rate": 0.000199999998642459, - "loss": 46.0, - "step": 696 - }, - { - "epoch": 0.05329051742263509, - "grad_norm": 0.0008495989604853094, - "learning_rate": 0.00019999999863849825, - "loss": 46.0, - "step": 697 - }, - { - "epoch": 0.05336697440602481, - "grad_norm": 0.0006599112530238926, - "learning_rate": 0.00019999999863453175, - "loss": 46.0, - "step": 698 - }, - { - "epoch": 0.05344343138941453, - "grad_norm": 0.0015425707679241896, - "learning_rate": 0.0001999999986305595, - "loss": 46.0, - "step": 699 - }, - { - "epoch": 0.053519888372804254, - "grad_norm": 0.0006688204011879861, - "learning_rate": 0.00019999999862658146, - "loss": 46.0, - "step": 700 - }, - { - "epoch": 0.053596345356193974, - "grad_norm": 0.0012332834303379059, - "learning_rate": 0.00019999999862259764, - "loss": 46.0, - "step": 701 - }, - { - "epoch": 0.05367280233958369, - "grad_norm": 0.00041103101102635264, - "learning_rate": 0.00019999999861860807, - "loss": 46.0, - "step": 702 - }, - { - "epoch": 0.05374925932297341, - "grad_norm": 0.0012126514920964837, - "learning_rate": 0.0001999999986146127, - "loss": 46.0, - "step": 703 - }, - { - "epoch": 0.05382571630636313, - "grad_norm": 0.0007691223290748894, - "learning_rate": 0.0001999999986106116, - "loss": 46.0, - "step": 704 - }, - { - "epoch": 0.05390217328975285, - "grad_norm": 0.0015347336884588003, - "learning_rate": 0.0001999999986066047, - "loss": 46.0, - "step": 705 - }, - { - "epoch": 0.05397863027314257, - "grad_norm": 0.00041275881812907755, - "learning_rate": 0.00019999999860259205, - "loss": 46.0, - "step": 706 - }, - { - "epoch": 0.05405508725653229, - "grad_norm": 0.0009639740455895662, - "learning_rate": 0.00019999999859857364, - "loss": 46.0, - "step": 707 - }, - { - "epoch": 0.054131544239922016, - "grad_norm": 0.0007914204034022987, - "learning_rate": 0.00019999999859454947, - "loss": 46.0, - "step": 708 - }, - { - "epoch": 0.054208001223311736, - "grad_norm": 0.0005744309164583683, - "learning_rate": 0.00019999999859051949, - "loss": 46.0, - "step": 709 - }, - { - "epoch": 0.054284458206701455, - "grad_norm": 0.0006608443800359964, - "learning_rate": 0.00019999999858648373, - "loss": 46.0, - "step": 710 - }, - { - "epoch": 0.054360915190091175, - "grad_norm": 0.00045458797831088305, - "learning_rate": 0.00019999999858244226, - "loss": 46.0, - "step": 711 - }, - { - "epoch": 0.054437372173480894, - "grad_norm": 0.00039117992855608463, - "learning_rate": 0.00019999999857839496, - "loss": 46.0, - "step": 712 - }, - { - "epoch": 0.054513829156870613, - "grad_norm": 0.0006807833560742438, - "learning_rate": 0.00019999999857434194, - "loss": 46.0, - "step": 713 - }, - { - "epoch": 0.05459028614026033, - "grad_norm": 0.0005720146582461894, - "learning_rate": 0.00019999999857028312, - "loss": 46.0, - "step": 714 - }, - { - "epoch": 0.05466674312365006, - "grad_norm": 0.00039807340363040566, - "learning_rate": 0.00019999999856621853, - "loss": 46.0, - "step": 715 - }, - { - "epoch": 0.05474320010703978, - "grad_norm": 0.00148311210796237, - "learning_rate": 0.00019999999856214816, - "loss": 46.0, - "step": 716 - }, - { - "epoch": 0.0548196570904295, - "grad_norm": 0.0012500175507739186, - "learning_rate": 0.00019999999855807205, - "loss": 46.0, - "step": 717 - }, - { - "epoch": 0.05489611407381922, - "grad_norm": 0.0014031262835487723, - "learning_rate": 0.00019999999855399017, - "loss": 46.0, - "step": 718 - }, - { - "epoch": 0.05497257105720894, - "grad_norm": 0.0023746713995933533, - "learning_rate": 0.00019999999854990253, - "loss": 46.0, - "step": 719 - }, - { - "epoch": 0.055049028040598656, - "grad_norm": 0.0006557343294844031, - "learning_rate": 0.00019999999854580908, - "loss": 46.0, - "step": 720 - }, - { - "epoch": 0.055125485023988376, - "grad_norm": 0.0008830199367366731, - "learning_rate": 0.0001999999985417099, - "loss": 46.0, - "step": 721 - }, - { - "epoch": 0.0552019420073781, - "grad_norm": 0.0013945880346000195, - "learning_rate": 0.00019999999853760492, - "loss": 46.0, - "step": 722 - }, - { - "epoch": 0.05527839899076782, - "grad_norm": 0.0006048845243640244, - "learning_rate": 0.0001999999985334942, - "loss": 46.0, - "step": 723 - }, - { - "epoch": 0.05535485597415754, - "grad_norm": 0.00040394082316197455, - "learning_rate": 0.00019999999852937767, - "loss": 46.0, - "step": 724 - }, - { - "epoch": 0.05543131295754726, - "grad_norm": 0.001725990092381835, - "learning_rate": 0.00019999999852525542, - "loss": 46.0, - "step": 725 - }, - { - "epoch": 0.05550776994093698, - "grad_norm": 0.00040107901440933347, - "learning_rate": 0.00019999999852112738, - "loss": 46.0, - "step": 726 - }, - { - "epoch": 0.0555842269243267, - "grad_norm": 0.0016303808661177754, - "learning_rate": 0.00019999999851699353, - "loss": 46.0, - "step": 727 - }, - { - "epoch": 0.05566068390771642, - "grad_norm": 0.001971200807020068, - "learning_rate": 0.00019999999851285396, - "loss": 46.0, - "step": 728 - }, - { - "epoch": 0.055737140891106145, - "grad_norm": 0.0008331132121384144, - "learning_rate": 0.0001999999985087086, - "loss": 46.0, - "step": 729 - }, - { - "epoch": 0.055813597874495864, - "grad_norm": 0.0012251917505636811, - "learning_rate": 0.0001999999985045575, - "loss": 46.0, - "step": 730 - }, - { - "epoch": 0.055890054857885584, - "grad_norm": 0.0004360883904155344, - "learning_rate": 0.00019999999850040057, - "loss": 46.0, - "step": 731 - }, - { - "epoch": 0.0559665118412753, - "grad_norm": 0.00035662445588968694, - "learning_rate": 0.00019999999849623794, - "loss": 46.0, - "step": 732 - }, - { - "epoch": 0.05604296882466502, - "grad_norm": 0.0008678619633428752, - "learning_rate": 0.0001999999984920695, - "loss": 46.0, - "step": 733 - }, - { - "epoch": 0.05611942580805474, - "grad_norm": 0.0005197175778448582, - "learning_rate": 0.0001999999984878953, - "loss": 46.0, - "step": 734 - }, - { - "epoch": 0.05619588279144446, - "grad_norm": 0.003531946800649166, - "learning_rate": 0.00019999999848371535, - "loss": 46.0, - "step": 735 - }, - { - "epoch": 0.05627233977483418, - "grad_norm": 0.0005907154991291463, - "learning_rate": 0.0001999999984795296, - "loss": 46.0, - "step": 736 - }, - { - "epoch": 0.05634879675822391, - "grad_norm": 0.0016546265687793493, - "learning_rate": 0.0001999999984753381, - "loss": 46.0, - "step": 737 - }, - { - "epoch": 0.056425253741613626, - "grad_norm": 0.0007478833431378007, - "learning_rate": 0.0001999999984711408, - "loss": 46.0, - "step": 738 - }, - { - "epoch": 0.056501710725003346, - "grad_norm": 0.0009737526997923851, - "learning_rate": 0.00019999999846693776, - "loss": 46.0, - "step": 739 - }, - { - "epoch": 0.056578167708393065, - "grad_norm": 0.0009089783998206258, - "learning_rate": 0.00019999999846272894, - "loss": 46.0, - "step": 740 - }, - { - "epoch": 0.056654624691782784, - "grad_norm": 0.0008082463755272329, - "learning_rate": 0.00019999999845851435, - "loss": 46.0, - "step": 741 - }, - { - "epoch": 0.056731081675172504, - "grad_norm": 0.00042460591066628695, - "learning_rate": 0.000199999998454294, - "loss": 46.0, - "step": 742 - }, - { - "epoch": 0.05680753865856222, - "grad_norm": 0.0007730139186605811, - "learning_rate": 0.00019999999845006787, - "loss": 46.0, - "step": 743 - }, - { - "epoch": 0.05688399564195195, - "grad_norm": 0.000482582749100402, - "learning_rate": 0.00019999999844583599, - "loss": 46.0, - "step": 744 - }, - { - "epoch": 0.05696045262534167, - "grad_norm": 0.00027285594842396677, - "learning_rate": 0.00019999999844159833, - "loss": 46.0, - "step": 745 - }, - { - "epoch": 0.05703690960873139, - "grad_norm": 0.00045095005771145225, - "learning_rate": 0.0001999999984373549, - "loss": 46.0, - "step": 746 - }, - { - "epoch": 0.05711336659212111, - "grad_norm": 0.00031077791936695576, - "learning_rate": 0.0001999999984331057, - "loss": 46.0, - "step": 747 - }, - { - "epoch": 0.05718982357551083, - "grad_norm": 0.000746348116081208, - "learning_rate": 0.0001999999984288507, - "loss": 46.0, - "step": 748 - }, - { - "epoch": 0.05726628055890055, - "grad_norm": 0.0015635429881513119, - "learning_rate": 0.00019999999842459, - "loss": 46.0, - "step": 749 - }, - { - "epoch": 0.057342737542290266, - "grad_norm": 0.0019601222593337297, - "learning_rate": 0.00019999999842032346, - "loss": 46.0, - "step": 750 - }, - { - "epoch": 0.05741919452567999, - "grad_norm": 0.001063704607076943, - "learning_rate": 0.00019999999841605116, - "loss": 46.0, - "step": 751 - }, - { - "epoch": 0.05749565150906971, - "grad_norm": 0.00038144036079756916, - "learning_rate": 0.00019999999841177312, - "loss": 46.0, - "step": 752 - }, - { - "epoch": 0.05757210849245943, - "grad_norm": 0.0005947853205725551, - "learning_rate": 0.0001999999984074893, - "loss": 46.0, - "step": 753 - }, - { - "epoch": 0.05764856547584915, - "grad_norm": 0.0008315677405335009, - "learning_rate": 0.0001999999984031997, - "loss": 46.0, - "step": 754 - }, - { - "epoch": 0.05772502245923887, - "grad_norm": 0.0005147226038388908, - "learning_rate": 0.00019999999839890434, - "loss": 46.0, - "step": 755 - }, - { - "epoch": 0.05780147944262859, - "grad_norm": 0.00045732385478913784, - "learning_rate": 0.00019999999839460323, - "loss": 46.0, - "step": 756 - }, - { - "epoch": 0.05787793642601831, - "grad_norm": 0.0015032520750537515, - "learning_rate": 0.00019999999839029635, - "loss": 46.0, - "step": 757 - }, - { - "epoch": 0.057954393409408035, - "grad_norm": 0.0011783275986090302, - "learning_rate": 0.00019999999838598366, - "loss": 46.0, - "step": 758 - }, - { - "epoch": 0.058030850392797755, - "grad_norm": 0.0007772682001814246, - "learning_rate": 0.00019999999838166523, - "loss": 46.0, - "step": 759 - }, - { - "epoch": 0.058107307376187474, - "grad_norm": 0.004088142886757851, - "learning_rate": 0.00019999999837734103, - "loss": 46.0, - "step": 760 - }, - { - "epoch": 0.05818376435957719, - "grad_norm": 0.0005342710064724088, - "learning_rate": 0.00019999999837301107, - "loss": 46.0, - "step": 761 - }, - { - "epoch": 0.05826022134296691, - "grad_norm": 0.0007272792863659561, - "learning_rate": 0.00019999999836867532, - "loss": 46.0, - "step": 762 - }, - { - "epoch": 0.05833667832635663, - "grad_norm": 0.0035962089896202087, - "learning_rate": 0.0001999999983643338, - "loss": 46.0, - "step": 763 - }, - { - "epoch": 0.05841313530974635, - "grad_norm": 0.0009250107686966658, - "learning_rate": 0.0001999999983599865, - "loss": 46.0, - "step": 764 - }, - { - "epoch": 0.05848959229313607, - "grad_norm": 0.0007521944935433567, - "learning_rate": 0.00019999999835563345, - "loss": 46.0, - "step": 765 - }, - { - "epoch": 0.0585660492765258, - "grad_norm": 0.0010559684596955776, - "learning_rate": 0.00019999999835127464, - "loss": 46.0, - "step": 766 - }, - { - "epoch": 0.05864250625991552, - "grad_norm": 0.0008711419068276882, - "learning_rate": 0.00019999999834691005, - "loss": 46.0, - "step": 767 - }, - { - "epoch": 0.058718963243305236, - "grad_norm": 0.000753355969209224, - "learning_rate": 0.00019999999834253968, - "loss": 46.0, - "step": 768 - }, - { - "epoch": 0.058795420226694955, - "grad_norm": 0.0006714974297210574, - "learning_rate": 0.00019999999833816357, - "loss": 46.0, - "step": 769 - }, - { - "epoch": 0.058871877210084675, - "grad_norm": 0.000910244882106781, - "learning_rate": 0.00019999999833378166, - "loss": 46.0, - "step": 770 - }, - { - "epoch": 0.058948334193474394, - "grad_norm": 0.0008632204262539744, - "learning_rate": 0.00019999999832939397, - "loss": 46.0, - "step": 771 - }, - { - "epoch": 0.059024791176864114, - "grad_norm": 0.0012319379020482302, - "learning_rate": 0.00019999999832500054, - "loss": 46.0, - "step": 772 - }, - { - "epoch": 0.05910124816025384, - "grad_norm": 0.0009262815001420677, - "learning_rate": 0.00019999999832060134, - "loss": 46.0, - "step": 773 - }, - { - "epoch": 0.05917770514364356, - "grad_norm": 0.0003459417202975601, - "learning_rate": 0.00019999999831619636, - "loss": 46.0, - "step": 774 - }, - { - "epoch": 0.05925416212703328, - "grad_norm": 0.0009886884363368154, - "learning_rate": 0.0001999999983117856, - "loss": 46.0, - "step": 775 - }, - { - "epoch": 0.059330619110423, - "grad_norm": 0.0003174004959873855, - "learning_rate": 0.00019999999830736908, - "loss": 46.0, - "step": 776 - }, - { - "epoch": 0.05940707609381272, - "grad_norm": 0.0006498537841252983, - "learning_rate": 0.0001999999983029468, - "loss": 46.0, - "step": 777 - }, - { - "epoch": 0.05948353307720244, - "grad_norm": 0.0006045684567652643, - "learning_rate": 0.00019999999829851874, - "loss": 46.0, - "step": 778 - }, - { - "epoch": 0.059559990060592156, - "grad_norm": 0.0008451056783087552, - "learning_rate": 0.00019999999829408492, - "loss": 46.0, - "step": 779 - }, - { - "epoch": 0.05963644704398188, - "grad_norm": 0.0006718471995554864, - "learning_rate": 0.00019999999828964533, - "loss": 46.0, - "step": 780 - }, - { - "epoch": 0.0597129040273716, - "grad_norm": 0.0004686721949838102, - "learning_rate": 0.00019999999828519997, - "loss": 46.0, - "step": 781 - }, - { - "epoch": 0.05978936101076132, - "grad_norm": 0.0010273698717355728, - "learning_rate": 0.00019999999828074883, - "loss": 46.0, - "step": 782 - }, - { - "epoch": 0.05986581799415104, - "grad_norm": 0.0007598138763569295, - "learning_rate": 0.00019999999827629192, - "loss": 46.0, - "step": 783 - }, - { - "epoch": 0.05994227497754076, - "grad_norm": 0.0017071543261408806, - "learning_rate": 0.00019999999827182923, - "loss": 46.0, - "step": 784 - }, - { - "epoch": 0.06001873196093048, - "grad_norm": 0.0026008524000644684, - "learning_rate": 0.00019999999826736083, - "loss": 46.0, - "step": 785 - }, - { - "epoch": 0.0600951889443202, - "grad_norm": 0.0023794500157237053, - "learning_rate": 0.0001999999982628866, - "loss": 46.0, - "step": 786 - }, - { - "epoch": 0.060171645927709926, - "grad_norm": 0.0008658910519443452, - "learning_rate": 0.0001999999982584066, - "loss": 46.0, - "step": 787 - }, - { - "epoch": 0.060248102911099645, - "grad_norm": 0.000469990394776687, - "learning_rate": 0.00019999999825392087, - "loss": 46.0, - "step": 788 - }, - { - "epoch": 0.060324559894489364, - "grad_norm": 0.0009368501487188041, - "learning_rate": 0.00019999999824942935, - "loss": 46.0, - "step": 789 - }, - { - "epoch": 0.060401016877879084, - "grad_norm": 0.0008547449833713472, - "learning_rate": 0.00019999999824493205, - "loss": 46.0, - "step": 790 - }, - { - "epoch": 0.0604774738612688, - "grad_norm": 0.0006957253790460527, - "learning_rate": 0.00019999999824042898, - "loss": 46.0, - "step": 791 - }, - { - "epoch": 0.06055393084465852, - "grad_norm": 0.0008682442712597549, - "learning_rate": 0.00019999999823592016, - "loss": 46.0, - "step": 792 - }, - { - "epoch": 0.06063038782804824, - "grad_norm": 0.0010879782494157553, - "learning_rate": 0.00019999999823140557, - "loss": 46.0, - "step": 793 - }, - { - "epoch": 0.06070684481143796, - "grad_norm": 0.0005859620287083089, - "learning_rate": 0.0001999999982268852, - "loss": 46.0, - "step": 794 - }, - { - "epoch": 0.06078330179482769, - "grad_norm": 0.0009435936226509511, - "learning_rate": 0.00019999999822235907, - "loss": 46.0, - "step": 795 - }, - { - "epoch": 0.06085975877821741, - "grad_norm": 0.0006902844179421663, - "learning_rate": 0.00019999999821782716, - "loss": 46.0, - "step": 796 - }, - { - "epoch": 0.060936215761607126, - "grad_norm": 0.0004523476236499846, - "learning_rate": 0.0001999999982132895, - "loss": 46.0, - "step": 797 - }, - { - "epoch": 0.061012672744996846, - "grad_norm": 0.0014437553472816944, - "learning_rate": 0.00019999999820874604, - "loss": 46.0, - "step": 798 - }, - { - "epoch": 0.061089129728386565, - "grad_norm": 0.000748743477743119, - "learning_rate": 0.0001999999982041968, - "loss": 46.0, - "step": 799 - }, - { - "epoch": 0.061165586711776285, - "grad_norm": 0.001465115463361144, - "learning_rate": 0.00019999999819964183, - "loss": 46.0, - "step": 800 - }, - { - "epoch": 0.061242043695166004, - "grad_norm": 0.00201000296510756, - "learning_rate": 0.00019999999819508108, - "loss": 46.0, - "step": 801 - }, - { - "epoch": 0.06131850067855573, - "grad_norm": 0.0008797416812740266, - "learning_rate": 0.00019999999819051456, - "loss": 46.0, - "step": 802 - }, - { - "epoch": 0.06139495766194545, - "grad_norm": 0.0008056176011450589, - "learning_rate": 0.00019999999818594226, - "loss": 46.0, - "step": 803 - }, - { - "epoch": 0.06147141464533517, - "grad_norm": 0.0008374870521947742, - "learning_rate": 0.00019999999818136422, - "loss": 46.0, - "step": 804 - }, - { - "epoch": 0.06154787162872489, - "grad_norm": 0.0007310323417186737, - "learning_rate": 0.0001999999981767804, - "loss": 46.0, - "step": 805 - }, - { - "epoch": 0.06162432861211461, - "grad_norm": 0.0005377045599743724, - "learning_rate": 0.00019999999817219078, - "loss": 46.0, - "step": 806 - }, - { - "epoch": 0.06170078559550433, - "grad_norm": 0.0023042112588882446, - "learning_rate": 0.00019999999816759542, - "loss": 46.0, - "step": 807 - }, - { - "epoch": 0.06177724257889405, - "grad_norm": 0.0018868353217840195, - "learning_rate": 0.00019999999816299426, - "loss": 46.0, - "step": 808 - }, - { - "epoch": 0.06185369956228377, - "grad_norm": 0.0009269246947951615, - "learning_rate": 0.00019999999815838737, - "loss": 46.0, - "step": 809 - }, - { - "epoch": 0.06193015654567349, - "grad_norm": 0.0008533610962331295, - "learning_rate": 0.0001999999981537747, - "loss": 46.0, - "step": 810 - }, - { - "epoch": 0.06200661352906321, - "grad_norm": 0.0012293921317905188, - "learning_rate": 0.00019999999814915623, - "loss": 46.0, - "step": 811 - }, - { - "epoch": 0.06208307051245293, - "grad_norm": 0.0009388715261593461, - "learning_rate": 0.000199999998144532, - "loss": 46.0, - "step": 812 - }, - { - "epoch": 0.06215952749584265, - "grad_norm": 0.0009311331086792052, - "learning_rate": 0.00019999999813990202, - "loss": 46.0, - "step": 813 - }, - { - "epoch": 0.06223598447923237, - "grad_norm": 0.0012165793450549245, - "learning_rate": 0.00019999999813526627, - "loss": 46.0, - "step": 814 - }, - { - "epoch": 0.06231244146262209, - "grad_norm": 0.0009007969638332725, - "learning_rate": 0.00019999999813062472, - "loss": 46.0, - "step": 815 - }, - { - "epoch": 0.062388898446011816, - "grad_norm": 0.0006988821551203728, - "learning_rate": 0.00019999999812597745, - "loss": 46.0, - "step": 816 - }, - { - "epoch": 0.062465355429401535, - "grad_norm": 0.001399722881615162, - "learning_rate": 0.00019999999812132438, - "loss": 46.0, - "step": 817 - }, - { - "epoch": 0.06254181241279125, - "grad_norm": 0.0003073534753639251, - "learning_rate": 0.00019999999811666554, - "loss": 46.0, - "step": 818 - }, - { - "epoch": 0.06261826939618097, - "grad_norm": 0.00043782329885289073, - "learning_rate": 0.00019999999811200098, - "loss": 46.0, - "step": 819 - }, - { - "epoch": 0.0626947263795707, - "grad_norm": 0.0018148773815482855, - "learning_rate": 0.00019999999810733056, - "loss": 46.0, - "step": 820 - }, - { - "epoch": 0.06277118336296042, - "grad_norm": 0.0006111579714342952, - "learning_rate": 0.00019999999810265445, - "loss": 46.0, - "step": 821 - }, - { - "epoch": 0.06284764034635014, - "grad_norm": 0.0015046574408188462, - "learning_rate": 0.0001999999980979725, - "loss": 46.0, - "step": 822 - }, - { - "epoch": 0.06292409732973986, - "grad_norm": 0.000985735678113997, - "learning_rate": 0.00019999999809328485, - "loss": 46.0, - "step": 823 - }, - { - "epoch": 0.06300055431312958, - "grad_norm": 0.00037988051190041006, - "learning_rate": 0.00019999999808859137, - "loss": 46.0, - "step": 824 - }, - { - "epoch": 0.0630770112965193, - "grad_norm": 0.0026610256172716618, - "learning_rate": 0.00019999999808389217, - "loss": 46.0, - "step": 825 - }, - { - "epoch": 0.06315346827990902, - "grad_norm": 0.0013666359009221196, - "learning_rate": 0.00019999999807918716, - "loss": 46.0, - "step": 826 - }, - { - "epoch": 0.06322992526329874, - "grad_norm": 0.0012934188125655055, - "learning_rate": 0.0001999999980744764, - "loss": 46.0, - "step": 827 - }, - { - "epoch": 0.06330638224668846, - "grad_norm": 0.001372310915030539, - "learning_rate": 0.00019999999806975992, - "loss": 46.0, - "step": 828 - }, - { - "epoch": 0.06338283923007818, - "grad_norm": 0.0019590971060097218, - "learning_rate": 0.0001999999980650376, - "loss": 46.0, - "step": 829 - }, - { - "epoch": 0.0634592962134679, - "grad_norm": 0.0009822536958381534, - "learning_rate": 0.00019999999806030952, - "loss": 46.0, - "step": 830 - }, - { - "epoch": 0.06353575319685761, - "grad_norm": 0.0006497660069726408, - "learning_rate": 0.0001999999980555757, - "loss": 46.0, - "step": 831 - }, - { - "epoch": 0.06361221018024733, - "grad_norm": 0.0016641374677419662, - "learning_rate": 0.00019999999805083606, - "loss": 46.0, - "step": 832 - }, - { - "epoch": 0.06368866716363705, - "grad_norm": 0.00133910879958421, - "learning_rate": 0.0001999999980460907, - "loss": 46.0, - "step": 833 - }, - { - "epoch": 0.06376512414702679, - "grad_norm": 0.0017276826547458768, - "learning_rate": 0.00019999999804133956, - "loss": 46.0, - "step": 834 - }, - { - "epoch": 0.0638415811304165, - "grad_norm": 0.0004873050202149898, - "learning_rate": 0.00019999999803658265, - "loss": 46.0, - "step": 835 - }, - { - "epoch": 0.06391803811380622, - "grad_norm": 0.0010182983241975307, - "learning_rate": 0.00019999999803181994, - "loss": 46.0, - "step": 836 - }, - { - "epoch": 0.06399449509719594, - "grad_norm": 0.0010844813659787178, - "learning_rate": 0.00019999999802705149, - "loss": 46.0, - "step": 837 - }, - { - "epoch": 0.06407095208058566, - "grad_norm": 0.0010255974484607577, - "learning_rate": 0.00019999999802227726, - "loss": 46.0, - "step": 838 - }, - { - "epoch": 0.06414740906397538, - "grad_norm": 0.001574929105117917, - "learning_rate": 0.00019999999801749728, - "loss": 46.0, - "step": 839 - }, - { - "epoch": 0.0642238660473651, - "grad_norm": 0.0017449459992349148, - "learning_rate": 0.00019999999801271153, - "loss": 46.0, - "step": 840 - }, - { - "epoch": 0.06430032303075482, - "grad_norm": 0.001526626176200807, - "learning_rate": 0.00019999999800792, - "loss": 46.0, - "step": 841 - }, - { - "epoch": 0.06437678001414454, - "grad_norm": 0.001373098697513342, - "learning_rate": 0.0001999999980031227, - "loss": 46.0, - "step": 842 - }, - { - "epoch": 0.06445323699753426, - "grad_norm": 0.0008916949154809117, - "learning_rate": 0.00019999999799831964, - "loss": 46.0, - "step": 843 - }, - { - "epoch": 0.06452969398092398, - "grad_norm": 0.0014329302357509732, - "learning_rate": 0.0001999999979935108, - "loss": 46.0, - "step": 844 - }, - { - "epoch": 0.0646061509643137, - "grad_norm": 0.00162373052444309, - "learning_rate": 0.00019999999798869618, - "loss": 46.0, - "step": 845 - }, - { - "epoch": 0.06468260794770342, - "grad_norm": 0.0010249036131426692, - "learning_rate": 0.00019999999798387582, - "loss": 46.0, - "step": 846 - }, - { - "epoch": 0.06475906493109314, - "grad_norm": 0.0014899412635713816, - "learning_rate": 0.00019999999797904965, - "loss": 46.0, - "step": 847 - }, - { - "epoch": 0.06483552191448286, - "grad_norm": 0.0010711546055972576, - "learning_rate": 0.00019999999797421774, - "loss": 46.0, - "step": 848 - }, - { - "epoch": 0.06491197889787259, - "grad_norm": 0.0004599509993568063, - "learning_rate": 0.00019999999796938006, - "loss": 46.0, - "step": 849 - }, - { - "epoch": 0.06498843588126231, - "grad_norm": 0.002075366908684373, - "learning_rate": 0.00019999999796453658, - "loss": 46.0, - "step": 850 - }, - { - "epoch": 0.06506489286465203, - "grad_norm": 0.0010055272141471505, - "learning_rate": 0.00019999999795968735, - "loss": 46.0, - "step": 851 - }, - { - "epoch": 0.06514134984804175, - "grad_norm": 0.0009296773350797594, - "learning_rate": 0.00019999999795483237, - "loss": 46.0, - "step": 852 - }, - { - "epoch": 0.06521780683143147, - "grad_norm": 0.0006457075360231102, - "learning_rate": 0.0001999999979499716, - "loss": 46.0, - "step": 853 - }, - { - "epoch": 0.06529426381482119, - "grad_norm": 0.0009924647165462375, - "learning_rate": 0.00019999999794510507, - "loss": 46.0, - "step": 854 - }, - { - "epoch": 0.06537072079821091, - "grad_norm": 0.0007508395938202739, - "learning_rate": 0.00019999999794023278, - "loss": 46.0, - "step": 855 - }, - { - "epoch": 0.06544717778160063, - "grad_norm": 0.0025950735434889793, - "learning_rate": 0.00019999999793535468, - "loss": 46.0, - "step": 856 - }, - { - "epoch": 0.06552363476499035, - "grad_norm": 0.0009169146069325507, - "learning_rate": 0.00019999999793047084, - "loss": 46.0, - "step": 857 - }, - { - "epoch": 0.06560009174838007, - "grad_norm": 0.0007553419563919306, - "learning_rate": 0.00019999999792558125, - "loss": 46.0, - "step": 858 - }, - { - "epoch": 0.06567654873176978, - "grad_norm": 0.0003439235151745379, - "learning_rate": 0.00019999999792068586, - "loss": 46.0, - "step": 859 - }, - { - "epoch": 0.0657530057151595, - "grad_norm": 0.0004695653624366969, - "learning_rate": 0.0001999999979157847, - "loss": 46.0, - "step": 860 - }, - { - "epoch": 0.06582946269854922, - "grad_norm": 0.0007054957677610219, - "learning_rate": 0.00019999999791087779, - "loss": 46.0, - "step": 861 - }, - { - "epoch": 0.06590591968193894, - "grad_norm": 0.0005009636515751481, - "learning_rate": 0.00019999999790596508, - "loss": 46.0, - "step": 862 - }, - { - "epoch": 0.06598237666532866, - "grad_norm": 0.0007160931127145886, - "learning_rate": 0.00019999999790104665, - "loss": 46.0, - "step": 863 - }, - { - "epoch": 0.0660588336487184, - "grad_norm": 0.0018645310774445534, - "learning_rate": 0.0001999999978961224, - "loss": 46.0, - "step": 864 - }, - { - "epoch": 0.06613529063210812, - "grad_norm": 0.0009724583942443132, - "learning_rate": 0.0001999999978911924, - "loss": 46.0, - "step": 865 - }, - { - "epoch": 0.06621174761549783, - "grad_norm": 0.001802563201636076, - "learning_rate": 0.00019999999788625664, - "loss": 46.0, - "step": 866 - }, - { - "epoch": 0.06628820459888755, - "grad_norm": 0.0010456809541210532, - "learning_rate": 0.0001999999978813151, - "loss": 46.0, - "step": 867 - }, - { - "epoch": 0.06636466158227727, - "grad_norm": 0.0007007730309851468, - "learning_rate": 0.0001999999978763678, - "loss": 46.0, - "step": 868 - }, - { - "epoch": 0.06644111856566699, - "grad_norm": 0.0006793321226723492, - "learning_rate": 0.0001999999978714147, - "loss": 46.0, - "step": 869 - }, - { - "epoch": 0.06651757554905671, - "grad_norm": 0.0010249931365251541, - "learning_rate": 0.00019999999786645588, - "loss": 46.0, - "step": 870 - }, - { - "epoch": 0.06659403253244643, - "grad_norm": 0.0005554365343414247, - "learning_rate": 0.00019999999786149127, - "loss": 46.0, - "step": 871 - }, - { - "epoch": 0.06667048951583615, - "grad_norm": 0.0017067046137526631, - "learning_rate": 0.00019999999785652088, - "loss": 46.0, - "step": 872 - }, - { - "epoch": 0.06674694649922587, - "grad_norm": 0.0014227716019377112, - "learning_rate": 0.00019999999785154474, - "loss": 46.0, - "step": 873 - }, - { - "epoch": 0.06682340348261559, - "grad_norm": 0.0004888643161393702, - "learning_rate": 0.0001999999978465628, - "loss": 46.0, - "step": 874 - }, - { - "epoch": 0.06689986046600531, - "grad_norm": 0.0007449073018506169, - "learning_rate": 0.0001999999978415751, - "loss": 46.0, - "step": 875 - }, - { - "epoch": 0.06697631744939503, - "grad_norm": 0.0009143061470240355, - "learning_rate": 0.00019999999783658164, - "loss": 46.0, - "step": 876 - }, - { - "epoch": 0.06705277443278475, - "grad_norm": 0.003081568516790867, - "learning_rate": 0.00019999999783158238, - "loss": 46.0, - "step": 877 - }, - { - "epoch": 0.06712923141617448, - "grad_norm": 0.0012243265518918633, - "learning_rate": 0.0001999999978265774, - "loss": 46.0, - "step": 878 - }, - { - "epoch": 0.0672056883995642, - "grad_norm": 0.0009646289981901646, - "learning_rate": 0.00019999999782156663, - "loss": 46.0, - "step": 879 - }, - { - "epoch": 0.06728214538295392, - "grad_norm": 0.0009335740469396114, - "learning_rate": 0.0001999999978165501, - "loss": 46.0, - "step": 880 - }, - { - "epoch": 0.06735860236634364, - "grad_norm": 0.0005281473277136683, - "learning_rate": 0.0001999999978115278, - "loss": 46.0, - "step": 881 - }, - { - "epoch": 0.06743505934973336, - "grad_norm": 0.0003626249963417649, - "learning_rate": 0.0001999999978064997, - "loss": 46.0, - "step": 882 - }, - { - "epoch": 0.06751151633312308, - "grad_norm": 0.0009607441024854779, - "learning_rate": 0.00019999999780146585, - "loss": 46.0, - "step": 883 - }, - { - "epoch": 0.0675879733165128, - "grad_norm": 0.0008950056508183479, - "learning_rate": 0.00019999999779642623, - "loss": 46.0, - "step": 884 - }, - { - "epoch": 0.06766443029990252, - "grad_norm": 0.0006860795547254384, - "learning_rate": 0.00019999999779138084, - "loss": 46.0, - "step": 885 - }, - { - "epoch": 0.06774088728329224, - "grad_norm": 0.0009584408835507929, - "learning_rate": 0.0001999999977863297, - "loss": 46.0, - "step": 886 - }, - { - "epoch": 0.06781734426668196, - "grad_norm": 0.0011941768461838365, - "learning_rate": 0.00019999999778127277, - "loss": 46.0, - "step": 887 - }, - { - "epoch": 0.06789380125007168, - "grad_norm": 0.0008309573750011623, - "learning_rate": 0.00019999999777621006, - "loss": 46.0, - "step": 888 - }, - { - "epoch": 0.0679702582334614, - "grad_norm": 0.0009365335572510958, - "learning_rate": 0.0001999999977711416, - "loss": 46.0, - "step": 889 - }, - { - "epoch": 0.06804671521685111, - "grad_norm": 0.0010838804300874472, - "learning_rate": 0.00019999999776606738, - "loss": 46.0, - "step": 890 - }, - { - "epoch": 0.06812317220024083, - "grad_norm": 0.0005349599523469806, - "learning_rate": 0.00019999999776098735, - "loss": 46.0, - "step": 891 - }, - { - "epoch": 0.06819962918363055, - "grad_norm": 0.0038097223732620478, - "learning_rate": 0.0001999999977559016, - "loss": 46.0, - "step": 892 - }, - { - "epoch": 0.06827608616702029, - "grad_norm": 0.0010471109999343753, - "learning_rate": 0.00019999999775081005, - "loss": 46.0, - "step": 893 - }, - { - "epoch": 0.06835254315041, - "grad_norm": 0.0009728758595883846, - "learning_rate": 0.00019999999774571273, - "loss": 46.0, - "step": 894 - }, - { - "epoch": 0.06842900013379973, - "grad_norm": 0.000665025319904089, - "learning_rate": 0.00019999999774060964, - "loss": 46.0, - "step": 895 - }, - { - "epoch": 0.06850545711718944, - "grad_norm": 0.0013135699555277824, - "learning_rate": 0.0001999999977355008, - "loss": 46.0, - "step": 896 - }, - { - "epoch": 0.06858191410057916, - "grad_norm": 0.0027354967314749956, - "learning_rate": 0.00019999999773038618, - "loss": 46.0, - "step": 897 - }, - { - "epoch": 0.06865837108396888, - "grad_norm": 0.0038396650925278664, - "learning_rate": 0.0001999999977252658, - "loss": 46.0, - "step": 898 - }, - { - "epoch": 0.0687348280673586, - "grad_norm": 0.001014421577565372, - "learning_rate": 0.00019999999772013963, - "loss": 46.0, - "step": 899 - }, - { - "epoch": 0.06881128505074832, - "grad_norm": 0.0014678931329399347, - "learning_rate": 0.0001999999977150077, - "loss": 46.0, - "step": 900 - }, - { - "epoch": 0.06888774203413804, - "grad_norm": 0.0017121133860200644, - "learning_rate": 0.00019999999770987, - "loss": 46.0, - "step": 901 - }, - { - "epoch": 0.06896419901752776, - "grad_norm": 0.0008576174732297659, - "learning_rate": 0.00019999999770472653, - "loss": 46.0, - "step": 902 - }, - { - "epoch": 0.06904065600091748, - "grad_norm": 0.003078488865867257, - "learning_rate": 0.0001999999976995773, - "loss": 46.0, - "step": 903 - }, - { - "epoch": 0.0691171129843072, - "grad_norm": 0.0006379986298270524, - "learning_rate": 0.00019999999769442227, - "loss": 46.0, - "step": 904 - }, - { - "epoch": 0.06919356996769692, - "grad_norm": 0.00044800524483434856, - "learning_rate": 0.00019999999768926152, - "loss": 46.0, - "step": 905 - }, - { - "epoch": 0.06927002695108664, - "grad_norm": 0.0008939265972003341, - "learning_rate": 0.00019999999768409497, - "loss": 46.0, - "step": 906 - }, - { - "epoch": 0.06934648393447637, - "grad_norm": 0.0008266211370937526, - "learning_rate": 0.00019999999767892262, - "loss": 46.0, - "step": 907 - }, - { - "epoch": 0.06942294091786609, - "grad_norm": 0.0026271925307810307, - "learning_rate": 0.00019999999767374456, - "loss": 46.0, - "step": 908 - }, - { - "epoch": 0.06949939790125581, - "grad_norm": 0.0006726975552737713, - "learning_rate": 0.00019999999766856071, - "loss": 46.0, - "step": 909 - }, - { - "epoch": 0.06957585488464553, - "grad_norm": 0.001108812284655869, - "learning_rate": 0.00019999999766337107, - "loss": 46.0, - "step": 910 - }, - { - "epoch": 0.06965231186803525, - "grad_norm": 0.0012064531911164522, - "learning_rate": 0.00019999999765817568, - "loss": 46.0, - "step": 911 - }, - { - "epoch": 0.06972876885142497, - "grad_norm": 0.0011297069722786546, - "learning_rate": 0.00019999999765297452, - "loss": 46.0, - "step": 912 - }, - { - "epoch": 0.06980522583481469, - "grad_norm": 0.0018110997043550014, - "learning_rate": 0.00019999999764776761, - "loss": 46.0, - "step": 913 - }, - { - "epoch": 0.06988168281820441, - "grad_norm": 0.0003823382721748203, - "learning_rate": 0.00019999999764255488, - "loss": 46.0, - "step": 914 - }, - { - "epoch": 0.06995813980159413, - "grad_norm": 0.0005454560159705579, - "learning_rate": 0.00019999999763733642, - "loss": 46.0, - "step": 915 - }, - { - "epoch": 0.07003459678498385, - "grad_norm": 0.002416105940937996, - "learning_rate": 0.00019999999763211217, - "loss": 46.0, - "step": 916 - }, - { - "epoch": 0.07011105376837357, - "grad_norm": 0.0006399700650945306, - "learning_rate": 0.00019999999762688217, - "loss": 46.0, - "step": 917 - }, - { - "epoch": 0.07018751075176329, - "grad_norm": 0.0007986078271642327, - "learning_rate": 0.00019999999762164642, - "loss": 46.0, - "step": 918 - }, - { - "epoch": 0.070263967735153, - "grad_norm": 0.0013469344703480601, - "learning_rate": 0.00019999999761640484, - "loss": 46.0, - "step": 919 - }, - { - "epoch": 0.07034042471854272, - "grad_norm": 0.00181288190651685, - "learning_rate": 0.0001999999976111575, - "loss": 46.0, - "step": 920 - }, - { - "epoch": 0.07041688170193244, - "grad_norm": 0.0010183494305238128, - "learning_rate": 0.00019999999760590443, - "loss": 46.0, - "step": 921 - }, - { - "epoch": 0.07049333868532218, - "grad_norm": 0.00047780590830370784, - "learning_rate": 0.00019999999760064556, - "loss": 46.0, - "step": 922 - }, - { - "epoch": 0.0705697956687119, - "grad_norm": 0.0014317670138552785, - "learning_rate": 0.00019999999759538094, - "loss": 46.0, - "step": 923 - }, - { - "epoch": 0.07064625265210162, - "grad_norm": 0.0009017597767524421, - "learning_rate": 0.00019999999759011056, - "loss": 46.0, - "step": 924 - }, - { - "epoch": 0.07072270963549133, - "grad_norm": 0.0011452081380411983, - "learning_rate": 0.0001999999975848344, - "loss": 46.0, - "step": 925 - }, - { - "epoch": 0.07079916661888105, - "grad_norm": 0.001637183828279376, - "learning_rate": 0.00019999999757955246, - "loss": 46.0, - "step": 926 - }, - { - "epoch": 0.07087562360227077, - "grad_norm": 0.006138650234788656, - "learning_rate": 0.00019999999757426475, - "loss": 46.0, - "step": 927 - }, - { - "epoch": 0.07095208058566049, - "grad_norm": 0.0009082559263333678, - "learning_rate": 0.00019999999756897127, - "loss": 46.0, - "step": 928 - }, - { - "epoch": 0.07102853756905021, - "grad_norm": 0.001173483207821846, - "learning_rate": 0.00019999999756367201, - "loss": 46.0, - "step": 929 - }, - { - "epoch": 0.07110499455243993, - "grad_norm": 0.000761194562073797, - "learning_rate": 0.000199999997558367, - "loss": 46.0, - "step": 930 - }, - { - "epoch": 0.07118145153582965, - "grad_norm": 0.0019090657588094473, - "learning_rate": 0.00019999999755305624, - "loss": 46.0, - "step": 931 - }, - { - "epoch": 0.07125790851921937, - "grad_norm": 0.000802334223408252, - "learning_rate": 0.0001999999975477397, - "loss": 46.0, - "step": 932 - }, - { - "epoch": 0.07133436550260909, - "grad_norm": 0.0006604130030609667, - "learning_rate": 0.00019999999754241737, - "loss": 46.0, - "step": 933 - }, - { - "epoch": 0.07141082248599881, - "grad_norm": 0.00108730373904109, - "learning_rate": 0.00019999999753708927, - "loss": 46.0, - "step": 934 - }, - { - "epoch": 0.07148727946938853, - "grad_norm": 0.0008749807602725923, - "learning_rate": 0.00019999999753175543, - "loss": 46.0, - "step": 935 - }, - { - "epoch": 0.07156373645277826, - "grad_norm": 0.000738863309379667, - "learning_rate": 0.0001999999975264158, - "loss": 46.0, - "step": 936 - }, - { - "epoch": 0.07164019343616798, - "grad_norm": 0.0009789742762222886, - "learning_rate": 0.00019999999752107038, - "loss": 46.0, - "step": 937 - }, - { - "epoch": 0.0717166504195577, - "grad_norm": 0.00181388680357486, - "learning_rate": 0.00019999999751571922, - "loss": 46.0, - "step": 938 - }, - { - "epoch": 0.07179310740294742, - "grad_norm": 0.0017250460805371404, - "learning_rate": 0.00019999999751036228, - "loss": 46.0, - "step": 939 - }, - { - "epoch": 0.07186956438633714, - "grad_norm": 0.0009100417955778539, - "learning_rate": 0.00019999999750499957, - "loss": 46.0, - "step": 940 - }, - { - "epoch": 0.07194602136972686, - "grad_norm": 0.0007340360316447914, - "learning_rate": 0.00019999999749963107, - "loss": 46.0, - "step": 941 - }, - { - "epoch": 0.07202247835311658, - "grad_norm": 0.001073396997526288, - "learning_rate": 0.00019999999749425684, - "loss": 46.0, - "step": 942 - }, - { - "epoch": 0.0720989353365063, - "grad_norm": 0.00076859793625772, - "learning_rate": 0.00019999999748887684, - "loss": 46.0, - "step": 943 - }, - { - "epoch": 0.07217539231989602, - "grad_norm": 0.0008783076191321015, - "learning_rate": 0.00019999999748349106, - "loss": 46.0, - "step": 944 - }, - { - "epoch": 0.07225184930328574, - "grad_norm": 0.0005738537875004113, - "learning_rate": 0.0001999999974780995, - "loss": 46.0, - "step": 945 - }, - { - "epoch": 0.07232830628667546, - "grad_norm": 0.0015352055197581649, - "learning_rate": 0.00019999999747270217, - "loss": 46.0, - "step": 946 - }, - { - "epoch": 0.07240476327006518, - "grad_norm": 0.0012679146602749825, - "learning_rate": 0.00019999999746729907, - "loss": 46.0, - "step": 947 - }, - { - "epoch": 0.0724812202534549, - "grad_norm": 0.0005767527618445456, - "learning_rate": 0.0001999999974618902, - "loss": 46.0, - "step": 948 - }, - { - "epoch": 0.07255767723684461, - "grad_norm": 0.0008155430550687015, - "learning_rate": 0.00019999999745647556, - "loss": 46.0, - "step": 949 - }, - { - "epoch": 0.07263413422023433, - "grad_norm": 0.0011201055021956563, - "learning_rate": 0.00019999999745105518, - "loss": 46.0, - "step": 950 - }, - { - "epoch": 0.07271059120362407, - "grad_norm": 0.0010096292244270444, - "learning_rate": 0.000199999997445629, - "loss": 46.0, - "step": 951 - }, - { - "epoch": 0.07278704818701379, - "grad_norm": 0.001026639831252396, - "learning_rate": 0.00019999999744019705, - "loss": 46.0, - "step": 952 - }, - { - "epoch": 0.0728635051704035, - "grad_norm": 0.0006314333877526224, - "learning_rate": 0.00019999999743475935, - "loss": 46.0, - "step": 953 - }, - { - "epoch": 0.07293996215379323, - "grad_norm": 0.0014932324411347508, - "learning_rate": 0.00019999999742931587, - "loss": 46.0, - "step": 954 - }, - { - "epoch": 0.07301641913718294, - "grad_norm": 0.0007988548022694886, - "learning_rate": 0.0001999999974238666, - "loss": 46.0, - "step": 955 - }, - { - "epoch": 0.07309287612057266, - "grad_norm": 0.0011979959672316909, - "learning_rate": 0.0001999999974184116, - "loss": 46.0, - "step": 956 - }, - { - "epoch": 0.07316933310396238, - "grad_norm": 0.0006841117865405977, - "learning_rate": 0.0001999999974129508, - "loss": 46.0, - "step": 957 - }, - { - "epoch": 0.0732457900873521, - "grad_norm": 0.0005685442010872066, - "learning_rate": 0.00019999999740748426, - "loss": 46.0, - "step": 958 - }, - { - "epoch": 0.07332224707074182, - "grad_norm": 0.0009426722535863519, - "learning_rate": 0.00019999999740201191, - "loss": 46.0, - "step": 959 - }, - { - "epoch": 0.07339870405413154, - "grad_norm": 0.0014698036247864366, - "learning_rate": 0.00019999999739653382, - "loss": 46.0, - "step": 960 - }, - { - "epoch": 0.07347516103752126, - "grad_norm": 0.0009936311980709434, - "learning_rate": 0.00019999999739104998, - "loss": 46.0, - "step": 961 - }, - { - "epoch": 0.07355161802091098, - "grad_norm": 0.001483134226873517, - "learning_rate": 0.0001999999973855603, - "loss": 46.0, - "step": 962 - }, - { - "epoch": 0.0736280750043007, - "grad_norm": 0.0008906892035156488, - "learning_rate": 0.00019999999738006492, - "loss": 46.0, - "step": 963 - }, - { - "epoch": 0.07370453198769042, - "grad_norm": 0.001091508544050157, - "learning_rate": 0.00019999999737456374, - "loss": 46.0, - "step": 964 - }, - { - "epoch": 0.07378098897108015, - "grad_norm": 0.0006207852275110781, - "learning_rate": 0.0001999999973690568, - "loss": 46.0, - "step": 965 - }, - { - "epoch": 0.07385744595446987, - "grad_norm": 0.0018408653559163213, - "learning_rate": 0.0001999999973635441, - "loss": 46.0, - "step": 966 - }, - { - "epoch": 0.07393390293785959, - "grad_norm": 0.002001339802518487, - "learning_rate": 0.0001999999973580256, - "loss": 46.0, - "step": 967 - }, - { - "epoch": 0.07401035992124931, - "grad_norm": 0.0011400439543649554, - "learning_rate": 0.00019999999735250133, - "loss": 46.0, - "step": 968 - }, - { - "epoch": 0.07408681690463903, - "grad_norm": 0.0028275521472096443, - "learning_rate": 0.00019999999734697133, - "loss": 46.0, - "step": 969 - }, - { - "epoch": 0.07416327388802875, - "grad_norm": 0.0011981431161984801, - "learning_rate": 0.00019999999734143553, - "loss": 46.0, - "step": 970 - }, - { - "epoch": 0.07423973087141847, - "grad_norm": 0.0008723320788703859, - "learning_rate": 0.00019999999733589399, - "loss": 46.0, - "step": 971 - }, - { - "epoch": 0.07431618785480819, - "grad_norm": 0.0011318976758047938, - "learning_rate": 0.00019999999733034667, - "loss": 46.0, - "step": 972 - }, - { - "epoch": 0.07439264483819791, - "grad_norm": 0.0007382151670753956, - "learning_rate": 0.00019999999732479354, - "loss": 46.0, - "step": 973 - }, - { - "epoch": 0.07446910182158763, - "grad_norm": 0.0004299880238249898, - "learning_rate": 0.00019999999731923468, - "loss": 46.0, - "step": 974 - }, - { - "epoch": 0.07454555880497735, - "grad_norm": 0.000987416016869247, - "learning_rate": 0.00019999999731367006, - "loss": 46.0, - "step": 975 - }, - { - "epoch": 0.07462201578836707, - "grad_norm": 0.0010482885409146547, - "learning_rate": 0.00019999999730809965, - "loss": 46.0, - "step": 976 - }, - { - "epoch": 0.07469847277175679, - "grad_norm": 0.0005643464392051101, - "learning_rate": 0.00019999999730252346, - "loss": 46.0, - "step": 977 - }, - { - "epoch": 0.0747749297551465, - "grad_norm": 0.0006474208785220981, - "learning_rate": 0.00019999999729694153, - "loss": 46.0, - "step": 978 - }, - { - "epoch": 0.07485138673853622, - "grad_norm": 0.001107594114728272, - "learning_rate": 0.0001999999972913538, - "loss": 46.0, - "step": 979 - }, - { - "epoch": 0.07492784372192596, - "grad_norm": 0.0016678207321092486, - "learning_rate": 0.00019999999728576032, - "loss": 46.0, - "step": 980 - }, - { - "epoch": 0.07500430070531568, - "grad_norm": 0.000690351938828826, - "learning_rate": 0.00019999999728016106, - "loss": 46.0, - "step": 981 - }, - { - "epoch": 0.0750807576887054, - "grad_norm": 0.0011274040443822742, - "learning_rate": 0.00019999999727455603, - "loss": 46.0, - "step": 982 - }, - { - "epoch": 0.07515721467209512, - "grad_norm": 0.0009322416153736413, - "learning_rate": 0.00019999999726894526, - "loss": 46.0, - "step": 983 - }, - { - "epoch": 0.07523367165548484, - "grad_norm": 0.0013927072286605835, - "learning_rate": 0.0001999999972633287, - "loss": 46.0, - "step": 984 - }, - { - "epoch": 0.07531012863887455, - "grad_norm": 0.0009432007791474462, - "learning_rate": 0.00019999999725770634, - "loss": 46.0, - "step": 985 - }, - { - "epoch": 0.07538658562226427, - "grad_norm": 0.00047698491835035384, - "learning_rate": 0.00019999999725207825, - "loss": 46.0, - "step": 986 - }, - { - "epoch": 0.075463042605654, - "grad_norm": 0.0006307463045231998, - "learning_rate": 0.00019999999724644435, - "loss": 46.0, - "step": 987 - }, - { - "epoch": 0.07553949958904371, - "grad_norm": 0.0013307937188073993, - "learning_rate": 0.00019999999724080474, - "loss": 46.0, - "step": 988 - }, - { - "epoch": 0.07561595657243343, - "grad_norm": 0.0009763085399754345, - "learning_rate": 0.0001999999972351593, - "loss": 46.0, - "step": 989 - }, - { - "epoch": 0.07569241355582315, - "grad_norm": 0.0010025585070252419, - "learning_rate": 0.00019999999722950814, - "loss": 46.0, - "step": 990 - }, - { - "epoch": 0.07576887053921287, - "grad_norm": 0.0009095129207707942, - "learning_rate": 0.00019999999722385118, - "loss": 46.0, - "step": 991 - }, - { - "epoch": 0.07584532752260259, - "grad_norm": 0.0009452021331526339, - "learning_rate": 0.00019999999721818847, - "loss": 46.0, - "step": 992 - }, - { - "epoch": 0.07592178450599231, - "grad_norm": 0.0007823688210919499, - "learning_rate": 0.00019999999721252, - "loss": 46.0, - "step": 993 - }, - { - "epoch": 0.07599824148938204, - "grad_norm": 0.0006104687927290797, - "learning_rate": 0.0001999999972068457, - "loss": 46.0, - "step": 994 - }, - { - "epoch": 0.07607469847277176, - "grad_norm": 0.0005413282196968794, - "learning_rate": 0.00019999999720116568, - "loss": 46.0, - "step": 995 - }, - { - "epoch": 0.07615115545616148, - "grad_norm": 0.0019189310260117054, - "learning_rate": 0.00019999999719547988, - "loss": 46.0, - "step": 996 - }, - { - "epoch": 0.0762276124395512, - "grad_norm": 0.0006479976000264287, - "learning_rate": 0.0001999999971897883, - "loss": 46.0, - "step": 997 - }, - { - "epoch": 0.07630406942294092, - "grad_norm": 0.0024212358985096216, - "learning_rate": 0.000199999997184091, - "loss": 46.0, - "step": 998 - }, - { - "epoch": 0.07638052640633064, - "grad_norm": 0.001033797743730247, - "learning_rate": 0.0001999999971783879, - "loss": 46.0, - "step": 999 - }, - { - "epoch": 0.07645698338972036, - "grad_norm": 0.0011149945203214884, - "learning_rate": 0.000199999997172679, - "loss": 46.0, - "step": 1000 - }, - { - "epoch": 0.07653344037311008, - "grad_norm": 0.0009869968052953482, - "learning_rate": 0.0001999999971669644, - "loss": 46.0, - "step": 1001 - }, - { - "epoch": 0.0766098973564998, - "grad_norm": 0.0008997033583000302, - "learning_rate": 0.00019999999716124395, - "loss": 46.0, - "step": 1002 - }, - { - "epoch": 0.07668635433988952, - "grad_norm": 0.0005471960757859051, - "learning_rate": 0.00019999999715551776, - "loss": 46.0, - "step": 1003 - }, - { - "epoch": 0.07676281132327924, - "grad_norm": 0.002586745424196124, - "learning_rate": 0.0001999999971497858, - "loss": 46.0, - "step": 1004 - }, - { - "epoch": 0.07683926830666896, - "grad_norm": 0.0005815322510898113, - "learning_rate": 0.0001999999971440481, - "loss": 46.0, - "step": 1005 - }, - { - "epoch": 0.07691572529005868, - "grad_norm": 0.001637510024011135, - "learning_rate": 0.0001999999971383046, - "loss": 46.0, - "step": 1006 - }, - { - "epoch": 0.0769921822734484, - "grad_norm": 0.0006479872390627861, - "learning_rate": 0.00019999999713255534, - "loss": 46.0, - "step": 1007 - }, - { - "epoch": 0.07706863925683811, - "grad_norm": 0.0017759514739736915, - "learning_rate": 0.0001999999971268003, - "loss": 46.0, - "step": 1008 - }, - { - "epoch": 0.07714509624022785, - "grad_norm": 0.0005267034866847098, - "learning_rate": 0.0001999999971210395, - "loss": 46.0, - "step": 1009 - }, - { - "epoch": 0.07722155322361757, - "grad_norm": 0.0009080252493731678, - "learning_rate": 0.00019999999711527294, - "loss": 46.0, - "step": 1010 - }, - { - "epoch": 0.07729801020700729, - "grad_norm": 0.0006503700278699398, - "learning_rate": 0.00019999999710950062, - "loss": 46.0, - "step": 1011 - }, - { - "epoch": 0.077374467190397, - "grad_norm": 0.0009566502994857728, - "learning_rate": 0.00019999999710372247, - "loss": 46.0, - "step": 1012 - }, - { - "epoch": 0.07745092417378673, - "grad_norm": 0.0010661629494279623, - "learning_rate": 0.0001999999970979386, - "loss": 46.0, - "step": 1013 - }, - { - "epoch": 0.07752738115717644, - "grad_norm": 0.0011315967421978712, - "learning_rate": 0.000199999997092149, - "loss": 46.0, - "step": 1014 - }, - { - "epoch": 0.07760383814056616, - "grad_norm": 0.0012493714457377791, - "learning_rate": 0.00019999999708635355, - "loss": 46.0, - "step": 1015 - }, - { - "epoch": 0.07768029512395588, - "grad_norm": 0.0011820078361779451, - "learning_rate": 0.00019999999708055237, - "loss": 46.0, - "step": 1016 - }, - { - "epoch": 0.0777567521073456, - "grad_norm": 0.0015900529688224196, - "learning_rate": 0.0001999999970747454, - "loss": 46.0, - "step": 1017 - }, - { - "epoch": 0.07783320909073532, - "grad_norm": 0.0008089744369499385, - "learning_rate": 0.00019999999706893267, - "loss": 46.0, - "step": 1018 - }, - { - "epoch": 0.07790966607412504, - "grad_norm": 0.0026222998276352882, - "learning_rate": 0.00019999999706311422, - "loss": 46.0, - "step": 1019 - }, - { - "epoch": 0.07798612305751476, - "grad_norm": 0.0018962507601827383, - "learning_rate": 0.00019999999705728994, - "loss": 46.0, - "step": 1020 - }, - { - "epoch": 0.07806258004090448, - "grad_norm": 0.00139760528691113, - "learning_rate": 0.0001999999970514599, - "loss": 46.0, - "step": 1021 - }, - { - "epoch": 0.0781390370242942, - "grad_norm": 0.003917804919183254, - "learning_rate": 0.0001999999970456241, - "loss": 46.0, - "step": 1022 - }, - { - "epoch": 0.07821549400768393, - "grad_norm": 0.0006986702792346478, - "learning_rate": 0.00019999999703978252, - "loss": 46.0, - "step": 1023 - }, - { - "epoch": 0.07829195099107365, - "grad_norm": 0.004320481792092323, - "learning_rate": 0.0001999999970339352, - "loss": 46.0, - "step": 1024 - }, - { - "epoch": 0.07836840797446337, - "grad_norm": 0.0010345918126404285, - "learning_rate": 0.00019999999702808205, - "loss": 46.0, - "step": 1025 - }, - { - "epoch": 0.07844486495785309, - "grad_norm": 0.0016160261584445834, - "learning_rate": 0.00019999999702222318, - "loss": 46.0, - "step": 1026 - }, - { - "epoch": 0.07852132194124281, - "grad_norm": 0.0013722765725106, - "learning_rate": 0.00019999999701635855, - "loss": 46.0, - "step": 1027 - }, - { - "epoch": 0.07859777892463253, - "grad_norm": 0.0010916527826339006, - "learning_rate": 0.00019999999701048813, - "loss": 46.0, - "step": 1028 - }, - { - "epoch": 0.07867423590802225, - "grad_norm": 0.0010964693501591682, - "learning_rate": 0.00019999999700461195, - "loss": 46.0, - "step": 1029 - }, - { - "epoch": 0.07875069289141197, - "grad_norm": 0.0010036800522357225, - "learning_rate": 0.00019999999699873, - "loss": 46.0, - "step": 1030 - }, - { - "epoch": 0.07882714987480169, - "grad_norm": 0.0009888997301459312, - "learning_rate": 0.00019999999699284225, - "loss": 46.0, - "step": 1031 - }, - { - "epoch": 0.07890360685819141, - "grad_norm": 0.001753503573127091, - "learning_rate": 0.00019999999698694872, - "loss": 46.0, - "step": 1032 - }, - { - "epoch": 0.07898006384158113, - "grad_norm": 0.0009753037593327463, - "learning_rate": 0.00019999999698104947, - "loss": 46.0, - "step": 1033 - }, - { - "epoch": 0.07905652082497085, - "grad_norm": 0.0011483731213957071, - "learning_rate": 0.00019999999697514444, - "loss": 46.0, - "step": 1034 - }, - { - "epoch": 0.07913297780836057, - "grad_norm": 0.0008962884312495589, - "learning_rate": 0.00019999999696923364, - "loss": 46.0, - "step": 1035 - }, - { - "epoch": 0.07920943479175029, - "grad_norm": 0.000925900531001389, - "learning_rate": 0.00019999999696331707, - "loss": 46.0, - "step": 1036 - }, - { - "epoch": 0.07928589177514, - "grad_norm": 0.0007393479463644326, - "learning_rate": 0.00019999999695739473, - "loss": 46.0, - "step": 1037 - }, - { - "epoch": 0.07936234875852974, - "grad_norm": 0.0013094200985506177, - "learning_rate": 0.0001999999969514666, - "loss": 46.0, - "step": 1038 - }, - { - "epoch": 0.07943880574191946, - "grad_norm": 0.002036183374002576, - "learning_rate": 0.0001999999969455327, - "loss": 46.0, - "step": 1039 - }, - { - "epoch": 0.07951526272530918, - "grad_norm": 0.0010627619922161102, - "learning_rate": 0.00019999999693959305, - "loss": 46.0, - "step": 1040 - }, - { - "epoch": 0.0795917197086989, - "grad_norm": 0.0008229623781517148, - "learning_rate": 0.00019999999693364763, - "loss": 46.0, - "step": 1041 - }, - { - "epoch": 0.07966817669208862, - "grad_norm": 0.0008170257788151503, - "learning_rate": 0.00019999999692769645, - "loss": 46.0, - "step": 1042 - }, - { - "epoch": 0.07974463367547834, - "grad_norm": 0.0005138233536854386, - "learning_rate": 0.00019999999692173946, - "loss": 46.0, - "step": 1043 - }, - { - "epoch": 0.07982109065886805, - "grad_norm": 0.0014529372565448284, - "learning_rate": 0.00019999999691577676, - "loss": 46.0, - "step": 1044 - }, - { - "epoch": 0.07989754764225777, - "grad_norm": 0.0009753629565238953, - "learning_rate": 0.00019999999690980825, - "loss": 46.0, - "step": 1045 - }, - { - "epoch": 0.0799740046256475, - "grad_norm": 0.001065521384589374, - "learning_rate": 0.00019999999690383397, - "loss": 46.0, - "step": 1046 - }, - { - "epoch": 0.08005046160903721, - "grad_norm": 0.0012711328454315662, - "learning_rate": 0.00019999999689785395, - "loss": 46.0, - "step": 1047 - }, - { - "epoch": 0.08012691859242693, - "grad_norm": 0.0011729527032002807, - "learning_rate": 0.00019999999689186812, - "loss": 46.0, - "step": 1048 - }, - { - "epoch": 0.08020337557581665, - "grad_norm": 0.000720737676601857, - "learning_rate": 0.00019999999688587655, - "loss": 46.0, - "step": 1049 - }, - { - "epoch": 0.08027983255920637, - "grad_norm": 0.0017695153364911675, - "learning_rate": 0.00019999999687987923, - "loss": 46.0, - "step": 1050 - }, - { - "epoch": 0.08035628954259609, - "grad_norm": 0.0010254306253045797, - "learning_rate": 0.00019999999687387608, - "loss": 46.0, - "step": 1051 - }, - { - "epoch": 0.08043274652598582, - "grad_norm": 0.0009627710096538067, - "learning_rate": 0.00019999999686786722, - "loss": 46.0, - "step": 1052 - }, - { - "epoch": 0.08050920350937554, - "grad_norm": 0.000522421847563237, - "learning_rate": 0.00019999999686185255, - "loss": 46.0, - "step": 1053 - }, - { - "epoch": 0.08058566049276526, - "grad_norm": 0.0007806363864801824, - "learning_rate": 0.00019999999685583214, - "loss": 46.0, - "step": 1054 - }, - { - "epoch": 0.08066211747615498, - "grad_norm": 0.0035145070869475603, - "learning_rate": 0.00019999999684980596, - "loss": 46.0, - "step": 1055 - }, - { - "epoch": 0.0807385744595447, - "grad_norm": 0.0007616059156134725, - "learning_rate": 0.00019999999684377397, - "loss": 46.0, - "step": 1056 - }, - { - "epoch": 0.08081503144293442, - "grad_norm": 0.001607340993359685, - "learning_rate": 0.00019999999683773624, - "loss": 46.0, - "step": 1057 - }, - { - "epoch": 0.08089148842632414, - "grad_norm": 0.0024056860711425543, - "learning_rate": 0.00019999999683169276, - "loss": 46.0, - "step": 1058 - }, - { - "epoch": 0.08096794540971386, - "grad_norm": 0.0007960721268318594, - "learning_rate": 0.00019999999682564345, - "loss": 46.0, - "step": 1059 - }, - { - "epoch": 0.08104440239310358, - "grad_norm": 0.0010764168109744787, - "learning_rate": 0.00019999999681958843, - "loss": 46.0, - "step": 1060 - }, - { - "epoch": 0.0811208593764933, - "grad_norm": 0.001000276650302112, - "learning_rate": 0.0001999999968135276, - "loss": 46.0, - "step": 1061 - }, - { - "epoch": 0.08119731635988302, - "grad_norm": 0.0010639075189828873, - "learning_rate": 0.00019999999680746103, - "loss": 46.0, - "step": 1062 - }, - { - "epoch": 0.08127377334327274, - "grad_norm": 0.0025826103519648314, - "learning_rate": 0.00019999999680138868, - "loss": 46.0, - "step": 1063 - }, - { - "epoch": 0.08135023032666246, - "grad_norm": 0.0008312023128382862, - "learning_rate": 0.00019999999679531057, - "loss": 46.0, - "step": 1064 - }, - { - "epoch": 0.08142668731005218, - "grad_norm": 0.0008342218352481723, - "learning_rate": 0.00019999999678922667, - "loss": 46.0, - "step": 1065 - }, - { - "epoch": 0.0815031442934419, - "grad_norm": 0.0007059397175908089, - "learning_rate": 0.000199999996783137, - "loss": 46.0, - "step": 1066 - }, - { - "epoch": 0.08157960127683163, - "grad_norm": 0.0006852514925412834, - "learning_rate": 0.00019999999677704157, - "loss": 46.0, - "step": 1067 - }, - { - "epoch": 0.08165605826022135, - "grad_norm": 0.00293467054143548, - "learning_rate": 0.00019999999677094036, - "loss": 46.0, - "step": 1068 - }, - { - "epoch": 0.08173251524361107, - "grad_norm": 0.0012233840534463525, - "learning_rate": 0.00019999999676483343, - "loss": 46.0, - "step": 1069 - }, - { - "epoch": 0.08180897222700079, - "grad_norm": 0.0007880316697992384, - "learning_rate": 0.00019999999675872067, - "loss": 46.0, - "step": 1070 - }, - { - "epoch": 0.0818854292103905, - "grad_norm": 0.0005319473566487432, - "learning_rate": 0.00019999999675260216, - "loss": 46.0, - "step": 1071 - }, - { - "epoch": 0.08196188619378023, - "grad_norm": 0.0007042661309242249, - "learning_rate": 0.00019999999674647788, - "loss": 46.0, - "step": 1072 - }, - { - "epoch": 0.08203834317716995, - "grad_norm": 0.0010053367586806417, - "learning_rate": 0.00019999999674034786, - "loss": 46.0, - "step": 1073 - }, - { - "epoch": 0.08211480016055966, - "grad_norm": 0.004070596303790808, - "learning_rate": 0.00019999999673421203, - "loss": 46.0, - "step": 1074 - }, - { - "epoch": 0.08219125714394938, - "grad_norm": 0.002002035966143012, - "learning_rate": 0.00019999999672807043, - "loss": 46.0, - "step": 1075 - }, - { - "epoch": 0.0822677141273391, - "grad_norm": 0.0011843186803162098, - "learning_rate": 0.0001999999967219231, - "loss": 46.0, - "step": 1076 - }, - { - "epoch": 0.08234417111072882, - "grad_norm": 0.0007600758108310401, - "learning_rate": 0.00019999999671576997, - "loss": 46.0, - "step": 1077 - }, - { - "epoch": 0.08242062809411854, - "grad_norm": 0.001096943742595613, - "learning_rate": 0.00019999999670961108, - "loss": 46.0, - "step": 1078 - }, - { - "epoch": 0.08249708507750826, - "grad_norm": 0.000551481731235981, - "learning_rate": 0.00019999999670344641, - "loss": 46.0, - "step": 1079 - }, - { - "epoch": 0.08257354206089798, - "grad_norm": 0.002418719232082367, - "learning_rate": 0.000199999996697276, - "loss": 46.0, - "step": 1080 - }, - { - "epoch": 0.08264999904428771, - "grad_norm": 0.0008597131236456335, - "learning_rate": 0.0001999999966910998, - "loss": 46.0, - "step": 1081 - }, - { - "epoch": 0.08272645602767743, - "grad_norm": 0.0015707275597378612, - "learning_rate": 0.00019999999668491783, - "loss": 46.0, - "step": 1082 - }, - { - "epoch": 0.08280291301106715, - "grad_norm": 0.003683779388666153, - "learning_rate": 0.00019999999667873007, - "loss": 46.0, - "step": 1083 - }, - { - "epoch": 0.08287936999445687, - "grad_norm": 0.0003693265316542238, - "learning_rate": 0.00019999999667253657, - "loss": 46.0, - "step": 1084 - }, - { - "epoch": 0.08295582697784659, - "grad_norm": 0.0010062635410577059, - "learning_rate": 0.0001999999966663373, - "loss": 46.0, - "step": 1085 - }, - { - "epoch": 0.08303228396123631, - "grad_norm": 0.0003948551893699914, - "learning_rate": 0.00019999999666013224, - "loss": 46.0, - "step": 1086 - }, - { - "epoch": 0.08310874094462603, - "grad_norm": 0.0009817811660468578, - "learning_rate": 0.00019999999665392144, - "loss": 46.0, - "step": 1087 - }, - { - "epoch": 0.08318519792801575, - "grad_norm": 0.0008574684034101665, - "learning_rate": 0.00019999999664770487, - "loss": 46.0, - "step": 1088 - }, - { - "epoch": 0.08326165491140547, - "grad_norm": 0.0024348909500986338, - "learning_rate": 0.0001999999966414825, - "loss": 46.0, - "step": 1089 - }, - { - "epoch": 0.08333811189479519, - "grad_norm": 0.004272660706192255, - "learning_rate": 0.00019999999663525438, - "loss": 46.0, - "step": 1090 - }, - { - "epoch": 0.08341456887818491, - "grad_norm": 0.0010652975179255009, - "learning_rate": 0.00019999999662902046, - "loss": 46.0, - "step": 1091 - }, - { - "epoch": 0.08349102586157463, - "grad_norm": 0.0014635265106335282, - "learning_rate": 0.0001999999966227808, - "loss": 46.0, - "step": 1092 - }, - { - "epoch": 0.08356748284496435, - "grad_norm": 0.002833076985552907, - "learning_rate": 0.00019999999661653536, - "loss": 46.0, - "step": 1093 - }, - { - "epoch": 0.08364393982835407, - "grad_norm": 0.001694944454357028, - "learning_rate": 0.00019999999661028417, - "loss": 46.0, - "step": 1094 - }, - { - "epoch": 0.08372039681174379, - "grad_norm": 0.0007754330872558057, - "learning_rate": 0.0001999999966040272, - "loss": 46.0, - "step": 1095 - }, - { - "epoch": 0.08379685379513352, - "grad_norm": 0.0017635487020015717, - "learning_rate": 0.00019999999659776446, - "loss": 46.0, - "step": 1096 - }, - { - "epoch": 0.08387331077852324, - "grad_norm": 0.0010679919505491853, - "learning_rate": 0.00019999999659149593, - "loss": 46.0, - "step": 1097 - }, - { - "epoch": 0.08394976776191296, - "grad_norm": 0.0011753238504752517, - "learning_rate": 0.00019999999658522165, - "loss": 46.0, - "step": 1098 - }, - { - "epoch": 0.08402622474530268, - "grad_norm": 0.001619552611373365, - "learning_rate": 0.0001999999965789416, - "loss": 46.0, - "step": 1099 - }, - { - "epoch": 0.0841026817286924, - "grad_norm": 0.0029004209209233522, - "learning_rate": 0.00019999999657265577, - "loss": 46.0, - "step": 1100 - }, - { - "epoch": 0.08417913871208212, - "grad_norm": 0.0013809700030833483, - "learning_rate": 0.0001999999965663642, - "loss": 46.0, - "step": 1101 - }, - { - "epoch": 0.08425559569547184, - "grad_norm": 0.00043901524622924626, - "learning_rate": 0.00019999999656006683, - "loss": 46.0, - "step": 1102 - }, - { - "epoch": 0.08433205267886155, - "grad_norm": 0.0012373154750093818, - "learning_rate": 0.00019999999655376368, - "loss": 46.0, - "step": 1103 - }, - { - "epoch": 0.08440850966225127, - "grad_norm": 0.0013792196987196803, - "learning_rate": 0.0001999999965474548, - "loss": 46.0, - "step": 1104 - }, - { - "epoch": 0.084484966645641, - "grad_norm": 0.0009669942664913833, - "learning_rate": 0.00019999999654114016, - "loss": 46.0, - "step": 1105 - }, - { - "epoch": 0.08456142362903071, - "grad_norm": 0.0008969721966423094, - "learning_rate": 0.0001999999965348197, - "loss": 46.0, - "step": 1106 - }, - { - "epoch": 0.08463788061242043, - "grad_norm": 0.0032094954513013363, - "learning_rate": 0.0001999999965284935, - "loss": 46.0, - "step": 1107 - }, - { - "epoch": 0.08471433759581015, - "grad_norm": 0.0007989536970853806, - "learning_rate": 0.00019999999652216152, - "loss": 46.0, - "step": 1108 - }, - { - "epoch": 0.08479079457919987, - "grad_norm": 0.0007923963130451739, - "learning_rate": 0.00019999999651582376, - "loss": 46.0, - "step": 1109 - }, - { - "epoch": 0.0848672515625896, - "grad_norm": 0.002309756353497505, - "learning_rate": 0.00019999999650948026, - "loss": 46.0, - "step": 1110 - }, - { - "epoch": 0.08494370854597932, - "grad_norm": 0.0006696914788335562, - "learning_rate": 0.00019999999650313098, - "loss": 46.0, - "step": 1111 - }, - { - "epoch": 0.08502016552936904, - "grad_norm": 0.0010415037395432591, - "learning_rate": 0.00019999999649677593, - "loss": 46.0, - "step": 1112 - }, - { - "epoch": 0.08509662251275876, - "grad_norm": 0.00045059010153636336, - "learning_rate": 0.0001999999964904151, - "loss": 46.0, - "step": 1113 - }, - { - "epoch": 0.08517307949614848, - "grad_norm": 0.001029227045364678, - "learning_rate": 0.00019999999648404854, - "loss": 46.0, - "step": 1114 - }, - { - "epoch": 0.0852495364795382, - "grad_norm": 0.0010876213200390339, - "learning_rate": 0.00019999999647767617, - "loss": 46.0, - "step": 1115 - }, - { - "epoch": 0.08532599346292792, - "grad_norm": 0.0008422642713412642, - "learning_rate": 0.00019999999647129802, - "loss": 46.0, - "step": 1116 - }, - { - "epoch": 0.08540245044631764, - "grad_norm": 0.0008873858023434877, - "learning_rate": 0.0001999999964649141, - "loss": 46.0, - "step": 1117 - }, - { - "epoch": 0.08547890742970736, - "grad_norm": 0.0006313907215371728, - "learning_rate": 0.00019999999645852447, - "loss": 46.0, - "step": 1118 - }, - { - "epoch": 0.08555536441309708, - "grad_norm": 0.0017866980051621795, - "learning_rate": 0.000199999996452129, - "loss": 46.0, - "step": 1119 - }, - { - "epoch": 0.0856318213964868, - "grad_norm": 0.0027931237127631903, - "learning_rate": 0.0001999999964457278, - "loss": 46.0, - "step": 1120 - }, - { - "epoch": 0.08570827837987652, - "grad_norm": 0.0002896841906476766, - "learning_rate": 0.0001999999964393208, - "loss": 46.0, - "step": 1121 - }, - { - "epoch": 0.08578473536326624, - "grad_norm": 0.0004619108512997627, - "learning_rate": 0.00019999999643290805, - "loss": 46.0, - "step": 1122 - }, - { - "epoch": 0.08586119234665596, - "grad_norm": 0.0009961490286514163, - "learning_rate": 0.00019999999642648954, - "loss": 46.0, - "step": 1123 - }, - { - "epoch": 0.08593764933004568, - "grad_norm": 0.00150589132681489, - "learning_rate": 0.00019999999642006527, - "loss": 46.0, - "step": 1124 - }, - { - "epoch": 0.08601410631343541, - "grad_norm": 0.0011365427635610104, - "learning_rate": 0.00019999999641363522, - "loss": 46.0, - "step": 1125 - }, - { - "epoch": 0.08609056329682513, - "grad_norm": 0.0011451183818280697, - "learning_rate": 0.00019999999640719936, - "loss": 46.0, - "step": 1126 - }, - { - "epoch": 0.08616702028021485, - "grad_norm": 0.0008174086106009781, - "learning_rate": 0.00019999999640075777, - "loss": 46.0, - "step": 1127 - }, - { - "epoch": 0.08624347726360457, - "grad_norm": 0.001513507915660739, - "learning_rate": 0.00019999999639431042, - "loss": 46.0, - "step": 1128 - }, - { - "epoch": 0.08631993424699429, - "grad_norm": 0.0011059994576498866, - "learning_rate": 0.00019999999638785728, - "loss": 46.0, - "step": 1129 - }, - { - "epoch": 0.086396391230384, - "grad_norm": 0.0010196544462814927, - "learning_rate": 0.00019999999638139836, - "loss": 46.0, - "step": 1130 - }, - { - "epoch": 0.08647284821377373, - "grad_norm": 0.0021436179522424936, - "learning_rate": 0.00019999999637493373, - "loss": 46.0, - "step": 1131 - }, - { - "epoch": 0.08654930519716345, - "grad_norm": 0.0025370994117110968, - "learning_rate": 0.00019999999636846326, - "loss": 46.0, - "step": 1132 - }, - { - "epoch": 0.08662576218055316, - "grad_norm": 0.003925876226276159, - "learning_rate": 0.00019999999636198705, - "loss": 46.0, - "step": 1133 - }, - { - "epoch": 0.08670221916394288, - "grad_norm": 0.0011787598486989737, - "learning_rate": 0.0001999999963555051, - "loss": 46.0, - "step": 1134 - }, - { - "epoch": 0.0867786761473326, - "grad_norm": 0.0005098643596284091, - "learning_rate": 0.0001999999963490173, - "loss": 46.0, - "step": 1135 - }, - { - "epoch": 0.08685513313072232, - "grad_norm": 0.0009188891854137182, - "learning_rate": 0.0001999999963425238, - "loss": 46.0, - "step": 1136 - }, - { - "epoch": 0.08693159011411204, - "grad_norm": 0.0027898759581148624, - "learning_rate": 0.0001999999963360245, - "loss": 46.0, - "step": 1137 - }, - { - "epoch": 0.08700804709750176, - "grad_norm": 0.0013244698056951165, - "learning_rate": 0.00019999999632951945, - "loss": 46.0, - "step": 1138 - }, - { - "epoch": 0.0870845040808915, - "grad_norm": 0.0010905115632340312, - "learning_rate": 0.00019999999632300863, - "loss": 46.0, - "step": 1139 - }, - { - "epoch": 0.08716096106428121, - "grad_norm": 0.0010143949184566736, - "learning_rate": 0.00019999999631649203, - "loss": 46.0, - "step": 1140 - }, - { - "epoch": 0.08723741804767093, - "grad_norm": 0.0008232492255046964, - "learning_rate": 0.00019999999630996963, - "loss": 46.0, - "step": 1141 - }, - { - "epoch": 0.08731387503106065, - "grad_norm": 0.0018145126523450017, - "learning_rate": 0.00019999999630344152, - "loss": 46.0, - "step": 1142 - }, - { - "epoch": 0.08739033201445037, - "grad_norm": 0.0008560459245927632, - "learning_rate": 0.0001999999962969076, - "loss": 46.0, - "step": 1143 - }, - { - "epoch": 0.08746678899784009, - "grad_norm": 0.0014575072564184666, - "learning_rate": 0.00019999999629036793, - "loss": 46.0, - "step": 1144 - }, - { - "epoch": 0.08754324598122981, - "grad_norm": 0.0031108111143112183, - "learning_rate": 0.0001999999962838225, - "loss": 46.0, - "step": 1145 - }, - { - "epoch": 0.08761970296461953, - "grad_norm": 0.00047231800272129476, - "learning_rate": 0.00019999999627727126, - "loss": 46.0, - "step": 1146 - }, - { - "epoch": 0.08769615994800925, - "grad_norm": 0.0017584000015631318, - "learning_rate": 0.00019999999627071428, - "loss": 46.0, - "step": 1147 - }, - { - "epoch": 0.08777261693139897, - "grad_norm": 0.001495389617048204, - "learning_rate": 0.00019999999626415155, - "loss": 46.0, - "step": 1148 - }, - { - "epoch": 0.08784907391478869, - "grad_norm": 0.005392612423747778, - "learning_rate": 0.00019999999625758302, - "loss": 46.0, - "step": 1149 - }, - { - "epoch": 0.08792553089817841, - "grad_norm": 0.001163176610134542, - "learning_rate": 0.0001999999962510087, - "loss": 46.0, - "step": 1150 - }, - { - "epoch": 0.08800198788156813, - "grad_norm": 0.0011836659396067262, - "learning_rate": 0.00019999999624442863, - "loss": 46.0, - "step": 1151 - }, - { - "epoch": 0.08807844486495785, - "grad_norm": 0.0006165836821310222, - "learning_rate": 0.0001999999962378428, - "loss": 46.0, - "step": 1152 - }, - { - "epoch": 0.08815490184834757, - "grad_norm": 0.0011477165389806032, - "learning_rate": 0.00019999999623125124, - "loss": 46.0, - "step": 1153 - }, - { - "epoch": 0.0882313588317373, - "grad_norm": 0.001473361742682755, - "learning_rate": 0.00019999999622465384, - "loss": 46.0, - "step": 1154 - }, - { - "epoch": 0.08830781581512702, - "grad_norm": 0.0011002605315297842, - "learning_rate": 0.00019999999621805073, - "loss": 46.0, - "step": 1155 - }, - { - "epoch": 0.08838427279851674, - "grad_norm": 0.0009919332806020975, - "learning_rate": 0.00019999999621144178, - "loss": 46.0, - "step": 1156 - }, - { - "epoch": 0.08846072978190646, - "grad_norm": 0.0008903579437173903, - "learning_rate": 0.00019999999620482712, - "loss": 46.0, - "step": 1157 - }, - { - "epoch": 0.08853718676529618, - "grad_norm": 0.0009217155165970325, - "learning_rate": 0.00019999999619820668, - "loss": 46.0, - "step": 1158 - }, - { - "epoch": 0.0886136437486859, - "grad_norm": 0.0007448533433489501, - "learning_rate": 0.00019999999619158045, - "loss": 46.0, - "step": 1159 - }, - { - "epoch": 0.08869010073207562, - "grad_norm": 0.0004429701657500118, - "learning_rate": 0.00019999999618494846, - "loss": 46.0, - "step": 1160 - }, - { - "epoch": 0.08876655771546534, - "grad_norm": 0.0014019494410604239, - "learning_rate": 0.0001999999961783107, - "loss": 46.0, - "step": 1161 - }, - { - "epoch": 0.08884301469885506, - "grad_norm": 0.0009256371413357556, - "learning_rate": 0.00019999999617166718, - "loss": 46.0, - "step": 1162 - }, - { - "epoch": 0.08891947168224477, - "grad_norm": 0.0029703632462769747, - "learning_rate": 0.0001999999961650179, - "loss": 46.0, - "step": 1163 - }, - { - "epoch": 0.0889959286656345, - "grad_norm": 0.0011461441172286868, - "learning_rate": 0.00019999999615836282, - "loss": 46.0, - "step": 1164 - }, - { - "epoch": 0.08907238564902421, - "grad_norm": 0.0007109674625098705, - "learning_rate": 0.00019999999615170197, - "loss": 46.0, - "step": 1165 - }, - { - "epoch": 0.08914884263241393, - "grad_norm": 0.0009964013006538153, - "learning_rate": 0.0001999999961450354, - "loss": 46.0, - "step": 1166 - }, - { - "epoch": 0.08922529961580365, - "grad_norm": 0.0015873070806264877, - "learning_rate": 0.000199999996138363, - "loss": 46.0, - "step": 1167 - }, - { - "epoch": 0.08930175659919339, - "grad_norm": 0.003190083894878626, - "learning_rate": 0.00019999999613168486, - "loss": 46.0, - "step": 1168 - }, - { - "epoch": 0.0893782135825831, - "grad_norm": 0.0008296355954371393, - "learning_rate": 0.00019999999612500095, - "loss": 46.0, - "step": 1169 - }, - { - "epoch": 0.08945467056597282, - "grad_norm": 0.000597225094679743, - "learning_rate": 0.00019999999611831126, - "loss": 46.0, - "step": 1170 - }, - { - "epoch": 0.08953112754936254, - "grad_norm": 0.000980622717179358, - "learning_rate": 0.00019999999611161582, - "loss": 46.0, - "step": 1171 - }, - { - "epoch": 0.08960758453275226, - "grad_norm": 0.0015572402626276016, - "learning_rate": 0.0001999999961049146, - "loss": 46.0, - "step": 1172 - }, - { - "epoch": 0.08968404151614198, - "grad_norm": 0.002365921624004841, - "learning_rate": 0.0001999999960982076, - "loss": 46.0, - "step": 1173 - }, - { - "epoch": 0.0897604984995317, - "grad_norm": 0.0024859767872840166, - "learning_rate": 0.00019999999609149484, - "loss": 46.0, - "step": 1174 - }, - { - "epoch": 0.08983695548292142, - "grad_norm": 0.0008226605132222176, - "learning_rate": 0.00019999999608477631, - "loss": 46.0, - "step": 1175 - }, - { - "epoch": 0.08991341246631114, - "grad_norm": 0.0015789471799507737, - "learning_rate": 0.000199999996078052, - "loss": 46.0, - "step": 1176 - }, - { - "epoch": 0.08998986944970086, - "grad_norm": 0.0004363392072264105, - "learning_rate": 0.00019999999607132196, - "loss": 46.0, - "step": 1177 - }, - { - "epoch": 0.09006632643309058, - "grad_norm": 0.0006765234284102917, - "learning_rate": 0.0001999999960645861, - "loss": 46.0, - "step": 1178 - }, - { - "epoch": 0.0901427834164803, - "grad_norm": 0.0006881494191475213, - "learning_rate": 0.00019999999605784452, - "loss": 46.0, - "step": 1179 - }, - { - "epoch": 0.09021924039987002, - "grad_norm": 0.010233285836875439, - "learning_rate": 0.00019999999605109715, - "loss": 46.0, - "step": 1180 - }, - { - "epoch": 0.09029569738325974, - "grad_norm": 0.002100891899317503, - "learning_rate": 0.00019999999604434398, - "loss": 46.0, - "step": 1181 - }, - { - "epoch": 0.09037215436664946, - "grad_norm": 0.000936768192332238, - "learning_rate": 0.0001999999960375851, - "loss": 46.0, - "step": 1182 - }, - { - "epoch": 0.09044861135003919, - "grad_norm": 0.0007137735956348479, - "learning_rate": 0.0001999999960308204, - "loss": 46.0, - "step": 1183 - }, - { - "epoch": 0.09052506833342891, - "grad_norm": 0.0013765707844868302, - "learning_rate": 0.00019999999602404993, - "loss": 46.0, - "step": 1184 - }, - { - "epoch": 0.09060152531681863, - "grad_norm": 0.0021736095659434795, - "learning_rate": 0.0001999999960172737, - "loss": 46.0, - "step": 1185 - }, - { - "epoch": 0.09067798230020835, - "grad_norm": 0.0019218625966459513, - "learning_rate": 0.00019999999601049174, - "loss": 46.0, - "step": 1186 - }, - { - "epoch": 0.09075443928359807, - "grad_norm": 0.0011082632699981332, - "learning_rate": 0.00019999999600370396, - "loss": 46.0, - "step": 1187 - }, - { - "epoch": 0.09083089626698779, - "grad_norm": 0.000510059529915452, - "learning_rate": 0.00019999999599691043, - "loss": 46.0, - "step": 1188 - }, - { - "epoch": 0.0909073532503775, - "grad_norm": 0.00044670479837805033, - "learning_rate": 0.00019999999599011113, - "loss": 46.0, - "step": 1189 - }, - { - "epoch": 0.09098381023376723, - "grad_norm": 0.0015187797835096717, - "learning_rate": 0.00019999999598330605, - "loss": 46.0, - "step": 1190 - }, - { - "epoch": 0.09106026721715695, - "grad_norm": 0.0005870451568625867, - "learning_rate": 0.0001999999959764952, - "loss": 46.0, - "step": 1191 - }, - { - "epoch": 0.09113672420054666, - "grad_norm": 0.0006216135807335377, - "learning_rate": 0.0001999999959696786, - "loss": 46.0, - "step": 1192 - }, - { - "epoch": 0.09121318118393638, - "grad_norm": 0.0009201872744597495, - "learning_rate": 0.0001999999959628562, - "loss": 46.0, - "step": 1193 - }, - { - "epoch": 0.0912896381673261, - "grad_norm": 0.0006811945931985974, - "learning_rate": 0.00019999999595602807, - "loss": 46.0, - "step": 1194 - }, - { - "epoch": 0.09136609515071582, - "grad_norm": 0.001340296003036201, - "learning_rate": 0.00019999999594919415, - "loss": 46.0, - "step": 1195 - }, - { - "epoch": 0.09144255213410554, - "grad_norm": 0.0010067133698612452, - "learning_rate": 0.00019999999594235446, - "loss": 46.0, - "step": 1196 - }, - { - "epoch": 0.09151900911749528, - "grad_norm": 0.0012444059830158949, - "learning_rate": 0.000199999995935509, - "loss": 46.0, - "step": 1197 - }, - { - "epoch": 0.091595466100885, - "grad_norm": 0.00182445312384516, - "learning_rate": 0.00019999999592865776, - "loss": 46.0, - "step": 1198 - }, - { - "epoch": 0.09167192308427471, - "grad_norm": 0.000848781899549067, - "learning_rate": 0.00019999999592180078, - "loss": 46.0, - "step": 1199 - }, - { - "epoch": 0.09174838006766443, - "grad_norm": 0.0014819852076470852, - "learning_rate": 0.00019999999591493802, - "loss": 46.0, - "step": 1200 - }, - { - "epoch": 0.09182483705105415, - "grad_norm": 0.0027086471673101187, - "learning_rate": 0.0001999999959080695, - "loss": 46.0, - "step": 1201 - }, - { - "epoch": 0.09190129403444387, - "grad_norm": 0.0004966605338267982, - "learning_rate": 0.0001999999959011952, - "loss": 46.0, - "step": 1202 - }, - { - "epoch": 0.09197775101783359, - "grad_norm": 0.0004992112517356873, - "learning_rate": 0.00019999999589431511, - "loss": 46.0, - "step": 1203 - }, - { - "epoch": 0.09205420800122331, - "grad_norm": 0.0009974294807761908, - "learning_rate": 0.00019999999588742927, - "loss": 46.0, - "step": 1204 - }, - { - "epoch": 0.09213066498461303, - "grad_norm": 0.0006977037410251796, - "learning_rate": 0.00019999999588053764, - "loss": 46.0, - "step": 1205 - }, - { - "epoch": 0.09220712196800275, - "grad_norm": 0.00046226897393353283, - "learning_rate": 0.00019999999587364027, - "loss": 46.0, - "step": 1206 - }, - { - "epoch": 0.09228357895139247, - "grad_norm": 0.001052415114827454, - "learning_rate": 0.00019999999586673713, - "loss": 46.0, - "step": 1207 - }, - { - "epoch": 0.09236003593478219, - "grad_norm": 0.0013654630165547132, - "learning_rate": 0.00019999999585982822, - "loss": 46.0, - "step": 1208 - }, - { - "epoch": 0.09243649291817191, - "grad_norm": 0.0013527219416573644, - "learning_rate": 0.0001999999958529135, - "loss": 46.0, - "step": 1209 - }, - { - "epoch": 0.09251294990156163, - "grad_norm": 0.0006191693828441203, - "learning_rate": 0.00019999999584599304, - "loss": 46.0, - "step": 1210 - }, - { - "epoch": 0.09258940688495135, - "grad_norm": 0.0010233297944068909, - "learning_rate": 0.0001999999958390668, - "loss": 46.0, - "step": 1211 - }, - { - "epoch": 0.09266586386834108, - "grad_norm": 0.0009534502169117332, - "learning_rate": 0.0001999999958321348, - "loss": 46.0, - "step": 1212 - }, - { - "epoch": 0.0927423208517308, - "grad_norm": 0.001106227864511311, - "learning_rate": 0.00019999999582519704, - "loss": 46.0, - "step": 1213 - }, - { - "epoch": 0.09281877783512052, - "grad_norm": 0.0005697849555872381, - "learning_rate": 0.0001999999958182535, - "loss": 46.0, - "step": 1214 - }, - { - "epoch": 0.09289523481851024, - "grad_norm": 0.0009686643606983125, - "learning_rate": 0.0001999999958113042, - "loss": 46.0, - "step": 1215 - }, - { - "epoch": 0.09297169180189996, - "grad_norm": 0.0023167189210653305, - "learning_rate": 0.0001999999958043491, - "loss": 46.0, - "step": 1216 - }, - { - "epoch": 0.09304814878528968, - "grad_norm": 0.0004048386763315648, - "learning_rate": 0.00019999999579738828, - "loss": 46.0, - "step": 1217 - }, - { - "epoch": 0.0931246057686794, - "grad_norm": 0.0010416668374091387, - "learning_rate": 0.00019999999579042166, - "loss": 46.0, - "step": 1218 - }, - { - "epoch": 0.09320106275206912, - "grad_norm": 0.0005037899245508015, - "learning_rate": 0.0001999999957834493, - "loss": 46.0, - "step": 1219 - }, - { - "epoch": 0.09327751973545884, - "grad_norm": 0.0008396848570555449, - "learning_rate": 0.0001999999957764711, - "loss": 46.0, - "step": 1220 - }, - { - "epoch": 0.09335397671884856, - "grad_norm": 0.0013662183191627264, - "learning_rate": 0.00019999999576948718, - "loss": 46.0, - "step": 1221 - }, - { - "epoch": 0.09343043370223827, - "grad_norm": 0.0013979448704048991, - "learning_rate": 0.00019999999576249752, - "loss": 46.0, - "step": 1222 - }, - { - "epoch": 0.093506890685628, - "grad_norm": 0.002754638670012355, - "learning_rate": 0.00019999999575550203, - "loss": 46.0, - "step": 1223 - }, - { - "epoch": 0.09358334766901771, - "grad_norm": 0.000634212396107614, - "learning_rate": 0.00019999999574850082, - "loss": 46.0, - "step": 1224 - }, - { - "epoch": 0.09365980465240743, - "grad_norm": 0.0014306706143543124, - "learning_rate": 0.00019999999574149378, - "loss": 46.0, - "step": 1225 - }, - { - "epoch": 0.09373626163579717, - "grad_norm": 0.004412438254803419, - "learning_rate": 0.00019999999573448103, - "loss": 46.0, - "step": 1226 - }, - { - "epoch": 0.09381271861918689, - "grad_norm": 0.0008019739761948586, - "learning_rate": 0.0001999999957274625, - "loss": 46.0, - "step": 1227 - }, - { - "epoch": 0.0938891756025766, - "grad_norm": 0.0008827553829178214, - "learning_rate": 0.0001999999957204382, - "loss": 46.0, - "step": 1228 - }, - { - "epoch": 0.09396563258596632, - "grad_norm": 0.0012745364801958203, - "learning_rate": 0.0001999999957134081, - "loss": 46.0, - "step": 1229 - }, - { - "epoch": 0.09404208956935604, - "grad_norm": 0.0017059399979189038, - "learning_rate": 0.00019999999570637225, - "loss": 46.0, - "step": 1230 - }, - { - "epoch": 0.09411854655274576, - "grad_norm": 0.0014126162277534604, - "learning_rate": 0.00019999999569933062, - "loss": 46.0, - "step": 1231 - }, - { - "epoch": 0.09419500353613548, - "grad_norm": 0.000933696108404547, - "learning_rate": 0.00019999999569228326, - "loss": 46.0, - "step": 1232 - }, - { - "epoch": 0.0942714605195252, - "grad_norm": 0.0009252531453967094, - "learning_rate": 0.0001999999956852301, - "loss": 46.0, - "step": 1233 - }, - { - "epoch": 0.09434791750291492, - "grad_norm": 0.0006862206500954926, - "learning_rate": 0.00019999999567817117, - "loss": 46.0, - "step": 1234 - }, - { - "epoch": 0.09442437448630464, - "grad_norm": 0.0011284563224762678, - "learning_rate": 0.00019999999567110646, - "loss": 46.0, - "step": 1235 - }, - { - "epoch": 0.09450083146969436, - "grad_norm": 0.0008051855838857591, - "learning_rate": 0.000199999995664036, - "loss": 46.0, - "step": 1236 - }, - { - "epoch": 0.09457728845308408, - "grad_norm": 0.0008793355664238334, - "learning_rate": 0.00019999999565695976, - "loss": 46.0, - "step": 1237 - }, - { - "epoch": 0.0946537454364738, - "grad_norm": 0.0009020173456519842, - "learning_rate": 0.00019999999564987775, - "loss": 46.0, - "step": 1238 - }, - { - "epoch": 0.09473020241986352, - "grad_norm": 0.0006941471947357059, - "learning_rate": 0.00019999999564279, - "loss": 46.0, - "step": 1239 - }, - { - "epoch": 0.09480665940325324, - "grad_norm": 0.0010112235322594643, - "learning_rate": 0.0001999999956356964, - "loss": 46.0, - "step": 1240 - }, - { - "epoch": 0.09488311638664297, - "grad_norm": 0.0003985882503911853, - "learning_rate": 0.00019999999562859714, - "loss": 46.0, - "step": 1241 - }, - { - "epoch": 0.09495957337003269, - "grad_norm": 0.0010461274068802595, - "learning_rate": 0.00019999999562149206, - "loss": 46.0, - "step": 1242 - }, - { - "epoch": 0.09503603035342241, - "grad_norm": 0.0012383535504341125, - "learning_rate": 0.0001999999956143812, - "loss": 46.0, - "step": 1243 - }, - { - "epoch": 0.09511248733681213, - "grad_norm": 0.0006342925480566919, - "learning_rate": 0.00019999999560726457, - "loss": 46.0, - "step": 1244 - }, - { - "epoch": 0.09518894432020185, - "grad_norm": 0.0017313059652224183, - "learning_rate": 0.00019999999560014217, - "loss": 46.0, - "step": 1245 - }, - { - "epoch": 0.09526540130359157, - "grad_norm": 0.0011314081493765116, - "learning_rate": 0.00019999999559301403, - "loss": 46.0, - "step": 1246 - }, - { - "epoch": 0.09534185828698129, - "grad_norm": 0.0005559470737352967, - "learning_rate": 0.0001999999955858801, - "loss": 46.0, - "step": 1247 - }, - { - "epoch": 0.095418315270371, - "grad_norm": 0.00640692375600338, - "learning_rate": 0.00019999999557874037, - "loss": 46.0, - "step": 1248 - }, - { - "epoch": 0.09549477225376073, - "grad_norm": 0.004177349153906107, - "learning_rate": 0.0001999999955715949, - "loss": 46.0, - "step": 1249 - }, - { - "epoch": 0.09557122923715045, - "grad_norm": 0.0006592542049475014, - "learning_rate": 0.00019999999556444368, - "loss": 46.0, - "step": 1250 - }, - { - "epoch": 0.09564768622054017, - "grad_norm": 0.0027955283876508474, - "learning_rate": 0.00019999999555728667, - "loss": 46.0, - "step": 1251 - }, - { - "epoch": 0.09572414320392988, - "grad_norm": 0.0012236223556101322, - "learning_rate": 0.0001999999955501239, - "loss": 46.0, - "step": 1252 - }, - { - "epoch": 0.0958006001873196, - "grad_norm": 0.0017678553704172373, - "learning_rate": 0.00019999999554295536, - "loss": 46.0, - "step": 1253 - }, - { - "epoch": 0.09587705717070932, - "grad_norm": 0.0006399803096428514, - "learning_rate": 0.00019999999553578103, - "loss": 46.0, - "step": 1254 - }, - { - "epoch": 0.09595351415409906, - "grad_norm": 0.0006326381699182093, - "learning_rate": 0.00019999999552860093, - "loss": 46.0, - "step": 1255 - }, - { - "epoch": 0.09602997113748878, - "grad_norm": 0.0008295346051454544, - "learning_rate": 0.00019999999552141508, - "loss": 46.0, - "step": 1256 - }, - { - "epoch": 0.0961064281208785, - "grad_norm": 0.0018643852090463042, - "learning_rate": 0.00019999999551422346, - "loss": 46.0, - "step": 1257 - }, - { - "epoch": 0.09618288510426821, - "grad_norm": 0.0015572465490549803, - "learning_rate": 0.00019999999550702606, - "loss": 46.0, - "step": 1258 - }, - { - "epoch": 0.09625934208765793, - "grad_norm": 0.005144514609128237, - "learning_rate": 0.00019999999549982292, - "loss": 46.0, - "step": 1259 - }, - { - "epoch": 0.09633579907104765, - "grad_norm": 0.0018445689929649234, - "learning_rate": 0.00019999999549261398, - "loss": 46.0, - "step": 1260 - }, - { - "epoch": 0.09641225605443737, - "grad_norm": 0.003999857697635889, - "learning_rate": 0.00019999999548539927, - "loss": 46.0, - "step": 1261 - }, - { - "epoch": 0.09648871303782709, - "grad_norm": 0.0009839792037382722, - "learning_rate": 0.0001999999954781788, - "loss": 46.0, - "step": 1262 - }, - { - "epoch": 0.09656517002121681, - "grad_norm": 0.0015999675961211324, - "learning_rate": 0.00019999999547095257, - "loss": 46.0, - "step": 1263 - }, - { - "epoch": 0.09664162700460653, - "grad_norm": 0.0011857443023473024, - "learning_rate": 0.00019999999546372056, - "loss": 46.0, - "step": 1264 - }, - { - "epoch": 0.09671808398799625, - "grad_norm": 0.0004027715476695448, - "learning_rate": 0.00019999999545648275, - "loss": 46.0, - "step": 1265 - }, - { - "epoch": 0.09679454097138597, - "grad_norm": 0.001518356497399509, - "learning_rate": 0.00019999999544923923, - "loss": 46.0, - "step": 1266 - }, - { - "epoch": 0.09687099795477569, - "grad_norm": 0.0010775034315884113, - "learning_rate": 0.0001999999954419899, - "loss": 46.0, - "step": 1267 - }, - { - "epoch": 0.09694745493816541, - "grad_norm": 0.0009000435820780694, - "learning_rate": 0.00019999999543473482, - "loss": 46.0, - "step": 1268 - }, - { - "epoch": 0.09702391192155513, - "grad_norm": 0.0011318640317767859, - "learning_rate": 0.00019999999542747395, - "loss": 46.0, - "step": 1269 - }, - { - "epoch": 0.09710036890494486, - "grad_norm": 0.0022418838925659657, - "learning_rate": 0.00019999999542020733, - "loss": 46.0, - "step": 1270 - }, - { - "epoch": 0.09717682588833458, - "grad_norm": 0.0009755489300005138, - "learning_rate": 0.00019999999541293493, - "loss": 46.0, - "step": 1271 - }, - { - "epoch": 0.0972532828717243, - "grad_norm": 0.0011484373826533556, - "learning_rate": 0.00019999999540565677, - "loss": 46.0, - "step": 1272 - }, - { - "epoch": 0.09732973985511402, - "grad_norm": 0.0031784288585186005, - "learning_rate": 0.00019999999539837285, - "loss": 46.0, - "step": 1273 - }, - { - "epoch": 0.09740619683850374, - "grad_norm": 0.010791708715260029, - "learning_rate": 0.0001999999953910831, - "loss": 46.0, - "step": 1274 - }, - { - "epoch": 0.09748265382189346, - "grad_norm": 0.001676803221926093, - "learning_rate": 0.00019999999538378768, - "loss": 46.0, - "step": 1275 - }, - { - "epoch": 0.09755911080528318, - "grad_norm": 0.00092638551723212, - "learning_rate": 0.0001999999953764864, - "loss": 46.0, - "step": 1276 - }, - { - "epoch": 0.0976355677886729, - "grad_norm": 0.0006048434879630804, - "learning_rate": 0.00019999999536917938, - "loss": 46.0, - "step": 1277 - }, - { - "epoch": 0.09771202477206262, - "grad_norm": 0.0007846732623875141, - "learning_rate": 0.00019999999536186663, - "loss": 46.0, - "step": 1278 - }, - { - "epoch": 0.09778848175545234, - "grad_norm": 0.0016191190807148814, - "learning_rate": 0.00019999999535454804, - "loss": 46.0, - "step": 1279 - }, - { - "epoch": 0.09786493873884206, - "grad_norm": 0.0004936332697980106, - "learning_rate": 0.00019999999534722374, - "loss": 46.0, - "step": 1280 - }, - { - "epoch": 0.09794139572223177, - "grad_norm": 0.002149005653336644, - "learning_rate": 0.00019999999533989364, - "loss": 46.0, - "step": 1281 - }, - { - "epoch": 0.0980178527056215, - "grad_norm": 0.0007897565956227481, - "learning_rate": 0.0001999999953325578, - "loss": 46.0, - "step": 1282 - }, - { - "epoch": 0.09809430968901121, - "grad_norm": 0.0018300628289580345, - "learning_rate": 0.00019999999532521615, - "loss": 46.0, - "step": 1283 - }, - { - "epoch": 0.09817076667240095, - "grad_norm": 0.0013459909241646528, - "learning_rate": 0.00019999999531786875, - "loss": 46.0, - "step": 1284 - }, - { - "epoch": 0.09824722365579067, - "grad_norm": 0.0019359695725142956, - "learning_rate": 0.00019999999531051559, - "loss": 46.0, - "step": 1285 - }, - { - "epoch": 0.09832368063918039, - "grad_norm": 0.0015307387802749872, - "learning_rate": 0.00019999999530315665, - "loss": 46.0, - "step": 1286 - }, - { - "epoch": 0.0984001376225701, - "grad_norm": 0.0029983101412653923, - "learning_rate": 0.00019999999529579193, - "loss": 46.0, - "step": 1287 - }, - { - "epoch": 0.09847659460595982, - "grad_norm": 0.0018178214086219668, - "learning_rate": 0.00019999999528842147, - "loss": 46.0, - "step": 1288 - }, - { - "epoch": 0.09855305158934954, - "grad_norm": 0.0004463070072233677, - "learning_rate": 0.00019999999528104524, - "loss": 46.0, - "step": 1289 - }, - { - "epoch": 0.09862950857273926, - "grad_norm": 0.0013351240195333958, - "learning_rate": 0.0001999999952736632, - "loss": 46.0, - "step": 1290 - }, - { - "epoch": 0.09870596555612898, - "grad_norm": 0.0009454942191950977, - "learning_rate": 0.0001999999952662754, - "loss": 46.0, - "step": 1291 - }, - { - "epoch": 0.0987824225395187, - "grad_norm": 0.0004892030847258866, - "learning_rate": 0.00019999999525888184, - "loss": 46.0, - "step": 1292 - }, - { - "epoch": 0.09885887952290842, - "grad_norm": 0.000888179347384721, - "learning_rate": 0.0001999999952514825, - "loss": 46.0, - "step": 1293 - }, - { - "epoch": 0.09893533650629814, - "grad_norm": 0.0007274766685441136, - "learning_rate": 0.0001999999952440774, - "loss": 46.0, - "step": 1294 - }, - { - "epoch": 0.09901179348968786, - "grad_norm": 0.0005824375548399985, - "learning_rate": 0.00019999999523666656, - "loss": 46.0, - "step": 1295 - }, - { - "epoch": 0.09908825047307758, - "grad_norm": 0.001742347958497703, - "learning_rate": 0.00019999999522924994, - "loss": 46.0, - "step": 1296 - }, - { - "epoch": 0.0991647074564673, - "grad_norm": 0.0010265177115797997, - "learning_rate": 0.00019999999522182752, - "loss": 46.0, - "step": 1297 - }, - { - "epoch": 0.09924116443985702, - "grad_norm": 0.0022620977833867073, - "learning_rate": 0.00019999999521439936, - "loss": 46.0, - "step": 1298 - }, - { - "epoch": 0.09931762142324675, - "grad_norm": 0.001948086079210043, - "learning_rate": 0.00019999999520696542, - "loss": 46.0, - "step": 1299 - }, - { - "epoch": 0.09939407840663647, - "grad_norm": 0.003251249436289072, - "learning_rate": 0.00019999999519952573, - "loss": 46.0, - "step": 1300 - }, - { - "epoch": 0.09947053539002619, - "grad_norm": 0.002538104774430394, - "learning_rate": 0.00019999999519208024, - "loss": 46.0, - "step": 1301 - }, - { - "epoch": 0.09954699237341591, - "grad_norm": 0.0008535406668670475, - "learning_rate": 0.00019999999518462898, - "loss": 46.0, - "step": 1302 - }, - { - "epoch": 0.09962344935680563, - "grad_norm": 0.0009606928797438741, - "learning_rate": 0.00019999999517717197, - "loss": 46.0, - "step": 1303 - }, - { - "epoch": 0.09969990634019535, - "grad_norm": 0.0006648574490100145, - "learning_rate": 0.00019999999516970917, - "loss": 46.0, - "step": 1304 - }, - { - "epoch": 0.09977636332358507, - "grad_norm": 0.001308361068367958, - "learning_rate": 0.0001999999951622406, - "loss": 46.0, - "step": 1305 - }, - { - "epoch": 0.09985282030697479, - "grad_norm": 0.0016028402606025338, - "learning_rate": 0.00019999999515476628, - "loss": 46.0, - "step": 1306 - }, - { - "epoch": 0.09992927729036451, - "grad_norm": 0.0020908236037939787, - "learning_rate": 0.00019999999514728618, - "loss": 46.0, - "step": 1307 - }, - { - "epoch": 0.10000573427375423, - "grad_norm": 0.0009625316015444696, - "learning_rate": 0.0001999999951398003, - "loss": 46.0, - "step": 1308 - }, - { - "epoch": 0.10008219125714395, - "grad_norm": 0.0007036154274828732, - "learning_rate": 0.0001999999951323087, - "loss": 46.0, - "step": 1309 - }, - { - "epoch": 0.10015864824053367, - "grad_norm": 0.00224491860717535, - "learning_rate": 0.00019999999512481127, - "loss": 46.0, - "step": 1310 - }, - { - "epoch": 0.10023510522392338, - "grad_norm": 0.0017687119543552399, - "learning_rate": 0.0001999999951173081, - "loss": 46.0, - "step": 1311 - }, - { - "epoch": 0.1003115622073131, - "grad_norm": 0.000878228573128581, - "learning_rate": 0.00019999999510979916, - "loss": 46.0, - "step": 1312 - }, - { - "epoch": 0.10038801919070284, - "grad_norm": 0.0011639725416898727, - "learning_rate": 0.00019999999510228445, - "loss": 46.0, - "step": 1313 - }, - { - "epoch": 0.10046447617409256, - "grad_norm": 0.0017363595543429255, - "learning_rate": 0.00019999999509476396, - "loss": 46.0, - "step": 1314 - }, - { - "epoch": 0.10054093315748228, - "grad_norm": 0.0003351522027514875, - "learning_rate": 0.00019999999508723773, - "loss": 46.0, - "step": 1315 - }, - { - "epoch": 0.100617390140872, - "grad_norm": 0.0007463704678229988, - "learning_rate": 0.0001999999950797057, - "loss": 46.0, - "step": 1316 - }, - { - "epoch": 0.10069384712426172, - "grad_norm": 0.0011413536267355084, - "learning_rate": 0.00019999999507216791, - "loss": 46.0, - "step": 1317 - }, - { - "epoch": 0.10077030410765143, - "grad_norm": 0.001504633342847228, - "learning_rate": 0.00019999999506462433, - "loss": 46.0, - "step": 1318 - }, - { - "epoch": 0.10084676109104115, - "grad_norm": 0.0015678256750106812, - "learning_rate": 0.000199999995057075, - "loss": 46.0, - "step": 1319 - }, - { - "epoch": 0.10092321807443087, - "grad_norm": 0.0018599717877805233, - "learning_rate": 0.0001999999950495199, - "loss": 46.0, - "step": 1320 - }, - { - "epoch": 0.10099967505782059, - "grad_norm": 0.0007657839450985193, - "learning_rate": 0.00019999999504195903, - "loss": 46.0, - "step": 1321 - }, - { - "epoch": 0.10107613204121031, - "grad_norm": 0.00315374624915421, - "learning_rate": 0.00019999999503439239, - "loss": 46.0, - "step": 1322 - }, - { - "epoch": 0.10115258902460003, - "grad_norm": 0.0013712298823520541, - "learning_rate": 0.00019999999502682, - "loss": 46.0, - "step": 1323 - }, - { - "epoch": 0.10122904600798975, - "grad_norm": 0.0013697234680876136, - "learning_rate": 0.0001999999950192418, - "loss": 46.0, - "step": 1324 - }, - { - "epoch": 0.10130550299137947, - "grad_norm": 0.00043646697304211557, - "learning_rate": 0.00019999999501165786, - "loss": 46.0, - "step": 1325 - }, - { - "epoch": 0.10138195997476919, - "grad_norm": 0.0012329216115176678, - "learning_rate": 0.00019999999500406815, - "loss": 46.0, - "step": 1326 - }, - { - "epoch": 0.10145841695815891, - "grad_norm": 0.001268675783649087, - "learning_rate": 0.00019999999499647266, - "loss": 46.0, - "step": 1327 - }, - { - "epoch": 0.10153487394154864, - "grad_norm": 0.0014134745579212904, - "learning_rate": 0.0001999999949888714, - "loss": 46.0, - "step": 1328 - }, - { - "epoch": 0.10161133092493836, - "grad_norm": 0.0012230125721544027, - "learning_rate": 0.0001999999949812644, - "loss": 46.0, - "step": 1329 - }, - { - "epoch": 0.10168778790832808, - "grad_norm": 0.0012176332529634237, - "learning_rate": 0.00019999999497365159, - "loss": 46.0, - "step": 1330 - }, - { - "epoch": 0.1017642448917178, - "grad_norm": 0.0008769358391873538, - "learning_rate": 0.00019999999496603303, - "loss": 46.0, - "step": 1331 - }, - { - "epoch": 0.10184070187510752, - "grad_norm": 0.000845807371661067, - "learning_rate": 0.00019999999495840868, - "loss": 46.0, - "step": 1332 - }, - { - "epoch": 0.10191715885849724, - "grad_norm": 0.0010382183827459812, - "learning_rate": 0.00019999999495077858, - "loss": 46.0, - "step": 1333 - }, - { - "epoch": 0.10199361584188696, - "grad_norm": 0.002728296210989356, - "learning_rate": 0.00019999999494314273, - "loss": 46.0, - "step": 1334 - }, - { - "epoch": 0.10207007282527668, - "grad_norm": 0.005204348359256983, - "learning_rate": 0.00019999999493550106, - "loss": 46.0, - "step": 1335 - }, - { - "epoch": 0.1021465298086664, - "grad_norm": 0.0008558625122532248, - "learning_rate": 0.00019999999492785367, - "loss": 46.0, - "step": 1336 - }, - { - "epoch": 0.10222298679205612, - "grad_norm": 0.0006963508203625679, - "learning_rate": 0.00019999999492020047, - "loss": 46.0, - "step": 1337 - }, - { - "epoch": 0.10229944377544584, - "grad_norm": 0.0006648279377259314, - "learning_rate": 0.0001999999949125415, - "loss": 46.0, - "step": 1338 - }, - { - "epoch": 0.10237590075883556, - "grad_norm": 0.0010026381351053715, - "learning_rate": 0.0001999999949048768, - "loss": 46.0, - "step": 1339 - }, - { - "epoch": 0.10245235774222528, - "grad_norm": 0.00200001266784966, - "learning_rate": 0.0001999999948972063, - "loss": 46.0, - "step": 1340 - }, - { - "epoch": 0.102528814725615, - "grad_norm": 0.0011981796706095338, - "learning_rate": 0.00019999999488953005, - "loss": 46.0, - "step": 1341 - }, - { - "epoch": 0.10260527170900473, - "grad_norm": 0.0011386389378458261, - "learning_rate": 0.00019999999488184801, - "loss": 46.0, - "step": 1342 - }, - { - "epoch": 0.10268172869239445, - "grad_norm": 0.0009121635230258107, - "learning_rate": 0.00019999999487416023, - "loss": 46.0, - "step": 1343 - }, - { - "epoch": 0.10275818567578417, - "grad_norm": 0.0010379153536632657, - "learning_rate": 0.00019999999486646668, - "loss": 46.0, - "step": 1344 - }, - { - "epoch": 0.10283464265917389, - "grad_norm": 0.0006629675044678152, - "learning_rate": 0.00019999999485876733, - "loss": 46.0, - "step": 1345 - }, - { - "epoch": 0.1029110996425636, - "grad_norm": 0.0009854877134785056, - "learning_rate": 0.00019999999485106223, - "loss": 46.0, - "step": 1346 - }, - { - "epoch": 0.10298755662595332, - "grad_norm": 0.00296715646982193, - "learning_rate": 0.00019999999484335136, - "loss": 46.0, - "step": 1347 - }, - { - "epoch": 0.10306401360934304, - "grad_norm": 0.0007912347791716456, - "learning_rate": 0.0001999999948356347, - "loss": 46.0, - "step": 1348 - }, - { - "epoch": 0.10314047059273276, - "grad_norm": 0.0035866890102624893, - "learning_rate": 0.00019999999482791226, - "loss": 46.0, - "step": 1349 - }, - { - "epoch": 0.10321692757612248, - "grad_norm": 0.001526356441900134, - "learning_rate": 0.0001999999948201841, - "loss": 46.0, - "step": 1350 - }, - { - "epoch": 0.1032933845595122, - "grad_norm": 0.0008646687492728233, - "learning_rate": 0.00019999999481245016, - "loss": 46.0, - "step": 1351 - }, - { - "epoch": 0.10336984154290192, - "grad_norm": 0.0016209310851991177, - "learning_rate": 0.00019999999480471044, - "loss": 46.0, - "step": 1352 - }, - { - "epoch": 0.10344629852629164, - "grad_norm": 0.0020330268889665604, - "learning_rate": 0.00019999999479696493, - "loss": 46.0, - "step": 1353 - }, - { - "epoch": 0.10352275550968136, - "grad_norm": 0.0007184363785199821, - "learning_rate": 0.00019999999478921367, - "loss": 46.0, - "step": 1354 - }, - { - "epoch": 0.10359921249307108, - "grad_norm": 0.0011526669841259718, - "learning_rate": 0.00019999999478145664, - "loss": 46.0, - "step": 1355 - }, - { - "epoch": 0.1036756694764608, - "grad_norm": 0.000992448185570538, - "learning_rate": 0.00019999999477369383, - "loss": 46.0, - "step": 1356 - }, - { - "epoch": 0.10375212645985053, - "grad_norm": 0.0019610212184488773, - "learning_rate": 0.00019999999476592528, - "loss": 46.0, - "step": 1357 - }, - { - "epoch": 0.10382858344324025, - "grad_norm": 0.008228207938373089, - "learning_rate": 0.00019999999475815093, - "loss": 46.0, - "step": 1358 - }, - { - "epoch": 0.10390504042662997, - "grad_norm": 0.0012455694377422333, - "learning_rate": 0.00019999999475037083, - "loss": 46.0, - "step": 1359 - }, - { - "epoch": 0.10398149741001969, - "grad_norm": 0.0009230130817741156, - "learning_rate": 0.00019999999474258496, - "loss": 46.0, - "step": 1360 - }, - { - "epoch": 0.10405795439340941, - "grad_norm": 0.0013253414072096348, - "learning_rate": 0.00019999999473479328, - "loss": 46.0, - "step": 1361 - }, - { - "epoch": 0.10413441137679913, - "grad_norm": 0.0012087085051462054, - "learning_rate": 0.0001999999947269959, - "loss": 46.0, - "step": 1362 - }, - { - "epoch": 0.10421086836018885, - "grad_norm": 0.00046159615158103406, - "learning_rate": 0.0001999999947191927, - "loss": 46.0, - "step": 1363 - }, - { - "epoch": 0.10428732534357857, - "grad_norm": 0.0009205068345181644, - "learning_rate": 0.00019999999471138376, - "loss": 46.0, - "step": 1364 - }, - { - "epoch": 0.10436378232696829, - "grad_norm": 0.0014563918812200427, - "learning_rate": 0.00019999999470356902, - "loss": 46.0, - "step": 1365 - }, - { - "epoch": 0.10444023931035801, - "grad_norm": 0.0005110208876430988, - "learning_rate": 0.00019999999469574853, - "loss": 46.0, - "step": 1366 - }, - { - "epoch": 0.10451669629374773, - "grad_norm": 0.0013468445977196097, - "learning_rate": 0.00019999999468792225, - "loss": 46.0, - "step": 1367 - }, - { - "epoch": 0.10459315327713745, - "grad_norm": 0.0008579662535339594, - "learning_rate": 0.00019999999468009021, - "loss": 46.0, - "step": 1368 - }, - { - "epoch": 0.10466961026052717, - "grad_norm": 0.0013018710305914283, - "learning_rate": 0.00019999999467225243, - "loss": 46.0, - "step": 1369 - }, - { - "epoch": 0.10474606724391688, - "grad_norm": 0.0007715172250755131, - "learning_rate": 0.00019999999466440883, - "loss": 46.0, - "step": 1370 - }, - { - "epoch": 0.10482252422730662, - "grad_norm": 0.0006521688774228096, - "learning_rate": 0.00019999999465655953, - "loss": 46.0, - "step": 1371 - }, - { - "epoch": 0.10489898121069634, - "grad_norm": 0.002342222724109888, - "learning_rate": 0.00019999999464870438, - "loss": 46.0, - "step": 1372 - }, - { - "epoch": 0.10497543819408606, - "grad_norm": 0.0009031341760419309, - "learning_rate": 0.0001999999946408435, - "loss": 46.0, - "step": 1373 - }, - { - "epoch": 0.10505189517747578, - "grad_norm": 0.0006895132246427238, - "learning_rate": 0.00019999999463297689, - "loss": 46.0, - "step": 1374 - }, - { - "epoch": 0.1051283521608655, - "grad_norm": 0.0007181079126894474, - "learning_rate": 0.00019999999462510444, - "loss": 46.0, - "step": 1375 - }, - { - "epoch": 0.10520480914425522, - "grad_norm": 0.0008163030724972486, - "learning_rate": 0.00019999999461722625, - "loss": 46.0, - "step": 1376 - }, - { - "epoch": 0.10528126612764493, - "grad_norm": 0.0004917309852316976, - "learning_rate": 0.00019999999460934228, - "loss": 46.0, - "step": 1377 - }, - { - "epoch": 0.10535772311103465, - "grad_norm": 0.001014159875921905, - "learning_rate": 0.00019999999460145257, - "loss": 46.0, - "step": 1378 - }, - { - "epoch": 0.10543418009442437, - "grad_norm": 0.0010242253774777055, - "learning_rate": 0.00019999999459355708, - "loss": 46.0, - "step": 1379 - }, - { - "epoch": 0.10551063707781409, - "grad_norm": 0.0006272322498261929, - "learning_rate": 0.0001999999945856558, - "loss": 46.0, - "step": 1380 - }, - { - "epoch": 0.10558709406120381, - "grad_norm": 0.0014750051777809858, - "learning_rate": 0.00019999999457774877, - "loss": 46.0, - "step": 1381 - }, - { - "epoch": 0.10566355104459353, - "grad_norm": 0.0006043561734259129, - "learning_rate": 0.00019999999456983596, - "loss": 46.0, - "step": 1382 - }, - { - "epoch": 0.10574000802798325, - "grad_norm": 0.0010625538416206837, - "learning_rate": 0.00019999999456191738, - "loss": 46.0, - "step": 1383 - }, - { - "epoch": 0.10581646501137297, - "grad_norm": 0.004903338849544525, - "learning_rate": 0.00019999999455399303, - "loss": 46.0, - "step": 1384 - }, - { - "epoch": 0.10589292199476269, - "grad_norm": 0.0020839418284595013, - "learning_rate": 0.00019999999454606293, - "loss": 46.0, - "step": 1385 - }, - { - "epoch": 0.10596937897815242, - "grad_norm": 0.0008051164913922548, - "learning_rate": 0.00019999999453812703, - "loss": 46.0, - "step": 1386 - }, - { - "epoch": 0.10604583596154214, - "grad_norm": 0.0009010107605718076, - "learning_rate": 0.00019999999453018539, - "loss": 46.0, - "step": 1387 - }, - { - "epoch": 0.10612229294493186, - "grad_norm": 0.0011650670785456896, - "learning_rate": 0.00019999999452223794, - "loss": 46.0, - "step": 1388 - }, - { - "epoch": 0.10619874992832158, - "grad_norm": 0.0010050187120214105, - "learning_rate": 0.00019999999451428477, - "loss": 46.0, - "step": 1389 - }, - { - "epoch": 0.1062752069117113, - "grad_norm": 0.000857661827467382, - "learning_rate": 0.0001999999945063258, - "loss": 46.0, - "step": 1390 - }, - { - "epoch": 0.10635166389510102, - "grad_norm": 0.0009748273878358305, - "learning_rate": 0.0001999999944983611, - "loss": 46.0, - "step": 1391 - }, - { - "epoch": 0.10642812087849074, - "grad_norm": 0.0028728360775858164, - "learning_rate": 0.00019999999449039059, - "loss": 46.0, - "step": 1392 - }, - { - "epoch": 0.10650457786188046, - "grad_norm": 0.0011886555003002286, - "learning_rate": 0.0001999999944824143, - "loss": 46.0, - "step": 1393 - }, - { - "epoch": 0.10658103484527018, - "grad_norm": 0.0017759384354576468, - "learning_rate": 0.00019999999447443227, - "loss": 46.0, - "step": 1394 - }, - { - "epoch": 0.1066574918286599, - "grad_norm": 0.000757109432015568, - "learning_rate": 0.00019999999446644444, - "loss": 46.0, - "step": 1395 - }, - { - "epoch": 0.10673394881204962, - "grad_norm": 0.00039343966636806726, - "learning_rate": 0.00019999999445845088, - "loss": 46.0, - "step": 1396 - }, - { - "epoch": 0.10681040579543934, - "grad_norm": 0.0009081176249310374, - "learning_rate": 0.00019999999445045153, - "loss": 46.0, - "step": 1397 - }, - { - "epoch": 0.10688686277882906, - "grad_norm": 0.0014924656134098768, - "learning_rate": 0.0001999999944424464, - "loss": 46.0, - "step": 1398 - }, - { - "epoch": 0.10696331976221878, - "grad_norm": 0.0026354840956628323, - "learning_rate": 0.0001999999944344355, - "loss": 46.0, - "step": 1399 - }, - { - "epoch": 0.10703977674560851, - "grad_norm": 0.00043676828499883413, - "learning_rate": 0.00019999999442641886, - "loss": 46.0, - "step": 1400 - }, - { - "epoch": 0.10711623372899823, - "grad_norm": 0.0005772305885329843, - "learning_rate": 0.00019999999441839644, - "loss": 46.0, - "step": 1401 - }, - { - "epoch": 0.10719269071238795, - "grad_norm": 0.0016556215705350041, - "learning_rate": 0.00019999999441036822, - "loss": 46.0, - "step": 1402 - }, - { - "epoch": 0.10726914769577767, - "grad_norm": 0.0012777734082192183, - "learning_rate": 0.0001999999944023343, - "loss": 46.0, - "step": 1403 - }, - { - "epoch": 0.10734560467916739, - "grad_norm": 0.0009159351466223598, - "learning_rate": 0.00019999999439429455, - "loss": 46.0, - "step": 1404 - }, - { - "epoch": 0.1074220616625571, - "grad_norm": 0.0022170941811054945, - "learning_rate": 0.00019999999438624904, - "loss": 46.0, - "step": 1405 - }, - { - "epoch": 0.10749851864594683, - "grad_norm": 0.0008294574217870831, - "learning_rate": 0.00019999999437819778, - "loss": 46.0, - "step": 1406 - }, - { - "epoch": 0.10757497562933654, - "grad_norm": 0.0010854746215045452, - "learning_rate": 0.00019999999437014072, - "loss": 46.0, - "step": 1407 - }, - { - "epoch": 0.10765143261272626, - "grad_norm": 0.0009142841445282102, - "learning_rate": 0.00019999999436207791, - "loss": 46.0, - "step": 1408 - }, - { - "epoch": 0.10772788959611598, - "grad_norm": 0.0007848692475818098, - "learning_rate": 0.00019999999435400934, - "loss": 46.0, - "step": 1409 - }, - { - "epoch": 0.1078043465795057, - "grad_norm": 0.0013571399031206965, - "learning_rate": 0.00019999999434593498, - "loss": 46.0, - "step": 1410 - }, - { - "epoch": 0.10788080356289542, - "grad_norm": 0.0006752159097231925, - "learning_rate": 0.00019999999433785486, - "loss": 46.0, - "step": 1411 - }, - { - "epoch": 0.10795726054628514, - "grad_norm": 0.0006309831514954567, - "learning_rate": 0.00019999999432976896, - "loss": 46.0, - "step": 1412 - }, - { - "epoch": 0.10803371752967486, - "grad_norm": 0.0014313762076199055, - "learning_rate": 0.00019999999432167732, - "loss": 46.0, - "step": 1413 - }, - { - "epoch": 0.10811017451306458, - "grad_norm": 0.0020970685873180628, - "learning_rate": 0.00019999999431357987, - "loss": 46.0, - "step": 1414 - }, - { - "epoch": 0.10818663149645431, - "grad_norm": 0.001207476481795311, - "learning_rate": 0.00019999999430547668, - "loss": 46.0, - "step": 1415 - }, - { - "epoch": 0.10826308847984403, - "grad_norm": 0.0011039258679375052, - "learning_rate": 0.00019999999429736774, - "loss": 46.0, - "step": 1416 - }, - { - "epoch": 0.10833954546323375, - "grad_norm": 0.0011299074394628406, - "learning_rate": 0.00019999999428925298, - "loss": 46.0, - "step": 1417 - }, - { - "epoch": 0.10841600244662347, - "grad_norm": 0.003093366278335452, - "learning_rate": 0.00019999999428113246, - "loss": 46.0, - "step": 1418 - }, - { - "epoch": 0.10849245943001319, - "grad_norm": 0.0013180439127609134, - "learning_rate": 0.0001999999942730062, - "loss": 46.0, - "step": 1419 - }, - { - "epoch": 0.10856891641340291, - "grad_norm": 0.002898380160331726, - "learning_rate": 0.00019999999426487418, - "loss": 46.0, - "step": 1420 - }, - { - "epoch": 0.10864537339679263, - "grad_norm": 0.001105992472730577, - "learning_rate": 0.00019999999425673632, - "loss": 46.0, - "step": 1421 - }, - { - "epoch": 0.10872183038018235, - "grad_norm": 0.0031519923359155655, - "learning_rate": 0.00019999999424859274, - "loss": 46.0, - "step": 1422 - }, - { - "epoch": 0.10879828736357207, - "grad_norm": 0.0011741496855393052, - "learning_rate": 0.0001999999942404434, - "loss": 46.0, - "step": 1423 - }, - { - "epoch": 0.10887474434696179, - "grad_norm": 0.001801345613785088, - "learning_rate": 0.0001999999942322883, - "loss": 46.0, - "step": 1424 - }, - { - "epoch": 0.10895120133035151, - "grad_norm": 0.0026288768276572227, - "learning_rate": 0.0001999999942241274, - "loss": 46.0, - "step": 1425 - }, - { - "epoch": 0.10902765831374123, - "grad_norm": 0.0019972689915448427, - "learning_rate": 0.00019999999421596072, - "loss": 46.0, - "step": 1426 - }, - { - "epoch": 0.10910411529713095, - "grad_norm": 0.001267926418222487, - "learning_rate": 0.00019999999420778827, - "loss": 46.0, - "step": 1427 - }, - { - "epoch": 0.10918057228052067, - "grad_norm": 0.0011345412349328399, - "learning_rate": 0.00019999999419961008, - "loss": 46.0, - "step": 1428 - }, - { - "epoch": 0.1092570292639104, - "grad_norm": 0.000510324549395591, - "learning_rate": 0.00019999999419142612, - "loss": 46.0, - "step": 1429 - }, - { - "epoch": 0.10933348624730012, - "grad_norm": 0.0005570969078689814, - "learning_rate": 0.00019999999418323638, - "loss": 46.0, - "step": 1430 - }, - { - "epoch": 0.10940994323068984, - "grad_norm": 0.0011050528846681118, - "learning_rate": 0.00019999999417504087, - "loss": 46.0, - "step": 1431 - }, - { - "epoch": 0.10948640021407956, - "grad_norm": 0.0005611145752482116, - "learning_rate": 0.00019999999416683958, - "loss": 46.0, - "step": 1432 - }, - { - "epoch": 0.10956285719746928, - "grad_norm": 0.0015184417134150863, - "learning_rate": 0.00019999999415863253, - "loss": 46.0, - "step": 1433 - }, - { - "epoch": 0.109639314180859, - "grad_norm": 0.0009100169991143048, - "learning_rate": 0.00019999999415041972, - "loss": 46.0, - "step": 1434 - }, - { - "epoch": 0.10971577116424872, - "grad_norm": 0.007965796627104282, - "learning_rate": 0.00019999999414220114, - "loss": 46.0, - "step": 1435 - }, - { - "epoch": 0.10979222814763843, - "grad_norm": 0.0007768543437123299, - "learning_rate": 0.0001999999941339768, - "loss": 46.0, - "step": 1436 - }, - { - "epoch": 0.10986868513102815, - "grad_norm": 0.0009607757674530149, - "learning_rate": 0.00019999999412574667, - "loss": 46.0, - "step": 1437 - }, - { - "epoch": 0.10994514211441787, - "grad_norm": 0.0006826695171184838, - "learning_rate": 0.00019999999411751077, - "loss": 46.0, - "step": 1438 - }, - { - "epoch": 0.11002159909780759, - "grad_norm": 0.0016457445453852415, - "learning_rate": 0.00019999999410926907, - "loss": 46.0, - "step": 1439 - }, - { - "epoch": 0.11009805608119731, - "grad_norm": 0.0008279511239379644, - "learning_rate": 0.00019999999410102166, - "loss": 46.0, - "step": 1440 - }, - { - "epoch": 0.11017451306458703, - "grad_norm": 0.001849550404585898, - "learning_rate": 0.00019999999409276846, - "loss": 46.0, - "step": 1441 - }, - { - "epoch": 0.11025097004797675, - "grad_norm": 0.0016342638991773129, - "learning_rate": 0.00019999999408450947, - "loss": 46.0, - "step": 1442 - }, - { - "epoch": 0.11032742703136647, - "grad_norm": 0.0023353334981948137, - "learning_rate": 0.00019999999407624474, - "loss": 46.0, - "step": 1443 - }, - { - "epoch": 0.1104038840147562, - "grad_norm": 0.0011888748267665505, - "learning_rate": 0.00019999999406797422, - "loss": 46.0, - "step": 1444 - }, - { - "epoch": 0.11048034099814592, - "grad_norm": 0.0009299297234974802, - "learning_rate": 0.00019999999405969794, - "loss": 46.0, - "step": 1445 - }, - { - "epoch": 0.11055679798153564, - "grad_norm": 0.0008306152885779738, - "learning_rate": 0.00019999999405141588, - "loss": 46.0, - "step": 1446 - }, - { - "epoch": 0.11063325496492536, - "grad_norm": 0.0020928976591676474, - "learning_rate": 0.00019999999404312808, - "loss": 46.0, - "step": 1447 - }, - { - "epoch": 0.11070971194831508, - "grad_norm": 0.0013051697751507163, - "learning_rate": 0.0001999999940348345, - "loss": 46.0, - "step": 1448 - }, - { - "epoch": 0.1107861689317048, - "grad_norm": 0.002895282581448555, - "learning_rate": 0.00019999999402653512, - "loss": 46.0, - "step": 1449 - }, - { - "epoch": 0.11086262591509452, - "grad_norm": 0.0020371994469314814, - "learning_rate": 0.00019999999401823, - "loss": 46.0, - "step": 1450 - }, - { - "epoch": 0.11093908289848424, - "grad_norm": 0.000690752174705267, - "learning_rate": 0.0001999999940099191, - "loss": 46.0, - "step": 1451 - }, - { - "epoch": 0.11101553988187396, - "grad_norm": 0.0012538381852209568, - "learning_rate": 0.00019999999400160243, - "loss": 46.0, - "step": 1452 - }, - { - "epoch": 0.11109199686526368, - "grad_norm": 0.0018687015399336815, - "learning_rate": 0.00019999999399327999, - "loss": 46.0, - "step": 1453 - }, - { - "epoch": 0.1111684538486534, - "grad_norm": 0.0005745233502238989, - "learning_rate": 0.00019999999398495177, - "loss": 46.0, - "step": 1454 - }, - { - "epoch": 0.11124491083204312, - "grad_norm": 0.0011587274493649602, - "learning_rate": 0.0001999999939766178, - "loss": 46.0, - "step": 1455 - }, - { - "epoch": 0.11132136781543284, - "grad_norm": 0.0012289321748539805, - "learning_rate": 0.00019999999396827804, - "loss": 46.0, - "step": 1456 - }, - { - "epoch": 0.11139782479882256, - "grad_norm": 0.0006031632656231523, - "learning_rate": 0.00019999999395993253, - "loss": 46.0, - "step": 1457 - }, - { - "epoch": 0.11147428178221229, - "grad_norm": 0.0010334086837247014, - "learning_rate": 0.00019999999395158127, - "loss": 46.0, - "step": 1458 - }, - { - "epoch": 0.11155073876560201, - "grad_norm": 0.0022469027899205685, - "learning_rate": 0.00019999999394322421, - "loss": 46.0, - "step": 1459 - }, - { - "epoch": 0.11162719574899173, - "grad_norm": 0.0007784420158714056, - "learning_rate": 0.00019999999393486138, - "loss": 46.0, - "step": 1460 - }, - { - "epoch": 0.11170365273238145, - "grad_norm": 0.0029464373365044594, - "learning_rate": 0.0001999999939264928, - "loss": 46.0, - "step": 1461 - }, - { - "epoch": 0.11178010971577117, - "grad_norm": 0.0010416472796350718, - "learning_rate": 0.00019999999391811843, - "loss": 46.0, - "step": 1462 - }, - { - "epoch": 0.11185656669916089, - "grad_norm": 0.0014502541162073612, - "learning_rate": 0.0001999999939097383, - "loss": 46.0, - "step": 1463 - }, - { - "epoch": 0.1119330236825506, - "grad_norm": 0.0013804328627884388, - "learning_rate": 0.00019999999390135238, - "loss": 46.0, - "step": 1464 - }, - { - "epoch": 0.11200948066594033, - "grad_norm": 0.0004934798344038427, - "learning_rate": 0.00019999999389296074, - "loss": 46.0, - "step": 1465 - }, - { - "epoch": 0.11208593764933004, - "grad_norm": 0.0012159311445429921, - "learning_rate": 0.00019999999388456327, - "loss": 46.0, - "step": 1466 - }, - { - "epoch": 0.11216239463271976, - "grad_norm": 0.0015492700040340424, - "learning_rate": 0.00019999999387616008, - "loss": 46.0, - "step": 1467 - }, - { - "epoch": 0.11223885161610948, - "grad_norm": 0.0008706760709173977, - "learning_rate": 0.00019999999386775109, - "loss": 46.0, - "step": 1468 - }, - { - "epoch": 0.1123153085994992, - "grad_norm": 0.0009424204472452402, - "learning_rate": 0.00019999999385933635, - "loss": 46.0, - "step": 1469 - }, - { - "epoch": 0.11239176558288892, - "grad_norm": 0.0019854246638715267, - "learning_rate": 0.00019999999385091584, - "loss": 46.0, - "step": 1470 - }, - { - "epoch": 0.11246822256627864, - "grad_norm": 0.0011662030592560768, - "learning_rate": 0.00019999999384248955, - "loss": 46.0, - "step": 1471 - }, - { - "epoch": 0.11254467954966836, - "grad_norm": 0.0038098893128335476, - "learning_rate": 0.00019999999383405747, - "loss": 46.0, - "step": 1472 - }, - { - "epoch": 0.1126211365330581, - "grad_norm": 0.0011925911530852318, - "learning_rate": 0.00019999999382561967, - "loss": 46.0, - "step": 1473 - }, - { - "epoch": 0.11269759351644781, - "grad_norm": 0.0010681368876248598, - "learning_rate": 0.00019999999381717606, - "loss": 46.0, - "step": 1474 - }, - { - "epoch": 0.11277405049983753, - "grad_norm": 0.002948192646726966, - "learning_rate": 0.00019999999380872669, - "loss": 46.0, - "step": 1475 - }, - { - "epoch": 0.11285050748322725, - "grad_norm": 0.001269427128136158, - "learning_rate": 0.00019999999380027153, - "loss": 46.0, - "step": 1476 - }, - { - "epoch": 0.11292696446661697, - "grad_norm": 0.001073826220817864, - "learning_rate": 0.00019999999379181067, - "loss": 46.0, - "step": 1477 - }, - { - "epoch": 0.11300342145000669, - "grad_norm": 0.0010608448646962643, - "learning_rate": 0.00019999999378334397, - "loss": 46.0, - "step": 1478 - }, - { - "epoch": 0.11307987843339641, - "grad_norm": 0.0005864224513061345, - "learning_rate": 0.00019999999377487155, - "loss": 46.0, - "step": 1479 - }, - { - "epoch": 0.11315633541678613, - "grad_norm": 0.0008025203133001924, - "learning_rate": 0.0001999999937663933, - "loss": 46.0, - "step": 1480 - }, - { - "epoch": 0.11323279240017585, - "grad_norm": 0.0012322886614128947, - "learning_rate": 0.00019999999375790934, - "loss": 46.0, - "step": 1481 - }, - { - "epoch": 0.11330924938356557, - "grad_norm": 0.0012553668348118663, - "learning_rate": 0.0001999999937494196, - "loss": 46.0, - "step": 1482 - }, - { - "epoch": 0.11338570636695529, - "grad_norm": 0.0005710609257221222, - "learning_rate": 0.0001999999937409241, - "loss": 46.0, - "step": 1483 - }, - { - "epoch": 0.11346216335034501, - "grad_norm": 0.002128420863300562, - "learning_rate": 0.00019999999373242279, - "loss": 46.0, - "step": 1484 - }, - { - "epoch": 0.11353862033373473, - "grad_norm": 0.001062526716850698, - "learning_rate": 0.0001999999937239157, - "loss": 46.0, - "step": 1485 - }, - { - "epoch": 0.11361507731712445, - "grad_norm": 0.0016776778502389789, - "learning_rate": 0.0001999999937154029, - "loss": 46.0, - "step": 1486 - }, - { - "epoch": 0.11369153430051418, - "grad_norm": 0.0010068764677271247, - "learning_rate": 0.0001999999937068843, - "loss": 46.0, - "step": 1487 - }, - { - "epoch": 0.1137679912839039, - "grad_norm": 0.0009164699586108327, - "learning_rate": 0.00019999999369835992, - "loss": 46.0, - "step": 1488 - }, - { - "epoch": 0.11384444826729362, - "grad_norm": 0.0009315891074948013, - "learning_rate": 0.00019999999368982977, - "loss": 46.0, - "step": 1489 - }, - { - "epoch": 0.11392090525068334, - "grad_norm": 0.004191101063042879, - "learning_rate": 0.00019999999368129387, - "loss": 46.0, - "step": 1490 - }, - { - "epoch": 0.11399736223407306, - "grad_norm": 0.001186610315926373, - "learning_rate": 0.0001999999936727522, - "loss": 46.0, - "step": 1491 - }, - { - "epoch": 0.11407381921746278, - "grad_norm": 0.0006289630546234548, - "learning_rate": 0.00019999999366420473, - "loss": 46.0, - "step": 1492 - }, - { - "epoch": 0.1141502762008525, - "grad_norm": 0.0005648771184496582, - "learning_rate": 0.00019999999365565154, - "loss": 46.0, - "step": 1493 - }, - { - "epoch": 0.11422673318424222, - "grad_norm": 0.0008261979091912508, - "learning_rate": 0.00019999999364709255, - "loss": 46.0, - "step": 1494 - }, - { - "epoch": 0.11430319016763194, - "grad_norm": 0.0027124767657369375, - "learning_rate": 0.0001999999936385278, - "loss": 46.0, - "step": 1495 - }, - { - "epoch": 0.11437964715102165, - "grad_norm": 0.00077613111352548, - "learning_rate": 0.00019999999362995728, - "loss": 46.0, - "step": 1496 - }, - { - "epoch": 0.11445610413441137, - "grad_norm": 0.0007746916962787509, - "learning_rate": 0.00019999999362138097, - "loss": 46.0, - "step": 1497 - }, - { - "epoch": 0.1145325611178011, - "grad_norm": 0.002079334808513522, - "learning_rate": 0.00019999999361279894, - "loss": 46.0, - "step": 1498 - }, - { - "epoch": 0.11460901810119081, - "grad_norm": 0.00495891971513629, - "learning_rate": 0.00019999999360421108, - "loss": 46.0, - "step": 1499 - }, - { - "epoch": 0.11468547508458053, - "grad_norm": 0.0011691523250192404, - "learning_rate": 0.00019999999359561748, - "loss": 46.0, - "step": 1500 - }, - { - "epoch": 0.11476193206797025, - "grad_norm": 0.0010779015719890594, - "learning_rate": 0.0001999999935870181, - "loss": 46.0, - "step": 1501 - }, - { - "epoch": 0.11483838905135998, - "grad_norm": 0.0004119532823096961, - "learning_rate": 0.00019999999357841298, - "loss": 46.0, - "step": 1502 - }, - { - "epoch": 0.1149148460347497, - "grad_norm": 0.0021948053035885096, - "learning_rate": 0.00019999999356980206, - "loss": 46.0, - "step": 1503 - }, - { - "epoch": 0.11499130301813942, - "grad_norm": 0.0027411372866481543, - "learning_rate": 0.00019999999356118539, - "loss": 46.0, - "step": 1504 - }, - { - "epoch": 0.11506776000152914, - "grad_norm": 0.003387674456462264, - "learning_rate": 0.00019999999355256294, - "loss": 46.0, - "step": 1505 - }, - { - "epoch": 0.11514421698491886, - "grad_norm": 0.0023454974871128798, - "learning_rate": 0.00019999999354393473, - "loss": 46.0, - "step": 1506 - }, - { - "epoch": 0.11522067396830858, - "grad_norm": 0.0013973686145618558, - "learning_rate": 0.0001999999935353007, - "loss": 46.0, - "step": 1507 - }, - { - "epoch": 0.1152971309516983, - "grad_norm": 0.00044357802835293114, - "learning_rate": 0.00019999999352666097, - "loss": 46.0, - "step": 1508 - }, - { - "epoch": 0.11537358793508802, - "grad_norm": 0.0015124761266633868, - "learning_rate": 0.00019999999351801544, - "loss": 46.0, - "step": 1509 - }, - { - "epoch": 0.11545004491847774, - "grad_norm": 0.0008096142555586994, - "learning_rate": 0.00019999999350936415, - "loss": 46.0, - "step": 1510 - }, - { - "epoch": 0.11552650190186746, - "grad_norm": 0.001232494949363172, - "learning_rate": 0.00019999999350070707, - "loss": 46.0, - "step": 1511 - }, - { - "epoch": 0.11560295888525718, - "grad_norm": 0.0010967283742502332, - "learning_rate": 0.00019999999349204424, - "loss": 46.0, - "step": 1512 - }, - { - "epoch": 0.1156794158686469, - "grad_norm": 0.0011971114436164498, - "learning_rate": 0.00019999999348337564, - "loss": 46.0, - "step": 1513 - }, - { - "epoch": 0.11575587285203662, - "grad_norm": 0.0020442737732082605, - "learning_rate": 0.00019999999347470126, - "loss": 46.0, - "step": 1514 - }, - { - "epoch": 0.11583232983542634, - "grad_norm": 0.0030483112204819918, - "learning_rate": 0.0001999999934660211, - "loss": 46.0, - "step": 1515 - }, - { - "epoch": 0.11590878681881607, - "grad_norm": 0.0011137648252770305, - "learning_rate": 0.0001999999934573352, - "loss": 46.0, - "step": 1516 - }, - { - "epoch": 0.11598524380220579, - "grad_norm": 0.0008476824150420725, - "learning_rate": 0.00019999999344864352, - "loss": 46.0, - "step": 1517 - }, - { - "epoch": 0.11606170078559551, - "grad_norm": 0.000687445281073451, - "learning_rate": 0.00019999999343994608, - "loss": 46.0, - "step": 1518 - }, - { - "epoch": 0.11613815776898523, - "grad_norm": 0.00230810372158885, - "learning_rate": 0.00019999999343124286, - "loss": 46.0, - "step": 1519 - }, - { - "epoch": 0.11621461475237495, - "grad_norm": 0.0007136607891879976, - "learning_rate": 0.00019999999342253387, - "loss": 46.0, - "step": 1520 - }, - { - "epoch": 0.11629107173576467, - "grad_norm": 0.0013810483505949378, - "learning_rate": 0.0001999999934138191, - "loss": 46.0, - "step": 1521 - }, - { - "epoch": 0.11636752871915439, - "grad_norm": 0.0020662571769207716, - "learning_rate": 0.00019999999340509857, - "loss": 46.0, - "step": 1522 - }, - { - "epoch": 0.1164439857025441, - "grad_norm": 0.0006200867937877774, - "learning_rate": 0.00019999999339637226, - "loss": 46.0, - "step": 1523 - }, - { - "epoch": 0.11652044268593383, - "grad_norm": 0.001146310125477612, - "learning_rate": 0.0001999999933876402, - "loss": 46.0, - "step": 1524 - }, - { - "epoch": 0.11659689966932354, - "grad_norm": 0.001682683709077537, - "learning_rate": 0.00019999999337890235, - "loss": 46.0, - "step": 1525 - }, - { - "epoch": 0.11667335665271326, - "grad_norm": 0.0017838873900473118, - "learning_rate": 0.00019999999337015875, - "loss": 46.0, - "step": 1526 - }, - { - "epoch": 0.11674981363610298, - "grad_norm": 0.0004894313751719892, - "learning_rate": 0.00019999999336140937, - "loss": 46.0, - "step": 1527 - }, - { - "epoch": 0.1168262706194927, - "grad_norm": 0.002732678782194853, - "learning_rate": 0.00019999999335265422, - "loss": 46.0, - "step": 1528 - }, - { - "epoch": 0.11690272760288242, - "grad_norm": 0.0005846915300935507, - "learning_rate": 0.00019999999334389333, - "loss": 46.0, - "step": 1529 - }, - { - "epoch": 0.11697918458627214, - "grad_norm": 0.0008995202369987965, - "learning_rate": 0.00019999999333512666, - "loss": 46.0, - "step": 1530 - }, - { - "epoch": 0.11705564156966188, - "grad_norm": 0.0016604404663667083, - "learning_rate": 0.0001999999933263542, - "loss": 46.0, - "step": 1531 - }, - { - "epoch": 0.1171320985530516, - "grad_norm": 0.001645913696847856, - "learning_rate": 0.00019999999331757595, - "loss": 46.0, - "step": 1532 - }, - { - "epoch": 0.11720855553644131, - "grad_norm": 0.006875058636069298, - "learning_rate": 0.00019999999330879198, - "loss": 46.0, - "step": 1533 - }, - { - "epoch": 0.11728501251983103, - "grad_norm": 0.0011267911177128553, - "learning_rate": 0.00019999999330000222, - "loss": 46.0, - "step": 1534 - }, - { - "epoch": 0.11736146950322075, - "grad_norm": 0.0006858114502392709, - "learning_rate": 0.00019999999329120666, - "loss": 46.0, - "step": 1535 - }, - { - "epoch": 0.11743792648661047, - "grad_norm": 0.0014258320443332195, - "learning_rate": 0.00019999999328240535, - "loss": 46.0, - "step": 1536 - }, - { - "epoch": 0.11751438347000019, - "grad_norm": 0.0008733182330615819, - "learning_rate": 0.00019999999327359832, - "loss": 46.0, - "step": 1537 - }, - { - "epoch": 0.11759084045338991, - "grad_norm": 0.0006535431020893157, - "learning_rate": 0.00019999999326478544, - "loss": 46.0, - "step": 1538 - }, - { - "epoch": 0.11766729743677963, - "grad_norm": 0.0006404178566299379, - "learning_rate": 0.00019999999325596686, - "loss": 46.0, - "step": 1539 - }, - { - "epoch": 0.11774375442016935, - "grad_norm": 0.0011238687438890338, - "learning_rate": 0.00019999999324714246, - "loss": 46.0, - "step": 1540 - }, - { - "epoch": 0.11782021140355907, - "grad_norm": 0.0014135445235297084, - "learning_rate": 0.0001999999932383123, - "loss": 46.0, - "step": 1541 - }, - { - "epoch": 0.11789666838694879, - "grad_norm": 0.0005383029347285628, - "learning_rate": 0.00019999999322947641, - "loss": 46.0, - "step": 1542 - }, - { - "epoch": 0.11797312537033851, - "grad_norm": 0.0007065276149660349, - "learning_rate": 0.00019999999322063472, - "loss": 46.0, - "step": 1543 - }, - { - "epoch": 0.11804958235372823, - "grad_norm": 0.0010419221362099051, - "learning_rate": 0.00019999999321178725, - "loss": 46.0, - "step": 1544 - }, - { - "epoch": 0.11812603933711796, - "grad_norm": 0.0015286566922441125, - "learning_rate": 0.00019999999320293403, - "loss": 46.0, - "step": 1545 - }, - { - "epoch": 0.11820249632050768, - "grad_norm": 0.0021061627194285393, - "learning_rate": 0.00019999999319407502, - "loss": 46.0, - "step": 1546 - }, - { - "epoch": 0.1182789533038974, - "grad_norm": 0.005479545332491398, - "learning_rate": 0.00019999999318521026, - "loss": 46.0, - "step": 1547 - }, - { - "epoch": 0.11835541028728712, - "grad_norm": 0.0011062328703701496, - "learning_rate": 0.0001999999931763397, - "loss": 46.0, - "step": 1548 - }, - { - "epoch": 0.11843186727067684, - "grad_norm": 0.0010945976246148348, - "learning_rate": 0.00019999999316746344, - "loss": 46.0, - "step": 1549 - }, - { - "epoch": 0.11850832425406656, - "grad_norm": 0.0019600046798586845, - "learning_rate": 0.00019999999315858133, - "loss": 46.0, - "step": 1550 - }, - { - "epoch": 0.11858478123745628, - "grad_norm": 0.0015371062327176332, - "learning_rate": 0.0001999999931496935, - "loss": 46.0, - "step": 1551 - }, - { - "epoch": 0.118661238220846, - "grad_norm": 0.0026392927393317223, - "learning_rate": 0.00019999999314079987, - "loss": 46.0, - "step": 1552 - }, - { - "epoch": 0.11873769520423572, - "grad_norm": 0.0008766052778810263, - "learning_rate": 0.0001999999931319005, - "loss": 46.0, - "step": 1553 - }, - { - "epoch": 0.11881415218762544, - "grad_norm": 0.0020937644876539707, - "learning_rate": 0.00019999999312299535, - "loss": 46.0, - "step": 1554 - }, - { - "epoch": 0.11889060917101515, - "grad_norm": 0.0025007661897689104, - "learning_rate": 0.00019999999311408443, - "loss": 46.0, - "step": 1555 - }, - { - "epoch": 0.11896706615440487, - "grad_norm": 0.0004965580883435905, - "learning_rate": 0.00019999999310516776, - "loss": 46.0, - "step": 1556 - }, - { - "epoch": 0.1190435231377946, - "grad_norm": 0.001314135268330574, - "learning_rate": 0.0001999999930962453, - "loss": 46.0, - "step": 1557 - }, - { - "epoch": 0.11911998012118431, - "grad_norm": 0.0008292917627841234, - "learning_rate": 0.00019999999308731705, - "loss": 46.0, - "step": 1558 - }, - { - "epoch": 0.11919643710457403, - "grad_norm": 0.0013552983291447163, - "learning_rate": 0.00019999999307838303, - "loss": 46.0, - "step": 1559 - }, - { - "epoch": 0.11927289408796377, - "grad_norm": 0.0005517539102584124, - "learning_rate": 0.00019999999306944327, - "loss": 46.0, - "step": 1560 - }, - { - "epoch": 0.11934935107135348, - "grad_norm": 0.0017526630545035005, - "learning_rate": 0.00019999999306049776, - "loss": 46.0, - "step": 1561 - }, - { - "epoch": 0.1194258080547432, - "grad_norm": 0.0008628126233816147, - "learning_rate": 0.00019999999305154642, - "loss": 46.0, - "step": 1562 - }, - { - "epoch": 0.11950226503813292, - "grad_norm": 0.0014163063606247306, - "learning_rate": 0.00019999999304258937, - "loss": 46.0, - "step": 1563 - }, - { - "epoch": 0.11957872202152264, - "grad_norm": 0.001431107404641807, - "learning_rate": 0.00019999999303362651, - "loss": 46.0, - "step": 1564 - }, - { - "epoch": 0.11965517900491236, - "grad_norm": 0.004410839639604092, - "learning_rate": 0.00019999999302465788, - "loss": 46.0, - "step": 1565 - }, - { - "epoch": 0.11973163598830208, - "grad_norm": 0.0005859207012690604, - "learning_rate": 0.0001999999930156835, - "loss": 46.0, - "step": 1566 - }, - { - "epoch": 0.1198080929716918, - "grad_norm": 0.0008173504029400647, - "learning_rate": 0.00019999999300670336, - "loss": 46.0, - "step": 1567 - }, - { - "epoch": 0.11988454995508152, - "grad_norm": 0.003606301499530673, - "learning_rate": 0.00019999999299771744, - "loss": 46.0, - "step": 1568 - }, - { - "epoch": 0.11996100693847124, - "grad_norm": 0.0006822749273851514, - "learning_rate": 0.00019999999298872574, - "loss": 46.0, - "step": 1569 - }, - { - "epoch": 0.12003746392186096, - "grad_norm": 0.0013308299239724874, - "learning_rate": 0.00019999999297972828, - "loss": 46.0, - "step": 1570 - }, - { - "epoch": 0.12011392090525068, - "grad_norm": 0.0026975388173013926, - "learning_rate": 0.00019999999297072503, - "loss": 46.0, - "step": 1571 - }, - { - "epoch": 0.1201903778886404, - "grad_norm": 0.0006454059621319175, - "learning_rate": 0.00019999999296171602, - "loss": 46.0, - "step": 1572 - }, - { - "epoch": 0.12026683487203012, - "grad_norm": 0.0020386148244142532, - "learning_rate": 0.00019999999295270126, - "loss": 46.0, - "step": 1573 - }, - { - "epoch": 0.12034329185541985, - "grad_norm": 0.0009303594706580043, - "learning_rate": 0.0001999999929436807, - "loss": 46.0, - "step": 1574 - }, - { - "epoch": 0.12041974883880957, - "grad_norm": 0.001186365494504571, - "learning_rate": 0.00019999999293465441, - "loss": 46.0, - "step": 1575 - }, - { - "epoch": 0.12049620582219929, - "grad_norm": 0.0010818771552294493, - "learning_rate": 0.00019999999292562233, - "loss": 46.0, - "step": 1576 - }, - { - "epoch": 0.12057266280558901, - "grad_norm": 0.001366869080811739, - "learning_rate": 0.00019999999291658448, - "loss": 46.0, - "step": 1577 - }, - { - "epoch": 0.12064911978897873, - "grad_norm": 0.0028633789625018835, - "learning_rate": 0.00019999999290754085, - "loss": 46.0, - "step": 1578 - }, - { - "epoch": 0.12072557677236845, - "grad_norm": 0.0005275193834677339, - "learning_rate": 0.00019999999289849147, - "loss": 46.0, - "step": 1579 - }, - { - "epoch": 0.12080203375575817, - "grad_norm": 0.0007849180838093162, - "learning_rate": 0.00019999999288943633, - "loss": 46.0, - "step": 1580 - }, - { - "epoch": 0.12087849073914789, - "grad_norm": 0.0007656039670109749, - "learning_rate": 0.00019999999288037538, - "loss": 46.0, - "step": 1581 - }, - { - "epoch": 0.1209549477225376, - "grad_norm": 0.0012410631170496345, - "learning_rate": 0.00019999999287130868, - "loss": 46.0, - "step": 1582 - }, - { - "epoch": 0.12103140470592733, - "grad_norm": 0.002462558913975954, - "learning_rate": 0.00019999999286223621, - "loss": 46.0, - "step": 1583 - }, - { - "epoch": 0.12110786168931705, - "grad_norm": 0.0009302882244810462, - "learning_rate": 0.00019999999285315797, - "loss": 46.0, - "step": 1584 - }, - { - "epoch": 0.12118431867270676, - "grad_norm": 0.0015897693810984492, - "learning_rate": 0.00019999999284407398, - "loss": 46.0, - "step": 1585 - }, - { - "epoch": 0.12126077565609648, - "grad_norm": 0.001534050446934998, - "learning_rate": 0.0001999999928349842, - "loss": 46.0, - "step": 1586 - }, - { - "epoch": 0.1213372326394862, - "grad_norm": 0.0007494865567423403, - "learning_rate": 0.00019999999282588866, - "loss": 46.0, - "step": 1587 - }, - { - "epoch": 0.12141368962287592, - "grad_norm": 0.0010759225115180016, - "learning_rate": 0.00019999999281678735, - "loss": 46.0, - "step": 1588 - }, - { - "epoch": 0.12149014660626566, - "grad_norm": 0.0006454409449361265, - "learning_rate": 0.00019999999280768024, - "loss": 46.0, - "step": 1589 - }, - { - "epoch": 0.12156660358965538, - "grad_norm": 0.0008099248516373336, - "learning_rate": 0.00019999999279856742, - "loss": 46.0, - "step": 1590 - }, - { - "epoch": 0.1216430605730451, - "grad_norm": 0.0014339295448735356, - "learning_rate": 0.0001999999927894488, - "loss": 46.0, - "step": 1591 - }, - { - "epoch": 0.12171951755643481, - "grad_norm": 0.0006267589633353055, - "learning_rate": 0.0001999999927803244, - "loss": 46.0, - "step": 1592 - }, - { - "epoch": 0.12179597453982453, - "grad_norm": 0.000685067439917475, - "learning_rate": 0.00019999999277119424, - "loss": 46.0, - "step": 1593 - }, - { - "epoch": 0.12187243152321425, - "grad_norm": 0.001076327171176672, - "learning_rate": 0.0001999999927620583, - "loss": 46.0, - "step": 1594 - }, - { - "epoch": 0.12194888850660397, - "grad_norm": 0.00198566191829741, - "learning_rate": 0.0001999999927529166, - "loss": 46.0, - "step": 1595 - }, - { - "epoch": 0.12202534548999369, - "grad_norm": 0.00043639555224217474, - "learning_rate": 0.00019999999274376916, - "loss": 46.0, - "step": 1596 - }, - { - "epoch": 0.12210180247338341, - "grad_norm": 0.0013530422002077103, - "learning_rate": 0.0001999999927346159, - "loss": 46.0, - "step": 1597 - }, - { - "epoch": 0.12217825945677313, - "grad_norm": 0.0025155062321573496, - "learning_rate": 0.00019999999272545687, - "loss": 46.0, - "step": 1598 - }, - { - "epoch": 0.12225471644016285, - "grad_norm": 0.0008100735140033066, - "learning_rate": 0.0001999999927162921, - "loss": 46.0, - "step": 1599 - }, - { - "epoch": 0.12233117342355257, - "grad_norm": 0.0015133937122300267, - "learning_rate": 0.00019999999270712158, - "loss": 46.0, - "step": 1600 - }, - { - "epoch": 0.12240763040694229, - "grad_norm": 0.0012205112725496292, - "learning_rate": 0.00019999999269794524, - "loss": 46.0, - "step": 1601 - }, - { - "epoch": 0.12248408739033201, - "grad_norm": 0.0005722329951822758, - "learning_rate": 0.00019999999268876316, - "loss": 46.0, - "step": 1602 - }, - { - "epoch": 0.12256054437372174, - "grad_norm": 0.0007153971237130463, - "learning_rate": 0.0001999999926795753, - "loss": 46.0, - "step": 1603 - }, - { - "epoch": 0.12263700135711146, - "grad_norm": 0.0010634405771270394, - "learning_rate": 0.00019999999267038168, - "loss": 46.0, - "step": 1604 - }, - { - "epoch": 0.12271345834050118, - "grad_norm": 0.0007890252163633704, - "learning_rate": 0.00019999999266118228, - "loss": 46.0, - "step": 1605 - }, - { - "epoch": 0.1227899153238909, - "grad_norm": 0.0008101451676338911, - "learning_rate": 0.0001999999926519771, - "loss": 46.0, - "step": 1606 - }, - { - "epoch": 0.12286637230728062, - "grad_norm": 0.0022999884095042944, - "learning_rate": 0.00019999999264276618, - "loss": 46.0, - "step": 1607 - }, - { - "epoch": 0.12294282929067034, - "grad_norm": 0.001534348470158875, - "learning_rate": 0.0001999999926335495, - "loss": 46.0, - "step": 1608 - }, - { - "epoch": 0.12301928627406006, - "grad_norm": 0.0007720842259004712, - "learning_rate": 0.000199999992624327, - "loss": 46.0, - "step": 1609 - }, - { - "epoch": 0.12309574325744978, - "grad_norm": 0.0006925991037860513, - "learning_rate": 0.00019999999261509878, - "loss": 46.0, - "step": 1610 - }, - { - "epoch": 0.1231722002408395, - "grad_norm": 0.0008491818443872035, - "learning_rate": 0.00019999999260586474, - "loss": 46.0, - "step": 1611 - }, - { - "epoch": 0.12324865722422922, - "grad_norm": 0.0011237066937610507, - "learning_rate": 0.00019999999259662498, - "loss": 46.0, - "step": 1612 - }, - { - "epoch": 0.12332511420761894, - "grad_norm": 0.0008256579167209566, - "learning_rate": 0.00019999999258737942, - "loss": 46.0, - "step": 1613 - }, - { - "epoch": 0.12340157119100865, - "grad_norm": 0.00036529547651298344, - "learning_rate": 0.0001999999925781281, - "loss": 46.0, - "step": 1614 - }, - { - "epoch": 0.12347802817439837, - "grad_norm": 0.0004993222537450492, - "learning_rate": 0.000199999992568871, - "loss": 46.0, - "step": 1615 - }, - { - "epoch": 0.1235544851577881, - "grad_norm": 0.00123459508176893, - "learning_rate": 0.00019999999255960815, - "loss": 46.0, - "step": 1616 - }, - { - "epoch": 0.12363094214117781, - "grad_norm": 0.0006311935721896589, - "learning_rate": 0.00019999999255033952, - "loss": 46.0, - "step": 1617 - }, - { - "epoch": 0.12370739912456755, - "grad_norm": 0.001889835111796856, - "learning_rate": 0.00019999999254106515, - "loss": 46.0, - "step": 1618 - }, - { - "epoch": 0.12378385610795727, - "grad_norm": 0.0004617491504177451, - "learning_rate": 0.000199999992531785, - "loss": 46.0, - "step": 1619 - }, - { - "epoch": 0.12386031309134699, - "grad_norm": 0.000762148411013186, - "learning_rate": 0.00019999999252249902, - "loss": 46.0, - "step": 1620 - }, - { - "epoch": 0.1239367700747367, - "grad_norm": 0.0007758060819469392, - "learning_rate": 0.0001999999925132073, - "loss": 46.0, - "step": 1621 - }, - { - "epoch": 0.12401322705812642, - "grad_norm": 0.0009397360845468938, - "learning_rate": 0.00019999999250390986, - "loss": 46.0, - "step": 1622 - }, - { - "epoch": 0.12408968404151614, - "grad_norm": 0.001772842719219625, - "learning_rate": 0.00019999999249460662, - "loss": 46.0, - "step": 1623 - }, - { - "epoch": 0.12416614102490586, - "grad_norm": 0.0011918810196220875, - "learning_rate": 0.00019999999248529758, - "loss": 46.0, - "step": 1624 - }, - { - "epoch": 0.12424259800829558, - "grad_norm": 0.0007972493185661733, - "learning_rate": 0.00019999999247598282, - "loss": 46.0, - "step": 1625 - }, - { - "epoch": 0.1243190549916853, - "grad_norm": 0.006198294460773468, - "learning_rate": 0.00019999999246666226, - "loss": 46.0, - "step": 1626 - }, - { - "epoch": 0.12439551197507502, - "grad_norm": 0.0014145338209345937, - "learning_rate": 0.00019999999245733593, - "loss": 46.0, - "step": 1627 - }, - { - "epoch": 0.12447196895846474, - "grad_norm": 0.0006201372598297894, - "learning_rate": 0.00019999999244800384, - "loss": 46.0, - "step": 1628 - }, - { - "epoch": 0.12454842594185446, - "grad_norm": 0.0004219146794639528, - "learning_rate": 0.000199999992438666, - "loss": 46.0, - "step": 1629 - }, - { - "epoch": 0.12462488292524418, - "grad_norm": 0.0005916628870181739, - "learning_rate": 0.00019999999242932234, - "loss": 46.0, - "step": 1630 - }, - { - "epoch": 0.1247013399086339, - "grad_norm": 0.003980670124292374, - "learning_rate": 0.00019999999241997296, - "loss": 46.0, - "step": 1631 - }, - { - "epoch": 0.12477779689202363, - "grad_norm": 0.002719270996749401, - "learning_rate": 0.0001999999924106178, - "loss": 46.0, - "step": 1632 - }, - { - "epoch": 0.12485425387541335, - "grad_norm": 0.0005816468619741499, - "learning_rate": 0.00019999999240125684, - "loss": 46.0, - "step": 1633 - }, - { - "epoch": 0.12493071085880307, - "grad_norm": 0.0010520482901483774, - "learning_rate": 0.00019999999239189012, - "loss": 46.0, - "step": 1634 - }, - { - "epoch": 0.1250071678421928, - "grad_norm": 0.0015013177180662751, - "learning_rate": 0.00019999999238251765, - "loss": 46.0, - "step": 1635 - }, - { - "epoch": 0.1250836248255825, - "grad_norm": 0.0013738133711740375, - "learning_rate": 0.0001999999923731394, - "loss": 46.0, - "step": 1636 - }, - { - "epoch": 0.12516008180897223, - "grad_norm": 0.0010658979881554842, - "learning_rate": 0.0001999999923637554, - "loss": 46.0, - "step": 1637 - }, - { - "epoch": 0.12523653879236193, - "grad_norm": 0.0010128950234502554, - "learning_rate": 0.0001999999923543656, - "loss": 46.0, - "step": 1638 - }, - { - "epoch": 0.12531299577575167, - "grad_norm": 0.0007655302179045975, - "learning_rate": 0.00019999999234497005, - "loss": 46.0, - "step": 1639 - }, - { - "epoch": 0.1253894527591414, - "grad_norm": 0.0012335422215983272, - "learning_rate": 0.00019999999233556872, - "loss": 46.0, - "step": 1640 - }, - { - "epoch": 0.1254659097425311, - "grad_norm": 0.0015967022627592087, - "learning_rate": 0.00019999999232616164, - "loss": 46.0, - "step": 1641 - }, - { - "epoch": 0.12554236672592084, - "grad_norm": 0.0010169546585530043, - "learning_rate": 0.00019999999231674878, - "loss": 46.0, - "step": 1642 - }, - { - "epoch": 0.12561882370931055, - "grad_norm": 0.0006275867926888168, - "learning_rate": 0.00019999999230733013, - "loss": 46.0, - "step": 1643 - }, - { - "epoch": 0.12569528069270028, - "grad_norm": 0.0013624482089653611, - "learning_rate": 0.0001999999922979057, - "loss": 46.0, - "step": 1644 - }, - { - "epoch": 0.12577173767608998, - "grad_norm": 0.00048296854947693646, - "learning_rate": 0.00019999999228847556, - "loss": 46.0, - "step": 1645 - }, - { - "epoch": 0.12584819465947972, - "grad_norm": 0.0007546735578216612, - "learning_rate": 0.0001999999922790396, - "loss": 46.0, - "step": 1646 - }, - { - "epoch": 0.12592465164286942, - "grad_norm": 0.0007814841228537261, - "learning_rate": 0.00019999999226959792, - "loss": 46.0, - "step": 1647 - }, - { - "epoch": 0.12600110862625916, - "grad_norm": 0.003359179710969329, - "learning_rate": 0.00019999999226015042, - "loss": 46.0, - "step": 1648 - }, - { - "epoch": 0.12607756560964886, - "grad_norm": 0.0009251692681573331, - "learning_rate": 0.00019999999225069716, - "loss": 46.0, - "step": 1649 - }, - { - "epoch": 0.1261540225930386, - "grad_norm": 0.0019433617126196623, - "learning_rate": 0.00019999999224123814, - "loss": 46.0, - "step": 1650 - }, - { - "epoch": 0.1262304795764283, - "grad_norm": 0.0014663782203570008, - "learning_rate": 0.00019999999223177338, - "loss": 46.0, - "step": 1651 - }, - { - "epoch": 0.12630693655981803, - "grad_norm": 0.0020708160009235144, - "learning_rate": 0.00019999999222230282, - "loss": 46.0, - "step": 1652 - }, - { - "epoch": 0.12638339354320774, - "grad_norm": 0.007735388353466988, - "learning_rate": 0.0001999999922128265, - "loss": 46.0, - "step": 1653 - }, - { - "epoch": 0.12645985052659747, - "grad_norm": 0.002494325628504157, - "learning_rate": 0.00019999999220334438, - "loss": 46.0, - "step": 1654 - }, - { - "epoch": 0.1265363075099872, - "grad_norm": 0.0014957452658563852, - "learning_rate": 0.0001999999921938565, - "loss": 46.0, - "step": 1655 - }, - { - "epoch": 0.1266127644933769, - "grad_norm": 0.0009274912299588323, - "learning_rate": 0.00019999999218436287, - "loss": 46.0, - "step": 1656 - }, - { - "epoch": 0.12668922147676664, - "grad_norm": 0.0009796343510970473, - "learning_rate": 0.00019999999217486347, - "loss": 46.0, - "step": 1657 - }, - { - "epoch": 0.12676567846015635, - "grad_norm": 0.001824269420467317, - "learning_rate": 0.0001999999921653583, - "loss": 46.0, - "step": 1658 - }, - { - "epoch": 0.12684213544354608, - "grad_norm": 0.0014029761077836156, - "learning_rate": 0.00019999999215584735, - "loss": 46.0, - "step": 1659 - }, - { - "epoch": 0.1269185924269358, - "grad_norm": 0.0010570652084425092, - "learning_rate": 0.00019999999214633063, - "loss": 46.0, - "step": 1660 - }, - { - "epoch": 0.12699504941032552, - "grad_norm": 0.003979287575930357, - "learning_rate": 0.00019999999213680814, - "loss": 46.0, - "step": 1661 - }, - { - "epoch": 0.12707150639371523, - "grad_norm": 0.0005274254945106804, - "learning_rate": 0.0001999999921272799, - "loss": 46.0, - "step": 1662 - }, - { - "epoch": 0.12714796337710496, - "grad_norm": 0.0013867878587916493, - "learning_rate": 0.00019999999211774586, - "loss": 46.0, - "step": 1663 - }, - { - "epoch": 0.12722442036049467, - "grad_norm": 0.001509237103164196, - "learning_rate": 0.00019999999210820607, - "loss": 46.0, - "step": 1664 - }, - { - "epoch": 0.1273008773438844, - "grad_norm": 0.0016890116967260838, - "learning_rate": 0.00019999999209866051, - "loss": 46.0, - "step": 1665 - }, - { - "epoch": 0.1273773343272741, - "grad_norm": 0.0016593248583376408, - "learning_rate": 0.00019999999208910918, - "loss": 46.0, - "step": 1666 - }, - { - "epoch": 0.12745379131066384, - "grad_norm": 0.0013300592545419931, - "learning_rate": 0.0001999999920795521, - "loss": 46.0, - "step": 1667 - }, - { - "epoch": 0.12753024829405357, - "grad_norm": 0.00036369453300721943, - "learning_rate": 0.00019999999206998922, - "loss": 46.0, - "step": 1668 - }, - { - "epoch": 0.12760670527744328, - "grad_norm": 0.0005816552438773215, - "learning_rate": 0.00019999999206042057, - "loss": 46.0, - "step": 1669 - }, - { - "epoch": 0.127683162260833, - "grad_norm": 0.0015860212733969092, - "learning_rate": 0.00019999999205084617, - "loss": 46.0, - "step": 1670 - }, - { - "epoch": 0.12775961924422272, - "grad_norm": 0.0007984215044416487, - "learning_rate": 0.00019999999204126597, - "loss": 46.0, - "step": 1671 - }, - { - "epoch": 0.12783607622761245, - "grad_norm": 0.0011627302737906575, - "learning_rate": 0.00019999999203168005, - "loss": 46.0, - "step": 1672 - }, - { - "epoch": 0.12791253321100216, - "grad_norm": 0.0016763482708483934, - "learning_rate": 0.00019999999202208833, - "loss": 46.0, - "step": 1673 - }, - { - "epoch": 0.1279889901943919, - "grad_norm": 0.008085369132459164, - "learning_rate": 0.0001999999920124908, - "loss": 46.0, - "step": 1674 - }, - { - "epoch": 0.1280654471777816, - "grad_norm": 0.0028306087478995323, - "learning_rate": 0.00019999999200288757, - "loss": 46.0, - "step": 1675 - }, - { - "epoch": 0.12814190416117133, - "grad_norm": 0.0013863978674635291, - "learning_rate": 0.00019999999199327856, - "loss": 46.0, - "step": 1676 - }, - { - "epoch": 0.12821836114456103, - "grad_norm": 0.000501670059747994, - "learning_rate": 0.00019999999198366374, - "loss": 46.0, - "step": 1677 - }, - { - "epoch": 0.12829481812795077, - "grad_norm": 0.0011134223314002156, - "learning_rate": 0.0001999999919740432, - "loss": 46.0, - "step": 1678 - }, - { - "epoch": 0.12837127511134047, - "grad_norm": 0.000379667297238484, - "learning_rate": 0.00019999999196441688, - "loss": 46.0, - "step": 1679 - }, - { - "epoch": 0.1284477320947302, - "grad_norm": 0.0010382885811850429, - "learning_rate": 0.00019999999195478477, - "loss": 46.0, - "step": 1680 - }, - { - "epoch": 0.1285241890781199, - "grad_norm": 0.0009940988384187222, - "learning_rate": 0.0001999999919451469, - "loss": 46.0, - "step": 1681 - }, - { - "epoch": 0.12860064606150964, - "grad_norm": 0.001528052263893187, - "learning_rate": 0.00019999999193550324, - "loss": 46.0, - "step": 1682 - }, - { - "epoch": 0.12867710304489938, - "grad_norm": 0.0008618487627245486, - "learning_rate": 0.00019999999192585384, - "loss": 46.0, - "step": 1683 - }, - { - "epoch": 0.12875356002828908, - "grad_norm": 0.0008057771483436227, - "learning_rate": 0.00019999999191619864, - "loss": 46.0, - "step": 1684 - }, - { - "epoch": 0.12883001701167882, - "grad_norm": 0.0008462500409223139, - "learning_rate": 0.00019999999190653772, - "loss": 46.0, - "step": 1685 - }, - { - "epoch": 0.12890647399506852, - "grad_norm": 0.0009672439191490412, - "learning_rate": 0.00019999999189687097, - "loss": 46.0, - "step": 1686 - }, - { - "epoch": 0.12898293097845825, - "grad_norm": 0.0020651649683713913, - "learning_rate": 0.00019999999188719848, - "loss": 46.0, - "step": 1687 - }, - { - "epoch": 0.12905938796184796, - "grad_norm": 0.0013438333990052342, - "learning_rate": 0.00019999999187752024, - "loss": 46.0, - "step": 1688 - }, - { - "epoch": 0.1291358449452377, - "grad_norm": 0.0018028083723038435, - "learning_rate": 0.0001999999918678362, - "loss": 46.0, - "step": 1689 - }, - { - "epoch": 0.1292123019286274, - "grad_norm": 0.0005871797911822796, - "learning_rate": 0.00019999999185814642, - "loss": 46.0, - "step": 1690 - }, - { - "epoch": 0.12928875891201713, - "grad_norm": 0.0031369503121823072, - "learning_rate": 0.00019999999184845083, - "loss": 46.0, - "step": 1691 - }, - { - "epoch": 0.12936521589540684, - "grad_norm": 0.0004994928021915257, - "learning_rate": 0.0001999999918387495, - "loss": 46.0, - "step": 1692 - }, - { - "epoch": 0.12944167287879657, - "grad_norm": 0.0007263327715918422, - "learning_rate": 0.00019999999182904242, - "loss": 46.0, - "step": 1693 - }, - { - "epoch": 0.12951812986218628, - "grad_norm": 0.0016645743744447827, - "learning_rate": 0.0001999999918193295, - "loss": 46.0, - "step": 1694 - }, - { - "epoch": 0.129594586845576, - "grad_norm": 0.0006018921849317849, - "learning_rate": 0.0001999999918096109, - "loss": 46.0, - "step": 1695 - }, - { - "epoch": 0.12967104382896572, - "grad_norm": 0.001130896620452404, - "learning_rate": 0.00019999999179988643, - "loss": 46.0, - "step": 1696 - }, - { - "epoch": 0.12974750081235545, - "grad_norm": 0.0007095420151017606, - "learning_rate": 0.00019999999179015626, - "loss": 46.0, - "step": 1697 - }, - { - "epoch": 0.12982395779574518, - "grad_norm": 0.0006771529442630708, - "learning_rate": 0.00019999999178042032, - "loss": 46.0, - "step": 1698 - }, - { - "epoch": 0.1299004147791349, - "grad_norm": 0.0011925175786018372, - "learning_rate": 0.0001999999917706786, - "loss": 46.0, - "step": 1699 - }, - { - "epoch": 0.12997687176252462, - "grad_norm": 0.0011560809798538685, - "learning_rate": 0.0001999999917609311, - "loss": 46.0, - "step": 1700 - }, - { - "epoch": 0.13005332874591433, - "grad_norm": 0.0009192679426632822, - "learning_rate": 0.00019999999175117784, - "loss": 46.0, - "step": 1701 - }, - { - "epoch": 0.13012978572930406, - "grad_norm": 0.001408636337146163, - "learning_rate": 0.00019999999174141883, - "loss": 46.0, - "step": 1702 - }, - { - "epoch": 0.13020624271269376, - "grad_norm": 0.0012174068251624703, - "learning_rate": 0.00019999999173165401, - "loss": 46.0, - "step": 1703 - }, - { - "epoch": 0.1302826996960835, - "grad_norm": 0.0016217316733673215, - "learning_rate": 0.00019999999172188343, - "loss": 46.0, - "step": 1704 - }, - { - "epoch": 0.1303591566794732, - "grad_norm": 0.0007246576133184135, - "learning_rate": 0.0001999999917121071, - "loss": 46.0, - "step": 1705 - }, - { - "epoch": 0.13043561366286294, - "grad_norm": 0.0011429123114794493, - "learning_rate": 0.000199999991702325, - "loss": 46.0, - "step": 1706 - }, - { - "epoch": 0.13051207064625264, - "grad_norm": 0.0015667779371142387, - "learning_rate": 0.0001999999916925371, - "loss": 46.0, - "step": 1707 - }, - { - "epoch": 0.13058852762964238, - "grad_norm": 0.0011551033239811659, - "learning_rate": 0.00019999999168274346, - "loss": 46.0, - "step": 1708 - }, - { - "epoch": 0.13066498461303208, - "grad_norm": 0.0006915386184118688, - "learning_rate": 0.00019999999167294403, - "loss": 46.0, - "step": 1709 - }, - { - "epoch": 0.13074144159642181, - "grad_norm": 0.0023293134290724993, - "learning_rate": 0.00019999999166313886, - "loss": 46.0, - "step": 1710 - }, - { - "epoch": 0.13081789857981152, - "grad_norm": 0.0008786504622548819, - "learning_rate": 0.0001999999916533279, - "loss": 46.0, - "step": 1711 - }, - { - "epoch": 0.13089435556320125, - "grad_norm": 0.0006594064179807901, - "learning_rate": 0.00019999999164351117, - "loss": 46.0, - "step": 1712 - }, - { - "epoch": 0.130970812546591, - "grad_norm": 0.0014750296249985695, - "learning_rate": 0.00019999999163368868, - "loss": 46.0, - "step": 1713 - }, - { - "epoch": 0.1310472695299807, - "grad_norm": 0.0009271716699004173, - "learning_rate": 0.0001999999916238604, - "loss": 46.0, - "step": 1714 - }, - { - "epoch": 0.13112372651337043, - "grad_norm": 0.0009438525885343552, - "learning_rate": 0.00019999999161402637, - "loss": 46.0, - "step": 1715 - }, - { - "epoch": 0.13120018349676013, - "grad_norm": 0.0027726551052182913, - "learning_rate": 0.0001999999916041866, - "loss": 46.0, - "step": 1716 - }, - { - "epoch": 0.13127664048014986, - "grad_norm": 0.0009162983624264598, - "learning_rate": 0.000199999991594341, - "loss": 46.0, - "step": 1717 - }, - { - "epoch": 0.13135309746353957, - "grad_norm": 0.002412374597042799, - "learning_rate": 0.00019999999158448967, - "loss": 46.0, - "step": 1718 - }, - { - "epoch": 0.1314295544469293, - "grad_norm": 0.0010640060063451529, - "learning_rate": 0.00019999999157463254, - "loss": 46.0, - "step": 1719 - }, - { - "epoch": 0.131506011430319, - "grad_norm": 0.0011001689126715064, - "learning_rate": 0.00019999999156476969, - "loss": 46.0, - "step": 1720 - }, - { - "epoch": 0.13158246841370874, - "grad_norm": 0.0011513471836224198, - "learning_rate": 0.00019999999155490103, - "loss": 46.0, - "step": 1721 - }, - { - "epoch": 0.13165892539709845, - "grad_norm": 0.00048143602907657623, - "learning_rate": 0.0001999999915450266, - "loss": 46.0, - "step": 1722 - }, - { - "epoch": 0.13173538238048818, - "grad_norm": 0.0010478375479578972, - "learning_rate": 0.00019999999153514638, - "loss": 46.0, - "step": 1723 - }, - { - "epoch": 0.1318118393638779, - "grad_norm": 0.001411057193763554, - "learning_rate": 0.00019999999152526044, - "loss": 46.0, - "step": 1724 - }, - { - "epoch": 0.13188829634726762, - "grad_norm": 0.0012279852526262403, - "learning_rate": 0.00019999999151536872, - "loss": 46.0, - "step": 1725 - }, - { - "epoch": 0.13196475333065733, - "grad_norm": 0.0014598125126212835, - "learning_rate": 0.00019999999150547123, - "loss": 46.0, - "step": 1726 - }, - { - "epoch": 0.13204121031404706, - "grad_norm": 0.0014014207990840077, - "learning_rate": 0.00019999999149556796, - "loss": 46.0, - "step": 1727 - }, - { - "epoch": 0.1321176672974368, - "grad_norm": 0.000739993411116302, - "learning_rate": 0.00019999999148565892, - "loss": 46.0, - "step": 1728 - }, - { - "epoch": 0.1321941242808265, - "grad_norm": 0.001423330744728446, - "learning_rate": 0.0001999999914757441, - "loss": 46.0, - "step": 1729 - }, - { - "epoch": 0.13227058126421623, - "grad_norm": 0.0021808992605656385, - "learning_rate": 0.00019999999146582352, - "loss": 46.0, - "step": 1730 - }, - { - "epoch": 0.13234703824760594, - "grad_norm": 0.000990840489976108, - "learning_rate": 0.0001999999914558972, - "loss": 46.0, - "step": 1731 - }, - { - "epoch": 0.13242349523099567, - "grad_norm": 0.0009367921738885343, - "learning_rate": 0.00019999999144596506, - "loss": 46.0, - "step": 1732 - }, - { - "epoch": 0.13249995221438537, - "grad_norm": 0.001360927359201014, - "learning_rate": 0.00019999999143602718, - "loss": 46.0, - "step": 1733 - }, - { - "epoch": 0.1325764091977751, - "grad_norm": 0.0020556238014250994, - "learning_rate": 0.00019999999142608353, - "loss": 46.0, - "step": 1734 - }, - { - "epoch": 0.1326528661811648, - "grad_norm": 0.0017063411651179194, - "learning_rate": 0.00019999999141613413, - "loss": 46.0, - "step": 1735 - }, - { - "epoch": 0.13272932316455455, - "grad_norm": 0.0010955114848911762, - "learning_rate": 0.00019999999140617893, - "loss": 46.0, - "step": 1736 - }, - { - "epoch": 0.13280578014794425, - "grad_norm": 0.0020986285526305437, - "learning_rate": 0.00019999999139621796, - "loss": 46.0, - "step": 1737 - }, - { - "epoch": 0.13288223713133399, - "grad_norm": 0.0014130845665931702, - "learning_rate": 0.00019999999138625122, - "loss": 46.0, - "step": 1738 - }, - { - "epoch": 0.1329586941147237, - "grad_norm": 0.0009628068073652685, - "learning_rate": 0.00019999999137627872, - "loss": 46.0, - "step": 1739 - }, - { - "epoch": 0.13303515109811342, - "grad_norm": 0.0011783394729718566, - "learning_rate": 0.00019999999136630046, - "loss": 46.0, - "step": 1740 - }, - { - "epoch": 0.13311160808150316, - "grad_norm": 0.001116079045459628, - "learning_rate": 0.00019999999135631642, - "loss": 46.0, - "step": 1741 - }, - { - "epoch": 0.13318806506489286, - "grad_norm": 0.0008289802353829145, - "learning_rate": 0.00019999999134632658, - "loss": 46.0, - "step": 1742 - }, - { - "epoch": 0.1332645220482826, - "grad_norm": 0.0019486815435811877, - "learning_rate": 0.00019999999133633102, - "loss": 46.0, - "step": 1743 - }, - { - "epoch": 0.1333409790316723, - "grad_norm": 0.0013661953853443265, - "learning_rate": 0.00019999999132632967, - "loss": 46.0, - "step": 1744 - }, - { - "epoch": 0.13341743601506204, - "grad_norm": 0.0009323824779130518, - "learning_rate": 0.00019999999131632253, - "loss": 46.0, - "step": 1745 - }, - { - "epoch": 0.13349389299845174, - "grad_norm": 0.0030559145379811525, - "learning_rate": 0.00019999999130630966, - "loss": 46.0, - "step": 1746 - }, - { - "epoch": 0.13357034998184147, - "grad_norm": 0.0009545132634229958, - "learning_rate": 0.000199999991296291, - "loss": 46.0, - "step": 1747 - }, - { - "epoch": 0.13364680696523118, - "grad_norm": 0.002411074936389923, - "learning_rate": 0.00019999999128626658, - "loss": 46.0, - "step": 1748 - }, - { - "epoch": 0.1337232639486209, - "grad_norm": 0.0011840452207252383, - "learning_rate": 0.00019999999127623638, - "loss": 46.0, - "step": 1749 - }, - { - "epoch": 0.13379972093201062, - "grad_norm": 0.0012516540009528399, - "learning_rate": 0.0001999999912662004, - "loss": 46.0, - "step": 1750 - }, - { - "epoch": 0.13387617791540035, - "grad_norm": 0.0007451776182278991, - "learning_rate": 0.0001999999912561587, - "loss": 46.0, - "step": 1751 - }, - { - "epoch": 0.13395263489879006, - "grad_norm": 0.0010144745465368032, - "learning_rate": 0.0001999999912461112, - "loss": 46.0, - "step": 1752 - }, - { - "epoch": 0.1340290918821798, - "grad_norm": 0.0008639212464913726, - "learning_rate": 0.0001999999912360579, - "loss": 46.0, - "step": 1753 - }, - { - "epoch": 0.1341055488655695, - "grad_norm": 0.005747777875512838, - "learning_rate": 0.00019999999122599887, - "loss": 46.0, - "step": 1754 - }, - { - "epoch": 0.13418200584895923, - "grad_norm": 0.0015556936850771308, - "learning_rate": 0.00019999999121593403, - "loss": 46.0, - "step": 1755 - }, - { - "epoch": 0.13425846283234896, - "grad_norm": 0.0008981954888440669, - "learning_rate": 0.00019999999120586348, - "loss": 46.0, - "step": 1756 - }, - { - "epoch": 0.13433491981573867, - "grad_norm": 0.0012614093720912933, - "learning_rate": 0.00019999999119578714, - "loss": 46.0, - "step": 1757 - }, - { - "epoch": 0.1344113767991284, - "grad_norm": 0.0021436798851937056, - "learning_rate": 0.000199999991185705, - "loss": 46.0, - "step": 1758 - }, - { - "epoch": 0.1344878337825181, - "grad_norm": 0.0009233438177034259, - "learning_rate": 0.0001999999911756171, - "loss": 46.0, - "step": 1759 - }, - { - "epoch": 0.13456429076590784, - "grad_norm": 0.0005031541222706437, - "learning_rate": 0.00019999999116552343, - "loss": 46.0, - "step": 1760 - }, - { - "epoch": 0.13464074774929755, - "grad_norm": 0.0016876172740012407, - "learning_rate": 0.00019999999115542403, - "loss": 46.0, - "step": 1761 - }, - { - "epoch": 0.13471720473268728, - "grad_norm": 0.0007119384245015681, - "learning_rate": 0.00019999999114531883, - "loss": 46.0, - "step": 1762 - }, - { - "epoch": 0.13479366171607698, - "grad_norm": 0.001981558511033654, - "learning_rate": 0.00019999999113520786, - "loss": 46.0, - "step": 1763 - }, - { - "epoch": 0.13487011869946672, - "grad_norm": 0.00034314021468162537, - "learning_rate": 0.00019999999112509112, - "loss": 46.0, - "step": 1764 - }, - { - "epoch": 0.13494657568285642, - "grad_norm": 0.0012478202115744352, - "learning_rate": 0.0001999999911149686, - "loss": 46.0, - "step": 1765 - }, - { - "epoch": 0.13502303266624616, - "grad_norm": 0.0010478689800947905, - "learning_rate": 0.00019999999110484034, - "loss": 46.0, - "step": 1766 - }, - { - "epoch": 0.13509948964963586, - "grad_norm": 0.0008405048865824938, - "learning_rate": 0.0001999999910947063, - "loss": 46.0, - "step": 1767 - }, - { - "epoch": 0.1351759466330256, - "grad_norm": 0.0012383825378492475, - "learning_rate": 0.00019999999108456646, - "loss": 46.0, - "step": 1768 - }, - { - "epoch": 0.1352524036164153, - "grad_norm": 0.0007070383871905506, - "learning_rate": 0.0001999999910744209, - "loss": 46.0, - "step": 1769 - }, - { - "epoch": 0.13532886059980503, - "grad_norm": 0.0011966886231675744, - "learning_rate": 0.00019999999106426954, - "loss": 46.0, - "step": 1770 - }, - { - "epoch": 0.13540531758319477, - "grad_norm": 0.0036497600376605988, - "learning_rate": 0.0001999999910541124, - "loss": 46.0, - "step": 1771 - }, - { - "epoch": 0.13548177456658447, - "grad_norm": 0.0010715022217482328, - "learning_rate": 0.00019999999104394954, - "loss": 46.0, - "step": 1772 - }, - { - "epoch": 0.1355582315499742, - "grad_norm": 0.0021324383560568094, - "learning_rate": 0.00019999999103378086, - "loss": 46.0, - "step": 1773 - }, - { - "epoch": 0.1356346885333639, - "grad_norm": 0.0011359077179804444, - "learning_rate": 0.00019999999102360643, - "loss": 46.0, - "step": 1774 - }, - { - "epoch": 0.13571114551675365, - "grad_norm": 0.000986409606412053, - "learning_rate": 0.0001999999910134262, - "loss": 46.0, - "step": 1775 - }, - { - "epoch": 0.13578760250014335, - "grad_norm": 0.0005540827987715602, - "learning_rate": 0.00019999999100324024, - "loss": 46.0, - "step": 1776 - }, - { - "epoch": 0.13586405948353308, - "grad_norm": 0.000653906783554703, - "learning_rate": 0.00019999999099304852, - "loss": 46.0, - "step": 1777 - }, - { - "epoch": 0.1359405164669228, - "grad_norm": 0.0011749338591471314, - "learning_rate": 0.000199999990982851, - "loss": 46.0, - "step": 1778 - }, - { - "epoch": 0.13601697345031252, - "grad_norm": 0.0011938236420974135, - "learning_rate": 0.0001999999909726477, - "loss": 46.0, - "step": 1779 - }, - { - "epoch": 0.13609343043370223, - "grad_norm": 0.0019144678954035044, - "learning_rate": 0.0001999999909624387, - "loss": 46.0, - "step": 1780 - }, - { - "epoch": 0.13616988741709196, - "grad_norm": 0.0009658513008616865, - "learning_rate": 0.00019999999095222384, - "loss": 46.0, - "step": 1781 - }, - { - "epoch": 0.13624634440048167, - "grad_norm": 0.0012276963097974658, - "learning_rate": 0.00019999999094200325, - "loss": 46.0, - "step": 1782 - }, - { - "epoch": 0.1363228013838714, - "grad_norm": 0.0007017145399004221, - "learning_rate": 0.0001999999909317769, - "loss": 46.0, - "step": 1783 - }, - { - "epoch": 0.1363992583672611, - "grad_norm": 0.002202402101829648, - "learning_rate": 0.0001999999909215448, - "loss": 46.0, - "step": 1784 - }, - { - "epoch": 0.13647571535065084, - "grad_norm": 0.003947373945266008, - "learning_rate": 0.0001999999909113069, - "loss": 46.0, - "step": 1785 - }, - { - "epoch": 0.13655217233404057, - "grad_norm": 0.0015952371759340167, - "learning_rate": 0.0001999999909010632, - "loss": 46.0, - "step": 1786 - }, - { - "epoch": 0.13662862931743028, - "grad_norm": 0.001507932785898447, - "learning_rate": 0.00019999999089081379, - "loss": 46.0, - "step": 1787 - }, - { - "epoch": 0.13670508630082, - "grad_norm": 0.0004487130790948868, - "learning_rate": 0.0001999999908805586, - "loss": 46.0, - "step": 1788 - }, - { - "epoch": 0.13678154328420972, - "grad_norm": 0.0010462729260325432, - "learning_rate": 0.0001999999908702976, - "loss": 46.0, - "step": 1789 - }, - { - "epoch": 0.13685800026759945, - "grad_norm": 0.0009094856213778257, - "learning_rate": 0.00019999999086003087, - "loss": 46.0, - "step": 1790 - }, - { - "epoch": 0.13693445725098916, - "grad_norm": 0.0011709691025316715, - "learning_rate": 0.00019999999084975833, - "loss": 46.0, - "step": 1791 - }, - { - "epoch": 0.1370109142343789, - "grad_norm": 0.002708164742216468, - "learning_rate": 0.00019999999083948006, - "loss": 46.0, - "step": 1792 - }, - { - "epoch": 0.1370873712177686, - "grad_norm": 0.001033182255923748, - "learning_rate": 0.00019999999082919603, - "loss": 46.0, - "step": 1793 - }, - { - "epoch": 0.13716382820115833, - "grad_norm": 0.0006695556221529841, - "learning_rate": 0.0001999999908189062, - "loss": 46.0, - "step": 1794 - }, - { - "epoch": 0.13724028518454803, - "grad_norm": 0.002639173064380884, - "learning_rate": 0.0001999999908086106, - "loss": 46.0, - "step": 1795 - }, - { - "epoch": 0.13731674216793777, - "grad_norm": 0.0016310201026499271, - "learning_rate": 0.00019999999079830925, - "loss": 46.0, - "step": 1796 - }, - { - "epoch": 0.13739319915132747, - "grad_norm": 0.00037465078639797866, - "learning_rate": 0.00019999999078800212, - "loss": 46.0, - "step": 1797 - }, - { - "epoch": 0.1374696561347172, - "grad_norm": 0.00067168939858675, - "learning_rate": 0.00019999999077768921, - "loss": 46.0, - "step": 1798 - }, - { - "epoch": 0.13754611311810694, - "grad_norm": 0.00045956438407301903, - "learning_rate": 0.00019999999076737054, - "loss": 46.0, - "step": 1799 - }, - { - "epoch": 0.13762257010149664, - "grad_norm": 0.00042533682426437736, - "learning_rate": 0.0001999999907570461, - "loss": 46.0, - "step": 1800 - }, - { - "epoch": 0.13769902708488638, - "grad_norm": 0.004772007465362549, - "learning_rate": 0.00019999999074671592, - "loss": 46.0, - "step": 1801 - }, - { - "epoch": 0.13777548406827608, - "grad_norm": 0.0006424225866794586, - "learning_rate": 0.00019999999073637992, - "loss": 46.0, - "step": 1802 - }, - { - "epoch": 0.13785194105166582, - "grad_norm": 0.0012661645887419581, - "learning_rate": 0.00019999999072603818, - "loss": 46.0, - "step": 1803 - }, - { - "epoch": 0.13792839803505552, - "grad_norm": 0.0010105270193889737, - "learning_rate": 0.00019999999071569066, - "loss": 46.0, - "step": 1804 - }, - { - "epoch": 0.13800485501844525, - "grad_norm": 0.001936870045028627, - "learning_rate": 0.00019999999070533737, - "loss": 46.0, - "step": 1805 - }, - { - "epoch": 0.13808131200183496, - "grad_norm": 0.0015983741031959653, - "learning_rate": 0.00019999999069497833, - "loss": 46.0, - "step": 1806 - }, - { - "epoch": 0.1381577689852247, - "grad_norm": 0.0010567340068519115, - "learning_rate": 0.0001999999906846135, - "loss": 46.0, - "step": 1807 - }, - { - "epoch": 0.1382342259686144, - "grad_norm": 0.004460916388779879, - "learning_rate": 0.0001999999906742429, - "loss": 46.0, - "step": 1808 - }, - { - "epoch": 0.13831068295200413, - "grad_norm": 0.0007848286768421531, - "learning_rate": 0.00019999999066386655, - "loss": 46.0, - "step": 1809 - }, - { - "epoch": 0.13838713993539384, - "grad_norm": 0.0028831337112933397, - "learning_rate": 0.0001999999906534844, - "loss": 46.0, - "step": 1810 - }, - { - "epoch": 0.13846359691878357, - "grad_norm": 0.002073341980576515, - "learning_rate": 0.00019999999064309652, - "loss": 46.0, - "step": 1811 - }, - { - "epoch": 0.13854005390217328, - "grad_norm": 0.0007669876213185489, - "learning_rate": 0.00019999999063270284, - "loss": 46.0, - "step": 1812 - }, - { - "epoch": 0.138616510885563, - "grad_norm": 0.0014372995356097817, - "learning_rate": 0.0001999999906223034, - "loss": 46.0, - "step": 1813 - }, - { - "epoch": 0.13869296786895274, - "grad_norm": 0.0006090968963690102, - "learning_rate": 0.0001999999906118982, - "loss": 46.0, - "step": 1814 - }, - { - "epoch": 0.13876942485234245, - "grad_norm": 0.0007370939711108804, - "learning_rate": 0.0001999999906014872, - "loss": 46.0, - "step": 1815 - }, - { - "epoch": 0.13884588183573218, - "grad_norm": 0.0013098521158099174, - "learning_rate": 0.00019999999059107046, - "loss": 46.0, - "step": 1816 - }, - { - "epoch": 0.1389223388191219, - "grad_norm": 0.0012884825700893998, - "learning_rate": 0.00019999999058064794, - "loss": 46.0, - "step": 1817 - }, - { - "epoch": 0.13899879580251162, - "grad_norm": 0.0009428421035408974, - "learning_rate": 0.00019999999057021965, - "loss": 46.0, - "step": 1818 - }, - { - "epoch": 0.13907525278590133, - "grad_norm": 0.001111352350562811, - "learning_rate": 0.00019999999055978558, - "loss": 46.0, - "step": 1819 - }, - { - "epoch": 0.13915170976929106, - "grad_norm": 0.0011604103492572904, - "learning_rate": 0.00019999999054934575, - "loss": 46.0, - "step": 1820 - }, - { - "epoch": 0.13922816675268077, - "grad_norm": 0.0011482506524771452, - "learning_rate": 0.00019999999053890016, - "loss": 46.0, - "step": 1821 - }, - { - "epoch": 0.1393046237360705, - "grad_norm": 0.002155241323634982, - "learning_rate": 0.0001999999905284488, - "loss": 46.0, - "step": 1822 - }, - { - "epoch": 0.1393810807194602, - "grad_norm": 0.0007282591541297734, - "learning_rate": 0.00019999999051799168, - "loss": 46.0, - "step": 1823 - }, - { - "epoch": 0.13945753770284994, - "grad_norm": 0.0009172306163236499, - "learning_rate": 0.00019999999050752875, - "loss": 46.0, - "step": 1824 - }, - { - "epoch": 0.13953399468623964, - "grad_norm": 0.0009994545252993703, - "learning_rate": 0.0001999999904970601, - "loss": 46.0, - "step": 1825 - }, - { - "epoch": 0.13961045166962938, - "grad_norm": 0.000805270450655371, - "learning_rate": 0.00019999999048658565, - "loss": 46.0, - "step": 1826 - }, - { - "epoch": 0.13968690865301908, - "grad_norm": 0.0014365249080583453, - "learning_rate": 0.00019999999047610545, - "loss": 46.0, - "step": 1827 - }, - { - "epoch": 0.13976336563640882, - "grad_norm": 0.001337170717306435, - "learning_rate": 0.00019999999046561943, - "loss": 46.0, - "step": 1828 - }, - { - "epoch": 0.13983982261979855, - "grad_norm": 0.0025257414672523737, - "learning_rate": 0.00019999999045512768, - "loss": 46.0, - "step": 1829 - }, - { - "epoch": 0.13991627960318825, - "grad_norm": 0.001134646707214415, - "learning_rate": 0.00019999999044463017, - "loss": 46.0, - "step": 1830 - }, - { - "epoch": 0.139992736586578, - "grad_norm": 0.0008038088562898338, - "learning_rate": 0.0001999999904341269, - "loss": 46.0, - "step": 1831 - }, - { - "epoch": 0.1400691935699677, - "grad_norm": 0.003690134035423398, - "learning_rate": 0.00019999999042361784, - "loss": 46.0, - "step": 1832 - }, - { - "epoch": 0.14014565055335743, - "grad_norm": 0.0011938436655327678, - "learning_rate": 0.000199999990413103, - "loss": 46.0, - "step": 1833 - }, - { - "epoch": 0.14022210753674713, - "grad_norm": 0.0008435413474217057, - "learning_rate": 0.0001999999904025824, - "loss": 46.0, - "step": 1834 - }, - { - "epoch": 0.14029856452013686, - "grad_norm": 0.0007511136354878545, - "learning_rate": 0.000199999990392056, - "loss": 46.0, - "step": 1835 - }, - { - "epoch": 0.14037502150352657, - "grad_norm": 0.0016807162901386619, - "learning_rate": 0.0001999999903815239, - "loss": 46.0, - "step": 1836 - }, - { - "epoch": 0.1404514784869163, - "grad_norm": 0.001615367946214974, - "learning_rate": 0.00019999999037098598, - "loss": 46.0, - "step": 1837 - }, - { - "epoch": 0.140527935470306, - "grad_norm": 0.000741770607419312, - "learning_rate": 0.0001999999903604423, - "loss": 46.0, - "step": 1838 - }, - { - "epoch": 0.14060439245369574, - "grad_norm": 0.0012382975546643138, - "learning_rate": 0.00019999999034989285, - "loss": 46.0, - "step": 1839 - }, - { - "epoch": 0.14068084943708545, - "grad_norm": 0.00300770765170455, - "learning_rate": 0.00019999999033933766, - "loss": 46.0, - "step": 1840 - }, - { - "epoch": 0.14075730642047518, - "grad_norm": 0.0013860536273568869, - "learning_rate": 0.00019999999032877666, - "loss": 46.0, - "step": 1841 - }, - { - "epoch": 0.1408337634038649, - "grad_norm": 0.001102592796087265, - "learning_rate": 0.0001999999903182099, - "loss": 46.0, - "step": 1842 - }, - { - "epoch": 0.14091022038725462, - "grad_norm": 0.0020995796658098698, - "learning_rate": 0.00019999999030763737, - "loss": 46.0, - "step": 1843 - }, - { - "epoch": 0.14098667737064435, - "grad_norm": 0.001294348039664328, - "learning_rate": 0.00019999999029705906, - "loss": 46.0, - "step": 1844 - }, - { - "epoch": 0.14106313435403406, - "grad_norm": 0.0011832526652142406, - "learning_rate": 0.000199999990286475, - "loss": 46.0, - "step": 1845 - }, - { - "epoch": 0.1411395913374238, - "grad_norm": 0.0004684127343352884, - "learning_rate": 0.00019999999027588519, - "loss": 46.0, - "step": 1846 - }, - { - "epoch": 0.1412160483208135, - "grad_norm": 0.0010823906632140279, - "learning_rate": 0.0001999999902652896, - "loss": 46.0, - "step": 1847 - }, - { - "epoch": 0.14129250530420323, - "grad_norm": 0.001460356405004859, - "learning_rate": 0.00019999999025468822, - "loss": 46.0, - "step": 1848 - }, - { - "epoch": 0.14136896228759294, - "grad_norm": 0.010155671276152134, - "learning_rate": 0.00019999999024408106, - "loss": 46.0, - "step": 1849 - }, - { - "epoch": 0.14144541927098267, - "grad_norm": 0.001174120930954814, - "learning_rate": 0.00019999999023346816, - "loss": 46.0, - "step": 1850 - }, - { - "epoch": 0.14152187625437238, - "grad_norm": 0.002394245006144047, - "learning_rate": 0.00019999999022284948, - "loss": 46.0, - "step": 1851 - }, - { - "epoch": 0.1415983332377621, - "grad_norm": 0.0011494889622554183, - "learning_rate": 0.000199999990212225, - "loss": 46.0, - "step": 1852 - }, - { - "epoch": 0.14167479022115181, - "grad_norm": 0.0013915582094341516, - "learning_rate": 0.0001999999902015948, - "loss": 46.0, - "step": 1853 - }, - { - "epoch": 0.14175124720454155, - "grad_norm": 0.0006613280274905264, - "learning_rate": 0.00019999999019095884, - "loss": 46.0, - "step": 1854 - }, - { - "epoch": 0.14182770418793125, - "grad_norm": 0.0019673940259963274, - "learning_rate": 0.00019999999018031707, - "loss": 46.0, - "step": 1855 - }, - { - "epoch": 0.14190416117132099, - "grad_norm": 0.0023109884932637215, - "learning_rate": 0.00019999999016966956, - "loss": 46.0, - "step": 1856 - }, - { - "epoch": 0.14198061815471072, - "grad_norm": 0.0017937744269147515, - "learning_rate": 0.00019999999015901624, - "loss": 46.0, - "step": 1857 - }, - { - "epoch": 0.14205707513810042, - "grad_norm": 0.0016560000367462635, - "learning_rate": 0.00019999999014835718, - "loss": 46.0, - "step": 1858 - }, - { - "epoch": 0.14213353212149016, - "grad_norm": 0.0010846997611224651, - "learning_rate": 0.00019999999013769232, - "loss": 46.0, - "step": 1859 - }, - { - "epoch": 0.14220998910487986, - "grad_norm": 0.004100044257938862, - "learning_rate": 0.00019999999012702173, - "loss": 46.0, - "step": 1860 - }, - { - "epoch": 0.1422864460882696, - "grad_norm": 0.0015313801122829318, - "learning_rate": 0.00019999999011634535, - "loss": 46.0, - "step": 1861 - }, - { - "epoch": 0.1423629030716593, - "grad_norm": 0.0009085564524866641, - "learning_rate": 0.00019999999010566322, - "loss": 46.0, - "step": 1862 - }, - { - "epoch": 0.14243936005504904, - "grad_norm": 0.0016520225908607244, - "learning_rate": 0.0001999999900949753, - "loss": 46.0, - "step": 1863 - }, - { - "epoch": 0.14251581703843874, - "grad_norm": 0.0022325743921101093, - "learning_rate": 0.00019999999008428162, - "loss": 46.0, - "step": 1864 - }, - { - "epoch": 0.14259227402182847, - "grad_norm": 0.00046500880853272974, - "learning_rate": 0.00019999999007358217, - "loss": 46.0, - "step": 1865 - }, - { - "epoch": 0.14266873100521818, - "grad_norm": 0.0008897559018805623, - "learning_rate": 0.00019999999006287692, - "loss": 46.0, - "step": 1866 - }, - { - "epoch": 0.1427451879886079, - "grad_norm": 0.000936188967898488, - "learning_rate": 0.00019999999005216595, - "loss": 46.0, - "step": 1867 - }, - { - "epoch": 0.14282164497199762, - "grad_norm": 0.002499505877494812, - "learning_rate": 0.00019999999004144918, - "loss": 46.0, - "step": 1868 - }, - { - "epoch": 0.14289810195538735, - "grad_norm": 0.0015446791658177972, - "learning_rate": 0.00019999999003072664, - "loss": 46.0, - "step": 1869 - }, - { - "epoch": 0.14297455893877706, - "grad_norm": 0.0025301750283688307, - "learning_rate": 0.00019999999001999838, - "loss": 46.0, - "step": 1870 - }, - { - "epoch": 0.1430510159221668, - "grad_norm": 0.0008142556180246174, - "learning_rate": 0.0001999999900092643, - "loss": 46.0, - "step": 1871 - }, - { - "epoch": 0.14312747290555652, - "grad_norm": 0.0009948518127202988, - "learning_rate": 0.00019999998999852445, - "loss": 46.0, - "step": 1872 - }, - { - "epoch": 0.14320392988894623, - "grad_norm": 0.0011949791805818677, - "learning_rate": 0.00019999998998777884, - "loss": 46.0, - "step": 1873 - }, - { - "epoch": 0.14328038687233596, - "grad_norm": 0.0004118412034586072, - "learning_rate": 0.00019999998997702746, - "loss": 46.0, - "step": 1874 - }, - { - "epoch": 0.14335684385572567, - "grad_norm": 0.002504717791453004, - "learning_rate": 0.00019999998996627033, - "loss": 46.0, - "step": 1875 - }, - { - "epoch": 0.1434333008391154, - "grad_norm": 0.0026481417007744312, - "learning_rate": 0.0001999999899555074, - "loss": 46.0, - "step": 1876 - }, - { - "epoch": 0.1435097578225051, - "grad_norm": 0.003533893497660756, - "learning_rate": 0.00019999998994473873, - "loss": 46.0, - "step": 1877 - }, - { - "epoch": 0.14358621480589484, - "grad_norm": 0.008359206840395927, - "learning_rate": 0.00019999998993396428, - "loss": 46.0, - "step": 1878 - }, - { - "epoch": 0.14366267178928455, - "grad_norm": 0.0005306825041770935, - "learning_rate": 0.00019999998992318405, - "loss": 46.0, - "step": 1879 - }, - { - "epoch": 0.14373912877267428, - "grad_norm": 0.0011779069900512695, - "learning_rate": 0.00019999998991239806, - "loss": 46.0, - "step": 1880 - }, - { - "epoch": 0.14381558575606398, - "grad_norm": 0.0020184062886983156, - "learning_rate": 0.0001999999899016063, - "loss": 46.0, - "step": 1881 - }, - { - "epoch": 0.14389204273945372, - "grad_norm": 0.0015682012308388948, - "learning_rate": 0.00019999998989080875, - "loss": 46.0, - "step": 1882 - }, - { - "epoch": 0.14396849972284342, - "grad_norm": 0.0009336181683465838, - "learning_rate": 0.00019999998988000543, - "loss": 46.0, - "step": 1883 - }, - { - "epoch": 0.14404495670623316, - "grad_norm": 0.0010547672864049673, - "learning_rate": 0.0001999999898691964, - "loss": 46.0, - "step": 1884 - }, - { - "epoch": 0.14412141368962286, - "grad_norm": 0.0007349682855419815, - "learning_rate": 0.00019999998985838153, - "loss": 46.0, - "step": 1885 - }, - { - "epoch": 0.1441978706730126, - "grad_norm": 0.0011008752044290304, - "learning_rate": 0.00019999998984756093, - "loss": 46.0, - "step": 1886 - }, - { - "epoch": 0.14427432765640233, - "grad_norm": 0.0026120059192180634, - "learning_rate": 0.00019999998983673454, - "loss": 46.0, - "step": 1887 - }, - { - "epoch": 0.14435078463979203, - "grad_norm": 0.001931669656187296, - "learning_rate": 0.0001999999898259024, - "loss": 46.0, - "step": 1888 - }, - { - "epoch": 0.14442724162318177, - "grad_norm": 0.0013773287646472454, - "learning_rate": 0.00019999998981506446, - "loss": 46.0, - "step": 1889 - }, - { - "epoch": 0.14450369860657147, - "grad_norm": 0.0021041342988610268, - "learning_rate": 0.00019999998980422078, - "loss": 46.0, - "step": 1890 - }, - { - "epoch": 0.1445801555899612, - "grad_norm": 0.0014199084835126996, - "learning_rate": 0.0001999999897933713, - "loss": 46.0, - "step": 1891 - }, - { - "epoch": 0.1446566125733509, - "grad_norm": 0.0007218638784252107, - "learning_rate": 0.0001999999897825161, - "loss": 46.0, - "step": 1892 - }, - { - "epoch": 0.14473306955674065, - "grad_norm": 0.0005682877381332219, - "learning_rate": 0.00019999998977165512, - "loss": 46.0, - "step": 1893 - }, - { - "epoch": 0.14480952654013035, - "grad_norm": 0.0018532413523644209, - "learning_rate": 0.00019999998976078832, - "loss": 46.0, - "step": 1894 - }, - { - "epoch": 0.14488598352352008, - "grad_norm": 0.0009956990834325552, - "learning_rate": 0.0001999999897499158, - "loss": 46.0, - "step": 1895 - }, - { - "epoch": 0.1449624405069098, - "grad_norm": 0.0023877564817667007, - "learning_rate": 0.0001999999897390375, - "loss": 46.0, - "step": 1896 - }, - { - "epoch": 0.14503889749029952, - "grad_norm": 0.0008169774082489312, - "learning_rate": 0.00019999998972815343, - "loss": 46.0, - "step": 1897 - }, - { - "epoch": 0.14511535447368923, - "grad_norm": 0.0012659343192353845, - "learning_rate": 0.00019999998971726357, - "loss": 46.0, - "step": 1898 - }, - { - "epoch": 0.14519181145707896, - "grad_norm": 0.0036854087375104427, - "learning_rate": 0.00019999998970636796, - "loss": 46.0, - "step": 1899 - }, - { - "epoch": 0.14526826844046867, - "grad_norm": 0.001317733433097601, - "learning_rate": 0.00019999998969546658, - "loss": 46.0, - "step": 1900 - }, - { - "epoch": 0.1453447254238584, - "grad_norm": 0.0017411282751709223, - "learning_rate": 0.00019999998968455942, - "loss": 46.0, - "step": 1901 - }, - { - "epoch": 0.14542118240724813, - "grad_norm": 0.0010978442151099443, - "learning_rate": 0.0001999999896736465, - "loss": 46.0, - "step": 1902 - }, - { - "epoch": 0.14549763939063784, - "grad_norm": 0.0007700786809436977, - "learning_rate": 0.00019999998966272782, - "loss": 46.0, - "step": 1903 - }, - { - "epoch": 0.14557409637402757, - "grad_norm": 0.001506231026723981, - "learning_rate": 0.00019999998965180334, - "loss": 46.0, - "step": 1904 - }, - { - "epoch": 0.14565055335741728, - "grad_norm": 0.0007330658263526857, - "learning_rate": 0.00019999998964087312, - "loss": 46.0, - "step": 1905 - }, - { - "epoch": 0.145727010340807, - "grad_norm": 0.001693650265224278, - "learning_rate": 0.0001999999896299371, - "loss": 46.0, - "step": 1906 - }, - { - "epoch": 0.14580346732419672, - "grad_norm": 0.002958431839942932, - "learning_rate": 0.00019999998961899536, - "loss": 46.0, - "step": 1907 - }, - { - "epoch": 0.14587992430758645, - "grad_norm": 0.0007097668130882084, - "learning_rate": 0.00019999998960804782, - "loss": 46.0, - "step": 1908 - }, - { - "epoch": 0.14595638129097616, - "grad_norm": 0.0016484018415212631, - "learning_rate": 0.0001999999895970945, - "loss": 46.0, - "step": 1909 - }, - { - "epoch": 0.1460328382743659, - "grad_norm": 0.0011686907382681966, - "learning_rate": 0.00019999998958613544, - "loss": 46.0, - "step": 1910 - }, - { - "epoch": 0.1461092952577556, - "grad_norm": 0.0017345441738143563, - "learning_rate": 0.00019999998957517055, - "loss": 46.0, - "step": 1911 - }, - { - "epoch": 0.14618575224114533, - "grad_norm": 0.003488808637484908, - "learning_rate": 0.00019999998956419997, - "loss": 46.0, - "step": 1912 - }, - { - "epoch": 0.14626220922453503, - "grad_norm": 0.0011651850072667003, - "learning_rate": 0.0001999999895532236, - "loss": 46.0, - "step": 1913 - }, - { - "epoch": 0.14633866620792477, - "grad_norm": 0.002016558311879635, - "learning_rate": 0.0001999999895422414, - "loss": 46.0, - "step": 1914 - }, - { - "epoch": 0.1464151231913145, - "grad_norm": 0.0005485732108354568, - "learning_rate": 0.0001999999895312535, - "loss": 46.0, - "step": 1915 - }, - { - "epoch": 0.1464915801747042, - "grad_norm": 0.0007180381799116731, - "learning_rate": 0.0001999999895202598, - "loss": 46.0, - "step": 1916 - }, - { - "epoch": 0.14656803715809394, - "grad_norm": 0.00216016941703856, - "learning_rate": 0.00019999998950926033, - "loss": 46.0, - "step": 1917 - }, - { - "epoch": 0.14664449414148364, - "grad_norm": 0.00147823931183666, - "learning_rate": 0.0001999999894982551, - "loss": 46.0, - "step": 1918 - }, - { - "epoch": 0.14672095112487338, - "grad_norm": 0.0007160396198742092, - "learning_rate": 0.0001999999894872441, - "loss": 46.0, - "step": 1919 - }, - { - "epoch": 0.14679740810826308, - "grad_norm": 0.0011503099231049418, - "learning_rate": 0.00019999998947622732, - "loss": 46.0, - "step": 1920 - }, - { - "epoch": 0.14687386509165282, - "grad_norm": 0.000507137447129935, - "learning_rate": 0.0001999999894652048, - "loss": 46.0, - "step": 1921 - }, - { - "epoch": 0.14695032207504252, - "grad_norm": 0.001573152025230229, - "learning_rate": 0.00019999998945417646, - "loss": 46.0, - "step": 1922 - }, - { - "epoch": 0.14702677905843226, - "grad_norm": 0.0007994626648724079, - "learning_rate": 0.0001999999894431424, - "loss": 46.0, - "step": 1923 - }, - { - "epoch": 0.14710323604182196, - "grad_norm": 0.0010093190940096974, - "learning_rate": 0.00019999998943210254, - "loss": 46.0, - "step": 1924 - }, - { - "epoch": 0.1471796930252117, - "grad_norm": 0.0007949511636979878, - "learning_rate": 0.0001999999894210569, - "loss": 46.0, - "step": 1925 - }, - { - "epoch": 0.1472561500086014, - "grad_norm": 0.0013361780438572168, - "learning_rate": 0.00019999998941000552, - "loss": 46.0, - "step": 1926 - }, - { - "epoch": 0.14733260699199113, - "grad_norm": 0.004195475019514561, - "learning_rate": 0.00019999998939894837, - "loss": 46.0, - "step": 1927 - }, - { - "epoch": 0.14740906397538084, - "grad_norm": 0.0006216627079993486, - "learning_rate": 0.00019999998938788544, - "loss": 46.0, - "step": 1928 - }, - { - "epoch": 0.14748552095877057, - "grad_norm": 0.0016950040590018034, - "learning_rate": 0.00019999998937681674, - "loss": 46.0, - "step": 1929 - }, - { - "epoch": 0.1475619779421603, - "grad_norm": 0.001891896827146411, - "learning_rate": 0.00019999998936574227, - "loss": 46.0, - "step": 1930 - }, - { - "epoch": 0.14763843492555, - "grad_norm": 0.00175075966399163, - "learning_rate": 0.00019999998935466205, - "loss": 46.0, - "step": 1931 - }, - { - "epoch": 0.14771489190893974, - "grad_norm": 0.0012245957041159272, - "learning_rate": 0.00019999998934357605, - "loss": 46.0, - "step": 1932 - }, - { - "epoch": 0.14779134889232945, - "grad_norm": 0.006115789990872145, - "learning_rate": 0.00019999998933248428, - "loss": 46.0, - "step": 1933 - }, - { - "epoch": 0.14786780587571918, - "grad_norm": 0.0010071592405438423, - "learning_rate": 0.00019999998932138672, - "loss": 46.0, - "step": 1934 - }, - { - "epoch": 0.1479442628591089, - "grad_norm": 0.0008599729044362903, - "learning_rate": 0.0001999999893102834, - "loss": 46.0, - "step": 1935 - }, - { - "epoch": 0.14802071984249862, - "grad_norm": 0.0007196691003628075, - "learning_rate": 0.0001999999892991743, - "loss": 46.0, - "step": 1936 - }, - { - "epoch": 0.14809717682588833, - "grad_norm": 0.0008595269755460322, - "learning_rate": 0.00019999998928805948, - "loss": 46.0, - "step": 1937 - }, - { - "epoch": 0.14817363380927806, - "grad_norm": 0.0012517975410446525, - "learning_rate": 0.00019999998927693887, - "loss": 46.0, - "step": 1938 - }, - { - "epoch": 0.14825009079266777, - "grad_norm": 0.0008255209540948272, - "learning_rate": 0.00019999998926581246, - "loss": 46.0, - "step": 1939 - }, - { - "epoch": 0.1483265477760575, - "grad_norm": 0.0016731730429455638, - "learning_rate": 0.0001999999892546803, - "loss": 46.0, - "step": 1940 - }, - { - "epoch": 0.1484030047594472, - "grad_norm": 0.0007372979307547212, - "learning_rate": 0.00019999998924354238, - "loss": 46.0, - "step": 1941 - }, - { - "epoch": 0.14847946174283694, - "grad_norm": 0.00036389255546964705, - "learning_rate": 0.00019999998923239865, - "loss": 46.0, - "step": 1942 - }, - { - "epoch": 0.14855591872622664, - "grad_norm": 0.0007508700946345925, - "learning_rate": 0.00019999998922124918, - "loss": 46.0, - "step": 1943 - }, - { - "epoch": 0.14863237570961638, - "grad_norm": 0.001239851932041347, - "learning_rate": 0.00019999998921009396, - "loss": 46.0, - "step": 1944 - }, - { - "epoch": 0.1487088326930061, - "grad_norm": 0.0017710485262796283, - "learning_rate": 0.00019999998919893294, - "loss": 46.0, - "step": 1945 - }, - { - "epoch": 0.14878528967639582, - "grad_norm": 0.0015017925761640072, - "learning_rate": 0.00019999998918776617, - "loss": 46.0, - "step": 1946 - }, - { - "epoch": 0.14886174665978555, - "grad_norm": 0.0018905504839494824, - "learning_rate": 0.00019999998917659363, - "loss": 46.0, - "step": 1947 - }, - { - "epoch": 0.14893820364317525, - "grad_norm": 0.0011791565921157598, - "learning_rate": 0.00019999998916541532, - "loss": 46.0, - "step": 1948 - }, - { - "epoch": 0.149014660626565, - "grad_norm": 0.0034085509832948446, - "learning_rate": 0.00019999998915423123, - "loss": 46.0, - "step": 1949 - }, - { - "epoch": 0.1490911176099547, - "grad_norm": 0.0008498480310663581, - "learning_rate": 0.00019999998914304137, - "loss": 46.0, - "step": 1950 - }, - { - "epoch": 0.14916757459334443, - "grad_norm": 0.0006049004732631147, - "learning_rate": 0.00019999998913184573, - "loss": 46.0, - "step": 1951 - }, - { - "epoch": 0.14924403157673413, - "grad_norm": 0.0005667923833243549, - "learning_rate": 0.00019999998912064433, - "loss": 46.0, - "step": 1952 - }, - { - "epoch": 0.14932048856012387, - "grad_norm": 0.002274232218042016, - "learning_rate": 0.00019999998910943717, - "loss": 46.0, - "step": 1953 - }, - { - "epoch": 0.14939694554351357, - "grad_norm": 0.0009235851466655731, - "learning_rate": 0.00019999998909822424, - "loss": 46.0, - "step": 1954 - }, - { - "epoch": 0.1494734025269033, - "grad_norm": 0.0006369143957272172, - "learning_rate": 0.00019999998908700554, - "loss": 46.0, - "step": 1955 - }, - { - "epoch": 0.149549859510293, - "grad_norm": 0.001315504894591868, - "learning_rate": 0.00019999998907578107, - "loss": 46.0, - "step": 1956 - }, - { - "epoch": 0.14962631649368274, - "grad_norm": 0.0012645369861274958, - "learning_rate": 0.00019999998906455085, - "loss": 46.0, - "step": 1957 - }, - { - "epoch": 0.14970277347707245, - "grad_norm": 0.002732011955231428, - "learning_rate": 0.0001999999890533148, - "loss": 46.0, - "step": 1958 - }, - { - "epoch": 0.14977923046046218, - "grad_norm": 0.0005908128223381937, - "learning_rate": 0.00019999998904207303, - "loss": 46.0, - "step": 1959 - }, - { - "epoch": 0.14985568744385191, - "grad_norm": 0.001558394986204803, - "learning_rate": 0.0001999999890308255, - "loss": 46.0, - "step": 1960 - }, - { - "epoch": 0.14993214442724162, - "grad_norm": 0.0009301886311732233, - "learning_rate": 0.00019999998901957215, - "loss": 46.0, - "step": 1961 - }, - { - "epoch": 0.15000860141063135, - "grad_norm": 0.0005543414736166596, - "learning_rate": 0.00019999998900831307, - "loss": 46.0, - "step": 1962 - }, - { - "epoch": 0.15008505839402106, - "grad_norm": 0.0023252021055668592, - "learning_rate": 0.0001999999889970482, - "loss": 46.0, - "step": 1963 - }, - { - "epoch": 0.1501615153774108, - "grad_norm": 0.0009747722069732845, - "learning_rate": 0.0001999999889857776, - "loss": 46.0, - "step": 1964 - }, - { - "epoch": 0.1502379723608005, - "grad_norm": 0.0006042062886990607, - "learning_rate": 0.0001999999889745012, - "loss": 46.0, - "step": 1965 - }, - { - "epoch": 0.15031442934419023, - "grad_norm": 0.0009974264539778233, - "learning_rate": 0.000199999988963219, - "loss": 46.0, - "step": 1966 - }, - { - "epoch": 0.15039088632757994, - "grad_norm": 0.0011752208229154348, - "learning_rate": 0.00019999998895193108, - "loss": 46.0, - "step": 1967 - }, - { - "epoch": 0.15046734331096967, - "grad_norm": 0.0004962339298799634, - "learning_rate": 0.00019999998894063738, - "loss": 46.0, - "step": 1968 - }, - { - "epoch": 0.15054380029435938, - "grad_norm": 0.0013381093740463257, - "learning_rate": 0.00019999998892933788, - "loss": 46.0, - "step": 1969 - }, - { - "epoch": 0.1506202572777491, - "grad_norm": 0.0005600806907750666, - "learning_rate": 0.00019999998891803266, - "loss": 46.0, - "step": 1970 - }, - { - "epoch": 0.15069671426113881, - "grad_norm": 0.0025287673342972994, - "learning_rate": 0.00019999998890672164, - "loss": 46.0, - "step": 1971 - }, - { - "epoch": 0.15077317124452855, - "grad_norm": 0.006942170672118664, - "learning_rate": 0.00019999998889540485, - "loss": 46.0, - "step": 1972 - }, - { - "epoch": 0.15084962822791828, - "grad_norm": 0.0009068876970559359, - "learning_rate": 0.0001999999888840823, - "loss": 46.0, - "step": 1973 - }, - { - "epoch": 0.150926085211308, - "grad_norm": 0.0008322437643073499, - "learning_rate": 0.000199999988872754, - "loss": 46.0, - "step": 1974 - }, - { - "epoch": 0.15100254219469772, - "grad_norm": 0.0015376051887869835, - "learning_rate": 0.0001999999888614199, - "loss": 46.0, - "step": 1975 - }, - { - "epoch": 0.15107899917808743, - "grad_norm": 0.0015460910508409142, - "learning_rate": 0.00019999998885008005, - "loss": 46.0, - "step": 1976 - }, - { - "epoch": 0.15115545616147716, - "grad_norm": 0.0014701655600219965, - "learning_rate": 0.0001999999888387344, - "loss": 46.0, - "step": 1977 - }, - { - "epoch": 0.15123191314486686, - "grad_norm": 0.0007002081838436425, - "learning_rate": 0.000199999988827383, - "loss": 46.0, - "step": 1978 - }, - { - "epoch": 0.1513083701282566, - "grad_norm": 0.0016067997785285115, - "learning_rate": 0.00019999998881602583, - "loss": 46.0, - "step": 1979 - }, - { - "epoch": 0.1513848271116463, - "grad_norm": 0.0018214803421869874, - "learning_rate": 0.0001999999888046629, - "loss": 46.0, - "step": 1980 - }, - { - "epoch": 0.15146128409503604, - "grad_norm": 0.0013334158575162292, - "learning_rate": 0.00019999998879329417, - "loss": 46.0, - "step": 1981 - }, - { - "epoch": 0.15153774107842574, - "grad_norm": 0.0008082379354164004, - "learning_rate": 0.0001999999887819197, - "loss": 46.0, - "step": 1982 - }, - { - "epoch": 0.15161419806181547, - "grad_norm": 0.0010814432753250003, - "learning_rate": 0.00019999998877053945, - "loss": 46.0, - "step": 1983 - }, - { - "epoch": 0.15169065504520518, - "grad_norm": 0.0008795211324468255, - "learning_rate": 0.00019999998875915343, - "loss": 46.0, - "step": 1984 - }, - { - "epoch": 0.1517671120285949, - "grad_norm": 0.0008336423779837787, - "learning_rate": 0.00019999998874776164, - "loss": 46.0, - "step": 1985 - }, - { - "epoch": 0.15184356901198462, - "grad_norm": 0.0014741772320121527, - "learning_rate": 0.0001999999887363641, - "loss": 46.0, - "step": 1986 - }, - { - "epoch": 0.15192002599537435, - "grad_norm": 0.0005251533002592623, - "learning_rate": 0.00019999998872496076, - "loss": 46.0, - "step": 1987 - }, - { - "epoch": 0.15199648297876409, - "grad_norm": 0.00796753540635109, - "learning_rate": 0.00019999998871355164, - "loss": 46.0, - "step": 1988 - }, - { - "epoch": 0.1520729399621538, - "grad_norm": 0.002017762279137969, - "learning_rate": 0.0001999999887021368, - "loss": 46.0, - "step": 1989 - }, - { - "epoch": 0.15214939694554352, - "grad_norm": 0.0010771476663649082, - "learning_rate": 0.00019999998869071618, - "loss": 46.0, - "step": 1990 - }, - { - "epoch": 0.15222585392893323, - "grad_norm": 0.0007402400951832533, - "learning_rate": 0.00019999998867928977, - "loss": 46.0, - "step": 1991 - }, - { - "epoch": 0.15230231091232296, - "grad_norm": 0.0027147787623107433, - "learning_rate": 0.0001999999886678576, - "loss": 46.0, - "step": 1992 - }, - { - "epoch": 0.15237876789571267, - "grad_norm": 0.001034329878166318, - "learning_rate": 0.00019999998865641966, - "loss": 46.0, - "step": 1993 - }, - { - "epoch": 0.1524552248791024, - "grad_norm": 0.002314161043614149, - "learning_rate": 0.00019999998864497594, - "loss": 46.0, - "step": 1994 - }, - { - "epoch": 0.1525316818624921, - "grad_norm": 0.0010412337724119425, - "learning_rate": 0.00019999998863352646, - "loss": 46.0, - "step": 1995 - }, - { - "epoch": 0.15260813884588184, - "grad_norm": 0.0012637953041121364, - "learning_rate": 0.00019999998862207122, - "loss": 46.0, - "step": 1996 - }, - { - "epoch": 0.15268459582927155, - "grad_norm": 0.004517598543316126, - "learning_rate": 0.00019999998861061017, - "loss": 46.0, - "step": 1997 - }, - { - "epoch": 0.15276105281266128, - "grad_norm": 0.0006794864893890917, - "learning_rate": 0.0001999999885991434, - "loss": 46.0, - "step": 1998 - }, - { - "epoch": 0.15283750979605099, - "grad_norm": 0.0006668080459348857, - "learning_rate": 0.00019999998858767084, - "loss": 46.0, - "step": 1999 - }, - { - "epoch": 0.15291396677944072, - "grad_norm": 0.00331284711137414, - "learning_rate": 0.0001999999885761925, - "loss": 46.0, - "step": 2000 - }, - { - "epoch": 0.15299042376283042, - "grad_norm": 0.0020922617986798286, - "learning_rate": 0.0001999999885647084, - "loss": 46.0, - "step": 2001 - }, - { - "epoch": 0.15306688074622016, - "grad_norm": 0.0015879099955782294, - "learning_rate": 0.00019999998855321853, - "loss": 46.0, - "step": 2002 - }, - { - "epoch": 0.1531433377296099, - "grad_norm": 0.0008255551219917834, - "learning_rate": 0.0001999999885417229, - "loss": 46.0, - "step": 2003 - }, - { - "epoch": 0.1532197947129996, - "grad_norm": 0.0009046594495885074, - "learning_rate": 0.0001999999885302215, - "loss": 46.0, - "step": 2004 - }, - { - "epoch": 0.15329625169638933, - "grad_norm": 0.0006455119582824409, - "learning_rate": 0.0001999999885187143, - "loss": 46.0, - "step": 2005 - }, - { - "epoch": 0.15337270867977904, - "grad_norm": 0.0012476410483941436, - "learning_rate": 0.00019999998850720138, - "loss": 46.0, - "step": 2006 - }, - { - "epoch": 0.15344916566316877, - "grad_norm": 0.001307318452745676, - "learning_rate": 0.00019999998849568265, - "loss": 46.0, - "step": 2007 - }, - { - "epoch": 0.15352562264655847, - "grad_norm": 0.001295082620345056, - "learning_rate": 0.00019999998848415818, - "loss": 46.0, - "step": 2008 - }, - { - "epoch": 0.1536020796299482, - "grad_norm": 0.002381877275183797, - "learning_rate": 0.00019999998847262794, - "loss": 46.0, - "step": 2009 - }, - { - "epoch": 0.1536785366133379, - "grad_norm": 0.003229626687243581, - "learning_rate": 0.00019999998846109192, - "loss": 46.0, - "step": 2010 - }, - { - "epoch": 0.15375499359672765, - "grad_norm": 0.0010005870135501027, - "learning_rate": 0.00019999998844955012, - "loss": 46.0, - "step": 2011 - }, - { - "epoch": 0.15383145058011735, - "grad_norm": 0.0009752497426234186, - "learning_rate": 0.00019999998843800256, - "loss": 46.0, - "step": 2012 - }, - { - "epoch": 0.15390790756350708, - "grad_norm": 0.00232255132868886, - "learning_rate": 0.00019999998842644922, - "loss": 46.0, - "step": 2013 - }, - { - "epoch": 0.1539843645468968, - "grad_norm": 0.001352866762317717, - "learning_rate": 0.0001999999884148901, - "loss": 46.0, - "step": 2014 - }, - { - "epoch": 0.15406082153028652, - "grad_norm": 0.0010676317615434527, - "learning_rate": 0.00019999998840332524, - "loss": 46.0, - "step": 2015 - }, - { - "epoch": 0.15413727851367623, - "grad_norm": 0.0017763529904186726, - "learning_rate": 0.0001999999883917546, - "loss": 46.0, - "step": 2016 - }, - { - "epoch": 0.15421373549706596, - "grad_norm": 0.001128602772951126, - "learning_rate": 0.0001999999883801782, - "loss": 46.0, - "step": 2017 - }, - { - "epoch": 0.1542901924804557, - "grad_norm": 0.0012293212348595262, - "learning_rate": 0.00019999998836859603, - "loss": 46.0, - "step": 2018 - }, - { - "epoch": 0.1543666494638454, - "grad_norm": 0.004482009913772345, - "learning_rate": 0.00019999998835700807, - "loss": 46.0, - "step": 2019 - }, - { - "epoch": 0.15444310644723513, - "grad_norm": 0.0013166291173547506, - "learning_rate": 0.00019999998834541435, - "loss": 46.0, - "step": 2020 - }, - { - "epoch": 0.15451956343062484, - "grad_norm": 0.0013117279158905149, - "learning_rate": 0.00019999998833381487, - "loss": 46.0, - "step": 2021 - }, - { - "epoch": 0.15459602041401457, - "grad_norm": 0.0010317321866750717, - "learning_rate": 0.0001999999883222096, - "loss": 46.0, - "step": 2022 - }, - { - "epoch": 0.15467247739740428, - "grad_norm": 0.0026497314684093, - "learning_rate": 0.00019999998831059858, - "loss": 46.0, - "step": 2023 - }, - { - "epoch": 0.154748934380794, - "grad_norm": 0.0003948312078136951, - "learning_rate": 0.0001999999882989818, - "loss": 46.0, - "step": 2024 - }, - { - "epoch": 0.15482539136418372, - "grad_norm": 0.001416828134097159, - "learning_rate": 0.00019999998828735922, - "loss": 46.0, - "step": 2025 - }, - { - "epoch": 0.15490184834757345, - "grad_norm": 0.0013770645018666983, - "learning_rate": 0.0001999999882757309, - "loss": 46.0, - "step": 2026 - }, - { - "epoch": 0.15497830533096316, - "grad_norm": 0.0012552356347441673, - "learning_rate": 0.00019999998826409677, - "loss": 46.0, - "step": 2027 - }, - { - "epoch": 0.1550547623143529, - "grad_norm": 0.00109623814933002, - "learning_rate": 0.0001999999882524569, - "loss": 46.0, - "step": 2028 - }, - { - "epoch": 0.1551312192977426, - "grad_norm": 0.0020386953838169575, - "learning_rate": 0.00019999998824081128, - "loss": 46.0, - "step": 2029 - }, - { - "epoch": 0.15520767628113233, - "grad_norm": 0.0008147793123498559, - "learning_rate": 0.00019999998822915985, - "loss": 46.0, - "step": 2030 - }, - { - "epoch": 0.15528413326452206, - "grad_norm": 0.0021701687946915627, - "learning_rate": 0.00019999998821750267, - "loss": 46.0, - "step": 2031 - }, - { - "epoch": 0.15536059024791177, - "grad_norm": 0.004709912929683924, - "learning_rate": 0.00019999998820583974, - "loss": 46.0, - "step": 2032 - }, - { - "epoch": 0.1554370472313015, - "grad_norm": 0.0016737825935706496, - "learning_rate": 0.00019999998819417102, - "loss": 46.0, - "step": 2033 - }, - { - "epoch": 0.1555135042146912, - "grad_norm": 0.0007028424297459424, - "learning_rate": 0.0001999999881824965, - "loss": 46.0, - "step": 2034 - }, - { - "epoch": 0.15558996119808094, - "grad_norm": 0.0011884956620633602, - "learning_rate": 0.00019999998817081625, - "loss": 46.0, - "step": 2035 - }, - { - "epoch": 0.15566641818147064, - "grad_norm": 0.0010022360365837812, - "learning_rate": 0.00019999998815913023, - "loss": 46.0, - "step": 2036 - }, - { - "epoch": 0.15574287516486038, - "grad_norm": 0.0005076303496025503, - "learning_rate": 0.0001999999881474384, - "loss": 46.0, - "step": 2037 - }, - { - "epoch": 0.15581933214825008, - "grad_norm": 0.0014753614086657763, - "learning_rate": 0.00019999998813574087, - "loss": 46.0, - "step": 2038 - }, - { - "epoch": 0.15589578913163982, - "grad_norm": 0.0006860756548121572, - "learning_rate": 0.00019999998812403753, - "loss": 46.0, - "step": 2039 - }, - { - "epoch": 0.15597224611502952, - "grad_norm": 0.0010463307844474912, - "learning_rate": 0.00019999998811232842, - "loss": 46.0, - "step": 2040 - }, - { - "epoch": 0.15604870309841926, - "grad_norm": 0.003599617863073945, - "learning_rate": 0.00019999998810061353, - "loss": 46.0, - "step": 2041 - }, - { - "epoch": 0.15612516008180896, - "grad_norm": 0.0015982154291123152, - "learning_rate": 0.0001999999880888929, - "loss": 46.0, - "step": 2042 - }, - { - "epoch": 0.1562016170651987, - "grad_norm": 0.0025439190212637186, - "learning_rate": 0.0001999999880771665, - "loss": 46.0, - "step": 2043 - }, - { - "epoch": 0.1562780740485884, - "grad_norm": 0.0015923408791422844, - "learning_rate": 0.0001999999880654343, - "loss": 46.0, - "step": 2044 - }, - { - "epoch": 0.15635453103197813, - "grad_norm": 0.0007652989006601274, - "learning_rate": 0.00019999998805369634, - "loss": 46.0, - "step": 2045 - }, - { - "epoch": 0.15643098801536787, - "grad_norm": 0.001005074242129922, - "learning_rate": 0.0001999999880419526, - "loss": 46.0, - "step": 2046 - }, - { - "epoch": 0.15650744499875757, - "grad_norm": 0.011235262267291546, - "learning_rate": 0.0001999999880302031, - "loss": 46.0, - "step": 2047 - }, - { - "epoch": 0.1565839019821473, - "grad_norm": 0.0014421091182157397, - "learning_rate": 0.00019999998801844787, - "loss": 46.0, - "step": 2048 - }, - { - "epoch": 0.156660358965537, - "grad_norm": 0.0009523187763988972, - "learning_rate": 0.00019999998800668682, - "loss": 46.0, - "step": 2049 - }, - { - "epoch": 0.15673681594892674, - "grad_norm": 0.0015538878506049514, - "learning_rate": 0.00019999998799492, - "loss": 46.0, - "step": 2050 - }, - { - "epoch": 0.15681327293231645, - "grad_norm": 0.0025210848543792963, - "learning_rate": 0.00019999998798314746, - "loss": 46.0, - "step": 2051 - }, - { - "epoch": 0.15688972991570618, - "grad_norm": 0.0003893290995620191, - "learning_rate": 0.00019999998797136912, - "loss": 46.0, - "step": 2052 - }, - { - "epoch": 0.1569661868990959, - "grad_norm": 0.0023423300590366125, - "learning_rate": 0.000199999987959585, - "loss": 46.0, - "step": 2053 - }, - { - "epoch": 0.15704264388248562, - "grad_norm": 0.00150935014244169, - "learning_rate": 0.00019999998794779513, - "loss": 46.0, - "step": 2054 - }, - { - "epoch": 0.15711910086587533, - "grad_norm": 0.0014054529601708055, - "learning_rate": 0.00019999998793599947, - "loss": 46.0, - "step": 2055 - }, - { - "epoch": 0.15719555784926506, - "grad_norm": 0.001149459625594318, - "learning_rate": 0.00019999998792419806, - "loss": 46.0, - "step": 2056 - }, - { - "epoch": 0.15727201483265477, - "grad_norm": 0.005198684521019459, - "learning_rate": 0.00019999998791239088, - "loss": 46.0, - "step": 2057 - }, - { - "epoch": 0.1573484718160445, - "grad_norm": 0.004143446683883667, - "learning_rate": 0.00019999998790057793, - "loss": 46.0, - "step": 2058 - }, - { - "epoch": 0.1574249287994342, - "grad_norm": 0.0008370177820324898, - "learning_rate": 0.0001999999878887592, - "loss": 46.0, - "step": 2059 - }, - { - "epoch": 0.15750138578282394, - "grad_norm": 0.0007398803136311471, - "learning_rate": 0.0001999999878769347, - "loss": 46.0, - "step": 2060 - }, - { - "epoch": 0.15757784276621367, - "grad_norm": 0.001524512772448361, - "learning_rate": 0.00019999998786510444, - "loss": 46.0, - "step": 2061 - }, - { - "epoch": 0.15765429974960338, - "grad_norm": 0.002082538092508912, - "learning_rate": 0.0001999999878532684, - "loss": 46.0, - "step": 2062 - }, - { - "epoch": 0.1577307567329931, - "grad_norm": 0.0008017992950044572, - "learning_rate": 0.0001999999878414266, - "loss": 46.0, - "step": 2063 - }, - { - "epoch": 0.15780721371638282, - "grad_norm": 0.000616123725194484, - "learning_rate": 0.00019999998782957903, - "loss": 46.0, - "step": 2064 - }, - { - "epoch": 0.15788367069977255, - "grad_norm": 0.0007139994413591921, - "learning_rate": 0.00019999998781772567, - "loss": 46.0, - "step": 2065 - }, - { - "epoch": 0.15796012768316225, - "grad_norm": 0.001166562782600522, - "learning_rate": 0.00019999998780586658, - "loss": 46.0, - "step": 2066 - }, - { - "epoch": 0.158036584666552, - "grad_norm": 0.0005792691372334957, - "learning_rate": 0.00019999998779400167, - "loss": 46.0, - "step": 2067 - }, - { - "epoch": 0.1581130416499417, - "grad_norm": 0.0011840608203783631, - "learning_rate": 0.000199999987782131, - "loss": 46.0, - "step": 2068 - }, - { - "epoch": 0.15818949863333143, - "grad_norm": 0.0016025849618017673, - "learning_rate": 0.0001999999877702546, - "loss": 46.0, - "step": 2069 - }, - { - "epoch": 0.15826595561672113, - "grad_norm": 0.0004976553027518094, - "learning_rate": 0.00019999998775837243, - "loss": 46.0, - "step": 2070 - }, - { - "epoch": 0.15834241260011087, - "grad_norm": 0.001387871685437858, - "learning_rate": 0.00019999998774648445, - "loss": 46.0, - "step": 2071 - }, - { - "epoch": 0.15841886958350057, - "grad_norm": 0.001114772749133408, - "learning_rate": 0.00019999998773459073, - "loss": 46.0, - "step": 2072 - }, - { - "epoch": 0.1584953265668903, - "grad_norm": 0.0009465409675613046, - "learning_rate": 0.00019999998772269123, - "loss": 46.0, - "step": 2073 - }, - { - "epoch": 0.15857178355028, - "grad_norm": 0.002183204982429743, - "learning_rate": 0.00019999998771078596, - "loss": 46.0, - "step": 2074 - }, - { - "epoch": 0.15864824053366974, - "grad_norm": 0.0007752188830636442, - "learning_rate": 0.0001999999876988749, - "loss": 46.0, - "step": 2075 - }, - { - "epoch": 0.15872469751705948, - "grad_norm": 0.0007461218046955764, - "learning_rate": 0.00019999998768695812, - "loss": 46.0, - "step": 2076 - }, - { - "epoch": 0.15880115450044918, - "grad_norm": 0.0009555542492307723, - "learning_rate": 0.00019999998767503553, - "loss": 46.0, - "step": 2077 - }, - { - "epoch": 0.15887761148383892, - "grad_norm": 0.0009751114994287491, - "learning_rate": 0.0001999999876631072, - "loss": 46.0, - "step": 2078 - }, - { - "epoch": 0.15895406846722862, - "grad_norm": 0.0006597651517949998, - "learning_rate": 0.00019999998765117308, - "loss": 46.0, - "step": 2079 - }, - { - "epoch": 0.15903052545061835, - "grad_norm": 0.0008710972033441067, - "learning_rate": 0.0001999999876392332, - "loss": 46.0, - "step": 2080 - }, - { - "epoch": 0.15910698243400806, - "grad_norm": 0.0006096456781961024, - "learning_rate": 0.00019999998762728754, - "loss": 46.0, - "step": 2081 - }, - { - "epoch": 0.1591834394173978, - "grad_norm": 0.004188179969787598, - "learning_rate": 0.0001999999876153361, - "loss": 46.0, - "step": 2082 - }, - { - "epoch": 0.1592598964007875, - "grad_norm": 0.0004476464237086475, - "learning_rate": 0.00019999998760337893, - "loss": 46.0, - "step": 2083 - }, - { - "epoch": 0.15933635338417723, - "grad_norm": 0.0009876301046460867, - "learning_rate": 0.00019999998759141595, - "loss": 46.0, - "step": 2084 - }, - { - "epoch": 0.15941281036756694, - "grad_norm": 0.0017412264132872224, - "learning_rate": 0.00019999998757944722, - "loss": 46.0, - "step": 2085 - }, - { - "epoch": 0.15948926735095667, - "grad_norm": 0.0010906915413215756, - "learning_rate": 0.00019999998756747273, - "loss": 46.0, - "step": 2086 - }, - { - "epoch": 0.15956572433434638, - "grad_norm": 0.0009544620988890529, - "learning_rate": 0.00019999998755549245, - "loss": 46.0, - "step": 2087 - }, - { - "epoch": 0.1596421813177361, - "grad_norm": 0.0013774504186585546, - "learning_rate": 0.0001999999875435064, - "loss": 46.0, - "step": 2088 - }, - { - "epoch": 0.15971863830112584, - "grad_norm": 0.00046507621300406754, - "learning_rate": 0.00019999998753151462, - "loss": 46.0, - "step": 2089 - }, - { - "epoch": 0.15979509528451555, - "grad_norm": 0.0033230867702513933, - "learning_rate": 0.000199999987519517, - "loss": 46.0, - "step": 2090 - }, - { - "epoch": 0.15987155226790528, - "grad_norm": 0.0015882361913099885, - "learning_rate": 0.00019999998750751366, - "loss": 46.0, - "step": 2091 - }, - { - "epoch": 0.159948009251295, - "grad_norm": 0.0010441900230944157, - "learning_rate": 0.00019999998749550458, - "loss": 46.0, - "step": 2092 - }, - { - "epoch": 0.16002446623468472, - "grad_norm": 0.0013350731460377574, - "learning_rate": 0.00019999998748348967, - "loss": 46.0, - "step": 2093 - }, - { - "epoch": 0.16010092321807443, - "grad_norm": 0.0006891741650179029, - "learning_rate": 0.000199999987471469, - "loss": 46.0, - "step": 2094 - }, - { - "epoch": 0.16017738020146416, - "grad_norm": 0.0014986591413617134, - "learning_rate": 0.00019999998745944258, - "loss": 46.0, - "step": 2095 - }, - { - "epoch": 0.16025383718485386, - "grad_norm": 0.0008050437900237739, - "learning_rate": 0.00019999998744741037, - "loss": 46.0, - "step": 2096 - }, - { - "epoch": 0.1603302941682436, - "grad_norm": 0.0007129543810151517, - "learning_rate": 0.00019999998743537242, - "loss": 46.0, - "step": 2097 - }, - { - "epoch": 0.1604067511516333, - "grad_norm": 0.0016608787700533867, - "learning_rate": 0.00019999998742332867, - "loss": 46.0, - "step": 2098 - }, - { - "epoch": 0.16048320813502304, - "grad_norm": 0.0016671482007950544, - "learning_rate": 0.0001999999874112792, - "loss": 46.0, - "step": 2099 - }, - { - "epoch": 0.16055966511841274, - "grad_norm": 0.0006881877197884023, - "learning_rate": 0.0001999999873992239, - "loss": 46.0, - "step": 2100 - }, - { - "epoch": 0.16063612210180248, - "grad_norm": 0.0008800942450761795, - "learning_rate": 0.00019999998738716288, - "loss": 46.0, - "step": 2101 - }, - { - "epoch": 0.16071257908519218, - "grad_norm": 0.0007842633640393615, - "learning_rate": 0.00019999998737509604, - "loss": 46.0, - "step": 2102 - }, - { - "epoch": 0.16078903606858191, - "grad_norm": 0.0011482764966785908, - "learning_rate": 0.00019999998736302347, - "loss": 46.0, - "step": 2103 - }, - { - "epoch": 0.16086549305197165, - "grad_norm": 0.0014609784120693803, - "learning_rate": 0.0001999999873509451, - "loss": 46.0, - "step": 2104 - }, - { - "epoch": 0.16094195003536135, - "grad_norm": 0.002479089191183448, - "learning_rate": 0.00019999998733886097, - "loss": 46.0, - "step": 2105 - }, - { - "epoch": 0.1610184070187511, - "grad_norm": 0.0009622796787880361, - "learning_rate": 0.0001999999873267711, - "loss": 46.0, - "step": 2106 - }, - { - "epoch": 0.1610948640021408, - "grad_norm": 0.000658390112221241, - "learning_rate": 0.00019999998731467543, - "loss": 46.0, - "step": 2107 - }, - { - "epoch": 0.16117132098553053, - "grad_norm": 0.0032200063578784466, - "learning_rate": 0.000199999987302574, - "loss": 46.0, - "step": 2108 - }, - { - "epoch": 0.16124777796892023, - "grad_norm": 0.001384242088533938, - "learning_rate": 0.0001999999872904668, - "loss": 46.0, - "step": 2109 - }, - { - "epoch": 0.16132423495230996, - "grad_norm": 0.0031077503226697445, - "learning_rate": 0.00019999998727835384, - "loss": 46.0, - "step": 2110 - }, - { - "epoch": 0.16140069193569967, - "grad_norm": 0.0007535358308814466, - "learning_rate": 0.00019999998726623512, - "loss": 46.0, - "step": 2111 - }, - { - "epoch": 0.1614771489190894, - "grad_norm": 0.0013495383318513632, - "learning_rate": 0.0001999999872541106, - "loss": 46.0, - "step": 2112 - }, - { - "epoch": 0.1615536059024791, - "grad_norm": 0.0016731504583731294, - "learning_rate": 0.00019999998724198033, - "loss": 46.0, - "step": 2113 - }, - { - "epoch": 0.16163006288586884, - "grad_norm": 0.0009444876341149211, - "learning_rate": 0.00019999998722984428, - "loss": 46.0, - "step": 2114 - }, - { - "epoch": 0.16170651986925855, - "grad_norm": 0.005001413635909557, - "learning_rate": 0.0001999999872177025, - "loss": 46.0, - "step": 2115 - }, - { - "epoch": 0.16178297685264828, - "grad_norm": 0.0032453536987304688, - "learning_rate": 0.00019999998720555487, - "loss": 46.0, - "step": 2116 - }, - { - "epoch": 0.16185943383603799, - "grad_norm": 0.0007565810228697956, - "learning_rate": 0.0001999999871934015, - "loss": 46.0, - "step": 2117 - }, - { - "epoch": 0.16193589081942772, - "grad_norm": 0.0006535269203595817, - "learning_rate": 0.0001999999871812424, - "loss": 46.0, - "step": 2118 - }, - { - "epoch": 0.16201234780281745, - "grad_norm": 0.0010752216912806034, - "learning_rate": 0.00019999998716907751, - "loss": 46.0, - "step": 2119 - }, - { - "epoch": 0.16208880478620716, - "grad_norm": 0.001103323302231729, - "learning_rate": 0.00019999998715690683, - "loss": 46.0, - "step": 2120 - }, - { - "epoch": 0.1621652617695969, - "grad_norm": 0.0011689539533108473, - "learning_rate": 0.0001999999871447304, - "loss": 46.0, - "step": 2121 - }, - { - "epoch": 0.1622417187529866, - "grad_norm": 0.00039576523704454303, - "learning_rate": 0.0001999999871325482, - "loss": 46.0, - "step": 2122 - }, - { - "epoch": 0.16231817573637633, - "grad_norm": 0.004719104617834091, - "learning_rate": 0.00019999998712036024, - "loss": 46.0, - "step": 2123 - }, - { - "epoch": 0.16239463271976604, - "grad_norm": 0.003539037425071001, - "learning_rate": 0.0001999999871081665, - "loss": 46.0, - "step": 2124 - }, - { - "epoch": 0.16247108970315577, - "grad_norm": 0.0017531836638227105, - "learning_rate": 0.000199999987095967, - "loss": 46.0, - "step": 2125 - }, - { - "epoch": 0.16254754668654547, - "grad_norm": 0.0008034551865421236, - "learning_rate": 0.0001999999870837617, - "loss": 46.0, - "step": 2126 - }, - { - "epoch": 0.1626240036699352, - "grad_norm": 0.0015628214459866285, - "learning_rate": 0.00019999998707155066, - "loss": 46.0, - "step": 2127 - }, - { - "epoch": 0.1627004606533249, - "grad_norm": 0.001498895580880344, - "learning_rate": 0.00019999998705933384, - "loss": 46.0, - "step": 2128 - }, - { - "epoch": 0.16277691763671465, - "grad_norm": 0.0008606580668129027, - "learning_rate": 0.00019999998704711125, - "loss": 46.0, - "step": 2129 - }, - { - "epoch": 0.16285337462010435, - "grad_norm": 0.003488902933895588, - "learning_rate": 0.00019999998703488288, - "loss": 46.0, - "step": 2130 - }, - { - "epoch": 0.16292983160349409, - "grad_norm": 0.0008501252741552889, - "learning_rate": 0.00019999998702264877, - "loss": 46.0, - "step": 2131 - }, - { - "epoch": 0.1630062885868838, - "grad_norm": 0.0006707796710543334, - "learning_rate": 0.00019999998701040886, - "loss": 46.0, - "step": 2132 - }, - { - "epoch": 0.16308274557027352, - "grad_norm": 0.0011532234493643045, - "learning_rate": 0.0001999999869981632, - "loss": 46.0, - "step": 2133 - }, - { - "epoch": 0.16315920255366326, - "grad_norm": 0.001927627483382821, - "learning_rate": 0.00019999998698591175, - "loss": 46.0, - "step": 2134 - }, - { - "epoch": 0.16323565953705296, - "grad_norm": 0.004618369974195957, - "learning_rate": 0.00019999998697365457, - "loss": 46.0, - "step": 2135 - }, - { - "epoch": 0.1633121165204427, - "grad_norm": 0.001824512262828648, - "learning_rate": 0.0001999999869613916, - "loss": 46.0, - "step": 2136 - }, - { - "epoch": 0.1633885735038324, - "grad_norm": 0.002617462072521448, - "learning_rate": 0.00019999998694912282, - "loss": 46.0, - "step": 2137 - }, - { - "epoch": 0.16346503048722213, - "grad_norm": 0.012715129181742668, - "learning_rate": 0.00019999998693684835, - "loss": 46.0, - "step": 2138 - }, - { - "epoch": 0.16354148747061184, - "grad_norm": 0.0006668414571322501, - "learning_rate": 0.00019999998692456805, - "loss": 46.0, - "step": 2139 - }, - { - "epoch": 0.16361794445400157, - "grad_norm": 0.0008075271034613252, - "learning_rate": 0.00019999998691228198, - "loss": 46.0, - "step": 2140 - }, - { - "epoch": 0.16369440143739128, - "grad_norm": 0.0009113276028074324, - "learning_rate": 0.0001999999868999902, - "loss": 46.0, - "step": 2141 - }, - { - "epoch": 0.163770858420781, - "grad_norm": 0.005092452745884657, - "learning_rate": 0.0001999999868876926, - "loss": 46.0, - "step": 2142 - }, - { - "epoch": 0.16384731540417072, - "grad_norm": 0.0009798347018659115, - "learning_rate": 0.00019999998687538924, - "loss": 46.0, - "step": 2143 - }, - { - "epoch": 0.16392377238756045, - "grad_norm": 0.0004848692915402353, - "learning_rate": 0.0001999999868630801, - "loss": 46.0, - "step": 2144 - }, - { - "epoch": 0.16400022937095016, - "grad_norm": 0.0009060886804945767, - "learning_rate": 0.0001999999868507652, - "loss": 46.0, - "step": 2145 - }, - { - "epoch": 0.1640766863543399, - "grad_norm": 0.004894052632153034, - "learning_rate": 0.00019999998683844453, - "loss": 46.0, - "step": 2146 - }, - { - "epoch": 0.16415314333772962, - "grad_norm": 0.0008194067631848156, - "learning_rate": 0.0001999999868261181, - "loss": 46.0, - "step": 2147 - }, - { - "epoch": 0.16422960032111933, - "grad_norm": 0.0006418468547053635, - "learning_rate": 0.0001999999868137859, - "loss": 46.0, - "step": 2148 - }, - { - "epoch": 0.16430605730450906, - "grad_norm": 0.0005521623534150422, - "learning_rate": 0.0001999999868014479, - "loss": 46.0, - "step": 2149 - }, - { - "epoch": 0.16438251428789877, - "grad_norm": 0.004384755622595549, - "learning_rate": 0.00019999998678910417, - "loss": 46.0, - "step": 2150 - }, - { - "epoch": 0.1644589712712885, - "grad_norm": 0.001799800549633801, - "learning_rate": 0.00019999998677675465, - "loss": 46.0, - "step": 2151 - }, - { - "epoch": 0.1645354282546782, - "grad_norm": 0.0008425433188676834, - "learning_rate": 0.00019999998676439935, - "loss": 46.0, - "step": 2152 - }, - { - "epoch": 0.16461188523806794, - "grad_norm": 0.0023057525977492332, - "learning_rate": 0.0001999999867520383, - "loss": 46.0, - "step": 2153 - }, - { - "epoch": 0.16468834222145765, - "grad_norm": 0.0004937535850331187, - "learning_rate": 0.0001999999867396715, - "loss": 46.0, - "step": 2154 - }, - { - "epoch": 0.16476479920484738, - "grad_norm": 0.001624703872948885, - "learning_rate": 0.0001999999867272989, - "loss": 46.0, - "step": 2155 - }, - { - "epoch": 0.16484125618823708, - "grad_norm": 0.0017441855743527412, - "learning_rate": 0.00019999998671492054, - "loss": 46.0, - "step": 2156 - }, - { - "epoch": 0.16491771317162682, - "grad_norm": 0.002246905816718936, - "learning_rate": 0.0001999999867025364, - "loss": 46.0, - "step": 2157 - }, - { - "epoch": 0.16499417015501652, - "grad_norm": 0.0006978364544920623, - "learning_rate": 0.0001999999866901465, - "loss": 46.0, - "step": 2158 - }, - { - "epoch": 0.16507062713840626, - "grad_norm": 0.0007684300653636456, - "learning_rate": 0.0001999999866777508, - "loss": 46.0, - "step": 2159 - }, - { - "epoch": 0.16514708412179596, - "grad_norm": 0.0004500261857174337, - "learning_rate": 0.00019999998666534938, - "loss": 46.0, - "step": 2160 - }, - { - "epoch": 0.1652235411051857, - "grad_norm": 0.0007735901745036244, - "learning_rate": 0.00019999998665294215, - "loss": 46.0, - "step": 2161 - }, - { - "epoch": 0.16529999808857543, - "grad_norm": 0.0009222542867064476, - "learning_rate": 0.0001999999866405292, - "loss": 46.0, - "step": 2162 - }, - { - "epoch": 0.16537645507196513, - "grad_norm": 0.0011216216953471303, - "learning_rate": 0.00019999998662811045, - "loss": 46.0, - "step": 2163 - }, - { - "epoch": 0.16545291205535487, - "grad_norm": 0.009975769557058811, - "learning_rate": 0.0001999999866156859, - "loss": 46.0, - "step": 2164 - }, - { - "epoch": 0.16552936903874457, - "grad_norm": 0.0022531873546540737, - "learning_rate": 0.00019999998660325563, - "loss": 46.0, - "step": 2165 - }, - { - "epoch": 0.1656058260221343, - "grad_norm": 0.0005565266474150121, - "learning_rate": 0.0001999999865908196, - "loss": 46.0, - "step": 2166 - }, - { - "epoch": 0.165682283005524, - "grad_norm": 0.0012787970481440425, - "learning_rate": 0.00019999998657837774, - "loss": 46.0, - "step": 2167 - }, - { - "epoch": 0.16575873998891374, - "grad_norm": 0.0013191215693950653, - "learning_rate": 0.00019999998656593015, - "loss": 46.0, - "step": 2168 - }, - { - "epoch": 0.16583519697230345, - "grad_norm": 0.0009020023280754685, - "learning_rate": 0.0001999999865534768, - "loss": 46.0, - "step": 2169 - }, - { - "epoch": 0.16591165395569318, - "grad_norm": 0.0016326627228409052, - "learning_rate": 0.00019999998654101763, - "loss": 46.0, - "step": 2170 - }, - { - "epoch": 0.1659881109390829, - "grad_norm": 0.0009855673415586352, - "learning_rate": 0.00019999998652855272, - "loss": 46.0, - "step": 2171 - }, - { - "epoch": 0.16606456792247262, - "grad_norm": 0.0007167229196056724, - "learning_rate": 0.00019999998651608206, - "loss": 46.0, - "step": 2172 - }, - { - "epoch": 0.16614102490586233, - "grad_norm": 0.0018436299869790673, - "learning_rate": 0.0001999999865036056, - "loss": 46.0, - "step": 2173 - }, - { - "epoch": 0.16621748188925206, - "grad_norm": 0.0032357568852603436, - "learning_rate": 0.0001999999864911234, - "loss": 46.0, - "step": 2174 - }, - { - "epoch": 0.16629393887264177, - "grad_norm": 0.003142263740301132, - "learning_rate": 0.0001999999864786354, - "loss": 46.0, - "step": 2175 - }, - { - "epoch": 0.1663703958560315, - "grad_norm": 0.001277631032280624, - "learning_rate": 0.00019999998646614168, - "loss": 46.0, - "step": 2176 - }, - { - "epoch": 0.16644685283942123, - "grad_norm": 0.003091112244874239, - "learning_rate": 0.00019999998645364215, - "loss": 46.0, - "step": 2177 - }, - { - "epoch": 0.16652330982281094, - "grad_norm": 0.0004034547018818557, - "learning_rate": 0.00019999998644113686, - "loss": 46.0, - "step": 2178 - }, - { - "epoch": 0.16659976680620067, - "grad_norm": 0.0009671281441114843, - "learning_rate": 0.00019999998642862581, - "loss": 46.0, - "step": 2179 - }, - { - "epoch": 0.16667622378959038, - "grad_norm": 0.0009685232653282583, - "learning_rate": 0.00019999998641610897, - "loss": 46.0, - "step": 2180 - }, - { - "epoch": 0.1667526807729801, - "grad_norm": 0.0009498662548139691, - "learning_rate": 0.00019999998640358638, - "loss": 46.0, - "step": 2181 - }, - { - "epoch": 0.16682913775636982, - "grad_norm": 0.0013346386840566993, - "learning_rate": 0.000199999986391058, - "loss": 46.0, - "step": 2182 - }, - { - "epoch": 0.16690559473975955, - "grad_norm": 0.0021585833746939898, - "learning_rate": 0.00019999998637852383, - "loss": 46.0, - "step": 2183 - }, - { - "epoch": 0.16698205172314926, - "grad_norm": 0.0008964298758655787, - "learning_rate": 0.00019999998636598395, - "loss": 46.0, - "step": 2184 - }, - { - "epoch": 0.167058508706539, - "grad_norm": 0.001154283294454217, - "learning_rate": 0.00019999998635343826, - "loss": 46.0, - "step": 2185 - }, - { - "epoch": 0.1671349656899287, - "grad_norm": 0.001135069178417325, - "learning_rate": 0.00019999998634088684, - "loss": 46.0, - "step": 2186 - }, - { - "epoch": 0.16721142267331843, - "grad_norm": 0.002007537754252553, - "learning_rate": 0.00019999998632832958, - "loss": 46.0, - "step": 2187 - }, - { - "epoch": 0.16728787965670813, - "grad_norm": 0.0010732619557529688, - "learning_rate": 0.00019999998631576663, - "loss": 46.0, - "step": 2188 - }, - { - "epoch": 0.16736433664009787, - "grad_norm": 0.002044120104983449, - "learning_rate": 0.00019999998630319788, - "loss": 46.0, - "step": 2189 - }, - { - "epoch": 0.16744079362348757, - "grad_norm": 0.0047595989890396595, - "learning_rate": 0.00019999998629062336, - "loss": 46.0, - "step": 2190 - }, - { - "epoch": 0.1675172506068773, - "grad_norm": 0.0016334770480170846, - "learning_rate": 0.00019999998627804306, - "loss": 46.0, - "step": 2191 - }, - { - "epoch": 0.16759370759026704, - "grad_norm": 0.0022198061924427748, - "learning_rate": 0.000199999986265457, - "loss": 46.0, - "step": 2192 - }, - { - "epoch": 0.16767016457365674, - "grad_norm": 0.001590372179634869, - "learning_rate": 0.00019999998625286515, - "loss": 46.0, - "step": 2193 - }, - { - "epoch": 0.16774662155704648, - "grad_norm": 0.002364665037021041, - "learning_rate": 0.00019999998624026753, - "loss": 46.0, - "step": 2194 - }, - { - "epoch": 0.16782307854043618, - "grad_norm": 0.0008185718324966729, - "learning_rate": 0.00019999998622766417, - "loss": 46.0, - "step": 2195 - }, - { - "epoch": 0.16789953552382592, - "grad_norm": 0.0012419706908985972, - "learning_rate": 0.00019999998621505504, - "loss": 46.0, - "step": 2196 - }, - { - "epoch": 0.16797599250721562, - "grad_norm": 0.0007304903701879084, - "learning_rate": 0.00019999998620244013, - "loss": 46.0, - "step": 2197 - }, - { - "epoch": 0.16805244949060535, - "grad_norm": 0.0011022234102711082, - "learning_rate": 0.00019999998618981944, - "loss": 46.0, - "step": 2198 - }, - { - "epoch": 0.16812890647399506, - "grad_norm": 0.0006423944141715765, - "learning_rate": 0.000199999986177193, - "loss": 46.0, - "step": 2199 - }, - { - "epoch": 0.1682053634573848, - "grad_norm": 0.0011182207381352782, - "learning_rate": 0.00019999998616456076, - "loss": 46.0, - "step": 2200 - }, - { - "epoch": 0.1682818204407745, - "grad_norm": 0.0011730561964213848, - "learning_rate": 0.00019999998615192278, - "loss": 46.0, - "step": 2201 - }, - { - "epoch": 0.16835827742416423, - "grad_norm": 0.0008415406919084489, - "learning_rate": 0.00019999998613927903, - "loss": 46.0, - "step": 2202 - }, - { - "epoch": 0.16843473440755394, - "grad_norm": 0.0019437976879999042, - "learning_rate": 0.0001999999861266295, - "loss": 46.0, - "step": 2203 - }, - { - "epoch": 0.16851119139094367, - "grad_norm": 0.0013399278977885842, - "learning_rate": 0.00019999998611397422, - "loss": 46.0, - "step": 2204 - }, - { - "epoch": 0.1685876483743334, - "grad_norm": 0.0005270153633318841, - "learning_rate": 0.00019999998610131315, - "loss": 46.0, - "step": 2205 - }, - { - "epoch": 0.1686641053577231, - "grad_norm": 0.0006866326439194381, - "learning_rate": 0.0001999999860886463, - "loss": 46.0, - "step": 2206 - }, - { - "epoch": 0.16874056234111284, - "grad_norm": 0.0012157591991126537, - "learning_rate": 0.0001999999860759737, - "loss": 46.0, - "step": 2207 - }, - { - "epoch": 0.16881701932450255, - "grad_norm": 0.0005165923503227532, - "learning_rate": 0.00019999998606329533, - "loss": 46.0, - "step": 2208 - }, - { - "epoch": 0.16889347630789228, - "grad_norm": 0.008756878785789013, - "learning_rate": 0.00019999998605061117, - "loss": 46.0, - "step": 2209 - }, - { - "epoch": 0.168969933291282, - "grad_norm": 0.0021029466297477484, - "learning_rate": 0.00019999998603792128, - "loss": 46.0, - "step": 2210 - }, - { - "epoch": 0.16904639027467172, - "grad_norm": 0.0008491664775647223, - "learning_rate": 0.00019999998602522557, - "loss": 46.0, - "step": 2211 - }, - { - "epoch": 0.16912284725806143, - "grad_norm": 0.0011756946332752705, - "learning_rate": 0.00019999998601252412, - "loss": 46.0, - "step": 2212 - }, - { - "epoch": 0.16919930424145116, - "grad_norm": 0.0006581502384506166, - "learning_rate": 0.00019999998599981692, - "loss": 46.0, - "step": 2213 - }, - { - "epoch": 0.16927576122484086, - "grad_norm": 0.0003999524633400142, - "learning_rate": 0.00019999998598710391, - "loss": 46.0, - "step": 2214 - }, - { - "epoch": 0.1693522182082306, - "grad_norm": 0.0012821991695091128, - "learning_rate": 0.00019999998597438517, - "loss": 46.0, - "step": 2215 - }, - { - "epoch": 0.1694286751916203, - "grad_norm": 0.0007446478120982647, - "learning_rate": 0.00019999998596166062, - "loss": 46.0, - "step": 2216 - }, - { - "epoch": 0.16950513217501004, - "grad_norm": 0.0012399741681292653, - "learning_rate": 0.00019999998594893032, - "loss": 46.0, - "step": 2217 - }, - { - "epoch": 0.16958158915839974, - "grad_norm": 0.00047427957179024816, - "learning_rate": 0.00019999998593619425, - "loss": 46.0, - "step": 2218 - }, - { - "epoch": 0.16965804614178948, - "grad_norm": 0.0009618039475753903, - "learning_rate": 0.00019999998592345238, - "loss": 46.0, - "step": 2219 - }, - { - "epoch": 0.1697345031251792, - "grad_norm": 0.0028572191949933767, - "learning_rate": 0.00019999998591070482, - "loss": 46.0, - "step": 2220 - }, - { - "epoch": 0.16981096010856891, - "grad_norm": 0.0007253816584125161, - "learning_rate": 0.0001999999858979514, - "loss": 46.0, - "step": 2221 - }, - { - "epoch": 0.16988741709195865, - "grad_norm": 0.0007867772947065532, - "learning_rate": 0.0001999999858851923, - "loss": 46.0, - "step": 2222 - }, - { - "epoch": 0.16996387407534835, - "grad_norm": 0.0018229043344035745, - "learning_rate": 0.00019999998587242736, - "loss": 46.0, - "step": 2223 - }, - { - "epoch": 0.1700403310587381, - "grad_norm": 0.0021692633163183928, - "learning_rate": 0.00019999998585965668, - "loss": 46.0, - "step": 2224 - }, - { - "epoch": 0.1701167880421278, - "grad_norm": 0.001353766187094152, - "learning_rate": 0.00019999998584688023, - "loss": 46.0, - "step": 2225 - }, - { - "epoch": 0.17019324502551753, - "grad_norm": 0.00323905935510993, - "learning_rate": 0.000199999985834098, - "loss": 46.0, - "step": 2226 - }, - { - "epoch": 0.17026970200890723, - "grad_norm": 0.0014555241214111447, - "learning_rate": 0.00019999998582131002, - "loss": 46.0, - "step": 2227 - }, - { - "epoch": 0.17034615899229696, - "grad_norm": 0.0011925987200811505, - "learning_rate": 0.00019999998580851625, - "loss": 46.0, - "step": 2228 - }, - { - "epoch": 0.17042261597568667, - "grad_norm": 0.00040810563950799406, - "learning_rate": 0.0001999999857957167, - "loss": 46.0, - "step": 2229 - }, - { - "epoch": 0.1704990729590764, - "grad_norm": 0.0016967917326837778, - "learning_rate": 0.0001999999857829114, - "loss": 46.0, - "step": 2230 - }, - { - "epoch": 0.1705755299424661, - "grad_norm": 0.0008670039242133498, - "learning_rate": 0.00019999998577010033, - "loss": 46.0, - "step": 2231 - }, - { - "epoch": 0.17065198692585584, - "grad_norm": 0.0011233967961743474, - "learning_rate": 0.0001999999857572835, - "loss": 46.0, - "step": 2232 - }, - { - "epoch": 0.17072844390924555, - "grad_norm": 0.0011340989731252193, - "learning_rate": 0.00019999998574446085, - "loss": 46.0, - "step": 2233 - }, - { - "epoch": 0.17080490089263528, - "grad_norm": 0.0021429695188999176, - "learning_rate": 0.0001999999857316325, - "loss": 46.0, - "step": 2234 - }, - { - "epoch": 0.17088135787602501, - "grad_norm": 0.005632530432194471, - "learning_rate": 0.00019999998571879835, - "loss": 46.0, - "step": 2235 - }, - { - "epoch": 0.17095781485941472, - "grad_norm": 0.0027736122719943523, - "learning_rate": 0.00019999998570595842, - "loss": 46.0, - "step": 2236 - }, - { - "epoch": 0.17103427184280445, - "grad_norm": 0.0009647748083807528, - "learning_rate": 0.00019999998569311274, - "loss": 46.0, - "step": 2237 - }, - { - "epoch": 0.17111072882619416, - "grad_norm": 0.0012944993795827031, - "learning_rate": 0.00019999998568026128, - "loss": 46.0, - "step": 2238 - }, - { - "epoch": 0.1711871858095839, - "grad_norm": 0.0036405797582119703, - "learning_rate": 0.00019999998566740405, - "loss": 46.0, - "step": 2239 - }, - { - "epoch": 0.1712636427929736, - "grad_norm": 0.0016364466864615679, - "learning_rate": 0.00019999998565454105, - "loss": 46.0, - "step": 2240 - }, - { - "epoch": 0.17134009977636333, - "grad_norm": 0.001100799418054521, - "learning_rate": 0.00019999998564167228, - "loss": 46.0, - "step": 2241 - }, - { - "epoch": 0.17141655675975304, - "grad_norm": 0.000934005540329963, - "learning_rate": 0.00019999998562879773, - "loss": 46.0, - "step": 2242 - }, - { - "epoch": 0.17149301374314277, - "grad_norm": 0.0016864098142832518, - "learning_rate": 0.00019999998561591743, - "loss": 46.0, - "step": 2243 - }, - { - "epoch": 0.17156947072653247, - "grad_norm": 0.0006700264639221132, - "learning_rate": 0.00019999998560303137, - "loss": 46.0, - "step": 2244 - }, - { - "epoch": 0.1716459277099222, - "grad_norm": 0.0008497000089846551, - "learning_rate": 0.0001999999855901395, - "loss": 46.0, - "step": 2245 - }, - { - "epoch": 0.1717223846933119, - "grad_norm": 0.001969871111214161, - "learning_rate": 0.0001999999855772419, - "loss": 46.0, - "step": 2246 - }, - { - "epoch": 0.17179884167670165, - "grad_norm": 0.0012944393092766404, - "learning_rate": 0.00019999998556433852, - "loss": 46.0, - "step": 2247 - }, - { - "epoch": 0.17187529866009135, - "grad_norm": 0.0018585756188258529, - "learning_rate": 0.00019999998555142936, - "loss": 46.0, - "step": 2248 - }, - { - "epoch": 0.17195175564348109, - "grad_norm": 0.0005201755557209253, - "learning_rate": 0.00019999998553851445, - "loss": 46.0, - "step": 2249 - }, - { - "epoch": 0.17202821262687082, - "grad_norm": 0.0038005460519343615, - "learning_rate": 0.00019999998552559377, - "loss": 46.0, - "step": 2250 - }, - { - "epoch": 0.17210466961026052, - "grad_norm": 0.00512295588850975, - "learning_rate": 0.0001999999855126673, - "loss": 46.0, - "step": 2251 - }, - { - "epoch": 0.17218112659365026, - "grad_norm": 0.0014348982367664576, - "learning_rate": 0.00019999998549973506, - "loss": 46.0, - "step": 2252 - }, - { - "epoch": 0.17225758357703996, - "grad_norm": 0.0007697799592278898, - "learning_rate": 0.00019999998548679706, - "loss": 46.0, - "step": 2253 - }, - { - "epoch": 0.1723340405604297, - "grad_norm": 0.0014597177505493164, - "learning_rate": 0.00019999998547385329, - "loss": 46.0, - "step": 2254 - }, - { - "epoch": 0.1724104975438194, - "grad_norm": 0.0006445308681577444, - "learning_rate": 0.00019999998546090374, - "loss": 46.0, - "step": 2255 - }, - { - "epoch": 0.17248695452720914, - "grad_norm": 0.0006086422363296151, - "learning_rate": 0.00019999998544794844, - "loss": 46.0, - "step": 2256 - }, - { - "epoch": 0.17256341151059884, - "grad_norm": 0.0010298449778929353, - "learning_rate": 0.00019999998543498738, - "loss": 46.0, - "step": 2257 - }, - { - "epoch": 0.17263986849398857, - "grad_norm": 0.0030916971154510975, - "learning_rate": 0.0001999999854220205, - "loss": 46.0, - "step": 2258 - }, - { - "epoch": 0.17271632547737828, - "grad_norm": 0.0019768341444432735, - "learning_rate": 0.0001999999854090479, - "loss": 46.0, - "step": 2259 - }, - { - "epoch": 0.172792782460768, - "grad_norm": 0.05438971891999245, - "learning_rate": 0.0001999999853960695, - "loss": 46.0, - "step": 2260 - }, - { - "epoch": 0.17286923944415772, - "grad_norm": 0.0028179828077554703, - "learning_rate": 0.00019999998538308534, - "loss": 46.0, - "step": 2261 - }, - { - "epoch": 0.17294569642754745, - "grad_norm": 0.0009732816251926124, - "learning_rate": 0.00019999998537009544, - "loss": 46.0, - "step": 2262 - }, - { - "epoch": 0.17302215341093719, - "grad_norm": 0.0005932300700806081, - "learning_rate": 0.00019999998535709973, - "loss": 46.0, - "step": 2263 - }, - { - "epoch": 0.1730986103943269, - "grad_norm": 0.0023620782885700464, - "learning_rate": 0.00019999998534409827, - "loss": 46.0, - "step": 2264 - }, - { - "epoch": 0.17317506737771662, - "grad_norm": 0.0008549176855012774, - "learning_rate": 0.00019999998533109105, - "loss": 46.0, - "step": 2265 - }, - { - "epoch": 0.17325152436110633, - "grad_norm": 0.0017323002684861422, - "learning_rate": 0.00019999998531807802, - "loss": 46.0, - "step": 2266 - }, - { - "epoch": 0.17332798134449606, - "grad_norm": 0.0014616270782426, - "learning_rate": 0.00019999998530505927, - "loss": 46.0, - "step": 2267 - }, - { - "epoch": 0.17340443832788577, - "grad_norm": 0.002720715943723917, - "learning_rate": 0.00019999998529203472, - "loss": 46.0, - "step": 2268 - }, - { - "epoch": 0.1734808953112755, - "grad_norm": 0.0011732501443475485, - "learning_rate": 0.0001999999852790044, - "loss": 46.0, - "step": 2269 - }, - { - "epoch": 0.1735573522946652, - "grad_norm": 0.001845426158979535, - "learning_rate": 0.00019999998526596833, - "loss": 46.0, - "step": 2270 - }, - { - "epoch": 0.17363380927805494, - "grad_norm": 0.000726019439753145, - "learning_rate": 0.0001999999852529265, - "loss": 46.0, - "step": 2271 - }, - { - "epoch": 0.17371026626144465, - "grad_norm": 0.0007406814838759601, - "learning_rate": 0.00019999998523987885, - "loss": 46.0, - "step": 2272 - }, - { - "epoch": 0.17378672324483438, - "grad_norm": 0.0009963270276784897, - "learning_rate": 0.00019999998522682546, - "loss": 46.0, - "step": 2273 - }, - { - "epoch": 0.17386318022822408, - "grad_norm": 0.0013505546376109123, - "learning_rate": 0.0001999999852137663, - "loss": 46.0, - "step": 2274 - }, - { - "epoch": 0.17393963721161382, - "grad_norm": 0.0020522945560514927, - "learning_rate": 0.00019999998520070137, - "loss": 46.0, - "step": 2275 - }, - { - "epoch": 0.17401609419500352, - "grad_norm": 0.0008776316535659134, - "learning_rate": 0.0001999999851876307, - "loss": 46.0, - "step": 2276 - }, - { - "epoch": 0.17409255117839326, - "grad_norm": 0.002591426018625498, - "learning_rate": 0.0001999999851745542, - "loss": 46.0, - "step": 2277 - }, - { - "epoch": 0.174169008161783, - "grad_norm": 0.001668052631430328, - "learning_rate": 0.00019999998516147198, - "loss": 46.0, - "step": 2278 - }, - { - "epoch": 0.1742454651451727, - "grad_norm": 0.0009902473539113998, - "learning_rate": 0.00019999998514838398, - "loss": 46.0, - "step": 2279 - }, - { - "epoch": 0.17432192212856243, - "grad_norm": 0.0013911043060943484, - "learning_rate": 0.00019999998513529018, - "loss": 46.0, - "step": 2280 - }, - { - "epoch": 0.17439837911195213, - "grad_norm": 0.0019023942295461893, - "learning_rate": 0.00019999998512219066, - "loss": 46.0, - "step": 2281 - }, - { - "epoch": 0.17447483609534187, - "grad_norm": 0.0009533657575957477, - "learning_rate": 0.00019999998510908534, - "loss": 46.0, - "step": 2282 - }, - { - "epoch": 0.17455129307873157, - "grad_norm": 0.000650926842354238, - "learning_rate": 0.00019999998509597427, - "loss": 46.0, - "step": 2283 - }, - { - "epoch": 0.1746277500621213, - "grad_norm": 0.0038995062932372093, - "learning_rate": 0.0001999999850828574, - "loss": 46.0, - "step": 2284 - }, - { - "epoch": 0.174704207045511, - "grad_norm": 0.0018009381601586938, - "learning_rate": 0.00019999998506973476, - "loss": 46.0, - "step": 2285 - }, - { - "epoch": 0.17478066402890075, - "grad_norm": 0.0023535252548754215, - "learning_rate": 0.00019999998505660637, - "loss": 46.0, - "step": 2286 - }, - { - "epoch": 0.17485712101229045, - "grad_norm": 0.0011501583503559232, - "learning_rate": 0.0001999999850434722, - "loss": 46.0, - "step": 2287 - }, - { - "epoch": 0.17493357799568018, - "grad_norm": 0.0007303390302695334, - "learning_rate": 0.00019999998503033228, - "loss": 46.0, - "step": 2288 - }, - { - "epoch": 0.1750100349790699, - "grad_norm": 0.0006898452411405742, - "learning_rate": 0.0001999999850171866, - "loss": 46.0, - "step": 2289 - }, - { - "epoch": 0.17508649196245962, - "grad_norm": 0.000676496943924576, - "learning_rate": 0.00019999998500403514, - "loss": 46.0, - "step": 2290 - }, - { - "epoch": 0.17516294894584933, - "grad_norm": 0.0024054618552327156, - "learning_rate": 0.0001999999849908779, - "loss": 46.0, - "step": 2291 - }, - { - "epoch": 0.17523940592923906, - "grad_norm": 0.0006575587904080749, - "learning_rate": 0.00019999998497771486, - "loss": 46.0, - "step": 2292 - }, - { - "epoch": 0.1753158629126288, - "grad_norm": 0.001693792175501585, - "learning_rate": 0.00019999998496454612, - "loss": 46.0, - "step": 2293 - }, - { - "epoch": 0.1753923198960185, - "grad_norm": 0.0007719940040260553, - "learning_rate": 0.00019999998495137154, - "loss": 46.0, - "step": 2294 - }, - { - "epoch": 0.17546877687940823, - "grad_norm": 0.0008060596883296967, - "learning_rate": 0.00019999998493819122, - "loss": 46.0, - "step": 2295 - }, - { - "epoch": 0.17554523386279794, - "grad_norm": 0.0019577713683247566, - "learning_rate": 0.00019999998492500515, - "loss": 46.0, - "step": 2296 - }, - { - "epoch": 0.17562169084618767, - "grad_norm": 0.0019999847281724215, - "learning_rate": 0.0001999999849118133, - "loss": 46.0, - "step": 2297 - }, - { - "epoch": 0.17569814782957738, - "grad_norm": 0.009767111390829086, - "learning_rate": 0.00019999998489861567, - "loss": 46.0, - "step": 2298 - }, - { - "epoch": 0.1757746048129671, - "grad_norm": 0.0017186939949169755, - "learning_rate": 0.00019999998488541226, - "loss": 46.0, - "step": 2299 - }, - { - "epoch": 0.17585106179635682, - "grad_norm": 0.0011332217836752534, - "learning_rate": 0.0001999999848722031, - "loss": 46.0, - "step": 2300 - }, - { - "epoch": 0.17592751877974655, - "grad_norm": 0.006631455849856138, - "learning_rate": 0.00019999998485898817, - "loss": 46.0, - "step": 2301 - }, - { - "epoch": 0.17600397576313626, - "grad_norm": 0.0012229913845658302, - "learning_rate": 0.00019999998484576749, - "loss": 46.0, - "step": 2302 - }, - { - "epoch": 0.176080432746526, - "grad_norm": 0.0012728262227028608, - "learning_rate": 0.00019999998483254098, - "loss": 46.0, - "step": 2303 - }, - { - "epoch": 0.1761568897299157, - "grad_norm": 0.004363963846117258, - "learning_rate": 0.00019999998481930875, - "loss": 46.0, - "step": 2304 - }, - { - "epoch": 0.17623334671330543, - "grad_norm": 0.0035214009694755077, - "learning_rate": 0.00019999998480607075, - "loss": 46.0, - "step": 2305 - }, - { - "epoch": 0.17630980369669513, - "grad_norm": 0.0012037267442792654, - "learning_rate": 0.00019999998479282695, - "loss": 46.0, - "step": 2306 - }, - { - "epoch": 0.17638626068008487, - "grad_norm": 0.0009421817376278341, - "learning_rate": 0.0001999999847795774, - "loss": 46.0, - "step": 2307 - }, - { - "epoch": 0.1764627176634746, - "grad_norm": 0.0012976977741345763, - "learning_rate": 0.00019999998476632208, - "loss": 46.0, - "step": 2308 - }, - { - "epoch": 0.1765391746468643, - "grad_norm": 0.00046510202810168266, - "learning_rate": 0.00019999998475306102, - "loss": 46.0, - "step": 2309 - }, - { - "epoch": 0.17661563163025404, - "grad_norm": 0.003901679767295718, - "learning_rate": 0.00019999998473979412, - "loss": 46.0, - "step": 2310 - }, - { - "epoch": 0.17669208861364374, - "grad_norm": 0.0007329244399443269, - "learning_rate": 0.0001999999847265215, - "loss": 46.0, - "step": 2311 - }, - { - "epoch": 0.17676854559703348, - "grad_norm": 0.0011444626143202186, - "learning_rate": 0.0001999999847132431, - "loss": 46.0, - "step": 2312 - }, - { - "epoch": 0.17684500258042318, - "grad_norm": 0.000476592977065593, - "learning_rate": 0.00019999998469995896, - "loss": 46.0, - "step": 2313 - }, - { - "epoch": 0.17692145956381292, - "grad_norm": 0.0006228131242096424, - "learning_rate": 0.000199999984686669, - "loss": 46.0, - "step": 2314 - }, - { - "epoch": 0.17699791654720262, - "grad_norm": 0.001816373085603118, - "learning_rate": 0.0001999999846733733, - "loss": 46.0, - "step": 2315 - }, - { - "epoch": 0.17707437353059235, - "grad_norm": 0.0008470119209960103, - "learning_rate": 0.00019999998466007184, - "loss": 46.0, - "step": 2316 - }, - { - "epoch": 0.17715083051398206, - "grad_norm": 0.0010827983496710658, - "learning_rate": 0.00019999998464676456, - "loss": 46.0, - "step": 2317 - }, - { - "epoch": 0.1772272874973718, - "grad_norm": 0.0013238334795460105, - "learning_rate": 0.00019999998463345156, - "loss": 46.0, - "step": 2318 - }, - { - "epoch": 0.1773037444807615, - "grad_norm": 0.0010916294995695353, - "learning_rate": 0.00019999998462013276, - "loss": 46.0, - "step": 2319 - }, - { - "epoch": 0.17738020146415123, - "grad_norm": 0.0005368746933527291, - "learning_rate": 0.00019999998460680822, - "loss": 46.0, - "step": 2320 - }, - { - "epoch": 0.17745665844754097, - "grad_norm": 0.0010060060303658247, - "learning_rate": 0.0001999999845934779, - "loss": 46.0, - "step": 2321 - }, - { - "epoch": 0.17753311543093067, - "grad_norm": 0.003440416418015957, - "learning_rate": 0.00019999998458014183, - "loss": 46.0, - "step": 2322 - }, - { - "epoch": 0.1776095724143204, - "grad_norm": 0.0006268015713430941, - "learning_rate": 0.00019999998456679996, - "loss": 46.0, - "step": 2323 - }, - { - "epoch": 0.1776860293977101, - "grad_norm": 0.0026197130791842937, - "learning_rate": 0.00019999998455345232, - "loss": 46.0, - "step": 2324 - }, - { - "epoch": 0.17776248638109984, - "grad_norm": 0.0009889118373394012, - "learning_rate": 0.0001999999845400989, - "loss": 46.0, - "step": 2325 - }, - { - "epoch": 0.17783894336448955, - "grad_norm": 0.0011897907825186849, - "learning_rate": 0.00019999998452673975, - "loss": 46.0, - "step": 2326 - }, - { - "epoch": 0.17791540034787928, - "grad_norm": 0.0011173890670761466, - "learning_rate": 0.00019999998451337482, - "loss": 46.0, - "step": 2327 - }, - { - "epoch": 0.177991857331269, - "grad_norm": 0.0017595734680071473, - "learning_rate": 0.0001999999845000041, - "loss": 46.0, - "step": 2328 - }, - { - "epoch": 0.17806831431465872, - "grad_norm": 0.0009243313106708229, - "learning_rate": 0.0001999999844866276, - "loss": 46.0, - "step": 2329 - }, - { - "epoch": 0.17814477129804843, - "grad_norm": 0.001011726912111044, - "learning_rate": 0.00019999998447324538, - "loss": 46.0, - "step": 2330 - }, - { - "epoch": 0.17822122828143816, - "grad_norm": 0.0018673008307814598, - "learning_rate": 0.00019999998445985735, - "loss": 46.0, - "step": 2331 - }, - { - "epoch": 0.17829768526482787, - "grad_norm": 0.0012218868359923363, - "learning_rate": 0.00019999998444646355, - "loss": 46.0, - "step": 2332 - }, - { - "epoch": 0.1783741422482176, - "grad_norm": 0.0010105993133038282, - "learning_rate": 0.000199999984433064, - "loss": 46.0, - "step": 2333 - }, - { - "epoch": 0.1784505992316073, - "grad_norm": 0.000628974346909672, - "learning_rate": 0.00019999998441965868, - "loss": 46.0, - "step": 2334 - }, - { - "epoch": 0.17852705621499704, - "grad_norm": 0.001163146342150867, - "learning_rate": 0.0001999999844062476, - "loss": 46.0, - "step": 2335 - }, - { - "epoch": 0.17860351319838677, - "grad_norm": 0.0012094784760847688, - "learning_rate": 0.00019999998439283073, - "loss": 46.0, - "step": 2336 - }, - { - "epoch": 0.17867997018177648, - "grad_norm": 0.0059892479330301285, - "learning_rate": 0.00019999998437940809, - "loss": 46.0, - "step": 2337 - }, - { - "epoch": 0.1787564271651662, - "grad_norm": 0.0007704837480559945, - "learning_rate": 0.00019999998436597967, - "loss": 46.0, - "step": 2338 - }, - { - "epoch": 0.17883288414855592, - "grad_norm": 0.002447045873850584, - "learning_rate": 0.0001999999843525455, - "loss": 46.0, - "step": 2339 - }, - { - "epoch": 0.17890934113194565, - "grad_norm": 0.003908355720341206, - "learning_rate": 0.00019999998433910555, - "loss": 46.0, - "step": 2340 - }, - { - "epoch": 0.17898579811533535, - "grad_norm": 0.0011193696409463882, - "learning_rate": 0.00019999998432565985, - "loss": 46.0, - "step": 2341 - }, - { - "epoch": 0.1790622550987251, - "grad_norm": 0.0011555581586435437, - "learning_rate": 0.00019999998431220837, - "loss": 46.0, - "step": 2342 - }, - { - "epoch": 0.1791387120821148, - "grad_norm": 0.002081210259348154, - "learning_rate": 0.00019999998429875114, - "loss": 46.0, - "step": 2343 - }, - { - "epoch": 0.17921516906550453, - "grad_norm": 0.0015744606498628855, - "learning_rate": 0.0001999999842852881, - "loss": 46.0, - "step": 2344 - }, - { - "epoch": 0.17929162604889423, - "grad_norm": 0.0026379143819212914, - "learning_rate": 0.0001999999842718193, - "loss": 46.0, - "step": 2345 - }, - { - "epoch": 0.17936808303228396, - "grad_norm": 0.000804356241133064, - "learning_rate": 0.00019999998425834474, - "loss": 46.0, - "step": 2346 - }, - { - "epoch": 0.17944454001567367, - "grad_norm": 0.0028589973226189613, - "learning_rate": 0.00019999998424486442, - "loss": 46.0, - "step": 2347 - }, - { - "epoch": 0.1795209969990634, - "grad_norm": 0.0017277380684390664, - "learning_rate": 0.0001999999842313783, - "loss": 46.0, - "step": 2348 - }, - { - "epoch": 0.1795974539824531, - "grad_norm": 0.0008444335544481874, - "learning_rate": 0.00019999998421788644, - "loss": 46.0, - "step": 2349 - }, - { - "epoch": 0.17967391096584284, - "grad_norm": 0.0007687475881539285, - "learning_rate": 0.00019999998420438883, - "loss": 46.0, - "step": 2350 - }, - { - "epoch": 0.17975036794923258, - "grad_norm": 0.0011582612060010433, - "learning_rate": 0.0001999999841908854, - "loss": 46.0, - "step": 2351 - }, - { - "epoch": 0.17982682493262228, - "grad_norm": 0.0004720762080978602, - "learning_rate": 0.00019999998417737623, - "loss": 46.0, - "step": 2352 - }, - { - "epoch": 0.17990328191601201, - "grad_norm": 0.0012853079242631793, - "learning_rate": 0.0001999999841638613, - "loss": 46.0, - "step": 2353 - }, - { - "epoch": 0.17997973889940172, - "grad_norm": 0.0006842748844064772, - "learning_rate": 0.00019999998415034056, - "loss": 46.0, - "step": 2354 - }, - { - "epoch": 0.18005619588279145, - "grad_norm": 0.0013378605945035815, - "learning_rate": 0.00019999998413681408, - "loss": 46.0, - "step": 2355 - }, - { - "epoch": 0.18013265286618116, - "grad_norm": 0.0008957096724770963, - "learning_rate": 0.00019999998412328186, - "loss": 46.0, - "step": 2356 - }, - { - "epoch": 0.1802091098495709, - "grad_norm": 0.0009715714841149747, - "learning_rate": 0.0001999999841097438, - "loss": 46.0, - "step": 2357 - }, - { - "epoch": 0.1802855668329606, - "grad_norm": 0.0034031725954264402, - "learning_rate": 0.00019999998409620003, - "loss": 46.0, - "step": 2358 - }, - { - "epoch": 0.18036202381635033, - "grad_norm": 0.0007429048418998718, - "learning_rate": 0.00019999998408265046, - "loss": 46.0, - "step": 2359 - }, - { - "epoch": 0.18043848079974004, - "grad_norm": 0.0018524250481277704, - "learning_rate": 0.0001999999840690951, - "loss": 46.0, - "step": 2360 - }, - { - "epoch": 0.18051493778312977, - "grad_norm": 0.0015551699325442314, - "learning_rate": 0.00019999998405553402, - "loss": 46.0, - "step": 2361 - }, - { - "epoch": 0.18059139476651948, - "grad_norm": 0.000425168895162642, - "learning_rate": 0.00019999998404196715, - "loss": 46.0, - "step": 2362 - }, - { - "epoch": 0.1806678517499092, - "grad_norm": 0.001114976592361927, - "learning_rate": 0.00019999998402839452, - "loss": 46.0, - "step": 2363 - }, - { - "epoch": 0.18074430873329891, - "grad_norm": 0.0019761016592383385, - "learning_rate": 0.0001999999840148161, - "loss": 46.0, - "step": 2364 - }, - { - "epoch": 0.18082076571668865, - "grad_norm": 0.002594160847365856, - "learning_rate": 0.00019999998400123192, - "loss": 46.0, - "step": 2365 - }, - { - "epoch": 0.18089722270007838, - "grad_norm": 0.0005472972407005727, - "learning_rate": 0.00019999998398764196, - "loss": 46.0, - "step": 2366 - }, - { - "epoch": 0.18097367968346809, - "grad_norm": 0.0027455140370875597, - "learning_rate": 0.00019999998397404625, - "loss": 46.0, - "step": 2367 - }, - { - "epoch": 0.18105013666685782, - "grad_norm": 0.0005896030925214291, - "learning_rate": 0.00019999998396044477, - "loss": 46.0, - "step": 2368 - }, - { - "epoch": 0.18112659365024752, - "grad_norm": 0.0009163576178252697, - "learning_rate": 0.00019999998394683752, - "loss": 46.0, - "step": 2369 - }, - { - "epoch": 0.18120305063363726, - "grad_norm": 0.002880948130041361, - "learning_rate": 0.0001999999839332245, - "loss": 46.0, - "step": 2370 - }, - { - "epoch": 0.18127950761702696, - "grad_norm": 0.002056585159152746, - "learning_rate": 0.0001999999839196057, - "loss": 46.0, - "step": 2371 - }, - { - "epoch": 0.1813559646004167, - "grad_norm": 0.0008534000953659415, - "learning_rate": 0.00019999998390598112, - "loss": 46.0, - "step": 2372 - }, - { - "epoch": 0.1814324215838064, - "grad_norm": 0.0006491973763331771, - "learning_rate": 0.0001999999838923508, - "loss": 46.0, - "step": 2373 - }, - { - "epoch": 0.18150887856719614, - "grad_norm": 0.0004762164026033133, - "learning_rate": 0.00019999998387871469, - "loss": 46.0, - "step": 2374 - }, - { - "epoch": 0.18158533555058584, - "grad_norm": 0.0010661830892786384, - "learning_rate": 0.00019999998386507282, - "loss": 46.0, - "step": 2375 - }, - { - "epoch": 0.18166179253397557, - "grad_norm": 0.0011364857200533152, - "learning_rate": 0.00019999998385142518, - "loss": 46.0, - "step": 2376 - }, - { - "epoch": 0.18173824951736528, - "grad_norm": 0.0012326686410233378, - "learning_rate": 0.00019999998383777174, - "loss": 46.0, - "step": 2377 - }, - { - "epoch": 0.181814706500755, - "grad_norm": 0.000846969080157578, - "learning_rate": 0.00019999998382411258, - "loss": 46.0, - "step": 2378 - }, - { - "epoch": 0.18189116348414475, - "grad_norm": 0.0004108122084289789, - "learning_rate": 0.00019999998381044763, - "loss": 46.0, - "step": 2379 - }, - { - "epoch": 0.18196762046753445, - "grad_norm": 0.001905068987980485, - "learning_rate": 0.0001999999837967769, - "loss": 46.0, - "step": 2380 - }, - { - "epoch": 0.18204407745092419, - "grad_norm": 0.002693180926144123, - "learning_rate": 0.00019999998378310041, - "loss": 46.0, - "step": 2381 - }, - { - "epoch": 0.1821205344343139, - "grad_norm": 0.0005059579852968454, - "learning_rate": 0.00019999998376941816, - "loss": 46.0, - "step": 2382 - }, - { - "epoch": 0.18219699141770362, - "grad_norm": 0.0033266465179622173, - "learning_rate": 0.00019999998375573014, - "loss": 46.0, - "step": 2383 - }, - { - "epoch": 0.18227344840109333, - "grad_norm": 0.001252981717698276, - "learning_rate": 0.0001999999837420363, - "loss": 46.0, - "step": 2384 - }, - { - "epoch": 0.18234990538448306, - "grad_norm": 0.0008392962045036256, - "learning_rate": 0.00019999998372833674, - "loss": 46.0, - "step": 2385 - }, - { - "epoch": 0.18242636236787277, - "grad_norm": 0.0014696762664243579, - "learning_rate": 0.00019999998371463142, - "loss": 46.0, - "step": 2386 - }, - { - "epoch": 0.1825028193512625, - "grad_norm": 0.001209444017149508, - "learning_rate": 0.00019999998370092033, - "loss": 46.0, - "step": 2387 - }, - { - "epoch": 0.1825792763346522, - "grad_norm": 0.0018046700861304998, - "learning_rate": 0.00019999998368720344, - "loss": 46.0, - "step": 2388 - }, - { - "epoch": 0.18265573331804194, - "grad_norm": 0.0009530246024951339, - "learning_rate": 0.00019999998367348077, - "loss": 46.0, - "step": 2389 - }, - { - "epoch": 0.18273219030143165, - "grad_norm": 0.0012285119155421853, - "learning_rate": 0.00019999998365975236, - "loss": 46.0, - "step": 2390 - }, - { - "epoch": 0.18280864728482138, - "grad_norm": 0.0006943094194866717, - "learning_rate": 0.0001999999836460182, - "loss": 46.0, - "step": 2391 - }, - { - "epoch": 0.18288510426821109, - "grad_norm": 0.000586421403568238, - "learning_rate": 0.00019999998363227824, - "loss": 46.0, - "step": 2392 - }, - { - "epoch": 0.18296156125160082, - "grad_norm": 0.0037157570477575064, - "learning_rate": 0.00019999998361853248, - "loss": 46.0, - "step": 2393 - }, - { - "epoch": 0.18303801823499055, - "grad_norm": 0.0012294065672904253, - "learning_rate": 0.000199999983604781, - "loss": 46.0, - "step": 2394 - }, - { - "epoch": 0.18311447521838026, - "grad_norm": 0.0027290915604680777, - "learning_rate": 0.00019999998359102375, - "loss": 46.0, - "step": 2395 - }, - { - "epoch": 0.18319093220177, - "grad_norm": 0.0008222172618843615, - "learning_rate": 0.0001999999835772607, - "loss": 46.0, - "step": 2396 - }, - { - "epoch": 0.1832673891851597, - "grad_norm": 0.0007116239285096526, - "learning_rate": 0.00019999998356349193, - "loss": 46.0, - "step": 2397 - }, - { - "epoch": 0.18334384616854943, - "grad_norm": 0.001192706055007875, - "learning_rate": 0.00019999998354971736, - "loss": 46.0, - "step": 2398 - }, - { - "epoch": 0.18342030315193913, - "grad_norm": 0.0014561553252860904, - "learning_rate": 0.00019999998353593701, - "loss": 46.0, - "step": 2399 - }, - { - "epoch": 0.18349676013532887, - "grad_norm": 0.0017072921618819237, - "learning_rate": 0.0001999999835221509, - "loss": 46.0, - "step": 2400 - }, - { - "epoch": 0.18357321711871857, - "grad_norm": 0.002692918060347438, - "learning_rate": 0.00019999998350835903, - "loss": 46.0, - "step": 2401 - }, - { - "epoch": 0.1836496741021083, - "grad_norm": 0.002356676384806633, - "learning_rate": 0.00019999998349456137, - "loss": 46.0, - "step": 2402 - }, - { - "epoch": 0.183726131085498, - "grad_norm": 0.0011876497883349657, - "learning_rate": 0.00019999998348075795, - "loss": 46.0, - "step": 2403 - }, - { - "epoch": 0.18380258806888775, - "grad_norm": 0.0011347861727699637, - "learning_rate": 0.00019999998346694877, - "loss": 46.0, - "step": 2404 - }, - { - "epoch": 0.18387904505227745, - "grad_norm": 0.0009794689249247313, - "learning_rate": 0.0001999999834531338, - "loss": 46.0, - "step": 2405 - }, - { - "epoch": 0.18395550203566718, - "grad_norm": 0.0011760820634663105, - "learning_rate": 0.00019999998343931308, - "loss": 46.0, - "step": 2406 - }, - { - "epoch": 0.1840319590190569, - "grad_norm": 0.00043110037222504616, - "learning_rate": 0.00019999998342548657, - "loss": 46.0, - "step": 2407 - }, - { - "epoch": 0.18410841600244662, - "grad_norm": 0.0011725655058398843, - "learning_rate": 0.00019999998341165432, - "loss": 46.0, - "step": 2408 - }, - { - "epoch": 0.18418487298583636, - "grad_norm": 0.0022977543994784355, - "learning_rate": 0.0001999999833978163, - "loss": 46.0, - "step": 2409 - }, - { - "epoch": 0.18426132996922606, - "grad_norm": 0.001277922885492444, - "learning_rate": 0.00019999998338397247, - "loss": 46.0, - "step": 2410 - }, - { - "epoch": 0.1843377869526158, - "grad_norm": 0.0029628209304064512, - "learning_rate": 0.0001999999833701229, - "loss": 46.0, - "step": 2411 - }, - { - "epoch": 0.1844142439360055, - "grad_norm": 0.00076859857654199, - "learning_rate": 0.00019999998335626758, - "loss": 46.0, - "step": 2412 - }, - { - "epoch": 0.18449070091939523, - "grad_norm": 0.0006292408215813339, - "learning_rate": 0.00019999998334240644, - "loss": 46.0, - "step": 2413 - }, - { - "epoch": 0.18456715790278494, - "grad_norm": 0.0025320679415017366, - "learning_rate": 0.00019999998332853957, - "loss": 46.0, - "step": 2414 - }, - { - "epoch": 0.18464361488617467, - "grad_norm": 0.001478655613027513, - "learning_rate": 0.0001999999833146669, - "loss": 46.0, - "step": 2415 - }, - { - "epoch": 0.18472007186956438, - "grad_norm": 0.0017672876128926873, - "learning_rate": 0.0001999999833007885, - "loss": 46.0, - "step": 2416 - }, - { - "epoch": 0.1847965288529541, - "grad_norm": 0.0011046217987313867, - "learning_rate": 0.0001999999832869043, - "loss": 46.0, - "step": 2417 - }, - { - "epoch": 0.18487298583634382, - "grad_norm": 0.0016992365708574653, - "learning_rate": 0.00019999998327301435, - "loss": 46.0, - "step": 2418 - }, - { - "epoch": 0.18494944281973355, - "grad_norm": 0.001854369300417602, - "learning_rate": 0.00019999998325911862, - "loss": 46.0, - "step": 2419 - }, - { - "epoch": 0.18502589980312326, - "grad_norm": 0.0010395062854513526, - "learning_rate": 0.00019999998324521712, - "loss": 46.0, - "step": 2420 - }, - { - "epoch": 0.185102356786513, - "grad_norm": 0.0019571902230381966, - "learning_rate": 0.00019999998323130984, - "loss": 46.0, - "step": 2421 - }, - { - "epoch": 0.1851788137699027, - "grad_norm": 0.0010250064078718424, - "learning_rate": 0.00019999998321739682, - "loss": 46.0, - "step": 2422 - }, - { - "epoch": 0.18525527075329243, - "grad_norm": 0.00210104463621974, - "learning_rate": 0.00019999998320347802, - "loss": 46.0, - "step": 2423 - }, - { - "epoch": 0.18533172773668216, - "grad_norm": 0.0009446580079384148, - "learning_rate": 0.00019999998318955342, - "loss": 46.0, - "step": 2424 - }, - { - "epoch": 0.18540818472007187, - "grad_norm": 0.0008213113178499043, - "learning_rate": 0.00019999998317562308, - "loss": 46.0, - "step": 2425 - }, - { - "epoch": 0.1854846417034616, - "grad_norm": 0.0018267889972776175, - "learning_rate": 0.00019999998316168696, - "loss": 46.0, - "step": 2426 - }, - { - "epoch": 0.1855610986868513, - "grad_norm": 0.0005351110594347119, - "learning_rate": 0.0001999999831477451, - "loss": 46.0, - "step": 2427 - }, - { - "epoch": 0.18563755567024104, - "grad_norm": 0.0006839693523943424, - "learning_rate": 0.00019999998313379743, - "loss": 46.0, - "step": 2428 - }, - { - "epoch": 0.18571401265363074, - "grad_norm": 0.0007742916932329535, - "learning_rate": 0.000199999983119844, - "loss": 46.0, - "step": 2429 - }, - { - "epoch": 0.18579046963702048, - "grad_norm": 0.0012417191173881292, - "learning_rate": 0.00019999998310588478, - "loss": 46.0, - "step": 2430 - }, - { - "epoch": 0.18586692662041018, - "grad_norm": 0.0009488030918873847, - "learning_rate": 0.00019999998309191982, - "loss": 46.0, - "step": 2431 - }, - { - "epoch": 0.18594338360379992, - "grad_norm": 0.0016189453890547156, - "learning_rate": 0.0001999999830779491, - "loss": 46.0, - "step": 2432 - }, - { - "epoch": 0.18601984058718962, - "grad_norm": 0.0009394802036695182, - "learning_rate": 0.0001999999830639726, - "loss": 46.0, - "step": 2433 - }, - { - "epoch": 0.18609629757057936, - "grad_norm": 0.0008046234142966568, - "learning_rate": 0.00019999998304999034, - "loss": 46.0, - "step": 2434 - }, - { - "epoch": 0.18617275455396906, - "grad_norm": 0.0007821296458132565, - "learning_rate": 0.00019999998303600228, - "loss": 46.0, - "step": 2435 - }, - { - "epoch": 0.1862492115373588, - "grad_norm": 0.0015971878310665488, - "learning_rate": 0.0001999999830220085, - "loss": 46.0, - "step": 2436 - }, - { - "epoch": 0.18632566852074853, - "grad_norm": 0.0010250088525936007, - "learning_rate": 0.00019999998300800892, - "loss": 46.0, - "step": 2437 - }, - { - "epoch": 0.18640212550413823, - "grad_norm": 0.0006358137470670044, - "learning_rate": 0.00019999998299400355, - "loss": 46.0, - "step": 2438 - }, - { - "epoch": 0.18647858248752797, - "grad_norm": 0.0006607634713873267, - "learning_rate": 0.00019999998297999243, - "loss": 46.0, - "step": 2439 - }, - { - "epoch": 0.18655503947091767, - "grad_norm": 0.0020706020295619965, - "learning_rate": 0.00019999998296597556, - "loss": 46.0, - "step": 2440 - }, - { - "epoch": 0.1866314964543074, - "grad_norm": 0.0005927235470153391, - "learning_rate": 0.0001999999829519529, - "loss": 46.0, - "step": 2441 - }, - { - "epoch": 0.1867079534376971, - "grad_norm": 0.0008116699755191803, - "learning_rate": 0.0001999999829379245, - "loss": 46.0, - "step": 2442 - }, - { - "epoch": 0.18678441042108684, - "grad_norm": 0.0009640644420869648, - "learning_rate": 0.00019999998292389028, - "loss": 46.0, - "step": 2443 - }, - { - "epoch": 0.18686086740447655, - "grad_norm": 0.0011444780975580215, - "learning_rate": 0.00019999998290985032, - "loss": 46.0, - "step": 2444 - }, - { - "epoch": 0.18693732438786628, - "grad_norm": 0.0011077811941504478, - "learning_rate": 0.00019999998289580456, - "loss": 46.0, - "step": 2445 - }, - { - "epoch": 0.187013781371256, - "grad_norm": 0.0008246845682151616, - "learning_rate": 0.00019999998288175306, - "loss": 46.0, - "step": 2446 - }, - { - "epoch": 0.18709023835464572, - "grad_norm": 0.0013810722157359123, - "learning_rate": 0.00019999998286769578, - "loss": 46.0, - "step": 2447 - }, - { - "epoch": 0.18716669533803543, - "grad_norm": 0.002938495483249426, - "learning_rate": 0.00019999998285363276, - "loss": 46.0, - "step": 2448 - }, - { - "epoch": 0.18724315232142516, - "grad_norm": 0.0007734111859463155, - "learning_rate": 0.0001999999828395639, - "loss": 46.0, - "step": 2449 - }, - { - "epoch": 0.18731960930481487, - "grad_norm": 0.00296254875138402, - "learning_rate": 0.00019999998282548936, - "loss": 46.0, - "step": 2450 - }, - { - "epoch": 0.1873960662882046, - "grad_norm": 0.001780922175385058, - "learning_rate": 0.000199999982811409, - "loss": 46.0, - "step": 2451 - }, - { - "epoch": 0.18747252327159433, - "grad_norm": 0.0027095621917396784, - "learning_rate": 0.00019999998279732285, - "loss": 46.0, - "step": 2452 - }, - { - "epoch": 0.18754898025498404, - "grad_norm": 0.0012391556520015001, - "learning_rate": 0.00019999998278323098, - "loss": 46.0, - "step": 2453 - }, - { - "epoch": 0.18762543723837377, - "grad_norm": 0.0022775058168917894, - "learning_rate": 0.00019999998276913332, - "loss": 46.0, - "step": 2454 - }, - { - "epoch": 0.18770189422176348, - "grad_norm": 0.0006520353490486741, - "learning_rate": 0.00019999998275502988, - "loss": 46.0, - "step": 2455 - }, - { - "epoch": 0.1877783512051532, - "grad_norm": 0.001414004829712212, - "learning_rate": 0.0001999999827409207, - "loss": 46.0, - "step": 2456 - }, - { - "epoch": 0.18785480818854292, - "grad_norm": 0.0009541686740703881, - "learning_rate": 0.00019999998272680571, - "loss": 46.0, - "step": 2457 - }, - { - "epoch": 0.18793126517193265, - "grad_norm": 0.0011238297447562218, - "learning_rate": 0.00019999998271268498, - "loss": 46.0, - "step": 2458 - }, - { - "epoch": 0.18800772215532235, - "grad_norm": 0.0057695223949849606, - "learning_rate": 0.00019999998269855848, - "loss": 46.0, - "step": 2459 - }, - { - "epoch": 0.1880841791387121, - "grad_norm": 0.0013992065796628594, - "learning_rate": 0.0001999999826844262, - "loss": 46.0, - "step": 2460 - }, - { - "epoch": 0.1881606361221018, - "grad_norm": 0.0009604279766790569, - "learning_rate": 0.00019999998267028818, - "loss": 46.0, - "step": 2461 - }, - { - "epoch": 0.18823709310549153, - "grad_norm": 0.0005514805670827627, - "learning_rate": 0.00019999998265614433, - "loss": 46.0, - "step": 2462 - }, - { - "epoch": 0.18831355008888123, - "grad_norm": 0.0015055537223815918, - "learning_rate": 0.00019999998264199476, - "loss": 46.0, - "step": 2463 - }, - { - "epoch": 0.18839000707227097, - "grad_norm": 0.0018673110753297806, - "learning_rate": 0.00019999998262783941, - "loss": 46.0, - "step": 2464 - }, - { - "epoch": 0.18846646405566067, - "grad_norm": 0.005999795626848936, - "learning_rate": 0.00019999998261367827, - "loss": 46.0, - "step": 2465 - }, - { - "epoch": 0.1885429210390504, - "grad_norm": 0.0008092976058833301, - "learning_rate": 0.00019999998259951138, - "loss": 46.0, - "step": 2466 - }, - { - "epoch": 0.18861937802244014, - "grad_norm": 0.0011135678505524993, - "learning_rate": 0.00019999998258533872, - "loss": 46.0, - "step": 2467 - }, - { - "epoch": 0.18869583500582984, - "grad_norm": 0.0010720588034018874, - "learning_rate": 0.00019999998257116028, - "loss": 46.0, - "step": 2468 - }, - { - "epoch": 0.18877229198921958, - "grad_norm": 0.0009085648343898356, - "learning_rate": 0.0001999999825569761, - "loss": 46.0, - "step": 2469 - }, - { - "epoch": 0.18884874897260928, - "grad_norm": 0.000594708020798862, - "learning_rate": 0.0001999999825427861, - "loss": 46.0, - "step": 2470 - }, - { - "epoch": 0.18892520595599901, - "grad_norm": 0.0012979174498468637, - "learning_rate": 0.00019999998252859036, - "loss": 46.0, - "step": 2471 - }, - { - "epoch": 0.18900166293938872, - "grad_norm": 0.0007752353558316827, - "learning_rate": 0.00019999998251438888, - "loss": 46.0, - "step": 2472 - }, - { - "epoch": 0.18907811992277845, - "grad_norm": 0.0005088779143989086, - "learning_rate": 0.00019999998250018157, - "loss": 46.0, - "step": 2473 - }, - { - "epoch": 0.18915457690616816, - "grad_norm": 0.001224258914589882, - "learning_rate": 0.00019999998248596855, - "loss": 46.0, - "step": 2474 - }, - { - "epoch": 0.1892310338895579, - "grad_norm": 0.0009395096567459404, - "learning_rate": 0.0001999999824717497, - "loss": 46.0, - "step": 2475 - }, - { - "epoch": 0.1893074908729476, - "grad_norm": 0.0018619762267917395, - "learning_rate": 0.0001999999824575251, - "loss": 46.0, - "step": 2476 - }, - { - "epoch": 0.18938394785633733, - "grad_norm": 0.0009954768465831876, - "learning_rate": 0.00019999998244329476, - "loss": 46.0, - "step": 2477 - }, - { - "epoch": 0.18946040483972704, - "grad_norm": 0.002437873976305127, - "learning_rate": 0.00019999998242905864, - "loss": 46.0, - "step": 2478 - }, - { - "epoch": 0.18953686182311677, - "grad_norm": 0.00048663877532817423, - "learning_rate": 0.00019999998241481675, - "loss": 46.0, - "step": 2479 - }, - { - "epoch": 0.18961331880650648, - "grad_norm": 0.0005779552739113569, - "learning_rate": 0.00019999998240056906, - "loss": 46.0, - "step": 2480 - }, - { - "epoch": 0.1896897757898962, - "grad_norm": 0.0005875512724742293, - "learning_rate": 0.00019999998238631563, - "loss": 46.0, - "step": 2481 - }, - { - "epoch": 0.18976623277328594, - "grad_norm": 0.0017303097993135452, - "learning_rate": 0.00019999998237205644, - "loss": 46.0, - "step": 2482 - }, - { - "epoch": 0.18984268975667565, - "grad_norm": 0.0005524460575543344, - "learning_rate": 0.00019999998235779146, - "loss": 46.0, - "step": 2483 - }, - { - "epoch": 0.18991914674006538, - "grad_norm": 0.001414101687259972, - "learning_rate": 0.0001999999823435207, - "loss": 46.0, - "step": 2484 - }, - { - "epoch": 0.1899956037234551, - "grad_norm": 0.0005345565732568502, - "learning_rate": 0.0001999999823292442, - "loss": 46.0, - "step": 2485 - }, - { - "epoch": 0.19007206070684482, - "grad_norm": 0.0027988983783870935, - "learning_rate": 0.0001999999823149619, - "loss": 46.0, - "step": 2486 - }, - { - "epoch": 0.19014851769023453, - "grad_norm": 0.0008862941176630557, - "learning_rate": 0.00019999998230067387, - "loss": 46.0, - "step": 2487 - }, - { - "epoch": 0.19022497467362426, - "grad_norm": 0.0006016691331751645, - "learning_rate": 0.00019999998228638005, - "loss": 46.0, - "step": 2488 - }, - { - "epoch": 0.19030143165701396, - "grad_norm": 0.0033350768499076366, - "learning_rate": 0.00019999998227208045, - "loss": 46.0, - "step": 2489 - }, - { - "epoch": 0.1903778886404037, - "grad_norm": 0.0009862019214779139, - "learning_rate": 0.00019999998225777508, - "loss": 46.0, - "step": 2490 - }, - { - "epoch": 0.1904543456237934, - "grad_norm": 0.0016853954875841737, - "learning_rate": 0.00019999998224346394, - "loss": 46.0, - "step": 2491 - }, - { - "epoch": 0.19053080260718314, - "grad_norm": 0.0005088983452878892, - "learning_rate": 0.00019999998222914708, - "loss": 46.0, - "step": 2492 - }, - { - "epoch": 0.19060725959057284, - "grad_norm": 0.0015099769225344062, - "learning_rate": 0.0001999999822148244, - "loss": 46.0, - "step": 2493 - }, - { - "epoch": 0.19068371657396258, - "grad_norm": 0.012772087007761002, - "learning_rate": 0.00019999998220049595, - "loss": 46.0, - "step": 2494 - }, - { - "epoch": 0.1907601735573523, - "grad_norm": 0.0011454602936282754, - "learning_rate": 0.00019999998218616174, - "loss": 46.0, - "step": 2495 - }, - { - "epoch": 0.190836630540742, - "grad_norm": 0.0007046663085930049, - "learning_rate": 0.00019999998217182176, - "loss": 46.0, - "step": 2496 - }, - { - "epoch": 0.19091308752413175, - "grad_norm": 0.0014262953773140907, - "learning_rate": 0.00019999998215747603, - "loss": 46.0, - "step": 2497 - }, - { - "epoch": 0.19098954450752145, - "grad_norm": 0.001202348037622869, - "learning_rate": 0.00019999998214312453, - "loss": 46.0, - "step": 2498 - }, - { - "epoch": 0.19106600149091119, - "grad_norm": 0.0009572213748469949, - "learning_rate": 0.0001999999821287672, - "loss": 46.0, - "step": 2499 - }, - { - "epoch": 0.1911424584743009, - "grad_norm": 0.001123833004385233, - "learning_rate": 0.00019999998211440417, - "loss": 46.0, - "step": 2500 - }, - { - "epoch": 0.19121891545769062, - "grad_norm": 0.0004405451472848654, - "learning_rate": 0.00019999998210003535, - "loss": 46.0, - "step": 2501 - }, - { - "epoch": 0.19129537244108033, - "grad_norm": 0.0008608513162471354, - "learning_rate": 0.00019999998208566075, - "loss": 46.0, - "step": 2502 - }, - { - "epoch": 0.19137182942447006, - "grad_norm": 0.0019066163804382086, - "learning_rate": 0.00019999998207128038, - "loss": 46.0, - "step": 2503 - }, - { - "epoch": 0.19144828640785977, - "grad_norm": 0.0017126858001574874, - "learning_rate": 0.00019999998205689424, - "loss": 46.0, - "step": 2504 - }, - { - "epoch": 0.1915247433912495, - "grad_norm": 0.0008729486726224422, - "learning_rate": 0.00019999998204250235, - "loss": 46.0, - "step": 2505 - }, - { - "epoch": 0.1916012003746392, - "grad_norm": 0.0019524359377101064, - "learning_rate": 0.00019999998202810466, - "loss": 46.0, - "step": 2506 - }, - { - "epoch": 0.19167765735802894, - "grad_norm": 0.0014123177388682961, - "learning_rate": 0.00019999998201370125, - "loss": 46.0, - "step": 2507 - }, - { - "epoch": 0.19175411434141865, - "grad_norm": 0.0007191233453340828, - "learning_rate": 0.00019999998199929202, - "loss": 46.0, - "step": 2508 - }, - { - "epoch": 0.19183057132480838, - "grad_norm": 0.000843721441924572, - "learning_rate": 0.00019999998198487703, - "loss": 46.0, - "step": 2509 - }, - { - "epoch": 0.1919070283081981, - "grad_norm": 0.0014823595993220806, - "learning_rate": 0.00019999998197045628, - "loss": 46.0, - "step": 2510 - }, - { - "epoch": 0.19198348529158782, - "grad_norm": 0.0014458306832239032, - "learning_rate": 0.00019999998195602977, - "loss": 46.0, - "step": 2511 - }, - { - "epoch": 0.19205994227497755, - "grad_norm": 0.001128409756347537, - "learning_rate": 0.00019999998194159747, - "loss": 46.0, - "step": 2512 - }, - { - "epoch": 0.19213639925836726, - "grad_norm": 0.0012496395502239466, - "learning_rate": 0.00019999998192715942, - "loss": 46.0, - "step": 2513 - }, - { - "epoch": 0.192212856241757, - "grad_norm": 0.0013120636576786637, - "learning_rate": 0.00019999998191271557, - "loss": 46.0, - "step": 2514 - }, - { - "epoch": 0.1922893132251467, - "grad_norm": 0.0006523728370666504, - "learning_rate": 0.00019999998189826598, - "loss": 46.0, - "step": 2515 - }, - { - "epoch": 0.19236577020853643, - "grad_norm": 0.0007474781014025211, - "learning_rate": 0.00019999998188381063, - "loss": 46.0, - "step": 2516 - }, - { - "epoch": 0.19244222719192614, - "grad_norm": 0.0016185181448236108, - "learning_rate": 0.0001999999818693495, - "loss": 46.0, - "step": 2517 - }, - { - "epoch": 0.19251868417531587, - "grad_norm": 0.002002629218623042, - "learning_rate": 0.0001999999818548826, - "loss": 46.0, - "step": 2518 - }, - { - "epoch": 0.19259514115870557, - "grad_norm": 0.0005933846114203334, - "learning_rate": 0.0001999999818404099, - "loss": 46.0, - "step": 2519 - }, - { - "epoch": 0.1926715981420953, - "grad_norm": 0.0009361347765661776, - "learning_rate": 0.00019999998182593148, - "loss": 46.0, - "step": 2520 - }, - { - "epoch": 0.192748055125485, - "grad_norm": 0.0010292287915945053, - "learning_rate": 0.00019999998181144727, - "loss": 46.0, - "step": 2521 - }, - { - "epoch": 0.19282451210887475, - "grad_norm": 0.0007696306565776467, - "learning_rate": 0.00019999998179695729, - "loss": 46.0, - "step": 2522 - }, - { - "epoch": 0.19290096909226445, - "grad_norm": 0.0012693873140960932, - "learning_rate": 0.0001999999817824615, - "loss": 46.0, - "step": 2523 - }, - { - "epoch": 0.19297742607565418, - "grad_norm": 0.001217762823216617, - "learning_rate": 0.00019999998176796, - "loss": 46.0, - "step": 2524 - }, - { - "epoch": 0.19305388305904392, - "grad_norm": 0.001914670574478805, - "learning_rate": 0.00019999998175345272, - "loss": 46.0, - "step": 2525 - }, - { - "epoch": 0.19313034004243362, - "grad_norm": 0.0015102314064279199, - "learning_rate": 0.00019999998173893965, - "loss": 46.0, - "step": 2526 - }, - { - "epoch": 0.19320679702582336, - "grad_norm": 0.001179531100206077, - "learning_rate": 0.00019999998172442083, - "loss": 46.0, - "step": 2527 - }, - { - "epoch": 0.19328325400921306, - "grad_norm": 0.001162834814749658, - "learning_rate": 0.00019999998170989623, - "loss": 46.0, - "step": 2528 - }, - { - "epoch": 0.1933597109926028, - "grad_norm": 0.0016187006840482354, - "learning_rate": 0.00019999998169536583, - "loss": 46.0, - "step": 2529 - }, - { - "epoch": 0.1934361679759925, - "grad_norm": 0.00241902656853199, - "learning_rate": 0.0001999999816808297, - "loss": 46.0, - "step": 2530 - }, - { - "epoch": 0.19351262495938223, - "grad_norm": 0.0016595746856182814, - "learning_rate": 0.00019999998166628783, - "loss": 46.0, - "step": 2531 - }, - { - "epoch": 0.19358908194277194, - "grad_norm": 0.0004893513396382332, - "learning_rate": 0.00019999998165174011, - "loss": 46.0, - "step": 2532 - }, - { - "epoch": 0.19366553892616167, - "grad_norm": 0.001225075451657176, - "learning_rate": 0.00019999998163718668, - "loss": 46.0, - "step": 2533 - }, - { - "epoch": 0.19374199590955138, - "grad_norm": 0.0011962822172790766, - "learning_rate": 0.00019999998162262744, - "loss": 46.0, - "step": 2534 - }, - { - "epoch": 0.1938184528929411, - "grad_norm": 0.0006792786298319697, - "learning_rate": 0.0001999999816080625, - "loss": 46.0, - "step": 2535 - }, - { - "epoch": 0.19389490987633082, - "grad_norm": 0.002364215673878789, - "learning_rate": 0.0001999999815934917, - "loss": 46.0, - "step": 2536 - }, - { - "epoch": 0.19397136685972055, - "grad_norm": 0.0007861287449486554, - "learning_rate": 0.0001999999815789152, - "loss": 46.0, - "step": 2537 - }, - { - "epoch": 0.19404782384311026, - "grad_norm": 0.0011116234818473458, - "learning_rate": 0.00019999998156433287, - "loss": 46.0, - "step": 2538 - }, - { - "epoch": 0.1941242808265, - "grad_norm": 0.0026597920805215836, - "learning_rate": 0.00019999998154974482, - "loss": 46.0, - "step": 2539 - }, - { - "epoch": 0.19420073780988972, - "grad_norm": 0.0011285573709756136, - "learning_rate": 0.000199999981535151, - "loss": 46.0, - "step": 2540 - }, - { - "epoch": 0.19427719479327943, - "grad_norm": 0.0006225054967217147, - "learning_rate": 0.0001999999815205514, - "loss": 46.0, - "step": 2541 - }, - { - "epoch": 0.19435365177666916, - "grad_norm": 0.0020340282935649157, - "learning_rate": 0.00019999998150594604, - "loss": 46.0, - "step": 2542 - }, - { - "epoch": 0.19443010876005887, - "grad_norm": 0.0014335649320855737, - "learning_rate": 0.00019999998149133487, - "loss": 46.0, - "step": 2543 - }, - { - "epoch": 0.1945065657434486, - "grad_norm": 0.004374792333692312, - "learning_rate": 0.00019999998147671798, - "loss": 46.0, - "step": 2544 - }, - { - "epoch": 0.1945830227268383, - "grad_norm": 0.001119488850235939, - "learning_rate": 0.0001999999814620953, - "loss": 46.0, - "step": 2545 - }, - { - "epoch": 0.19465947971022804, - "grad_norm": 0.0023181482683867216, - "learning_rate": 0.00019999998144746686, - "loss": 46.0, - "step": 2546 - }, - { - "epoch": 0.19473593669361774, - "grad_norm": 0.001117787091061473, - "learning_rate": 0.0001999999814328326, - "loss": 46.0, - "step": 2547 - }, - { - "epoch": 0.19481239367700748, - "grad_norm": 0.0004660346021410078, - "learning_rate": 0.00019999998141819264, - "loss": 46.0, - "step": 2548 - }, - { - "epoch": 0.19488885066039718, - "grad_norm": 0.0014202577294781804, - "learning_rate": 0.00019999998140354688, - "loss": 46.0, - "step": 2549 - }, - { - "epoch": 0.19496530764378692, - "grad_norm": 0.0025082866195589304, - "learning_rate": 0.00019999998138889535, - "loss": 46.0, - "step": 2550 - }, - { - "epoch": 0.19504176462717662, - "grad_norm": 0.0015981708420440555, - "learning_rate": 0.00019999998137423805, - "loss": 46.0, - "step": 2551 - }, - { - "epoch": 0.19511822161056636, - "grad_norm": 0.000774684885982424, - "learning_rate": 0.00019999998135957498, - "loss": 46.0, - "step": 2552 - }, - { - "epoch": 0.1951946785939561, - "grad_norm": 0.001830187626183033, - "learning_rate": 0.00019999998134490613, - "loss": 46.0, - "step": 2553 - }, - { - "epoch": 0.1952711355773458, - "grad_norm": 0.0020728816743940115, - "learning_rate": 0.00019999998133023153, - "loss": 46.0, - "step": 2554 - }, - { - "epoch": 0.19534759256073553, - "grad_norm": 0.0007425218354910612, - "learning_rate": 0.00019999998131555116, - "loss": 46.0, - "step": 2555 - }, - { - "epoch": 0.19542404954412523, - "grad_norm": 0.0014173837844282389, - "learning_rate": 0.00019999998130086502, - "loss": 46.0, - "step": 2556 - }, - { - "epoch": 0.19550050652751497, - "grad_norm": 0.0005700261681340635, - "learning_rate": 0.00019999998128617313, - "loss": 46.0, - "step": 2557 - }, - { - "epoch": 0.19557696351090467, - "grad_norm": 0.005316985305398703, - "learning_rate": 0.00019999998127147542, - "loss": 46.0, - "step": 2558 - }, - { - "epoch": 0.1956534204942944, - "grad_norm": 0.001112484373152256, - "learning_rate": 0.00019999998125677198, - "loss": 46.0, - "step": 2559 - }, - { - "epoch": 0.1957298774776841, - "grad_norm": 0.001547213876619935, - "learning_rate": 0.00019999998124206275, - "loss": 46.0, - "step": 2560 - }, - { - "epoch": 0.19580633446107384, - "grad_norm": 0.0007029794505797327, - "learning_rate": 0.00019999998122734777, - "loss": 46.0, - "step": 2561 - }, - { - "epoch": 0.19588279144446355, - "grad_norm": 0.0005506088491529226, - "learning_rate": 0.00019999998121262699, - "loss": 46.0, - "step": 2562 - }, - { - "epoch": 0.19595924842785328, - "grad_norm": 0.0009937337599694729, - "learning_rate": 0.00019999998119790046, - "loss": 46.0, - "step": 2563 - }, - { - "epoch": 0.196035705411243, - "grad_norm": 0.0013787487987428904, - "learning_rate": 0.00019999998118316815, - "loss": 46.0, - "step": 2564 - }, - { - "epoch": 0.19611216239463272, - "grad_norm": 0.003115082159638405, - "learning_rate": 0.0001999999811684301, - "loss": 46.0, - "step": 2565 - }, - { - "epoch": 0.19618861937802243, - "grad_norm": 0.0009442034061066806, - "learning_rate": 0.00019999998115368626, - "loss": 46.0, - "step": 2566 - }, - { - "epoch": 0.19626507636141216, - "grad_norm": 0.0014289517421275377, - "learning_rate": 0.00019999998113893664, - "loss": 46.0, - "step": 2567 - }, - { - "epoch": 0.1963415333448019, - "grad_norm": 0.0032986123114824295, - "learning_rate": 0.00019999998112418127, - "loss": 46.0, - "step": 2568 - }, - { - "epoch": 0.1964179903281916, - "grad_norm": 0.001238914323039353, - "learning_rate": 0.0001999999811094201, - "loss": 46.0, - "step": 2569 - }, - { - "epoch": 0.19649444731158133, - "grad_norm": 0.0007793986005708575, - "learning_rate": 0.0001999999810946532, - "loss": 46.0, - "step": 2570 - }, - { - "epoch": 0.19657090429497104, - "grad_norm": 0.0018220782512798905, - "learning_rate": 0.00019999998107988052, - "loss": 46.0, - "step": 2571 - }, - { - "epoch": 0.19664736127836077, - "grad_norm": 0.0010378597071394324, - "learning_rate": 0.00019999998106510206, - "loss": 46.0, - "step": 2572 - }, - { - "epoch": 0.19672381826175048, - "grad_norm": 0.0005865950952284038, - "learning_rate": 0.00019999998105031783, - "loss": 46.0, - "step": 2573 - }, - { - "epoch": 0.1968002752451402, - "grad_norm": 0.0006227686535567045, - "learning_rate": 0.00019999998103552785, - "loss": 46.0, - "step": 2574 - }, - { - "epoch": 0.19687673222852992, - "grad_norm": 0.0010060693603008986, - "learning_rate": 0.00019999998102073206, - "loss": 46.0, - "step": 2575 - }, - { - "epoch": 0.19695318921191965, - "grad_norm": 0.0018384178401902318, - "learning_rate": 0.00019999998100593054, - "loss": 46.0, - "step": 2576 - }, - { - "epoch": 0.19702964619530935, - "grad_norm": 0.0007536253542639315, - "learning_rate": 0.00019999998099112323, - "loss": 46.0, - "step": 2577 - }, - { - "epoch": 0.1971061031786991, - "grad_norm": 0.0010045995004475117, - "learning_rate": 0.00019999998097631016, - "loss": 46.0, - "step": 2578 - }, - { - "epoch": 0.1971825601620888, - "grad_norm": 0.002119707176461816, - "learning_rate": 0.00019999998096149134, - "loss": 46.0, - "step": 2579 - }, - { - "epoch": 0.19725901714547853, - "grad_norm": 0.0019034318393096328, - "learning_rate": 0.0001999999809466667, - "loss": 46.0, - "step": 2580 - }, - { - "epoch": 0.19733547412886823, - "grad_norm": 0.002396214520558715, - "learning_rate": 0.00019999998093183635, - "loss": 46.0, - "step": 2581 - }, - { - "epoch": 0.19741193111225797, - "grad_norm": 0.0009230375289916992, - "learning_rate": 0.00019999998091700018, - "loss": 46.0, - "step": 2582 - }, - { - "epoch": 0.1974883880956477, - "grad_norm": 0.0025573682505637407, - "learning_rate": 0.00019999998090215827, - "loss": 46.0, - "step": 2583 - }, - { - "epoch": 0.1975648450790374, - "grad_norm": 0.001235230010934174, - "learning_rate": 0.00019999998088731058, - "loss": 46.0, - "step": 2584 - }, - { - "epoch": 0.19764130206242714, - "grad_norm": 0.0012933886609971523, - "learning_rate": 0.00019999998087245712, - "loss": 46.0, - "step": 2585 - }, - { - "epoch": 0.19771775904581684, - "grad_norm": 0.0014868311118334532, - "learning_rate": 0.0001999999808575979, - "loss": 46.0, - "step": 2586 - }, - { - "epoch": 0.19779421602920658, - "grad_norm": 0.0010652708588168025, - "learning_rate": 0.0001999999808427329, - "loss": 46.0, - "step": 2587 - }, - { - "epoch": 0.19787067301259628, - "grad_norm": 0.0012538136215880513, - "learning_rate": 0.00019999998082786212, - "loss": 46.0, - "step": 2588 - }, - { - "epoch": 0.19794712999598602, - "grad_norm": 0.0007710539503023028, - "learning_rate": 0.0001999999808129856, - "loss": 46.0, - "step": 2589 - }, - { - "epoch": 0.19802358697937572, - "grad_norm": 0.0004720716387964785, - "learning_rate": 0.00019999998079810326, - "loss": 46.0, - "step": 2590 - }, - { - "epoch": 0.19810004396276545, - "grad_norm": 0.0012821248965337873, - "learning_rate": 0.0001999999807832152, - "loss": 46.0, - "step": 2591 - }, - { - "epoch": 0.19817650094615516, - "grad_norm": 0.0036623727064579725, - "learning_rate": 0.00019999998076832137, - "loss": 46.0, - "step": 2592 - }, - { - "epoch": 0.1982529579295449, - "grad_norm": 0.001504046143963933, - "learning_rate": 0.00019999998075342175, - "loss": 46.0, - "step": 2593 - }, - { - "epoch": 0.1983294149129346, - "grad_norm": 0.0006991566042415798, - "learning_rate": 0.00019999998073851635, - "loss": 46.0, - "step": 2594 - }, - { - "epoch": 0.19840587189632433, - "grad_norm": 0.0013940909411758184, - "learning_rate": 0.0001999999807236052, - "loss": 46.0, - "step": 2595 - }, - { - "epoch": 0.19848232887971404, - "grad_norm": 0.003455542493611574, - "learning_rate": 0.00019999998070868827, - "loss": 46.0, - "step": 2596 - }, - { - "epoch": 0.19855878586310377, - "grad_norm": 0.0006003906019032001, - "learning_rate": 0.00019999998069376558, - "loss": 46.0, - "step": 2597 - }, - { - "epoch": 0.1986352428464935, - "grad_norm": 0.002141443779692054, - "learning_rate": 0.00019999998067883712, - "loss": 46.0, - "step": 2598 - }, - { - "epoch": 0.1987116998298832, - "grad_norm": 0.004281296860426664, - "learning_rate": 0.0001999999806639029, - "loss": 46.0, - "step": 2599 - }, - { - "epoch": 0.19878815681327294, - "grad_norm": 0.0006730580935254693, - "learning_rate": 0.00019999998064896288, - "loss": 46.0, - "step": 2600 - }, - { - "epoch": 0.19886461379666265, - "grad_norm": 0.0012281349627301097, - "learning_rate": 0.00019999998063401713, - "loss": 46.0, - "step": 2601 - }, - { - "epoch": 0.19894107078005238, - "grad_norm": 0.000990453059785068, - "learning_rate": 0.0001999999806190656, - "loss": 46.0, - "step": 2602 - }, - { - "epoch": 0.1990175277634421, - "grad_norm": 0.0018406666349619627, - "learning_rate": 0.00019999998060410827, - "loss": 46.0, - "step": 2603 - }, - { - "epoch": 0.19909398474683182, - "grad_norm": 0.0007173509802669287, - "learning_rate": 0.0001999999805891452, - "loss": 46.0, - "step": 2604 - }, - { - "epoch": 0.19917044173022153, - "grad_norm": 0.0012165848165750504, - "learning_rate": 0.00019999998057417635, - "loss": 46.0, - "step": 2605 - }, - { - "epoch": 0.19924689871361126, - "grad_norm": 0.0017083440907299519, - "learning_rate": 0.00019999998055920175, - "loss": 46.0, - "step": 2606 - }, - { - "epoch": 0.19932335569700096, - "grad_norm": 0.0018412291537970304, - "learning_rate": 0.00019999998054422133, - "loss": 46.0, - "step": 2607 - }, - { - "epoch": 0.1993998126803907, - "grad_norm": 0.0015125240897759795, - "learning_rate": 0.0001999999805292352, - "loss": 46.0, - "step": 2608 - }, - { - "epoch": 0.1994762696637804, - "grad_norm": 0.0023048233706504107, - "learning_rate": 0.00019999998051424328, - "loss": 46.0, - "step": 2609 - }, - { - "epoch": 0.19955272664717014, - "grad_norm": 0.002014223486185074, - "learning_rate": 0.00019999998049924556, - "loss": 46.0, - "step": 2610 - }, - { - "epoch": 0.19962918363055987, - "grad_norm": 0.0011433360632508993, - "learning_rate": 0.0001999999804842421, - "loss": 46.0, - "step": 2611 - }, - { - "epoch": 0.19970564061394958, - "grad_norm": 0.0006147747044451535, - "learning_rate": 0.00019999998046923287, - "loss": 46.0, - "step": 2612 - }, - { - "epoch": 0.1997820975973393, - "grad_norm": 0.0009227930568158627, - "learning_rate": 0.00019999998045421786, - "loss": 46.0, - "step": 2613 - }, - { - "epoch": 0.19985855458072901, - "grad_norm": 0.0007886108360253274, - "learning_rate": 0.0001999999804391971, - "loss": 46.0, - "step": 2614 - }, - { - "epoch": 0.19993501156411875, - "grad_norm": 0.004000439308583736, - "learning_rate": 0.00019999998042417055, - "loss": 46.0, - "step": 2615 - }, - { - "epoch": 0.20001146854750845, - "grad_norm": 0.0003597511677071452, - "learning_rate": 0.00019999998040913823, - "loss": 46.0, - "step": 2616 - }, - { - "epoch": 0.2000879255308982, - "grad_norm": 0.001686968607828021, - "learning_rate": 0.00019999998039410018, - "loss": 46.0, - "step": 2617 - }, - { - "epoch": 0.2001643825142879, - "grad_norm": 0.0009858241537585855, - "learning_rate": 0.0001999999803790563, - "loss": 46.0, - "step": 2618 - }, - { - "epoch": 0.20024083949767763, - "grad_norm": 0.0008331689750775695, - "learning_rate": 0.00019999998036400668, - "loss": 46.0, - "step": 2619 - }, - { - "epoch": 0.20031729648106733, - "grad_norm": 0.0013714659726247191, - "learning_rate": 0.00019999998034895132, - "loss": 46.0, - "step": 2620 - }, - { - "epoch": 0.20039375346445706, - "grad_norm": 0.0023798574693500996, - "learning_rate": 0.00019999998033389015, - "loss": 46.0, - "step": 2621 - }, - { - "epoch": 0.20047021044784677, - "grad_norm": 0.0006794448709115386, - "learning_rate": 0.0001999999803188232, - "loss": 46.0, - "step": 2622 - }, - { - "epoch": 0.2005466674312365, - "grad_norm": 0.0012324457056820393, - "learning_rate": 0.00019999998030375052, - "loss": 46.0, - "step": 2623 - }, - { - "epoch": 0.2006231244146262, - "grad_norm": 0.0015901293372735381, - "learning_rate": 0.00019999998028867206, - "loss": 46.0, - "step": 2624 - }, - { - "epoch": 0.20069958139801594, - "grad_norm": 0.000446124846348539, - "learning_rate": 0.00019999998027358783, - "loss": 46.0, - "step": 2625 - }, - { - "epoch": 0.20077603838140567, - "grad_norm": 0.0008350448915734887, - "learning_rate": 0.0001999999802584978, - "loss": 46.0, - "step": 2626 - }, - { - "epoch": 0.20085249536479538, - "grad_norm": 0.002167291473597288, - "learning_rate": 0.00019999998024340204, - "loss": 46.0, - "step": 2627 - }, - { - "epoch": 0.2009289523481851, - "grad_norm": 0.0013825835194438696, - "learning_rate": 0.0001999999802283005, - "loss": 46.0, - "step": 2628 - }, - { - "epoch": 0.20100540933157482, - "grad_norm": 0.0007878118776716292, - "learning_rate": 0.00019999998021319316, - "loss": 46.0, - "step": 2629 - }, - { - "epoch": 0.20108186631496455, - "grad_norm": 0.0033684580121189356, - "learning_rate": 0.00019999998019808009, - "loss": 46.0, - "step": 2630 - }, - { - "epoch": 0.20115832329835426, - "grad_norm": 0.00131437205709517, - "learning_rate": 0.00019999998018296124, - "loss": 46.0, - "step": 2631 - }, - { - "epoch": 0.201234780281744, - "grad_norm": 0.0019817743450403214, - "learning_rate": 0.00019999998016783662, - "loss": 46.0, - "step": 2632 - }, - { - "epoch": 0.2013112372651337, - "grad_norm": 0.000990221044048667, - "learning_rate": 0.0001999999801527062, - "loss": 46.0, - "step": 2633 - }, - { - "epoch": 0.20138769424852343, - "grad_norm": 0.0023654510732740164, - "learning_rate": 0.00019999998013757003, - "loss": 46.0, - "step": 2634 - }, - { - "epoch": 0.20146415123191314, - "grad_norm": 0.004006518982350826, - "learning_rate": 0.00019999998012242812, - "loss": 46.0, - "step": 2635 - }, - { - "epoch": 0.20154060821530287, - "grad_norm": 0.0007428004173561931, - "learning_rate": 0.0001999999801072804, - "loss": 46.0, - "step": 2636 - }, - { - "epoch": 0.20161706519869257, - "grad_norm": 0.0007062962977215648, - "learning_rate": 0.00019999998009212694, - "loss": 46.0, - "step": 2637 - }, - { - "epoch": 0.2016935221820823, - "grad_norm": 0.0013279336271807551, - "learning_rate": 0.0001999999800769677, - "loss": 46.0, - "step": 2638 - }, - { - "epoch": 0.201769979165472, - "grad_norm": 0.0021120128221809864, - "learning_rate": 0.0001999999800618027, - "loss": 46.0, - "step": 2639 - }, - { - "epoch": 0.20184643614886175, - "grad_norm": 0.0010096341138705611, - "learning_rate": 0.00019999998004663192, - "loss": 46.0, - "step": 2640 - }, - { - "epoch": 0.20192289313225148, - "grad_norm": 0.001144168432801962, - "learning_rate": 0.00019999998003145537, - "loss": 46.0, - "step": 2641 - }, - { - "epoch": 0.20199935011564119, - "grad_norm": 0.0014059501700103283, - "learning_rate": 0.00019999998001627307, - "loss": 46.0, - "step": 2642 - }, - { - "epoch": 0.20207580709903092, - "grad_norm": 0.0007812492549419403, - "learning_rate": 0.00019999998000108497, - "loss": 46.0, - "step": 2643 - }, - { - "epoch": 0.20215226408242062, - "grad_norm": 0.0015959207667037845, - "learning_rate": 0.0001999999799858911, - "loss": 46.0, - "step": 2644 - }, - { - "epoch": 0.20222872106581036, - "grad_norm": 0.0012493618996813893, - "learning_rate": 0.00019999997997069148, - "loss": 46.0, - "step": 2645 - }, - { - "epoch": 0.20230517804920006, - "grad_norm": 0.0005881648976355791, - "learning_rate": 0.00019999997995548608, - "loss": 46.0, - "step": 2646 - }, - { - "epoch": 0.2023816350325898, - "grad_norm": 0.001119842054322362, - "learning_rate": 0.00019999997994027492, - "loss": 46.0, - "step": 2647 - }, - { - "epoch": 0.2024580920159795, - "grad_norm": 0.0007371313986368477, - "learning_rate": 0.000199999979925058, - "loss": 46.0, - "step": 2648 - }, - { - "epoch": 0.20253454899936923, - "grad_norm": 0.0008875987841747701, - "learning_rate": 0.0001999999799098353, - "loss": 46.0, - "step": 2649 - }, - { - "epoch": 0.20261100598275894, - "grad_norm": 0.0006791872438043356, - "learning_rate": 0.0001999999798946068, - "loss": 46.0, - "step": 2650 - }, - { - "epoch": 0.20268746296614867, - "grad_norm": 0.0010499234776943922, - "learning_rate": 0.00019999997987937257, - "loss": 46.0, - "step": 2651 - }, - { - "epoch": 0.20276391994953838, - "grad_norm": 0.0023816260509192944, - "learning_rate": 0.00019999997986413254, - "loss": 46.0, - "step": 2652 - }, - { - "epoch": 0.2028403769329281, - "grad_norm": 0.0023006810806691647, - "learning_rate": 0.00019999997984888678, - "loss": 46.0, - "step": 2653 - }, - { - "epoch": 0.20291683391631782, - "grad_norm": 0.0008985421154648066, - "learning_rate": 0.00019999997983363523, - "loss": 46.0, - "step": 2654 - }, - { - "epoch": 0.20299329089970755, - "grad_norm": 0.002558620646595955, - "learning_rate": 0.0001999999798183779, - "loss": 46.0, - "step": 2655 - }, - { - "epoch": 0.20306974788309728, - "grad_norm": 0.001698905834928155, - "learning_rate": 0.00019999997980311483, - "loss": 46.0, - "step": 2656 - }, - { - "epoch": 0.203146204866487, - "grad_norm": 0.0009687116253189743, - "learning_rate": 0.00019999997978784598, - "loss": 46.0, - "step": 2657 - }, - { - "epoch": 0.20322266184987672, - "grad_norm": 0.0012422003783285618, - "learning_rate": 0.00019999997977257134, - "loss": 46.0, - "step": 2658 - }, - { - "epoch": 0.20329911883326643, - "grad_norm": 0.0019307717448100448, - "learning_rate": 0.00019999997975729094, - "loss": 46.0, - "step": 2659 - }, - { - "epoch": 0.20337557581665616, - "grad_norm": 0.00837969034910202, - "learning_rate": 0.00019999997974200478, - "loss": 46.0, - "step": 2660 - }, - { - "epoch": 0.20345203280004587, - "grad_norm": 0.0011417909990996122, - "learning_rate": 0.00019999997972671284, - "loss": 46.0, - "step": 2661 - }, - { - "epoch": 0.2035284897834356, - "grad_norm": 0.0011604258324950933, - "learning_rate": 0.0001999999797114151, - "loss": 46.0, - "step": 2662 - }, - { - "epoch": 0.2036049467668253, - "grad_norm": 0.0008633575052954257, - "learning_rate": 0.00019999997969611164, - "loss": 46.0, - "step": 2663 - }, - { - "epoch": 0.20368140375021504, - "grad_norm": 0.0008733525755815208, - "learning_rate": 0.0001999999796808024, - "loss": 46.0, - "step": 2664 - }, - { - "epoch": 0.20375786073360475, - "grad_norm": 0.0012839054688811302, - "learning_rate": 0.00019999997966548737, - "loss": 46.0, - "step": 2665 - }, - { - "epoch": 0.20383431771699448, - "grad_norm": 0.0004955227486789227, - "learning_rate": 0.0001999999796501666, - "loss": 46.0, - "step": 2666 - }, - { - "epoch": 0.20391077470038418, - "grad_norm": 0.0011039386736229062, - "learning_rate": 0.00019999997963484004, - "loss": 46.0, - "step": 2667 - }, - { - "epoch": 0.20398723168377392, - "grad_norm": 0.0022267145104706287, - "learning_rate": 0.0001999999796195077, - "loss": 46.0, - "step": 2668 - }, - { - "epoch": 0.20406368866716365, - "grad_norm": 0.0029530690517276525, - "learning_rate": 0.00019999997960416964, - "loss": 46.0, - "step": 2669 - }, - { - "epoch": 0.20414014565055336, - "grad_norm": 0.0025334572419524193, - "learning_rate": 0.00019999997958882577, - "loss": 46.0, - "step": 2670 - }, - { - "epoch": 0.2042166026339431, - "grad_norm": 0.0008771863067522645, - "learning_rate": 0.00019999997957347615, - "loss": 46.0, - "step": 2671 - }, - { - "epoch": 0.2042930596173328, - "grad_norm": 0.0009226668043993413, - "learning_rate": 0.00019999997955812073, - "loss": 46.0, - "step": 2672 - }, - { - "epoch": 0.20436951660072253, - "grad_norm": 0.0010280348360538483, - "learning_rate": 0.00019999997954275956, - "loss": 46.0, - "step": 2673 - }, - { - "epoch": 0.20444597358411223, - "grad_norm": 0.004375841002911329, - "learning_rate": 0.00019999997952739262, - "loss": 46.0, - "step": 2674 - }, - { - "epoch": 0.20452243056750197, - "grad_norm": 0.0006381785497069359, - "learning_rate": 0.0001999999795120199, - "loss": 46.0, - "step": 2675 - }, - { - "epoch": 0.20459888755089167, - "grad_norm": 0.00226203934289515, - "learning_rate": 0.00019999997949664145, - "loss": 46.0, - "step": 2676 - }, - { - "epoch": 0.2046753445342814, - "grad_norm": 0.0019268908072263002, - "learning_rate": 0.00019999997948125716, - "loss": 46.0, - "step": 2677 - }, - { - "epoch": 0.2047518015176711, - "grad_norm": 0.0012107514776289463, - "learning_rate": 0.00019999997946586718, - "loss": 46.0, - "step": 2678 - }, - { - "epoch": 0.20482825850106084, - "grad_norm": 0.0006391117931343615, - "learning_rate": 0.00019999997945047138, - "loss": 46.0, - "step": 2679 - }, - { - "epoch": 0.20490471548445055, - "grad_norm": 0.0015201630303636193, - "learning_rate": 0.0001999999794350698, - "loss": 46.0, - "step": 2680 - }, - { - "epoch": 0.20498117246784028, - "grad_norm": 0.001089393044821918, - "learning_rate": 0.0001999999794196625, - "loss": 46.0, - "step": 2681 - }, - { - "epoch": 0.20505762945123, - "grad_norm": 0.0020287600345909595, - "learning_rate": 0.0001999999794042494, - "loss": 46.0, - "step": 2682 - }, - { - "epoch": 0.20513408643461972, - "grad_norm": 0.001776363467797637, - "learning_rate": 0.00019999997938883053, - "loss": 46.0, - "step": 2683 - }, - { - "epoch": 0.20521054341800946, - "grad_norm": 0.0014043195405974984, - "learning_rate": 0.00019999997937340588, - "loss": 46.0, - "step": 2684 - }, - { - "epoch": 0.20528700040139916, - "grad_norm": 0.0030978552531450987, - "learning_rate": 0.0001999999793579755, - "loss": 46.0, - "step": 2685 - }, - { - "epoch": 0.2053634573847889, - "grad_norm": 0.0008114740485325456, - "learning_rate": 0.00019999997934253932, - "loss": 46.0, - "step": 2686 - }, - { - "epoch": 0.2054399143681786, - "grad_norm": 0.0008145145257003605, - "learning_rate": 0.00019999997932709738, - "loss": 46.0, - "step": 2687 - }, - { - "epoch": 0.20551637135156833, - "grad_norm": 0.001314866472966969, - "learning_rate": 0.00019999997931164967, - "loss": 46.0, - "step": 2688 - }, - { - "epoch": 0.20559282833495804, - "grad_norm": 0.002413044683635235, - "learning_rate": 0.00019999997929619619, - "loss": 46.0, - "step": 2689 - }, - { - "epoch": 0.20566928531834777, - "grad_norm": 0.00035419585765339434, - "learning_rate": 0.00019999997928073695, - "loss": 46.0, - "step": 2690 - }, - { - "epoch": 0.20574574230173748, - "grad_norm": 0.0013094025198370218, - "learning_rate": 0.0001999999792652719, - "loss": 46.0, - "step": 2691 - }, - { - "epoch": 0.2058221992851272, - "grad_norm": 0.002120288321748376, - "learning_rate": 0.00019999997924980111, - "loss": 46.0, - "step": 2692 - }, - { - "epoch": 0.20589865626851692, - "grad_norm": 0.0012566301738843322, - "learning_rate": 0.00019999997923432456, - "loss": 46.0, - "step": 2693 - }, - { - "epoch": 0.20597511325190665, - "grad_norm": 0.007534113246947527, - "learning_rate": 0.00019999997921884224, - "loss": 46.0, - "step": 2694 - }, - { - "epoch": 0.20605157023529636, - "grad_norm": 0.003485876601189375, - "learning_rate": 0.00019999997920335414, - "loss": 46.0, - "step": 2695 - }, - { - "epoch": 0.2061280272186861, - "grad_norm": 0.0016508623957633972, - "learning_rate": 0.00019999997918786026, - "loss": 46.0, - "step": 2696 - }, - { - "epoch": 0.2062044842020758, - "grad_norm": 0.0007939081406220794, - "learning_rate": 0.00019999997917236062, - "loss": 46.0, - "step": 2697 - }, - { - "epoch": 0.20628094118546553, - "grad_norm": 0.0005702505586668849, - "learning_rate": 0.00019999997915685523, - "loss": 46.0, - "step": 2698 - }, - { - "epoch": 0.20635739816885526, - "grad_norm": 0.0021604765206575394, - "learning_rate": 0.00019999997914134406, - "loss": 46.0, - "step": 2699 - }, - { - "epoch": 0.20643385515224497, - "grad_norm": 0.0031259257812052965, - "learning_rate": 0.0001999999791258271, - "loss": 46.0, - "step": 2700 - }, - { - "epoch": 0.2065103121356347, - "grad_norm": 0.0024204538203775883, - "learning_rate": 0.00019999997911030438, - "loss": 46.0, - "step": 2701 - }, - { - "epoch": 0.2065867691190244, - "grad_norm": 0.0021650376729667187, - "learning_rate": 0.0001999999790947759, - "loss": 46.0, - "step": 2702 - }, - { - "epoch": 0.20666322610241414, - "grad_norm": 0.0031147070694714785, - "learning_rate": 0.00019999997907924164, - "loss": 46.0, - "step": 2703 - }, - { - "epoch": 0.20673968308580384, - "grad_norm": 0.0015438698465004563, - "learning_rate": 0.00019999997906370163, - "loss": 46.0, - "step": 2704 - }, - { - "epoch": 0.20681614006919358, - "grad_norm": 0.0012661865912377834, - "learning_rate": 0.00019999997904815583, - "loss": 46.0, - "step": 2705 - }, - { - "epoch": 0.20689259705258328, - "grad_norm": 0.0006658679340034723, - "learning_rate": 0.00019999997903260428, - "loss": 46.0, - "step": 2706 - }, - { - "epoch": 0.20696905403597302, - "grad_norm": 0.002733797300606966, - "learning_rate": 0.00019999997901704695, - "loss": 46.0, - "step": 2707 - }, - { - "epoch": 0.20704551101936272, - "grad_norm": 0.0009632634464651346, - "learning_rate": 0.00019999997900148385, - "loss": 46.0, - "step": 2708 - }, - { - "epoch": 0.20712196800275245, - "grad_norm": 0.0006991362315602601, - "learning_rate": 0.00019999997898591495, - "loss": 46.0, - "step": 2709 - }, - { - "epoch": 0.20719842498614216, - "grad_norm": 0.0010518896160647273, - "learning_rate": 0.0001999999789703403, - "loss": 46.0, - "step": 2710 - }, - { - "epoch": 0.2072748819695319, - "grad_norm": 0.0029758852906525135, - "learning_rate": 0.00019999997895475991, - "loss": 46.0, - "step": 2711 - }, - { - "epoch": 0.2073513389529216, - "grad_norm": 0.0013607953442260623, - "learning_rate": 0.00019999997893917372, - "loss": 46.0, - "step": 2712 - }, - { - "epoch": 0.20742779593631133, - "grad_norm": 0.000644504267256707, - "learning_rate": 0.0001999999789235818, - "loss": 46.0, - "step": 2713 - }, - { - "epoch": 0.20750425291970107, - "grad_norm": 0.0007269313791766763, - "learning_rate": 0.00019999997890798404, - "loss": 46.0, - "step": 2714 - }, - { - "epoch": 0.20758070990309077, - "grad_norm": 0.0016563581302762032, - "learning_rate": 0.0001999999788923806, - "loss": 46.0, - "step": 2715 - }, - { - "epoch": 0.2076571668864805, - "grad_norm": 0.0008605378097854555, - "learning_rate": 0.00019999997887677133, - "loss": 46.0, - "step": 2716 - }, - { - "epoch": 0.2077336238698702, - "grad_norm": 0.0016932736616581678, - "learning_rate": 0.0001999999788611563, - "loss": 46.0, - "step": 2717 - }, - { - "epoch": 0.20781008085325994, - "grad_norm": 0.0003803808067459613, - "learning_rate": 0.0001999999788455355, - "loss": 46.0, - "step": 2718 - }, - { - "epoch": 0.20788653783664965, - "grad_norm": 0.0005976628162898123, - "learning_rate": 0.00019999997882990894, - "loss": 46.0, - "step": 2719 - }, - { - "epoch": 0.20796299482003938, - "grad_norm": 0.0013230486074462533, - "learning_rate": 0.00019999997881427661, - "loss": 46.0, - "step": 2720 - }, - { - "epoch": 0.2080394518034291, - "grad_norm": 0.00038978815427981317, - "learning_rate": 0.0001999999787986385, - "loss": 46.0, - "step": 2721 - }, - { - "epoch": 0.20811590878681882, - "grad_norm": 0.001036260393448174, - "learning_rate": 0.00019999997878299462, - "loss": 46.0, - "step": 2722 - }, - { - "epoch": 0.20819236577020853, - "grad_norm": 0.0026271517854183912, - "learning_rate": 0.00019999997876734497, - "loss": 46.0, - "step": 2723 - }, - { - "epoch": 0.20826882275359826, - "grad_norm": 0.0026575273368507624, - "learning_rate": 0.00019999997875168955, - "loss": 46.0, - "step": 2724 - }, - { - "epoch": 0.20834527973698796, - "grad_norm": 0.0007150326273404062, - "learning_rate": 0.0001999999787360284, - "loss": 46.0, - "step": 2725 - }, - { - "epoch": 0.2084217367203777, - "grad_norm": 0.0005619221483357251, - "learning_rate": 0.00019999997872036142, - "loss": 46.0, - "step": 2726 - }, - { - "epoch": 0.20849819370376743, - "grad_norm": 0.0007964373799040914, - "learning_rate": 0.0001999999787046887, - "loss": 46.0, - "step": 2727 - }, - { - "epoch": 0.20857465068715714, - "grad_norm": 0.0012551010586321354, - "learning_rate": 0.00019999997868901023, - "loss": 46.0, - "step": 2728 - }, - { - "epoch": 0.20865110767054687, - "grad_norm": 0.0014274526620283723, - "learning_rate": 0.00019999997867332597, - "loss": 46.0, - "step": 2729 - }, - { - "epoch": 0.20872756465393658, - "grad_norm": 0.0013953399611636996, - "learning_rate": 0.00019999997865763594, - "loss": 46.0, - "step": 2730 - }, - { - "epoch": 0.2088040216373263, - "grad_norm": 0.001514603616669774, - "learning_rate": 0.00019999997864194013, - "loss": 46.0, - "step": 2731 - }, - { - "epoch": 0.20888047862071601, - "grad_norm": 0.001486646244302392, - "learning_rate": 0.00019999997862623855, - "loss": 46.0, - "step": 2732 - }, - { - "epoch": 0.20895693560410575, - "grad_norm": 0.0008643695036880672, - "learning_rate": 0.00019999997861053123, - "loss": 46.0, - "step": 2733 - }, - { - "epoch": 0.20903339258749545, - "grad_norm": 0.0012586575467139482, - "learning_rate": 0.00019999997859481813, - "loss": 46.0, - "step": 2734 - }, - { - "epoch": 0.2091098495708852, - "grad_norm": 0.003097428707405925, - "learning_rate": 0.00019999997857909923, - "loss": 46.0, - "step": 2735 - }, - { - "epoch": 0.2091863065542749, - "grad_norm": 0.002444636542350054, - "learning_rate": 0.0001999999785633746, - "loss": 46.0, - "step": 2736 - }, - { - "epoch": 0.20926276353766463, - "grad_norm": 0.0005594029207713902, - "learning_rate": 0.00019999997854764417, - "loss": 46.0, - "step": 2737 - }, - { - "epoch": 0.20933922052105433, - "grad_norm": 0.0014575031818822026, - "learning_rate": 0.000199999978531908, - "loss": 46.0, - "step": 2738 - }, - { - "epoch": 0.20941567750444406, - "grad_norm": 0.0028861218597739935, - "learning_rate": 0.00019999997851616606, - "loss": 46.0, - "step": 2739 - }, - { - "epoch": 0.20949213448783377, - "grad_norm": 0.0006614363519474864, - "learning_rate": 0.00019999997850041833, - "loss": 46.0, - "step": 2740 - }, - { - "epoch": 0.2095685914712235, - "grad_norm": 0.0009695856715552509, - "learning_rate": 0.00019999997848466481, - "loss": 46.0, - "step": 2741 - }, - { - "epoch": 0.20964504845461324, - "grad_norm": 0.001881250413134694, - "learning_rate": 0.00019999997846890556, - "loss": 46.0, - "step": 2742 - }, - { - "epoch": 0.20972150543800294, - "grad_norm": 0.0012821612181141973, - "learning_rate": 0.00019999997845314053, - "loss": 46.0, - "step": 2743 - }, - { - "epoch": 0.20979796242139268, - "grad_norm": 0.0011615402763709426, - "learning_rate": 0.00019999997843736972, - "loss": 46.0, - "step": 2744 - }, - { - "epoch": 0.20987441940478238, - "grad_norm": 0.0010495653841644526, - "learning_rate": 0.00019999997842159314, - "loss": 46.0, - "step": 2745 - }, - { - "epoch": 0.20995087638817211, - "grad_norm": 0.0009658645722083747, - "learning_rate": 0.00019999997840581082, - "loss": 46.0, - "step": 2746 - }, - { - "epoch": 0.21002733337156182, - "grad_norm": 0.0016275092493742704, - "learning_rate": 0.0001999999783900227, - "loss": 46.0, - "step": 2747 - }, - { - "epoch": 0.21010379035495155, - "grad_norm": 0.001446898328140378, - "learning_rate": 0.00019999997837422885, - "loss": 46.0, - "step": 2748 - }, - { - "epoch": 0.21018024733834126, - "grad_norm": 0.0021533663384616375, - "learning_rate": 0.00019999997835842915, - "loss": 46.0, - "step": 2749 - }, - { - "epoch": 0.210256704321731, - "grad_norm": 0.00771222123876214, - "learning_rate": 0.00019999997834262376, - "loss": 46.0, - "step": 2750 - }, - { - "epoch": 0.2103331613051207, - "grad_norm": 0.0015932237729430199, - "learning_rate": 0.00019999997832681257, - "loss": 46.0, - "step": 2751 - }, - { - "epoch": 0.21040961828851043, - "grad_norm": 0.0008433216717094183, - "learning_rate": 0.0001999999783109956, - "loss": 46.0, - "step": 2752 - }, - { - "epoch": 0.21048607527190014, - "grad_norm": 0.0011260309256613255, - "learning_rate": 0.00019999997829517287, - "loss": 46.0, - "step": 2753 - }, - { - "epoch": 0.21056253225528987, - "grad_norm": 0.0010261379648000002, - "learning_rate": 0.00019999997827934438, - "loss": 46.0, - "step": 2754 - }, - { - "epoch": 0.21063898923867957, - "grad_norm": 0.0014874220360070467, - "learning_rate": 0.00019999997826351012, - "loss": 46.0, - "step": 2755 - }, - { - "epoch": 0.2107154462220693, - "grad_norm": 0.0020668439101427794, - "learning_rate": 0.00019999997824767006, - "loss": 46.0, - "step": 2756 - }, - { - "epoch": 0.21079190320545904, - "grad_norm": 0.0011867792345583439, - "learning_rate": 0.0001999999782318243, - "loss": 46.0, - "step": 2757 - }, - { - "epoch": 0.21086836018884875, - "grad_norm": 0.0031772837974131107, - "learning_rate": 0.0001999999782159727, - "loss": 46.0, - "step": 2758 - }, - { - "epoch": 0.21094481717223848, - "grad_norm": 0.0012536614667624235, - "learning_rate": 0.00019999997820011536, - "loss": 46.0, - "step": 2759 - }, - { - "epoch": 0.21102127415562819, - "grad_norm": 0.0014362270012497902, - "learning_rate": 0.00019999997818425223, - "loss": 46.0, - "step": 2760 - }, - { - "epoch": 0.21109773113901792, - "grad_norm": 0.00485815666615963, - "learning_rate": 0.00019999997816838336, - "loss": 46.0, - "step": 2761 - }, - { - "epoch": 0.21117418812240762, - "grad_norm": 0.0010709596099331975, - "learning_rate": 0.00019999997815250872, - "loss": 46.0, - "step": 2762 - }, - { - "epoch": 0.21125064510579736, - "grad_norm": 0.0027423000428825617, - "learning_rate": 0.0001999999781366283, - "loss": 46.0, - "step": 2763 - }, - { - "epoch": 0.21132710208918706, - "grad_norm": 0.0004806232755072415, - "learning_rate": 0.00019999997812074208, - "loss": 46.0, - "step": 2764 - }, - { - "epoch": 0.2114035590725768, - "grad_norm": 0.0008911628974601626, - "learning_rate": 0.00019999997810485015, - "loss": 46.0, - "step": 2765 - }, - { - "epoch": 0.2114800160559665, - "grad_norm": 0.0007796669378876686, - "learning_rate": 0.0001999999780889524, - "loss": 46.0, - "step": 2766 - }, - { - "epoch": 0.21155647303935624, - "grad_norm": 0.0013641685945913196, - "learning_rate": 0.00019999997807304892, - "loss": 46.0, - "step": 2767 - }, - { - "epoch": 0.21163293002274594, - "grad_norm": 0.0009681656956672668, - "learning_rate": 0.00019999997805713964, - "loss": 46.0, - "step": 2768 - }, - { - "epoch": 0.21170938700613567, - "grad_norm": 0.004737140145152807, - "learning_rate": 0.00019999997804122458, - "loss": 46.0, - "step": 2769 - }, - { - "epoch": 0.21178584398952538, - "grad_norm": 0.0018367001321166754, - "learning_rate": 0.0001999999780253038, - "loss": 46.0, - "step": 2770 - }, - { - "epoch": 0.2118623009729151, - "grad_norm": 0.002287733368575573, - "learning_rate": 0.00019999997800937723, - "loss": 46.0, - "step": 2771 - }, - { - "epoch": 0.21193875795630485, - "grad_norm": 0.0016572296153753996, - "learning_rate": 0.00019999997799344488, - "loss": 46.0, - "step": 2772 - }, - { - "epoch": 0.21201521493969455, - "grad_norm": 0.0010927787516266108, - "learning_rate": 0.00019999997797750675, - "loss": 46.0, - "step": 2773 - }, - { - "epoch": 0.21209167192308429, - "grad_norm": 0.0023603811860084534, - "learning_rate": 0.00019999997796156288, - "loss": 46.0, - "step": 2774 - }, - { - "epoch": 0.212168128906474, - "grad_norm": 0.0039044448640197515, - "learning_rate": 0.0001999999779456132, - "loss": 46.0, - "step": 2775 - }, - { - "epoch": 0.21224458588986372, - "grad_norm": 0.001130746561102569, - "learning_rate": 0.0001999999779296578, - "loss": 46.0, - "step": 2776 - }, - { - "epoch": 0.21232104287325343, - "grad_norm": 0.001227583852596581, - "learning_rate": 0.0001999999779136966, - "loss": 46.0, - "step": 2777 - }, - { - "epoch": 0.21239749985664316, - "grad_norm": 0.00040609861025586724, - "learning_rate": 0.00019999997789772964, - "loss": 46.0, - "step": 2778 - }, - { - "epoch": 0.21247395684003287, - "grad_norm": 0.0012875209795311093, - "learning_rate": 0.0001999999778817569, - "loss": 46.0, - "step": 2779 - }, - { - "epoch": 0.2125504138234226, - "grad_norm": 0.001201312756165862, - "learning_rate": 0.0001999999778657784, - "loss": 46.0, - "step": 2780 - }, - { - "epoch": 0.2126268708068123, - "grad_norm": 0.0006764173740521073, - "learning_rate": 0.0001999999778497941, - "loss": 46.0, - "step": 2781 - }, - { - "epoch": 0.21270332779020204, - "grad_norm": 0.0017223118338733912, - "learning_rate": 0.00019999997783380407, - "loss": 46.0, - "step": 2782 - }, - { - "epoch": 0.21277978477359175, - "grad_norm": 0.0002848854346666485, - "learning_rate": 0.00019999997781780827, - "loss": 46.0, - "step": 2783 - }, - { - "epoch": 0.21285624175698148, - "grad_norm": 0.0012034042738378048, - "learning_rate": 0.0001999999778018067, - "loss": 46.0, - "step": 2784 - }, - { - "epoch": 0.2129326987403712, - "grad_norm": 0.001390379387885332, - "learning_rate": 0.00019999997778579934, - "loss": 46.0, - "step": 2785 - }, - { - "epoch": 0.21300915572376092, - "grad_norm": 0.002384237013757229, - "learning_rate": 0.00019999997776978622, - "loss": 46.0, - "step": 2786 - }, - { - "epoch": 0.21308561270715065, - "grad_norm": 0.00044011662248522043, - "learning_rate": 0.00019999997775376732, - "loss": 46.0, - "step": 2787 - }, - { - "epoch": 0.21316206969054036, - "grad_norm": 0.0022945080418139696, - "learning_rate": 0.00019999997773774268, - "loss": 46.0, - "step": 2788 - }, - { - "epoch": 0.2132385266739301, - "grad_norm": 0.0004650302289519459, - "learning_rate": 0.00019999997772171226, - "loss": 46.0, - "step": 2789 - }, - { - "epoch": 0.2133149836573198, - "grad_norm": 0.0004878402396570891, - "learning_rate": 0.00019999997770567607, - "loss": 46.0, - "step": 2790 - }, - { - "epoch": 0.21339144064070953, - "grad_norm": 0.0007357585127465427, - "learning_rate": 0.0001999999776896341, - "loss": 46.0, - "step": 2791 - }, - { - "epoch": 0.21346789762409923, - "grad_norm": 0.0009692542953416705, - "learning_rate": 0.00019999997767358637, - "loss": 46.0, - "step": 2792 - }, - { - "epoch": 0.21354435460748897, - "grad_norm": 0.0007882479694671929, - "learning_rate": 0.00019999997765753283, - "loss": 46.0, - "step": 2793 - }, - { - "epoch": 0.21362081159087867, - "grad_norm": 0.0006892674136906862, - "learning_rate": 0.00019999997764147358, - "loss": 46.0, - "step": 2794 - }, - { - "epoch": 0.2136972685742684, - "grad_norm": 0.001179009792394936, - "learning_rate": 0.00019999997762540855, - "loss": 46.0, - "step": 2795 - }, - { - "epoch": 0.2137737255576581, - "grad_norm": 0.0012044119648635387, - "learning_rate": 0.00019999997760933771, - "loss": 46.0, - "step": 2796 - }, - { - "epoch": 0.21385018254104785, - "grad_norm": 0.0009840520797297359, - "learning_rate": 0.00019999997759326114, - "loss": 46.0, - "step": 2797 - }, - { - "epoch": 0.21392663952443755, - "grad_norm": 0.0010541044175624847, - "learning_rate": 0.0001999999775771788, - "loss": 46.0, - "step": 2798 - }, - { - "epoch": 0.21400309650782728, - "grad_norm": 0.0011699431343004107, - "learning_rate": 0.00019999997756109066, - "loss": 46.0, - "step": 2799 - }, - { - "epoch": 0.21407955349121702, - "grad_norm": 0.003357216250151396, - "learning_rate": 0.00019999997754499677, - "loss": 46.0, - "step": 2800 - }, - { - "epoch": 0.21415601047460672, - "grad_norm": 0.0013296111719682813, - "learning_rate": 0.00019999997752889712, - "loss": 46.0, - "step": 2801 - }, - { - "epoch": 0.21423246745799646, - "grad_norm": 0.0030206083320081234, - "learning_rate": 0.00019999997751279168, - "loss": 46.0, - "step": 2802 - }, - { - "epoch": 0.21430892444138616, - "grad_norm": 0.001299820956774056, - "learning_rate": 0.0001999999774966805, - "loss": 46.0, - "step": 2803 - }, - { - "epoch": 0.2143853814247759, - "grad_norm": 0.0005900601972825825, - "learning_rate": 0.00019999997748056353, - "loss": 46.0, - "step": 2804 - }, - { - "epoch": 0.2144618384081656, - "grad_norm": 0.0011733282590284944, - "learning_rate": 0.00019999997746444076, - "loss": 46.0, - "step": 2805 - }, - { - "epoch": 0.21453829539155533, - "grad_norm": 0.004266679286956787, - "learning_rate": 0.00019999997744831228, - "loss": 46.0, - "step": 2806 - }, - { - "epoch": 0.21461475237494504, - "grad_norm": 0.0005597809795290232, - "learning_rate": 0.000199999977432178, - "loss": 46.0, - "step": 2807 - }, - { - "epoch": 0.21469120935833477, - "grad_norm": 0.0010960650397464633, - "learning_rate": 0.00019999997741603794, - "loss": 46.0, - "step": 2808 - }, - { - "epoch": 0.21476766634172448, - "grad_norm": 0.0012118702288717031, - "learning_rate": 0.00019999997739989214, - "loss": 46.0, - "step": 2809 - }, - { - "epoch": 0.2148441233251142, - "grad_norm": 0.0007235713419504464, - "learning_rate": 0.00019999997738374053, - "loss": 46.0, - "step": 2810 - }, - { - "epoch": 0.21492058030850392, - "grad_norm": 0.02052875980734825, - "learning_rate": 0.00019999997736758318, - "loss": 46.0, - "step": 2811 - }, - { - "epoch": 0.21499703729189365, - "grad_norm": 0.0031311828643083572, - "learning_rate": 0.00019999997735142006, - "loss": 46.0, - "step": 2812 - }, - { - "epoch": 0.21507349427528336, - "grad_norm": 0.0007861799676902592, - "learning_rate": 0.0001999999773352512, - "loss": 46.0, - "step": 2813 - }, - { - "epoch": 0.2151499512586731, - "grad_norm": 0.0007042209035716951, - "learning_rate": 0.00019999997731907652, - "loss": 46.0, - "step": 2814 - }, - { - "epoch": 0.21522640824206282, - "grad_norm": 0.0066701313480734825, - "learning_rate": 0.00019999997730289607, - "loss": 46.0, - "step": 2815 - }, - { - "epoch": 0.21530286522545253, - "grad_norm": 0.0010436145821586251, - "learning_rate": 0.00019999997728670989, - "loss": 46.0, - "step": 2816 - }, - { - "epoch": 0.21537932220884226, - "grad_norm": 0.0008254246204160154, - "learning_rate": 0.0001999999772705179, - "loss": 46.0, - "step": 2817 - }, - { - "epoch": 0.21545577919223197, - "grad_norm": 0.0012020569993183017, - "learning_rate": 0.00019999997725432016, - "loss": 46.0, - "step": 2818 - }, - { - "epoch": 0.2155322361756217, - "grad_norm": 0.0012380763655528426, - "learning_rate": 0.00019999997723811665, - "loss": 46.0, - "step": 2819 - }, - { - "epoch": 0.2156086931590114, - "grad_norm": 0.0006606707465834916, - "learning_rate": 0.00019999997722190737, - "loss": 46.0, - "step": 2820 - }, - { - "epoch": 0.21568515014240114, - "grad_norm": 0.001367033808492124, - "learning_rate": 0.00019999997720569234, - "loss": 46.0, - "step": 2821 - }, - { - "epoch": 0.21576160712579084, - "grad_norm": 0.0010483409278094769, - "learning_rate": 0.0001999999771894715, - "loss": 46.0, - "step": 2822 - }, - { - "epoch": 0.21583806410918058, - "grad_norm": 0.0014846177073195577, - "learning_rate": 0.0001999999771732449, - "loss": 46.0, - "step": 2823 - }, - { - "epoch": 0.21591452109257028, - "grad_norm": 0.0007387480582110584, - "learning_rate": 0.00019999997715701255, - "loss": 46.0, - "step": 2824 - }, - { - "epoch": 0.21599097807596002, - "grad_norm": 0.001421895227394998, - "learning_rate": 0.00019999997714077443, - "loss": 46.0, - "step": 2825 - }, - { - "epoch": 0.21606743505934972, - "grad_norm": 0.0008972487994469702, - "learning_rate": 0.00019999997712453053, - "loss": 46.0, - "step": 2826 - }, - { - "epoch": 0.21614389204273945, - "grad_norm": 0.0010549025610089302, - "learning_rate": 0.00019999997710828086, - "loss": 46.0, - "step": 2827 - }, - { - "epoch": 0.21622034902612916, - "grad_norm": 0.0015169063117355108, - "learning_rate": 0.00019999997709202542, - "loss": 46.0, - "step": 2828 - }, - { - "epoch": 0.2162968060095189, - "grad_norm": 0.0009279120131395757, - "learning_rate": 0.0001999999770757642, - "loss": 46.0, - "step": 2829 - }, - { - "epoch": 0.21637326299290863, - "grad_norm": 0.0007443832000717521, - "learning_rate": 0.00019999997705949724, - "loss": 46.0, - "step": 2830 - }, - { - "epoch": 0.21644971997629833, - "grad_norm": 0.0011436827480793, - "learning_rate": 0.0001999999770432245, - "loss": 46.0, - "step": 2831 - }, - { - "epoch": 0.21652617695968807, - "grad_norm": 0.0015338954981416464, - "learning_rate": 0.00019999997702694597, - "loss": 46.0, - "step": 2832 - }, - { - "epoch": 0.21660263394307777, - "grad_norm": 0.000506790354847908, - "learning_rate": 0.0001999999770106617, - "loss": 46.0, - "step": 2833 - }, - { - "epoch": 0.2166790909264675, - "grad_norm": 0.0006653231102973223, - "learning_rate": 0.00019999997699437166, - "loss": 46.0, - "step": 2834 - }, - { - "epoch": 0.2167555479098572, - "grad_norm": 0.0011177490232512355, - "learning_rate": 0.00019999997697807583, - "loss": 46.0, - "step": 2835 - }, - { - "epoch": 0.21683200489324694, - "grad_norm": 0.0007280634599737823, - "learning_rate": 0.00019999997696177425, - "loss": 46.0, - "step": 2836 - }, - { - "epoch": 0.21690846187663665, - "grad_norm": 0.007233390584588051, - "learning_rate": 0.00019999997694546687, - "loss": 46.0, - "step": 2837 - }, - { - "epoch": 0.21698491886002638, - "grad_norm": 0.0009809202747419477, - "learning_rate": 0.00019999997692915375, - "loss": 46.0, - "step": 2838 - }, - { - "epoch": 0.2170613758434161, - "grad_norm": 0.0008100091945379972, - "learning_rate": 0.00019999997691283485, - "loss": 46.0, - "step": 2839 - }, - { - "epoch": 0.21713783282680582, - "grad_norm": 0.00035515156923793256, - "learning_rate": 0.00019999997689651019, - "loss": 46.0, - "step": 2840 - }, - { - "epoch": 0.21721428981019553, - "grad_norm": 0.0005504949949681759, - "learning_rate": 0.00019999997688017974, - "loss": 46.0, - "step": 2841 - }, - { - "epoch": 0.21729074679358526, - "grad_norm": 0.0009517159196548164, - "learning_rate": 0.00019999997686384353, - "loss": 46.0, - "step": 2842 - }, - { - "epoch": 0.217367203776975, - "grad_norm": 0.0021004853770136833, - "learning_rate": 0.00019999997684750156, - "loss": 46.0, - "step": 2843 - }, - { - "epoch": 0.2174436607603647, - "grad_norm": 0.003291824832558632, - "learning_rate": 0.0001999999768311538, - "loss": 46.0, - "step": 2844 - }, - { - "epoch": 0.21752011774375443, - "grad_norm": 0.002038466976955533, - "learning_rate": 0.0001999999768148003, - "loss": 46.0, - "step": 2845 - }, - { - "epoch": 0.21759657472714414, - "grad_norm": 0.00028456581640057266, - "learning_rate": 0.000199999976798441, - "loss": 46.0, - "step": 2846 - }, - { - "epoch": 0.21767303171053387, - "grad_norm": 0.0016658210661262274, - "learning_rate": 0.00019999997678207593, - "loss": 46.0, - "step": 2847 - }, - { - "epoch": 0.21774948869392358, - "grad_norm": 0.00036686030216515064, - "learning_rate": 0.00019999997676570512, - "loss": 46.0, - "step": 2848 - }, - { - "epoch": 0.2178259456773133, - "grad_norm": 0.0020448833238333464, - "learning_rate": 0.00019999997674932852, - "loss": 46.0, - "step": 2849 - }, - { - "epoch": 0.21790240266070302, - "grad_norm": 0.001440804684534669, - "learning_rate": 0.00019999997673294617, - "loss": 46.0, - "step": 2850 - }, - { - "epoch": 0.21797885964409275, - "grad_norm": 0.0030311725568026304, - "learning_rate": 0.00019999997671655802, - "loss": 46.0, - "step": 2851 - }, - { - "epoch": 0.21805531662748245, - "grad_norm": 0.0007056264439597726, - "learning_rate": 0.00019999997670016413, - "loss": 46.0, - "step": 2852 - }, - { - "epoch": 0.2181317736108722, - "grad_norm": 0.002115414012223482, - "learning_rate": 0.00019999997668376446, - "loss": 46.0, - "step": 2853 - }, - { - "epoch": 0.2182082305942619, - "grad_norm": 0.000498169451020658, - "learning_rate": 0.00019999997666735901, - "loss": 46.0, - "step": 2854 - }, - { - "epoch": 0.21828468757765163, - "grad_norm": 0.002279487904161215, - "learning_rate": 0.0001999999766509478, - "loss": 46.0, - "step": 2855 - }, - { - "epoch": 0.21836114456104133, - "grad_norm": 0.0026159349363297224, - "learning_rate": 0.0001999999766345308, - "loss": 46.0, - "step": 2856 - }, - { - "epoch": 0.21843760154443106, - "grad_norm": 0.0016177184879779816, - "learning_rate": 0.00019999997661810807, - "loss": 46.0, - "step": 2857 - }, - { - "epoch": 0.2185140585278208, - "grad_norm": 0.0008091639028862119, - "learning_rate": 0.00019999997660167954, - "loss": 46.0, - "step": 2858 - }, - { - "epoch": 0.2185905155112105, - "grad_norm": 0.0008049023454077542, - "learning_rate": 0.00019999997658524526, - "loss": 46.0, - "step": 2859 - }, - { - "epoch": 0.21866697249460024, - "grad_norm": 0.002862535184249282, - "learning_rate": 0.0001999999765688052, - "loss": 46.0, - "step": 2860 - }, - { - "epoch": 0.21874342947798994, - "grad_norm": 0.0019411144312471151, - "learning_rate": 0.00019999997655235937, - "loss": 46.0, - "step": 2861 - }, - { - "epoch": 0.21881988646137968, - "grad_norm": 0.0014282979536801577, - "learning_rate": 0.0001999999765359078, - "loss": 46.0, - "step": 2862 - }, - { - "epoch": 0.21889634344476938, - "grad_norm": 0.0007232266943901777, - "learning_rate": 0.00019999997651945042, - "loss": 46.0, - "step": 2863 - }, - { - "epoch": 0.21897280042815911, - "grad_norm": 0.001601203577592969, - "learning_rate": 0.00019999997650298727, - "loss": 46.0, - "step": 2864 - }, - { - "epoch": 0.21904925741154882, - "grad_norm": 0.0010538886999711394, - "learning_rate": 0.00019999997648651835, - "loss": 46.0, - "step": 2865 - }, - { - "epoch": 0.21912571439493855, - "grad_norm": 0.004148329608142376, - "learning_rate": 0.0001999999764700437, - "loss": 46.0, - "step": 2866 - }, - { - "epoch": 0.21920217137832826, - "grad_norm": 0.0009113341220654547, - "learning_rate": 0.00019999997645356326, - "loss": 46.0, - "step": 2867 - }, - { - "epoch": 0.219278628361718, - "grad_norm": 0.0017885052366182208, - "learning_rate": 0.00019999997643707705, - "loss": 46.0, - "step": 2868 - }, - { - "epoch": 0.2193550853451077, - "grad_norm": 0.0013525685062631965, - "learning_rate": 0.00019999997642058506, - "loss": 46.0, - "step": 2869 - }, - { - "epoch": 0.21943154232849743, - "grad_norm": 0.002189547521993518, - "learning_rate": 0.0001999999764040873, - "loss": 46.0, - "step": 2870 - }, - { - "epoch": 0.21950799931188714, - "grad_norm": 0.0014040491078048944, - "learning_rate": 0.00019999997638758376, - "loss": 46.0, - "step": 2871 - }, - { - "epoch": 0.21958445629527687, - "grad_norm": 0.0012876776745542884, - "learning_rate": 0.00019999997637107448, - "loss": 46.0, - "step": 2872 - }, - { - "epoch": 0.2196609132786666, - "grad_norm": 0.0038134800270199776, - "learning_rate": 0.00019999997635455942, - "loss": 46.0, - "step": 2873 - }, - { - "epoch": 0.2197373702620563, - "grad_norm": 0.00109303358476609, - "learning_rate": 0.0001999999763380386, - "loss": 46.0, - "step": 2874 - }, - { - "epoch": 0.21981382724544604, - "grad_norm": 0.001133384765125811, - "learning_rate": 0.000199999976321512, - "loss": 46.0, - "step": 2875 - }, - { - "epoch": 0.21989028422883575, - "grad_norm": 0.0010685939341783524, - "learning_rate": 0.00019999997630497962, - "loss": 46.0, - "step": 2876 - }, - { - "epoch": 0.21996674121222548, - "grad_norm": 0.0007618262316100299, - "learning_rate": 0.00019999997628844147, - "loss": 46.0, - "step": 2877 - }, - { - "epoch": 0.22004319819561519, - "grad_norm": 0.0007574647897854447, - "learning_rate": 0.0001999999762718976, - "loss": 46.0, - "step": 2878 - }, - { - "epoch": 0.22011965517900492, - "grad_norm": 0.0008021695539355278, - "learning_rate": 0.0001999999762553479, - "loss": 46.0, - "step": 2879 - }, - { - "epoch": 0.22019611216239462, - "grad_norm": 0.007086888886988163, - "learning_rate": 0.00019999997623879246, - "loss": 46.0, - "step": 2880 - }, - { - "epoch": 0.22027256914578436, - "grad_norm": 0.0032493509352207184, - "learning_rate": 0.00019999997622223122, - "loss": 46.0, - "step": 2881 - }, - { - "epoch": 0.22034902612917406, - "grad_norm": 0.0014883350813761353, - "learning_rate": 0.00019999997620566426, - "loss": 46.0, - "step": 2882 - }, - { - "epoch": 0.2204254831125638, - "grad_norm": 0.0011161850998178124, - "learning_rate": 0.0001999999761890915, - "loss": 46.0, - "step": 2883 - }, - { - "epoch": 0.2205019400959535, - "grad_norm": 0.001158189377747476, - "learning_rate": 0.00019999997617251296, - "loss": 46.0, - "step": 2884 - }, - { - "epoch": 0.22057839707934324, - "grad_norm": 0.003696287516504526, - "learning_rate": 0.00019999997615592868, - "loss": 46.0, - "step": 2885 - }, - { - "epoch": 0.22065485406273294, - "grad_norm": 0.002970096655189991, - "learning_rate": 0.0001999999761393386, - "loss": 46.0, - "step": 2886 - }, - { - "epoch": 0.22073131104612267, - "grad_norm": 0.0013597555225715041, - "learning_rate": 0.00019999997612274277, - "loss": 46.0, - "step": 2887 - }, - { - "epoch": 0.2208077680295124, - "grad_norm": 0.0021887505427002907, - "learning_rate": 0.00019999997610614117, - "loss": 46.0, - "step": 2888 - }, - { - "epoch": 0.2208842250129021, - "grad_norm": 0.0020471117459237576, - "learning_rate": 0.0001999999760895338, - "loss": 46.0, - "step": 2889 - }, - { - "epoch": 0.22096068199629185, - "grad_norm": 0.003169095842167735, - "learning_rate": 0.00019999997607292067, - "loss": 46.0, - "step": 2890 - }, - { - "epoch": 0.22103713897968155, - "grad_norm": 0.0011828108690679073, - "learning_rate": 0.00019999997605630175, - "loss": 46.0, - "step": 2891 - }, - { - "epoch": 0.22111359596307129, - "grad_norm": 0.0012258938513696194, - "learning_rate": 0.00019999997603967705, - "loss": 46.0, - "step": 2892 - }, - { - "epoch": 0.221190052946461, - "grad_norm": 0.0005020485841669142, - "learning_rate": 0.0001999999760230466, - "loss": 46.0, - "step": 2893 - }, - { - "epoch": 0.22126650992985072, - "grad_norm": 0.002062303712591529, - "learning_rate": 0.0001999999760064104, - "loss": 46.0, - "step": 2894 - }, - { - "epoch": 0.22134296691324043, - "grad_norm": 0.001256912131793797, - "learning_rate": 0.0001999999759897684, - "loss": 46.0, - "step": 2895 - }, - { - "epoch": 0.22141942389663016, - "grad_norm": 0.0018735273042693734, - "learning_rate": 0.00019999997597312065, - "loss": 46.0, - "step": 2896 - }, - { - "epoch": 0.22149588088001987, - "grad_norm": 0.0005598751595243812, - "learning_rate": 0.0001999999759564671, - "loss": 46.0, - "step": 2897 - }, - { - "epoch": 0.2215723378634096, - "grad_norm": 0.0008617884013801813, - "learning_rate": 0.0001999999759398078, - "loss": 46.0, - "step": 2898 - }, - { - "epoch": 0.2216487948467993, - "grad_norm": 0.0007058823830448091, - "learning_rate": 0.00019999997592314275, - "loss": 46.0, - "step": 2899 - }, - { - "epoch": 0.22172525183018904, - "grad_norm": 0.0025898173917084932, - "learning_rate": 0.0001999999759064719, - "loss": 46.0, - "step": 2900 - }, - { - "epoch": 0.22180170881357877, - "grad_norm": 0.000993963680230081, - "learning_rate": 0.00019999997588979532, - "loss": 46.0, - "step": 2901 - }, - { - "epoch": 0.22187816579696848, - "grad_norm": 0.001138947787694633, - "learning_rate": 0.00019999997587311294, - "loss": 46.0, - "step": 2902 - }, - { - "epoch": 0.2219546227803582, - "grad_norm": 0.0008392338640987873, - "learning_rate": 0.00019999997585642477, - "loss": 46.0, - "step": 2903 - }, - { - "epoch": 0.22203107976374792, - "grad_norm": 0.0015987890074029565, - "learning_rate": 0.0001999999758397309, - "loss": 46.0, - "step": 2904 - }, - { - "epoch": 0.22210753674713765, - "grad_norm": 0.002902098698541522, - "learning_rate": 0.00019999997582303118, - "loss": 46.0, - "step": 2905 - }, - { - "epoch": 0.22218399373052736, - "grad_norm": 0.0004394171410240233, - "learning_rate": 0.00019999997580632574, - "loss": 46.0, - "step": 2906 - }, - { - "epoch": 0.2222604507139171, - "grad_norm": 0.0012952984543517232, - "learning_rate": 0.00019999997578961452, - "loss": 46.0, - "step": 2907 - }, - { - "epoch": 0.2223369076973068, - "grad_norm": 0.0004091626906301826, - "learning_rate": 0.00019999997577289753, - "loss": 46.0, - "step": 2908 - }, - { - "epoch": 0.22241336468069653, - "grad_norm": 0.0013748976634815335, - "learning_rate": 0.00019999997575617475, - "loss": 46.0, - "step": 2909 - }, - { - "epoch": 0.22248982166408623, - "grad_norm": 0.0016329112695530057, - "learning_rate": 0.0001999999757394462, - "loss": 46.0, - "step": 2910 - }, - { - "epoch": 0.22256627864747597, - "grad_norm": 0.0006410747300833464, - "learning_rate": 0.00019999997572271193, - "loss": 46.0, - "step": 2911 - }, - { - "epoch": 0.22264273563086567, - "grad_norm": 0.001577257877215743, - "learning_rate": 0.00019999997570597188, - "loss": 46.0, - "step": 2912 - }, - { - "epoch": 0.2227191926142554, - "grad_norm": 0.0024543842300772667, - "learning_rate": 0.00019999997568922605, - "loss": 46.0, - "step": 2913 - }, - { - "epoch": 0.2227956495976451, - "grad_norm": 0.0006878508138470352, - "learning_rate": 0.00019999997567247442, - "loss": 46.0, - "step": 2914 - }, - { - "epoch": 0.22287210658103485, - "grad_norm": 0.0017850800650194287, - "learning_rate": 0.00019999997565571705, - "loss": 46.0, - "step": 2915 - }, - { - "epoch": 0.22294856356442458, - "grad_norm": 0.0016517764888703823, - "learning_rate": 0.0001999999756389539, - "loss": 46.0, - "step": 2916 - }, - { - "epoch": 0.22302502054781428, - "grad_norm": 0.0010513722663745284, - "learning_rate": 0.00019999997562218498, - "loss": 46.0, - "step": 2917 - }, - { - "epoch": 0.22310147753120402, - "grad_norm": 0.0009467603522352874, - "learning_rate": 0.00019999997560541028, - "loss": 46.0, - "step": 2918 - }, - { - "epoch": 0.22317793451459372, - "grad_norm": 0.0006628857227042317, - "learning_rate": 0.00019999997558862984, - "loss": 46.0, - "step": 2919 - }, - { - "epoch": 0.22325439149798346, - "grad_norm": 0.0026683774776756763, - "learning_rate": 0.0001999999755718436, - "loss": 46.0, - "step": 2920 - }, - { - "epoch": 0.22333084848137316, - "grad_norm": 0.0006932035321369767, - "learning_rate": 0.0001999999755550516, - "loss": 46.0, - "step": 2921 - }, - { - "epoch": 0.2234073054647629, - "grad_norm": 0.0029748145025223494, - "learning_rate": 0.00019999997553825385, - "loss": 46.0, - "step": 2922 - }, - { - "epoch": 0.2234837624481526, - "grad_norm": 0.0007320643635466695, - "learning_rate": 0.00019999997552145032, - "loss": 46.0, - "step": 2923 - }, - { - "epoch": 0.22356021943154233, - "grad_norm": 0.0020910215098410845, - "learning_rate": 0.00019999997550464103, - "loss": 46.0, - "step": 2924 - }, - { - "epoch": 0.22363667641493204, - "grad_norm": 0.0012187798274680972, - "learning_rate": 0.00019999997548782595, - "loss": 46.0, - "step": 2925 - }, - { - "epoch": 0.22371313339832177, - "grad_norm": 0.000737123133148998, - "learning_rate": 0.00019999997547100507, - "loss": 46.0, - "step": 2926 - }, - { - "epoch": 0.22378959038171148, - "grad_norm": 0.0008600133005529642, - "learning_rate": 0.00019999997545417847, - "loss": 46.0, - "step": 2927 - }, - { - "epoch": 0.2238660473651012, - "grad_norm": 0.0009574157302267849, - "learning_rate": 0.0001999999754373461, - "loss": 46.0, - "step": 2928 - }, - { - "epoch": 0.22394250434849092, - "grad_norm": 0.0007718966226093471, - "learning_rate": 0.00019999997542050795, - "loss": 46.0, - "step": 2929 - }, - { - "epoch": 0.22401896133188065, - "grad_norm": 0.000957387441303581, - "learning_rate": 0.00019999997540366403, - "loss": 46.0, - "step": 2930 - }, - { - "epoch": 0.22409541831527038, - "grad_norm": 0.0014276178553700447, - "learning_rate": 0.00019999997538681433, - "loss": 46.0, - "step": 2931 - }, - { - "epoch": 0.2241718752986601, - "grad_norm": 0.0013034959556534886, - "learning_rate": 0.0001999999753699589, - "loss": 46.0, - "step": 2932 - }, - { - "epoch": 0.22424833228204982, - "grad_norm": 0.0037667297292500734, - "learning_rate": 0.00019999997535309768, - "loss": 46.0, - "step": 2933 - }, - { - "epoch": 0.22432478926543953, - "grad_norm": 0.00153718376532197, - "learning_rate": 0.00019999997533623064, - "loss": 46.0, - "step": 2934 - }, - { - "epoch": 0.22440124624882926, - "grad_norm": 0.0014878826914355159, - "learning_rate": 0.0001999999753193579, - "loss": 46.0, - "step": 2935 - }, - { - "epoch": 0.22447770323221897, - "grad_norm": 0.005342686548829079, - "learning_rate": 0.00019999997530247937, - "loss": 46.0, - "step": 2936 - }, - { - "epoch": 0.2245541602156087, - "grad_norm": 0.0007300264551304281, - "learning_rate": 0.00019999997528559504, - "loss": 46.0, - "step": 2937 - }, - { - "epoch": 0.2246306171989984, - "grad_norm": 0.006637099198997021, - "learning_rate": 0.00019999997526870495, - "loss": 46.0, - "step": 2938 - }, - { - "epoch": 0.22470707418238814, - "grad_norm": 0.001090914011001587, - "learning_rate": 0.00019999997525180913, - "loss": 46.0, - "step": 2939 - }, - { - "epoch": 0.22478353116577784, - "grad_norm": 0.0008396320044994354, - "learning_rate": 0.0001999999752349075, - "loss": 46.0, - "step": 2940 - }, - { - "epoch": 0.22485998814916758, - "grad_norm": 0.0006463932804763317, - "learning_rate": 0.00019999997521800013, - "loss": 46.0, - "step": 2941 - }, - { - "epoch": 0.22493644513255728, - "grad_norm": 0.0005670917453244328, - "learning_rate": 0.00019999997520108698, - "loss": 46.0, - "step": 2942 - }, - { - "epoch": 0.22501290211594702, - "grad_norm": 0.0013487263349816203, - "learning_rate": 0.00019999997518416803, - "loss": 46.0, - "step": 2943 - }, - { - "epoch": 0.22508935909933672, - "grad_norm": 0.0013837008737027645, - "learning_rate": 0.00019999997516724336, - "loss": 46.0, - "step": 2944 - }, - { - "epoch": 0.22516581608272646, - "grad_norm": 0.003335876390337944, - "learning_rate": 0.0001999999751503129, - "loss": 46.0, - "step": 2945 - }, - { - "epoch": 0.2252422730661162, - "grad_norm": 0.0010610654717311263, - "learning_rate": 0.00019999997513337668, - "loss": 46.0, - "step": 2946 - }, - { - "epoch": 0.2253187300495059, - "grad_norm": 0.0017328435787931085, - "learning_rate": 0.00019999997511643467, - "loss": 46.0, - "step": 2947 - }, - { - "epoch": 0.22539518703289563, - "grad_norm": 0.0016389141092076898, - "learning_rate": 0.00019999997509948688, - "loss": 46.0, - "step": 2948 - }, - { - "epoch": 0.22547164401628533, - "grad_norm": 0.001042242394760251, - "learning_rate": 0.00019999997508253335, - "loss": 46.0, - "step": 2949 - }, - { - "epoch": 0.22554810099967507, - "grad_norm": 0.0008065119036473334, - "learning_rate": 0.00019999997506557404, - "loss": 46.0, - "step": 2950 - }, - { - "epoch": 0.22562455798306477, - "grad_norm": 0.004538060165941715, - "learning_rate": 0.00019999997504860896, - "loss": 46.0, - "step": 2951 - }, - { - "epoch": 0.2257010149664545, - "grad_norm": 0.0008074028301052749, - "learning_rate": 0.00019999997503163813, - "loss": 46.0, - "step": 2952 - }, - { - "epoch": 0.2257774719498442, - "grad_norm": 0.0028297309763729572, - "learning_rate": 0.0001999999750146615, - "loss": 46.0, - "step": 2953 - }, - { - "epoch": 0.22585392893323394, - "grad_norm": 0.009999656118452549, - "learning_rate": 0.0001999999749976791, - "loss": 46.0, - "step": 2954 - }, - { - "epoch": 0.22593038591662365, - "grad_norm": 0.0006880185683257878, - "learning_rate": 0.00019999997498069093, - "loss": 46.0, - "step": 2955 - }, - { - "epoch": 0.22600684290001338, - "grad_norm": 0.0012339602690190077, - "learning_rate": 0.00019999997496369704, - "loss": 46.0, - "step": 2956 - }, - { - "epoch": 0.2260832998834031, - "grad_norm": 0.0008526530582457781, - "learning_rate": 0.00019999997494669734, - "loss": 46.0, - "step": 2957 - }, - { - "epoch": 0.22615975686679282, - "grad_norm": 0.0011052998015657067, - "learning_rate": 0.00019999997492969188, - "loss": 46.0, - "step": 2958 - }, - { - "epoch": 0.22623621385018255, - "grad_norm": 0.0033115132246166468, - "learning_rate": 0.00019999997491268063, - "loss": 46.0, - "step": 2959 - }, - { - "epoch": 0.22631267083357226, - "grad_norm": 0.00083912443369627, - "learning_rate": 0.00019999997489566362, - "loss": 46.0, - "step": 2960 - }, - { - "epoch": 0.226389127816962, - "grad_norm": 0.000600250088609755, - "learning_rate": 0.00019999997487864086, - "loss": 46.0, - "step": 2961 - }, - { - "epoch": 0.2264655848003517, - "grad_norm": 0.0012474660761654377, - "learning_rate": 0.00019999997486161233, - "loss": 46.0, - "step": 2962 - }, - { - "epoch": 0.22654204178374143, - "grad_norm": 0.000983542762696743, - "learning_rate": 0.000199999974844578, - "loss": 46.0, - "step": 2963 - }, - { - "epoch": 0.22661849876713114, - "grad_norm": 0.0013953946763649583, - "learning_rate": 0.00019999997482753794, - "loss": 46.0, - "step": 2964 - }, - { - "epoch": 0.22669495575052087, - "grad_norm": 0.0009575795847922564, - "learning_rate": 0.00019999997481049206, - "loss": 46.0, - "step": 2965 - }, - { - "epoch": 0.22677141273391058, - "grad_norm": 0.0009301762329414487, - "learning_rate": 0.00019999997479344046, - "loss": 46.0, - "step": 2966 - }, - { - "epoch": 0.2268478697173003, - "grad_norm": 0.0007534794858656824, - "learning_rate": 0.00019999997477638306, - "loss": 46.0, - "step": 2967 - }, - { - "epoch": 0.22692432670069002, - "grad_norm": 0.0005373649764806032, - "learning_rate": 0.0001999999747593199, - "loss": 46.0, - "step": 2968 - }, - { - "epoch": 0.22700078368407975, - "grad_norm": 0.0007423251518048346, - "learning_rate": 0.00019999997474225097, - "loss": 46.0, - "step": 2969 - }, - { - "epoch": 0.22707724066746945, - "grad_norm": 0.0018095847917720675, - "learning_rate": 0.00019999997472517627, - "loss": 46.0, - "step": 2970 - }, - { - "epoch": 0.2271536976508592, - "grad_norm": 0.0008475442882627249, - "learning_rate": 0.0001999999747080958, - "loss": 46.0, - "step": 2971 - }, - { - "epoch": 0.2272301546342489, - "grad_norm": 0.0014801191864535213, - "learning_rate": 0.00019999997469100956, - "loss": 46.0, - "step": 2972 - }, - { - "epoch": 0.22730661161763863, - "grad_norm": 0.013205279596149921, - "learning_rate": 0.00019999997467391758, - "loss": 46.0, - "step": 2973 - }, - { - "epoch": 0.22738306860102836, - "grad_norm": 0.0015982815530151129, - "learning_rate": 0.0001999999746568198, - "loss": 46.0, - "step": 2974 - }, - { - "epoch": 0.22745952558441807, - "grad_norm": 0.002632777439430356, - "learning_rate": 0.00019999997463971626, - "loss": 46.0, - "step": 2975 - }, - { - "epoch": 0.2275359825678078, - "grad_norm": 0.0010471882997080684, - "learning_rate": 0.00019999997462260692, - "loss": 46.0, - "step": 2976 - }, - { - "epoch": 0.2276124395511975, - "grad_norm": 0.0012914057588204741, - "learning_rate": 0.00019999997460549184, - "loss": 46.0, - "step": 2977 - }, - { - "epoch": 0.22768889653458724, - "grad_norm": 0.0010279935086145997, - "learning_rate": 0.000199999974588371, - "loss": 46.0, - "step": 2978 - }, - { - "epoch": 0.22776535351797694, - "grad_norm": 0.000953810871578753, - "learning_rate": 0.00019999997457124436, - "loss": 46.0, - "step": 2979 - }, - { - "epoch": 0.22784181050136668, - "grad_norm": 0.0016735795652493834, - "learning_rate": 0.000199999974554112, - "loss": 46.0, - "step": 2980 - }, - { - "epoch": 0.22791826748475638, - "grad_norm": 0.0007070382707752287, - "learning_rate": 0.00019999997453697382, - "loss": 46.0, - "step": 2981 - }, - { - "epoch": 0.22799472446814611, - "grad_norm": 0.0009227624395862222, - "learning_rate": 0.00019999997451982987, - "loss": 46.0, - "step": 2982 - }, - { - "epoch": 0.22807118145153582, - "grad_norm": 0.0018291121814399958, - "learning_rate": 0.00019999997450268018, - "loss": 46.0, - "step": 2983 - }, - { - "epoch": 0.22814763843492555, - "grad_norm": 0.0007183416746556759, - "learning_rate": 0.0001999999744855247, - "loss": 46.0, - "step": 2984 - }, - { - "epoch": 0.22822409541831526, - "grad_norm": 0.0008718360331840813, - "learning_rate": 0.00019999997446836347, - "loss": 46.0, - "step": 2985 - }, - { - "epoch": 0.228300552401705, - "grad_norm": 0.0011529215844348073, - "learning_rate": 0.00019999997445119646, - "loss": 46.0, - "step": 2986 - }, - { - "epoch": 0.2283770093850947, - "grad_norm": 0.001746269059367478, - "learning_rate": 0.0001999999744340237, - "loss": 46.0, - "step": 2987 - }, - { - "epoch": 0.22845346636848443, - "grad_norm": 0.002684320555999875, - "learning_rate": 0.00019999997441684514, - "loss": 46.0, - "step": 2988 - }, - { - "epoch": 0.22852992335187416, - "grad_norm": 0.0008634284022264183, - "learning_rate": 0.00019999997439966083, - "loss": 46.0, - "step": 2989 - }, - { - "epoch": 0.22860638033526387, - "grad_norm": 0.0011859749210998416, - "learning_rate": 0.0001999999743824707, - "loss": 46.0, - "step": 2990 - }, - { - "epoch": 0.2286828373186536, - "grad_norm": 0.00141819822601974, - "learning_rate": 0.00019999997436527487, - "loss": 46.0, - "step": 2991 - }, - { - "epoch": 0.2287592943020433, - "grad_norm": 0.000725681777112186, - "learning_rate": 0.00019999997434807325, - "loss": 46.0, - "step": 2992 - }, - { - "epoch": 0.22883575128543304, - "grad_norm": 0.00043696275679394603, - "learning_rate": 0.00019999997433086585, - "loss": 46.0, - "step": 2993 - }, - { - "epoch": 0.22891220826882275, - "grad_norm": 0.002142752753570676, - "learning_rate": 0.0001999999743136527, - "loss": 46.0, - "step": 2994 - }, - { - "epoch": 0.22898866525221248, - "grad_norm": 0.00040243452531285584, - "learning_rate": 0.00019999997429643375, - "loss": 46.0, - "step": 2995 - }, - { - "epoch": 0.2290651222356022, - "grad_norm": 0.0006108229281380773, - "learning_rate": 0.00019999997427920903, - "loss": 46.0, - "step": 2996 - }, - { - "epoch": 0.22914157921899192, - "grad_norm": 0.0014221464516595006, - "learning_rate": 0.00019999997426197857, - "loss": 46.0, - "step": 2997 - }, - { - "epoch": 0.22921803620238163, - "grad_norm": 0.0011005630949512124, - "learning_rate": 0.00019999997424474233, - "loss": 46.0, - "step": 2998 - }, - { - "epoch": 0.22929449318577136, - "grad_norm": 0.0017183477757498622, - "learning_rate": 0.00019999997422750032, - "loss": 46.0, - "step": 2999 - }, - { - "epoch": 0.22937095016916106, - "grad_norm": 0.0008607763447798789, - "learning_rate": 0.00019999997421025253, - "loss": 46.0, - "step": 3000 - }, - { - "epoch": 0.2294474071525508, - "grad_norm": 0.0015489585930481553, - "learning_rate": 0.000199999974192999, - "loss": 46.0, - "step": 3001 - }, - { - "epoch": 0.2295238641359405, - "grad_norm": 0.0014900440583005548, - "learning_rate": 0.0001999999741757397, - "loss": 46.0, - "step": 3002 - }, - { - "epoch": 0.22960032111933024, - "grad_norm": 0.0009447936317883432, - "learning_rate": 0.00019999997415847458, - "loss": 46.0, - "step": 3003 - }, - { - "epoch": 0.22967677810271997, - "grad_norm": 0.0021479162387549877, - "learning_rate": 0.0001999999741412037, - "loss": 46.0, - "step": 3004 - }, - { - "epoch": 0.22975323508610968, - "grad_norm": 0.0021889146883040667, - "learning_rate": 0.00019999997412392708, - "loss": 46.0, - "step": 3005 - }, - { - "epoch": 0.2298296920694994, - "grad_norm": 0.0009473631507717073, - "learning_rate": 0.00019999997410664468, - "loss": 46.0, - "step": 3006 - }, - { - "epoch": 0.2299061490528891, - "grad_norm": 0.002369271358475089, - "learning_rate": 0.0001999999740893565, - "loss": 46.0, - "step": 3007 - }, - { - "epoch": 0.22998260603627885, - "grad_norm": 0.001820671372115612, - "learning_rate": 0.00019999997407206256, - "loss": 46.0, - "step": 3008 - }, - { - "epoch": 0.23005906301966855, - "grad_norm": 0.0010885591618716717, - "learning_rate": 0.00019999997405476287, - "loss": 46.0, - "step": 3009 - }, - { - "epoch": 0.23013552000305829, - "grad_norm": 0.0003443363239057362, - "learning_rate": 0.0001999999740374574, - "loss": 46.0, - "step": 3010 - }, - { - "epoch": 0.230211976986448, - "grad_norm": 0.0011062311241403222, - "learning_rate": 0.00019999997402014613, - "loss": 46.0, - "step": 3011 - }, - { - "epoch": 0.23028843396983772, - "grad_norm": 0.0027726131957024336, - "learning_rate": 0.00019999997400282915, - "loss": 46.0, - "step": 3012 - }, - { - "epoch": 0.23036489095322743, - "grad_norm": 0.001121298409998417, - "learning_rate": 0.00019999997398550634, - "loss": 46.0, - "step": 3013 - }, - { - "epoch": 0.23044134793661716, - "grad_norm": 0.0020850275177508593, - "learning_rate": 0.0001999999739681778, - "loss": 46.0, - "step": 3014 - }, - { - "epoch": 0.23051780492000687, - "grad_norm": 0.00258211069740355, - "learning_rate": 0.0001999999739508435, - "loss": 46.0, - "step": 3015 - }, - { - "epoch": 0.2305942619033966, - "grad_norm": 0.0010522013762965798, - "learning_rate": 0.0001999999739335034, - "loss": 46.0, - "step": 3016 - }, - { - "epoch": 0.23067071888678634, - "grad_norm": 0.0008258194429799914, - "learning_rate": 0.0001999999739161575, - "loss": 46.0, - "step": 3017 - }, - { - "epoch": 0.23074717587017604, - "grad_norm": 0.0010189589811488986, - "learning_rate": 0.0001999999738988059, - "loss": 46.0, - "step": 3018 - }, - { - "epoch": 0.23082363285356577, - "grad_norm": 0.0010131691815331578, - "learning_rate": 0.0001999999738814485, - "loss": 46.0, - "step": 3019 - }, - { - "epoch": 0.23090008983695548, - "grad_norm": 0.0065146577544510365, - "learning_rate": 0.00019999997386408532, - "loss": 46.0, - "step": 3020 - }, - { - "epoch": 0.2309765468203452, - "grad_norm": 0.0011046944418922067, - "learning_rate": 0.00019999997384671637, - "loss": 46.0, - "step": 3021 - }, - { - "epoch": 0.23105300380373492, - "grad_norm": 0.000710965774487704, - "learning_rate": 0.00019999997382934165, - "loss": 46.0, - "step": 3022 - }, - { - "epoch": 0.23112946078712465, - "grad_norm": 0.0033271685242652893, - "learning_rate": 0.00019999997381196118, - "loss": 46.0, - "step": 3023 - }, - { - "epoch": 0.23120591777051436, - "grad_norm": 0.000987824983894825, - "learning_rate": 0.00019999997379457495, - "loss": 46.0, - "step": 3024 - }, - { - "epoch": 0.2312823747539041, - "grad_norm": 0.0006469162763096392, - "learning_rate": 0.00019999997377718293, - "loss": 46.0, - "step": 3025 - }, - { - "epoch": 0.2313588317372938, - "grad_norm": 0.0033174417912960052, - "learning_rate": 0.00019999997375978512, - "loss": 46.0, - "step": 3026 - }, - { - "epoch": 0.23143528872068353, - "grad_norm": 0.0010534895118325949, - "learning_rate": 0.00019999997374238156, - "loss": 46.0, - "step": 3027 - }, - { - "epoch": 0.23151174570407324, - "grad_norm": 0.0005828223074786365, - "learning_rate": 0.00019999997372497226, - "loss": 46.0, - "step": 3028 - }, - { - "epoch": 0.23158820268746297, - "grad_norm": 0.0008897861116565764, - "learning_rate": 0.00019999997370755718, - "loss": 46.0, - "step": 3029 - }, - { - "epoch": 0.23166465967085267, - "grad_norm": 0.0008952495409175754, - "learning_rate": 0.0001999999736901363, - "loss": 46.0, - "step": 3030 - }, - { - "epoch": 0.2317411166542424, - "grad_norm": 0.0005366086261346936, - "learning_rate": 0.00019999997367270967, - "loss": 46.0, - "step": 3031 - }, - { - "epoch": 0.23181757363763214, - "grad_norm": 0.0013184205163270235, - "learning_rate": 0.00019999997365527725, - "loss": 46.0, - "step": 3032 - }, - { - "epoch": 0.23189403062102185, - "grad_norm": 0.0017260562162846327, - "learning_rate": 0.00019999997363783907, - "loss": 46.0, - "step": 3033 - }, - { - "epoch": 0.23197048760441158, - "grad_norm": 0.002803966635838151, - "learning_rate": 0.00019999997362039513, - "loss": 46.0, - "step": 3034 - }, - { - "epoch": 0.23204694458780128, - "grad_norm": 0.001317357993684709, - "learning_rate": 0.0001999999736029454, - "loss": 46.0, - "step": 3035 - }, - { - "epoch": 0.23212340157119102, - "grad_norm": 0.00045207326184026897, - "learning_rate": 0.00019999997358548994, - "loss": 46.0, - "step": 3036 - }, - { - "epoch": 0.23219985855458072, - "grad_norm": 0.0015102846082299948, - "learning_rate": 0.00019999997356802868, - "loss": 46.0, - "step": 3037 - }, - { - "epoch": 0.23227631553797046, - "grad_norm": 0.0023084136191755533, - "learning_rate": 0.0001999999735505617, - "loss": 46.0, - "step": 3038 - }, - { - "epoch": 0.23235277252136016, - "grad_norm": 0.0022218015510588884, - "learning_rate": 0.00019999997353308888, - "loss": 46.0, - "step": 3039 - }, - { - "epoch": 0.2324292295047499, - "grad_norm": 0.0018917821580544114, - "learning_rate": 0.0001999999735156103, - "loss": 46.0, - "step": 3040 - }, - { - "epoch": 0.2325056864881396, - "grad_norm": 0.0011300478363409638, - "learning_rate": 0.000199999973498126, - "loss": 46.0, - "step": 3041 - }, - { - "epoch": 0.23258214347152933, - "grad_norm": 0.0010690023191273212, - "learning_rate": 0.00019999997348063589, - "loss": 46.0, - "step": 3042 - }, - { - "epoch": 0.23265860045491904, - "grad_norm": 0.0008099564583972096, - "learning_rate": 0.00019999997346314, - "loss": 46.0, - "step": 3043 - }, - { - "epoch": 0.23273505743830877, - "grad_norm": 0.0010447854874655604, - "learning_rate": 0.00019999997344563835, - "loss": 46.0, - "step": 3044 - }, - { - "epoch": 0.23281151442169848, - "grad_norm": 0.002069528680294752, - "learning_rate": 0.00019999997342813098, - "loss": 46.0, - "step": 3045 - }, - { - "epoch": 0.2328879714050882, - "grad_norm": 0.0036557926796376705, - "learning_rate": 0.0001999999734106178, - "loss": 46.0, - "step": 3046 - }, - { - "epoch": 0.23296442838847795, - "grad_norm": 0.0016329380450770259, - "learning_rate": 0.00019999997339309884, - "loss": 46.0, - "step": 3047 - }, - { - "epoch": 0.23304088537186765, - "grad_norm": 0.0015899931313470006, - "learning_rate": 0.00019999997337557415, - "loss": 46.0, - "step": 3048 - }, - { - "epoch": 0.23311734235525738, - "grad_norm": 0.0011713940184563398, - "learning_rate": 0.00019999997335804363, - "loss": 46.0, - "step": 3049 - }, - { - "epoch": 0.2331937993386471, - "grad_norm": 0.0031643749680370092, - "learning_rate": 0.0001999999733405074, - "loss": 46.0, - "step": 3050 - }, - { - "epoch": 0.23327025632203682, - "grad_norm": 0.0034488870296627283, - "learning_rate": 0.00019999997332296538, - "loss": 46.0, - "step": 3051 - }, - { - "epoch": 0.23334671330542653, - "grad_norm": 0.0013652169145643711, - "learning_rate": 0.00019999997330541757, - "loss": 46.0, - "step": 3052 - }, - { - "epoch": 0.23342317028881626, - "grad_norm": 0.0006173512665554881, - "learning_rate": 0.000199999973287864, - "loss": 46.0, - "step": 3053 - }, - { - "epoch": 0.23349962727220597, - "grad_norm": 0.0008447043946944177, - "learning_rate": 0.00019999997327030468, - "loss": 46.0, - "step": 3054 - }, - { - "epoch": 0.2335760842555957, - "grad_norm": 0.0014501862460747361, - "learning_rate": 0.00019999997325273957, - "loss": 46.0, - "step": 3055 - }, - { - "epoch": 0.2336525412389854, - "grad_norm": 0.0009118857560679317, - "learning_rate": 0.00019999997323516872, - "loss": 46.0, - "step": 3056 - }, - { - "epoch": 0.23372899822237514, - "grad_norm": 0.001235531410202384, - "learning_rate": 0.00019999997321759207, - "loss": 46.0, - "step": 3057 - }, - { - "epoch": 0.23380545520576484, - "grad_norm": 0.0018249572021886706, - "learning_rate": 0.00019999997320000964, - "loss": 46.0, - "step": 3058 - }, - { - "epoch": 0.23388191218915458, - "grad_norm": 0.0022003059275448322, - "learning_rate": 0.0001999999731824215, - "loss": 46.0, - "step": 3059 - }, - { - "epoch": 0.23395836917254428, - "grad_norm": 0.005554493051022291, - "learning_rate": 0.00019999997316482752, - "loss": 46.0, - "step": 3060 - }, - { - "epoch": 0.23403482615593402, - "grad_norm": 0.003695337101817131, - "learning_rate": 0.00019999997314722783, - "loss": 46.0, - "step": 3061 - }, - { - "epoch": 0.23411128313932375, - "grad_norm": 0.0036623438354581594, - "learning_rate": 0.00019999997312962234, - "loss": 46.0, - "step": 3062 - }, - { - "epoch": 0.23418774012271346, - "grad_norm": 0.0017200737493112683, - "learning_rate": 0.00019999997311201105, - "loss": 46.0, - "step": 3063 - }, - { - "epoch": 0.2342641971061032, - "grad_norm": 0.0003641015209723264, - "learning_rate": 0.00019999997309439404, - "loss": 46.0, - "step": 3064 - }, - { - "epoch": 0.2343406540894929, - "grad_norm": 0.0006425449973903596, - "learning_rate": 0.00019999997307677125, - "loss": 46.0, - "step": 3065 - }, - { - "epoch": 0.23441711107288263, - "grad_norm": 0.001516220043413341, - "learning_rate": 0.0001999999730591427, - "loss": 46.0, - "step": 3066 - }, - { - "epoch": 0.23449356805627233, - "grad_norm": 0.0014770985580980778, - "learning_rate": 0.00019999997304150834, - "loss": 46.0, - "step": 3067 - }, - { - "epoch": 0.23457002503966207, - "grad_norm": 0.0016748914495110512, - "learning_rate": 0.00019999997302386823, - "loss": 46.0, - "step": 3068 - }, - { - "epoch": 0.23464648202305177, - "grad_norm": 0.0016839439049363136, - "learning_rate": 0.00019999997300622235, - "loss": 46.0, - "step": 3069 - }, - { - "epoch": 0.2347229390064415, - "grad_norm": 0.00153891381341964, - "learning_rate": 0.00019999997298857073, - "loss": 46.0, - "step": 3070 - }, - { - "epoch": 0.2347993959898312, - "grad_norm": 0.0013383397599682212, - "learning_rate": 0.0001999999729709133, - "loss": 46.0, - "step": 3071 - }, - { - "epoch": 0.23487585297322094, - "grad_norm": 0.0011115333763882518, - "learning_rate": 0.00019999997295325013, - "loss": 46.0, - "step": 3072 - }, - { - "epoch": 0.23495230995661065, - "grad_norm": 0.0012283881660550833, - "learning_rate": 0.00019999997293558116, - "loss": 46.0, - "step": 3073 - }, - { - "epoch": 0.23502876694000038, - "grad_norm": 0.002778014400973916, - "learning_rate": 0.00019999997291790647, - "loss": 46.0, - "step": 3074 - }, - { - "epoch": 0.23510522392339012, - "grad_norm": 0.0013519068015739322, - "learning_rate": 0.00019999997290022595, - "loss": 46.0, - "step": 3075 - }, - { - "epoch": 0.23518168090677982, - "grad_norm": 0.0013388131046667695, - "learning_rate": 0.00019999997288253971, - "loss": 46.0, - "step": 3076 - }, - { - "epoch": 0.23525813789016956, - "grad_norm": 0.0007907307590357959, - "learning_rate": 0.00019999997286484765, - "loss": 46.0, - "step": 3077 - }, - { - "epoch": 0.23533459487355926, - "grad_norm": 0.0009382362477481365, - "learning_rate": 0.00019999997284714986, - "loss": 46.0, - "step": 3078 - }, - { - "epoch": 0.235411051856949, - "grad_norm": 0.014919286593794823, - "learning_rate": 0.00019999997282944628, - "loss": 46.0, - "step": 3079 - }, - { - "epoch": 0.2354875088403387, - "grad_norm": 0.0009769061580300331, - "learning_rate": 0.00019999997281173698, - "loss": 46.0, - "step": 3080 - }, - { - "epoch": 0.23556396582372843, - "grad_norm": 0.008831021375954151, - "learning_rate": 0.00019999997279402187, - "loss": 46.0, - "step": 3081 - }, - { - "epoch": 0.23564042280711814, - "grad_norm": 0.0016875851433724165, - "learning_rate": 0.000199999972776301, - "loss": 46.0, - "step": 3082 - }, - { - "epoch": 0.23571687979050787, - "grad_norm": 0.0007098687347024679, - "learning_rate": 0.00019999997275857434, - "loss": 46.0, - "step": 3083 - }, - { - "epoch": 0.23579333677389758, - "grad_norm": 0.0028730009216815233, - "learning_rate": 0.00019999997274084192, - "loss": 46.0, - "step": 3084 - }, - { - "epoch": 0.2358697937572873, - "grad_norm": 0.00518947746604681, - "learning_rate": 0.00019999997272310375, - "loss": 46.0, - "step": 3085 - }, - { - "epoch": 0.23594625074067702, - "grad_norm": 0.00037465529749169946, - "learning_rate": 0.00019999997270535977, - "loss": 46.0, - "step": 3086 - }, - { - "epoch": 0.23602270772406675, - "grad_norm": 0.002074565039947629, - "learning_rate": 0.00019999997268761006, - "loss": 46.0, - "step": 3087 - }, - { - "epoch": 0.23609916470745645, - "grad_norm": 0.00031881104223430157, - "learning_rate": 0.00019999997266985457, - "loss": 46.0, - "step": 3088 - }, - { - "epoch": 0.2361756216908462, - "grad_norm": 0.0008691286784596741, - "learning_rate": 0.0001999999726520933, - "loss": 46.0, - "step": 3089 - }, - { - "epoch": 0.23625207867423592, - "grad_norm": 0.0007430244004353881, - "learning_rate": 0.0001999999726343263, - "loss": 46.0, - "step": 3090 - }, - { - "epoch": 0.23632853565762563, - "grad_norm": 0.004080537706613541, - "learning_rate": 0.00019999997261655348, - "loss": 46.0, - "step": 3091 - }, - { - "epoch": 0.23640499264101536, - "grad_norm": 0.0008010346791706979, - "learning_rate": 0.0001999999725987749, - "loss": 46.0, - "step": 3092 - }, - { - "epoch": 0.23648144962440507, - "grad_norm": 0.0005251724505797029, - "learning_rate": 0.00019999997258099054, - "loss": 46.0, - "step": 3093 - }, - { - "epoch": 0.2365579066077948, - "grad_norm": 0.0018075796542689204, - "learning_rate": 0.00019999997256320043, - "loss": 46.0, - "step": 3094 - }, - { - "epoch": 0.2366343635911845, - "grad_norm": 0.0024680241476744413, - "learning_rate": 0.00019999997254540455, - "loss": 46.0, - "step": 3095 - }, - { - "epoch": 0.23671082057457424, - "grad_norm": 0.0018383264541625977, - "learning_rate": 0.00019999997252760293, - "loss": 46.0, - "step": 3096 - }, - { - "epoch": 0.23678727755796394, - "grad_norm": 0.001028991537168622, - "learning_rate": 0.0001999999725097955, - "loss": 46.0, - "step": 3097 - }, - { - "epoch": 0.23686373454135368, - "grad_norm": 0.0008818877977319062, - "learning_rate": 0.00019999997249198234, - "loss": 46.0, - "step": 3098 - }, - { - "epoch": 0.23694019152474338, - "grad_norm": 0.0019731391221284866, - "learning_rate": 0.00019999997247416336, - "loss": 46.0, - "step": 3099 - }, - { - "epoch": 0.23701664850813312, - "grad_norm": 0.0007807589136064053, - "learning_rate": 0.00019999997245633862, - "loss": 46.0, - "step": 3100 - }, - { - "epoch": 0.23709310549152282, - "grad_norm": 0.0025067089591175318, - "learning_rate": 0.00019999997243850813, - "loss": 46.0, - "step": 3101 - }, - { - "epoch": 0.23716956247491255, - "grad_norm": 0.003142465138807893, - "learning_rate": 0.00019999997242067187, - "loss": 46.0, - "step": 3102 - }, - { - "epoch": 0.23724601945830226, - "grad_norm": 0.0017502025002613664, - "learning_rate": 0.00019999997240282983, - "loss": 46.0, - "step": 3103 - }, - { - "epoch": 0.237322476441692, - "grad_norm": 0.0007660277769900858, - "learning_rate": 0.00019999997238498202, - "loss": 46.0, - "step": 3104 - }, - { - "epoch": 0.23739893342508173, - "grad_norm": 0.0021017694380134344, - "learning_rate": 0.00019999997236712846, - "loss": 46.0, - "step": 3105 - }, - { - "epoch": 0.23747539040847143, - "grad_norm": 0.0015855930978432298, - "learning_rate": 0.0001999999723492691, - "loss": 46.0, - "step": 3106 - }, - { - "epoch": 0.23755184739186117, - "grad_norm": 0.0011727524688467383, - "learning_rate": 0.000199999972331404, - "loss": 46.0, - "step": 3107 - }, - { - "epoch": 0.23762830437525087, - "grad_norm": 0.0010773026151582599, - "learning_rate": 0.00019999997231353312, - "loss": 46.0, - "step": 3108 - }, - { - "epoch": 0.2377047613586406, - "grad_norm": 0.0005457886145450175, - "learning_rate": 0.00019999997229565647, - "loss": 46.0, - "step": 3109 - }, - { - "epoch": 0.2377812183420303, - "grad_norm": 0.001725172158330679, - "learning_rate": 0.00019999997227777402, - "loss": 46.0, - "step": 3110 - }, - { - "epoch": 0.23785767532542004, - "grad_norm": 0.0009092254331335425, - "learning_rate": 0.00019999997225988585, - "loss": 46.0, - "step": 3111 - }, - { - "epoch": 0.23793413230880975, - "grad_norm": 0.00058984593488276, - "learning_rate": 0.0001999999722419919, - "loss": 46.0, - "step": 3112 - }, - { - "epoch": 0.23801058929219948, - "grad_norm": 0.0009242399828508496, - "learning_rate": 0.00019999997222409216, - "loss": 46.0, - "step": 3113 - }, - { - "epoch": 0.2380870462755892, - "grad_norm": 0.0006454035174101591, - "learning_rate": 0.00019999997220618667, - "loss": 46.0, - "step": 3114 - }, - { - "epoch": 0.23816350325897892, - "grad_norm": 0.002925480017438531, - "learning_rate": 0.0001999999721882754, - "loss": 46.0, - "step": 3115 - }, - { - "epoch": 0.23823996024236863, - "grad_norm": 0.0006803838186897337, - "learning_rate": 0.00019999997217035834, - "loss": 46.0, - "step": 3116 - }, - { - "epoch": 0.23831641722575836, - "grad_norm": 0.0006977599114179611, - "learning_rate": 0.00019999997215243553, - "loss": 46.0, - "step": 3117 - }, - { - "epoch": 0.23839287420914806, - "grad_norm": 0.00117528869304806, - "learning_rate": 0.00019999997213450697, - "loss": 46.0, - "step": 3118 - }, - { - "epoch": 0.2384693311925378, - "grad_norm": 0.0012723889667540789, - "learning_rate": 0.00019999997211657262, - "loss": 46.0, - "step": 3119 - }, - { - "epoch": 0.23854578817592753, - "grad_norm": 0.0007753327372483909, - "learning_rate": 0.00019999997209863251, - "loss": 46.0, - "step": 3120 - }, - { - "epoch": 0.23862224515931724, - "grad_norm": 0.003216144861653447, - "learning_rate": 0.0001999999720806866, - "loss": 46.0, - "step": 3121 - }, - { - "epoch": 0.23869870214270697, - "grad_norm": 0.0016340861329808831, - "learning_rate": 0.000199999972062735, - "loss": 46.0, - "step": 3122 - }, - { - "epoch": 0.23877515912609668, - "grad_norm": 0.001154883299022913, - "learning_rate": 0.00019999997204477754, - "loss": 46.0, - "step": 3123 - }, - { - "epoch": 0.2388516161094864, - "grad_norm": 0.0010707852197811007, - "learning_rate": 0.00019999997202681434, - "loss": 46.0, - "step": 3124 - }, - { - "epoch": 0.23892807309287611, - "grad_norm": 0.0013649303000420332, - "learning_rate": 0.00019999997200884537, - "loss": 46.0, - "step": 3125 - }, - { - "epoch": 0.23900453007626585, - "grad_norm": 0.0008350344724021852, - "learning_rate": 0.00019999997199087065, - "loss": 46.0, - "step": 3126 - }, - { - "epoch": 0.23908098705965555, - "grad_norm": 0.003300977637991309, - "learning_rate": 0.00019999997197289013, - "loss": 46.0, - "step": 3127 - }, - { - "epoch": 0.2391574440430453, - "grad_norm": 0.0006466025370173156, - "learning_rate": 0.00019999997195490387, - "loss": 46.0, - "step": 3128 - }, - { - "epoch": 0.239233901026435, - "grad_norm": 0.0007208252209238708, - "learning_rate": 0.00019999997193691183, - "loss": 46.0, - "step": 3129 - }, - { - "epoch": 0.23931035800982473, - "grad_norm": 0.000784624891821295, - "learning_rate": 0.00019999997191891402, - "loss": 46.0, - "step": 3130 - }, - { - "epoch": 0.23938681499321443, - "grad_norm": 0.001098365057259798, - "learning_rate": 0.00019999997190091044, - "loss": 46.0, - "step": 3131 - }, - { - "epoch": 0.23946327197660416, - "grad_norm": 0.0013156288769096136, - "learning_rate": 0.00019999997188290108, - "loss": 46.0, - "step": 3132 - }, - { - "epoch": 0.2395397289599939, - "grad_norm": 0.005401108413934708, - "learning_rate": 0.00019999997186488598, - "loss": 46.0, - "step": 3133 - }, - { - "epoch": 0.2396161859433836, - "grad_norm": 0.0022071991115808487, - "learning_rate": 0.00019999997184686508, - "loss": 46.0, - "step": 3134 - }, - { - "epoch": 0.23969264292677334, - "grad_norm": 0.0017125934828072786, - "learning_rate": 0.00019999997182883843, - "loss": 46.0, - "step": 3135 - }, - { - "epoch": 0.23976909991016304, - "grad_norm": 0.0014313082210719585, - "learning_rate": 0.000199999971810806, - "loss": 46.0, - "step": 3136 - }, - { - "epoch": 0.23984555689355277, - "grad_norm": 0.001316345063969493, - "learning_rate": 0.0001999999717927678, - "loss": 46.0, - "step": 3137 - }, - { - "epoch": 0.23992201387694248, - "grad_norm": 0.0031909095123410225, - "learning_rate": 0.00019999997177472386, - "loss": 46.0, - "step": 3138 - }, - { - "epoch": 0.2399984708603322, - "grad_norm": 0.0016524961683899164, - "learning_rate": 0.00019999997175667412, - "loss": 46.0, - "step": 3139 - }, - { - "epoch": 0.24007492784372192, - "grad_norm": 0.0006484754267148674, - "learning_rate": 0.0001999999717386186, - "loss": 46.0, - "step": 3140 - }, - { - "epoch": 0.24015138482711165, - "grad_norm": 0.0012863075826317072, - "learning_rate": 0.0001999999717205573, - "loss": 46.0, - "step": 3141 - }, - { - "epoch": 0.24022784181050136, - "grad_norm": 0.0014462950639426708, - "learning_rate": 0.00019999997170249027, - "loss": 46.0, - "step": 3142 - }, - { - "epoch": 0.2403042987938911, - "grad_norm": 0.00083252222975716, - "learning_rate": 0.00019999997168441746, - "loss": 46.0, - "step": 3143 - }, - { - "epoch": 0.2403807557772808, - "grad_norm": 0.0022779309656471014, - "learning_rate": 0.00019999997166633888, - "loss": 46.0, - "step": 3144 - }, - { - "epoch": 0.24045721276067053, - "grad_norm": 0.0013867767993360758, - "learning_rate": 0.00019999997164825452, - "loss": 46.0, - "step": 3145 - }, - { - "epoch": 0.24053366974406024, - "grad_norm": 0.002565558534115553, - "learning_rate": 0.00019999997163016442, - "loss": 46.0, - "step": 3146 - }, - { - "epoch": 0.24061012672744997, - "grad_norm": 0.0019565375987440348, - "learning_rate": 0.00019999997161206852, - "loss": 46.0, - "step": 3147 - }, - { - "epoch": 0.2406865837108397, - "grad_norm": 0.0009911081288009882, - "learning_rate": 0.00019999997159396684, - "loss": 46.0, - "step": 3148 - }, - { - "epoch": 0.2407630406942294, - "grad_norm": 0.001760189770720899, - "learning_rate": 0.00019999997157585942, - "loss": 46.0, - "step": 3149 - }, - { - "epoch": 0.24083949767761914, - "grad_norm": 0.0032243893947452307, - "learning_rate": 0.00019999997155774622, - "loss": 46.0, - "step": 3150 - }, - { - "epoch": 0.24091595466100885, - "grad_norm": 0.004657264798879623, - "learning_rate": 0.00019999997153962725, - "loss": 46.0, - "step": 3151 - }, - { - "epoch": 0.24099241164439858, - "grad_norm": 0.0016785510815680027, - "learning_rate": 0.0001999999715215025, - "loss": 46.0, - "step": 3152 - }, - { - "epoch": 0.24106886862778829, - "grad_norm": 0.0014704017667099833, - "learning_rate": 0.00019999997150337202, - "loss": 46.0, - "step": 3153 - }, - { - "epoch": 0.24114532561117802, - "grad_norm": 0.0009119369206018746, - "learning_rate": 0.00019999997148523573, - "loss": 46.0, - "step": 3154 - }, - { - "epoch": 0.24122178259456772, - "grad_norm": 0.006077085621654987, - "learning_rate": 0.0001999999714670937, - "loss": 46.0, - "step": 3155 - }, - { - "epoch": 0.24129823957795746, - "grad_norm": 0.010737337172031403, - "learning_rate": 0.00019999997144894586, - "loss": 46.0, - "step": 3156 - }, - { - "epoch": 0.24137469656134716, - "grad_norm": 0.0013041161000728607, - "learning_rate": 0.00019999997143079227, - "loss": 46.0, - "step": 3157 - }, - { - "epoch": 0.2414511535447369, - "grad_norm": 0.0010203189449384809, - "learning_rate": 0.00019999997141263292, - "loss": 46.0, - "step": 3158 - }, - { - "epoch": 0.2415276105281266, - "grad_norm": 0.007857026532292366, - "learning_rate": 0.0001999999713944678, - "loss": 46.0, - "step": 3159 - }, - { - "epoch": 0.24160406751151633, - "grad_norm": 0.0016786643536761403, - "learning_rate": 0.0001999999713762969, - "loss": 46.0, - "step": 3160 - }, - { - "epoch": 0.24168052449490604, - "grad_norm": 0.0016964125679805875, - "learning_rate": 0.00019999997135812026, - "loss": 46.0, - "step": 3161 - }, - { - "epoch": 0.24175698147829577, - "grad_norm": 0.0016391290118917823, - "learning_rate": 0.0001999999713399378, - "loss": 46.0, - "step": 3162 - }, - { - "epoch": 0.2418334384616855, - "grad_norm": 0.0015250424621626735, - "learning_rate": 0.00019999997132174962, - "loss": 46.0, - "step": 3163 - }, - { - "epoch": 0.2419098954450752, - "grad_norm": 0.0009343001875095069, - "learning_rate": 0.00019999997130355565, - "loss": 46.0, - "step": 3164 - }, - { - "epoch": 0.24198635242846495, - "grad_norm": 0.0010346361668780446, - "learning_rate": 0.00019999997128535588, - "loss": 46.0, - "step": 3165 - }, - { - "epoch": 0.24206280941185465, - "grad_norm": 0.0010919218184426427, - "learning_rate": 0.0001999999712671504, - "loss": 46.0, - "step": 3166 - }, - { - "epoch": 0.24213926639524438, - "grad_norm": 0.0014733336865901947, - "learning_rate": 0.0001999999712489391, - "loss": 46.0, - "step": 3167 - }, - { - "epoch": 0.2422157233786341, - "grad_norm": 0.004036740865558386, - "learning_rate": 0.00019999997123072206, - "loss": 46.0, - "step": 3168 - }, - { - "epoch": 0.24229218036202382, - "grad_norm": 0.0006784771103411913, - "learning_rate": 0.00019999997121249922, - "loss": 46.0, - "step": 3169 - }, - { - "epoch": 0.24236863734541353, - "grad_norm": 0.0014110878109931946, - "learning_rate": 0.00019999997119427064, - "loss": 46.0, - "step": 3170 - }, - { - "epoch": 0.24244509432880326, - "grad_norm": 0.001428284216672182, - "learning_rate": 0.00019999997117603629, - "loss": 46.0, - "step": 3171 - }, - { - "epoch": 0.24252155131219297, - "grad_norm": 0.0006259740912355483, - "learning_rate": 0.00019999997115779616, - "loss": 46.0, - "step": 3172 - }, - { - "epoch": 0.2425980082955827, - "grad_norm": 0.0008021716494113207, - "learning_rate": 0.00019999997113955028, - "loss": 46.0, - "step": 3173 - }, - { - "epoch": 0.2426744652789724, - "grad_norm": 0.0010673960205167532, - "learning_rate": 0.0001999999711212986, - "loss": 46.0, - "step": 3174 - }, - { - "epoch": 0.24275092226236214, - "grad_norm": 0.0027660317718982697, - "learning_rate": 0.00019999997110304115, - "loss": 46.0, - "step": 3175 - }, - { - "epoch": 0.24282737924575185, - "grad_norm": 0.0016346651827916503, - "learning_rate": 0.00019999997108477796, - "loss": 46.0, - "step": 3176 - }, - { - "epoch": 0.24290383622914158, - "grad_norm": 0.001536539988592267, - "learning_rate": 0.000199999971066509, - "loss": 46.0, - "step": 3177 - }, - { - "epoch": 0.2429802932125313, - "grad_norm": 0.0007707267068326473, - "learning_rate": 0.00019999997104823422, - "loss": 46.0, - "step": 3178 - }, - { - "epoch": 0.24305675019592102, - "grad_norm": 0.001259678159840405, - "learning_rate": 0.00019999997102995373, - "loss": 46.0, - "step": 3179 - }, - { - "epoch": 0.24313320717931075, - "grad_norm": 0.0007284677121788263, - "learning_rate": 0.00019999997101166744, - "loss": 46.0, - "step": 3180 - }, - { - "epoch": 0.24320966416270046, - "grad_norm": 0.0007023262442089617, - "learning_rate": 0.0001999999709933754, - "loss": 46.0, - "step": 3181 - }, - { - "epoch": 0.2432861211460902, - "grad_norm": 0.0027272161096334457, - "learning_rate": 0.00019999997097507757, - "loss": 46.0, - "step": 3182 - }, - { - "epoch": 0.2433625781294799, - "grad_norm": 0.0008949391194619238, - "learning_rate": 0.00019999997095677396, - "loss": 46.0, - "step": 3183 - }, - { - "epoch": 0.24343903511286963, - "grad_norm": 0.001690986449830234, - "learning_rate": 0.0001999999709384646, - "loss": 46.0, - "step": 3184 - }, - { - "epoch": 0.24351549209625933, - "grad_norm": 0.0010557736968621612, - "learning_rate": 0.00019999997092014948, - "loss": 46.0, - "step": 3185 - }, - { - "epoch": 0.24359194907964907, - "grad_norm": 0.0006213740562088788, - "learning_rate": 0.00019999997090182857, - "loss": 46.0, - "step": 3186 - }, - { - "epoch": 0.24366840606303877, - "grad_norm": 0.001995612168684602, - "learning_rate": 0.0001999999708835019, - "loss": 46.0, - "step": 3187 - }, - { - "epoch": 0.2437448630464285, - "grad_norm": 0.0026208574417978525, - "learning_rate": 0.00019999997086516945, - "loss": 46.0, - "step": 3188 - }, - { - "epoch": 0.2438213200298182, - "grad_norm": 0.0008315776358358562, - "learning_rate": 0.00019999997084683125, - "loss": 46.0, - "step": 3189 - }, - { - "epoch": 0.24389777701320794, - "grad_norm": 0.0011688253143802285, - "learning_rate": 0.00019999997082848728, - "loss": 46.0, - "step": 3190 - }, - { - "epoch": 0.24397423399659768, - "grad_norm": 0.0007019365439191461, - "learning_rate": 0.00019999997081013751, - "loss": 46.0, - "step": 3191 - }, - { - "epoch": 0.24405069097998738, - "grad_norm": 0.0010562252718955278, - "learning_rate": 0.00019999997079178203, - "loss": 46.0, - "step": 3192 - }, - { - "epoch": 0.24412714796337712, - "grad_norm": 0.0009864402236416936, - "learning_rate": 0.00019999997077342074, - "loss": 46.0, - "step": 3193 - }, - { - "epoch": 0.24420360494676682, - "grad_norm": 0.0009028976201079786, - "learning_rate": 0.00019999997075505367, - "loss": 46.0, - "step": 3194 - }, - { - "epoch": 0.24428006193015656, - "grad_norm": 0.0013834924902766943, - "learning_rate": 0.00019999997073668087, - "loss": 46.0, - "step": 3195 - }, - { - "epoch": 0.24435651891354626, - "grad_norm": 0.002960958518087864, - "learning_rate": 0.00019999997071830228, - "loss": 46.0, - "step": 3196 - }, - { - "epoch": 0.244432975896936, - "grad_norm": 0.0032917845528572798, - "learning_rate": 0.0001999999706999179, - "loss": 46.0, - "step": 3197 - }, - { - "epoch": 0.2445094328803257, - "grad_norm": 0.002530928235501051, - "learning_rate": 0.00019999997068152777, - "loss": 46.0, - "step": 3198 - }, - { - "epoch": 0.24458588986371543, - "grad_norm": 0.000605881039518863, - "learning_rate": 0.00019999997066313187, - "loss": 46.0, - "step": 3199 - }, - { - "epoch": 0.24466234684710514, - "grad_norm": 0.0012429484631866217, - "learning_rate": 0.0001999999706447302, - "loss": 46.0, - "step": 3200 - }, - { - "epoch": 0.24473880383049487, - "grad_norm": 0.005064371973276138, - "learning_rate": 0.00019999997062632275, - "loss": 46.0, - "step": 3201 - }, - { - "epoch": 0.24481526081388458, - "grad_norm": 0.0011240814346820116, - "learning_rate": 0.00019999997060790955, - "loss": 46.0, - "step": 3202 - }, - { - "epoch": 0.2448917177972743, - "grad_norm": 0.00804038904607296, - "learning_rate": 0.00019999997058949055, - "loss": 46.0, - "step": 3203 - }, - { - "epoch": 0.24496817478066402, - "grad_norm": 0.0004708790220320225, - "learning_rate": 0.0001999999705710658, - "loss": 46.0, - "step": 3204 - }, - { - "epoch": 0.24504463176405375, - "grad_norm": 0.0010552947642281651, - "learning_rate": 0.0001999999705526353, - "loss": 46.0, - "step": 3205 - }, - { - "epoch": 0.24512108874744348, - "grad_norm": 0.000468751008156687, - "learning_rate": 0.000199999970534199, - "loss": 46.0, - "step": 3206 - }, - { - "epoch": 0.2451975457308332, - "grad_norm": 0.0004748433129861951, - "learning_rate": 0.00019999997051575695, - "loss": 46.0, - "step": 3207 - }, - { - "epoch": 0.24527400271422292, - "grad_norm": 0.0018256522016599774, - "learning_rate": 0.0001999999704973091, - "loss": 46.0, - "step": 3208 - }, - { - "epoch": 0.24535045969761263, - "grad_norm": 0.0006603817455470562, - "learning_rate": 0.0001999999704788555, - "loss": 46.0, - "step": 3209 - }, - { - "epoch": 0.24542691668100236, - "grad_norm": 0.0004485788522288203, - "learning_rate": 0.00019999997046039615, - "loss": 46.0, - "step": 3210 - }, - { - "epoch": 0.24550337366439207, - "grad_norm": 0.001897329930216074, - "learning_rate": 0.00019999997044193102, - "loss": 46.0, - "step": 3211 - }, - { - "epoch": 0.2455798306477818, - "grad_norm": 0.0006241471855901182, - "learning_rate": 0.00019999997042346012, - "loss": 46.0, - "step": 3212 - }, - { - "epoch": 0.2456562876311715, - "grad_norm": 0.0008216181886382401, - "learning_rate": 0.00019999997040498344, - "loss": 46.0, - "step": 3213 - }, - { - "epoch": 0.24573274461456124, - "grad_norm": 0.0010022129863500595, - "learning_rate": 0.000199999970386501, - "loss": 46.0, - "step": 3214 - }, - { - "epoch": 0.24580920159795094, - "grad_norm": 0.0019924382213503122, - "learning_rate": 0.00019999997036801277, - "loss": 46.0, - "step": 3215 - }, - { - "epoch": 0.24588565858134068, - "grad_norm": 0.0009271148010157049, - "learning_rate": 0.00019999997034951877, - "loss": 46.0, - "step": 3216 - }, - { - "epoch": 0.24596211556473038, - "grad_norm": 0.0008573323721066117, - "learning_rate": 0.00019999997033101906, - "loss": 46.0, - "step": 3217 - }, - { - "epoch": 0.24603857254812012, - "grad_norm": 0.001418428379110992, - "learning_rate": 0.00019999997031251352, - "loss": 46.0, - "step": 3218 - }, - { - "epoch": 0.24611502953150982, - "grad_norm": 0.001948562334291637, - "learning_rate": 0.0001999999702940022, - "loss": 46.0, - "step": 3219 - }, - { - "epoch": 0.24619148651489955, - "grad_norm": 0.0020987882744520903, - "learning_rate": 0.00019999997027548517, - "loss": 46.0, - "step": 3220 - }, - { - "epoch": 0.2462679434982893, - "grad_norm": 0.0036669347900897264, - "learning_rate": 0.00019999997025696233, - "loss": 46.0, - "step": 3221 - }, - { - "epoch": 0.246344400481679, - "grad_norm": 0.0014769320841878653, - "learning_rate": 0.00019999997023843372, - "loss": 46.0, - "step": 3222 - }, - { - "epoch": 0.24642085746506873, - "grad_norm": 0.003571388777345419, - "learning_rate": 0.00019999997021989937, - "loss": 46.0, - "step": 3223 - }, - { - "epoch": 0.24649731444845843, - "grad_norm": 0.0016468921676278114, - "learning_rate": 0.00019999997020135921, - "loss": 46.0, - "step": 3224 - }, - { - "epoch": 0.24657377143184817, - "grad_norm": 0.0011206233175471425, - "learning_rate": 0.0001999999701828133, - "loss": 46.0, - "step": 3225 - }, - { - "epoch": 0.24665022841523787, - "grad_norm": 0.0006686090491712093, - "learning_rate": 0.00019999997016426164, - "loss": 46.0, - "step": 3226 - }, - { - "epoch": 0.2467266853986276, - "grad_norm": 0.0012624136870726943, - "learning_rate": 0.00019999997014570416, - "loss": 46.0, - "step": 3227 - }, - { - "epoch": 0.2468031423820173, - "grad_norm": 0.0017163109732791781, - "learning_rate": 0.00019999997012714097, - "loss": 46.0, - "step": 3228 - }, - { - "epoch": 0.24687959936540704, - "grad_norm": 0.0011483390117064118, - "learning_rate": 0.000199999970108572, - "loss": 46.0, - "step": 3229 - }, - { - "epoch": 0.24695605634879675, - "grad_norm": 0.0005547954351641238, - "learning_rate": 0.00019999997008999723, - "loss": 46.0, - "step": 3230 - }, - { - "epoch": 0.24703251333218648, - "grad_norm": 0.001476628822274506, - "learning_rate": 0.00019999997007141672, - "loss": 46.0, - "step": 3231 - }, - { - "epoch": 0.2471089703155762, - "grad_norm": 0.0017434757901355624, - "learning_rate": 0.00019999997005283043, - "loss": 46.0, - "step": 3232 - }, - { - "epoch": 0.24718542729896592, - "grad_norm": 0.0012250370346009731, - "learning_rate": 0.00019999997003423837, - "loss": 46.0, - "step": 3233 - }, - { - "epoch": 0.24726188428235563, - "grad_norm": 0.0019066028762608767, - "learning_rate": 0.00019999997001564053, - "loss": 46.0, - "step": 3234 - }, - { - "epoch": 0.24733834126574536, - "grad_norm": 0.0013979371869936585, - "learning_rate": 0.00019999996999703692, - "loss": 46.0, - "step": 3235 - }, - { - "epoch": 0.2474147982491351, - "grad_norm": 0.0038987488951534033, - "learning_rate": 0.00019999996997842754, - "loss": 46.0, - "step": 3236 - }, - { - "epoch": 0.2474912552325248, - "grad_norm": 0.000744482793379575, - "learning_rate": 0.0001999999699598124, - "loss": 46.0, - "step": 3237 - }, - { - "epoch": 0.24756771221591453, - "grad_norm": 0.0019050112459808588, - "learning_rate": 0.00019999996994119149, - "loss": 46.0, - "step": 3238 - }, - { - "epoch": 0.24764416919930424, - "grad_norm": 0.0011425131233409047, - "learning_rate": 0.0001999999699225648, - "loss": 46.0, - "step": 3239 - }, - { - "epoch": 0.24772062618269397, - "grad_norm": 0.000778753194026649, - "learning_rate": 0.00019999996990393234, - "loss": 46.0, - "step": 3240 - }, - { - "epoch": 0.24779708316608368, - "grad_norm": 0.00144282600376755, - "learning_rate": 0.00019999996988529411, - "loss": 46.0, - "step": 3241 - }, - { - "epoch": 0.2478735401494734, - "grad_norm": 0.0014188982313498855, - "learning_rate": 0.00019999996986665015, - "loss": 46.0, - "step": 3242 - }, - { - "epoch": 0.24794999713286311, - "grad_norm": 0.0012028975179418921, - "learning_rate": 0.00019999996984800038, - "loss": 46.0, - "step": 3243 - }, - { - "epoch": 0.24802645411625285, - "grad_norm": 0.0005816504708491266, - "learning_rate": 0.00019999996982934486, - "loss": 46.0, - "step": 3244 - }, - { - "epoch": 0.24810291109964255, - "grad_norm": 0.0005952494102530181, - "learning_rate": 0.00019999996981068355, - "loss": 46.0, - "step": 3245 - }, - { - "epoch": 0.2481793680830323, - "grad_norm": 0.0005283544887788594, - "learning_rate": 0.00019999996979201646, - "loss": 46.0, - "step": 3246 - }, - { - "epoch": 0.248255825066422, - "grad_norm": 0.00039464057772420347, - "learning_rate": 0.00019999996977334365, - "loss": 46.0, - "step": 3247 - }, - { - "epoch": 0.24833228204981173, - "grad_norm": 0.0015188711695373058, - "learning_rate": 0.00019999996975466507, - "loss": 46.0, - "step": 3248 - }, - { - "epoch": 0.24840873903320146, - "grad_norm": 0.0006703032995574176, - "learning_rate": 0.00019999996973598066, - "loss": 46.0, - "step": 3249 - }, - { - "epoch": 0.24848519601659116, - "grad_norm": 0.0031718409154564142, - "learning_rate": 0.0001999999697172905, - "loss": 46.0, - "step": 3250 - }, - { - "epoch": 0.2485616529999809, - "grad_norm": 0.0019810611847788095, - "learning_rate": 0.00019999996969859464, - "loss": 46.0, - "step": 3251 - }, - { - "epoch": 0.2486381099833706, - "grad_norm": 0.004819038324058056, - "learning_rate": 0.0001999999696798929, - "loss": 46.0, - "step": 3252 - }, - { - "epoch": 0.24871456696676034, - "grad_norm": 0.001066030701622367, - "learning_rate": 0.0001999999696611855, - "loss": 46.0, - "step": 3253 - }, - { - "epoch": 0.24879102395015004, - "grad_norm": 0.002298054052516818, - "learning_rate": 0.00019999996964247224, - "loss": 46.0, - "step": 3254 - }, - { - "epoch": 0.24886748093353978, - "grad_norm": 0.0033549789804965258, - "learning_rate": 0.00019999996962375324, - "loss": 46.0, - "step": 3255 - }, - { - "epoch": 0.24894393791692948, - "grad_norm": 0.005768239963799715, - "learning_rate": 0.0001999999696050285, - "loss": 46.0, - "step": 3256 - }, - { - "epoch": 0.24902039490031921, - "grad_norm": 0.0016898488393053412, - "learning_rate": 0.00019999996958629796, - "loss": 46.0, - "step": 3257 - }, - { - "epoch": 0.24909685188370892, - "grad_norm": 0.0008953118231147528, - "learning_rate": 0.00019999996956756167, - "loss": 46.0, - "step": 3258 - }, - { - "epoch": 0.24917330886709865, - "grad_norm": 0.004425527527928352, - "learning_rate": 0.0001999999695488196, - "loss": 46.0, - "step": 3259 - }, - { - "epoch": 0.24924976585048836, - "grad_norm": 0.007059887982904911, - "learning_rate": 0.00019999996953007175, - "loss": 46.0, - "step": 3260 - }, - { - "epoch": 0.2493262228338781, - "grad_norm": 0.0011865132255479693, - "learning_rate": 0.00019999996951131817, - "loss": 46.0, - "step": 3261 - }, - { - "epoch": 0.2494026798172678, - "grad_norm": 0.009505679830908775, - "learning_rate": 0.00019999996949255876, - "loss": 46.0, - "step": 3262 - }, - { - "epoch": 0.24947913680065753, - "grad_norm": 0.004992562811821699, - "learning_rate": 0.00019999996947379364, - "loss": 46.0, - "step": 3263 - }, - { - "epoch": 0.24955559378404726, - "grad_norm": 0.0014782652724534273, - "learning_rate": 0.0001999999694550227, - "loss": 46.0, - "step": 3264 - }, - { - "epoch": 0.24963205076743697, - "grad_norm": 0.0010925416136160493, - "learning_rate": 0.000199999969436246, - "loss": 46.0, - "step": 3265 - }, - { - "epoch": 0.2497085077508267, - "grad_norm": 0.0027794234920293093, - "learning_rate": 0.00019999996941746356, - "loss": 46.0, - "step": 3266 - }, - { - "epoch": 0.2497849647342164, - "grad_norm": 0.0015363667625933886, - "learning_rate": 0.00019999996939867534, - "loss": 46.0, - "step": 3267 - }, - { - "epoch": 0.24986142171760614, - "grad_norm": 0.0014524442376568913, - "learning_rate": 0.00019999996937988135, - "loss": 46.0, - "step": 3268 - }, - { - "epoch": 0.24993787870099585, - "grad_norm": 0.000899262900929898, - "learning_rate": 0.00019999996936108158, - "loss": 46.0, - "step": 3269 - }, - { - "epoch": 0.2500143356843856, - "grad_norm": 0.0004008629184681922, - "learning_rate": 0.00019999996934227604, - "loss": 46.0, - "step": 3270 - }, - { - "epoch": 0.2500907926677753, - "grad_norm": 0.000693173089530319, - "learning_rate": 0.00019999996932346475, - "loss": 46.0, - "step": 3271 - }, - { - "epoch": 0.250167249651165, - "grad_norm": 0.0015817157691344619, - "learning_rate": 0.00019999996930464766, - "loss": 46.0, - "step": 3272 - }, - { - "epoch": 0.25024370663455475, - "grad_norm": 0.0015744644915685058, - "learning_rate": 0.00019999996928582483, - "loss": 46.0, - "step": 3273 - }, - { - "epoch": 0.25032016361794446, - "grad_norm": 0.000943234481383115, - "learning_rate": 0.00019999996926699622, - "loss": 46.0, - "step": 3274 - }, - { - "epoch": 0.25039662060133416, - "grad_norm": 0.00100785365793854, - "learning_rate": 0.0001999999692481618, - "loss": 46.0, - "step": 3275 - }, - { - "epoch": 0.25047307758472387, - "grad_norm": 0.0021864501759409904, - "learning_rate": 0.0001999999692293217, - "loss": 46.0, - "step": 3276 - }, - { - "epoch": 0.25054953456811363, - "grad_norm": 0.0008699607569724321, - "learning_rate": 0.00019999996921047576, - "loss": 46.0, - "step": 3277 - }, - { - "epoch": 0.25062599155150334, - "grad_norm": 0.0004331499512773007, - "learning_rate": 0.00019999996919162406, - "loss": 46.0, - "step": 3278 - }, - { - "epoch": 0.25070244853489304, - "grad_norm": 0.002732151886448264, - "learning_rate": 0.0001999999691727666, - "loss": 46.0, - "step": 3279 - }, - { - "epoch": 0.2507789055182828, - "grad_norm": 0.004899463150650263, - "learning_rate": 0.0001999999691539034, - "loss": 46.0, - "step": 3280 - }, - { - "epoch": 0.2508553625016725, - "grad_norm": 0.0009712368482723832, - "learning_rate": 0.00019999996913503437, - "loss": 46.0, - "step": 3281 - }, - { - "epoch": 0.2509318194850622, - "grad_norm": 0.004395731259137392, - "learning_rate": 0.00019999996911615963, - "loss": 46.0, - "step": 3282 - }, - { - "epoch": 0.2510082764684519, - "grad_norm": 0.0009943393524736166, - "learning_rate": 0.0001999999690972791, - "loss": 46.0, - "step": 3283 - }, - { - "epoch": 0.2510847334518417, - "grad_norm": 0.0015691715525463223, - "learning_rate": 0.00019999996907839278, - "loss": 46.0, - "step": 3284 - }, - { - "epoch": 0.2511611904352314, - "grad_norm": 0.0011420054361224174, - "learning_rate": 0.0001999999690595007, - "loss": 46.0, - "step": 3285 - }, - { - "epoch": 0.2512376474186211, - "grad_norm": 0.0010686635505408049, - "learning_rate": 0.00019999996904060285, - "loss": 46.0, - "step": 3286 - }, - { - "epoch": 0.2513141044020108, - "grad_norm": 0.0005065586883574724, - "learning_rate": 0.00019999996902169925, - "loss": 46.0, - "step": 3287 - }, - { - "epoch": 0.25139056138540056, - "grad_norm": 0.0025305699091404676, - "learning_rate": 0.00019999996900278987, - "loss": 46.0, - "step": 3288 - }, - { - "epoch": 0.25146701836879026, - "grad_norm": 0.0015879916027188301, - "learning_rate": 0.00019999996898387469, - "loss": 46.0, - "step": 3289 - }, - { - "epoch": 0.25154347535217997, - "grad_norm": 0.0010090366704389453, - "learning_rate": 0.00019999996896495379, - "loss": 46.0, - "step": 3290 - }, - { - "epoch": 0.2516199323355697, - "grad_norm": 0.00225488911382854, - "learning_rate": 0.00019999996894602709, - "loss": 46.0, - "step": 3291 - }, - { - "epoch": 0.25169638931895943, - "grad_norm": 0.0006276239291764796, - "learning_rate": 0.00019999996892709464, - "loss": 46.0, - "step": 3292 - }, - { - "epoch": 0.25177284630234914, - "grad_norm": 0.010474435985088348, - "learning_rate": 0.0001999999689081564, - "loss": 46.0, - "step": 3293 - }, - { - "epoch": 0.25184930328573885, - "grad_norm": 0.000672626425512135, - "learning_rate": 0.0001999999688892124, - "loss": 46.0, - "step": 3294 - }, - { - "epoch": 0.2519257602691286, - "grad_norm": 0.004152297042310238, - "learning_rate": 0.00019999996887026263, - "loss": 46.0, - "step": 3295 - }, - { - "epoch": 0.2520022172525183, - "grad_norm": 0.002188890939578414, - "learning_rate": 0.0001999999688513071, - "loss": 46.0, - "step": 3296 - }, - { - "epoch": 0.252078674235908, - "grad_norm": 0.0010546904522925615, - "learning_rate": 0.00019999996883234578, - "loss": 46.0, - "step": 3297 - }, - { - "epoch": 0.2521551312192977, - "grad_norm": 0.0012328902957960963, - "learning_rate": 0.0001999999688133787, - "loss": 46.0, - "step": 3298 - }, - { - "epoch": 0.2522315882026875, - "grad_norm": 0.000764339929446578, - "learning_rate": 0.00019999996879440586, - "loss": 46.0, - "step": 3299 - }, - { - "epoch": 0.2523080451860772, - "grad_norm": 0.0014056528452783823, - "learning_rate": 0.00019999996877542722, - "loss": 46.0, - "step": 3300 - }, - { - "epoch": 0.2523845021694669, - "grad_norm": 0.0009448706987313926, - "learning_rate": 0.00019999996875644284, - "loss": 46.0, - "step": 3301 - }, - { - "epoch": 0.2524609591528566, - "grad_norm": 0.001086943899281323, - "learning_rate": 0.0001999999687374527, - "loss": 46.0, - "step": 3302 - }, - { - "epoch": 0.25253741613624636, - "grad_norm": 0.0021563644986599684, - "learning_rate": 0.00019999996871845676, - "loss": 46.0, - "step": 3303 - }, - { - "epoch": 0.25261387311963607, - "grad_norm": 0.0020023297984153032, - "learning_rate": 0.00019999996869945506, - "loss": 46.0, - "step": 3304 - }, - { - "epoch": 0.2526903301030258, - "grad_norm": 0.00150933931581676, - "learning_rate": 0.0001999999686804476, - "loss": 46.0, - "step": 3305 - }, - { - "epoch": 0.2527667870864155, - "grad_norm": 0.003986313473433256, - "learning_rate": 0.00019999996866143437, - "loss": 46.0, - "step": 3306 - }, - { - "epoch": 0.25284324406980524, - "grad_norm": 0.0003875916881952435, - "learning_rate": 0.00019999996864241535, - "loss": 46.0, - "step": 3307 - }, - { - "epoch": 0.25291970105319495, - "grad_norm": 0.000934297451749444, - "learning_rate": 0.00019999996862339058, - "loss": 46.0, - "step": 3308 - }, - { - "epoch": 0.25299615803658465, - "grad_norm": 0.0016030014958232641, - "learning_rate": 0.00019999996860436004, - "loss": 46.0, - "step": 3309 - }, - { - "epoch": 0.2530726150199744, - "grad_norm": 0.0007313450332731009, - "learning_rate": 0.00019999996858532373, - "loss": 46.0, - "step": 3310 - }, - { - "epoch": 0.2531490720033641, - "grad_norm": 0.0011905212886631489, - "learning_rate": 0.00019999996856628167, - "loss": 46.0, - "step": 3311 - }, - { - "epoch": 0.2532255289867538, - "grad_norm": 0.0008470544125884771, - "learning_rate": 0.00019999996854723378, - "loss": 46.0, - "step": 3312 - }, - { - "epoch": 0.25330198597014353, - "grad_norm": 0.0006716848001815379, - "learning_rate": 0.0001999999685281802, - "loss": 46.0, - "step": 3313 - }, - { - "epoch": 0.2533784429535333, - "grad_norm": 0.001264550955966115, - "learning_rate": 0.0001999999685091208, - "loss": 46.0, - "step": 3314 - }, - { - "epoch": 0.253454899936923, - "grad_norm": 0.0007389072561636567, - "learning_rate": 0.00019999996849005562, - "loss": 46.0, - "step": 3315 - }, - { - "epoch": 0.2535313569203127, - "grad_norm": 0.000758472247980535, - "learning_rate": 0.0001999999684709847, - "loss": 46.0, - "step": 3316 - }, - { - "epoch": 0.2536078139037024, - "grad_norm": 0.0005641333991661668, - "learning_rate": 0.00019999996845190802, - "loss": 46.0, - "step": 3317 - }, - { - "epoch": 0.25368427088709217, - "grad_norm": 0.0012269377475604415, - "learning_rate": 0.00019999996843282554, - "loss": 46.0, - "step": 3318 - }, - { - "epoch": 0.2537607278704819, - "grad_norm": 0.0006507251528091729, - "learning_rate": 0.0001999999684137373, - "loss": 46.0, - "step": 3319 - }, - { - "epoch": 0.2538371848538716, - "grad_norm": 0.0008744804072193801, - "learning_rate": 0.0001999999683946433, - "loss": 46.0, - "step": 3320 - }, - { - "epoch": 0.2539136418372613, - "grad_norm": 0.0023502083495259285, - "learning_rate": 0.00019999996837554354, - "loss": 46.0, - "step": 3321 - }, - { - "epoch": 0.25399009882065104, - "grad_norm": 0.0025126736145466566, - "learning_rate": 0.00019999996835643797, - "loss": 46.0, - "step": 3322 - }, - { - "epoch": 0.25406655580404075, - "grad_norm": 0.0009097993024624884, - "learning_rate": 0.00019999996833732666, - "loss": 46.0, - "step": 3323 - }, - { - "epoch": 0.25414301278743046, - "grad_norm": 0.0009338732343167067, - "learning_rate": 0.00019999996831820957, - "loss": 46.0, - "step": 3324 - }, - { - "epoch": 0.2542194697708202, - "grad_norm": 0.0004946420667693019, - "learning_rate": 0.00019999996829908674, - "loss": 46.0, - "step": 3325 - }, - { - "epoch": 0.2542959267542099, - "grad_norm": 0.0016363008180633187, - "learning_rate": 0.00019999996827995808, - "loss": 46.0, - "step": 3326 - }, - { - "epoch": 0.2543723837375996, - "grad_norm": 0.001478428253903985, - "learning_rate": 0.0001999999682608237, - "loss": 46.0, - "step": 3327 - }, - { - "epoch": 0.25444884072098933, - "grad_norm": 0.0009009524364955723, - "learning_rate": 0.00019999996824168355, - "loss": 46.0, - "step": 3328 - }, - { - "epoch": 0.2545252977043791, - "grad_norm": 0.0015700636431574821, - "learning_rate": 0.00019999996822253762, - "loss": 46.0, - "step": 3329 - }, - { - "epoch": 0.2546017546877688, - "grad_norm": 0.0005721982452087104, - "learning_rate": 0.00019999996820338592, - "loss": 46.0, - "step": 3330 - }, - { - "epoch": 0.2546782116711585, - "grad_norm": 0.0010893730213865638, - "learning_rate": 0.00019999996818422845, - "loss": 46.0, - "step": 3331 - }, - { - "epoch": 0.2547546686545482, - "grad_norm": 0.0008342974469996989, - "learning_rate": 0.00019999996816506517, - "loss": 46.0, - "step": 3332 - }, - { - "epoch": 0.25483112563793797, - "grad_norm": 0.001518531353212893, - "learning_rate": 0.00019999996814589618, - "loss": 46.0, - "step": 3333 - }, - { - "epoch": 0.2549075826213277, - "grad_norm": 0.0014199293218553066, - "learning_rate": 0.00019999996812672142, - "loss": 46.0, - "step": 3334 - }, - { - "epoch": 0.2549840396047174, - "grad_norm": 0.002698963973671198, - "learning_rate": 0.00019999996810754085, - "loss": 46.0, - "step": 3335 - }, - { - "epoch": 0.25506049658810714, - "grad_norm": 0.0032409175764769316, - "learning_rate": 0.00019999996808835454, - "loss": 46.0, - "step": 3336 - }, - { - "epoch": 0.25513695357149685, - "grad_norm": 0.0016743586165830493, - "learning_rate": 0.00019999996806916245, - "loss": 46.0, - "step": 3337 - }, - { - "epoch": 0.25521341055488656, - "grad_norm": 0.0012604790972545743, - "learning_rate": 0.0001999999680499646, - "loss": 46.0, - "step": 3338 - }, - { - "epoch": 0.25528986753827626, - "grad_norm": 0.0008487821905873716, - "learning_rate": 0.00019999996803076099, - "loss": 46.0, - "step": 3339 - }, - { - "epoch": 0.255366324521666, - "grad_norm": 0.0008831185987219214, - "learning_rate": 0.00019999996801155158, - "loss": 46.0, - "step": 3340 - }, - { - "epoch": 0.2554427815050557, - "grad_norm": 0.001443361397832632, - "learning_rate": 0.00019999996799233643, - "loss": 46.0, - "step": 3341 - }, - { - "epoch": 0.25551923848844543, - "grad_norm": 0.0013351291418075562, - "learning_rate": 0.00019999996797311547, - "loss": 46.0, - "step": 3342 - }, - { - "epoch": 0.25559569547183514, - "grad_norm": 0.0008088031318038702, - "learning_rate": 0.00019999996795388878, - "loss": 46.0, - "step": 3343 - }, - { - "epoch": 0.2556721524552249, - "grad_norm": 0.0010591468308120966, - "learning_rate": 0.0001999999679346563, - "loss": 46.0, - "step": 3344 - }, - { - "epoch": 0.2557486094386146, - "grad_norm": 0.0008869795128703117, - "learning_rate": 0.00019999996791541806, - "loss": 46.0, - "step": 3345 - }, - { - "epoch": 0.2558250664220043, - "grad_norm": 0.0041189962066709995, - "learning_rate": 0.00019999996789617406, - "loss": 46.0, - "step": 3346 - }, - { - "epoch": 0.255901523405394, - "grad_norm": 0.0007569058216176927, - "learning_rate": 0.00019999996787692424, - "loss": 46.0, - "step": 3347 - }, - { - "epoch": 0.2559779803887838, - "grad_norm": 0.0012949596857652068, - "learning_rate": 0.00019999996785766873, - "loss": 46.0, - "step": 3348 - }, - { - "epoch": 0.2560544373721735, - "grad_norm": 0.0011699702590703964, - "learning_rate": 0.0001999999678384074, - "loss": 46.0, - "step": 3349 - }, - { - "epoch": 0.2561308943555632, - "grad_norm": 0.0021525020711123943, - "learning_rate": 0.0001999999678191403, - "loss": 46.0, - "step": 3350 - }, - { - "epoch": 0.25620735133895295, - "grad_norm": 0.0006216112524271011, - "learning_rate": 0.00019999996779986745, - "loss": 46.0, - "step": 3351 - }, - { - "epoch": 0.25628380832234265, - "grad_norm": 0.002517369342967868, - "learning_rate": 0.00019999996778058882, - "loss": 46.0, - "step": 3352 - }, - { - "epoch": 0.25636026530573236, - "grad_norm": 0.0015303826658055186, - "learning_rate": 0.00019999996776130444, - "loss": 46.0, - "step": 3353 - }, - { - "epoch": 0.25643672228912207, - "grad_norm": 0.0005715960287488997, - "learning_rate": 0.00019999996774201426, - "loss": 46.0, - "step": 3354 - }, - { - "epoch": 0.2565131792725118, - "grad_norm": 0.0013456068700179458, - "learning_rate": 0.0001999999677227183, - "loss": 46.0, - "step": 3355 - }, - { - "epoch": 0.25658963625590153, - "grad_norm": 0.001387232099659741, - "learning_rate": 0.0001999999677034166, - "loss": 46.0, - "step": 3356 - }, - { - "epoch": 0.25666609323929124, - "grad_norm": 0.0013406374491751194, - "learning_rate": 0.0001999999676841091, - "loss": 46.0, - "step": 3357 - }, - { - "epoch": 0.25674255022268094, - "grad_norm": 0.00104979716707021, - "learning_rate": 0.0001999999676647959, - "loss": 46.0, - "step": 3358 - }, - { - "epoch": 0.2568190072060707, - "grad_norm": 0.00042975760879926383, - "learning_rate": 0.00019999996764547687, - "loss": 46.0, - "step": 3359 - }, - { - "epoch": 0.2568954641894604, - "grad_norm": 0.0016505069797858596, - "learning_rate": 0.0001999999676261521, - "loss": 46.0, - "step": 3360 - }, - { - "epoch": 0.2569719211728501, - "grad_norm": 0.001937379944138229, - "learning_rate": 0.0001999999676068215, - "loss": 46.0, - "step": 3361 - }, - { - "epoch": 0.2570483781562398, - "grad_norm": 0.0012581613846123219, - "learning_rate": 0.0001999999675874852, - "loss": 46.0, - "step": 3362 - }, - { - "epoch": 0.2571248351396296, - "grad_norm": 0.0006858674460090697, - "learning_rate": 0.0001999999675681431, - "loss": 46.0, - "step": 3363 - }, - { - "epoch": 0.2572012921230193, - "grad_norm": 0.0019825738854706287, - "learning_rate": 0.00019999996754879525, - "loss": 46.0, - "step": 3364 - }, - { - "epoch": 0.257277749106409, - "grad_norm": 0.0006939525483176112, - "learning_rate": 0.00019999996752944162, - "loss": 46.0, - "step": 3365 - }, - { - "epoch": 0.25735420608979875, - "grad_norm": 0.0012705805711448193, - "learning_rate": 0.00019999996751008224, - "loss": 46.0, - "step": 3366 - }, - { - "epoch": 0.25743066307318846, - "grad_norm": 0.0012182053178548813, - "learning_rate": 0.00019999996749071706, - "loss": 46.0, - "step": 3367 - }, - { - "epoch": 0.25750712005657816, - "grad_norm": 0.0008177760173566639, - "learning_rate": 0.0001999999674713461, - "loss": 46.0, - "step": 3368 - }, - { - "epoch": 0.25758357703996787, - "grad_norm": 0.0009514178382232785, - "learning_rate": 0.0001999999674519694, - "loss": 46.0, - "step": 3369 - }, - { - "epoch": 0.25766003402335763, - "grad_norm": 0.0012822678545489907, - "learning_rate": 0.00019999996743258694, - "loss": 46.0, - "step": 3370 - }, - { - "epoch": 0.25773649100674734, - "grad_norm": 0.0005474091740325093, - "learning_rate": 0.00019999996741319867, - "loss": 46.0, - "step": 3371 - }, - { - "epoch": 0.25781294799013704, - "grad_norm": 0.0008514283108524978, - "learning_rate": 0.00019999996739380468, - "loss": 46.0, - "step": 3372 - }, - { - "epoch": 0.25788940497352675, - "grad_norm": 0.002983875572681427, - "learning_rate": 0.00019999996737440486, - "loss": 46.0, - "step": 3373 - }, - { - "epoch": 0.2579658619569165, - "grad_norm": 0.001140502979978919, - "learning_rate": 0.00019999996735499932, - "loss": 46.0, - "step": 3374 - }, - { - "epoch": 0.2580423189403062, - "grad_norm": 0.0012950376840308309, - "learning_rate": 0.00019999996733558798, - "loss": 46.0, - "step": 3375 - }, - { - "epoch": 0.2581187759236959, - "grad_norm": 0.0007958801579661667, - "learning_rate": 0.0001999999673161709, - "loss": 46.0, - "step": 3376 - }, - { - "epoch": 0.2581952329070856, - "grad_norm": 0.0012902435846626759, - "learning_rate": 0.000199999967296748, - "loss": 46.0, - "step": 3377 - }, - { - "epoch": 0.2582716898904754, - "grad_norm": 0.002661551581695676, - "learning_rate": 0.00019999996727731938, - "loss": 46.0, - "step": 3378 - }, - { - "epoch": 0.2583481468738651, - "grad_norm": 0.003200280712917447, - "learning_rate": 0.000199999967257885, - "loss": 46.0, - "step": 3379 - }, - { - "epoch": 0.2584246038572548, - "grad_norm": 0.0022828904911875725, - "learning_rate": 0.0001999999672384448, - "loss": 46.0, - "step": 3380 - }, - { - "epoch": 0.25850106084064456, - "grad_norm": 0.0009272780735045671, - "learning_rate": 0.00019999996721899884, - "loss": 46.0, - "step": 3381 - }, - { - "epoch": 0.25857751782403426, - "grad_norm": 0.0005332891596481204, - "learning_rate": 0.00019999996719954714, - "loss": 46.0, - "step": 3382 - }, - { - "epoch": 0.25865397480742397, - "grad_norm": 0.0013827787479385734, - "learning_rate": 0.00019999996718008967, - "loss": 46.0, - "step": 3383 - }, - { - "epoch": 0.2587304317908137, - "grad_norm": 0.000758643785957247, - "learning_rate": 0.0001999999671606264, - "loss": 46.0, - "step": 3384 - }, - { - "epoch": 0.25880688877420344, - "grad_norm": 0.002129457425326109, - "learning_rate": 0.00019999996714115738, - "loss": 46.0, - "step": 3385 - }, - { - "epoch": 0.25888334575759314, - "grad_norm": 0.0003911287640221417, - "learning_rate": 0.00019999996712168262, - "loss": 46.0, - "step": 3386 - }, - { - "epoch": 0.25895980274098285, - "grad_norm": 0.0008566020987927914, - "learning_rate": 0.00019999996710220205, - "loss": 46.0, - "step": 3387 - }, - { - "epoch": 0.25903625972437255, - "grad_norm": 0.001327891950495541, - "learning_rate": 0.00019999996708271571, - "loss": 46.0, - "step": 3388 - }, - { - "epoch": 0.2591127167077623, - "grad_norm": 0.0014818107010796666, - "learning_rate": 0.0001999999670632236, - "loss": 46.0, - "step": 3389 - }, - { - "epoch": 0.259189173691152, - "grad_norm": 0.001240147859789431, - "learning_rate": 0.00019999996704372574, - "loss": 46.0, - "step": 3390 - }, - { - "epoch": 0.2592656306745417, - "grad_norm": 0.0015240032225847244, - "learning_rate": 0.0001999999670242221, - "loss": 46.0, - "step": 3391 - }, - { - "epoch": 0.25934208765793143, - "grad_norm": 0.0010145625565201044, - "learning_rate": 0.0001999999670047127, - "loss": 46.0, - "step": 3392 - }, - { - "epoch": 0.2594185446413212, - "grad_norm": 0.005682583898305893, - "learning_rate": 0.00019999996698519753, - "loss": 46.0, - "step": 3393 - }, - { - "epoch": 0.2594950016247109, - "grad_norm": 0.0016105679096654058, - "learning_rate": 0.00019999996696567658, - "loss": 46.0, - "step": 3394 - }, - { - "epoch": 0.2595714586081006, - "grad_norm": 0.00141436536796391, - "learning_rate": 0.00019999996694614985, - "loss": 46.0, - "step": 3395 - }, - { - "epoch": 0.25964791559149036, - "grad_norm": 0.0061468142084777355, - "learning_rate": 0.00019999996692661738, - "loss": 46.0, - "step": 3396 - }, - { - "epoch": 0.25972437257488007, - "grad_norm": 0.0004221137205604464, - "learning_rate": 0.0001999999669070791, - "loss": 46.0, - "step": 3397 - }, - { - "epoch": 0.2598008295582698, - "grad_norm": 0.0006950510432943702, - "learning_rate": 0.00019999996688753512, - "loss": 46.0, - "step": 3398 - }, - { - "epoch": 0.2598772865416595, - "grad_norm": 0.0022589629516005516, - "learning_rate": 0.00019999996686798533, - "loss": 46.0, - "step": 3399 - }, - { - "epoch": 0.25995374352504924, - "grad_norm": 0.0009185707895085216, - "learning_rate": 0.00019999996684842973, - "loss": 46.0, - "step": 3400 - }, - { - "epoch": 0.26003020050843895, - "grad_norm": 0.0014534504152834415, - "learning_rate": 0.00019999996682886842, - "loss": 46.0, - "step": 3401 - }, - { - "epoch": 0.26010665749182865, - "grad_norm": 0.0030209224205464125, - "learning_rate": 0.0001999999668093013, - "loss": 46.0, - "step": 3402 - }, - { - "epoch": 0.26018311447521836, - "grad_norm": 0.0006988857639953494, - "learning_rate": 0.00019999996678972845, - "loss": 46.0, - "step": 3403 - }, - { - "epoch": 0.2602595714586081, - "grad_norm": 0.0010906365932896733, - "learning_rate": 0.0001999999667701498, - "loss": 46.0, - "step": 3404 - }, - { - "epoch": 0.2603360284419978, - "grad_norm": 0.0008640087908133864, - "learning_rate": 0.0001999999667505654, - "loss": 46.0, - "step": 3405 - }, - { - "epoch": 0.26041248542538753, - "grad_norm": 0.0005684989737346768, - "learning_rate": 0.0001999999667309752, - "loss": 46.0, - "step": 3406 - }, - { - "epoch": 0.26048894240877724, - "grad_norm": 0.0006818450638093054, - "learning_rate": 0.00019999996671137926, - "loss": 46.0, - "step": 3407 - }, - { - "epoch": 0.260565399392167, - "grad_norm": 0.0009807914029806852, - "learning_rate": 0.00019999996669177754, - "loss": 46.0, - "step": 3408 - }, - { - "epoch": 0.2606418563755567, - "grad_norm": 0.0005603322060778737, - "learning_rate": 0.00019999996667217004, - "loss": 46.0, - "step": 3409 - }, - { - "epoch": 0.2607183133589464, - "grad_norm": 0.0016643533017486334, - "learning_rate": 0.0001999999666525568, - "loss": 46.0, - "step": 3410 - }, - { - "epoch": 0.26079477034233617, - "grad_norm": 0.001112714409828186, - "learning_rate": 0.00019999996663293778, - "loss": 46.0, - "step": 3411 - }, - { - "epoch": 0.2608712273257259, - "grad_norm": 0.0004928434500470757, - "learning_rate": 0.00019999996661331299, - "loss": 46.0, - "step": 3412 - }, - { - "epoch": 0.2609476843091156, - "grad_norm": 0.0012750652385875583, - "learning_rate": 0.00019999996659368242, - "loss": 46.0, - "step": 3413 - }, - { - "epoch": 0.2610241412925053, - "grad_norm": 0.0012590659316629171, - "learning_rate": 0.00019999996657404608, - "loss": 46.0, - "step": 3414 - }, - { - "epoch": 0.26110059827589505, - "grad_norm": 0.001027178717777133, - "learning_rate": 0.00019999996655440397, - "loss": 46.0, - "step": 3415 - }, - { - "epoch": 0.26117705525928475, - "grad_norm": 0.0008961145649664104, - "learning_rate": 0.0001999999665347561, - "loss": 46.0, - "step": 3416 - }, - { - "epoch": 0.26125351224267446, - "grad_norm": 0.0015716425841674209, - "learning_rate": 0.00019999996651510246, - "loss": 46.0, - "step": 3417 - }, - { - "epoch": 0.26132996922606416, - "grad_norm": 0.002116632182151079, - "learning_rate": 0.00019999996649544302, - "loss": 46.0, - "step": 3418 - }, - { - "epoch": 0.2614064262094539, - "grad_norm": 0.001458898652344942, - "learning_rate": 0.00019999996647577785, - "loss": 46.0, - "step": 3419 - }, - { - "epoch": 0.26148288319284363, - "grad_norm": 0.0010803916957229376, - "learning_rate": 0.0001999999664561069, - "loss": 46.0, - "step": 3420 - }, - { - "epoch": 0.26155934017623333, - "grad_norm": 0.000599379651248455, - "learning_rate": 0.00019999996643643017, - "loss": 46.0, - "step": 3421 - }, - { - "epoch": 0.26163579715962304, - "grad_norm": 0.001198024838231504, - "learning_rate": 0.0001999999664167477, - "loss": 46.0, - "step": 3422 - }, - { - "epoch": 0.2617122541430128, - "grad_norm": 0.0007421146729029715, - "learning_rate": 0.00019999996639705946, - "loss": 46.0, - "step": 3423 - }, - { - "epoch": 0.2617887111264025, - "grad_norm": 0.0006218171329237521, - "learning_rate": 0.0001999999663773654, - "loss": 46.0, - "step": 3424 - }, - { - "epoch": 0.2618651681097922, - "grad_norm": 0.0023230707738548517, - "learning_rate": 0.0001999999663576656, - "loss": 46.0, - "step": 3425 - }, - { - "epoch": 0.261941625093182, - "grad_norm": 0.0007635362562723458, - "learning_rate": 0.00019999996633796006, - "loss": 46.0, - "step": 3426 - }, - { - "epoch": 0.2620180820765717, - "grad_norm": 0.002543106907978654, - "learning_rate": 0.00019999996631824872, - "loss": 46.0, - "step": 3427 - }, - { - "epoch": 0.2620945390599614, - "grad_norm": 0.0026661870069801807, - "learning_rate": 0.00019999996629853158, - "loss": 46.0, - "step": 3428 - }, - { - "epoch": 0.2621709960433511, - "grad_norm": 0.0017379287164658308, - "learning_rate": 0.0001999999662788087, - "loss": 46.0, - "step": 3429 - }, - { - "epoch": 0.26224745302674085, - "grad_norm": 0.0009609837434254587, - "learning_rate": 0.0001999999662590801, - "loss": 46.0, - "step": 3430 - }, - { - "epoch": 0.26232391001013056, - "grad_norm": 0.0023372580762952566, - "learning_rate": 0.00019999996623934564, - "loss": 46.0, - "step": 3431 - }, - { - "epoch": 0.26240036699352026, - "grad_norm": 0.0008000805974006653, - "learning_rate": 0.00019999996621960549, - "loss": 46.0, - "step": 3432 - }, - { - "epoch": 0.26247682397690997, - "grad_norm": 0.0016166317509487271, - "learning_rate": 0.0001999999661998595, - "loss": 46.0, - "step": 3433 - }, - { - "epoch": 0.26255328096029973, - "grad_norm": 0.002178431721404195, - "learning_rate": 0.00019999996618010778, - "loss": 46.0, - "step": 3434 - }, - { - "epoch": 0.26262973794368943, - "grad_norm": 0.0009274385520257056, - "learning_rate": 0.0001999999661603503, - "loss": 46.0, - "step": 3435 - }, - { - "epoch": 0.26270619492707914, - "grad_norm": 0.0010919689666479826, - "learning_rate": 0.00019999996614058704, - "loss": 46.0, - "step": 3436 - }, - { - "epoch": 0.26278265191046885, - "grad_norm": 0.0012274044565856457, - "learning_rate": 0.00019999996612081802, - "loss": 46.0, - "step": 3437 - }, - { - "epoch": 0.2628591088938586, - "grad_norm": 0.0015083723701536655, - "learning_rate": 0.0001999999661010432, - "loss": 46.0, - "step": 3438 - }, - { - "epoch": 0.2629355658772483, - "grad_norm": 0.0014632773818448186, - "learning_rate": 0.00019999996608126264, - "loss": 46.0, - "step": 3439 - }, - { - "epoch": 0.263012022860638, - "grad_norm": 0.0009205866372212768, - "learning_rate": 0.00019999996606147628, - "loss": 46.0, - "step": 3440 - }, - { - "epoch": 0.2630884798440278, - "grad_norm": 0.001350146485492587, - "learning_rate": 0.00019999996604168417, - "loss": 46.0, - "step": 3441 - }, - { - "epoch": 0.2631649368274175, - "grad_norm": 0.0011877267388626933, - "learning_rate": 0.0001999999660218863, - "loss": 46.0, - "step": 3442 - }, - { - "epoch": 0.2632413938108072, - "grad_norm": 0.0008765162201598287, - "learning_rate": 0.00019999996600208263, - "loss": 46.0, - "step": 3443 - }, - { - "epoch": 0.2633178507941969, - "grad_norm": 0.026009637862443924, - "learning_rate": 0.00019999996598227325, - "loss": 46.0, - "step": 3444 - }, - { - "epoch": 0.26339430777758666, - "grad_norm": 0.0007201490807347, - "learning_rate": 0.00019999996596245802, - "loss": 46.0, - "step": 3445 - }, - { - "epoch": 0.26347076476097636, - "grad_norm": 0.0004314627149142325, - "learning_rate": 0.00019999996594263707, - "loss": 46.0, - "step": 3446 - }, - { - "epoch": 0.26354722174436607, - "grad_norm": 0.0009992282139137387, - "learning_rate": 0.00019999996592281037, - "loss": 46.0, - "step": 3447 - }, - { - "epoch": 0.2636236787277558, - "grad_norm": 0.0012712192256003618, - "learning_rate": 0.00019999996590297787, - "loss": 46.0, - "step": 3448 - }, - { - "epoch": 0.26370013571114553, - "grad_norm": 0.0021229113917797804, - "learning_rate": 0.0001999999658831396, - "loss": 46.0, - "step": 3449 - }, - { - "epoch": 0.26377659269453524, - "grad_norm": 0.0008281507762148976, - "learning_rate": 0.00019999996586329556, - "loss": 46.0, - "step": 3450 - }, - { - "epoch": 0.26385304967792494, - "grad_norm": 0.0014178858837112784, - "learning_rate": 0.00019999996584344577, - "loss": 46.0, - "step": 3451 - }, - { - "epoch": 0.26392950666131465, - "grad_norm": 0.003944141324609518, - "learning_rate": 0.00019999996582359018, - "loss": 46.0, - "step": 3452 - }, - { - "epoch": 0.2640059636447044, - "grad_norm": 0.0011372524313628674, - "learning_rate": 0.00019999996580372884, - "loss": 46.0, - "step": 3453 - }, - { - "epoch": 0.2640824206280941, - "grad_norm": 0.0032555984798818827, - "learning_rate": 0.00019999996578386173, - "loss": 46.0, - "step": 3454 - }, - { - "epoch": 0.2641588776114838, - "grad_norm": 0.0027557043358683586, - "learning_rate": 0.00019999996576398882, - "loss": 46.0, - "step": 3455 - }, - { - "epoch": 0.2642353345948736, - "grad_norm": 0.0008548556361347437, - "learning_rate": 0.0001999999657441102, - "loss": 46.0, - "step": 3456 - }, - { - "epoch": 0.2643117915782633, - "grad_norm": 0.0005990201607346535, - "learning_rate": 0.00019999996572422576, - "loss": 46.0, - "step": 3457 - }, - { - "epoch": 0.264388248561653, - "grad_norm": 0.0007861117483116686, - "learning_rate": 0.00019999996570433556, - "loss": 46.0, - "step": 3458 - }, - { - "epoch": 0.2644647055450427, - "grad_norm": 0.0010544734541326761, - "learning_rate": 0.0001999999656844396, - "loss": 46.0, - "step": 3459 - }, - { - "epoch": 0.26454116252843246, - "grad_norm": 0.004305166658014059, - "learning_rate": 0.00019999996566453788, - "loss": 46.0, - "step": 3460 - }, - { - "epoch": 0.26461761951182217, - "grad_norm": 0.007666804827749729, - "learning_rate": 0.00019999996564463039, - "loss": 46.0, - "step": 3461 - }, - { - "epoch": 0.26469407649521187, - "grad_norm": 0.0017637485871091485, - "learning_rate": 0.00019999996562471712, - "loss": 46.0, - "step": 3462 - }, - { - "epoch": 0.2647705334786016, - "grad_norm": 0.000689836626406759, - "learning_rate": 0.00019999996560479807, - "loss": 46.0, - "step": 3463 - }, - { - "epoch": 0.26484699046199134, - "grad_norm": 0.0011632087407633662, - "learning_rate": 0.00019999996558487326, - "loss": 46.0, - "step": 3464 - }, - { - "epoch": 0.26492344744538104, - "grad_norm": 0.001704981317743659, - "learning_rate": 0.0001999999655649427, - "loss": 46.0, - "step": 3465 - }, - { - "epoch": 0.26499990442877075, - "grad_norm": 0.0023453945759683847, - "learning_rate": 0.00019999996554500636, - "loss": 46.0, - "step": 3466 - }, - { - "epoch": 0.2650763614121605, - "grad_norm": 0.00347949774004519, - "learning_rate": 0.00019999996552506425, - "loss": 46.0, - "step": 3467 - }, - { - "epoch": 0.2651528183955502, - "grad_norm": 0.0012835649540647864, - "learning_rate": 0.00019999996550511636, - "loss": 46.0, - "step": 3468 - }, - { - "epoch": 0.2652292753789399, - "grad_norm": 0.001120152068324387, - "learning_rate": 0.00019999996548516268, - "loss": 46.0, - "step": 3469 - }, - { - "epoch": 0.2653057323623296, - "grad_norm": 0.0005451901233755052, - "learning_rate": 0.00019999996546520325, - "loss": 46.0, - "step": 3470 - }, - { - "epoch": 0.2653821893457194, - "grad_norm": 0.00048714099102653563, - "learning_rate": 0.00019999996544523807, - "loss": 46.0, - "step": 3471 - }, - { - "epoch": 0.2654586463291091, - "grad_norm": 0.0031618874054402113, - "learning_rate": 0.00019999996542526712, - "loss": 46.0, - "step": 3472 - }, - { - "epoch": 0.2655351033124988, - "grad_norm": 0.00133831228595227, - "learning_rate": 0.00019999996540529037, - "loss": 46.0, - "step": 3473 - }, - { - "epoch": 0.2656115602958885, - "grad_norm": 0.002463582204654813, - "learning_rate": 0.00019999996538530787, - "loss": 46.0, - "step": 3474 - }, - { - "epoch": 0.26568801727927827, - "grad_norm": 0.0008574210805818439, - "learning_rate": 0.0001999999653653196, - "loss": 46.0, - "step": 3475 - }, - { - "epoch": 0.26576447426266797, - "grad_norm": 0.0006142286583781242, - "learning_rate": 0.00019999996534532553, - "loss": 46.0, - "step": 3476 - }, - { - "epoch": 0.2658409312460577, - "grad_norm": 0.002048185793682933, - "learning_rate": 0.00019999996532532577, - "loss": 46.0, - "step": 3477 - }, - { - "epoch": 0.2659173882294474, - "grad_norm": 0.0017892924370244145, - "learning_rate": 0.00019999996530532018, - "loss": 46.0, - "step": 3478 - }, - { - "epoch": 0.26599384521283714, - "grad_norm": 0.0006220433278940618, - "learning_rate": 0.00019999996528530882, - "loss": 46.0, - "step": 3479 - }, - { - "epoch": 0.26607030219622685, - "grad_norm": 0.001623353105969727, - "learning_rate": 0.0001999999652652917, - "loss": 46.0, - "step": 3480 - }, - { - "epoch": 0.26614675917961655, - "grad_norm": 0.00040237829671241343, - "learning_rate": 0.00019999996524526883, - "loss": 46.0, - "step": 3481 - }, - { - "epoch": 0.2662232161630063, - "grad_norm": 0.0005781952640973032, - "learning_rate": 0.00019999996522524017, - "loss": 46.0, - "step": 3482 - }, - { - "epoch": 0.266299673146396, - "grad_norm": 0.003110188990831375, - "learning_rate": 0.0001999999652052057, - "loss": 46.0, - "step": 3483 - }, - { - "epoch": 0.2663761301297857, - "grad_norm": 0.0012962182518094778, - "learning_rate": 0.00019999996518516554, - "loss": 46.0, - "step": 3484 - }, - { - "epoch": 0.26645258711317543, - "grad_norm": 0.0006347584421746433, - "learning_rate": 0.00019999996516511956, - "loss": 46.0, - "step": 3485 - }, - { - "epoch": 0.2665290440965652, - "grad_norm": 0.0017043203115463257, - "learning_rate": 0.00019999996514506784, - "loss": 46.0, - "step": 3486 - }, - { - "epoch": 0.2666055010799549, - "grad_norm": 0.005442338529974222, - "learning_rate": 0.0001999999651250103, - "loss": 46.0, - "step": 3487 - }, - { - "epoch": 0.2666819580633446, - "grad_norm": 0.0015704495599493384, - "learning_rate": 0.00019999996510494704, - "loss": 46.0, - "step": 3488 - }, - { - "epoch": 0.2667584150467343, - "grad_norm": 0.0008896812214516103, - "learning_rate": 0.00019999996508487803, - "loss": 46.0, - "step": 3489 - }, - { - "epoch": 0.26683487203012407, - "grad_norm": 0.000847585208248347, - "learning_rate": 0.00019999996506480318, - "loss": 46.0, - "step": 3490 - }, - { - "epoch": 0.2669113290135138, - "grad_norm": 0.0005499208928085864, - "learning_rate": 0.00019999996504472262, - "loss": 46.0, - "step": 3491 - }, - { - "epoch": 0.2669877859969035, - "grad_norm": 0.0010366961359977722, - "learning_rate": 0.00019999996502463626, - "loss": 46.0, - "step": 3492 - }, - { - "epoch": 0.2670642429802932, - "grad_norm": 0.0011850334703922272, - "learning_rate": 0.00019999996500454415, - "loss": 46.0, - "step": 3493 - }, - { - "epoch": 0.26714069996368295, - "grad_norm": 0.0008601018344052136, - "learning_rate": 0.00019999996498444624, - "loss": 46.0, - "step": 3494 - }, - { - "epoch": 0.26721715694707265, - "grad_norm": 0.0005690960679203272, - "learning_rate": 0.00019999996496434258, - "loss": 46.0, - "step": 3495 - }, - { - "epoch": 0.26729361393046236, - "grad_norm": 0.0008795912726782262, - "learning_rate": 0.00019999996494423315, - "loss": 46.0, - "step": 3496 - }, - { - "epoch": 0.2673700709138521, - "grad_norm": 0.0015382609562948346, - "learning_rate": 0.00019999996492411795, - "loss": 46.0, - "step": 3497 - }, - { - "epoch": 0.2674465278972418, - "grad_norm": 0.0005444943089969456, - "learning_rate": 0.00019999996490399697, - "loss": 46.0, - "step": 3498 - }, - { - "epoch": 0.26752298488063153, - "grad_norm": 0.008633997291326523, - "learning_rate": 0.00019999996488387025, - "loss": 46.0, - "step": 3499 - }, - { - "epoch": 0.26759944186402124, - "grad_norm": 0.0038967877626419067, - "learning_rate": 0.00019999996486373776, - "loss": 46.0, - "step": 3500 - }, - { - "epoch": 0.267675898847411, - "grad_norm": 0.0012343310518190265, - "learning_rate": 0.00019999996484359946, - "loss": 46.0, - "step": 3501 - }, - { - "epoch": 0.2677523558308007, - "grad_norm": 0.007553554605692625, - "learning_rate": 0.00019999996482345542, - "loss": 46.0, - "step": 3502 - }, - { - "epoch": 0.2678288128141904, - "grad_norm": 0.0018467012559995055, - "learning_rate": 0.0001999999648033056, - "loss": 46.0, - "step": 3503 - }, - { - "epoch": 0.2679052697975801, - "grad_norm": 0.000622952647972852, - "learning_rate": 0.00019999996478315, - "loss": 46.0, - "step": 3504 - }, - { - "epoch": 0.2679817267809699, - "grad_norm": 0.0019059015903621912, - "learning_rate": 0.00019999996476298865, - "loss": 46.0, - "step": 3505 - }, - { - "epoch": 0.2680581837643596, - "grad_norm": 0.000873995537403971, - "learning_rate": 0.00019999996474282154, - "loss": 46.0, - "step": 3506 - }, - { - "epoch": 0.2681346407477493, - "grad_norm": 0.004310471937060356, - "learning_rate": 0.00019999996472264863, - "loss": 46.0, - "step": 3507 - }, - { - "epoch": 0.268211097731139, - "grad_norm": 0.0005282750935293734, - "learning_rate": 0.00019999996470246997, - "loss": 46.0, - "step": 3508 - }, - { - "epoch": 0.26828755471452875, - "grad_norm": 0.0006494536646641791, - "learning_rate": 0.00019999996468228554, - "loss": 46.0, - "step": 3509 - }, - { - "epoch": 0.26836401169791846, - "grad_norm": 0.001211096765473485, - "learning_rate": 0.00019999996466209534, - "loss": 46.0, - "step": 3510 - }, - { - "epoch": 0.26844046868130816, - "grad_norm": 0.0005752411670982838, - "learning_rate": 0.0001999999646418994, - "loss": 46.0, - "step": 3511 - }, - { - "epoch": 0.2685169256646979, - "grad_norm": 0.0015640348428860307, - "learning_rate": 0.00019999996462169764, - "loss": 46.0, - "step": 3512 - }, - { - "epoch": 0.26859338264808763, - "grad_norm": 0.0029215794056653976, - "learning_rate": 0.00019999996460149012, - "loss": 46.0, - "step": 3513 - }, - { - "epoch": 0.26866983963147734, - "grad_norm": 0.0006118442979641259, - "learning_rate": 0.00019999996458127683, - "loss": 46.0, - "step": 3514 - }, - { - "epoch": 0.26874629661486704, - "grad_norm": 0.001395733910612762, - "learning_rate": 0.00019999996456105778, - "loss": 46.0, - "step": 3515 - }, - { - "epoch": 0.2688227535982568, - "grad_norm": 0.0012524726334959269, - "learning_rate": 0.00019999996454083297, - "loss": 46.0, - "step": 3516 - }, - { - "epoch": 0.2688992105816465, - "grad_norm": 0.001530499430373311, - "learning_rate": 0.00019999996452060238, - "loss": 46.0, - "step": 3517 - }, - { - "epoch": 0.2689756675650362, - "grad_norm": 0.001576151349581778, - "learning_rate": 0.00019999996450036602, - "loss": 46.0, - "step": 3518 - }, - { - "epoch": 0.2690521245484259, - "grad_norm": 0.0015910296933725476, - "learning_rate": 0.0001999999644801239, - "loss": 46.0, - "step": 3519 - }, - { - "epoch": 0.2691285815318157, - "grad_norm": 0.001185716944746673, - "learning_rate": 0.000199999964459876, - "loss": 46.0, - "step": 3520 - }, - { - "epoch": 0.2692050385152054, - "grad_norm": 0.0006350938929244876, - "learning_rate": 0.00019999996443962234, - "loss": 46.0, - "step": 3521 - }, - { - "epoch": 0.2692814954985951, - "grad_norm": 0.0006047820206731558, - "learning_rate": 0.0001999999644193629, - "loss": 46.0, - "step": 3522 - }, - { - "epoch": 0.2693579524819848, - "grad_norm": 0.0033772122114896774, - "learning_rate": 0.00019999996439909768, - "loss": 46.0, - "step": 3523 - }, - { - "epoch": 0.26943440946537456, - "grad_norm": 0.0004474870511330664, - "learning_rate": 0.0001999999643788267, - "loss": 46.0, - "step": 3524 - }, - { - "epoch": 0.26951086644876426, - "grad_norm": 0.0006406800821423531, - "learning_rate": 0.00019999996435854999, - "loss": 46.0, - "step": 3525 - }, - { - "epoch": 0.26958732343215397, - "grad_norm": 0.0016108149429783225, - "learning_rate": 0.00019999996433826746, - "loss": 46.0, - "step": 3526 - }, - { - "epoch": 0.26966378041554373, - "grad_norm": 0.0013555943733081222, - "learning_rate": 0.00019999996431797917, - "loss": 46.0, - "step": 3527 - }, - { - "epoch": 0.26974023739893344, - "grad_norm": 0.001304943347349763, - "learning_rate": 0.00019999996429768513, - "loss": 46.0, - "step": 3528 - }, - { - "epoch": 0.26981669438232314, - "grad_norm": 0.0015969887608662248, - "learning_rate": 0.0001999999642773853, - "loss": 46.0, - "step": 3529 - }, - { - "epoch": 0.26989315136571285, - "grad_norm": 0.0004311311640776694, - "learning_rate": 0.0001999999642570797, - "loss": 46.0, - "step": 3530 - }, - { - "epoch": 0.2699696083491026, - "grad_norm": 0.0009083780460059643, - "learning_rate": 0.00019999996423676833, - "loss": 46.0, - "step": 3531 - }, - { - "epoch": 0.2700460653324923, - "grad_norm": 0.0010997372446581721, - "learning_rate": 0.00019999996421645122, - "loss": 46.0, - "step": 3532 - }, - { - "epoch": 0.270122522315882, - "grad_norm": 0.0009625137900002301, - "learning_rate": 0.00019999996419612831, - "loss": 46.0, - "step": 3533 - }, - { - "epoch": 0.2701989792992717, - "grad_norm": 0.000788743665907532, - "learning_rate": 0.00019999996417579966, - "loss": 46.0, - "step": 3534 - }, - { - "epoch": 0.2702754362826615, - "grad_norm": 0.0027825627475976944, - "learning_rate": 0.0001999999641554652, - "loss": 46.0, - "step": 3535 - }, - { - "epoch": 0.2703518932660512, - "grad_norm": 0.001018956070765853, - "learning_rate": 0.00019999996413512497, - "loss": 46.0, - "step": 3536 - }, - { - "epoch": 0.2704283502494409, - "grad_norm": 0.012079456821084023, - "learning_rate": 0.00019999996411477903, - "loss": 46.0, - "step": 3537 - }, - { - "epoch": 0.2705048072328306, - "grad_norm": 0.0037696314975619316, - "learning_rate": 0.00019999996409442728, - "loss": 46.0, - "step": 3538 - }, - { - "epoch": 0.27058126421622036, - "grad_norm": 0.001351679558865726, - "learning_rate": 0.00019999996407406976, - "loss": 46.0, - "step": 3539 - }, - { - "epoch": 0.27065772119961007, - "grad_norm": 0.0011624229373410344, - "learning_rate": 0.00019999996405370646, - "loss": 46.0, - "step": 3540 - }, - { - "epoch": 0.2707341781829998, - "grad_norm": 0.0011623959289863706, - "learning_rate": 0.00019999996403333742, - "loss": 46.0, - "step": 3541 - }, - { - "epoch": 0.27081063516638954, - "grad_norm": 0.0009441355941817164, - "learning_rate": 0.0001999999640129626, - "loss": 46.0, - "step": 3542 - }, - { - "epoch": 0.27088709214977924, - "grad_norm": 0.001355852116830647, - "learning_rate": 0.000199999963992582, - "loss": 46.0, - "step": 3543 - }, - { - "epoch": 0.27096354913316895, - "grad_norm": 0.0017991584027186036, - "learning_rate": 0.00019999996397219563, - "loss": 46.0, - "step": 3544 - }, - { - "epoch": 0.27104000611655865, - "grad_norm": 0.001899454277008772, - "learning_rate": 0.0001999999639518035, - "loss": 46.0, - "step": 3545 - }, - { - "epoch": 0.2711164630999484, - "grad_norm": 0.0022068587131798267, - "learning_rate": 0.00019999996393140558, - "loss": 46.0, - "step": 3546 - }, - { - "epoch": 0.2711929200833381, - "grad_norm": 0.0008864528499543667, - "learning_rate": 0.0001999999639110019, - "loss": 46.0, - "step": 3547 - }, - { - "epoch": 0.2712693770667278, - "grad_norm": 0.000995575450360775, - "learning_rate": 0.00019999996389059247, - "loss": 46.0, - "step": 3548 - }, - { - "epoch": 0.27134583405011753, - "grad_norm": 0.0009679142385721207, - "learning_rate": 0.00019999996387017727, - "loss": 46.0, - "step": 3549 - }, - { - "epoch": 0.2714222910335073, - "grad_norm": 0.0004857187741436064, - "learning_rate": 0.00019999996384975627, - "loss": 46.0, - "step": 3550 - }, - { - "epoch": 0.271498748016897, - "grad_norm": 0.0008006109273992479, - "learning_rate": 0.00019999996382932955, - "loss": 46.0, - "step": 3551 - }, - { - "epoch": 0.2715752050002867, - "grad_norm": 0.0013027467066422105, - "learning_rate": 0.000199999963808897, - "loss": 46.0, - "step": 3552 - }, - { - "epoch": 0.2716516619836764, - "grad_norm": 0.0014260867610573769, - "learning_rate": 0.00019999996378845873, - "loss": 46.0, - "step": 3553 - }, - { - "epoch": 0.27172811896706617, - "grad_norm": 0.0011759180342778563, - "learning_rate": 0.0001999999637680147, - "loss": 46.0, - "step": 3554 - }, - { - "epoch": 0.2718045759504559, - "grad_norm": 0.0006172429420985281, - "learning_rate": 0.00019999996374756485, - "loss": 46.0, - "step": 3555 - }, - { - "epoch": 0.2718810329338456, - "grad_norm": 0.001046796445734799, - "learning_rate": 0.00019999996372710926, - "loss": 46.0, - "step": 3556 - }, - { - "epoch": 0.27195748991723534, - "grad_norm": 0.001194289536215365, - "learning_rate": 0.0001999999637066479, - "loss": 46.0, - "step": 3557 - }, - { - "epoch": 0.27203394690062505, - "grad_norm": 0.0023409014102071524, - "learning_rate": 0.00019999996368618073, - "loss": 46.0, - "step": 3558 - }, - { - "epoch": 0.27211040388401475, - "grad_norm": 0.004527931567281485, - "learning_rate": 0.00019999996366570785, - "loss": 46.0, - "step": 3559 - }, - { - "epoch": 0.27218686086740446, - "grad_norm": 0.0012273762840777636, - "learning_rate": 0.00019999996364522917, - "loss": 46.0, - "step": 3560 - }, - { - "epoch": 0.2722633178507942, - "grad_norm": 0.0007445935043506324, - "learning_rate": 0.00019999996362474472, - "loss": 46.0, - "step": 3561 - }, - { - "epoch": 0.2723397748341839, - "grad_norm": 0.0007178076775744557, - "learning_rate": 0.00019999996360425452, - "loss": 46.0, - "step": 3562 - }, - { - "epoch": 0.27241623181757363, - "grad_norm": 0.0018861166900023818, - "learning_rate": 0.00019999996358375851, - "loss": 46.0, - "step": 3563 - }, - { - "epoch": 0.27249268880096333, - "grad_norm": 0.0005265391082502902, - "learning_rate": 0.0001999999635632568, - "loss": 46.0, - "step": 3564 - }, - { - "epoch": 0.2725691457843531, - "grad_norm": 0.0013579848455265164, - "learning_rate": 0.00019999996354274924, - "loss": 46.0, - "step": 3565 - }, - { - "epoch": 0.2726456027677428, - "grad_norm": 0.0013015683507546782, - "learning_rate": 0.00019999996352223595, - "loss": 46.0, - "step": 3566 - }, - { - "epoch": 0.2727220597511325, - "grad_norm": 0.0014005533885210752, - "learning_rate": 0.0001999999635017169, - "loss": 46.0, - "step": 3567 - }, - { - "epoch": 0.2727985167345222, - "grad_norm": 0.0016418549930676818, - "learning_rate": 0.00019999996348119207, - "loss": 46.0, - "step": 3568 - }, - { - "epoch": 0.272874973717912, - "grad_norm": 0.0022029762621968985, - "learning_rate": 0.00019999996346066148, - "loss": 46.0, - "step": 3569 - }, - { - "epoch": 0.2729514307013017, - "grad_norm": 0.0007229609764181077, - "learning_rate": 0.00019999996344012512, - "loss": 46.0, - "step": 3570 - }, - { - "epoch": 0.2730278876846914, - "grad_norm": 0.0007221169653348625, - "learning_rate": 0.00019999996341958296, - "loss": 46.0, - "step": 3571 - }, - { - "epoch": 0.27310434466808114, - "grad_norm": 0.003370679449290037, - "learning_rate": 0.00019999996339903505, - "loss": 46.0, - "step": 3572 - }, - { - "epoch": 0.27318080165147085, - "grad_norm": 0.001101373927667737, - "learning_rate": 0.00019999996337848137, - "loss": 46.0, - "step": 3573 - }, - { - "epoch": 0.27325725863486056, - "grad_norm": 0.0007499827188439667, - "learning_rate": 0.00019999996335792194, - "loss": 46.0, - "step": 3574 - }, - { - "epoch": 0.27333371561825026, - "grad_norm": 0.0012965545756742358, - "learning_rate": 0.0001999999633373567, - "loss": 46.0, - "step": 3575 - }, - { - "epoch": 0.27341017260164, - "grad_norm": 0.0003533526905812323, - "learning_rate": 0.0001999999633167857, - "loss": 46.0, - "step": 3576 - }, - { - "epoch": 0.27348662958502973, - "grad_norm": 0.001247638720087707, - "learning_rate": 0.00019999996329620896, - "loss": 46.0, - "step": 3577 - }, - { - "epoch": 0.27356308656841943, - "grad_norm": 0.001336206216365099, - "learning_rate": 0.0001999999632756264, - "loss": 46.0, - "step": 3578 - }, - { - "epoch": 0.27363954355180914, - "grad_norm": 0.0005127611220814288, - "learning_rate": 0.00019999996325503815, - "loss": 46.0, - "step": 3579 - }, - { - "epoch": 0.2737160005351989, - "grad_norm": 0.0018877009861171246, - "learning_rate": 0.00019999996323444408, - "loss": 46.0, - "step": 3580 - }, - { - "epoch": 0.2737924575185886, - "grad_norm": 0.0008929924224503338, - "learning_rate": 0.00019999996321384424, - "loss": 46.0, - "step": 3581 - }, - { - "epoch": 0.2738689145019783, - "grad_norm": 0.0012983556371182203, - "learning_rate": 0.00019999996319323865, - "loss": 46.0, - "step": 3582 - }, - { - "epoch": 0.27394537148536807, - "grad_norm": 0.0039312588050961494, - "learning_rate": 0.00019999996317262726, - "loss": 46.0, - "step": 3583 - }, - { - "epoch": 0.2740218284687578, - "grad_norm": 0.002769493730738759, - "learning_rate": 0.00019999996315201013, - "loss": 46.0, - "step": 3584 - }, - { - "epoch": 0.2740982854521475, - "grad_norm": 0.0006664571701548994, - "learning_rate": 0.00019999996313138722, - "loss": 46.0, - "step": 3585 - }, - { - "epoch": 0.2741747424355372, - "grad_norm": 0.0011385715333744884, - "learning_rate": 0.00019999996311075854, - "loss": 46.0, - "step": 3586 - }, - { - "epoch": 0.27425119941892695, - "grad_norm": 0.002503304975107312, - "learning_rate": 0.0001999999630901241, - "loss": 46.0, - "step": 3587 - }, - { - "epoch": 0.27432765640231666, - "grad_norm": 0.00046438383287750185, - "learning_rate": 0.00019999996306948386, - "loss": 46.0, - "step": 3588 - }, - { - "epoch": 0.27440411338570636, - "grad_norm": 0.0008705140207894146, - "learning_rate": 0.00019999996304883788, - "loss": 46.0, - "step": 3589 - }, - { - "epoch": 0.27448057036909607, - "grad_norm": 0.0026737467851489782, - "learning_rate": 0.00019999996302818613, - "loss": 46.0, - "step": 3590 - }, - { - "epoch": 0.2745570273524858, - "grad_norm": 0.002014830010011792, - "learning_rate": 0.00019999996300752859, - "loss": 46.0, - "step": 3591 - }, - { - "epoch": 0.27463348433587553, - "grad_norm": 0.0014765787636861205, - "learning_rate": 0.0001999999629868653, - "loss": 46.0, - "step": 3592 - }, - { - "epoch": 0.27470994131926524, - "grad_norm": 0.0020565094891935587, - "learning_rate": 0.00019999996296619622, - "loss": 46.0, - "step": 3593 - }, - { - "epoch": 0.27478639830265494, - "grad_norm": 0.0014105496229603887, - "learning_rate": 0.00019999996294552138, - "loss": 46.0, - "step": 3594 - }, - { - "epoch": 0.2748628552860447, - "grad_norm": 0.0010734430979937315, - "learning_rate": 0.0001999999629248408, - "loss": 46.0, - "step": 3595 - }, - { - "epoch": 0.2749393122694344, - "grad_norm": 0.0009878509445115924, - "learning_rate": 0.0001999999629041544, - "loss": 46.0, - "step": 3596 - }, - { - "epoch": 0.2750157692528241, - "grad_norm": 0.002685703570023179, - "learning_rate": 0.00019999996288346225, - "loss": 46.0, - "step": 3597 - }, - { - "epoch": 0.2750922262362139, - "grad_norm": 0.0020470006857067347, - "learning_rate": 0.00019999996286276434, - "loss": 46.0, - "step": 3598 - }, - { - "epoch": 0.2751686832196036, - "grad_norm": 0.0010294730309396982, - "learning_rate": 0.00019999996284206066, - "loss": 46.0, - "step": 3599 - }, - { - "epoch": 0.2752451402029933, - "grad_norm": 0.0009523346088826656, - "learning_rate": 0.00019999996282135123, - "loss": 46.0, - "step": 3600 - }, - { - "epoch": 0.275321597186383, - "grad_norm": 0.0004560293455142528, - "learning_rate": 0.00019999996280063598, - "loss": 46.0, - "step": 3601 - }, - { - "epoch": 0.27539805416977275, - "grad_norm": 0.005681591108441353, - "learning_rate": 0.000199999962779915, - "loss": 46.0, - "step": 3602 - }, - { - "epoch": 0.27547451115316246, - "grad_norm": 0.0024480780120939016, - "learning_rate": 0.00019999996275918823, - "loss": 46.0, - "step": 3603 - }, - { - "epoch": 0.27555096813655217, - "grad_norm": 0.0016501410864293575, - "learning_rate": 0.0001999999627384557, - "loss": 46.0, - "step": 3604 - }, - { - "epoch": 0.27562742511994187, - "grad_norm": 0.001211164053529501, - "learning_rate": 0.00019999996271771742, - "loss": 46.0, - "step": 3605 - }, - { - "epoch": 0.27570388210333163, - "grad_norm": 0.000644101994112134, - "learning_rate": 0.00019999996269697335, - "loss": 46.0, - "step": 3606 - }, - { - "epoch": 0.27578033908672134, - "grad_norm": 0.0010957722552120686, - "learning_rate": 0.0001999999626762235, - "loss": 46.0, - "step": 3607 - }, - { - "epoch": 0.27585679607011104, - "grad_norm": 0.0007161587127484381, - "learning_rate": 0.00019999996265546787, - "loss": 46.0, - "step": 3608 - }, - { - "epoch": 0.27593325305350075, - "grad_norm": 0.003564264392480254, - "learning_rate": 0.0001999999626347065, - "loss": 46.0, - "step": 3609 - }, - { - "epoch": 0.2760097100368905, - "grad_norm": 0.0006503176991827786, - "learning_rate": 0.00019999996261393937, - "loss": 46.0, - "step": 3610 - }, - { - "epoch": 0.2760861670202802, - "grad_norm": 0.001754163415171206, - "learning_rate": 0.00019999996259316644, - "loss": 46.0, - "step": 3611 - }, - { - "epoch": 0.2761626240036699, - "grad_norm": 0.001982693327590823, - "learning_rate": 0.00019999996257238776, - "loss": 46.0, - "step": 3612 - }, - { - "epoch": 0.2762390809870597, - "grad_norm": 0.0016400435706600547, - "learning_rate": 0.0001999999625516033, - "loss": 46.0, - "step": 3613 - }, - { - "epoch": 0.2763155379704494, - "grad_norm": 0.00039631876279599965, - "learning_rate": 0.00019999996253081308, - "loss": 46.0, - "step": 3614 - }, - { - "epoch": 0.2763919949538391, - "grad_norm": 0.001241436111740768, - "learning_rate": 0.00019999996251001708, - "loss": 46.0, - "step": 3615 - }, - { - "epoch": 0.2764684519372288, - "grad_norm": 0.0005651030223816633, - "learning_rate": 0.0001999999624892153, - "loss": 46.0, - "step": 3616 - }, - { - "epoch": 0.27654490892061856, - "grad_norm": 0.001037145615555346, - "learning_rate": 0.00019999996246840778, - "loss": 46.0, - "step": 3617 - }, - { - "epoch": 0.27662136590400827, - "grad_norm": 0.0009783509885892272, - "learning_rate": 0.00019999996244759446, - "loss": 46.0, - "step": 3618 - }, - { - "epoch": 0.27669782288739797, - "grad_norm": 0.0007942967931739986, - "learning_rate": 0.00019999996242677542, - "loss": 46.0, - "step": 3619 - }, - { - "epoch": 0.2767742798707877, - "grad_norm": 0.0028216196224093437, - "learning_rate": 0.00019999996240595058, - "loss": 46.0, - "step": 3620 - }, - { - "epoch": 0.27685073685417744, - "grad_norm": 0.00045819449587725103, - "learning_rate": 0.00019999996238511997, - "loss": 46.0, - "step": 3621 - }, - { - "epoch": 0.27692719383756714, - "grad_norm": 0.0014861531089991331, - "learning_rate": 0.00019999996236428358, - "loss": 46.0, - "step": 3622 - }, - { - "epoch": 0.27700365082095685, - "grad_norm": 0.0012091546086594462, - "learning_rate": 0.00019999996234344142, - "loss": 46.0, - "step": 3623 - }, - { - "epoch": 0.27708010780434655, - "grad_norm": 0.0023589839693158865, - "learning_rate": 0.0001999999623225935, - "loss": 46.0, - "step": 3624 - }, - { - "epoch": 0.2771565647877363, - "grad_norm": 0.0011700313771143556, - "learning_rate": 0.0001999999623017398, - "loss": 46.0, - "step": 3625 - }, - { - "epoch": 0.277233021771126, - "grad_norm": 0.0021366956643760204, - "learning_rate": 0.00019999996228088035, - "loss": 46.0, - "step": 3626 - }, - { - "epoch": 0.2773094787545157, - "grad_norm": 0.0006147163221612573, - "learning_rate": 0.00019999996226001513, - "loss": 46.0, - "step": 3627 - }, - { - "epoch": 0.2773859357379055, - "grad_norm": 0.0011000499362125993, - "learning_rate": 0.00019999996223914413, - "loss": 46.0, - "step": 3628 - }, - { - "epoch": 0.2774623927212952, - "grad_norm": 0.0024103980977088213, - "learning_rate": 0.00019999996221826735, - "loss": 46.0, - "step": 3629 - }, - { - "epoch": 0.2775388497046849, - "grad_norm": 0.0010705257300287485, - "learning_rate": 0.00019999996219738483, - "loss": 46.0, - "step": 3630 - }, - { - "epoch": 0.2776153066880746, - "grad_norm": 0.005650050472468138, - "learning_rate": 0.0001999999621764965, - "loss": 46.0, - "step": 3631 - }, - { - "epoch": 0.27769176367146436, - "grad_norm": 0.00201904377900064, - "learning_rate": 0.00019999996215560244, - "loss": 46.0, - "step": 3632 - }, - { - "epoch": 0.27776822065485407, - "grad_norm": 0.0003853754315059632, - "learning_rate": 0.0001999999621347026, - "loss": 46.0, - "step": 3633 - }, - { - "epoch": 0.2778446776382438, - "grad_norm": 0.004631387535482645, - "learning_rate": 0.000199999962113797, - "loss": 46.0, - "step": 3634 - }, - { - "epoch": 0.2779211346216335, - "grad_norm": 0.000928018765989691, - "learning_rate": 0.00019999996209288558, - "loss": 46.0, - "step": 3635 - }, - { - "epoch": 0.27799759160502324, - "grad_norm": 0.0008019008091650903, - "learning_rate": 0.00019999996207196844, - "loss": 46.0, - "step": 3636 - }, - { - "epoch": 0.27807404858841295, - "grad_norm": 0.0010388718219473958, - "learning_rate": 0.0001999999620510455, - "loss": 46.0, - "step": 3637 - }, - { - "epoch": 0.27815050557180265, - "grad_norm": 0.0015405297745019197, - "learning_rate": 0.0001999999620301168, - "loss": 46.0, - "step": 3638 - }, - { - "epoch": 0.27822696255519236, - "grad_norm": 0.004980092402547598, - "learning_rate": 0.00019999996200918235, - "loss": 46.0, - "step": 3639 - }, - { - "epoch": 0.2783034195385821, - "grad_norm": 0.0022869580425322056, - "learning_rate": 0.00019999996198824212, - "loss": 46.0, - "step": 3640 - }, - { - "epoch": 0.2783798765219718, - "grad_norm": 0.0013629082823172212, - "learning_rate": 0.00019999996196729612, - "loss": 46.0, - "step": 3641 - }, - { - "epoch": 0.27845633350536153, - "grad_norm": 0.0009925602935254574, - "learning_rate": 0.00019999996194634435, - "loss": 46.0, - "step": 3642 - }, - { - "epoch": 0.2785327904887513, - "grad_norm": 0.001760657411068678, - "learning_rate": 0.0001999999619253868, - "loss": 46.0, - "step": 3643 - }, - { - "epoch": 0.278609247472141, - "grad_norm": 0.0014615763211622834, - "learning_rate": 0.0001999999619044235, - "loss": 46.0, - "step": 3644 - }, - { - "epoch": 0.2786857044555307, - "grad_norm": 0.00123655388597399, - "learning_rate": 0.00019999996188345445, - "loss": 46.0, - "step": 3645 - }, - { - "epoch": 0.2787621614389204, - "grad_norm": 0.0007111479062587023, - "learning_rate": 0.00019999996186247958, - "loss": 46.0, - "step": 3646 - }, - { - "epoch": 0.27883861842231017, - "grad_norm": 0.0012057044077664614, - "learning_rate": 0.000199999961841499, - "loss": 46.0, - "step": 3647 - }, - { - "epoch": 0.2789150754056999, - "grad_norm": 0.0017454746412113309, - "learning_rate": 0.00019999996182051258, - "loss": 46.0, - "step": 3648 - }, - { - "epoch": 0.2789915323890896, - "grad_norm": 0.0014446472050622106, - "learning_rate": 0.00019999996179952042, - "loss": 46.0, - "step": 3649 - }, - { - "epoch": 0.2790679893724793, - "grad_norm": 0.0013323846505954862, - "learning_rate": 0.0001999999617785225, - "loss": 46.0, - "step": 3650 - }, - { - "epoch": 0.27914444635586905, - "grad_norm": 0.0004912894801236689, - "learning_rate": 0.0001999999617575188, - "loss": 46.0, - "step": 3651 - }, - { - "epoch": 0.27922090333925875, - "grad_norm": 0.0013517895713448524, - "learning_rate": 0.00019999996173650935, - "loss": 46.0, - "step": 3652 - }, - { - "epoch": 0.27929736032264846, - "grad_norm": 0.0011540567502379417, - "learning_rate": 0.0001999999617154941, - "loss": 46.0, - "step": 3653 - }, - { - "epoch": 0.27937381730603816, - "grad_norm": 0.001616124645806849, - "learning_rate": 0.0001999999616944731, - "loss": 46.0, - "step": 3654 - }, - { - "epoch": 0.2794502742894279, - "grad_norm": 0.001074964297004044, - "learning_rate": 0.00019999996167344635, - "loss": 46.0, - "step": 3655 - }, - { - "epoch": 0.27952673127281763, - "grad_norm": 0.0008659610757604241, - "learning_rate": 0.00019999996165241378, - "loss": 46.0, - "step": 3656 - }, - { - "epoch": 0.27960318825620734, - "grad_norm": 0.008615114726126194, - "learning_rate": 0.00019999996163137546, - "loss": 46.0, - "step": 3657 - }, - { - "epoch": 0.2796796452395971, - "grad_norm": 0.0010578272631391883, - "learning_rate": 0.00019999996161033142, - "loss": 46.0, - "step": 3658 - }, - { - "epoch": 0.2797561022229868, - "grad_norm": 0.0016543472884222865, - "learning_rate": 0.00019999996158928153, - "loss": 46.0, - "step": 3659 - }, - { - "epoch": 0.2798325592063765, - "grad_norm": 0.0014690675307065248, - "learning_rate": 0.00019999996156822594, - "loss": 46.0, - "step": 3660 - }, - { - "epoch": 0.2799090161897662, - "grad_norm": 0.002100287936627865, - "learning_rate": 0.00019999996154716453, - "loss": 46.0, - "step": 3661 - }, - { - "epoch": 0.279985473173156, - "grad_norm": 0.0028917405288666487, - "learning_rate": 0.00019999996152609737, - "loss": 46.0, - "step": 3662 - }, - { - "epoch": 0.2800619301565457, - "grad_norm": 0.001529567874968052, - "learning_rate": 0.00019999996150502444, - "loss": 46.0, - "step": 3663 - }, - { - "epoch": 0.2801383871399354, - "grad_norm": 0.0008070769836194813, - "learning_rate": 0.00019999996148394576, - "loss": 46.0, - "step": 3664 - }, - { - "epoch": 0.2802148441233251, - "grad_norm": 0.0013607712462544441, - "learning_rate": 0.00019999996146286128, - "loss": 46.0, - "step": 3665 - }, - { - "epoch": 0.28029130110671485, - "grad_norm": 0.000600371859036386, - "learning_rate": 0.00019999996144177105, - "loss": 46.0, - "step": 3666 - }, - { - "epoch": 0.28036775809010456, - "grad_norm": 0.0009783091954886913, - "learning_rate": 0.00019999996142067503, - "loss": 46.0, - "step": 3667 - }, - { - "epoch": 0.28044421507349426, - "grad_norm": 0.0015447192126885056, - "learning_rate": 0.00019999996139957325, - "loss": 46.0, - "step": 3668 - }, - { - "epoch": 0.28052067205688397, - "grad_norm": 0.0018628071993589401, - "learning_rate": 0.00019999996137846573, - "loss": 46.0, - "step": 3669 - }, - { - "epoch": 0.28059712904027373, - "grad_norm": 0.0016895532608032227, - "learning_rate": 0.0001999999613573524, - "loss": 46.0, - "step": 3670 - }, - { - "epoch": 0.28067358602366344, - "grad_norm": 0.0005364527460187674, - "learning_rate": 0.00019999996133623332, - "loss": 46.0, - "step": 3671 - }, - { - "epoch": 0.28075004300705314, - "grad_norm": 0.001170719158835709, - "learning_rate": 0.00019999996131510846, - "loss": 46.0, - "step": 3672 - }, - { - "epoch": 0.2808264999904429, - "grad_norm": 0.0016270236810669303, - "learning_rate": 0.00019999996129397784, - "loss": 46.0, - "step": 3673 - }, - { - "epoch": 0.2809029569738326, - "grad_norm": 0.005730601958930492, - "learning_rate": 0.00019999996127284146, - "loss": 46.0, - "step": 3674 - }, - { - "epoch": 0.2809794139572223, - "grad_norm": 0.0019150610314682126, - "learning_rate": 0.0001999999612516993, - "loss": 46.0, - "step": 3675 - }, - { - "epoch": 0.281055870940612, - "grad_norm": 0.0022221652325242758, - "learning_rate": 0.00019999996123055137, - "loss": 46.0, - "step": 3676 - }, - { - "epoch": 0.2811323279240018, - "grad_norm": 0.0014204467879608274, - "learning_rate": 0.00019999996120939766, - "loss": 46.0, - "step": 3677 - }, - { - "epoch": 0.2812087849073915, - "grad_norm": 0.0008353561861440539, - "learning_rate": 0.00019999996118823818, - "loss": 46.0, - "step": 3678 - }, - { - "epoch": 0.2812852418907812, - "grad_norm": 0.0007339682779274881, - "learning_rate": 0.00019999996116707296, - "loss": 46.0, - "step": 3679 - }, - { - "epoch": 0.2813616988741709, - "grad_norm": 0.003266470041126013, - "learning_rate": 0.00019999996114590193, - "loss": 46.0, - "step": 3680 - }, - { - "epoch": 0.28143815585756066, - "grad_norm": 0.0012170193949714303, - "learning_rate": 0.00019999996112472516, - "loss": 46.0, - "step": 3681 - }, - { - "epoch": 0.28151461284095036, - "grad_norm": 0.0014374416787177324, - "learning_rate": 0.0001999999611035426, - "loss": 46.0, - "step": 3682 - }, - { - "epoch": 0.28159106982434007, - "grad_norm": 0.0007415685686282814, - "learning_rate": 0.0001999999610823543, - "loss": 46.0, - "step": 3683 - }, - { - "epoch": 0.2816675268077298, - "grad_norm": 0.001207210822030902, - "learning_rate": 0.0001999999610611602, - "loss": 46.0, - "step": 3684 - }, - { - "epoch": 0.28174398379111953, - "grad_norm": 0.0009158743196167052, - "learning_rate": 0.00019999996103996036, - "loss": 46.0, - "step": 3685 - }, - { - "epoch": 0.28182044077450924, - "grad_norm": 0.003506585257127881, - "learning_rate": 0.00019999996101875472, - "loss": 46.0, - "step": 3686 - }, - { - "epoch": 0.28189689775789895, - "grad_norm": 0.0006057792343199253, - "learning_rate": 0.00019999996099754334, - "loss": 46.0, - "step": 3687 - }, - { - "epoch": 0.2819733547412887, - "grad_norm": 0.0011886082356795669, - "learning_rate": 0.00019999996097632615, - "loss": 46.0, - "step": 3688 - }, - { - "epoch": 0.2820498117246784, - "grad_norm": 0.001971777528524399, - "learning_rate": 0.00019999996095510322, - "loss": 46.0, - "step": 3689 - }, - { - "epoch": 0.2821262687080681, - "grad_norm": 0.0007664765580557287, - "learning_rate": 0.0001999999609338745, - "loss": 46.0, - "step": 3690 - }, - { - "epoch": 0.2822027256914578, - "grad_norm": 0.005547497887164354, - "learning_rate": 0.00019999996091264003, - "loss": 46.0, - "step": 3691 - }, - { - "epoch": 0.2822791826748476, - "grad_norm": 0.000978269032202661, - "learning_rate": 0.0001999999608913998, - "loss": 46.0, - "step": 3692 - }, - { - "epoch": 0.2823556396582373, - "grad_norm": 0.0008185630431398749, - "learning_rate": 0.0001999999608701538, - "loss": 46.0, - "step": 3693 - }, - { - "epoch": 0.282432096641627, - "grad_norm": 0.00230449833907187, - "learning_rate": 0.000199999960848902, - "loss": 46.0, - "step": 3694 - }, - { - "epoch": 0.2825085536250167, - "grad_norm": 0.0020101654808968306, - "learning_rate": 0.00019999996082764446, - "loss": 46.0, - "step": 3695 - }, - { - "epoch": 0.28258501060840646, - "grad_norm": 0.0027369833551347256, - "learning_rate": 0.00019999996080638112, - "loss": 46.0, - "step": 3696 - }, - { - "epoch": 0.28266146759179617, - "grad_norm": 0.0007445720839314163, - "learning_rate": 0.00019999996078511205, - "loss": 46.0, - "step": 3697 - }, - { - "epoch": 0.2827379245751859, - "grad_norm": 0.0030912559013813734, - "learning_rate": 0.00019999996076383722, - "loss": 46.0, - "step": 3698 - }, - { - "epoch": 0.28281438155857563, - "grad_norm": 0.0008372279116883874, - "learning_rate": 0.00019999996074255655, - "loss": 46.0, - "step": 3699 - }, - { - "epoch": 0.28289083854196534, - "grad_norm": 0.002350414404645562, - "learning_rate": 0.00019999996072127016, - "loss": 46.0, - "step": 3700 - }, - { - "epoch": 0.28296729552535504, - "grad_norm": 0.0030697931069880724, - "learning_rate": 0.000199999960699978, - "loss": 46.0, - "step": 3701 - }, - { - "epoch": 0.28304375250874475, - "grad_norm": 0.0010113042080774903, - "learning_rate": 0.00019999996067868007, - "loss": 46.0, - "step": 3702 - }, - { - "epoch": 0.2831202094921345, - "grad_norm": 0.0004754991678055376, - "learning_rate": 0.00019999996065737637, - "loss": 46.0, - "step": 3703 - }, - { - "epoch": 0.2831966664755242, - "grad_norm": 0.003134047845378518, - "learning_rate": 0.00019999996063606686, - "loss": 46.0, - "step": 3704 - }, - { - "epoch": 0.2832731234589139, - "grad_norm": 0.0007566006970591843, - "learning_rate": 0.00019999996061475164, - "loss": 46.0, - "step": 3705 - }, - { - "epoch": 0.28334958044230363, - "grad_norm": 0.0005881906836293638, - "learning_rate": 0.0001999999605934306, - "loss": 46.0, - "step": 3706 - }, - { - "epoch": 0.2834260374256934, - "grad_norm": 0.0016896775923669338, - "learning_rate": 0.0001999999605721038, - "loss": 46.0, - "step": 3707 - }, - { - "epoch": 0.2835024944090831, - "grad_norm": 0.0010982779785990715, - "learning_rate": 0.00019999996055077127, - "loss": 46.0, - "step": 3708 - }, - { - "epoch": 0.2835789513924728, - "grad_norm": 0.0012666484108194709, - "learning_rate": 0.00019999996052943295, - "loss": 46.0, - "step": 3709 - }, - { - "epoch": 0.2836554083758625, - "grad_norm": 0.0011422195238992572, - "learning_rate": 0.00019999996050808886, - "loss": 46.0, - "step": 3710 - }, - { - "epoch": 0.28373186535925227, - "grad_norm": 0.0021781160030514, - "learning_rate": 0.000199999960486739, - "loss": 46.0, - "step": 3711 - }, - { - "epoch": 0.28380832234264197, - "grad_norm": 0.0008051304612308741, - "learning_rate": 0.00019999996046538335, - "loss": 46.0, - "step": 3712 - }, - { - "epoch": 0.2838847793260317, - "grad_norm": 0.0008812343585304916, - "learning_rate": 0.00019999996044402197, - "loss": 46.0, - "step": 3713 - }, - { - "epoch": 0.28396123630942144, - "grad_norm": 0.0035376292653381824, - "learning_rate": 0.00019999996042265478, - "loss": 46.0, - "step": 3714 - }, - { - "epoch": 0.28403769329281114, - "grad_norm": 0.0013234912184998393, - "learning_rate": 0.00019999996040128185, - "loss": 46.0, - "step": 3715 - }, - { - "epoch": 0.28411415027620085, - "grad_norm": 0.0006036676932126284, - "learning_rate": 0.00019999996037990317, - "loss": 46.0, - "step": 3716 - }, - { - "epoch": 0.28419060725959056, - "grad_norm": 0.0009874098468571901, - "learning_rate": 0.00019999996035851867, - "loss": 46.0, - "step": 3717 - }, - { - "epoch": 0.2842670642429803, - "grad_norm": 0.0012311360333114862, - "learning_rate": 0.00019999996033712842, - "loss": 46.0, - "step": 3718 - }, - { - "epoch": 0.28434352122637, - "grad_norm": 0.0006533898995257914, - "learning_rate": 0.0001999999603157324, - "loss": 46.0, - "step": 3719 - }, - { - "epoch": 0.2844199782097597, - "grad_norm": 0.0005601924494840205, - "learning_rate": 0.00019999996029433062, - "loss": 46.0, - "step": 3720 - }, - { - "epoch": 0.28449643519314943, - "grad_norm": 0.0023518491070717573, - "learning_rate": 0.00019999996027292305, - "loss": 46.0, - "step": 3721 - }, - { - "epoch": 0.2845728921765392, - "grad_norm": 0.001143717672675848, - "learning_rate": 0.00019999996025150973, - "loss": 46.0, - "step": 3722 - }, - { - "epoch": 0.2846493491599289, - "grad_norm": 0.001057030283845961, - "learning_rate": 0.00019999996023009064, - "loss": 46.0, - "step": 3723 - }, - { - "epoch": 0.2847258061433186, - "grad_norm": 0.002260795794427395, - "learning_rate": 0.00019999996020866577, - "loss": 46.0, - "step": 3724 - }, - { - "epoch": 0.2848022631267083, - "grad_norm": 0.002682718215510249, - "learning_rate": 0.00019999996018723514, - "loss": 46.0, - "step": 3725 - }, - { - "epoch": 0.28487872011009807, - "grad_norm": 0.000765470031183213, - "learning_rate": 0.00019999996016579875, - "loss": 46.0, - "step": 3726 - }, - { - "epoch": 0.2849551770934878, - "grad_norm": 0.001148425741121173, - "learning_rate": 0.00019999996014435657, - "loss": 46.0, - "step": 3727 - }, - { - "epoch": 0.2850316340768775, - "grad_norm": 0.0007466459064744413, - "learning_rate": 0.0001999999601229086, - "loss": 46.0, - "step": 3728 - }, - { - "epoch": 0.28510809106026724, - "grad_norm": 0.0030335683841258287, - "learning_rate": 0.0001999999601014549, - "loss": 46.0, - "step": 3729 - }, - { - "epoch": 0.28518454804365695, - "grad_norm": 0.0022319993004202843, - "learning_rate": 0.00019999996007999542, - "loss": 46.0, - "step": 3730 - }, - { - "epoch": 0.28526100502704665, - "grad_norm": 0.0024950969964265823, - "learning_rate": 0.00019999996005853017, - "loss": 46.0, - "step": 3731 - }, - { - "epoch": 0.28533746201043636, - "grad_norm": 0.0009043251629918814, - "learning_rate": 0.00019999996003705915, - "loss": 46.0, - "step": 3732 - }, - { - "epoch": 0.2854139189938261, - "grad_norm": 0.0013740276917815208, - "learning_rate": 0.00019999996001558238, - "loss": 46.0, - "step": 3733 - }, - { - "epoch": 0.2854903759772158, - "grad_norm": 0.0011639823205769062, - "learning_rate": 0.0001999999599940998, - "loss": 46.0, - "step": 3734 - }, - { - "epoch": 0.28556683296060553, - "grad_norm": 0.012872766703367233, - "learning_rate": 0.00019999995997261146, - "loss": 46.0, - "step": 3735 - }, - { - "epoch": 0.28564328994399524, - "grad_norm": 0.0012254433240741491, - "learning_rate": 0.00019999995995111734, - "loss": 46.0, - "step": 3736 - }, - { - "epoch": 0.285719746927385, - "grad_norm": 0.0017625355394557118, - "learning_rate": 0.00019999995992961748, - "loss": 46.0, - "step": 3737 - }, - { - "epoch": 0.2857962039107747, - "grad_norm": 0.0016765417531132698, - "learning_rate": 0.00019999995990811187, - "loss": 46.0, - "step": 3738 - }, - { - "epoch": 0.2858726608941644, - "grad_norm": 0.0004553665639832616, - "learning_rate": 0.00019999995988660046, - "loss": 46.0, - "step": 3739 - }, - { - "epoch": 0.2859491178775541, - "grad_norm": 0.0008050231263041496, - "learning_rate": 0.0001999999598650833, - "loss": 46.0, - "step": 3740 - }, - { - "epoch": 0.2860255748609439, - "grad_norm": 0.0011766463285312057, - "learning_rate": 0.0001999999598435603, - "loss": 46.0, - "step": 3741 - }, - { - "epoch": 0.2861020318443336, - "grad_norm": 0.000840594177134335, - "learning_rate": 0.00019999995982203164, - "loss": 46.0, - "step": 3742 - }, - { - "epoch": 0.2861784888277233, - "grad_norm": 0.0015770596219226718, - "learning_rate": 0.00019999995980049713, - "loss": 46.0, - "step": 3743 - }, - { - "epoch": 0.28625494581111305, - "grad_norm": 0.001588560058735311, - "learning_rate": 0.00019999995977895688, - "loss": 46.0, - "step": 3744 - }, - { - "epoch": 0.28633140279450275, - "grad_norm": 0.0010057406034320593, - "learning_rate": 0.00019999995975741085, - "loss": 46.0, - "step": 3745 - }, - { - "epoch": 0.28640785977789246, - "grad_norm": 0.0015824324218556285, - "learning_rate": 0.00019999995973585906, - "loss": 46.0, - "step": 3746 - }, - { - "epoch": 0.28648431676128217, - "grad_norm": 0.0011771401623263955, - "learning_rate": 0.00019999995971430149, - "loss": 46.0, - "step": 3747 - }, - { - "epoch": 0.2865607737446719, - "grad_norm": 0.0009981276234611869, - "learning_rate": 0.00019999995969273817, - "loss": 46.0, - "step": 3748 - }, - { - "epoch": 0.28663723072806163, - "grad_norm": 0.0010086316615343094, - "learning_rate": 0.00019999995967116908, - "loss": 46.0, - "step": 3749 - }, - { - "epoch": 0.28671368771145134, - "grad_norm": 0.0007743178866803646, - "learning_rate": 0.00019999995964959419, - "loss": 46.0, - "step": 3750 - }, - { - "epoch": 0.28679014469484104, - "grad_norm": 0.0016441509360447526, - "learning_rate": 0.00019999995962801355, - "loss": 46.0, - "step": 3751 - }, - { - "epoch": 0.2868666016782308, - "grad_norm": 0.0009750126046128571, - "learning_rate": 0.00019999995960642714, - "loss": 46.0, - "step": 3752 - }, - { - "epoch": 0.2869430586616205, - "grad_norm": 0.001294981804676354, - "learning_rate": 0.00019999995958483498, - "loss": 46.0, - "step": 3753 - }, - { - "epoch": 0.2870195156450102, - "grad_norm": 0.001622818293981254, - "learning_rate": 0.000199999959563237, - "loss": 46.0, - "step": 3754 - }, - { - "epoch": 0.2870959726283999, - "grad_norm": 0.0009695104090496898, - "learning_rate": 0.00019999995954163332, - "loss": 46.0, - "step": 3755 - }, - { - "epoch": 0.2871724296117897, - "grad_norm": 0.0012397788232192397, - "learning_rate": 0.0001999999595200238, - "loss": 46.0, - "step": 3756 - }, - { - "epoch": 0.2872488865951794, - "grad_norm": 0.0029442079830914736, - "learning_rate": 0.00019999995949840856, - "loss": 46.0, - "step": 3757 - }, - { - "epoch": 0.2873253435785691, - "grad_norm": 0.0020591500215232372, - "learning_rate": 0.00019999995947678754, - "loss": 46.0, - "step": 3758 - }, - { - "epoch": 0.28740180056195885, - "grad_norm": 0.0008922885172069073, - "learning_rate": 0.00019999995945516074, - "loss": 46.0, - "step": 3759 - }, - { - "epoch": 0.28747825754534856, - "grad_norm": 0.0014189706416800618, - "learning_rate": 0.00019999995943352817, - "loss": 46.0, - "step": 3760 - }, - { - "epoch": 0.28755471452873826, - "grad_norm": 0.0033019159454852343, - "learning_rate": 0.00019999995941188985, - "loss": 46.0, - "step": 3761 - }, - { - "epoch": 0.28763117151212797, - "grad_norm": 0.0008956018718890846, - "learning_rate": 0.00019999995939024573, - "loss": 46.0, - "step": 3762 - }, - { - "epoch": 0.28770762849551773, - "grad_norm": 0.0012777860974892974, - "learning_rate": 0.00019999995936859584, - "loss": 46.0, - "step": 3763 - }, - { - "epoch": 0.28778408547890744, - "grad_norm": 0.0033158992882817984, - "learning_rate": 0.0001999999593469402, - "loss": 46.0, - "step": 3764 - }, - { - "epoch": 0.28786054246229714, - "grad_norm": 0.000867457187268883, - "learning_rate": 0.0001999999593252788, - "loss": 46.0, - "step": 3765 - }, - { - "epoch": 0.28793699944568685, - "grad_norm": 0.0017554003279656172, - "learning_rate": 0.0001999999593036116, - "loss": 46.0, - "step": 3766 - }, - { - "epoch": 0.2880134564290766, - "grad_norm": 0.0006303522386588156, - "learning_rate": 0.00019999995928193865, - "loss": 46.0, - "step": 3767 - }, - { - "epoch": 0.2880899134124663, - "grad_norm": 0.0005204095505177975, - "learning_rate": 0.00019999995926025995, - "loss": 46.0, - "step": 3768 - }, - { - "epoch": 0.288166370395856, - "grad_norm": 0.0023115079384297132, - "learning_rate": 0.00019999995923857547, - "loss": 46.0, - "step": 3769 - }, - { - "epoch": 0.2882428273792457, - "grad_norm": 0.0011257268488407135, - "learning_rate": 0.0001999999592168852, - "loss": 46.0, - "step": 3770 - }, - { - "epoch": 0.2883192843626355, - "grad_norm": 0.0020739634055644274, - "learning_rate": 0.00019999995919518917, - "loss": 46.0, - "step": 3771 - }, - { - "epoch": 0.2883957413460252, - "grad_norm": 0.0014096315717324615, - "learning_rate": 0.00019999995917348735, - "loss": 46.0, - "step": 3772 - }, - { - "epoch": 0.2884721983294149, - "grad_norm": 0.0005879343370907009, - "learning_rate": 0.00019999995915177978, - "loss": 46.0, - "step": 3773 - }, - { - "epoch": 0.28854865531280466, - "grad_norm": 0.000697542738635093, - "learning_rate": 0.00019999995913006646, - "loss": 46.0, - "step": 3774 - }, - { - "epoch": 0.28862511229619436, - "grad_norm": 0.0005846460117027164, - "learning_rate": 0.00019999995910834734, - "loss": 46.0, - "step": 3775 - }, - { - "epoch": 0.28870156927958407, - "grad_norm": 0.0014586938777938485, - "learning_rate": 0.00019999995908662248, - "loss": 46.0, - "step": 3776 - }, - { - "epoch": 0.2887780262629738, - "grad_norm": 0.0016310411738231778, - "learning_rate": 0.00019999995906489181, - "loss": 46.0, - "step": 3777 - }, - { - "epoch": 0.28885448324636354, - "grad_norm": 0.0033955464605242014, - "learning_rate": 0.00019999995904315543, - "loss": 46.0, - "step": 3778 - }, - { - "epoch": 0.28893094022975324, - "grad_norm": 0.0020739377941936255, - "learning_rate": 0.00019999995902141322, - "loss": 46.0, - "step": 3779 - }, - { - "epoch": 0.28900739721314295, - "grad_norm": 0.003352381521835923, - "learning_rate": 0.0001999999589996653, - "loss": 46.0, - "step": 3780 - }, - { - "epoch": 0.28908385419653265, - "grad_norm": 0.002708357758820057, - "learning_rate": 0.00019999995897791156, - "loss": 46.0, - "step": 3781 - }, - { - "epoch": 0.2891603111799224, - "grad_norm": 0.0062057143077254295, - "learning_rate": 0.00019999995895615206, - "loss": 46.0, - "step": 3782 - }, - { - "epoch": 0.2892367681633121, - "grad_norm": 0.0014678441220894456, - "learning_rate": 0.00019999995893438678, - "loss": 46.0, - "step": 3783 - }, - { - "epoch": 0.2893132251467018, - "grad_norm": 0.0005883380654267967, - "learning_rate": 0.00019999995891261576, - "loss": 46.0, - "step": 3784 - }, - { - "epoch": 0.28938968213009153, - "grad_norm": 0.0006215718458406627, - "learning_rate": 0.00019999995889083896, - "loss": 46.0, - "step": 3785 - }, - { - "epoch": 0.2894661391134813, - "grad_norm": 0.0015106117352843285, - "learning_rate": 0.0001999999588690564, - "loss": 46.0, - "step": 3786 - }, - { - "epoch": 0.289542596096871, - "grad_norm": 0.0013974400935694575, - "learning_rate": 0.00019999995884726804, - "loss": 46.0, - "step": 3787 - }, - { - "epoch": 0.2896190530802607, - "grad_norm": 0.000646473781671375, - "learning_rate": 0.00019999995882547393, - "loss": 46.0, - "step": 3788 - }, - { - "epoch": 0.28969551006365046, - "grad_norm": 0.0031547234393656254, - "learning_rate": 0.00019999995880367406, - "loss": 46.0, - "step": 3789 - }, - { - "epoch": 0.28977196704704017, - "grad_norm": 0.0005789370043203235, - "learning_rate": 0.0001999999587818684, - "loss": 46.0, - "step": 3790 - }, - { - "epoch": 0.2898484240304299, - "grad_norm": 0.003012710716575384, - "learning_rate": 0.000199999958760057, - "loss": 46.0, - "step": 3791 - }, - { - "epoch": 0.2899248810138196, - "grad_norm": 0.0006698978831991553, - "learning_rate": 0.0001999999587382398, - "loss": 46.0, - "step": 3792 - }, - { - "epoch": 0.29000133799720934, - "grad_norm": 0.0009188276017084718, - "learning_rate": 0.00019999995871641682, - "loss": 46.0, - "step": 3793 - }, - { - "epoch": 0.29007779498059905, - "grad_norm": 0.0014205612242221832, - "learning_rate": 0.00019999995869458812, - "loss": 46.0, - "step": 3794 - }, - { - "epoch": 0.29015425196398875, - "grad_norm": 0.0007790869567543268, - "learning_rate": 0.00019999995867275362, - "loss": 46.0, - "step": 3795 - }, - { - "epoch": 0.29023070894737846, - "grad_norm": 0.00046087370719760656, - "learning_rate": 0.00019999995865091337, - "loss": 46.0, - "step": 3796 - }, - { - "epoch": 0.2903071659307682, - "grad_norm": 0.0009427714976482093, - "learning_rate": 0.00019999995862906734, - "loss": 46.0, - "step": 3797 - }, - { - "epoch": 0.2903836229141579, - "grad_norm": 0.0006847182521596551, - "learning_rate": 0.00019999995860721552, - "loss": 46.0, - "step": 3798 - }, - { - "epoch": 0.29046007989754763, - "grad_norm": 0.0008071879274211824, - "learning_rate": 0.00019999995858535795, - "loss": 46.0, - "step": 3799 - }, - { - "epoch": 0.29053653688093733, - "grad_norm": 0.0011825787369161844, - "learning_rate": 0.00019999995856349458, - "loss": 46.0, - "step": 3800 - }, - { - "epoch": 0.2906129938643271, - "grad_norm": 0.0016411119140684605, - "learning_rate": 0.0001999999585416255, - "loss": 46.0, - "step": 3801 - }, - { - "epoch": 0.2906894508477168, - "grad_norm": 0.0011062535922974348, - "learning_rate": 0.0001999999585197506, - "loss": 46.0, - "step": 3802 - }, - { - "epoch": 0.2907659078311065, - "grad_norm": 0.0021068945061415434, - "learning_rate": 0.00019999995849786996, - "loss": 46.0, - "step": 3803 - }, - { - "epoch": 0.29084236481449627, - "grad_norm": 0.004064976237714291, - "learning_rate": 0.00019999995847598353, - "loss": 46.0, - "step": 3804 - }, - { - "epoch": 0.290918821797886, - "grad_norm": 0.0008881333633325994, - "learning_rate": 0.00019999995845409134, - "loss": 46.0, - "step": 3805 - }, - { - "epoch": 0.2909952787812757, - "grad_norm": 0.0008308746037073433, - "learning_rate": 0.00019999995843219339, - "loss": 46.0, - "step": 3806 - }, - { - "epoch": 0.2910717357646654, - "grad_norm": 0.0015752946492284536, - "learning_rate": 0.00019999995841028966, - "loss": 46.0, - "step": 3807 - }, - { - "epoch": 0.29114819274805515, - "grad_norm": 0.0019235041690990329, - "learning_rate": 0.00019999995838838015, - "loss": 46.0, - "step": 3808 - }, - { - "epoch": 0.29122464973144485, - "grad_norm": 0.0009915533009916544, - "learning_rate": 0.00019999995836646488, - "loss": 46.0, - "step": 3809 - }, - { - "epoch": 0.29130110671483456, - "grad_norm": 0.004070097114890814, - "learning_rate": 0.00019999995834454385, - "loss": 46.0, - "step": 3810 - }, - { - "epoch": 0.29137756369822426, - "grad_norm": 0.0016343813622370362, - "learning_rate": 0.00019999995832261703, - "loss": 46.0, - "step": 3811 - }, - { - "epoch": 0.291454020681614, - "grad_norm": 0.0010043747024610639, - "learning_rate": 0.00019999995830068446, - "loss": 46.0, - "step": 3812 - }, - { - "epoch": 0.29153047766500373, - "grad_norm": 0.0012621752684935927, - "learning_rate": 0.00019999995827874612, - "loss": 46.0, - "step": 3813 - }, - { - "epoch": 0.29160693464839343, - "grad_norm": 0.00109544035512954, - "learning_rate": 0.000199999958256802, - "loss": 46.0, - "step": 3814 - }, - { - "epoch": 0.2916833916317832, - "grad_norm": 0.0036395767237991095, - "learning_rate": 0.0001999999582348521, - "loss": 46.0, - "step": 3815 - }, - { - "epoch": 0.2917598486151729, - "grad_norm": 0.001107160234823823, - "learning_rate": 0.00019999995821289645, - "loss": 46.0, - "step": 3816 - }, - { - "epoch": 0.2918363055985626, - "grad_norm": 0.0004128957516513765, - "learning_rate": 0.00019999995819093504, - "loss": 46.0, - "step": 3817 - }, - { - "epoch": 0.2919127625819523, - "grad_norm": 0.011029314249753952, - "learning_rate": 0.00019999995816896783, - "loss": 46.0, - "step": 3818 - }, - { - "epoch": 0.2919892195653421, - "grad_norm": 0.0036829912569373846, - "learning_rate": 0.00019999995814699487, - "loss": 46.0, - "step": 3819 - }, - { - "epoch": 0.2920656765487318, - "grad_norm": 0.0009113659616559744, - "learning_rate": 0.00019999995812501614, - "loss": 46.0, - "step": 3820 - }, - { - "epoch": 0.2921421335321215, - "grad_norm": 0.00145654974039644, - "learning_rate": 0.00019999995810303164, - "loss": 46.0, - "step": 3821 - }, - { - "epoch": 0.2922185905155112, - "grad_norm": 0.0010323075111955404, - "learning_rate": 0.00019999995808104136, - "loss": 46.0, - "step": 3822 - }, - { - "epoch": 0.29229504749890095, - "grad_norm": 0.0008147822809405625, - "learning_rate": 0.0001999999580590453, - "loss": 46.0, - "step": 3823 - }, - { - "epoch": 0.29237150448229066, - "grad_norm": 0.000815986655652523, - "learning_rate": 0.00019999995803704352, - "loss": 46.0, - "step": 3824 - }, - { - "epoch": 0.29244796146568036, - "grad_norm": 0.0014565408928319812, - "learning_rate": 0.00019999995801503592, - "loss": 46.0, - "step": 3825 - }, - { - "epoch": 0.29252441844907007, - "grad_norm": 0.0014035555068403482, - "learning_rate": 0.0001999999579930226, - "loss": 46.0, - "step": 3826 - }, - { - "epoch": 0.29260087543245983, - "grad_norm": 0.0025402100291103125, - "learning_rate": 0.00019999995797100346, - "loss": 46.0, - "step": 3827 - }, - { - "epoch": 0.29267733241584953, - "grad_norm": 0.0037015005946159363, - "learning_rate": 0.0001999999579489786, - "loss": 46.0, - "step": 3828 - }, - { - "epoch": 0.29275378939923924, - "grad_norm": 0.002406664425507188, - "learning_rate": 0.00019999995792694794, - "loss": 46.0, - "step": 3829 - }, - { - "epoch": 0.292830246382629, - "grad_norm": 0.0012523853220045567, - "learning_rate": 0.00019999995790491147, - "loss": 46.0, - "step": 3830 - }, - { - "epoch": 0.2929067033660187, - "grad_norm": 0.002196969697251916, - "learning_rate": 0.0001999999578828693, - "loss": 46.0, - "step": 3831 - }, - { - "epoch": 0.2929831603494084, - "grad_norm": 0.004046255256980658, - "learning_rate": 0.00019999995786082133, - "loss": 46.0, - "step": 3832 - }, - { - "epoch": 0.2930596173327981, - "grad_norm": 0.0005504401633515954, - "learning_rate": 0.0001999999578387676, - "loss": 46.0, - "step": 3833 - }, - { - "epoch": 0.2931360743161879, - "grad_norm": 0.001778360572643578, - "learning_rate": 0.00019999995781670808, - "loss": 46.0, - "step": 3834 - }, - { - "epoch": 0.2932125312995776, - "grad_norm": 0.0012232806766405702, - "learning_rate": 0.00019999995779464283, - "loss": 46.0, - "step": 3835 - }, - { - "epoch": 0.2932889882829673, - "grad_norm": 0.002202985342592001, - "learning_rate": 0.00019999995777257178, - "loss": 46.0, - "step": 3836 - }, - { - "epoch": 0.293365445266357, - "grad_norm": 0.0006656167097389698, - "learning_rate": 0.00019999995775049495, - "loss": 46.0, - "step": 3837 - }, - { - "epoch": 0.29344190224974676, - "grad_norm": 0.0006763612036593258, - "learning_rate": 0.00019999995772841239, - "loss": 46.0, - "step": 3838 - }, - { - "epoch": 0.29351835923313646, - "grad_norm": 0.0004571889585349709, - "learning_rate": 0.00019999995770632402, - "loss": 46.0, - "step": 3839 - }, - { - "epoch": 0.29359481621652617, - "grad_norm": 0.0016356800915673375, - "learning_rate": 0.00019999995768422993, - "loss": 46.0, - "step": 3840 - }, - { - "epoch": 0.29367127319991587, - "grad_norm": 0.0015060099540278316, - "learning_rate": 0.00019999995766213, - "loss": 46.0, - "step": 3841 - }, - { - "epoch": 0.29374773018330563, - "grad_norm": 0.003492431016638875, - "learning_rate": 0.00019999995764002435, - "loss": 46.0, - "step": 3842 - }, - { - "epoch": 0.29382418716669534, - "grad_norm": 0.0019442737102508545, - "learning_rate": 0.0001999999576179129, - "loss": 46.0, - "step": 3843 - }, - { - "epoch": 0.29390064415008504, - "grad_norm": 0.0009575521107763052, - "learning_rate": 0.00019999995759579573, - "loss": 46.0, - "step": 3844 - }, - { - "epoch": 0.2939771011334748, - "grad_norm": 0.00020072500046808273, - "learning_rate": 0.00019999995757367275, - "loss": 46.0, - "step": 3845 - }, - { - "epoch": 0.2940535581168645, - "grad_norm": 0.0019815845880657434, - "learning_rate": 0.00019999995755154402, - "loss": 46.0, - "step": 3846 - }, - { - "epoch": 0.2941300151002542, - "grad_norm": 0.0010517174378037453, - "learning_rate": 0.0001999999575294095, - "loss": 46.0, - "step": 3847 - }, - { - "epoch": 0.2942064720836439, - "grad_norm": 0.002135148737579584, - "learning_rate": 0.00019999995750726924, - "loss": 46.0, - "step": 3848 - }, - { - "epoch": 0.2942829290670337, - "grad_norm": 0.001201451406814158, - "learning_rate": 0.0001999999574851232, - "loss": 46.0, - "step": 3849 - }, - { - "epoch": 0.2943593860504234, - "grad_norm": 0.001454431563615799, - "learning_rate": 0.00019999995746297134, - "loss": 46.0, - "step": 3850 - }, - { - "epoch": 0.2944358430338131, - "grad_norm": 0.022603917866945267, - "learning_rate": 0.0001999999574408138, - "loss": 46.0, - "step": 3851 - }, - { - "epoch": 0.2945123000172028, - "grad_norm": 0.000887357338797301, - "learning_rate": 0.00019999995741865043, - "loss": 46.0, - "step": 3852 - }, - { - "epoch": 0.29458875700059256, - "grad_norm": 0.0008037868537940085, - "learning_rate": 0.0001999999573964813, - "loss": 46.0, - "step": 3853 - }, - { - "epoch": 0.29466521398398227, - "grad_norm": 0.002315834164619446, - "learning_rate": 0.00019999995737430643, - "loss": 46.0, - "step": 3854 - }, - { - "epoch": 0.29474167096737197, - "grad_norm": 0.0032458745408803225, - "learning_rate": 0.00019999995735212576, - "loss": 46.0, - "step": 3855 - }, - { - "epoch": 0.2948181279507617, - "grad_norm": 0.0006478840950876474, - "learning_rate": 0.00019999995732993933, - "loss": 46.0, - "step": 3856 - }, - { - "epoch": 0.29489458493415144, - "grad_norm": 0.012460929341614246, - "learning_rate": 0.00019999995730774712, - "loss": 46.0, - "step": 3857 - }, - { - "epoch": 0.29497104191754114, - "grad_norm": 0.0012602087808772922, - "learning_rate": 0.00019999995728554914, - "loss": 46.0, - "step": 3858 - }, - { - "epoch": 0.29504749890093085, - "grad_norm": 0.008179609663784504, - "learning_rate": 0.00019999995726334538, - "loss": 46.0, - "step": 3859 - }, - { - "epoch": 0.2951239558843206, - "grad_norm": 0.0030665250960737467, - "learning_rate": 0.0001999999572411359, - "loss": 46.0, - "step": 3860 - }, - { - "epoch": 0.2952004128677103, - "grad_norm": 0.003528802189975977, - "learning_rate": 0.00019999995721892063, - "loss": 46.0, - "step": 3861 - }, - { - "epoch": 0.2952768698511, - "grad_norm": 0.0010181349935010076, - "learning_rate": 0.00019999995719669955, - "loss": 46.0, - "step": 3862 - }, - { - "epoch": 0.2953533268344897, - "grad_norm": 0.0004096160701010376, - "learning_rate": 0.00019999995717447276, - "loss": 46.0, - "step": 3863 - }, - { - "epoch": 0.2954297838178795, - "grad_norm": 0.0006414162344299257, - "learning_rate": 0.00019999995715224014, - "loss": 46.0, - "step": 3864 - }, - { - "epoch": 0.2955062408012692, - "grad_norm": 0.0021652875002473593, - "learning_rate": 0.0001999999571300018, - "loss": 46.0, - "step": 3865 - }, - { - "epoch": 0.2955826977846589, - "grad_norm": 0.00095454475376755, - "learning_rate": 0.00019999995710775768, - "loss": 46.0, - "step": 3866 - }, - { - "epoch": 0.2956591547680486, - "grad_norm": 0.002390076406300068, - "learning_rate": 0.0001999999570855078, - "loss": 46.0, - "step": 3867 - }, - { - "epoch": 0.29573561175143837, - "grad_norm": 0.0016897311434149742, - "learning_rate": 0.00019999995706325213, - "loss": 46.0, - "step": 3868 - }, - { - "epoch": 0.29581206873482807, - "grad_norm": 0.001022031414322555, - "learning_rate": 0.0001999999570409907, - "loss": 46.0, - "step": 3869 - }, - { - "epoch": 0.2958885257182178, - "grad_norm": 0.0006169632542878389, - "learning_rate": 0.00019999995701872346, - "loss": 46.0, - "step": 3870 - }, - { - "epoch": 0.2959649827016075, - "grad_norm": 0.001120051252655685, - "learning_rate": 0.00019999995699645048, - "loss": 46.0, - "step": 3871 - }, - { - "epoch": 0.29604143968499724, - "grad_norm": 0.0021365159191191196, - "learning_rate": 0.00019999995697417172, - "loss": 46.0, - "step": 3872 - }, - { - "epoch": 0.29611789666838695, - "grad_norm": 0.001412899000570178, - "learning_rate": 0.00019999995695188725, - "loss": 46.0, - "step": 3873 - }, - { - "epoch": 0.29619435365177665, - "grad_norm": 0.0010116349440068007, - "learning_rate": 0.00019999995692959694, - "loss": 46.0, - "step": 3874 - }, - { - "epoch": 0.2962708106351664, - "grad_norm": 0.0009577778982929885, - "learning_rate": 0.00019999995690730092, - "loss": 46.0, - "step": 3875 - }, - { - "epoch": 0.2963472676185561, - "grad_norm": 0.0007074445602484047, - "learning_rate": 0.00019999995688499907, - "loss": 46.0, - "step": 3876 - }, - { - "epoch": 0.2964237246019458, - "grad_norm": 0.0004960338701494038, - "learning_rate": 0.00019999995686269148, - "loss": 46.0, - "step": 3877 - }, - { - "epoch": 0.29650018158533553, - "grad_norm": 0.000575447513256222, - "learning_rate": 0.00019999995684037814, - "loss": 46.0, - "step": 3878 - }, - { - "epoch": 0.2965766385687253, - "grad_norm": 0.0008145607425831258, - "learning_rate": 0.000199999956818059, - "loss": 46.0, - "step": 3879 - }, - { - "epoch": 0.296653095552115, - "grad_norm": 0.0005860779201611876, - "learning_rate": 0.00019999995679573408, - "loss": 46.0, - "step": 3880 - }, - { - "epoch": 0.2967295525355047, - "grad_norm": 0.0004820851900149137, - "learning_rate": 0.00019999995677340342, - "loss": 46.0, - "step": 3881 - }, - { - "epoch": 0.2968060095188944, - "grad_norm": 0.0011859680525958538, - "learning_rate": 0.000199999956751067, - "loss": 46.0, - "step": 3882 - }, - { - "epoch": 0.29688246650228417, - "grad_norm": 0.003930750302970409, - "learning_rate": 0.00019999995672872477, - "loss": 46.0, - "step": 3883 - }, - { - "epoch": 0.2969589234856739, - "grad_norm": 0.0008929710602387786, - "learning_rate": 0.0001999999567063768, - "loss": 46.0, - "step": 3884 - }, - { - "epoch": 0.2970353804690636, - "grad_norm": 0.0005970599595457315, - "learning_rate": 0.00019999995668402304, - "loss": 46.0, - "step": 3885 - }, - { - "epoch": 0.2971118374524533, - "grad_norm": 0.0006033756071701646, - "learning_rate": 0.00019999995666166354, - "loss": 46.0, - "step": 3886 - }, - { - "epoch": 0.29718829443584305, - "grad_norm": 0.0019969872664660215, - "learning_rate": 0.00019999995663929823, - "loss": 46.0, - "step": 3887 - }, - { - "epoch": 0.29726475141923275, - "grad_norm": 0.000959285069257021, - "learning_rate": 0.00019999995661692719, - "loss": 46.0, - "step": 3888 - }, - { - "epoch": 0.29734120840262246, - "grad_norm": 0.0007328496431000531, - "learning_rate": 0.00019999995659455036, - "loss": 46.0, - "step": 3889 - }, - { - "epoch": 0.2974176653860122, - "grad_norm": 0.0007862882921472192, - "learning_rate": 0.0001999999565721678, - "loss": 46.0, - "step": 3890 - }, - { - "epoch": 0.2974941223694019, - "grad_norm": 0.0022088452242314816, - "learning_rate": 0.0001999999565497794, - "loss": 46.0, - "step": 3891 - }, - { - "epoch": 0.29757057935279163, - "grad_norm": 0.0008607911877334118, - "learning_rate": 0.0001999999565273853, - "loss": 46.0, - "step": 3892 - }, - { - "epoch": 0.29764703633618134, - "grad_norm": 0.0006285980343818665, - "learning_rate": 0.00019999995650498537, - "loss": 46.0, - "step": 3893 - }, - { - "epoch": 0.2977234933195711, - "grad_norm": 0.0027029935736209154, - "learning_rate": 0.0001999999564825797, - "loss": 46.0, - "step": 3894 - }, - { - "epoch": 0.2977999503029608, - "grad_norm": 0.002828470664098859, - "learning_rate": 0.00019999995646016828, - "loss": 46.0, - "step": 3895 - }, - { - "epoch": 0.2978764072863505, - "grad_norm": 0.0025153513997793198, - "learning_rate": 0.00019999995643775104, - "loss": 46.0, - "step": 3896 - }, - { - "epoch": 0.2979528642697402, - "grad_norm": 0.002997411647811532, - "learning_rate": 0.00019999995641532806, - "loss": 46.0, - "step": 3897 - }, - { - "epoch": 0.29802932125313, - "grad_norm": 0.006343774497509003, - "learning_rate": 0.00019999995639289933, - "loss": 46.0, - "step": 3898 - }, - { - "epoch": 0.2981057782365197, - "grad_norm": 0.0021542010363191366, - "learning_rate": 0.0001999999563704648, - "loss": 46.0, - "step": 3899 - }, - { - "epoch": 0.2981822352199094, - "grad_norm": 0.0007479273481294513, - "learning_rate": 0.0001999999563480245, - "loss": 46.0, - "step": 3900 - }, - { - "epoch": 0.2982586922032991, - "grad_norm": 0.0007215498480945826, - "learning_rate": 0.00019999995632557845, - "loss": 46.0, - "step": 3901 - }, - { - "epoch": 0.29833514918668885, - "grad_norm": 0.0029518231749534607, - "learning_rate": 0.00019999995630312663, - "loss": 46.0, - "step": 3902 - }, - { - "epoch": 0.29841160617007856, - "grad_norm": 0.0011331290006637573, - "learning_rate": 0.00019999995628066904, - "loss": 46.0, - "step": 3903 - }, - { - "epoch": 0.29848806315346826, - "grad_norm": 0.0010614742059260607, - "learning_rate": 0.00019999995625820567, - "loss": 46.0, - "step": 3904 - }, - { - "epoch": 0.298564520136858, - "grad_norm": 0.0008829003199934959, - "learning_rate": 0.00019999995623573653, - "loss": 46.0, - "step": 3905 - }, - { - "epoch": 0.29864097712024773, - "grad_norm": 0.0013663776917383075, - "learning_rate": 0.00019999995621326161, - "loss": 46.0, - "step": 3906 - }, - { - "epoch": 0.29871743410363744, - "grad_norm": 0.007047569379210472, - "learning_rate": 0.00019999995619078095, - "loss": 46.0, - "step": 3907 - }, - { - "epoch": 0.29879389108702714, - "grad_norm": 0.002720632590353489, - "learning_rate": 0.0001999999561682945, - "loss": 46.0, - "step": 3908 - }, - { - "epoch": 0.2988703480704169, - "grad_norm": 0.0009586411179043353, - "learning_rate": 0.00019999995614580228, - "loss": 46.0, - "step": 3909 - }, - { - "epoch": 0.2989468050538066, - "grad_norm": 0.0021984048653393984, - "learning_rate": 0.0001999999561233043, - "loss": 46.0, - "step": 3910 - }, - { - "epoch": 0.2990232620371963, - "grad_norm": 0.0009016099502332509, - "learning_rate": 0.00019999995610080055, - "loss": 46.0, - "step": 3911 - }, - { - "epoch": 0.299099719020586, - "grad_norm": 0.0007355074631050229, - "learning_rate": 0.00019999995607829102, - "loss": 46.0, - "step": 3912 - }, - { - "epoch": 0.2991761760039758, - "grad_norm": 0.007912776432931423, - "learning_rate": 0.00019999995605577575, - "loss": 46.0, - "step": 3913 - }, - { - "epoch": 0.2992526329873655, - "grad_norm": 0.001287880353629589, - "learning_rate": 0.00019999995603325467, - "loss": 46.0, - "step": 3914 - }, - { - "epoch": 0.2993290899707552, - "grad_norm": 0.001875565038062632, - "learning_rate": 0.00019999995601072785, - "loss": 46.0, - "step": 3915 - }, - { - "epoch": 0.2994055469541449, - "grad_norm": 0.0014386377297341824, - "learning_rate": 0.00019999995598819526, - "loss": 46.0, - "step": 3916 - }, - { - "epoch": 0.29948200393753466, - "grad_norm": 0.0010124879190698266, - "learning_rate": 0.0001999999559656569, - "loss": 46.0, - "step": 3917 - }, - { - "epoch": 0.29955846092092436, - "grad_norm": 0.0017455739434808493, - "learning_rate": 0.00019999995594311275, - "loss": 46.0, - "step": 3918 - }, - { - "epoch": 0.29963491790431407, - "grad_norm": 0.001225010259076953, - "learning_rate": 0.00019999995592056283, - "loss": 46.0, - "step": 3919 - }, - { - "epoch": 0.29971137488770383, - "grad_norm": 0.0006972053088247776, - "learning_rate": 0.00019999995589800715, - "loss": 46.0, - "step": 3920 - }, - { - "epoch": 0.29978783187109354, - "grad_norm": 0.002167454455047846, - "learning_rate": 0.0001999999558754457, - "loss": 46.0, - "step": 3921 - }, - { - "epoch": 0.29986428885448324, - "grad_norm": 0.0012359477113932371, - "learning_rate": 0.0001999999558528785, - "loss": 46.0, - "step": 3922 - }, - { - "epoch": 0.29994074583787295, - "grad_norm": 0.000918172940146178, - "learning_rate": 0.0001999999558303055, - "loss": 46.0, - "step": 3923 - }, - { - "epoch": 0.3000172028212627, - "grad_norm": 0.00037611983134411275, - "learning_rate": 0.00019999995580772674, - "loss": 46.0, - "step": 3924 - }, - { - "epoch": 0.3000936598046524, - "grad_norm": 0.0004843119240831584, - "learning_rate": 0.00019999995578514224, - "loss": 46.0, - "step": 3925 - }, - { - "epoch": 0.3001701167880421, - "grad_norm": 0.0017777679022401571, - "learning_rate": 0.00019999995576255191, - "loss": 46.0, - "step": 3926 - }, - { - "epoch": 0.3002465737714318, - "grad_norm": 0.0005172256496734917, - "learning_rate": 0.00019999995573995587, - "loss": 46.0, - "step": 3927 - }, - { - "epoch": 0.3003230307548216, - "grad_norm": 0.0013011523988097906, - "learning_rate": 0.00019999995571735405, - "loss": 46.0, - "step": 3928 - }, - { - "epoch": 0.3003994877382113, - "grad_norm": 0.0007835791329853237, - "learning_rate": 0.00019999995569474642, - "loss": 46.0, - "step": 3929 - }, - { - "epoch": 0.300475944721601, - "grad_norm": 0.0007522383239120245, - "learning_rate": 0.00019999995567213306, - "loss": 46.0, - "step": 3930 - }, - { - "epoch": 0.30055240170499076, - "grad_norm": 0.004805904813110828, - "learning_rate": 0.00019999995564951392, - "loss": 46.0, - "step": 3931 - }, - { - "epoch": 0.30062885868838046, - "grad_norm": 0.002395146293565631, - "learning_rate": 0.000199999955626889, - "loss": 46.0, - "step": 3932 - }, - { - "epoch": 0.30070531567177017, - "grad_norm": 0.004002503585070372, - "learning_rate": 0.00019999995560425832, - "loss": 46.0, - "step": 3933 - }, - { - "epoch": 0.3007817726551599, - "grad_norm": 0.0005741655477322638, - "learning_rate": 0.00019999995558162188, - "loss": 46.0, - "step": 3934 - }, - { - "epoch": 0.30085822963854963, - "grad_norm": 0.003098508110269904, - "learning_rate": 0.00019999995555897965, - "loss": 46.0, - "step": 3935 - }, - { - "epoch": 0.30093468662193934, - "grad_norm": 0.00042936380486935377, - "learning_rate": 0.00019999995553633167, - "loss": 46.0, - "step": 3936 - }, - { - "epoch": 0.30101114360532905, - "grad_norm": 0.0006006730254739523, - "learning_rate": 0.0001999999555136779, - "loss": 46.0, - "step": 3937 - }, - { - "epoch": 0.30108760058871875, - "grad_norm": 0.0011409219587221742, - "learning_rate": 0.00019999995549101839, - "loss": 46.0, - "step": 3938 - }, - { - "epoch": 0.3011640575721085, - "grad_norm": 0.002090958645567298, - "learning_rate": 0.00019999995546835308, - "loss": 46.0, - "step": 3939 - }, - { - "epoch": 0.3012405145554982, - "grad_norm": 0.0014344867086037993, - "learning_rate": 0.000199999955445682, - "loss": 46.0, - "step": 3940 - }, - { - "epoch": 0.3013169715388879, - "grad_norm": 0.0014389441348612309, - "learning_rate": 0.0001999999554230052, - "loss": 46.0, - "step": 3941 - }, - { - "epoch": 0.30139342852227763, - "grad_norm": 0.0014627301134169102, - "learning_rate": 0.00019999995540032257, - "loss": 46.0, - "step": 3942 - }, - { - "epoch": 0.3014698855056674, - "grad_norm": 0.0008876899955794215, - "learning_rate": 0.0001999999553776342, - "loss": 46.0, - "step": 3943 - }, - { - "epoch": 0.3015463424890571, - "grad_norm": 0.0006306195282377303, - "learning_rate": 0.00019999995535494006, - "loss": 46.0, - "step": 3944 - }, - { - "epoch": 0.3016227994724468, - "grad_norm": 0.002020200015977025, - "learning_rate": 0.00019999995533224015, - "loss": 46.0, - "step": 3945 - }, - { - "epoch": 0.30169925645583656, - "grad_norm": 0.0005920823314227164, - "learning_rate": 0.00019999995530953446, - "loss": 46.0, - "step": 3946 - }, - { - "epoch": 0.30177571343922627, - "grad_norm": 0.000938529847189784, - "learning_rate": 0.000199999955286823, - "loss": 46.0, - "step": 3947 - }, - { - "epoch": 0.301852170422616, - "grad_norm": 0.0007786461501382291, - "learning_rate": 0.00019999995526410577, - "loss": 46.0, - "step": 3948 - }, - { - "epoch": 0.3019286274060057, - "grad_norm": 0.0006874241516925395, - "learning_rate": 0.0001999999552413828, - "loss": 46.0, - "step": 3949 - }, - { - "epoch": 0.30200508438939544, - "grad_norm": 0.004485182464122772, - "learning_rate": 0.00019999995521865403, - "loss": 46.0, - "step": 3950 - }, - { - "epoch": 0.30208154137278515, - "grad_norm": 0.008694938383996487, - "learning_rate": 0.0001999999551959195, - "loss": 46.0, - "step": 3951 - }, - { - "epoch": 0.30215799835617485, - "grad_norm": 0.0013990922598168254, - "learning_rate": 0.0001999999551731792, - "loss": 46.0, - "step": 3952 - }, - { - "epoch": 0.30223445533956456, - "grad_norm": 0.0009009944624267519, - "learning_rate": 0.00019999995515043313, - "loss": 46.0, - "step": 3953 - }, - { - "epoch": 0.3023109123229543, - "grad_norm": 0.0007439752225764096, - "learning_rate": 0.0001999999551276813, - "loss": 46.0, - "step": 3954 - }, - { - "epoch": 0.302387369306344, - "grad_norm": 0.0009529454400762916, - "learning_rate": 0.0001999999551049237, - "loss": 46.0, - "step": 3955 - }, - { - "epoch": 0.30246382628973373, - "grad_norm": 0.003594091860577464, - "learning_rate": 0.0001999999550821603, - "loss": 46.0, - "step": 3956 - }, - { - "epoch": 0.30254028327312343, - "grad_norm": 0.0011254452401772141, - "learning_rate": 0.00019999995505939116, - "loss": 46.0, - "step": 3957 - }, - { - "epoch": 0.3026167402565132, - "grad_norm": 0.0016184871783480048, - "learning_rate": 0.00019999995503661624, - "loss": 46.0, - "step": 3958 - }, - { - "epoch": 0.3026931972399029, - "grad_norm": 0.0006242019589990377, - "learning_rate": 0.00019999995501383556, - "loss": 46.0, - "step": 3959 - }, - { - "epoch": 0.3027696542232926, - "grad_norm": 0.0006411741487681866, - "learning_rate": 0.0001999999549910491, - "loss": 46.0, - "step": 3960 - }, - { - "epoch": 0.30284611120668237, - "grad_norm": 0.0021180137991905212, - "learning_rate": 0.00019999995496825686, - "loss": 46.0, - "step": 3961 - }, - { - "epoch": 0.3029225681900721, - "grad_norm": 0.00109868252184242, - "learning_rate": 0.00019999995494545888, - "loss": 46.0, - "step": 3962 - }, - { - "epoch": 0.3029990251734618, - "grad_norm": 0.00096026441315189, - "learning_rate": 0.0001999999549226551, - "loss": 46.0, - "step": 3963 - }, - { - "epoch": 0.3030754821568515, - "grad_norm": 0.008483518846333027, - "learning_rate": 0.00019999995489984558, - "loss": 46.0, - "step": 3964 - }, - { - "epoch": 0.30315193914024124, - "grad_norm": 0.0017056922661140561, - "learning_rate": 0.00019999995487703028, - "loss": 46.0, - "step": 3965 - }, - { - "epoch": 0.30322839612363095, - "grad_norm": 0.0007459048647433519, - "learning_rate": 0.0001999999548542092, - "loss": 46.0, - "step": 3966 - }, - { - "epoch": 0.30330485310702066, - "grad_norm": 0.001148242736235261, - "learning_rate": 0.00019999995483138236, - "loss": 46.0, - "step": 3967 - }, - { - "epoch": 0.30338131009041036, - "grad_norm": 0.0007320080767385662, - "learning_rate": 0.00019999995480854974, - "loss": 46.0, - "step": 3968 - }, - { - "epoch": 0.3034577670738001, - "grad_norm": 0.0035365570802241564, - "learning_rate": 0.00019999995478571137, - "loss": 46.0, - "step": 3969 - }, - { - "epoch": 0.3035342240571898, - "grad_norm": 0.0072437673807144165, - "learning_rate": 0.0001999999547628672, - "loss": 46.0, - "step": 3970 - }, - { - "epoch": 0.30361068104057953, - "grad_norm": 0.0005270031979307532, - "learning_rate": 0.0001999999547400173, - "loss": 46.0, - "step": 3971 - }, - { - "epoch": 0.30368713802396924, - "grad_norm": 0.0015035016695037484, - "learning_rate": 0.0001999999547171616, - "loss": 46.0, - "step": 3972 - }, - { - "epoch": 0.303763595007359, - "grad_norm": 0.009577272459864616, - "learning_rate": 0.00019999995469430014, - "loss": 46.0, - "step": 3973 - }, - { - "epoch": 0.3038400519907487, - "grad_norm": 0.0032476128544658422, - "learning_rate": 0.00019999995467143294, - "loss": 46.0, - "step": 3974 - }, - { - "epoch": 0.3039165089741384, - "grad_norm": 0.0009590466506779194, - "learning_rate": 0.00019999995464855993, - "loss": 46.0, - "step": 3975 - }, - { - "epoch": 0.30399296595752817, - "grad_norm": 0.0018426980823278427, - "learning_rate": 0.00019999995462568115, - "loss": 46.0, - "step": 3976 - }, - { - "epoch": 0.3040694229409179, - "grad_norm": 0.0026536802761256695, - "learning_rate": 0.00019999995460279662, - "loss": 46.0, - "step": 3977 - }, - { - "epoch": 0.3041458799243076, - "grad_norm": 0.0021176941227167845, - "learning_rate": 0.00019999995457990633, - "loss": 46.0, - "step": 3978 - }, - { - "epoch": 0.3042223369076973, - "grad_norm": 0.0017880977829918265, - "learning_rate": 0.00019999995455701023, - "loss": 46.0, - "step": 3979 - }, - { - "epoch": 0.30429879389108705, - "grad_norm": 0.0007800829480402172, - "learning_rate": 0.00019999995453410838, - "loss": 46.0, - "step": 3980 - }, - { - "epoch": 0.30437525087447675, - "grad_norm": 0.0011141517898067832, - "learning_rate": 0.00019999995451120079, - "loss": 46.0, - "step": 3981 - }, - { - "epoch": 0.30445170785786646, - "grad_norm": 0.0017600301653146744, - "learning_rate": 0.0001999999544882874, - "loss": 46.0, - "step": 3982 - }, - { - "epoch": 0.30452816484125617, - "grad_norm": 0.0010085272369906306, - "learning_rate": 0.00019999995446536825, - "loss": 46.0, - "step": 3983 - }, - { - "epoch": 0.3046046218246459, - "grad_norm": 0.0015926266787573695, - "learning_rate": 0.0001999999544424433, - "loss": 46.0, - "step": 3984 - }, - { - "epoch": 0.30468107880803563, - "grad_norm": 0.0007591758039779961, - "learning_rate": 0.00019999995441951263, - "loss": 46.0, - "step": 3985 - }, - { - "epoch": 0.30475753579142534, - "grad_norm": 0.000749889062717557, - "learning_rate": 0.00019999995439657617, - "loss": 46.0, - "step": 3986 - }, - { - "epoch": 0.30483399277481504, - "grad_norm": 0.0007087599369697273, - "learning_rate": 0.00019999995437363393, - "loss": 46.0, - "step": 3987 - }, - { - "epoch": 0.3049104497582048, - "grad_norm": 0.001025100820697844, - "learning_rate": 0.00019999995435068596, - "loss": 46.0, - "step": 3988 - }, - { - "epoch": 0.3049869067415945, - "grad_norm": 0.002785252872854471, - "learning_rate": 0.00019999995432773218, - "loss": 46.0, - "step": 3989 - }, - { - "epoch": 0.3050633637249842, - "grad_norm": 0.0014751151902601123, - "learning_rate": 0.00019999995430477262, - "loss": 46.0, - "step": 3990 - }, - { - "epoch": 0.305139820708374, - "grad_norm": 0.0025423127226531506, - "learning_rate": 0.00019999995428180732, - "loss": 46.0, - "step": 3991 - }, - { - "epoch": 0.3052162776917637, - "grad_norm": 0.0009441522997803986, - "learning_rate": 0.00019999995425883625, - "loss": 46.0, - "step": 3992 - }, - { - "epoch": 0.3052927346751534, - "grad_norm": 0.0018807377200573683, - "learning_rate": 0.0001999999542358594, - "loss": 46.0, - "step": 3993 - }, - { - "epoch": 0.3053691916585431, - "grad_norm": 0.002795872278511524, - "learning_rate": 0.00019999995421287676, - "loss": 46.0, - "step": 3994 - }, - { - "epoch": 0.30544564864193285, - "grad_norm": 0.00199322821572423, - "learning_rate": 0.0001999999541898884, - "loss": 46.0, - "step": 3995 - }, - { - "epoch": 0.30552210562532256, - "grad_norm": 0.0005783766973763704, - "learning_rate": 0.00019999995416689423, - "loss": 46.0, - "step": 3996 - }, - { - "epoch": 0.30559856260871227, - "grad_norm": 0.0011058659292757511, - "learning_rate": 0.00019999995414389429, - "loss": 46.0, - "step": 3997 - }, - { - "epoch": 0.30567501959210197, - "grad_norm": 0.0016042126808315516, - "learning_rate": 0.0001999999541208886, - "loss": 46.0, - "step": 3998 - }, - { - "epoch": 0.30575147657549173, - "grad_norm": 0.003017428098246455, - "learning_rate": 0.00019999995409787714, - "loss": 46.0, - "step": 3999 - }, - { - "epoch": 0.30582793355888144, - "grad_norm": 0.0010716089745983481, - "learning_rate": 0.0001999999540748599, - "loss": 46.0, - "step": 4000 - }, - { - "epoch": 0.30590439054227114, - "grad_norm": 0.0009396169916726649, - "learning_rate": 0.0001999999540518369, - "loss": 46.0, - "step": 4001 - }, - { - "epoch": 0.30598084752566085, - "grad_norm": 0.002436660695821047, - "learning_rate": 0.00019999995402880812, - "loss": 46.0, - "step": 4002 - }, - { - "epoch": 0.3060573045090506, - "grad_norm": 0.0020608168561011553, - "learning_rate": 0.00019999995400577357, - "loss": 46.0, - "step": 4003 - }, - { - "epoch": 0.3061337614924403, - "grad_norm": 0.001335165579803288, - "learning_rate": 0.00019999995398273327, - "loss": 46.0, - "step": 4004 - }, - { - "epoch": 0.30621021847583, - "grad_norm": 0.0012055481784045696, - "learning_rate": 0.0001999999539596872, - "loss": 46.0, - "step": 4005 - }, - { - "epoch": 0.3062866754592198, - "grad_norm": 0.001344876131042838, - "learning_rate": 0.00019999995393663535, - "loss": 46.0, - "step": 4006 - }, - { - "epoch": 0.3063631324426095, - "grad_norm": 0.0011385285761207342, - "learning_rate": 0.00019999995391357773, - "loss": 46.0, - "step": 4007 - }, - { - "epoch": 0.3064395894259992, - "grad_norm": 0.001564131467603147, - "learning_rate": 0.00019999995389051437, - "loss": 46.0, - "step": 4008 - }, - { - "epoch": 0.3065160464093889, - "grad_norm": 0.0036503213923424482, - "learning_rate": 0.00019999995386744517, - "loss": 46.0, - "step": 4009 - }, - { - "epoch": 0.30659250339277866, - "grad_norm": 0.003316072979941964, - "learning_rate": 0.00019999995384437024, - "loss": 46.0, - "step": 4010 - }, - { - "epoch": 0.30666896037616836, - "grad_norm": 0.007509085815399885, - "learning_rate": 0.00019999995382128955, - "loss": 46.0, - "step": 4011 - }, - { - "epoch": 0.30674541735955807, - "grad_norm": 0.0006065723719075322, - "learning_rate": 0.0001999999537982031, - "loss": 46.0, - "step": 4012 - }, - { - "epoch": 0.3068218743429478, - "grad_norm": 0.0011315278243273497, - "learning_rate": 0.00019999995377511086, - "loss": 46.0, - "step": 4013 - }, - { - "epoch": 0.30689833132633754, - "grad_norm": 0.0018448256887495518, - "learning_rate": 0.00019999995375201283, - "loss": 46.0, - "step": 4014 - }, - { - "epoch": 0.30697478830972724, - "grad_norm": 0.0007080396171659231, - "learning_rate": 0.00019999995372890907, - "loss": 46.0, - "step": 4015 - }, - { - "epoch": 0.30705124529311695, - "grad_norm": 0.0004142904654145241, - "learning_rate": 0.00019999995370579955, - "loss": 46.0, - "step": 4016 - }, - { - "epoch": 0.30712770227650665, - "grad_norm": 0.0011961066629737616, - "learning_rate": 0.0001999999536826842, - "loss": 46.0, - "step": 4017 - }, - { - "epoch": 0.3072041592598964, - "grad_norm": 0.000924957450479269, - "learning_rate": 0.00019999995365956312, - "loss": 46.0, - "step": 4018 - }, - { - "epoch": 0.3072806162432861, - "grad_norm": 0.0007961697410792112, - "learning_rate": 0.00019999995363643628, - "loss": 46.0, - "step": 4019 - }, - { - "epoch": 0.3073570732266758, - "grad_norm": 0.0012061174493283033, - "learning_rate": 0.00019999995361330366, - "loss": 46.0, - "step": 4020 - }, - { - "epoch": 0.3074335302100656, - "grad_norm": 0.0011080013355240226, - "learning_rate": 0.00019999995359016527, - "loss": 46.0, - "step": 4021 - }, - { - "epoch": 0.3075099871934553, - "grad_norm": 0.0007111974409781396, - "learning_rate": 0.00019999995356702107, - "loss": 46.0, - "step": 4022 - }, - { - "epoch": 0.307586444176845, - "grad_norm": 0.007970698177814484, - "learning_rate": 0.00019999995354387116, - "loss": 46.0, - "step": 4023 - }, - { - "epoch": 0.3076629011602347, - "grad_norm": 0.0012143987696617842, - "learning_rate": 0.00019999995352071545, - "loss": 46.0, - "step": 4024 - }, - { - "epoch": 0.30773935814362446, - "grad_norm": 0.0029039548244327307, - "learning_rate": 0.000199999953497554, - "loss": 46.0, - "step": 4025 - }, - { - "epoch": 0.30781581512701417, - "grad_norm": 0.0006623852532356977, - "learning_rate": 0.00019999995347438676, - "loss": 46.0, - "step": 4026 - }, - { - "epoch": 0.3078922721104039, - "grad_norm": 0.0012280797818675637, - "learning_rate": 0.00019999995345121375, - "loss": 46.0, - "step": 4027 - }, - { - "epoch": 0.3079687290937936, - "grad_norm": 0.0025175693444907665, - "learning_rate": 0.00019999995342803498, - "loss": 46.0, - "step": 4028 - }, - { - "epoch": 0.30804518607718334, - "grad_norm": 0.0009231767617166042, - "learning_rate": 0.0001999999534048504, - "loss": 46.0, - "step": 4029 - }, - { - "epoch": 0.30812164306057305, - "grad_norm": 0.002071147784590721, - "learning_rate": 0.0001999999533816601, - "loss": 46.0, - "step": 4030 - }, - { - "epoch": 0.30819810004396275, - "grad_norm": 0.00136423425283283, - "learning_rate": 0.000199999953358464, - "loss": 46.0, - "step": 4031 - }, - { - "epoch": 0.30827455702735246, - "grad_norm": 0.001215483178384602, - "learning_rate": 0.00019999995333526215, - "loss": 46.0, - "step": 4032 - }, - { - "epoch": 0.3083510140107422, - "grad_norm": 0.005483425222337246, - "learning_rate": 0.00019999995331205453, - "loss": 46.0, - "step": 4033 - }, - { - "epoch": 0.3084274709941319, - "grad_norm": 0.002995036542415619, - "learning_rate": 0.00019999995328884114, - "loss": 46.0, - "step": 4034 - }, - { - "epoch": 0.30850392797752163, - "grad_norm": 0.001227505155839026, - "learning_rate": 0.00019999995326562198, - "loss": 46.0, - "step": 4035 - }, - { - "epoch": 0.3085803849609114, - "grad_norm": 0.0007473343284800649, - "learning_rate": 0.00019999995324239704, - "loss": 46.0, - "step": 4036 - }, - { - "epoch": 0.3086568419443011, - "grad_norm": 0.0005458722007460892, - "learning_rate": 0.00019999995321916633, - "loss": 46.0, - "step": 4037 - }, - { - "epoch": 0.3087332989276908, - "grad_norm": 0.0003971096593886614, - "learning_rate": 0.00019999995319592987, - "loss": 46.0, - "step": 4038 - }, - { - "epoch": 0.3088097559110805, - "grad_norm": 0.0005233470583334565, - "learning_rate": 0.00019999995317268764, - "loss": 46.0, - "step": 4039 - }, - { - "epoch": 0.30888621289447027, - "grad_norm": 0.0018375712679699063, - "learning_rate": 0.0001999999531494396, - "loss": 46.0, - "step": 4040 - }, - { - "epoch": 0.30896266987786, - "grad_norm": 0.0036186447832733393, - "learning_rate": 0.00019999995312618583, - "loss": 46.0, - "step": 4041 - }, - { - "epoch": 0.3090391268612497, - "grad_norm": 0.004774878732860088, - "learning_rate": 0.00019999995310292627, - "loss": 46.0, - "step": 4042 - }, - { - "epoch": 0.3091155838446394, - "grad_norm": 0.0038502702955156565, - "learning_rate": 0.00019999995307966095, - "loss": 46.0, - "step": 4043 - }, - { - "epoch": 0.30919204082802915, - "grad_norm": 0.0012251483276486397, - "learning_rate": 0.00019999995305638985, - "loss": 46.0, - "step": 4044 - }, - { - "epoch": 0.30926849781141885, - "grad_norm": 0.004108453635126352, - "learning_rate": 0.000199999953033113, - "loss": 46.0, - "step": 4045 - }, - { - "epoch": 0.30934495479480856, - "grad_norm": 0.000770887709222734, - "learning_rate": 0.0001999999530098304, - "loss": 46.0, - "step": 4046 - }, - { - "epoch": 0.3094214117781983, - "grad_norm": 0.0006265907431952655, - "learning_rate": 0.000199999952986542, - "loss": 46.0, - "step": 4047 - }, - { - "epoch": 0.309497868761588, - "grad_norm": 0.0009616278694011271, - "learning_rate": 0.00019999995296324783, - "loss": 46.0, - "step": 4048 - }, - { - "epoch": 0.30957432574497773, - "grad_norm": 0.0012052081292495131, - "learning_rate": 0.0001999999529399479, - "loss": 46.0, - "step": 4049 - }, - { - "epoch": 0.30965078272836744, - "grad_norm": 0.0014100506668910384, - "learning_rate": 0.00019999995291664218, - "loss": 46.0, - "step": 4050 - }, - { - "epoch": 0.3097272397117572, - "grad_norm": 0.0006808596663177013, - "learning_rate": 0.0001999999528933307, - "loss": 46.0, - "step": 4051 - }, - { - "epoch": 0.3098036966951469, - "grad_norm": 0.0007321045268326998, - "learning_rate": 0.00019999995287001346, - "loss": 46.0, - "step": 4052 - }, - { - "epoch": 0.3098801536785366, - "grad_norm": 0.0010772771202027798, - "learning_rate": 0.00019999995284669043, - "loss": 46.0, - "step": 4053 - }, - { - "epoch": 0.3099566106619263, - "grad_norm": 0.0014033811166882515, - "learning_rate": 0.00019999995282336163, - "loss": 46.0, - "step": 4054 - }, - { - "epoch": 0.3100330676453161, - "grad_norm": 0.0012709693983197212, - "learning_rate": 0.0001999999528000271, - "loss": 46.0, - "step": 4055 - }, - { - "epoch": 0.3101095246287058, - "grad_norm": 0.0014826421393081546, - "learning_rate": 0.00019999995277668678, - "loss": 46.0, - "step": 4056 - }, - { - "epoch": 0.3101859816120955, - "grad_norm": 0.002636758843436837, - "learning_rate": 0.00019999995275334068, - "loss": 46.0, - "step": 4057 - }, - { - "epoch": 0.3102624385954852, - "grad_norm": 0.0008221042808145285, - "learning_rate": 0.0001999999527299888, - "loss": 46.0, - "step": 4058 - }, - { - "epoch": 0.31033889557887495, - "grad_norm": 0.0010345177724957466, - "learning_rate": 0.0001999999527066312, - "loss": 46.0, - "step": 4059 - }, - { - "epoch": 0.31041535256226466, - "grad_norm": 0.0009395965025760233, - "learning_rate": 0.0001999999526832678, - "loss": 46.0, - "step": 4060 - }, - { - "epoch": 0.31049180954565436, - "grad_norm": 0.0035355750005692244, - "learning_rate": 0.0001999999526598986, - "loss": 46.0, - "step": 4061 - }, - { - "epoch": 0.3105682665290441, - "grad_norm": 0.0007544115069322288, - "learning_rate": 0.00019999995263652367, - "loss": 46.0, - "step": 4062 - }, - { - "epoch": 0.31064472351243383, - "grad_norm": 0.0017829195130616426, - "learning_rate": 0.00019999995261314296, - "loss": 46.0, - "step": 4063 - }, - { - "epoch": 0.31072118049582353, - "grad_norm": 0.0014873375184834003, - "learning_rate": 0.0001999999525897565, - "loss": 46.0, - "step": 4064 - }, - { - "epoch": 0.31079763747921324, - "grad_norm": 0.0007083939271979034, - "learning_rate": 0.00019999995256636424, - "loss": 46.0, - "step": 4065 - }, - { - "epoch": 0.310874094462603, - "grad_norm": 0.0007060458301566541, - "learning_rate": 0.0001999999525429662, - "loss": 46.0, - "step": 4066 - }, - { - "epoch": 0.3109505514459927, - "grad_norm": 0.001243519363924861, - "learning_rate": 0.00019999995251956243, - "loss": 46.0, - "step": 4067 - }, - { - "epoch": 0.3110270084293824, - "grad_norm": 0.0007302002632059157, - "learning_rate": 0.00019999995249615288, - "loss": 46.0, - "step": 4068 - }, - { - "epoch": 0.3111034654127721, - "grad_norm": 0.0011072546476498246, - "learning_rate": 0.00019999995247273756, - "loss": 46.0, - "step": 4069 - }, - { - "epoch": 0.3111799223961619, - "grad_norm": 0.0012059843866154552, - "learning_rate": 0.00019999995244931643, - "loss": 46.0, - "step": 4070 - }, - { - "epoch": 0.3112563793795516, - "grad_norm": 0.0009788620518520474, - "learning_rate": 0.0001999999524258896, - "loss": 46.0, - "step": 4071 - }, - { - "epoch": 0.3113328363629413, - "grad_norm": 0.0007899851771071553, - "learning_rate": 0.00019999995240245694, - "loss": 46.0, - "step": 4072 - }, - { - "epoch": 0.311409293346331, - "grad_norm": 0.0016155410557985306, - "learning_rate": 0.00019999995237901855, - "loss": 46.0, - "step": 4073 - }, - { - "epoch": 0.31148575032972076, - "grad_norm": 0.001013038563542068, - "learning_rate": 0.0001999999523555744, - "loss": 46.0, - "step": 4074 - }, - { - "epoch": 0.31156220731311046, - "grad_norm": 0.0014393455348908901, - "learning_rate": 0.00019999995233212445, - "loss": 46.0, - "step": 4075 - }, - { - "epoch": 0.31163866429650017, - "grad_norm": 0.0028291363269090652, - "learning_rate": 0.00019999995230866874, - "loss": 46.0, - "step": 4076 - }, - { - "epoch": 0.31171512127988993, - "grad_norm": 0.001994832418859005, - "learning_rate": 0.00019999995228520723, - "loss": 46.0, - "step": 4077 - }, - { - "epoch": 0.31179157826327963, - "grad_norm": 0.001404017093591392, - "learning_rate": 0.00019999995226174, - "loss": 46.0, - "step": 4078 - }, - { - "epoch": 0.31186803524666934, - "grad_norm": 0.005517485085874796, - "learning_rate": 0.00019999995223826696, - "loss": 46.0, - "step": 4079 - }, - { - "epoch": 0.31194449223005905, - "grad_norm": 0.0009098463342525065, - "learning_rate": 0.0001999999522147882, - "loss": 46.0, - "step": 4080 - }, - { - "epoch": 0.3120209492134488, - "grad_norm": 0.0011220687301829457, - "learning_rate": 0.00019999995219130364, - "loss": 46.0, - "step": 4081 - }, - { - "epoch": 0.3120974061968385, - "grad_norm": 0.0005901592667214572, - "learning_rate": 0.00019999995216781328, - "loss": 46.0, - "step": 4082 - }, - { - "epoch": 0.3121738631802282, - "grad_norm": 0.0008901542751118541, - "learning_rate": 0.0001999999521443172, - "loss": 46.0, - "step": 4083 - }, - { - "epoch": 0.3122503201636179, - "grad_norm": 0.003306616796180606, - "learning_rate": 0.00019999995212081532, - "loss": 46.0, - "step": 4084 - }, - { - "epoch": 0.3123267771470077, - "grad_norm": 0.0007005054503679276, - "learning_rate": 0.0001999999520973077, - "loss": 46.0, - "step": 4085 - }, - { - "epoch": 0.3124032341303974, - "grad_norm": 0.0021608273964375257, - "learning_rate": 0.00019999995207379428, - "loss": 46.0, - "step": 4086 - }, - { - "epoch": 0.3124796911137871, - "grad_norm": 0.0030763857066631317, - "learning_rate": 0.00019999995205027512, - "loss": 46.0, - "step": 4087 - }, - { - "epoch": 0.3125561480971768, - "grad_norm": 0.0011079679243266582, - "learning_rate": 0.00019999995202675015, - "loss": 46.0, - "step": 4088 - }, - { - "epoch": 0.31263260508056656, - "grad_norm": 0.0017841230146586895, - "learning_rate": 0.00019999995200321947, - "loss": 46.0, - "step": 4089 - }, - { - "epoch": 0.31270906206395627, - "grad_norm": 0.0027018177788704634, - "learning_rate": 0.00019999995197968296, - "loss": 46.0, - "step": 4090 - }, - { - "epoch": 0.312785519047346, - "grad_norm": 0.000514084124006331, - "learning_rate": 0.00019999995195614073, - "loss": 46.0, - "step": 4091 - }, - { - "epoch": 0.31286197603073573, - "grad_norm": 0.0005712288548238575, - "learning_rate": 0.0001999999519325927, - "loss": 46.0, - "step": 4092 - }, - { - "epoch": 0.31293843301412544, - "grad_norm": 0.0017212866805493832, - "learning_rate": 0.0001999999519090389, - "loss": 46.0, - "step": 4093 - }, - { - "epoch": 0.31301488999751514, - "grad_norm": 0.0006872282247059047, - "learning_rate": 0.00019999995188547934, - "loss": 46.0, - "step": 4094 - }, - { - "epoch": 0.31309134698090485, - "grad_norm": 0.001309835584834218, - "learning_rate": 0.000199999951861914, - "loss": 46.0, - "step": 4095 - }, - { - "epoch": 0.3131678039642946, - "grad_norm": 0.0021782179828733206, - "learning_rate": 0.0001999999518383429, - "loss": 46.0, - "step": 4096 - }, - { - "epoch": 0.3132442609476843, - "grad_norm": 0.000896404788363725, - "learning_rate": 0.00019999995181476602, - "loss": 46.0, - "step": 4097 - }, - { - "epoch": 0.313320717931074, - "grad_norm": 0.0019859285093843937, - "learning_rate": 0.00019999995179118338, - "loss": 46.0, - "step": 4098 - }, - { - "epoch": 0.3133971749144637, - "grad_norm": 0.0020705722272396088, - "learning_rate": 0.00019999995176759496, - "loss": 46.0, - "step": 4099 - }, - { - "epoch": 0.3134736318978535, - "grad_norm": 0.000561065913643688, - "learning_rate": 0.00019999995174400082, - "loss": 46.0, - "step": 4100 - }, - { - "epoch": 0.3135500888812432, - "grad_norm": 0.0021201521158218384, - "learning_rate": 0.00019999995172040086, - "loss": 46.0, - "step": 4101 - }, - { - "epoch": 0.3136265458646329, - "grad_norm": 0.008403378538787365, - "learning_rate": 0.00019999995169679512, - "loss": 46.0, - "step": 4102 - }, - { - "epoch": 0.3137030028480226, - "grad_norm": 0.0005731437704525888, - "learning_rate": 0.00019999995167318364, - "loss": 46.0, - "step": 4103 - }, - { - "epoch": 0.31377945983141237, - "grad_norm": 0.002495912602171302, - "learning_rate": 0.0001999999516495664, - "loss": 46.0, - "step": 4104 - }, - { - "epoch": 0.31385591681480207, - "grad_norm": 0.0014699643943458796, - "learning_rate": 0.00019999995162594335, - "loss": 46.0, - "step": 4105 - }, - { - "epoch": 0.3139323737981918, - "grad_norm": 0.0006395775708369911, - "learning_rate": 0.00019999995160231455, - "loss": 46.0, - "step": 4106 - }, - { - "epoch": 0.31400883078158154, - "grad_norm": 0.0007599610253237188, - "learning_rate": 0.00019999995157868, - "loss": 46.0, - "step": 4107 - }, - { - "epoch": 0.31408528776497124, - "grad_norm": 0.0029232690576463938, - "learning_rate": 0.00019999995155503965, - "loss": 46.0, - "step": 4108 - }, - { - "epoch": 0.31416174474836095, - "grad_norm": 0.0023315108846873045, - "learning_rate": 0.00019999995153139355, - "loss": 46.0, - "step": 4109 - }, - { - "epoch": 0.31423820173175065, - "grad_norm": 0.0013846426736563444, - "learning_rate": 0.00019999995150774168, - "loss": 46.0, - "step": 4110 - }, - { - "epoch": 0.3143146587151404, - "grad_norm": 0.0009937274735420942, - "learning_rate": 0.00019999995148408406, - "loss": 46.0, - "step": 4111 - }, - { - "epoch": 0.3143911156985301, - "grad_norm": 0.0048923855647444725, - "learning_rate": 0.00019999995146042064, - "loss": 46.0, - "step": 4112 - }, - { - "epoch": 0.3144675726819198, - "grad_norm": 0.0037990878336131573, - "learning_rate": 0.00019999995143675145, - "loss": 46.0, - "step": 4113 - }, - { - "epoch": 0.31454402966530953, - "grad_norm": 0.0026742154732346535, - "learning_rate": 0.0001999999514130765, - "loss": 46.0, - "step": 4114 - }, - { - "epoch": 0.3146204866486993, - "grad_norm": 0.0010428677778691053, - "learning_rate": 0.00019999995138939578, - "loss": 46.0, - "step": 4115 - }, - { - "epoch": 0.314696943632089, - "grad_norm": 0.0006835341919213533, - "learning_rate": 0.00019999995136570927, - "loss": 46.0, - "step": 4116 - }, - { - "epoch": 0.3147734006154787, - "grad_norm": 0.001079073059372604, - "learning_rate": 0.00019999995134201704, - "loss": 46.0, - "step": 4117 - }, - { - "epoch": 0.3148498575988684, - "grad_norm": 0.0013314823154360056, - "learning_rate": 0.000199999951318319, - "loss": 46.0, - "step": 4118 - }, - { - "epoch": 0.31492631458225817, - "grad_norm": 0.0027630191762000322, - "learning_rate": 0.0001999999512946152, - "loss": 46.0, - "step": 4119 - }, - { - "epoch": 0.3150027715656479, - "grad_norm": 0.0007205162546597421, - "learning_rate": 0.00019999995127090563, - "loss": 46.0, - "step": 4120 - }, - { - "epoch": 0.3150792285490376, - "grad_norm": 0.0016001613112166524, - "learning_rate": 0.0001999999512471903, - "loss": 46.0, - "step": 4121 - }, - { - "epoch": 0.31515568553242734, - "grad_norm": 0.0015134575078263879, - "learning_rate": 0.0001999999512234692, - "loss": 46.0, - "step": 4122 - }, - { - "epoch": 0.31523214251581705, - "grad_norm": 0.0014422638341784477, - "learning_rate": 0.0001999999511997423, - "loss": 46.0, - "step": 4123 - }, - { - "epoch": 0.31530859949920675, - "grad_norm": 0.0025649317540228367, - "learning_rate": 0.00019999995117600967, - "loss": 46.0, - "step": 4124 - }, - { - "epoch": 0.31538505648259646, - "grad_norm": 0.001873421249911189, - "learning_rate": 0.00019999995115227125, - "loss": 46.0, - "step": 4125 - }, - { - "epoch": 0.3154615134659862, - "grad_norm": 0.0014939858810976148, - "learning_rate": 0.00019999995112852706, - "loss": 46.0, - "step": 4126 - }, - { - "epoch": 0.3155379704493759, - "grad_norm": 0.0008123088045977056, - "learning_rate": 0.00019999995110477712, - "loss": 46.0, - "step": 4127 - }, - { - "epoch": 0.31561442743276563, - "grad_norm": 0.0013626243453472853, - "learning_rate": 0.0001999999510810214, - "loss": 46.0, - "step": 4128 - }, - { - "epoch": 0.31569088441615534, - "grad_norm": 0.0014732293784618378, - "learning_rate": 0.00019999995105725988, - "loss": 46.0, - "step": 4129 - }, - { - "epoch": 0.3157673413995451, - "grad_norm": 0.0017963205464184284, - "learning_rate": 0.00019999995103349262, - "loss": 46.0, - "step": 4130 - }, - { - "epoch": 0.3158437983829348, - "grad_norm": 0.0015620823251083493, - "learning_rate": 0.0001999999510097196, - "loss": 46.0, - "step": 4131 - }, - { - "epoch": 0.3159202553663245, - "grad_norm": 0.0007996860658749938, - "learning_rate": 0.0001999999509859408, - "loss": 46.0, - "step": 4132 - }, - { - "epoch": 0.3159967123497142, - "grad_norm": 0.01104381587356329, - "learning_rate": 0.00019999995096215624, - "loss": 46.0, - "step": 4133 - }, - { - "epoch": 0.316073169333104, - "grad_norm": 0.0022612884640693665, - "learning_rate": 0.0001999999509383659, - "loss": 46.0, - "step": 4134 - }, - { - "epoch": 0.3161496263164937, - "grad_norm": 0.0031405312474817038, - "learning_rate": 0.0001999999509145698, - "loss": 46.0, - "step": 4135 - }, - { - "epoch": 0.3162260832998834, - "grad_norm": 0.000809003715403378, - "learning_rate": 0.0001999999508907679, - "loss": 46.0, - "step": 4136 - }, - { - "epoch": 0.31630254028327315, - "grad_norm": 0.0009952536784112453, - "learning_rate": 0.00019999995086696025, - "loss": 46.0, - "step": 4137 - }, - { - "epoch": 0.31637899726666285, - "grad_norm": 0.007186158560216427, - "learning_rate": 0.00019999995084314683, - "loss": 46.0, - "step": 4138 - }, - { - "epoch": 0.31645545425005256, - "grad_norm": 0.0030693570151925087, - "learning_rate": 0.00019999995081932764, - "loss": 46.0, - "step": 4139 - }, - { - "epoch": 0.31653191123344226, - "grad_norm": 0.0008522903081029654, - "learning_rate": 0.00019999995079550268, - "loss": 46.0, - "step": 4140 - }, - { - "epoch": 0.316608368216832, - "grad_norm": 0.0013415059074759483, - "learning_rate": 0.00019999995077167197, - "loss": 46.0, - "step": 4141 - }, - { - "epoch": 0.31668482520022173, - "grad_norm": 0.0007622973062098026, - "learning_rate": 0.00019999995074783546, - "loss": 46.0, - "step": 4142 - }, - { - "epoch": 0.31676128218361144, - "grad_norm": 0.005981494206935167, - "learning_rate": 0.0001999999507239932, - "loss": 46.0, - "step": 4143 - }, - { - "epoch": 0.31683773916700114, - "grad_norm": 0.013176447711884975, - "learning_rate": 0.00019999995070014518, - "loss": 46.0, - "step": 4144 - }, - { - "epoch": 0.3169141961503909, - "grad_norm": 0.0005784628447145224, - "learning_rate": 0.00019999995067629137, - "loss": 46.0, - "step": 4145 - }, - { - "epoch": 0.3169906531337806, - "grad_norm": 0.0030265958048403263, - "learning_rate": 0.0001999999506524318, - "loss": 46.0, - "step": 4146 - }, - { - "epoch": 0.3170671101171703, - "grad_norm": 0.0008855731575749815, - "learning_rate": 0.00019999995062856645, - "loss": 46.0, - "step": 4147 - }, - { - "epoch": 0.31714356710056, - "grad_norm": 0.0009584020590409636, - "learning_rate": 0.00019999995060469532, - "loss": 46.0, - "step": 4148 - }, - { - "epoch": 0.3172200240839498, - "grad_norm": 0.000916891498491168, - "learning_rate": 0.00019999995058081845, - "loss": 46.0, - "step": 4149 - }, - { - "epoch": 0.3172964810673395, - "grad_norm": 0.0007251052302308381, - "learning_rate": 0.00019999995055693578, - "loss": 46.0, - "step": 4150 - }, - { - "epoch": 0.3173729380507292, - "grad_norm": 0.0009630761342123151, - "learning_rate": 0.0001999999505330474, - "loss": 46.0, - "step": 4151 - }, - { - "epoch": 0.31744939503411895, - "grad_norm": 0.0010503242956474423, - "learning_rate": 0.00019999995050915318, - "loss": 46.0, - "step": 4152 - }, - { - "epoch": 0.31752585201750866, - "grad_norm": 0.001143371919170022, - "learning_rate": 0.00019999995048525321, - "loss": 46.0, - "step": 4153 - }, - { - "epoch": 0.31760230900089836, - "grad_norm": 0.0011370426509529352, - "learning_rate": 0.0001999999504613475, - "loss": 46.0, - "step": 4154 - }, - { - "epoch": 0.31767876598428807, - "grad_norm": 0.00044460356002673507, - "learning_rate": 0.000199999950437436, - "loss": 46.0, - "step": 4155 - }, - { - "epoch": 0.31775522296767783, - "grad_norm": 0.0006213995511643589, - "learning_rate": 0.00019999995041351874, - "loss": 46.0, - "step": 4156 - }, - { - "epoch": 0.31783167995106754, - "grad_norm": 0.0013278733240440488, - "learning_rate": 0.00019999995038959568, - "loss": 46.0, - "step": 4157 - }, - { - "epoch": 0.31790813693445724, - "grad_norm": 0.0019477899186313152, - "learning_rate": 0.00019999995036566688, - "loss": 46.0, - "step": 4158 - }, - { - "epoch": 0.31798459391784695, - "grad_norm": 0.0030430464539676905, - "learning_rate": 0.0001999999503417323, - "loss": 46.0, - "step": 4159 - }, - { - "epoch": 0.3180610509012367, - "grad_norm": 0.0008460903191007674, - "learning_rate": 0.00019999995031779198, - "loss": 46.0, - "step": 4160 - }, - { - "epoch": 0.3181375078846264, - "grad_norm": 0.0034120797645300627, - "learning_rate": 0.00019999995029384583, - "loss": 46.0, - "step": 4161 - }, - { - "epoch": 0.3182139648680161, - "grad_norm": 0.0005994453676976264, - "learning_rate": 0.00019999995026989396, - "loss": 46.0, - "step": 4162 - }, - { - "epoch": 0.3182904218514059, - "grad_norm": 0.00183070357888937, - "learning_rate": 0.0001999999502459363, - "loss": 46.0, - "step": 4163 - }, - { - "epoch": 0.3183668788347956, - "grad_norm": 0.0013423011405393481, - "learning_rate": 0.00019999995022197288, - "loss": 46.0, - "step": 4164 - }, - { - "epoch": 0.3184433358181853, - "grad_norm": 0.001122456043958664, - "learning_rate": 0.0001999999501980037, - "loss": 46.0, - "step": 4165 - }, - { - "epoch": 0.318519792801575, - "grad_norm": 0.003014274640008807, - "learning_rate": 0.00019999995017402873, - "loss": 46.0, - "step": 4166 - }, - { - "epoch": 0.31859624978496476, - "grad_norm": 0.0008784834644757211, - "learning_rate": 0.000199999950150048, - "loss": 46.0, - "step": 4167 - }, - { - "epoch": 0.31867270676835446, - "grad_norm": 0.008919299580156803, - "learning_rate": 0.00019999995012606148, - "loss": 46.0, - "step": 4168 - }, - { - "epoch": 0.31874916375174417, - "grad_norm": 0.0008249454549513757, - "learning_rate": 0.00019999995010206922, - "loss": 46.0, - "step": 4169 - }, - { - "epoch": 0.3188256207351339, - "grad_norm": 0.000595959194470197, - "learning_rate": 0.00019999995007807117, - "loss": 46.0, - "step": 4170 - }, - { - "epoch": 0.31890207771852364, - "grad_norm": 0.0012690945295616984, - "learning_rate": 0.00019999995005406737, - "loss": 46.0, - "step": 4171 - }, - { - "epoch": 0.31897853470191334, - "grad_norm": 0.005566779058426619, - "learning_rate": 0.0001999999500300578, - "loss": 46.0, - "step": 4172 - }, - { - "epoch": 0.31905499168530305, - "grad_norm": 0.0008208694634959102, - "learning_rate": 0.00019999995000604244, - "loss": 46.0, - "step": 4173 - }, - { - "epoch": 0.31913144866869275, - "grad_norm": 0.003568213665857911, - "learning_rate": 0.00019999994998202132, - "loss": 46.0, - "step": 4174 - }, - { - "epoch": 0.3192079056520825, - "grad_norm": 0.0006343183922581375, - "learning_rate": 0.00019999994995799445, - "loss": 46.0, - "step": 4175 - }, - { - "epoch": 0.3192843626354722, - "grad_norm": 0.0040811472572386265, - "learning_rate": 0.00019999994993396175, - "loss": 46.0, - "step": 4176 - }, - { - "epoch": 0.3193608196188619, - "grad_norm": 0.002137728501111269, - "learning_rate": 0.00019999994990992334, - "loss": 46.0, - "step": 4177 - }, - { - "epoch": 0.3194372766022517, - "grad_norm": 0.0026833750307559967, - "learning_rate": 0.00019999994988587915, - "loss": 46.0, - "step": 4178 - }, - { - "epoch": 0.3195137335856414, - "grad_norm": 0.0005680866306647658, - "learning_rate": 0.00019999994986182919, - "loss": 46.0, - "step": 4179 - }, - { - "epoch": 0.3195901905690311, - "grad_norm": 0.0033166774082928896, - "learning_rate": 0.00019999994983777345, - "loss": 46.0, - "step": 4180 - }, - { - "epoch": 0.3196666475524208, - "grad_norm": 0.0029382030479609966, - "learning_rate": 0.00019999994981371194, - "loss": 46.0, - "step": 4181 - }, - { - "epoch": 0.31974310453581056, - "grad_norm": 0.0033811938483268023, - "learning_rate": 0.00019999994978964466, - "loss": 46.0, - "step": 4182 - }, - { - "epoch": 0.31981956151920027, - "grad_norm": 0.0008562508155591786, - "learning_rate": 0.00019999994976557163, - "loss": 46.0, - "step": 4183 - }, - { - "epoch": 0.31989601850259, - "grad_norm": 0.0015214183367788792, - "learning_rate": 0.00019999994974149283, - "loss": 46.0, - "step": 4184 - }, - { - "epoch": 0.3199724754859797, - "grad_norm": 0.0022460981272161007, - "learning_rate": 0.00019999994971740823, - "loss": 46.0, - "step": 4185 - }, - { - "epoch": 0.32004893246936944, - "grad_norm": 0.0012656330363824964, - "learning_rate": 0.0001999999496933179, - "loss": 46.0, - "step": 4186 - }, - { - "epoch": 0.32012538945275915, - "grad_norm": 0.0010185488499701023, - "learning_rate": 0.00019999994966922175, - "loss": 46.0, - "step": 4187 - }, - { - "epoch": 0.32020184643614885, - "grad_norm": 0.0041980748064816, - "learning_rate": 0.00019999994964511989, - "loss": 46.0, - "step": 4188 - }, - { - "epoch": 0.32027830341953856, - "grad_norm": 0.001467040623538196, - "learning_rate": 0.00019999994962101222, - "loss": 46.0, - "step": 4189 - }, - { - "epoch": 0.3203547604029283, - "grad_norm": 0.0009076852584257722, - "learning_rate": 0.00019999994959689877, - "loss": 46.0, - "step": 4190 - }, - { - "epoch": 0.320431217386318, - "grad_norm": 0.0034510979894548655, - "learning_rate": 0.00019999994957277959, - "loss": 46.0, - "step": 4191 - }, - { - "epoch": 0.32050767436970773, - "grad_norm": 0.0005649134982377291, - "learning_rate": 0.00019999994954865462, - "loss": 46.0, - "step": 4192 - }, - { - "epoch": 0.3205841313530975, - "grad_norm": 0.0012544525088742375, - "learning_rate": 0.0001999999495245239, - "loss": 46.0, - "step": 4193 - }, - { - "epoch": 0.3206605883364872, - "grad_norm": 0.0010324690956622362, - "learning_rate": 0.00019999994950038738, - "loss": 46.0, - "step": 4194 - }, - { - "epoch": 0.3207370453198769, - "grad_norm": 0.0019627446308732033, - "learning_rate": 0.0001999999494762451, - "loss": 46.0, - "step": 4195 - }, - { - "epoch": 0.3208135023032666, - "grad_norm": 0.0030409370083361864, - "learning_rate": 0.00019999994945209704, - "loss": 46.0, - "step": 4196 - }, - { - "epoch": 0.32088995928665637, - "grad_norm": 0.0008453971240669489, - "learning_rate": 0.00019999994942794324, - "loss": 46.0, - "step": 4197 - }, - { - "epoch": 0.3209664162700461, - "grad_norm": 0.0005371761508285999, - "learning_rate": 0.00019999994940378364, - "loss": 46.0, - "step": 4198 - }, - { - "epoch": 0.3210428732534358, - "grad_norm": 0.013904278166592121, - "learning_rate": 0.0001999999493796183, - "loss": 46.0, - "step": 4199 - }, - { - "epoch": 0.3211193302368255, - "grad_norm": 0.002591705182567239, - "learning_rate": 0.00019999994935544717, - "loss": 46.0, - "step": 4200 - }, - { - "epoch": 0.32119578722021525, - "grad_norm": 0.0017301174812018871, - "learning_rate": 0.00019999994933127027, - "loss": 46.0, - "step": 4201 - }, - { - "epoch": 0.32127224420360495, - "grad_norm": 0.0035617249086499214, - "learning_rate": 0.0001999999493070876, - "loss": 46.0, - "step": 4202 - }, - { - "epoch": 0.32134870118699466, - "grad_norm": 0.01069709099829197, - "learning_rate": 0.0001999999492828992, - "loss": 46.0, - "step": 4203 - }, - { - "epoch": 0.32142515817038436, - "grad_norm": 0.0016002259217202663, - "learning_rate": 0.00019999994925870497, - "loss": 46.0, - "step": 4204 - }, - { - "epoch": 0.3215016151537741, - "grad_norm": 0.004124329425394535, - "learning_rate": 0.000199999949234505, - "loss": 46.0, - "step": 4205 - }, - { - "epoch": 0.32157807213716383, - "grad_norm": 0.0010847809026017785, - "learning_rate": 0.00019999994921029925, - "loss": 46.0, - "step": 4206 - }, - { - "epoch": 0.32165452912055353, - "grad_norm": 0.0018952528480440378, - "learning_rate": 0.00019999994918608774, - "loss": 46.0, - "step": 4207 - }, - { - "epoch": 0.3217309861039433, - "grad_norm": 0.0008499515242874622, - "learning_rate": 0.00019999994916187049, - "loss": 46.0, - "step": 4208 - }, - { - "epoch": 0.321807443087333, - "grad_norm": 0.001965031260624528, - "learning_rate": 0.0001999999491376474, - "loss": 46.0, - "step": 4209 - }, - { - "epoch": 0.3218839000707227, - "grad_norm": 0.0012979368912056088, - "learning_rate": 0.00019999994911341863, - "loss": 46.0, - "step": 4210 - }, - { - "epoch": 0.3219603570541124, - "grad_norm": 0.0005783950909972191, - "learning_rate": 0.000199999949089184, - "loss": 46.0, - "step": 4211 - }, - { - "epoch": 0.3220368140375022, - "grad_norm": 0.0014094688231125474, - "learning_rate": 0.00019999994906494365, - "loss": 46.0, - "step": 4212 - }, - { - "epoch": 0.3221132710208919, - "grad_norm": 0.0004982354585081339, - "learning_rate": 0.00019999994904069753, - "loss": 46.0, - "step": 4213 - }, - { - "epoch": 0.3221897280042816, - "grad_norm": 0.0024239635095000267, - "learning_rate": 0.00019999994901644564, - "loss": 46.0, - "step": 4214 - }, - { - "epoch": 0.3222661849876713, - "grad_norm": 0.0007969841826707125, - "learning_rate": 0.00019999994899218797, - "loss": 46.0, - "step": 4215 - }, - { - "epoch": 0.32234264197106105, - "grad_norm": 0.001183964079245925, - "learning_rate": 0.00019999994896792453, - "loss": 46.0, - "step": 4216 - }, - { - "epoch": 0.32241909895445076, - "grad_norm": 0.008951770141720772, - "learning_rate": 0.00019999994894365534, - "loss": 46.0, - "step": 4217 - }, - { - "epoch": 0.32249555593784046, - "grad_norm": 0.000656113785225898, - "learning_rate": 0.00019999994891938038, - "loss": 46.0, - "step": 4218 - }, - { - "epoch": 0.32257201292123017, - "grad_norm": 0.001815742114558816, - "learning_rate": 0.00019999994889509961, - "loss": 46.0, - "step": 4219 - }, - { - "epoch": 0.32264846990461993, - "grad_norm": 0.0017433996545150876, - "learning_rate": 0.0001999999488708131, - "loss": 46.0, - "step": 4220 - }, - { - "epoch": 0.32272492688800963, - "grad_norm": 0.0014687128132209182, - "learning_rate": 0.00019999994884652082, - "loss": 46.0, - "step": 4221 - }, - { - "epoch": 0.32280138387139934, - "grad_norm": 0.003201437648385763, - "learning_rate": 0.00019999994882222274, - "loss": 46.0, - "step": 4222 - }, - { - "epoch": 0.3228778408547891, - "grad_norm": 0.000897066667675972, - "learning_rate": 0.00019999994879791894, - "loss": 46.0, - "step": 4223 - }, - { - "epoch": 0.3229542978381788, - "grad_norm": 0.001856208429671824, - "learning_rate": 0.00019999994877360934, - "loss": 46.0, - "step": 4224 - }, - { - "epoch": 0.3230307548215685, - "grad_norm": 0.0012774377828463912, - "learning_rate": 0.00019999994874929397, - "loss": 46.0, - "step": 4225 - }, - { - "epoch": 0.3231072118049582, - "grad_norm": 0.0008346923277713358, - "learning_rate": 0.00019999994872497284, - "loss": 46.0, - "step": 4226 - }, - { - "epoch": 0.323183668788348, - "grad_norm": 0.0016784276813268661, - "learning_rate": 0.00019999994870064595, - "loss": 46.0, - "step": 4227 - }, - { - "epoch": 0.3232601257717377, - "grad_norm": 0.0009191537392325699, - "learning_rate": 0.00019999994867631328, - "loss": 46.0, - "step": 4228 - }, - { - "epoch": 0.3233365827551274, - "grad_norm": 0.0023871574085205793, - "learning_rate": 0.00019999994865197484, - "loss": 46.0, - "step": 4229 - }, - { - "epoch": 0.3234130397385171, - "grad_norm": 0.001587114529684186, - "learning_rate": 0.00019999994862763065, - "loss": 46.0, - "step": 4230 - }, - { - "epoch": 0.32348949672190686, - "grad_norm": 0.00043832772644236684, - "learning_rate": 0.0001999999486032807, - "loss": 46.0, - "step": 4231 - }, - { - "epoch": 0.32356595370529656, - "grad_norm": 0.0011136778630316257, - "learning_rate": 0.00019999994857892493, - "loss": 46.0, - "step": 4232 - }, - { - "epoch": 0.32364241068868627, - "grad_norm": 0.0014798268675804138, - "learning_rate": 0.00019999994855456342, - "loss": 46.0, - "step": 4233 - }, - { - "epoch": 0.32371886767207597, - "grad_norm": 0.0012873532250523567, - "learning_rate": 0.0001999999485301961, - "loss": 46.0, - "step": 4234 - }, - { - "epoch": 0.32379532465546573, - "grad_norm": 0.003759076353162527, - "learning_rate": 0.00019999994850582306, - "loss": 46.0, - "step": 4235 - }, - { - "epoch": 0.32387178163885544, - "grad_norm": 0.0006014717509970069, - "learning_rate": 0.00019999994848144423, - "loss": 46.0, - "step": 4236 - }, - { - "epoch": 0.32394823862224514, - "grad_norm": 0.0006482438766397536, - "learning_rate": 0.00019999994845705966, - "loss": 46.0, - "step": 4237 - }, - { - "epoch": 0.3240246956056349, - "grad_norm": 0.0010961649240925908, - "learning_rate": 0.00019999994843266928, - "loss": 46.0, - "step": 4238 - }, - { - "epoch": 0.3241011525890246, - "grad_norm": 0.001960871508345008, - "learning_rate": 0.00019999994840827316, - "loss": 46.0, - "step": 4239 - }, - { - "epoch": 0.3241776095724143, - "grad_norm": 0.0005564126768149436, - "learning_rate": 0.00019999994838387127, - "loss": 46.0, - "step": 4240 - }, - { - "epoch": 0.324254066555804, - "grad_norm": 0.00242434861138463, - "learning_rate": 0.00019999994835946357, - "loss": 46.0, - "step": 4241 - }, - { - "epoch": 0.3243305235391938, - "grad_norm": 0.0006800544797442853, - "learning_rate": 0.00019999994833505013, - "loss": 46.0, - "step": 4242 - }, - { - "epoch": 0.3244069805225835, - "grad_norm": 0.00216276990249753, - "learning_rate": 0.00019999994831063092, - "loss": 46.0, - "step": 4243 - }, - { - "epoch": 0.3244834375059732, - "grad_norm": 0.0018655310850590467, - "learning_rate": 0.00019999994828620596, - "loss": 46.0, - "step": 4244 - }, - { - "epoch": 0.3245598944893629, - "grad_norm": 0.0009296726784668863, - "learning_rate": 0.0001999999482617752, - "loss": 46.0, - "step": 4245 - }, - { - "epoch": 0.32463635147275266, - "grad_norm": 0.00227538519538939, - "learning_rate": 0.0001999999482373387, - "loss": 46.0, - "step": 4246 - }, - { - "epoch": 0.32471280845614237, - "grad_norm": 0.001492717768996954, - "learning_rate": 0.00019999994821289638, - "loss": 46.0, - "step": 4247 - }, - { - "epoch": 0.32478926543953207, - "grad_norm": 0.0005910330219194293, - "learning_rate": 0.00019999994818844835, - "loss": 46.0, - "step": 4248 - }, - { - "epoch": 0.3248657224229218, - "grad_norm": 0.0014322446659207344, - "learning_rate": 0.00019999994816399452, - "loss": 46.0, - "step": 4249 - }, - { - "epoch": 0.32494217940631154, - "grad_norm": 0.0016522066434845328, - "learning_rate": 0.00019999994813953492, - "loss": 46.0, - "step": 4250 - }, - { - "epoch": 0.32501863638970124, - "grad_norm": 0.0006958871963433921, - "learning_rate": 0.00019999994811506955, - "loss": 46.0, - "step": 4251 - }, - { - "epoch": 0.32509509337309095, - "grad_norm": 0.0007227236637845635, - "learning_rate": 0.0001999999480905984, - "loss": 46.0, - "step": 4252 - }, - { - "epoch": 0.3251715503564807, - "grad_norm": 0.001814017305150628, - "learning_rate": 0.0001999999480661215, - "loss": 46.0, - "step": 4253 - }, - { - "epoch": 0.3252480073398704, - "grad_norm": 0.0008652217220515013, - "learning_rate": 0.00019999994804163884, - "loss": 46.0, - "step": 4254 - }, - { - "epoch": 0.3253244643232601, - "grad_norm": 0.000539875531103462, - "learning_rate": 0.00019999994801715037, - "loss": 46.0, - "step": 4255 - }, - { - "epoch": 0.3254009213066498, - "grad_norm": 0.0008699905010871589, - "learning_rate": 0.00019999994799265616, - "loss": 46.0, - "step": 4256 - }, - { - "epoch": 0.3254773782900396, - "grad_norm": 0.00214879447594285, - "learning_rate": 0.0001999999479681562, - "loss": 46.0, - "step": 4257 - }, - { - "epoch": 0.3255538352734293, - "grad_norm": 0.002308016875758767, - "learning_rate": 0.00019999994794365044, - "loss": 46.0, - "step": 4258 - }, - { - "epoch": 0.325630292256819, - "grad_norm": 0.0005390993901528418, - "learning_rate": 0.00019999994791913893, - "loss": 46.0, - "step": 4259 - }, - { - "epoch": 0.3257067492402087, - "grad_norm": 0.0006740989629179239, - "learning_rate": 0.00019999994789462162, - "loss": 46.0, - "step": 4260 - }, - { - "epoch": 0.32578320622359846, - "grad_norm": 0.0008012195467017591, - "learning_rate": 0.00019999994787009854, - "loss": 46.0, - "step": 4261 - }, - { - "epoch": 0.32585966320698817, - "grad_norm": 0.0010206456063315272, - "learning_rate": 0.0001999999478455697, - "loss": 46.0, - "step": 4262 - }, - { - "epoch": 0.3259361201903779, - "grad_norm": 0.0007958015194162726, - "learning_rate": 0.00019999994782103514, - "loss": 46.0, - "step": 4263 - }, - { - "epoch": 0.3260125771737676, - "grad_norm": 0.003606856567785144, - "learning_rate": 0.00019999994779649476, - "loss": 46.0, - "step": 4264 - }, - { - "epoch": 0.32608903415715734, - "grad_norm": 0.0033516171388328075, - "learning_rate": 0.00019999994777194862, - "loss": 46.0, - "step": 4265 - }, - { - "epoch": 0.32616549114054705, - "grad_norm": 0.0005686180666089058, - "learning_rate": 0.00019999994774739672, - "loss": 46.0, - "step": 4266 - }, - { - "epoch": 0.32624194812393675, - "grad_norm": 0.0008491341141052544, - "learning_rate": 0.00019999994772283906, - "loss": 46.0, - "step": 4267 - }, - { - "epoch": 0.3263184051073265, - "grad_norm": 0.0008566360920667648, - "learning_rate": 0.0001999999476982756, - "loss": 46.0, - "step": 4268 - }, - { - "epoch": 0.3263948620907162, - "grad_norm": 0.0021916660480201244, - "learning_rate": 0.00019999994767370638, - "loss": 46.0, - "step": 4269 - }, - { - "epoch": 0.3264713190741059, - "grad_norm": 0.0007130063604563475, - "learning_rate": 0.0001999999476491314, - "loss": 46.0, - "step": 4270 - }, - { - "epoch": 0.32654777605749563, - "grad_norm": 0.0007190363830886781, - "learning_rate": 0.00019999994762455063, - "loss": 46.0, - "step": 4271 - }, - { - "epoch": 0.3266242330408854, - "grad_norm": 0.0003927747602574527, - "learning_rate": 0.0001999999475999641, - "loss": 46.0, - "step": 4272 - }, - { - "epoch": 0.3267006900242751, - "grad_norm": 0.0013259283732622862, - "learning_rate": 0.0001999999475753718, - "loss": 46.0, - "step": 4273 - }, - { - "epoch": 0.3267771470076648, - "grad_norm": 0.00265662744641304, - "learning_rate": 0.00019999994755077376, - "loss": 46.0, - "step": 4274 - }, - { - "epoch": 0.3268536039910545, - "grad_norm": 0.0015432748477905989, - "learning_rate": 0.00019999994752616993, - "loss": 46.0, - "step": 4275 - }, - { - "epoch": 0.32693006097444427, - "grad_norm": 0.001415196806192398, - "learning_rate": 0.00019999994750156033, - "loss": 46.0, - "step": 4276 - }, - { - "epoch": 0.327006517957834, - "grad_norm": 0.0011422167299315333, - "learning_rate": 0.00019999994747694496, - "loss": 46.0, - "step": 4277 - }, - { - "epoch": 0.3270829749412237, - "grad_norm": 0.0008736549643799663, - "learning_rate": 0.0001999999474523238, - "loss": 46.0, - "step": 4278 - }, - { - "epoch": 0.32715943192461344, - "grad_norm": 0.0008260179311037064, - "learning_rate": 0.0001999999474276969, - "loss": 46.0, - "step": 4279 - }, - { - "epoch": 0.32723588890800315, - "grad_norm": 0.0013766777701675892, - "learning_rate": 0.00019999994740306422, - "loss": 46.0, - "step": 4280 - }, - { - "epoch": 0.32731234589139285, - "grad_norm": 0.0009538207086734474, - "learning_rate": 0.00019999994737842576, - "loss": 46.0, - "step": 4281 - }, - { - "epoch": 0.32738880287478256, - "grad_norm": 0.001052172388881445, - "learning_rate": 0.00019999994735378157, - "loss": 46.0, - "step": 4282 - }, - { - "epoch": 0.3274652598581723, - "grad_norm": 0.0014353011501953006, - "learning_rate": 0.00019999994732913156, - "loss": 46.0, - "step": 4283 - }, - { - "epoch": 0.327541716841562, - "grad_norm": 0.001284641562961042, - "learning_rate": 0.0001999999473044758, - "loss": 46.0, - "step": 4284 - }, - { - "epoch": 0.32761817382495173, - "grad_norm": 0.0018336954526603222, - "learning_rate": 0.0001999999472798143, - "loss": 46.0, - "step": 4285 - }, - { - "epoch": 0.32769463080834144, - "grad_norm": 0.0022756552789360285, - "learning_rate": 0.00019999994725514698, - "loss": 46.0, - "step": 4286 - }, - { - "epoch": 0.3277710877917312, - "grad_norm": 0.002937385579571128, - "learning_rate": 0.0001999999472304739, - "loss": 46.0, - "step": 4287 - }, - { - "epoch": 0.3278475447751209, - "grad_norm": 0.003305448219180107, - "learning_rate": 0.00019999994720579508, - "loss": 46.0, - "step": 4288 - }, - { - "epoch": 0.3279240017585106, - "grad_norm": 0.004905517678707838, - "learning_rate": 0.00019999994718111048, - "loss": 46.0, - "step": 4289 - }, - { - "epoch": 0.3280004587419003, - "grad_norm": 0.0015234446618705988, - "learning_rate": 0.0001999999471564201, - "loss": 46.0, - "step": 4290 - }, - { - "epoch": 0.3280769157252901, - "grad_norm": 0.000986318220384419, - "learning_rate": 0.00019999994713172395, - "loss": 46.0, - "step": 4291 - }, - { - "epoch": 0.3281533727086798, - "grad_norm": 0.00145861750934273, - "learning_rate": 0.00019999994710702203, - "loss": 46.0, - "step": 4292 - }, - { - "epoch": 0.3282298296920695, - "grad_norm": 0.004262291826307774, - "learning_rate": 0.00019999994708231434, - "loss": 46.0, - "step": 4293 - }, - { - "epoch": 0.32830628667545925, - "grad_norm": 0.001263973070308566, - "learning_rate": 0.0001999999470576009, - "loss": 46.0, - "step": 4294 - }, - { - "epoch": 0.32838274365884895, - "grad_norm": 0.008075137622654438, - "learning_rate": 0.00019999994703288166, - "loss": 46.0, - "step": 4295 - }, - { - "epoch": 0.32845920064223866, - "grad_norm": 0.002123615937307477, - "learning_rate": 0.0001999999470081567, - "loss": 46.0, - "step": 4296 - }, - { - "epoch": 0.32853565762562836, - "grad_norm": 0.000710161926690489, - "learning_rate": 0.00019999994698342591, - "loss": 46.0, - "step": 4297 - }, - { - "epoch": 0.3286121146090181, - "grad_norm": 0.0008752677822485566, - "learning_rate": 0.00019999994695868938, - "loss": 46.0, - "step": 4298 - }, - { - "epoch": 0.32868857159240783, - "grad_norm": 0.0013771122321486473, - "learning_rate": 0.00019999994693394707, - "loss": 46.0, - "step": 4299 - }, - { - "epoch": 0.32876502857579754, - "grad_norm": 0.0017129092011600733, - "learning_rate": 0.00019999994690919902, - "loss": 46.0, - "step": 4300 - }, - { - "epoch": 0.32884148555918724, - "grad_norm": 0.0011252955300733447, - "learning_rate": 0.0001999999468844452, - "loss": 46.0, - "step": 4301 - }, - { - "epoch": 0.328917942542577, - "grad_norm": 0.001725336886011064, - "learning_rate": 0.0001999999468596856, - "loss": 46.0, - "step": 4302 - }, - { - "epoch": 0.3289943995259667, - "grad_norm": 0.0009346289443783462, - "learning_rate": 0.0001999999468349202, - "loss": 46.0, - "step": 4303 - }, - { - "epoch": 0.3290708565093564, - "grad_norm": 0.0009860260179266334, - "learning_rate": 0.00019999994681014905, - "loss": 46.0, - "step": 4304 - }, - { - "epoch": 0.3291473134927461, - "grad_norm": 0.0021675415337085724, - "learning_rate": 0.00019999994678537216, - "loss": 46.0, - "step": 4305 - }, - { - "epoch": 0.3292237704761359, - "grad_norm": 0.001486804336309433, - "learning_rate": 0.00019999994676058944, - "loss": 46.0, - "step": 4306 - }, - { - "epoch": 0.3293002274595256, - "grad_norm": 0.0037671176251024008, - "learning_rate": 0.000199999946735801, - "loss": 46.0, - "step": 4307 - }, - { - "epoch": 0.3293766844429153, - "grad_norm": 0.000998138333670795, - "learning_rate": 0.00019999994671100678, - "loss": 46.0, - "step": 4308 - }, - { - "epoch": 0.32945314142630505, - "grad_norm": 0.0028031668625772, - "learning_rate": 0.00019999994668620677, - "loss": 46.0, - "step": 4309 - }, - { - "epoch": 0.32952959840969476, - "grad_norm": 0.0010716308606788516, - "learning_rate": 0.000199999946661401, - "loss": 46.0, - "step": 4310 - }, - { - "epoch": 0.32960605539308446, - "grad_norm": 0.0014670881209895015, - "learning_rate": 0.00019999994663658948, - "loss": 46.0, - "step": 4311 - }, - { - "epoch": 0.32968251237647417, - "grad_norm": 0.00852916482836008, - "learning_rate": 0.00019999994661177217, - "loss": 46.0, - "step": 4312 - }, - { - "epoch": 0.32975896935986393, - "grad_norm": 0.0010498393094167113, - "learning_rate": 0.0001999999465869491, - "loss": 46.0, - "step": 4313 - }, - { - "epoch": 0.32983542634325363, - "grad_norm": 0.0005530392518267035, - "learning_rate": 0.00019999994656212026, - "loss": 46.0, - "step": 4314 - }, - { - "epoch": 0.32991188332664334, - "grad_norm": 0.0014768405817449093, - "learning_rate": 0.00019999994653728567, - "loss": 46.0, - "step": 4315 - }, - { - "epoch": 0.32998834031003305, - "grad_norm": 0.0005336601752787828, - "learning_rate": 0.00019999994651244527, - "loss": 46.0, - "step": 4316 - }, - { - "epoch": 0.3300647972934228, - "grad_norm": 0.0010253757936879992, - "learning_rate": 0.00019999994648759912, - "loss": 46.0, - "step": 4317 - }, - { - "epoch": 0.3301412542768125, - "grad_norm": 0.0025468647945672274, - "learning_rate": 0.0001999999464627472, - "loss": 46.0, - "step": 4318 - }, - { - "epoch": 0.3302177112602022, - "grad_norm": 0.0008193323737941682, - "learning_rate": 0.00019999994643788953, - "loss": 46.0, - "step": 4319 - }, - { - "epoch": 0.3302941682435919, - "grad_norm": 0.0011719852918758988, - "learning_rate": 0.00019999994641302604, - "loss": 46.0, - "step": 4320 - }, - { - "epoch": 0.3303706252269817, - "grad_norm": 0.0031661740504205227, - "learning_rate": 0.00019999994638815683, - "loss": 46.0, - "step": 4321 - }, - { - "epoch": 0.3304470822103714, - "grad_norm": 0.0006985975196585059, - "learning_rate": 0.00019999994636328184, - "loss": 46.0, - "step": 4322 - }, - { - "epoch": 0.3305235391937611, - "grad_norm": 0.000613821845036, - "learning_rate": 0.00019999994633840106, - "loss": 46.0, - "step": 4323 - }, - { - "epoch": 0.33059999617715086, - "grad_norm": 0.007687657605856657, - "learning_rate": 0.00019999994631351452, - "loss": 46.0, - "step": 4324 - }, - { - "epoch": 0.33067645316054056, - "grad_norm": 0.001659299829043448, - "learning_rate": 0.00019999994628862222, - "loss": 46.0, - "step": 4325 - }, - { - "epoch": 0.33075291014393027, - "grad_norm": 0.001331859384663403, - "learning_rate": 0.00019999994626372414, - "loss": 46.0, - "step": 4326 - }, - { - "epoch": 0.33082936712732, - "grad_norm": 0.0009261846425943077, - "learning_rate": 0.0001999999462388203, - "loss": 46.0, - "step": 4327 - }, - { - "epoch": 0.33090582411070973, - "grad_norm": 0.001558349234983325, - "learning_rate": 0.00019999994621391068, - "loss": 46.0, - "step": 4328 - }, - { - "epoch": 0.33098228109409944, - "grad_norm": 0.0011646501952782273, - "learning_rate": 0.00019999994618899529, - "loss": 46.0, - "step": 4329 - }, - { - "epoch": 0.33105873807748915, - "grad_norm": 0.0010463042417541146, - "learning_rate": 0.00019999994616407417, - "loss": 46.0, - "step": 4330 - }, - { - "epoch": 0.33113519506087885, - "grad_norm": 0.0007130959420464933, - "learning_rate": 0.00019999994613914722, - "loss": 46.0, - "step": 4331 - }, - { - "epoch": 0.3312116520442686, - "grad_norm": 0.0005763443186879158, - "learning_rate": 0.00019999994611421453, - "loss": 46.0, - "step": 4332 - }, - { - "epoch": 0.3312881090276583, - "grad_norm": 0.0007845947402529418, - "learning_rate": 0.0001999999460892761, - "loss": 46.0, - "step": 4333 - }, - { - "epoch": 0.331364566011048, - "grad_norm": 0.0008663633489049971, - "learning_rate": 0.00019999994606433185, - "loss": 46.0, - "step": 4334 - }, - { - "epoch": 0.33144102299443773, - "grad_norm": 0.0008883571135811508, - "learning_rate": 0.00019999994603938186, - "loss": 46.0, - "step": 4335 - }, - { - "epoch": 0.3315174799778275, - "grad_norm": 0.0020621302537620068, - "learning_rate": 0.00019999994601442608, - "loss": 46.0, - "step": 4336 - }, - { - "epoch": 0.3315939369612172, - "grad_norm": 0.0015221703797578812, - "learning_rate": 0.00019999994598946454, - "loss": 46.0, - "step": 4337 - }, - { - "epoch": 0.3316703939446069, - "grad_norm": 0.0024182454217225313, - "learning_rate": 0.00019999994596449724, - "loss": 46.0, - "step": 4338 - }, - { - "epoch": 0.33174685092799666, - "grad_norm": 0.0007478754268959165, - "learning_rate": 0.00019999994593952413, - "loss": 46.0, - "step": 4339 - }, - { - "epoch": 0.33182330791138637, - "grad_norm": 0.0014873365871608257, - "learning_rate": 0.0001999999459145453, - "loss": 46.0, - "step": 4340 - }, - { - "epoch": 0.3318997648947761, - "grad_norm": 0.003358301008120179, - "learning_rate": 0.00019999994588956068, - "loss": 46.0, - "step": 4341 - }, - { - "epoch": 0.3319762218781658, - "grad_norm": 0.0025808545760810375, - "learning_rate": 0.0001999999458645703, - "loss": 46.0, - "step": 4342 - }, - { - "epoch": 0.33205267886155554, - "grad_norm": 0.0005722782807424664, - "learning_rate": 0.00019999994583957414, - "loss": 46.0, - "step": 4343 - }, - { - "epoch": 0.33212913584494524, - "grad_norm": 0.0008264755015261471, - "learning_rate": 0.00019999994581457222, - "loss": 46.0, - "step": 4344 - }, - { - "epoch": 0.33220559282833495, - "grad_norm": 0.0036591014359146357, - "learning_rate": 0.00019999994578956453, - "loss": 46.0, - "step": 4345 - }, - { - "epoch": 0.33228204981172466, - "grad_norm": 0.002337259706109762, - "learning_rate": 0.0001999999457645511, - "loss": 46.0, - "step": 4346 - }, - { - "epoch": 0.3323585067951144, - "grad_norm": 0.0008739191689528525, - "learning_rate": 0.00019999994573953182, - "loss": 46.0, - "step": 4347 - }, - { - "epoch": 0.3324349637785041, - "grad_norm": 0.0007346050697378814, - "learning_rate": 0.00019999994571450684, - "loss": 46.0, - "step": 4348 - }, - { - "epoch": 0.33251142076189383, - "grad_norm": 0.0006359497201628983, - "learning_rate": 0.00019999994568947608, - "loss": 46.0, - "step": 4349 - }, - { - "epoch": 0.33258787774528353, - "grad_norm": 0.0005062447744421661, - "learning_rate": 0.00019999994566443952, - "loss": 46.0, - "step": 4350 - }, - { - "epoch": 0.3326643347286733, - "grad_norm": 0.00189758010674268, - "learning_rate": 0.00019999994563939718, - "loss": 46.0, - "step": 4351 - }, - { - "epoch": 0.332740791712063, - "grad_norm": 0.0012883191229775548, - "learning_rate": 0.0001999999456143491, - "loss": 46.0, - "step": 4352 - }, - { - "epoch": 0.3328172486954527, - "grad_norm": 0.000746228382922709, - "learning_rate": 0.00019999994558929528, - "loss": 46.0, - "step": 4353 - }, - { - "epoch": 0.33289370567884247, - "grad_norm": 0.0010014697909355164, - "learning_rate": 0.00019999994556423566, - "loss": 46.0, - "step": 4354 - }, - { - "epoch": 0.33297016266223217, - "grad_norm": 0.000663650338537991, - "learning_rate": 0.00019999994553917026, - "loss": 46.0, - "step": 4355 - }, - { - "epoch": 0.3330466196456219, - "grad_norm": 0.0008152858936227858, - "learning_rate": 0.0001999999455140991, - "loss": 46.0, - "step": 4356 - }, - { - "epoch": 0.3331230766290116, - "grad_norm": 0.0025598809588700533, - "learning_rate": 0.0001999999454890222, - "loss": 46.0, - "step": 4357 - }, - { - "epoch": 0.33319953361240134, - "grad_norm": 0.0004814782878383994, - "learning_rate": 0.00019999994546393947, - "loss": 46.0, - "step": 4358 - }, - { - "epoch": 0.33327599059579105, - "grad_norm": 0.00047982606338337064, - "learning_rate": 0.000199999945438851, - "loss": 46.0, - "step": 4359 - }, - { - "epoch": 0.33335244757918076, - "grad_norm": 0.0028309291228652, - "learning_rate": 0.00019999994541375677, - "loss": 46.0, - "step": 4360 - }, - { - "epoch": 0.33342890456257046, - "grad_norm": 0.0008123546140268445, - "learning_rate": 0.00019999994538865678, - "loss": 46.0, - "step": 4361 - }, - { - "epoch": 0.3335053615459602, - "grad_norm": 0.0009709223522804677, - "learning_rate": 0.000199999945363551, - "loss": 46.0, - "step": 4362 - }, - { - "epoch": 0.3335818185293499, - "grad_norm": 0.0009069971274584532, - "learning_rate": 0.00019999994533843944, - "loss": 46.0, - "step": 4363 - }, - { - "epoch": 0.33365827551273963, - "grad_norm": 0.001450235489755869, - "learning_rate": 0.00019999994531332213, - "loss": 46.0, - "step": 4364 - }, - { - "epoch": 0.33373473249612934, - "grad_norm": 0.0010049097472801805, - "learning_rate": 0.00019999994528819906, - "loss": 46.0, - "step": 4365 - }, - { - "epoch": 0.3338111894795191, - "grad_norm": 0.0011636137496680021, - "learning_rate": 0.0001999999452630702, - "loss": 46.0, - "step": 4366 - }, - { - "epoch": 0.3338876464629088, - "grad_norm": 0.0032694179099053144, - "learning_rate": 0.00019999994523793558, - "loss": 46.0, - "step": 4367 - }, - { - "epoch": 0.3339641034462985, - "grad_norm": 0.00974624790251255, - "learning_rate": 0.00019999994521279518, - "loss": 46.0, - "step": 4368 - }, - { - "epoch": 0.33404056042968827, - "grad_norm": 0.0007959165377542377, - "learning_rate": 0.000199999945187649, - "loss": 46.0, - "step": 4369 - }, - { - "epoch": 0.334117017413078, - "grad_norm": 0.00528798857703805, - "learning_rate": 0.0001999999451624971, - "loss": 46.0, - "step": 4370 - }, - { - "epoch": 0.3341934743964677, - "grad_norm": 0.001440275926142931, - "learning_rate": 0.0001999999451373394, - "loss": 46.0, - "step": 4371 - }, - { - "epoch": 0.3342699313798574, - "grad_norm": 0.0020948785822838545, - "learning_rate": 0.00019999994511217593, - "loss": 46.0, - "step": 4372 - }, - { - "epoch": 0.33434638836324715, - "grad_norm": 0.0010314505780115724, - "learning_rate": 0.00019999994508700667, - "loss": 46.0, - "step": 4373 - }, - { - "epoch": 0.33442284534663685, - "grad_norm": 0.0012385656591504812, - "learning_rate": 0.00019999994506183168, - "loss": 46.0, - "step": 4374 - }, - { - "epoch": 0.33449930233002656, - "grad_norm": 0.00176147127058357, - "learning_rate": 0.0001999999450366509, - "loss": 46.0, - "step": 4375 - }, - { - "epoch": 0.33457575931341627, - "grad_norm": 0.0009488561772741377, - "learning_rate": 0.00019999994501146434, - "loss": 46.0, - "step": 4376 - }, - { - "epoch": 0.334652216296806, - "grad_norm": 0.0010161340469494462, - "learning_rate": 0.00019999994498627203, - "loss": 46.0, - "step": 4377 - }, - { - "epoch": 0.33472867328019573, - "grad_norm": 0.0014125718735158443, - "learning_rate": 0.00019999994496107396, - "loss": 46.0, - "step": 4378 - }, - { - "epoch": 0.33480513026358544, - "grad_norm": 0.0003296210488770157, - "learning_rate": 0.00019999994493587008, - "loss": 46.0, - "step": 4379 - }, - { - "epoch": 0.33488158724697514, - "grad_norm": 0.0012108338996767998, - "learning_rate": 0.00019999994491066048, - "loss": 46.0, - "step": 4380 - }, - { - "epoch": 0.3349580442303649, - "grad_norm": 0.000855764839798212, - "learning_rate": 0.00019999994488544505, - "loss": 46.0, - "step": 4381 - }, - { - "epoch": 0.3350345012137546, - "grad_norm": 0.003351888619363308, - "learning_rate": 0.0001999999448602239, - "loss": 46.0, - "step": 4382 - }, - { - "epoch": 0.3351109581971443, - "grad_norm": 0.004446738865226507, - "learning_rate": 0.00019999994483499696, - "loss": 46.0, - "step": 4383 - }, - { - "epoch": 0.3351874151805341, - "grad_norm": 0.0009897021809592843, - "learning_rate": 0.00019999994480976425, - "loss": 46.0, - "step": 4384 - }, - { - "epoch": 0.3352638721639238, - "grad_norm": 0.001084261341020465, - "learning_rate": 0.00019999994478452578, - "loss": 46.0, - "step": 4385 - }, - { - "epoch": 0.3353403291473135, - "grad_norm": 0.0018345518037676811, - "learning_rate": 0.00019999994475928157, - "loss": 46.0, - "step": 4386 - }, - { - "epoch": 0.3354167861307032, - "grad_norm": 0.000890737515874207, - "learning_rate": 0.00019999994473403153, - "loss": 46.0, - "step": 4387 - }, - { - "epoch": 0.33549324311409295, - "grad_norm": 0.0012093266705051064, - "learning_rate": 0.00019999994470877575, - "loss": 46.0, - "step": 4388 - }, - { - "epoch": 0.33556970009748266, - "grad_norm": 0.0005385908880271018, - "learning_rate": 0.00019999994468351422, - "loss": 46.0, - "step": 4389 - }, - { - "epoch": 0.33564615708087236, - "grad_norm": 0.0007514269673265517, - "learning_rate": 0.00019999994465824688, - "loss": 46.0, - "step": 4390 - }, - { - "epoch": 0.33572261406426207, - "grad_norm": 0.012829259969294071, - "learning_rate": 0.0001999999446329738, - "loss": 46.0, - "step": 4391 - }, - { - "epoch": 0.33579907104765183, - "grad_norm": 0.0026637602131813765, - "learning_rate": 0.00019999994460769495, - "loss": 46.0, - "step": 4392 - }, - { - "epoch": 0.33587552803104154, - "grad_norm": 0.003662163158878684, - "learning_rate": 0.0001999999445824103, - "loss": 46.0, - "step": 4393 - }, - { - "epoch": 0.33595198501443124, - "grad_norm": 0.0014186989283189178, - "learning_rate": 0.0001999999445571199, - "loss": 46.0, - "step": 4394 - }, - { - "epoch": 0.336028441997821, - "grad_norm": 0.0011301335180178285, - "learning_rate": 0.00019999994453182373, - "loss": 46.0, - "step": 4395 - }, - { - "epoch": 0.3361048989812107, - "grad_norm": 0.00131536356639117, - "learning_rate": 0.0001999999445065218, - "loss": 46.0, - "step": 4396 - }, - { - "epoch": 0.3361813559646004, - "grad_norm": 0.0040961154736578465, - "learning_rate": 0.0001999999444812141, - "loss": 46.0, - "step": 4397 - }, - { - "epoch": 0.3362578129479901, - "grad_norm": 0.0013224639697000384, - "learning_rate": 0.00019999994445590063, - "loss": 46.0, - "step": 4398 - }, - { - "epoch": 0.3363342699313799, - "grad_norm": 0.0012360153486952186, - "learning_rate": 0.0001999999444305814, - "loss": 46.0, - "step": 4399 - }, - { - "epoch": 0.3364107269147696, - "grad_norm": 0.0014156968099996448, - "learning_rate": 0.00019999994440525636, - "loss": 46.0, - "step": 4400 - }, - { - "epoch": 0.3364871838981593, - "grad_norm": 0.0012368501629680395, - "learning_rate": 0.0001999999443799256, - "loss": 46.0, - "step": 4401 - }, - { - "epoch": 0.336563640881549, - "grad_norm": 0.0013104111421853304, - "learning_rate": 0.00019999994435458901, - "loss": 46.0, - "step": 4402 - }, - { - "epoch": 0.33664009786493876, - "grad_norm": 0.0010781441815197468, - "learning_rate": 0.0001999999443292467, - "loss": 46.0, - "step": 4403 - }, - { - "epoch": 0.33671655484832846, - "grad_norm": 0.001098192878998816, - "learning_rate": 0.00019999994430389863, - "loss": 46.0, - "step": 4404 - }, - { - "epoch": 0.33679301183171817, - "grad_norm": 0.0014286516234278679, - "learning_rate": 0.00019999994427854475, - "loss": 46.0, - "step": 4405 - }, - { - "epoch": 0.3368694688151079, - "grad_norm": 0.0013937565963715315, - "learning_rate": 0.0001999999442531851, - "loss": 46.0, - "step": 4406 - }, - { - "epoch": 0.33694592579849764, - "grad_norm": 0.0026233461685478687, - "learning_rate": 0.0001999999442278197, - "loss": 46.0, - "step": 4407 - }, - { - "epoch": 0.33702238278188734, - "grad_norm": 0.002443751785904169, - "learning_rate": 0.00019999994420244853, - "loss": 46.0, - "step": 4408 - }, - { - "epoch": 0.33709883976527705, - "grad_norm": 0.0006538288434967399, - "learning_rate": 0.00019999994417707162, - "loss": 46.0, - "step": 4409 - }, - { - "epoch": 0.3371752967486668, - "grad_norm": 0.0006216029287315905, - "learning_rate": 0.00019999994415168893, - "loss": 46.0, - "step": 4410 - }, - { - "epoch": 0.3372517537320565, - "grad_norm": 0.009112684987485409, - "learning_rate": 0.00019999994412630044, - "loss": 46.0, - "step": 4411 - }, - { - "epoch": 0.3373282107154462, - "grad_norm": 0.0023524202406406403, - "learning_rate": 0.00019999994410090617, - "loss": 46.0, - "step": 4412 - }, - { - "epoch": 0.3374046676988359, - "grad_norm": 0.0005722793866880238, - "learning_rate": 0.0001999999440755062, - "loss": 46.0, - "step": 4413 - }, - { - "epoch": 0.3374811246822257, - "grad_norm": 0.0006286685238592327, - "learning_rate": 0.00019999994405010038, - "loss": 46.0, - "step": 4414 - }, - { - "epoch": 0.3375575816656154, - "grad_norm": 0.0016417823499068618, - "learning_rate": 0.00019999994402468882, - "loss": 46.0, - "step": 4415 - }, - { - "epoch": 0.3376340386490051, - "grad_norm": 0.0013658159878104925, - "learning_rate": 0.00019999994399927151, - "loss": 46.0, - "step": 4416 - }, - { - "epoch": 0.3377104956323948, - "grad_norm": 0.0008710395777598023, - "learning_rate": 0.0001999999439738484, - "loss": 46.0, - "step": 4417 - }, - { - "epoch": 0.33778695261578456, - "grad_norm": 0.0011983991134911776, - "learning_rate": 0.00019999994394841953, - "loss": 46.0, - "step": 4418 - }, - { - "epoch": 0.33786340959917427, - "grad_norm": 0.0006315264035947621, - "learning_rate": 0.00019999994392298488, - "loss": 46.0, - "step": 4419 - }, - { - "epoch": 0.337939866582564, - "grad_norm": 0.0011137855472043157, - "learning_rate": 0.00019999994389754448, - "loss": 46.0, - "step": 4420 - }, - { - "epoch": 0.3380163235659537, - "grad_norm": 0.0010404310887679458, - "learning_rate": 0.00019999994387209834, - "loss": 46.0, - "step": 4421 - }, - { - "epoch": 0.33809278054934344, - "grad_norm": 0.0018426774768158793, - "learning_rate": 0.0001999999438466464, - "loss": 46.0, - "step": 4422 - }, - { - "epoch": 0.33816923753273315, - "grad_norm": 0.0012585781514644623, - "learning_rate": 0.00019999994382118868, - "loss": 46.0, - "step": 4423 - }, - { - "epoch": 0.33824569451612285, - "grad_norm": 0.002455796580761671, - "learning_rate": 0.0001999999437957252, - "loss": 46.0, - "step": 4424 - }, - { - "epoch": 0.3383221514995126, - "grad_norm": 0.0013544521061703563, - "learning_rate": 0.00019999994377025595, - "loss": 46.0, - "step": 4425 - }, - { - "epoch": 0.3383986084829023, - "grad_norm": 0.0008386909612454474, - "learning_rate": 0.0001999999437447809, - "loss": 46.0, - "step": 4426 - }, - { - "epoch": 0.338475065466292, - "grad_norm": 0.0021937384735792875, - "learning_rate": 0.00019999994371930013, - "loss": 46.0, - "step": 4427 - }, - { - "epoch": 0.33855152244968173, - "grad_norm": 0.0005258836899884045, - "learning_rate": 0.0001999999436938136, - "loss": 46.0, - "step": 4428 - }, - { - "epoch": 0.3386279794330715, - "grad_norm": 0.004467345774173737, - "learning_rate": 0.00019999994366832124, - "loss": 46.0, - "step": 4429 - }, - { - "epoch": 0.3387044364164612, - "grad_norm": 0.0008250975515693426, - "learning_rate": 0.00019999994364282316, - "loss": 46.0, - "step": 4430 - }, - { - "epoch": 0.3387808933998509, - "grad_norm": 0.0029229619540274143, - "learning_rate": 0.00019999994361731926, - "loss": 46.0, - "step": 4431 - }, - { - "epoch": 0.3388573503832406, - "grad_norm": 0.0012704990804195404, - "learning_rate": 0.00019999994359180964, - "loss": 46.0, - "step": 4432 - }, - { - "epoch": 0.33893380736663037, - "grad_norm": 0.0006872370140627027, - "learning_rate": 0.00019999994356629424, - "loss": 46.0, - "step": 4433 - }, - { - "epoch": 0.3390102643500201, - "grad_norm": 0.00018776826618704945, - "learning_rate": 0.00019999994354077307, - "loss": 46.0, - "step": 4434 - }, - { - "epoch": 0.3390867213334098, - "grad_norm": 0.0013287997571751475, - "learning_rate": 0.00019999994351524612, - "loss": 46.0, - "step": 4435 - }, - { - "epoch": 0.3391631783167995, - "grad_norm": 0.0006739794625900686, - "learning_rate": 0.0001999999434897134, - "loss": 46.0, - "step": 4436 - }, - { - "epoch": 0.33923963530018925, - "grad_norm": 0.001005502650514245, - "learning_rate": 0.00019999994346417492, - "loss": 46.0, - "step": 4437 - }, - { - "epoch": 0.33931609228357895, - "grad_norm": 0.0013392752734944224, - "learning_rate": 0.00019999994343863068, - "loss": 46.0, - "step": 4438 - }, - { - "epoch": 0.33939254926696866, - "grad_norm": 0.0011017059441655874, - "learning_rate": 0.00019999994341308064, - "loss": 46.0, - "step": 4439 - }, - { - "epoch": 0.3394690062503584, - "grad_norm": 0.002742550801485777, - "learning_rate": 0.00019999994338752486, - "loss": 46.0, - "step": 4440 - }, - { - "epoch": 0.3395454632337481, - "grad_norm": 0.0007794153643772006, - "learning_rate": 0.00019999994336196327, - "loss": 46.0, - "step": 4441 - }, - { - "epoch": 0.33962192021713783, - "grad_norm": 0.0006616926984861493, - "learning_rate": 0.00019999994333639594, - "loss": 46.0, - "step": 4442 - }, - { - "epoch": 0.33969837720052753, - "grad_norm": 0.0005805851542390883, - "learning_rate": 0.00019999994331082287, - "loss": 46.0, - "step": 4443 - }, - { - "epoch": 0.3397748341839173, - "grad_norm": 0.0007528080604970455, - "learning_rate": 0.000199999943285244, - "loss": 46.0, - "step": 4444 - }, - { - "epoch": 0.339851291167307, - "grad_norm": 0.0010019076289609075, - "learning_rate": 0.00019999994325965934, - "loss": 46.0, - "step": 4445 - }, - { - "epoch": 0.3399277481506967, - "grad_norm": 0.002531372243538499, - "learning_rate": 0.00019999994323406894, - "loss": 46.0, - "step": 4446 - }, - { - "epoch": 0.3400042051340864, - "grad_norm": 0.0014815061585977674, - "learning_rate": 0.00019999994320847275, - "loss": 46.0, - "step": 4447 - }, - { - "epoch": 0.3400806621174762, - "grad_norm": 0.0005497461534105241, - "learning_rate": 0.0001999999431828708, - "loss": 46.0, - "step": 4448 - }, - { - "epoch": 0.3401571191008659, - "grad_norm": 0.0017356178723275661, - "learning_rate": 0.00019999994315726308, - "loss": 46.0, - "step": 4449 - }, - { - "epoch": 0.3402335760842556, - "grad_norm": 0.004692509770393372, - "learning_rate": 0.0001999999431316496, - "loss": 46.0, - "step": 4450 - }, - { - "epoch": 0.3403100330676453, - "grad_norm": 0.0018580228788778186, - "learning_rate": 0.00019999994310603033, - "loss": 46.0, - "step": 4451 - }, - { - "epoch": 0.34038649005103505, - "grad_norm": 0.0013220901601016521, - "learning_rate": 0.00019999994308040532, - "loss": 46.0, - "step": 4452 - }, - { - "epoch": 0.34046294703442476, - "grad_norm": 0.00034104715450666845, - "learning_rate": 0.00019999994305477454, - "loss": 46.0, - "step": 4453 - }, - { - "epoch": 0.34053940401781446, - "grad_norm": 0.0004975063493475318, - "learning_rate": 0.00019999994302913795, - "loss": 46.0, - "step": 4454 - }, - { - "epoch": 0.3406158610012042, - "grad_norm": 0.005003293976187706, - "learning_rate": 0.00019999994300349562, - "loss": 46.0, - "step": 4455 - }, - { - "epoch": 0.34069231798459393, - "grad_norm": 0.0006284716073423624, - "learning_rate": 0.00019999994297784752, - "loss": 46.0, - "step": 4456 - }, - { - "epoch": 0.34076877496798363, - "grad_norm": 0.0011805167887359858, - "learning_rate": 0.00019999994295219367, - "loss": 46.0, - "step": 4457 - }, - { - "epoch": 0.34084523195137334, - "grad_norm": 0.0014241504250094295, - "learning_rate": 0.00019999994292653402, - "loss": 46.0, - "step": 4458 - }, - { - "epoch": 0.3409216889347631, - "grad_norm": 0.0010949663119390607, - "learning_rate": 0.0001999999429008686, - "loss": 46.0, - "step": 4459 - }, - { - "epoch": 0.3409981459181528, - "grad_norm": 0.0015556616708636284, - "learning_rate": 0.0001999999428751974, - "loss": 46.0, - "step": 4460 - }, - { - "epoch": 0.3410746029015425, - "grad_norm": 0.0016401479952037334, - "learning_rate": 0.00019999994284952046, - "loss": 46.0, - "step": 4461 - }, - { - "epoch": 0.3411510598849322, - "grad_norm": 0.0011257074074819684, - "learning_rate": 0.00019999994282383774, - "loss": 46.0, - "step": 4462 - }, - { - "epoch": 0.341227516868322, - "grad_norm": 0.0009559948230162263, - "learning_rate": 0.00019999994279814925, - "loss": 46.0, - "step": 4463 - }, - { - "epoch": 0.3413039738517117, - "grad_norm": 0.0007614089990966022, - "learning_rate": 0.000199999942772455, - "loss": 46.0, - "step": 4464 - }, - { - "epoch": 0.3413804308351014, - "grad_norm": 0.002122860634699464, - "learning_rate": 0.00019999994274675498, - "loss": 46.0, - "step": 4465 - }, - { - "epoch": 0.3414568878184911, - "grad_norm": 0.0006695298361591995, - "learning_rate": 0.0001999999427210492, - "loss": 46.0, - "step": 4466 - }, - { - "epoch": 0.34153334480188086, - "grad_norm": 0.006944767199456692, - "learning_rate": 0.0001999999426953376, - "loss": 46.0, - "step": 4467 - }, - { - "epoch": 0.34160980178527056, - "grad_norm": 0.0006343826535157859, - "learning_rate": 0.00019999994266962028, - "loss": 46.0, - "step": 4468 - }, - { - "epoch": 0.34168625876866027, - "grad_norm": 0.0006700207013636827, - "learning_rate": 0.00019999994264389718, - "loss": 46.0, - "step": 4469 - }, - { - "epoch": 0.34176271575205003, - "grad_norm": 0.000629562942776829, - "learning_rate": 0.00019999994261816827, - "loss": 46.0, - "step": 4470 - }, - { - "epoch": 0.34183917273543973, - "grad_norm": 0.0008044940186664462, - "learning_rate": 0.00019999994259243362, - "loss": 46.0, - "step": 4471 - }, - { - "epoch": 0.34191562971882944, - "grad_norm": 0.000491089653223753, - "learning_rate": 0.00019999994256669323, - "loss": 46.0, - "step": 4472 - }, - { - "epoch": 0.34199208670221914, - "grad_norm": 0.0008835463668219745, - "learning_rate": 0.00019999994254094703, - "loss": 46.0, - "step": 4473 - }, - { - "epoch": 0.3420685436856089, - "grad_norm": 0.0011827208800241351, - "learning_rate": 0.0001999999425151951, - "loss": 46.0, - "step": 4474 - }, - { - "epoch": 0.3421450006689986, - "grad_norm": 0.0028299165423959494, - "learning_rate": 0.00019999994248943737, - "loss": 46.0, - "step": 4475 - }, - { - "epoch": 0.3422214576523883, - "grad_norm": 0.003202289342880249, - "learning_rate": 0.00019999994246367386, - "loss": 46.0, - "step": 4476 - }, - { - "epoch": 0.342297914635778, - "grad_norm": 0.004717707633972168, - "learning_rate": 0.00019999994243790462, - "loss": 46.0, - "step": 4477 - }, - { - "epoch": 0.3423743716191678, - "grad_norm": 0.0030418529640883207, - "learning_rate": 0.00019999994241212958, - "loss": 46.0, - "step": 4478 - }, - { - "epoch": 0.3424508286025575, - "grad_norm": 0.0009572543203830719, - "learning_rate": 0.00019999994238634877, - "loss": 46.0, - "step": 4479 - }, - { - "epoch": 0.3425272855859472, - "grad_norm": 0.0011142563307657838, - "learning_rate": 0.00019999994236056222, - "loss": 46.0, - "step": 4480 - }, - { - "epoch": 0.3426037425693369, - "grad_norm": 0.0005846779677085578, - "learning_rate": 0.00019999994233476986, - "loss": 46.0, - "step": 4481 - }, - { - "epoch": 0.34268019955272666, - "grad_norm": 0.0010721880244091153, - "learning_rate": 0.00019999994230897173, - "loss": 46.0, - "step": 4482 - }, - { - "epoch": 0.34275665653611637, - "grad_norm": 0.004597486928105354, - "learning_rate": 0.00019999994228316788, - "loss": 46.0, - "step": 4483 - }, - { - "epoch": 0.34283311351950607, - "grad_norm": 0.0008741975179873407, - "learning_rate": 0.00019999994225735823, - "loss": 46.0, - "step": 4484 - }, - { - "epoch": 0.34290957050289583, - "grad_norm": 0.0020603176672011614, - "learning_rate": 0.00019999994223154278, - "loss": 46.0, - "step": 4485 - }, - { - "epoch": 0.34298602748628554, - "grad_norm": 0.0020062404219061136, - "learning_rate": 0.0001999999422057216, - "loss": 46.0, - "step": 4486 - }, - { - "epoch": 0.34306248446967524, - "grad_norm": 0.0007867870735935867, - "learning_rate": 0.00019999994217989467, - "loss": 46.0, - "step": 4487 - }, - { - "epoch": 0.34313894145306495, - "grad_norm": 0.0017731457483023405, - "learning_rate": 0.00019999994215406193, - "loss": 46.0, - "step": 4488 - }, - { - "epoch": 0.3432153984364547, - "grad_norm": 0.0017227425705641508, - "learning_rate": 0.00019999994212822344, - "loss": 46.0, - "step": 4489 - }, - { - "epoch": 0.3432918554198444, - "grad_norm": 0.0044555384665727615, - "learning_rate": 0.00019999994210237918, - "loss": 46.0, - "step": 4490 - }, - { - "epoch": 0.3433683124032341, - "grad_norm": 0.0008505648002028465, - "learning_rate": 0.00019999994207652914, - "loss": 46.0, - "step": 4491 - }, - { - "epoch": 0.3434447693866238, - "grad_norm": 0.0019806567579507828, - "learning_rate": 0.00019999994205067333, - "loss": 46.0, - "step": 4492 - }, - { - "epoch": 0.3435212263700136, - "grad_norm": 0.0010835396824404597, - "learning_rate": 0.00019999994202481175, - "loss": 46.0, - "step": 4493 - }, - { - "epoch": 0.3435976833534033, - "grad_norm": 0.00045218656305223703, - "learning_rate": 0.00019999994199894442, - "loss": 46.0, - "step": 4494 - }, - { - "epoch": 0.343674140336793, - "grad_norm": 0.0007355757406912744, - "learning_rate": 0.00019999994197307132, - "loss": 46.0, - "step": 4495 - }, - { - "epoch": 0.3437505973201827, - "grad_norm": 0.001620026072487235, - "learning_rate": 0.00019999994194719244, - "loss": 46.0, - "step": 4496 - }, - { - "epoch": 0.34382705430357247, - "grad_norm": 0.002306954935193062, - "learning_rate": 0.00019999994192130776, - "loss": 46.0, - "step": 4497 - }, - { - "epoch": 0.34390351128696217, - "grad_norm": 0.002012313809245825, - "learning_rate": 0.00019999994189541734, - "loss": 46.0, - "step": 4498 - }, - { - "epoch": 0.3439799682703519, - "grad_norm": 0.0005791075527667999, - "learning_rate": 0.00019999994186952117, - "loss": 46.0, - "step": 4499 - }, - { - "epoch": 0.34405642525374164, - "grad_norm": 0.0008548152400180697, - "learning_rate": 0.0001999999418436192, - "loss": 46.0, - "step": 4500 - }, - { - "epoch": 0.34413288223713134, - "grad_norm": 0.0043380907736718655, - "learning_rate": 0.00019999994181771146, - "loss": 46.0, - "step": 4501 - }, - { - "epoch": 0.34420933922052105, - "grad_norm": 0.003028023988008499, - "learning_rate": 0.00019999994179179797, - "loss": 46.0, - "step": 4502 - }, - { - "epoch": 0.34428579620391075, - "grad_norm": 0.0018402260029688478, - "learning_rate": 0.0001999999417658787, - "loss": 46.0, - "step": 4503 - }, - { - "epoch": 0.3443622531873005, - "grad_norm": 0.0015750774182379246, - "learning_rate": 0.00019999994173995367, - "loss": 46.0, - "step": 4504 - }, - { - "epoch": 0.3444387101706902, - "grad_norm": 0.001570810447447002, - "learning_rate": 0.00019999994171402286, - "loss": 46.0, - "step": 4505 - }, - { - "epoch": 0.3445151671540799, - "grad_norm": 0.0012720414670184255, - "learning_rate": 0.00019999994168808628, - "loss": 46.0, - "step": 4506 - }, - { - "epoch": 0.34459162413746963, - "grad_norm": 0.002900895895436406, - "learning_rate": 0.00019999994166214395, - "loss": 46.0, - "step": 4507 - }, - { - "epoch": 0.3446680811208594, - "grad_norm": 0.0009334111819043756, - "learning_rate": 0.00019999994163619582, - "loss": 46.0, - "step": 4508 - }, - { - "epoch": 0.3447445381042491, - "grad_norm": 0.0019853694830089808, - "learning_rate": 0.00019999994161024195, - "loss": 46.0, - "step": 4509 - }, - { - "epoch": 0.3448209950876388, - "grad_norm": 0.0009553749114274979, - "learning_rate": 0.00019999994158428227, - "loss": 46.0, - "step": 4510 - }, - { - "epoch": 0.34489745207102857, - "grad_norm": 0.0013566161505877972, - "learning_rate": 0.00019999994155831688, - "loss": 46.0, - "step": 4511 - }, - { - "epoch": 0.34497390905441827, - "grad_norm": 0.004217440728098154, - "learning_rate": 0.00019999994153234568, - "loss": 46.0, - "step": 4512 - }, - { - "epoch": 0.345050366037808, - "grad_norm": 0.001521850354038179, - "learning_rate": 0.0001999999415063687, - "loss": 46.0, - "step": 4513 - }, - { - "epoch": 0.3451268230211977, - "grad_norm": 0.0019429214298725128, - "learning_rate": 0.000199999941480386, - "loss": 46.0, - "step": 4514 - }, - { - "epoch": 0.34520328000458744, - "grad_norm": 0.0013594161719083786, - "learning_rate": 0.00019999994145439745, - "loss": 46.0, - "step": 4515 - }, - { - "epoch": 0.34527973698797715, - "grad_norm": 0.0015067590866237879, - "learning_rate": 0.00019999994142840322, - "loss": 46.0, - "step": 4516 - }, - { - "epoch": 0.34535619397136685, - "grad_norm": 0.0009392031352035701, - "learning_rate": 0.00019999994140240316, - "loss": 46.0, - "step": 4517 - }, - { - "epoch": 0.34543265095475656, - "grad_norm": 0.0007936739712022245, - "learning_rate": 0.00019999994137639737, - "loss": 46.0, - "step": 4518 - }, - { - "epoch": 0.3455091079381463, - "grad_norm": 0.0016129830619320273, - "learning_rate": 0.00019999994135038576, - "loss": 46.0, - "step": 4519 - }, - { - "epoch": 0.345585564921536, - "grad_norm": 0.0016986047849059105, - "learning_rate": 0.00019999994132436844, - "loss": 46.0, - "step": 4520 - }, - { - "epoch": 0.34566202190492573, - "grad_norm": 0.003739092266187072, - "learning_rate": 0.0001999999412983453, - "loss": 46.0, - "step": 4521 - }, - { - "epoch": 0.34573847888831544, - "grad_norm": 0.002710785483941436, - "learning_rate": 0.00019999994127231643, - "loss": 46.0, - "step": 4522 - }, - { - "epoch": 0.3458149358717052, - "grad_norm": 0.0009404800366610289, - "learning_rate": 0.00019999994124628176, - "loss": 46.0, - "step": 4523 - }, - { - "epoch": 0.3458913928550949, - "grad_norm": 0.0013227352173998952, - "learning_rate": 0.00019999994122024136, - "loss": 46.0, - "step": 4524 - }, - { - "epoch": 0.3459678498384846, - "grad_norm": 0.0023588594049215317, - "learning_rate": 0.00019999994119419517, - "loss": 46.0, - "step": 4525 - }, - { - "epoch": 0.34604430682187437, - "grad_norm": 0.0022572921589016914, - "learning_rate": 0.0001999999411681432, - "loss": 46.0, - "step": 4526 - }, - { - "epoch": 0.3461207638052641, - "grad_norm": 0.001386230462230742, - "learning_rate": 0.00019999994114208546, - "loss": 46.0, - "step": 4527 - }, - { - "epoch": 0.3461972207886538, - "grad_norm": 0.00137048470787704, - "learning_rate": 0.00019999994111602197, - "loss": 46.0, - "step": 4528 - }, - { - "epoch": 0.3462736777720435, - "grad_norm": 0.0008595773251727223, - "learning_rate": 0.00019999994108995268, - "loss": 46.0, - "step": 4529 - }, - { - "epoch": 0.34635013475543325, - "grad_norm": 0.0019864393398165703, - "learning_rate": 0.00019999994106387762, - "loss": 46.0, - "step": 4530 - }, - { - "epoch": 0.34642659173882295, - "grad_norm": 0.0019187870202586055, - "learning_rate": 0.00019999994103779683, - "loss": 46.0, - "step": 4531 - }, - { - "epoch": 0.34650304872221266, - "grad_norm": 0.000958569988142699, - "learning_rate": 0.00019999994101171025, - "loss": 46.0, - "step": 4532 - }, - { - "epoch": 0.34657950570560236, - "grad_norm": 0.004497377201914787, - "learning_rate": 0.00019999994098561787, - "loss": 46.0, - "step": 4533 - }, - { - "epoch": 0.3466559626889921, - "grad_norm": 0.001482845633290708, - "learning_rate": 0.00019999994095951977, - "loss": 46.0, - "step": 4534 - }, - { - "epoch": 0.34673241967238183, - "grad_norm": 0.00035967418807558715, - "learning_rate": 0.00019999994093341587, - "loss": 46.0, - "step": 4535 - }, - { - "epoch": 0.34680887665577154, - "grad_norm": 0.0036673317663371563, - "learning_rate": 0.00019999994090730622, - "loss": 46.0, - "step": 4536 - }, - { - "epoch": 0.34688533363916124, - "grad_norm": 0.0008209376828745008, - "learning_rate": 0.0001999999408811908, - "loss": 46.0, - "step": 4537 - }, - { - "epoch": 0.346961790622551, - "grad_norm": 0.0004369386297184974, - "learning_rate": 0.0001999999408550696, - "loss": 46.0, - "step": 4538 - }, - { - "epoch": 0.3470382476059407, - "grad_norm": 0.0014164528110995889, - "learning_rate": 0.00019999994082894263, - "loss": 46.0, - "step": 4539 - }, - { - "epoch": 0.3471147045893304, - "grad_norm": 0.0005142363952472806, - "learning_rate": 0.0001999999408028099, - "loss": 46.0, - "step": 4540 - }, - { - "epoch": 0.3471911615727202, - "grad_norm": 0.0010802133474498987, - "learning_rate": 0.00019999994077667138, - "loss": 46.0, - "step": 4541 - }, - { - "epoch": 0.3472676185561099, - "grad_norm": 0.0017264769412577152, - "learning_rate": 0.0001999999407505271, - "loss": 46.0, - "step": 4542 - }, - { - "epoch": 0.3473440755394996, - "grad_norm": 0.0007748965290375054, - "learning_rate": 0.00019999994072437708, - "loss": 46.0, - "step": 4543 - }, - { - "epoch": 0.3474205325228893, - "grad_norm": 0.0029790163971483707, - "learning_rate": 0.00019999994069822124, - "loss": 46.0, - "step": 4544 - }, - { - "epoch": 0.34749698950627905, - "grad_norm": 0.0010709143243730068, - "learning_rate": 0.00019999994067205966, - "loss": 46.0, - "step": 4545 - }, - { - "epoch": 0.34757344648966876, - "grad_norm": 0.002072112401947379, - "learning_rate": 0.00019999994064589228, - "loss": 46.0, - "step": 4546 - }, - { - "epoch": 0.34764990347305846, - "grad_norm": 0.0006146198138594627, - "learning_rate": 0.00019999994061971918, - "loss": 46.0, - "step": 4547 - }, - { - "epoch": 0.34772636045644817, - "grad_norm": 0.0006449267384596169, - "learning_rate": 0.00019999994059354028, - "loss": 46.0, - "step": 4548 - }, - { - "epoch": 0.34780281743983793, - "grad_norm": 0.0012200400233268738, - "learning_rate": 0.0001999999405673556, - "loss": 46.0, - "step": 4549 - }, - { - "epoch": 0.34787927442322764, - "grad_norm": 0.0011937841773033142, - "learning_rate": 0.00019999994054116518, - "loss": 46.0, - "step": 4550 - }, - { - "epoch": 0.34795573140661734, - "grad_norm": 0.004128133412450552, - "learning_rate": 0.000199999940514969, - "loss": 46.0, - "step": 4551 - }, - { - "epoch": 0.34803218839000705, - "grad_norm": 0.0008886297000572085, - "learning_rate": 0.000199999940488767, - "loss": 46.0, - "step": 4552 - }, - { - "epoch": 0.3481086453733968, - "grad_norm": 0.0006153713911771774, - "learning_rate": 0.00019999994046255928, - "loss": 46.0, - "step": 4553 - }, - { - "epoch": 0.3481851023567865, - "grad_norm": 0.0006943295011296868, - "learning_rate": 0.0001999999404363458, - "loss": 46.0, - "step": 4554 - }, - { - "epoch": 0.3482615593401762, - "grad_norm": 0.0017371104331687093, - "learning_rate": 0.00019999994041012647, - "loss": 46.0, - "step": 4555 - }, - { - "epoch": 0.348338016323566, - "grad_norm": 0.0005236382712610066, - "learning_rate": 0.00019999994038390144, - "loss": 46.0, - "step": 4556 - }, - { - "epoch": 0.3484144733069557, - "grad_norm": 0.0008119958220049739, - "learning_rate": 0.00019999994035767063, - "loss": 46.0, - "step": 4557 - }, - { - "epoch": 0.3484909302903454, - "grad_norm": 0.0017897101351991296, - "learning_rate": 0.00019999994033143402, - "loss": 46.0, - "step": 4558 - }, - { - "epoch": 0.3485673872737351, - "grad_norm": 0.0007238021353259683, - "learning_rate": 0.00019999994030519167, - "loss": 46.0, - "step": 4559 - }, - { - "epoch": 0.34864384425712486, - "grad_norm": 0.007205334957689047, - "learning_rate": 0.00019999994027894357, - "loss": 46.0, - "step": 4560 - }, - { - "epoch": 0.34872030124051456, - "grad_norm": 0.0003773700154852122, - "learning_rate": 0.00019999994025268967, - "loss": 46.0, - "step": 4561 - }, - { - "epoch": 0.34879675822390427, - "grad_norm": 0.001063156989403069, - "learning_rate": 0.00019999994022643, - "loss": 46.0, - "step": 4562 - }, - { - "epoch": 0.348873215207294, - "grad_norm": 0.0013440484181046486, - "learning_rate": 0.00019999994020016454, - "loss": 46.0, - "step": 4563 - }, - { - "epoch": 0.34894967219068374, - "grad_norm": 0.0021393182687461376, - "learning_rate": 0.00019999994017389335, - "loss": 46.0, - "step": 4564 - }, - { - "epoch": 0.34902612917407344, - "grad_norm": 0.000978855649009347, - "learning_rate": 0.00019999994014761638, - "loss": 46.0, - "step": 4565 - }, - { - "epoch": 0.34910258615746315, - "grad_norm": 0.0008459172677248716, - "learning_rate": 0.00019999994012133364, - "loss": 46.0, - "step": 4566 - }, - { - "epoch": 0.34917904314085285, - "grad_norm": 0.0005167752970010042, - "learning_rate": 0.0001999999400950451, - "loss": 46.0, - "step": 4567 - }, - { - "epoch": 0.3492555001242426, - "grad_norm": 0.0026665236800909042, - "learning_rate": 0.00019999994006875084, - "loss": 46.0, - "step": 4568 - }, - { - "epoch": 0.3493319571076323, - "grad_norm": 0.0009810024639591575, - "learning_rate": 0.00019999994004245078, - "loss": 46.0, - "step": 4569 - }, - { - "epoch": 0.349408414091022, - "grad_norm": 0.0012815716909244657, - "learning_rate": 0.00019999994001614497, - "loss": 46.0, - "step": 4570 - }, - { - "epoch": 0.3494848710744118, - "grad_norm": 0.003264606697484851, - "learning_rate": 0.00019999993998983336, - "loss": 46.0, - "step": 4571 - }, - { - "epoch": 0.3495613280578015, - "grad_norm": 0.003079640679061413, - "learning_rate": 0.000199999939963516, - "loss": 46.0, - "step": 4572 - }, - { - "epoch": 0.3496377850411912, - "grad_norm": 0.002139975316822529, - "learning_rate": 0.00019999993993719288, - "loss": 46.0, - "step": 4573 - }, - { - "epoch": 0.3497142420245809, - "grad_norm": 0.0009452769882045686, - "learning_rate": 0.00019999993991086398, - "loss": 46.0, - "step": 4574 - }, - { - "epoch": 0.34979069900797066, - "grad_norm": 0.0007609169115312397, - "learning_rate": 0.00019999993988452933, - "loss": 46.0, - "step": 4575 - }, - { - "epoch": 0.34986715599136037, - "grad_norm": 0.0016770473448559642, - "learning_rate": 0.00019999993985818888, - "loss": 46.0, - "step": 4576 - }, - { - "epoch": 0.3499436129747501, - "grad_norm": 0.0014944222057238221, - "learning_rate": 0.00019999993983184266, - "loss": 46.0, - "step": 4577 - }, - { - "epoch": 0.3500200699581398, - "grad_norm": 0.0004888473195023835, - "learning_rate": 0.00019999993980549067, - "loss": 46.0, - "step": 4578 - }, - { - "epoch": 0.35009652694152954, - "grad_norm": 0.0009985408978536725, - "learning_rate": 0.00019999993977913295, - "loss": 46.0, - "step": 4579 - }, - { - "epoch": 0.35017298392491925, - "grad_norm": 0.0012293473118916154, - "learning_rate": 0.00019999993975276944, - "loss": 46.0, - "step": 4580 - }, - { - "epoch": 0.35024944090830895, - "grad_norm": 0.0007471921271644533, - "learning_rate": 0.00019999993972640015, - "loss": 46.0, - "step": 4581 - }, - { - "epoch": 0.35032589789169866, - "grad_norm": 0.0026485580019652843, - "learning_rate": 0.0001999999397000251, - "loss": 46.0, - "step": 4582 - }, - { - "epoch": 0.3504023548750884, - "grad_norm": 0.0007579503580927849, - "learning_rate": 0.00019999993967364428, - "loss": 46.0, - "step": 4583 - }, - { - "epoch": 0.3504788118584781, - "grad_norm": 0.0006152355344966054, - "learning_rate": 0.00019999993964725767, - "loss": 46.0, - "step": 4584 - }, - { - "epoch": 0.35055526884186783, - "grad_norm": 0.0010391559917479753, - "learning_rate": 0.00019999993962086532, - "loss": 46.0, - "step": 4585 - }, - { - "epoch": 0.3506317258252576, - "grad_norm": 0.0006845281459391117, - "learning_rate": 0.00019999993959446717, - "loss": 46.0, - "step": 4586 - }, - { - "epoch": 0.3507081828086473, - "grad_norm": 0.001958251930773258, - "learning_rate": 0.00019999993956806327, - "loss": 46.0, - "step": 4587 - }, - { - "epoch": 0.350784639792037, - "grad_norm": 0.0031576522160321474, - "learning_rate": 0.0001999999395416536, - "loss": 46.0, - "step": 4588 - }, - { - "epoch": 0.3508610967754267, - "grad_norm": 0.001309097628109157, - "learning_rate": 0.00019999993951523817, - "loss": 46.0, - "step": 4589 - }, - { - "epoch": 0.35093755375881647, - "grad_norm": 0.001636011991649866, - "learning_rate": 0.00019999993948881698, - "loss": 46.0, - "step": 4590 - }, - { - "epoch": 0.3510140107422062, - "grad_norm": 0.0006237008492462337, - "learning_rate": 0.00019999993946238998, - "loss": 46.0, - "step": 4591 - }, - { - "epoch": 0.3510904677255959, - "grad_norm": 0.0030939914286136627, - "learning_rate": 0.00019999993943595724, - "loss": 46.0, - "step": 4592 - }, - { - "epoch": 0.3511669247089856, - "grad_norm": 0.0012296033091843128, - "learning_rate": 0.0001999999394095187, - "loss": 46.0, - "step": 4593 - }, - { - "epoch": 0.35124338169237534, - "grad_norm": 0.0014240751042962074, - "learning_rate": 0.00019999993938307441, - "loss": 46.0, - "step": 4594 - }, - { - "epoch": 0.35131983867576505, - "grad_norm": 0.007237753365188837, - "learning_rate": 0.00019999993935662435, - "loss": 46.0, - "step": 4595 - }, - { - "epoch": 0.35139629565915476, - "grad_norm": 0.0012333758641034365, - "learning_rate": 0.00019999993933016855, - "loss": 46.0, - "step": 4596 - }, - { - "epoch": 0.35147275264254446, - "grad_norm": 0.0021177921444177628, - "learning_rate": 0.00019999993930370694, - "loss": 46.0, - "step": 4597 - }, - { - "epoch": 0.3515492096259342, - "grad_norm": 0.0011861567618325353, - "learning_rate": 0.00019999993927723959, - "loss": 46.0, - "step": 4598 - }, - { - "epoch": 0.35162566660932393, - "grad_norm": 0.0008451088797301054, - "learning_rate": 0.00019999993925076646, - "loss": 46.0, - "step": 4599 - }, - { - "epoch": 0.35170212359271363, - "grad_norm": 0.00794740580022335, - "learning_rate": 0.00019999993922428756, - "loss": 46.0, - "step": 4600 - }, - { - "epoch": 0.3517785805761034, - "grad_norm": 0.0013230512849986553, - "learning_rate": 0.00019999993919780286, - "loss": 46.0, - "step": 4601 - }, - { - "epoch": 0.3518550375594931, - "grad_norm": 0.000470456900075078, - "learning_rate": 0.0001999999391713124, - "loss": 46.0, - "step": 4602 - }, - { - "epoch": 0.3519314945428828, - "grad_norm": 0.0005277706077322364, - "learning_rate": 0.00019999993914481622, - "loss": 46.0, - "step": 4603 - }, - { - "epoch": 0.3520079515262725, - "grad_norm": 0.0024399952962994576, - "learning_rate": 0.00019999993911831422, - "loss": 46.0, - "step": 4604 - }, - { - "epoch": 0.35208440850966227, - "grad_norm": 0.005916515365242958, - "learning_rate": 0.00019999993909180648, - "loss": 46.0, - "step": 4605 - }, - { - "epoch": 0.352160865493052, - "grad_norm": 0.0006022404413670301, - "learning_rate": 0.00019999993906529294, - "loss": 46.0, - "step": 4606 - }, - { - "epoch": 0.3522373224764417, - "grad_norm": 0.0038669996429234743, - "learning_rate": 0.00019999993903877368, - "loss": 46.0, - "step": 4607 - }, - { - "epoch": 0.3523137794598314, - "grad_norm": 0.0008730207337066531, - "learning_rate": 0.0001999999390122486, - "loss": 46.0, - "step": 4608 - }, - { - "epoch": 0.35239023644322115, - "grad_norm": 0.0013347804779186845, - "learning_rate": 0.00019999993898571776, - "loss": 46.0, - "step": 4609 - }, - { - "epoch": 0.35246669342661086, - "grad_norm": 0.0017347969114780426, - "learning_rate": 0.00019999993895918115, - "loss": 46.0, - "step": 4610 - }, - { - "epoch": 0.35254315041000056, - "grad_norm": 0.0007171679753810167, - "learning_rate": 0.0001999999389326388, - "loss": 46.0, - "step": 4611 - }, - { - "epoch": 0.35261960739339027, - "grad_norm": 0.0026676938869059086, - "learning_rate": 0.00019999993890609065, - "loss": 46.0, - "step": 4612 - }, - { - "epoch": 0.35269606437678, - "grad_norm": 0.0006391755887307227, - "learning_rate": 0.00019999993887953677, - "loss": 46.0, - "step": 4613 - }, - { - "epoch": 0.35277252136016973, - "grad_norm": 0.0006686265696771443, - "learning_rate": 0.0001999999388529771, - "loss": 46.0, - "step": 4614 - }, - { - "epoch": 0.35284897834355944, - "grad_norm": 0.0017134080408141017, - "learning_rate": 0.00019999993882641163, - "loss": 46.0, - "step": 4615 - }, - { - "epoch": 0.3529254353269492, - "grad_norm": 0.0013954247115179896, - "learning_rate": 0.00019999993879984043, - "loss": 46.0, - "step": 4616 - }, - { - "epoch": 0.3530018923103389, - "grad_norm": 0.0005169216310605407, - "learning_rate": 0.00019999993877326344, - "loss": 46.0, - "step": 4617 - }, - { - "epoch": 0.3530783492937286, - "grad_norm": 0.0005864306003786623, - "learning_rate": 0.00019999993874668067, - "loss": 46.0, - "step": 4618 - }, - { - "epoch": 0.3531548062771183, - "grad_norm": 0.0035213076043874025, - "learning_rate": 0.00019999993872009216, - "loss": 46.0, - "step": 4619 - }, - { - "epoch": 0.3532312632605081, - "grad_norm": 0.0015282656531780958, - "learning_rate": 0.00019999993869349787, - "loss": 46.0, - "step": 4620 - }, - { - "epoch": 0.3533077202438978, - "grad_norm": 0.007982725277543068, - "learning_rate": 0.0001999999386668978, - "loss": 46.0, - "step": 4621 - }, - { - "epoch": 0.3533841772272875, - "grad_norm": 0.0014074406353756785, - "learning_rate": 0.00019999993864029195, - "loss": 46.0, - "step": 4622 - }, - { - "epoch": 0.3534606342106772, - "grad_norm": 0.0016917784232646227, - "learning_rate": 0.00019999993861368037, - "loss": 46.0, - "step": 4623 - }, - { - "epoch": 0.35353709119406695, - "grad_norm": 0.001214608084410429, - "learning_rate": 0.00019999993858706302, - "loss": 46.0, - "step": 4624 - }, - { - "epoch": 0.35361354817745666, - "grad_norm": 0.0033316679764539003, - "learning_rate": 0.00019999993856043986, - "loss": 46.0, - "step": 4625 - }, - { - "epoch": 0.35369000516084637, - "grad_norm": 0.0008969350601546466, - "learning_rate": 0.00019999993853381094, - "loss": 46.0, - "step": 4626 - }, - { - "epoch": 0.3537664621442361, - "grad_norm": 0.002167478669434786, - "learning_rate": 0.00019999993850717626, - "loss": 46.0, - "step": 4627 - }, - { - "epoch": 0.35384291912762583, - "grad_norm": 0.0012287312420085073, - "learning_rate": 0.00019999993848053582, - "loss": 46.0, - "step": 4628 - }, - { - "epoch": 0.35391937611101554, - "grad_norm": 0.0006428689230233431, - "learning_rate": 0.00019999993845388957, - "loss": 46.0, - "step": 4629 - }, - { - "epoch": 0.35399583309440524, - "grad_norm": 0.0013685186859220266, - "learning_rate": 0.0001999999384272376, - "loss": 46.0, - "step": 4630 - }, - { - "epoch": 0.354072290077795, - "grad_norm": 0.0007062467630021274, - "learning_rate": 0.00019999993840057984, - "loss": 46.0, - "step": 4631 - }, - { - "epoch": 0.3541487470611847, - "grad_norm": 0.0008716791053302586, - "learning_rate": 0.0001999999383739163, - "loss": 46.0, - "step": 4632 - }, - { - "epoch": 0.3542252040445744, - "grad_norm": 0.0008582550217397511, - "learning_rate": 0.000199999938347247, - "loss": 46.0, - "step": 4633 - }, - { - "epoch": 0.3543016610279641, - "grad_norm": 0.001223880797624588, - "learning_rate": 0.00019999993832057195, - "loss": 46.0, - "step": 4634 - }, - { - "epoch": 0.3543781180113539, - "grad_norm": 0.000669918255880475, - "learning_rate": 0.00019999993829389112, - "loss": 46.0, - "step": 4635 - }, - { - "epoch": 0.3544545749947436, - "grad_norm": 0.005380552262067795, - "learning_rate": 0.0001999999382672045, - "loss": 46.0, - "step": 4636 - }, - { - "epoch": 0.3545310319781333, - "grad_norm": 0.002971152774989605, - "learning_rate": 0.00019999993824051213, - "loss": 46.0, - "step": 4637 - }, - { - "epoch": 0.354607488961523, - "grad_norm": 0.0016036460874602199, - "learning_rate": 0.000199999938213814, - "loss": 46.0, - "step": 4638 - }, - { - "epoch": 0.35468394594491276, - "grad_norm": 0.004605886060744524, - "learning_rate": 0.00019999993818711008, - "loss": 46.0, - "step": 4639 - }, - { - "epoch": 0.35476040292830247, - "grad_norm": 0.0025853635743260384, - "learning_rate": 0.00019999993816040038, - "loss": 46.0, - "step": 4640 - }, - { - "epoch": 0.35483685991169217, - "grad_norm": 0.0007215982768684626, - "learning_rate": 0.00019999993813368496, - "loss": 46.0, - "step": 4641 - }, - { - "epoch": 0.35491331689508193, - "grad_norm": 0.0013181599788367748, - "learning_rate": 0.00019999993810696374, - "loss": 46.0, - "step": 4642 - }, - { - "epoch": 0.35498977387847164, - "grad_norm": 0.0034462118055671453, - "learning_rate": 0.00019999993808023675, - "loss": 46.0, - "step": 4643 - }, - { - "epoch": 0.35506623086186134, - "grad_norm": 0.0016977038467302918, - "learning_rate": 0.00019999993805350398, - "loss": 46.0, - "step": 4644 - }, - { - "epoch": 0.35514268784525105, - "grad_norm": 0.0007073847227729857, - "learning_rate": 0.00019999993802676547, - "loss": 46.0, - "step": 4645 - }, - { - "epoch": 0.3552191448286408, - "grad_norm": 0.0018078909488394856, - "learning_rate": 0.00019999993800002118, - "loss": 46.0, - "step": 4646 - }, - { - "epoch": 0.3552956018120305, - "grad_norm": 0.0008559692651033401, - "learning_rate": 0.0001999999379732711, - "loss": 46.0, - "step": 4647 - }, - { - "epoch": 0.3553720587954202, - "grad_norm": 0.0005841772654093802, - "learning_rate": 0.00019999993794651526, - "loss": 46.0, - "step": 4648 - }, - { - "epoch": 0.3554485157788099, - "grad_norm": 0.0023644184693694115, - "learning_rate": 0.00019999993791975366, - "loss": 46.0, - "step": 4649 - }, - { - "epoch": 0.3555249727621997, - "grad_norm": 0.0011216261191293597, - "learning_rate": 0.00019999993789298625, - "loss": 46.0, - "step": 4650 - }, - { - "epoch": 0.3556014297455894, - "grad_norm": 0.0022794923279434443, - "learning_rate": 0.00019999993786621312, - "loss": 46.0, - "step": 4651 - }, - { - "epoch": 0.3556778867289791, - "grad_norm": 0.0017540324479341507, - "learning_rate": 0.0001999999378394342, - "loss": 46.0, - "step": 4652 - }, - { - "epoch": 0.3557543437123688, - "grad_norm": 0.0008257882436737418, - "learning_rate": 0.00019999993781264953, - "loss": 46.0, - "step": 4653 - }, - { - "epoch": 0.35583080069575856, - "grad_norm": 0.0005509591428562999, - "learning_rate": 0.00019999993778585905, - "loss": 46.0, - "step": 4654 - }, - { - "epoch": 0.35590725767914827, - "grad_norm": 0.0015455081593245268, - "learning_rate": 0.00019999993775906286, - "loss": 46.0, - "step": 4655 - }, - { - "epoch": 0.355983714662538, - "grad_norm": 0.0015083843609318137, - "learning_rate": 0.00019999993773226084, - "loss": 46.0, - "step": 4656 - }, - { - "epoch": 0.35606017164592774, - "grad_norm": 0.004210244864225388, - "learning_rate": 0.00019999993770545307, - "loss": 46.0, - "step": 4657 - }, - { - "epoch": 0.35613662862931744, - "grad_norm": 0.001926167169585824, - "learning_rate": 0.00019999993767863954, - "loss": 46.0, - "step": 4658 - }, - { - "epoch": 0.35621308561270715, - "grad_norm": 0.0009868277702480555, - "learning_rate": 0.00019999993765182022, - "loss": 46.0, - "step": 4659 - }, - { - "epoch": 0.35628954259609685, - "grad_norm": 0.00519395899027586, - "learning_rate": 0.00019999993762499516, - "loss": 46.0, - "step": 4660 - }, - { - "epoch": 0.3563659995794866, - "grad_norm": 0.001189759117551148, - "learning_rate": 0.00019999993759816433, - "loss": 46.0, - "step": 4661 - }, - { - "epoch": 0.3564424565628763, - "grad_norm": 0.0008527018944732845, - "learning_rate": 0.00019999993757132772, - "loss": 46.0, - "step": 4662 - }, - { - "epoch": 0.356518913546266, - "grad_norm": 0.000508213764987886, - "learning_rate": 0.00019999993754448535, - "loss": 46.0, - "step": 4663 - }, - { - "epoch": 0.35659537052965573, - "grad_norm": 0.0018172443378716707, - "learning_rate": 0.0001999999375176372, - "loss": 46.0, - "step": 4664 - }, - { - "epoch": 0.3566718275130455, - "grad_norm": 0.003778244135901332, - "learning_rate": 0.00019999993749078327, - "loss": 46.0, - "step": 4665 - }, - { - "epoch": 0.3567482844964352, - "grad_norm": 0.0016574617475271225, - "learning_rate": 0.0001999999374639236, - "loss": 46.0, - "step": 4666 - }, - { - "epoch": 0.3568247414798249, - "grad_norm": 0.001543763792142272, - "learning_rate": 0.00019999993743705812, - "loss": 46.0, - "step": 4667 - }, - { - "epoch": 0.3569011984632146, - "grad_norm": 0.0007495081517845392, - "learning_rate": 0.0001999999374101869, - "loss": 46.0, - "step": 4668 - }, - { - "epoch": 0.35697765544660437, - "grad_norm": 0.0035756188444793224, - "learning_rate": 0.0001999999373833099, - "loss": 46.0, - "step": 4669 - }, - { - "epoch": 0.3570541124299941, - "grad_norm": 0.0013363715261220932, - "learning_rate": 0.00019999993735642714, - "loss": 46.0, - "step": 4670 - }, - { - "epoch": 0.3571305694133838, - "grad_norm": 0.0007094431784935296, - "learning_rate": 0.0001999999373295386, - "loss": 46.0, - "step": 4671 - }, - { - "epoch": 0.35720702639677354, - "grad_norm": 0.0006934573757462204, - "learning_rate": 0.0001999999373026443, - "loss": 46.0, - "step": 4672 - }, - { - "epoch": 0.35728348338016325, - "grad_norm": 0.0009954001288861036, - "learning_rate": 0.0001999999372757442, - "loss": 46.0, - "step": 4673 - }, - { - "epoch": 0.35735994036355295, - "grad_norm": 0.001204869826324284, - "learning_rate": 0.00019999993724883838, - "loss": 46.0, - "step": 4674 - }, - { - "epoch": 0.35743639734694266, - "grad_norm": 0.0008512535714544356, - "learning_rate": 0.00019999993722192677, - "loss": 46.0, - "step": 4675 - }, - { - "epoch": 0.3575128543303324, - "grad_norm": 0.0007278133416548371, - "learning_rate": 0.0001999999371950094, - "loss": 46.0, - "step": 4676 - }, - { - "epoch": 0.3575893113137221, - "grad_norm": 0.0015090714441612363, - "learning_rate": 0.00019999993716808624, - "loss": 46.0, - "step": 4677 - }, - { - "epoch": 0.35766576829711183, - "grad_norm": 0.0011691406834870577, - "learning_rate": 0.0001999999371411573, - "loss": 46.0, - "step": 4678 - }, - { - "epoch": 0.35774222528050154, - "grad_norm": 0.002763924887403846, - "learning_rate": 0.00019999993711422261, - "loss": 46.0, - "step": 4679 - }, - { - "epoch": 0.3578186822638913, - "grad_norm": 0.001961057772859931, - "learning_rate": 0.00019999993708728214, - "loss": 46.0, - "step": 4680 - }, - { - "epoch": 0.357895139247281, - "grad_norm": 0.0006002667360007763, - "learning_rate": 0.00019999993706033592, - "loss": 46.0, - "step": 4681 - }, - { - "epoch": 0.3579715962306707, - "grad_norm": 0.0030982759781181812, - "learning_rate": 0.0001999999370333839, - "loss": 46.0, - "step": 4682 - }, - { - "epoch": 0.3580480532140604, - "grad_norm": 0.0033666484523564577, - "learning_rate": 0.00019999993700642614, - "loss": 46.0, - "step": 4683 - }, - { - "epoch": 0.3581245101974502, - "grad_norm": 0.00808589905500412, - "learning_rate": 0.00019999993697946263, - "loss": 46.0, - "step": 4684 - }, - { - "epoch": 0.3582009671808399, - "grad_norm": 0.002203570678830147, - "learning_rate": 0.00019999993695249334, - "loss": 46.0, - "step": 4685 - }, - { - "epoch": 0.3582774241642296, - "grad_norm": 0.0013378560543060303, - "learning_rate": 0.00019999993692551823, - "loss": 46.0, - "step": 4686 - }, - { - "epoch": 0.35835388114761935, - "grad_norm": 0.013525111600756645, - "learning_rate": 0.00019999993689853737, - "loss": 46.0, - "step": 4687 - }, - { - "epoch": 0.35843033813100905, - "grad_norm": 0.004850596655160189, - "learning_rate": 0.0001999999368715508, - "loss": 46.0, - "step": 4688 - }, - { - "epoch": 0.35850679511439876, - "grad_norm": 0.0011082233395427465, - "learning_rate": 0.00019999993684455839, - "loss": 46.0, - "step": 4689 - }, - { - "epoch": 0.35858325209778846, - "grad_norm": 0.0007109181024134159, - "learning_rate": 0.00019999993681756023, - "loss": 46.0, - "step": 4690 - }, - { - "epoch": 0.3586597090811782, - "grad_norm": 0.0014756397577002645, - "learning_rate": 0.0001999999367905563, - "loss": 46.0, - "step": 4691 - }, - { - "epoch": 0.35873616606456793, - "grad_norm": 0.0010675217490643263, - "learning_rate": 0.0001999999367635466, - "loss": 46.0, - "step": 4692 - }, - { - "epoch": 0.35881262304795764, - "grad_norm": 0.0005786485271528363, - "learning_rate": 0.00019999993673653114, - "loss": 46.0, - "step": 4693 - }, - { - "epoch": 0.35888908003134734, - "grad_norm": 0.0021037685219198465, - "learning_rate": 0.00019999993670950992, - "loss": 46.0, - "step": 4694 - }, - { - "epoch": 0.3589655370147371, - "grad_norm": 0.0035053843166679144, - "learning_rate": 0.0001999999366824829, - "loss": 46.0, - "step": 4695 - }, - { - "epoch": 0.3590419939981268, - "grad_norm": 0.0022866849321871996, - "learning_rate": 0.00019999993665545014, - "loss": 46.0, - "step": 4696 - }, - { - "epoch": 0.3591184509815165, - "grad_norm": 0.0010928100673481822, - "learning_rate": 0.0001999999366284116, - "loss": 46.0, - "step": 4697 - }, - { - "epoch": 0.3591949079649062, - "grad_norm": 0.0028621566016227007, - "learning_rate": 0.00019999993660136728, - "loss": 46.0, - "step": 4698 - }, - { - "epoch": 0.359271364948296, - "grad_norm": 0.0014850847655907273, - "learning_rate": 0.00019999993657431723, - "loss": 46.0, - "step": 4699 - }, - { - "epoch": 0.3593478219316857, - "grad_norm": 0.0007642853306606412, - "learning_rate": 0.00019999993654726134, - "loss": 46.0, - "step": 4700 - }, - { - "epoch": 0.3594242789150754, - "grad_norm": 0.002495459746569395, - "learning_rate": 0.00019999993652019974, - "loss": 46.0, - "step": 4701 - }, - { - "epoch": 0.35950073589846515, - "grad_norm": 0.0009976213332265615, - "learning_rate": 0.00019999993649313236, - "loss": 46.0, - "step": 4702 - }, - { - "epoch": 0.35957719288185486, - "grad_norm": 0.001287257531657815, - "learning_rate": 0.00019999993646605918, - "loss": 46.0, - "step": 4703 - }, - { - "epoch": 0.35965364986524456, - "grad_norm": 0.0011877205688506365, - "learning_rate": 0.00019999993643898028, - "loss": 46.0, - "step": 4704 - }, - { - "epoch": 0.35973010684863427, - "grad_norm": 0.001366660464555025, - "learning_rate": 0.00019999993641189558, - "loss": 46.0, - "step": 4705 - }, - { - "epoch": 0.35980656383202403, - "grad_norm": 0.0014981237472966313, - "learning_rate": 0.0001999999363848051, - "loss": 46.0, - "step": 4706 - }, - { - "epoch": 0.35988302081541373, - "grad_norm": 0.0028690313920378685, - "learning_rate": 0.0001999999363577089, - "loss": 46.0, - "step": 4707 - }, - { - "epoch": 0.35995947779880344, - "grad_norm": 0.0020689209923148155, - "learning_rate": 0.00019999993633060687, - "loss": 46.0, - "step": 4708 - }, - { - "epoch": 0.36003593478219315, - "grad_norm": 0.0010616728104650974, - "learning_rate": 0.00019999993630349908, - "loss": 46.0, - "step": 4709 - }, - { - "epoch": 0.3601123917655829, - "grad_norm": 0.0011188882635906339, - "learning_rate": 0.00019999993627638554, - "loss": 46.0, - "step": 4710 - }, - { - "epoch": 0.3601888487489726, - "grad_norm": 0.0011518624378368258, - "learning_rate": 0.00019999993624926623, - "loss": 46.0, - "step": 4711 - }, - { - "epoch": 0.3602653057323623, - "grad_norm": 0.001189077040180564, - "learning_rate": 0.00019999993622214115, - "loss": 46.0, - "step": 4712 - }, - { - "epoch": 0.360341762715752, - "grad_norm": 0.0008661917527206242, - "learning_rate": 0.0001999999361950103, - "loss": 46.0, - "step": 4713 - }, - { - "epoch": 0.3604182196991418, - "grad_norm": 0.0006917808786965907, - "learning_rate": 0.00019999993616787366, - "loss": 46.0, - "step": 4714 - }, - { - "epoch": 0.3604946766825315, - "grad_norm": 0.0020765475928783417, - "learning_rate": 0.00019999993614073128, - "loss": 46.0, - "step": 4715 - }, - { - "epoch": 0.3605711336659212, - "grad_norm": 0.0014501598197966814, - "learning_rate": 0.00019999993611358313, - "loss": 46.0, - "step": 4716 - }, - { - "epoch": 0.36064759064931096, - "grad_norm": 0.0012613646686077118, - "learning_rate": 0.0001999999360864292, - "loss": 46.0, - "step": 4717 - }, - { - "epoch": 0.36072404763270066, - "grad_norm": 0.0005951666971668601, - "learning_rate": 0.00019999993605926948, - "loss": 46.0, - "step": 4718 - }, - { - "epoch": 0.36080050461609037, - "grad_norm": 0.0025049233809113503, - "learning_rate": 0.000199999936032104, - "loss": 46.0, - "step": 4719 - }, - { - "epoch": 0.3608769615994801, - "grad_norm": 0.0011853475589305162, - "learning_rate": 0.0001999999360049328, - "loss": 46.0, - "step": 4720 - }, - { - "epoch": 0.36095341858286983, - "grad_norm": 0.0007207774324342608, - "learning_rate": 0.00019999993597775577, - "loss": 46.0, - "step": 4721 - }, - { - "epoch": 0.36102987556625954, - "grad_norm": 0.0006562497583217919, - "learning_rate": 0.000199999935950573, - "loss": 46.0, - "step": 4722 - }, - { - "epoch": 0.36110633254964924, - "grad_norm": 0.0017990752821788192, - "learning_rate": 0.00019999993592338444, - "loss": 46.0, - "step": 4723 - }, - { - "epoch": 0.36118278953303895, - "grad_norm": 0.001188163529150188, - "learning_rate": 0.00019999993589619016, - "loss": 46.0, - "step": 4724 - }, - { - "epoch": 0.3612592465164287, - "grad_norm": 0.0007358421571552753, - "learning_rate": 0.00019999993586899005, - "loss": 46.0, - "step": 4725 - }, - { - "epoch": 0.3613357034998184, - "grad_norm": 0.0007657320820726454, - "learning_rate": 0.00019999993584178421, - "loss": 46.0, - "step": 4726 - }, - { - "epoch": 0.3614121604832081, - "grad_norm": 0.0008777249022386968, - "learning_rate": 0.00019999993581457258, - "loss": 46.0, - "step": 4727 - }, - { - "epoch": 0.36148861746659783, - "grad_norm": 0.0034008976072072983, - "learning_rate": 0.00019999993578735515, - "loss": 46.0, - "step": 4728 - }, - { - "epoch": 0.3615650744499876, - "grad_norm": 0.0008502238779328763, - "learning_rate": 0.000199999935760132, - "loss": 46.0, - "step": 4729 - }, - { - "epoch": 0.3616415314333773, - "grad_norm": 0.0009893195237964392, - "learning_rate": 0.00019999993573290308, - "loss": 46.0, - "step": 4730 - }, - { - "epoch": 0.361717988416767, - "grad_norm": 0.0013479336630553007, - "learning_rate": 0.00019999993570566838, - "loss": 46.0, - "step": 4731 - }, - { - "epoch": 0.36179444540015676, - "grad_norm": 0.005582977551966906, - "learning_rate": 0.0001999999356784279, - "loss": 46.0, - "step": 4732 - }, - { - "epoch": 0.36187090238354647, - "grad_norm": 0.0009295704076066613, - "learning_rate": 0.00019999993565118164, - "loss": 46.0, - "step": 4733 - }, - { - "epoch": 0.36194735936693617, - "grad_norm": 0.0017829765565693378, - "learning_rate": 0.00019999993562392964, - "loss": 46.0, - "step": 4734 - }, - { - "epoch": 0.3620238163503259, - "grad_norm": 0.003539358964189887, - "learning_rate": 0.00019999993559667188, - "loss": 46.0, - "step": 4735 - }, - { - "epoch": 0.36210027333371564, - "grad_norm": 0.0009421908762305975, - "learning_rate": 0.00019999993556940832, - "loss": 46.0, - "step": 4736 - }, - { - "epoch": 0.36217673031710534, - "grad_norm": 0.0009855086682364345, - "learning_rate": 0.000199999935542139, - "loss": 46.0, - "step": 4737 - }, - { - "epoch": 0.36225318730049505, - "grad_norm": 0.0008309340919367969, - "learning_rate": 0.00019999993551486392, - "loss": 46.0, - "step": 4738 - }, - { - "epoch": 0.36232964428388476, - "grad_norm": 0.002351518953219056, - "learning_rate": 0.00019999993548758304, - "loss": 46.0, - "step": 4739 - }, - { - "epoch": 0.3624061012672745, - "grad_norm": 0.0032715625129640102, - "learning_rate": 0.00019999993546029643, - "loss": 46.0, - "step": 4740 - }, - { - "epoch": 0.3624825582506642, - "grad_norm": 0.05072646960616112, - "learning_rate": 0.00019999993543300403, - "loss": 46.0, - "step": 4741 - }, - { - "epoch": 0.3625590152340539, - "grad_norm": 0.002393092028796673, - "learning_rate": 0.00019999993540570588, - "loss": 46.0, - "step": 4742 - }, - { - "epoch": 0.3626354722174437, - "grad_norm": 0.0005407915450632572, - "learning_rate": 0.00019999993537840193, - "loss": 46.0, - "step": 4743 - }, - { - "epoch": 0.3627119292008334, - "grad_norm": 0.0010096813784912229, - "learning_rate": 0.00019999993535109223, - "loss": 46.0, - "step": 4744 - }, - { - "epoch": 0.3627883861842231, - "grad_norm": 0.001218139659613371, - "learning_rate": 0.00019999993532377676, - "loss": 46.0, - "step": 4745 - }, - { - "epoch": 0.3628648431676128, - "grad_norm": 0.0028415655251592398, - "learning_rate": 0.00019999993529645549, - "loss": 46.0, - "step": 4746 - }, - { - "epoch": 0.36294130015100257, - "grad_norm": 0.0008066268055699766, - "learning_rate": 0.0001999999352691285, - "loss": 46.0, - "step": 4747 - }, - { - "epoch": 0.36301775713439227, - "grad_norm": 0.0013411416439339519, - "learning_rate": 0.0001999999352417957, - "loss": 46.0, - "step": 4748 - }, - { - "epoch": 0.363094214117782, - "grad_norm": 0.0014214726397767663, - "learning_rate": 0.00019999993521445717, - "loss": 46.0, - "step": 4749 - }, - { - "epoch": 0.3631706711011717, - "grad_norm": 0.001000717980787158, - "learning_rate": 0.00019999993518711283, - "loss": 46.0, - "step": 4750 - }, - { - "epoch": 0.36324712808456144, - "grad_norm": 0.0011393342865630984, - "learning_rate": 0.00019999993515976274, - "loss": 46.0, - "step": 4751 - }, - { - "epoch": 0.36332358506795115, - "grad_norm": 0.0014566500904038548, - "learning_rate": 0.0001999999351324069, - "loss": 46.0, - "step": 4752 - }, - { - "epoch": 0.36340004205134085, - "grad_norm": 0.0006267392891459167, - "learning_rate": 0.00019999993510504526, - "loss": 46.0, - "step": 4753 - }, - { - "epoch": 0.36347649903473056, - "grad_norm": 0.0020487220026552677, - "learning_rate": 0.00019999993507767785, - "loss": 46.0, - "step": 4754 - }, - { - "epoch": 0.3635529560181203, - "grad_norm": 0.004392855800688267, - "learning_rate": 0.00019999993505030468, - "loss": 46.0, - "step": 4755 - }, - { - "epoch": 0.36362941300151, - "grad_norm": 0.0009140231995843351, - "learning_rate": 0.00019999993502292575, - "loss": 46.0, - "step": 4756 - }, - { - "epoch": 0.36370586998489973, - "grad_norm": 0.004033043514937162, - "learning_rate": 0.00019999993499554103, - "loss": 46.0, - "step": 4757 - }, - { - "epoch": 0.3637823269682895, - "grad_norm": 0.004864738322794437, - "learning_rate": 0.00019999993496815056, - "loss": 46.0, - "step": 4758 - }, - { - "epoch": 0.3638587839516792, - "grad_norm": 0.006420567631721497, - "learning_rate": 0.0001999999349407543, - "loss": 46.0, - "step": 4759 - }, - { - "epoch": 0.3639352409350689, - "grad_norm": 0.0005323308287188411, - "learning_rate": 0.0001999999349133523, - "loss": 46.0, - "step": 4760 - }, - { - "epoch": 0.3640116979184586, - "grad_norm": 0.0009971384424716234, - "learning_rate": 0.0001999999348859445, - "loss": 46.0, - "step": 4761 - }, - { - "epoch": 0.36408815490184837, - "grad_norm": 0.0005832620081491768, - "learning_rate": 0.00019999993485853094, - "loss": 46.0, - "step": 4762 - }, - { - "epoch": 0.3641646118852381, - "grad_norm": 0.002891265321522951, - "learning_rate": 0.00019999993483111163, - "loss": 46.0, - "step": 4763 - }, - { - "epoch": 0.3642410688686278, - "grad_norm": 0.005334098357707262, - "learning_rate": 0.00019999993480368652, - "loss": 46.0, - "step": 4764 - }, - { - "epoch": 0.3643175258520175, - "grad_norm": 0.0009453212842345238, - "learning_rate": 0.00019999993477625566, - "loss": 46.0, - "step": 4765 - }, - { - "epoch": 0.36439398283540725, - "grad_norm": 0.0016841174801811576, - "learning_rate": 0.000199999934748819, - "loss": 46.0, - "step": 4766 - }, - { - "epoch": 0.36447043981879695, - "grad_norm": 0.0011450157035142183, - "learning_rate": 0.0001999999347213766, - "loss": 46.0, - "step": 4767 - }, - { - "epoch": 0.36454689680218666, - "grad_norm": 0.0017618951387703419, - "learning_rate": 0.00019999993469392845, - "loss": 46.0, - "step": 4768 - }, - { - "epoch": 0.36462335378557637, - "grad_norm": 0.0007831259281374514, - "learning_rate": 0.00019999993466647453, - "loss": 46.0, - "step": 4769 - }, - { - "epoch": 0.3646998107689661, - "grad_norm": 0.005581744946539402, - "learning_rate": 0.0001999999346390148, - "loss": 46.0, - "step": 4770 - }, - { - "epoch": 0.36477626775235583, - "grad_norm": 0.0011554569937288761, - "learning_rate": 0.0001999999346115493, - "loss": 46.0, - "step": 4771 - }, - { - "epoch": 0.36485272473574554, - "grad_norm": 0.0028830962255597115, - "learning_rate": 0.0001999999345840781, - "loss": 46.0, - "step": 4772 - }, - { - "epoch": 0.3649291817191353, - "grad_norm": 0.0004013258730992675, - "learning_rate": 0.00019999993455660105, - "loss": 46.0, - "step": 4773 - }, - { - "epoch": 0.365005638702525, - "grad_norm": 0.0014745995867997408, - "learning_rate": 0.00019999993452911828, - "loss": 46.0, - "step": 4774 - }, - { - "epoch": 0.3650820956859147, - "grad_norm": 0.0008328726980835199, - "learning_rate": 0.0001999999345016297, - "loss": 46.0, - "step": 4775 - }, - { - "epoch": 0.3651585526693044, - "grad_norm": 0.002912326017394662, - "learning_rate": 0.00019999993447413535, - "loss": 46.0, - "step": 4776 - }, - { - "epoch": 0.3652350096526942, - "grad_norm": 0.0015257705235853791, - "learning_rate": 0.00019999993444663527, - "loss": 46.0, - "step": 4777 - }, - { - "epoch": 0.3653114666360839, - "grad_norm": 0.0015373674686998129, - "learning_rate": 0.00019999993441912941, - "loss": 46.0, - "step": 4778 - }, - { - "epoch": 0.3653879236194736, - "grad_norm": 0.0037738927640020847, - "learning_rate": 0.00019999993439161778, - "loss": 46.0, - "step": 4779 - }, - { - "epoch": 0.3654643806028633, - "grad_norm": 0.0019009530078619719, - "learning_rate": 0.00019999993436410035, - "loss": 46.0, - "step": 4780 - }, - { - "epoch": 0.36554083758625305, - "grad_norm": 0.0010089471470564604, - "learning_rate": 0.0001999999343365772, - "loss": 46.0, - "step": 4781 - }, - { - "epoch": 0.36561729456964276, - "grad_norm": 0.0012007777113467455, - "learning_rate": 0.00019999993430904825, - "loss": 46.0, - "step": 4782 - }, - { - "epoch": 0.36569375155303246, - "grad_norm": 0.0020197604317218065, - "learning_rate": 0.00019999993428151353, - "loss": 46.0, - "step": 4783 - }, - { - "epoch": 0.36577020853642217, - "grad_norm": 0.0022159023210406303, - "learning_rate": 0.00019999993425397306, - "loss": 46.0, - "step": 4784 - }, - { - "epoch": 0.36584666551981193, - "grad_norm": 0.005160761531442404, - "learning_rate": 0.0001999999342264268, - "loss": 46.0, - "step": 4785 - }, - { - "epoch": 0.36592312250320164, - "grad_norm": 0.0014021808747202158, - "learning_rate": 0.0001999999341988748, - "loss": 46.0, - "step": 4786 - }, - { - "epoch": 0.36599957948659134, - "grad_norm": 0.0013583634281530976, - "learning_rate": 0.00019999993417131698, - "loss": 46.0, - "step": 4787 - }, - { - "epoch": 0.3660760364699811, - "grad_norm": 0.014794185757637024, - "learning_rate": 0.00019999993414375342, - "loss": 46.0, - "step": 4788 - }, - { - "epoch": 0.3661524934533708, - "grad_norm": 0.0015073573449626565, - "learning_rate": 0.00019999993411618408, - "loss": 46.0, - "step": 4789 - }, - { - "epoch": 0.3662289504367605, - "grad_norm": 0.0014534660149365664, - "learning_rate": 0.000199999934088609, - "loss": 46.0, - "step": 4790 - }, - { - "epoch": 0.3663054074201502, - "grad_norm": 0.0008017596555873752, - "learning_rate": 0.00019999993406102812, - "loss": 46.0, - "step": 4791 - }, - { - "epoch": 0.36638186440354, - "grad_norm": 0.0021246725227683783, - "learning_rate": 0.00019999993403344149, - "loss": 46.0, - "step": 4792 - }, - { - "epoch": 0.3664583213869297, - "grad_norm": 0.0013715496752411127, - "learning_rate": 0.00019999993400584906, - "loss": 46.0, - "step": 4793 - }, - { - "epoch": 0.3665347783703194, - "grad_norm": 0.00122716068290174, - "learning_rate": 0.0001999999339782509, - "loss": 46.0, - "step": 4794 - }, - { - "epoch": 0.3666112353537091, - "grad_norm": 0.0005874124472029507, - "learning_rate": 0.00019999993395064696, - "loss": 46.0, - "step": 4795 - }, - { - "epoch": 0.36668769233709886, - "grad_norm": 0.0008420470403507352, - "learning_rate": 0.00019999993392303723, - "loss": 46.0, - "step": 4796 - }, - { - "epoch": 0.36676414932048856, - "grad_norm": 0.0027939307037740946, - "learning_rate": 0.00019999993389542176, - "loss": 46.0, - "step": 4797 - }, - { - "epoch": 0.36684060630387827, - "grad_norm": 0.0007634905050508678, - "learning_rate": 0.0001999999338678005, - "loss": 46.0, - "step": 4798 - }, - { - "epoch": 0.366917063287268, - "grad_norm": 0.00122022011782974, - "learning_rate": 0.00019999993384017345, - "loss": 46.0, - "step": 4799 - }, - { - "epoch": 0.36699352027065774, - "grad_norm": 0.0007961088558658957, - "learning_rate": 0.00019999993381254066, - "loss": 46.0, - "step": 4800 - }, - { - "epoch": 0.36706997725404744, - "grad_norm": 0.004239731468260288, - "learning_rate": 0.00019999993378490212, - "loss": 46.0, - "step": 4801 - }, - { - "epoch": 0.36714643423743715, - "grad_norm": 0.0014644551556557417, - "learning_rate": 0.0001999999337572578, - "loss": 46.0, - "step": 4802 - }, - { - "epoch": 0.3672228912208269, - "grad_norm": 0.0019061948405578732, - "learning_rate": 0.00019999993372960768, - "loss": 46.0, - "step": 4803 - }, - { - "epoch": 0.3672993482042166, - "grad_norm": 0.0006932874675840139, - "learning_rate": 0.0001999999337019518, - "loss": 46.0, - "step": 4804 - }, - { - "epoch": 0.3673758051876063, - "grad_norm": 0.0015004683518782258, - "learning_rate": 0.00019999993367429014, - "loss": 46.0, - "step": 4805 - }, - { - "epoch": 0.367452262170996, - "grad_norm": 0.0003630152787081897, - "learning_rate": 0.00019999993364662276, - "loss": 46.0, - "step": 4806 - }, - { - "epoch": 0.3675287191543858, - "grad_norm": 0.0006373982760123909, - "learning_rate": 0.00019999993361894959, - "loss": 46.0, - "step": 4807 - }, - { - "epoch": 0.3676051761377755, - "grad_norm": 0.0002189316292060539, - "learning_rate": 0.0001999999335912706, - "loss": 46.0, - "step": 4808 - }, - { - "epoch": 0.3676816331211652, - "grad_norm": 0.0023921134416013956, - "learning_rate": 0.00019999993356358591, - "loss": 46.0, - "step": 4809 - }, - { - "epoch": 0.3677580901045549, - "grad_norm": 0.0029419907368719578, - "learning_rate": 0.00019999993353589542, - "loss": 46.0, - "step": 4810 - }, - { - "epoch": 0.36783454708794466, - "grad_norm": 0.0006955838762223721, - "learning_rate": 0.00019999993350819915, - "loss": 46.0, - "step": 4811 - }, - { - "epoch": 0.36791100407133437, - "grad_norm": 0.0007645781734026968, - "learning_rate": 0.00019999993348049713, - "loss": 46.0, - "step": 4812 - }, - { - "epoch": 0.3679874610547241, - "grad_norm": 0.001570119522511959, - "learning_rate": 0.00019999993345278934, - "loss": 46.0, - "step": 4813 - }, - { - "epoch": 0.3680639180381138, - "grad_norm": 0.00292653264477849, - "learning_rate": 0.00019999993342507578, - "loss": 46.0, - "step": 4814 - }, - { - "epoch": 0.36814037502150354, - "grad_norm": 0.0005215071723796427, - "learning_rate": 0.00019999993339735644, - "loss": 46.0, - "step": 4815 - }, - { - "epoch": 0.36821683200489325, - "grad_norm": 0.000847869087010622, - "learning_rate": 0.00019999993336963133, - "loss": 46.0, - "step": 4816 - }, - { - "epoch": 0.36829328898828295, - "grad_norm": 0.004098173696547747, - "learning_rate": 0.00019999993334190042, - "loss": 46.0, - "step": 4817 - }, - { - "epoch": 0.3683697459716727, - "grad_norm": 0.0008783571538515389, - "learning_rate": 0.0001999999333141638, - "loss": 46.0, - "step": 4818 - }, - { - "epoch": 0.3684462029550624, - "grad_norm": 0.0007490054122172296, - "learning_rate": 0.0001999999332864214, - "loss": 46.0, - "step": 4819 - }, - { - "epoch": 0.3685226599384521, - "grad_norm": 0.0007143210968934, - "learning_rate": 0.0001999999332586732, - "loss": 46.0, - "step": 4820 - }, - { - "epoch": 0.36859911692184183, - "grad_norm": 0.003364644246175885, - "learning_rate": 0.00019999993323091927, - "loss": 46.0, - "step": 4821 - }, - { - "epoch": 0.3686755739052316, - "grad_norm": 0.001407845295034349, - "learning_rate": 0.00019999993320315952, - "loss": 46.0, - "step": 4822 - }, - { - "epoch": 0.3687520308886213, - "grad_norm": 0.0016733992379158735, - "learning_rate": 0.00019999993317539402, - "loss": 46.0, - "step": 4823 - }, - { - "epoch": 0.368828487872011, - "grad_norm": 0.00022612736211158335, - "learning_rate": 0.00019999993314762278, - "loss": 46.0, - "step": 4824 - }, - { - "epoch": 0.3689049448554007, - "grad_norm": 0.003249608911573887, - "learning_rate": 0.00019999993311984576, - "loss": 46.0, - "step": 4825 - }, - { - "epoch": 0.36898140183879047, - "grad_norm": 0.0008073369390331209, - "learning_rate": 0.00019999993309206295, - "loss": 46.0, - "step": 4826 - }, - { - "epoch": 0.3690578588221802, - "grad_norm": 0.0005007277941331267, - "learning_rate": 0.00019999993306427438, - "loss": 46.0, - "step": 4827 - }, - { - "epoch": 0.3691343158055699, - "grad_norm": 0.0014423951506614685, - "learning_rate": 0.00019999993303648005, - "loss": 46.0, - "step": 4828 - }, - { - "epoch": 0.3692107727889596, - "grad_norm": 0.0008734010043554008, - "learning_rate": 0.0001999999330086799, - "loss": 46.0, - "step": 4829 - }, - { - "epoch": 0.36928722977234935, - "grad_norm": 0.0010049734264612198, - "learning_rate": 0.00019999993298087406, - "loss": 46.0, - "step": 4830 - }, - { - "epoch": 0.36936368675573905, - "grad_norm": 0.00026240863371640444, - "learning_rate": 0.0001999999329530624, - "loss": 46.0, - "step": 4831 - }, - { - "epoch": 0.36944014373912876, - "grad_norm": 0.002178644761443138, - "learning_rate": 0.00019999993292524497, - "loss": 46.0, - "step": 4832 - }, - { - "epoch": 0.3695166007225185, - "grad_norm": 0.0006505553028546274, - "learning_rate": 0.0001999999328974218, - "loss": 46.0, - "step": 4833 - }, - { - "epoch": 0.3695930577059082, - "grad_norm": 0.0010698490077629685, - "learning_rate": 0.00019999993286959282, - "loss": 46.0, - "step": 4834 - }, - { - "epoch": 0.36966951468929793, - "grad_norm": 0.0014758114702999592, - "learning_rate": 0.0001999999328417581, - "loss": 46.0, - "step": 4835 - }, - { - "epoch": 0.36974597167268763, - "grad_norm": 0.0010098819620907307, - "learning_rate": 0.00019999993281391763, - "loss": 46.0, - "step": 4836 - }, - { - "epoch": 0.3698224286560774, - "grad_norm": 0.001681495807133615, - "learning_rate": 0.00019999993278607136, - "loss": 46.0, - "step": 4837 - }, - { - "epoch": 0.3698988856394671, - "grad_norm": 0.003068075980991125, - "learning_rate": 0.00019999993275821934, - "loss": 46.0, - "step": 4838 - }, - { - "epoch": 0.3699753426228568, - "grad_norm": 0.000599504099227488, - "learning_rate": 0.00019999993273036153, - "loss": 46.0, - "step": 4839 - }, - { - "epoch": 0.3700517996062465, - "grad_norm": 0.000580562453251332, - "learning_rate": 0.00019999993270249794, - "loss": 46.0, - "step": 4840 - }, - { - "epoch": 0.3701282565896363, - "grad_norm": 0.001212918316014111, - "learning_rate": 0.0001999999326746286, - "loss": 46.0, - "step": 4841 - }, - { - "epoch": 0.370204713573026, - "grad_norm": 0.0013744347961619496, - "learning_rate": 0.00019999993264675347, - "loss": 46.0, - "step": 4842 - }, - { - "epoch": 0.3702811705564157, - "grad_norm": 0.0016104707028716803, - "learning_rate": 0.0001999999326188726, - "loss": 46.0, - "step": 4843 - }, - { - "epoch": 0.3703576275398054, - "grad_norm": 0.0051866937428712845, - "learning_rate": 0.00019999993259098596, - "loss": 46.0, - "step": 4844 - }, - { - "epoch": 0.37043408452319515, - "grad_norm": 0.000993227818980813, - "learning_rate": 0.00019999993256309355, - "loss": 46.0, - "step": 4845 - }, - { - "epoch": 0.37051054150658486, - "grad_norm": 0.0013105841353535652, - "learning_rate": 0.00019999993253519532, - "loss": 46.0, - "step": 4846 - }, - { - "epoch": 0.37058699848997456, - "grad_norm": 0.0008735943119972944, - "learning_rate": 0.00019999993250729138, - "loss": 46.0, - "step": 4847 - }, - { - "epoch": 0.3706634554733643, - "grad_norm": 0.0008797018090263009, - "learning_rate": 0.00019999993247938168, - "loss": 46.0, - "step": 4848 - }, - { - "epoch": 0.37073991245675403, - "grad_norm": 0.0022008928935974836, - "learning_rate": 0.00019999993245146619, - "loss": 46.0, - "step": 4849 - }, - { - "epoch": 0.37081636944014373, - "grad_norm": 0.00046380568528547883, - "learning_rate": 0.00019999993242354492, - "loss": 46.0, - "step": 4850 - }, - { - "epoch": 0.37089282642353344, - "grad_norm": 0.002696666633710265, - "learning_rate": 0.00019999993239561787, - "loss": 46.0, - "step": 4851 - }, - { - "epoch": 0.3709692834069232, - "grad_norm": 0.002761688083410263, - "learning_rate": 0.00019999993236768506, - "loss": 46.0, - "step": 4852 - }, - { - "epoch": 0.3710457403903129, - "grad_norm": 0.0005381557857617736, - "learning_rate": 0.00019999993233974647, - "loss": 46.0, - "step": 4853 - }, - { - "epoch": 0.3711221973737026, - "grad_norm": 0.0019500568741932511, - "learning_rate": 0.0001999999323118021, - "loss": 46.0, - "step": 4854 - }, - { - "epoch": 0.3711986543570923, - "grad_norm": 0.012805874459445477, - "learning_rate": 0.00019999993228385203, - "loss": 46.0, - "step": 4855 - }, - { - "epoch": 0.3712751113404821, - "grad_norm": 0.0021064174361526966, - "learning_rate": 0.00019999993225589612, - "loss": 46.0, - "step": 4856 - }, - { - "epoch": 0.3713515683238718, - "grad_norm": 0.0019460027106106281, - "learning_rate": 0.0001999999322279345, - "loss": 46.0, - "step": 4857 - }, - { - "epoch": 0.3714280253072615, - "grad_norm": 0.0011560218408703804, - "learning_rate": 0.00019999993219996703, - "loss": 46.0, - "step": 4858 - }, - { - "epoch": 0.37150448229065125, - "grad_norm": 0.000566302624065429, - "learning_rate": 0.00019999993217199386, - "loss": 46.0, - "step": 4859 - }, - { - "epoch": 0.37158093927404096, - "grad_norm": 0.0006808766629546881, - "learning_rate": 0.0001999999321440149, - "loss": 46.0, - "step": 4860 - }, - { - "epoch": 0.37165739625743066, - "grad_norm": 0.0037500590551644564, - "learning_rate": 0.0001999999321160302, - "loss": 46.0, - "step": 4861 - }, - { - "epoch": 0.37173385324082037, - "grad_norm": 0.001662333612330258, - "learning_rate": 0.0001999999320880397, - "loss": 46.0, - "step": 4862 - }, - { - "epoch": 0.3718103102242101, - "grad_norm": 0.001028480939567089, - "learning_rate": 0.00019999993206004342, - "loss": 46.0, - "step": 4863 - }, - { - "epoch": 0.37188676720759983, - "grad_norm": 0.0006017939304001629, - "learning_rate": 0.00019999993203204138, - "loss": 46.0, - "step": 4864 - }, - { - "epoch": 0.37196322419098954, - "grad_norm": 0.0038125556893646717, - "learning_rate": 0.00019999993200403354, - "loss": 46.0, - "step": 4865 - }, - { - "epoch": 0.37203968117437924, - "grad_norm": 0.0035511080641299486, - "learning_rate": 0.00019999993197601998, - "loss": 46.0, - "step": 4866 - }, - { - "epoch": 0.372116138157769, - "grad_norm": 0.002554011531174183, - "learning_rate": 0.00019999993194800064, - "loss": 46.0, - "step": 4867 - }, - { - "epoch": 0.3721925951411587, - "grad_norm": 0.0005075350636616349, - "learning_rate": 0.0001999999319199755, - "loss": 46.0, - "step": 4868 - }, - { - "epoch": 0.3722690521245484, - "grad_norm": 0.001362888957373798, - "learning_rate": 0.00019999993189194463, - "loss": 46.0, - "step": 4869 - }, - { - "epoch": 0.3723455091079381, - "grad_norm": 0.005210305564105511, - "learning_rate": 0.00019999993186390797, - "loss": 46.0, - "step": 4870 - }, - { - "epoch": 0.3724219660913279, - "grad_norm": 0.003195004304870963, - "learning_rate": 0.00019999993183586554, - "loss": 46.0, - "step": 4871 - }, - { - "epoch": 0.3724984230747176, - "grad_norm": 0.0004451118584256619, - "learning_rate": 0.00019999993180781734, - "loss": 46.0, - "step": 4872 - }, - { - "epoch": 0.3725748800581073, - "grad_norm": 0.0003656750777736306, - "learning_rate": 0.0001999999317797634, - "loss": 46.0, - "step": 4873 - }, - { - "epoch": 0.37265133704149705, - "grad_norm": 0.0005771152791567147, - "learning_rate": 0.00019999993175170367, - "loss": 46.0, - "step": 4874 - }, - { - "epoch": 0.37272779402488676, - "grad_norm": 0.001187969814054668, - "learning_rate": 0.00019999993172363815, - "loss": 46.0, - "step": 4875 - }, - { - "epoch": 0.37280425100827647, - "grad_norm": 0.0004604406713042408, - "learning_rate": 0.00019999993169556688, - "loss": 46.0, - "step": 4876 - }, - { - "epoch": 0.37288070799166617, - "grad_norm": 0.00129945226944983, - "learning_rate": 0.00019999993166748981, - "loss": 46.0, - "step": 4877 - }, - { - "epoch": 0.37295716497505593, - "grad_norm": 0.0007609662134200335, - "learning_rate": 0.00019999993163940703, - "loss": 46.0, - "step": 4878 - }, - { - "epoch": 0.37303362195844564, - "grad_norm": 0.0035767932422459126, - "learning_rate": 0.00019999993161131844, - "loss": 46.0, - "step": 4879 - }, - { - "epoch": 0.37311007894183534, - "grad_norm": 0.0013194376369938254, - "learning_rate": 0.00019999993158322408, - "loss": 46.0, - "step": 4880 - }, - { - "epoch": 0.37318653592522505, - "grad_norm": 0.0028642769902944565, - "learning_rate": 0.00019999993155512397, - "loss": 46.0, - "step": 4881 - }, - { - "epoch": 0.3732629929086148, - "grad_norm": 0.0012514109257608652, - "learning_rate": 0.00019999993152701809, - "loss": 46.0, - "step": 4882 - }, - { - "epoch": 0.3733394498920045, - "grad_norm": 0.0011644541518762708, - "learning_rate": 0.0001999999314989064, - "loss": 46.0, - "step": 4883 - }, - { - "epoch": 0.3734159068753942, - "grad_norm": 0.00152407621499151, - "learning_rate": 0.000199999931470789, - "loss": 46.0, - "step": 4884 - }, - { - "epoch": 0.3734923638587839, - "grad_norm": 0.003909895662218332, - "learning_rate": 0.0001999999314426658, - "loss": 46.0, - "step": 4885 - }, - { - "epoch": 0.3735688208421737, - "grad_norm": 0.0018088618526235223, - "learning_rate": 0.00019999993141453683, - "loss": 46.0, - "step": 4886 - }, - { - "epoch": 0.3736452778255634, - "grad_norm": 0.003985913470387459, - "learning_rate": 0.0001999999313864021, - "loss": 46.0, - "step": 4887 - }, - { - "epoch": 0.3737217348089531, - "grad_norm": 0.01101826224476099, - "learning_rate": 0.00019999993135826158, - "loss": 46.0, - "step": 4888 - }, - { - "epoch": 0.37379819179234286, - "grad_norm": 0.0017482512630522251, - "learning_rate": 0.0001999999313301153, - "loss": 46.0, - "step": 4889 - }, - { - "epoch": 0.37387464877573257, - "grad_norm": 0.0036681180354207754, - "learning_rate": 0.00019999993130196325, - "loss": 46.0, - "step": 4890 - }, - { - "epoch": 0.37395110575912227, - "grad_norm": 0.0023561078123748302, - "learning_rate": 0.00019999993127380546, - "loss": 46.0, - "step": 4891 - }, - { - "epoch": 0.374027562742512, - "grad_norm": 0.0033678405452519655, - "learning_rate": 0.00019999993124564187, - "loss": 46.0, - "step": 4892 - }, - { - "epoch": 0.37410401972590174, - "grad_norm": 0.0008517990354448557, - "learning_rate": 0.0001999999312174725, - "loss": 46.0, - "step": 4893 - }, - { - "epoch": 0.37418047670929144, - "grad_norm": 0.0006096003344282508, - "learning_rate": 0.00019999993118929738, - "loss": 46.0, - "step": 4894 - }, - { - "epoch": 0.37425693369268115, - "grad_norm": 0.0004946135450154543, - "learning_rate": 0.0001999999311611165, - "loss": 46.0, - "step": 4895 - }, - { - "epoch": 0.37433339067607085, - "grad_norm": 0.001580708078108728, - "learning_rate": 0.00019999993113292984, - "loss": 46.0, - "step": 4896 - }, - { - "epoch": 0.3744098476594606, - "grad_norm": 0.0027992581017315388, - "learning_rate": 0.00019999993110473741, - "loss": 46.0, - "step": 4897 - }, - { - "epoch": 0.3744863046428503, - "grad_norm": 0.0019262167625129223, - "learning_rate": 0.0001999999310765392, - "loss": 46.0, - "step": 4898 - }, - { - "epoch": 0.37456276162624, - "grad_norm": 0.001027535181492567, - "learning_rate": 0.00019999993104833524, - "loss": 46.0, - "step": 4899 - }, - { - "epoch": 0.37463921860962973, - "grad_norm": 0.0014404486864805222, - "learning_rate": 0.0001999999310201255, - "loss": 46.0, - "step": 4900 - }, - { - "epoch": 0.3747156755930195, - "grad_norm": 0.003168889321386814, - "learning_rate": 0.00019999993099190997, - "loss": 46.0, - "step": 4901 - }, - { - "epoch": 0.3747921325764092, - "grad_norm": 0.0012133385753259063, - "learning_rate": 0.00019999993096368873, - "loss": 46.0, - "step": 4902 - }, - { - "epoch": 0.3748685895597989, - "grad_norm": 0.0016722440486773849, - "learning_rate": 0.00019999993093546166, - "loss": 46.0, - "step": 4903 - }, - { - "epoch": 0.37494504654318866, - "grad_norm": 0.0020575562957674265, - "learning_rate": 0.00019999993090722885, - "loss": 46.0, - "step": 4904 - }, - { - "epoch": 0.37502150352657837, - "grad_norm": 0.0008931782795116305, - "learning_rate": 0.00019999993087899026, - "loss": 46.0, - "step": 4905 - }, - { - "epoch": 0.3750979605099681, - "grad_norm": 0.0012393470387905836, - "learning_rate": 0.0001999999308507459, - "loss": 46.0, - "step": 4906 - }, - { - "epoch": 0.3751744174933578, - "grad_norm": 0.0017042583785951138, - "learning_rate": 0.00019999993082249576, - "loss": 46.0, - "step": 4907 - }, - { - "epoch": 0.37525087447674754, - "grad_norm": 0.004278429783880711, - "learning_rate": 0.00019999993079423985, - "loss": 46.0, - "step": 4908 - }, - { - "epoch": 0.37532733146013725, - "grad_norm": 0.008249441161751747, - "learning_rate": 0.0001999999307659782, - "loss": 46.0, - "step": 4909 - }, - { - "epoch": 0.37540378844352695, - "grad_norm": 0.002022908767685294, - "learning_rate": 0.00019999993073771077, - "loss": 46.0, - "step": 4910 - }, - { - "epoch": 0.37548024542691666, - "grad_norm": 0.0018369735917076468, - "learning_rate": 0.00019999993070943757, - "loss": 46.0, - "step": 4911 - }, - { - "epoch": 0.3755567024103064, - "grad_norm": 0.0015796294901520014, - "learning_rate": 0.0001999999306811586, - "loss": 46.0, - "step": 4912 - }, - { - "epoch": 0.3756331593936961, - "grad_norm": 0.0009233673335984349, - "learning_rate": 0.00019999993065287385, - "loss": 46.0, - "step": 4913 - }, - { - "epoch": 0.37570961637708583, - "grad_norm": 0.001429198426194489, - "learning_rate": 0.00019999993062458336, - "loss": 46.0, - "step": 4914 - }, - { - "epoch": 0.37578607336047554, - "grad_norm": 0.002765101147815585, - "learning_rate": 0.00019999993059628706, - "loss": 46.0, - "step": 4915 - }, - { - "epoch": 0.3758625303438653, - "grad_norm": 0.0012760492973029613, - "learning_rate": 0.00019999993056798502, - "loss": 46.0, - "step": 4916 - }, - { - "epoch": 0.375938987327255, - "grad_norm": 0.0011727642267942429, - "learning_rate": 0.00019999993053967718, - "loss": 46.0, - "step": 4917 - }, - { - "epoch": 0.3760154443106447, - "grad_norm": 0.0014468106674030423, - "learning_rate": 0.00019999993051136362, - "loss": 46.0, - "step": 4918 - }, - { - "epoch": 0.37609190129403447, - "grad_norm": 0.0010393660049885511, - "learning_rate": 0.00019999993048304423, - "loss": 46.0, - "step": 4919 - }, - { - "epoch": 0.3761683582774242, - "grad_norm": 0.0011485849972814322, - "learning_rate": 0.0001999999304547191, - "loss": 46.0, - "step": 4920 - }, - { - "epoch": 0.3762448152608139, - "grad_norm": 0.002423313446342945, - "learning_rate": 0.0001999999304263882, - "loss": 46.0, - "step": 4921 - }, - { - "epoch": 0.3763212722442036, - "grad_norm": 0.004466170445084572, - "learning_rate": 0.00019999993039805154, - "loss": 46.0, - "step": 4922 - }, - { - "epoch": 0.37639772922759335, - "grad_norm": 0.000734652450773865, - "learning_rate": 0.0001999999303697091, - "loss": 46.0, - "step": 4923 - }, - { - "epoch": 0.37647418621098305, - "grad_norm": 0.000604241737164557, - "learning_rate": 0.0001999999303413609, - "loss": 46.0, - "step": 4924 - }, - { - "epoch": 0.37655064319437276, - "grad_norm": 0.035608306527137756, - "learning_rate": 0.00019999993031300694, - "loss": 46.0, - "step": 4925 - }, - { - "epoch": 0.37662710017776246, - "grad_norm": 0.009268702939152718, - "learning_rate": 0.0001999999302846472, - "loss": 46.0, - "step": 4926 - }, - { - "epoch": 0.3767035571611522, - "grad_norm": 0.0009004255407489836, - "learning_rate": 0.00019999993025628164, - "loss": 46.0, - "step": 4927 - }, - { - "epoch": 0.37678001414454193, - "grad_norm": 0.0007173077901825309, - "learning_rate": 0.0001999999302279104, - "loss": 46.0, - "step": 4928 - }, - { - "epoch": 0.37685647112793164, - "grad_norm": 0.005671788938343525, - "learning_rate": 0.00019999993019953333, - "loss": 46.0, - "step": 4929 - }, - { - "epoch": 0.37693292811132134, - "grad_norm": 0.0031418928410857916, - "learning_rate": 0.00019999993017115052, - "loss": 46.0, - "step": 4930 - }, - { - "epoch": 0.3770093850947111, - "grad_norm": 0.0012169423280283809, - "learning_rate": 0.00019999993014276193, - "loss": 46.0, - "step": 4931 - }, - { - "epoch": 0.3770858420781008, - "grad_norm": 0.0004836243751924485, - "learning_rate": 0.00019999993011436755, - "loss": 46.0, - "step": 4932 - }, - { - "epoch": 0.3771622990614905, - "grad_norm": 0.0009299837402068079, - "learning_rate": 0.0001999999300859674, - "loss": 46.0, - "step": 4933 - }, - { - "epoch": 0.3772387560448803, - "grad_norm": 0.0011206520721316338, - "learning_rate": 0.00019999993005756153, - "loss": 46.0, - "step": 4934 - }, - { - "epoch": 0.37731521302827, - "grad_norm": 0.0023651737719774246, - "learning_rate": 0.00019999993002914985, - "loss": 46.0, - "step": 4935 - }, - { - "epoch": 0.3773916700116597, - "grad_norm": 0.004383682273328304, - "learning_rate": 0.0001999999300007324, - "loss": 46.0, - "step": 4936 - }, - { - "epoch": 0.3774681269950494, - "grad_norm": 0.0016093684826046228, - "learning_rate": 0.0001999999299723092, - "loss": 46.0, - "step": 4937 - }, - { - "epoch": 0.37754458397843915, - "grad_norm": 0.001072571729309857, - "learning_rate": 0.00019999992994388022, - "loss": 46.0, - "step": 4938 - }, - { - "epoch": 0.37762104096182886, - "grad_norm": 0.0010431492701172829, - "learning_rate": 0.00019999992991544548, - "loss": 46.0, - "step": 4939 - }, - { - "epoch": 0.37769749794521856, - "grad_norm": 0.0010073445737361908, - "learning_rate": 0.00019999992988700496, - "loss": 46.0, - "step": 4940 - }, - { - "epoch": 0.37777395492860827, - "grad_norm": 0.0012456390541046858, - "learning_rate": 0.00019999992985855866, - "loss": 46.0, - "step": 4941 - }, - { - "epoch": 0.37785041191199803, - "grad_norm": 0.0005301701021380723, - "learning_rate": 0.0001999999298301066, - "loss": 46.0, - "step": 4942 - }, - { - "epoch": 0.37792686889538774, - "grad_norm": 0.0023952494375407696, - "learning_rate": 0.00019999992980164878, - "loss": 46.0, - "step": 4943 - }, - { - "epoch": 0.37800332587877744, - "grad_norm": 0.0035832435823976994, - "learning_rate": 0.0001999999297731852, - "loss": 46.0, - "step": 4944 - }, - { - "epoch": 0.37807978286216715, - "grad_norm": 0.0015766293508931994, - "learning_rate": 0.00019999992974471583, - "loss": 46.0, - "step": 4945 - }, - { - "epoch": 0.3781562398455569, - "grad_norm": 0.0011472138576209545, - "learning_rate": 0.0001999999297162407, - "loss": 46.0, - "step": 4946 - }, - { - "epoch": 0.3782326968289466, - "grad_norm": 0.003403461305424571, - "learning_rate": 0.0001999999296877598, - "loss": 46.0, - "step": 4947 - }, - { - "epoch": 0.3783091538123363, - "grad_norm": 0.003906186204403639, - "learning_rate": 0.00019999992965927311, - "loss": 46.0, - "step": 4948 - }, - { - "epoch": 0.3783856107957261, - "grad_norm": 0.0017572161741554737, - "learning_rate": 0.00019999992963078066, - "loss": 46.0, - "step": 4949 - }, - { - "epoch": 0.3784620677791158, - "grad_norm": 0.0020414418540894985, - "learning_rate": 0.00019999992960228246, - "loss": 46.0, - "step": 4950 - }, - { - "epoch": 0.3785385247625055, - "grad_norm": 0.001186339184641838, - "learning_rate": 0.0001999999295737785, - "loss": 46.0, - "step": 4951 - }, - { - "epoch": 0.3786149817458952, - "grad_norm": 0.007186608854681253, - "learning_rate": 0.0001999999295452687, - "loss": 46.0, - "step": 4952 - }, - { - "epoch": 0.37869143872928496, - "grad_norm": 0.0008579810382798314, - "learning_rate": 0.00019999992951675322, - "loss": 46.0, - "step": 4953 - }, - { - "epoch": 0.37876789571267466, - "grad_norm": 0.0020101191475987434, - "learning_rate": 0.00019999992948823193, - "loss": 46.0, - "step": 4954 - }, - { - "epoch": 0.37884435269606437, - "grad_norm": 0.0015061977319419384, - "learning_rate": 0.00019999992945970486, - "loss": 46.0, - "step": 4955 - }, - { - "epoch": 0.3789208096794541, - "grad_norm": 0.0013935064198449254, - "learning_rate": 0.00019999992943117202, - "loss": 46.0, - "step": 4956 - }, - { - "epoch": 0.37899726666284383, - "grad_norm": 0.0010541134979575872, - "learning_rate": 0.00019999992940263343, - "loss": 46.0, - "step": 4957 - }, - { - "epoch": 0.37907372364623354, - "grad_norm": 0.0024583195336163044, - "learning_rate": 0.00019999992937408907, - "loss": 46.0, - "step": 4958 - }, - { - "epoch": 0.37915018062962325, - "grad_norm": 0.000910849601496011, - "learning_rate": 0.00019999992934553894, - "loss": 46.0, - "step": 4959 - }, - { - "epoch": 0.37922663761301295, - "grad_norm": 0.0007606238941662014, - "learning_rate": 0.000199999929316983, - "loss": 46.0, - "step": 4960 - }, - { - "epoch": 0.3793030945964027, - "grad_norm": 0.00043626653496176004, - "learning_rate": 0.00019999992928842133, - "loss": 46.0, - "step": 4961 - }, - { - "epoch": 0.3793795515797924, - "grad_norm": 0.0008070764597505331, - "learning_rate": 0.0001999999292598539, - "loss": 46.0, - "step": 4962 - }, - { - "epoch": 0.3794560085631821, - "grad_norm": 0.0007010205881670117, - "learning_rate": 0.00019999992923128067, - "loss": 46.0, - "step": 4963 - }, - { - "epoch": 0.3795324655465719, - "grad_norm": 0.002104604383930564, - "learning_rate": 0.0001999999292027017, - "loss": 46.0, - "step": 4964 - }, - { - "epoch": 0.3796089225299616, - "grad_norm": 0.0011542040156200528, - "learning_rate": 0.00019999992917411693, - "loss": 46.0, - "step": 4965 - }, - { - "epoch": 0.3796853795133513, - "grad_norm": 0.00029347772942855954, - "learning_rate": 0.00019999992914552644, - "loss": 46.0, - "step": 4966 - }, - { - "epoch": 0.379761836496741, - "grad_norm": 0.000831316749099642, - "learning_rate": 0.00019999992911693014, - "loss": 46.0, - "step": 4967 - }, - { - "epoch": 0.37983829348013076, - "grad_norm": 0.002270141616463661, - "learning_rate": 0.00019999992908832805, - "loss": 46.0, - "step": 4968 - }, - { - "epoch": 0.37991475046352047, - "grad_norm": 0.007392963860183954, - "learning_rate": 0.00019999992905972024, - "loss": 46.0, - "step": 4969 - }, - { - "epoch": 0.3799912074469102, - "grad_norm": 0.0010177148506045341, - "learning_rate": 0.00019999992903110662, - "loss": 46.0, - "step": 4970 - }, - { - "epoch": 0.3800676644302999, - "grad_norm": 0.0010335877304896712, - "learning_rate": 0.00019999992900248726, - "loss": 46.0, - "step": 4971 - }, - { - "epoch": 0.38014412141368964, - "grad_norm": 0.0015258160419762135, - "learning_rate": 0.00019999992897386213, - "loss": 46.0, - "step": 4972 - }, - { - "epoch": 0.38022057839707935, - "grad_norm": 0.0016457870369777083, - "learning_rate": 0.0001999999289452312, - "loss": 46.0, - "step": 4973 - }, - { - "epoch": 0.38029703538046905, - "grad_norm": 0.002090898808091879, - "learning_rate": 0.00019999992891659454, - "loss": 46.0, - "step": 4974 - }, - { - "epoch": 0.3803734923638588, - "grad_norm": 0.006494347937405109, - "learning_rate": 0.00019999992888795206, - "loss": 46.0, - "step": 4975 - }, - { - "epoch": 0.3804499493472485, - "grad_norm": 0.001054001972079277, - "learning_rate": 0.00019999992885930387, - "loss": 46.0, - "step": 4976 - }, - { - "epoch": 0.3805264063306382, - "grad_norm": 0.002643430605530739, - "learning_rate": 0.0001999999288306499, - "loss": 46.0, - "step": 4977 - }, - { - "epoch": 0.38060286331402793, - "grad_norm": 0.0019271423807367682, - "learning_rate": 0.00019999992880199012, - "loss": 46.0, - "step": 4978 - }, - { - "epoch": 0.3806793202974177, - "grad_norm": 0.0014187066117301583, - "learning_rate": 0.0001999999287733246, - "loss": 46.0, - "step": 4979 - }, - { - "epoch": 0.3807557772808074, - "grad_norm": 0.000565417343750596, - "learning_rate": 0.0001999999287446533, - "loss": 46.0, - "step": 4980 - }, - { - "epoch": 0.3808322342641971, - "grad_norm": 0.0012101131724193692, - "learning_rate": 0.00019999992871597624, - "loss": 46.0, - "step": 4981 - }, - { - "epoch": 0.3809086912475868, - "grad_norm": 0.0024158800952136517, - "learning_rate": 0.0001999999286872934, - "loss": 46.0, - "step": 4982 - }, - { - "epoch": 0.38098514823097657, - "grad_norm": 0.00048457173397764564, - "learning_rate": 0.0001999999286586048, - "loss": 46.0, - "step": 4983 - }, - { - "epoch": 0.3810616052143663, - "grad_norm": 0.003602725686505437, - "learning_rate": 0.00019999992862991043, - "loss": 46.0, - "step": 4984 - }, - { - "epoch": 0.381138062197756, - "grad_norm": 0.0021635626908391714, - "learning_rate": 0.0001999999286012103, - "loss": 46.0, - "step": 4985 - }, - { - "epoch": 0.3812145191811457, - "grad_norm": 0.002915341407060623, - "learning_rate": 0.00019999992857250436, - "loss": 46.0, - "step": 4986 - }, - { - "epoch": 0.38129097616453544, - "grad_norm": 0.004720613826066256, - "learning_rate": 0.0001999999285437927, - "loss": 46.0, - "step": 4987 - }, - { - "epoch": 0.38136743314792515, - "grad_norm": 0.0014432631433010101, - "learning_rate": 0.00019999992851507523, - "loss": 46.0, - "step": 4988 - }, - { - "epoch": 0.38144389013131486, - "grad_norm": 0.0010512990411370993, - "learning_rate": 0.00019999992848635203, - "loss": 46.0, - "step": 4989 - }, - { - "epoch": 0.3815203471147046, - "grad_norm": 0.0019252338679507375, - "learning_rate": 0.000199999928457623, - "loss": 46.0, - "step": 4990 - }, - { - "epoch": 0.3815968040980943, - "grad_norm": 0.01550122071057558, - "learning_rate": 0.00019999992842888826, - "loss": 46.0, - "step": 4991 - }, - { - "epoch": 0.381673261081484, - "grad_norm": 0.0017938502132892609, - "learning_rate": 0.00019999992840014774, - "loss": 46.0, - "step": 4992 - }, - { - "epoch": 0.38174971806487373, - "grad_norm": 0.000750053150113672, - "learning_rate": 0.00019999992837140145, - "loss": 46.0, - "step": 4993 - }, - { - "epoch": 0.3818261750482635, - "grad_norm": 0.007179294712841511, - "learning_rate": 0.00019999992834264938, - "loss": 46.0, - "step": 4994 - }, - { - "epoch": 0.3819026320316532, - "grad_norm": 0.001865683007054031, - "learning_rate": 0.00019999992831389154, - "loss": 46.0, - "step": 4995 - }, - { - "epoch": 0.3819790890150429, - "grad_norm": 0.003025611164048314, - "learning_rate": 0.00019999992828512793, - "loss": 46.0, - "step": 4996 - }, - { - "epoch": 0.3820555459984326, - "grad_norm": 0.0005240002064965665, - "learning_rate": 0.00019999992825635855, - "loss": 46.0, - "step": 4997 - }, - { - "epoch": 0.38213200298182237, - "grad_norm": 0.001084992429241538, - "learning_rate": 0.00019999992822758339, - "loss": 46.0, - "step": 4998 - }, - { - "epoch": 0.3822084599652121, - "grad_norm": 0.0010686705354601145, - "learning_rate": 0.00019999992819880248, - "loss": 46.0, - "step": 4999 - }, - { - "epoch": 0.3822849169486018, - "grad_norm": 0.0018099829321727157, - "learning_rate": 0.0001999999281700158, - "loss": 46.0, - "step": 5000 - }, - { - "epoch": 0.3823613739319915, - "grad_norm": 0.0015157641610130668, - "learning_rate": 0.00019999992814122335, - "loss": 46.0, - "step": 5001 - }, - { - "epoch": 0.38243783091538125, - "grad_norm": 0.0007292305817827582, - "learning_rate": 0.00019999992811242515, - "loss": 46.0, - "step": 5002 - }, - { - "epoch": 0.38251428789877095, - "grad_norm": 0.0006928403163328767, - "learning_rate": 0.00019999992808362112, - "loss": 46.0, - "step": 5003 - }, - { - "epoch": 0.38259074488216066, - "grad_norm": 0.0003051522944588214, - "learning_rate": 0.00019999992805481135, - "loss": 46.0, - "step": 5004 - }, - { - "epoch": 0.3826672018655504, - "grad_norm": 0.0514926053583622, - "learning_rate": 0.00019999992802599583, - "loss": 46.0, - "step": 5005 - }, - { - "epoch": 0.3827436588489401, - "grad_norm": 0.006026845425367355, - "learning_rate": 0.00019999992799717454, - "loss": 46.0, - "step": 5006 - }, - { - "epoch": 0.38282011583232983, - "grad_norm": 0.0008158411365002394, - "learning_rate": 0.00019999992796834745, - "loss": 46.0, - "step": 5007 - }, - { - "epoch": 0.38289657281571954, - "grad_norm": 0.00041071680607274175, - "learning_rate": 0.00019999992793951464, - "loss": 46.0, - "step": 5008 - }, - { - "epoch": 0.3829730297991093, - "grad_norm": 0.0009232686134055257, - "learning_rate": 0.00019999992791067602, - "loss": 46.0, - "step": 5009 - }, - { - "epoch": 0.383049486782499, - "grad_norm": 0.0008955828379839659, - "learning_rate": 0.00019999992788183164, - "loss": 46.0, - "step": 5010 - }, - { - "epoch": 0.3831259437658887, - "grad_norm": 0.003606929210945964, - "learning_rate": 0.00019999992785298148, - "loss": 46.0, - "step": 5011 - }, - { - "epoch": 0.3832024007492784, - "grad_norm": 0.0006889833020977676, - "learning_rate": 0.00019999992782412557, - "loss": 46.0, - "step": 5012 - }, - { - "epoch": 0.3832788577326682, - "grad_norm": 0.0033818173687905073, - "learning_rate": 0.0001999999277952639, - "loss": 46.0, - "step": 5013 - }, - { - "epoch": 0.3833553147160579, - "grad_norm": 0.0023649297654628754, - "learning_rate": 0.00019999992776639642, - "loss": 46.0, - "step": 5014 - }, - { - "epoch": 0.3834317716994476, - "grad_norm": 0.0013693240471184254, - "learning_rate": 0.00019999992773752322, - "loss": 46.0, - "step": 5015 - }, - { - "epoch": 0.3835082286828373, - "grad_norm": 0.0009798690443858504, - "learning_rate": 0.00019999992770864422, - "loss": 46.0, - "step": 5016 - }, - { - "epoch": 0.38358468566622705, - "grad_norm": 0.002101196674630046, - "learning_rate": 0.00019999992767975945, - "loss": 46.0, - "step": 5017 - }, - { - "epoch": 0.38366114264961676, - "grad_norm": 0.0006856800755485892, - "learning_rate": 0.0001999999276508689, - "loss": 46.0, - "step": 5018 - }, - { - "epoch": 0.38373759963300647, - "grad_norm": 0.0012978214072063565, - "learning_rate": 0.0001999999276219726, - "loss": 46.0, - "step": 5019 - }, - { - "epoch": 0.3838140566163962, - "grad_norm": 0.0005282756756059825, - "learning_rate": 0.00019999992759307054, - "loss": 46.0, - "step": 5020 - }, - { - "epoch": 0.38389051359978593, - "grad_norm": 0.0015676789917051792, - "learning_rate": 0.00019999992756416268, - "loss": 46.0, - "step": 5021 - }, - { - "epoch": 0.38396697058317564, - "grad_norm": 0.00042119945283047855, - "learning_rate": 0.00019999992753524907, - "loss": 46.0, - "step": 5022 - }, - { - "epoch": 0.38404342756656534, - "grad_norm": 0.0009619808988645673, - "learning_rate": 0.0001999999275063297, - "loss": 46.0, - "step": 5023 - }, - { - "epoch": 0.3841198845499551, - "grad_norm": 0.0013550643343478441, - "learning_rate": 0.00019999992747740452, - "loss": 46.0, - "step": 5024 - }, - { - "epoch": 0.3841963415333448, - "grad_norm": 0.0006222610245458782, - "learning_rate": 0.00019999992744847362, - "loss": 46.0, - "step": 5025 - }, - { - "epoch": 0.3842727985167345, - "grad_norm": 0.0012270138831809163, - "learning_rate": 0.0001999999274195369, - "loss": 46.0, - "step": 5026 - }, - { - "epoch": 0.3843492555001242, - "grad_norm": 0.0007512378506362438, - "learning_rate": 0.00019999992739059446, - "loss": 46.0, - "step": 5027 - }, - { - "epoch": 0.384425712483514, - "grad_norm": 0.005678137298673391, - "learning_rate": 0.00019999992736164624, - "loss": 46.0, - "step": 5028 - }, - { - "epoch": 0.3845021694669037, - "grad_norm": 0.005132416263222694, - "learning_rate": 0.00019999992733269224, - "loss": 46.0, - "step": 5029 - }, - { - "epoch": 0.3845786264502934, - "grad_norm": 0.0011747103417292237, - "learning_rate": 0.00019999992730373247, - "loss": 46.0, - "step": 5030 - }, - { - "epoch": 0.3846550834336831, - "grad_norm": 0.00031244984711520374, - "learning_rate": 0.00019999992727476692, - "loss": 46.0, - "step": 5031 - }, - { - "epoch": 0.38473154041707286, - "grad_norm": 0.0007497024489566684, - "learning_rate": 0.00019999992724579563, - "loss": 46.0, - "step": 5032 - }, - { - "epoch": 0.38480799740046256, - "grad_norm": 0.0008607287309132516, - "learning_rate": 0.00019999992721681854, - "loss": 46.0, - "step": 5033 - }, - { - "epoch": 0.38488445438385227, - "grad_norm": 0.0005297533934935927, - "learning_rate": 0.0001999999271878357, - "loss": 46.0, - "step": 5034 - }, - { - "epoch": 0.38496091136724203, - "grad_norm": 0.0015238580526784062, - "learning_rate": 0.0001999999271588471, - "loss": 46.0, - "step": 5035 - }, - { - "epoch": 0.38503736835063174, - "grad_norm": 0.0007831451366655529, - "learning_rate": 0.00019999992712985273, - "loss": 46.0, - "step": 5036 - }, - { - "epoch": 0.38511382533402144, - "grad_norm": 0.022358976304531097, - "learning_rate": 0.00019999992710085254, - "loss": 46.0, - "step": 5037 - }, - { - "epoch": 0.38519028231741115, - "grad_norm": 0.0008355851750820875, - "learning_rate": 0.00019999992707184664, - "loss": 46.0, - "step": 5038 - }, - { - "epoch": 0.3852667393008009, - "grad_norm": 0.0021296977065503597, - "learning_rate": 0.00019999992704283493, - "loss": 46.0, - "step": 5039 - }, - { - "epoch": 0.3853431962841906, - "grad_norm": 0.0005228519439697266, - "learning_rate": 0.00019999992701381746, - "loss": 46.0, - "step": 5040 - }, - { - "epoch": 0.3854196532675803, - "grad_norm": 0.0010646473383530974, - "learning_rate": 0.00019999992698479423, - "loss": 46.0, - "step": 5041 - }, - { - "epoch": 0.38549611025097, - "grad_norm": 0.002040991559624672, - "learning_rate": 0.00019999992695576523, - "loss": 46.0, - "step": 5042 - }, - { - "epoch": 0.3855725672343598, - "grad_norm": 0.0009962695185095072, - "learning_rate": 0.00019999992692673046, - "loss": 46.0, - "step": 5043 - }, - { - "epoch": 0.3856490242177495, - "grad_norm": 0.0004573659098241478, - "learning_rate": 0.00019999992689768994, - "loss": 46.0, - "step": 5044 - }, - { - "epoch": 0.3857254812011392, - "grad_norm": 0.001267716521397233, - "learning_rate": 0.00019999992686864362, - "loss": 46.0, - "step": 5045 - }, - { - "epoch": 0.3858019381845289, - "grad_norm": 0.0018960109446197748, - "learning_rate": 0.00019999992683959153, - "loss": 46.0, - "step": 5046 - }, - { - "epoch": 0.38587839516791866, - "grad_norm": 0.0011301188496872783, - "learning_rate": 0.0001999999268105337, - "loss": 46.0, - "step": 5047 - }, - { - "epoch": 0.38595485215130837, - "grad_norm": 0.0010853378335013986, - "learning_rate": 0.00019999992678147006, - "loss": 46.0, - "step": 5048 - }, - { - "epoch": 0.3860313091346981, - "grad_norm": 0.001279229880310595, - "learning_rate": 0.0001999999267524007, - "loss": 46.0, - "step": 5049 - }, - { - "epoch": 0.38610776611808784, - "grad_norm": 0.00043137866305187345, - "learning_rate": 0.00019999992672332551, - "loss": 46.0, - "step": 5050 - }, - { - "epoch": 0.38618422310147754, - "grad_norm": 0.0014631374506279826, - "learning_rate": 0.00019999992669424458, - "loss": 46.0, - "step": 5051 - }, - { - "epoch": 0.38626068008486725, - "grad_norm": 0.0014483515406027436, - "learning_rate": 0.0001999999266651579, - "loss": 46.0, - "step": 5052 - }, - { - "epoch": 0.38633713706825695, - "grad_norm": 0.002519422210752964, - "learning_rate": 0.00019999992663606543, - "loss": 46.0, - "step": 5053 - }, - { - "epoch": 0.3864135940516467, - "grad_norm": 0.0011974717490375042, - "learning_rate": 0.0001999999266069672, - "loss": 46.0, - "step": 5054 - }, - { - "epoch": 0.3864900510350364, - "grad_norm": 0.0015806478913873434, - "learning_rate": 0.0001999999265778632, - "loss": 46.0, - "step": 5055 - }, - { - "epoch": 0.3865665080184261, - "grad_norm": 0.0015322653343901038, - "learning_rate": 0.0001999999265487534, - "loss": 46.0, - "step": 5056 - }, - { - "epoch": 0.38664296500181583, - "grad_norm": 0.0014195576077327132, - "learning_rate": 0.0001999999265196379, - "loss": 46.0, - "step": 5057 - }, - { - "epoch": 0.3867194219852056, - "grad_norm": 0.0009716078056953847, - "learning_rate": 0.00019999992649051657, - "loss": 46.0, - "step": 5058 - }, - { - "epoch": 0.3867958789685953, - "grad_norm": 0.0016142894746735692, - "learning_rate": 0.00019999992646138948, - "loss": 46.0, - "step": 5059 - }, - { - "epoch": 0.386872335951985, - "grad_norm": 0.001137088518589735, - "learning_rate": 0.00019999992643225664, - "loss": 46.0, - "step": 5060 - }, - { - "epoch": 0.3869487929353747, - "grad_norm": 0.0006488229846581817, - "learning_rate": 0.000199999926403118, - "loss": 46.0, - "step": 5061 - }, - { - "epoch": 0.38702524991876447, - "grad_norm": 0.0004907710826955736, - "learning_rate": 0.00019999992637397362, - "loss": 46.0, - "step": 5062 - }, - { - "epoch": 0.3871017069021542, - "grad_norm": 0.00034755514934659004, - "learning_rate": 0.00019999992634482346, - "loss": 46.0, - "step": 5063 - }, - { - "epoch": 0.3871781638855439, - "grad_norm": 0.0007590790628455579, - "learning_rate": 0.00019999992631566753, - "loss": 46.0, - "step": 5064 - }, - { - "epoch": 0.38725462086893364, - "grad_norm": 0.009573681280016899, - "learning_rate": 0.00019999992628650583, - "loss": 46.0, - "step": 5065 - }, - { - "epoch": 0.38733107785232335, - "grad_norm": 0.0034562053624540567, - "learning_rate": 0.00019999992625733837, - "loss": 46.0, - "step": 5066 - }, - { - "epoch": 0.38740753483571305, - "grad_norm": 0.0015021811705082655, - "learning_rate": 0.00019999992622816512, - "loss": 46.0, - "step": 5067 - }, - { - "epoch": 0.38748399181910276, - "grad_norm": 0.0008419494261033833, - "learning_rate": 0.00019999992619898613, - "loss": 46.0, - "step": 5068 - }, - { - "epoch": 0.3875604488024925, - "grad_norm": 0.0004314680409152061, - "learning_rate": 0.00019999992616980135, - "loss": 46.0, - "step": 5069 - }, - { - "epoch": 0.3876369057858822, - "grad_norm": 0.000730108586139977, - "learning_rate": 0.00019999992614061078, - "loss": 46.0, - "step": 5070 - }, - { - "epoch": 0.38771336276927193, - "grad_norm": 0.0014029715675860643, - "learning_rate": 0.0001999999261114145, - "loss": 46.0, - "step": 5071 - }, - { - "epoch": 0.38778981975266164, - "grad_norm": 0.0010304381139576435, - "learning_rate": 0.00019999992608221237, - "loss": 46.0, - "step": 5072 - }, - { - "epoch": 0.3878662767360514, - "grad_norm": 0.0039737895131111145, - "learning_rate": 0.00019999992605300454, - "loss": 46.0, - "step": 5073 - }, - { - "epoch": 0.3879427337194411, - "grad_norm": 0.0003692148602567613, - "learning_rate": 0.00019999992602379093, - "loss": 46.0, - "step": 5074 - }, - { - "epoch": 0.3880191907028308, - "grad_norm": 0.0005067788879387081, - "learning_rate": 0.00019999992599457152, - "loss": 46.0, - "step": 5075 - }, - { - "epoch": 0.3880956476862205, - "grad_norm": 0.0016741999424993992, - "learning_rate": 0.00019999992596534636, - "loss": 46.0, - "step": 5076 - }, - { - "epoch": 0.3881721046696103, - "grad_norm": 0.004904313012957573, - "learning_rate": 0.00019999992593611543, - "loss": 46.0, - "step": 5077 - }, - { - "epoch": 0.388248561653, - "grad_norm": 0.0013878638856112957, - "learning_rate": 0.00019999992590687872, - "loss": 46.0, - "step": 5078 - }, - { - "epoch": 0.3883250186363897, - "grad_norm": 0.0012515804264694452, - "learning_rate": 0.00019999992587763625, - "loss": 46.0, - "step": 5079 - }, - { - "epoch": 0.38840147561977945, - "grad_norm": 0.0029100251849740744, - "learning_rate": 0.000199999925848388, - "loss": 46.0, - "step": 5080 - }, - { - "epoch": 0.38847793260316915, - "grad_norm": 0.003508092137053609, - "learning_rate": 0.000199999925819134, - "loss": 46.0, - "step": 5081 - }, - { - "epoch": 0.38855438958655886, - "grad_norm": 0.0012574639404192567, - "learning_rate": 0.00019999992578987423, - "loss": 46.0, - "step": 5082 - }, - { - "epoch": 0.38863084656994856, - "grad_norm": 0.0008746524690650403, - "learning_rate": 0.00019999992576060866, - "loss": 46.0, - "step": 5083 - }, - { - "epoch": 0.3887073035533383, - "grad_norm": 0.0011893465416505933, - "learning_rate": 0.00019999992573133737, - "loss": 46.0, - "step": 5084 - }, - { - "epoch": 0.38878376053672803, - "grad_norm": 0.0013913646107539535, - "learning_rate": 0.00019999992570206025, - "loss": 46.0, - "step": 5085 - }, - { - "epoch": 0.38886021752011773, - "grad_norm": 0.0018540453165769577, - "learning_rate": 0.0001999999256727774, - "loss": 46.0, - "step": 5086 - }, - { - "epoch": 0.38893667450350744, - "grad_norm": 0.001524603576399386, - "learning_rate": 0.0001999999256434888, - "loss": 46.0, - "step": 5087 - }, - { - "epoch": 0.3890131314868972, - "grad_norm": 0.0026149724144488573, - "learning_rate": 0.0001999999256141944, - "loss": 46.0, - "step": 5088 - }, - { - "epoch": 0.3890895884702869, - "grad_norm": 0.0011269188253208995, - "learning_rate": 0.00019999992558489423, - "loss": 46.0, - "step": 5089 - }, - { - "epoch": 0.3891660454536766, - "grad_norm": 0.0004958967329002917, - "learning_rate": 0.00019999992555558827, - "loss": 46.0, - "step": 5090 - }, - { - "epoch": 0.3892425024370663, - "grad_norm": 0.004132285714149475, - "learning_rate": 0.00019999992552627657, - "loss": 46.0, - "step": 5091 - }, - { - "epoch": 0.3893189594204561, - "grad_norm": 0.00184576865285635, - "learning_rate": 0.00019999992549695912, - "loss": 46.0, - "step": 5092 - }, - { - "epoch": 0.3893954164038458, - "grad_norm": 0.0006565260700881481, - "learning_rate": 0.00019999992546763587, - "loss": 46.0, - "step": 5093 - }, - { - "epoch": 0.3894718733872355, - "grad_norm": 0.004384566098451614, - "learning_rate": 0.00019999992543830685, - "loss": 46.0, - "step": 5094 - }, - { - "epoch": 0.38954833037062525, - "grad_norm": 0.0009473259560763836, - "learning_rate": 0.00019999992540897208, - "loss": 46.0, - "step": 5095 - }, - { - "epoch": 0.38962478735401496, - "grad_norm": 0.0009245181572623551, - "learning_rate": 0.0001999999253796315, - "loss": 46.0, - "step": 5096 - }, - { - "epoch": 0.38970124433740466, - "grad_norm": 0.0016667972085997462, - "learning_rate": 0.0001999999253502852, - "loss": 46.0, - "step": 5097 - }, - { - "epoch": 0.38977770132079437, - "grad_norm": 0.0008501725969836116, - "learning_rate": 0.0001999999253209331, - "loss": 46.0, - "step": 5098 - }, - { - "epoch": 0.38985415830418413, - "grad_norm": 0.0021702460944652557, - "learning_rate": 0.00019999992529157523, - "loss": 46.0, - "step": 5099 - }, - { - "epoch": 0.38993061528757383, - "grad_norm": 0.0017222303431481123, - "learning_rate": 0.0001999999252622116, - "loss": 46.0, - "step": 5100 - }, - { - "epoch": 0.39000707227096354, - "grad_norm": 0.001087190699763596, - "learning_rate": 0.0001999999252328422, - "loss": 46.0, - "step": 5101 - }, - { - "epoch": 0.39008352925435325, - "grad_norm": 0.0009349556639790535, - "learning_rate": 0.00019999992520346706, - "loss": 46.0, - "step": 5102 - }, - { - "epoch": 0.390159986237743, - "grad_norm": 0.0029659061692655087, - "learning_rate": 0.0001999999251740861, - "loss": 46.0, - "step": 5103 - }, - { - "epoch": 0.3902364432211327, - "grad_norm": 0.0008405956323258579, - "learning_rate": 0.0001999999251446994, - "loss": 46.0, - "step": 5104 - }, - { - "epoch": 0.3903129002045224, - "grad_norm": 0.0007301776204258204, - "learning_rate": 0.00019999992511530695, - "loss": 46.0, - "step": 5105 - }, - { - "epoch": 0.3903893571879122, - "grad_norm": 0.0008082663407549262, - "learning_rate": 0.00019999992508590867, - "loss": 46.0, - "step": 5106 - }, - { - "epoch": 0.3904658141713019, - "grad_norm": 0.0008237584261223674, - "learning_rate": 0.00019999992505650467, - "loss": 46.0, - "step": 5107 - }, - { - "epoch": 0.3905422711546916, - "grad_norm": 0.001603541779331863, - "learning_rate": 0.00019999992502709487, - "loss": 46.0, - "step": 5108 - }, - { - "epoch": 0.3906187281380813, - "grad_norm": 0.0006512822583317757, - "learning_rate": 0.00019999992499767933, - "loss": 46.0, - "step": 5109 - }, - { - "epoch": 0.39069518512147106, - "grad_norm": 0.0025201989337801933, - "learning_rate": 0.000199999924968258, - "loss": 46.0, - "step": 5110 - }, - { - "epoch": 0.39077164210486076, - "grad_norm": 0.0008589408826082945, - "learning_rate": 0.0001999999249388309, - "loss": 46.0, - "step": 5111 - }, - { - "epoch": 0.39084809908825047, - "grad_norm": 0.0007604301790706813, - "learning_rate": 0.00019999992490939806, - "loss": 46.0, - "step": 5112 - }, - { - "epoch": 0.3909245560716402, - "grad_norm": 0.0029012097511440516, - "learning_rate": 0.00019999992487995942, - "loss": 46.0, - "step": 5113 - }, - { - "epoch": 0.39100101305502993, - "grad_norm": 0.001728267059661448, - "learning_rate": 0.000199999924850515, - "loss": 46.0, - "step": 5114 - }, - { - "epoch": 0.39107747003841964, - "grad_norm": 0.0007799426093697548, - "learning_rate": 0.00019999992482106486, - "loss": 46.0, - "step": 5115 - }, - { - "epoch": 0.39115392702180934, - "grad_norm": 0.0007992628961801529, - "learning_rate": 0.0001999999247916089, - "loss": 46.0, - "step": 5116 - }, - { - "epoch": 0.39123038400519905, - "grad_norm": 0.0012400917476043105, - "learning_rate": 0.00019999992476214717, - "loss": 46.0, - "step": 5117 - }, - { - "epoch": 0.3913068409885888, - "grad_norm": 0.0023228100035339594, - "learning_rate": 0.00019999992473267972, - "loss": 46.0, - "step": 5118 - }, - { - "epoch": 0.3913832979719785, - "grad_norm": 0.0033154755365103483, - "learning_rate": 0.00019999992470320644, - "loss": 46.0, - "step": 5119 - }, - { - "epoch": 0.3914597549553682, - "grad_norm": 0.00548673328012228, - "learning_rate": 0.00019999992467372745, - "loss": 46.0, - "step": 5120 - }, - { - "epoch": 0.391536211938758, - "grad_norm": 0.0013765579788014293, - "learning_rate": 0.00019999992464424265, - "loss": 46.0, - "step": 5121 - }, - { - "epoch": 0.3916126689221477, - "grad_norm": 0.0014996732352301478, - "learning_rate": 0.0001999999246147521, - "loss": 46.0, - "step": 5122 - }, - { - "epoch": 0.3916891259055374, - "grad_norm": 0.0031229890882968903, - "learning_rate": 0.00019999992458525576, - "loss": 46.0, - "step": 5123 - }, - { - "epoch": 0.3917655828889271, - "grad_norm": 0.0011169922072440386, - "learning_rate": 0.00019999992455575367, - "loss": 46.0, - "step": 5124 - }, - { - "epoch": 0.39184203987231686, - "grad_norm": 0.0013352936366572976, - "learning_rate": 0.0001999999245262458, - "loss": 46.0, - "step": 5125 - }, - { - "epoch": 0.39191849685570657, - "grad_norm": 0.0014544575242325664, - "learning_rate": 0.00019999992449673217, - "loss": 46.0, - "step": 5126 - }, - { - "epoch": 0.39199495383909627, - "grad_norm": 0.0006914925761520863, - "learning_rate": 0.00019999992446721276, - "loss": 46.0, - "step": 5127 - }, - { - "epoch": 0.392071410822486, - "grad_norm": 0.0014417931670323014, - "learning_rate": 0.0001999999244376876, - "loss": 46.0, - "step": 5128 - }, - { - "epoch": 0.39214786780587574, - "grad_norm": 0.0005496296798810363, - "learning_rate": 0.00019999992440815665, - "loss": 46.0, - "step": 5129 - }, - { - "epoch": 0.39222432478926544, - "grad_norm": 0.0007964738178998232, - "learning_rate": 0.00019999992437861992, - "loss": 46.0, - "step": 5130 - }, - { - "epoch": 0.39230078177265515, - "grad_norm": 0.0012175141600891948, - "learning_rate": 0.00019999992434907747, - "loss": 46.0, - "step": 5131 - }, - { - "epoch": 0.39237723875604485, - "grad_norm": 0.0024001123383641243, - "learning_rate": 0.0001999999243195292, - "loss": 46.0, - "step": 5132 - }, - { - "epoch": 0.3924536957394346, - "grad_norm": 0.0005558711127378047, - "learning_rate": 0.00019999992428997517, - "loss": 46.0, - "step": 5133 - }, - { - "epoch": 0.3925301527228243, - "grad_norm": 0.004153661895543337, - "learning_rate": 0.00019999992426041538, - "loss": 46.0, - "step": 5134 - }, - { - "epoch": 0.392606609706214, - "grad_norm": 0.0008092347416095436, - "learning_rate": 0.0001999999242308498, - "loss": 46.0, - "step": 5135 - }, - { - "epoch": 0.3926830666896038, - "grad_norm": 0.0009048887877725065, - "learning_rate": 0.0001999999242012785, - "loss": 46.0, - "step": 5136 - }, - { - "epoch": 0.3927595236729935, - "grad_norm": 0.004317007027566433, - "learning_rate": 0.0001999999241717014, - "loss": 46.0, - "step": 5137 - }, - { - "epoch": 0.3928359806563832, - "grad_norm": 0.0023986853193491697, - "learning_rate": 0.0001999999241421185, - "loss": 46.0, - "step": 5138 - }, - { - "epoch": 0.3929124376397729, - "grad_norm": 0.000637244142126292, - "learning_rate": 0.00019999992411252987, - "loss": 46.0, - "step": 5139 - }, - { - "epoch": 0.39298889462316267, - "grad_norm": 0.004860072862356901, - "learning_rate": 0.0001999999240829355, - "loss": 46.0, - "step": 5140 - }, - { - "epoch": 0.39306535160655237, - "grad_norm": 0.0027384189888834953, - "learning_rate": 0.0001999999240533353, - "loss": 46.0, - "step": 5141 - }, - { - "epoch": 0.3931418085899421, - "grad_norm": 0.0015161463525146246, - "learning_rate": 0.00019999992402372935, - "loss": 46.0, - "step": 5142 - }, - { - "epoch": 0.3932182655733318, - "grad_norm": 0.002926499815657735, - "learning_rate": 0.00019999992399411765, - "loss": 46.0, - "step": 5143 - }, - { - "epoch": 0.39329472255672154, - "grad_norm": 0.0041192155331373215, - "learning_rate": 0.00019999992396450015, - "loss": 46.0, - "step": 5144 - }, - { - "epoch": 0.39337117954011125, - "grad_norm": 0.001989888260141015, - "learning_rate": 0.0001999999239348769, - "loss": 46.0, - "step": 5145 - }, - { - "epoch": 0.39344763652350095, - "grad_norm": 0.0006507466896437109, - "learning_rate": 0.00019999992390524787, - "loss": 46.0, - "step": 5146 - }, - { - "epoch": 0.39352409350689066, - "grad_norm": 0.007054603658616543, - "learning_rate": 0.00019999992387561308, - "loss": 46.0, - "step": 5147 - }, - { - "epoch": 0.3936005504902804, - "grad_norm": 0.008480913005769253, - "learning_rate": 0.00019999992384597254, - "loss": 46.0, - "step": 5148 - }, - { - "epoch": 0.3936770074736701, - "grad_norm": 0.0007137679494917393, - "learning_rate": 0.0001999999238163262, - "loss": 46.0, - "step": 5149 - }, - { - "epoch": 0.39375346445705983, - "grad_norm": 0.0012047920608893037, - "learning_rate": 0.0001999999237866741, - "loss": 46.0, - "step": 5150 - }, - { - "epoch": 0.3938299214404496, - "grad_norm": 0.0010212705237790942, - "learning_rate": 0.00019999992375701621, - "loss": 46.0, - "step": 5151 - }, - { - "epoch": 0.3939063784238393, - "grad_norm": 0.000611610827036202, - "learning_rate": 0.00019999992372735258, - "loss": 46.0, - "step": 5152 - }, - { - "epoch": 0.393982835407229, - "grad_norm": 0.0076452926732599735, - "learning_rate": 0.00019999992369768317, - "loss": 46.0, - "step": 5153 - }, - { - "epoch": 0.3940592923906187, - "grad_norm": 0.0012855703243985772, - "learning_rate": 0.00019999992366800799, - "loss": 46.0, - "step": 5154 - }, - { - "epoch": 0.39413574937400847, - "grad_norm": 0.0008539396803826094, - "learning_rate": 0.00019999992363832703, - "loss": 46.0, - "step": 5155 - }, - { - "epoch": 0.3942122063573982, - "grad_norm": 0.0011326392414048314, - "learning_rate": 0.00019999992360864033, - "loss": 46.0, - "step": 5156 - }, - { - "epoch": 0.3942886633407879, - "grad_norm": 0.0008603236638009548, - "learning_rate": 0.00019999992357894782, - "loss": 46.0, - "step": 5157 - }, - { - "epoch": 0.3943651203241776, - "grad_norm": 0.0015567372320219874, - "learning_rate": 0.00019999992354924958, - "loss": 46.0, - "step": 5158 - }, - { - "epoch": 0.39444157730756735, - "grad_norm": 0.0011137907858937979, - "learning_rate": 0.00019999992351954555, - "loss": 46.0, - "step": 5159 - }, - { - "epoch": 0.39451803429095705, - "grad_norm": 0.0011242581531405449, - "learning_rate": 0.00019999992348983573, - "loss": 46.0, - "step": 5160 - }, - { - "epoch": 0.39459449127434676, - "grad_norm": 0.001839528325945139, - "learning_rate": 0.0001999999234601202, - "loss": 46.0, - "step": 5161 - }, - { - "epoch": 0.39467094825773646, - "grad_norm": 0.005966348107904196, - "learning_rate": 0.00019999992343039885, - "loss": 46.0, - "step": 5162 - }, - { - "epoch": 0.3947474052411262, - "grad_norm": 0.00060272152768448, - "learning_rate": 0.00019999992340067176, - "loss": 46.0, - "step": 5163 - }, - { - "epoch": 0.39482386222451593, - "grad_norm": 0.0013761867303401232, - "learning_rate": 0.00019999992337093887, - "loss": 46.0, - "step": 5164 - }, - { - "epoch": 0.39490031920790564, - "grad_norm": 0.0026355090085417032, - "learning_rate": 0.0001999999233412002, - "loss": 46.0, - "step": 5165 - }, - { - "epoch": 0.3949767761912954, - "grad_norm": 0.0004673987568821758, - "learning_rate": 0.0001999999233114558, - "loss": 46.0, - "step": 5166 - }, - { - "epoch": 0.3950532331746851, - "grad_norm": 0.0013201392721384764, - "learning_rate": 0.0001999999232817056, - "loss": 46.0, - "step": 5167 - }, - { - "epoch": 0.3951296901580748, - "grad_norm": 0.0004839313041884452, - "learning_rate": 0.00019999992325194968, - "loss": 46.0, - "step": 5168 - }, - { - "epoch": 0.3952061471414645, - "grad_norm": 0.005619749892503023, - "learning_rate": 0.00019999992322218796, - "loss": 46.0, - "step": 5169 - }, - { - "epoch": 0.3952826041248543, - "grad_norm": 0.0011793739395216107, - "learning_rate": 0.00019999992319242048, - "loss": 46.0, - "step": 5170 - }, - { - "epoch": 0.395359061108244, - "grad_norm": 0.0028332117944955826, - "learning_rate": 0.0001999999231626472, - "loss": 46.0, - "step": 5171 - }, - { - "epoch": 0.3954355180916337, - "grad_norm": 0.0005728068645112216, - "learning_rate": 0.00019999992313286818, - "loss": 46.0, - "step": 5172 - }, - { - "epoch": 0.3955119750750234, - "grad_norm": 0.0030518416315317154, - "learning_rate": 0.00019999992310308336, - "loss": 46.0, - "step": 5173 - }, - { - "epoch": 0.39558843205841315, - "grad_norm": 0.002132006688043475, - "learning_rate": 0.0001999999230732928, - "loss": 46.0, - "step": 5174 - }, - { - "epoch": 0.39566488904180286, - "grad_norm": 0.0009794343495741487, - "learning_rate": 0.00019999992304349648, - "loss": 46.0, - "step": 5175 - }, - { - "epoch": 0.39574134602519256, - "grad_norm": 0.0015904414467513561, - "learning_rate": 0.00019999992301369436, - "loss": 46.0, - "step": 5176 - }, - { - "epoch": 0.39581780300858227, - "grad_norm": 0.0008810659637674689, - "learning_rate": 0.00019999992298388647, - "loss": 46.0, - "step": 5177 - }, - { - "epoch": 0.39589425999197203, - "grad_norm": 0.003126387717202306, - "learning_rate": 0.0001999999229540728, - "loss": 46.0, - "step": 5178 - }, - { - "epoch": 0.39597071697536174, - "grad_norm": 0.003057079389691353, - "learning_rate": 0.00019999992292425343, - "loss": 46.0, - "step": 5179 - }, - { - "epoch": 0.39604717395875144, - "grad_norm": 0.0038163457065820694, - "learning_rate": 0.00019999992289442822, - "loss": 46.0, - "step": 5180 - }, - { - "epoch": 0.3961236309421412, - "grad_norm": 0.005050922278314829, - "learning_rate": 0.0001999999228645973, - "loss": 46.0, - "step": 5181 - }, - { - "epoch": 0.3962000879255309, - "grad_norm": 0.0018765765707939863, - "learning_rate": 0.00019999992283476053, - "loss": 46.0, - "step": 5182 - }, - { - "epoch": 0.3962765449089206, - "grad_norm": 0.0016817814903333783, - "learning_rate": 0.00019999992280491803, - "loss": 46.0, - "step": 5183 - }, - { - "epoch": 0.3963530018923103, - "grad_norm": 0.001378295011818409, - "learning_rate": 0.00019999992277506978, - "loss": 46.0, - "step": 5184 - }, - { - "epoch": 0.3964294588757001, - "grad_norm": 0.003858389565721154, - "learning_rate": 0.00019999992274521576, - "loss": 46.0, - "step": 5185 - }, - { - "epoch": 0.3965059158590898, - "grad_norm": 0.0011563836596906185, - "learning_rate": 0.00019999992271535594, - "loss": 46.0, - "step": 5186 - }, - { - "epoch": 0.3965823728424795, - "grad_norm": 0.000758295354899019, - "learning_rate": 0.00019999992268549037, - "loss": 46.0, - "step": 5187 - }, - { - "epoch": 0.3966588298258692, - "grad_norm": 0.0018489421345293522, - "learning_rate": 0.00019999992265561906, - "loss": 46.0, - "step": 5188 - }, - { - "epoch": 0.39673528680925896, - "grad_norm": 0.002228625351563096, - "learning_rate": 0.00019999992262574194, - "loss": 46.0, - "step": 5189 - }, - { - "epoch": 0.39681174379264866, - "grad_norm": 0.0015381401171907783, - "learning_rate": 0.00019999992259585905, - "loss": 46.0, - "step": 5190 - }, - { - "epoch": 0.39688820077603837, - "grad_norm": 0.002447132719680667, - "learning_rate": 0.0001999999225659704, - "loss": 46.0, - "step": 5191 - }, - { - "epoch": 0.3969646577594281, - "grad_norm": 0.0007145426352508366, - "learning_rate": 0.00019999992253607598, - "loss": 46.0, - "step": 5192 - }, - { - "epoch": 0.39704111474281784, - "grad_norm": 0.001091236132197082, - "learning_rate": 0.0001999999225061758, - "loss": 46.0, - "step": 5193 - }, - { - "epoch": 0.39711757172620754, - "grad_norm": 0.0007018567412160337, - "learning_rate": 0.00019999992247626982, - "loss": 46.0, - "step": 5194 - }, - { - "epoch": 0.39719402870959725, - "grad_norm": 0.010759027674794197, - "learning_rate": 0.0001999999224463581, - "loss": 46.0, - "step": 5195 - }, - { - "epoch": 0.397270485692987, - "grad_norm": 0.0006774477078579366, - "learning_rate": 0.00019999992241644061, - "loss": 46.0, - "step": 5196 - }, - { - "epoch": 0.3973469426763767, - "grad_norm": 0.0025085732340812683, - "learning_rate": 0.00019999992238651734, - "loss": 46.0, - "step": 5197 - }, - { - "epoch": 0.3974233996597664, - "grad_norm": 0.0005426656571216881, - "learning_rate": 0.00019999992235658832, - "loss": 46.0, - "step": 5198 - }, - { - "epoch": 0.3974998566431561, - "grad_norm": 0.002728756284341216, - "learning_rate": 0.0001999999223266535, - "loss": 46.0, - "step": 5199 - }, - { - "epoch": 0.3975763136265459, - "grad_norm": 0.0028018064331263304, - "learning_rate": 0.00019999992229671293, - "loss": 46.0, - "step": 5200 - }, - { - "epoch": 0.3976527706099356, - "grad_norm": 0.0006701347883790731, - "learning_rate": 0.00019999992226676659, - "loss": 46.0, - "step": 5201 - }, - { - "epoch": 0.3977292275933253, - "grad_norm": 0.0027472658548504114, - "learning_rate": 0.00019999992223681447, - "loss": 46.0, - "step": 5202 - }, - { - "epoch": 0.397805684576715, - "grad_norm": 0.0007059333729557693, - "learning_rate": 0.00019999992220685658, - "loss": 46.0, - "step": 5203 - }, - { - "epoch": 0.39788214156010476, - "grad_norm": 0.0009670344879850745, - "learning_rate": 0.00019999992217689295, - "loss": 46.0, - "step": 5204 - }, - { - "epoch": 0.39795859854349447, - "grad_norm": 0.004457896109670401, - "learning_rate": 0.0001999999221469235, - "loss": 46.0, - "step": 5205 - }, - { - "epoch": 0.3980350555268842, - "grad_norm": 0.0008017513900995255, - "learning_rate": 0.0001999999221169483, - "loss": 46.0, - "step": 5206 - }, - { - "epoch": 0.3981115125102739, - "grad_norm": 0.0021098165307193995, - "learning_rate": 0.00019999992208696735, - "loss": 46.0, - "step": 5207 - }, - { - "epoch": 0.39818796949366364, - "grad_norm": 0.002500280039384961, - "learning_rate": 0.00019999992205698065, - "loss": 46.0, - "step": 5208 - }, - { - "epoch": 0.39826442647705335, - "grad_norm": 0.0009139854228124022, - "learning_rate": 0.00019999992202698815, - "loss": 46.0, - "step": 5209 - }, - { - "epoch": 0.39834088346044305, - "grad_norm": 0.000868063245434314, - "learning_rate": 0.00019999992199698987, - "loss": 46.0, - "step": 5210 - }, - { - "epoch": 0.3984173404438328, - "grad_norm": 0.003440618747845292, - "learning_rate": 0.00019999992196698582, - "loss": 46.0, - "step": 5211 - }, - { - "epoch": 0.3984937974272225, - "grad_norm": 0.0007640470284968615, - "learning_rate": 0.000199999921936976, - "loss": 46.0, - "step": 5212 - }, - { - "epoch": 0.3985702544106122, - "grad_norm": 0.00931647326797247, - "learning_rate": 0.00019999992190696043, - "loss": 46.0, - "step": 5213 - }, - { - "epoch": 0.39864671139400193, - "grad_norm": 0.0030859685502946377, - "learning_rate": 0.0001999999218769391, - "loss": 46.0, - "step": 5214 - }, - { - "epoch": 0.3987231683773917, - "grad_norm": 0.0037534020375460386, - "learning_rate": 0.00019999992184691198, - "loss": 46.0, - "step": 5215 - }, - { - "epoch": 0.3987996253607814, - "grad_norm": 0.002515604253858328, - "learning_rate": 0.0001999999218168791, - "loss": 46.0, - "step": 5216 - }, - { - "epoch": 0.3988760823441711, - "grad_norm": 0.0013701661955565214, - "learning_rate": 0.00019999992178684043, - "loss": 46.0, - "step": 5217 - }, - { - "epoch": 0.3989525393275608, - "grad_norm": 0.0018612011335790157, - "learning_rate": 0.000199999921756796, - "loss": 46.0, - "step": 5218 - }, - { - "epoch": 0.39902899631095057, - "grad_norm": 0.004496550187468529, - "learning_rate": 0.0001999999217267458, - "loss": 46.0, - "step": 5219 - }, - { - "epoch": 0.3991054532943403, - "grad_norm": 0.0005126980831846595, - "learning_rate": 0.00019999992169668986, - "loss": 46.0, - "step": 5220 - }, - { - "epoch": 0.39918191027773, - "grad_norm": 0.0009422823204658926, - "learning_rate": 0.00019999992166662813, - "loss": 46.0, - "step": 5221 - }, - { - "epoch": 0.39925836726111974, - "grad_norm": 0.0018354547210037708, - "learning_rate": 0.00019999992163656063, - "loss": 46.0, - "step": 5222 - }, - { - "epoch": 0.39933482424450945, - "grad_norm": 0.0038140076212584972, - "learning_rate": 0.00019999992160648735, - "loss": 46.0, - "step": 5223 - }, - { - "epoch": 0.39941128122789915, - "grad_norm": 0.0018269179854542017, - "learning_rate": 0.0001999999215764083, - "loss": 46.0, - "step": 5224 - }, - { - "epoch": 0.39948773821128886, - "grad_norm": 0.0017675970448181033, - "learning_rate": 0.00019999992154632348, - "loss": 46.0, - "step": 5225 - }, - { - "epoch": 0.3995641951946786, - "grad_norm": 0.002125356113538146, - "learning_rate": 0.00019999992151623292, - "loss": 46.0, - "step": 5226 - }, - { - "epoch": 0.3996406521780683, - "grad_norm": 0.0029736682772636414, - "learning_rate": 0.00019999992148613655, - "loss": 46.0, - "step": 5227 - }, - { - "epoch": 0.39971710916145803, - "grad_norm": 0.0013741985894739628, - "learning_rate": 0.00019999992145603446, - "loss": 46.0, - "step": 5228 - }, - { - "epoch": 0.39979356614484773, - "grad_norm": 0.0005685454234480858, - "learning_rate": 0.00019999992142592655, - "loss": 46.0, - "step": 5229 - }, - { - "epoch": 0.3998700231282375, - "grad_norm": 0.001084899646230042, - "learning_rate": 0.00019999992139581289, - "loss": 46.0, - "step": 5230 - }, - { - "epoch": 0.3999464801116272, - "grad_norm": 0.0014022631803527474, - "learning_rate": 0.00019999992136569348, - "loss": 46.0, - "step": 5231 - }, - { - "epoch": 0.4000229370950169, - "grad_norm": 0.0008176940609700978, - "learning_rate": 0.0001999999213355683, - "loss": 46.0, - "step": 5232 - }, - { - "epoch": 0.4000993940784066, - "grad_norm": 0.0008974050870165229, - "learning_rate": 0.00019999992130543732, - "loss": 46.0, - "step": 5233 - }, - { - "epoch": 0.4001758510617964, - "grad_norm": 0.0012108850060030818, - "learning_rate": 0.0001999999212753006, - "loss": 46.0, - "step": 5234 - }, - { - "epoch": 0.4002523080451861, - "grad_norm": 0.0024835052900016308, - "learning_rate": 0.00019999992124515806, - "loss": 46.0, - "step": 5235 - }, - { - "epoch": 0.4003287650285758, - "grad_norm": 0.0009904626058414578, - "learning_rate": 0.00019999992121500979, - "loss": 46.0, - "step": 5236 - }, - { - "epoch": 0.40040522201196554, - "grad_norm": 0.0016699059633538127, - "learning_rate": 0.00019999992118485574, - "loss": 46.0, - "step": 5237 - }, - { - "epoch": 0.40048167899535525, - "grad_norm": 0.0026765011716634035, - "learning_rate": 0.00019999992115469594, - "loss": 46.0, - "step": 5238 - }, - { - "epoch": 0.40055813597874496, - "grad_norm": 0.0003974362916778773, - "learning_rate": 0.00019999992112453035, - "loss": 46.0, - "step": 5239 - }, - { - "epoch": 0.40063459296213466, - "grad_norm": 0.0038094143383204937, - "learning_rate": 0.000199999921094359, - "loss": 46.0, - "step": 5240 - }, - { - "epoch": 0.4007110499455244, - "grad_norm": 0.0016438699094578624, - "learning_rate": 0.00019999992106418187, - "loss": 46.0, - "step": 5241 - }, - { - "epoch": 0.40078750692891413, - "grad_norm": 0.0009883217280730605, - "learning_rate": 0.00019999992103399898, - "loss": 46.0, - "step": 5242 - }, - { - "epoch": 0.40086396391230383, - "grad_norm": 0.013140978291630745, - "learning_rate": 0.00019999992100381032, - "loss": 46.0, - "step": 5243 - }, - { - "epoch": 0.40094042089569354, - "grad_norm": 0.0010866523953154683, - "learning_rate": 0.0001999999209736159, - "loss": 46.0, - "step": 5244 - }, - { - "epoch": 0.4010168778790833, - "grad_norm": 0.0009235245524905622, - "learning_rate": 0.00019999992094341568, - "loss": 46.0, - "step": 5245 - }, - { - "epoch": 0.401093334862473, - "grad_norm": 0.00375084369443357, - "learning_rate": 0.00019999992091320972, - "loss": 46.0, - "step": 5246 - }, - { - "epoch": 0.4011697918458627, - "grad_norm": 0.0012220956850796938, - "learning_rate": 0.000199999920882998, - "loss": 46.0, - "step": 5247 - }, - { - "epoch": 0.4012462488292524, - "grad_norm": 0.005583017598837614, - "learning_rate": 0.00019999992085278047, - "loss": 46.0, - "step": 5248 - }, - { - "epoch": 0.4013227058126422, - "grad_norm": 0.0007204393041320145, - "learning_rate": 0.0001999999208225572, - "loss": 46.0, - "step": 5249 - }, - { - "epoch": 0.4013991627960319, - "grad_norm": 0.001671968144364655, - "learning_rate": 0.00019999992079232818, - "loss": 46.0, - "step": 5250 - }, - { - "epoch": 0.4014756197794216, - "grad_norm": 0.0012400003615766764, - "learning_rate": 0.00019999992076209333, - "loss": 46.0, - "step": 5251 - }, - { - "epoch": 0.40155207676281135, - "grad_norm": 0.0011241972679272294, - "learning_rate": 0.00019999992073185276, - "loss": 46.0, - "step": 5252 - }, - { - "epoch": 0.40162853374620106, - "grad_norm": 0.00090367830125615, - "learning_rate": 0.0001999999207016064, - "loss": 46.0, - "step": 5253 - }, - { - "epoch": 0.40170499072959076, - "grad_norm": 0.004639594350010157, - "learning_rate": 0.00019999992067135428, - "loss": 46.0, - "step": 5254 - }, - { - "epoch": 0.40178144771298047, - "grad_norm": 0.0009280552621930838, - "learning_rate": 0.0001999999206410964, - "loss": 46.0, - "step": 5255 - }, - { - "epoch": 0.4018579046963702, - "grad_norm": 0.00045384871191345155, - "learning_rate": 0.00019999992061083273, - "loss": 46.0, - "step": 5256 - }, - { - "epoch": 0.40193436167975993, - "grad_norm": 0.01320857647806406, - "learning_rate": 0.00019999992058056327, - "loss": 46.0, - "step": 5257 - }, - { - "epoch": 0.40201081866314964, - "grad_norm": 0.0011672774562612176, - "learning_rate": 0.0001999999205502881, - "loss": 46.0, - "step": 5258 - }, - { - "epoch": 0.40208727564653934, - "grad_norm": 0.002242287155240774, - "learning_rate": 0.0001999999205200071, - "loss": 46.0, - "step": 5259 - }, - { - "epoch": 0.4021637326299291, - "grad_norm": 0.0005927511374466121, - "learning_rate": 0.00019999992048972036, - "loss": 46.0, - "step": 5260 - }, - { - "epoch": 0.4022401896133188, - "grad_norm": 0.0009226332185789943, - "learning_rate": 0.00019999992045942783, - "loss": 46.0, - "step": 5261 - }, - { - "epoch": 0.4023166465967085, - "grad_norm": 0.0016111298464238644, - "learning_rate": 0.00019999992042912958, - "loss": 46.0, - "step": 5262 - }, - { - "epoch": 0.4023931035800982, - "grad_norm": 0.0006652683950960636, - "learning_rate": 0.0001999999203988255, - "loss": 46.0, - "step": 5263 - }, - { - "epoch": 0.402469560563488, - "grad_norm": 0.0025729595217853785, - "learning_rate": 0.00019999992036851571, - "loss": 46.0, - "step": 5264 - }, - { - "epoch": 0.4025460175468777, - "grad_norm": 0.00046787853352725506, - "learning_rate": 0.0001999999203382001, - "loss": 46.0, - "step": 5265 - }, - { - "epoch": 0.4026224745302674, - "grad_norm": 0.0009747404255904257, - "learning_rate": 0.00019999992030787875, - "loss": 46.0, - "step": 5266 - }, - { - "epoch": 0.40269893151365715, - "grad_norm": 0.0012643086956813931, - "learning_rate": 0.00019999992027755164, - "loss": 46.0, - "step": 5267 - }, - { - "epoch": 0.40277538849704686, - "grad_norm": 0.001681259018369019, - "learning_rate": 0.00019999992024721875, - "loss": 46.0, - "step": 5268 - }, - { - "epoch": 0.40285184548043657, - "grad_norm": 0.0027980790473520756, - "learning_rate": 0.00019999992021688007, - "loss": 46.0, - "step": 5269 - }, - { - "epoch": 0.40292830246382627, - "grad_norm": 0.0007206897134892642, - "learning_rate": 0.00019999992018653563, - "loss": 46.0, - "step": 5270 - }, - { - "epoch": 0.40300475944721603, - "grad_norm": 0.0017942889826372266, - "learning_rate": 0.00019999992015618543, - "loss": 46.0, - "step": 5271 - }, - { - "epoch": 0.40308121643060574, - "grad_norm": 0.00030999790760688484, - "learning_rate": 0.00019999992012582945, - "loss": 46.0, - "step": 5272 - }, - { - "epoch": 0.40315767341399544, - "grad_norm": 0.0022347443737089634, - "learning_rate": 0.0001999999200954677, - "loss": 46.0, - "step": 5273 - }, - { - "epoch": 0.40323413039738515, - "grad_norm": 0.0034785778261721134, - "learning_rate": 0.0001999999200651002, - "loss": 46.0, - "step": 5274 - }, - { - "epoch": 0.4033105873807749, - "grad_norm": 0.0009431695216335356, - "learning_rate": 0.0001999999200347269, - "loss": 46.0, - "step": 5275 - }, - { - "epoch": 0.4033870443641646, - "grad_norm": 0.0008784524980001152, - "learning_rate": 0.00019999992000434787, - "loss": 46.0, - "step": 5276 - }, - { - "epoch": 0.4034635013475543, - "grad_norm": 0.0007470921264030039, - "learning_rate": 0.00019999991997396302, - "loss": 46.0, - "step": 5277 - }, - { - "epoch": 0.403539958330944, - "grad_norm": 0.0014381594955921173, - "learning_rate": 0.00019999991994357243, - "loss": 46.0, - "step": 5278 - }, - { - "epoch": 0.4036164153143338, - "grad_norm": 0.002072378294542432, - "learning_rate": 0.0001999999199131761, - "loss": 46.0, - "step": 5279 - }, - { - "epoch": 0.4036928722977235, - "grad_norm": 0.002146427519619465, - "learning_rate": 0.00019999991988277395, - "loss": 46.0, - "step": 5280 - }, - { - "epoch": 0.4037693292811132, - "grad_norm": 0.014893047511577606, - "learning_rate": 0.00019999991985236606, - "loss": 46.0, - "step": 5281 - }, - { - "epoch": 0.40384578626450296, - "grad_norm": 0.001805880106985569, - "learning_rate": 0.00019999991982195238, - "loss": 46.0, - "step": 5282 - }, - { - "epoch": 0.40392224324789266, - "grad_norm": 0.0006186531973071396, - "learning_rate": 0.00019999991979153294, - "loss": 46.0, - "step": 5283 - }, - { - "epoch": 0.40399870023128237, - "grad_norm": 0.0008769570267759264, - "learning_rate": 0.0001999999197611077, - "loss": 46.0, - "step": 5284 - }, - { - "epoch": 0.4040751572146721, - "grad_norm": 0.0013388508232310414, - "learning_rate": 0.00019999991973067673, - "loss": 46.0, - "step": 5285 - }, - { - "epoch": 0.40415161419806184, - "grad_norm": 0.0018591596744954586, - "learning_rate": 0.00019999991970024, - "loss": 46.0, - "step": 5286 - }, - { - "epoch": 0.40422807118145154, - "grad_norm": 0.0010779805015772581, - "learning_rate": 0.00019999991966979748, - "loss": 46.0, - "step": 5287 - }, - { - "epoch": 0.40430452816484125, - "grad_norm": 0.0028559148777276278, - "learning_rate": 0.0001999999196393492, - "loss": 46.0, - "step": 5288 - }, - { - "epoch": 0.40438098514823095, - "grad_norm": 0.010105373337864876, - "learning_rate": 0.00019999991960889513, - "loss": 46.0, - "step": 5289 - }, - { - "epoch": 0.4044574421316207, - "grad_norm": 0.00030190908000804484, - "learning_rate": 0.0001999999195784353, - "loss": 46.0, - "step": 5290 - }, - { - "epoch": 0.4045338991150104, - "grad_norm": 0.0020312201231718063, - "learning_rate": 0.00019999991954796972, - "loss": 46.0, - "step": 5291 - }, - { - "epoch": 0.4046103560984001, - "grad_norm": 0.0010380193125456572, - "learning_rate": 0.00019999991951749838, - "loss": 46.0, - "step": 5292 - }, - { - "epoch": 0.40468681308178983, - "grad_norm": 0.0006743098492734134, - "learning_rate": 0.00019999991948702124, - "loss": 46.0, - "step": 5293 - }, - { - "epoch": 0.4047632700651796, - "grad_norm": 0.0030814683996140957, - "learning_rate": 0.00019999991945653833, - "loss": 46.0, - "step": 5294 - }, - { - "epoch": 0.4048397270485693, - "grad_norm": 0.0006637056358158588, - "learning_rate": 0.00019999991942604967, - "loss": 46.0, - "step": 5295 - }, - { - "epoch": 0.404916184031959, - "grad_norm": 0.0008747492101974785, - "learning_rate": 0.0001999999193955552, - "loss": 46.0, - "step": 5296 - }, - { - "epoch": 0.40499264101534876, - "grad_norm": 0.0016115738544613123, - "learning_rate": 0.000199999919365055, - "loss": 46.0, - "step": 5297 - }, - { - "epoch": 0.40506909799873847, - "grad_norm": 0.0005605354090221226, - "learning_rate": 0.00019999991933454902, - "loss": 46.0, - "step": 5298 - }, - { - "epoch": 0.4051455549821282, - "grad_norm": 0.0011988300830125809, - "learning_rate": 0.00019999991930403727, - "loss": 46.0, - "step": 5299 - }, - { - "epoch": 0.4052220119655179, - "grad_norm": 0.0006165509694255888, - "learning_rate": 0.00019999991927351974, - "loss": 46.0, - "step": 5300 - }, - { - "epoch": 0.40529846894890764, - "grad_norm": 0.0003288340230938047, - "learning_rate": 0.00019999991924299647, - "loss": 46.0, - "step": 5301 - }, - { - "epoch": 0.40537492593229735, - "grad_norm": 0.002198850968852639, - "learning_rate": 0.0001999999192124674, - "loss": 46.0, - "step": 5302 - }, - { - "epoch": 0.40545138291568705, - "grad_norm": 0.0016613348852843046, - "learning_rate": 0.00019999991918193257, - "loss": 46.0, - "step": 5303 - }, - { - "epoch": 0.40552783989907676, - "grad_norm": 0.0013832877157256007, - "learning_rate": 0.00019999991915139198, - "loss": 46.0, - "step": 5304 - }, - { - "epoch": 0.4056042968824665, - "grad_norm": 0.003217297373339534, - "learning_rate": 0.00019999991912084562, - "loss": 46.0, - "step": 5305 - }, - { - "epoch": 0.4056807538658562, - "grad_norm": 0.00459557119756937, - "learning_rate": 0.00019999991909029348, - "loss": 46.0, - "step": 5306 - }, - { - "epoch": 0.40575721084924593, - "grad_norm": 0.0008140060817822814, - "learning_rate": 0.00019999991905973556, - "loss": 46.0, - "step": 5307 - }, - { - "epoch": 0.40583366783263564, - "grad_norm": 0.001100831781513989, - "learning_rate": 0.00019999991902917188, - "loss": 46.0, - "step": 5308 - }, - { - "epoch": 0.4059101248160254, - "grad_norm": 0.0184186939150095, - "learning_rate": 0.00019999991899860244, - "loss": 46.0, - "step": 5309 - }, - { - "epoch": 0.4059865817994151, - "grad_norm": 0.0008051187032833695, - "learning_rate": 0.00019999991896802724, - "loss": 46.0, - "step": 5310 - }, - { - "epoch": 0.4060630387828048, - "grad_norm": 0.003083000658079982, - "learning_rate": 0.00019999991893744626, - "loss": 46.0, - "step": 5311 - }, - { - "epoch": 0.40613949576619457, - "grad_norm": 0.0031733568757772446, - "learning_rate": 0.0001999999189068595, - "loss": 46.0, - "step": 5312 - }, - { - "epoch": 0.4062159527495843, - "grad_norm": 0.0012065396877005696, - "learning_rate": 0.00019999991887626698, - "loss": 46.0, - "step": 5313 - }, - { - "epoch": 0.406292409732974, - "grad_norm": 0.0030355232302099466, - "learning_rate": 0.00019999991884566865, - "loss": 46.0, - "step": 5314 - }, - { - "epoch": 0.4063688667163637, - "grad_norm": 0.000756801397074014, - "learning_rate": 0.0001999999188150646, - "loss": 46.0, - "step": 5315 - }, - { - "epoch": 0.40644532369975345, - "grad_norm": 0.0007659818511456251, - "learning_rate": 0.0001999999187844548, - "loss": 46.0, - "step": 5316 - }, - { - "epoch": 0.40652178068314315, - "grad_norm": 0.0007685855962336063, - "learning_rate": 0.0001999999187538392, - "loss": 46.0, - "step": 5317 - }, - { - "epoch": 0.40659823766653286, - "grad_norm": 0.00038792213308624923, - "learning_rate": 0.0001999999187232178, - "loss": 46.0, - "step": 5318 - }, - { - "epoch": 0.40667469464992256, - "grad_norm": 0.0006638366030529141, - "learning_rate": 0.00019999991869259066, - "loss": 46.0, - "step": 5319 - }, - { - "epoch": 0.4067511516333123, - "grad_norm": 0.0009837057441473007, - "learning_rate": 0.00019999991866195778, - "loss": 46.0, - "step": 5320 - }, - { - "epoch": 0.40682760861670203, - "grad_norm": 0.00202792976051569, - "learning_rate": 0.0001999999186313191, - "loss": 46.0, - "step": 5321 - }, - { - "epoch": 0.40690406560009174, - "grad_norm": 0.003748349379748106, - "learning_rate": 0.00019999991860067463, - "loss": 46.0, - "step": 5322 - }, - { - "epoch": 0.40698052258348144, - "grad_norm": 0.0019859028980135918, - "learning_rate": 0.00019999991857002443, - "loss": 46.0, - "step": 5323 - }, - { - "epoch": 0.4070569795668712, - "grad_norm": 0.0029828983824700117, - "learning_rate": 0.00019999991853936845, - "loss": 46.0, - "step": 5324 - }, - { - "epoch": 0.4071334365502609, - "grad_norm": 0.0022469153627753258, - "learning_rate": 0.0001999999185087067, - "loss": 46.0, - "step": 5325 - }, - { - "epoch": 0.4072098935336506, - "grad_norm": 0.0006606092792935669, - "learning_rate": 0.00019999991847803917, - "loss": 46.0, - "step": 5326 - }, - { - "epoch": 0.4072863505170404, - "grad_norm": 0.004952155519276857, - "learning_rate": 0.00019999991844736584, - "loss": 46.0, - "step": 5327 - }, - { - "epoch": 0.4073628075004301, - "grad_norm": 0.0008421237580478191, - "learning_rate": 0.0001999999184166868, - "loss": 46.0, - "step": 5328 - }, - { - "epoch": 0.4074392644838198, - "grad_norm": 0.0007701292634010315, - "learning_rate": 0.00019999991838600195, - "loss": 46.0, - "step": 5329 - }, - { - "epoch": 0.4075157214672095, - "grad_norm": 0.0025455495342612267, - "learning_rate": 0.00019999991835531136, - "loss": 46.0, - "step": 5330 - }, - { - "epoch": 0.40759217845059925, - "grad_norm": 0.0008730392437428236, - "learning_rate": 0.00019999991832461497, - "loss": 46.0, - "step": 5331 - }, - { - "epoch": 0.40766863543398896, - "grad_norm": 0.0005633785622194409, - "learning_rate": 0.00019999991829391286, - "loss": 46.0, - "step": 5332 - }, - { - "epoch": 0.40774509241737866, - "grad_norm": 0.0009618055773898959, - "learning_rate": 0.00019999991826320494, - "loss": 46.0, - "step": 5333 - }, - { - "epoch": 0.40782154940076837, - "grad_norm": 0.002823295071721077, - "learning_rate": 0.00019999991823249126, - "loss": 46.0, - "step": 5334 - }, - { - "epoch": 0.40789800638415813, - "grad_norm": 0.0013456797460094094, - "learning_rate": 0.00019999991820177183, - "loss": 46.0, - "step": 5335 - }, - { - "epoch": 0.40797446336754783, - "grad_norm": 0.008451217785477638, - "learning_rate": 0.00019999991817104657, - "loss": 46.0, - "step": 5336 - }, - { - "epoch": 0.40805092035093754, - "grad_norm": 0.0044900947250425816, - "learning_rate": 0.00019999991814031561, - "loss": 46.0, - "step": 5337 - }, - { - "epoch": 0.4081273773343273, - "grad_norm": 0.0004067916888743639, - "learning_rate": 0.00019999991810957886, - "loss": 46.0, - "step": 5338 - }, - { - "epoch": 0.408203834317717, - "grad_norm": 0.001792369643226266, - "learning_rate": 0.0001999999180788363, - "loss": 46.0, - "step": 5339 - }, - { - "epoch": 0.4082802913011067, - "grad_norm": 0.0004632076888810843, - "learning_rate": 0.000199999918048088, - "loss": 46.0, - "step": 5340 - }, - { - "epoch": 0.4083567482844964, - "grad_norm": 0.000739187584258616, - "learning_rate": 0.00019999991801733394, - "loss": 46.0, - "step": 5341 - }, - { - "epoch": 0.4084332052678862, - "grad_norm": 0.00245550530962646, - "learning_rate": 0.00019999991798657412, - "loss": 46.0, - "step": 5342 - }, - { - "epoch": 0.4085096622512759, - "grad_norm": 0.0005575842224061489, - "learning_rate": 0.0001999999179558085, - "loss": 46.0, - "step": 5343 - }, - { - "epoch": 0.4085861192346656, - "grad_norm": 0.0010448317043483257, - "learning_rate": 0.00019999991792503714, - "loss": 46.0, - "step": 5344 - }, - { - "epoch": 0.4086625762180553, - "grad_norm": 0.0008700392208993435, - "learning_rate": 0.00019999991789425997, - "loss": 46.0, - "step": 5345 - }, - { - "epoch": 0.40873903320144506, - "grad_norm": 0.0013869213871657848, - "learning_rate": 0.00019999991786347706, - "loss": 46.0, - "step": 5346 - }, - { - "epoch": 0.40881549018483476, - "grad_norm": 0.0014831259613856673, - "learning_rate": 0.0001999999178326884, - "loss": 46.0, - "step": 5347 - }, - { - "epoch": 0.40889194716822447, - "grad_norm": 0.001482793944887817, - "learning_rate": 0.00019999991780189394, - "loss": 46.0, - "step": 5348 - }, - { - "epoch": 0.4089684041516142, - "grad_norm": 0.001930071273818612, - "learning_rate": 0.0001999999177710937, - "loss": 46.0, - "step": 5349 - }, - { - "epoch": 0.40904486113500393, - "grad_norm": 0.001391713391058147, - "learning_rate": 0.00019999991774028773, - "loss": 46.0, - "step": 5350 - }, - { - "epoch": 0.40912131811839364, - "grad_norm": 0.0008259987225756049, - "learning_rate": 0.00019999991770947598, - "loss": 46.0, - "step": 5351 - }, - { - "epoch": 0.40919777510178335, - "grad_norm": 0.0018095808336511254, - "learning_rate": 0.00019999991767865843, - "loss": 46.0, - "step": 5352 - }, - { - "epoch": 0.4092742320851731, - "grad_norm": 0.005817838944494724, - "learning_rate": 0.00019999991764783513, - "loss": 46.0, - "step": 5353 - }, - { - "epoch": 0.4093506890685628, - "grad_norm": 0.0012043097522109747, - "learning_rate": 0.00019999991761700608, - "loss": 46.0, - "step": 5354 - }, - { - "epoch": 0.4094271460519525, - "grad_norm": 0.0006035730475559831, - "learning_rate": 0.00019999991758617124, - "loss": 46.0, - "step": 5355 - }, - { - "epoch": 0.4095036030353422, - "grad_norm": 0.0008884496055543423, - "learning_rate": 0.00019999991755533062, - "loss": 46.0, - "step": 5356 - }, - { - "epoch": 0.409580060018732, - "grad_norm": 0.00195993366651237, - "learning_rate": 0.00019999991752448423, - "loss": 46.0, - "step": 5357 - }, - { - "epoch": 0.4096565170021217, - "grad_norm": 0.0033339839428663254, - "learning_rate": 0.0001999999174936321, - "loss": 46.0, - "step": 5358 - }, - { - "epoch": 0.4097329739855114, - "grad_norm": 0.00281574297696352, - "learning_rate": 0.00019999991746277415, - "loss": 46.0, - "step": 5359 - }, - { - "epoch": 0.4098094309689011, - "grad_norm": 0.0024064439348876476, - "learning_rate": 0.0001999999174319105, - "loss": 46.0, - "step": 5360 - }, - { - "epoch": 0.40988588795229086, - "grad_norm": 0.0013206801377236843, - "learning_rate": 0.00019999991740104103, - "loss": 46.0, - "step": 5361 - }, - { - "epoch": 0.40996234493568057, - "grad_norm": 0.0009637962793931365, - "learning_rate": 0.0001999999173701658, - "loss": 46.0, - "step": 5362 - }, - { - "epoch": 0.4100388019190703, - "grad_norm": 0.0006769727915525436, - "learning_rate": 0.0001999999173392848, - "loss": 46.0, - "step": 5363 - }, - { - "epoch": 0.41011525890246, - "grad_norm": 0.002288165735080838, - "learning_rate": 0.00019999991730839804, - "loss": 46.0, - "step": 5364 - }, - { - "epoch": 0.41019171588584974, - "grad_norm": 0.0014488318702206016, - "learning_rate": 0.00019999991727750552, - "loss": 46.0, - "step": 5365 - }, - { - "epoch": 0.41026817286923944, - "grad_norm": 0.0012545925565063953, - "learning_rate": 0.0001999999172466072, - "loss": 46.0, - "step": 5366 - }, - { - "epoch": 0.41034462985262915, - "grad_norm": 0.00043280309182591736, - "learning_rate": 0.00019999991721570312, - "loss": 46.0, - "step": 5367 - }, - { - "epoch": 0.4104210868360189, - "grad_norm": 0.0018425228772684932, - "learning_rate": 0.00019999991718479328, - "loss": 46.0, - "step": 5368 - }, - { - "epoch": 0.4104975438194086, - "grad_norm": 0.0011807163245975971, - "learning_rate": 0.00019999991715387766, - "loss": 46.0, - "step": 5369 - }, - { - "epoch": 0.4105740008027983, - "grad_norm": 0.002894126810133457, - "learning_rate": 0.0001999999171229563, - "loss": 46.0, - "step": 5370 - }, - { - "epoch": 0.41065045778618803, - "grad_norm": 0.0011282350169494748, - "learning_rate": 0.00019999991709202916, - "loss": 46.0, - "step": 5371 - }, - { - "epoch": 0.4107269147695778, - "grad_norm": 0.003663264913484454, - "learning_rate": 0.00019999991706109622, - "loss": 46.0, - "step": 5372 - }, - { - "epoch": 0.4108033717529675, - "grad_norm": 0.0018011954380199313, - "learning_rate": 0.00019999991703015753, - "loss": 46.0, - "step": 5373 - }, - { - "epoch": 0.4108798287363572, - "grad_norm": 0.0004497392219491303, - "learning_rate": 0.00019999991699921307, - "loss": 46.0, - "step": 5374 - }, - { - "epoch": 0.4109562857197469, - "grad_norm": 0.0007140978123061359, - "learning_rate": 0.00019999991696826284, - "loss": 46.0, - "step": 5375 - }, - { - "epoch": 0.41103274270313667, - "grad_norm": 0.0011017256183549762, - "learning_rate": 0.00019999991693730684, - "loss": 46.0, - "step": 5376 - }, - { - "epoch": 0.41110919968652637, - "grad_norm": 0.0008979947888292372, - "learning_rate": 0.0001999999169063451, - "loss": 46.0, - "step": 5377 - }, - { - "epoch": 0.4111856566699161, - "grad_norm": 0.0008670479874126613, - "learning_rate": 0.00019999991687537754, - "loss": 46.0, - "step": 5378 - }, - { - "epoch": 0.4112621136533058, - "grad_norm": 0.0027278808411210775, - "learning_rate": 0.0001999999168444042, - "loss": 46.0, - "step": 5379 - }, - { - "epoch": 0.41133857063669554, - "grad_norm": 0.0036291528958827257, - "learning_rate": 0.00019999991681342517, - "loss": 46.0, - "step": 5380 - }, - { - "epoch": 0.41141502762008525, - "grad_norm": 0.0005036481888964772, - "learning_rate": 0.00019999991678244032, - "loss": 46.0, - "step": 5381 - }, - { - "epoch": 0.41149148460347496, - "grad_norm": 0.0014522558776661754, - "learning_rate": 0.00019999991675144968, - "loss": 46.0, - "step": 5382 - }, - { - "epoch": 0.4115679415868647, - "grad_norm": 0.0027872086502611637, - "learning_rate": 0.00019999991672045334, - "loss": 46.0, - "step": 5383 - }, - { - "epoch": 0.4116443985702544, - "grad_norm": 0.0008004358969628811, - "learning_rate": 0.00019999991668945115, - "loss": 46.0, - "step": 5384 - }, - { - "epoch": 0.4117208555536441, - "grad_norm": 0.003509378293529153, - "learning_rate": 0.00019999991665844324, - "loss": 46.0, - "step": 5385 - }, - { - "epoch": 0.41179731253703383, - "grad_norm": 0.0010886641684919596, - "learning_rate": 0.00019999991662742955, - "loss": 46.0, - "step": 5386 - }, - { - "epoch": 0.4118737695204236, - "grad_norm": 0.00036593497497960925, - "learning_rate": 0.0001999999165964101, - "loss": 46.0, - "step": 5387 - }, - { - "epoch": 0.4119502265038133, - "grad_norm": 0.0027661616913974285, - "learning_rate": 0.00019999991656538486, - "loss": 46.0, - "step": 5388 - }, - { - "epoch": 0.412026683487203, - "grad_norm": 0.0010463183280080557, - "learning_rate": 0.00019999991653435386, - "loss": 46.0, - "step": 5389 - }, - { - "epoch": 0.4121031404705927, - "grad_norm": 0.0007264424930326641, - "learning_rate": 0.00019999991650331708, - "loss": 46.0, - "step": 5390 - }, - { - "epoch": 0.41217959745398247, - "grad_norm": 0.007100660353899002, - "learning_rate": 0.00019999991647227456, - "loss": 46.0, - "step": 5391 - }, - { - "epoch": 0.4122560544373722, - "grad_norm": 0.0022564150858670473, - "learning_rate": 0.00019999991644122623, - "loss": 46.0, - "step": 5392 - }, - { - "epoch": 0.4123325114207619, - "grad_norm": 0.0013005431974306703, - "learning_rate": 0.00019999991641017216, - "loss": 46.0, - "step": 5393 - }, - { - "epoch": 0.4124089684041516, - "grad_norm": 0.0011177370324730873, - "learning_rate": 0.00019999991637911232, - "loss": 46.0, - "step": 5394 - }, - { - "epoch": 0.41248542538754135, - "grad_norm": 0.0016524213133379817, - "learning_rate": 0.0001999999163480467, - "loss": 46.0, - "step": 5395 - }, - { - "epoch": 0.41256188237093105, - "grad_norm": 0.0004972158349119127, - "learning_rate": 0.00019999991631697533, - "loss": 46.0, - "step": 5396 - }, - { - "epoch": 0.41263833935432076, - "grad_norm": 0.0005656633293256164, - "learning_rate": 0.00019999991628589814, - "loss": 46.0, - "step": 5397 - }, - { - "epoch": 0.4127147963377105, - "grad_norm": 0.0014298890018835664, - "learning_rate": 0.00019999991625481523, - "loss": 46.0, - "step": 5398 - }, - { - "epoch": 0.4127912533211002, - "grad_norm": 0.0006675570621155202, - "learning_rate": 0.00019999991622372655, - "loss": 46.0, - "step": 5399 - }, - { - "epoch": 0.41286771030448993, - "grad_norm": 0.0020149063784629107, - "learning_rate": 0.00019999991619263206, - "loss": 46.0, - "step": 5400 - }, - { - "epoch": 0.41294416728787964, - "grad_norm": 0.0011675046989694238, - "learning_rate": 0.00019999991616153183, - "loss": 46.0, - "step": 5401 - }, - { - "epoch": 0.4130206242712694, - "grad_norm": 0.0007131360471248627, - "learning_rate": 0.00019999991613042583, - "loss": 46.0, - "step": 5402 - }, - { - "epoch": 0.4130970812546591, - "grad_norm": 0.0005054062348790467, - "learning_rate": 0.00019999991609931405, - "loss": 46.0, - "step": 5403 - }, - { - "epoch": 0.4131735382380488, - "grad_norm": 0.0007717363187111914, - "learning_rate": 0.0001999999160681965, - "loss": 46.0, - "step": 5404 - }, - { - "epoch": 0.4132499952214385, - "grad_norm": 0.0008846339769661427, - "learning_rate": 0.0001999999160370732, - "loss": 46.0, - "step": 5405 - }, - { - "epoch": 0.4133264522048283, - "grad_norm": 0.00278662983328104, - "learning_rate": 0.00019999991600594413, - "loss": 46.0, - "step": 5406 - }, - { - "epoch": 0.413402909188218, - "grad_norm": 0.004176516085863113, - "learning_rate": 0.00019999991597480926, - "loss": 46.0, - "step": 5407 - }, - { - "epoch": 0.4134793661716077, - "grad_norm": 0.0008209343650378287, - "learning_rate": 0.00019999991594366864, - "loss": 46.0, - "step": 5408 - }, - { - "epoch": 0.4135558231549974, - "grad_norm": 0.000903637264855206, - "learning_rate": 0.00019999991591252228, - "loss": 46.0, - "step": 5409 - }, - { - "epoch": 0.41363228013838715, - "grad_norm": 0.0033453076612204313, - "learning_rate": 0.00019999991588137011, - "loss": 46.0, - "step": 5410 - }, - { - "epoch": 0.41370873712177686, - "grad_norm": 0.0020626818295568228, - "learning_rate": 0.00019999991585021215, - "loss": 46.0, - "step": 5411 - }, - { - "epoch": 0.41378519410516656, - "grad_norm": 0.002370130503550172, - "learning_rate": 0.00019999991581904847, - "loss": 46.0, - "step": 5412 - }, - { - "epoch": 0.4138616510885563, - "grad_norm": 0.0008303317590616643, - "learning_rate": 0.000199999915787879, - "loss": 46.0, - "step": 5413 - }, - { - "epoch": 0.41393810807194603, - "grad_norm": 0.005083939526230097, - "learning_rate": 0.00019999991575670375, - "loss": 46.0, - "step": 5414 - }, - { - "epoch": 0.41401456505533574, - "grad_norm": 0.001392135163769126, - "learning_rate": 0.00019999991572552277, - "loss": 46.0, - "step": 5415 - }, - { - "epoch": 0.41409102203872544, - "grad_norm": 0.0025353056844323874, - "learning_rate": 0.000199999915694336, - "loss": 46.0, - "step": 5416 - }, - { - "epoch": 0.4141674790221152, - "grad_norm": 0.003599467221647501, - "learning_rate": 0.00019999991566314345, - "loss": 46.0, - "step": 5417 - }, - { - "epoch": 0.4142439360055049, - "grad_norm": 0.0011562509462237358, - "learning_rate": 0.00019999991563194512, - "loss": 46.0, - "step": 5418 - }, - { - "epoch": 0.4143203929888946, - "grad_norm": 0.0017617044504731894, - "learning_rate": 0.00019999991560074105, - "loss": 46.0, - "step": 5419 - }, - { - "epoch": 0.4143968499722843, - "grad_norm": 0.0018615226726979017, - "learning_rate": 0.00019999991556953118, - "loss": 46.0, - "step": 5420 - }, - { - "epoch": 0.4144733069556741, - "grad_norm": 0.0012078463332727551, - "learning_rate": 0.00019999991553831556, - "loss": 46.0, - "step": 5421 - }, - { - "epoch": 0.4145497639390638, - "grad_norm": 0.0008750527631491423, - "learning_rate": 0.00019999991550709417, - "loss": 46.0, - "step": 5422 - }, - { - "epoch": 0.4146262209224535, - "grad_norm": 0.0006168300751596689, - "learning_rate": 0.00019999991547586704, - "loss": 46.0, - "step": 5423 - }, - { - "epoch": 0.4147026779058432, - "grad_norm": 0.0006413274095393717, - "learning_rate": 0.0001999999154446341, - "loss": 46.0, - "step": 5424 - }, - { - "epoch": 0.41477913488923296, - "grad_norm": 0.000596673518884927, - "learning_rate": 0.0001999999154133954, - "loss": 46.0, - "step": 5425 - }, - { - "epoch": 0.41485559187262266, - "grad_norm": 0.0011239356826990843, - "learning_rate": 0.00019999991538215093, - "loss": 46.0, - "step": 5426 - }, - { - "epoch": 0.41493204885601237, - "grad_norm": 0.002828011056408286, - "learning_rate": 0.00019999991535090067, - "loss": 46.0, - "step": 5427 - }, - { - "epoch": 0.41500850583940213, - "grad_norm": 0.000898508820682764, - "learning_rate": 0.00019999991531964467, - "loss": 46.0, - "step": 5428 - }, - { - "epoch": 0.41508496282279184, - "grad_norm": 0.0015465901233255863, - "learning_rate": 0.0001999999152883829, - "loss": 46.0, - "step": 5429 - }, - { - "epoch": 0.41516141980618154, - "grad_norm": 0.0009853861993178725, - "learning_rate": 0.00019999991525711534, - "loss": 46.0, - "step": 5430 - }, - { - "epoch": 0.41523787678957125, - "grad_norm": 0.0017397133633494377, - "learning_rate": 0.000199999915225842, - "loss": 46.0, - "step": 5431 - }, - { - "epoch": 0.415314333772961, - "grad_norm": 0.0005541547434404492, - "learning_rate": 0.00019999991519456295, - "loss": 46.0, - "step": 5432 - }, - { - "epoch": 0.4153907907563507, - "grad_norm": 0.0007481400389224291, - "learning_rate": 0.00019999991516327808, - "loss": 46.0, - "step": 5433 - }, - { - "epoch": 0.4154672477397404, - "grad_norm": 0.0007982912939041853, - "learning_rate": 0.00019999991513198744, - "loss": 46.0, - "step": 5434 - }, - { - "epoch": 0.4155437047231301, - "grad_norm": 0.0035754344426095486, - "learning_rate": 0.00019999991510069105, - "loss": 46.0, - "step": 5435 - }, - { - "epoch": 0.4156201617065199, - "grad_norm": 0.0020971496123820543, - "learning_rate": 0.00019999991506938888, - "loss": 46.0, - "step": 5436 - }, - { - "epoch": 0.4156966186899096, - "grad_norm": 0.0015283911488950253, - "learning_rate": 0.00019999991503808094, - "loss": 46.0, - "step": 5437 - }, - { - "epoch": 0.4157730756732993, - "grad_norm": 0.0027436655946075916, - "learning_rate": 0.00019999991500676723, - "loss": 46.0, - "step": 5438 - }, - { - "epoch": 0.415849532656689, - "grad_norm": 0.0018711426528170705, - "learning_rate": 0.00019999991497544778, - "loss": 46.0, - "step": 5439 - }, - { - "epoch": 0.41592598964007876, - "grad_norm": 0.013819126412272453, - "learning_rate": 0.00019999991494412255, - "loss": 46.0, - "step": 5440 - }, - { - "epoch": 0.41600244662346847, - "grad_norm": 0.0015264011453837156, - "learning_rate": 0.00019999991491279152, - "loss": 46.0, - "step": 5441 - }, - { - "epoch": 0.4160789036068582, - "grad_norm": 0.001619368209503591, - "learning_rate": 0.00019999991488145474, - "loss": 46.0, - "step": 5442 - }, - { - "epoch": 0.41615536059024794, - "grad_norm": 0.0025840725284069777, - "learning_rate": 0.0001999999148501122, - "loss": 46.0, - "step": 5443 - }, - { - "epoch": 0.41623181757363764, - "grad_norm": 0.0010630057659000158, - "learning_rate": 0.00019999991481876387, - "loss": 46.0, - "step": 5444 - }, - { - "epoch": 0.41630827455702735, - "grad_norm": 0.003099302761256695, - "learning_rate": 0.00019999991478740977, - "loss": 46.0, - "step": 5445 - }, - { - "epoch": 0.41638473154041705, - "grad_norm": 0.0006178977782838047, - "learning_rate": 0.00019999991475604993, - "loss": 46.0, - "step": 5446 - }, - { - "epoch": 0.4164611885238068, - "grad_norm": 0.0009201104403473437, - "learning_rate": 0.0001999999147246843, - "loss": 46.0, - "step": 5447 - }, - { - "epoch": 0.4165376455071965, - "grad_norm": 0.0019779487047344446, - "learning_rate": 0.0001999999146933129, - "loss": 46.0, - "step": 5448 - }, - { - "epoch": 0.4166141024905862, - "grad_norm": 0.001476823934353888, - "learning_rate": 0.00019999991466193573, - "loss": 46.0, - "step": 5449 - }, - { - "epoch": 0.41669055947397593, - "grad_norm": 0.00114912586286664, - "learning_rate": 0.00019999991463055282, - "loss": 46.0, - "step": 5450 - }, - { - "epoch": 0.4167670164573657, - "grad_norm": 0.001857487135566771, - "learning_rate": 0.00019999991459916408, - "loss": 46.0, - "step": 5451 - }, - { - "epoch": 0.4168434734407554, - "grad_norm": 0.0025243971031159163, - "learning_rate": 0.0001999999145677696, - "loss": 46.0, - "step": 5452 - }, - { - "epoch": 0.4169199304241451, - "grad_norm": 0.001984777394682169, - "learning_rate": 0.00019999991453636937, - "loss": 46.0, - "step": 5453 - }, - { - "epoch": 0.41699638740753486, - "grad_norm": 0.0016920112539082766, - "learning_rate": 0.00019999991450496337, - "loss": 46.0, - "step": 5454 - }, - { - "epoch": 0.41707284439092457, - "grad_norm": 0.001000522868707776, - "learning_rate": 0.00019999991447355156, - "loss": 46.0, - "step": 5455 - }, - { - "epoch": 0.4171493013743143, - "grad_norm": 0.0017415520269423723, - "learning_rate": 0.00019999991444213401, - "loss": 46.0, - "step": 5456 - }, - { - "epoch": 0.417225758357704, - "grad_norm": 0.0022787717171013355, - "learning_rate": 0.0001999999144107107, - "loss": 46.0, - "step": 5457 - }, - { - "epoch": 0.41730221534109374, - "grad_norm": 0.0020467701833695173, - "learning_rate": 0.0001999999143792816, - "loss": 46.0, - "step": 5458 - }, - { - "epoch": 0.41737867232448345, - "grad_norm": 0.0035578913521021605, - "learning_rate": 0.00019999991434784673, - "loss": 46.0, - "step": 5459 - }, - { - "epoch": 0.41745512930787315, - "grad_norm": 0.0009780119871720672, - "learning_rate": 0.0001999999143164061, - "loss": 46.0, - "step": 5460 - }, - { - "epoch": 0.41753158629126286, - "grad_norm": 0.0006003855960443616, - "learning_rate": 0.0001999999142849597, - "loss": 46.0, - "step": 5461 - }, - { - "epoch": 0.4176080432746526, - "grad_norm": 0.0014704280765727162, - "learning_rate": 0.00019999991425350753, - "loss": 46.0, - "step": 5462 - }, - { - "epoch": 0.4176845002580423, - "grad_norm": 0.0006409023771993816, - "learning_rate": 0.00019999991422204957, - "loss": 46.0, - "step": 5463 - }, - { - "epoch": 0.41776095724143203, - "grad_norm": 0.0013225938891991973, - "learning_rate": 0.00019999991419058588, - "loss": 46.0, - "step": 5464 - }, - { - "epoch": 0.41783741422482173, - "grad_norm": 0.0017730999970808625, - "learning_rate": 0.0001999999141591164, - "loss": 46.0, - "step": 5465 - }, - { - "epoch": 0.4179138712082115, - "grad_norm": 0.0006660941289737821, - "learning_rate": 0.00019999991412764114, - "loss": 46.0, - "step": 5466 - }, - { - "epoch": 0.4179903281916012, - "grad_norm": 0.0005924385040998459, - "learning_rate": 0.00019999991409616014, - "loss": 46.0, - "step": 5467 - }, - { - "epoch": 0.4180667851749909, - "grad_norm": 0.0013160814996808767, - "learning_rate": 0.00019999991406467337, - "loss": 46.0, - "step": 5468 - }, - { - "epoch": 0.41814324215838067, - "grad_norm": 0.0009196484461426735, - "learning_rate": 0.0001999999140331808, - "loss": 46.0, - "step": 5469 - }, - { - "epoch": 0.4182196991417704, - "grad_norm": 0.0006729019223712385, - "learning_rate": 0.00019999991400168247, - "loss": 46.0, - "step": 5470 - }, - { - "epoch": 0.4182961561251601, - "grad_norm": 0.0006046950584277511, - "learning_rate": 0.0001999999139701784, - "loss": 46.0, - "step": 5471 - }, - { - "epoch": 0.4183726131085498, - "grad_norm": 0.0005343265365809202, - "learning_rate": 0.0001999999139386685, - "loss": 46.0, - "step": 5472 - }, - { - "epoch": 0.41844907009193955, - "grad_norm": 0.0011145288590341806, - "learning_rate": 0.00019999991390715286, - "loss": 46.0, - "step": 5473 - }, - { - "epoch": 0.41852552707532925, - "grad_norm": 0.0025561274960637093, - "learning_rate": 0.0001999999138756315, - "loss": 46.0, - "step": 5474 - }, - { - "epoch": 0.41860198405871896, - "grad_norm": 0.0004951556329615414, - "learning_rate": 0.0001999999138441043, - "loss": 46.0, - "step": 5475 - }, - { - "epoch": 0.41867844104210866, - "grad_norm": 0.0014897570945322514, - "learning_rate": 0.00019999991381257134, - "loss": 46.0, - "step": 5476 - }, - { - "epoch": 0.4187548980254984, - "grad_norm": 0.0034681472461670637, - "learning_rate": 0.00019999991378103263, - "loss": 46.0, - "step": 5477 - }, - { - "epoch": 0.41883135500888813, - "grad_norm": 0.00209251930937171, - "learning_rate": 0.00019999991374948818, - "loss": 46.0, - "step": 5478 - }, - { - "epoch": 0.41890781199227783, - "grad_norm": 0.005612328182905912, - "learning_rate": 0.00019999991371793792, - "loss": 46.0, - "step": 5479 - }, - { - "epoch": 0.41898426897566754, - "grad_norm": 0.0005825295811519027, - "learning_rate": 0.0001999999136863819, - "loss": 46.0, - "step": 5480 - }, - { - "epoch": 0.4190607259590573, - "grad_norm": 0.002547320444136858, - "learning_rate": 0.00019999991365482012, - "loss": 46.0, - "step": 5481 - }, - { - "epoch": 0.419137182942447, - "grad_norm": 0.002230370882898569, - "learning_rate": 0.00019999991362325254, - "loss": 46.0, - "step": 5482 - }, - { - "epoch": 0.4192136399258367, - "grad_norm": 0.0015402492135763168, - "learning_rate": 0.0001999999135916792, - "loss": 46.0, - "step": 5483 - }, - { - "epoch": 0.4192900969092265, - "grad_norm": 0.00045340106589719653, - "learning_rate": 0.0001999999135601001, - "loss": 46.0, - "step": 5484 - }, - { - "epoch": 0.4193665538926162, - "grad_norm": 0.0008690215181559324, - "learning_rate": 0.00019999991352851522, - "loss": 46.0, - "step": 5485 - }, - { - "epoch": 0.4194430108760059, - "grad_norm": 0.0009650334832258523, - "learning_rate": 0.0001999999134969246, - "loss": 46.0, - "step": 5486 - }, - { - "epoch": 0.4195194678593956, - "grad_norm": 0.0008257623412646353, - "learning_rate": 0.0001999999134653282, - "loss": 46.0, - "step": 5487 - }, - { - "epoch": 0.41959592484278535, - "grad_norm": 0.0012264044489711523, - "learning_rate": 0.00019999991343372603, - "loss": 46.0, - "step": 5488 - }, - { - "epoch": 0.41967238182617506, - "grad_norm": 0.0012948067160323262, - "learning_rate": 0.00019999991340211807, - "loss": 46.0, - "step": 5489 - }, - { - "epoch": 0.41974883880956476, - "grad_norm": 0.0014125751331448555, - "learning_rate": 0.00019999991337050436, - "loss": 46.0, - "step": 5490 - }, - { - "epoch": 0.41982529579295447, - "grad_norm": 0.002354119438678026, - "learning_rate": 0.00019999991333888488, - "loss": 46.0, - "step": 5491 - }, - { - "epoch": 0.41990175277634423, - "grad_norm": 0.0032949866726994514, - "learning_rate": 0.00019999991330725962, - "loss": 46.0, - "step": 5492 - }, - { - "epoch": 0.41997820975973393, - "grad_norm": 0.0016361873131245375, - "learning_rate": 0.0001999999132756286, - "loss": 46.0, - "step": 5493 - }, - { - "epoch": 0.42005466674312364, - "grad_norm": 0.002817495260387659, - "learning_rate": 0.00019999991324399182, - "loss": 46.0, - "step": 5494 - }, - { - "epoch": 0.42013112372651334, - "grad_norm": 0.003947072196751833, - "learning_rate": 0.00019999991321234927, - "loss": 46.0, - "step": 5495 - }, - { - "epoch": 0.4202075807099031, - "grad_norm": 0.0008553503430448472, - "learning_rate": 0.00019999991318070092, - "loss": 46.0, - "step": 5496 - }, - { - "epoch": 0.4202840376932928, - "grad_norm": 0.0009838691912591457, - "learning_rate": 0.00019999991314904682, - "loss": 46.0, - "step": 5497 - }, - { - "epoch": 0.4203604946766825, - "grad_norm": 0.0006458622519858181, - "learning_rate": 0.00019999991311738695, - "loss": 46.0, - "step": 5498 - }, - { - "epoch": 0.4204369516600723, - "grad_norm": 0.001247179927304387, - "learning_rate": 0.0001999999130857213, - "loss": 46.0, - "step": 5499 - }, - { - "epoch": 0.420513408643462, - "grad_norm": 0.000877166457939893, - "learning_rate": 0.00019999991305404987, - "loss": 46.0, - "step": 5500 - }, - { - "epoch": 0.4205898656268517, - "grad_norm": 0.0005960864364169538, - "learning_rate": 0.00019999991302237273, - "loss": 46.0, - "step": 5501 - }, - { - "epoch": 0.4206663226102414, - "grad_norm": 0.0007253667572513223, - "learning_rate": 0.00019999991299068977, - "loss": 46.0, - "step": 5502 - }, - { - "epoch": 0.42074277959363116, - "grad_norm": 0.0023964273277670145, - "learning_rate": 0.00019999991295900104, - "loss": 46.0, - "step": 5503 - }, - { - "epoch": 0.42081923657702086, - "grad_norm": 0.0016958360793069005, - "learning_rate": 0.00019999991292730655, - "loss": 46.0, - "step": 5504 - }, - { - "epoch": 0.42089569356041057, - "grad_norm": 0.0007424805662594736, - "learning_rate": 0.00019999991289560632, - "loss": 46.0, - "step": 5505 - }, - { - "epoch": 0.42097215054380027, - "grad_norm": 0.0007164208800531924, - "learning_rate": 0.0001999999128639003, - "loss": 46.0, - "step": 5506 - }, - { - "epoch": 0.42104860752719003, - "grad_norm": 0.001105301664210856, - "learning_rate": 0.0001999999128321885, - "loss": 46.0, - "step": 5507 - }, - { - "epoch": 0.42112506451057974, - "grad_norm": 0.000531343393959105, - "learning_rate": 0.00019999991280047092, - "loss": 46.0, - "step": 5508 - }, - { - "epoch": 0.42120152149396944, - "grad_norm": 0.0007272218936122954, - "learning_rate": 0.0001999999127687476, - "loss": 46.0, - "step": 5509 - }, - { - "epoch": 0.42127797847735915, - "grad_norm": 0.005331685766577721, - "learning_rate": 0.00019999991273701847, - "loss": 46.0, - "step": 5510 - }, - { - "epoch": 0.4213544354607489, - "grad_norm": 0.0007073844317346811, - "learning_rate": 0.00019999991270528363, - "loss": 46.0, - "step": 5511 - }, - { - "epoch": 0.4214308924441386, - "grad_norm": 0.0015893859090283513, - "learning_rate": 0.00019999991267354296, - "loss": 46.0, - "step": 5512 - }, - { - "epoch": 0.4215073494275283, - "grad_norm": 0.002210618695244193, - "learning_rate": 0.00019999991264179655, - "loss": 46.0, - "step": 5513 - }, - { - "epoch": 0.4215838064109181, - "grad_norm": 0.001384711591526866, - "learning_rate": 0.00019999991261004439, - "loss": 46.0, - "step": 5514 - }, - { - "epoch": 0.4216602633943078, - "grad_norm": 0.0009261943050660193, - "learning_rate": 0.0001999999125782864, - "loss": 46.0, - "step": 5515 - }, - { - "epoch": 0.4217367203776975, - "grad_norm": 0.001576651236973703, - "learning_rate": 0.00019999991254652272, - "loss": 46.0, - "step": 5516 - }, - { - "epoch": 0.4218131773610872, - "grad_norm": 0.001993928337469697, - "learning_rate": 0.0001999999125147532, - "loss": 46.0, - "step": 5517 - }, - { - "epoch": 0.42188963434447696, - "grad_norm": 0.0009874376701191068, - "learning_rate": 0.00019999991248297792, - "loss": 46.0, - "step": 5518 - }, - { - "epoch": 0.42196609132786667, - "grad_norm": 0.002483912743628025, - "learning_rate": 0.00019999991245119692, - "loss": 46.0, - "step": 5519 - }, - { - "epoch": 0.42204254831125637, - "grad_norm": 0.0018782797269523144, - "learning_rate": 0.00019999991241941012, - "loss": 46.0, - "step": 5520 - }, - { - "epoch": 0.4221190052946461, - "grad_norm": 0.0009184435475617647, - "learning_rate": 0.00019999991238761755, - "loss": 46.0, - "step": 5521 - }, - { - "epoch": 0.42219546227803584, - "grad_norm": 0.0013539415085688233, - "learning_rate": 0.0001999999123558192, - "loss": 46.0, - "step": 5522 - }, - { - "epoch": 0.42227191926142554, - "grad_norm": 0.0007348635117523372, - "learning_rate": 0.0001999999123240151, - "loss": 46.0, - "step": 5523 - }, - { - "epoch": 0.42234837624481525, - "grad_norm": 0.0018051530933007598, - "learning_rate": 0.00019999991229220526, - "loss": 46.0, - "step": 5524 - }, - { - "epoch": 0.42242483322820495, - "grad_norm": 0.0007736569968983531, - "learning_rate": 0.0001999999122603896, - "loss": 46.0, - "step": 5525 - }, - { - "epoch": 0.4225012902115947, - "grad_norm": 0.002512391423806548, - "learning_rate": 0.00019999991222856818, - "loss": 46.0, - "step": 5526 - }, - { - "epoch": 0.4225777471949844, - "grad_norm": 0.0014235122362151742, - "learning_rate": 0.000199999912196741, - "loss": 46.0, - "step": 5527 - }, - { - "epoch": 0.4226542041783741, - "grad_norm": 0.0014216542476788163, - "learning_rate": 0.00019999991216490803, - "loss": 46.0, - "step": 5528 - }, - { - "epoch": 0.4227306611617639, - "grad_norm": 0.0017427793936803937, - "learning_rate": 0.0001999999121330693, - "loss": 46.0, - "step": 5529 - }, - { - "epoch": 0.4228071181451536, - "grad_norm": 0.0021457301918417215, - "learning_rate": 0.0001999999121012248, - "loss": 46.0, - "step": 5530 - }, - { - "epoch": 0.4228835751285433, - "grad_norm": 0.0008998525445349514, - "learning_rate": 0.00019999991206937456, - "loss": 46.0, - "step": 5531 - }, - { - "epoch": 0.422960032111933, - "grad_norm": 0.0026607727631926537, - "learning_rate": 0.00019999991203751853, - "loss": 46.0, - "step": 5532 - }, - { - "epoch": 0.42303648909532277, - "grad_norm": 0.0008612971287220716, - "learning_rate": 0.00019999991200565673, - "loss": 46.0, - "step": 5533 - }, - { - "epoch": 0.42311294607871247, - "grad_norm": 0.00044479413190856576, - "learning_rate": 0.00019999991197378915, - "loss": 46.0, - "step": 5534 - }, - { - "epoch": 0.4231894030621022, - "grad_norm": 0.0010325514012947679, - "learning_rate": 0.0001999999119419158, - "loss": 46.0, - "step": 5535 - }, - { - "epoch": 0.4232658600454919, - "grad_norm": 0.001145778689533472, - "learning_rate": 0.0001999999119100367, - "loss": 46.0, - "step": 5536 - }, - { - "epoch": 0.42334231702888164, - "grad_norm": 0.0009772671619430184, - "learning_rate": 0.00019999991187815184, - "loss": 46.0, - "step": 5537 - }, - { - "epoch": 0.42341877401227135, - "grad_norm": 0.0007602735422551632, - "learning_rate": 0.00019999991184626117, - "loss": 46.0, - "step": 5538 - }, - { - "epoch": 0.42349523099566105, - "grad_norm": 0.00092788360780105, - "learning_rate": 0.00019999991181436478, - "loss": 46.0, - "step": 5539 - }, - { - "epoch": 0.42357168797905076, - "grad_norm": 0.000411903893109411, - "learning_rate": 0.00019999991178246257, - "loss": 46.0, - "step": 5540 - }, - { - "epoch": 0.4236481449624405, - "grad_norm": 0.0018015257082879543, - "learning_rate": 0.0001999999117505546, - "loss": 46.0, - "step": 5541 - }, - { - "epoch": 0.4237246019458302, - "grad_norm": 0.0008646459900774062, - "learning_rate": 0.0001999999117186409, - "loss": 46.0, - "step": 5542 - }, - { - "epoch": 0.42380105892921993, - "grad_norm": 0.0024136521387845278, - "learning_rate": 0.0001999999116867214, - "loss": 46.0, - "step": 5543 - }, - { - "epoch": 0.4238775159126097, - "grad_norm": 0.0017535907682031393, - "learning_rate": 0.00019999991165479614, - "loss": 46.0, - "step": 5544 - }, - { - "epoch": 0.4239539728959994, - "grad_norm": 0.0022689446341246367, - "learning_rate": 0.00019999991162286508, - "loss": 46.0, - "step": 5545 - }, - { - "epoch": 0.4240304298793891, - "grad_norm": 0.002755317371338606, - "learning_rate": 0.0001999999115909283, - "loss": 46.0, - "step": 5546 - }, - { - "epoch": 0.4241068868627788, - "grad_norm": 0.0006008046912029386, - "learning_rate": 0.00019999991155898573, - "loss": 46.0, - "step": 5547 - }, - { - "epoch": 0.42418334384616857, - "grad_norm": 0.0027862340211868286, - "learning_rate": 0.00019999991152703736, - "loss": 46.0, - "step": 5548 - }, - { - "epoch": 0.4242598008295583, - "grad_norm": 0.000724279903806746, - "learning_rate": 0.00019999991149508326, - "loss": 46.0, - "step": 5549 - }, - { - "epoch": 0.424336257812948, - "grad_norm": 0.0008108525071293116, - "learning_rate": 0.00019999991146312337, - "loss": 46.0, - "step": 5550 - }, - { - "epoch": 0.4244127147963377, - "grad_norm": 0.0012870472855865955, - "learning_rate": 0.00019999991143115773, - "loss": 46.0, - "step": 5551 - }, - { - "epoch": 0.42448917177972745, - "grad_norm": 0.0013304775347933173, - "learning_rate": 0.0001999999113991863, - "loss": 46.0, - "step": 5552 - }, - { - "epoch": 0.42456562876311715, - "grad_norm": 0.0013518822379410267, - "learning_rate": 0.00019999991136720912, - "loss": 46.0, - "step": 5553 - }, - { - "epoch": 0.42464208574650686, - "grad_norm": 0.0010198524687439203, - "learning_rate": 0.00019999991133522616, - "loss": 46.0, - "step": 5554 - }, - { - "epoch": 0.42471854272989656, - "grad_norm": 0.005139484535902739, - "learning_rate": 0.0001999999113032374, - "loss": 46.0, - "step": 5555 - }, - { - "epoch": 0.4247949997132863, - "grad_norm": 0.0024663228541612625, - "learning_rate": 0.00019999991127124292, - "loss": 46.0, - "step": 5556 - }, - { - "epoch": 0.42487145669667603, - "grad_norm": 0.0020741941407322884, - "learning_rate": 0.00019999991123924264, - "loss": 46.0, - "step": 5557 - }, - { - "epoch": 0.42494791368006574, - "grad_norm": 0.0011090937769040465, - "learning_rate": 0.0001999999112072366, - "loss": 46.0, - "step": 5558 - }, - { - "epoch": 0.4250243706634555, - "grad_norm": 0.002045979490503669, - "learning_rate": 0.00019999991117522484, - "loss": 46.0, - "step": 5559 - }, - { - "epoch": 0.4251008276468452, - "grad_norm": 0.0011074787471443415, - "learning_rate": 0.00019999991114320724, - "loss": 46.0, - "step": 5560 - }, - { - "epoch": 0.4251772846302349, - "grad_norm": 0.0005929818144068122, - "learning_rate": 0.0001999999111111839, - "loss": 46.0, - "step": 5561 - }, - { - "epoch": 0.4252537416136246, - "grad_norm": 0.00236821873113513, - "learning_rate": 0.0001999999110791548, - "loss": 46.0, - "step": 5562 - }, - { - "epoch": 0.4253301985970144, - "grad_norm": 0.0007213192875497043, - "learning_rate": 0.00019999991104711987, - "loss": 46.0, - "step": 5563 - }, - { - "epoch": 0.4254066555804041, - "grad_norm": 0.0028703149873763323, - "learning_rate": 0.00019999991101507923, - "loss": 46.0, - "step": 5564 - }, - { - "epoch": 0.4254831125637938, - "grad_norm": 0.0013398994924500585, - "learning_rate": 0.00019999991098303282, - "loss": 46.0, - "step": 5565 - }, - { - "epoch": 0.4255595695471835, - "grad_norm": 0.002595379250124097, - "learning_rate": 0.0001999999109509806, - "loss": 46.0, - "step": 5566 - }, - { - "epoch": 0.42563602653057325, - "grad_norm": 0.000621318758931011, - "learning_rate": 0.00019999991091892264, - "loss": 46.0, - "step": 5567 - }, - { - "epoch": 0.42571248351396296, - "grad_norm": 0.0016340581933036447, - "learning_rate": 0.0001999999108868589, - "loss": 46.0, - "step": 5568 - }, - { - "epoch": 0.42578894049735266, - "grad_norm": 0.0007561444654129446, - "learning_rate": 0.00019999991085478943, - "loss": 46.0, - "step": 5569 - }, - { - "epoch": 0.4258653974807424, - "grad_norm": 0.0015946123749017715, - "learning_rate": 0.00019999991082271415, - "loss": 46.0, - "step": 5570 - }, - { - "epoch": 0.42594185446413213, - "grad_norm": 0.0008790738647803664, - "learning_rate": 0.00019999991079063312, - "loss": 46.0, - "step": 5571 - }, - { - "epoch": 0.42601831144752184, - "grad_norm": 0.00113858247641474, - "learning_rate": 0.00019999991075854632, - "loss": 46.0, - "step": 5572 - }, - { - "epoch": 0.42609476843091154, - "grad_norm": 0.0009134138817898929, - "learning_rate": 0.00019999991072645372, - "loss": 46.0, - "step": 5573 - }, - { - "epoch": 0.4261712254143013, - "grad_norm": 0.001085581025108695, - "learning_rate": 0.00019999991069435537, - "loss": 46.0, - "step": 5574 - }, - { - "epoch": 0.426247682397691, - "grad_norm": 0.0015665290411561728, - "learning_rate": 0.00019999991066225125, - "loss": 46.0, - "step": 5575 - }, - { - "epoch": 0.4263241393810807, - "grad_norm": 0.0018858853727579117, - "learning_rate": 0.00019999991063014138, - "loss": 46.0, - "step": 5576 - }, - { - "epoch": 0.4264005963644704, - "grad_norm": 0.009295439347624779, - "learning_rate": 0.0001999999105980257, - "loss": 46.0, - "step": 5577 - }, - { - "epoch": 0.4264770533478602, - "grad_norm": 0.0006506615900434554, - "learning_rate": 0.00019999991056590427, - "loss": 46.0, - "step": 5578 - }, - { - "epoch": 0.4265535103312499, - "grad_norm": 0.0015039339195936918, - "learning_rate": 0.0001999999105337771, - "loss": 46.0, - "step": 5579 - }, - { - "epoch": 0.4266299673146396, - "grad_norm": 0.0003653905587270856, - "learning_rate": 0.00019999991050164413, - "loss": 46.0, - "step": 5580 - }, - { - "epoch": 0.4267064242980293, - "grad_norm": 0.0010432812850922346, - "learning_rate": 0.0001999999104695054, - "loss": 46.0, - "step": 5581 - }, - { - "epoch": 0.42678288128141906, - "grad_norm": 0.0013864922802895308, - "learning_rate": 0.00019999991043736088, - "loss": 46.0, - "step": 5582 - }, - { - "epoch": 0.42685933826480876, - "grad_norm": 0.0008443210972473025, - "learning_rate": 0.0001999999104052106, - "loss": 46.0, - "step": 5583 - }, - { - "epoch": 0.42693579524819847, - "grad_norm": 0.0015386386075988412, - "learning_rate": 0.00019999991037305458, - "loss": 46.0, - "step": 5584 - }, - { - "epoch": 0.42701225223158823, - "grad_norm": 0.00041637918911874294, - "learning_rate": 0.00019999991034089275, - "loss": 46.0, - "step": 5585 - }, - { - "epoch": 0.42708870921497794, - "grad_norm": 0.0012116326251998544, - "learning_rate": 0.00019999991030872515, - "loss": 46.0, - "step": 5586 - }, - { - "epoch": 0.42716516619836764, - "grad_norm": 0.001534649869427085, - "learning_rate": 0.00019999991027655183, - "loss": 46.0, - "step": 5587 - }, - { - "epoch": 0.42724162318175735, - "grad_norm": 0.0006149267428554595, - "learning_rate": 0.0001999999102443727, - "loss": 46.0, - "step": 5588 - }, - { - "epoch": 0.4273180801651471, - "grad_norm": 0.0008732268470339477, - "learning_rate": 0.00019999991021218781, - "loss": 46.0, - "step": 5589 - }, - { - "epoch": 0.4273945371485368, - "grad_norm": 0.004182655829936266, - "learning_rate": 0.00019999991017999715, - "loss": 46.0, - "step": 5590 - }, - { - "epoch": 0.4274709941319265, - "grad_norm": 0.002968008164316416, - "learning_rate": 0.00019999991014780073, - "loss": 46.0, - "step": 5591 - }, - { - "epoch": 0.4275474511153162, - "grad_norm": 0.004070365335792303, - "learning_rate": 0.00019999991011559852, - "loss": 46.0, - "step": 5592 - }, - { - "epoch": 0.427623908098706, - "grad_norm": 0.002037449972704053, - "learning_rate": 0.00019999991008339056, - "loss": 46.0, - "step": 5593 - }, - { - "epoch": 0.4277003650820957, - "grad_norm": 0.0018142735352739692, - "learning_rate": 0.00019999991005117683, - "loss": 46.0, - "step": 5594 - }, - { - "epoch": 0.4277768220654854, - "grad_norm": 0.0020180039573460817, - "learning_rate": 0.0001999999100189573, - "loss": 46.0, - "step": 5595 - }, - { - "epoch": 0.4278532790488751, - "grad_norm": 0.01016896590590477, - "learning_rate": 0.00019999990998673204, - "loss": 46.0, - "step": 5596 - }, - { - "epoch": 0.42792973603226486, - "grad_norm": 0.0009558206656947732, - "learning_rate": 0.00019999990995450098, - "loss": 46.0, - "step": 5597 - }, - { - "epoch": 0.42800619301565457, - "grad_norm": 0.001294442918151617, - "learning_rate": 0.00019999990992226416, - "loss": 46.0, - "step": 5598 - }, - { - "epoch": 0.4280826499990443, - "grad_norm": 0.0005126984906382859, - "learning_rate": 0.00019999990989002158, - "loss": 46.0, - "step": 5599 - }, - { - "epoch": 0.42815910698243403, - "grad_norm": 0.012218517251312733, - "learning_rate": 0.00019999990985777324, - "loss": 46.0, - "step": 5600 - }, - { - "epoch": 0.42823556396582374, - "grad_norm": 0.0019848363008350134, - "learning_rate": 0.00019999990982551912, - "loss": 46.0, - "step": 5601 - }, - { - "epoch": 0.42831202094921345, - "grad_norm": 0.00214760797098279, - "learning_rate": 0.00019999990979325922, - "loss": 46.0, - "step": 5602 - }, - { - "epoch": 0.42838847793260315, - "grad_norm": 0.0008706836379133165, - "learning_rate": 0.00019999990976099356, - "loss": 46.0, - "step": 5603 - }, - { - "epoch": 0.4284649349159929, - "grad_norm": 0.0010077876504510641, - "learning_rate": 0.00019999990972872214, - "loss": 46.0, - "step": 5604 - }, - { - "epoch": 0.4285413918993826, - "grad_norm": 0.0022549619898200035, - "learning_rate": 0.00019999990969644493, - "loss": 46.0, - "step": 5605 - }, - { - "epoch": 0.4286178488827723, - "grad_norm": 0.0020016906782984734, - "learning_rate": 0.00019999990966416194, - "loss": 46.0, - "step": 5606 - }, - { - "epoch": 0.42869430586616203, - "grad_norm": 0.0013492833822965622, - "learning_rate": 0.0001999999096318732, - "loss": 46.0, - "step": 5607 - }, - { - "epoch": 0.4287707628495518, - "grad_norm": 0.002511871512979269, - "learning_rate": 0.00019999990959957868, - "loss": 46.0, - "step": 5608 - }, - { - "epoch": 0.4288472198329415, - "grad_norm": 0.0009711926686577499, - "learning_rate": 0.00019999990956727842, - "loss": 46.0, - "step": 5609 - }, - { - "epoch": 0.4289236768163312, - "grad_norm": 0.0008278907625935972, - "learning_rate": 0.00019999990953497237, - "loss": 46.0, - "step": 5610 - }, - { - "epoch": 0.4290001337997209, - "grad_norm": 0.000699732918292284, - "learning_rate": 0.00019999990950266057, - "loss": 46.0, - "step": 5611 - }, - { - "epoch": 0.42907659078311067, - "grad_norm": 0.0010250251507386565, - "learning_rate": 0.00019999990947034294, - "loss": 46.0, - "step": 5612 - }, - { - "epoch": 0.4291530477665004, - "grad_norm": 0.0015237688785418868, - "learning_rate": 0.0001999999094380196, - "loss": 46.0, - "step": 5613 - }, - { - "epoch": 0.4292295047498901, - "grad_norm": 0.0006184679223224521, - "learning_rate": 0.00019999990940569048, - "loss": 46.0, - "step": 5614 - }, - { - "epoch": 0.42930596173327984, - "grad_norm": 0.000717982416972518, - "learning_rate": 0.0001999999093733556, - "loss": 46.0, - "step": 5615 - }, - { - "epoch": 0.42938241871666954, - "grad_norm": 0.001517292927019298, - "learning_rate": 0.00019999990934101492, - "loss": 46.0, - "step": 5616 - }, - { - "epoch": 0.42945887570005925, - "grad_norm": 0.001716771861538291, - "learning_rate": 0.0001999999093086685, - "loss": 46.0, - "step": 5617 - }, - { - "epoch": 0.42953533268344896, - "grad_norm": 0.0008201595046557486, - "learning_rate": 0.0001999999092763163, - "loss": 46.0, - "step": 5618 - }, - { - "epoch": 0.4296117896668387, - "grad_norm": 0.002338016638532281, - "learning_rate": 0.0001999999092439583, - "loss": 46.0, - "step": 5619 - }, - { - "epoch": 0.4296882466502284, - "grad_norm": 0.0006418954581022263, - "learning_rate": 0.00019999990921159454, - "loss": 46.0, - "step": 5620 - }, - { - "epoch": 0.42976470363361813, - "grad_norm": 0.002266326919198036, - "learning_rate": 0.00019999990917922506, - "loss": 46.0, - "step": 5621 - }, - { - "epoch": 0.42984116061700783, - "grad_norm": 0.0009738274966366589, - "learning_rate": 0.00019999990914684976, - "loss": 46.0, - "step": 5622 - }, - { - "epoch": 0.4299176176003976, - "grad_norm": 0.0008395953918807209, - "learning_rate": 0.00019999990911446873, - "loss": 46.0, - "step": 5623 - }, - { - "epoch": 0.4299940745837873, - "grad_norm": 0.0032531479373574257, - "learning_rate": 0.00019999990908208188, - "loss": 46.0, - "step": 5624 - }, - { - "epoch": 0.430070531567177, - "grad_norm": 0.0042190407402813435, - "learning_rate": 0.0001999999090496893, - "loss": 46.0, - "step": 5625 - }, - { - "epoch": 0.4301469885505667, - "grad_norm": 0.0003855096292681992, - "learning_rate": 0.00019999990901729096, - "loss": 46.0, - "step": 5626 - }, - { - "epoch": 0.4302234455339565, - "grad_norm": 0.0032034313771873713, - "learning_rate": 0.00019999990898488684, - "loss": 46.0, - "step": 5627 - }, - { - "epoch": 0.4302999025173462, - "grad_norm": 0.0020397244952619076, - "learning_rate": 0.00019999990895247692, - "loss": 46.0, - "step": 5628 - }, - { - "epoch": 0.4303763595007359, - "grad_norm": 0.0011068512685596943, - "learning_rate": 0.00019999990892006125, - "loss": 46.0, - "step": 5629 - }, - { - "epoch": 0.43045281648412564, - "grad_norm": 0.002530556870624423, - "learning_rate": 0.0001999999088876398, - "loss": 46.0, - "step": 5630 - }, - { - "epoch": 0.43052927346751535, - "grad_norm": 0.0010821041651070118, - "learning_rate": 0.0001999999088552126, - "loss": 46.0, - "step": 5631 - }, - { - "epoch": 0.43060573045090506, - "grad_norm": 0.011005576699972153, - "learning_rate": 0.0001999999088227796, - "loss": 46.0, - "step": 5632 - }, - { - "epoch": 0.43068218743429476, - "grad_norm": 0.001517979078926146, - "learning_rate": 0.00019999990879034088, - "loss": 46.0, - "step": 5633 - }, - { - "epoch": 0.4307586444176845, - "grad_norm": 0.0010132138850167394, - "learning_rate": 0.00019999990875789635, - "loss": 46.0, - "step": 5634 - }, - { - "epoch": 0.4308351014010742, - "grad_norm": 0.0007832224946469069, - "learning_rate": 0.00019999990872544607, - "loss": 46.0, - "step": 5635 - }, - { - "epoch": 0.43091155838446393, - "grad_norm": 0.0015773362247273326, - "learning_rate": 0.00019999990869299002, - "loss": 46.0, - "step": 5636 - }, - { - "epoch": 0.43098801536785364, - "grad_norm": 0.0015283471439033747, - "learning_rate": 0.0001999999086605282, - "loss": 46.0, - "step": 5637 - }, - { - "epoch": 0.4310644723512434, - "grad_norm": 0.0038945076521486044, - "learning_rate": 0.00019999990862806062, - "loss": 46.0, - "step": 5638 - }, - { - "epoch": 0.4311409293346331, - "grad_norm": 0.0041750576347112656, - "learning_rate": 0.00019999990859558724, - "loss": 46.0, - "step": 5639 - }, - { - "epoch": 0.4312173863180228, - "grad_norm": 0.0019340752623975277, - "learning_rate": 0.0001999999085631081, - "loss": 46.0, - "step": 5640 - }, - { - "epoch": 0.4312938433014125, - "grad_norm": 0.002922413172200322, - "learning_rate": 0.0001999999085306232, - "loss": 46.0, - "step": 5641 - }, - { - "epoch": 0.4313703002848023, - "grad_norm": 0.0004881019704043865, - "learning_rate": 0.00019999990849813254, - "loss": 46.0, - "step": 5642 - }, - { - "epoch": 0.431446757268192, - "grad_norm": 0.0009905296610668302, - "learning_rate": 0.0001999999084656361, - "loss": 46.0, - "step": 5643 - }, - { - "epoch": 0.4315232142515817, - "grad_norm": 0.001366614829748869, - "learning_rate": 0.0001999999084331339, - "loss": 46.0, - "step": 5644 - }, - { - "epoch": 0.43159967123497145, - "grad_norm": 0.0013193455524742603, - "learning_rate": 0.0001999999084006259, - "loss": 46.0, - "step": 5645 - }, - { - "epoch": 0.43167612821836115, - "grad_norm": 0.0019613460171967745, - "learning_rate": 0.00019999990836811214, - "loss": 46.0, - "step": 5646 - }, - { - "epoch": 0.43175258520175086, - "grad_norm": 0.000924398482311517, - "learning_rate": 0.00019999990833559266, - "loss": 46.0, - "step": 5647 - }, - { - "epoch": 0.43182904218514057, - "grad_norm": 0.000894192315172404, - "learning_rate": 0.00019999990830306733, - "loss": 46.0, - "step": 5648 - }, - { - "epoch": 0.4319054991685303, - "grad_norm": 0.0017163888551294804, - "learning_rate": 0.0001999999082705363, - "loss": 46.0, - "step": 5649 - }, - { - "epoch": 0.43198195615192003, - "grad_norm": 0.0011232475517317653, - "learning_rate": 0.00019999990823799948, - "loss": 46.0, - "step": 5650 - }, - { - "epoch": 0.43205841313530974, - "grad_norm": 0.0033304686658084393, - "learning_rate": 0.00019999990820545688, - "loss": 46.0, - "step": 5651 - }, - { - "epoch": 0.43213487011869944, - "grad_norm": 0.00852080900222063, - "learning_rate": 0.0001999999081729085, - "loss": 46.0, - "step": 5652 - }, - { - "epoch": 0.4322113271020892, - "grad_norm": 0.002735903486609459, - "learning_rate": 0.0001999999081403544, - "loss": 46.0, - "step": 5653 - }, - { - "epoch": 0.4322877840854789, - "grad_norm": 0.0007669954793527722, - "learning_rate": 0.00019999990810779447, - "loss": 46.0, - "step": 5654 - }, - { - "epoch": 0.4323642410688686, - "grad_norm": 0.00026303669437766075, - "learning_rate": 0.0001999999080752288, - "loss": 46.0, - "step": 5655 - }, - { - "epoch": 0.4324406980522583, - "grad_norm": 0.0008959940169006586, - "learning_rate": 0.00019999990804265734, - "loss": 46.0, - "step": 5656 - }, - { - "epoch": 0.4325171550356481, - "grad_norm": 0.0012439782731235027, - "learning_rate": 0.00019999990801008015, - "loss": 46.0, - "step": 5657 - }, - { - "epoch": 0.4325936120190378, - "grad_norm": 0.0018033916130661964, - "learning_rate": 0.00019999990797749714, - "loss": 46.0, - "step": 5658 - }, - { - "epoch": 0.4326700690024275, - "grad_norm": 0.0007578761433251202, - "learning_rate": 0.0001999999079449084, - "loss": 46.0, - "step": 5659 - }, - { - "epoch": 0.43274652598581725, - "grad_norm": 0.002927834866568446, - "learning_rate": 0.00019999990791231387, - "loss": 46.0, - "step": 5660 - }, - { - "epoch": 0.43282298296920696, - "grad_norm": 0.0008654286502860487, - "learning_rate": 0.0001999999078797136, - "loss": 46.0, - "step": 5661 - }, - { - "epoch": 0.43289943995259667, - "grad_norm": 0.003591811517253518, - "learning_rate": 0.00019999990784710754, - "loss": 46.0, - "step": 5662 - }, - { - "epoch": 0.43297589693598637, - "grad_norm": 0.00880864355713129, - "learning_rate": 0.00019999990781449572, - "loss": 46.0, - "step": 5663 - }, - { - "epoch": 0.43305235391937613, - "grad_norm": 0.0011075816582888365, - "learning_rate": 0.00019999990778187812, - "loss": 46.0, - "step": 5664 - }, - { - "epoch": 0.43312881090276584, - "grad_norm": 0.0003506146022118628, - "learning_rate": 0.00019999990774925475, - "loss": 46.0, - "step": 5665 - }, - { - "epoch": 0.43320526788615554, - "grad_norm": 0.0024543479084968567, - "learning_rate": 0.0001999999077166256, - "loss": 46.0, - "step": 5666 - }, - { - "epoch": 0.43328172486954525, - "grad_norm": 0.0023912524338811636, - "learning_rate": 0.00019999990768399068, - "loss": 46.0, - "step": 5667 - }, - { - "epoch": 0.433358181852935, - "grad_norm": 0.003390712197870016, - "learning_rate": 0.00019999990765135, - "loss": 46.0, - "step": 5668 - }, - { - "epoch": 0.4334346388363247, - "grad_norm": 0.0027496919501572847, - "learning_rate": 0.00019999990761870358, - "loss": 46.0, - "step": 5669 - }, - { - "epoch": 0.4335110958197144, - "grad_norm": 0.0011278516612946987, - "learning_rate": 0.00019999990758605134, - "loss": 46.0, - "step": 5670 - }, - { - "epoch": 0.4335875528031041, - "grad_norm": 0.0019641281105577946, - "learning_rate": 0.00019999990755339338, - "loss": 46.0, - "step": 5671 - }, - { - "epoch": 0.4336640097864939, - "grad_norm": 0.015833742916584015, - "learning_rate": 0.00019999990752072962, - "loss": 46.0, - "step": 5672 - }, - { - "epoch": 0.4337404667698836, - "grad_norm": 0.0010731312213465571, - "learning_rate": 0.0001999999074880601, - "loss": 46.0, - "step": 5673 - }, - { - "epoch": 0.4338169237532733, - "grad_norm": 0.0010447646491229534, - "learning_rate": 0.00019999990745538478, - "loss": 46.0, - "step": 5674 - }, - { - "epoch": 0.43389338073666306, - "grad_norm": 0.005253052804619074, - "learning_rate": 0.00019999990742270373, - "loss": 46.0, - "step": 5675 - }, - { - "epoch": 0.43396983772005276, - "grad_norm": 0.003013312816619873, - "learning_rate": 0.0001999999073900169, - "loss": 46.0, - "step": 5676 - }, - { - "epoch": 0.43404629470344247, - "grad_norm": 0.003034434514120221, - "learning_rate": 0.0001999999073573243, - "loss": 46.0, - "step": 5677 - }, - { - "epoch": 0.4341227516868322, - "grad_norm": 0.0014418066712096334, - "learning_rate": 0.00019999990732462593, - "loss": 46.0, - "step": 5678 - }, - { - "epoch": 0.43419920867022194, - "grad_norm": 0.003474494442343712, - "learning_rate": 0.00019999990729192176, - "loss": 46.0, - "step": 5679 - }, - { - "epoch": 0.43427566565361164, - "grad_norm": 0.0027134548872709274, - "learning_rate": 0.00019999990725921187, - "loss": 46.0, - "step": 5680 - }, - { - "epoch": 0.43435212263700135, - "grad_norm": 0.0006129696266725659, - "learning_rate": 0.0001999999072264962, - "loss": 46.0, - "step": 5681 - }, - { - "epoch": 0.43442857962039105, - "grad_norm": 0.0012946901842951775, - "learning_rate": 0.00019999990719377474, - "loss": 46.0, - "step": 5682 - }, - { - "epoch": 0.4345050366037808, - "grad_norm": 0.0019250615732744336, - "learning_rate": 0.00019999990716104753, - "loss": 46.0, - "step": 5683 - }, - { - "epoch": 0.4345814935871705, - "grad_norm": 0.004271842073649168, - "learning_rate": 0.00019999990712831454, - "loss": 46.0, - "step": 5684 - }, - { - "epoch": 0.4346579505705602, - "grad_norm": 0.000979638542048633, - "learning_rate": 0.00019999990709557576, - "loss": 46.0, - "step": 5685 - }, - { - "epoch": 0.43473440755395, - "grad_norm": 0.0004195354413241148, - "learning_rate": 0.00019999990706283125, - "loss": 46.0, - "step": 5686 - }, - { - "epoch": 0.4348108645373397, - "grad_norm": 0.0006629640120081604, - "learning_rate": 0.00019999990703008095, - "loss": 46.0, - "step": 5687 - }, - { - "epoch": 0.4348873215207294, - "grad_norm": 0.002123679965734482, - "learning_rate": 0.0001999999069973249, - "loss": 46.0, - "step": 5688 - }, - { - "epoch": 0.4349637785041191, - "grad_norm": 0.0007858469034545124, - "learning_rate": 0.00019999990696456304, - "loss": 46.0, - "step": 5689 - }, - { - "epoch": 0.43504023548750886, - "grad_norm": 0.0010483539663255215, - "learning_rate": 0.00019999990693179545, - "loss": 46.0, - "step": 5690 - }, - { - "epoch": 0.43511669247089857, - "grad_norm": 0.0009564394131302834, - "learning_rate": 0.00019999990689902207, - "loss": 46.0, - "step": 5691 - }, - { - "epoch": 0.4351931494542883, - "grad_norm": 0.0003780309925787151, - "learning_rate": 0.00019999990686624293, - "loss": 46.0, - "step": 5692 - }, - { - "epoch": 0.435269606437678, - "grad_norm": 0.004239376168698072, - "learning_rate": 0.000199999906833458, - "loss": 46.0, - "step": 5693 - }, - { - "epoch": 0.43534606342106774, - "grad_norm": 0.0011086431331932545, - "learning_rate": 0.00019999990680066735, - "loss": 46.0, - "step": 5694 - }, - { - "epoch": 0.43542252040445745, - "grad_norm": 0.0013577269855886698, - "learning_rate": 0.00019999990676787088, - "loss": 46.0, - "step": 5695 - }, - { - "epoch": 0.43549897738784715, - "grad_norm": 0.000948662287555635, - "learning_rate": 0.00019999990673506867, - "loss": 46.0, - "step": 5696 - }, - { - "epoch": 0.43557543437123686, - "grad_norm": 0.0005964995361864567, - "learning_rate": 0.00019999990670226066, - "loss": 46.0, - "step": 5697 - }, - { - "epoch": 0.4356518913546266, - "grad_norm": 0.0008766123792156577, - "learning_rate": 0.0001999999066694469, - "loss": 46.0, - "step": 5698 - }, - { - "epoch": 0.4357283483380163, - "grad_norm": 0.0029147870372980833, - "learning_rate": 0.00019999990663662737, - "loss": 46.0, - "step": 5699 - }, - { - "epoch": 0.43580480532140603, - "grad_norm": 0.0014968147734180093, - "learning_rate": 0.0001999999066038021, - "loss": 46.0, - "step": 5700 - }, - { - "epoch": 0.4358812623047958, - "grad_norm": 0.002592020435258746, - "learning_rate": 0.000199999906570971, - "loss": 46.0, - "step": 5701 - }, - { - "epoch": 0.4359577192881855, - "grad_norm": 0.0006876286934129894, - "learning_rate": 0.00019999990653813416, - "loss": 46.0, - "step": 5702 - }, - { - "epoch": 0.4360341762715752, - "grad_norm": 0.0011866152053698897, - "learning_rate": 0.00019999990650529156, - "loss": 46.0, - "step": 5703 - }, - { - "epoch": 0.4361106332549649, - "grad_norm": 0.0007853118004277349, - "learning_rate": 0.0001999999064724432, - "loss": 46.0, - "step": 5704 - }, - { - "epoch": 0.43618709023835467, - "grad_norm": 0.0013021667255088687, - "learning_rate": 0.00019999990643958904, - "loss": 46.0, - "step": 5705 - }, - { - "epoch": 0.4362635472217444, - "grad_norm": 0.0014558027032762766, - "learning_rate": 0.00019999990640672915, - "loss": 46.0, - "step": 5706 - }, - { - "epoch": 0.4363400042051341, - "grad_norm": 0.0006471550441347063, - "learning_rate": 0.00019999990637386344, - "loss": 46.0, - "step": 5707 - }, - { - "epoch": 0.4364164611885238, - "grad_norm": 0.0007654508226551116, - "learning_rate": 0.000199999906340992, - "loss": 46.0, - "step": 5708 - }, - { - "epoch": 0.43649291817191355, - "grad_norm": 0.0010116776684299111, - "learning_rate": 0.00019999990630811476, - "loss": 46.0, - "step": 5709 - }, - { - "epoch": 0.43656937515530325, - "grad_norm": 0.0016404909547418356, - "learning_rate": 0.00019999990627523175, - "loss": 46.0, - "step": 5710 - }, - { - "epoch": 0.43664583213869296, - "grad_norm": 0.0008823841926641762, - "learning_rate": 0.000199999906242343, - "loss": 46.0, - "step": 5711 - }, - { - "epoch": 0.43672228912208266, - "grad_norm": 0.0008995257667265832, - "learning_rate": 0.00019999990620944846, - "loss": 46.0, - "step": 5712 - }, - { - "epoch": 0.4367987461054724, - "grad_norm": 0.001308804377913475, - "learning_rate": 0.00019999990617654815, - "loss": 46.0, - "step": 5713 - }, - { - "epoch": 0.43687520308886213, - "grad_norm": 0.0011804414680227637, - "learning_rate": 0.0001999999061436421, - "loss": 46.0, - "step": 5714 - }, - { - "epoch": 0.43695166007225184, - "grad_norm": 0.002129241591319442, - "learning_rate": 0.00019999990611073028, - "loss": 46.0, - "step": 5715 - }, - { - "epoch": 0.4370281170556416, - "grad_norm": 0.0037376261316239834, - "learning_rate": 0.00019999990607781265, - "loss": 46.0, - "step": 5716 - }, - { - "epoch": 0.4371045740390313, - "grad_norm": 0.0027529390063136816, - "learning_rate": 0.00019999990604488926, - "loss": 46.0, - "step": 5717 - }, - { - "epoch": 0.437181031022421, - "grad_norm": 0.0007344126934185624, - "learning_rate": 0.00019999990601196014, - "loss": 46.0, - "step": 5718 - }, - { - "epoch": 0.4372574880058107, - "grad_norm": 0.001152716693468392, - "learning_rate": 0.0001999999059790252, - "loss": 46.0, - "step": 5719 - }, - { - "epoch": 0.4373339449892005, - "grad_norm": 0.0012725250562652946, - "learning_rate": 0.0001999999059460845, - "loss": 46.0, - "step": 5720 - }, - { - "epoch": 0.4374104019725902, - "grad_norm": 0.001870090956799686, - "learning_rate": 0.00019999990591313806, - "loss": 46.0, - "step": 5721 - }, - { - "epoch": 0.4374868589559799, - "grad_norm": 0.005571620538830757, - "learning_rate": 0.00019999990588018583, - "loss": 46.0, - "step": 5722 - }, - { - "epoch": 0.4375633159393696, - "grad_norm": 0.0014280268223956227, - "learning_rate": 0.00019999990584722784, - "loss": 46.0, - "step": 5723 - }, - { - "epoch": 0.43763977292275935, - "grad_norm": 0.0005739098414778709, - "learning_rate": 0.00019999990581426406, - "loss": 46.0, - "step": 5724 - }, - { - "epoch": 0.43771622990614906, - "grad_norm": 0.0011787748662754893, - "learning_rate": 0.00019999990578129453, - "loss": 46.0, - "step": 5725 - }, - { - "epoch": 0.43779268688953876, - "grad_norm": 0.001546344137750566, - "learning_rate": 0.00019999990574831925, - "loss": 46.0, - "step": 5726 - }, - { - "epoch": 0.43786914387292847, - "grad_norm": 0.0007155445637181401, - "learning_rate": 0.00019999990571533814, - "loss": 46.0, - "step": 5727 - }, - { - "epoch": 0.43794560085631823, - "grad_norm": 0.0018179194303229451, - "learning_rate": 0.0001999999056823513, - "loss": 46.0, - "step": 5728 - }, - { - "epoch": 0.43802205783970793, - "grad_norm": 0.0032046008855104446, - "learning_rate": 0.0001999999056493587, - "loss": 46.0, - "step": 5729 - }, - { - "epoch": 0.43809851482309764, - "grad_norm": 0.0014087113086134195, - "learning_rate": 0.0001999999056163603, - "loss": 46.0, - "step": 5730 - }, - { - "epoch": 0.4381749718064874, - "grad_norm": 0.001231934642419219, - "learning_rate": 0.00019999990558335618, - "loss": 46.0, - "step": 5731 - }, - { - "epoch": 0.4382514287898771, - "grad_norm": 0.0010691469069570303, - "learning_rate": 0.0001999999055503462, - "loss": 46.0, - "step": 5732 - }, - { - "epoch": 0.4383278857732668, - "grad_norm": 0.0017734867287799716, - "learning_rate": 0.00019999990551733055, - "loss": 46.0, - "step": 5733 - }, - { - "epoch": 0.4384043427566565, - "grad_norm": 0.0005985402385704219, - "learning_rate": 0.00019999990548430908, - "loss": 46.0, - "step": 5734 - }, - { - "epoch": 0.4384807997400463, - "grad_norm": 0.0026119702961295843, - "learning_rate": 0.00019999990545128187, - "loss": 46.0, - "step": 5735 - }, - { - "epoch": 0.438557256723436, - "grad_norm": 0.001037146896123886, - "learning_rate": 0.00019999990541824886, - "loss": 46.0, - "step": 5736 - }, - { - "epoch": 0.4386337137068257, - "grad_norm": 0.0008529619663022459, - "learning_rate": 0.0001999999053852101, - "loss": 46.0, - "step": 5737 - }, - { - "epoch": 0.4387101706902154, - "grad_norm": 0.0009773393394425511, - "learning_rate": 0.00019999990535216554, - "loss": 46.0, - "step": 5738 - }, - { - "epoch": 0.43878662767360516, - "grad_norm": 0.0008442993857897818, - "learning_rate": 0.00019999990531911524, - "loss": 46.0, - "step": 5739 - }, - { - "epoch": 0.43886308465699486, - "grad_norm": 0.0020586398895829916, - "learning_rate": 0.00019999990528605916, - "loss": 46.0, - "step": 5740 - }, - { - "epoch": 0.43893954164038457, - "grad_norm": 0.0028369424398988485, - "learning_rate": 0.0001999999052529973, - "loss": 46.0, - "step": 5741 - }, - { - "epoch": 0.4390159986237743, - "grad_norm": 0.0022347168996930122, - "learning_rate": 0.0001999999052199297, - "loss": 46.0, - "step": 5742 - }, - { - "epoch": 0.43909245560716403, - "grad_norm": 0.0008101914427243173, - "learning_rate": 0.00019999990518685632, - "loss": 46.0, - "step": 5743 - }, - { - "epoch": 0.43916891259055374, - "grad_norm": 0.0004884616355411708, - "learning_rate": 0.00019999990515377715, - "loss": 46.0, - "step": 5744 - }, - { - "epoch": 0.43924536957394344, - "grad_norm": 0.0020497492514550686, - "learning_rate": 0.00019999990512069223, - "loss": 46.0, - "step": 5745 - }, - { - "epoch": 0.4393218265573332, - "grad_norm": 0.0013198519591242075, - "learning_rate": 0.00019999990508760154, - "loss": 46.0, - "step": 5746 - }, - { - "epoch": 0.4393982835407229, - "grad_norm": 0.0008747435640543699, - "learning_rate": 0.00019999990505450508, - "loss": 46.0, - "step": 5747 - }, - { - "epoch": 0.4394747405241126, - "grad_norm": 0.0019397714640945196, - "learning_rate": 0.00019999990502140284, - "loss": 46.0, - "step": 5748 - }, - { - "epoch": 0.4395511975075023, - "grad_norm": 0.0004283189191482961, - "learning_rate": 0.00019999990498829483, - "loss": 46.0, - "step": 5749 - }, - { - "epoch": 0.4396276544908921, - "grad_norm": 0.0016128912102431059, - "learning_rate": 0.00019999990495518107, - "loss": 46.0, - "step": 5750 - }, - { - "epoch": 0.4397041114742818, - "grad_norm": 0.0005147580523043871, - "learning_rate": 0.00019999990492206154, - "loss": 46.0, - "step": 5751 - }, - { - "epoch": 0.4397805684576715, - "grad_norm": 0.0009103418560698628, - "learning_rate": 0.00019999990488893624, - "loss": 46.0, - "step": 5752 - }, - { - "epoch": 0.4398570254410612, - "grad_norm": 0.0009675624896772206, - "learning_rate": 0.00019999990485580513, - "loss": 46.0, - "step": 5753 - }, - { - "epoch": 0.43993348242445096, - "grad_norm": 0.0031458481680601835, - "learning_rate": 0.0001999999048226683, - "loss": 46.0, - "step": 5754 - }, - { - "epoch": 0.44000993940784067, - "grad_norm": 0.000976794632151723, - "learning_rate": 0.00019999990478952566, - "loss": 46.0, - "step": 5755 - }, - { - "epoch": 0.44008639639123037, - "grad_norm": 0.0007982795359566808, - "learning_rate": 0.0001999999047563773, - "loss": 46.0, - "step": 5756 - }, - { - "epoch": 0.4401628533746201, - "grad_norm": 0.005540730431675911, - "learning_rate": 0.00019999990472322312, - "loss": 46.0, - "step": 5757 - }, - { - "epoch": 0.44023931035800984, - "grad_norm": 0.0014101737178862095, - "learning_rate": 0.0001999999046900632, - "loss": 46.0, - "step": 5758 - }, - { - "epoch": 0.44031576734139954, - "grad_norm": 0.001606898382306099, - "learning_rate": 0.00019999990465689748, - "loss": 46.0, - "step": 5759 - }, - { - "epoch": 0.44039222432478925, - "grad_norm": 0.0017210078658536077, - "learning_rate": 0.00019999990462372602, - "loss": 46.0, - "step": 5760 - }, - { - "epoch": 0.440468681308179, - "grad_norm": 0.0007553829345852137, - "learning_rate": 0.0001999999045905488, - "loss": 46.0, - "step": 5761 - }, - { - "epoch": 0.4405451382915687, - "grad_norm": 0.0029103581327944994, - "learning_rate": 0.0001999999045573658, - "loss": 46.0, - "step": 5762 - }, - { - "epoch": 0.4406215952749584, - "grad_norm": 0.0010250663617625833, - "learning_rate": 0.00019999990452417702, - "loss": 46.0, - "step": 5763 - }, - { - "epoch": 0.4406980522583481, - "grad_norm": 0.001137417508289218, - "learning_rate": 0.00019999990449098246, - "loss": 46.0, - "step": 5764 - }, - { - "epoch": 0.4407745092417379, - "grad_norm": 0.0021452028304338455, - "learning_rate": 0.00019999990445778215, - "loss": 46.0, - "step": 5765 - }, - { - "epoch": 0.4408509662251276, - "grad_norm": 0.0005929744220338762, - "learning_rate": 0.00019999990442457608, - "loss": 46.0, - "step": 5766 - }, - { - "epoch": 0.4409274232085173, - "grad_norm": 0.0013729935744777322, - "learning_rate": 0.00019999990439136423, - "loss": 46.0, - "step": 5767 - }, - { - "epoch": 0.441003880191907, - "grad_norm": 0.005637479946017265, - "learning_rate": 0.0001999999043581466, - "loss": 46.0, - "step": 5768 - }, - { - "epoch": 0.44108033717529677, - "grad_norm": 0.002816990949213505, - "learning_rate": 0.0001999999043249232, - "loss": 46.0, - "step": 5769 - }, - { - "epoch": 0.44115679415868647, - "grad_norm": 0.0018337401561439037, - "learning_rate": 0.00019999990429169406, - "loss": 46.0, - "step": 5770 - }, - { - "epoch": 0.4412332511420762, - "grad_norm": 0.001763531006872654, - "learning_rate": 0.00019999990425845915, - "loss": 46.0, - "step": 5771 - }, - { - "epoch": 0.4413097081254659, - "grad_norm": 0.0008495684596709907, - "learning_rate": 0.00019999990422521843, - "loss": 46.0, - "step": 5772 - }, - { - "epoch": 0.44138616510885564, - "grad_norm": 0.0013716942630708218, - "learning_rate": 0.00019999990419197197, - "loss": 46.0, - "step": 5773 - }, - { - "epoch": 0.44146262209224535, - "grad_norm": 0.00035818334436044097, - "learning_rate": 0.00019999990415871973, - "loss": 46.0, - "step": 5774 - }, - { - "epoch": 0.44153907907563505, - "grad_norm": 0.00099306704942137, - "learning_rate": 0.0001999999041254617, - "loss": 46.0, - "step": 5775 - }, - { - "epoch": 0.4416155360590248, - "grad_norm": 0.0016687854658812284, - "learning_rate": 0.00019999990409219794, - "loss": 46.0, - "step": 5776 - }, - { - "epoch": 0.4416919930424145, - "grad_norm": 0.0005627785576507449, - "learning_rate": 0.00019999990405892838, - "loss": 46.0, - "step": 5777 - }, - { - "epoch": 0.4417684500258042, - "grad_norm": 0.002996542025357485, - "learning_rate": 0.00019999990402565305, - "loss": 46.0, - "step": 5778 - }, - { - "epoch": 0.44184490700919393, - "grad_norm": 0.0002453405177220702, - "learning_rate": 0.00019999990399237197, - "loss": 46.0, - "step": 5779 - }, - { - "epoch": 0.4419213639925837, - "grad_norm": 0.0013348313514143229, - "learning_rate": 0.00019999990395908512, - "loss": 46.0, - "step": 5780 - }, - { - "epoch": 0.4419978209759734, - "grad_norm": 0.001339646871201694, - "learning_rate": 0.00019999990392579247, - "loss": 46.0, - "step": 5781 - }, - { - "epoch": 0.4420742779593631, - "grad_norm": 0.004314985126256943, - "learning_rate": 0.0001999999038924941, - "loss": 46.0, - "step": 5782 - }, - { - "epoch": 0.4421507349427528, - "grad_norm": 0.001006556092761457, - "learning_rate": 0.00019999990385918993, - "loss": 46.0, - "step": 5783 - }, - { - "epoch": 0.44222719192614257, - "grad_norm": 0.0005633236723951995, - "learning_rate": 0.00019999990382588002, - "loss": 46.0, - "step": 5784 - }, - { - "epoch": 0.4423036489095323, - "grad_norm": 0.0008139516576193273, - "learning_rate": 0.0001999999037925643, - "loss": 46.0, - "step": 5785 - }, - { - "epoch": 0.442380105892922, - "grad_norm": 0.0018054844113066792, - "learning_rate": 0.0001999999037592428, - "loss": 46.0, - "step": 5786 - }, - { - "epoch": 0.4424565628763117, - "grad_norm": 0.0005708512617275119, - "learning_rate": 0.0001999999037259156, - "loss": 46.0, - "step": 5787 - }, - { - "epoch": 0.44253301985970145, - "grad_norm": 0.0017614421667531133, - "learning_rate": 0.0001999999036925826, - "loss": 46.0, - "step": 5788 - }, - { - "epoch": 0.44260947684309115, - "grad_norm": 0.0024563679471611977, - "learning_rate": 0.00019999990365924378, - "loss": 46.0, - "step": 5789 - }, - { - "epoch": 0.44268593382648086, - "grad_norm": 0.0009294821065850556, - "learning_rate": 0.00019999990362589925, - "loss": 46.0, - "step": 5790 - }, - { - "epoch": 0.4427623908098706, - "grad_norm": 0.0014008278958499432, - "learning_rate": 0.00019999990359254892, - "loss": 46.0, - "step": 5791 - }, - { - "epoch": 0.4428388477932603, - "grad_norm": 0.0008367998525500298, - "learning_rate": 0.00019999990355919285, - "loss": 46.0, - "step": 5792 - }, - { - "epoch": 0.44291530477665003, - "grad_norm": 0.00150088791269809, - "learning_rate": 0.000199999903525831, - "loss": 46.0, - "step": 5793 - }, - { - "epoch": 0.44299176176003974, - "grad_norm": 0.0006685269763693213, - "learning_rate": 0.00019999990349246335, - "loss": 46.0, - "step": 5794 - }, - { - "epoch": 0.4430682187434295, - "grad_norm": 0.0012970648240298033, - "learning_rate": 0.00019999990345908998, - "loss": 46.0, - "step": 5795 - }, - { - "epoch": 0.4431446757268192, - "grad_norm": 0.010080215521156788, - "learning_rate": 0.0001999999034257108, - "loss": 46.0, - "step": 5796 - }, - { - "epoch": 0.4432211327102089, - "grad_norm": 0.004340726882219315, - "learning_rate": 0.00019999990339232587, - "loss": 46.0, - "step": 5797 - }, - { - "epoch": 0.4432975896935986, - "grad_norm": 0.0009696157067082822, - "learning_rate": 0.00019999990335893518, - "loss": 46.0, - "step": 5798 - }, - { - "epoch": 0.4433740466769884, - "grad_norm": 0.002490574959665537, - "learning_rate": 0.00019999990332553869, - "loss": 46.0, - "step": 5799 - }, - { - "epoch": 0.4434505036603781, - "grad_norm": 0.0014003097312524915, - "learning_rate": 0.00019999990329213642, - "loss": 46.0, - "step": 5800 - }, - { - "epoch": 0.4435269606437678, - "grad_norm": 0.001936648041009903, - "learning_rate": 0.00019999990325872844, - "loss": 46.0, - "step": 5801 - }, - { - "epoch": 0.44360341762715755, - "grad_norm": 0.0010092939483001828, - "learning_rate": 0.00019999990322531463, - "loss": 46.0, - "step": 5802 - }, - { - "epoch": 0.44367987461054725, - "grad_norm": 0.0018835609080269933, - "learning_rate": 0.0001999999031918951, - "loss": 46.0, - "step": 5803 - }, - { - "epoch": 0.44375633159393696, - "grad_norm": 0.0011895131319761276, - "learning_rate": 0.00019999990315846977, - "loss": 46.0, - "step": 5804 - }, - { - "epoch": 0.44383278857732666, - "grad_norm": 0.0028711638879030943, - "learning_rate": 0.00019999990312503867, - "loss": 46.0, - "step": 5805 - }, - { - "epoch": 0.4439092455607164, - "grad_norm": 0.001128563773818314, - "learning_rate": 0.00019999990309160182, - "loss": 46.0, - "step": 5806 - }, - { - "epoch": 0.44398570254410613, - "grad_norm": 0.0009731303434818983, - "learning_rate": 0.0001999999030581592, - "loss": 46.0, - "step": 5807 - }, - { - "epoch": 0.44406215952749584, - "grad_norm": 0.003529465990141034, - "learning_rate": 0.0001999999030247108, - "loss": 46.0, - "step": 5808 - }, - { - "epoch": 0.44413861651088554, - "grad_norm": 0.0011232653632760048, - "learning_rate": 0.0001999999029912566, - "loss": 46.0, - "step": 5809 - }, - { - "epoch": 0.4442150734942753, - "grad_norm": 0.0017714614514261484, - "learning_rate": 0.00019999990295779666, - "loss": 46.0, - "step": 5810 - }, - { - "epoch": 0.444291530477665, - "grad_norm": 0.001913544605486095, - "learning_rate": 0.00019999990292433097, - "loss": 46.0, - "step": 5811 - }, - { - "epoch": 0.4443679874610547, - "grad_norm": 0.0010140163358300924, - "learning_rate": 0.0001999999028908595, - "loss": 46.0, - "step": 5812 - }, - { - "epoch": 0.4444444444444444, - "grad_norm": 0.0013631342444568872, - "learning_rate": 0.00019999990285738225, - "loss": 46.0, - "step": 5813 - }, - { - "epoch": 0.4445209014278342, - "grad_norm": 0.0003541119513101876, - "learning_rate": 0.00019999990282389924, - "loss": 46.0, - "step": 5814 - }, - { - "epoch": 0.4445973584112239, - "grad_norm": 0.003940911497920752, - "learning_rate": 0.00019999990279041043, - "loss": 46.0, - "step": 5815 - }, - { - "epoch": 0.4446738153946136, - "grad_norm": 0.005468114744871855, - "learning_rate": 0.0001999999027569159, - "loss": 46.0, - "step": 5816 - }, - { - "epoch": 0.44475027237800335, - "grad_norm": 0.000980605836957693, - "learning_rate": 0.00019999990272341557, - "loss": 46.0, - "step": 5817 - }, - { - "epoch": 0.44482672936139306, - "grad_norm": 0.0004770248488057405, - "learning_rate": 0.00019999990268990947, - "loss": 46.0, - "step": 5818 - }, - { - "epoch": 0.44490318634478276, - "grad_norm": 0.0012241422664374113, - "learning_rate": 0.00019999990265639762, - "loss": 46.0, - "step": 5819 - }, - { - "epoch": 0.44497964332817247, - "grad_norm": 0.0010276573011651635, - "learning_rate": 0.00019999990262287997, - "loss": 46.0, - "step": 5820 - }, - { - "epoch": 0.44505610031156223, - "grad_norm": 0.002679398050531745, - "learning_rate": 0.00019999990258935657, - "loss": 46.0, - "step": 5821 - }, - { - "epoch": 0.44513255729495194, - "grad_norm": 0.0008766966057009995, - "learning_rate": 0.0001999999025558274, - "loss": 46.0, - "step": 5822 - }, - { - "epoch": 0.44520901427834164, - "grad_norm": 0.0024377433583140373, - "learning_rate": 0.00019999990252229246, - "loss": 46.0, - "step": 5823 - }, - { - "epoch": 0.44528547126173135, - "grad_norm": 0.0020526135340332985, - "learning_rate": 0.00019999990248875172, - "loss": 46.0, - "step": 5824 - }, - { - "epoch": 0.4453619282451211, - "grad_norm": 0.0018091736128553748, - "learning_rate": 0.00019999990245520529, - "loss": 46.0, - "step": 5825 - }, - { - "epoch": 0.4454383852285108, - "grad_norm": 0.0027146933134645224, - "learning_rate": 0.000199999902421653, - "loss": 46.0, - "step": 5826 - }, - { - "epoch": 0.4455148422119005, - "grad_norm": 0.0013257615501061082, - "learning_rate": 0.00019999990238809501, - "loss": 46.0, - "step": 5827 - }, - { - "epoch": 0.4455912991952902, - "grad_norm": 0.00398293137550354, - "learning_rate": 0.0001999999023545312, - "loss": 46.0, - "step": 5828 - }, - { - "epoch": 0.44566775617868, - "grad_norm": 0.0012344809947535396, - "learning_rate": 0.00019999990232096165, - "loss": 46.0, - "step": 5829 - }, - { - "epoch": 0.4457442131620697, - "grad_norm": 0.0004443094367161393, - "learning_rate": 0.00019999990228738632, - "loss": 46.0, - "step": 5830 - }, - { - "epoch": 0.4458206701454594, - "grad_norm": 0.006870817393064499, - "learning_rate": 0.00019999990225380522, - "loss": 46.0, - "step": 5831 - }, - { - "epoch": 0.44589712712884916, - "grad_norm": 0.0007931660511530936, - "learning_rate": 0.00019999990222021837, - "loss": 46.0, - "step": 5832 - }, - { - "epoch": 0.44597358411223886, - "grad_norm": 0.0028537327889353037, - "learning_rate": 0.00019999990218662572, - "loss": 46.0, - "step": 5833 - }, - { - "epoch": 0.44605004109562857, - "grad_norm": 0.00420724879950285, - "learning_rate": 0.0001999999021530273, - "loss": 46.0, - "step": 5834 - }, - { - "epoch": 0.4461264980790183, - "grad_norm": 0.0012185847153887153, - "learning_rate": 0.00019999990211942316, - "loss": 46.0, - "step": 5835 - }, - { - "epoch": 0.44620295506240804, - "grad_norm": 0.0011567919282242656, - "learning_rate": 0.00019999990208581321, - "loss": 46.0, - "step": 5836 - }, - { - "epoch": 0.44627941204579774, - "grad_norm": 0.001151342410594225, - "learning_rate": 0.00019999990205219747, - "loss": 46.0, - "step": 5837 - }, - { - "epoch": 0.44635586902918745, - "grad_norm": 0.0008848869474604726, - "learning_rate": 0.000199999902018576, - "loss": 46.0, - "step": 5838 - }, - { - "epoch": 0.44643232601257715, - "grad_norm": 0.001326793571934104, - "learning_rate": 0.00019999990198494875, - "loss": 46.0, - "step": 5839 - }, - { - "epoch": 0.4465087829959669, - "grad_norm": 0.0017756195738911629, - "learning_rate": 0.00019999990195131574, - "loss": 46.0, - "step": 5840 - }, - { - "epoch": 0.4465852399793566, - "grad_norm": 0.00062420783797279, - "learning_rate": 0.00019999990191767696, - "loss": 46.0, - "step": 5841 - }, - { - "epoch": 0.4466616969627463, - "grad_norm": 0.0010073896264657378, - "learning_rate": 0.00019999990188403237, - "loss": 46.0, - "step": 5842 - }, - { - "epoch": 0.44673815394613603, - "grad_norm": 0.002054106444120407, - "learning_rate": 0.00019999990185038205, - "loss": 46.0, - "step": 5843 - }, - { - "epoch": 0.4468146109295258, - "grad_norm": 0.0009332704939879477, - "learning_rate": 0.00019999990181672594, - "loss": 46.0, - "step": 5844 - }, - { - "epoch": 0.4468910679129155, - "grad_norm": 0.0018146971706300974, - "learning_rate": 0.0001999999017830641, - "loss": 46.0, - "step": 5845 - }, - { - "epoch": 0.4469675248963052, - "grad_norm": 0.00193692184984684, - "learning_rate": 0.00019999990174939645, - "loss": 46.0, - "step": 5846 - }, - { - "epoch": 0.44704398187969496, - "grad_norm": 0.0019344044849276543, - "learning_rate": 0.00019999990171572303, - "loss": 46.0, - "step": 5847 - }, - { - "epoch": 0.44712043886308467, - "grad_norm": 0.0016667699674144387, - "learning_rate": 0.00019999990168204386, - "loss": 46.0, - "step": 5848 - }, - { - "epoch": 0.4471968958464744, - "grad_norm": 0.0010337602579966187, - "learning_rate": 0.00019999990164835892, - "loss": 46.0, - "step": 5849 - }, - { - "epoch": 0.4472733528298641, - "grad_norm": 0.001886464306153357, - "learning_rate": 0.00019999990161466817, - "loss": 46.0, - "step": 5850 - }, - { - "epoch": 0.44734980981325384, - "grad_norm": 0.0007727887132205069, - "learning_rate": 0.0001999999015809717, - "loss": 46.0, - "step": 5851 - }, - { - "epoch": 0.44742626679664355, - "grad_norm": 0.0010142027167603374, - "learning_rate": 0.00019999990154726945, - "loss": 46.0, - "step": 5852 - }, - { - "epoch": 0.44750272378003325, - "grad_norm": 0.0023035744670778513, - "learning_rate": 0.00019999990151356141, - "loss": 46.0, - "step": 5853 - }, - { - "epoch": 0.44757918076342296, - "grad_norm": 0.0008648578659631312, - "learning_rate": 0.0001999999014798476, - "loss": 46.0, - "step": 5854 - }, - { - "epoch": 0.4476556377468127, - "grad_norm": 0.0037339525297284126, - "learning_rate": 0.00019999990144612805, - "loss": 46.0, - "step": 5855 - }, - { - "epoch": 0.4477320947302024, - "grad_norm": 0.0024114008992910385, - "learning_rate": 0.00019999990141240272, - "loss": 46.0, - "step": 5856 - }, - { - "epoch": 0.44780855171359213, - "grad_norm": 0.0029424072708934546, - "learning_rate": 0.00019999990137867162, - "loss": 46.0, - "step": 5857 - }, - { - "epoch": 0.44788500869698183, - "grad_norm": 0.0012212153524160385, - "learning_rate": 0.00019999990134493477, - "loss": 46.0, - "step": 5858 - }, - { - "epoch": 0.4479614656803716, - "grad_norm": 0.000532503763679415, - "learning_rate": 0.00019999990131119212, - "loss": 46.0, - "step": 5859 - }, - { - "epoch": 0.4480379226637613, - "grad_norm": 0.0035747734364122152, - "learning_rate": 0.0001999999012774437, - "loss": 46.0, - "step": 5860 - }, - { - "epoch": 0.448114379647151, - "grad_norm": 0.0007307583000510931, - "learning_rate": 0.00019999990124368953, - "loss": 46.0, - "step": 5861 - }, - { - "epoch": 0.44819083663054077, - "grad_norm": 0.007901514880359173, - "learning_rate": 0.00019999990120992957, - "loss": 46.0, - "step": 5862 - }, - { - "epoch": 0.4482672936139305, - "grad_norm": 0.0010505786631256342, - "learning_rate": 0.00019999990117616385, - "loss": 46.0, - "step": 5863 - }, - { - "epoch": 0.4483437505973202, - "grad_norm": 0.001217624288983643, - "learning_rate": 0.00019999990114239236, - "loss": 46.0, - "step": 5864 - }, - { - "epoch": 0.4484202075807099, - "grad_norm": 0.0005843573017045856, - "learning_rate": 0.0001999999011086151, - "loss": 46.0, - "step": 5865 - }, - { - "epoch": 0.44849666456409965, - "grad_norm": 0.0032574895303696394, - "learning_rate": 0.0001999999010748321, - "loss": 46.0, - "step": 5866 - }, - { - "epoch": 0.44857312154748935, - "grad_norm": 0.00047823303611949086, - "learning_rate": 0.00019999990104104328, - "loss": 46.0, - "step": 5867 - }, - { - "epoch": 0.44864957853087906, - "grad_norm": 0.0006631818832829595, - "learning_rate": 0.00019999990100724873, - "loss": 46.0, - "step": 5868 - }, - { - "epoch": 0.44872603551426876, - "grad_norm": 0.0012045379262417555, - "learning_rate": 0.00019999990097344838, - "loss": 46.0, - "step": 5869 - }, - { - "epoch": 0.4488024924976585, - "grad_norm": 0.0004973552422598004, - "learning_rate": 0.00019999990093964227, - "loss": 46.0, - "step": 5870 - }, - { - "epoch": 0.44887894948104823, - "grad_norm": 0.001408767537213862, - "learning_rate": 0.00019999990090583037, - "loss": 46.0, - "step": 5871 - }, - { - "epoch": 0.44895540646443793, - "grad_norm": 0.0006852729711681604, - "learning_rate": 0.00019999990087201278, - "loss": 46.0, - "step": 5872 - }, - { - "epoch": 0.44903186344782764, - "grad_norm": 0.0036148359067738056, - "learning_rate": 0.00019999990083818933, - "loss": 46.0, - "step": 5873 - }, - { - "epoch": 0.4491083204312174, - "grad_norm": 0.0009509313968010247, - "learning_rate": 0.00019999990080436016, - "loss": 46.0, - "step": 5874 - }, - { - "epoch": 0.4491847774146071, - "grad_norm": 0.001048155128955841, - "learning_rate": 0.0001999999007705252, - "loss": 46.0, - "step": 5875 - }, - { - "epoch": 0.4492612343979968, - "grad_norm": 0.00441478518769145, - "learning_rate": 0.00019999990073668448, - "loss": 46.0, - "step": 5876 - }, - { - "epoch": 0.4493376913813866, - "grad_norm": 0.0077624935656785965, - "learning_rate": 0.00019999990070283802, - "loss": 46.0, - "step": 5877 - }, - { - "epoch": 0.4494141483647763, - "grad_norm": 0.0004367143556009978, - "learning_rate": 0.00019999990066898573, - "loss": 46.0, - "step": 5878 - }, - { - "epoch": 0.449490605348166, - "grad_norm": 0.0009758782689459622, - "learning_rate": 0.0001999999006351277, - "loss": 46.0, - "step": 5879 - }, - { - "epoch": 0.4495670623315557, - "grad_norm": 0.0020039204973727465, - "learning_rate": 0.00019999990060126391, - "loss": 46.0, - "step": 5880 - }, - { - "epoch": 0.44964351931494545, - "grad_norm": 0.0015301794046536088, - "learning_rate": 0.00019999990056739433, - "loss": 46.0, - "step": 5881 - }, - { - "epoch": 0.44971997629833516, - "grad_norm": 0.0009870027424767613, - "learning_rate": 0.000199999900533519, - "loss": 46.0, - "step": 5882 - }, - { - "epoch": 0.44979643328172486, - "grad_norm": 0.002041150815784931, - "learning_rate": 0.00019999990049963788, - "loss": 46.0, - "step": 5883 - }, - { - "epoch": 0.44987289026511457, - "grad_norm": 0.0011921870755031705, - "learning_rate": 0.00019999990046575098, - "loss": 46.0, - "step": 5884 - }, - { - "epoch": 0.4499493472485043, - "grad_norm": 0.0007126278942450881, - "learning_rate": 0.00019999990043185835, - "loss": 46.0, - "step": 5885 - }, - { - "epoch": 0.45002580423189403, - "grad_norm": 0.001590652042068541, - "learning_rate": 0.00019999990039795993, - "loss": 46.0, - "step": 5886 - }, - { - "epoch": 0.45010226121528374, - "grad_norm": 0.0006485821213573217, - "learning_rate": 0.00019999990036405577, - "loss": 46.0, - "step": 5887 - }, - { - "epoch": 0.45017871819867344, - "grad_norm": 0.002779987407848239, - "learning_rate": 0.0001999999003301458, - "loss": 46.0, - "step": 5888 - }, - { - "epoch": 0.4502551751820632, - "grad_norm": 0.004117012023925781, - "learning_rate": 0.00019999990029623006, - "loss": 46.0, - "step": 5889 - }, - { - "epoch": 0.4503316321654529, - "grad_norm": 0.0006213849992491305, - "learning_rate": 0.00019999990026230857, - "loss": 46.0, - "step": 5890 - }, - { - "epoch": 0.4504080891488426, - "grad_norm": 0.0013996735215187073, - "learning_rate": 0.0001999999002283813, - "loss": 46.0, - "step": 5891 - }, - { - "epoch": 0.4504845461322324, - "grad_norm": 0.0005633053951896727, - "learning_rate": 0.00019999990019444827, - "loss": 46.0, - "step": 5892 - }, - { - "epoch": 0.4505610031156221, - "grad_norm": 0.0014910736354067922, - "learning_rate": 0.00019999990016050947, - "loss": 46.0, - "step": 5893 - }, - { - "epoch": 0.4506374600990118, - "grad_norm": 0.0011204888578504324, - "learning_rate": 0.0001999999001265649, - "loss": 46.0, - "step": 5894 - }, - { - "epoch": 0.4507139170824015, - "grad_norm": 0.0028882878832519054, - "learning_rate": 0.00019999990009261459, - "loss": 46.0, - "step": 5895 - }, - { - "epoch": 0.45079037406579126, - "grad_norm": 0.003089252393692732, - "learning_rate": 0.00019999990005865846, - "loss": 46.0, - "step": 5896 - }, - { - "epoch": 0.45086683104918096, - "grad_norm": 0.0008939317194744945, - "learning_rate": 0.00019999990002469658, - "loss": 46.0, - "step": 5897 - }, - { - "epoch": 0.45094328803257067, - "grad_norm": 0.0011388816637918353, - "learning_rate": 0.00019999989999072894, - "loss": 46.0, - "step": 5898 - }, - { - "epoch": 0.45101974501596037, - "grad_norm": 0.002678181044757366, - "learning_rate": 0.0001999998999567555, - "loss": 46.0, - "step": 5899 - }, - { - "epoch": 0.45109620199935013, - "grad_norm": 0.0006464957259595394, - "learning_rate": 0.0001999998999227763, - "loss": 46.0, - "step": 5900 - }, - { - "epoch": 0.45117265898273984, - "grad_norm": 0.001799222780391574, - "learning_rate": 0.00019999989988879135, - "loss": 46.0, - "step": 5901 - }, - { - "epoch": 0.45124911596612954, - "grad_norm": 0.007783942390233278, - "learning_rate": 0.00019999989985480064, - "loss": 46.0, - "step": 5902 - }, - { - "epoch": 0.45132557294951925, - "grad_norm": 0.001156967948190868, - "learning_rate": 0.00019999989982080412, - "loss": 46.0, - "step": 5903 - }, - { - "epoch": 0.451402029932909, - "grad_norm": 0.0018953500548377633, - "learning_rate": 0.00019999989978680186, - "loss": 46.0, - "step": 5904 - }, - { - "epoch": 0.4514784869162987, - "grad_norm": 0.0078031993471086025, - "learning_rate": 0.00019999989975279383, - "loss": 46.0, - "step": 5905 - }, - { - "epoch": 0.4515549438996884, - "grad_norm": 0.0007886604289524257, - "learning_rate": 0.00019999989971878002, - "loss": 46.0, - "step": 5906 - }, - { - "epoch": 0.4516314008830782, - "grad_norm": 0.0007624428835697472, - "learning_rate": 0.00019999989968476044, - "loss": 46.0, - "step": 5907 - }, - { - "epoch": 0.4517078578664679, - "grad_norm": 0.002622829517349601, - "learning_rate": 0.00019999989965073511, - "loss": 46.0, - "step": 5908 - }, - { - "epoch": 0.4517843148498576, - "grad_norm": 0.0009754533530212939, - "learning_rate": 0.000199999899616704, - "loss": 46.0, - "step": 5909 - }, - { - "epoch": 0.4518607718332473, - "grad_norm": 0.0009104540222324431, - "learning_rate": 0.0001999998995826671, - "loss": 46.0, - "step": 5910 - }, - { - "epoch": 0.45193722881663706, - "grad_norm": 0.0009701043018139899, - "learning_rate": 0.00019999989954862444, - "loss": 46.0, - "step": 5911 - }, - { - "epoch": 0.45201368580002677, - "grad_norm": 0.0005307946121320128, - "learning_rate": 0.00019999989951457602, - "loss": 46.0, - "step": 5912 - }, - { - "epoch": 0.45209014278341647, - "grad_norm": 0.00223224307410419, - "learning_rate": 0.00019999989948052185, - "loss": 46.0, - "step": 5913 - }, - { - "epoch": 0.4521665997668062, - "grad_norm": 0.0025332423392683268, - "learning_rate": 0.00019999989944646188, - "loss": 46.0, - "step": 5914 - }, - { - "epoch": 0.45224305675019594, - "grad_norm": 0.002023973735049367, - "learning_rate": 0.00019999989941239614, - "loss": 46.0, - "step": 5915 - }, - { - "epoch": 0.45231951373358564, - "grad_norm": 0.0026505168061703444, - "learning_rate": 0.00019999989937832463, - "loss": 46.0, - "step": 5916 - }, - { - "epoch": 0.45239597071697535, - "grad_norm": 0.005159546155482531, - "learning_rate": 0.0001999998993442474, - "loss": 46.0, - "step": 5917 - }, - { - "epoch": 0.4524724277003651, - "grad_norm": 0.001181524246931076, - "learning_rate": 0.00019999989931016434, - "loss": 46.0, - "step": 5918 - }, - { - "epoch": 0.4525488846837548, - "grad_norm": 0.002479558577761054, - "learning_rate": 0.00019999989927607553, - "loss": 46.0, - "step": 5919 - }, - { - "epoch": 0.4526253416671445, - "grad_norm": 0.001620599301531911, - "learning_rate": 0.00019999989924198095, - "loss": 46.0, - "step": 5920 - }, - { - "epoch": 0.4527017986505342, - "grad_norm": 0.0012340190587565303, - "learning_rate": 0.0001999998992078806, - "loss": 46.0, - "step": 5921 - }, - { - "epoch": 0.452778255633924, - "grad_norm": 0.0010031991405412555, - "learning_rate": 0.0001999998991737745, - "loss": 46.0, - "step": 5922 - }, - { - "epoch": 0.4528547126173137, - "grad_norm": 0.0005971941282041371, - "learning_rate": 0.0001999998991396626, - "loss": 46.0, - "step": 5923 - }, - { - "epoch": 0.4529311696007034, - "grad_norm": 0.00413657259196043, - "learning_rate": 0.00019999989910554495, - "loss": 46.0, - "step": 5924 - }, - { - "epoch": 0.4530076265840931, - "grad_norm": 0.0013001210754737258, - "learning_rate": 0.00019999989907142153, - "loss": 46.0, - "step": 5925 - }, - { - "epoch": 0.45308408356748286, - "grad_norm": 0.0014844145625829697, - "learning_rate": 0.00019999989903729233, - "loss": 46.0, - "step": 5926 - }, - { - "epoch": 0.45316054055087257, - "grad_norm": 0.0013648431049659848, - "learning_rate": 0.00019999989900315737, - "loss": 46.0, - "step": 5927 - }, - { - "epoch": 0.4532369975342623, - "grad_norm": 0.0011310586705803871, - "learning_rate": 0.00019999989896901663, - "loss": 46.0, - "step": 5928 - }, - { - "epoch": 0.453313454517652, - "grad_norm": 0.0011114954249933362, - "learning_rate": 0.0001999998989348701, - "loss": 46.0, - "step": 5929 - }, - { - "epoch": 0.45338991150104174, - "grad_norm": 0.0037474429700523615, - "learning_rate": 0.00019999989890071785, - "loss": 46.0, - "step": 5930 - }, - { - "epoch": 0.45346636848443145, - "grad_norm": 0.006916186306625605, - "learning_rate": 0.00019999989886655982, - "loss": 46.0, - "step": 5931 - }, - { - "epoch": 0.45354282546782115, - "grad_norm": 0.0017635890981182456, - "learning_rate": 0.00019999989883239598, - "loss": 46.0, - "step": 5932 - }, - { - "epoch": 0.4536192824512109, - "grad_norm": 0.0023119018878787756, - "learning_rate": 0.00019999989879822643, - "loss": 46.0, - "step": 5933 - }, - { - "epoch": 0.4536957394346006, - "grad_norm": 0.003179059596732259, - "learning_rate": 0.00019999989876405105, - "loss": 46.0, - "step": 5934 - }, - { - "epoch": 0.4537721964179903, - "grad_norm": 0.0011554177617654204, - "learning_rate": 0.00019999989872986995, - "loss": 46.0, - "step": 5935 - }, - { - "epoch": 0.45384865340138003, - "grad_norm": 0.001774788019247353, - "learning_rate": 0.00019999989869568308, - "loss": 46.0, - "step": 5936 - }, - { - "epoch": 0.4539251103847698, - "grad_norm": 0.0020505853462964296, - "learning_rate": 0.0001999998986614904, - "loss": 46.0, - "step": 5937 - }, - { - "epoch": 0.4540015673681595, - "grad_norm": 0.0008048502495512366, - "learning_rate": 0.00019999989862729196, - "loss": 46.0, - "step": 5938 - }, - { - "epoch": 0.4540780243515492, - "grad_norm": 0.0019646461587399244, - "learning_rate": 0.00019999989859308776, - "loss": 46.0, - "step": 5939 - }, - { - "epoch": 0.4541544813349389, - "grad_norm": 0.0013704269658774137, - "learning_rate": 0.0001999998985588778, - "loss": 46.0, - "step": 5940 - }, - { - "epoch": 0.45423093831832867, - "grad_norm": 0.001937161199748516, - "learning_rate": 0.00019999989852466206, - "loss": 46.0, - "step": 5941 - }, - { - "epoch": 0.4543073953017184, - "grad_norm": 0.0017611797666177154, - "learning_rate": 0.00019999989849044057, - "loss": 46.0, - "step": 5942 - }, - { - "epoch": 0.4543838522851081, - "grad_norm": 0.0006365999579429626, - "learning_rate": 0.00019999989845621328, - "loss": 46.0, - "step": 5943 - }, - { - "epoch": 0.4544603092684978, - "grad_norm": 0.001306456862948835, - "learning_rate": 0.00019999989842198022, - "loss": 46.0, - "step": 5944 - }, - { - "epoch": 0.45453676625188755, - "grad_norm": 0.0020896310452371836, - "learning_rate": 0.00019999989838774144, - "loss": 46.0, - "step": 5945 - }, - { - "epoch": 0.45461322323527725, - "grad_norm": 0.0011162115260958672, - "learning_rate": 0.00019999989835349684, - "loss": 46.0, - "step": 5946 - }, - { - "epoch": 0.45468968021866696, - "grad_norm": 0.0044331676326692104, - "learning_rate": 0.0001999998983192465, - "loss": 46.0, - "step": 5947 - }, - { - "epoch": 0.4547661372020567, - "grad_norm": 0.0015705449040979147, - "learning_rate": 0.00019999989828499038, - "loss": 46.0, - "step": 5948 - }, - { - "epoch": 0.4548425941854464, - "grad_norm": 0.0033875214867293835, - "learning_rate": 0.00019999989825072846, - "loss": 46.0, - "step": 5949 - }, - { - "epoch": 0.45491905116883613, - "grad_norm": 0.0010959822684526443, - "learning_rate": 0.0001999998982164608, - "loss": 46.0, - "step": 5950 - }, - { - "epoch": 0.45499550815222584, - "grad_norm": 0.0005235755234025419, - "learning_rate": 0.0001999998981821874, - "loss": 46.0, - "step": 5951 - }, - { - "epoch": 0.4550719651356156, - "grad_norm": 0.0021660521160811186, - "learning_rate": 0.00019999989814790817, - "loss": 46.0, - "step": 5952 - }, - { - "epoch": 0.4551484221190053, - "grad_norm": 0.01906907930970192, - "learning_rate": 0.0001999998981136232, - "loss": 46.0, - "step": 5953 - }, - { - "epoch": 0.455224879102395, - "grad_norm": 0.0008477027877233922, - "learning_rate": 0.0001999998980793325, - "loss": 46.0, - "step": 5954 - }, - { - "epoch": 0.4553013360857847, - "grad_norm": 0.0014201845042407513, - "learning_rate": 0.00019999989804503598, - "loss": 46.0, - "step": 5955 - }, - { - "epoch": 0.4553777930691745, - "grad_norm": 0.0007048692204989493, - "learning_rate": 0.0001999998980107337, - "loss": 46.0, - "step": 5956 - }, - { - "epoch": 0.4554542500525642, - "grad_norm": 0.0034078089520335197, - "learning_rate": 0.00019999989797642563, - "loss": 46.0, - "step": 5957 - }, - { - "epoch": 0.4555307070359539, - "grad_norm": 0.0017970558255910873, - "learning_rate": 0.00019999989794211183, - "loss": 46.0, - "step": 5958 - }, - { - "epoch": 0.4556071640193436, - "grad_norm": 0.00046388880582526326, - "learning_rate": 0.00019999989790779225, - "loss": 46.0, - "step": 5959 - }, - { - "epoch": 0.45568362100273335, - "grad_norm": 0.000561626220587641, - "learning_rate": 0.0001999998978734669, - "loss": 46.0, - "step": 5960 - }, - { - "epoch": 0.45576007798612306, - "grad_norm": 0.0004251794016454369, - "learning_rate": 0.00019999989783913574, - "loss": 46.0, - "step": 5961 - }, - { - "epoch": 0.45583653496951276, - "grad_norm": 0.0011329480912536383, - "learning_rate": 0.00019999989780479887, - "loss": 46.0, - "step": 5962 - }, - { - "epoch": 0.4559129919529025, - "grad_norm": 0.00039562658639624715, - "learning_rate": 0.0001999998977704562, - "loss": 46.0, - "step": 5963 - }, - { - "epoch": 0.45598944893629223, - "grad_norm": 0.004520610440522432, - "learning_rate": 0.00019999989773610778, - "loss": 46.0, - "step": 5964 - }, - { - "epoch": 0.45606590591968194, - "grad_norm": 0.001086476375348866, - "learning_rate": 0.00019999989770175356, - "loss": 46.0, - "step": 5965 - }, - { - "epoch": 0.45614236290307164, - "grad_norm": 0.0010470702545717359, - "learning_rate": 0.00019999989766739356, - "loss": 46.0, - "step": 5966 - }, - { - "epoch": 0.4562188198864614, - "grad_norm": 0.0009131265105679631, - "learning_rate": 0.00019999989763302788, - "loss": 46.0, - "step": 5967 - }, - { - "epoch": 0.4562952768698511, - "grad_norm": 0.002551504410803318, - "learning_rate": 0.00019999989759865634, - "loss": 46.0, - "step": 5968 - }, - { - "epoch": 0.4563717338532408, - "grad_norm": 0.000417816627305001, - "learning_rate": 0.00019999989756427905, - "loss": 46.0, - "step": 5969 - }, - { - "epoch": 0.4564481908366305, - "grad_norm": 0.003151189535856247, - "learning_rate": 0.00019999989752989602, - "loss": 46.0, - "step": 5970 - }, - { - "epoch": 0.4565246478200203, - "grad_norm": 0.0010885881492868066, - "learning_rate": 0.0001999998974955072, - "loss": 46.0, - "step": 5971 - }, - { - "epoch": 0.45660110480341, - "grad_norm": 0.00210529169999063, - "learning_rate": 0.00019999989746111258, - "loss": 46.0, - "step": 5972 - }, - { - "epoch": 0.4566775617867997, - "grad_norm": 0.0015237333718687296, - "learning_rate": 0.00019999989742671223, - "loss": 46.0, - "step": 5973 - }, - { - "epoch": 0.4567540187701894, - "grad_norm": 0.0024765983689576387, - "learning_rate": 0.0001999998973923061, - "loss": 46.0, - "step": 5974 - }, - { - "epoch": 0.45683047575357916, - "grad_norm": 0.0013245424488559365, - "learning_rate": 0.00019999989735789423, - "loss": 46.0, - "step": 5975 - }, - { - "epoch": 0.45690693273696886, - "grad_norm": 0.0010874953586608171, - "learning_rate": 0.00019999989732347656, - "loss": 46.0, - "step": 5976 - }, - { - "epoch": 0.45698338972035857, - "grad_norm": 0.00095972022973001, - "learning_rate": 0.0001999998972890531, - "loss": 46.0, - "step": 5977 - }, - { - "epoch": 0.45705984670374833, - "grad_norm": 0.000726853497326374, - "learning_rate": 0.0001999998972546239, - "loss": 46.0, - "step": 5978 - }, - { - "epoch": 0.45713630368713803, - "grad_norm": 0.0010844114003702998, - "learning_rate": 0.00019999989722018893, - "loss": 46.0, - "step": 5979 - }, - { - "epoch": 0.45721276067052774, - "grad_norm": 0.0014699650928378105, - "learning_rate": 0.0001999998971857482, - "loss": 46.0, - "step": 5980 - }, - { - "epoch": 0.45728921765391745, - "grad_norm": 0.0004494552849791944, - "learning_rate": 0.00019999989715130167, - "loss": 46.0, - "step": 5981 - }, - { - "epoch": 0.4573656746373072, - "grad_norm": 0.001418429659679532, - "learning_rate": 0.0001999998971168494, - "loss": 46.0, - "step": 5982 - }, - { - "epoch": 0.4574421316206969, - "grad_norm": 0.0006162348436191678, - "learning_rate": 0.00019999989708239133, - "loss": 46.0, - "step": 5983 - }, - { - "epoch": 0.4575185886040866, - "grad_norm": 0.001832794165238738, - "learning_rate": 0.0001999998970479275, - "loss": 46.0, - "step": 5984 - }, - { - "epoch": 0.4575950455874763, - "grad_norm": 0.0024114756379276514, - "learning_rate": 0.00019999989701345792, - "loss": 46.0, - "step": 5985 - }, - { - "epoch": 0.4576715025708661, - "grad_norm": 0.0015525853959843516, - "learning_rate": 0.00019999989697898257, - "loss": 46.0, - "step": 5986 - }, - { - "epoch": 0.4577479595542558, - "grad_norm": 0.0010440442711114883, - "learning_rate": 0.00019999989694450144, - "loss": 46.0, - "step": 5987 - }, - { - "epoch": 0.4578244165376455, - "grad_norm": 0.002270434284582734, - "learning_rate": 0.00019999989691001454, - "loss": 46.0, - "step": 5988 - }, - { - "epoch": 0.4579008735210352, - "grad_norm": 0.0013988602440804243, - "learning_rate": 0.00019999989687552184, - "loss": 46.0, - "step": 5989 - }, - { - "epoch": 0.45797733050442496, - "grad_norm": 0.001036861795000732, - "learning_rate": 0.0001999998968410234, - "loss": 46.0, - "step": 5990 - }, - { - "epoch": 0.45805378748781467, - "grad_norm": 0.000586047419346869, - "learning_rate": 0.0001999998968065192, - "loss": 46.0, - "step": 5991 - }, - { - "epoch": 0.4581302444712044, - "grad_norm": 0.002678936580196023, - "learning_rate": 0.00019999989677200924, - "loss": 46.0, - "step": 5992 - }, - { - "epoch": 0.45820670145459413, - "grad_norm": 0.0021975308191031218, - "learning_rate": 0.0001999998967374935, - "loss": 46.0, - "step": 5993 - }, - { - "epoch": 0.45828315843798384, - "grad_norm": 0.00013177323853597045, - "learning_rate": 0.00019999989670297196, - "loss": 46.0, - "step": 5994 - }, - { - "epoch": 0.45835961542137355, - "grad_norm": 0.001314301509410143, - "learning_rate": 0.00019999989666844467, - "loss": 46.0, - "step": 5995 - }, - { - "epoch": 0.45843607240476325, - "grad_norm": 0.0028579598292708397, - "learning_rate": 0.00019999989663391162, - "loss": 46.0, - "step": 5996 - }, - { - "epoch": 0.458512529388153, - "grad_norm": 0.0021432386711239815, - "learning_rate": 0.00019999989659937278, - "loss": 46.0, - "step": 5997 - }, - { - "epoch": 0.4585889863715427, - "grad_norm": 0.0007212893106043339, - "learning_rate": 0.0001999998965648282, - "loss": 46.0, - "step": 5998 - }, - { - "epoch": 0.4586654433549324, - "grad_norm": 0.002305446658283472, - "learning_rate": 0.00019999989653027785, - "loss": 46.0, - "step": 5999 - }, - { - "epoch": 0.45874190033832213, - "grad_norm": 0.003508553374558687, - "learning_rate": 0.0001999998964957217, - "loss": 46.0, - "step": 6000 - }, - { - "epoch": 0.4588183573217119, - "grad_norm": 0.0005807048873975873, - "learning_rate": 0.0001999998964611598, - "loss": 46.0, - "step": 6001 - }, - { - "epoch": 0.4588948143051016, - "grad_norm": 0.0016505331732332706, - "learning_rate": 0.0001999998964265921, - "loss": 46.0, - "step": 6002 - }, - { - "epoch": 0.4589712712884913, - "grad_norm": 0.0007950190338306129, - "learning_rate": 0.00019999989639201868, - "loss": 46.0, - "step": 6003 - }, - { - "epoch": 0.459047728271881, - "grad_norm": 0.000992244342342019, - "learning_rate": 0.00019999989635743947, - "loss": 46.0, - "step": 6004 - }, - { - "epoch": 0.45912418525527077, - "grad_norm": 0.002171586500480771, - "learning_rate": 0.0001999998963228545, - "loss": 46.0, - "step": 6005 - }, - { - "epoch": 0.4592006422386605, - "grad_norm": 0.0018271973822265863, - "learning_rate": 0.00019999989628826373, - "loss": 46.0, - "step": 6006 - }, - { - "epoch": 0.4592770992220502, - "grad_norm": 0.005714506842195988, - "learning_rate": 0.00019999989625366722, - "loss": 46.0, - "step": 6007 - }, - { - "epoch": 0.45935355620543994, - "grad_norm": 0.0015390970511361957, - "learning_rate": 0.0001999998962190649, - "loss": 46.0, - "step": 6008 - }, - { - "epoch": 0.45943001318882964, - "grad_norm": 0.002417241455987096, - "learning_rate": 0.00019999989618445688, - "loss": 46.0, - "step": 6009 - }, - { - "epoch": 0.45950647017221935, - "grad_norm": 0.004362266045063734, - "learning_rate": 0.00019999989614984305, - "loss": 46.0, - "step": 6010 - }, - { - "epoch": 0.45958292715560906, - "grad_norm": 0.0005166534683667123, - "learning_rate": 0.00019999989611522344, - "loss": 46.0, - "step": 6011 - }, - { - "epoch": 0.4596593841389988, - "grad_norm": 0.00039660531911067665, - "learning_rate": 0.0001999998960805981, - "loss": 46.0, - "step": 6012 - }, - { - "epoch": 0.4597358411223885, - "grad_norm": 0.0018095995765179396, - "learning_rate": 0.00019999989604596694, - "loss": 46.0, - "step": 6013 - }, - { - "epoch": 0.4598122981057782, - "grad_norm": 0.0007541960221715271, - "learning_rate": 0.00019999989601133, - "loss": 46.0, - "step": 6014 - }, - { - "epoch": 0.45988875508916793, - "grad_norm": 0.0006454598042182624, - "learning_rate": 0.00019999989597668737, - "loss": 46.0, - "step": 6015 - }, - { - "epoch": 0.4599652120725577, - "grad_norm": 0.006079236976802349, - "learning_rate": 0.00019999989594203892, - "loss": 46.0, - "step": 6016 - }, - { - "epoch": 0.4600416690559474, - "grad_norm": 0.0026544760912656784, - "learning_rate": 0.0001999998959073847, - "loss": 46.0, - "step": 6017 - }, - { - "epoch": 0.4601181260393371, - "grad_norm": 0.0006069413502700627, - "learning_rate": 0.0001999998958727247, - "loss": 46.0, - "step": 6018 - }, - { - "epoch": 0.4601945830227268, - "grad_norm": 0.0013491831487044692, - "learning_rate": 0.00019999989583805897, - "loss": 46.0, - "step": 6019 - }, - { - "epoch": 0.46027104000611657, - "grad_norm": 0.0005287861567921937, - "learning_rate": 0.00019999989580338744, - "loss": 46.0, - "step": 6020 - }, - { - "epoch": 0.4603474969895063, - "grad_norm": 0.0010659437393769622, - "learning_rate": 0.00019999989576871015, - "loss": 46.0, - "step": 6021 - }, - { - "epoch": 0.460423953972896, - "grad_norm": 0.0011204304173588753, - "learning_rate": 0.0001999998957340271, - "loss": 46.0, - "step": 6022 - }, - { - "epoch": 0.46050041095628574, - "grad_norm": 0.004781865514814854, - "learning_rate": 0.00019999989569933826, - "loss": 46.0, - "step": 6023 - }, - { - "epoch": 0.46057686793967545, - "grad_norm": 0.0009274246403947473, - "learning_rate": 0.00019999989566464366, - "loss": 46.0, - "step": 6024 - }, - { - "epoch": 0.46065332492306515, - "grad_norm": 0.002049343893304467, - "learning_rate": 0.00019999989562994328, - "loss": 46.0, - "step": 6025 - }, - { - "epoch": 0.46072978190645486, - "grad_norm": 0.0008803637465462089, - "learning_rate": 0.00019999989559523715, - "loss": 46.0, - "step": 6026 - }, - { - "epoch": 0.4608062388898446, - "grad_norm": 0.002005344955250621, - "learning_rate": 0.00019999989556052525, - "loss": 46.0, - "step": 6027 - }, - { - "epoch": 0.4608826958732343, - "grad_norm": 0.0008307746611535549, - "learning_rate": 0.00019999989552580756, - "loss": 46.0, - "step": 6028 - }, - { - "epoch": 0.46095915285662403, - "grad_norm": 0.006273068021982908, - "learning_rate": 0.0001999998954910841, - "loss": 46.0, - "step": 6029 - }, - { - "epoch": 0.46103560984001374, - "grad_norm": 0.001752551761455834, - "learning_rate": 0.0001999998954563549, - "loss": 46.0, - "step": 6030 - }, - { - "epoch": 0.4611120668234035, - "grad_norm": 0.0035552845802158117, - "learning_rate": 0.00019999989542161993, - "loss": 46.0, - "step": 6031 - }, - { - "epoch": 0.4611885238067932, - "grad_norm": 0.002459985902532935, - "learning_rate": 0.00019999989538687916, - "loss": 46.0, - "step": 6032 - }, - { - "epoch": 0.4612649807901829, - "grad_norm": 0.005091544706374407, - "learning_rate": 0.00019999989535213263, - "loss": 46.0, - "step": 6033 - }, - { - "epoch": 0.46134143777357267, - "grad_norm": 0.0005674569983966649, - "learning_rate": 0.0001999998953173803, - "loss": 46.0, - "step": 6034 - }, - { - "epoch": 0.4614178947569624, - "grad_norm": 0.0004276817780919373, - "learning_rate": 0.00019999989528262226, - "loss": 46.0, - "step": 6035 - }, - { - "epoch": 0.4614943517403521, - "grad_norm": 0.0017261023167520761, - "learning_rate": 0.00019999989524785842, - "loss": 46.0, - "step": 6036 - }, - { - "epoch": 0.4615708087237418, - "grad_norm": 0.002901071682572365, - "learning_rate": 0.00019999989521308882, - "loss": 46.0, - "step": 6037 - }, - { - "epoch": 0.46164726570713155, - "grad_norm": 0.012594775296747684, - "learning_rate": 0.00019999989517831347, - "loss": 46.0, - "step": 6038 - }, - { - "epoch": 0.46172372269052125, - "grad_norm": 0.0008829089347273111, - "learning_rate": 0.00019999989514353232, - "loss": 46.0, - "step": 6039 - }, - { - "epoch": 0.46180017967391096, - "grad_norm": 0.0010119960643351078, - "learning_rate": 0.0001999998951087454, - "loss": 46.0, - "step": 6040 - }, - { - "epoch": 0.46187663665730067, - "grad_norm": 0.0016015958972275257, - "learning_rate": 0.00019999989507395275, - "loss": 46.0, - "step": 6041 - }, - { - "epoch": 0.4619530936406904, - "grad_norm": 0.0009610798442736268, - "learning_rate": 0.00019999989503915425, - "loss": 46.0, - "step": 6042 - }, - { - "epoch": 0.46202955062408013, - "grad_norm": 0.0019293783698230982, - "learning_rate": 0.00019999989500435006, - "loss": 46.0, - "step": 6043 - }, - { - "epoch": 0.46210600760746984, - "grad_norm": 0.001704701455309987, - "learning_rate": 0.00019999989496954007, - "loss": 46.0, - "step": 6044 - }, - { - "epoch": 0.46218246459085954, - "grad_norm": 0.0010965772671625018, - "learning_rate": 0.0001999998949347243, - "loss": 46.0, - "step": 6045 - }, - { - "epoch": 0.4622589215742493, - "grad_norm": 0.0015787002630531788, - "learning_rate": 0.00019999989489990276, - "loss": 46.0, - "step": 6046 - }, - { - "epoch": 0.462335378557639, - "grad_norm": 0.0008652831893414259, - "learning_rate": 0.00019999989486507545, - "loss": 46.0, - "step": 6047 - }, - { - "epoch": 0.4624118355410287, - "grad_norm": 0.003549166489392519, - "learning_rate": 0.0001999998948302424, - "loss": 46.0, - "step": 6048 - }, - { - "epoch": 0.4624882925244185, - "grad_norm": 0.0005808480200357735, - "learning_rate": 0.00019999989479540356, - "loss": 46.0, - "step": 6049 - }, - { - "epoch": 0.4625647495078082, - "grad_norm": 0.0008718917379155755, - "learning_rate": 0.000199999894760559, - "loss": 46.0, - "step": 6050 - }, - { - "epoch": 0.4626412064911979, - "grad_norm": 0.0018762447871267796, - "learning_rate": 0.00019999989472570858, - "loss": 46.0, - "step": 6051 - }, - { - "epoch": 0.4627176634745876, - "grad_norm": 0.0008820659131743014, - "learning_rate": 0.00019999989469085246, - "loss": 46.0, - "step": 6052 - }, - { - "epoch": 0.46279412045797735, - "grad_norm": 0.0009781663538888097, - "learning_rate": 0.00019999989465599053, - "loss": 46.0, - "step": 6053 - }, - { - "epoch": 0.46287057744136706, - "grad_norm": 0.0018254210008308291, - "learning_rate": 0.00019999989462112286, - "loss": 46.0, - "step": 6054 - }, - { - "epoch": 0.46294703442475676, - "grad_norm": 0.0004859469481743872, - "learning_rate": 0.0001999998945862494, - "loss": 46.0, - "step": 6055 - }, - { - "epoch": 0.46302349140814647, - "grad_norm": 0.0009583156206645072, - "learning_rate": 0.00019999989455137017, - "loss": 46.0, - "step": 6056 - }, - { - "epoch": 0.46309994839153623, - "grad_norm": 0.0005690045072697103, - "learning_rate": 0.00019999989451648518, - "loss": 46.0, - "step": 6057 - }, - { - "epoch": 0.46317640537492594, - "grad_norm": 0.0017215729458257556, - "learning_rate": 0.00019999989448159442, - "loss": 46.0, - "step": 6058 - }, - { - "epoch": 0.46325286235831564, - "grad_norm": 0.0015063463943079114, - "learning_rate": 0.00019999989444669788, - "loss": 46.0, - "step": 6059 - }, - { - "epoch": 0.46332931934170535, - "grad_norm": 0.0025609892327338457, - "learning_rate": 0.0001999998944117956, - "loss": 46.0, - "step": 6060 - }, - { - "epoch": 0.4634057763250951, - "grad_norm": 0.0013538898201659322, - "learning_rate": 0.00019999989437688754, - "loss": 46.0, - "step": 6061 - }, - { - "epoch": 0.4634822333084848, - "grad_norm": 0.016068175435066223, - "learning_rate": 0.00019999989434197368, - "loss": 46.0, - "step": 6062 - }, - { - "epoch": 0.4635586902918745, - "grad_norm": 0.003152010263875127, - "learning_rate": 0.00019999989430705405, - "loss": 46.0, - "step": 6063 - }, - { - "epoch": 0.4636351472752643, - "grad_norm": 0.0006886878982186317, - "learning_rate": 0.00019999989427212867, - "loss": 46.0, - "step": 6064 - }, - { - "epoch": 0.463711604258654, - "grad_norm": 0.0049661388620734215, - "learning_rate": 0.00019999989423719755, - "loss": 46.0, - "step": 6065 - }, - { - "epoch": 0.4637880612420437, - "grad_norm": 0.0017762784846127033, - "learning_rate": 0.00019999989420226063, - "loss": 46.0, - "step": 6066 - }, - { - "epoch": 0.4638645182254334, - "grad_norm": 0.002183932811021805, - "learning_rate": 0.00019999989416731793, - "loss": 46.0, - "step": 6067 - }, - { - "epoch": 0.46394097520882316, - "grad_norm": 0.0007665642187930644, - "learning_rate": 0.00019999989413236948, - "loss": 46.0, - "step": 6068 - }, - { - "epoch": 0.46401743219221286, - "grad_norm": 0.0006764632416889071, - "learning_rate": 0.00019999989409741527, - "loss": 46.0, - "step": 6069 - }, - { - "epoch": 0.46409388917560257, - "grad_norm": 0.0012916539562866092, - "learning_rate": 0.00019999989406245528, - "loss": 46.0, - "step": 6070 - }, - { - "epoch": 0.4641703461589923, - "grad_norm": 0.002496344270184636, - "learning_rate": 0.0001999998940274895, - "loss": 46.0, - "step": 6071 - }, - { - "epoch": 0.46424680314238204, - "grad_norm": 0.0009445230825804174, - "learning_rate": 0.00019999989399251797, - "loss": 46.0, - "step": 6072 - }, - { - "epoch": 0.46432326012577174, - "grad_norm": 0.0006523270858451724, - "learning_rate": 0.00019999989395754066, - "loss": 46.0, - "step": 6073 - }, - { - "epoch": 0.46439971710916145, - "grad_norm": 0.0019406828796491027, - "learning_rate": 0.00019999989392255758, - "loss": 46.0, - "step": 6074 - }, - { - "epoch": 0.46447617409255115, - "grad_norm": 0.0013628193410113454, - "learning_rate": 0.00019999989388756875, - "loss": 46.0, - "step": 6075 - }, - { - "epoch": 0.4645526310759409, - "grad_norm": 0.00032049877336248755, - "learning_rate": 0.00019999989385257415, - "loss": 46.0, - "step": 6076 - }, - { - "epoch": 0.4646290880593306, - "grad_norm": 0.0016337080160155892, - "learning_rate": 0.00019999989381757374, - "loss": 46.0, - "step": 6077 - }, - { - "epoch": 0.4647055450427203, - "grad_norm": 0.0011174286482855678, - "learning_rate": 0.00019999989378256762, - "loss": 46.0, - "step": 6078 - }, - { - "epoch": 0.4647820020261101, - "grad_norm": 0.0011314835865050554, - "learning_rate": 0.00019999989374755567, - "loss": 46.0, - "step": 6079 - }, - { - "epoch": 0.4648584590094998, - "grad_norm": 0.001032881555147469, - "learning_rate": 0.00019999989371253797, - "loss": 46.0, - "step": 6080 - }, - { - "epoch": 0.4649349159928895, - "grad_norm": 0.001018154202029109, - "learning_rate": 0.00019999989367751453, - "loss": 46.0, - "step": 6081 - }, - { - "epoch": 0.4650113729762792, - "grad_norm": 0.0014510657638311386, - "learning_rate": 0.0001999998936424853, - "loss": 46.0, - "step": 6082 - }, - { - "epoch": 0.46508782995966896, - "grad_norm": 0.0013799158623442054, - "learning_rate": 0.00019999989360745032, - "loss": 46.0, - "step": 6083 - }, - { - "epoch": 0.46516428694305867, - "grad_norm": 0.0014790084678679705, - "learning_rate": 0.00019999989357240953, - "loss": 46.0, - "step": 6084 - }, - { - "epoch": 0.4652407439264484, - "grad_norm": 0.0023474215995520353, - "learning_rate": 0.000199999893537363, - "loss": 46.0, - "step": 6085 - }, - { - "epoch": 0.4653172009098381, - "grad_norm": 0.0020581530407071114, - "learning_rate": 0.00019999989350231068, - "loss": 46.0, - "step": 6086 - }, - { - "epoch": 0.46539365789322784, - "grad_norm": 0.00048755056923255324, - "learning_rate": 0.00019999989346725262, - "loss": 46.0, - "step": 6087 - }, - { - "epoch": 0.46547011487661755, - "grad_norm": 0.0004982934915460646, - "learning_rate": 0.00019999989343218877, - "loss": 46.0, - "step": 6088 - }, - { - "epoch": 0.46554657186000725, - "grad_norm": 0.00028664973797276616, - "learning_rate": 0.00019999989339711916, - "loss": 46.0, - "step": 6089 - }, - { - "epoch": 0.46562302884339696, - "grad_norm": 0.0008702285122126341, - "learning_rate": 0.00019999989336204376, - "loss": 46.0, - "step": 6090 - }, - { - "epoch": 0.4656994858267867, - "grad_norm": 0.0013627681182697415, - "learning_rate": 0.00019999989332696264, - "loss": 46.0, - "step": 6091 - }, - { - "epoch": 0.4657759428101764, - "grad_norm": 0.0028694255743175745, - "learning_rate": 0.00019999989329187569, - "loss": 46.0, - "step": 6092 - }, - { - "epoch": 0.46585239979356613, - "grad_norm": 0.0009175401064567268, - "learning_rate": 0.000199999893256783, - "loss": 46.0, - "step": 6093 - }, - { - "epoch": 0.4659288567769559, - "grad_norm": 0.0004798599402420223, - "learning_rate": 0.00019999989322168452, - "loss": 46.0, - "step": 6094 - }, - { - "epoch": 0.4660053137603456, - "grad_norm": 0.0006042023305781186, - "learning_rate": 0.0001999998931865803, - "loss": 46.0, - "step": 6095 - }, - { - "epoch": 0.4660817707437353, - "grad_norm": 0.0005490007461048663, - "learning_rate": 0.00019999989315147028, - "loss": 46.0, - "step": 6096 - }, - { - "epoch": 0.466158227727125, - "grad_norm": 0.0011405599070712924, - "learning_rate": 0.00019999989311635455, - "loss": 46.0, - "step": 6097 - }, - { - "epoch": 0.46623468471051477, - "grad_norm": 0.005511615425348282, - "learning_rate": 0.000199999893081233, - "loss": 46.0, - "step": 6098 - }, - { - "epoch": 0.4663111416939045, - "grad_norm": 0.0010426411172375083, - "learning_rate": 0.0001999998930461057, - "loss": 46.0, - "step": 6099 - }, - { - "epoch": 0.4663875986772942, - "grad_norm": 0.0018852632492780685, - "learning_rate": 0.00019999989301097262, - "loss": 46.0, - "step": 6100 - }, - { - "epoch": 0.4664640556606839, - "grad_norm": 0.0032607396133244038, - "learning_rate": 0.00019999989297583376, - "loss": 46.0, - "step": 6101 - }, - { - "epoch": 0.46654051264407365, - "grad_norm": 0.001016563386656344, - "learning_rate": 0.00019999989294068916, - "loss": 46.0, - "step": 6102 - }, - { - "epoch": 0.46661696962746335, - "grad_norm": 0.001638552756048739, - "learning_rate": 0.00019999989290553875, - "loss": 46.0, - "step": 6103 - }, - { - "epoch": 0.46669342661085306, - "grad_norm": 0.004358951468020678, - "learning_rate": 0.0001999998928703826, - "loss": 46.0, - "step": 6104 - }, - { - "epoch": 0.46676988359424276, - "grad_norm": 0.0011524200672283769, - "learning_rate": 0.00019999989283522068, - "loss": 46.0, - "step": 6105 - }, - { - "epoch": 0.4668463405776325, - "grad_norm": 0.0021133748814463615, - "learning_rate": 0.00019999989280005298, - "loss": 46.0, - "step": 6106 - }, - { - "epoch": 0.46692279756102223, - "grad_norm": 0.0026021746452897787, - "learning_rate": 0.00019999989276487951, - "loss": 46.0, - "step": 6107 - }, - { - "epoch": 0.46699925454441193, - "grad_norm": 0.0007394233834929764, - "learning_rate": 0.00019999989272970027, - "loss": 46.0, - "step": 6108 - }, - { - "epoch": 0.4670757115278017, - "grad_norm": 0.0026968824677169323, - "learning_rate": 0.00019999989269451528, - "loss": 46.0, - "step": 6109 - }, - { - "epoch": 0.4671521685111914, - "grad_norm": 0.0011821966618299484, - "learning_rate": 0.00019999989265932452, - "loss": 46.0, - "step": 6110 - }, - { - "epoch": 0.4672286254945811, - "grad_norm": 0.00032955125789158046, - "learning_rate": 0.00019999989262412795, - "loss": 46.0, - "step": 6111 - }, - { - "epoch": 0.4673050824779708, - "grad_norm": 0.001247753738425672, - "learning_rate": 0.00019999989258892567, - "loss": 46.0, - "step": 6112 - }, - { - "epoch": 0.4673815394613606, - "grad_norm": 0.00160357600543648, - "learning_rate": 0.00019999989255371756, - "loss": 46.0, - "step": 6113 - }, - { - "epoch": 0.4674579964447503, - "grad_norm": 0.0028064362704753876, - "learning_rate": 0.00019999989251850373, - "loss": 46.0, - "step": 6114 - }, - { - "epoch": 0.46753445342814, - "grad_norm": 0.0007253466756083071, - "learning_rate": 0.0001999998924832841, - "loss": 46.0, - "step": 6115 - }, - { - "epoch": 0.4676109104115297, - "grad_norm": 0.0016152405878528953, - "learning_rate": 0.0001999998924480587, - "loss": 46.0, - "step": 6116 - }, - { - "epoch": 0.46768736739491945, - "grad_norm": 0.0020839269272983074, - "learning_rate": 0.00019999989241282755, - "loss": 46.0, - "step": 6117 - }, - { - "epoch": 0.46776382437830916, - "grad_norm": 0.001287415623664856, - "learning_rate": 0.00019999989237759063, - "loss": 46.0, - "step": 6118 - }, - { - "epoch": 0.46784028136169886, - "grad_norm": 0.0011231377720832825, - "learning_rate": 0.00019999989234234793, - "loss": 46.0, - "step": 6119 - }, - { - "epoch": 0.46791673834508857, - "grad_norm": 0.001098466687835753, - "learning_rate": 0.00019999989230709946, - "loss": 46.0, - "step": 6120 - }, - { - "epoch": 0.46799319532847833, - "grad_norm": 0.0008879080414772034, - "learning_rate": 0.00019999989227184522, - "loss": 46.0, - "step": 6121 - }, - { - "epoch": 0.46806965231186803, - "grad_norm": 0.010542269796133041, - "learning_rate": 0.0001999998922365852, - "loss": 46.0, - "step": 6122 - }, - { - "epoch": 0.46814610929525774, - "grad_norm": 0.00624800892546773, - "learning_rate": 0.00019999989220131944, - "loss": 46.0, - "step": 6123 - }, - { - "epoch": 0.4682225662786475, - "grad_norm": 0.0024787664879113436, - "learning_rate": 0.00019999989216604787, - "loss": 46.0, - "step": 6124 - }, - { - "epoch": 0.4682990232620372, - "grad_norm": 0.001365824369713664, - "learning_rate": 0.00019999989213077056, - "loss": 46.0, - "step": 6125 - }, - { - "epoch": 0.4683754802454269, - "grad_norm": 0.001040915260091424, - "learning_rate": 0.00019999989209548748, - "loss": 46.0, - "step": 6126 - }, - { - "epoch": 0.4684519372288166, - "grad_norm": 0.0014266117941588163, - "learning_rate": 0.00019999989206019865, - "loss": 46.0, - "step": 6127 - }, - { - "epoch": 0.4685283942122064, - "grad_norm": 0.001384437084197998, - "learning_rate": 0.00019999989202490402, - "loss": 46.0, - "step": 6128 - }, - { - "epoch": 0.4686048511955961, - "grad_norm": 0.0007424904033541679, - "learning_rate": 0.00019999989198960362, - "loss": 46.0, - "step": 6129 - }, - { - "epoch": 0.4686813081789858, - "grad_norm": 0.004829507786780596, - "learning_rate": 0.00019999989195429744, - "loss": 46.0, - "step": 6130 - }, - { - "epoch": 0.4687577651623755, - "grad_norm": 0.0012627897085621953, - "learning_rate": 0.00019999989191898552, - "loss": 46.0, - "step": 6131 - }, - { - "epoch": 0.46883422214576526, - "grad_norm": 0.004621132742613554, - "learning_rate": 0.00019999989188366782, - "loss": 46.0, - "step": 6132 - }, - { - "epoch": 0.46891067912915496, - "grad_norm": 0.0010739917634055018, - "learning_rate": 0.00019999989184834436, - "loss": 46.0, - "step": 6133 - }, - { - "epoch": 0.46898713611254467, - "grad_norm": 0.0024414954241365194, - "learning_rate": 0.00019999989181301514, - "loss": 46.0, - "step": 6134 - }, - { - "epoch": 0.4690635930959344, - "grad_norm": 0.002693266374990344, - "learning_rate": 0.0001999998917776801, - "loss": 46.0, - "step": 6135 - }, - { - "epoch": 0.46914005007932413, - "grad_norm": 0.007640698924660683, - "learning_rate": 0.00019999989174233933, - "loss": 46.0, - "step": 6136 - }, - { - "epoch": 0.46921650706271384, - "grad_norm": 0.0008234454435296357, - "learning_rate": 0.00019999989170699277, - "loss": 46.0, - "step": 6137 - }, - { - "epoch": 0.46929296404610354, - "grad_norm": 0.0005543379229493439, - "learning_rate": 0.00019999989167164046, - "loss": 46.0, - "step": 6138 - }, - { - "epoch": 0.4693694210294933, - "grad_norm": 0.0006760939722880721, - "learning_rate": 0.00019999989163628238, - "loss": 46.0, - "step": 6139 - }, - { - "epoch": 0.469445878012883, - "grad_norm": 0.0007719618151895702, - "learning_rate": 0.00019999989160091852, - "loss": 46.0, - "step": 6140 - }, - { - "epoch": 0.4695223349962727, - "grad_norm": 0.002284866524860263, - "learning_rate": 0.0001999998915655489, - "loss": 46.0, - "step": 6141 - }, - { - "epoch": 0.4695987919796624, - "grad_norm": 0.00119221699424088, - "learning_rate": 0.00019999989153017352, - "loss": 46.0, - "step": 6142 - }, - { - "epoch": 0.4696752489630522, - "grad_norm": 0.0014594650128856301, - "learning_rate": 0.00019999989149479234, - "loss": 46.0, - "step": 6143 - }, - { - "epoch": 0.4697517059464419, - "grad_norm": 0.0007182607660070062, - "learning_rate": 0.00019999989145940542, - "loss": 46.0, - "step": 6144 - }, - { - "epoch": 0.4698281629298316, - "grad_norm": 0.008152276277542114, - "learning_rate": 0.0001999998914240127, - "loss": 46.0, - "step": 6145 - }, - { - "epoch": 0.4699046199132213, - "grad_norm": 0.0008363769738934934, - "learning_rate": 0.0001999998913886142, - "loss": 46.0, - "step": 6146 - }, - { - "epoch": 0.46998107689661106, - "grad_norm": 0.0019262407440692186, - "learning_rate": 0.00019999989135320999, - "loss": 46.0, - "step": 6147 - }, - { - "epoch": 0.47005753388000077, - "grad_norm": 0.0030531391967087984, - "learning_rate": 0.00019999989131779997, - "loss": 46.0, - "step": 6148 - }, - { - "epoch": 0.47013399086339047, - "grad_norm": 0.0013678909745067358, - "learning_rate": 0.00019999989128238418, - "loss": 46.0, - "step": 6149 - }, - { - "epoch": 0.47021044784678023, - "grad_norm": 0.0008489760803058743, - "learning_rate": 0.00019999989124696265, - "loss": 46.0, - "step": 6150 - }, - { - "epoch": 0.47028690483016994, - "grad_norm": 0.0008520630653947592, - "learning_rate": 0.00019999989121153534, - "loss": 46.0, - "step": 6151 - }, - { - "epoch": 0.47036336181355964, - "grad_norm": 0.0008328371914103627, - "learning_rate": 0.00019999989117610223, - "loss": 46.0, - "step": 6152 - }, - { - "epoch": 0.47043981879694935, - "grad_norm": 0.0006129646208137274, - "learning_rate": 0.00019999989114066334, - "loss": 46.0, - "step": 6153 - }, - { - "epoch": 0.4705162757803391, - "grad_norm": 0.003923851530998945, - "learning_rate": 0.00019999989110521872, - "loss": 46.0, - "step": 6154 - }, - { - "epoch": 0.4705927327637288, - "grad_norm": 0.0020324308425188065, - "learning_rate": 0.00019999989106976834, - "loss": 46.0, - "step": 6155 - }, - { - "epoch": 0.4706691897471185, - "grad_norm": 0.004159628413617611, - "learning_rate": 0.00019999989103431216, - "loss": 46.0, - "step": 6156 - }, - { - "epoch": 0.4707456467305082, - "grad_norm": 0.0014560986310243607, - "learning_rate": 0.00019999989099885024, - "loss": 46.0, - "step": 6157 - }, - { - "epoch": 0.470822103713898, - "grad_norm": 0.002111112466081977, - "learning_rate": 0.00019999989096338255, - "loss": 46.0, - "step": 6158 - }, - { - "epoch": 0.4708985606972877, - "grad_norm": 0.013774135150015354, - "learning_rate": 0.00019999989092790905, - "loss": 46.0, - "step": 6159 - }, - { - "epoch": 0.4709750176806774, - "grad_norm": 0.0010287988698109984, - "learning_rate": 0.0001999998908924298, - "loss": 46.0, - "step": 6160 - }, - { - "epoch": 0.4710514746640671, - "grad_norm": 0.002148106461390853, - "learning_rate": 0.0001999998908569448, - "loss": 46.0, - "step": 6161 - }, - { - "epoch": 0.47112793164745687, - "grad_norm": 0.0025398489087820053, - "learning_rate": 0.000199999890821454, - "loss": 46.0, - "step": 6162 - }, - { - "epoch": 0.47120438863084657, - "grad_norm": 0.0011332968715578318, - "learning_rate": 0.00019999989078595744, - "loss": 46.0, - "step": 6163 - }, - { - "epoch": 0.4712808456142363, - "grad_norm": 0.001397020765580237, - "learning_rate": 0.00019999989075045513, - "loss": 46.0, - "step": 6164 - }, - { - "epoch": 0.47135730259762604, - "grad_norm": 0.0066262464970350266, - "learning_rate": 0.00019999989071494705, - "loss": 46.0, - "step": 6165 - }, - { - "epoch": 0.47143375958101574, - "grad_norm": 0.0009376137750223279, - "learning_rate": 0.0001999998906794332, - "loss": 46.0, - "step": 6166 - }, - { - "epoch": 0.47151021656440545, - "grad_norm": 0.004165223799645901, - "learning_rate": 0.00019999989064391357, - "loss": 46.0, - "step": 6167 - }, - { - "epoch": 0.47158667354779515, - "grad_norm": 0.00034537186729721725, - "learning_rate": 0.00019999989060838814, - "loss": 46.0, - "step": 6168 - }, - { - "epoch": 0.4716631305311849, - "grad_norm": 0.0021305617410689592, - "learning_rate": 0.00019999989057285696, - "loss": 46.0, - "step": 6169 - }, - { - "epoch": 0.4717395875145746, - "grad_norm": 0.0006563275237567723, - "learning_rate": 0.00019999989053732001, - "loss": 46.0, - "step": 6170 - }, - { - "epoch": 0.4718160444979643, - "grad_norm": 0.0031731147319078445, - "learning_rate": 0.00019999989050177732, - "loss": 46.0, - "step": 6171 - }, - { - "epoch": 0.47189250148135403, - "grad_norm": 0.003867693245410919, - "learning_rate": 0.00019999989046622885, - "loss": 46.0, - "step": 6172 - }, - { - "epoch": 0.4719689584647438, - "grad_norm": 0.0010971968295052648, - "learning_rate": 0.0001999998904306746, - "loss": 46.0, - "step": 6173 - }, - { - "epoch": 0.4720454154481335, - "grad_norm": 0.0015497637214139104, - "learning_rate": 0.00019999989039511457, - "loss": 46.0, - "step": 6174 - }, - { - "epoch": 0.4721218724315232, - "grad_norm": 0.0017124320147559047, - "learning_rate": 0.0001999998903595488, - "loss": 46.0, - "step": 6175 - }, - { - "epoch": 0.4721983294149129, - "grad_norm": 0.000605948269367218, - "learning_rate": 0.00019999989032397724, - "loss": 46.0, - "step": 6176 - }, - { - "epoch": 0.47227478639830267, - "grad_norm": 0.0018868144834414124, - "learning_rate": 0.00019999989028839988, - "loss": 46.0, - "step": 6177 - }, - { - "epoch": 0.4723512433816924, - "grad_norm": 0.0013617558870464563, - "learning_rate": 0.0001999998902528168, - "loss": 46.0, - "step": 6178 - }, - { - "epoch": 0.4724277003650821, - "grad_norm": 0.0007929742569103837, - "learning_rate": 0.00019999989021722794, - "loss": 46.0, - "step": 6179 - }, - { - "epoch": 0.47250415734847184, - "grad_norm": 0.002169110579416156, - "learning_rate": 0.0001999998901816333, - "loss": 46.0, - "step": 6180 - }, - { - "epoch": 0.47258061433186155, - "grad_norm": 0.0012314191553741693, - "learning_rate": 0.00019999989014603291, - "loss": 46.0, - "step": 6181 - }, - { - "epoch": 0.47265707131525125, - "grad_norm": 0.0012225055834278464, - "learning_rate": 0.00019999989011042674, - "loss": 46.0, - "step": 6182 - }, - { - "epoch": 0.47273352829864096, - "grad_norm": 0.0028150416910648346, - "learning_rate": 0.0001999998900748148, - "loss": 46.0, - "step": 6183 - }, - { - "epoch": 0.4728099852820307, - "grad_norm": 0.0030481473077088594, - "learning_rate": 0.00019999989003919707, - "loss": 46.0, - "step": 6184 - }, - { - "epoch": 0.4728864422654204, - "grad_norm": 0.0008216250571422279, - "learning_rate": 0.0001999998900035736, - "loss": 46.0, - "step": 6185 - }, - { - "epoch": 0.47296289924881013, - "grad_norm": 0.0013899178011342883, - "learning_rate": 0.00019999988996794433, - "loss": 46.0, - "step": 6186 - }, - { - "epoch": 0.47303935623219984, - "grad_norm": 0.0044166324660182, - "learning_rate": 0.00019999988993230932, - "loss": 46.0, - "step": 6187 - }, - { - "epoch": 0.4731158132155896, - "grad_norm": 0.001612075255252421, - "learning_rate": 0.00019999988989666853, - "loss": 46.0, - "step": 6188 - }, - { - "epoch": 0.4731922701989793, - "grad_norm": 0.001719577587209642, - "learning_rate": 0.000199999889861022, - "loss": 46.0, - "step": 6189 - }, - { - "epoch": 0.473268727182369, - "grad_norm": 0.0013245035661384463, - "learning_rate": 0.00019999988982536966, - "loss": 46.0, - "step": 6190 - }, - { - "epoch": 0.4733451841657587, - "grad_norm": 0.0010736206313595176, - "learning_rate": 0.00019999988978971155, - "loss": 46.0, - "step": 6191 - }, - { - "epoch": 0.4734216411491485, - "grad_norm": 0.002807576907798648, - "learning_rate": 0.0001999998897540477, - "loss": 46.0, - "step": 6192 - }, - { - "epoch": 0.4734980981325382, - "grad_norm": 0.002152242697775364, - "learning_rate": 0.00019999988971837804, - "loss": 46.0, - "step": 6193 - }, - { - "epoch": 0.4735745551159279, - "grad_norm": 0.000758540176320821, - "learning_rate": 0.0001999998896827026, - "loss": 46.0, - "step": 6194 - }, - { - "epoch": 0.47365101209931765, - "grad_norm": 0.0031296033412218094, - "learning_rate": 0.00019999988964702146, - "loss": 46.0, - "step": 6195 - }, - { - "epoch": 0.47372746908270735, - "grad_norm": 0.0005724740331061184, - "learning_rate": 0.00019999988961133452, - "loss": 46.0, - "step": 6196 - }, - { - "epoch": 0.47380392606609706, - "grad_norm": 0.0023585292510688305, - "learning_rate": 0.0001999998895756418, - "loss": 46.0, - "step": 6197 - }, - { - "epoch": 0.47388038304948676, - "grad_norm": 0.0009502078755758703, - "learning_rate": 0.0001999998895399433, - "loss": 46.0, - "step": 6198 - }, - { - "epoch": 0.4739568400328765, - "grad_norm": 0.0016020425828173757, - "learning_rate": 0.00019999988950423906, - "loss": 46.0, - "step": 6199 - }, - { - "epoch": 0.47403329701626623, - "grad_norm": 0.000578279432374984, - "learning_rate": 0.00019999988946852904, - "loss": 46.0, - "step": 6200 - }, - { - "epoch": 0.47410975399965594, - "grad_norm": 0.001709281001240015, - "learning_rate": 0.00019999988943281326, - "loss": 46.0, - "step": 6201 - }, - { - "epoch": 0.47418621098304564, - "grad_norm": 0.004529502708464861, - "learning_rate": 0.0001999998893970917, - "loss": 46.0, - "step": 6202 - }, - { - "epoch": 0.4742626679664354, - "grad_norm": 0.0026438753120601177, - "learning_rate": 0.00019999988936136436, - "loss": 46.0, - "step": 6203 - }, - { - "epoch": 0.4743391249498251, - "grad_norm": 0.002564698690548539, - "learning_rate": 0.00019999988932563125, - "loss": 46.0, - "step": 6204 - }, - { - "epoch": 0.4744155819332148, - "grad_norm": 0.0011640641605481505, - "learning_rate": 0.0001999998892898924, - "loss": 46.0, - "step": 6205 - }, - { - "epoch": 0.4744920389166045, - "grad_norm": 0.01073073036968708, - "learning_rate": 0.00019999988925414774, - "loss": 46.0, - "step": 6206 - }, - { - "epoch": 0.4745684958999943, - "grad_norm": 0.0007749986834824085, - "learning_rate": 0.00019999988921839731, - "loss": 46.0, - "step": 6207 - }, - { - "epoch": 0.474644952883384, - "grad_norm": 0.0020734097342938185, - "learning_rate": 0.00019999988918264117, - "loss": 46.0, - "step": 6208 - }, - { - "epoch": 0.4747214098667737, - "grad_norm": 0.002942918334156275, - "learning_rate": 0.0001999998891468792, - "loss": 46.0, - "step": 6209 - }, - { - "epoch": 0.47479786685016345, - "grad_norm": 0.001187040819786489, - "learning_rate": 0.0001999998891111115, - "loss": 46.0, - "step": 6210 - }, - { - "epoch": 0.47487432383355316, - "grad_norm": 0.000389922788599506, - "learning_rate": 0.000199999889075338, - "loss": 46.0, - "step": 6211 - }, - { - "epoch": 0.47495078081694286, - "grad_norm": 0.0026759100146591663, - "learning_rate": 0.00019999988903955873, - "loss": 46.0, - "step": 6212 - }, - { - "epoch": 0.47502723780033257, - "grad_norm": 0.0015849982155486941, - "learning_rate": 0.00019999988900377372, - "loss": 46.0, - "step": 6213 - }, - { - "epoch": 0.47510369478372233, - "grad_norm": 0.0010976759949699044, - "learning_rate": 0.0001999998889679829, - "loss": 46.0, - "step": 6214 - }, - { - "epoch": 0.47518015176711204, - "grad_norm": 0.0004689736815635115, - "learning_rate": 0.00019999988893218634, - "loss": 46.0, - "step": 6215 - }, - { - "epoch": 0.47525660875050174, - "grad_norm": 0.000623491476289928, - "learning_rate": 0.000199999888896384, - "loss": 46.0, - "step": 6216 - }, - { - "epoch": 0.47533306573389145, - "grad_norm": 0.006456580478698015, - "learning_rate": 0.0001999998888605759, - "loss": 46.0, - "step": 6217 - }, - { - "epoch": 0.4754095227172812, - "grad_norm": 0.0010884033981710672, - "learning_rate": 0.00019999988882476205, - "loss": 46.0, - "step": 6218 - }, - { - "epoch": 0.4754859797006709, - "grad_norm": 0.0007241466082632542, - "learning_rate": 0.0001999998887889424, - "loss": 46.0, - "step": 6219 - }, - { - "epoch": 0.4755624366840606, - "grad_norm": 0.0008789958083070815, - "learning_rate": 0.000199999888753117, - "loss": 46.0, - "step": 6220 - }, - { - "epoch": 0.4756388936674503, - "grad_norm": 0.003025518264621496, - "learning_rate": 0.00019999988871728582, - "loss": 46.0, - "step": 6221 - }, - { - "epoch": 0.4757153506508401, - "grad_norm": 0.003622946795076132, - "learning_rate": 0.00019999988868144884, - "loss": 46.0, - "step": 6222 - }, - { - "epoch": 0.4757918076342298, - "grad_norm": 0.0004595489881467074, - "learning_rate": 0.00019999988864560612, - "loss": 46.0, - "step": 6223 - }, - { - "epoch": 0.4758682646176195, - "grad_norm": 0.001185062574222684, - "learning_rate": 0.00019999988860975766, - "loss": 46.0, - "step": 6224 - }, - { - "epoch": 0.47594472160100926, - "grad_norm": 0.0014367899857461452, - "learning_rate": 0.0001999998885739034, - "loss": 46.0, - "step": 6225 - }, - { - "epoch": 0.47602117858439896, - "grad_norm": 0.003645437303930521, - "learning_rate": 0.00019999988853804337, - "loss": 46.0, - "step": 6226 - }, - { - "epoch": 0.47609763556778867, - "grad_norm": 0.0038288708310574293, - "learning_rate": 0.00019999988850217756, - "loss": 46.0, - "step": 6227 - }, - { - "epoch": 0.4761740925511784, - "grad_norm": 0.0010920005152001977, - "learning_rate": 0.000199999888466306, - "loss": 46.0, - "step": 6228 - }, - { - "epoch": 0.47625054953456814, - "grad_norm": 0.0007951435400173068, - "learning_rate": 0.00019999988843042864, - "loss": 46.0, - "step": 6229 - }, - { - "epoch": 0.47632700651795784, - "grad_norm": 0.0006535447319038212, - "learning_rate": 0.00019999988839454553, - "loss": 46.0, - "step": 6230 - }, - { - "epoch": 0.47640346350134755, - "grad_norm": 0.0011695997091010213, - "learning_rate": 0.00019999988835865668, - "loss": 46.0, - "step": 6231 - }, - { - "epoch": 0.47647992048473725, - "grad_norm": 0.0009866493055596948, - "learning_rate": 0.00019999988832276202, - "loss": 46.0, - "step": 6232 - }, - { - "epoch": 0.476556377468127, - "grad_norm": 0.0006724954000674188, - "learning_rate": 0.0001999998882868616, - "loss": 46.0, - "step": 6233 - }, - { - "epoch": 0.4766328344515167, - "grad_norm": 0.0007678009569644928, - "learning_rate": 0.00019999988825095542, - "loss": 46.0, - "step": 6234 - }, - { - "epoch": 0.4767092914349064, - "grad_norm": 0.0009465903276577592, - "learning_rate": 0.00019999988821504347, - "loss": 46.0, - "step": 6235 - }, - { - "epoch": 0.47678574841829613, - "grad_norm": 0.0009502816246822476, - "learning_rate": 0.00019999988817912575, - "loss": 46.0, - "step": 6236 - }, - { - "epoch": 0.4768622054016859, - "grad_norm": 0.0016623563133180141, - "learning_rate": 0.00019999988814320226, - "loss": 46.0, - "step": 6237 - }, - { - "epoch": 0.4769386623850756, - "grad_norm": 0.0018453830853104591, - "learning_rate": 0.000199999888107273, - "loss": 46.0, - "step": 6238 - }, - { - "epoch": 0.4770151193684653, - "grad_norm": 0.0009387017344124615, - "learning_rate": 0.00019999988807133795, - "loss": 46.0, - "step": 6239 - }, - { - "epoch": 0.47709157635185506, - "grad_norm": 0.000980983255431056, - "learning_rate": 0.00019999988803539716, - "loss": 46.0, - "step": 6240 - }, - { - "epoch": 0.47716803333524477, - "grad_norm": 0.0016814226983115077, - "learning_rate": 0.0001999998879994506, - "loss": 46.0, - "step": 6241 - }, - { - "epoch": 0.4772444903186345, - "grad_norm": 0.0006290982710197568, - "learning_rate": 0.00019999988796349824, - "loss": 46.0, - "step": 6242 - }, - { - "epoch": 0.4773209473020242, - "grad_norm": 0.0026615469250828028, - "learning_rate": 0.0001999998879275401, - "loss": 46.0, - "step": 6243 - }, - { - "epoch": 0.47739740428541394, - "grad_norm": 0.0010419313330203295, - "learning_rate": 0.00019999988789157626, - "loss": 46.0, - "step": 6244 - }, - { - "epoch": 0.47747386126880365, - "grad_norm": 0.0010810858802869916, - "learning_rate": 0.0001999998878556066, - "loss": 46.0, - "step": 6245 - }, - { - "epoch": 0.47755031825219335, - "grad_norm": 0.0027360483072698116, - "learning_rate": 0.0001999998878196312, - "loss": 46.0, - "step": 6246 - }, - { - "epoch": 0.47762677523558306, - "grad_norm": 0.011292408220469952, - "learning_rate": 0.00019999988778365003, - "loss": 46.0, - "step": 6247 - }, - { - "epoch": 0.4777032322189728, - "grad_norm": 0.0015824967995285988, - "learning_rate": 0.00019999988774766305, - "loss": 46.0, - "step": 6248 - }, - { - "epoch": 0.4777796892023625, - "grad_norm": 0.0009166456875391304, - "learning_rate": 0.0001999998877116703, - "loss": 46.0, - "step": 6249 - }, - { - "epoch": 0.47785614618575223, - "grad_norm": 0.000988941639661789, - "learning_rate": 0.0001999998876756718, - "loss": 46.0, - "step": 6250 - }, - { - "epoch": 0.47793260316914193, - "grad_norm": 0.0010834858985617757, - "learning_rate": 0.00019999988763966755, - "loss": 46.0, - "step": 6251 - }, - { - "epoch": 0.4780090601525317, - "grad_norm": 0.004990748595446348, - "learning_rate": 0.0001999998876036575, - "loss": 46.0, - "step": 6252 - }, - { - "epoch": 0.4780855171359214, - "grad_norm": 0.0009165873634628952, - "learning_rate": 0.00019999988756764172, - "loss": 46.0, - "step": 6253 - }, - { - "epoch": 0.4781619741193111, - "grad_norm": 0.0022636260837316513, - "learning_rate": 0.00019999988753162016, - "loss": 46.0, - "step": 6254 - }, - { - "epoch": 0.47823843110270087, - "grad_norm": 0.003004265483468771, - "learning_rate": 0.0001999998874955928, - "loss": 46.0, - "step": 6255 - }, - { - "epoch": 0.4783148880860906, - "grad_norm": 0.002727489685639739, - "learning_rate": 0.0001999998874595597, - "loss": 46.0, - "step": 6256 - }, - { - "epoch": 0.4783913450694803, - "grad_norm": 0.004355646204203367, - "learning_rate": 0.0001999998874235208, - "loss": 46.0, - "step": 6257 - }, - { - "epoch": 0.47846780205287, - "grad_norm": 0.0008424715488217771, - "learning_rate": 0.00019999988738747616, - "loss": 46.0, - "step": 6258 - }, - { - "epoch": 0.47854425903625974, - "grad_norm": 0.0011882291873916984, - "learning_rate": 0.00019999988735142573, - "loss": 46.0, - "step": 6259 - }, - { - "epoch": 0.47862071601964945, - "grad_norm": 0.004274935927242041, - "learning_rate": 0.00019999988731536953, - "loss": 46.0, - "step": 6260 - }, - { - "epoch": 0.47869717300303916, - "grad_norm": 0.0005319232004694641, - "learning_rate": 0.00019999988727930758, - "loss": 46.0, - "step": 6261 - }, - { - "epoch": 0.47877362998642886, - "grad_norm": 0.0012364584254100919, - "learning_rate": 0.00019999988724323986, - "loss": 46.0, - "step": 6262 - }, - { - "epoch": 0.4788500869698186, - "grad_norm": 0.0007650853949598968, - "learning_rate": 0.00019999988720716637, - "loss": 46.0, - "step": 6263 - }, - { - "epoch": 0.47892654395320833, - "grad_norm": 0.0005413478938862681, - "learning_rate": 0.00019999988717108708, - "loss": 46.0, - "step": 6264 - }, - { - "epoch": 0.47900300093659803, - "grad_norm": 0.0005395942134782672, - "learning_rate": 0.00019999988713500204, - "loss": 46.0, - "step": 6265 - }, - { - "epoch": 0.4790794579199878, - "grad_norm": 0.006407381501048803, - "learning_rate": 0.00019999988709891125, - "loss": 46.0, - "step": 6266 - }, - { - "epoch": 0.4791559149033775, - "grad_norm": 0.003435243386775255, - "learning_rate": 0.00019999988706281466, - "loss": 46.0, - "step": 6267 - }, - { - "epoch": 0.4792323718867672, - "grad_norm": 0.0004913299344480038, - "learning_rate": 0.00019999988702671233, - "loss": 46.0, - "step": 6268 - }, - { - "epoch": 0.4793088288701569, - "grad_norm": 0.0004139500961173326, - "learning_rate": 0.00019999988699060422, - "loss": 46.0, - "step": 6269 - }, - { - "epoch": 0.47938528585354667, - "grad_norm": 0.0019083141814917326, - "learning_rate": 0.00019999988695449032, - "loss": 46.0, - "step": 6270 - }, - { - "epoch": 0.4794617428369364, - "grad_norm": 0.002150079468265176, - "learning_rate": 0.00019999988691837066, - "loss": 46.0, - "step": 6271 - }, - { - "epoch": 0.4795381998203261, - "grad_norm": 0.0035702658351510763, - "learning_rate": 0.00019999988688224524, - "loss": 46.0, - "step": 6272 - }, - { - "epoch": 0.4796146568037158, - "grad_norm": 0.001005249097943306, - "learning_rate": 0.00019999988684611406, - "loss": 46.0, - "step": 6273 - }, - { - "epoch": 0.47969111378710555, - "grad_norm": 0.007846593856811523, - "learning_rate": 0.0001999998868099771, - "loss": 46.0, - "step": 6274 - }, - { - "epoch": 0.47976757077049526, - "grad_norm": 0.0034158381167799234, - "learning_rate": 0.00019999988677383437, - "loss": 46.0, - "step": 6275 - }, - { - "epoch": 0.47984402775388496, - "grad_norm": 0.0012672198936343193, - "learning_rate": 0.00019999988673768585, - "loss": 46.0, - "step": 6276 - }, - { - "epoch": 0.47992048473727467, - "grad_norm": 0.0008209488005377352, - "learning_rate": 0.00019999988670153158, - "loss": 46.0, - "step": 6277 - }, - { - "epoch": 0.4799969417206644, - "grad_norm": 0.00043715073843486607, - "learning_rate": 0.00019999988666537154, - "loss": 46.0, - "step": 6278 - }, - { - "epoch": 0.48007339870405413, - "grad_norm": 0.0008419574005529284, - "learning_rate": 0.00019999988662920576, - "loss": 46.0, - "step": 6279 - }, - { - "epoch": 0.48014985568744384, - "grad_norm": 0.0011857226490974426, - "learning_rate": 0.00019999988659303414, - "loss": 46.0, - "step": 6280 - }, - { - "epoch": 0.4802263126708336, - "grad_norm": 0.0006404307205229998, - "learning_rate": 0.0001999998865568568, - "loss": 46.0, - "step": 6281 - }, - { - "epoch": 0.4803027696542233, - "grad_norm": 0.003302835626527667, - "learning_rate": 0.0001999998865206737, - "loss": 46.0, - "step": 6282 - }, - { - "epoch": 0.480379226637613, - "grad_norm": 0.01397534180432558, - "learning_rate": 0.0001999998864844848, - "loss": 46.0, - "step": 6283 - }, - { - "epoch": 0.4804556836210027, - "grad_norm": 0.0009030290530063212, - "learning_rate": 0.00019999988644829015, - "loss": 46.0, - "step": 6284 - }, - { - "epoch": 0.4805321406043925, - "grad_norm": 0.0011602970771491528, - "learning_rate": 0.00019999988641208972, - "loss": 46.0, - "step": 6285 - }, - { - "epoch": 0.4806085975877822, - "grad_norm": 0.00275371759198606, - "learning_rate": 0.00019999988637588355, - "loss": 46.0, - "step": 6286 - }, - { - "epoch": 0.4806850545711719, - "grad_norm": 0.001148720970377326, - "learning_rate": 0.00019999988633967157, - "loss": 46.0, - "step": 6287 - }, - { - "epoch": 0.4807615115545616, - "grad_norm": 0.0008375155739486217, - "learning_rate": 0.00019999988630345383, - "loss": 46.0, - "step": 6288 - }, - { - "epoch": 0.48083796853795135, - "grad_norm": 0.0009214160381816328, - "learning_rate": 0.0001999998862672303, - "loss": 46.0, - "step": 6289 - }, - { - "epoch": 0.48091442552134106, - "grad_norm": 0.0025782783050090075, - "learning_rate": 0.00019999988623100104, - "loss": 46.0, - "step": 6290 - }, - { - "epoch": 0.48099088250473077, - "grad_norm": 0.0027072604279965162, - "learning_rate": 0.00019999988619476603, - "loss": 46.0, - "step": 6291 - }, - { - "epoch": 0.48106733948812047, - "grad_norm": 0.005264668725430965, - "learning_rate": 0.0001999998861585252, - "loss": 46.0, - "step": 6292 - }, - { - "epoch": 0.48114379647151023, - "grad_norm": 0.0004953782190568745, - "learning_rate": 0.00019999988612227863, - "loss": 46.0, - "step": 6293 - }, - { - "epoch": 0.48122025345489994, - "grad_norm": 0.000940196739975363, - "learning_rate": 0.00019999988608602624, - "loss": 46.0, - "step": 6294 - }, - { - "epoch": 0.48129671043828964, - "grad_norm": 0.00042951980140060186, - "learning_rate": 0.00019999988604976814, - "loss": 46.0, - "step": 6295 - }, - { - "epoch": 0.4813731674216794, - "grad_norm": 0.0010089338757097721, - "learning_rate": 0.00019999988601350426, - "loss": 46.0, - "step": 6296 - }, - { - "epoch": 0.4814496244050691, - "grad_norm": 0.00034610298462212086, - "learning_rate": 0.00019999988597723458, - "loss": 46.0, - "step": 6297 - }, - { - "epoch": 0.4815260813884588, - "grad_norm": 0.0014529270119965076, - "learning_rate": 0.00019999988594095915, - "loss": 46.0, - "step": 6298 - }, - { - "epoch": 0.4816025383718485, - "grad_norm": 0.041110314428806305, - "learning_rate": 0.00019999988590467795, - "loss": 46.0, - "step": 6299 - }, - { - "epoch": 0.4816789953552383, - "grad_norm": 0.0025845463387668133, - "learning_rate": 0.000199999885868391, - "loss": 46.0, - "step": 6300 - }, - { - "epoch": 0.481755452338628, - "grad_norm": 0.0038914778269827366, - "learning_rate": 0.00019999988583209826, - "loss": 46.0, - "step": 6301 - }, - { - "epoch": 0.4818319093220177, - "grad_norm": 0.0010950975120067596, - "learning_rate": 0.00019999988579579974, - "loss": 46.0, - "step": 6302 - }, - { - "epoch": 0.4819083663054074, - "grad_norm": 0.001826742198318243, - "learning_rate": 0.00019999988575949548, - "loss": 46.0, - "step": 6303 - }, - { - "epoch": 0.48198482328879716, - "grad_norm": 0.0023944347631186247, - "learning_rate": 0.0001999998857231854, - "loss": 46.0, - "step": 6304 - }, - { - "epoch": 0.48206128027218687, - "grad_norm": 0.0033000996336340904, - "learning_rate": 0.0001999998856868696, - "loss": 46.0, - "step": 6305 - }, - { - "epoch": 0.48213773725557657, - "grad_norm": 0.0012456781696528196, - "learning_rate": 0.000199999885650548, - "loss": 46.0, - "step": 6306 - }, - { - "epoch": 0.4822141942389663, - "grad_norm": 0.001440512714907527, - "learning_rate": 0.00019999988561422068, - "loss": 46.0, - "step": 6307 - }, - { - "epoch": 0.48229065122235604, - "grad_norm": 0.0023240000009536743, - "learning_rate": 0.00019999988557788755, - "loss": 46.0, - "step": 6308 - }, - { - "epoch": 0.48236710820574574, - "grad_norm": 0.002259685890749097, - "learning_rate": 0.00019999988554154864, - "loss": 46.0, - "step": 6309 - }, - { - "epoch": 0.48244356518913545, - "grad_norm": 0.0012466987827792764, - "learning_rate": 0.000199999885505204, - "loss": 46.0, - "step": 6310 - }, - { - "epoch": 0.4825200221725252, - "grad_norm": 0.0006265821284614503, - "learning_rate": 0.00019999988546885356, - "loss": 46.0, - "step": 6311 - }, - { - "epoch": 0.4825964791559149, - "grad_norm": 0.0007399387541227043, - "learning_rate": 0.00019999988543249736, - "loss": 46.0, - "step": 6312 - }, - { - "epoch": 0.4826729361393046, - "grad_norm": 0.0017852268647402525, - "learning_rate": 0.0001999998853961354, - "loss": 46.0, - "step": 6313 - }, - { - "epoch": 0.4827493931226943, - "grad_norm": 0.0006445020553655922, - "learning_rate": 0.00019999988535976764, - "loss": 46.0, - "step": 6314 - }, - { - "epoch": 0.4828258501060841, - "grad_norm": 0.0011874273186549544, - "learning_rate": 0.00019999988532339412, - "loss": 46.0, - "step": 6315 - }, - { - "epoch": 0.4829023070894738, - "grad_norm": 0.0010938086779788136, - "learning_rate": 0.00019999988528701486, - "loss": 46.0, - "step": 6316 - }, - { - "epoch": 0.4829787640728635, - "grad_norm": 0.0034574014134705067, - "learning_rate": 0.0001999998852506298, - "loss": 46.0, - "step": 6317 - }, - { - "epoch": 0.4830552210562532, - "grad_norm": 0.003022773191332817, - "learning_rate": 0.00019999988521423898, - "loss": 46.0, - "step": 6318 - }, - { - "epoch": 0.48313167803964296, - "grad_norm": 0.0035729440860450268, - "learning_rate": 0.0001999998851778424, - "loss": 46.0, - "step": 6319 - }, - { - "epoch": 0.48320813502303267, - "grad_norm": 0.0007687106844969094, - "learning_rate": 0.00019999988514144006, - "loss": 46.0, - "step": 6320 - }, - { - "epoch": 0.4832845920064224, - "grad_norm": 0.0017719018505886197, - "learning_rate": 0.0001999998851050319, - "loss": 46.0, - "step": 6321 - }, - { - "epoch": 0.4833610489898121, - "grad_norm": 0.0025150200817734003, - "learning_rate": 0.00019999988506861803, - "loss": 46.0, - "step": 6322 - }, - { - "epoch": 0.48343750597320184, - "grad_norm": 0.0020292329136282206, - "learning_rate": 0.00019999988503219835, - "loss": 46.0, - "step": 6323 - }, - { - "epoch": 0.48351396295659155, - "grad_norm": 0.0011740690097212791, - "learning_rate": 0.00019999988499577295, - "loss": 46.0, - "step": 6324 - }, - { - "epoch": 0.48359041993998125, - "grad_norm": 0.002836063038557768, - "learning_rate": 0.00019999988495934172, - "loss": 46.0, - "step": 6325 - }, - { - "epoch": 0.483666876923371, - "grad_norm": 0.0003468339564278722, - "learning_rate": 0.00019999988492290472, - "loss": 46.0, - "step": 6326 - }, - { - "epoch": 0.4837433339067607, - "grad_norm": 0.0010148851433768868, - "learning_rate": 0.000199999884886462, - "loss": 46.0, - "step": 6327 - }, - { - "epoch": 0.4838197908901504, - "grad_norm": 0.002476799301803112, - "learning_rate": 0.00019999988485001349, - "loss": 46.0, - "step": 6328 - }, - { - "epoch": 0.48389624787354013, - "grad_norm": 0.0008766606915742159, - "learning_rate": 0.0001999998848135592, - "loss": 46.0, - "step": 6329 - }, - { - "epoch": 0.4839727048569299, - "grad_norm": 0.00042470821063034236, - "learning_rate": 0.00019999988477709915, - "loss": 46.0, - "step": 6330 - }, - { - "epoch": 0.4840491618403196, - "grad_norm": 0.0014834204921498895, - "learning_rate": 0.00019999988474063334, - "loss": 46.0, - "step": 6331 - }, - { - "epoch": 0.4841256188237093, - "grad_norm": 0.0006786793819628656, - "learning_rate": 0.00019999988470416176, - "loss": 46.0, - "step": 6332 - }, - { - "epoch": 0.484202075807099, - "grad_norm": 0.001179218990728259, - "learning_rate": 0.00019999988466768437, - "loss": 46.0, - "step": 6333 - }, - { - "epoch": 0.48427853279048877, - "grad_norm": 0.0006261045928113163, - "learning_rate": 0.00019999988463120124, - "loss": 46.0, - "step": 6334 - }, - { - "epoch": 0.4843549897738785, - "grad_norm": 0.0013071447610855103, - "learning_rate": 0.00019999988459471236, - "loss": 46.0, - "step": 6335 - }, - { - "epoch": 0.4844314467572682, - "grad_norm": 0.0010755738476291299, - "learning_rate": 0.0001999998845582177, - "loss": 46.0, - "step": 6336 - }, - { - "epoch": 0.4845079037406579, - "grad_norm": 0.0010401384206488729, - "learning_rate": 0.00019999988452171726, - "loss": 46.0, - "step": 6337 - }, - { - "epoch": 0.48458436072404765, - "grad_norm": 0.0011602689046412706, - "learning_rate": 0.00019999988448521103, - "loss": 46.0, - "step": 6338 - }, - { - "epoch": 0.48466081770743735, - "grad_norm": 0.0009871586225926876, - "learning_rate": 0.00019999988444869906, - "loss": 46.0, - "step": 6339 - }, - { - "epoch": 0.48473727469082706, - "grad_norm": 0.0008016563951969147, - "learning_rate": 0.00019999988441218131, - "loss": 46.0, - "step": 6340 - }, - { - "epoch": 0.4848137316742168, - "grad_norm": 0.001336134155280888, - "learning_rate": 0.0001999998843756578, - "loss": 46.0, - "step": 6341 - }, - { - "epoch": 0.4848901886576065, - "grad_norm": 0.0016448759706690907, - "learning_rate": 0.0001999998843391285, - "loss": 46.0, - "step": 6342 - }, - { - "epoch": 0.48496664564099623, - "grad_norm": 0.0005876056966371834, - "learning_rate": 0.00019999988430259347, - "loss": 46.0, - "step": 6343 - }, - { - "epoch": 0.48504310262438594, - "grad_norm": 0.006785241421312094, - "learning_rate": 0.00019999988426605265, - "loss": 46.0, - "step": 6344 - }, - { - "epoch": 0.4851195596077757, - "grad_norm": 0.0006073467084206641, - "learning_rate": 0.00019999988422950604, - "loss": 46.0, - "step": 6345 - }, - { - "epoch": 0.4851960165911654, - "grad_norm": 0.0005689759855158627, - "learning_rate": 0.00019999988419295368, - "loss": 46.0, - "step": 6346 - }, - { - "epoch": 0.4852724735745551, - "grad_norm": 0.0011022239923477173, - "learning_rate": 0.00019999988415639555, - "loss": 46.0, - "step": 6347 - }, - { - "epoch": 0.4853489305579448, - "grad_norm": 0.0011752459686249495, - "learning_rate": 0.00019999988411983165, - "loss": 46.0, - "step": 6348 - }, - { - "epoch": 0.4854253875413346, - "grad_norm": 0.0012171799317002296, - "learning_rate": 0.00019999988408326197, - "loss": 46.0, - "step": 6349 - }, - { - "epoch": 0.4855018445247243, - "grad_norm": 0.0023077267687767744, - "learning_rate": 0.00019999988404668652, - "loss": 46.0, - "step": 6350 - }, - { - "epoch": 0.485578301508114, - "grad_norm": 0.005109384190291166, - "learning_rate": 0.00019999988401010532, - "loss": 46.0, - "step": 6351 - }, - { - "epoch": 0.4856547584915037, - "grad_norm": 0.004220910370349884, - "learning_rate": 0.00019999988397351835, - "loss": 46.0, - "step": 6352 - }, - { - "epoch": 0.48573121547489345, - "grad_norm": 0.000715788861270994, - "learning_rate": 0.0001999998839369256, - "loss": 46.0, - "step": 6353 - }, - { - "epoch": 0.48580767245828316, - "grad_norm": 0.01046636514365673, - "learning_rate": 0.00019999988390032706, - "loss": 46.0, - "step": 6354 - }, - { - "epoch": 0.48588412944167286, - "grad_norm": 0.003636591136455536, - "learning_rate": 0.0001999998838637228, - "loss": 46.0, - "step": 6355 - }, - { - "epoch": 0.4859605864250626, - "grad_norm": 0.017490627244114876, - "learning_rate": 0.00019999988382711275, - "loss": 46.0, - "step": 6356 - }, - { - "epoch": 0.48603704340845233, - "grad_norm": 0.0006170101114548743, - "learning_rate": 0.00019999988379049692, - "loss": 46.0, - "step": 6357 - }, - { - "epoch": 0.48611350039184203, - "grad_norm": 0.0035188018810003996, - "learning_rate": 0.00019999988375387533, - "loss": 46.0, - "step": 6358 - }, - { - "epoch": 0.48618995737523174, - "grad_norm": 0.003433680161833763, - "learning_rate": 0.00019999988371724797, - "loss": 46.0, - "step": 6359 - }, - { - "epoch": 0.4862664143586215, - "grad_norm": 0.0006563492934219539, - "learning_rate": 0.00019999988368061481, - "loss": 46.0, - "step": 6360 - }, - { - "epoch": 0.4863428713420112, - "grad_norm": 0.009670495055615902, - "learning_rate": 0.0001999998836439759, - "loss": 46.0, - "step": 6361 - }, - { - "epoch": 0.4864193283254009, - "grad_norm": 0.0008141860598698258, - "learning_rate": 0.0001999998836073312, - "loss": 46.0, - "step": 6362 - }, - { - "epoch": 0.4864957853087906, - "grad_norm": 0.0006499075680039823, - "learning_rate": 0.0001999998835706808, - "loss": 46.0, - "step": 6363 - }, - { - "epoch": 0.4865722422921804, - "grad_norm": 0.0013896024320274591, - "learning_rate": 0.00019999988353402456, - "loss": 46.0, - "step": 6364 - }, - { - "epoch": 0.4866486992755701, - "grad_norm": 0.0007991960737854242, - "learning_rate": 0.0001999998834973626, - "loss": 46.0, - "step": 6365 - }, - { - "epoch": 0.4867251562589598, - "grad_norm": 0.0012706021079793572, - "learning_rate": 0.00019999988346069484, - "loss": 46.0, - "step": 6366 - }, - { - "epoch": 0.4868016132423495, - "grad_norm": 0.0010827386286109686, - "learning_rate": 0.00019999988342402132, - "loss": 46.0, - "step": 6367 - }, - { - "epoch": 0.48687807022573926, - "grad_norm": 0.004039994440972805, - "learning_rate": 0.00019999988338734203, - "loss": 46.0, - "step": 6368 - }, - { - "epoch": 0.48695452720912896, - "grad_norm": 0.0011493003694340587, - "learning_rate": 0.000199999883350657, - "loss": 46.0, - "step": 6369 - }, - { - "epoch": 0.48703098419251867, - "grad_norm": 0.0016639565583318472, - "learning_rate": 0.00019999988331396615, - "loss": 46.0, - "step": 6370 - }, - { - "epoch": 0.48710744117590843, - "grad_norm": 0.0009372640633955598, - "learning_rate": 0.00019999988327726954, - "loss": 46.0, - "step": 6371 - }, - { - "epoch": 0.48718389815929813, - "grad_norm": 0.007675940170884132, - "learning_rate": 0.00019999988324056719, - "loss": 46.0, - "step": 6372 - }, - { - "epoch": 0.48726035514268784, - "grad_norm": 0.0007198092062026262, - "learning_rate": 0.00019999988320385903, - "loss": 46.0, - "step": 6373 - }, - { - "epoch": 0.48733681212607755, - "grad_norm": 0.0006535969441756606, - "learning_rate": 0.00019999988316714512, - "loss": 46.0, - "step": 6374 - }, - { - "epoch": 0.4874132691094673, - "grad_norm": 0.0034300717525184155, - "learning_rate": 0.00019999988313042544, - "loss": 46.0, - "step": 6375 - }, - { - "epoch": 0.487489726092857, - "grad_norm": 0.0012840520357713103, - "learning_rate": 0.0001999998830937, - "loss": 46.0, - "step": 6376 - }, - { - "epoch": 0.4875661830762467, - "grad_norm": 0.000656074145808816, - "learning_rate": 0.0001999998830569688, - "loss": 46.0, - "step": 6377 - }, - { - "epoch": 0.4876426400596364, - "grad_norm": 0.0011463371338322759, - "learning_rate": 0.0001999998830202318, - "loss": 46.0, - "step": 6378 - }, - { - "epoch": 0.4877190970430262, - "grad_norm": 0.0008213612018153071, - "learning_rate": 0.00019999988298348903, - "loss": 46.0, - "step": 6379 - }, - { - "epoch": 0.4877955540264159, - "grad_norm": 0.0029335820581763983, - "learning_rate": 0.00019999988294674054, - "loss": 46.0, - "step": 6380 - }, - { - "epoch": 0.4878720110098056, - "grad_norm": 0.0005414159968495369, - "learning_rate": 0.00019999988290998625, - "loss": 46.0, - "step": 6381 - }, - { - "epoch": 0.48794846799319536, - "grad_norm": 0.0004125908308196813, - "learning_rate": 0.00019999988287322618, - "loss": 46.0, - "step": 6382 - }, - { - "epoch": 0.48802492497658506, - "grad_norm": 0.004388558678328991, - "learning_rate": 0.00019999988283646034, - "loss": 46.0, - "step": 6383 - }, - { - "epoch": 0.48810138195997477, - "grad_norm": 0.0010814444394782186, - "learning_rate": 0.00019999988279968873, - "loss": 46.0, - "step": 6384 - }, - { - "epoch": 0.4881778389433645, - "grad_norm": 0.001897557289339602, - "learning_rate": 0.00019999988276291135, - "loss": 46.0, - "step": 6385 - }, - { - "epoch": 0.48825429592675423, - "grad_norm": 0.0008687134250067174, - "learning_rate": 0.00019999988272612824, - "loss": 46.0, - "step": 6386 - }, - { - "epoch": 0.48833075291014394, - "grad_norm": 0.0009987280936911702, - "learning_rate": 0.0001999998826893393, - "loss": 46.0, - "step": 6387 - }, - { - "epoch": 0.48840720989353364, - "grad_norm": 0.0006901868619024754, - "learning_rate": 0.00019999988265254463, - "loss": 46.0, - "step": 6388 - }, - { - "epoch": 0.48848366687692335, - "grad_norm": 0.0005795607576146722, - "learning_rate": 0.00019999988261574418, - "loss": 46.0, - "step": 6389 - }, - { - "epoch": 0.4885601238603131, - "grad_norm": 0.003952940925955772, - "learning_rate": 0.000199999882578938, - "loss": 46.0, - "step": 6390 - }, - { - "epoch": 0.4886365808437028, - "grad_norm": 0.0011986320605501533, - "learning_rate": 0.000199999882542126, - "loss": 46.0, - "step": 6391 - }, - { - "epoch": 0.4887130378270925, - "grad_norm": 0.0006881254375912249, - "learning_rate": 0.00019999988250530822, - "loss": 46.0, - "step": 6392 - }, - { - "epoch": 0.48878949481048223, - "grad_norm": 0.01511130016297102, - "learning_rate": 0.0001999998824684847, - "loss": 46.0, - "step": 6393 - }, - { - "epoch": 0.488865951793872, - "grad_norm": 0.0015410013729706407, - "learning_rate": 0.00019999988243165538, - "loss": 46.0, - "step": 6394 - }, - { - "epoch": 0.4889424087772617, - "grad_norm": 0.0036170308012515306, - "learning_rate": 0.00019999988239482032, - "loss": 46.0, - "step": 6395 - }, - { - "epoch": 0.4890188657606514, - "grad_norm": 0.0007317258277907968, - "learning_rate": 0.00019999988235797948, - "loss": 46.0, - "step": 6396 - }, - { - "epoch": 0.48909532274404116, - "grad_norm": 0.0010901440400630236, - "learning_rate": 0.0001999998823211329, - "loss": 46.0, - "step": 6397 - }, - { - "epoch": 0.48917177972743087, - "grad_norm": 0.0006672409363090992, - "learning_rate": 0.0001999998822842805, - "loss": 46.0, - "step": 6398 - }, - { - "epoch": 0.48924823671082057, - "grad_norm": 0.0011397808557376266, - "learning_rate": 0.00019999988224742236, - "loss": 46.0, - "step": 6399 - }, - { - "epoch": 0.4893246936942103, - "grad_norm": 0.0008003698894754052, - "learning_rate": 0.00019999988221055848, - "loss": 46.0, - "step": 6400 - }, - { - "epoch": 0.48940115067760004, - "grad_norm": 0.002308068098500371, - "learning_rate": 0.00019999988217368877, - "loss": 46.0, - "step": 6401 - }, - { - "epoch": 0.48947760766098974, - "grad_norm": 0.0008845416596159339, - "learning_rate": 0.00019999988213681332, - "loss": 46.0, - "step": 6402 - }, - { - "epoch": 0.48955406464437945, - "grad_norm": 0.0008718667668290436, - "learning_rate": 0.00019999988209993213, - "loss": 46.0, - "step": 6403 - }, - { - "epoch": 0.48963052162776916, - "grad_norm": 0.0007148516015149653, - "learning_rate": 0.00019999988206304513, - "loss": 46.0, - "step": 6404 - }, - { - "epoch": 0.4897069786111589, - "grad_norm": 0.0003723963745869696, - "learning_rate": 0.00019999988202615233, - "loss": 46.0, - "step": 6405 - }, - { - "epoch": 0.4897834355945486, - "grad_norm": 0.0019716841634362936, - "learning_rate": 0.00019999988198925381, - "loss": 46.0, - "step": 6406 - }, - { - "epoch": 0.4898598925779383, - "grad_norm": 0.0017271449323743582, - "learning_rate": 0.00019999988195234952, - "loss": 46.0, - "step": 6407 - }, - { - "epoch": 0.48993634956132803, - "grad_norm": 0.005223521497100592, - "learning_rate": 0.00019999988191543946, - "loss": 46.0, - "step": 6408 - }, - { - "epoch": 0.4900128065447178, - "grad_norm": 0.0006758730160072446, - "learning_rate": 0.00019999988187852362, - "loss": 46.0, - "step": 6409 - }, - { - "epoch": 0.4900892635281075, - "grad_norm": 0.0004181225085631013, - "learning_rate": 0.000199999881841602, - "loss": 46.0, - "step": 6410 - }, - { - "epoch": 0.4901657205114972, - "grad_norm": 0.0006537472945638001, - "learning_rate": 0.00019999988180467463, - "loss": 46.0, - "step": 6411 - }, - { - "epoch": 0.49024217749488697, - "grad_norm": 0.0016621069516986609, - "learning_rate": 0.0001999998817677415, - "loss": 46.0, - "step": 6412 - }, - { - "epoch": 0.49031863447827667, - "grad_norm": 0.0028892322443425655, - "learning_rate": 0.00019999988173080257, - "loss": 46.0, - "step": 6413 - }, - { - "epoch": 0.4903950914616664, - "grad_norm": 0.0016960246721282601, - "learning_rate": 0.0001999998816938579, - "loss": 46.0, - "step": 6414 - }, - { - "epoch": 0.4904715484450561, - "grad_norm": 0.004497898742556572, - "learning_rate": 0.00019999988165690744, - "loss": 46.0, - "step": 6415 - }, - { - "epoch": 0.49054800542844584, - "grad_norm": 0.0006038066931068897, - "learning_rate": 0.00019999988161995122, - "loss": 46.0, - "step": 6416 - }, - { - "epoch": 0.49062446241183555, - "grad_norm": 0.010990196838974953, - "learning_rate": 0.00019999988158298925, - "loss": 46.0, - "step": 6417 - }, - { - "epoch": 0.49070091939522525, - "grad_norm": 0.0009468832286074758, - "learning_rate": 0.00019999988154602148, - "loss": 46.0, - "step": 6418 - }, - { - "epoch": 0.49077737637861496, - "grad_norm": 0.0031462351325899363, - "learning_rate": 0.00019999988150904793, - "loss": 46.0, - "step": 6419 - }, - { - "epoch": 0.4908538333620047, - "grad_norm": 0.0016019238391891122, - "learning_rate": 0.00019999988147206862, - "loss": 46.0, - "step": 6420 - }, - { - "epoch": 0.4909302903453944, - "grad_norm": 0.004620174877345562, - "learning_rate": 0.00019999988143508355, - "loss": 46.0, - "step": 6421 - }, - { - "epoch": 0.49100674732878413, - "grad_norm": 0.002342072082683444, - "learning_rate": 0.00019999988139809272, - "loss": 46.0, - "step": 6422 - }, - { - "epoch": 0.49108320431217384, - "grad_norm": 0.009439565241336823, - "learning_rate": 0.0001999998813610961, - "loss": 46.0, - "step": 6423 - }, - { - "epoch": 0.4911596612955636, - "grad_norm": 0.001219587167724967, - "learning_rate": 0.0001999998813240937, - "loss": 46.0, - "step": 6424 - }, - { - "epoch": 0.4912361182789533, - "grad_norm": 0.0008230343810282648, - "learning_rate": 0.00019999988128708557, - "loss": 46.0, - "step": 6425 - }, - { - "epoch": 0.491312575262343, - "grad_norm": 0.0014162807492539287, - "learning_rate": 0.00019999988125007166, - "loss": 46.0, - "step": 6426 - }, - { - "epoch": 0.49138903224573277, - "grad_norm": 0.0016766769113019109, - "learning_rate": 0.00019999988121305196, - "loss": 46.0, - "step": 6427 - }, - { - "epoch": 0.4914654892291225, - "grad_norm": 0.0006250288570299745, - "learning_rate": 0.0001999998811760265, - "loss": 46.0, - "step": 6428 - }, - { - "epoch": 0.4915419462125122, - "grad_norm": 0.0005541299469769001, - "learning_rate": 0.00019999988113899528, - "loss": 46.0, - "step": 6429 - }, - { - "epoch": 0.4916184031959019, - "grad_norm": 0.0016851290129125118, - "learning_rate": 0.0001999998811019583, - "loss": 46.0, - "step": 6430 - }, - { - "epoch": 0.49169486017929165, - "grad_norm": 0.010401006788015366, - "learning_rate": 0.00019999988106491552, - "loss": 46.0, - "step": 6431 - }, - { - "epoch": 0.49177131716268135, - "grad_norm": 0.001281034667044878, - "learning_rate": 0.00019999988102786697, - "loss": 46.0, - "step": 6432 - }, - { - "epoch": 0.49184777414607106, - "grad_norm": 0.0022689623292535543, - "learning_rate": 0.00019999988099081268, - "loss": 46.0, - "step": 6433 - }, - { - "epoch": 0.49192423112946077, - "grad_norm": 0.018421540036797523, - "learning_rate": 0.00019999988095375262, - "loss": 46.0, - "step": 6434 - }, - { - "epoch": 0.4920006881128505, - "grad_norm": 0.0012801343109458685, - "learning_rate": 0.00019999988091668676, - "loss": 46.0, - "step": 6435 - }, - { - "epoch": 0.49207714509624023, - "grad_norm": 0.0005204727640375495, - "learning_rate": 0.00019999988087961515, - "loss": 46.0, - "step": 6436 - }, - { - "epoch": 0.49215360207962994, - "grad_norm": 0.0020112525671720505, - "learning_rate": 0.00019999988084253776, - "loss": 46.0, - "step": 6437 - }, - { - "epoch": 0.49223005906301964, - "grad_norm": 0.00383200217038393, - "learning_rate": 0.0001999998808054546, - "loss": 46.0, - "step": 6438 - }, - { - "epoch": 0.4923065160464094, - "grad_norm": 0.0014632859965786338, - "learning_rate": 0.00019999988076836568, - "loss": 46.0, - "step": 6439 - }, - { - "epoch": 0.4923829730297991, - "grad_norm": 0.001580241252668202, - "learning_rate": 0.000199999880731271, - "loss": 46.0, - "step": 6440 - }, - { - "epoch": 0.4924594300131888, - "grad_norm": 0.0012626753887161613, - "learning_rate": 0.00019999988069417053, - "loss": 46.0, - "step": 6441 - }, - { - "epoch": 0.4925358869965786, - "grad_norm": 0.0011472193291410804, - "learning_rate": 0.0001999998806570643, - "loss": 46.0, - "step": 6442 - }, - { - "epoch": 0.4926123439799683, - "grad_norm": 0.00302050425671041, - "learning_rate": 0.0001999998806199523, - "loss": 46.0, - "step": 6443 - }, - { - "epoch": 0.492688800963358, - "grad_norm": 0.012528634630143642, - "learning_rate": 0.00019999988058283454, - "loss": 46.0, - "step": 6444 - }, - { - "epoch": 0.4927652579467477, - "grad_norm": 0.002775819506496191, - "learning_rate": 0.00019999988054571102, - "loss": 46.0, - "step": 6445 - }, - { - "epoch": 0.49284171493013745, - "grad_norm": 0.0007796332938596606, - "learning_rate": 0.0001999998805085817, - "loss": 46.0, - "step": 6446 - }, - { - "epoch": 0.49291817191352716, - "grad_norm": 0.003013971960172057, - "learning_rate": 0.0001999998804714466, - "loss": 46.0, - "step": 6447 - }, - { - "epoch": 0.49299462889691686, - "grad_norm": 0.0017489292658865452, - "learning_rate": 0.00019999988043430578, - "loss": 46.0, - "step": 6448 - }, - { - "epoch": 0.49307108588030657, - "grad_norm": 0.00688300421461463, - "learning_rate": 0.00019999988039715917, - "loss": 46.0, - "step": 6449 - }, - { - "epoch": 0.49314754286369633, - "grad_norm": 0.0005748681141994894, - "learning_rate": 0.00019999988036000676, - "loss": 46.0, - "step": 6450 - }, - { - "epoch": 0.49322399984708604, - "grad_norm": 0.0005769344279542565, - "learning_rate": 0.00019999988032284863, - "loss": 46.0, - "step": 6451 - }, - { - "epoch": 0.49330045683047574, - "grad_norm": 0.0007875912124291062, - "learning_rate": 0.0001999998802856847, - "loss": 46.0, - "step": 6452 - }, - { - "epoch": 0.49337691381386545, - "grad_norm": 0.0006917236023582518, - "learning_rate": 0.00019999988024851502, - "loss": 46.0, - "step": 6453 - }, - { - "epoch": 0.4934533707972552, - "grad_norm": 0.006839110050350428, - "learning_rate": 0.00019999988021133955, - "loss": 46.0, - "step": 6454 - }, - { - "epoch": 0.4935298277806449, - "grad_norm": 0.0008490763721056283, - "learning_rate": 0.00019999988017415832, - "loss": 46.0, - "step": 6455 - }, - { - "epoch": 0.4936062847640346, - "grad_norm": 0.0012166728265583515, - "learning_rate": 0.0001999998801369713, - "loss": 46.0, - "step": 6456 - }, - { - "epoch": 0.4936827417474244, - "grad_norm": 0.0005295455921441317, - "learning_rate": 0.00019999988009977853, - "loss": 46.0, - "step": 6457 - }, - { - "epoch": 0.4937591987308141, - "grad_norm": 0.0006590589182451367, - "learning_rate": 0.00019999988006258, - "loss": 46.0, - "step": 6458 - }, - { - "epoch": 0.4938356557142038, - "grad_norm": 0.0014256765134632587, - "learning_rate": 0.0001999998800253757, - "loss": 46.0, - "step": 6459 - }, - { - "epoch": 0.4939121126975935, - "grad_norm": 0.0011379234492778778, - "learning_rate": 0.0001999998799881656, - "loss": 46.0, - "step": 6460 - }, - { - "epoch": 0.49398856968098326, - "grad_norm": 0.0008576003019697964, - "learning_rate": 0.00019999987995094975, - "loss": 46.0, - "step": 6461 - }, - { - "epoch": 0.49406502666437296, - "grad_norm": 0.004929020535200834, - "learning_rate": 0.00019999987991372814, - "loss": 46.0, - "step": 6462 - }, - { - "epoch": 0.49414148364776267, - "grad_norm": 0.0016787319909781218, - "learning_rate": 0.00019999987987650073, - "loss": 46.0, - "step": 6463 - }, - { - "epoch": 0.4942179406311524, - "grad_norm": 0.0008172642556019127, - "learning_rate": 0.00019999987983926757, - "loss": 46.0, - "step": 6464 - }, - { - "epoch": 0.49429439761454214, - "grad_norm": 0.0009970619576051831, - "learning_rate": 0.00019999987980202867, - "loss": 46.0, - "step": 6465 - }, - { - "epoch": 0.49437085459793184, - "grad_norm": 0.000629753281828016, - "learning_rate": 0.00019999987976478397, - "loss": 46.0, - "step": 6466 - }, - { - "epoch": 0.49444731158132155, - "grad_norm": 0.0009540155297145247, - "learning_rate": 0.0001999998797275335, - "loss": 46.0, - "step": 6467 - }, - { - "epoch": 0.49452376856471125, - "grad_norm": 0.0036007454618811607, - "learning_rate": 0.00019999987969027724, - "loss": 46.0, - "step": 6468 - }, - { - "epoch": 0.494600225548101, - "grad_norm": 0.0007068145205266774, - "learning_rate": 0.00019999987965301524, - "loss": 46.0, - "step": 6469 - }, - { - "epoch": 0.4946766825314907, - "grad_norm": 0.003312127897515893, - "learning_rate": 0.00019999987961574748, - "loss": 46.0, - "step": 6470 - }, - { - "epoch": 0.4947531395148804, - "grad_norm": 0.0021120829042047262, - "learning_rate": 0.00019999987957847393, - "loss": 46.0, - "step": 6471 - }, - { - "epoch": 0.4948295964982702, - "grad_norm": 0.002806598087772727, - "learning_rate": 0.00019999987954119464, - "loss": 46.0, - "step": 6472 - }, - { - "epoch": 0.4949060534816599, - "grad_norm": 0.002394828712567687, - "learning_rate": 0.00019999987950390955, - "loss": 46.0, - "step": 6473 - }, - { - "epoch": 0.4949825104650496, - "grad_norm": 0.0004253096121829003, - "learning_rate": 0.0001999998794666187, - "loss": 46.0, - "step": 6474 - }, - { - "epoch": 0.4950589674484393, - "grad_norm": 0.0022463074419647455, - "learning_rate": 0.00019999987942932208, - "loss": 46.0, - "step": 6475 - }, - { - "epoch": 0.49513542443182906, - "grad_norm": 0.0017045411514118314, - "learning_rate": 0.0001999998793920197, - "loss": 46.0, - "step": 6476 - }, - { - "epoch": 0.49521188141521877, - "grad_norm": 0.001246497267857194, - "learning_rate": 0.00019999987935471152, - "loss": 46.0, - "step": 6477 - }, - { - "epoch": 0.4952883383986085, - "grad_norm": 0.0009801244596019387, - "learning_rate": 0.0001999998793173976, - "loss": 46.0, - "step": 6478 - }, - { - "epoch": 0.4953647953819982, - "grad_norm": 0.0005795637262053788, - "learning_rate": 0.00019999987928007791, - "loss": 46.0, - "step": 6479 - }, - { - "epoch": 0.49544125236538794, - "grad_norm": 0.0016051421407610178, - "learning_rate": 0.00019999987924275244, - "loss": 46.0, - "step": 6480 - }, - { - "epoch": 0.49551770934877765, - "grad_norm": 0.0010982013773173094, - "learning_rate": 0.0001999998792054212, - "loss": 46.0, - "step": 6481 - }, - { - "epoch": 0.49559416633216735, - "grad_norm": 0.0018951896345242858, - "learning_rate": 0.0001999998791680842, - "loss": 46.0, - "step": 6482 - }, - { - "epoch": 0.49567062331555706, - "grad_norm": 0.009181102737784386, - "learning_rate": 0.0001999998791307414, - "loss": 46.0, - "step": 6483 - }, - { - "epoch": 0.4957470802989468, - "grad_norm": 0.000797546876128763, - "learning_rate": 0.00019999987909339283, - "loss": 46.0, - "step": 6484 - }, - { - "epoch": 0.4958235372823365, - "grad_norm": 0.000757049594540149, - "learning_rate": 0.00019999987905603854, - "loss": 46.0, - "step": 6485 - }, - { - "epoch": 0.49589999426572623, - "grad_norm": 0.0016351699596270919, - "learning_rate": 0.00019999987901867845, - "loss": 46.0, - "step": 6486 - }, - { - "epoch": 0.495976451249116, - "grad_norm": 0.001320823677815497, - "learning_rate": 0.0001999998789813126, - "loss": 46.0, - "step": 6487 - }, - { - "epoch": 0.4960529082325057, - "grad_norm": 0.0006964000058360398, - "learning_rate": 0.00019999987894394095, - "loss": 46.0, - "step": 6488 - }, - { - "epoch": 0.4961293652158954, - "grad_norm": 0.000624475593212992, - "learning_rate": 0.00019999987890656357, - "loss": 46.0, - "step": 6489 - }, - { - "epoch": 0.4962058221992851, - "grad_norm": 0.0008864251431077719, - "learning_rate": 0.00019999987886918041, - "loss": 46.0, - "step": 6490 - }, - { - "epoch": 0.49628227918267487, - "grad_norm": 0.000672024383675307, - "learning_rate": 0.00019999987883179149, - "loss": 46.0, - "step": 6491 - }, - { - "epoch": 0.4963587361660646, - "grad_norm": 0.00029146266751922667, - "learning_rate": 0.00019999987879439676, - "loss": 46.0, - "step": 6492 - }, - { - "epoch": 0.4964351931494543, - "grad_norm": 0.002330700634047389, - "learning_rate": 0.0001999998787569963, - "loss": 46.0, - "step": 6493 - }, - { - "epoch": 0.496511650132844, - "grad_norm": 0.0022235598880797625, - "learning_rate": 0.00019999987871959006, - "loss": 46.0, - "step": 6494 - }, - { - "epoch": 0.49658810711623375, - "grad_norm": 0.0008372527081519365, - "learning_rate": 0.00019999987868217804, - "loss": 46.0, - "step": 6495 - }, - { - "epoch": 0.49666456409962345, - "grad_norm": 0.0009484909824095666, - "learning_rate": 0.0001999998786447603, - "loss": 46.0, - "step": 6496 - }, - { - "epoch": 0.49674102108301316, - "grad_norm": 0.00572090083733201, - "learning_rate": 0.00019999987860733673, - "loss": 46.0, - "step": 6497 - }, - { - "epoch": 0.4968174780664029, - "grad_norm": 0.0011128135956823826, - "learning_rate": 0.0001999998785699074, - "loss": 46.0, - "step": 6498 - }, - { - "epoch": 0.4968939350497926, - "grad_norm": 0.0007173815974965692, - "learning_rate": 0.00019999987853247232, - "loss": 46.0, - "step": 6499 - }, - { - "epoch": 0.49697039203318233, - "grad_norm": 0.0004026094975415617, - "learning_rate": 0.0001999998784950315, - "loss": 46.0, - "step": 6500 - }, - { - "epoch": 0.49704684901657203, - "grad_norm": 0.0025437013246119022, - "learning_rate": 0.00019999987845758483, - "loss": 46.0, - "step": 6501 - }, - { - "epoch": 0.4971233059999618, - "grad_norm": 0.004859997890889645, - "learning_rate": 0.00019999987842013244, - "loss": 46.0, - "step": 6502 - }, - { - "epoch": 0.4971997629833515, - "grad_norm": 0.0007493896409869194, - "learning_rate": 0.0001999998783826743, - "loss": 46.0, - "step": 6503 - }, - { - "epoch": 0.4972762199667412, - "grad_norm": 0.0010843091877177358, - "learning_rate": 0.00019999987834521033, - "loss": 46.0, - "step": 6504 - }, - { - "epoch": 0.4973526769501309, - "grad_norm": 0.004104179795831442, - "learning_rate": 0.00019999987830774063, - "loss": 46.0, - "step": 6505 - }, - { - "epoch": 0.4974291339335207, - "grad_norm": 0.0033110808581113815, - "learning_rate": 0.00019999987827026516, - "loss": 46.0, - "step": 6506 - }, - { - "epoch": 0.4975055909169104, - "grad_norm": 0.0006934861303307116, - "learning_rate": 0.00019999987823278393, - "loss": 46.0, - "step": 6507 - }, - { - "epoch": 0.4975820479003001, - "grad_norm": 0.006649675313383341, - "learning_rate": 0.0001999998781952969, - "loss": 46.0, - "step": 6508 - }, - { - "epoch": 0.4976585048836898, - "grad_norm": 0.002636313671246171, - "learning_rate": 0.00019999987815780412, - "loss": 46.0, - "step": 6509 - }, - { - "epoch": 0.49773496186707955, - "grad_norm": 0.001595677575096488, - "learning_rate": 0.00019999987812030558, - "loss": 46.0, - "step": 6510 - }, - { - "epoch": 0.49781141885046926, - "grad_norm": 0.001560879754833877, - "learning_rate": 0.00019999987808280126, - "loss": 46.0, - "step": 6511 - }, - { - "epoch": 0.49788787583385896, - "grad_norm": 0.0010170344030484557, - "learning_rate": 0.00019999987804529115, - "loss": 46.0, - "step": 6512 - }, - { - "epoch": 0.4979643328172487, - "grad_norm": 0.0006142422207631171, - "learning_rate": 0.00019999987800777528, - "loss": 46.0, - "step": 6513 - }, - { - "epoch": 0.49804078980063843, - "grad_norm": 0.001040930044837296, - "learning_rate": 0.00019999987797025365, - "loss": 46.0, - "step": 6514 - }, - { - "epoch": 0.49811724678402813, - "grad_norm": 0.0009109100210480392, - "learning_rate": 0.00019999987793272627, - "loss": 46.0, - "step": 6515 - }, - { - "epoch": 0.49819370376741784, - "grad_norm": 0.0010989943984895945, - "learning_rate": 0.0001999998778951931, - "loss": 46.0, - "step": 6516 - }, - { - "epoch": 0.4982701607508076, - "grad_norm": 0.0023832949809730053, - "learning_rate": 0.00019999987785765418, - "loss": 46.0, - "step": 6517 - }, - { - "epoch": 0.4983466177341973, - "grad_norm": 0.0045531452633440495, - "learning_rate": 0.00019999987782010946, - "loss": 46.0, - "step": 6518 - }, - { - "epoch": 0.498423074717587, - "grad_norm": 0.0014620426809415221, - "learning_rate": 0.00019999987778255898, - "loss": 46.0, - "step": 6519 - }, - { - "epoch": 0.4984995317009767, - "grad_norm": 0.001096074585802853, - "learning_rate": 0.00019999987774500273, - "loss": 46.0, - "step": 6520 - }, - { - "epoch": 0.4985759886843665, - "grad_norm": 0.0057863532565534115, - "learning_rate": 0.0001999998777074407, - "loss": 46.0, - "step": 6521 - }, - { - "epoch": 0.4986524456677562, - "grad_norm": 0.0012083373731002212, - "learning_rate": 0.00019999987766987292, - "loss": 46.0, - "step": 6522 - }, - { - "epoch": 0.4987289026511459, - "grad_norm": 0.002958812052384019, - "learning_rate": 0.00019999987763229937, - "loss": 46.0, - "step": 6523 - }, - { - "epoch": 0.4988053596345356, - "grad_norm": 0.008364793844521046, - "learning_rate": 0.00019999987759472006, - "loss": 46.0, - "step": 6524 - }, - { - "epoch": 0.49888181661792536, - "grad_norm": 0.0010773777030408382, - "learning_rate": 0.00019999987755713497, - "loss": 46.0, - "step": 6525 - }, - { - "epoch": 0.49895827360131506, - "grad_norm": 0.003642960684373975, - "learning_rate": 0.0001999998775195441, - "loss": 46.0, - "step": 6526 - }, - { - "epoch": 0.49903473058470477, - "grad_norm": 0.0007070060819387436, - "learning_rate": 0.00019999987748194745, - "loss": 46.0, - "step": 6527 - }, - { - "epoch": 0.4991111875680945, - "grad_norm": 0.0006475977716036141, - "learning_rate": 0.00019999987744434504, - "loss": 46.0, - "step": 6528 - }, - { - "epoch": 0.49918764455148423, - "grad_norm": 0.0013352928217500448, - "learning_rate": 0.00019999987740673686, - "loss": 46.0, - "step": 6529 - }, - { - "epoch": 0.49926410153487394, - "grad_norm": 0.0007934285094961524, - "learning_rate": 0.00019999987736912296, - "loss": 46.0, - "step": 6530 - }, - { - "epoch": 0.49934055851826364, - "grad_norm": 0.0018938110442832112, - "learning_rate": 0.00019999987733150323, - "loss": 46.0, - "step": 6531 - }, - { - "epoch": 0.4994170155016534, - "grad_norm": 0.0006374659715220332, - "learning_rate": 0.00019999987729387776, - "loss": 46.0, - "step": 6532 - }, - { - "epoch": 0.4994934724850431, - "grad_norm": 0.0004182657285127789, - "learning_rate": 0.00019999987725624648, - "loss": 46.0, - "step": 6533 - }, - { - "epoch": 0.4995699294684328, - "grad_norm": 0.001342519884929061, - "learning_rate": 0.00019999987721860949, - "loss": 46.0, - "step": 6534 - }, - { - "epoch": 0.4996463864518225, - "grad_norm": 0.012076162733137608, - "learning_rate": 0.00019999987718096672, - "loss": 46.0, - "step": 6535 - }, - { - "epoch": 0.4997228434352123, - "grad_norm": 0.002149401232600212, - "learning_rate": 0.00019999987714331815, - "loss": 46.0, - "step": 6536 - }, - { - "epoch": 0.499799300418602, - "grad_norm": 0.001540144789032638, - "learning_rate": 0.0001999998771056638, - "loss": 46.0, - "step": 6537 - }, - { - "epoch": 0.4998757574019917, - "grad_norm": 0.0009101862087845802, - "learning_rate": 0.00019999987706800372, - "loss": 46.0, - "step": 6538 - }, - { - "epoch": 0.4999522143853814, - "grad_norm": 0.00200356962159276, - "learning_rate": 0.00019999987703033783, - "loss": 46.0, - "step": 6539 - }, - { - "epoch": 0.5000286713687712, - "grad_norm": 0.0008327013929374516, - "learning_rate": 0.00019999987699266623, - "loss": 46.0, - "step": 6540 - }, - { - "epoch": 0.5001051283521608, - "grad_norm": 0.0006880591390654445, - "learning_rate": 0.00019999987695498882, - "loss": 46.0, - "step": 6541 - }, - { - "epoch": 0.5001815853355506, - "grad_norm": 0.0006306835566647351, - "learning_rate": 0.0001999998769173056, - "loss": 46.0, - "step": 6542 - }, - { - "epoch": 0.5002580423189403, - "grad_norm": 0.0007143211551010609, - "learning_rate": 0.0001999998768796167, - "loss": 46.0, - "step": 6543 - }, - { - "epoch": 0.50033449930233, - "grad_norm": 0.0011272953124716878, - "learning_rate": 0.00019999987684192195, - "loss": 46.0, - "step": 6544 - }, - { - "epoch": 0.5004109562857197, - "grad_norm": 0.0015187429962679744, - "learning_rate": 0.0001999998768042215, - "loss": 46.0, - "step": 6545 - }, - { - "epoch": 0.5004874132691095, - "grad_norm": 0.0007690527127124369, - "learning_rate": 0.00019999987676651523, - "loss": 46.0, - "step": 6546 - }, - { - "epoch": 0.5005638702524992, - "grad_norm": 0.0015275331679731607, - "learning_rate": 0.0001999998767288032, - "loss": 46.0, - "step": 6547 - }, - { - "epoch": 0.5006403272358889, - "grad_norm": 0.003748373594135046, - "learning_rate": 0.00019999987669108542, - "loss": 46.0, - "step": 6548 - }, - { - "epoch": 0.5007167842192787, - "grad_norm": 0.0017606825567781925, - "learning_rate": 0.00019999987665336185, - "loss": 46.0, - "step": 6549 - }, - { - "epoch": 0.5007932412026683, - "grad_norm": 0.00184637529309839, - "learning_rate": 0.00019999987661563254, - "loss": 46.0, - "step": 6550 - }, - { - "epoch": 0.5008696981860581, - "grad_norm": 0.0025023254565894604, - "learning_rate": 0.00019999987657789742, - "loss": 46.0, - "step": 6551 - }, - { - "epoch": 0.5009461551694477, - "grad_norm": 0.0009148131939582527, - "learning_rate": 0.00019999987654015656, - "loss": 46.0, - "step": 6552 - }, - { - "epoch": 0.5010226121528375, - "grad_norm": 0.0003834763483610004, - "learning_rate": 0.00019999987650240995, - "loss": 46.0, - "step": 6553 - }, - { - "epoch": 0.5010990691362273, - "grad_norm": 0.0009837960824370384, - "learning_rate": 0.00019999987646465752, - "loss": 46.0, - "step": 6554 - }, - { - "epoch": 0.5011755261196169, - "grad_norm": 0.0011544080916792154, - "learning_rate": 0.00019999987642689934, - "loss": 46.0, - "step": 6555 - }, - { - "epoch": 0.5012519831030067, - "grad_norm": 0.0033667783718556166, - "learning_rate": 0.0001999998763891354, - "loss": 46.0, - "step": 6556 - }, - { - "epoch": 0.5013284400863964, - "grad_norm": 0.0022614910267293453, - "learning_rate": 0.00019999987635136568, - "loss": 46.0, - "step": 6557 - }, - { - "epoch": 0.5014048970697861, - "grad_norm": 0.0014761054189875722, - "learning_rate": 0.00019999987631359018, - "loss": 46.0, - "step": 6558 - }, - { - "epoch": 0.5014813540531758, - "grad_norm": 0.00044820777839049697, - "learning_rate": 0.00019999987627580893, - "loss": 46.0, - "step": 6559 - }, - { - "epoch": 0.5015578110365656, - "grad_norm": 0.0009060293668881059, - "learning_rate": 0.0001999998762380219, - "loss": 46.0, - "step": 6560 - }, - { - "epoch": 0.5016342680199553, - "grad_norm": 0.002454856876283884, - "learning_rate": 0.00019999987620022912, - "loss": 46.0, - "step": 6561 - }, - { - "epoch": 0.501710725003345, - "grad_norm": 0.0029269622173160315, - "learning_rate": 0.00019999987616243058, - "loss": 46.0, - "step": 6562 - }, - { - "epoch": 0.5017871819867347, - "grad_norm": 0.0012639738852158189, - "learning_rate": 0.00019999987612462624, - "loss": 46.0, - "step": 6563 - }, - { - "epoch": 0.5018636389701244, - "grad_norm": 0.0018493705429136753, - "learning_rate": 0.00019999987608681612, - "loss": 46.0, - "step": 6564 - }, - { - "epoch": 0.5019400959535142, - "grad_norm": 0.0008241417235694826, - "learning_rate": 0.00019999987604900026, - "loss": 46.0, - "step": 6565 - }, - { - "epoch": 0.5020165529369038, - "grad_norm": 0.0014339416520670056, - "learning_rate": 0.00019999987601117863, - "loss": 46.0, - "step": 6566 - }, - { - "epoch": 0.5020930099202936, - "grad_norm": 0.0027079368010163307, - "learning_rate": 0.0001999998759733512, - "loss": 46.0, - "step": 6567 - }, - { - "epoch": 0.5021694669036834, - "grad_norm": 0.02328592538833618, - "learning_rate": 0.00019999987593551804, - "loss": 46.0, - "step": 6568 - }, - { - "epoch": 0.502245923887073, - "grad_norm": 0.0017986586317420006, - "learning_rate": 0.0001999998758976791, - "loss": 46.0, - "step": 6569 - }, - { - "epoch": 0.5023223808704628, - "grad_norm": 0.0018942091846838593, - "learning_rate": 0.00019999987585983436, - "loss": 46.0, - "step": 6570 - }, - { - "epoch": 0.5023988378538525, - "grad_norm": 0.002628538990393281, - "learning_rate": 0.00019999987582198386, - "loss": 46.0, - "step": 6571 - }, - { - "epoch": 0.5024752948372422, - "grad_norm": 0.00043532688869163394, - "learning_rate": 0.0001999998757841276, - "loss": 46.0, - "step": 6572 - }, - { - "epoch": 0.5025517518206319, - "grad_norm": 0.001176826423034072, - "learning_rate": 0.0001999998757462656, - "loss": 46.0, - "step": 6573 - }, - { - "epoch": 0.5026282088040216, - "grad_norm": 0.0012792424531653523, - "learning_rate": 0.0001999998757083978, - "loss": 46.0, - "step": 6574 - }, - { - "epoch": 0.5027046657874114, - "grad_norm": 0.0014877606881782413, - "learning_rate": 0.00019999987567052423, - "loss": 46.0, - "step": 6575 - }, - { - "epoch": 0.5027811227708011, - "grad_norm": 0.0019944391679018736, - "learning_rate": 0.0001999998756326449, - "loss": 46.0, - "step": 6576 - }, - { - "epoch": 0.5028575797541908, - "grad_norm": 0.000718877709005028, - "learning_rate": 0.00019999987559475978, - "loss": 46.0, - "step": 6577 - }, - { - "epoch": 0.5029340367375805, - "grad_norm": 0.0034903271589428186, - "learning_rate": 0.0001999998755568689, - "loss": 46.0, - "step": 6578 - }, - { - "epoch": 0.5030104937209703, - "grad_norm": 0.0020239094737917185, - "learning_rate": 0.00019999987551897226, - "loss": 46.0, - "step": 6579 - }, - { - "epoch": 0.5030869507043599, - "grad_norm": 0.0020184360910207033, - "learning_rate": 0.00019999987548106985, - "loss": 46.0, - "step": 6580 - }, - { - "epoch": 0.5031634076877497, - "grad_norm": 0.000886824622284621, - "learning_rate": 0.00019999987544316164, - "loss": 46.0, - "step": 6581 - }, - { - "epoch": 0.5032398646711393, - "grad_norm": 0.001010234234854579, - "learning_rate": 0.00019999987540524772, - "loss": 46.0, - "step": 6582 - }, - { - "epoch": 0.5033163216545291, - "grad_norm": 0.0035800074692815542, - "learning_rate": 0.00019999987536732802, - "loss": 46.0, - "step": 6583 - }, - { - "epoch": 0.5033927786379189, - "grad_norm": 0.008436190895736217, - "learning_rate": 0.0001999998753294025, - "loss": 46.0, - "step": 6584 - }, - { - "epoch": 0.5034692356213085, - "grad_norm": 0.0009393159998580813, - "learning_rate": 0.00019999987529147124, - "loss": 46.0, - "step": 6585 - }, - { - "epoch": 0.5035456926046983, - "grad_norm": 0.0007460570195689797, - "learning_rate": 0.00019999987525353422, - "loss": 46.0, - "step": 6586 - }, - { - "epoch": 0.503622149588088, - "grad_norm": 0.000494759064167738, - "learning_rate": 0.0001999998752155914, - "loss": 46.0, - "step": 6587 - }, - { - "epoch": 0.5036986065714777, - "grad_norm": 0.0031264338176697493, - "learning_rate": 0.00019999987517764286, - "loss": 46.0, - "step": 6588 - }, - { - "epoch": 0.5037750635548675, - "grad_norm": 0.0006070621311664581, - "learning_rate": 0.0001999998751396885, - "loss": 46.0, - "step": 6589 - }, - { - "epoch": 0.5038515205382572, - "grad_norm": 0.0008838371140882373, - "learning_rate": 0.0001999998751017284, - "loss": 46.0, - "step": 6590 - }, - { - "epoch": 0.5039279775216469, - "grad_norm": 0.0017949964385479689, - "learning_rate": 0.00019999987506376252, - "loss": 46.0, - "step": 6591 - }, - { - "epoch": 0.5040044345050366, - "grad_norm": 0.002289183670654893, - "learning_rate": 0.00019999987502579086, - "loss": 46.0, - "step": 6592 - }, - { - "epoch": 0.5040808914884263, - "grad_norm": 0.0007575471536256373, - "learning_rate": 0.00019999987498781345, - "loss": 46.0, - "step": 6593 - }, - { - "epoch": 0.504157348471816, - "grad_norm": 0.0010387235088273883, - "learning_rate": 0.00019999987494983027, - "loss": 46.0, - "step": 6594 - }, - { - "epoch": 0.5042338054552058, - "grad_norm": 0.0005088611505925655, - "learning_rate": 0.00019999987491184132, - "loss": 46.0, - "step": 6595 - }, - { - "epoch": 0.5043102624385954, - "grad_norm": 0.0009044331382028759, - "learning_rate": 0.00019999987487384657, - "loss": 46.0, - "step": 6596 - }, - { - "epoch": 0.5043867194219852, - "grad_norm": 0.0005453468766063452, - "learning_rate": 0.00019999987483584607, - "loss": 46.0, - "step": 6597 - }, - { - "epoch": 0.504463176405375, - "grad_norm": 0.0006305858842097223, - "learning_rate": 0.00019999987479783982, - "loss": 46.0, - "step": 6598 - }, - { - "epoch": 0.5045396333887646, - "grad_norm": 0.0025635375641286373, - "learning_rate": 0.0001999998747598278, - "loss": 46.0, - "step": 6599 - }, - { - "epoch": 0.5046160903721544, - "grad_norm": 0.001269631553441286, - "learning_rate": 0.00019999987472181, - "loss": 46.0, - "step": 6600 - }, - { - "epoch": 0.5046925473555441, - "grad_norm": 0.005238671321421862, - "learning_rate": 0.0001999998746837864, - "loss": 46.0, - "step": 6601 - }, - { - "epoch": 0.5047690043389338, - "grad_norm": 0.001603403128683567, - "learning_rate": 0.00019999987464575707, - "loss": 46.0, - "step": 6602 - }, - { - "epoch": 0.5048454613223236, - "grad_norm": 0.0021633345168083906, - "learning_rate": 0.000199999874607722, - "loss": 46.0, - "step": 6603 - }, - { - "epoch": 0.5049219183057132, - "grad_norm": 0.001273146946914494, - "learning_rate": 0.00019999987456968107, - "loss": 46.0, - "step": 6604 - }, - { - "epoch": 0.504998375289103, - "grad_norm": 0.0006083719781599939, - "learning_rate": 0.00019999987453163444, - "loss": 46.0, - "step": 6605 - }, - { - "epoch": 0.5050748322724927, - "grad_norm": 0.0014974843943491578, - "learning_rate": 0.000199999874493582, - "loss": 46.0, - "step": 6606 - }, - { - "epoch": 0.5051512892558824, - "grad_norm": 0.0010533076710999012, - "learning_rate": 0.00019999987445552383, - "loss": 46.0, - "step": 6607 - }, - { - "epoch": 0.5052277462392721, - "grad_norm": 0.0009454118553549051, - "learning_rate": 0.00019999987441745987, - "loss": 46.0, - "step": 6608 - }, - { - "epoch": 0.5053042032226619, - "grad_norm": 0.002649895614013076, - "learning_rate": 0.00019999987437939015, - "loss": 46.0, - "step": 6609 - }, - { - "epoch": 0.5053806602060515, - "grad_norm": 0.0005589655484072864, - "learning_rate": 0.00019999987434131465, - "loss": 46.0, - "step": 6610 - }, - { - "epoch": 0.5054571171894413, - "grad_norm": 0.002897490980103612, - "learning_rate": 0.00019999987430323338, - "loss": 46.0, - "step": 6611 - }, - { - "epoch": 0.505533574172831, - "grad_norm": 0.003157546278089285, - "learning_rate": 0.00019999987426514636, - "loss": 46.0, - "step": 6612 - }, - { - "epoch": 0.5056100311562207, - "grad_norm": 0.001781234983354807, - "learning_rate": 0.00019999987422705356, - "loss": 46.0, - "step": 6613 - }, - { - "epoch": 0.5056864881396105, - "grad_norm": 0.002597308252006769, - "learning_rate": 0.00019999987418895497, - "loss": 46.0, - "step": 6614 - }, - { - "epoch": 0.5057629451230001, - "grad_norm": 0.002923625288531184, - "learning_rate": 0.00019999987415085063, - "loss": 46.0, - "step": 6615 - }, - { - "epoch": 0.5058394021063899, - "grad_norm": 0.00042659268365241587, - "learning_rate": 0.00019999987411274052, - "loss": 46.0, - "step": 6616 - }, - { - "epoch": 0.5059158590897797, - "grad_norm": 0.0015453496016561985, - "learning_rate": 0.00019999987407462463, - "loss": 46.0, - "step": 6617 - }, - { - "epoch": 0.5059923160731693, - "grad_norm": 0.007973256520926952, - "learning_rate": 0.00019999987403650297, - "loss": 46.0, - "step": 6618 - }, - { - "epoch": 0.5060687730565591, - "grad_norm": 0.000626304536126554, - "learning_rate": 0.00019999987399837554, - "loss": 46.0, - "step": 6619 - }, - { - "epoch": 0.5061452300399488, - "grad_norm": 0.0013627674197778106, - "learning_rate": 0.00019999987396024236, - "loss": 46.0, - "step": 6620 - }, - { - "epoch": 0.5062216870233385, - "grad_norm": 0.0025127700064331293, - "learning_rate": 0.0001999998739221034, - "loss": 46.0, - "step": 6621 - }, - { - "epoch": 0.5062981440067282, - "grad_norm": 0.0005612922250293195, - "learning_rate": 0.00019999987388395865, - "loss": 46.0, - "step": 6622 - }, - { - "epoch": 0.5063746009901179, - "grad_norm": 0.004672839306294918, - "learning_rate": 0.00019999987384580815, - "loss": 46.0, - "step": 6623 - }, - { - "epoch": 0.5064510579735076, - "grad_norm": 0.0015346667496487498, - "learning_rate": 0.0001999998738076519, - "loss": 46.0, - "step": 6624 - }, - { - "epoch": 0.5065275149568974, - "grad_norm": 0.006579149980098009, - "learning_rate": 0.0001999998737694899, - "loss": 46.0, - "step": 6625 - }, - { - "epoch": 0.5066039719402871, - "grad_norm": 0.000537071842700243, - "learning_rate": 0.00019999987373132204, - "loss": 46.0, - "step": 6626 - }, - { - "epoch": 0.5066804289236768, - "grad_norm": 0.0015940542798489332, - "learning_rate": 0.00019999987369314848, - "loss": 46.0, - "step": 6627 - }, - { - "epoch": 0.5067568859070666, - "grad_norm": 0.0004707959305960685, - "learning_rate": 0.00019999987365496914, - "loss": 46.0, - "step": 6628 - }, - { - "epoch": 0.5068333428904562, - "grad_norm": 0.0019824965856969357, - "learning_rate": 0.00019999987361678402, - "loss": 46.0, - "step": 6629 - }, - { - "epoch": 0.506909799873846, - "grad_norm": 0.0037892009131610394, - "learning_rate": 0.00019999987357859314, - "loss": 46.0, - "step": 6630 - }, - { - "epoch": 0.5069862568572358, - "grad_norm": 0.003043479286134243, - "learning_rate": 0.00019999987354039648, - "loss": 46.0, - "step": 6631 - }, - { - "epoch": 0.5070627138406254, - "grad_norm": 0.0020136937964707613, - "learning_rate": 0.00019999987350219405, - "loss": 46.0, - "step": 6632 - }, - { - "epoch": 0.5071391708240152, - "grad_norm": 0.0012219158234074712, - "learning_rate": 0.00019999987346398587, - "loss": 46.0, - "step": 6633 - }, - { - "epoch": 0.5072156278074048, - "grad_norm": 0.0006614429294131696, - "learning_rate": 0.0001999998734257719, - "loss": 46.0, - "step": 6634 - }, - { - "epoch": 0.5072920847907946, - "grad_norm": 0.0023321930784732103, - "learning_rate": 0.00019999987338755216, - "loss": 46.0, - "step": 6635 - }, - { - "epoch": 0.5073685417741843, - "grad_norm": 0.0014058776432648301, - "learning_rate": 0.00019999987334932666, - "loss": 46.0, - "step": 6636 - }, - { - "epoch": 0.507444998757574, - "grad_norm": 0.0011735991574823856, - "learning_rate": 0.0001999998733110954, - "loss": 46.0, - "step": 6637 - }, - { - "epoch": 0.5075214557409637, - "grad_norm": 0.006963454186916351, - "learning_rate": 0.00019999987327285834, - "loss": 46.0, - "step": 6638 - }, - { - "epoch": 0.5075979127243535, - "grad_norm": 0.005320186261087656, - "learning_rate": 0.00019999987323461552, - "loss": 46.0, - "step": 6639 - }, - { - "epoch": 0.5076743697077432, - "grad_norm": 0.0020673151593655348, - "learning_rate": 0.00019999987319636696, - "loss": 46.0, - "step": 6640 - }, - { - "epoch": 0.5077508266911329, - "grad_norm": 0.00519458157941699, - "learning_rate": 0.00019999987315811262, - "loss": 46.0, - "step": 6641 - }, - { - "epoch": 0.5078272836745226, - "grad_norm": 0.001650924445129931, - "learning_rate": 0.00019999987311985248, - "loss": 46.0, - "step": 6642 - }, - { - "epoch": 0.5079037406579123, - "grad_norm": 0.0034306340385228395, - "learning_rate": 0.0001999998730815866, - "loss": 46.0, - "step": 6643 - }, - { - "epoch": 0.5079801976413021, - "grad_norm": 0.006050763186067343, - "learning_rate": 0.00019999987304331494, - "loss": 46.0, - "step": 6644 - }, - { - "epoch": 0.5080566546246917, - "grad_norm": 0.0003902481694240123, - "learning_rate": 0.0001999998730050375, - "loss": 46.0, - "step": 6645 - }, - { - "epoch": 0.5081331116080815, - "grad_norm": 0.0015269792638719082, - "learning_rate": 0.0001999998729667543, - "loss": 46.0, - "step": 6646 - }, - { - "epoch": 0.5082095685914713, - "grad_norm": 0.0017523665446788073, - "learning_rate": 0.00019999987292846535, - "loss": 46.0, - "step": 6647 - }, - { - "epoch": 0.5082860255748609, - "grad_norm": 0.0006248591234907508, - "learning_rate": 0.00019999987289017062, - "loss": 46.0, - "step": 6648 - }, - { - "epoch": 0.5083624825582507, - "grad_norm": 0.0016106307739391923, - "learning_rate": 0.00019999987285187012, - "loss": 46.0, - "step": 6649 - }, - { - "epoch": 0.5084389395416404, - "grad_norm": 0.003220010083168745, - "learning_rate": 0.00019999987281356382, - "loss": 46.0, - "step": 6650 - }, - { - "epoch": 0.5085153965250301, - "grad_norm": 0.0288386307656765, - "learning_rate": 0.00019999987277525178, - "loss": 46.0, - "step": 6651 - }, - { - "epoch": 0.5085918535084198, - "grad_norm": 0.000946781481616199, - "learning_rate": 0.00019999987273693396, - "loss": 46.0, - "step": 6652 - }, - { - "epoch": 0.5086683104918095, - "grad_norm": 0.001764478045515716, - "learning_rate": 0.0001999998726986104, - "loss": 46.0, - "step": 6653 - }, - { - "epoch": 0.5087447674751993, - "grad_norm": 0.0014122177381068468, - "learning_rate": 0.00019999987266028105, - "loss": 46.0, - "step": 6654 - }, - { - "epoch": 0.508821224458589, - "grad_norm": 0.0006862186128273606, - "learning_rate": 0.00019999987262194591, - "loss": 46.0, - "step": 6655 - }, - { - "epoch": 0.5088976814419787, - "grad_norm": 0.0030939560383558273, - "learning_rate": 0.00019999987258360503, - "loss": 46.0, - "step": 6656 - }, - { - "epoch": 0.5089741384253684, - "grad_norm": 0.005594997201114893, - "learning_rate": 0.00019999987254525837, - "loss": 46.0, - "step": 6657 - }, - { - "epoch": 0.5090505954087582, - "grad_norm": 0.0016314440872520208, - "learning_rate": 0.00019999987250690594, - "loss": 46.0, - "step": 6658 - }, - { - "epoch": 0.5091270523921478, - "grad_norm": 0.0028863984625786543, - "learning_rate": 0.00019999987246854773, - "loss": 46.0, - "step": 6659 - }, - { - "epoch": 0.5092035093755376, - "grad_norm": 0.0007835583528503776, - "learning_rate": 0.00019999987243018375, - "loss": 46.0, - "step": 6660 - }, - { - "epoch": 0.5092799663589274, - "grad_norm": 0.002227367600426078, - "learning_rate": 0.00019999987239181403, - "loss": 46.0, - "step": 6661 - }, - { - "epoch": 0.509356423342317, - "grad_norm": 0.00223711971193552, - "learning_rate": 0.0001999998723534385, - "loss": 46.0, - "step": 6662 - }, - { - "epoch": 0.5094328803257068, - "grad_norm": 0.007872717455029488, - "learning_rate": 0.00019999987231505726, - "loss": 46.0, - "step": 6663 - }, - { - "epoch": 0.5095093373090964, - "grad_norm": 0.0018282405799254775, - "learning_rate": 0.00019999987227667019, - "loss": 46.0, - "step": 6664 - }, - { - "epoch": 0.5095857942924862, - "grad_norm": 0.014191199094057083, - "learning_rate": 0.0001999998722382774, - "loss": 46.0, - "step": 6665 - }, - { - "epoch": 0.5096622512758759, - "grad_norm": 0.001074605737812817, - "learning_rate": 0.0001999998721998788, - "loss": 46.0, - "step": 6666 - }, - { - "epoch": 0.5097387082592656, - "grad_norm": 0.0009614795562811196, - "learning_rate": 0.0001999998721614744, - "loss": 46.0, - "step": 6667 - }, - { - "epoch": 0.5098151652426554, - "grad_norm": 0.0006571012199856341, - "learning_rate": 0.0001999998721230643, - "loss": 46.0, - "step": 6668 - }, - { - "epoch": 0.5098916222260451, - "grad_norm": 0.004921938758343458, - "learning_rate": 0.0001999998720846484, - "loss": 46.0, - "step": 6669 - }, - { - "epoch": 0.5099680792094348, - "grad_norm": 0.0006776495720259845, - "learning_rate": 0.00019999987204622675, - "loss": 46.0, - "step": 6670 - }, - { - "epoch": 0.5100445361928245, - "grad_norm": 0.0012581656919792295, - "learning_rate": 0.00019999987200779932, - "loss": 46.0, - "step": 6671 - }, - { - "epoch": 0.5101209931762143, - "grad_norm": 0.004164766985923052, - "learning_rate": 0.00019999987196936612, - "loss": 46.0, - "step": 6672 - }, - { - "epoch": 0.5101974501596039, - "grad_norm": 0.0020659894216805696, - "learning_rate": 0.00019999987193092714, - "loss": 46.0, - "step": 6673 - }, - { - "epoch": 0.5102739071429937, - "grad_norm": 0.0005768091650679708, - "learning_rate": 0.0001999998718924824, - "loss": 46.0, - "step": 6674 - }, - { - "epoch": 0.5103503641263833, - "grad_norm": 0.003232197370380163, - "learning_rate": 0.00019999987185403192, - "loss": 46.0, - "step": 6675 - }, - { - "epoch": 0.5104268211097731, - "grad_norm": 0.0008823472890071571, - "learning_rate": 0.0001999998718155756, - "loss": 46.0, - "step": 6676 - }, - { - "epoch": 0.5105032780931629, - "grad_norm": 0.0012526483042165637, - "learning_rate": 0.00019999987177711357, - "loss": 46.0, - "step": 6677 - }, - { - "epoch": 0.5105797350765525, - "grad_norm": 0.003952939994633198, - "learning_rate": 0.00019999987173864575, - "loss": 46.0, - "step": 6678 - }, - { - "epoch": 0.5106561920599423, - "grad_norm": 0.0011536605888977647, - "learning_rate": 0.00019999987170017216, - "loss": 46.0, - "step": 6679 - }, - { - "epoch": 0.510732649043332, - "grad_norm": 0.0005209247465245426, - "learning_rate": 0.0001999998716616928, - "loss": 46.0, - "step": 6680 - }, - { - "epoch": 0.5108091060267217, - "grad_norm": 0.000934875919483602, - "learning_rate": 0.00019999987162320766, - "loss": 46.0, - "step": 6681 - }, - { - "epoch": 0.5108855630101115, - "grad_norm": 0.0007178029045462608, - "learning_rate": 0.00019999987158471675, - "loss": 46.0, - "step": 6682 - }, - { - "epoch": 0.5109620199935011, - "grad_norm": 0.004913387820124626, - "learning_rate": 0.00019999987154622012, - "loss": 46.0, - "step": 6683 - }, - { - "epoch": 0.5110384769768909, - "grad_norm": 0.0005487127928063273, - "learning_rate": 0.00019999987150771768, - "loss": 46.0, - "step": 6684 - }, - { - "epoch": 0.5111149339602806, - "grad_norm": 0.00388889922760427, - "learning_rate": 0.00019999987146920945, - "loss": 46.0, - "step": 6685 - }, - { - "epoch": 0.5111913909436703, - "grad_norm": 0.0010851608822122216, - "learning_rate": 0.0001999998714306955, - "loss": 46.0, - "step": 6686 - }, - { - "epoch": 0.51126784792706, - "grad_norm": 0.0006419922574423254, - "learning_rate": 0.00019999987139217575, - "loss": 46.0, - "step": 6687 - }, - { - "epoch": 0.5113443049104498, - "grad_norm": 0.0006653190357610583, - "learning_rate": 0.00019999987135365022, - "loss": 46.0, - "step": 6688 - }, - { - "epoch": 0.5114207618938394, - "grad_norm": 0.0041973283514380455, - "learning_rate": 0.00019999987131511895, - "loss": 46.0, - "step": 6689 - }, - { - "epoch": 0.5114972188772292, - "grad_norm": 0.0015674334717914462, - "learning_rate": 0.0001999998712765819, - "loss": 46.0, - "step": 6690 - }, - { - "epoch": 0.511573675860619, - "grad_norm": 0.0006393588846549392, - "learning_rate": 0.00019999987123803906, - "loss": 46.0, - "step": 6691 - }, - { - "epoch": 0.5116501328440086, - "grad_norm": 0.0014182829763740301, - "learning_rate": 0.00019999987119949047, - "loss": 46.0, - "step": 6692 - }, - { - "epoch": 0.5117265898273984, - "grad_norm": 0.0005151458899490535, - "learning_rate": 0.00019999987116093613, - "loss": 46.0, - "step": 6693 - }, - { - "epoch": 0.511803046810788, - "grad_norm": 0.0007883227663114667, - "learning_rate": 0.000199999871122376, - "loss": 46.0, - "step": 6694 - }, - { - "epoch": 0.5118795037941778, - "grad_norm": 0.0020876929629594088, - "learning_rate": 0.0001999998710838101, - "loss": 46.0, - "step": 6695 - }, - { - "epoch": 0.5119559607775676, - "grad_norm": 0.0011576981050893664, - "learning_rate": 0.00019999987104523843, - "loss": 46.0, - "step": 6696 - }, - { - "epoch": 0.5120324177609572, - "grad_norm": 0.006071015726774931, - "learning_rate": 0.00019999987100666097, - "loss": 46.0, - "step": 6697 - }, - { - "epoch": 0.512108874744347, - "grad_norm": 0.0023508064914494753, - "learning_rate": 0.00019999987096807776, - "loss": 46.0, - "step": 6698 - }, - { - "epoch": 0.5121853317277367, - "grad_norm": 0.011246114037930965, - "learning_rate": 0.00019999987092948879, - "loss": 46.0, - "step": 6699 - }, - { - "epoch": 0.5122617887111264, - "grad_norm": 0.0011908836895599961, - "learning_rate": 0.00019999987089089406, - "loss": 46.0, - "step": 6700 - }, - { - "epoch": 0.5123382456945161, - "grad_norm": 0.0008464244892820716, - "learning_rate": 0.00019999987085229354, - "loss": 46.0, - "step": 6701 - }, - { - "epoch": 0.5124147026779059, - "grad_norm": 0.0011266465298831463, - "learning_rate": 0.00019999987081368724, - "loss": 46.0, - "step": 6702 - }, - { - "epoch": 0.5124911596612955, - "grad_norm": 0.0008307049283757806, - "learning_rate": 0.0001999998707750752, - "loss": 46.0, - "step": 6703 - }, - { - "epoch": 0.5125676166446853, - "grad_norm": 0.0011249450035393238, - "learning_rate": 0.00019999987073645738, - "loss": 46.0, - "step": 6704 - }, - { - "epoch": 0.512644073628075, - "grad_norm": 0.0010429281974211335, - "learning_rate": 0.00019999987069783379, - "loss": 46.0, - "step": 6705 - }, - { - "epoch": 0.5127205306114647, - "grad_norm": 0.0006533378036692739, - "learning_rate": 0.00019999987065920442, - "loss": 46.0, - "step": 6706 - }, - { - "epoch": 0.5127969875948545, - "grad_norm": 0.0013591040624305606, - "learning_rate": 0.00019999987062056928, - "loss": 46.0, - "step": 6707 - }, - { - "epoch": 0.5128734445782441, - "grad_norm": 0.002953270450234413, - "learning_rate": 0.00019999987058192837, - "loss": 46.0, - "step": 6708 - }, - { - "epoch": 0.5129499015616339, - "grad_norm": 0.004673848859965801, - "learning_rate": 0.00019999987054328172, - "loss": 46.0, - "step": 6709 - }, - { - "epoch": 0.5130263585450237, - "grad_norm": 0.001719867461360991, - "learning_rate": 0.00019999987050462928, - "loss": 46.0, - "step": 6710 - }, - { - "epoch": 0.5131028155284133, - "grad_norm": 0.00045337225310504436, - "learning_rate": 0.00019999987046597105, - "loss": 46.0, - "step": 6711 - }, - { - "epoch": 0.5131792725118031, - "grad_norm": 0.0038524160627275705, - "learning_rate": 0.00019999987042730708, - "loss": 46.0, - "step": 6712 - }, - { - "epoch": 0.5132557294951927, - "grad_norm": 0.001208741799928248, - "learning_rate": 0.00019999987038863732, - "loss": 46.0, - "step": 6713 - }, - { - "epoch": 0.5133321864785825, - "grad_norm": 0.005358150694519281, - "learning_rate": 0.0001999998703499618, - "loss": 46.0, - "step": 6714 - }, - { - "epoch": 0.5134086434619722, - "grad_norm": 0.0007636166992597282, - "learning_rate": 0.0001999998703112805, - "loss": 46.0, - "step": 6715 - }, - { - "epoch": 0.5134851004453619, - "grad_norm": 0.0011872861068695784, - "learning_rate": 0.00019999987027259346, - "loss": 46.0, - "step": 6716 - }, - { - "epoch": 0.5135615574287516, - "grad_norm": 0.0014966072048991919, - "learning_rate": 0.00019999987023390064, - "loss": 46.0, - "step": 6717 - }, - { - "epoch": 0.5136380144121414, - "grad_norm": 0.0007914376328699291, - "learning_rate": 0.00019999987019520202, - "loss": 46.0, - "step": 6718 - }, - { - "epoch": 0.5137144713955311, - "grad_norm": 0.003805845510214567, - "learning_rate": 0.00019999987015649766, - "loss": 46.0, - "step": 6719 - }, - { - "epoch": 0.5137909283789208, - "grad_norm": 0.0015213243896141648, - "learning_rate": 0.00019999987011778752, - "loss": 46.0, - "step": 6720 - }, - { - "epoch": 0.5138673853623106, - "grad_norm": 0.0033187270164489746, - "learning_rate": 0.00019999987007907164, - "loss": 46.0, - "step": 6721 - }, - { - "epoch": 0.5139438423457002, - "grad_norm": 0.000449590414064005, - "learning_rate": 0.00019999987004034995, - "loss": 46.0, - "step": 6722 - }, - { - "epoch": 0.51402029932909, - "grad_norm": 0.0016383439069613814, - "learning_rate": 0.0001999998700016225, - "loss": 46.0, - "step": 6723 - }, - { - "epoch": 0.5140967563124796, - "grad_norm": 0.0012532379478216171, - "learning_rate": 0.0001999998699628893, - "loss": 46.0, - "step": 6724 - }, - { - "epoch": 0.5141732132958694, - "grad_norm": 0.0015614822041243315, - "learning_rate": 0.00019999986992415031, - "loss": 46.0, - "step": 6725 - }, - { - "epoch": 0.5142496702792592, - "grad_norm": 0.003305091056972742, - "learning_rate": 0.00019999986988540556, - "loss": 46.0, - "step": 6726 - }, - { - "epoch": 0.5143261272626488, - "grad_norm": 0.0012602624483406544, - "learning_rate": 0.000199999869846655, - "loss": 46.0, - "step": 6727 - }, - { - "epoch": 0.5144025842460386, - "grad_norm": 0.0013412623666226864, - "learning_rate": 0.00019999986980789874, - "loss": 46.0, - "step": 6728 - }, - { - "epoch": 0.5144790412294283, - "grad_norm": 0.0009423950687050819, - "learning_rate": 0.00019999986976913667, - "loss": 46.0, - "step": 6729 - }, - { - "epoch": 0.514555498212818, - "grad_norm": 0.002963624894618988, - "learning_rate": 0.00019999986973036883, - "loss": 46.0, - "step": 6730 - }, - { - "epoch": 0.5146319551962077, - "grad_norm": 0.0029351431876420975, - "learning_rate": 0.00019999986969159526, - "loss": 46.0, - "step": 6731 - }, - { - "epoch": 0.5147084121795975, - "grad_norm": 0.0017759987385943532, - "learning_rate": 0.0001999998696528159, - "loss": 46.0, - "step": 6732 - }, - { - "epoch": 0.5147848691629872, - "grad_norm": 0.0011451707687228918, - "learning_rate": 0.00019999986961403074, - "loss": 46.0, - "step": 6733 - }, - { - "epoch": 0.5148613261463769, - "grad_norm": 0.0007711068028584123, - "learning_rate": 0.00019999986957523982, - "loss": 46.0, - "step": 6734 - }, - { - "epoch": 0.5149377831297666, - "grad_norm": 0.001106961746700108, - "learning_rate": 0.00019999986953644317, - "loss": 46.0, - "step": 6735 - }, - { - "epoch": 0.5150142401131563, - "grad_norm": 0.0007789528463035822, - "learning_rate": 0.0001999998694976407, - "loss": 46.0, - "step": 6736 - }, - { - "epoch": 0.5150906970965461, - "grad_norm": 0.0007508696871809661, - "learning_rate": 0.00019999986945883248, - "loss": 46.0, - "step": 6737 - }, - { - "epoch": 0.5151671540799357, - "grad_norm": 0.0011181732406839728, - "learning_rate": 0.0001999998694200185, - "loss": 46.0, - "step": 6738 - }, - { - "epoch": 0.5152436110633255, - "grad_norm": 0.007998952642083168, - "learning_rate": 0.00019999986938119878, - "loss": 46.0, - "step": 6739 - }, - { - "epoch": 0.5153200680467153, - "grad_norm": 0.0065696947276592255, - "learning_rate": 0.00019999986934237323, - "loss": 46.0, - "step": 6740 - }, - { - "epoch": 0.5153965250301049, - "grad_norm": 0.007332746405154467, - "learning_rate": 0.00019999986930354193, - "loss": 46.0, - "step": 6741 - }, - { - "epoch": 0.5154729820134947, - "grad_norm": 0.0007144419359974563, - "learning_rate": 0.00019999986926470486, - "loss": 46.0, - "step": 6742 - }, - { - "epoch": 0.5155494389968843, - "grad_norm": 0.0007409803802147508, - "learning_rate": 0.00019999986922586204, - "loss": 46.0, - "step": 6743 - }, - { - "epoch": 0.5156258959802741, - "grad_norm": 0.0044411346316337585, - "learning_rate": 0.00019999986918701343, - "loss": 46.0, - "step": 6744 - }, - { - "epoch": 0.5157023529636638, - "grad_norm": 0.0035553663037717342, - "learning_rate": 0.00019999986914815904, - "loss": 46.0, - "step": 6745 - }, - { - "epoch": 0.5157788099470535, - "grad_norm": 0.0007410652469843626, - "learning_rate": 0.00019999986910929893, - "loss": 46.0, - "step": 6746 - }, - { - "epoch": 0.5158552669304433, - "grad_norm": 0.0015842923894524574, - "learning_rate": 0.000199999869070433, - "loss": 46.0, - "step": 6747 - }, - { - "epoch": 0.515931723913833, - "grad_norm": 0.0009804507717490196, - "learning_rate": 0.00019999986903156133, - "loss": 46.0, - "step": 6748 - }, - { - "epoch": 0.5160081808972227, - "grad_norm": 0.0160980224609375, - "learning_rate": 0.00019999986899268388, - "loss": 46.0, - "step": 6749 - }, - { - "epoch": 0.5160846378806124, - "grad_norm": 0.0039010727778077126, - "learning_rate": 0.00019999986895380065, - "loss": 46.0, - "step": 6750 - }, - { - "epoch": 0.5161610948640022, - "grad_norm": 0.0031121016945689917, - "learning_rate": 0.00019999986891491167, - "loss": 46.0, - "step": 6751 - }, - { - "epoch": 0.5162375518473918, - "grad_norm": 0.0006660231156274676, - "learning_rate": 0.00019999986887601692, - "loss": 46.0, - "step": 6752 - }, - { - "epoch": 0.5163140088307816, - "grad_norm": 0.0027548077050596476, - "learning_rate": 0.0001999998688371164, - "loss": 46.0, - "step": 6753 - }, - { - "epoch": 0.5163904658141713, - "grad_norm": 0.0019324087770655751, - "learning_rate": 0.0001999998687982101, - "loss": 46.0, - "step": 6754 - }, - { - "epoch": 0.516466922797561, - "grad_norm": 0.004523304756730795, - "learning_rate": 0.000199999868759298, - "loss": 46.0, - "step": 6755 - }, - { - "epoch": 0.5165433797809508, - "grad_norm": 0.0009916049893945456, - "learning_rate": 0.00019999986872038016, - "loss": 46.0, - "step": 6756 - }, - { - "epoch": 0.5166198367643404, - "grad_norm": 0.0011290484108030796, - "learning_rate": 0.0001999998686814566, - "loss": 46.0, - "step": 6757 - }, - { - "epoch": 0.5166962937477302, - "grad_norm": 0.0005818207282572985, - "learning_rate": 0.0001999998686425272, - "loss": 46.0, - "step": 6758 - }, - { - "epoch": 0.51677275073112, - "grad_norm": 0.00037642737152054906, - "learning_rate": 0.00019999986860359207, - "loss": 46.0, - "step": 6759 - }, - { - "epoch": 0.5168492077145096, - "grad_norm": 0.0014851245796307921, - "learning_rate": 0.00019999986856465113, - "loss": 46.0, - "step": 6760 - }, - { - "epoch": 0.5169256646978994, - "grad_norm": 0.001152483862824738, - "learning_rate": 0.00019999986852570447, - "loss": 46.0, - "step": 6761 - }, - { - "epoch": 0.5170021216812891, - "grad_norm": 0.0012562167830765247, - "learning_rate": 0.00019999986848675202, - "loss": 46.0, - "step": 6762 - }, - { - "epoch": 0.5170785786646788, - "grad_norm": 0.0007644011056981981, - "learning_rate": 0.0001999998684477938, - "loss": 46.0, - "step": 6763 - }, - { - "epoch": 0.5171550356480685, - "grad_norm": 0.0011796654434874654, - "learning_rate": 0.00019999986840882978, - "loss": 46.0, - "step": 6764 - }, - { - "epoch": 0.5172314926314582, - "grad_norm": 0.0017730558756738901, - "learning_rate": 0.00019999986836986003, - "loss": 46.0, - "step": 6765 - }, - { - "epoch": 0.5173079496148479, - "grad_norm": 0.001144835026934743, - "learning_rate": 0.00019999986833088448, - "loss": 46.0, - "step": 6766 - }, - { - "epoch": 0.5173844065982377, - "grad_norm": 0.007220027968287468, - "learning_rate": 0.00019999986829190322, - "loss": 46.0, - "step": 6767 - }, - { - "epoch": 0.5174608635816274, - "grad_norm": 0.0005154610844329, - "learning_rate": 0.00019999986825291615, - "loss": 46.0, - "step": 6768 - }, - { - "epoch": 0.5175373205650171, - "grad_norm": 0.001148174749687314, - "learning_rate": 0.0001999998682139233, - "loss": 46.0, - "step": 6769 - }, - { - "epoch": 0.5176137775484069, - "grad_norm": 0.0029547400772571564, - "learning_rate": 0.00019999986817492469, - "loss": 46.0, - "step": 6770 - }, - { - "epoch": 0.5176902345317965, - "grad_norm": 0.0012961478205397725, - "learning_rate": 0.00019999986813592032, - "loss": 46.0, - "step": 6771 - }, - { - "epoch": 0.5177666915151863, - "grad_norm": 0.0010385812493041158, - "learning_rate": 0.00019999986809691016, - "loss": 46.0, - "step": 6772 - }, - { - "epoch": 0.5178431484985759, - "grad_norm": 0.002846222138032317, - "learning_rate": 0.00019999986805789422, - "loss": 46.0, - "step": 6773 - }, - { - "epoch": 0.5179196054819657, - "grad_norm": 0.0010120623046532273, - "learning_rate": 0.00019999986801887257, - "loss": 46.0, - "step": 6774 - }, - { - "epoch": 0.5179960624653555, - "grad_norm": 0.0028149408753961325, - "learning_rate": 0.0001999998679798451, - "loss": 46.0, - "step": 6775 - }, - { - "epoch": 0.5180725194487451, - "grad_norm": 0.0007786746136844158, - "learning_rate": 0.00019999986794081188, - "loss": 46.0, - "step": 6776 - }, - { - "epoch": 0.5181489764321349, - "grad_norm": 0.0013361842138692737, - "learning_rate": 0.0001999998679017729, - "loss": 46.0, - "step": 6777 - }, - { - "epoch": 0.5182254334155246, - "grad_norm": 0.0024689168203622103, - "learning_rate": 0.00019999986786272813, - "loss": 46.0, - "step": 6778 - }, - { - "epoch": 0.5183018903989143, - "grad_norm": 0.0007742635207250714, - "learning_rate": 0.00019999986782367758, - "loss": 46.0, - "step": 6779 - }, - { - "epoch": 0.518378347382304, - "grad_norm": 0.001180786988697946, - "learning_rate": 0.00019999986778462128, - "loss": 46.0, - "step": 6780 - }, - { - "epoch": 0.5184548043656938, - "grad_norm": 0.0010374423582106829, - "learning_rate": 0.0001999998677455592, - "loss": 46.0, - "step": 6781 - }, - { - "epoch": 0.5185312613490834, - "grad_norm": 0.0008883423288352787, - "learning_rate": 0.00019999986770649137, - "loss": 46.0, - "step": 6782 - }, - { - "epoch": 0.5186077183324732, - "grad_norm": 0.0010320040164515376, - "learning_rate": 0.00019999986766741775, - "loss": 46.0, - "step": 6783 - }, - { - "epoch": 0.5186841753158629, - "grad_norm": 0.0012594616273418069, - "learning_rate": 0.0001999998676283384, - "loss": 46.0, - "step": 6784 - }, - { - "epoch": 0.5187606322992526, - "grad_norm": 0.002100956393405795, - "learning_rate": 0.00019999986758925325, - "loss": 46.0, - "step": 6785 - }, - { - "epoch": 0.5188370892826424, - "grad_norm": 0.0009413774823769927, - "learning_rate": 0.00019999986755016232, - "loss": 46.0, - "step": 6786 - }, - { - "epoch": 0.518913546266032, - "grad_norm": 0.0008941596024669707, - "learning_rate": 0.0001999998675110656, - "loss": 46.0, - "step": 6787 - }, - { - "epoch": 0.5189900032494218, - "grad_norm": 0.0012038570130243897, - "learning_rate": 0.00019999986747196315, - "loss": 46.0, - "step": 6788 - }, - { - "epoch": 0.5190664602328116, - "grad_norm": 0.0013174674240872264, - "learning_rate": 0.00019999986743285492, - "loss": 46.0, - "step": 6789 - }, - { - "epoch": 0.5191429172162012, - "grad_norm": 0.0008081771666184068, - "learning_rate": 0.00019999986739374092, - "loss": 46.0, - "step": 6790 - }, - { - "epoch": 0.519219374199591, - "grad_norm": 0.01545399334281683, - "learning_rate": 0.00019999986735462117, - "loss": 46.0, - "step": 6791 - }, - { - "epoch": 0.5192958311829807, - "grad_norm": 0.0007040402269922197, - "learning_rate": 0.00019999986731549565, - "loss": 46.0, - "step": 6792 - }, - { - "epoch": 0.5193722881663704, - "grad_norm": 0.0011792993173003197, - "learning_rate": 0.00019999986727636433, - "loss": 46.0, - "step": 6793 - }, - { - "epoch": 0.5194487451497601, - "grad_norm": 0.0023760884068906307, - "learning_rate": 0.00019999986723722726, - "loss": 46.0, - "step": 6794 - }, - { - "epoch": 0.5195252021331498, - "grad_norm": 0.000887617701664567, - "learning_rate": 0.0001999998671980844, - "loss": 46.0, - "step": 6795 - }, - { - "epoch": 0.5196016591165395, - "grad_norm": 0.0006348420865833759, - "learning_rate": 0.0001999998671589358, - "loss": 46.0, - "step": 6796 - }, - { - "epoch": 0.5196781160999293, - "grad_norm": 0.0008443936822004616, - "learning_rate": 0.0001999998671197814, - "loss": 46.0, - "step": 6797 - }, - { - "epoch": 0.519754573083319, - "grad_norm": 0.0001689426862867549, - "learning_rate": 0.00019999986708062127, - "loss": 46.0, - "step": 6798 - }, - { - "epoch": 0.5198310300667087, - "grad_norm": 0.0036721364594995975, - "learning_rate": 0.00019999986704145534, - "loss": 46.0, - "step": 6799 - }, - { - "epoch": 0.5199074870500985, - "grad_norm": 0.0016476784367114305, - "learning_rate": 0.00019999986700228365, - "loss": 46.0, - "step": 6800 - }, - { - "epoch": 0.5199839440334881, - "grad_norm": 0.004639696795493364, - "learning_rate": 0.0001999998669631062, - "loss": 46.0, - "step": 6801 - }, - { - "epoch": 0.5200604010168779, - "grad_norm": 0.0019115731120109558, - "learning_rate": 0.00019999986692392297, - "loss": 46.0, - "step": 6802 - }, - { - "epoch": 0.5201368580002677, - "grad_norm": 0.0009177215397357941, - "learning_rate": 0.00019999986688473397, - "loss": 46.0, - "step": 6803 - }, - { - "epoch": 0.5202133149836573, - "grad_norm": 0.000698661373462528, - "learning_rate": 0.00019999986684553916, - "loss": 46.0, - "step": 6804 - }, - { - "epoch": 0.5202897719670471, - "grad_norm": 0.0009078040602616966, - "learning_rate": 0.00019999986680633864, - "loss": 46.0, - "step": 6805 - }, - { - "epoch": 0.5203662289504367, - "grad_norm": 0.0035628413315862417, - "learning_rate": 0.00019999986676713235, - "loss": 46.0, - "step": 6806 - }, - { - "epoch": 0.5204426859338265, - "grad_norm": 0.002117432653903961, - "learning_rate": 0.00019999986672792025, - "loss": 46.0, - "step": 6807 - }, - { - "epoch": 0.5205191429172162, - "grad_norm": 0.000610047543887049, - "learning_rate": 0.0001999998666887024, - "loss": 46.0, - "step": 6808 - }, - { - "epoch": 0.5205955999006059, - "grad_norm": 0.0017189646605402231, - "learning_rate": 0.00019999986664947882, - "loss": 46.0, - "step": 6809 - }, - { - "epoch": 0.5206720568839956, - "grad_norm": 0.0017658865544945002, - "learning_rate": 0.0001999998666102494, - "loss": 46.0, - "step": 6810 - }, - { - "epoch": 0.5207485138673854, - "grad_norm": 0.006992855574935675, - "learning_rate": 0.00019999986657101424, - "loss": 46.0, - "step": 6811 - }, - { - "epoch": 0.5208249708507751, - "grad_norm": 0.0018627009121701121, - "learning_rate": 0.0001999998665317733, - "loss": 46.0, - "step": 6812 - }, - { - "epoch": 0.5209014278341648, - "grad_norm": 0.0045731826685369015, - "learning_rate": 0.00019999986649252662, - "loss": 46.0, - "step": 6813 - }, - { - "epoch": 0.5209778848175545, - "grad_norm": 0.0011791486758738756, - "learning_rate": 0.00019999986645327417, - "loss": 46.0, - "step": 6814 - }, - { - "epoch": 0.5210543418009442, - "grad_norm": 0.004062290769070387, - "learning_rate": 0.00019999986641401594, - "loss": 46.0, - "step": 6815 - }, - { - "epoch": 0.521130798784334, - "grad_norm": 0.0006228104466572404, - "learning_rate": 0.00019999986637475194, - "loss": 46.0, - "step": 6816 - }, - { - "epoch": 0.5212072557677236, - "grad_norm": 0.0008903697016648948, - "learning_rate": 0.00019999986633548216, - "loss": 46.0, - "step": 6817 - }, - { - "epoch": 0.5212837127511134, - "grad_norm": 0.011292941868305206, - "learning_rate": 0.0001999998662962066, - "loss": 46.0, - "step": 6818 - }, - { - "epoch": 0.5213601697345032, - "grad_norm": 0.00036900979466736317, - "learning_rate": 0.00019999986625692532, - "loss": 46.0, - "step": 6819 - }, - { - "epoch": 0.5214366267178928, - "grad_norm": 0.0006371656199917197, - "learning_rate": 0.00019999986621763825, - "loss": 46.0, - "step": 6820 - }, - { - "epoch": 0.5215130837012826, - "grad_norm": 0.0010578773217275739, - "learning_rate": 0.00019999986617834535, - "loss": 46.0, - "step": 6821 - }, - { - "epoch": 0.5215895406846723, - "grad_norm": 0.0007229943294078112, - "learning_rate": 0.00019999986613904676, - "loss": 46.0, - "step": 6822 - }, - { - "epoch": 0.521665997668062, - "grad_norm": 0.0011240994790568948, - "learning_rate": 0.00019999986609974237, - "loss": 46.0, - "step": 6823 - }, - { - "epoch": 0.5217424546514517, - "grad_norm": 0.0005957692046649754, - "learning_rate": 0.0001999998660604322, - "loss": 46.0, - "step": 6824 - }, - { - "epoch": 0.5218189116348414, - "grad_norm": 0.0009057045099325478, - "learning_rate": 0.00019999986602111628, - "loss": 46.0, - "step": 6825 - }, - { - "epoch": 0.5218953686182312, - "grad_norm": 0.0009465592447668314, - "learning_rate": 0.0001999998659817946, - "loss": 46.0, - "step": 6826 - }, - { - "epoch": 0.5219718256016209, - "grad_norm": 0.0051507200114429, - "learning_rate": 0.0001999998659424671, - "loss": 46.0, - "step": 6827 - }, - { - "epoch": 0.5220482825850106, - "grad_norm": 0.00029329105746001005, - "learning_rate": 0.00019999986590313388, - "loss": 46.0, - "step": 6828 - }, - { - "epoch": 0.5221247395684003, - "grad_norm": 0.001336118089966476, - "learning_rate": 0.00019999986586379488, - "loss": 46.0, - "step": 6829 - }, - { - "epoch": 0.5222011965517901, - "grad_norm": 0.0012513954425230622, - "learning_rate": 0.0001999998658244501, - "loss": 46.0, - "step": 6830 - }, - { - "epoch": 0.5222776535351797, - "grad_norm": 0.0006752603221684694, - "learning_rate": 0.00019999986578509956, - "loss": 46.0, - "step": 6831 - }, - { - "epoch": 0.5223541105185695, - "grad_norm": 0.0008080375846475363, - "learning_rate": 0.00019999986574574324, - "loss": 46.0, - "step": 6832 - }, - { - "epoch": 0.5224305675019593, - "grad_norm": 0.0005798717611469328, - "learning_rate": 0.00019999986570638117, - "loss": 46.0, - "step": 6833 - }, - { - "epoch": 0.5225070244853489, - "grad_norm": 0.0009395437664352357, - "learning_rate": 0.00019999986566701333, - "loss": 46.0, - "step": 6834 - }, - { - "epoch": 0.5225834814687387, - "grad_norm": 0.002736675785854459, - "learning_rate": 0.00019999986562763968, - "loss": 46.0, - "step": 6835 - }, - { - "epoch": 0.5226599384521283, - "grad_norm": 0.0017854857724159956, - "learning_rate": 0.0001999998655882603, - "loss": 46.0, - "step": 6836 - }, - { - "epoch": 0.5227363954355181, - "grad_norm": 0.0009235875913873315, - "learning_rate": 0.00019999986554887513, - "loss": 46.0, - "step": 6837 - }, - { - "epoch": 0.5228128524189078, - "grad_norm": 0.0009338136296719313, - "learning_rate": 0.00019999986550948423, - "loss": 46.0, - "step": 6838 - }, - { - "epoch": 0.5228893094022975, - "grad_norm": 0.0005506203160621226, - "learning_rate": 0.00019999986547008752, - "loss": 46.0, - "step": 6839 - }, - { - "epoch": 0.5229657663856873, - "grad_norm": 0.0014457597862929106, - "learning_rate": 0.00019999986543068504, - "loss": 46.0, - "step": 6840 - }, - { - "epoch": 0.523042223369077, - "grad_norm": 0.0004840172769036144, - "learning_rate": 0.0001999998653912768, - "loss": 46.0, - "step": 6841 - }, - { - "epoch": 0.5231186803524667, - "grad_norm": 0.0014432499883696437, - "learning_rate": 0.00019999986535186278, - "loss": 46.0, - "step": 6842 - }, - { - "epoch": 0.5231951373358564, - "grad_norm": 0.0023334503639489412, - "learning_rate": 0.000199999865312443, - "loss": 46.0, - "step": 6843 - }, - { - "epoch": 0.5232715943192461, - "grad_norm": 0.0024276773910969496, - "learning_rate": 0.00019999986527301746, - "loss": 46.0, - "step": 6844 - }, - { - "epoch": 0.5233480513026358, - "grad_norm": 0.0013002149062231183, - "learning_rate": 0.00019999986523358616, - "loss": 46.0, - "step": 6845 - }, - { - "epoch": 0.5234245082860256, - "grad_norm": 0.0018470765789970756, - "learning_rate": 0.00019999986519414907, - "loss": 46.0, - "step": 6846 - }, - { - "epoch": 0.5235009652694153, - "grad_norm": 0.0013910942943766713, - "learning_rate": 0.0001999998651547062, - "loss": 46.0, - "step": 6847 - }, - { - "epoch": 0.523577422252805, - "grad_norm": 0.0009281396633014083, - "learning_rate": 0.00019999986511525756, - "loss": 46.0, - "step": 6848 - }, - { - "epoch": 0.5236538792361948, - "grad_norm": 0.002898489125072956, - "learning_rate": 0.00019999986507580317, - "loss": 46.0, - "step": 6849 - }, - { - "epoch": 0.5237303362195844, - "grad_norm": 0.0014577272813767195, - "learning_rate": 0.000199999865036343, - "loss": 46.0, - "step": 6850 - }, - { - "epoch": 0.5238067932029742, - "grad_norm": 0.0008228104561567307, - "learning_rate": 0.00019999986499687707, - "loss": 46.0, - "step": 6851 - }, - { - "epoch": 0.523883250186364, - "grad_norm": 0.0097061051055789, - "learning_rate": 0.0001999998649574054, - "loss": 46.0, - "step": 6852 - }, - { - "epoch": 0.5239597071697536, - "grad_norm": 0.0008371971198357642, - "learning_rate": 0.0001999998649179279, - "loss": 46.0, - "step": 6853 - }, - { - "epoch": 0.5240361641531434, - "grad_norm": 0.0013694411609321833, - "learning_rate": 0.00019999986487844465, - "loss": 46.0, - "step": 6854 - }, - { - "epoch": 0.524112621136533, - "grad_norm": 0.001899379538372159, - "learning_rate": 0.00019999986483895565, - "loss": 46.0, - "step": 6855 - }, - { - "epoch": 0.5241890781199228, - "grad_norm": 0.006647980771958828, - "learning_rate": 0.00019999986479946088, - "loss": 46.0, - "step": 6856 - }, - { - "epoch": 0.5242655351033125, - "grad_norm": 0.001068764366209507, - "learning_rate": 0.0001999998647599603, - "loss": 46.0, - "step": 6857 - }, - { - "epoch": 0.5243419920867022, - "grad_norm": 0.0006564805516973138, - "learning_rate": 0.000199999864720454, - "loss": 46.0, - "step": 6858 - }, - { - "epoch": 0.5244184490700919, - "grad_norm": 0.0010898180771619081, - "learning_rate": 0.0001999998646809419, - "loss": 46.0, - "step": 6859 - }, - { - "epoch": 0.5244949060534817, - "grad_norm": 0.00036350873415358365, - "learning_rate": 0.00019999986464142407, - "loss": 46.0, - "step": 6860 - }, - { - "epoch": 0.5245713630368714, - "grad_norm": 0.0018155863508582115, - "learning_rate": 0.00019999986460190043, - "loss": 46.0, - "step": 6861 - }, - { - "epoch": 0.5246478200202611, - "grad_norm": 0.0008048060699366033, - "learning_rate": 0.00019999986456237104, - "loss": 46.0, - "step": 6862 - }, - { - "epoch": 0.5247242770036509, - "grad_norm": 0.0026283569168299437, - "learning_rate": 0.00019999986452283585, - "loss": 46.0, - "step": 6863 - }, - { - "epoch": 0.5248007339870405, - "grad_norm": 0.001201946404762566, - "learning_rate": 0.00019999986448329492, - "loss": 46.0, - "step": 6864 - }, - { - "epoch": 0.5248771909704303, - "grad_norm": 0.0016349850920960307, - "learning_rate": 0.00019999986444374824, - "loss": 46.0, - "step": 6865 - }, - { - "epoch": 0.5249536479538199, - "grad_norm": 0.0006353700300678611, - "learning_rate": 0.00019999986440419573, - "loss": 46.0, - "step": 6866 - }, - { - "epoch": 0.5250301049372097, - "grad_norm": 0.00213131052441895, - "learning_rate": 0.0001999998643646375, - "loss": 46.0, - "step": 6867 - }, - { - "epoch": 0.5251065619205995, - "grad_norm": 0.0010265909368172288, - "learning_rate": 0.0001999998643250735, - "loss": 46.0, - "step": 6868 - }, - { - "epoch": 0.5251830189039891, - "grad_norm": 0.0015670554712414742, - "learning_rate": 0.0001999998642855037, - "loss": 46.0, - "step": 6869 - }, - { - "epoch": 0.5252594758873789, - "grad_norm": 0.0018927843775600195, - "learning_rate": 0.00019999986424592815, - "loss": 46.0, - "step": 6870 - }, - { - "epoch": 0.5253359328707686, - "grad_norm": 0.0010991941671818495, - "learning_rate": 0.00019999986420634683, - "loss": 46.0, - "step": 6871 - }, - { - "epoch": 0.5254123898541583, - "grad_norm": 0.001754280412569642, - "learning_rate": 0.00019999986416675976, - "loss": 46.0, - "step": 6872 - }, - { - "epoch": 0.525488846837548, - "grad_norm": 0.0015437264228239655, - "learning_rate": 0.0001999998641271669, - "loss": 46.0, - "step": 6873 - }, - { - "epoch": 0.5255653038209377, - "grad_norm": 0.0006507346406579018, - "learning_rate": 0.00019999986408756826, - "loss": 46.0, - "step": 6874 - }, - { - "epoch": 0.5256417608043275, - "grad_norm": 0.0018157438607886434, - "learning_rate": 0.00019999986404796384, - "loss": 46.0, - "step": 6875 - }, - { - "epoch": 0.5257182177877172, - "grad_norm": 0.001315286848694086, - "learning_rate": 0.00019999986400835365, - "loss": 46.0, - "step": 6876 - }, - { - "epoch": 0.5257946747711069, - "grad_norm": 0.0005139801069162786, - "learning_rate": 0.00019999986396873775, - "loss": 46.0, - "step": 6877 - }, - { - "epoch": 0.5258711317544966, - "grad_norm": 0.002010364318266511, - "learning_rate": 0.00019999986392911604, - "loss": 46.0, - "step": 6878 - }, - { - "epoch": 0.5259475887378864, - "grad_norm": 0.0009020005236379802, - "learning_rate": 0.00019999986388948856, - "loss": 46.0, - "step": 6879 - }, - { - "epoch": 0.526024045721276, - "grad_norm": 0.0010088416747748852, - "learning_rate": 0.0001999998638498553, - "loss": 46.0, - "step": 6880 - }, - { - "epoch": 0.5261005027046658, - "grad_norm": 0.0013785704504698515, - "learning_rate": 0.0001999998638102163, - "loss": 46.0, - "step": 6881 - }, - { - "epoch": 0.5261769596880556, - "grad_norm": 0.0015304688131436706, - "learning_rate": 0.00019999986377057153, - "loss": 46.0, - "step": 6882 - }, - { - "epoch": 0.5262534166714452, - "grad_norm": 0.0003160073247272521, - "learning_rate": 0.00019999986373092095, - "loss": 46.0, - "step": 6883 - }, - { - "epoch": 0.526329873654835, - "grad_norm": 0.0016546716215088964, - "learning_rate": 0.00019999986369126463, - "loss": 46.0, - "step": 6884 - }, - { - "epoch": 0.5264063306382246, - "grad_norm": 0.006996653508394957, - "learning_rate": 0.00019999986365160254, - "loss": 46.0, - "step": 6885 - }, - { - "epoch": 0.5264827876216144, - "grad_norm": 0.0025435148272663355, - "learning_rate": 0.00019999986361193467, - "loss": 46.0, - "step": 6886 - }, - { - "epoch": 0.5265592446050041, - "grad_norm": 0.0007781276945024729, - "learning_rate": 0.00019999986357226103, - "loss": 46.0, - "step": 6887 - }, - { - "epoch": 0.5266357015883938, - "grad_norm": 0.0015560660976916552, - "learning_rate": 0.00019999986353258162, - "loss": 46.0, - "step": 6888 - }, - { - "epoch": 0.5267121585717836, - "grad_norm": 0.004460847470909357, - "learning_rate": 0.00019999986349289646, - "loss": 46.0, - "step": 6889 - }, - { - "epoch": 0.5267886155551733, - "grad_norm": 0.002675467636436224, - "learning_rate": 0.00019999986345320552, - "loss": 46.0, - "step": 6890 - }, - { - "epoch": 0.526865072538563, - "grad_norm": 0.00148398382589221, - "learning_rate": 0.00019999986341350881, - "loss": 46.0, - "step": 6891 - }, - { - "epoch": 0.5269415295219527, - "grad_norm": 0.00960307102650404, - "learning_rate": 0.0001999998633738063, - "loss": 46.0, - "step": 6892 - }, - { - "epoch": 0.5270179865053425, - "grad_norm": 0.0012577949091792107, - "learning_rate": 0.00019999986333409808, - "loss": 46.0, - "step": 6893 - }, - { - "epoch": 0.5270944434887321, - "grad_norm": 0.0005339909694157541, - "learning_rate": 0.00019999986329438405, - "loss": 46.0, - "step": 6894 - }, - { - "epoch": 0.5271709004721219, - "grad_norm": 0.0020182239823043346, - "learning_rate": 0.00019999986325466428, - "loss": 46.0, - "step": 6895 - }, - { - "epoch": 0.5272473574555115, - "grad_norm": 0.0010359484003856778, - "learning_rate": 0.00019999986321493868, - "loss": 46.0, - "step": 6896 - }, - { - "epoch": 0.5273238144389013, - "grad_norm": 0.0010302109876647592, - "learning_rate": 0.00019999986317520736, - "loss": 46.0, - "step": 6897 - }, - { - "epoch": 0.5274002714222911, - "grad_norm": 0.001595202716998756, - "learning_rate": 0.0001999998631354703, - "loss": 46.0, - "step": 6898 - }, - { - "epoch": 0.5274767284056807, - "grad_norm": 0.001114501035772264, - "learning_rate": 0.00019999986309572742, - "loss": 46.0, - "step": 6899 - }, - { - "epoch": 0.5275531853890705, - "grad_norm": 0.0008797789923846722, - "learning_rate": 0.00019999986305597878, - "loss": 46.0, - "step": 6900 - }, - { - "epoch": 0.5276296423724602, - "grad_norm": 0.000978010008111596, - "learning_rate": 0.00019999986301622434, - "loss": 46.0, - "step": 6901 - }, - { - "epoch": 0.5277060993558499, - "grad_norm": 0.00047197192907333374, - "learning_rate": 0.0001999998629764642, - "loss": 46.0, - "step": 6902 - }, - { - "epoch": 0.5277825563392397, - "grad_norm": 0.00043294852366670966, - "learning_rate": 0.00019999986293669825, - "loss": 46.0, - "step": 6903 - }, - { - "epoch": 0.5278590133226293, - "grad_norm": 0.0006556301959790289, - "learning_rate": 0.00019999986289692654, - "loss": 46.0, - "step": 6904 - }, - { - "epoch": 0.5279354703060191, - "grad_norm": 0.002570302924141288, - "learning_rate": 0.00019999986285714903, - "loss": 46.0, - "step": 6905 - }, - { - "epoch": 0.5280119272894088, - "grad_norm": 0.003173999721184373, - "learning_rate": 0.0001999998628173658, - "loss": 46.0, - "step": 6906 - }, - { - "epoch": 0.5280883842727985, - "grad_norm": 0.001547998166643083, - "learning_rate": 0.00019999986277757675, - "loss": 46.0, - "step": 6907 - }, - { - "epoch": 0.5281648412561882, - "grad_norm": 0.0006692355964332819, - "learning_rate": 0.00019999986273778198, - "loss": 46.0, - "step": 6908 - }, - { - "epoch": 0.528241298239578, - "grad_norm": 0.005438718944787979, - "learning_rate": 0.0001999998626979814, - "loss": 46.0, - "step": 6909 - }, - { - "epoch": 0.5283177552229676, - "grad_norm": 0.0006363442516885698, - "learning_rate": 0.00019999986265817509, - "loss": 46.0, - "step": 6910 - }, - { - "epoch": 0.5283942122063574, - "grad_norm": 0.0013391983229666948, - "learning_rate": 0.000199999862618363, - "loss": 46.0, - "step": 6911 - }, - { - "epoch": 0.5284706691897472, - "grad_norm": 0.0017163304146379232, - "learning_rate": 0.00019999986257854512, - "loss": 46.0, - "step": 6912 - }, - { - "epoch": 0.5285471261731368, - "grad_norm": 0.0008894107886590064, - "learning_rate": 0.00019999986253872148, - "loss": 46.0, - "step": 6913 - }, - { - "epoch": 0.5286235831565266, - "grad_norm": 0.0005524918669834733, - "learning_rate": 0.0001999998624988921, - "loss": 46.0, - "step": 6914 - }, - { - "epoch": 0.5287000401399162, - "grad_norm": 0.00254958588629961, - "learning_rate": 0.00019999986245905688, - "loss": 46.0, - "step": 6915 - }, - { - "epoch": 0.528776497123306, - "grad_norm": 0.0010677631944417953, - "learning_rate": 0.00019999986241921595, - "loss": 46.0, - "step": 6916 - }, - { - "epoch": 0.5288529541066957, - "grad_norm": 0.0010262842988595366, - "learning_rate": 0.00019999986237936922, - "loss": 46.0, - "step": 6917 - }, - { - "epoch": 0.5289294110900854, - "grad_norm": 0.0010445505613461137, - "learning_rate": 0.00019999986233951674, - "loss": 46.0, - "step": 6918 - }, - { - "epoch": 0.5290058680734752, - "grad_norm": 0.0013391776010394096, - "learning_rate": 0.00019999986229965848, - "loss": 46.0, - "step": 6919 - }, - { - "epoch": 0.5290823250568649, - "grad_norm": 0.0036920837592333555, - "learning_rate": 0.00019999986225979446, - "loss": 46.0, - "step": 6920 - }, - { - "epoch": 0.5291587820402546, - "grad_norm": 0.0008187103667296469, - "learning_rate": 0.00019999986221992466, - "loss": 46.0, - "step": 6921 - }, - { - "epoch": 0.5292352390236443, - "grad_norm": 0.0009263358078896999, - "learning_rate": 0.00019999986218004908, - "loss": 46.0, - "step": 6922 - }, - { - "epoch": 0.5293116960070341, - "grad_norm": 0.0016071456484496593, - "learning_rate": 0.00019999986214016776, - "loss": 46.0, - "step": 6923 - }, - { - "epoch": 0.5293881529904237, - "grad_norm": 0.0012537785805761814, - "learning_rate": 0.00019999986210028067, - "loss": 46.0, - "step": 6924 - }, - { - "epoch": 0.5294646099738135, - "grad_norm": 0.005476954393088818, - "learning_rate": 0.00019999986206038778, - "loss": 46.0, - "step": 6925 - }, - { - "epoch": 0.5295410669572032, - "grad_norm": 0.0032883668318390846, - "learning_rate": 0.00019999986202048916, - "loss": 46.0, - "step": 6926 - }, - { - "epoch": 0.5296175239405929, - "grad_norm": 0.001330337836407125, - "learning_rate": 0.00019999986198058472, - "loss": 46.0, - "step": 6927 - }, - { - "epoch": 0.5296939809239827, - "grad_norm": 0.0025806999765336514, - "learning_rate": 0.00019999986194067456, - "loss": 46.0, - "step": 6928 - }, - { - "epoch": 0.5297704379073723, - "grad_norm": 0.001325944671407342, - "learning_rate": 0.0001999998619007586, - "loss": 46.0, - "step": 6929 - }, - { - "epoch": 0.5298468948907621, - "grad_norm": 0.0008160117431543767, - "learning_rate": 0.00019999986186083687, - "loss": 46.0, - "step": 6930 - }, - { - "epoch": 0.5299233518741518, - "grad_norm": 0.004486887715756893, - "learning_rate": 0.0001999998618209094, - "loss": 46.0, - "step": 6931 - }, - { - "epoch": 0.5299998088575415, - "grad_norm": 0.0032692733220756054, - "learning_rate": 0.00019999986178097614, - "loss": 46.0, - "step": 6932 - }, - { - "epoch": 0.5300762658409313, - "grad_norm": 0.0015005492605268955, - "learning_rate": 0.0001999998617410371, - "loss": 46.0, - "step": 6933 - }, - { - "epoch": 0.530152722824321, - "grad_norm": 0.001956478226929903, - "learning_rate": 0.0001999998617010923, - "loss": 46.0, - "step": 6934 - }, - { - "epoch": 0.5302291798077107, - "grad_norm": 0.004179133102297783, - "learning_rate": 0.00019999986166114176, - "loss": 46.0, - "step": 6935 - }, - { - "epoch": 0.5303056367911004, - "grad_norm": 0.004741376265883446, - "learning_rate": 0.00019999986162118542, - "loss": 46.0, - "step": 6936 - }, - { - "epoch": 0.5303820937744901, - "grad_norm": 0.0106956847012043, - "learning_rate": 0.0001999998615812233, - "loss": 46.0, - "step": 6937 - }, - { - "epoch": 0.5304585507578798, - "grad_norm": 0.0022359401918947697, - "learning_rate": 0.00019999986154125543, - "loss": 46.0, - "step": 6938 - }, - { - "epoch": 0.5305350077412696, - "grad_norm": 0.001093576429411769, - "learning_rate": 0.0001999998615012818, - "loss": 46.0, - "step": 6939 - }, - { - "epoch": 0.5306114647246593, - "grad_norm": 0.0022686878219246864, - "learning_rate": 0.00019999986146130238, - "loss": 46.0, - "step": 6940 - }, - { - "epoch": 0.530687921708049, - "grad_norm": 0.0009230774012394249, - "learning_rate": 0.0001999998614213172, - "loss": 46.0, - "step": 6941 - }, - { - "epoch": 0.5307643786914388, - "grad_norm": 0.0010031594429165125, - "learning_rate": 0.00019999986138132623, - "loss": 46.0, - "step": 6942 - }, - { - "epoch": 0.5308408356748284, - "grad_norm": 0.0012422491563484073, - "learning_rate": 0.0001999998613413295, - "loss": 46.0, - "step": 6943 - }, - { - "epoch": 0.5309172926582182, - "grad_norm": 0.00034665808198042214, - "learning_rate": 0.00019999986130132702, - "loss": 46.0, - "step": 6944 - }, - { - "epoch": 0.5309937496416078, - "grad_norm": 0.0005560446297749877, - "learning_rate": 0.00019999986126131877, - "loss": 46.0, - "step": 6945 - }, - { - "epoch": 0.5310702066249976, - "grad_norm": 0.009018749929964542, - "learning_rate": 0.00019999986122130471, - "loss": 46.0, - "step": 6946 - }, - { - "epoch": 0.5311466636083874, - "grad_norm": 0.0006785035948269069, - "learning_rate": 0.00019999986118128494, - "loss": 46.0, - "step": 6947 - }, - { - "epoch": 0.531223120591777, - "grad_norm": 0.0043200114741921425, - "learning_rate": 0.00019999986114125937, - "loss": 46.0, - "step": 6948 - }, - { - "epoch": 0.5312995775751668, - "grad_norm": 0.0007045610109344125, - "learning_rate": 0.00019999986110122802, - "loss": 46.0, - "step": 6949 - }, - { - "epoch": 0.5313760345585565, - "grad_norm": 0.00042669090908020735, - "learning_rate": 0.00019999986106119093, - "loss": 46.0, - "step": 6950 - }, - { - "epoch": 0.5314524915419462, - "grad_norm": 0.0005369912250898778, - "learning_rate": 0.00019999986102114804, - "loss": 46.0, - "step": 6951 - }, - { - "epoch": 0.5315289485253359, - "grad_norm": 0.003414823906496167, - "learning_rate": 0.0001999998609810994, - "loss": 46.0, - "step": 6952 - }, - { - "epoch": 0.5316054055087257, - "grad_norm": 0.0012889598729088902, - "learning_rate": 0.00019999986094104496, - "loss": 46.0, - "step": 6953 - }, - { - "epoch": 0.5316818624921154, - "grad_norm": 0.0009127567755058408, - "learning_rate": 0.00019999986090098477, - "loss": 46.0, - "step": 6954 - }, - { - "epoch": 0.5317583194755051, - "grad_norm": 0.0012592733837664127, - "learning_rate": 0.0001999998608609188, - "loss": 46.0, - "step": 6955 - }, - { - "epoch": 0.5318347764588948, - "grad_norm": 0.0006647682166658342, - "learning_rate": 0.0001999998608208471, - "loss": 46.0, - "step": 6956 - }, - { - "epoch": 0.5319112334422845, - "grad_norm": 0.00046009011566638947, - "learning_rate": 0.0001999998607807696, - "loss": 46.0, - "step": 6957 - }, - { - "epoch": 0.5319876904256743, - "grad_norm": 0.0010078222258016467, - "learning_rate": 0.00019999986074068632, - "loss": 46.0, - "step": 6958 - }, - { - "epoch": 0.5320641474090639, - "grad_norm": 0.0005325308884494007, - "learning_rate": 0.0001999998607005973, - "loss": 46.0, - "step": 6959 - }, - { - "epoch": 0.5321406043924537, - "grad_norm": 0.0021776785142719746, - "learning_rate": 0.00019999986066050252, - "loss": 46.0, - "step": 6960 - }, - { - "epoch": 0.5322170613758435, - "grad_norm": 0.0006236338522285223, - "learning_rate": 0.00019999986062040192, - "loss": 46.0, - "step": 6961 - }, - { - "epoch": 0.5322935183592331, - "grad_norm": 0.0080524031072855, - "learning_rate": 0.0001999998605802956, - "loss": 46.0, - "step": 6962 - }, - { - "epoch": 0.5323699753426229, - "grad_norm": 0.0004457864270079881, - "learning_rate": 0.00019999986054018346, - "loss": 46.0, - "step": 6963 - }, - { - "epoch": 0.5324464323260126, - "grad_norm": 0.0008286235970444977, - "learning_rate": 0.00019999986050006562, - "loss": 46.0, - "step": 6964 - }, - { - "epoch": 0.5325228893094023, - "grad_norm": 0.0010822893818840384, - "learning_rate": 0.00019999986045994198, - "loss": 46.0, - "step": 6965 - }, - { - "epoch": 0.532599346292792, - "grad_norm": 0.003305522259324789, - "learning_rate": 0.00019999986041981254, - "loss": 46.0, - "step": 6966 - }, - { - "epoch": 0.5326758032761817, - "grad_norm": 0.0008318547043018043, - "learning_rate": 0.00019999986037967733, - "loss": 46.0, - "step": 6967 - }, - { - "epoch": 0.5327522602595715, - "grad_norm": 0.0012184131192043424, - "learning_rate": 0.00019999986033953637, - "loss": 46.0, - "step": 6968 - }, - { - "epoch": 0.5328287172429612, - "grad_norm": 0.0009557275334373116, - "learning_rate": 0.00019999986029938964, - "loss": 46.0, - "step": 6969 - }, - { - "epoch": 0.5329051742263509, - "grad_norm": 0.0008392874151468277, - "learning_rate": 0.00019999986025923716, - "loss": 46.0, - "step": 6970 - }, - { - "epoch": 0.5329816312097406, - "grad_norm": 0.001108025317080319, - "learning_rate": 0.0001999998602190789, - "loss": 46.0, - "step": 6971 - }, - { - "epoch": 0.5330580881931304, - "grad_norm": 0.0009874349925667048, - "learning_rate": 0.00019999986017891485, - "loss": 46.0, - "step": 6972 - }, - { - "epoch": 0.53313454517652, - "grad_norm": 0.0005841582315042615, - "learning_rate": 0.00019999986013874503, - "loss": 46.0, - "step": 6973 - }, - { - "epoch": 0.5332110021599098, - "grad_norm": 0.0008298710454255342, - "learning_rate": 0.00019999986009856948, - "loss": 46.0, - "step": 6974 - }, - { - "epoch": 0.5332874591432994, - "grad_norm": 0.001268972409889102, - "learning_rate": 0.00019999986005838814, - "loss": 46.0, - "step": 6975 - }, - { - "epoch": 0.5333639161266892, - "grad_norm": 0.0010404494823887944, - "learning_rate": 0.00019999986001820102, - "loss": 46.0, - "step": 6976 - }, - { - "epoch": 0.533440373110079, - "grad_norm": 0.0017261148896068335, - "learning_rate": 0.00019999985997800815, - "loss": 46.0, - "step": 6977 - }, - { - "epoch": 0.5335168300934686, - "grad_norm": 0.000760427734348923, - "learning_rate": 0.00019999985993780949, - "loss": 46.0, - "step": 6978 - }, - { - "epoch": 0.5335932870768584, - "grad_norm": 0.0006444074097089469, - "learning_rate": 0.00019999985989760505, - "loss": 46.0, - "step": 6979 - }, - { - "epoch": 0.5336697440602481, - "grad_norm": 0.0007587940781377256, - "learning_rate": 0.00019999985985739486, - "loss": 46.0, - "step": 6980 - }, - { - "epoch": 0.5337462010436378, - "grad_norm": 0.0015740134986117482, - "learning_rate": 0.0001999998598171789, - "loss": 46.0, - "step": 6981 - }, - { - "epoch": 0.5338226580270276, - "grad_norm": 0.001439502346329391, - "learning_rate": 0.00019999985977695717, - "loss": 46.0, - "step": 6982 - }, - { - "epoch": 0.5338991150104173, - "grad_norm": 0.0013325884938240051, - "learning_rate": 0.00019999985973672966, - "loss": 46.0, - "step": 6983 - }, - { - "epoch": 0.533975571993807, - "grad_norm": 0.0023438294883817434, - "learning_rate": 0.00019999985969649639, - "loss": 46.0, - "step": 6984 - }, - { - "epoch": 0.5340520289771967, - "grad_norm": 0.0006498959846794605, - "learning_rate": 0.00019999985965625736, - "loss": 46.0, - "step": 6985 - }, - { - "epoch": 0.5341284859605864, - "grad_norm": 0.0018395694205537438, - "learning_rate": 0.00019999985961601256, - "loss": 46.0, - "step": 6986 - }, - { - "epoch": 0.5342049429439761, - "grad_norm": 0.000798806082457304, - "learning_rate": 0.00019999985957576196, - "loss": 46.0, - "step": 6987 - }, - { - "epoch": 0.5342813999273659, - "grad_norm": 0.002494304208084941, - "learning_rate": 0.00019999985953550562, - "loss": 46.0, - "step": 6988 - }, - { - "epoch": 0.5343578569107555, - "grad_norm": 0.0006741366232745349, - "learning_rate": 0.00019999985949524353, - "loss": 46.0, - "step": 6989 - }, - { - "epoch": 0.5344343138941453, - "grad_norm": 0.016659339889883995, - "learning_rate": 0.00019999985945497563, - "loss": 46.0, - "step": 6990 - }, - { - "epoch": 0.5345107708775351, - "grad_norm": 0.0033929001074284315, - "learning_rate": 0.00019999985941470197, - "loss": 46.0, - "step": 6991 - }, - { - "epoch": 0.5345872278609247, - "grad_norm": 0.0003985811781603843, - "learning_rate": 0.00019999985937442253, - "loss": 46.0, - "step": 6992 - }, - { - "epoch": 0.5346636848443145, - "grad_norm": 0.002357101533561945, - "learning_rate": 0.00019999985933413737, - "loss": 46.0, - "step": 6993 - }, - { - "epoch": 0.5347401418277042, - "grad_norm": 0.00506207812577486, - "learning_rate": 0.00019999985929384639, - "loss": 46.0, - "step": 6994 - }, - { - "epoch": 0.5348165988110939, - "grad_norm": 0.014972719363868237, - "learning_rate": 0.00019999985925354965, - "loss": 46.0, - "step": 6995 - }, - { - "epoch": 0.5348930557944837, - "grad_norm": 0.0010895323939621449, - "learning_rate": 0.00019999985921324712, - "loss": 46.0, - "step": 6996 - }, - { - "epoch": 0.5349695127778733, - "grad_norm": 0.0007342291064560413, - "learning_rate": 0.0001999998591729389, - "loss": 46.0, - "step": 6997 - }, - { - "epoch": 0.5350459697612631, - "grad_norm": 0.0018762554973363876, - "learning_rate": 0.00019999985913262482, - "loss": 46.0, - "step": 6998 - }, - { - "epoch": 0.5351224267446528, - "grad_norm": 0.0005539553822018206, - "learning_rate": 0.00019999985909230502, - "loss": 46.0, - "step": 6999 - }, - { - "epoch": 0.5351988837280425, - "grad_norm": 0.002060561440885067, - "learning_rate": 0.00019999985905197945, - "loss": 46.0, - "step": 7000 - }, - { - "epoch": 0.5352753407114322, - "grad_norm": 0.001272550318390131, - "learning_rate": 0.00019999985901164808, - "loss": 46.0, - "step": 7001 - }, - { - "epoch": 0.535351797694822, - "grad_norm": 0.0010857011657208204, - "learning_rate": 0.00019999985897131098, - "loss": 46.0, - "step": 7002 - }, - { - "epoch": 0.5354282546782116, - "grad_norm": 0.002568619092926383, - "learning_rate": 0.0001999998589309681, - "loss": 46.0, - "step": 7003 - }, - { - "epoch": 0.5355047116616014, - "grad_norm": 0.0037138089537620544, - "learning_rate": 0.00019999985889061943, - "loss": 46.0, - "step": 7004 - }, - { - "epoch": 0.5355811686449911, - "grad_norm": 0.0009612892754375935, - "learning_rate": 0.000199999858850265, - "loss": 46.0, - "step": 7005 - }, - { - "epoch": 0.5356576256283808, - "grad_norm": 0.002627705456689, - "learning_rate": 0.0001999998588099048, - "loss": 46.0, - "step": 7006 - }, - { - "epoch": 0.5357340826117706, - "grad_norm": 0.00771617516875267, - "learning_rate": 0.00019999985876953882, - "loss": 46.0, - "step": 7007 - }, - { - "epoch": 0.5358105395951602, - "grad_norm": 0.0007642025593668222, - "learning_rate": 0.0001999998587291671, - "loss": 46.0, - "step": 7008 - }, - { - "epoch": 0.53588699657855, - "grad_norm": 0.0040270183235406876, - "learning_rate": 0.00019999985868878958, - "loss": 46.0, - "step": 7009 - }, - { - "epoch": 0.5359634535619398, - "grad_norm": 0.010913661681115627, - "learning_rate": 0.0001999998586484063, - "loss": 46.0, - "step": 7010 - }, - { - "epoch": 0.5360399105453294, - "grad_norm": 0.002033588942140341, - "learning_rate": 0.00019999985860801725, - "loss": 46.0, - "step": 7011 - }, - { - "epoch": 0.5361163675287192, - "grad_norm": 0.010351110249757767, - "learning_rate": 0.00019999985856762245, - "loss": 46.0, - "step": 7012 - }, - { - "epoch": 0.5361928245121089, - "grad_norm": 0.0013268848415464163, - "learning_rate": 0.00019999985852722188, - "loss": 46.0, - "step": 7013 - }, - { - "epoch": 0.5362692814954986, - "grad_norm": 0.0012470822548493743, - "learning_rate": 0.0001999998584868155, - "loss": 46.0, - "step": 7014 - }, - { - "epoch": 0.5363457384788883, - "grad_norm": 0.0014491754118353128, - "learning_rate": 0.0001999998584464034, - "loss": 46.0, - "step": 7015 - }, - { - "epoch": 0.536422195462278, - "grad_norm": 0.0009737068903632462, - "learning_rate": 0.0001999998584059855, - "loss": 46.0, - "step": 7016 - }, - { - "epoch": 0.5364986524456677, - "grad_norm": 0.0014498020755127072, - "learning_rate": 0.00019999985836556184, - "loss": 46.0, - "step": 7017 - }, - { - "epoch": 0.5365751094290575, - "grad_norm": 0.0009031247464008629, - "learning_rate": 0.0001999998583251324, - "loss": 46.0, - "step": 7018 - }, - { - "epoch": 0.5366515664124472, - "grad_norm": 0.001969572389498353, - "learning_rate": 0.00019999985828469721, - "loss": 46.0, - "step": 7019 - }, - { - "epoch": 0.5367280233958369, - "grad_norm": 0.0006924162735231221, - "learning_rate": 0.00019999985824425626, - "loss": 46.0, - "step": 7020 - }, - { - "epoch": 0.5368044803792267, - "grad_norm": 0.000846325303427875, - "learning_rate": 0.0001999998582038095, - "loss": 46.0, - "step": 7021 - }, - { - "epoch": 0.5368809373626163, - "grad_norm": 0.0018873761873692274, - "learning_rate": 0.000199999858163357, - "loss": 46.0, - "step": 7022 - }, - { - "epoch": 0.5369573943460061, - "grad_norm": 0.0010873053688555956, - "learning_rate": 0.0001999998581228987, - "loss": 46.0, - "step": 7023 - }, - { - "epoch": 0.5370338513293959, - "grad_norm": 0.00426768371835351, - "learning_rate": 0.00019999985808243466, - "loss": 46.0, - "step": 7024 - }, - { - "epoch": 0.5371103083127855, - "grad_norm": 0.0008283958886750042, - "learning_rate": 0.00019999985804196487, - "loss": 46.0, - "step": 7025 - }, - { - "epoch": 0.5371867652961753, - "grad_norm": 0.0005977737600915134, - "learning_rate": 0.00019999985800148927, - "loss": 46.0, - "step": 7026 - }, - { - "epoch": 0.5372632222795649, - "grad_norm": 0.00611245259642601, - "learning_rate": 0.0001999998579610079, - "loss": 46.0, - "step": 7027 - }, - { - "epoch": 0.5373396792629547, - "grad_norm": 0.0009862891165539622, - "learning_rate": 0.00019999985792052078, - "loss": 46.0, - "step": 7028 - }, - { - "epoch": 0.5374161362463444, - "grad_norm": 0.004697184078395367, - "learning_rate": 0.0001999998578800279, - "loss": 46.0, - "step": 7029 - }, - { - "epoch": 0.5374925932297341, - "grad_norm": 0.0021030385978519917, - "learning_rate": 0.00019999985783952922, - "loss": 46.0, - "step": 7030 - }, - { - "epoch": 0.5375690502131238, - "grad_norm": 0.001516816671937704, - "learning_rate": 0.00019999985779902478, - "loss": 46.0, - "step": 7031 - }, - { - "epoch": 0.5376455071965136, - "grad_norm": 0.007988082244992256, - "learning_rate": 0.00019999985775851457, - "loss": 46.0, - "step": 7032 - }, - { - "epoch": 0.5377219641799033, - "grad_norm": 0.0012250806903466582, - "learning_rate": 0.00019999985771799862, - "loss": 46.0, - "step": 7033 - }, - { - "epoch": 0.537798421163293, - "grad_norm": 0.0036108752246946096, - "learning_rate": 0.00019999985767747686, - "loss": 46.0, - "step": 7034 - }, - { - "epoch": 0.5378748781466828, - "grad_norm": 0.0016761202132329345, - "learning_rate": 0.00019999985763694935, - "loss": 46.0, - "step": 7035 - }, - { - "epoch": 0.5379513351300724, - "grad_norm": 0.0017803935334086418, - "learning_rate": 0.00019999985759641608, - "loss": 46.0, - "step": 7036 - }, - { - "epoch": 0.5380277921134622, - "grad_norm": 0.001953388098627329, - "learning_rate": 0.00019999985755587703, - "loss": 46.0, - "step": 7037 - }, - { - "epoch": 0.5381042490968518, - "grad_norm": 0.0007448914693668485, - "learning_rate": 0.0001999998575153322, - "loss": 46.0, - "step": 7038 - }, - { - "epoch": 0.5381807060802416, - "grad_norm": 0.0007843797211535275, - "learning_rate": 0.00019999985747478163, - "loss": 46.0, - "step": 7039 - }, - { - "epoch": 0.5382571630636314, - "grad_norm": 0.0031101207714527845, - "learning_rate": 0.00019999985743422526, - "loss": 46.0, - "step": 7040 - }, - { - "epoch": 0.538333620047021, - "grad_norm": 0.00283940602093935, - "learning_rate": 0.00019999985739366314, - "loss": 46.0, - "step": 7041 - }, - { - "epoch": 0.5384100770304108, - "grad_norm": 0.00042539872811175883, - "learning_rate": 0.00019999985735309522, - "loss": 46.0, - "step": 7042 - }, - { - "epoch": 0.5384865340138005, - "grad_norm": 0.00046194077003747225, - "learning_rate": 0.00019999985731252156, - "loss": 46.0, - "step": 7043 - }, - { - "epoch": 0.5385629909971902, - "grad_norm": 0.0007489009294658899, - "learning_rate": 0.00019999985727194212, - "loss": 46.0, - "step": 7044 - }, - { - "epoch": 0.5386394479805799, - "grad_norm": 0.0006209298153407872, - "learning_rate": 0.00019999985723135694, - "loss": 46.0, - "step": 7045 - }, - { - "epoch": 0.5387159049639696, - "grad_norm": 0.0036026679445058107, - "learning_rate": 0.00019999985719076595, - "loss": 46.0, - "step": 7046 - }, - { - "epoch": 0.5387923619473594, - "grad_norm": 0.002750386716797948, - "learning_rate": 0.0001999998571501692, - "loss": 46.0, - "step": 7047 - }, - { - "epoch": 0.5388688189307491, - "grad_norm": 0.0005559534765779972, - "learning_rate": 0.0001999998571095667, - "loss": 46.0, - "step": 7048 - }, - { - "epoch": 0.5389452759141388, - "grad_norm": 0.0007957906927913427, - "learning_rate": 0.00019999985706895841, - "loss": 46.0, - "step": 7049 - }, - { - "epoch": 0.5390217328975285, - "grad_norm": 0.0021421632263809443, - "learning_rate": 0.00019999985702834434, - "loss": 46.0, - "step": 7050 - }, - { - "epoch": 0.5390981898809183, - "grad_norm": 0.0006082300678826869, - "learning_rate": 0.0001999998569877245, - "loss": 46.0, - "step": 7051 - }, - { - "epoch": 0.5391746468643079, - "grad_norm": 0.003624261822551489, - "learning_rate": 0.00019999985694709894, - "loss": 46.0, - "step": 7052 - }, - { - "epoch": 0.5392511038476977, - "grad_norm": 0.002115654991939664, - "learning_rate": 0.00019999985690646757, - "loss": 46.0, - "step": 7053 - }, - { - "epoch": 0.5393275608310875, - "grad_norm": 0.0008541772840544581, - "learning_rate": 0.00019999985686583043, - "loss": 46.0, - "step": 7054 - }, - { - "epoch": 0.5394040178144771, - "grad_norm": 0.00039037197711877525, - "learning_rate": 0.00019999985682518754, - "loss": 46.0, - "step": 7055 - }, - { - "epoch": 0.5394804747978669, - "grad_norm": 0.0018750993767753243, - "learning_rate": 0.00019999985678453887, - "loss": 46.0, - "step": 7056 - }, - { - "epoch": 0.5395569317812565, - "grad_norm": 0.005054390523582697, - "learning_rate": 0.0001999998567438844, - "loss": 46.0, - "step": 7057 - }, - { - "epoch": 0.5396333887646463, - "grad_norm": 0.002898718463256955, - "learning_rate": 0.00019999985670322422, - "loss": 46.0, - "step": 7058 - }, - { - "epoch": 0.539709845748036, - "grad_norm": 0.0029946218710392714, - "learning_rate": 0.00019999985666255824, - "loss": 46.0, - "step": 7059 - }, - { - "epoch": 0.5397863027314257, - "grad_norm": 0.0005702909547835588, - "learning_rate": 0.00019999985662188648, - "loss": 46.0, - "step": 7060 - }, - { - "epoch": 0.5398627597148155, - "grad_norm": 0.001118018408305943, - "learning_rate": 0.00019999985658120898, - "loss": 46.0, - "step": 7061 - }, - { - "epoch": 0.5399392166982052, - "grad_norm": 0.002047475427389145, - "learning_rate": 0.0001999998565405257, - "loss": 46.0, - "step": 7062 - }, - { - "epoch": 0.5400156736815949, - "grad_norm": 0.0029113979544490576, - "learning_rate": 0.00019999985649983662, - "loss": 46.0, - "step": 7063 - }, - { - "epoch": 0.5400921306649846, - "grad_norm": 0.0015522849280387163, - "learning_rate": 0.0001999998564591418, - "loss": 46.0, - "step": 7064 - }, - { - "epoch": 0.5401685876483744, - "grad_norm": 0.0013265385059639812, - "learning_rate": 0.0001999998564184412, - "loss": 46.0, - "step": 7065 - }, - { - "epoch": 0.540245044631764, - "grad_norm": 0.0007060448406264186, - "learning_rate": 0.00019999985637773486, - "loss": 46.0, - "step": 7066 - }, - { - "epoch": 0.5403215016151538, - "grad_norm": 0.0045955488458275795, - "learning_rate": 0.00019999985633702272, - "loss": 46.0, - "step": 7067 - }, - { - "epoch": 0.5403979585985434, - "grad_norm": 0.004033868201076984, - "learning_rate": 0.0001999998562963048, - "loss": 46.0, - "step": 7068 - }, - { - "epoch": 0.5404744155819332, - "grad_norm": 0.0019121209625154734, - "learning_rate": 0.00019999985625558116, - "loss": 46.0, - "step": 7069 - }, - { - "epoch": 0.540550872565323, - "grad_norm": 0.0013592569157481194, - "learning_rate": 0.0001999998562148517, - "loss": 46.0, - "step": 7070 - }, - { - "epoch": 0.5406273295487126, - "grad_norm": 0.0008571575162932277, - "learning_rate": 0.0001999998561741165, - "loss": 46.0, - "step": 7071 - }, - { - "epoch": 0.5407037865321024, - "grad_norm": 0.0030802690889686346, - "learning_rate": 0.0001999998561333755, - "loss": 46.0, - "step": 7072 - }, - { - "epoch": 0.5407802435154921, - "grad_norm": 0.0008141295402310789, - "learning_rate": 0.00019999985609262875, - "loss": 46.0, - "step": 7073 - }, - { - "epoch": 0.5408567004988818, - "grad_norm": 0.005090365651994944, - "learning_rate": 0.00019999985605187624, - "loss": 46.0, - "step": 7074 - }, - { - "epoch": 0.5409331574822716, - "grad_norm": 0.0018800608813762665, - "learning_rate": 0.00019999985601111794, - "loss": 46.0, - "step": 7075 - }, - { - "epoch": 0.5410096144656612, - "grad_norm": 0.0007478751358576119, - "learning_rate": 0.0001999998559703539, - "loss": 46.0, - "step": 7076 - }, - { - "epoch": 0.541086071449051, - "grad_norm": 0.0018071256345137954, - "learning_rate": 0.00019999985592958407, - "loss": 46.0, - "step": 7077 - }, - { - "epoch": 0.5411625284324407, - "grad_norm": 0.003690737532451749, - "learning_rate": 0.00019999985588880844, - "loss": 46.0, - "step": 7078 - }, - { - "epoch": 0.5412389854158304, - "grad_norm": 0.0018988515948876739, - "learning_rate": 0.0001999998558480271, - "loss": 46.0, - "step": 7079 - }, - { - "epoch": 0.5413154423992201, - "grad_norm": 0.0011649978114292026, - "learning_rate": 0.00019999985580723996, - "loss": 46.0, - "step": 7080 - }, - { - "epoch": 0.5413918993826099, - "grad_norm": 0.0005884484271518886, - "learning_rate": 0.00019999985576644704, - "loss": 46.0, - "step": 7081 - }, - { - "epoch": 0.5414683563659995, - "grad_norm": 0.0013894907897338271, - "learning_rate": 0.00019999985572564838, - "loss": 46.0, - "step": 7082 - }, - { - "epoch": 0.5415448133493893, - "grad_norm": 0.00674440385773778, - "learning_rate": 0.0001999998556848439, - "loss": 46.0, - "step": 7083 - }, - { - "epoch": 0.5416212703327791, - "grad_norm": 0.003660250222310424, - "learning_rate": 0.00019999985564403373, - "loss": 46.0, - "step": 7084 - }, - { - "epoch": 0.5416977273161687, - "grad_norm": 0.0015356509247794747, - "learning_rate": 0.00019999985560321775, - "loss": 46.0, - "step": 7085 - }, - { - "epoch": 0.5417741842995585, - "grad_norm": 0.0028624965343624353, - "learning_rate": 0.000199999855562396, - "loss": 46.0, - "step": 7086 - }, - { - "epoch": 0.5418506412829481, - "grad_norm": 0.0012801277916878462, - "learning_rate": 0.00019999985552156846, - "loss": 46.0, - "step": 7087 - }, - { - "epoch": 0.5419270982663379, - "grad_norm": 0.0008986828033812344, - "learning_rate": 0.00019999985548073515, - "loss": 46.0, - "step": 7088 - }, - { - "epoch": 0.5420035552497277, - "grad_norm": 0.0015479095745831728, - "learning_rate": 0.0001999998554398961, - "loss": 46.0, - "step": 7089 - }, - { - "epoch": 0.5420800122331173, - "grad_norm": 0.0010830943938344717, - "learning_rate": 0.00019999985539905128, - "loss": 46.0, - "step": 7090 - }, - { - "epoch": 0.5421564692165071, - "grad_norm": 0.0008617498679086566, - "learning_rate": 0.00019999985535820069, - "loss": 46.0, - "step": 7091 - }, - { - "epoch": 0.5422329261998968, - "grad_norm": 0.00154385797213763, - "learning_rate": 0.00019999985531734432, - "loss": 46.0, - "step": 7092 - }, - { - "epoch": 0.5423093831832865, - "grad_norm": 0.0031907237134873867, - "learning_rate": 0.00019999985527648217, - "loss": 46.0, - "step": 7093 - }, - { - "epoch": 0.5423858401666762, - "grad_norm": 0.0016584784025326371, - "learning_rate": 0.00019999985523561426, - "loss": 46.0, - "step": 7094 - }, - { - "epoch": 0.542462297150066, - "grad_norm": 0.001628034166060388, - "learning_rate": 0.0001999998551947406, - "loss": 46.0, - "step": 7095 - }, - { - "epoch": 0.5425387541334556, - "grad_norm": 0.002685130573809147, - "learning_rate": 0.00019999985515386113, - "loss": 46.0, - "step": 7096 - }, - { - "epoch": 0.5426152111168454, - "grad_norm": 0.003082075621932745, - "learning_rate": 0.0001999998551129759, - "loss": 46.0, - "step": 7097 - }, - { - "epoch": 0.5426916681002351, - "grad_norm": 0.0004978615907020867, - "learning_rate": 0.0001999998550720849, - "loss": 46.0, - "step": 7098 - }, - { - "epoch": 0.5427681250836248, - "grad_norm": 0.00067055260296911, - "learning_rate": 0.00019999985503118815, - "loss": 46.0, - "step": 7099 - }, - { - "epoch": 0.5428445820670146, - "grad_norm": 0.0007644226425327361, - "learning_rate": 0.00019999985499028565, - "loss": 46.0, - "step": 7100 - }, - { - "epoch": 0.5429210390504042, - "grad_norm": 0.004957432858645916, - "learning_rate": 0.00019999985494937735, - "loss": 46.0, - "step": 7101 - }, - { - "epoch": 0.542997496033794, - "grad_norm": 0.0004039366904180497, - "learning_rate": 0.00019999985490846327, - "loss": 46.0, - "step": 7102 - }, - { - "epoch": 0.5430739530171838, - "grad_norm": 0.0006158456089906394, - "learning_rate": 0.00019999985486754345, - "loss": 46.0, - "step": 7103 - }, - { - "epoch": 0.5431504100005734, - "grad_norm": 0.0005012454930692911, - "learning_rate": 0.00019999985482661785, - "loss": 46.0, - "step": 7104 - }, - { - "epoch": 0.5432268669839632, - "grad_norm": 0.0004433618451002985, - "learning_rate": 0.00019999985478568648, - "loss": 46.0, - "step": 7105 - }, - { - "epoch": 0.5433033239673528, - "grad_norm": 0.0007532319286838174, - "learning_rate": 0.00019999985474474934, - "loss": 46.0, - "step": 7106 - }, - { - "epoch": 0.5433797809507426, - "grad_norm": 0.0008327563991770148, - "learning_rate": 0.00019999985470380642, - "loss": 46.0, - "step": 7107 - }, - { - "epoch": 0.5434562379341323, - "grad_norm": 0.0008429056033492088, - "learning_rate": 0.00019999985466285773, - "loss": 46.0, - "step": 7108 - }, - { - "epoch": 0.543532694917522, - "grad_norm": 0.0013189580058678985, - "learning_rate": 0.0001999998546219033, - "loss": 46.0, - "step": 7109 - }, - { - "epoch": 0.5436091519009117, - "grad_norm": 0.0012941764434799552, - "learning_rate": 0.0001999998545809431, - "loss": 46.0, - "step": 7110 - }, - { - "epoch": 0.5436856088843015, - "grad_norm": 0.0017279449384659529, - "learning_rate": 0.0001999998545399771, - "loss": 46.0, - "step": 7111 - }, - { - "epoch": 0.5437620658676912, - "grad_norm": 0.001241541001945734, - "learning_rate": 0.00019999985449900535, - "loss": 46.0, - "step": 7112 - }, - { - "epoch": 0.5438385228510809, - "grad_norm": 0.002491633640602231, - "learning_rate": 0.0001999998544580278, - "loss": 46.0, - "step": 7113 - }, - { - "epoch": 0.5439149798344707, - "grad_norm": 0.000936816621106118, - "learning_rate": 0.0001999998544170445, - "loss": 46.0, - "step": 7114 - }, - { - "epoch": 0.5439914368178603, - "grad_norm": 0.0014724023640155792, - "learning_rate": 0.0001999998543760554, - "loss": 46.0, - "step": 7115 - }, - { - "epoch": 0.5440678938012501, - "grad_norm": 0.0041054049506783485, - "learning_rate": 0.0001999998543350606, - "loss": 46.0, - "step": 7116 - }, - { - "epoch": 0.5441443507846397, - "grad_norm": 0.002560204826295376, - "learning_rate": 0.00019999985429406, - "loss": 46.0, - "step": 7117 - }, - { - "epoch": 0.5442208077680295, - "grad_norm": 0.0015505847986787558, - "learning_rate": 0.0001999998542530536, - "loss": 46.0, - "step": 7118 - }, - { - "epoch": 0.5442972647514193, - "grad_norm": 0.0005005880375392735, - "learning_rate": 0.00019999985421204145, - "loss": 46.0, - "step": 7119 - }, - { - "epoch": 0.5443737217348089, - "grad_norm": 0.0006232415325939655, - "learning_rate": 0.00019999985417102357, - "loss": 46.0, - "step": 7120 - }, - { - "epoch": 0.5444501787181987, - "grad_norm": 0.0017121481942012906, - "learning_rate": 0.00019999985412999988, - "loss": 46.0, - "step": 7121 - }, - { - "epoch": 0.5445266357015884, - "grad_norm": 0.0010269352933391929, - "learning_rate": 0.0001999998540889704, - "loss": 46.0, - "step": 7122 - }, - { - "epoch": 0.5446030926849781, - "grad_norm": 0.0024696041364222765, - "learning_rate": 0.00019999985404793518, - "loss": 46.0, - "step": 7123 - }, - { - "epoch": 0.5446795496683678, - "grad_norm": 0.0011527878232300282, - "learning_rate": 0.0001999998540068942, - "loss": 46.0, - "step": 7124 - }, - { - "epoch": 0.5447560066517576, - "grad_norm": 0.001106124953366816, - "learning_rate": 0.00019999985396584744, - "loss": 46.0, - "step": 7125 - }, - { - "epoch": 0.5448324636351473, - "grad_norm": 0.0011542091378942132, - "learning_rate": 0.0001999998539247949, - "loss": 46.0, - "step": 7126 - }, - { - "epoch": 0.544908920618537, - "grad_norm": 0.0015792502090334892, - "learning_rate": 0.0001999998538837366, - "loss": 46.0, - "step": 7127 - }, - { - "epoch": 0.5449853776019267, - "grad_norm": 0.0016737999394536018, - "learning_rate": 0.00019999985384267253, - "loss": 46.0, - "step": 7128 - }, - { - "epoch": 0.5450618345853164, - "grad_norm": 0.001238030381500721, - "learning_rate": 0.0001999998538016027, - "loss": 46.0, - "step": 7129 - }, - { - "epoch": 0.5451382915687062, - "grad_norm": 0.0007080962532199919, - "learning_rate": 0.00019999985376052709, - "loss": 46.0, - "step": 7130 - }, - { - "epoch": 0.5452147485520958, - "grad_norm": 0.0005658137379214168, - "learning_rate": 0.00019999985371944572, - "loss": 46.0, - "step": 7131 - }, - { - "epoch": 0.5452912055354856, - "grad_norm": 0.0008155421819537878, - "learning_rate": 0.00019999985367835857, - "loss": 46.0, - "step": 7132 - }, - { - "epoch": 0.5453676625188754, - "grad_norm": 0.0021908506751060486, - "learning_rate": 0.00019999985363726563, - "loss": 46.0, - "step": 7133 - }, - { - "epoch": 0.545444119502265, - "grad_norm": 0.0006705531850457191, - "learning_rate": 0.00019999985359616694, - "loss": 46.0, - "step": 7134 - }, - { - "epoch": 0.5455205764856548, - "grad_norm": 0.0010617149528115988, - "learning_rate": 0.0001999998535550625, - "loss": 46.0, - "step": 7135 - }, - { - "epoch": 0.5455970334690444, - "grad_norm": 0.0013305466854944825, - "learning_rate": 0.00019999985351395227, - "loss": 46.0, - "step": 7136 - }, - { - "epoch": 0.5456734904524342, - "grad_norm": 0.0020592212677001953, - "learning_rate": 0.00019999985347283626, - "loss": 46.0, - "step": 7137 - }, - { - "epoch": 0.545749947435824, - "grad_norm": 0.007282910868525505, - "learning_rate": 0.0001999998534317145, - "loss": 46.0, - "step": 7138 - }, - { - "epoch": 0.5458264044192136, - "grad_norm": 0.0008885685238055885, - "learning_rate": 0.00019999985339058698, - "loss": 46.0, - "step": 7139 - }, - { - "epoch": 0.5459028614026034, - "grad_norm": 0.0005298163741827011, - "learning_rate": 0.00019999985334945368, - "loss": 46.0, - "step": 7140 - }, - { - "epoch": 0.5459793183859931, - "grad_norm": 0.0013140361988916993, - "learning_rate": 0.0001999998533083146, - "loss": 46.0, - "step": 7141 - }, - { - "epoch": 0.5460557753693828, - "grad_norm": 0.0007046398241072893, - "learning_rate": 0.00019999985326716978, - "loss": 46.0, - "step": 7142 - }, - { - "epoch": 0.5461322323527725, - "grad_norm": 0.0008345639798790216, - "learning_rate": 0.00019999985322601916, - "loss": 46.0, - "step": 7143 - }, - { - "epoch": 0.5462086893361623, - "grad_norm": 0.005981791764497757, - "learning_rate": 0.00019999985318486276, - "loss": 46.0, - "step": 7144 - }, - { - "epoch": 0.5462851463195519, - "grad_norm": 0.0032875980250537395, - "learning_rate": 0.00019999985314370062, - "loss": 46.0, - "step": 7145 - }, - { - "epoch": 0.5463616033029417, - "grad_norm": 0.0009305764106102288, - "learning_rate": 0.0001999998531025327, - "loss": 46.0, - "step": 7146 - }, - { - "epoch": 0.5464380602863314, - "grad_norm": 0.0010729838395491242, - "learning_rate": 0.000199999853061359, - "loss": 46.0, - "step": 7147 - }, - { - "epoch": 0.5465145172697211, - "grad_norm": 0.0007016872405074537, - "learning_rate": 0.00019999985302017955, - "loss": 46.0, - "step": 7148 - }, - { - "epoch": 0.5465909742531109, - "grad_norm": 0.001024897675961256, - "learning_rate": 0.00019999985297899432, - "loss": 46.0, - "step": 7149 - }, - { - "epoch": 0.5466674312365005, - "grad_norm": 0.0007103008101694286, - "learning_rate": 0.00019999985293780333, - "loss": 46.0, - "step": 7150 - }, - { - "epoch": 0.5467438882198903, - "grad_norm": 0.0005859124939888716, - "learning_rate": 0.00019999985289660655, - "loss": 46.0, - "step": 7151 - }, - { - "epoch": 0.54682034520328, - "grad_norm": 0.0006008106283843517, - "learning_rate": 0.00019999985285540402, - "loss": 46.0, - "step": 7152 - }, - { - "epoch": 0.5468968021866697, - "grad_norm": 0.0008637873688712716, - "learning_rate": 0.00019999985281419572, - "loss": 46.0, - "step": 7153 - }, - { - "epoch": 0.5469732591700595, - "grad_norm": 0.0007222691201604903, - "learning_rate": 0.00019999985277298165, - "loss": 46.0, - "step": 7154 - }, - { - "epoch": 0.5470497161534492, - "grad_norm": 0.002003852976486087, - "learning_rate": 0.0001999998527317618, - "loss": 46.0, - "step": 7155 - }, - { - "epoch": 0.5471261731368389, - "grad_norm": 0.0022850194945931435, - "learning_rate": 0.00019999985269053617, - "loss": 46.0, - "step": 7156 - }, - { - "epoch": 0.5472026301202286, - "grad_norm": 0.000572386896237731, - "learning_rate": 0.0001999998526493048, - "loss": 46.0, - "step": 7157 - }, - { - "epoch": 0.5472790871036183, - "grad_norm": 0.002787049626931548, - "learning_rate": 0.00019999985260806764, - "loss": 46.0, - "step": 7158 - }, - { - "epoch": 0.547355544087008, - "grad_norm": 0.0012862762669101357, - "learning_rate": 0.00019999985256682472, - "loss": 46.0, - "step": 7159 - }, - { - "epoch": 0.5474320010703978, - "grad_norm": 0.0011167486663907766, - "learning_rate": 0.00019999985252557603, - "loss": 46.0, - "step": 7160 - }, - { - "epoch": 0.5475084580537875, - "grad_norm": 0.0021482512820512056, - "learning_rate": 0.00019999985248432157, - "loss": 46.0, - "step": 7161 - }, - { - "epoch": 0.5475849150371772, - "grad_norm": 0.0014115293743088841, - "learning_rate": 0.00019999985244306134, - "loss": 46.0, - "step": 7162 - }, - { - "epoch": 0.547661372020567, - "grad_norm": 0.0011674423003569245, - "learning_rate": 0.00019999985240179533, - "loss": 46.0, - "step": 7163 - }, - { - "epoch": 0.5477378290039566, - "grad_norm": 0.0008137841359712183, - "learning_rate": 0.00019999985236052357, - "loss": 46.0, - "step": 7164 - }, - { - "epoch": 0.5478142859873464, - "grad_norm": 0.0005121002322994173, - "learning_rate": 0.00019999985231924602, - "loss": 46.0, - "step": 7165 - }, - { - "epoch": 0.5478907429707361, - "grad_norm": 0.001126022427342832, - "learning_rate": 0.00019999985227796272, - "loss": 46.0, - "step": 7166 - }, - { - "epoch": 0.5479671999541258, - "grad_norm": 0.00155731406994164, - "learning_rate": 0.00019999985223667364, - "loss": 46.0, - "step": 7167 - }, - { - "epoch": 0.5480436569375156, - "grad_norm": 0.0028543006628751755, - "learning_rate": 0.0001999998521953788, - "loss": 46.0, - "step": 7168 - }, - { - "epoch": 0.5481201139209052, - "grad_norm": 0.0006537465960718691, - "learning_rate": 0.00019999985215407817, - "loss": 46.0, - "step": 7169 - }, - { - "epoch": 0.548196570904295, - "grad_norm": 0.00226815277710557, - "learning_rate": 0.00019999985211277178, - "loss": 46.0, - "step": 7170 - }, - { - "epoch": 0.5482730278876847, - "grad_norm": 0.0025133562739938498, - "learning_rate": 0.0001999998520714596, - "loss": 46.0, - "step": 7171 - }, - { - "epoch": 0.5483494848710744, - "grad_norm": 0.0014155980898067355, - "learning_rate": 0.0001999998520301417, - "loss": 46.0, - "step": 7172 - }, - { - "epoch": 0.5484259418544641, - "grad_norm": 0.004426279105246067, - "learning_rate": 0.000199999851988818, - "loss": 46.0, - "step": 7173 - }, - { - "epoch": 0.5485023988378539, - "grad_norm": 0.0005192711832933128, - "learning_rate": 0.00019999985194748857, - "loss": 46.0, - "step": 7174 - }, - { - "epoch": 0.5485788558212435, - "grad_norm": 0.0006723931874148548, - "learning_rate": 0.0001999998519061533, - "loss": 46.0, - "step": 7175 - }, - { - "epoch": 0.5486553128046333, - "grad_norm": 0.0010698664700612426, - "learning_rate": 0.0001999998518648123, - "loss": 46.0, - "step": 7176 - }, - { - "epoch": 0.548731769788023, - "grad_norm": 0.0011616181582212448, - "learning_rate": 0.00019999985182346554, - "loss": 46.0, - "step": 7177 - }, - { - "epoch": 0.5488082267714127, - "grad_norm": 0.0011422683019191027, - "learning_rate": 0.000199999851782113, - "loss": 46.0, - "step": 7178 - }, - { - "epoch": 0.5488846837548025, - "grad_norm": 0.0023996406234800816, - "learning_rate": 0.0001999998517407547, - "loss": 46.0, - "step": 7179 - }, - { - "epoch": 0.5489611407381921, - "grad_norm": 0.0007847588276490569, - "learning_rate": 0.0001999998516993906, - "loss": 46.0, - "step": 7180 - }, - { - "epoch": 0.5490375977215819, - "grad_norm": 0.0026907974388450384, - "learning_rate": 0.00019999985165802074, - "loss": 46.0, - "step": 7181 - }, - { - "epoch": 0.5491140547049717, - "grad_norm": 0.0020355312153697014, - "learning_rate": 0.00019999985161664512, - "loss": 46.0, - "step": 7182 - }, - { - "epoch": 0.5491905116883613, - "grad_norm": 0.0010263108415529132, - "learning_rate": 0.00019999985157526372, - "loss": 46.0, - "step": 7183 - }, - { - "epoch": 0.5492669686717511, - "grad_norm": 0.0010934211313724518, - "learning_rate": 0.00019999985153387658, - "loss": 46.0, - "step": 7184 - }, - { - "epoch": 0.5493434256551408, - "grad_norm": 0.0007517405902035534, - "learning_rate": 0.00019999985149248364, - "loss": 46.0, - "step": 7185 - }, - { - "epoch": 0.5494198826385305, - "grad_norm": 0.0005203973269090056, - "learning_rate": 0.00019999985145108495, - "loss": 46.0, - "step": 7186 - }, - { - "epoch": 0.5494963396219202, - "grad_norm": 0.002755733672529459, - "learning_rate": 0.0001999998514096805, - "loss": 46.0, - "step": 7187 - }, - { - "epoch": 0.5495727966053099, - "grad_norm": 0.004364221356809139, - "learning_rate": 0.00019999985136827023, - "loss": 46.0, - "step": 7188 - }, - { - "epoch": 0.5496492535886996, - "grad_norm": 0.0014792496804147959, - "learning_rate": 0.00019999985132685425, - "loss": 46.0, - "step": 7189 - }, - { - "epoch": 0.5497257105720894, - "grad_norm": 0.00044737933785654604, - "learning_rate": 0.00019999985128543247, - "loss": 46.0, - "step": 7190 - }, - { - "epoch": 0.5498021675554791, - "grad_norm": 0.000552560028154403, - "learning_rate": 0.00019999985124400494, - "loss": 46.0, - "step": 7191 - }, - { - "epoch": 0.5498786245388688, - "grad_norm": 0.0008303321083076298, - "learning_rate": 0.0001999998512025716, - "loss": 46.0, - "step": 7192 - }, - { - "epoch": 0.5499550815222586, - "grad_norm": 0.0013077287003397942, - "learning_rate": 0.0001999998511611325, - "loss": 46.0, - "step": 7193 - }, - { - "epoch": 0.5500315385056482, - "grad_norm": 0.0029086717404425144, - "learning_rate": 0.00019999985111968766, - "loss": 46.0, - "step": 7194 - }, - { - "epoch": 0.550107995489038, - "grad_norm": 0.001223950763233006, - "learning_rate": 0.00019999985107823704, - "loss": 46.0, - "step": 7195 - }, - { - "epoch": 0.5501844524724278, - "grad_norm": 0.008994979783892632, - "learning_rate": 0.00019999985103678065, - "loss": 46.0, - "step": 7196 - }, - { - "epoch": 0.5502609094558174, - "grad_norm": 0.0031687754672020674, - "learning_rate": 0.00019999985099531848, - "loss": 46.0, - "step": 7197 - }, - { - "epoch": 0.5503373664392072, - "grad_norm": 0.0014216335257515311, - "learning_rate": 0.00019999985095385056, - "loss": 46.0, - "step": 7198 - }, - { - "epoch": 0.5504138234225968, - "grad_norm": 0.0010007154196500778, - "learning_rate": 0.00019999985091237688, - "loss": 46.0, - "step": 7199 - }, - { - "epoch": 0.5504902804059866, - "grad_norm": 0.0035625926684588194, - "learning_rate": 0.0001999998508708974, - "loss": 46.0, - "step": 7200 - }, - { - "epoch": 0.5505667373893763, - "grad_norm": 0.002085289219394326, - "learning_rate": 0.00019999985082941215, - "loss": 46.0, - "step": 7201 - }, - { - "epoch": 0.550643194372766, - "grad_norm": 0.004990634508430958, - "learning_rate": 0.00019999985078792115, - "loss": 46.0, - "step": 7202 - }, - { - "epoch": 0.5507196513561557, - "grad_norm": 0.0041598547250032425, - "learning_rate": 0.00019999985074642437, - "loss": 46.0, - "step": 7203 - }, - { - "epoch": 0.5507961083395455, - "grad_norm": 0.000868777628056705, - "learning_rate": 0.0001999998507049218, - "loss": 46.0, - "step": 7204 - }, - { - "epoch": 0.5508725653229352, - "grad_norm": 0.0008237641886807978, - "learning_rate": 0.00019999985066341348, - "loss": 46.0, - "step": 7205 - }, - { - "epoch": 0.5509490223063249, - "grad_norm": 0.0012723867548629642, - "learning_rate": 0.0001999998506218994, - "loss": 46.0, - "step": 7206 - }, - { - "epoch": 0.5510254792897146, - "grad_norm": 0.0005697850720025599, - "learning_rate": 0.00019999985058037956, - "loss": 46.0, - "step": 7207 - }, - { - "epoch": 0.5511019362731043, - "grad_norm": 0.004647390451282263, - "learning_rate": 0.00019999985053885394, - "loss": 46.0, - "step": 7208 - }, - { - "epoch": 0.5511783932564941, - "grad_norm": 0.0008499856339767575, - "learning_rate": 0.00019999985049732252, - "loss": 46.0, - "step": 7209 - }, - { - "epoch": 0.5512548502398837, - "grad_norm": 0.000891794275958091, - "learning_rate": 0.00019999985045578538, - "loss": 46.0, - "step": 7210 - }, - { - "epoch": 0.5513313072232735, - "grad_norm": 0.0006179591291584074, - "learning_rate": 0.00019999985041424244, - "loss": 46.0, - "step": 7211 - }, - { - "epoch": 0.5514077642066633, - "grad_norm": 0.0005138255655765533, - "learning_rate": 0.00019999985037269372, - "loss": 46.0, - "step": 7212 - }, - { - "epoch": 0.5514842211900529, - "grad_norm": 0.0007363785989582539, - "learning_rate": 0.00019999985033113926, - "loss": 46.0, - "step": 7213 - }, - { - "epoch": 0.5515606781734427, - "grad_norm": 0.0014757682802155614, - "learning_rate": 0.00019999985028957903, - "loss": 46.0, - "step": 7214 - }, - { - "epoch": 0.5516371351568324, - "grad_norm": 0.0030114136170595884, - "learning_rate": 0.00019999985024801302, - "loss": 46.0, - "step": 7215 - }, - { - "epoch": 0.5517135921402221, - "grad_norm": 0.0009294779738411307, - "learning_rate": 0.00019999985020644121, - "loss": 46.0, - "step": 7216 - }, - { - "epoch": 0.5517900491236118, - "grad_norm": 0.0008809330756776035, - "learning_rate": 0.00019999985016486366, - "loss": 46.0, - "step": 7217 - }, - { - "epoch": 0.5518665061070015, - "grad_norm": 0.0016561830416321754, - "learning_rate": 0.00019999985012328036, - "loss": 46.0, - "step": 7218 - }, - { - "epoch": 0.5519429630903913, - "grad_norm": 0.0008260655449703336, - "learning_rate": 0.00019999985008169126, - "loss": 46.0, - "step": 7219 - }, - { - "epoch": 0.552019420073781, - "grad_norm": 0.0005357759655453265, - "learning_rate": 0.0001999998500400964, - "loss": 46.0, - "step": 7220 - }, - { - "epoch": 0.5520958770571707, - "grad_norm": 0.0012031859951093793, - "learning_rate": 0.00019999984999849576, - "loss": 46.0, - "step": 7221 - }, - { - "epoch": 0.5521723340405604, - "grad_norm": 0.00213741441257298, - "learning_rate": 0.0001999998499568894, - "loss": 46.0, - "step": 7222 - }, - { - "epoch": 0.5522487910239502, - "grad_norm": 0.001000562566332519, - "learning_rate": 0.00019999984991527723, - "loss": 46.0, - "step": 7223 - }, - { - "epoch": 0.5523252480073398, - "grad_norm": 0.0037644400727003813, - "learning_rate": 0.00019999984987365926, - "loss": 46.0, - "step": 7224 - }, - { - "epoch": 0.5524017049907296, - "grad_norm": 0.0008638995932415128, - "learning_rate": 0.00019999984983203557, - "loss": 46.0, - "step": 7225 - }, - { - "epoch": 0.5524781619741194, - "grad_norm": 0.0009665734251029789, - "learning_rate": 0.0001999998497904061, - "loss": 46.0, - "step": 7226 - }, - { - "epoch": 0.552554618957509, - "grad_norm": 0.004106669686734676, - "learning_rate": 0.00019999984974877085, - "loss": 46.0, - "step": 7227 - }, - { - "epoch": 0.5526310759408988, - "grad_norm": 0.001969381934031844, - "learning_rate": 0.00019999984970712985, - "loss": 46.0, - "step": 7228 - }, - { - "epoch": 0.5527075329242884, - "grad_norm": 0.0009289950248785317, - "learning_rate": 0.00019999984966548304, - "loss": 46.0, - "step": 7229 - }, - { - "epoch": 0.5527839899076782, - "grad_norm": 0.0009822353022173047, - "learning_rate": 0.00019999984962383049, - "loss": 46.0, - "step": 7230 - }, - { - "epoch": 0.552860446891068, - "grad_norm": 0.0013525179820135236, - "learning_rate": 0.00019999984958217219, - "loss": 46.0, - "step": 7231 - }, - { - "epoch": 0.5529369038744576, - "grad_norm": 0.003662162460386753, - "learning_rate": 0.00019999984954050808, - "loss": 46.0, - "step": 7232 - }, - { - "epoch": 0.5530133608578474, - "grad_norm": 0.0022045017685741186, - "learning_rate": 0.0001999998494988382, - "loss": 46.0, - "step": 7233 - }, - { - "epoch": 0.5530898178412371, - "grad_norm": 0.0016932801809161901, - "learning_rate": 0.0001999998494571626, - "loss": 46.0, - "step": 7234 - }, - { - "epoch": 0.5531662748246268, - "grad_norm": 0.0018906567711383104, - "learning_rate": 0.0001999998494154812, - "loss": 46.0, - "step": 7235 - }, - { - "epoch": 0.5532427318080165, - "grad_norm": 0.0012126766378059983, - "learning_rate": 0.000199999849373794, - "loss": 46.0, - "step": 7236 - }, - { - "epoch": 0.5533191887914062, - "grad_norm": 0.0029551133047789335, - "learning_rate": 0.00019999984933210106, - "loss": 46.0, - "step": 7237 - }, - { - "epoch": 0.5533956457747959, - "grad_norm": 0.0027554978150874376, - "learning_rate": 0.00019999984929040238, - "loss": 46.0, - "step": 7238 - }, - { - "epoch": 0.5534721027581857, - "grad_norm": 0.0003183887165505439, - "learning_rate": 0.0001999998492486979, - "loss": 46.0, - "step": 7239 - }, - { - "epoch": 0.5535485597415754, - "grad_norm": 0.0015790038742125034, - "learning_rate": 0.00019999984920698766, - "loss": 46.0, - "step": 7240 - }, - { - "epoch": 0.5536250167249651, - "grad_norm": 0.0007497859187424183, - "learning_rate": 0.00019999984916527162, - "loss": 46.0, - "step": 7241 - }, - { - "epoch": 0.5537014737083549, - "grad_norm": 0.0012633674778044224, - "learning_rate": 0.00019999984912354984, - "loss": 46.0, - "step": 7242 - }, - { - "epoch": 0.5537779306917445, - "grad_norm": 0.004117726814001799, - "learning_rate": 0.0001999998490818223, - "loss": 46.0, - "step": 7243 - }, - { - "epoch": 0.5538543876751343, - "grad_norm": 0.0010197142837569118, - "learning_rate": 0.00019999984904008896, - "loss": 46.0, - "step": 7244 - }, - { - "epoch": 0.553930844658524, - "grad_norm": 0.0005212892428971827, - "learning_rate": 0.00019999984899834986, - "loss": 46.0, - "step": 7245 - }, - { - "epoch": 0.5540073016419137, - "grad_norm": 0.0004334588011261076, - "learning_rate": 0.000199999848956605, - "loss": 46.0, - "step": 7246 - }, - { - "epoch": 0.5540837586253035, - "grad_norm": 0.001607435755431652, - "learning_rate": 0.00019999984891485437, - "loss": 46.0, - "step": 7247 - }, - { - "epoch": 0.5541602156086931, - "grad_norm": 0.002185296034440398, - "learning_rate": 0.00019999984887309795, - "loss": 46.0, - "step": 7248 - }, - { - "epoch": 0.5542366725920829, - "grad_norm": 0.0016632411861792207, - "learning_rate": 0.0001999998488313358, - "loss": 46.0, - "step": 7249 - }, - { - "epoch": 0.5543131295754726, - "grad_norm": 0.0005494050565175712, - "learning_rate": 0.00019999984878956784, - "loss": 46.0, - "step": 7250 - }, - { - "epoch": 0.5543895865588623, - "grad_norm": 0.0006357296369969845, - "learning_rate": 0.00019999984874779413, - "loss": 46.0, - "step": 7251 - }, - { - "epoch": 0.554466043542252, - "grad_norm": 0.011412625201046467, - "learning_rate": 0.00019999984870601464, - "loss": 46.0, - "step": 7252 - }, - { - "epoch": 0.5545425005256418, - "grad_norm": 0.0039036523085087538, - "learning_rate": 0.00019999984866422938, - "loss": 46.0, - "step": 7253 - }, - { - "epoch": 0.5546189575090315, - "grad_norm": 0.0021096921991556883, - "learning_rate": 0.00019999984862243837, - "loss": 46.0, - "step": 7254 - }, - { - "epoch": 0.5546954144924212, - "grad_norm": 0.0006583980284631252, - "learning_rate": 0.00019999984858064157, - "loss": 46.0, - "step": 7255 - }, - { - "epoch": 0.554771871475811, - "grad_norm": 0.0006962026236578822, - "learning_rate": 0.00019999984853883901, - "loss": 46.0, - "step": 7256 - }, - { - "epoch": 0.5548483284592006, - "grad_norm": 0.0014208783395588398, - "learning_rate": 0.00019999984849703071, - "loss": 46.0, - "step": 7257 - }, - { - "epoch": 0.5549247854425904, - "grad_norm": 0.002004295354709029, - "learning_rate": 0.00019999984845521661, - "loss": 46.0, - "step": 7258 - }, - { - "epoch": 0.55500124242598, - "grad_norm": 0.0006543595809489489, - "learning_rate": 0.00019999984841339674, - "loss": 46.0, - "step": 7259 - }, - { - "epoch": 0.5550776994093698, - "grad_norm": 0.0024939037393778563, - "learning_rate": 0.0001999998483715711, - "loss": 46.0, - "step": 7260 - }, - { - "epoch": 0.5551541563927596, - "grad_norm": 0.006287538446485996, - "learning_rate": 0.0001999998483297397, - "loss": 46.0, - "step": 7261 - }, - { - "epoch": 0.5552306133761492, - "grad_norm": 0.0050195991061627865, - "learning_rate": 0.0001999998482879025, - "loss": 46.0, - "step": 7262 - }, - { - "epoch": 0.555307070359539, - "grad_norm": 0.00040017697028815746, - "learning_rate": 0.00019999984824605957, - "loss": 46.0, - "step": 7263 - }, - { - "epoch": 0.5553835273429287, - "grad_norm": 0.0022776280529797077, - "learning_rate": 0.00019999984820421088, - "loss": 46.0, - "step": 7264 - }, - { - "epoch": 0.5554599843263184, - "grad_norm": 0.0009931365493685007, - "learning_rate": 0.00019999984816235637, - "loss": 46.0, - "step": 7265 - }, - { - "epoch": 0.5555364413097081, - "grad_norm": 0.0003713297483045608, - "learning_rate": 0.00019999984812049614, - "loss": 46.0, - "step": 7266 - }, - { - "epoch": 0.5556128982930979, - "grad_norm": 0.000845150847453624, - "learning_rate": 0.0001999998480786301, - "loss": 46.0, - "step": 7267 - }, - { - "epoch": 0.5556893552764876, - "grad_norm": 0.005847814492881298, - "learning_rate": 0.0001999998480367583, - "loss": 46.0, - "step": 7268 - }, - { - "epoch": 0.5557658122598773, - "grad_norm": 0.0008431893074885011, - "learning_rate": 0.00019999984799488074, - "loss": 46.0, - "step": 7269 - }, - { - "epoch": 0.555842269243267, - "grad_norm": 0.000576764636207372, - "learning_rate": 0.00019999984795299742, - "loss": 46.0, - "step": 7270 - }, - { - "epoch": 0.5559187262266567, - "grad_norm": 0.0009986236691474915, - "learning_rate": 0.00019999984791110832, - "loss": 46.0, - "step": 7271 - }, - { - "epoch": 0.5559951832100465, - "grad_norm": 0.0007525266264565289, - "learning_rate": 0.00019999984786921344, - "loss": 46.0, - "step": 7272 - }, - { - "epoch": 0.5560716401934361, - "grad_norm": 0.0005763444933108985, - "learning_rate": 0.0001999998478273128, - "loss": 46.0, - "step": 7273 - }, - { - "epoch": 0.5561480971768259, - "grad_norm": 0.004906917922198772, - "learning_rate": 0.00019999984778540638, - "loss": 46.0, - "step": 7274 - }, - { - "epoch": 0.5562245541602157, - "grad_norm": 0.0024788579903542995, - "learning_rate": 0.0001999998477434942, - "loss": 46.0, - "step": 7275 - }, - { - "epoch": 0.5563010111436053, - "grad_norm": 0.0006713225739076734, - "learning_rate": 0.00019999984770157627, - "loss": 46.0, - "step": 7276 - }, - { - "epoch": 0.5563774681269951, - "grad_norm": 0.002713465830311179, - "learning_rate": 0.00019999984765965256, - "loss": 46.0, - "step": 7277 - }, - { - "epoch": 0.5564539251103847, - "grad_norm": 0.0006049118819646537, - "learning_rate": 0.00019999984761772305, - "loss": 46.0, - "step": 7278 - }, - { - "epoch": 0.5565303820937745, - "grad_norm": 0.0009185001254081726, - "learning_rate": 0.00019999984757578781, - "loss": 46.0, - "step": 7279 - }, - { - "epoch": 0.5566068390771642, - "grad_norm": 0.0005901929689571261, - "learning_rate": 0.0001999998475338468, - "loss": 46.0, - "step": 7280 - }, - { - "epoch": 0.5566832960605539, - "grad_norm": 0.001059014699421823, - "learning_rate": 0.0001999998474919, - "loss": 46.0, - "step": 7281 - }, - { - "epoch": 0.5567597530439437, - "grad_norm": 0.01876816712319851, - "learning_rate": 0.00019999984744994742, - "loss": 46.0, - "step": 7282 - }, - { - "epoch": 0.5568362100273334, - "grad_norm": 0.0008193185203708708, - "learning_rate": 0.00019999984740798907, - "loss": 46.0, - "step": 7283 - }, - { - "epoch": 0.5569126670107231, - "grad_norm": 0.0029996519442647696, - "learning_rate": 0.000199999847366025, - "loss": 46.0, - "step": 7284 - }, - { - "epoch": 0.5569891239941128, - "grad_norm": 0.0009654250461608171, - "learning_rate": 0.0001999998473240551, - "loss": 46.0, - "step": 7285 - }, - { - "epoch": 0.5570655809775026, - "grad_norm": 0.0018600947223603725, - "learning_rate": 0.00019999984728207948, - "loss": 46.0, - "step": 7286 - }, - { - "epoch": 0.5571420379608922, - "grad_norm": 0.011588250286877155, - "learning_rate": 0.00019999984724009806, - "loss": 46.0, - "step": 7287 - }, - { - "epoch": 0.557218494944282, - "grad_norm": 0.0015380606055259705, - "learning_rate": 0.00019999984719811087, - "loss": 46.0, - "step": 7288 - }, - { - "epoch": 0.5572949519276716, - "grad_norm": 0.00040448809158988297, - "learning_rate": 0.00019999984715611793, - "loss": 46.0, - "step": 7289 - }, - { - "epoch": 0.5573714089110614, - "grad_norm": 0.002322960877791047, - "learning_rate": 0.00019999984711411922, - "loss": 46.0, - "step": 7290 - }, - { - "epoch": 0.5574478658944512, - "grad_norm": 0.0014995394740253687, - "learning_rate": 0.00019999984707211473, - "loss": 46.0, - "step": 7291 - }, - { - "epoch": 0.5575243228778408, - "grad_norm": 0.0012440496357157826, - "learning_rate": 0.00019999984703010447, - "loss": 46.0, - "step": 7292 - }, - { - "epoch": 0.5576007798612306, - "grad_norm": 0.002473707776516676, - "learning_rate": 0.00019999984698808844, - "loss": 46.0, - "step": 7293 - }, - { - "epoch": 0.5576772368446203, - "grad_norm": 0.0009337534429505467, - "learning_rate": 0.0001999998469460666, - "loss": 46.0, - "step": 7294 - }, - { - "epoch": 0.55775369382801, - "grad_norm": 0.005159919150173664, - "learning_rate": 0.00019999984690403905, - "loss": 46.0, - "step": 7295 - }, - { - "epoch": 0.5578301508113997, - "grad_norm": 0.004803015384823084, - "learning_rate": 0.00019999984686200573, - "loss": 46.0, - "step": 7296 - }, - { - "epoch": 0.5579066077947895, - "grad_norm": 0.003702346235513687, - "learning_rate": 0.00019999984681996663, - "loss": 46.0, - "step": 7297 - }, - { - "epoch": 0.5579830647781792, - "grad_norm": 0.000839349115267396, - "learning_rate": 0.00019999984677792173, - "loss": 46.0, - "step": 7298 - }, - { - "epoch": 0.5580595217615689, - "grad_norm": 0.0026783442590385675, - "learning_rate": 0.0001999998467358711, - "loss": 46.0, - "step": 7299 - }, - { - "epoch": 0.5581359787449586, - "grad_norm": 0.0009273099130950868, - "learning_rate": 0.0001999998466938147, - "loss": 46.0, - "step": 7300 - }, - { - "epoch": 0.5582124357283483, - "grad_norm": 0.0013326219050213695, - "learning_rate": 0.0001999998466517525, - "loss": 46.0, - "step": 7301 - }, - { - "epoch": 0.5582888927117381, - "grad_norm": 0.0004435400478541851, - "learning_rate": 0.00019999984660968453, - "loss": 46.0, - "step": 7302 - }, - { - "epoch": 0.5583653496951277, - "grad_norm": 0.0006617952603846788, - "learning_rate": 0.00019999984656761082, - "loss": 46.0, - "step": 7303 - }, - { - "epoch": 0.5584418066785175, - "grad_norm": 0.0008876945939846337, - "learning_rate": 0.0001999998465255313, - "loss": 46.0, - "step": 7304 - }, - { - "epoch": 0.5585182636619073, - "grad_norm": 0.0027649810072034597, - "learning_rate": 0.00019999984648344605, - "loss": 46.0, - "step": 7305 - }, - { - "epoch": 0.5585947206452969, - "grad_norm": 0.0010028013493865728, - "learning_rate": 0.00019999984644135504, - "loss": 46.0, - "step": 7306 - }, - { - "epoch": 0.5586711776286867, - "grad_norm": 0.0015059361467137933, - "learning_rate": 0.00019999984639925824, - "loss": 46.0, - "step": 7307 - }, - { - "epoch": 0.5587476346120763, - "grad_norm": 0.002549107652157545, - "learning_rate": 0.00019999984635715566, - "loss": 46.0, - "step": 7308 - }, - { - "epoch": 0.5588240915954661, - "grad_norm": 0.0016522463411092758, - "learning_rate": 0.00019999984631504733, - "loss": 46.0, - "step": 7309 - }, - { - "epoch": 0.5589005485788558, - "grad_norm": 0.0010461328784003854, - "learning_rate": 0.0001999998462729332, - "loss": 46.0, - "step": 7310 - }, - { - "epoch": 0.5589770055622455, - "grad_norm": 0.00086758635006845, - "learning_rate": 0.00019999984623081334, - "loss": 46.0, - "step": 7311 - }, - { - "epoch": 0.5590534625456353, - "grad_norm": 0.0010320841101929545, - "learning_rate": 0.0001999998461886877, - "loss": 46.0, - "step": 7312 - }, - { - "epoch": 0.559129919529025, - "grad_norm": 0.0038554228376597166, - "learning_rate": 0.00019999984614655627, - "loss": 46.0, - "step": 7313 - }, - { - "epoch": 0.5592063765124147, - "grad_norm": 0.00397162139415741, - "learning_rate": 0.00019999984610441908, - "loss": 46.0, - "step": 7314 - }, - { - "epoch": 0.5592828334958044, - "grad_norm": 0.0019855108112096786, - "learning_rate": 0.00019999984606227614, - "loss": 46.0, - "step": 7315 - }, - { - "epoch": 0.5593592904791942, - "grad_norm": 0.0006833222578279674, - "learning_rate": 0.0001999998460201274, - "loss": 46.0, - "step": 7316 - }, - { - "epoch": 0.5594357474625838, - "grad_norm": 0.0004653666983358562, - "learning_rate": 0.00019999984597797292, - "loss": 46.0, - "step": 7317 - }, - { - "epoch": 0.5595122044459736, - "grad_norm": 0.0012717371573671699, - "learning_rate": 0.00019999984593581266, - "loss": 46.0, - "step": 7318 - }, - { - "epoch": 0.5595886614293633, - "grad_norm": 0.0011538080871105194, - "learning_rate": 0.0001999998458936466, - "loss": 46.0, - "step": 7319 - }, - { - "epoch": 0.559665118412753, - "grad_norm": 0.0020591483917087317, - "learning_rate": 0.00019999984585147482, - "loss": 46.0, - "step": 7320 - }, - { - "epoch": 0.5597415753961428, - "grad_norm": 0.004964011255651712, - "learning_rate": 0.00019999984580929721, - "loss": 46.0, - "step": 7321 - }, - { - "epoch": 0.5598180323795324, - "grad_norm": 0.006484550423920155, - "learning_rate": 0.0001999998457671139, - "loss": 46.0, - "step": 7322 - }, - { - "epoch": 0.5598944893629222, - "grad_norm": 0.00125783437397331, - "learning_rate": 0.0001999998457249248, - "loss": 46.0, - "step": 7323 - }, - { - "epoch": 0.559970946346312, - "grad_norm": 0.0018940666923299432, - "learning_rate": 0.0001999998456827299, - "loss": 46.0, - "step": 7324 - }, - { - "epoch": 0.5600474033297016, - "grad_norm": 0.0011719519970938563, - "learning_rate": 0.00019999984564052925, - "loss": 46.0, - "step": 7325 - }, - { - "epoch": 0.5601238603130914, - "grad_norm": 0.00046957639278844, - "learning_rate": 0.00019999984559832283, - "loss": 46.0, - "step": 7326 - }, - { - "epoch": 0.5602003172964811, - "grad_norm": 0.0015421287389472127, - "learning_rate": 0.00019999984555611064, - "loss": 46.0, - "step": 7327 - }, - { - "epoch": 0.5602767742798708, - "grad_norm": 0.0010119170183315873, - "learning_rate": 0.00019999984551389267, - "loss": 46.0, - "step": 7328 - }, - { - "epoch": 0.5603532312632605, - "grad_norm": 0.0009469077340327203, - "learning_rate": 0.00019999984547166893, - "loss": 46.0, - "step": 7329 - }, - { - "epoch": 0.5604296882466502, - "grad_norm": 0.00043641417869366705, - "learning_rate": 0.00019999984542943945, - "loss": 46.0, - "step": 7330 - }, - { - "epoch": 0.5605061452300399, - "grad_norm": 0.0014040989335626364, - "learning_rate": 0.0001999998453872042, - "loss": 46.0, - "step": 7331 - }, - { - "epoch": 0.5605826022134297, - "grad_norm": 0.00037488597445189953, - "learning_rate": 0.00019999984534496313, - "loss": 46.0, - "step": 7332 - }, - { - "epoch": 0.5606590591968194, - "grad_norm": 0.0006711481255479157, - "learning_rate": 0.00019999984530271633, - "loss": 46.0, - "step": 7333 - }, - { - "epoch": 0.5607355161802091, - "grad_norm": 0.006668330170214176, - "learning_rate": 0.00019999984526046375, - "loss": 46.0, - "step": 7334 - }, - { - "epoch": 0.5608119731635989, - "grad_norm": 0.006237075664103031, - "learning_rate": 0.00019999984521820542, - "loss": 46.0, - "step": 7335 - }, - { - "epoch": 0.5608884301469885, - "grad_norm": 0.0016187290893867612, - "learning_rate": 0.0001999998451759413, - "loss": 46.0, - "step": 7336 - }, - { - "epoch": 0.5609648871303783, - "grad_norm": 0.0005768187111243606, - "learning_rate": 0.0001999998451336714, - "loss": 46.0, - "step": 7337 - }, - { - "epoch": 0.5610413441137679, - "grad_norm": 0.0028132263105362654, - "learning_rate": 0.00019999984509139575, - "loss": 46.0, - "step": 7338 - }, - { - "epoch": 0.5611178010971577, - "grad_norm": 0.0008070542244240642, - "learning_rate": 0.00019999984504911434, - "loss": 46.0, - "step": 7339 - }, - { - "epoch": 0.5611942580805475, - "grad_norm": 0.001138027524575591, - "learning_rate": 0.00019999984500682715, - "loss": 46.0, - "step": 7340 - }, - { - "epoch": 0.5612707150639371, - "grad_norm": 0.0014547263272106647, - "learning_rate": 0.00019999984496453418, - "loss": 46.0, - "step": 7341 - }, - { - "epoch": 0.5613471720473269, - "grad_norm": 0.001572155742906034, - "learning_rate": 0.00019999984492223547, - "loss": 46.0, - "step": 7342 - }, - { - "epoch": 0.5614236290307166, - "grad_norm": 0.003951024264097214, - "learning_rate": 0.00019999984487993096, - "loss": 46.0, - "step": 7343 - }, - { - "epoch": 0.5615000860141063, - "grad_norm": 0.000999454059638083, - "learning_rate": 0.0001999998448376207, - "loss": 46.0, - "step": 7344 - }, - { - "epoch": 0.561576542997496, - "grad_norm": 0.0003573857247829437, - "learning_rate": 0.00019999984479530464, - "loss": 46.0, - "step": 7345 - }, - { - "epoch": 0.5616529999808858, - "grad_norm": 0.00541074899956584, - "learning_rate": 0.00019999984475298284, - "loss": 46.0, - "step": 7346 - }, - { - "epoch": 0.5617294569642755, - "grad_norm": 0.000958868651650846, - "learning_rate": 0.00019999984471065526, - "loss": 46.0, - "step": 7347 - }, - { - "epoch": 0.5618059139476652, - "grad_norm": 0.0005875806673429906, - "learning_rate": 0.0001999998446683219, - "loss": 46.0, - "step": 7348 - }, - { - "epoch": 0.5618823709310549, - "grad_norm": 0.0007857672171667218, - "learning_rate": 0.0001999998446259828, - "loss": 46.0, - "step": 7349 - }, - { - "epoch": 0.5619588279144446, - "grad_norm": 0.0007955322507768869, - "learning_rate": 0.0001999998445836379, - "loss": 46.0, - "step": 7350 - }, - { - "epoch": 0.5620352848978344, - "grad_norm": 0.0018155993893742561, - "learning_rate": 0.00019999984454128727, - "loss": 46.0, - "step": 7351 - }, - { - "epoch": 0.562111741881224, - "grad_norm": 0.0006256432388909161, - "learning_rate": 0.00019999984449893082, - "loss": 46.0, - "step": 7352 - }, - { - "epoch": 0.5621881988646138, - "grad_norm": 0.001237974502146244, - "learning_rate": 0.00019999984445656863, - "loss": 46.0, - "step": 7353 - }, - { - "epoch": 0.5622646558480036, - "grad_norm": 0.002257004613056779, - "learning_rate": 0.00019999984441420067, - "loss": 46.0, - "step": 7354 - }, - { - "epoch": 0.5623411128313932, - "grad_norm": 0.0014228172367438674, - "learning_rate": 0.00019999984437182693, - "loss": 46.0, - "step": 7355 - }, - { - "epoch": 0.562417569814783, - "grad_norm": 0.001369641860947013, - "learning_rate": 0.00019999984432944742, - "loss": 46.0, - "step": 7356 - }, - { - "epoch": 0.5624940267981727, - "grad_norm": 0.001717785489745438, - "learning_rate": 0.00019999984428706216, - "loss": 46.0, - "step": 7357 - }, - { - "epoch": 0.5625704837815624, - "grad_norm": 0.0029476010240614414, - "learning_rate": 0.0001999998442446711, - "loss": 46.0, - "step": 7358 - }, - { - "epoch": 0.5626469407649521, - "grad_norm": 0.0009090182138606906, - "learning_rate": 0.00019999984420227432, - "loss": 46.0, - "step": 7359 - }, - { - "epoch": 0.5627233977483418, - "grad_norm": 0.0008652320830151439, - "learning_rate": 0.00019999984415987172, - "loss": 46.0, - "step": 7360 - }, - { - "epoch": 0.5627998547317316, - "grad_norm": 0.0019179227529093623, - "learning_rate": 0.00019999984411746334, - "loss": 46.0, - "step": 7361 - }, - { - "epoch": 0.5628763117151213, - "grad_norm": 0.0011555092642083764, - "learning_rate": 0.00019999984407504924, - "loss": 46.0, - "step": 7362 - }, - { - "epoch": 0.562952768698511, - "grad_norm": 0.003565003629773855, - "learning_rate": 0.00019999984403262934, - "loss": 46.0, - "step": 7363 - }, - { - "epoch": 0.5630292256819007, - "grad_norm": 0.000671940972097218, - "learning_rate": 0.00019999984399020367, - "loss": 46.0, - "step": 7364 - }, - { - "epoch": 0.5631056826652905, - "grad_norm": 0.0031001626048237085, - "learning_rate": 0.00019999984394777226, - "loss": 46.0, - "step": 7365 - }, - { - "epoch": 0.5631821396486801, - "grad_norm": 0.0006579491309821606, - "learning_rate": 0.0001999998439053351, - "loss": 46.0, - "step": 7366 - }, - { - "epoch": 0.5632585966320699, - "grad_norm": 0.0016551838489249349, - "learning_rate": 0.0001999998438628921, - "loss": 46.0, - "step": 7367 - }, - { - "epoch": 0.5633350536154595, - "grad_norm": 0.0013096745824441314, - "learning_rate": 0.00019999984382044336, - "loss": 46.0, - "step": 7368 - }, - { - "epoch": 0.5634115105988493, - "grad_norm": 0.006040710024535656, - "learning_rate": 0.00019999984377798885, - "loss": 46.0, - "step": 7369 - }, - { - "epoch": 0.5634879675822391, - "grad_norm": 0.0038013113662600517, - "learning_rate": 0.0001999998437355286, - "loss": 46.0, - "step": 7370 - }, - { - "epoch": 0.5635644245656287, - "grad_norm": 0.0007698804838582873, - "learning_rate": 0.00019999984369306254, - "loss": 46.0, - "step": 7371 - }, - { - "epoch": 0.5636408815490185, - "grad_norm": 0.0009979300666600466, - "learning_rate": 0.00019999984365059073, - "loss": 46.0, - "step": 7372 - }, - { - "epoch": 0.5637173385324082, - "grad_norm": 0.0017158391419798136, - "learning_rate": 0.00019999984360811316, - "loss": 46.0, - "step": 7373 - }, - { - "epoch": 0.5637937955157979, - "grad_norm": 0.0009191979188472033, - "learning_rate": 0.0001999998435656298, - "loss": 46.0, - "step": 7374 - }, - { - "epoch": 0.5638702524991877, - "grad_norm": 0.014036411419510841, - "learning_rate": 0.00019999984352314068, - "loss": 46.0, - "step": 7375 - }, - { - "epoch": 0.5639467094825774, - "grad_norm": 0.0007032278226688504, - "learning_rate": 0.00019999984348064578, - "loss": 46.0, - "step": 7376 - }, - { - "epoch": 0.5640231664659671, - "grad_norm": 0.003338852897286415, - "learning_rate": 0.0001999998434381451, - "loss": 46.0, - "step": 7377 - }, - { - "epoch": 0.5640996234493568, - "grad_norm": 0.0019688718020915985, - "learning_rate": 0.00019999984339563867, - "loss": 46.0, - "step": 7378 - }, - { - "epoch": 0.5641760804327465, - "grad_norm": 0.001912647276185453, - "learning_rate": 0.0001999998433531265, - "loss": 46.0, - "step": 7379 - }, - { - "epoch": 0.5642525374161362, - "grad_norm": 0.001072608050890267, - "learning_rate": 0.0001999998433106085, - "loss": 46.0, - "step": 7380 - }, - { - "epoch": 0.564328994399526, - "grad_norm": 0.0010734391398727894, - "learning_rate": 0.00019999984326808478, - "loss": 46.0, - "step": 7381 - }, - { - "epoch": 0.5644054513829156, - "grad_norm": 0.0008450491586700082, - "learning_rate": 0.00019999984322555524, - "loss": 46.0, - "step": 7382 - }, - { - "epoch": 0.5644819083663054, - "grad_norm": 0.0007184364949353039, - "learning_rate": 0.00019999984318301998, - "loss": 46.0, - "step": 7383 - }, - { - "epoch": 0.5645583653496952, - "grad_norm": 0.0035899863578379154, - "learning_rate": 0.00019999984314047895, - "loss": 46.0, - "step": 7384 - }, - { - "epoch": 0.5646348223330848, - "grad_norm": 0.0004965142579749227, - "learning_rate": 0.00019999984309793212, - "loss": 46.0, - "step": 7385 - }, - { - "epoch": 0.5647112793164746, - "grad_norm": 0.0014849104918539524, - "learning_rate": 0.00019999984305537954, - "loss": 46.0, - "step": 7386 - }, - { - "epoch": 0.5647877362998643, - "grad_norm": 0.004753535613417625, - "learning_rate": 0.00019999984301282116, - "loss": 46.0, - "step": 7387 - }, - { - "epoch": 0.564864193283254, - "grad_norm": 0.0007656702073290944, - "learning_rate": 0.00019999984297025704, - "loss": 46.0, - "step": 7388 - }, - { - "epoch": 0.5649406502666438, - "grad_norm": 0.0014321764465421438, - "learning_rate": 0.00019999984292768717, - "loss": 46.0, - "step": 7389 - }, - { - "epoch": 0.5650171072500334, - "grad_norm": 0.0027056585531681776, - "learning_rate": 0.0001999998428851115, - "loss": 46.0, - "step": 7390 - }, - { - "epoch": 0.5650935642334232, - "grad_norm": 0.0007521676598116755, - "learning_rate": 0.00019999984284253003, - "loss": 46.0, - "step": 7391 - }, - { - "epoch": 0.5651700212168129, - "grad_norm": 0.001418567611835897, - "learning_rate": 0.00019999984279994287, - "loss": 46.0, - "step": 7392 - }, - { - "epoch": 0.5652464782002026, - "grad_norm": 0.0028426814824342728, - "learning_rate": 0.00019999984275734988, - "loss": 46.0, - "step": 7393 - }, - { - "epoch": 0.5653229351835923, - "grad_norm": 0.0005968738114461303, - "learning_rate": 0.00019999984271475114, - "loss": 46.0, - "step": 7394 - }, - { - "epoch": 0.5653993921669821, - "grad_norm": 0.0006396625540219247, - "learning_rate": 0.00019999984267214663, - "loss": 46.0, - "step": 7395 - }, - { - "epoch": 0.5654758491503717, - "grad_norm": 0.0004740832664538175, - "learning_rate": 0.00019999984262953632, - "loss": 46.0, - "step": 7396 - }, - { - "epoch": 0.5655523061337615, - "grad_norm": 0.0033132312819361687, - "learning_rate": 0.0001999998425869203, - "loss": 46.0, - "step": 7397 - }, - { - "epoch": 0.5656287631171513, - "grad_norm": 0.0005001610843464732, - "learning_rate": 0.00019999984254429846, - "loss": 46.0, - "step": 7398 - }, - { - "epoch": 0.5657052201005409, - "grad_norm": 0.00477063050493598, - "learning_rate": 0.00019999984250167088, - "loss": 46.0, - "step": 7399 - }, - { - "epoch": 0.5657816770839307, - "grad_norm": 0.001287501654587686, - "learning_rate": 0.0001999998424590375, - "loss": 46.0, - "step": 7400 - }, - { - "epoch": 0.5658581340673203, - "grad_norm": 0.00038623795262537897, - "learning_rate": 0.00019999984241639838, - "loss": 46.0, - "step": 7401 - }, - { - "epoch": 0.5659345910507101, - "grad_norm": 0.0008132167859002948, - "learning_rate": 0.0001999998423737535, - "loss": 46.0, - "step": 7402 - }, - { - "epoch": 0.5660110480340999, - "grad_norm": 0.005862409248948097, - "learning_rate": 0.00019999984233110284, - "loss": 46.0, - "step": 7403 - }, - { - "epoch": 0.5660875050174895, - "grad_norm": 0.0013818725710734725, - "learning_rate": 0.0001999998422884464, - "loss": 46.0, - "step": 7404 - }, - { - "epoch": 0.5661639620008793, - "grad_norm": 0.0013392572291195393, - "learning_rate": 0.00019999984224578418, - "loss": 46.0, - "step": 7405 - }, - { - "epoch": 0.566240418984269, - "grad_norm": 0.002496361965313554, - "learning_rate": 0.00019999984220311621, - "loss": 46.0, - "step": 7406 - }, - { - "epoch": 0.5663168759676587, - "grad_norm": 0.0072517115622758865, - "learning_rate": 0.00019999984216044245, - "loss": 46.0, - "step": 7407 - }, - { - "epoch": 0.5663933329510484, - "grad_norm": 0.0014614968094974756, - "learning_rate": 0.00019999984211776294, - "loss": 46.0, - "step": 7408 - }, - { - "epoch": 0.5664697899344381, - "grad_norm": 0.0031737065874040127, - "learning_rate": 0.00019999984207507766, - "loss": 46.0, - "step": 7409 - }, - { - "epoch": 0.5665462469178278, - "grad_norm": 0.0021332716569304466, - "learning_rate": 0.00019999984203238663, - "loss": 46.0, - "step": 7410 - }, - { - "epoch": 0.5666227039012176, - "grad_norm": 0.0007254000520333648, - "learning_rate": 0.0001999998419896898, - "loss": 46.0, - "step": 7411 - }, - { - "epoch": 0.5666991608846073, - "grad_norm": 0.0005509118782356381, - "learning_rate": 0.0001999998419469872, - "loss": 46.0, - "step": 7412 - }, - { - "epoch": 0.566775617867997, - "grad_norm": 0.000635275209788233, - "learning_rate": 0.00019999984190427882, - "loss": 46.0, - "step": 7413 - }, - { - "epoch": 0.5668520748513868, - "grad_norm": 0.0012590569676831365, - "learning_rate": 0.0001999998418615647, - "loss": 46.0, - "step": 7414 - }, - { - "epoch": 0.5669285318347764, - "grad_norm": 0.0009258652571588755, - "learning_rate": 0.00019999984181884482, - "loss": 46.0, - "step": 7415 - }, - { - "epoch": 0.5670049888181662, - "grad_norm": 0.001466510584577918, - "learning_rate": 0.00019999984177611913, - "loss": 46.0, - "step": 7416 - }, - { - "epoch": 0.567081445801556, - "grad_norm": 0.0009787505259737372, - "learning_rate": 0.0001999998417333877, - "loss": 46.0, - "step": 7417 - }, - { - "epoch": 0.5671579027849456, - "grad_norm": 0.0009220175561495125, - "learning_rate": 0.00019999984169065047, - "loss": 46.0, - "step": 7418 - }, - { - "epoch": 0.5672343597683354, - "grad_norm": 0.0020545015577226877, - "learning_rate": 0.0001999998416479075, - "loss": 46.0, - "step": 7419 - }, - { - "epoch": 0.567310816751725, - "grad_norm": 0.00028155589825473726, - "learning_rate": 0.00019999984160515877, - "loss": 46.0, - "step": 7420 - }, - { - "epoch": 0.5673872737351148, - "grad_norm": 0.005777201149612665, - "learning_rate": 0.00019999984156240423, - "loss": 46.0, - "step": 7421 - }, - { - "epoch": 0.5674637307185045, - "grad_norm": 0.002540904562920332, - "learning_rate": 0.00019999984151964395, - "loss": 46.0, - "step": 7422 - }, - { - "epoch": 0.5675401877018942, - "grad_norm": 0.000998013187199831, - "learning_rate": 0.0001999998414768779, - "loss": 46.0, - "step": 7423 - }, - { - "epoch": 0.5676166446852839, - "grad_norm": 0.001464120578020811, - "learning_rate": 0.0001999998414341061, - "loss": 46.0, - "step": 7424 - }, - { - "epoch": 0.5676931016686737, - "grad_norm": 0.007333226036280394, - "learning_rate": 0.00019999984139132848, - "loss": 46.0, - "step": 7425 - }, - { - "epoch": 0.5677695586520634, - "grad_norm": 0.0004693620721809566, - "learning_rate": 0.0001999998413485451, - "loss": 46.0, - "step": 7426 - }, - { - "epoch": 0.5678460156354531, - "grad_norm": 0.003078486304730177, - "learning_rate": 0.00019999984130575598, - "loss": 46.0, - "step": 7427 - }, - { - "epoch": 0.5679224726188429, - "grad_norm": 0.0005773595767095685, - "learning_rate": 0.00019999984126296106, - "loss": 46.0, - "step": 7428 - }, - { - "epoch": 0.5679989296022325, - "grad_norm": 0.004314554389566183, - "learning_rate": 0.0001999998412201604, - "loss": 46.0, - "step": 7429 - }, - { - "epoch": 0.5680753865856223, - "grad_norm": 0.0009478558786213398, - "learning_rate": 0.00019999984117735395, - "loss": 46.0, - "step": 7430 - }, - { - "epoch": 0.5681518435690119, - "grad_norm": 0.008602998219430447, - "learning_rate": 0.00019999984113454176, - "loss": 46.0, - "step": 7431 - }, - { - "epoch": 0.5682283005524017, - "grad_norm": 0.0003639942442532629, - "learning_rate": 0.00019999984109172377, - "loss": 46.0, - "step": 7432 - }, - { - "epoch": 0.5683047575357915, - "grad_norm": 0.004051700700074434, - "learning_rate": 0.0001999998410489, - "loss": 46.0, - "step": 7433 - }, - { - "epoch": 0.5683812145191811, - "grad_norm": 0.0008075557416304946, - "learning_rate": 0.00019999984100607047, - "loss": 46.0, - "step": 7434 - }, - { - "epoch": 0.5684576715025709, - "grad_norm": 0.0006399169797077775, - "learning_rate": 0.0001999998409632352, - "loss": 46.0, - "step": 7435 - }, - { - "epoch": 0.5685341284859606, - "grad_norm": 0.0016128354473039508, - "learning_rate": 0.00019999984092039413, - "loss": 46.0, - "step": 7436 - }, - { - "epoch": 0.5686105854693503, - "grad_norm": 0.001321173389442265, - "learning_rate": 0.0001999998408775473, - "loss": 46.0, - "step": 7437 - }, - { - "epoch": 0.56868704245274, - "grad_norm": 0.0007738806889392436, - "learning_rate": 0.00019999984083469472, - "loss": 46.0, - "step": 7438 - }, - { - "epoch": 0.5687634994361297, - "grad_norm": 0.0007164178532548249, - "learning_rate": 0.00019999984079183632, - "loss": 46.0, - "step": 7439 - }, - { - "epoch": 0.5688399564195195, - "grad_norm": 0.0008193447138182819, - "learning_rate": 0.00019999984074897223, - "loss": 46.0, - "step": 7440 - }, - { - "epoch": 0.5689164134029092, - "grad_norm": 0.0010059851920232177, - "learning_rate": 0.0001999998407061023, - "loss": 46.0, - "step": 7441 - }, - { - "epoch": 0.5689928703862989, - "grad_norm": 0.004593095742166042, - "learning_rate": 0.00019999984066322663, - "loss": 46.0, - "step": 7442 - }, - { - "epoch": 0.5690693273696886, - "grad_norm": 0.0010517298942431808, - "learning_rate": 0.0001999998406203452, - "loss": 46.0, - "step": 7443 - }, - { - "epoch": 0.5691457843530784, - "grad_norm": 0.0018093701219186187, - "learning_rate": 0.00019999984057745795, - "loss": 46.0, - "step": 7444 - }, - { - "epoch": 0.569222241336468, - "grad_norm": 0.006365989334881306, - "learning_rate": 0.00019999984053456499, - "loss": 46.0, - "step": 7445 - }, - { - "epoch": 0.5692986983198578, - "grad_norm": 0.0016057032626122236, - "learning_rate": 0.00019999984049166622, - "loss": 46.0, - "step": 7446 - }, - { - "epoch": 0.5693751553032476, - "grad_norm": 0.0006403520237654448, - "learning_rate": 0.00019999984044876169, - "loss": 46.0, - "step": 7447 - }, - { - "epoch": 0.5694516122866372, - "grad_norm": 0.0008778295014053583, - "learning_rate": 0.0001999998404058514, - "loss": 46.0, - "step": 7448 - }, - { - "epoch": 0.569528069270027, - "grad_norm": 0.003783304011449218, - "learning_rate": 0.00019999984036293535, - "loss": 46.0, - "step": 7449 - }, - { - "epoch": 0.5696045262534166, - "grad_norm": 0.0012332770274952054, - "learning_rate": 0.00019999984032001355, - "loss": 46.0, - "step": 7450 - }, - { - "epoch": 0.5696809832368064, - "grad_norm": 0.0024706649128347635, - "learning_rate": 0.00019999984027708592, - "loss": 46.0, - "step": 7451 - }, - { - "epoch": 0.5697574402201961, - "grad_norm": 0.001178263803012669, - "learning_rate": 0.00019999984023415254, - "loss": 46.0, - "step": 7452 - }, - { - "epoch": 0.5698338972035858, - "grad_norm": 0.0047165993601083755, - "learning_rate": 0.00019999984019121342, - "loss": 46.0, - "step": 7453 - }, - { - "epoch": 0.5699103541869756, - "grad_norm": 0.0017295429715886712, - "learning_rate": 0.00019999984014826852, - "loss": 46.0, - "step": 7454 - }, - { - "epoch": 0.5699868111703653, - "grad_norm": 0.0021189383696764708, - "learning_rate": 0.00019999984010531783, - "loss": 46.0, - "step": 7455 - }, - { - "epoch": 0.570063268153755, - "grad_norm": 0.0014236309798434377, - "learning_rate": 0.00019999984006236136, - "loss": 46.0, - "step": 7456 - }, - { - "epoch": 0.5701397251371447, - "grad_norm": 0.0012797509552910924, - "learning_rate": 0.00019999984001939917, - "loss": 46.0, - "step": 7457 - }, - { - "epoch": 0.5702161821205345, - "grad_norm": 0.000946762622334063, - "learning_rate": 0.00019999983997643118, - "loss": 46.0, - "step": 7458 - }, - { - "epoch": 0.5702926391039241, - "grad_norm": 0.0022813996765762568, - "learning_rate": 0.00019999983993345742, - "loss": 46.0, - "step": 7459 - }, - { - "epoch": 0.5703690960873139, - "grad_norm": 0.0027368422597646713, - "learning_rate": 0.00019999983989047788, - "loss": 46.0, - "step": 7460 - }, - { - "epoch": 0.5704455530707035, - "grad_norm": 0.003609990468248725, - "learning_rate": 0.00019999983984749257, - "loss": 46.0, - "step": 7461 - }, - { - "epoch": 0.5705220100540933, - "grad_norm": 0.0014453730545938015, - "learning_rate": 0.00019999983980450154, - "loss": 46.0, - "step": 7462 - }, - { - "epoch": 0.5705984670374831, - "grad_norm": 0.0005166811752133071, - "learning_rate": 0.00019999983976150469, - "loss": 46.0, - "step": 7463 - }, - { - "epoch": 0.5706749240208727, - "grad_norm": 0.0007537009660154581, - "learning_rate": 0.00019999983971850208, - "loss": 46.0, - "step": 7464 - }, - { - "epoch": 0.5707513810042625, - "grad_norm": 0.0005977752152830362, - "learning_rate": 0.0001999998396754937, - "loss": 46.0, - "step": 7465 - }, - { - "epoch": 0.5708278379876522, - "grad_norm": 0.0032258043065667152, - "learning_rate": 0.00019999983963247956, - "loss": 46.0, - "step": 7466 - }, - { - "epoch": 0.5709042949710419, - "grad_norm": 0.0008098523248918355, - "learning_rate": 0.00019999983958945964, - "loss": 46.0, - "step": 7467 - }, - { - "epoch": 0.5709807519544317, - "grad_norm": 0.0006344974972307682, - "learning_rate": 0.00019999983954643397, - "loss": 46.0, - "step": 7468 - }, - { - "epoch": 0.5710572089378213, - "grad_norm": 0.0013121215160936117, - "learning_rate": 0.00019999983950340252, - "loss": 46.0, - "step": 7469 - }, - { - "epoch": 0.5711336659212111, - "grad_norm": 0.0006446399493142962, - "learning_rate": 0.0001999998394603653, - "loss": 46.0, - "step": 7470 - }, - { - "epoch": 0.5712101229046008, - "grad_norm": 0.0012016990222036839, - "learning_rate": 0.00019999983941732232, - "loss": 46.0, - "step": 7471 - }, - { - "epoch": 0.5712865798879905, - "grad_norm": 0.002046516863629222, - "learning_rate": 0.00019999983937427356, - "loss": 46.0, - "step": 7472 - }, - { - "epoch": 0.5713630368713802, - "grad_norm": 0.001340606715530157, - "learning_rate": 0.00019999983933121902, - "loss": 46.0, - "step": 7473 - }, - { - "epoch": 0.57143949385477, - "grad_norm": 0.0003149217809550464, - "learning_rate": 0.00019999983928815874, - "loss": 46.0, - "step": 7474 - }, - { - "epoch": 0.5715159508381596, - "grad_norm": 0.0018251878209412098, - "learning_rate": 0.00019999983924509266, - "loss": 46.0, - "step": 7475 - }, - { - "epoch": 0.5715924078215494, - "grad_norm": 0.001373132923617959, - "learning_rate": 0.00019999983920202083, - "loss": 46.0, - "step": 7476 - }, - { - "epoch": 0.5716688648049392, - "grad_norm": 0.0004142786201555282, - "learning_rate": 0.00019999983915894323, - "loss": 46.0, - "step": 7477 - }, - { - "epoch": 0.5717453217883288, - "grad_norm": 0.0012667272239923477, - "learning_rate": 0.00019999983911585985, - "loss": 46.0, - "step": 7478 - }, - { - "epoch": 0.5718217787717186, - "grad_norm": 0.014262200333178043, - "learning_rate": 0.00019999983907277068, - "loss": 46.0, - "step": 7479 - }, - { - "epoch": 0.5718982357551082, - "grad_norm": 0.0013388856314122677, - "learning_rate": 0.00019999983902967578, - "loss": 46.0, - "step": 7480 - }, - { - "epoch": 0.571974692738498, - "grad_norm": 0.0036675715819001198, - "learning_rate": 0.0001999998389865751, - "loss": 46.0, - "step": 7481 - }, - { - "epoch": 0.5720511497218878, - "grad_norm": 0.0008023617556318641, - "learning_rate": 0.00019999983894346864, - "loss": 46.0, - "step": 7482 - }, - { - "epoch": 0.5721276067052774, - "grad_norm": 0.0011382491793483496, - "learning_rate": 0.00019999983890035643, - "loss": 46.0, - "step": 7483 - }, - { - "epoch": 0.5722040636886672, - "grad_norm": 0.0008855733321979642, - "learning_rate": 0.00019999983885723844, - "loss": 46.0, - "step": 7484 - }, - { - "epoch": 0.5722805206720569, - "grad_norm": 0.008134298957884312, - "learning_rate": 0.00019999983881411465, - "loss": 46.0, - "step": 7485 - }, - { - "epoch": 0.5723569776554466, - "grad_norm": 0.007042621728032827, - "learning_rate": 0.00019999983877098514, - "loss": 46.0, - "step": 7486 - }, - { - "epoch": 0.5724334346388363, - "grad_norm": 0.0024729925207793713, - "learning_rate": 0.00019999983872784983, - "loss": 46.0, - "step": 7487 - }, - { - "epoch": 0.5725098916222261, - "grad_norm": 0.0006616662722080946, - "learning_rate": 0.00019999983868470878, - "loss": 46.0, - "step": 7488 - }, - { - "epoch": 0.5725863486056157, - "grad_norm": 0.00438628951087594, - "learning_rate": 0.00019999983864156192, - "loss": 46.0, - "step": 7489 - }, - { - "epoch": 0.5726628055890055, - "grad_norm": 0.0014128299662843347, - "learning_rate": 0.00019999983859840932, - "loss": 46.0, - "step": 7490 - }, - { - "epoch": 0.5727392625723952, - "grad_norm": 0.0017200742149725556, - "learning_rate": 0.00019999983855525092, - "loss": 46.0, - "step": 7491 - }, - { - "epoch": 0.5728157195557849, - "grad_norm": 0.0012010735226795077, - "learning_rate": 0.0001999998385120868, - "loss": 46.0, - "step": 7492 - }, - { - "epoch": 0.5728921765391747, - "grad_norm": 0.00677097961306572, - "learning_rate": 0.00019999983846891688, - "loss": 46.0, - "step": 7493 - }, - { - "epoch": 0.5729686335225643, - "grad_norm": 0.004874946549534798, - "learning_rate": 0.00019999983842574118, - "loss": 46.0, - "step": 7494 - }, - { - "epoch": 0.5730450905059541, - "grad_norm": 0.001123951398767531, - "learning_rate": 0.00019999983838255974, - "loss": 46.0, - "step": 7495 - }, - { - "epoch": 0.5731215474893439, - "grad_norm": 0.0020737103186547756, - "learning_rate": 0.0001999998383393725, - "loss": 46.0, - "step": 7496 - }, - { - "epoch": 0.5731980044727335, - "grad_norm": 0.0005366839468479156, - "learning_rate": 0.0001999998382961795, - "loss": 46.0, - "step": 7497 - }, - { - "epoch": 0.5732744614561233, - "grad_norm": 0.0006064717308618128, - "learning_rate": 0.00019999983825298075, - "loss": 46.0, - "step": 7498 - }, - { - "epoch": 0.573350918439513, - "grad_norm": 0.00137209496460855, - "learning_rate": 0.0001999998382097762, - "loss": 46.0, - "step": 7499 - }, - { - "epoch": 0.5734273754229027, - "grad_norm": 0.0029947252478450537, - "learning_rate": 0.0001999998381665659, - "loss": 46.0, - "step": 7500 - }, - { - "epoch": 0.5735038324062924, - "grad_norm": 0.0044737015850842, - "learning_rate": 0.00019999983812334982, - "loss": 46.0, - "step": 7501 - }, - { - "epoch": 0.5735802893896821, - "grad_norm": 0.0005357933114282787, - "learning_rate": 0.000199999838080128, - "loss": 46.0, - "step": 7502 - }, - { - "epoch": 0.5736567463730718, - "grad_norm": 0.001026822137646377, - "learning_rate": 0.00019999983803690037, - "loss": 46.0, - "step": 7503 - }, - { - "epoch": 0.5737332033564616, - "grad_norm": 0.001595445442944765, - "learning_rate": 0.000199999837993667, - "loss": 46.0, - "step": 7504 - }, - { - "epoch": 0.5738096603398513, - "grad_norm": 0.0010531191946938634, - "learning_rate": 0.00019999983795042784, - "loss": 46.0, - "step": 7505 - }, - { - "epoch": 0.573886117323241, - "grad_norm": 0.0007644860306754708, - "learning_rate": 0.00019999983790718292, - "loss": 46.0, - "step": 7506 - }, - { - "epoch": 0.5739625743066308, - "grad_norm": 0.002083940664306283, - "learning_rate": 0.00019999983786393223, - "loss": 46.0, - "step": 7507 - }, - { - "epoch": 0.5740390312900204, - "grad_norm": 0.0017230160301551223, - "learning_rate": 0.00019999983782067579, - "loss": 46.0, - "step": 7508 - }, - { - "epoch": 0.5741154882734102, - "grad_norm": 0.001040980452671647, - "learning_rate": 0.00019999983777741354, - "loss": 46.0, - "step": 7509 - }, - { - "epoch": 0.5741919452567998, - "grad_norm": 0.0011150571517646313, - "learning_rate": 0.00019999983773414556, - "loss": 46.0, - "step": 7510 - }, - { - "epoch": 0.5742684022401896, - "grad_norm": 0.0009803981520235538, - "learning_rate": 0.0001999998376908718, - "loss": 46.0, - "step": 7511 - }, - { - "epoch": 0.5743448592235794, - "grad_norm": 0.004075199831277132, - "learning_rate": 0.00019999983764759226, - "loss": 46.0, - "step": 7512 - }, - { - "epoch": 0.574421316206969, - "grad_norm": 0.0021375571377575397, - "learning_rate": 0.00019999983760430695, - "loss": 46.0, - "step": 7513 - }, - { - "epoch": 0.5744977731903588, - "grad_norm": 0.0006680492660962045, - "learning_rate": 0.00019999983756101587, - "loss": 46.0, - "step": 7514 - }, - { - "epoch": 0.5745742301737485, - "grad_norm": 0.006769824307411909, - "learning_rate": 0.00019999983751771902, - "loss": 46.0, - "step": 7515 - }, - { - "epoch": 0.5746506871571382, - "grad_norm": 0.001280179712921381, - "learning_rate": 0.0001999998374744164, - "loss": 46.0, - "step": 7516 - }, - { - "epoch": 0.574727144140528, - "grad_norm": 0.0006041454616934061, - "learning_rate": 0.000199999837431108, - "loss": 46.0, - "step": 7517 - }, - { - "epoch": 0.5748036011239177, - "grad_norm": 0.0036631599068641663, - "learning_rate": 0.00019999983738779387, - "loss": 46.0, - "step": 7518 - }, - { - "epoch": 0.5748800581073074, - "grad_norm": 0.002040630904957652, - "learning_rate": 0.00019999983734447394, - "loss": 46.0, - "step": 7519 - }, - { - "epoch": 0.5749565150906971, - "grad_norm": 0.0007929601124487817, - "learning_rate": 0.00019999983730114825, - "loss": 46.0, - "step": 7520 - }, - { - "epoch": 0.5750329720740868, - "grad_norm": 0.0013229057658463717, - "learning_rate": 0.00019999983725781678, - "loss": 46.0, - "step": 7521 - }, - { - "epoch": 0.5751094290574765, - "grad_norm": 0.0031627847347408533, - "learning_rate": 0.00019999983721447957, - "loss": 46.0, - "step": 7522 - }, - { - "epoch": 0.5751858860408663, - "grad_norm": 0.001706066308543086, - "learning_rate": 0.00019999983717113655, - "loss": 46.0, - "step": 7523 - }, - { - "epoch": 0.5752623430242559, - "grad_norm": 0.0009224142995662987, - "learning_rate": 0.0001999998371277878, - "loss": 46.0, - "step": 7524 - }, - { - "epoch": 0.5753388000076457, - "grad_norm": 0.0008795763133093715, - "learning_rate": 0.00019999983708443323, - "loss": 46.0, - "step": 7525 - }, - { - "epoch": 0.5754152569910355, - "grad_norm": 0.0023847175762057304, - "learning_rate": 0.00019999983704107295, - "loss": 46.0, - "step": 7526 - }, - { - "epoch": 0.5754917139744251, - "grad_norm": 0.00121345988009125, - "learning_rate": 0.00019999983699770687, - "loss": 46.0, - "step": 7527 - }, - { - "epoch": 0.5755681709578149, - "grad_norm": 0.007021130528301001, - "learning_rate": 0.000199999836954335, - "loss": 46.0, - "step": 7528 - }, - { - "epoch": 0.5756446279412046, - "grad_norm": 0.001035722205415368, - "learning_rate": 0.00019999983691095739, - "loss": 46.0, - "step": 7529 - }, - { - "epoch": 0.5757210849245943, - "grad_norm": 0.0010075479513034225, - "learning_rate": 0.00019999983686757398, - "loss": 46.0, - "step": 7530 - }, - { - "epoch": 0.575797541907984, - "grad_norm": 0.004983509890735149, - "learning_rate": 0.00019999983682418484, - "loss": 46.0, - "step": 7531 - }, - { - "epoch": 0.5758739988913737, - "grad_norm": 0.0005552451475523412, - "learning_rate": 0.00019999983678078992, - "loss": 46.0, - "step": 7532 - }, - { - "epoch": 0.5759504558747635, - "grad_norm": 0.0005114002269692719, - "learning_rate": 0.00019999983673738922, - "loss": 46.0, - "step": 7533 - }, - { - "epoch": 0.5760269128581532, - "grad_norm": 0.0009269884903915226, - "learning_rate": 0.00019999983669398275, - "loss": 46.0, - "step": 7534 - }, - { - "epoch": 0.5761033698415429, - "grad_norm": 0.0003901891177520156, - "learning_rate": 0.0001999998366505705, - "loss": 46.0, - "step": 7535 - }, - { - "epoch": 0.5761798268249326, - "grad_norm": 0.0006088235531933606, - "learning_rate": 0.00019999983660715253, - "loss": 46.0, - "step": 7536 - }, - { - "epoch": 0.5762562838083224, - "grad_norm": 0.0029156827367842197, - "learning_rate": 0.00019999983656372874, - "loss": 46.0, - "step": 7537 - }, - { - "epoch": 0.576332740791712, - "grad_norm": 0.0011136867105960846, - "learning_rate": 0.0001999998365202992, - "loss": 46.0, - "step": 7538 - }, - { - "epoch": 0.5764091977751018, - "grad_norm": 0.0053533646278083324, - "learning_rate": 0.00019999983647686392, - "loss": 46.0, - "step": 7539 - }, - { - "epoch": 0.5764856547584915, - "grad_norm": 0.0015955432318150997, - "learning_rate": 0.00019999983643342282, - "loss": 46.0, - "step": 7540 - }, - { - "epoch": 0.5765621117418812, - "grad_norm": 0.0009845118038356304, - "learning_rate": 0.00019999983638997596, - "loss": 46.0, - "step": 7541 - }, - { - "epoch": 0.576638568725271, - "grad_norm": 0.0016344516770914197, - "learning_rate": 0.00019999983634652336, - "loss": 46.0, - "step": 7542 - }, - { - "epoch": 0.5767150257086606, - "grad_norm": 0.001754777505993843, - "learning_rate": 0.00019999983630306496, - "loss": 46.0, - "step": 7543 - }, - { - "epoch": 0.5767914826920504, - "grad_norm": 0.0009846682660281658, - "learning_rate": 0.00019999983625960079, - "loss": 46.0, - "step": 7544 - }, - { - "epoch": 0.5768679396754401, - "grad_norm": 0.0007837713928893209, - "learning_rate": 0.00019999983621613087, - "loss": 46.0, - "step": 7545 - }, - { - "epoch": 0.5769443966588298, - "grad_norm": 0.0015967205399647355, - "learning_rate": 0.00019999983617265517, - "loss": 46.0, - "step": 7546 - }, - { - "epoch": 0.5770208536422196, - "grad_norm": 0.004502040334045887, - "learning_rate": 0.0001999998361291737, - "loss": 46.0, - "step": 7547 - }, - { - "epoch": 0.5770973106256093, - "grad_norm": 0.001678119646385312, - "learning_rate": 0.00019999983608568646, - "loss": 46.0, - "step": 7548 - }, - { - "epoch": 0.577173767608999, - "grad_norm": 0.0014648191863670945, - "learning_rate": 0.00019999983604219345, - "loss": 46.0, - "step": 7549 - }, - { - "epoch": 0.5772502245923887, - "grad_norm": 0.003734854981303215, - "learning_rate": 0.00019999983599869466, - "loss": 46.0, - "step": 7550 - }, - { - "epoch": 0.5773266815757784, - "grad_norm": 0.0024289051070809364, - "learning_rate": 0.00019999983595519013, - "loss": 46.0, - "step": 7551 - }, - { - "epoch": 0.5774031385591681, - "grad_norm": 0.005789545364677906, - "learning_rate": 0.00019999983591167982, - "loss": 46.0, - "step": 7552 - }, - { - "epoch": 0.5774795955425579, - "grad_norm": 0.0009317718795500696, - "learning_rate": 0.00019999983586816374, - "loss": 46.0, - "step": 7553 - }, - { - "epoch": 0.5775560525259475, - "grad_norm": 0.0021060071885585785, - "learning_rate": 0.00019999983582464191, - "loss": 46.0, - "step": 7554 - }, - { - "epoch": 0.5776325095093373, - "grad_norm": 0.0016839266754686832, - "learning_rate": 0.0001999998357811143, - "loss": 46.0, - "step": 7555 - }, - { - "epoch": 0.5777089664927271, - "grad_norm": 0.001243123202584684, - "learning_rate": 0.00019999983573758089, - "loss": 46.0, - "step": 7556 - }, - { - "epoch": 0.5777854234761167, - "grad_norm": 0.000614280579611659, - "learning_rate": 0.0001999998356940417, - "loss": 46.0, - "step": 7557 - }, - { - "epoch": 0.5778618804595065, - "grad_norm": 0.002122477162629366, - "learning_rate": 0.0001999998356504968, - "loss": 46.0, - "step": 7558 - }, - { - "epoch": 0.5779383374428962, - "grad_norm": 0.0017634991090744734, - "learning_rate": 0.00019999983560694607, - "loss": 46.0, - "step": 7559 - }, - { - "epoch": 0.5780147944262859, - "grad_norm": 0.0014423084212467074, - "learning_rate": 0.0001999998355633896, - "loss": 46.0, - "step": 7560 - }, - { - "epoch": 0.5780912514096757, - "grad_norm": 0.00041379593312740326, - "learning_rate": 0.00019999983551982736, - "loss": 46.0, - "step": 7561 - }, - { - "epoch": 0.5781677083930653, - "grad_norm": 0.00222440785728395, - "learning_rate": 0.00019999983547625935, - "loss": 46.0, - "step": 7562 - }, - { - "epoch": 0.5782441653764551, - "grad_norm": 0.0009318541851826012, - "learning_rate": 0.00019999983543268556, - "loss": 46.0, - "step": 7563 - }, - { - "epoch": 0.5783206223598448, - "grad_norm": 0.0007590082241222262, - "learning_rate": 0.00019999983538910603, - "loss": 46.0, - "step": 7564 - }, - { - "epoch": 0.5783970793432345, - "grad_norm": 0.004010630305856466, - "learning_rate": 0.00019999983534552072, - "loss": 46.0, - "step": 7565 - }, - { - "epoch": 0.5784735363266242, - "grad_norm": 0.0023934165947139263, - "learning_rate": 0.00019999983530192962, - "loss": 46.0, - "step": 7566 - }, - { - "epoch": 0.578549993310014, - "grad_norm": 0.0020920245442539454, - "learning_rate": 0.00019999983525833276, - "loss": 46.0, - "step": 7567 - }, - { - "epoch": 0.5786264502934036, - "grad_norm": 0.0008549208287149668, - "learning_rate": 0.00019999983521473013, - "loss": 46.0, - "step": 7568 - }, - { - "epoch": 0.5787029072767934, - "grad_norm": 0.002753807231783867, - "learning_rate": 0.00019999983517112176, - "loss": 46.0, - "step": 7569 - }, - { - "epoch": 0.5787793642601831, - "grad_norm": 0.0021882413420826197, - "learning_rate": 0.0001999998351275076, - "loss": 46.0, - "step": 7570 - }, - { - "epoch": 0.5788558212435728, - "grad_norm": 0.001145792193710804, - "learning_rate": 0.00019999983508388764, - "loss": 46.0, - "step": 7571 - }, - { - "epoch": 0.5789322782269626, - "grad_norm": 0.000683027203194797, - "learning_rate": 0.00019999983504026195, - "loss": 46.0, - "step": 7572 - }, - { - "epoch": 0.5790087352103522, - "grad_norm": 0.0017354689771309495, - "learning_rate": 0.00019999983499663045, - "loss": 46.0, - "step": 7573 - }, - { - "epoch": 0.579085192193742, - "grad_norm": 0.0015501282177865505, - "learning_rate": 0.00019999983495299324, - "loss": 46.0, - "step": 7574 - }, - { - "epoch": 0.5791616491771318, - "grad_norm": 0.0053193713538348675, - "learning_rate": 0.00019999983490935023, - "loss": 46.0, - "step": 7575 - }, - { - "epoch": 0.5792381061605214, - "grad_norm": 0.0010329082142561674, - "learning_rate": 0.0001999998348657014, - "loss": 46.0, - "step": 7576 - }, - { - "epoch": 0.5793145631439112, - "grad_norm": 0.008275882340967655, - "learning_rate": 0.00019999983482204688, - "loss": 46.0, - "step": 7577 - }, - { - "epoch": 0.5793910201273009, - "grad_norm": 0.004763545002788305, - "learning_rate": 0.00019999983477838655, - "loss": 46.0, - "step": 7578 - }, - { - "epoch": 0.5794674771106906, - "grad_norm": 0.001086041796952486, - "learning_rate": 0.00019999983473472047, - "loss": 46.0, - "step": 7579 - }, - { - "epoch": 0.5795439340940803, - "grad_norm": 0.0013631421606987715, - "learning_rate": 0.0001999998346910486, - "loss": 46.0, - "step": 7580 - }, - { - "epoch": 0.57962039107747, - "grad_norm": 0.0008691237308084965, - "learning_rate": 0.000199999834647371, - "loss": 46.0, - "step": 7581 - }, - { - "epoch": 0.5796968480608597, - "grad_norm": 0.001286480575799942, - "learning_rate": 0.0001999998346036876, - "loss": 46.0, - "step": 7582 - }, - { - "epoch": 0.5797733050442495, - "grad_norm": 0.0023991360794752836, - "learning_rate": 0.0001999998345599984, - "loss": 46.0, - "step": 7583 - }, - { - "epoch": 0.5798497620276392, - "grad_norm": 0.0008301687194034457, - "learning_rate": 0.0001999998345163035, - "loss": 46.0, - "step": 7584 - }, - { - "epoch": 0.5799262190110289, - "grad_norm": 0.0012167587410658598, - "learning_rate": 0.00019999983447260277, - "loss": 46.0, - "step": 7585 - }, - { - "epoch": 0.5800026759944187, - "grad_norm": 0.00042961325380019844, - "learning_rate": 0.00019999983442889628, - "loss": 46.0, - "step": 7586 - }, - { - "epoch": 0.5800791329778083, - "grad_norm": 0.0027072043158113956, - "learning_rate": 0.00019999983438518407, - "loss": 46.0, - "step": 7587 - }, - { - "epoch": 0.5801555899611981, - "grad_norm": 0.0007777933496981859, - "learning_rate": 0.00019999983434146605, - "loss": 46.0, - "step": 7588 - }, - { - "epoch": 0.5802320469445879, - "grad_norm": 0.0025236657820641994, - "learning_rate": 0.00019999983429774227, - "loss": 46.0, - "step": 7589 - }, - { - "epoch": 0.5803085039279775, - "grad_norm": 0.00039633637061342597, - "learning_rate": 0.0001999998342540127, - "loss": 46.0, - "step": 7590 - }, - { - "epoch": 0.5803849609113673, - "grad_norm": 0.0013137951027601957, - "learning_rate": 0.0001999998342102774, - "loss": 46.0, - "step": 7591 - }, - { - "epoch": 0.5804614178947569, - "grad_norm": 0.00047574235941283405, - "learning_rate": 0.0001999998341665363, - "loss": 46.0, - "step": 7592 - }, - { - "epoch": 0.5805378748781467, - "grad_norm": 0.0009591268608346581, - "learning_rate": 0.00019999983412278941, - "loss": 46.0, - "step": 7593 - }, - { - "epoch": 0.5806143318615364, - "grad_norm": 0.009303290396928787, - "learning_rate": 0.0001999998340790368, - "loss": 46.0, - "step": 7594 - }, - { - "epoch": 0.5806907888449261, - "grad_norm": 0.007136994507163763, - "learning_rate": 0.0001999998340352784, - "loss": 46.0, - "step": 7595 - }, - { - "epoch": 0.5807672458283158, - "grad_norm": 0.0003480181621853262, - "learning_rate": 0.00019999983399151422, - "loss": 46.0, - "step": 7596 - }, - { - "epoch": 0.5808437028117056, - "grad_norm": 0.0011027996661141515, - "learning_rate": 0.0001999998339477443, - "loss": 46.0, - "step": 7597 - }, - { - "epoch": 0.5809201597950953, - "grad_norm": 0.0011567899491637945, - "learning_rate": 0.00019999983390396858, - "loss": 46.0, - "step": 7598 - }, - { - "epoch": 0.580996616778485, - "grad_norm": 0.0067599681206047535, - "learning_rate": 0.00019999983386018708, - "loss": 46.0, - "step": 7599 - }, - { - "epoch": 0.5810730737618747, - "grad_norm": 0.002067169174551964, - "learning_rate": 0.00019999983381639984, - "loss": 46.0, - "step": 7600 - }, - { - "epoch": 0.5811495307452644, - "grad_norm": 0.0005350538995116949, - "learning_rate": 0.00019999983377260683, - "loss": 46.0, - "step": 7601 - }, - { - "epoch": 0.5812259877286542, - "grad_norm": 0.006487096194177866, - "learning_rate": 0.00019999983372880805, - "loss": 46.0, - "step": 7602 - }, - { - "epoch": 0.5813024447120438, - "grad_norm": 0.0007618963718414307, - "learning_rate": 0.00019999983368500349, - "loss": 46.0, - "step": 7603 - }, - { - "epoch": 0.5813789016954336, - "grad_norm": 0.0011088476749137044, - "learning_rate": 0.00019999983364119315, - "loss": 46.0, - "step": 7604 - }, - { - "epoch": 0.5814553586788234, - "grad_norm": 0.0008921415428631008, - "learning_rate": 0.00019999983359737707, - "loss": 46.0, - "step": 7605 - }, - { - "epoch": 0.581531815662213, - "grad_norm": 0.0014126216992735863, - "learning_rate": 0.0001999998335535552, - "loss": 46.0, - "step": 7606 - }, - { - "epoch": 0.5816082726456028, - "grad_norm": 0.001030371873639524, - "learning_rate": 0.00019999983350972754, - "loss": 46.0, - "step": 7607 - }, - { - "epoch": 0.5816847296289925, - "grad_norm": 0.000780138245318085, - "learning_rate": 0.00019999983346589417, - "loss": 46.0, - "step": 7608 - }, - { - "epoch": 0.5817611866123822, - "grad_norm": 0.006118282675743103, - "learning_rate": 0.000199999833422055, - "loss": 46.0, - "step": 7609 - }, - { - "epoch": 0.581837643595772, - "grad_norm": 0.0013880191836506128, - "learning_rate": 0.00019999983337821005, - "loss": 46.0, - "step": 7610 - }, - { - "epoch": 0.5819141005791616, - "grad_norm": 0.0011992092477157712, - "learning_rate": 0.00019999983333435933, - "loss": 46.0, - "step": 7611 - }, - { - "epoch": 0.5819905575625514, - "grad_norm": 0.0006638895138166845, - "learning_rate": 0.00019999983329050286, - "loss": 46.0, - "step": 7612 - }, - { - "epoch": 0.5820670145459411, - "grad_norm": 0.00025403572362847626, - "learning_rate": 0.0001999998332466406, - "loss": 46.0, - "step": 7613 - }, - { - "epoch": 0.5821434715293308, - "grad_norm": 0.0013250509509816766, - "learning_rate": 0.00019999983320277259, - "loss": 46.0, - "step": 7614 - }, - { - "epoch": 0.5822199285127205, - "grad_norm": 0.0005754359299317002, - "learning_rate": 0.0001999998331588988, - "loss": 46.0, - "step": 7615 - }, - { - "epoch": 0.5822963854961103, - "grad_norm": 0.0012737332144752145, - "learning_rate": 0.00019999983311501924, - "loss": 46.0, - "step": 7616 - }, - { - "epoch": 0.5823728424794999, - "grad_norm": 0.0009545432985760272, - "learning_rate": 0.0001999998330711339, - "loss": 46.0, - "step": 7617 - }, - { - "epoch": 0.5824492994628897, - "grad_norm": 0.0007713224622420967, - "learning_rate": 0.0001999998330272428, - "loss": 46.0, - "step": 7618 - }, - { - "epoch": 0.5825257564462795, - "grad_norm": 0.0009839808335527778, - "learning_rate": 0.00019999983298334595, - "loss": 46.0, - "step": 7619 - }, - { - "epoch": 0.5826022134296691, - "grad_norm": 0.001255712122656405, - "learning_rate": 0.0001999998329394433, - "loss": 46.0, - "step": 7620 - }, - { - "epoch": 0.5826786704130589, - "grad_norm": 0.0008356580510735512, - "learning_rate": 0.0001999998328955349, - "loss": 46.0, - "step": 7621 - }, - { - "epoch": 0.5827551273964485, - "grad_norm": 0.00168485043104738, - "learning_rate": 0.00019999983285162072, - "loss": 46.0, - "step": 7622 - }, - { - "epoch": 0.5828315843798383, - "grad_norm": 0.0011813770979642868, - "learning_rate": 0.00019999983280770075, - "loss": 46.0, - "step": 7623 - }, - { - "epoch": 0.582908041363228, - "grad_norm": 0.0008950316114351153, - "learning_rate": 0.00019999983276377506, - "loss": 46.0, - "step": 7624 - }, - { - "epoch": 0.5829844983466177, - "grad_norm": 0.001904475619085133, - "learning_rate": 0.00019999983271984357, - "loss": 46.0, - "step": 7625 - }, - { - "epoch": 0.5830609553300075, - "grad_norm": 0.0005529436166398227, - "learning_rate": 0.00019999983267590633, - "loss": 46.0, - "step": 7626 - }, - { - "epoch": 0.5831374123133972, - "grad_norm": 0.00045964884338900447, - "learning_rate": 0.0001999998326319633, - "loss": 46.0, - "step": 7627 - }, - { - "epoch": 0.5832138692967869, - "grad_norm": 0.00109334965236485, - "learning_rate": 0.0001999998325880145, - "loss": 46.0, - "step": 7628 - }, - { - "epoch": 0.5832903262801766, - "grad_norm": 0.0016947025433182716, - "learning_rate": 0.00019999983254405994, - "loss": 46.0, - "step": 7629 - }, - { - "epoch": 0.5833667832635664, - "grad_norm": 0.0005835481570102274, - "learning_rate": 0.0001999998325000996, - "loss": 46.0, - "step": 7630 - }, - { - "epoch": 0.583443240246956, - "grad_norm": 0.0016982399392873049, - "learning_rate": 0.0001999998324561335, - "loss": 46.0, - "step": 7631 - }, - { - "epoch": 0.5835196972303458, - "grad_norm": 0.00038127953303046525, - "learning_rate": 0.00019999983241216165, - "loss": 46.0, - "step": 7632 - }, - { - "epoch": 0.5835961542137355, - "grad_norm": 0.0013964161043986678, - "learning_rate": 0.000199999832368184, - "loss": 46.0, - "step": 7633 - }, - { - "epoch": 0.5836726111971252, - "grad_norm": 0.0006763490382581949, - "learning_rate": 0.00019999983232420058, - "loss": 46.0, - "step": 7634 - }, - { - "epoch": 0.583749068180515, - "grad_norm": 0.0017157947877421975, - "learning_rate": 0.00019999983228021143, - "loss": 46.0, - "step": 7635 - }, - { - "epoch": 0.5838255251639046, - "grad_norm": 0.0012805898441001773, - "learning_rate": 0.00019999983223621649, - "loss": 46.0, - "step": 7636 - }, - { - "epoch": 0.5839019821472944, - "grad_norm": 0.0017527329036965966, - "learning_rate": 0.00019999983219221574, - "loss": 46.0, - "step": 7637 - }, - { - "epoch": 0.5839784391306841, - "grad_norm": 0.0005397163331508636, - "learning_rate": 0.00019999983214820927, - "loss": 46.0, - "step": 7638 - }, - { - "epoch": 0.5840548961140738, - "grad_norm": 0.0017146962927654386, - "learning_rate": 0.000199999832104197, - "loss": 46.0, - "step": 7639 - }, - { - "epoch": 0.5841313530974636, - "grad_norm": 0.0013553574681282043, - "learning_rate": 0.00019999983206017897, - "loss": 46.0, - "step": 7640 - }, - { - "epoch": 0.5842078100808532, - "grad_norm": 0.001664946204982698, - "learning_rate": 0.00019999983201615518, - "loss": 46.0, - "step": 7641 - }, - { - "epoch": 0.584284267064243, - "grad_norm": 0.01198570430278778, - "learning_rate": 0.00019999983197212563, - "loss": 46.0, - "step": 7642 - }, - { - "epoch": 0.5843607240476327, - "grad_norm": 0.0011399604845792055, - "learning_rate": 0.0001999998319280903, - "loss": 46.0, - "step": 7643 - }, - { - "epoch": 0.5844371810310224, - "grad_norm": 0.004268769174814224, - "learning_rate": 0.0001999998318840492, - "loss": 46.0, - "step": 7644 - }, - { - "epoch": 0.5845136380144121, - "grad_norm": 0.00304241175763309, - "learning_rate": 0.0001999998318400023, - "loss": 46.0, - "step": 7645 - }, - { - "epoch": 0.5845900949978019, - "grad_norm": 0.0013686362653970718, - "learning_rate": 0.00019999983179594968, - "loss": 46.0, - "step": 7646 - }, - { - "epoch": 0.5846665519811916, - "grad_norm": 0.0018477820558473468, - "learning_rate": 0.00019999983175189126, - "loss": 46.0, - "step": 7647 - }, - { - "epoch": 0.5847430089645813, - "grad_norm": 0.008912919089198112, - "learning_rate": 0.00019999983170782706, - "loss": 46.0, - "step": 7648 - }, - { - "epoch": 0.5848194659479711, - "grad_norm": 0.002248775213956833, - "learning_rate": 0.00019999983166375712, - "loss": 46.0, - "step": 7649 - }, - { - "epoch": 0.5848959229313607, - "grad_norm": 0.014130390249192715, - "learning_rate": 0.0001999998316196814, - "loss": 46.0, - "step": 7650 - }, - { - "epoch": 0.5849723799147505, - "grad_norm": 0.0009850258938968182, - "learning_rate": 0.0001999998315755999, - "loss": 46.0, - "step": 7651 - }, - { - "epoch": 0.5850488368981401, - "grad_norm": 0.0012152059935033321, - "learning_rate": 0.00019999983153151264, - "loss": 46.0, - "step": 7652 - }, - { - "epoch": 0.5851252938815299, - "grad_norm": 0.0006443813326768577, - "learning_rate": 0.00019999983148741963, - "loss": 46.0, - "step": 7653 - }, - { - "epoch": 0.5852017508649197, - "grad_norm": 0.006645088084042072, - "learning_rate": 0.00019999983144332082, - "loss": 46.0, - "step": 7654 - }, - { - "epoch": 0.5852782078483093, - "grad_norm": 0.0019394843839108944, - "learning_rate": 0.00019999983139921626, - "loss": 46.0, - "step": 7655 - }, - { - "epoch": 0.5853546648316991, - "grad_norm": 0.0010840287432074547, - "learning_rate": 0.00019999983135510593, - "loss": 46.0, - "step": 7656 - }, - { - "epoch": 0.5854311218150888, - "grad_norm": 0.0036767369601875544, - "learning_rate": 0.00019999983131098982, - "loss": 46.0, - "step": 7657 - }, - { - "epoch": 0.5855075787984785, - "grad_norm": 0.002124917460605502, - "learning_rate": 0.00019999983126686792, - "loss": 46.0, - "step": 7658 - }, - { - "epoch": 0.5855840357818682, - "grad_norm": 0.004471937194466591, - "learning_rate": 0.0001999998312227403, - "loss": 46.0, - "step": 7659 - }, - { - "epoch": 0.585660492765258, - "grad_norm": 0.0017449866281822324, - "learning_rate": 0.00019999983117860687, - "loss": 46.0, - "step": 7660 - }, - { - "epoch": 0.5857369497486477, - "grad_norm": 0.001235291245393455, - "learning_rate": 0.0001999998311344677, - "loss": 46.0, - "step": 7661 - }, - { - "epoch": 0.5858134067320374, - "grad_norm": 0.0008355448953807354, - "learning_rate": 0.00019999983109032275, - "loss": 46.0, - "step": 7662 - }, - { - "epoch": 0.5858898637154271, - "grad_norm": 0.0008515469380654395, - "learning_rate": 0.000199999831046172, - "loss": 46.0, - "step": 7663 - }, - { - "epoch": 0.5859663206988168, - "grad_norm": 0.0006487657665275037, - "learning_rate": 0.0001999998310020155, - "loss": 46.0, - "step": 7664 - }, - { - "epoch": 0.5860427776822066, - "grad_norm": 0.0010451796697452664, - "learning_rate": 0.00019999983095785327, - "loss": 46.0, - "step": 7665 - }, - { - "epoch": 0.5861192346655962, - "grad_norm": 0.0032195597887039185, - "learning_rate": 0.00019999983091368524, - "loss": 46.0, - "step": 7666 - }, - { - "epoch": 0.586195691648986, - "grad_norm": 0.0005423553520813584, - "learning_rate": 0.00019999983086951142, - "loss": 46.0, - "step": 7667 - }, - { - "epoch": 0.5862721486323758, - "grad_norm": 0.002259672386571765, - "learning_rate": 0.00019999983082533187, - "loss": 46.0, - "step": 7668 - }, - { - "epoch": 0.5863486056157654, - "grad_norm": 0.002088018227368593, - "learning_rate": 0.0001999998307811465, - "loss": 46.0, - "step": 7669 - }, - { - "epoch": 0.5864250625991552, - "grad_norm": 0.0031793571542948484, - "learning_rate": 0.0001999998307369554, - "loss": 46.0, - "step": 7670 - }, - { - "epoch": 0.5865015195825448, - "grad_norm": 0.002627237234264612, - "learning_rate": 0.00019999983069275855, - "loss": 46.0, - "step": 7671 - }, - { - "epoch": 0.5865779765659346, - "grad_norm": 0.0046811169013381, - "learning_rate": 0.0001999998306485559, - "loss": 46.0, - "step": 7672 - }, - { - "epoch": 0.5866544335493243, - "grad_norm": 0.0012938586296513677, - "learning_rate": 0.00019999983060434745, - "loss": 46.0, - "step": 7673 - }, - { - "epoch": 0.586730890532714, - "grad_norm": 0.002174362074583769, - "learning_rate": 0.00019999983056013328, - "loss": 46.0, - "step": 7674 - }, - { - "epoch": 0.5868073475161037, - "grad_norm": 0.0023047400172799826, - "learning_rate": 0.00019999983051591333, - "loss": 46.0, - "step": 7675 - }, - { - "epoch": 0.5868838044994935, - "grad_norm": 0.0009289035806432366, - "learning_rate": 0.0001999998304716876, - "loss": 46.0, - "step": 7676 - }, - { - "epoch": 0.5869602614828832, - "grad_norm": 0.0010724244639277458, - "learning_rate": 0.0001999998304274561, - "loss": 46.0, - "step": 7677 - }, - { - "epoch": 0.5870367184662729, - "grad_norm": 0.0007620546966791153, - "learning_rate": 0.00019999983038321883, - "loss": 46.0, - "step": 7678 - }, - { - "epoch": 0.5871131754496627, - "grad_norm": 0.0021984796039760113, - "learning_rate": 0.0001999998303389758, - "loss": 46.0, - "step": 7679 - }, - { - "epoch": 0.5871896324330523, - "grad_norm": 0.001679317676462233, - "learning_rate": 0.000199999830294727, - "loss": 46.0, - "step": 7680 - }, - { - "epoch": 0.5872660894164421, - "grad_norm": 0.0013205293798819184, - "learning_rate": 0.00019999983025047242, - "loss": 46.0, - "step": 7681 - }, - { - "epoch": 0.5873425463998317, - "grad_norm": 0.0008109578629955649, - "learning_rate": 0.0001999998302062121, - "loss": 46.0, - "step": 7682 - }, - { - "epoch": 0.5874190033832215, - "grad_norm": 0.0011380797950550914, - "learning_rate": 0.00019999983016194596, - "loss": 46.0, - "step": 7683 - }, - { - "epoch": 0.5874954603666113, - "grad_norm": 0.0008693085401318967, - "learning_rate": 0.00019999983011767408, - "loss": 46.0, - "step": 7684 - }, - { - "epoch": 0.5875719173500009, - "grad_norm": 0.0016144621185958385, - "learning_rate": 0.00019999983007339643, - "loss": 46.0, - "step": 7685 - }, - { - "epoch": 0.5876483743333907, - "grad_norm": 0.0011661348398774862, - "learning_rate": 0.000199999830029113, - "loss": 46.0, - "step": 7686 - }, - { - "epoch": 0.5877248313167804, - "grad_norm": 0.0020094676874578, - "learning_rate": 0.00019999982998482384, - "loss": 46.0, - "step": 7687 - }, - { - "epoch": 0.5878012883001701, - "grad_norm": 0.003406036412343383, - "learning_rate": 0.0001999998299405289, - "loss": 46.0, - "step": 7688 - }, - { - "epoch": 0.5878777452835598, - "grad_norm": 0.0016322359442710876, - "learning_rate": 0.00019999982989622815, - "loss": 46.0, - "step": 7689 - }, - { - "epoch": 0.5879542022669496, - "grad_norm": 0.003237111261114478, - "learning_rate": 0.00019999982985192163, - "loss": 46.0, - "step": 7690 - }, - { - "epoch": 0.5880306592503393, - "grad_norm": 0.0024859909899532795, - "learning_rate": 0.0001999998298076094, - "loss": 46.0, - "step": 7691 - }, - { - "epoch": 0.588107116233729, - "grad_norm": 0.004241071175783873, - "learning_rate": 0.00019999982976329133, - "loss": 46.0, - "step": 7692 - }, - { - "epoch": 0.5881835732171187, - "grad_norm": 0.0011420173104852438, - "learning_rate": 0.00019999982971896754, - "loss": 46.0, - "step": 7693 - }, - { - "epoch": 0.5882600302005084, - "grad_norm": 0.0020061784889549017, - "learning_rate": 0.00019999982967463796, - "loss": 46.0, - "step": 7694 - }, - { - "epoch": 0.5883364871838982, - "grad_norm": 0.0017800317145884037, - "learning_rate": 0.0001999998296303026, - "loss": 46.0, - "step": 7695 - }, - { - "epoch": 0.5884129441672878, - "grad_norm": 0.002415225375443697, - "learning_rate": 0.0001999998295859615, - "loss": 46.0, - "step": 7696 - }, - { - "epoch": 0.5884894011506776, - "grad_norm": 0.0008627810748293996, - "learning_rate": 0.0001999998295416146, - "loss": 46.0, - "step": 7697 - }, - { - "epoch": 0.5885658581340674, - "grad_norm": 0.0005771710420958698, - "learning_rate": 0.00019999982949726194, - "loss": 46.0, - "step": 7698 - }, - { - "epoch": 0.588642315117457, - "grad_norm": 0.00194309011567384, - "learning_rate": 0.00019999982945290352, - "loss": 46.0, - "step": 7699 - }, - { - "epoch": 0.5887187721008468, - "grad_norm": 0.001984003931283951, - "learning_rate": 0.00019999982940853935, - "loss": 46.0, - "step": 7700 - }, - { - "epoch": 0.5887952290842364, - "grad_norm": 0.0009429525234736502, - "learning_rate": 0.00019999982936416938, - "loss": 46.0, - "step": 7701 - }, - { - "epoch": 0.5888716860676262, - "grad_norm": 0.013471740297973156, - "learning_rate": 0.00019999982931979363, - "loss": 46.0, - "step": 7702 - }, - { - "epoch": 0.588948143051016, - "grad_norm": 0.0010712002404034138, - "learning_rate": 0.00019999982927541214, - "loss": 46.0, - "step": 7703 - }, - { - "epoch": 0.5890246000344056, - "grad_norm": 0.0004987602587789297, - "learning_rate": 0.00019999982923102488, - "loss": 46.0, - "step": 7704 - }, - { - "epoch": 0.5891010570177954, - "grad_norm": 0.0008510476327501237, - "learning_rate": 0.00019999982918663184, - "loss": 46.0, - "step": 7705 - }, - { - "epoch": 0.5891775140011851, - "grad_norm": 0.004711621440947056, - "learning_rate": 0.00019999982914223303, - "loss": 46.0, - "step": 7706 - }, - { - "epoch": 0.5892539709845748, - "grad_norm": 0.0006135948933660984, - "learning_rate": 0.00019999982909782844, - "loss": 46.0, - "step": 7707 - }, - { - "epoch": 0.5893304279679645, - "grad_norm": 0.00043897968134842813, - "learning_rate": 0.0001999998290534181, - "loss": 46.0, - "step": 7708 - }, - { - "epoch": 0.5894068849513543, - "grad_norm": 0.0013876077719032764, - "learning_rate": 0.00019999982900900198, - "loss": 46.0, - "step": 7709 - }, - { - "epoch": 0.5894833419347439, - "grad_norm": 0.0017914950149133801, - "learning_rate": 0.0001999998289645801, - "loss": 46.0, - "step": 7710 - }, - { - "epoch": 0.5895597989181337, - "grad_norm": 0.0011490164324641228, - "learning_rate": 0.00019999982892015243, - "loss": 46.0, - "step": 7711 - }, - { - "epoch": 0.5896362559015234, - "grad_norm": 0.0015233343001455069, - "learning_rate": 0.000199999828875719, - "loss": 46.0, - "step": 7712 - }, - { - "epoch": 0.5897127128849131, - "grad_norm": 0.013940849341452122, - "learning_rate": 0.0001999998288312798, - "loss": 46.0, - "step": 7713 - }, - { - "epoch": 0.5897891698683029, - "grad_norm": 0.0010767108760774136, - "learning_rate": 0.00019999982878683486, - "loss": 46.0, - "step": 7714 - }, - { - "epoch": 0.5898656268516925, - "grad_norm": 0.0030015925876796246, - "learning_rate": 0.00019999982874238412, - "loss": 46.0, - "step": 7715 - }, - { - "epoch": 0.5899420838350823, - "grad_norm": 0.0034688913729041815, - "learning_rate": 0.00019999982869792763, - "loss": 46.0, - "step": 7716 - }, - { - "epoch": 0.590018540818472, - "grad_norm": 0.00441532488912344, - "learning_rate": 0.00019999982865346534, - "loss": 46.0, - "step": 7717 - }, - { - "epoch": 0.5900949978018617, - "grad_norm": 0.002796633169054985, - "learning_rate": 0.00019999982860899727, - "loss": 46.0, - "step": 7718 - }, - { - "epoch": 0.5901714547852515, - "grad_norm": 0.0012663773959502578, - "learning_rate": 0.0001999998285645235, - "loss": 46.0, - "step": 7719 - }, - { - "epoch": 0.5902479117686412, - "grad_norm": 0.0014051677426323295, - "learning_rate": 0.0001999998285200439, - "loss": 46.0, - "step": 7720 - }, - { - "epoch": 0.5903243687520309, - "grad_norm": 0.0014222945319488645, - "learning_rate": 0.00019999982847555855, - "loss": 46.0, - "step": 7721 - }, - { - "epoch": 0.5904008257354206, - "grad_norm": 0.0015413087094202638, - "learning_rate": 0.00019999982843106745, - "loss": 46.0, - "step": 7722 - }, - { - "epoch": 0.5904772827188103, - "grad_norm": 0.0007205578149296343, - "learning_rate": 0.00019999982838657054, - "loss": 46.0, - "step": 7723 - }, - { - "epoch": 0.5905537397022, - "grad_norm": 0.001248725806362927, - "learning_rate": 0.00019999982834206786, - "loss": 46.0, - "step": 7724 - }, - { - "epoch": 0.5906301966855898, - "grad_norm": 0.0006239841459318995, - "learning_rate": 0.00019999982829755947, - "loss": 46.0, - "step": 7725 - }, - { - "epoch": 0.5907066536689795, - "grad_norm": 0.002972880844026804, - "learning_rate": 0.00019999982825304527, - "loss": 46.0, - "step": 7726 - }, - { - "epoch": 0.5907831106523692, - "grad_norm": 0.0017908732406795025, - "learning_rate": 0.00019999982820852527, - "loss": 46.0, - "step": 7727 - }, - { - "epoch": 0.590859567635759, - "grad_norm": 0.00220440118573606, - "learning_rate": 0.00019999982816399956, - "loss": 46.0, - "step": 7728 - }, - { - "epoch": 0.5909360246191486, - "grad_norm": 0.0013985465047881007, - "learning_rate": 0.00019999982811946804, - "loss": 46.0, - "step": 7729 - }, - { - "epoch": 0.5910124816025384, - "grad_norm": 0.0011062863050028682, - "learning_rate": 0.00019999982807493075, - "loss": 46.0, - "step": 7730 - }, - { - "epoch": 0.5910889385859281, - "grad_norm": 0.007403626572340727, - "learning_rate": 0.0001999998280303877, - "loss": 46.0, - "step": 7731 - }, - { - "epoch": 0.5911653955693178, - "grad_norm": 0.0008812104351818562, - "learning_rate": 0.00019999982798583893, - "loss": 46.0, - "step": 7732 - }, - { - "epoch": 0.5912418525527076, - "grad_norm": 0.0009975922293961048, - "learning_rate": 0.00019999982794128435, - "loss": 46.0, - "step": 7733 - }, - { - "epoch": 0.5913183095360972, - "grad_norm": 0.0015556385042145848, - "learning_rate": 0.000199999827896724, - "loss": 46.0, - "step": 7734 - }, - { - "epoch": 0.591394766519487, - "grad_norm": 0.001604054938070476, - "learning_rate": 0.00019999982785215786, - "loss": 46.0, - "step": 7735 - }, - { - "epoch": 0.5914712235028767, - "grad_norm": 0.0004728210042230785, - "learning_rate": 0.00019999982780758595, - "loss": 46.0, - "step": 7736 - }, - { - "epoch": 0.5915476804862664, - "grad_norm": 0.0004678861587308347, - "learning_rate": 0.0001999998277630083, - "loss": 46.0, - "step": 7737 - }, - { - "epoch": 0.5916241374696561, - "grad_norm": 0.004436808172613382, - "learning_rate": 0.00019999982771842488, - "loss": 46.0, - "step": 7738 - }, - { - "epoch": 0.5917005944530459, - "grad_norm": 0.0026647469494491816, - "learning_rate": 0.00019999982767383566, - "loss": 46.0, - "step": 7739 - }, - { - "epoch": 0.5917770514364356, - "grad_norm": 0.0007762060849927366, - "learning_rate": 0.0001999998276292407, - "loss": 46.0, - "step": 7740 - }, - { - "epoch": 0.5918535084198253, - "grad_norm": 0.00167200923897326, - "learning_rate": 0.00019999982758463994, - "loss": 46.0, - "step": 7741 - }, - { - "epoch": 0.591929965403215, - "grad_norm": 0.0010982464300468564, - "learning_rate": 0.00019999982754003343, - "loss": 46.0, - "step": 7742 - }, - { - "epoch": 0.5920064223866047, - "grad_norm": 0.004163967911154032, - "learning_rate": 0.00019999982749542116, - "loss": 46.0, - "step": 7743 - }, - { - "epoch": 0.5920828793699945, - "grad_norm": 0.0007136771455407143, - "learning_rate": 0.00019999982745080313, - "loss": 46.0, - "step": 7744 - }, - { - "epoch": 0.5921593363533841, - "grad_norm": 0.0030117679852992296, - "learning_rate": 0.00019999982740617932, - "loss": 46.0, - "step": 7745 - }, - { - "epoch": 0.5922357933367739, - "grad_norm": 0.0010822394397109747, - "learning_rate": 0.0001999998273615497, - "loss": 46.0, - "step": 7746 - }, - { - "epoch": 0.5923122503201637, - "grad_norm": 0.002164625097066164, - "learning_rate": 0.00019999982731691435, - "loss": 46.0, - "step": 7747 - }, - { - "epoch": 0.5923887073035533, - "grad_norm": 0.0013500876957550645, - "learning_rate": 0.00019999982727227322, - "loss": 46.0, - "step": 7748 - }, - { - "epoch": 0.5924651642869431, - "grad_norm": 0.0006591337732970715, - "learning_rate": 0.00019999982722762634, - "loss": 46.0, - "step": 7749 - }, - { - "epoch": 0.5925416212703328, - "grad_norm": 0.0062608374282717705, - "learning_rate": 0.00019999982718297367, - "loss": 46.0, - "step": 7750 - }, - { - "epoch": 0.5926180782537225, - "grad_norm": 0.0012927285861223936, - "learning_rate": 0.00019999982713831522, - "loss": 46.0, - "step": 7751 - }, - { - "epoch": 0.5926945352371122, - "grad_norm": 0.0009843326406553388, - "learning_rate": 0.00019999982709365105, - "loss": 46.0, - "step": 7752 - }, - { - "epoch": 0.5927709922205019, - "grad_norm": 0.0004142065590713173, - "learning_rate": 0.00019999982704898105, - "loss": 46.0, - "step": 7753 - }, - { - "epoch": 0.5928474492038917, - "grad_norm": 0.0009537625010125339, - "learning_rate": 0.0001999998270043053, - "loss": 46.0, - "step": 7754 - }, - { - "epoch": 0.5929239061872814, - "grad_norm": 0.0022059367038309574, - "learning_rate": 0.0001999998269596238, - "loss": 46.0, - "step": 7755 - }, - { - "epoch": 0.5930003631706711, - "grad_norm": 0.01708313263952732, - "learning_rate": 0.00019999982691493653, - "loss": 46.0, - "step": 7756 - }, - { - "epoch": 0.5930768201540608, - "grad_norm": 0.0021000804845243692, - "learning_rate": 0.00019999982687024347, - "loss": 46.0, - "step": 7757 - }, - { - "epoch": 0.5931532771374506, - "grad_norm": 0.00047445925883948803, - "learning_rate": 0.00019999982682554466, - "loss": 46.0, - "step": 7758 - }, - { - "epoch": 0.5932297341208402, - "grad_norm": 0.0005947981844656169, - "learning_rate": 0.00019999982678084007, - "loss": 46.0, - "step": 7759 - }, - { - "epoch": 0.59330619110423, - "grad_norm": 0.0010240054689347744, - "learning_rate": 0.0001999998267361297, - "loss": 46.0, - "step": 7760 - }, - { - "epoch": 0.5933826480876198, - "grad_norm": 0.0018377226078882813, - "learning_rate": 0.00019999982669141356, - "loss": 46.0, - "step": 7761 - }, - { - "epoch": 0.5934591050710094, - "grad_norm": 0.0004205318109598011, - "learning_rate": 0.00019999982664669168, - "loss": 46.0, - "step": 7762 - }, - { - "epoch": 0.5935355620543992, - "grad_norm": 0.0016403035260736942, - "learning_rate": 0.000199999826601964, - "loss": 46.0, - "step": 7763 - }, - { - "epoch": 0.5936120190377888, - "grad_norm": 0.010571778751909733, - "learning_rate": 0.00019999982655723059, - "loss": 46.0, - "step": 7764 - }, - { - "epoch": 0.5936884760211786, - "grad_norm": 0.0055676233023405075, - "learning_rate": 0.00019999982651249136, - "loss": 46.0, - "step": 7765 - }, - { - "epoch": 0.5937649330045683, - "grad_norm": 0.0028461567126214504, - "learning_rate": 0.00019999982646774637, - "loss": 46.0, - "step": 7766 - }, - { - "epoch": 0.593841389987958, - "grad_norm": 0.001996994251385331, - "learning_rate": 0.00019999982642299565, - "loss": 46.0, - "step": 7767 - }, - { - "epoch": 0.5939178469713478, - "grad_norm": 0.0009702043025754392, - "learning_rate": 0.00019999982637823913, - "loss": 46.0, - "step": 7768 - }, - { - "epoch": 0.5939943039547375, - "grad_norm": 0.004336015321314335, - "learning_rate": 0.00019999982633347684, - "loss": 46.0, - "step": 7769 - }, - { - "epoch": 0.5940707609381272, - "grad_norm": 0.003671329701319337, - "learning_rate": 0.00019999982628870878, - "loss": 46.0, - "step": 7770 - }, - { - "epoch": 0.5941472179215169, - "grad_norm": 0.0006680940859951079, - "learning_rate": 0.00019999982624393497, - "loss": 46.0, - "step": 7771 - }, - { - "epoch": 0.5942236749049066, - "grad_norm": 0.001191614312119782, - "learning_rate": 0.00019999982619915536, - "loss": 46.0, - "step": 7772 - }, - { - "epoch": 0.5943001318882963, - "grad_norm": 0.0011037515942007303, - "learning_rate": 0.00019999982615437003, - "loss": 46.0, - "step": 7773 - }, - { - "epoch": 0.5943765888716861, - "grad_norm": 0.002332849195227027, - "learning_rate": 0.00019999982610957888, - "loss": 46.0, - "step": 7774 - }, - { - "epoch": 0.5944530458550757, - "grad_norm": 0.004200548864901066, - "learning_rate": 0.00019999982606478197, - "loss": 46.0, - "step": 7775 - }, - { - "epoch": 0.5945295028384655, - "grad_norm": 0.0016789293149486184, - "learning_rate": 0.0001999998260199793, - "loss": 46.0, - "step": 7776 - }, - { - "epoch": 0.5946059598218553, - "grad_norm": 0.002002288121730089, - "learning_rate": 0.00019999982597517088, - "loss": 46.0, - "step": 7777 - }, - { - "epoch": 0.5946824168052449, - "grad_norm": 0.0017278622835874557, - "learning_rate": 0.00019999982593035665, - "loss": 46.0, - "step": 7778 - }, - { - "epoch": 0.5947588737886347, - "grad_norm": 0.000411067099776119, - "learning_rate": 0.00019999982588553668, - "loss": 46.0, - "step": 7779 - }, - { - "epoch": 0.5948353307720244, - "grad_norm": 0.0026848837733268738, - "learning_rate": 0.00019999982584071094, - "loss": 46.0, - "step": 7780 - }, - { - "epoch": 0.5949117877554141, - "grad_norm": 0.0009578131139278412, - "learning_rate": 0.00019999982579587943, - "loss": 46.0, - "step": 7781 - }, - { - "epoch": 0.5949882447388039, - "grad_norm": 0.0007319899741560221, - "learning_rate": 0.00019999982575104214, - "loss": 46.0, - "step": 7782 - }, - { - "epoch": 0.5950647017221935, - "grad_norm": 0.0005809739814139903, - "learning_rate": 0.00019999982570619907, - "loss": 46.0, - "step": 7783 - }, - { - "epoch": 0.5951411587055833, - "grad_norm": 0.0007317565032280982, - "learning_rate": 0.00019999982566135024, - "loss": 46.0, - "step": 7784 - }, - { - "epoch": 0.595217615688973, - "grad_norm": 0.0006904666079208255, - "learning_rate": 0.00019999982561649566, - "loss": 46.0, - "step": 7785 - }, - { - "epoch": 0.5952940726723627, - "grad_norm": 0.0070307450369000435, - "learning_rate": 0.00019999982557163527, - "loss": 46.0, - "step": 7786 - }, - { - "epoch": 0.5953705296557524, - "grad_norm": 0.0012231232831254601, - "learning_rate": 0.00019999982552676917, - "loss": 46.0, - "step": 7787 - }, - { - "epoch": 0.5954469866391422, - "grad_norm": 0.0014784516533836722, - "learning_rate": 0.00019999982548189724, - "loss": 46.0, - "step": 7788 - }, - { - "epoch": 0.5955234436225318, - "grad_norm": 0.002267831237986684, - "learning_rate": 0.0001999998254370196, - "loss": 46.0, - "step": 7789 - }, - { - "epoch": 0.5955999006059216, - "grad_norm": 0.0007285760948434472, - "learning_rate": 0.00019999982539213614, - "loss": 46.0, - "step": 7790 - }, - { - "epoch": 0.5956763575893114, - "grad_norm": 0.0023656245321035385, - "learning_rate": 0.00019999982534724692, - "loss": 46.0, - "step": 7791 - }, - { - "epoch": 0.595752814572701, - "grad_norm": 0.0031207932624965906, - "learning_rate": 0.00019999982530235192, - "loss": 46.0, - "step": 7792 - }, - { - "epoch": 0.5958292715560908, - "grad_norm": 0.001985993469133973, - "learning_rate": 0.00019999982525745118, - "loss": 46.0, - "step": 7793 - }, - { - "epoch": 0.5959057285394804, - "grad_norm": 0.005390759091824293, - "learning_rate": 0.00019999982521254467, - "loss": 46.0, - "step": 7794 - }, - { - "epoch": 0.5959821855228702, - "grad_norm": 0.00456582847982645, - "learning_rate": 0.00019999982516763238, - "loss": 46.0, - "step": 7795 - }, - { - "epoch": 0.59605864250626, - "grad_norm": 0.0006566466181538999, - "learning_rate": 0.00019999982512271434, - "loss": 46.0, - "step": 7796 - }, - { - "epoch": 0.5961350994896496, - "grad_norm": 0.0006910919910296798, - "learning_rate": 0.00019999982507779048, - "loss": 46.0, - "step": 7797 - }, - { - "epoch": 0.5962115564730394, - "grad_norm": 0.0011979263508692384, - "learning_rate": 0.0001999998250328609, - "loss": 46.0, - "step": 7798 - }, - { - "epoch": 0.5962880134564291, - "grad_norm": 0.0018503390019759536, - "learning_rate": 0.00019999982498792554, - "loss": 46.0, - "step": 7799 - }, - { - "epoch": 0.5963644704398188, - "grad_norm": 0.0005962568102404475, - "learning_rate": 0.0001999998249429844, - "loss": 46.0, - "step": 7800 - }, - { - "epoch": 0.5964409274232085, - "grad_norm": 0.001471091527491808, - "learning_rate": 0.00019999982489803748, - "loss": 46.0, - "step": 7801 - }, - { - "epoch": 0.5965173844065982, - "grad_norm": 0.0025981185026466846, - "learning_rate": 0.0001999998248530848, - "loss": 46.0, - "step": 7802 - }, - { - "epoch": 0.5965938413899879, - "grad_norm": 0.0013325948966667056, - "learning_rate": 0.0001999998248081264, - "loss": 46.0, - "step": 7803 - }, - { - "epoch": 0.5966702983733777, - "grad_norm": 0.0005899730604141951, - "learning_rate": 0.00019999982476316214, - "loss": 46.0, - "step": 7804 - }, - { - "epoch": 0.5967467553567674, - "grad_norm": 0.0016896016895771027, - "learning_rate": 0.00019999982471819217, - "loss": 46.0, - "step": 7805 - }, - { - "epoch": 0.5968232123401571, - "grad_norm": 0.0009710498852655292, - "learning_rate": 0.0001999998246732164, - "loss": 46.0, - "step": 7806 - }, - { - "epoch": 0.5968996693235469, - "grad_norm": 0.006422850769013166, - "learning_rate": 0.0001999998246282349, - "loss": 46.0, - "step": 7807 - }, - { - "epoch": 0.5969761263069365, - "grad_norm": 0.004061803687363863, - "learning_rate": 0.0001999998245832476, - "loss": 46.0, - "step": 7808 - }, - { - "epoch": 0.5970525832903263, - "grad_norm": 0.0005329784471541643, - "learning_rate": 0.0001999998245382545, - "loss": 46.0, - "step": 7809 - }, - { - "epoch": 0.597129040273716, - "grad_norm": 0.001856053713709116, - "learning_rate": 0.00019999982449325573, - "loss": 46.0, - "step": 7810 - }, - { - "epoch": 0.5972054972571057, - "grad_norm": 0.0009692359599284828, - "learning_rate": 0.0001999998244482511, - "loss": 46.0, - "step": 7811 - }, - { - "epoch": 0.5972819542404955, - "grad_norm": 0.0038092106115072966, - "learning_rate": 0.00019999982440324074, - "loss": 46.0, - "step": 7812 - }, - { - "epoch": 0.5973584112238851, - "grad_norm": 0.00114718871191144, - "learning_rate": 0.00019999982435822458, - "loss": 46.0, - "step": 7813 - }, - { - "epoch": 0.5974348682072749, - "grad_norm": 0.0014407079434022307, - "learning_rate": 0.00019999982431320268, - "loss": 46.0, - "step": 7814 - }, - { - "epoch": 0.5975113251906646, - "grad_norm": 0.000780620553996414, - "learning_rate": 0.000199999824268175, - "loss": 46.0, - "step": 7815 - }, - { - "epoch": 0.5975877821740543, - "grad_norm": 0.0029868101701140404, - "learning_rate": 0.00019999982422314155, - "loss": 46.0, - "step": 7816 - }, - { - "epoch": 0.597664239157444, - "grad_norm": 0.0012581741902977228, - "learning_rate": 0.00019999982417810236, - "loss": 46.0, - "step": 7817 - }, - { - "epoch": 0.5977406961408338, - "grad_norm": 0.002613708609715104, - "learning_rate": 0.00019999982413305736, - "loss": 46.0, - "step": 7818 - }, - { - "epoch": 0.5978171531242235, - "grad_norm": 0.00441998615860939, - "learning_rate": 0.0001999998240880066, - "loss": 46.0, - "step": 7819 - }, - { - "epoch": 0.5978936101076132, - "grad_norm": 0.0007168451556935906, - "learning_rate": 0.00019999982404295005, - "loss": 46.0, - "step": 7820 - }, - { - "epoch": 0.597970067091003, - "grad_norm": 0.0018303974065929651, - "learning_rate": 0.00019999982399788777, - "loss": 46.0, - "step": 7821 - }, - { - "epoch": 0.5980465240743926, - "grad_norm": 0.006136002950370312, - "learning_rate": 0.0001999998239528197, - "loss": 46.0, - "step": 7822 - }, - { - "epoch": 0.5981229810577824, - "grad_norm": 0.0034344345331192017, - "learning_rate": 0.00019999982390774587, - "loss": 46.0, - "step": 7823 - }, - { - "epoch": 0.598199438041172, - "grad_norm": 0.001203719642944634, - "learning_rate": 0.00019999982386266626, - "loss": 46.0, - "step": 7824 - }, - { - "epoch": 0.5982758950245618, - "grad_norm": 0.0009653617744334042, - "learning_rate": 0.0001999998238175809, - "loss": 46.0, - "step": 7825 - }, - { - "epoch": 0.5983523520079516, - "grad_norm": 0.0034613104071468115, - "learning_rate": 0.00019999982377248975, - "loss": 46.0, - "step": 7826 - }, - { - "epoch": 0.5984288089913412, - "grad_norm": 0.0007446022354997694, - "learning_rate": 0.00019999982372739282, - "loss": 46.0, - "step": 7827 - }, - { - "epoch": 0.598505265974731, - "grad_norm": 0.0010518801864236593, - "learning_rate": 0.00019999982368229015, - "loss": 46.0, - "step": 7828 - }, - { - "epoch": 0.5985817229581207, - "grad_norm": 0.0018843668513000011, - "learning_rate": 0.0001999998236371817, - "loss": 46.0, - "step": 7829 - }, - { - "epoch": 0.5986581799415104, - "grad_norm": 0.0016771783120930195, - "learning_rate": 0.00019999982359206748, - "loss": 46.0, - "step": 7830 - }, - { - "epoch": 0.5987346369249001, - "grad_norm": 0.0005323274526745081, - "learning_rate": 0.00019999982354694748, - "loss": 46.0, - "step": 7831 - }, - { - "epoch": 0.5988110939082898, - "grad_norm": 0.0016021371120586991, - "learning_rate": 0.00019999982350182174, - "loss": 46.0, - "step": 7832 - }, - { - "epoch": 0.5988875508916796, - "grad_norm": 0.003104321425780654, - "learning_rate": 0.00019999982345669023, - "loss": 46.0, - "step": 7833 - }, - { - "epoch": 0.5989640078750693, - "grad_norm": 0.002220702823251486, - "learning_rate": 0.0001999998234115529, - "loss": 46.0, - "step": 7834 - }, - { - "epoch": 0.599040464858459, - "grad_norm": 0.0008930038893595338, - "learning_rate": 0.00019999982336640985, - "loss": 46.0, - "step": 7835 - }, - { - "epoch": 0.5991169218418487, - "grad_norm": 0.0012662962544709444, - "learning_rate": 0.000199999823321261, - "loss": 46.0, - "step": 7836 - }, - { - "epoch": 0.5991933788252385, - "grad_norm": 0.008340027183294296, - "learning_rate": 0.0001999998232761064, - "loss": 46.0, - "step": 7837 - }, - { - "epoch": 0.5992698358086281, - "grad_norm": 0.0023606475442647934, - "learning_rate": 0.00019999982323094603, - "loss": 46.0, - "step": 7838 - }, - { - "epoch": 0.5993462927920179, - "grad_norm": 0.003128261771053076, - "learning_rate": 0.00019999982318577987, - "loss": 46.0, - "step": 7839 - }, - { - "epoch": 0.5994227497754077, - "grad_norm": 0.0008913141791708767, - "learning_rate": 0.00019999982314060797, - "loss": 46.0, - "step": 7840 - }, - { - "epoch": 0.5994992067587973, - "grad_norm": 0.0008210894884541631, - "learning_rate": 0.0001999998230954303, - "loss": 46.0, - "step": 7841 - }, - { - "epoch": 0.5995756637421871, - "grad_norm": 0.004353249911218882, - "learning_rate": 0.00019999982305024682, - "loss": 46.0, - "step": 7842 - }, - { - "epoch": 0.5996521207255767, - "grad_norm": 0.0010502181248739362, - "learning_rate": 0.00019999982300505763, - "loss": 46.0, - "step": 7843 - }, - { - "epoch": 0.5997285777089665, - "grad_norm": 0.004726924002170563, - "learning_rate": 0.0001999998229598626, - "loss": 46.0, - "step": 7844 - }, - { - "epoch": 0.5998050346923562, - "grad_norm": 0.0016735497629269958, - "learning_rate": 0.00019999982291466184, - "loss": 46.0, - "step": 7845 - }, - { - "epoch": 0.5998814916757459, - "grad_norm": 0.0004080400103703141, - "learning_rate": 0.00019999982286945532, - "loss": 46.0, - "step": 7846 - }, - { - "epoch": 0.5999579486591357, - "grad_norm": 0.002207372570410371, - "learning_rate": 0.000199999822824243, - "loss": 46.0, - "step": 7847 - }, - { - "epoch": 0.6000344056425254, - "grad_norm": 0.004215470980852842, - "learning_rate": 0.00019999982277902495, - "loss": 46.0, - "step": 7848 - }, - { - "epoch": 0.6001108626259151, - "grad_norm": 0.000501008820720017, - "learning_rate": 0.0001999998227338011, - "loss": 46.0, - "step": 7849 - }, - { - "epoch": 0.6001873196093048, - "grad_norm": 0.0012892954982817173, - "learning_rate": 0.0001999998226885715, - "loss": 46.0, - "step": 7850 - }, - { - "epoch": 0.6002637765926946, - "grad_norm": 0.00043995975283905864, - "learning_rate": 0.00019999982264333612, - "loss": 46.0, - "step": 7851 - }, - { - "epoch": 0.6003402335760842, - "grad_norm": 0.0005717137246392667, - "learning_rate": 0.00019999982259809497, - "loss": 46.0, - "step": 7852 - }, - { - "epoch": 0.600416690559474, - "grad_norm": 0.0047810813412070274, - "learning_rate": 0.00019999982255284807, - "loss": 46.0, - "step": 7853 - }, - { - "epoch": 0.6004931475428636, - "grad_norm": 0.0008196977432817221, - "learning_rate": 0.00019999982250759537, - "loss": 46.0, - "step": 7854 - }, - { - "epoch": 0.6005696045262534, - "grad_norm": 0.0018608559621497989, - "learning_rate": 0.00019999982246233692, - "loss": 46.0, - "step": 7855 - }, - { - "epoch": 0.6006460615096432, - "grad_norm": 0.0017331335693597794, - "learning_rate": 0.0001999998224170727, - "loss": 46.0, - "step": 7856 - }, - { - "epoch": 0.6007225184930328, - "grad_norm": 0.0011089193867519498, - "learning_rate": 0.0001999998223718027, - "loss": 46.0, - "step": 7857 - }, - { - "epoch": 0.6007989754764226, - "grad_norm": 0.0006854445673525333, - "learning_rate": 0.0001999998223265269, - "loss": 46.0, - "step": 7858 - }, - { - "epoch": 0.6008754324598123, - "grad_norm": 0.0005723850918002427, - "learning_rate": 0.00019999982228124542, - "loss": 46.0, - "step": 7859 - }, - { - "epoch": 0.600951889443202, - "grad_norm": 0.0025426107458770275, - "learning_rate": 0.00019999982223595808, - "loss": 46.0, - "step": 7860 - }, - { - "epoch": 0.6010283464265918, - "grad_norm": 0.004472413565963507, - "learning_rate": 0.00019999982219066502, - "loss": 46.0, - "step": 7861 - }, - { - "epoch": 0.6011048034099815, - "grad_norm": 0.0015187521930783987, - "learning_rate": 0.00019999982214536618, - "loss": 46.0, - "step": 7862 - }, - { - "epoch": 0.6011812603933712, - "grad_norm": 0.0010940533829852939, - "learning_rate": 0.00019999982210006155, - "loss": 46.0, - "step": 7863 - }, - { - "epoch": 0.6012577173767609, - "grad_norm": 0.0012296380009502172, - "learning_rate": 0.0001999998220547512, - "loss": 46.0, - "step": 7864 - }, - { - "epoch": 0.6013341743601506, - "grad_norm": 0.004293881822377443, - "learning_rate": 0.00019999982200943504, - "loss": 46.0, - "step": 7865 - }, - { - "epoch": 0.6014106313435403, - "grad_norm": 0.0005095797823742032, - "learning_rate": 0.00019999982196411311, - "loss": 46.0, - "step": 7866 - }, - { - "epoch": 0.6014870883269301, - "grad_norm": 0.0035696239210665226, - "learning_rate": 0.00019999982191878544, - "loss": 46.0, - "step": 7867 - }, - { - "epoch": 0.6015635453103197, - "grad_norm": 0.0011099465191364288, - "learning_rate": 0.00019999982187345197, - "loss": 46.0, - "step": 7868 - }, - { - "epoch": 0.6016400022937095, - "grad_norm": 0.001394714810885489, - "learning_rate": 0.00019999982182811272, - "loss": 46.0, - "step": 7869 - }, - { - "epoch": 0.6017164592770993, - "grad_norm": 0.0007722071604803205, - "learning_rate": 0.00019999982178276775, - "loss": 46.0, - "step": 7870 - }, - { - "epoch": 0.6017929162604889, - "grad_norm": 0.005232236813753843, - "learning_rate": 0.00019999982173741696, - "loss": 46.0, - "step": 7871 - }, - { - "epoch": 0.6018693732438787, - "grad_norm": 0.005517760291695595, - "learning_rate": 0.00019999982169206044, - "loss": 46.0, - "step": 7872 - }, - { - "epoch": 0.6019458302272683, - "grad_norm": 0.0018902813317254186, - "learning_rate": 0.00019999982164669813, - "loss": 46.0, - "step": 7873 - }, - { - "epoch": 0.6020222872106581, - "grad_norm": 0.0006760531687177718, - "learning_rate": 0.00019999982160133004, - "loss": 46.0, - "step": 7874 - }, - { - "epoch": 0.6020987441940479, - "grad_norm": 0.0036640206817537546, - "learning_rate": 0.0001999998215559562, - "loss": 46.0, - "step": 7875 - }, - { - "epoch": 0.6021752011774375, - "grad_norm": 0.0014028297737240791, - "learning_rate": 0.0001999998215105766, - "loss": 46.0, - "step": 7876 - }, - { - "epoch": 0.6022516581608273, - "grad_norm": 0.0012549128150567412, - "learning_rate": 0.00019999982146519122, - "loss": 46.0, - "step": 7877 - }, - { - "epoch": 0.602328115144217, - "grad_norm": 0.0011920961551368237, - "learning_rate": 0.00019999982141980004, - "loss": 46.0, - "step": 7878 - }, - { - "epoch": 0.6024045721276067, - "grad_norm": 0.0015891494695097208, - "learning_rate": 0.0001999998213744031, - "loss": 46.0, - "step": 7879 - }, - { - "epoch": 0.6024810291109964, - "grad_norm": 0.004706768784672022, - "learning_rate": 0.00019999982132900044, - "loss": 46.0, - "step": 7880 - }, - { - "epoch": 0.6025574860943862, - "grad_norm": 0.001879567396827042, - "learning_rate": 0.000199999821283592, - "loss": 46.0, - "step": 7881 - }, - { - "epoch": 0.6026339430777758, - "grad_norm": 0.0005421018577180803, - "learning_rate": 0.00019999982123817774, - "loss": 46.0, - "step": 7882 - }, - { - "epoch": 0.6027104000611656, - "grad_norm": 0.001354339998215437, - "learning_rate": 0.00019999982119275775, - "loss": 46.0, - "step": 7883 - }, - { - "epoch": 0.6027868570445553, - "grad_norm": 0.0010297372937202454, - "learning_rate": 0.00019999982114733196, - "loss": 46.0, - "step": 7884 - }, - { - "epoch": 0.602863314027945, - "grad_norm": 0.0020425557158887386, - "learning_rate": 0.00019999982110190044, - "loss": 46.0, - "step": 7885 - }, - { - "epoch": 0.6029397710113348, - "grad_norm": 0.0009307325235567987, - "learning_rate": 0.00019999982105646313, - "loss": 46.0, - "step": 7886 - }, - { - "epoch": 0.6030162279947244, - "grad_norm": 0.0024466970935463905, - "learning_rate": 0.00019999982101102004, - "loss": 46.0, - "step": 7887 - }, - { - "epoch": 0.6030926849781142, - "grad_norm": 0.002209039404988289, - "learning_rate": 0.0001999998209655712, - "loss": 46.0, - "step": 7888 - }, - { - "epoch": 0.603169141961504, - "grad_norm": 0.009476684033870697, - "learning_rate": 0.00019999982092011657, - "loss": 46.0, - "step": 7889 - }, - { - "epoch": 0.6032455989448936, - "grad_norm": 0.0020331223495304585, - "learning_rate": 0.0001999998208746562, - "loss": 46.0, - "step": 7890 - }, - { - "epoch": 0.6033220559282834, - "grad_norm": 0.0011414854088798165, - "learning_rate": 0.00019999982082919004, - "loss": 46.0, - "step": 7891 - }, - { - "epoch": 0.6033985129116731, - "grad_norm": 0.003357579465955496, - "learning_rate": 0.0001999998207837181, - "loss": 46.0, - "step": 7892 - }, - { - "epoch": 0.6034749698950628, - "grad_norm": 0.0023027893621474504, - "learning_rate": 0.00019999982073824038, - "loss": 46.0, - "step": 7893 - }, - { - "epoch": 0.6035514268784525, - "grad_norm": 0.0022761691361665726, - "learning_rate": 0.00019999982069275697, - "loss": 46.0, - "step": 7894 - }, - { - "epoch": 0.6036278838618422, - "grad_norm": 0.0017769032856449485, - "learning_rate": 0.0001999998206472677, - "loss": 46.0, - "step": 7895 - }, - { - "epoch": 0.603704340845232, - "grad_norm": 0.0011424003168940544, - "learning_rate": 0.00019999982060177272, - "loss": 46.0, - "step": 7896 - }, - { - "epoch": 0.6037807978286217, - "grad_norm": 0.001006170641630888, - "learning_rate": 0.00019999982055627193, - "loss": 46.0, - "step": 7897 - }, - { - "epoch": 0.6038572548120114, - "grad_norm": 0.0008326191455125809, - "learning_rate": 0.0001999998205107654, - "loss": 46.0, - "step": 7898 - }, - { - "epoch": 0.6039337117954011, - "grad_norm": 0.0012525704223662615, - "learning_rate": 0.00019999982046525308, - "loss": 46.0, - "step": 7899 - }, - { - "epoch": 0.6040101687787909, - "grad_norm": 0.0011818106286227703, - "learning_rate": 0.000199999820419735, - "loss": 46.0, - "step": 7900 - }, - { - "epoch": 0.6040866257621805, - "grad_norm": 0.003493251046165824, - "learning_rate": 0.00019999982037421116, - "loss": 46.0, - "step": 7901 - }, - { - "epoch": 0.6041630827455703, - "grad_norm": 0.002517836634069681, - "learning_rate": 0.00019999982032868152, - "loss": 46.0, - "step": 7902 - }, - { - "epoch": 0.6042395397289599, - "grad_norm": 0.0043289246968925, - "learning_rate": 0.00019999982028314614, - "loss": 46.0, - "step": 7903 - }, - { - "epoch": 0.6043159967123497, - "grad_norm": 0.0020103594288229942, - "learning_rate": 0.000199999820237605, - "loss": 46.0, - "step": 7904 - }, - { - "epoch": 0.6043924536957395, - "grad_norm": 0.0015729309525340796, - "learning_rate": 0.00019999982019205806, - "loss": 46.0, - "step": 7905 - }, - { - "epoch": 0.6044689106791291, - "grad_norm": 0.0006856784457340837, - "learning_rate": 0.00019999982014650536, - "loss": 46.0, - "step": 7906 - }, - { - "epoch": 0.6045453676625189, - "grad_norm": 0.0011072318302467465, - "learning_rate": 0.0001999998201009469, - "loss": 46.0, - "step": 7907 - }, - { - "epoch": 0.6046218246459086, - "grad_norm": 0.000863327644765377, - "learning_rate": 0.00019999982005538264, - "loss": 46.0, - "step": 7908 - }, - { - "epoch": 0.6046982816292983, - "grad_norm": 0.0004334252153057605, - "learning_rate": 0.00019999982000981262, - "loss": 46.0, - "step": 7909 - }, - { - "epoch": 0.604774738612688, - "grad_norm": 0.002265630755573511, - "learning_rate": 0.00019999981996423686, - "loss": 46.0, - "step": 7910 - }, - { - "epoch": 0.6048511955960778, - "grad_norm": 0.0028832312673330307, - "learning_rate": 0.00019999981991865532, - "loss": 46.0, - "step": 7911 - }, - { - "epoch": 0.6049276525794675, - "grad_norm": 0.0005825895350426435, - "learning_rate": 0.00019999981987306803, - "loss": 46.0, - "step": 7912 - }, - { - "epoch": 0.6050041095628572, - "grad_norm": 0.002411558758467436, - "learning_rate": 0.00019999981982747494, - "loss": 46.0, - "step": 7913 - }, - { - "epoch": 0.6050805665462469, - "grad_norm": 0.004140479490160942, - "learning_rate": 0.00019999981978187608, - "loss": 46.0, - "step": 7914 - }, - { - "epoch": 0.6051570235296366, - "grad_norm": 0.000905551656614989, - "learning_rate": 0.00019999981973627145, - "loss": 46.0, - "step": 7915 - }, - { - "epoch": 0.6052334805130264, - "grad_norm": 0.0014777554897591472, - "learning_rate": 0.00019999981969066104, - "loss": 46.0, - "step": 7916 - }, - { - "epoch": 0.605309937496416, - "grad_norm": 0.0005173294921405613, - "learning_rate": 0.00019999981964504492, - "loss": 46.0, - "step": 7917 - }, - { - "epoch": 0.6053863944798058, - "grad_norm": 0.000806057418230921, - "learning_rate": 0.00019999981959942296, - "loss": 46.0, - "step": 7918 - }, - { - "epoch": 0.6054628514631956, - "grad_norm": 0.0005943129071965814, - "learning_rate": 0.00019999981955379526, - "loss": 46.0, - "step": 7919 - }, - { - "epoch": 0.6055393084465852, - "grad_norm": 0.0008464242564514279, - "learning_rate": 0.0001999998195081618, - "loss": 46.0, - "step": 7920 - }, - { - "epoch": 0.605615765429975, - "grad_norm": 0.00390784302726388, - "learning_rate": 0.00019999981946252255, - "loss": 46.0, - "step": 7921 - }, - { - "epoch": 0.6056922224133647, - "grad_norm": 0.0017563611036166549, - "learning_rate": 0.00019999981941687755, - "loss": 46.0, - "step": 7922 - }, - { - "epoch": 0.6057686793967544, - "grad_norm": 0.0008239975431933999, - "learning_rate": 0.00019999981937122676, - "loss": 46.0, - "step": 7923 - }, - { - "epoch": 0.6058451363801441, - "grad_norm": 0.0017751911655068398, - "learning_rate": 0.00019999981932557022, - "loss": 46.0, - "step": 7924 - }, - { - "epoch": 0.6059215933635338, - "grad_norm": 0.0009364386787638068, - "learning_rate": 0.0001999998192799079, - "loss": 46.0, - "step": 7925 - }, - { - "epoch": 0.6059980503469236, - "grad_norm": 0.0024566175416111946, - "learning_rate": 0.0001999998192342398, - "loss": 46.0, - "step": 7926 - }, - { - "epoch": 0.6060745073303133, - "grad_norm": 0.00040578775224275887, - "learning_rate": 0.00019999981918856596, - "loss": 46.0, - "step": 7927 - }, - { - "epoch": 0.606150964313703, - "grad_norm": 0.003301683347672224, - "learning_rate": 0.00019999981914288633, - "loss": 46.0, - "step": 7928 - }, - { - "epoch": 0.6062274212970927, - "grad_norm": 0.0013802727917209268, - "learning_rate": 0.00019999981909720095, - "loss": 46.0, - "step": 7929 - }, - { - "epoch": 0.6063038782804825, - "grad_norm": 0.0008085070294328034, - "learning_rate": 0.00019999981905150977, - "loss": 46.0, - "step": 7930 - }, - { - "epoch": 0.6063803352638721, - "grad_norm": 0.0013770516961812973, - "learning_rate": 0.00019999981900581284, - "loss": 46.0, - "step": 7931 - }, - { - "epoch": 0.6064567922472619, - "grad_norm": 0.0006124796345829964, - "learning_rate": 0.00019999981896011014, - "loss": 46.0, - "step": 7932 - }, - { - "epoch": 0.6065332492306515, - "grad_norm": 0.002153716515749693, - "learning_rate": 0.00019999981891440167, - "loss": 46.0, - "step": 7933 - }, - { - "epoch": 0.6066097062140413, - "grad_norm": 0.0024859863333404064, - "learning_rate": 0.00019999981886868742, - "loss": 46.0, - "step": 7934 - }, - { - "epoch": 0.6066861631974311, - "grad_norm": 0.0010924540692940354, - "learning_rate": 0.00019999981882296738, - "loss": 46.0, - "step": 7935 - }, - { - "epoch": 0.6067626201808207, - "grad_norm": 0.0022507929243147373, - "learning_rate": 0.0001999998187772416, - "loss": 46.0, - "step": 7936 - }, - { - "epoch": 0.6068390771642105, - "grad_norm": 0.0013681759592145681, - "learning_rate": 0.00019999981873151005, - "loss": 46.0, - "step": 7937 - }, - { - "epoch": 0.6069155341476002, - "grad_norm": 0.0009938521543517709, - "learning_rate": 0.00019999981868577276, - "loss": 46.0, - "step": 7938 - }, - { - "epoch": 0.6069919911309899, - "grad_norm": 0.0006067179492674768, - "learning_rate": 0.00019999981864002965, - "loss": 46.0, - "step": 7939 - }, - { - "epoch": 0.6070684481143797, - "grad_norm": 0.0008704601787030697, - "learning_rate": 0.00019999981859428081, - "loss": 46.0, - "step": 7940 - }, - { - "epoch": 0.6071449050977694, - "grad_norm": 0.0016925816889852285, - "learning_rate": 0.00019999981854852615, - "loss": 46.0, - "step": 7941 - }, - { - "epoch": 0.6072213620811591, - "grad_norm": 0.000867995957378298, - "learning_rate": 0.0001999998185027658, - "loss": 46.0, - "step": 7942 - }, - { - "epoch": 0.6072978190645488, - "grad_norm": 0.000922888983041048, - "learning_rate": 0.0001999998184569996, - "loss": 46.0, - "step": 7943 - }, - { - "epoch": 0.6073742760479385, - "grad_norm": 0.0008711973787285388, - "learning_rate": 0.00019999981841122767, - "loss": 46.0, - "step": 7944 - }, - { - "epoch": 0.6074507330313282, - "grad_norm": 0.0008109626942314208, - "learning_rate": 0.00019999981836544997, - "loss": 46.0, - "step": 7945 - }, - { - "epoch": 0.607527190014718, - "grad_norm": 0.014748835004866123, - "learning_rate": 0.00019999981831966647, - "loss": 46.0, - "step": 7946 - }, - { - "epoch": 0.6076036469981076, - "grad_norm": 0.0026019506622105837, - "learning_rate": 0.00019999981827387723, - "loss": 46.0, - "step": 7947 - }, - { - "epoch": 0.6076801039814974, - "grad_norm": 0.0014633170794695616, - "learning_rate": 0.00019999981822808223, - "loss": 46.0, - "step": 7948 - }, - { - "epoch": 0.6077565609648872, - "grad_norm": 0.0025430629029870033, - "learning_rate": 0.00019999981818228144, - "loss": 46.0, - "step": 7949 - }, - { - "epoch": 0.6078330179482768, - "grad_norm": 0.0008037923253141344, - "learning_rate": 0.00019999981813647488, - "loss": 46.0, - "step": 7950 - }, - { - "epoch": 0.6079094749316666, - "grad_norm": 0.0009830540511757135, - "learning_rate": 0.00019999981809066256, - "loss": 46.0, - "step": 7951 - }, - { - "epoch": 0.6079859319150563, - "grad_norm": 0.002180629177019, - "learning_rate": 0.00019999981804484448, - "loss": 46.0, - "step": 7952 - }, - { - "epoch": 0.608062388898446, - "grad_norm": 0.0002933908544946462, - "learning_rate": 0.00019999981799902062, - "loss": 46.0, - "step": 7953 - }, - { - "epoch": 0.6081388458818358, - "grad_norm": 0.0020938822999596596, - "learning_rate": 0.00019999981795319099, - "loss": 46.0, - "step": 7954 - }, - { - "epoch": 0.6082153028652254, - "grad_norm": 0.0031283488497138023, - "learning_rate": 0.00019999981790735558, - "loss": 46.0, - "step": 7955 - }, - { - "epoch": 0.6082917598486152, - "grad_norm": 0.0013937586918473244, - "learning_rate": 0.00019999981786151443, - "loss": 46.0, - "step": 7956 - }, - { - "epoch": 0.6083682168320049, - "grad_norm": 0.0025686935987323523, - "learning_rate": 0.00019999981781566745, - "loss": 46.0, - "step": 7957 - }, - { - "epoch": 0.6084446738153946, - "grad_norm": 0.0008471534820273519, - "learning_rate": 0.00019999981776981475, - "loss": 46.0, - "step": 7958 - }, - { - "epoch": 0.6085211307987843, - "grad_norm": 0.0034409454092383385, - "learning_rate": 0.00019999981772395628, - "loss": 46.0, - "step": 7959 - }, - { - "epoch": 0.6085975877821741, - "grad_norm": 0.009748689830303192, - "learning_rate": 0.00019999981767809203, - "loss": 46.0, - "step": 7960 - }, - { - "epoch": 0.6086740447655637, - "grad_norm": 0.0012783779529854655, - "learning_rate": 0.000199999817632222, - "loss": 46.0, - "step": 7961 - }, - { - "epoch": 0.6087505017489535, - "grad_norm": 0.0011554979719221592, - "learning_rate": 0.00019999981758634622, - "loss": 46.0, - "step": 7962 - }, - { - "epoch": 0.6088269587323433, - "grad_norm": 0.0010530215222388506, - "learning_rate": 0.00019999981754046465, - "loss": 46.0, - "step": 7963 - }, - { - "epoch": 0.6089034157157329, - "grad_norm": 0.0032395594753324986, - "learning_rate": 0.00019999981749457734, - "loss": 46.0, - "step": 7964 - }, - { - "epoch": 0.6089798726991227, - "grad_norm": 0.0005449085729196668, - "learning_rate": 0.00019999981744868426, - "loss": 46.0, - "step": 7965 - }, - { - "epoch": 0.6090563296825123, - "grad_norm": 0.007412794977426529, - "learning_rate": 0.0001999998174027854, - "loss": 46.0, - "step": 7966 - }, - { - "epoch": 0.6091327866659021, - "grad_norm": 0.0006506446516141295, - "learning_rate": 0.00019999981735688077, - "loss": 46.0, - "step": 7967 - }, - { - "epoch": 0.6092092436492919, - "grad_norm": 0.0011552934302017093, - "learning_rate": 0.00019999981731097036, - "loss": 46.0, - "step": 7968 - }, - { - "epoch": 0.6092857006326815, - "grad_norm": 0.0006012298399582505, - "learning_rate": 0.00019999981726505416, - "loss": 46.0, - "step": 7969 - }, - { - "epoch": 0.6093621576160713, - "grad_norm": 0.0014015400083735585, - "learning_rate": 0.00019999981721913223, - "loss": 46.0, - "step": 7970 - }, - { - "epoch": 0.609438614599461, - "grad_norm": 0.0005265754880383611, - "learning_rate": 0.00019999981717320453, - "loss": 46.0, - "step": 7971 - }, - { - "epoch": 0.6095150715828507, - "grad_norm": 0.0010723063023760915, - "learning_rate": 0.00019999981712727106, - "loss": 46.0, - "step": 7972 - }, - { - "epoch": 0.6095915285662404, - "grad_norm": 0.0007148168515414, - "learning_rate": 0.0001999998170813318, - "loss": 46.0, - "step": 7973 - }, - { - "epoch": 0.6096679855496301, - "grad_norm": 0.0014183580642566085, - "learning_rate": 0.00019999981703538677, - "loss": 46.0, - "step": 7974 - }, - { - "epoch": 0.6097444425330198, - "grad_norm": 0.0021027768962085247, - "learning_rate": 0.00019999981698943598, - "loss": 46.0, - "step": 7975 - }, - { - "epoch": 0.6098208995164096, - "grad_norm": 0.0005962667055428028, - "learning_rate": 0.0001999998169434794, - "loss": 46.0, - "step": 7976 - }, - { - "epoch": 0.6098973564997993, - "grad_norm": 0.00548368226736784, - "learning_rate": 0.0001999998168975171, - "loss": 46.0, - "step": 7977 - }, - { - "epoch": 0.609973813483189, - "grad_norm": 0.0010928643168881536, - "learning_rate": 0.000199999816851549, - "loss": 46.0, - "step": 7978 - }, - { - "epoch": 0.6100502704665788, - "grad_norm": 0.001230394234880805, - "learning_rate": 0.00019999981680557513, - "loss": 46.0, - "step": 7979 - }, - { - "epoch": 0.6101267274499684, - "grad_norm": 0.002780554350465536, - "learning_rate": 0.0001999998167595955, - "loss": 46.0, - "step": 7980 - }, - { - "epoch": 0.6102031844333582, - "grad_norm": 0.0009225890971720219, - "learning_rate": 0.00019999981671361007, - "loss": 46.0, - "step": 7981 - }, - { - "epoch": 0.610279641416748, - "grad_norm": 0.0007890506531111896, - "learning_rate": 0.00019999981666761891, - "loss": 46.0, - "step": 7982 - }, - { - "epoch": 0.6103560984001376, - "grad_norm": 0.0011693231062963605, - "learning_rate": 0.00019999981662162196, - "loss": 46.0, - "step": 7983 - }, - { - "epoch": 0.6104325553835274, - "grad_norm": 0.0018700744258239865, - "learning_rate": 0.00019999981657561926, - "loss": 46.0, - "step": 7984 - }, - { - "epoch": 0.610509012366917, - "grad_norm": 0.0013759142020717263, - "learning_rate": 0.00019999981652961076, - "loss": 46.0, - "step": 7985 - }, - { - "epoch": 0.6105854693503068, - "grad_norm": 0.0035913782194256783, - "learning_rate": 0.00019999981648359652, - "loss": 46.0, - "step": 7986 - }, - { - "epoch": 0.6106619263336965, - "grad_norm": 0.003008329076692462, - "learning_rate": 0.00019999981643757647, - "loss": 46.0, - "step": 7987 - }, - { - "epoch": 0.6107383833170862, - "grad_norm": 0.0024229709524661303, - "learning_rate": 0.0001999998163915507, - "loss": 46.0, - "step": 7988 - }, - { - "epoch": 0.610814840300476, - "grad_norm": 0.000699841242749244, - "learning_rate": 0.00019999981634551914, - "loss": 46.0, - "step": 7989 - }, - { - "epoch": 0.6108912972838657, - "grad_norm": 0.0006508107762783766, - "learning_rate": 0.00019999981629948178, - "loss": 46.0, - "step": 7990 - }, - { - "epoch": 0.6109677542672554, - "grad_norm": 0.004089084919542074, - "learning_rate": 0.00019999981625343872, - "loss": 46.0, - "step": 7991 - }, - { - "epoch": 0.6110442112506451, - "grad_norm": 0.0006521567120216787, - "learning_rate": 0.00019999981620738984, - "loss": 46.0, - "step": 7992 - }, - { - "epoch": 0.6111206682340349, - "grad_norm": 0.0008699916652403772, - "learning_rate": 0.00019999981616133518, - "loss": 46.0, - "step": 7993 - }, - { - "epoch": 0.6111971252174245, - "grad_norm": 0.0006521597388200462, - "learning_rate": 0.00019999981611527477, - "loss": 46.0, - "step": 7994 - }, - { - "epoch": 0.6112735822008143, - "grad_norm": 0.0009300427045673132, - "learning_rate": 0.00019999981606920862, - "loss": 46.0, - "step": 7995 - }, - { - "epoch": 0.6113500391842039, - "grad_norm": 0.004025891423225403, - "learning_rate": 0.00019999981602313667, - "loss": 46.0, - "step": 7996 - }, - { - "epoch": 0.6114264961675937, - "grad_norm": 0.0010910285636782646, - "learning_rate": 0.00019999981597705895, - "loss": 46.0, - "step": 7997 - }, - { - "epoch": 0.6115029531509835, - "grad_norm": 0.0014916922664269805, - "learning_rate": 0.00019999981593097545, - "loss": 46.0, - "step": 7998 - }, - { - "epoch": 0.6115794101343731, - "grad_norm": 0.0008063392015174031, - "learning_rate": 0.00019999981588488618, - "loss": 46.0, - "step": 7999 - }, - { - "epoch": 0.6116558671177629, - "grad_norm": 0.0016277688555419445, - "learning_rate": 0.00019999981583879118, - "loss": 46.0, - "step": 8000 - }, - { - "epoch": 0.6117323241011526, - "grad_norm": 0.0021985953208059072, - "learning_rate": 0.0001999998157926904, - "loss": 46.0, - "step": 8001 - }, - { - "epoch": 0.6118087810845423, - "grad_norm": 0.0006408132030628622, - "learning_rate": 0.0001999998157465838, - "loss": 46.0, - "step": 8002 - }, - { - "epoch": 0.611885238067932, - "grad_norm": 0.0009227868868038058, - "learning_rate": 0.0001999998157004715, - "loss": 46.0, - "step": 8003 - }, - { - "epoch": 0.6119616950513217, - "grad_norm": 0.0007038955227471888, - "learning_rate": 0.00019999981565435338, - "loss": 46.0, - "step": 8004 - }, - { - "epoch": 0.6120381520347115, - "grad_norm": 0.0008833712199702859, - "learning_rate": 0.0001999998156082295, - "loss": 46.0, - "step": 8005 - }, - { - "epoch": 0.6121146090181012, - "grad_norm": 0.0031500300392508507, - "learning_rate": 0.00019999981556209986, - "loss": 46.0, - "step": 8006 - }, - { - "epoch": 0.6121910660014909, - "grad_norm": 0.0020365710370242596, - "learning_rate": 0.00019999981551596446, - "loss": 46.0, - "step": 8007 - }, - { - "epoch": 0.6122675229848806, - "grad_norm": 0.000716263079084456, - "learning_rate": 0.00019999981546982328, - "loss": 46.0, - "step": 8008 - }, - { - "epoch": 0.6123439799682704, - "grad_norm": 0.0014570709317922592, - "learning_rate": 0.00019999981542367633, - "loss": 46.0, - "step": 8009 - }, - { - "epoch": 0.61242043695166, - "grad_norm": 0.0013562020612880588, - "learning_rate": 0.0001999998153775236, - "loss": 46.0, - "step": 8010 - }, - { - "epoch": 0.6124968939350498, - "grad_norm": 0.0018453149823471904, - "learning_rate": 0.00019999981533136508, - "loss": 46.0, - "step": 8011 - }, - { - "epoch": 0.6125733509184396, - "grad_norm": 0.002312688622623682, - "learning_rate": 0.00019999981528520084, - "loss": 46.0, - "step": 8012 - }, - { - "epoch": 0.6126498079018292, - "grad_norm": 0.0105765201151371, - "learning_rate": 0.00019999981523903082, - "loss": 46.0, - "step": 8013 - }, - { - "epoch": 0.612726264885219, - "grad_norm": 0.005194495897740126, - "learning_rate": 0.00019999981519285503, - "loss": 46.0, - "step": 8014 - }, - { - "epoch": 0.6128027218686086, - "grad_norm": 0.0015117257134988904, - "learning_rate": 0.00019999981514667346, - "loss": 46.0, - "step": 8015 - }, - { - "epoch": 0.6128791788519984, - "grad_norm": 0.0010569387814030051, - "learning_rate": 0.0001999998151004861, - "loss": 46.0, - "step": 8016 - }, - { - "epoch": 0.6129556358353881, - "grad_norm": 0.0006272075697779655, - "learning_rate": 0.00019999981505429299, - "loss": 46.0, - "step": 8017 - }, - { - "epoch": 0.6130320928187778, - "grad_norm": 0.0040042707696557045, - "learning_rate": 0.00019999981500809413, - "loss": 46.0, - "step": 8018 - }, - { - "epoch": 0.6131085498021676, - "grad_norm": 0.005988226737827063, - "learning_rate": 0.0001999998149618895, - "loss": 46.0, - "step": 8019 - }, - { - "epoch": 0.6131850067855573, - "grad_norm": 0.0014884572010487318, - "learning_rate": 0.00019999981491567907, - "loss": 46.0, - "step": 8020 - }, - { - "epoch": 0.613261463768947, - "grad_norm": 0.001359344576485455, - "learning_rate": 0.0001999998148694629, - "loss": 46.0, - "step": 8021 - }, - { - "epoch": 0.6133379207523367, - "grad_norm": 0.003955220337957144, - "learning_rate": 0.00019999981482324094, - "loss": 46.0, - "step": 8022 - }, - { - "epoch": 0.6134143777357265, - "grad_norm": 0.0016917415196076035, - "learning_rate": 0.0001999998147770132, - "loss": 46.0, - "step": 8023 - }, - { - "epoch": 0.6134908347191161, - "grad_norm": 0.0012416922254487872, - "learning_rate": 0.00019999981473077974, - "loss": 46.0, - "step": 8024 - }, - { - "epoch": 0.6135672917025059, - "grad_norm": 0.002291525946930051, - "learning_rate": 0.00019999981468454044, - "loss": 46.0, - "step": 8025 - }, - { - "epoch": 0.6136437486858956, - "grad_norm": 0.001350725069642067, - "learning_rate": 0.00019999981463829543, - "loss": 46.0, - "step": 8026 - }, - { - "epoch": 0.6137202056692853, - "grad_norm": 0.0006322698318399489, - "learning_rate": 0.0001999998145920446, - "loss": 46.0, - "step": 8027 - }, - { - "epoch": 0.6137966626526751, - "grad_norm": 0.0008371872827410698, - "learning_rate": 0.00019999981454578807, - "loss": 46.0, - "step": 8028 - }, - { - "epoch": 0.6138731196360647, - "grad_norm": 0.0026724180206656456, - "learning_rate": 0.0001999998144995257, - "loss": 46.0, - "step": 8029 - }, - { - "epoch": 0.6139495766194545, - "grad_norm": 0.0005557225667871535, - "learning_rate": 0.0001999998144532576, - "loss": 46.0, - "step": 8030 - }, - { - "epoch": 0.6140260336028442, - "grad_norm": 0.0018545619677752256, - "learning_rate": 0.0001999998144069837, - "loss": 46.0, - "step": 8031 - }, - { - "epoch": 0.6141024905862339, - "grad_norm": 0.0029547943267971277, - "learning_rate": 0.00019999981436070408, - "loss": 46.0, - "step": 8032 - }, - { - "epoch": 0.6141789475696237, - "grad_norm": 0.0007139172521419823, - "learning_rate": 0.00019999981431441865, - "loss": 46.0, - "step": 8033 - }, - { - "epoch": 0.6142554045530133, - "grad_norm": 0.0006746540893800557, - "learning_rate": 0.00019999981426812747, - "loss": 46.0, - "step": 8034 - }, - { - "epoch": 0.6143318615364031, - "grad_norm": 0.0074030994437634945, - "learning_rate": 0.00019999981422183052, - "loss": 46.0, - "step": 8035 - }, - { - "epoch": 0.6144083185197928, - "grad_norm": 0.004032945726066828, - "learning_rate": 0.0001999998141755278, - "loss": 46.0, - "step": 8036 - }, - { - "epoch": 0.6144847755031825, - "grad_norm": 0.0027331418823450804, - "learning_rate": 0.00019999981412921927, - "loss": 46.0, - "step": 8037 - }, - { - "epoch": 0.6145612324865722, - "grad_norm": 0.0005232125404290855, - "learning_rate": 0.00019999981408290506, - "loss": 46.0, - "step": 8038 - }, - { - "epoch": 0.614637689469962, - "grad_norm": 0.003908711951225996, - "learning_rate": 0.000199999814036585, - "loss": 46.0, - "step": 8039 - }, - { - "epoch": 0.6147141464533517, - "grad_norm": 0.0006954863201826811, - "learning_rate": 0.0001999998139902592, - "loss": 46.0, - "step": 8040 - }, - { - "epoch": 0.6147906034367414, - "grad_norm": 0.0011021033860743046, - "learning_rate": 0.00019999981394392763, - "loss": 46.0, - "step": 8041 - }, - { - "epoch": 0.6148670604201312, - "grad_norm": 0.0013622321421280503, - "learning_rate": 0.0001999998138975903, - "loss": 46.0, - "step": 8042 - }, - { - "epoch": 0.6149435174035208, - "grad_norm": 0.0008329287520609796, - "learning_rate": 0.00019999981385124718, - "loss": 46.0, - "step": 8043 - }, - { - "epoch": 0.6150199743869106, - "grad_norm": 0.0026834269519895315, - "learning_rate": 0.0001999998138048983, - "loss": 46.0, - "step": 8044 - }, - { - "epoch": 0.6150964313703002, - "grad_norm": 0.0016994967591017485, - "learning_rate": 0.00019999981375854364, - "loss": 46.0, - "step": 8045 - }, - { - "epoch": 0.61517288835369, - "grad_norm": 0.0007733700331300497, - "learning_rate": 0.0001999998137121832, - "loss": 46.0, - "step": 8046 - }, - { - "epoch": 0.6152493453370798, - "grad_norm": 0.0008033501217141747, - "learning_rate": 0.00019999981366581704, - "loss": 46.0, - "step": 8047 - }, - { - "epoch": 0.6153258023204694, - "grad_norm": 0.0006857018452137709, - "learning_rate": 0.00019999981361944506, - "loss": 46.0, - "step": 8048 - }, - { - "epoch": 0.6154022593038592, - "grad_norm": 0.0024543199688196182, - "learning_rate": 0.00019999981357306733, - "loss": 46.0, - "step": 8049 - }, - { - "epoch": 0.6154787162872489, - "grad_norm": 0.0034503464121371508, - "learning_rate": 0.00019999981352668384, - "loss": 46.0, - "step": 8050 - }, - { - "epoch": 0.6155551732706386, - "grad_norm": 0.0011730798287317157, - "learning_rate": 0.00019999981348029457, - "loss": 46.0, - "step": 8051 - }, - { - "epoch": 0.6156316302540283, - "grad_norm": 0.001017491682432592, - "learning_rate": 0.00019999981343389952, - "loss": 46.0, - "step": 8052 - }, - { - "epoch": 0.6157080872374181, - "grad_norm": 0.0011439293157309294, - "learning_rate": 0.00019999981338749873, - "loss": 46.0, - "step": 8053 - }, - { - "epoch": 0.6157845442208078, - "grad_norm": 0.000915037642698735, - "learning_rate": 0.00019999981334109217, - "loss": 46.0, - "step": 8054 - }, - { - "epoch": 0.6158610012041975, - "grad_norm": 0.002680210629478097, - "learning_rate": 0.00019999981329467983, - "loss": 46.0, - "step": 8055 - }, - { - "epoch": 0.6159374581875872, - "grad_norm": 0.002151148859411478, - "learning_rate": 0.00019999981324826172, - "loss": 46.0, - "step": 8056 - }, - { - "epoch": 0.6160139151709769, - "grad_norm": 0.0017771321581676602, - "learning_rate": 0.00019999981320183784, - "loss": 46.0, - "step": 8057 - }, - { - "epoch": 0.6160903721543667, - "grad_norm": 0.0005965745658613741, - "learning_rate": 0.00019999981315540815, - "loss": 46.0, - "step": 8058 - }, - { - "epoch": 0.6161668291377563, - "grad_norm": 0.0004076090408489108, - "learning_rate": 0.00019999981310897272, - "loss": 46.0, - "step": 8059 - }, - { - "epoch": 0.6162432861211461, - "grad_norm": 0.0014622843591496348, - "learning_rate": 0.00019999981306253155, - "loss": 46.0, - "step": 8060 - }, - { - "epoch": 0.6163197431045359, - "grad_norm": 0.0011214729165658355, - "learning_rate": 0.0001999998130160846, - "loss": 46.0, - "step": 8061 - }, - { - "epoch": 0.6163962000879255, - "grad_norm": 0.005945032462477684, - "learning_rate": 0.00019999981296963185, - "loss": 46.0, - "step": 8062 - }, - { - "epoch": 0.6164726570713153, - "grad_norm": 0.001295273075811565, - "learning_rate": 0.00019999981292317335, - "loss": 46.0, - "step": 8063 - }, - { - "epoch": 0.6165491140547049, - "grad_norm": 0.001352186081930995, - "learning_rate": 0.00019999981287670908, - "loss": 46.0, - "step": 8064 - }, - { - "epoch": 0.6166255710380947, - "grad_norm": 0.005008733365684748, - "learning_rate": 0.00019999981283023904, - "loss": 46.0, - "step": 8065 - }, - { - "epoch": 0.6167020280214844, - "grad_norm": 0.0027853958308696747, - "learning_rate": 0.00019999981278376322, - "loss": 46.0, - "step": 8066 - }, - { - "epoch": 0.6167784850048741, - "grad_norm": 0.004503860138356686, - "learning_rate": 0.00019999981273728166, - "loss": 46.0, - "step": 8067 - }, - { - "epoch": 0.6168549419882638, - "grad_norm": 0.0007594541530124843, - "learning_rate": 0.00019999981269079432, - "loss": 46.0, - "step": 8068 - }, - { - "epoch": 0.6169313989716536, - "grad_norm": 0.002667501801624894, - "learning_rate": 0.00019999981264430118, - "loss": 46.0, - "step": 8069 - }, - { - "epoch": 0.6170078559550433, - "grad_norm": 0.0008925874135456979, - "learning_rate": 0.0001999998125978023, - "loss": 46.0, - "step": 8070 - }, - { - "epoch": 0.617084312938433, - "grad_norm": 0.0010283252922818065, - "learning_rate": 0.00019999981255129764, - "loss": 46.0, - "step": 8071 - }, - { - "epoch": 0.6171607699218228, - "grad_norm": 0.0024446446914225817, - "learning_rate": 0.00019999981250478724, - "loss": 46.0, - "step": 8072 - }, - { - "epoch": 0.6172372269052124, - "grad_norm": 0.0012787660816684365, - "learning_rate": 0.000199999812458271, - "loss": 46.0, - "step": 8073 - }, - { - "epoch": 0.6173136838886022, - "grad_norm": 0.0007039426127448678, - "learning_rate": 0.0001999998124117491, - "loss": 46.0, - "step": 8074 - }, - { - "epoch": 0.6173901408719918, - "grad_norm": 0.0008649210212752223, - "learning_rate": 0.0001999998123652213, - "loss": 46.0, - "step": 8075 - }, - { - "epoch": 0.6174665978553816, - "grad_norm": 0.0013115047477185726, - "learning_rate": 0.00019999981231868784, - "loss": 46.0, - "step": 8076 - }, - { - "epoch": 0.6175430548387714, - "grad_norm": 0.001115027698688209, - "learning_rate": 0.00019999981227214854, - "loss": 46.0, - "step": 8077 - }, - { - "epoch": 0.617619511822161, - "grad_norm": 0.0025179898366332054, - "learning_rate": 0.0001999998122256035, - "loss": 46.0, - "step": 8078 - }, - { - "epoch": 0.6176959688055508, - "grad_norm": 0.0010529948631301522, - "learning_rate": 0.00019999981217905268, - "loss": 46.0, - "step": 8079 - }, - { - "epoch": 0.6177724257889405, - "grad_norm": 0.0008856932399794459, - "learning_rate": 0.0001999998121324961, - "loss": 46.0, - "step": 8080 - }, - { - "epoch": 0.6178488827723302, - "grad_norm": 0.0019840889144688845, - "learning_rate": 0.00019999981208593376, - "loss": 46.0, - "step": 8081 - }, - { - "epoch": 0.61792533975572, - "grad_norm": 0.0014206685591489077, - "learning_rate": 0.00019999981203936565, - "loss": 46.0, - "step": 8082 - }, - { - "epoch": 0.6180017967391097, - "grad_norm": 0.0009865479078143835, - "learning_rate": 0.00019999981199279177, - "loss": 46.0, - "step": 8083 - }, - { - "epoch": 0.6180782537224994, - "grad_norm": 0.0011100240517407656, - "learning_rate": 0.00019999981194621208, - "loss": 46.0, - "step": 8084 - }, - { - "epoch": 0.6181547107058891, - "grad_norm": 0.011240546591579914, - "learning_rate": 0.00019999981189962668, - "loss": 46.0, - "step": 8085 - }, - { - "epoch": 0.6182311676892788, - "grad_norm": 0.0008880603709258139, - "learning_rate": 0.00019999981185303548, - "loss": 46.0, - "step": 8086 - }, - { - "epoch": 0.6183076246726685, - "grad_norm": 0.0014324372168630362, - "learning_rate": 0.0001999998118064385, - "loss": 46.0, - "step": 8087 - }, - { - "epoch": 0.6183840816560583, - "grad_norm": 0.001325063407421112, - "learning_rate": 0.00019999981175983575, - "loss": 46.0, - "step": 8088 - }, - { - "epoch": 0.6184605386394479, - "grad_norm": 0.006308611016720533, - "learning_rate": 0.00019999981171322725, - "loss": 46.0, - "step": 8089 - }, - { - "epoch": 0.6185369956228377, - "grad_norm": 0.0012192374560981989, - "learning_rate": 0.00019999981166661298, - "loss": 46.0, - "step": 8090 - }, - { - "epoch": 0.6186134526062275, - "grad_norm": 0.00036524387542158365, - "learning_rate": 0.00019999981161999294, - "loss": 46.0, - "step": 8091 - }, - { - "epoch": 0.6186899095896171, - "grad_norm": 0.0008525089360773563, - "learning_rate": 0.00019999981157336712, - "loss": 46.0, - "step": 8092 - }, - { - "epoch": 0.6187663665730069, - "grad_norm": 0.007759463042020798, - "learning_rate": 0.00019999981152673553, - "loss": 46.0, - "step": 8093 - }, - { - "epoch": 0.6188428235563966, - "grad_norm": 0.0008733192807994783, - "learning_rate": 0.0001999998114800982, - "loss": 46.0, - "step": 8094 - }, - { - "epoch": 0.6189192805397863, - "grad_norm": 0.0014871356543153524, - "learning_rate": 0.00019999981143345506, - "loss": 46.0, - "step": 8095 - }, - { - "epoch": 0.618995737523176, - "grad_norm": 0.00141401297878474, - "learning_rate": 0.00019999981138680618, - "loss": 46.0, - "step": 8096 - }, - { - "epoch": 0.6190721945065657, - "grad_norm": 0.0007656815578229725, - "learning_rate": 0.00019999981134015152, - "loss": 46.0, - "step": 8097 - }, - { - "epoch": 0.6191486514899555, - "grad_norm": 0.0015928716165944934, - "learning_rate": 0.00019999981129349107, - "loss": 46.0, - "step": 8098 - }, - { - "epoch": 0.6192251084733452, - "grad_norm": 0.0012576529989019036, - "learning_rate": 0.00019999981124682486, - "loss": 46.0, - "step": 8099 - }, - { - "epoch": 0.6193015654567349, - "grad_norm": 0.0030896265525370836, - "learning_rate": 0.00019999981120015292, - "loss": 46.0, - "step": 8100 - }, - { - "epoch": 0.6193780224401246, - "grad_norm": 0.010342762805521488, - "learning_rate": 0.00019999981115347517, - "loss": 46.0, - "step": 8101 - }, - { - "epoch": 0.6194544794235144, - "grad_norm": 0.0005783736705780029, - "learning_rate": 0.00019999981110679164, - "loss": 46.0, - "step": 8102 - }, - { - "epoch": 0.619530936406904, - "grad_norm": 0.002448518993332982, - "learning_rate": 0.00019999981106010237, - "loss": 46.0, - "step": 8103 - }, - { - "epoch": 0.6196073933902938, - "grad_norm": 0.0011705290526151657, - "learning_rate": 0.00019999981101340733, - "loss": 46.0, - "step": 8104 - }, - { - "epoch": 0.6196838503736835, - "grad_norm": 0.005643675569444895, - "learning_rate": 0.00019999981096670652, - "loss": 46.0, - "step": 8105 - }, - { - "epoch": 0.6197603073570732, - "grad_norm": 0.001870142761617899, - "learning_rate": 0.00019999981091999993, - "loss": 46.0, - "step": 8106 - }, - { - "epoch": 0.619836764340463, - "grad_norm": 0.00089590362040326, - "learning_rate": 0.0001999998108732876, - "loss": 46.0, - "step": 8107 - }, - { - "epoch": 0.6199132213238526, - "grad_norm": 0.0008799677598290145, - "learning_rate": 0.00019999981082656945, - "loss": 46.0, - "step": 8108 - }, - { - "epoch": 0.6199896783072424, - "grad_norm": 0.002297072671353817, - "learning_rate": 0.00019999981077984554, - "loss": 46.0, - "step": 8109 - }, - { - "epoch": 0.6200661352906321, - "grad_norm": 0.001265001017600298, - "learning_rate": 0.0001999998107331159, - "loss": 46.0, - "step": 8110 - }, - { - "epoch": 0.6201425922740218, - "grad_norm": 0.0022822138853371143, - "learning_rate": 0.00019999981068638046, - "loss": 46.0, - "step": 8111 - }, - { - "epoch": 0.6202190492574116, - "grad_norm": 0.0011523495195433497, - "learning_rate": 0.00019999981063963923, - "loss": 46.0, - "step": 8112 - }, - { - "epoch": 0.6202955062408013, - "grad_norm": 0.0010711951181292534, - "learning_rate": 0.00019999981059289228, - "loss": 46.0, - "step": 8113 - }, - { - "epoch": 0.620371963224191, - "grad_norm": 0.00091460911789909, - "learning_rate": 0.00019999981054613953, - "loss": 46.0, - "step": 8114 - }, - { - "epoch": 0.6204484202075807, - "grad_norm": 0.0050215148366987705, - "learning_rate": 0.00019999981049938104, - "loss": 46.0, - "step": 8115 - }, - { - "epoch": 0.6205248771909704, - "grad_norm": 0.0029333815909922123, - "learning_rate": 0.00019999981045261674, - "loss": 46.0, - "step": 8116 - }, - { - "epoch": 0.6206013341743601, - "grad_norm": 0.0007092771702446043, - "learning_rate": 0.0001999998104058467, - "loss": 46.0, - "step": 8117 - }, - { - "epoch": 0.6206777911577499, - "grad_norm": 0.0018388079479336739, - "learning_rate": 0.00019999981035907088, - "loss": 46.0, - "step": 8118 - }, - { - "epoch": 0.6207542481411396, - "grad_norm": 0.004218053538352251, - "learning_rate": 0.00019999981031228927, - "loss": 46.0, - "step": 8119 - }, - { - "epoch": 0.6208307051245293, - "grad_norm": 0.0007422819035127759, - "learning_rate": 0.00019999981026550193, - "loss": 46.0, - "step": 8120 - }, - { - "epoch": 0.6209071621079191, - "grad_norm": 0.0019210479222238064, - "learning_rate": 0.0001999998102187088, - "loss": 46.0, - "step": 8121 - }, - { - "epoch": 0.6209836190913087, - "grad_norm": 0.005070696584880352, - "learning_rate": 0.0001999998101719099, - "loss": 46.0, - "step": 8122 - }, - { - "epoch": 0.6210600760746985, - "grad_norm": 0.0004544100956991315, - "learning_rate": 0.00019999981012510523, - "loss": 46.0, - "step": 8123 - }, - { - "epoch": 0.6211365330580882, - "grad_norm": 0.0013448148965835571, - "learning_rate": 0.0001999998100782948, - "loss": 46.0, - "step": 8124 - }, - { - "epoch": 0.6212129900414779, - "grad_norm": 0.001338648027740419, - "learning_rate": 0.00019999981003147857, - "loss": 46.0, - "step": 8125 - }, - { - "epoch": 0.6212894470248677, - "grad_norm": 0.003315623616799712, - "learning_rate": 0.0001999998099846566, - "loss": 46.0, - "step": 8126 - }, - { - "epoch": 0.6213659040082573, - "grad_norm": 0.0006857843254692852, - "learning_rate": 0.00019999980993782885, - "loss": 46.0, - "step": 8127 - }, - { - "epoch": 0.6214423609916471, - "grad_norm": 0.0009068864746950567, - "learning_rate": 0.00019999980989099533, - "loss": 46.0, - "step": 8128 - }, - { - "epoch": 0.6215188179750368, - "grad_norm": 0.0015287484275177121, - "learning_rate": 0.00019999980984415608, - "loss": 46.0, - "step": 8129 - }, - { - "epoch": 0.6215952749584265, - "grad_norm": 0.0013803041074424982, - "learning_rate": 0.00019999980979731102, - "loss": 46.0, - "step": 8130 - }, - { - "epoch": 0.6216717319418162, - "grad_norm": 0.011175794526934624, - "learning_rate": 0.0001999998097504602, - "loss": 46.0, - "step": 8131 - }, - { - "epoch": 0.621748188925206, - "grad_norm": 0.0009191324934363365, - "learning_rate": 0.00019999980970360358, - "loss": 46.0, - "step": 8132 - }, - { - "epoch": 0.6218246459085957, - "grad_norm": 0.003293754532933235, - "learning_rate": 0.00019999980965674122, - "loss": 46.0, - "step": 8133 - }, - { - "epoch": 0.6219011028919854, - "grad_norm": 0.001244703191332519, - "learning_rate": 0.00019999980960987311, - "loss": 46.0, - "step": 8134 - }, - { - "epoch": 0.6219775598753751, - "grad_norm": 0.0016437621088698506, - "learning_rate": 0.0001999998095629992, - "loss": 46.0, - "step": 8135 - }, - { - "epoch": 0.6220540168587648, - "grad_norm": 0.001206371234729886, - "learning_rate": 0.00019999980951611955, - "loss": 46.0, - "step": 8136 - }, - { - "epoch": 0.6221304738421546, - "grad_norm": 0.0010340720182284713, - "learning_rate": 0.0001999998094692341, - "loss": 46.0, - "step": 8137 - }, - { - "epoch": 0.6222069308255442, - "grad_norm": 0.0015247574774548411, - "learning_rate": 0.0001999998094223429, - "loss": 46.0, - "step": 8138 - }, - { - "epoch": 0.622283387808934, - "grad_norm": 0.0011224700137972832, - "learning_rate": 0.00019999980937544592, - "loss": 46.0, - "step": 8139 - }, - { - "epoch": 0.6223598447923238, - "grad_norm": 0.0004623421700671315, - "learning_rate": 0.00019999980932854317, - "loss": 46.0, - "step": 8140 - }, - { - "epoch": 0.6224363017757134, - "grad_norm": 0.0005331024876795709, - "learning_rate": 0.00019999980928163465, - "loss": 46.0, - "step": 8141 - }, - { - "epoch": 0.6225127587591032, - "grad_norm": 0.005343261640518904, - "learning_rate": 0.00019999980923472035, - "loss": 46.0, - "step": 8142 - }, - { - "epoch": 0.6225892157424929, - "grad_norm": 0.0006872197845950723, - "learning_rate": 0.0001999998091878003, - "loss": 46.0, - "step": 8143 - }, - { - "epoch": 0.6226656727258826, - "grad_norm": 0.001922086114063859, - "learning_rate": 0.0001999998091408745, - "loss": 46.0, - "step": 8144 - }, - { - "epoch": 0.6227421297092723, - "grad_norm": 0.0020486663561314344, - "learning_rate": 0.00019999980909394288, - "loss": 46.0, - "step": 8145 - }, - { - "epoch": 0.622818586692662, - "grad_norm": 0.001683650421909988, - "learning_rate": 0.00019999980904700552, - "loss": 46.0, - "step": 8146 - }, - { - "epoch": 0.6228950436760518, - "grad_norm": 0.0016485704109072685, - "learning_rate": 0.00019999980900006236, - "loss": 46.0, - "step": 8147 - }, - { - "epoch": 0.6229715006594415, - "grad_norm": 0.0005011818720959127, - "learning_rate": 0.00019999980895311347, - "loss": 46.0, - "step": 8148 - }, - { - "epoch": 0.6230479576428312, - "grad_norm": 0.0012467411579564214, - "learning_rate": 0.00019999980890615882, - "loss": 46.0, - "step": 8149 - }, - { - "epoch": 0.6231244146262209, - "grad_norm": 0.0012904610484838486, - "learning_rate": 0.0001999998088591984, - "loss": 46.0, - "step": 8150 - }, - { - "epoch": 0.6232008716096107, - "grad_norm": 0.006659666076302528, - "learning_rate": 0.00019999980881223214, - "loss": 46.0, - "step": 8151 - }, - { - "epoch": 0.6232773285930003, - "grad_norm": 0.001498241676017642, - "learning_rate": 0.00019999980876526016, - "loss": 46.0, - "step": 8152 - }, - { - "epoch": 0.6233537855763901, - "grad_norm": 0.0018610622501000762, - "learning_rate": 0.00019999980871828244, - "loss": 46.0, - "step": 8153 - }, - { - "epoch": 0.6234302425597799, - "grad_norm": 0.0009072546381503344, - "learning_rate": 0.00019999980867129892, - "loss": 46.0, - "step": 8154 - }, - { - "epoch": 0.6235066995431695, - "grad_norm": 0.002177738817408681, - "learning_rate": 0.0001999998086243096, - "loss": 46.0, - "step": 8155 - }, - { - "epoch": 0.6235831565265593, - "grad_norm": 0.0026074019260704517, - "learning_rate": 0.00019999980857731455, - "loss": 46.0, - "step": 8156 - }, - { - "epoch": 0.6236596135099489, - "grad_norm": 0.0005360660725273192, - "learning_rate": 0.00019999980853031374, - "loss": 46.0, - "step": 8157 - }, - { - "epoch": 0.6237360704933387, - "grad_norm": 0.0013998214853927493, - "learning_rate": 0.00019999980848330712, - "loss": 46.0, - "step": 8158 - }, - { - "epoch": 0.6238125274767284, - "grad_norm": 0.0039712293073534966, - "learning_rate": 0.00019999980843629476, - "loss": 46.0, - "step": 8159 - }, - { - "epoch": 0.6238889844601181, - "grad_norm": 0.0010126747656613588, - "learning_rate": 0.00019999980838927663, - "loss": 46.0, - "step": 8160 - }, - { - "epoch": 0.6239654414435079, - "grad_norm": 0.0031328152399510145, - "learning_rate": 0.00019999980834225272, - "loss": 46.0, - "step": 8161 - }, - { - "epoch": 0.6240418984268976, - "grad_norm": 0.0005414151819422841, - "learning_rate": 0.000199999808295223, - "loss": 46.0, - "step": 8162 - }, - { - "epoch": 0.6241183554102873, - "grad_norm": 0.0018214802257716656, - "learning_rate": 0.00019999980824818758, - "loss": 46.0, - "step": 8163 - }, - { - "epoch": 0.624194812393677, - "grad_norm": 0.0006216823821887374, - "learning_rate": 0.00019999980820114635, - "loss": 46.0, - "step": 8164 - }, - { - "epoch": 0.6242712693770667, - "grad_norm": 0.0010871891863644123, - "learning_rate": 0.0001999998081540994, - "loss": 46.0, - "step": 8165 - }, - { - "epoch": 0.6243477263604564, - "grad_norm": 0.0006974285934120417, - "learning_rate": 0.00019999980810704663, - "loss": 46.0, - "step": 8166 - }, - { - "epoch": 0.6244241833438462, - "grad_norm": 0.001885958481580019, - "learning_rate": 0.00019999980805998814, - "loss": 46.0, - "step": 8167 - }, - { - "epoch": 0.6245006403272358, - "grad_norm": 0.002691855188459158, - "learning_rate": 0.00019999980801292382, - "loss": 46.0, - "step": 8168 - }, - { - "epoch": 0.6245770973106256, - "grad_norm": 0.000914715463295579, - "learning_rate": 0.00019999980796585378, - "loss": 46.0, - "step": 8169 - }, - { - "epoch": 0.6246535542940154, - "grad_norm": 0.010826588608324528, - "learning_rate": 0.00019999980791877793, - "loss": 46.0, - "step": 8170 - }, - { - "epoch": 0.624730011277405, - "grad_norm": 0.0010591283207759261, - "learning_rate": 0.00019999980787169635, - "loss": 46.0, - "step": 8171 - }, - { - "epoch": 0.6248064682607948, - "grad_norm": 0.0016578563954681158, - "learning_rate": 0.00019999980782460896, - "loss": 46.0, - "step": 8172 - }, - { - "epoch": 0.6248829252441845, - "grad_norm": 0.0008777394541539252, - "learning_rate": 0.00019999980777751582, - "loss": 46.0, - "step": 8173 - }, - { - "epoch": 0.6249593822275742, - "grad_norm": 0.012118935585021973, - "learning_rate": 0.00019999980773041692, - "loss": 46.0, - "step": 8174 - }, - { - "epoch": 0.625035839210964, - "grad_norm": 0.005874015856534243, - "learning_rate": 0.00019999980768331223, - "loss": 46.0, - "step": 8175 - }, - { - "epoch": 0.6251122961943536, - "grad_norm": 0.0074394322000443935, - "learning_rate": 0.00019999980763620178, - "loss": 46.0, - "step": 8176 - }, - { - "epoch": 0.6251887531777434, - "grad_norm": 0.002545940689742565, - "learning_rate": 0.00019999980758908555, - "loss": 46.0, - "step": 8177 - }, - { - "epoch": 0.6252652101611331, - "grad_norm": 0.003837330499663949, - "learning_rate": 0.0001999998075419636, - "loss": 46.0, - "step": 8178 - }, - { - "epoch": 0.6253416671445228, - "grad_norm": 0.0022016491275280714, - "learning_rate": 0.00019999980749483586, - "loss": 46.0, - "step": 8179 - }, - { - "epoch": 0.6254181241279125, - "grad_norm": 0.003395914565771818, - "learning_rate": 0.0001999998074477023, - "loss": 46.0, - "step": 8180 - }, - { - "epoch": 0.6254945811113023, - "grad_norm": 0.000628401234280318, - "learning_rate": 0.00019999980740056301, - "loss": 46.0, - "step": 8181 - }, - { - "epoch": 0.625571038094692, - "grad_norm": 0.001131386961787939, - "learning_rate": 0.00019999980735341795, - "loss": 46.0, - "step": 8182 - }, - { - "epoch": 0.6256474950780817, - "grad_norm": 0.0005662293406203389, - "learning_rate": 0.0001999998073062671, - "loss": 46.0, - "step": 8183 - }, - { - "epoch": 0.6257239520614715, - "grad_norm": 0.0011541598942130804, - "learning_rate": 0.0001999998072591105, - "loss": 46.0, - "step": 8184 - }, - { - "epoch": 0.6258004090448611, - "grad_norm": 0.0013944199308753014, - "learning_rate": 0.00019999980721194813, - "loss": 46.0, - "step": 8185 - }, - { - "epoch": 0.6258768660282509, - "grad_norm": 0.0013343654572963715, - "learning_rate": 0.00019999980716478, - "loss": 46.0, - "step": 8186 - }, - { - "epoch": 0.6259533230116405, - "grad_norm": 0.0031992613803595304, - "learning_rate": 0.0001999998071176061, - "loss": 46.0, - "step": 8187 - }, - { - "epoch": 0.6260297799950303, - "grad_norm": 0.0018886931939050555, - "learning_rate": 0.0001999998070704264, - "loss": 46.0, - "step": 8188 - }, - { - "epoch": 0.62610623697842, - "grad_norm": 0.00206000916659832, - "learning_rate": 0.00019999980702324095, - "loss": 46.0, - "step": 8189 - }, - { - "epoch": 0.6261826939618097, - "grad_norm": 0.0018751887837424874, - "learning_rate": 0.00019999980697604972, - "loss": 46.0, - "step": 8190 - }, - { - "epoch": 0.6262591509451995, - "grad_norm": 0.0003227642737329006, - "learning_rate": 0.00019999980692885275, - "loss": 46.0, - "step": 8191 - }, - { - "epoch": 0.6263356079285892, - "grad_norm": 0.007246968802064657, - "learning_rate": 0.00019999980688164998, - "loss": 46.0, - "step": 8192 - }, - { - "epoch": 0.6264120649119789, - "grad_norm": 0.0010604896815493703, - "learning_rate": 0.00019999980683444143, - "loss": 46.0, - "step": 8193 - }, - { - "epoch": 0.6264885218953686, - "grad_norm": 0.00804203748703003, - "learning_rate": 0.00019999980678722716, - "loss": 46.0, - "step": 8194 - }, - { - "epoch": 0.6265649788787584, - "grad_norm": 0.0009484539623372257, - "learning_rate": 0.0001999998067400071, - "loss": 46.0, - "step": 8195 - }, - { - "epoch": 0.626641435862148, - "grad_norm": 0.0018112839898094535, - "learning_rate": 0.00019999980669278125, - "loss": 46.0, - "step": 8196 - }, - { - "epoch": 0.6267178928455378, - "grad_norm": 0.003642122261226177, - "learning_rate": 0.00019999980664554964, - "loss": 46.0, - "step": 8197 - }, - { - "epoch": 0.6267943498289275, - "grad_norm": 0.0020308259408921003, - "learning_rate": 0.00019999980659831228, - "loss": 46.0, - "step": 8198 - }, - { - "epoch": 0.6268708068123172, - "grad_norm": 0.004944133572280407, - "learning_rate": 0.00019999980655106914, - "loss": 46.0, - "step": 8199 - }, - { - "epoch": 0.626947263795707, - "grad_norm": 0.0009574219002388418, - "learning_rate": 0.0001999998065038202, - "loss": 46.0, - "step": 8200 - }, - { - "epoch": 0.6270237207790966, - "grad_norm": 0.001406748779118061, - "learning_rate": 0.00019999980645656553, - "loss": 46.0, - "step": 8201 - }, - { - "epoch": 0.6271001777624864, - "grad_norm": 0.002005796879529953, - "learning_rate": 0.00019999980640930508, - "loss": 46.0, - "step": 8202 - }, - { - "epoch": 0.6271766347458761, - "grad_norm": 0.002738900948315859, - "learning_rate": 0.00019999980636203885, - "loss": 46.0, - "step": 8203 - }, - { - "epoch": 0.6272530917292658, - "grad_norm": 0.0008467985899187624, - "learning_rate": 0.00019999980631476687, - "loss": 46.0, - "step": 8204 - }, - { - "epoch": 0.6273295487126556, - "grad_norm": 0.004121803212910891, - "learning_rate": 0.0001999998062674891, - "loss": 46.0, - "step": 8205 - }, - { - "epoch": 0.6274060056960452, - "grad_norm": 0.002115719486027956, - "learning_rate": 0.00019999980622020558, - "loss": 46.0, - "step": 8206 - }, - { - "epoch": 0.627482462679435, - "grad_norm": 0.0015576917212456465, - "learning_rate": 0.00019999980617291626, - "loss": 46.0, - "step": 8207 - }, - { - "epoch": 0.6275589196628247, - "grad_norm": 0.0008969299378804862, - "learning_rate": 0.0001999998061256212, - "loss": 46.0, - "step": 8208 - }, - { - "epoch": 0.6276353766462144, - "grad_norm": 0.000680507393553853, - "learning_rate": 0.00019999980607832035, - "loss": 46.0, - "step": 8209 - }, - { - "epoch": 0.6277118336296041, - "grad_norm": 0.0009740458917804062, - "learning_rate": 0.00019999980603101376, - "loss": 46.0, - "step": 8210 - }, - { - "epoch": 0.6277882906129939, - "grad_norm": 0.0013327578781172633, - "learning_rate": 0.00019999980598370138, - "loss": 46.0, - "step": 8211 - }, - { - "epoch": 0.6278647475963836, - "grad_norm": 0.01327575370669365, - "learning_rate": 0.00019999980593638324, - "loss": 46.0, - "step": 8212 - }, - { - "epoch": 0.6279412045797733, - "grad_norm": 0.0007239340338855982, - "learning_rate": 0.0001999998058890593, - "loss": 46.0, - "step": 8213 - }, - { - "epoch": 0.6280176615631631, - "grad_norm": 0.0015642091166228056, - "learning_rate": 0.0001999998058417296, - "loss": 46.0, - "step": 8214 - }, - { - "epoch": 0.6280941185465527, - "grad_norm": 0.0007748593343421817, - "learning_rate": 0.00019999980579439415, - "loss": 46.0, - "step": 8215 - }, - { - "epoch": 0.6281705755299425, - "grad_norm": 0.007889986969530582, - "learning_rate": 0.00019999980574705295, - "loss": 46.0, - "step": 8216 - }, - { - "epoch": 0.6282470325133321, - "grad_norm": 0.0004565949202515185, - "learning_rate": 0.00019999980569970592, - "loss": 46.0, - "step": 8217 - }, - { - "epoch": 0.6283234894967219, - "grad_norm": 0.0015378898242488503, - "learning_rate": 0.0001999998056523532, - "loss": 46.0, - "step": 8218 - }, - { - "epoch": 0.6283999464801117, - "grad_norm": 0.000738327915314585, - "learning_rate": 0.00019999980560499463, - "loss": 46.0, - "step": 8219 - }, - { - "epoch": 0.6284764034635013, - "grad_norm": 0.0005164701724424958, - "learning_rate": 0.00019999980555763033, - "loss": 46.0, - "step": 8220 - }, - { - "epoch": 0.6285528604468911, - "grad_norm": 0.0018236987525597215, - "learning_rate": 0.00019999980551026027, - "loss": 46.0, - "step": 8221 - }, - { - "epoch": 0.6286293174302808, - "grad_norm": 0.000812305836006999, - "learning_rate": 0.00019999980546288443, - "loss": 46.0, - "step": 8222 - }, - { - "epoch": 0.6287057744136705, - "grad_norm": 0.0007858831668272614, - "learning_rate": 0.00019999980541550284, - "loss": 46.0, - "step": 8223 - }, - { - "epoch": 0.6287822313970602, - "grad_norm": 0.00037164639797993004, - "learning_rate": 0.00019999980536811545, - "loss": 46.0, - "step": 8224 - }, - { - "epoch": 0.62885868838045, - "grad_norm": 0.0006847066688351333, - "learning_rate": 0.0001999998053207223, - "loss": 46.0, - "step": 8225 - }, - { - "epoch": 0.6289351453638397, - "grad_norm": 0.002156647387892008, - "learning_rate": 0.00019999980527332336, - "loss": 46.0, - "step": 8226 - }, - { - "epoch": 0.6290116023472294, - "grad_norm": 0.0020973009523004293, - "learning_rate": 0.00019999980522591865, - "loss": 46.0, - "step": 8227 - }, - { - "epoch": 0.6290880593306191, - "grad_norm": 0.0009336823131889105, - "learning_rate": 0.00019999980517850823, - "loss": 46.0, - "step": 8228 - }, - { - "epoch": 0.6291645163140088, - "grad_norm": 0.002602536929771304, - "learning_rate": 0.00019999980513109197, - "loss": 46.0, - "step": 8229 - }, - { - "epoch": 0.6292409732973986, - "grad_norm": 0.0010431433329358697, - "learning_rate": 0.00019999980508367, - "loss": 46.0, - "step": 8230 - }, - { - "epoch": 0.6293174302807882, - "grad_norm": 0.002886660862714052, - "learning_rate": 0.0001999998050362422, - "loss": 46.0, - "step": 8231 - }, - { - "epoch": 0.629393887264178, - "grad_norm": 0.0011985368328168988, - "learning_rate": 0.00019999980498880868, - "loss": 46.0, - "step": 8232 - }, - { - "epoch": 0.6294703442475678, - "grad_norm": 0.004041264299303293, - "learning_rate": 0.00019999980494136938, - "loss": 46.0, - "step": 8233 - }, - { - "epoch": 0.6295468012309574, - "grad_norm": 0.0075932457111775875, - "learning_rate": 0.00019999980489392432, - "loss": 46.0, - "step": 8234 - }, - { - "epoch": 0.6296232582143472, - "grad_norm": 0.0013405568897724152, - "learning_rate": 0.00019999980484647348, - "loss": 46.0, - "step": 8235 - }, - { - "epoch": 0.6296997151977368, - "grad_norm": 0.001531782210804522, - "learning_rate": 0.00019999980479901687, - "loss": 46.0, - "step": 8236 - }, - { - "epoch": 0.6297761721811266, - "grad_norm": 0.0022413202095776796, - "learning_rate": 0.00019999980475155445, - "loss": 46.0, - "step": 8237 - }, - { - "epoch": 0.6298526291645163, - "grad_norm": 0.0007129863370209932, - "learning_rate": 0.0001999998047040863, - "loss": 46.0, - "step": 8238 - }, - { - "epoch": 0.629929086147906, - "grad_norm": 0.0027562961913645267, - "learning_rate": 0.00019999980465661239, - "loss": 46.0, - "step": 8239 - }, - { - "epoch": 0.6300055431312958, - "grad_norm": 0.002757111331447959, - "learning_rate": 0.0001999998046091327, - "loss": 46.0, - "step": 8240 - }, - { - "epoch": 0.6300820001146855, - "grad_norm": 0.0022866460494697094, - "learning_rate": 0.00019999980456164723, - "loss": 46.0, - "step": 8241 - }, - { - "epoch": 0.6301584570980752, - "grad_norm": 0.0006270198500715196, - "learning_rate": 0.000199999804514156, - "loss": 46.0, - "step": 8242 - }, - { - "epoch": 0.6302349140814649, - "grad_norm": 0.0008527711615897715, - "learning_rate": 0.000199999804466659, - "loss": 46.0, - "step": 8243 - }, - { - "epoch": 0.6303113710648547, - "grad_norm": 0.0011923557613044977, - "learning_rate": 0.00019999980441915623, - "loss": 46.0, - "step": 8244 - }, - { - "epoch": 0.6303878280482443, - "grad_norm": 0.0007259712438099086, - "learning_rate": 0.00019999980437164768, - "loss": 46.0, - "step": 8245 - }, - { - "epoch": 0.6304642850316341, - "grad_norm": 0.0030345323029905558, - "learning_rate": 0.00019999980432413339, - "loss": 46.0, - "step": 8246 - }, - { - "epoch": 0.6305407420150237, - "grad_norm": 0.0038378837052732706, - "learning_rate": 0.00019999980427661332, - "loss": 46.0, - "step": 8247 - }, - { - "epoch": 0.6306171989984135, - "grad_norm": 0.002348220907151699, - "learning_rate": 0.00019999980422908745, - "loss": 46.0, - "step": 8248 - }, - { - "epoch": 0.6306936559818033, - "grad_norm": 0.0005703881615772843, - "learning_rate": 0.00019999980418155584, - "loss": 46.0, - "step": 8249 - }, - { - "epoch": 0.6307701129651929, - "grad_norm": 0.0022459183819592, - "learning_rate": 0.00019999980413401845, - "loss": 46.0, - "step": 8250 - }, - { - "epoch": 0.6308465699485827, - "grad_norm": 0.001081667491234839, - "learning_rate": 0.0001999998040864753, - "loss": 46.0, - "step": 8251 - }, - { - "epoch": 0.6309230269319724, - "grad_norm": 0.0026280456222593784, - "learning_rate": 0.0001999998040389264, - "loss": 46.0, - "step": 8252 - }, - { - "epoch": 0.6309994839153621, - "grad_norm": 0.0006093858974054456, - "learning_rate": 0.00019999980399137165, - "loss": 46.0, - "step": 8253 - }, - { - "epoch": 0.6310759408987519, - "grad_norm": 0.0010126649867743254, - "learning_rate": 0.0001999998039438112, - "loss": 46.0, - "step": 8254 - }, - { - "epoch": 0.6311523978821416, - "grad_norm": 0.0009760482935234904, - "learning_rate": 0.00019999980389624498, - "loss": 46.0, - "step": 8255 - }, - { - "epoch": 0.6312288548655313, - "grad_norm": 0.00223044422455132, - "learning_rate": 0.00019999980384867295, - "loss": 46.0, - "step": 8256 - }, - { - "epoch": 0.631305311848921, - "grad_norm": 0.0011948707979172468, - "learning_rate": 0.0001999998038010952, - "loss": 46.0, - "step": 8257 - }, - { - "epoch": 0.6313817688323107, - "grad_norm": 0.0007559588411822915, - "learning_rate": 0.00019999980375351166, - "loss": 46.0, - "step": 8258 - }, - { - "epoch": 0.6314582258157004, - "grad_norm": 0.0055906991474330425, - "learning_rate": 0.00019999980370592234, - "loss": 46.0, - "step": 8259 - }, - { - "epoch": 0.6315346827990902, - "grad_norm": 0.0008051712648011744, - "learning_rate": 0.00019999980365832727, - "loss": 46.0, - "step": 8260 - }, - { - "epoch": 0.6316111397824798, - "grad_norm": 0.0005880105891264975, - "learning_rate": 0.0001999998036107264, - "loss": 46.0, - "step": 8261 - }, - { - "epoch": 0.6316875967658696, - "grad_norm": 0.0015040229773148894, - "learning_rate": 0.0001999998035631198, - "loss": 46.0, - "step": 8262 - }, - { - "epoch": 0.6317640537492594, - "grad_norm": 0.0010844470234587789, - "learning_rate": 0.00019999980351550743, - "loss": 46.0, - "step": 8263 - }, - { - "epoch": 0.631840510732649, - "grad_norm": 0.0018527957145124674, - "learning_rate": 0.00019999980346788925, - "loss": 46.0, - "step": 8264 - }, - { - "epoch": 0.6319169677160388, - "grad_norm": 0.0013868644600734115, - "learning_rate": 0.00019999980342026531, - "loss": 46.0, - "step": 8265 - }, - { - "epoch": 0.6319934246994284, - "grad_norm": 0.0009188971016556025, - "learning_rate": 0.0001999998033726356, - "loss": 46.0, - "step": 8266 - }, - { - "epoch": 0.6320698816828182, - "grad_norm": 0.0010394947603344917, - "learning_rate": 0.00019999980332500016, - "loss": 46.0, - "step": 8267 - }, - { - "epoch": 0.632146338666208, - "grad_norm": 0.0007365021738223732, - "learning_rate": 0.00019999980327735893, - "loss": 46.0, - "step": 8268 - }, - { - "epoch": 0.6322227956495976, - "grad_norm": 0.0018793733324855566, - "learning_rate": 0.0001999998032297119, - "loss": 46.0, - "step": 8269 - }, - { - "epoch": 0.6322992526329874, - "grad_norm": 0.0008051790064200759, - "learning_rate": 0.00019999980318205913, - "loss": 46.0, - "step": 8270 - }, - { - "epoch": 0.6323757096163771, - "grad_norm": 0.000983710866421461, - "learning_rate": 0.00019999980313440059, - "loss": 46.0, - "step": 8271 - }, - { - "epoch": 0.6324521665997668, - "grad_norm": 0.0017727516824379563, - "learning_rate": 0.00019999980308673627, - "loss": 46.0, - "step": 8272 - }, - { - "epoch": 0.6325286235831565, - "grad_norm": 0.0009731962345540524, - "learning_rate": 0.00019999980303906617, - "loss": 46.0, - "step": 8273 - }, - { - "epoch": 0.6326050805665463, - "grad_norm": 0.0012013515224680305, - "learning_rate": 0.00019999980299139033, - "loss": 46.0, - "step": 8274 - }, - { - "epoch": 0.632681537549936, - "grad_norm": 0.0020780726335942745, - "learning_rate": 0.00019999980294370872, - "loss": 46.0, - "step": 8275 - }, - { - "epoch": 0.6327579945333257, - "grad_norm": 0.008914541453123093, - "learning_rate": 0.0001999998028960213, - "loss": 46.0, - "step": 8276 - }, - { - "epoch": 0.6328344515167154, - "grad_norm": 0.0010731208603829145, - "learning_rate": 0.00019999980284832818, - "loss": 46.0, - "step": 8277 - }, - { - "epoch": 0.6329109085001051, - "grad_norm": 0.00032802208443172276, - "learning_rate": 0.00019999980280062922, - "loss": 46.0, - "step": 8278 - }, - { - "epoch": 0.6329873654834949, - "grad_norm": 0.004165690392255783, - "learning_rate": 0.00019999980275292454, - "loss": 46.0, - "step": 8279 - }, - { - "epoch": 0.6330638224668845, - "grad_norm": 0.0015048672212287784, - "learning_rate": 0.0001999998027052141, - "loss": 46.0, - "step": 8280 - }, - { - "epoch": 0.6331402794502743, - "grad_norm": 0.0012001678114756942, - "learning_rate": 0.00019999980265749783, - "loss": 46.0, - "step": 8281 - }, - { - "epoch": 0.633216736433664, - "grad_norm": 0.0010596668580546975, - "learning_rate": 0.00019999980260977584, - "loss": 46.0, - "step": 8282 - }, - { - "epoch": 0.6332931934170537, - "grad_norm": 0.0011254140408709645, - "learning_rate": 0.000199999802562048, - "loss": 46.0, - "step": 8283 - }, - { - "epoch": 0.6333696504004435, - "grad_norm": 0.0038946825079619884, - "learning_rate": 0.0001999998025143145, - "loss": 46.0, - "step": 8284 - }, - { - "epoch": 0.6334461073838332, - "grad_norm": 0.000450453459052369, - "learning_rate": 0.00019999980246657517, - "loss": 46.0, - "step": 8285 - }, - { - "epoch": 0.6335225643672229, - "grad_norm": 0.0007265485473908484, - "learning_rate": 0.0001999998024188301, - "loss": 46.0, - "step": 8286 - }, - { - "epoch": 0.6335990213506126, - "grad_norm": 0.001173571334220469, - "learning_rate": 0.0001999998023710792, - "loss": 46.0, - "step": 8287 - }, - { - "epoch": 0.6336754783340023, - "grad_norm": 0.000545788963790983, - "learning_rate": 0.0001999998023233226, - "loss": 46.0, - "step": 8288 - }, - { - "epoch": 0.633751935317392, - "grad_norm": 0.0032594939693808556, - "learning_rate": 0.00019999980227556022, - "loss": 46.0, - "step": 8289 - }, - { - "epoch": 0.6338283923007818, - "grad_norm": 0.003350586863234639, - "learning_rate": 0.00019999980222779206, - "loss": 46.0, - "step": 8290 - }, - { - "epoch": 0.6339048492841715, - "grad_norm": 0.0007236524834297597, - "learning_rate": 0.0001999998021800181, - "loss": 46.0, - "step": 8291 - }, - { - "epoch": 0.6339813062675612, - "grad_norm": 0.0021102663595229387, - "learning_rate": 0.00019999980213223842, - "loss": 46.0, - "step": 8292 - }, - { - "epoch": 0.634057763250951, - "grad_norm": 0.0012011940125375986, - "learning_rate": 0.00019999980208445294, - "loss": 46.0, - "step": 8293 - }, - { - "epoch": 0.6341342202343406, - "grad_norm": 0.0018167115049436688, - "learning_rate": 0.00019999980203666171, - "loss": 46.0, - "step": 8294 - }, - { - "epoch": 0.6342106772177304, - "grad_norm": 0.0015411480562761426, - "learning_rate": 0.0001999998019888647, - "loss": 46.0, - "step": 8295 - }, - { - "epoch": 0.63428713420112, - "grad_norm": 0.0023441424127668142, - "learning_rate": 0.00019999980194106192, - "loss": 46.0, - "step": 8296 - }, - { - "epoch": 0.6343635911845098, - "grad_norm": 0.0012845075689256191, - "learning_rate": 0.00019999980189325334, - "loss": 46.0, - "step": 8297 - }, - { - "epoch": 0.6344400481678996, - "grad_norm": 0.0010803508339449763, - "learning_rate": 0.00019999980184543903, - "loss": 46.0, - "step": 8298 - }, - { - "epoch": 0.6345165051512892, - "grad_norm": 0.002772475127130747, - "learning_rate": 0.00019999980179761893, - "loss": 46.0, - "step": 8299 - }, - { - "epoch": 0.634592962134679, - "grad_norm": 0.0005241383914835751, - "learning_rate": 0.0001999998017497931, - "loss": 46.0, - "step": 8300 - }, - { - "epoch": 0.6346694191180687, - "grad_norm": 0.001759040867909789, - "learning_rate": 0.00019999980170196143, - "loss": 46.0, - "step": 8301 - }, - { - "epoch": 0.6347458761014584, - "grad_norm": 0.0011967982864007354, - "learning_rate": 0.00019999980165412407, - "loss": 46.0, - "step": 8302 - }, - { - "epoch": 0.6348223330848481, - "grad_norm": 0.0032175721134990454, - "learning_rate": 0.00019999980160628089, - "loss": 46.0, - "step": 8303 - }, - { - "epoch": 0.6348987900682379, - "grad_norm": 0.0010704339947551489, - "learning_rate": 0.00019999980155843195, - "loss": 46.0, - "step": 8304 - }, - { - "epoch": 0.6349752470516276, - "grad_norm": 0.01669190265238285, - "learning_rate": 0.00019999980151057725, - "loss": 46.0, - "step": 8305 - }, - { - "epoch": 0.6350517040350173, - "grad_norm": 0.008821866475045681, - "learning_rate": 0.00019999980146271677, - "loss": 46.0, - "step": 8306 - }, - { - "epoch": 0.635128161018407, - "grad_norm": 0.001014237990602851, - "learning_rate": 0.00019999980141485052, - "loss": 46.0, - "step": 8307 - }, - { - "epoch": 0.6352046180017967, - "grad_norm": 0.000294274854240939, - "learning_rate": 0.00019999980136697852, - "loss": 46.0, - "step": 8308 - }, - { - "epoch": 0.6352810749851865, - "grad_norm": 0.004478603135794401, - "learning_rate": 0.00019999980131910072, - "loss": 46.0, - "step": 8309 - }, - { - "epoch": 0.6353575319685761, - "grad_norm": 0.002247787779197097, - "learning_rate": 0.00019999980127121718, - "loss": 46.0, - "step": 8310 - }, - { - "epoch": 0.6354339889519659, - "grad_norm": 0.0011244042543694377, - "learning_rate": 0.00019999980122332786, - "loss": 46.0, - "step": 8311 - }, - { - "epoch": 0.6355104459353557, - "grad_norm": 0.0011448069708421826, - "learning_rate": 0.00019999980117543277, - "loss": 46.0, - "step": 8312 - }, - { - "epoch": 0.6355869029187453, - "grad_norm": 0.0018306816928088665, - "learning_rate": 0.00019999980112753193, - "loss": 46.0, - "step": 8313 - }, - { - "epoch": 0.6356633599021351, - "grad_norm": 0.0006840021233074367, - "learning_rate": 0.00019999980107962526, - "loss": 46.0, - "step": 8314 - }, - { - "epoch": 0.6357398168855248, - "grad_norm": 0.0007227238384075463, - "learning_rate": 0.00019999980103171288, - "loss": 46.0, - "step": 8315 - }, - { - "epoch": 0.6358162738689145, - "grad_norm": 0.0007279314449988306, - "learning_rate": 0.00019999980098379472, - "loss": 46.0, - "step": 8316 - }, - { - "epoch": 0.6358927308523042, - "grad_norm": 0.004097754135727882, - "learning_rate": 0.00019999980093587076, - "loss": 46.0, - "step": 8317 - }, - { - "epoch": 0.6359691878356939, - "grad_norm": 0.001811531838029623, - "learning_rate": 0.00019999980088794106, - "loss": 46.0, - "step": 8318 - }, - { - "epoch": 0.6360456448190837, - "grad_norm": 0.0006689854781143367, - "learning_rate": 0.00019999980084000558, - "loss": 46.0, - "step": 8319 - }, - { - "epoch": 0.6361221018024734, - "grad_norm": 0.005581785459071398, - "learning_rate": 0.00019999980079206433, - "loss": 46.0, - "step": 8320 - }, - { - "epoch": 0.6361985587858631, - "grad_norm": 0.0024075352121144533, - "learning_rate": 0.00019999980074411733, - "loss": 46.0, - "step": 8321 - }, - { - "epoch": 0.6362750157692528, - "grad_norm": 0.005853780545294285, - "learning_rate": 0.00019999980069616453, - "loss": 46.0, - "step": 8322 - }, - { - "epoch": 0.6363514727526426, - "grad_norm": 0.0012639662018045783, - "learning_rate": 0.00019999980064820596, - "loss": 46.0, - "step": 8323 - }, - { - "epoch": 0.6364279297360322, - "grad_norm": 0.0008265635115094483, - "learning_rate": 0.00019999980060024164, - "loss": 46.0, - "step": 8324 - }, - { - "epoch": 0.636504386719422, - "grad_norm": 0.0022197996731847525, - "learning_rate": 0.00019999980055227155, - "loss": 46.0, - "step": 8325 - }, - { - "epoch": 0.6365808437028118, - "grad_norm": 0.0010472809663042426, - "learning_rate": 0.0001999998005042957, - "loss": 46.0, - "step": 8326 - }, - { - "epoch": 0.6366573006862014, - "grad_norm": 0.0014795337338000536, - "learning_rate": 0.00019999980045631405, - "loss": 46.0, - "step": 8327 - }, - { - "epoch": 0.6367337576695912, - "grad_norm": 0.0063463435508310795, - "learning_rate": 0.00019999980040832663, - "loss": 46.0, - "step": 8328 - }, - { - "epoch": 0.6368102146529808, - "grad_norm": 0.0015335968928411603, - "learning_rate": 0.00019999980036033348, - "loss": 46.0, - "step": 8329 - }, - { - "epoch": 0.6368866716363706, - "grad_norm": 0.00292824674397707, - "learning_rate": 0.00019999980031233455, - "loss": 46.0, - "step": 8330 - }, - { - "epoch": 0.6369631286197603, - "grad_norm": 0.002795613370835781, - "learning_rate": 0.00019999980026432984, - "loss": 46.0, - "step": 8331 - }, - { - "epoch": 0.63703958560315, - "grad_norm": 0.002934791147708893, - "learning_rate": 0.00019999980021631934, - "loss": 46.0, - "step": 8332 - }, - { - "epoch": 0.6371160425865398, - "grad_norm": 0.00047825308865867555, - "learning_rate": 0.00019999980016830308, - "loss": 46.0, - "step": 8333 - }, - { - "epoch": 0.6371924995699295, - "grad_norm": 0.0004855692677665502, - "learning_rate": 0.00019999980012028106, - "loss": 46.0, - "step": 8334 - }, - { - "epoch": 0.6372689565533192, - "grad_norm": 0.0018347181612625718, - "learning_rate": 0.0001999998000722533, - "loss": 46.0, - "step": 8335 - }, - { - "epoch": 0.6373454135367089, - "grad_norm": 0.0009170413250103593, - "learning_rate": 0.00019999980002421972, - "loss": 46.0, - "step": 8336 - }, - { - "epoch": 0.6374218705200986, - "grad_norm": 0.0011994520900771022, - "learning_rate": 0.0001999997999761804, - "loss": 46.0, - "step": 8337 - }, - { - "epoch": 0.6374983275034883, - "grad_norm": 0.0035632005892693996, - "learning_rate": 0.00019999979992813528, - "loss": 46.0, - "step": 8338 - }, - { - "epoch": 0.6375747844868781, - "grad_norm": 0.0035316585563123226, - "learning_rate": 0.00019999979988008444, - "loss": 46.0, - "step": 8339 - }, - { - "epoch": 0.6376512414702677, - "grad_norm": 0.001511100446805358, - "learning_rate": 0.00019999979983202778, - "loss": 46.0, - "step": 8340 - }, - { - "epoch": 0.6377276984536575, - "grad_norm": 0.0014686223585158587, - "learning_rate": 0.0001999997997839654, - "loss": 46.0, - "step": 8341 - }, - { - "epoch": 0.6378041554370473, - "grad_norm": 0.0008743414655327797, - "learning_rate": 0.0001999997997358972, - "loss": 46.0, - "step": 8342 - }, - { - "epoch": 0.6378806124204369, - "grad_norm": 0.0006099130841903389, - "learning_rate": 0.00019999979968782325, - "loss": 46.0, - "step": 8343 - }, - { - "epoch": 0.6379570694038267, - "grad_norm": 0.001428297022357583, - "learning_rate": 0.00019999979963974355, - "loss": 46.0, - "step": 8344 - }, - { - "epoch": 0.6380335263872164, - "grad_norm": 0.0022314409725368023, - "learning_rate": 0.00019999979959165807, - "loss": 46.0, - "step": 8345 - }, - { - "epoch": 0.6381099833706061, - "grad_norm": 0.0009802368003875017, - "learning_rate": 0.00019999979954356682, - "loss": 46.0, - "step": 8346 - }, - { - "epoch": 0.6381864403539959, - "grad_norm": 0.0011869882000610232, - "learning_rate": 0.0001999997994954698, - "loss": 46.0, - "step": 8347 - }, - { - "epoch": 0.6382628973373855, - "grad_norm": 0.002710545901209116, - "learning_rate": 0.000199999799447367, - "loss": 46.0, - "step": 8348 - }, - { - "epoch": 0.6383393543207753, - "grad_norm": 0.0013702786527574062, - "learning_rate": 0.00019999979939925843, - "loss": 46.0, - "step": 8349 - }, - { - "epoch": 0.638415811304165, - "grad_norm": 0.005586983636021614, - "learning_rate": 0.00019999979935114414, - "loss": 46.0, - "step": 8350 - }, - { - "epoch": 0.6384922682875547, - "grad_norm": 0.001324632903560996, - "learning_rate": 0.00019999979930302402, - "loss": 46.0, - "step": 8351 - }, - { - "epoch": 0.6385687252709444, - "grad_norm": 0.0011119857663288713, - "learning_rate": 0.00019999979925489813, - "loss": 46.0, - "step": 8352 - }, - { - "epoch": 0.6386451822543342, - "grad_norm": 0.001104339025914669, - "learning_rate": 0.00019999979920676652, - "loss": 46.0, - "step": 8353 - }, - { - "epoch": 0.6387216392377238, - "grad_norm": 0.002143976278603077, - "learning_rate": 0.0001999997991586291, - "loss": 46.0, - "step": 8354 - }, - { - "epoch": 0.6387980962211136, - "grad_norm": 0.006432107649743557, - "learning_rate": 0.00019999979911048592, - "loss": 46.0, - "step": 8355 - }, - { - "epoch": 0.6388745532045034, - "grad_norm": 0.00257153925485909, - "learning_rate": 0.00019999979906233696, - "loss": 46.0, - "step": 8356 - }, - { - "epoch": 0.638951010187893, - "grad_norm": 0.0016004828503355384, - "learning_rate": 0.00019999979901418223, - "loss": 46.0, - "step": 8357 - }, - { - "epoch": 0.6390274671712828, - "grad_norm": 0.0038370173424482346, - "learning_rate": 0.00019999979896602175, - "loss": 46.0, - "step": 8358 - }, - { - "epoch": 0.6391039241546724, - "grad_norm": 0.0010702681029215455, - "learning_rate": 0.00019999979891785553, - "loss": 46.0, - "step": 8359 - }, - { - "epoch": 0.6391803811380622, - "grad_norm": 0.0013894864823669195, - "learning_rate": 0.00019999979886968348, - "loss": 46.0, - "step": 8360 - }, - { - "epoch": 0.639256838121452, - "grad_norm": 0.000811832258477807, - "learning_rate": 0.0001999997988215057, - "loss": 46.0, - "step": 8361 - }, - { - "epoch": 0.6393332951048416, - "grad_norm": 0.003757540602236986, - "learning_rate": 0.0001999997987733221, - "loss": 46.0, - "step": 8362 - }, - { - "epoch": 0.6394097520882314, - "grad_norm": 0.00047705232282169163, - "learning_rate": 0.0001999997987251328, - "loss": 46.0, - "step": 8363 - }, - { - "epoch": 0.6394862090716211, - "grad_norm": 0.0007783086621202528, - "learning_rate": 0.0001999997986769377, - "loss": 46.0, - "step": 8364 - }, - { - "epoch": 0.6395626660550108, - "grad_norm": 0.0009130511316470802, - "learning_rate": 0.0001999997986287368, - "loss": 46.0, - "step": 8365 - }, - { - "epoch": 0.6396391230384005, - "grad_norm": 0.0010191791225224733, - "learning_rate": 0.00019999979858053018, - "loss": 46.0, - "step": 8366 - }, - { - "epoch": 0.6397155800217902, - "grad_norm": 0.0006976840668357909, - "learning_rate": 0.00019999979853231776, - "loss": 46.0, - "step": 8367 - }, - { - "epoch": 0.63979203700518, - "grad_norm": 0.0009047634666785598, - "learning_rate": 0.00019999979848409958, - "loss": 46.0, - "step": 8368 - }, - { - "epoch": 0.6398684939885697, - "grad_norm": 0.002331351861357689, - "learning_rate": 0.00019999979843587565, - "loss": 46.0, - "step": 8369 - }, - { - "epoch": 0.6399449509719594, - "grad_norm": 0.008381582796573639, - "learning_rate": 0.00019999979838764592, - "loss": 46.0, - "step": 8370 - }, - { - "epoch": 0.6400214079553491, - "grad_norm": 0.001435493933968246, - "learning_rate": 0.00019999979833941042, - "loss": 46.0, - "step": 8371 - }, - { - "epoch": 0.6400978649387389, - "grad_norm": 0.0012833019718527794, - "learning_rate": 0.00019999979829116916, - "loss": 46.0, - "step": 8372 - }, - { - "epoch": 0.6401743219221285, - "grad_norm": 0.0005923905991949141, - "learning_rate": 0.00019999979824292211, - "loss": 46.0, - "step": 8373 - }, - { - "epoch": 0.6402507789055183, - "grad_norm": 0.0009728985605761409, - "learning_rate": 0.00019999979819466934, - "loss": 46.0, - "step": 8374 - }, - { - "epoch": 0.640327235888908, - "grad_norm": 0.0011056882794946432, - "learning_rate": 0.00019999979814641077, - "loss": 46.0, - "step": 8375 - }, - { - "epoch": 0.6404036928722977, - "grad_norm": 0.001359409885481, - "learning_rate": 0.00019999979809814643, - "loss": 46.0, - "step": 8376 - }, - { - "epoch": 0.6404801498556875, - "grad_norm": 0.002422122750431299, - "learning_rate": 0.0001999997980498763, - "loss": 46.0, - "step": 8377 - }, - { - "epoch": 0.6405566068390771, - "grad_norm": 0.00031479887547902763, - "learning_rate": 0.00019999979800160045, - "loss": 46.0, - "step": 8378 - }, - { - "epoch": 0.6406330638224669, - "grad_norm": 0.011981228366494179, - "learning_rate": 0.0001999997979533188, - "loss": 46.0, - "step": 8379 - }, - { - "epoch": 0.6407095208058566, - "grad_norm": 0.0006490522646345198, - "learning_rate": 0.00019999979790503137, - "loss": 46.0, - "step": 8380 - }, - { - "epoch": 0.6407859777892463, - "grad_norm": 0.0013353211106732488, - "learning_rate": 0.00019999979785673822, - "loss": 46.0, - "step": 8381 - }, - { - "epoch": 0.640862434772636, - "grad_norm": 0.0013245070585981011, - "learning_rate": 0.00019999979780843926, - "loss": 46.0, - "step": 8382 - }, - { - "epoch": 0.6409388917560258, - "grad_norm": 0.001455620746128261, - "learning_rate": 0.00019999979776013453, - "loss": 46.0, - "step": 8383 - }, - { - "epoch": 0.6410153487394155, - "grad_norm": 0.000765423581469804, - "learning_rate": 0.00019999979771182405, - "loss": 46.0, - "step": 8384 - }, - { - "epoch": 0.6410918057228052, - "grad_norm": 0.0006489277584478259, - "learning_rate": 0.00019999979766350778, - "loss": 46.0, - "step": 8385 - }, - { - "epoch": 0.641168262706195, - "grad_norm": 0.0006296727806329727, - "learning_rate": 0.00019999979761518578, - "loss": 46.0, - "step": 8386 - }, - { - "epoch": 0.6412447196895846, - "grad_norm": 0.012314872816205025, - "learning_rate": 0.00019999979756685793, - "loss": 46.0, - "step": 8387 - }, - { - "epoch": 0.6413211766729744, - "grad_norm": 0.0015245723770931363, - "learning_rate": 0.00019999979751852439, - "loss": 46.0, - "step": 8388 - }, - { - "epoch": 0.641397633656364, - "grad_norm": 0.00026419066125527024, - "learning_rate": 0.00019999979747018504, - "loss": 46.0, - "step": 8389 - }, - { - "epoch": 0.6414740906397538, - "grad_norm": 0.0019564004614949226, - "learning_rate": 0.00019999979742183995, - "loss": 46.0, - "step": 8390 - }, - { - "epoch": 0.6415505476231436, - "grad_norm": 0.0019270264310762286, - "learning_rate": 0.00019999979737348906, - "loss": 46.0, - "step": 8391 - }, - { - "epoch": 0.6416270046065332, - "grad_norm": 0.0012498401338234544, - "learning_rate": 0.0001999997973251324, - "loss": 46.0, - "step": 8392 - }, - { - "epoch": 0.641703461589923, - "grad_norm": 0.0017809005221351981, - "learning_rate": 0.00019999979727677, - "loss": 46.0, - "step": 8393 - }, - { - "epoch": 0.6417799185733127, - "grad_norm": 0.0014072373742237687, - "learning_rate": 0.0001999997972284018, - "loss": 46.0, - "step": 8394 - }, - { - "epoch": 0.6418563755567024, - "grad_norm": 0.001852648681961, - "learning_rate": 0.00019999979718002785, - "loss": 46.0, - "step": 8395 - }, - { - "epoch": 0.6419328325400921, - "grad_norm": 0.0007831387338228524, - "learning_rate": 0.00019999979713164812, - "loss": 46.0, - "step": 8396 - }, - { - "epoch": 0.6420092895234818, - "grad_norm": 0.0017284360947087407, - "learning_rate": 0.00019999979708326264, - "loss": 46.0, - "step": 8397 - }, - { - "epoch": 0.6420857465068716, - "grad_norm": 0.0005251671536825597, - "learning_rate": 0.00019999979703487136, - "loss": 46.0, - "step": 8398 - }, - { - "epoch": 0.6421622034902613, - "grad_norm": 0.0015043087769299746, - "learning_rate": 0.00019999979698647434, - "loss": 46.0, - "step": 8399 - }, - { - "epoch": 0.642238660473651, - "grad_norm": 0.01601600833237171, - "learning_rate": 0.00019999979693807155, - "loss": 46.0, - "step": 8400 - }, - { - "epoch": 0.6423151174570407, - "grad_norm": 0.0008529069600626826, - "learning_rate": 0.00019999979688966298, - "loss": 46.0, - "step": 8401 - }, - { - "epoch": 0.6423915744404305, - "grad_norm": 0.0010558547219261527, - "learning_rate": 0.00019999979684124863, - "loss": 46.0, - "step": 8402 - }, - { - "epoch": 0.6424680314238201, - "grad_norm": 0.001270571257919073, - "learning_rate": 0.00019999979679282852, - "loss": 46.0, - "step": 8403 - }, - { - "epoch": 0.6425444884072099, - "grad_norm": 0.0008635505218990147, - "learning_rate": 0.00019999979674440263, - "loss": 46.0, - "step": 8404 - }, - { - "epoch": 0.6426209453905997, - "grad_norm": 0.0009533761767670512, - "learning_rate": 0.000199999796695971, - "loss": 46.0, - "step": 8405 - }, - { - "epoch": 0.6426974023739893, - "grad_norm": 0.001203278312459588, - "learning_rate": 0.00019999979664753355, - "loss": 46.0, - "step": 8406 - }, - { - "epoch": 0.6427738593573791, - "grad_norm": 0.0006614010781049728, - "learning_rate": 0.00019999979659909037, - "loss": 46.0, - "step": 8407 - }, - { - "epoch": 0.6428503163407687, - "grad_norm": 0.0009135161526501179, - "learning_rate": 0.0001999997965506414, - "loss": 46.0, - "step": 8408 - }, - { - "epoch": 0.6429267733241585, - "grad_norm": 0.0011862055398523808, - "learning_rate": 0.00019999979650218668, - "loss": 46.0, - "step": 8409 - }, - { - "epoch": 0.6430032303075482, - "grad_norm": 0.0013963677920401096, - "learning_rate": 0.00019999979645372618, - "loss": 46.0, - "step": 8410 - }, - { - "epoch": 0.6430796872909379, - "grad_norm": 0.0018271836452186108, - "learning_rate": 0.00019999979640525993, - "loss": 46.0, - "step": 8411 - }, - { - "epoch": 0.6431561442743277, - "grad_norm": 0.0003863652527797967, - "learning_rate": 0.0001999997963567879, - "loss": 46.0, - "step": 8412 - }, - { - "epoch": 0.6432326012577174, - "grad_norm": 0.0006500778836198151, - "learning_rate": 0.00019999979630831008, - "loss": 46.0, - "step": 8413 - }, - { - "epoch": 0.6433090582411071, - "grad_norm": 0.01121943537145853, - "learning_rate": 0.0001999997962598265, - "loss": 46.0, - "step": 8414 - }, - { - "epoch": 0.6433855152244968, - "grad_norm": 0.0007760943844914436, - "learning_rate": 0.00019999979621133714, - "loss": 46.0, - "step": 8415 - }, - { - "epoch": 0.6434619722078866, - "grad_norm": 0.0007321322336792946, - "learning_rate": 0.00019999979616284203, - "loss": 46.0, - "step": 8416 - }, - { - "epoch": 0.6435384291912762, - "grad_norm": 0.0016459973994642496, - "learning_rate": 0.00019999979611434117, - "loss": 46.0, - "step": 8417 - }, - { - "epoch": 0.643614886174666, - "grad_norm": 0.01081672590225935, - "learning_rate": 0.0001999997960658345, - "loss": 46.0, - "step": 8418 - }, - { - "epoch": 0.6436913431580557, - "grad_norm": 0.00067540118470788, - "learning_rate": 0.0001999997960173221, - "loss": 46.0, - "step": 8419 - }, - { - "epoch": 0.6437678001414454, - "grad_norm": 0.0011252046097069979, - "learning_rate": 0.00019999979596880388, - "loss": 46.0, - "step": 8420 - }, - { - "epoch": 0.6438442571248352, - "grad_norm": 0.0007321266457438469, - "learning_rate": 0.00019999979592027993, - "loss": 46.0, - "step": 8421 - }, - { - "epoch": 0.6439207141082248, - "grad_norm": 0.0006748631130903959, - "learning_rate": 0.0001999997958717502, - "loss": 46.0, - "step": 8422 - }, - { - "epoch": 0.6439971710916146, - "grad_norm": 0.0035102139227092266, - "learning_rate": 0.00019999979582321472, - "loss": 46.0, - "step": 8423 - }, - { - "epoch": 0.6440736280750043, - "grad_norm": 0.009357602335512638, - "learning_rate": 0.00019999979577467342, - "loss": 46.0, - "step": 8424 - }, - { - "epoch": 0.644150085058394, - "grad_norm": 0.001554184011183679, - "learning_rate": 0.0001999997957261264, - "loss": 46.0, - "step": 8425 - }, - { - "epoch": 0.6442265420417838, - "grad_norm": 0.0011146211763843894, - "learning_rate": 0.00019999979567757357, - "loss": 46.0, - "step": 8426 - }, - { - "epoch": 0.6443029990251735, - "grad_norm": 0.0004390115791466087, - "learning_rate": 0.00019999979562901498, - "loss": 46.0, - "step": 8427 - }, - { - "epoch": 0.6443794560085632, - "grad_norm": 0.0005093348445370793, - "learning_rate": 0.00019999979558045066, - "loss": 46.0, - "step": 8428 - }, - { - "epoch": 0.6444559129919529, - "grad_norm": 0.0009902993915602565, - "learning_rate": 0.00019999979553188054, - "loss": 46.0, - "step": 8429 - }, - { - "epoch": 0.6445323699753426, - "grad_norm": 0.003410734236240387, - "learning_rate": 0.00019999979548330466, - "loss": 46.0, - "step": 8430 - }, - { - "epoch": 0.6446088269587323, - "grad_norm": 0.001540257129818201, - "learning_rate": 0.000199999795434723, - "loss": 46.0, - "step": 8431 - }, - { - "epoch": 0.6446852839421221, - "grad_norm": 0.0010448395041748881, - "learning_rate": 0.00019999979538613558, - "loss": 46.0, - "step": 8432 - }, - { - "epoch": 0.6447617409255118, - "grad_norm": 0.0009175291052088141, - "learning_rate": 0.00019999979533754237, - "loss": 46.0, - "step": 8433 - }, - { - "epoch": 0.6448381979089015, - "grad_norm": 0.0010905092349275947, - "learning_rate": 0.00019999979528894342, - "loss": 46.0, - "step": 8434 - }, - { - "epoch": 0.6449146548922913, - "grad_norm": 0.0007185473805293441, - "learning_rate": 0.0001999997952403387, - "loss": 46.0, - "step": 8435 - }, - { - "epoch": 0.6449911118756809, - "grad_norm": 0.0008109049522317946, - "learning_rate": 0.0001999997951917282, - "loss": 46.0, - "step": 8436 - }, - { - "epoch": 0.6450675688590707, - "grad_norm": 0.001640122034586966, - "learning_rate": 0.00019999979514311188, - "loss": 46.0, - "step": 8437 - }, - { - "epoch": 0.6451440258424603, - "grad_norm": 0.001342583680525422, - "learning_rate": 0.00019999979509448986, - "loss": 46.0, - "step": 8438 - }, - { - "epoch": 0.6452204828258501, - "grad_norm": 0.0009414669475518167, - "learning_rate": 0.00019999979504586204, - "loss": 46.0, - "step": 8439 - }, - { - "epoch": 0.6452969398092399, - "grad_norm": 0.0004461360222194344, - "learning_rate": 0.00019999979499722847, - "loss": 46.0, - "step": 8440 - }, - { - "epoch": 0.6453733967926295, - "grad_norm": 0.005630668718367815, - "learning_rate": 0.00019999979494858913, - "loss": 46.0, - "step": 8441 - }, - { - "epoch": 0.6454498537760193, - "grad_norm": 0.005121064372360706, - "learning_rate": 0.000199999794899944, - "loss": 46.0, - "step": 8442 - }, - { - "epoch": 0.645526310759409, - "grad_norm": 0.0007692367071285844, - "learning_rate": 0.0001999997948512931, - "loss": 46.0, - "step": 8443 - }, - { - "epoch": 0.6456027677427987, - "grad_norm": 0.000752636871766299, - "learning_rate": 0.00019999979480263646, - "loss": 46.0, - "step": 8444 - }, - { - "epoch": 0.6456792247261884, - "grad_norm": 0.0007875836454331875, - "learning_rate": 0.00019999979475397402, - "loss": 46.0, - "step": 8445 - }, - { - "epoch": 0.6457556817095782, - "grad_norm": 0.0010688817128539085, - "learning_rate": 0.00019999979470530584, - "loss": 46.0, - "step": 8446 - }, - { - "epoch": 0.6458321386929678, - "grad_norm": 0.002124267164617777, - "learning_rate": 0.00019999979465663186, - "loss": 46.0, - "step": 8447 - }, - { - "epoch": 0.6459085956763576, - "grad_norm": 0.0005629121442325413, - "learning_rate": 0.00019999979460795213, - "loss": 46.0, - "step": 8448 - }, - { - "epoch": 0.6459850526597473, - "grad_norm": 0.0009229386341758072, - "learning_rate": 0.0001999997945592666, - "loss": 46.0, - "step": 8449 - }, - { - "epoch": 0.646061509643137, - "grad_norm": 0.000907323497813195, - "learning_rate": 0.00019999979451057533, - "loss": 46.0, - "step": 8450 - }, - { - "epoch": 0.6461379666265268, - "grad_norm": 0.0011599069694057107, - "learning_rate": 0.00019999979446187828, - "loss": 46.0, - "step": 8451 - }, - { - "epoch": 0.6462144236099164, - "grad_norm": 0.0029595019295811653, - "learning_rate": 0.00019999979441317548, - "loss": 46.0, - "step": 8452 - }, - { - "epoch": 0.6462908805933062, - "grad_norm": 0.0013973616296425462, - "learning_rate": 0.00019999979436446689, - "loss": 46.0, - "step": 8453 - }, - { - "epoch": 0.646367337576696, - "grad_norm": 0.0009979789610952139, - "learning_rate": 0.00019999979431575254, - "loss": 46.0, - "step": 8454 - }, - { - "epoch": 0.6464437945600856, - "grad_norm": 0.0008624042966403067, - "learning_rate": 0.0001999997942670324, - "loss": 46.0, - "step": 8455 - }, - { - "epoch": 0.6465202515434754, - "grad_norm": 0.006532927975058556, - "learning_rate": 0.00019999979421830654, - "loss": 46.0, - "step": 8456 - }, - { - "epoch": 0.6465967085268651, - "grad_norm": 0.001143553527072072, - "learning_rate": 0.00019999979416957485, - "loss": 46.0, - "step": 8457 - }, - { - "epoch": 0.6466731655102548, - "grad_norm": 0.00062465516384691, - "learning_rate": 0.00019999979412083744, - "loss": 46.0, - "step": 8458 - }, - { - "epoch": 0.6467496224936445, - "grad_norm": 0.0025251784827560186, - "learning_rate": 0.00019999979407209426, - "loss": 46.0, - "step": 8459 - }, - { - "epoch": 0.6468260794770342, - "grad_norm": 0.0016846662620082498, - "learning_rate": 0.00019999979402334528, - "loss": 46.0, - "step": 8460 - }, - { - "epoch": 0.646902536460424, - "grad_norm": 0.0018301091622561216, - "learning_rate": 0.00019999979397459052, - "loss": 46.0, - "step": 8461 - }, - { - "epoch": 0.6469789934438137, - "grad_norm": 0.0025677981320768595, - "learning_rate": 0.00019999979392583002, - "loss": 46.0, - "step": 8462 - }, - { - "epoch": 0.6470554504272034, - "grad_norm": 0.0011541744461283088, - "learning_rate": 0.00019999979387706374, - "loss": 46.0, - "step": 8463 - }, - { - "epoch": 0.6471319074105931, - "grad_norm": 0.003928376827389002, - "learning_rate": 0.0001999997938282917, - "loss": 46.0, - "step": 8464 - }, - { - "epoch": 0.6472083643939829, - "grad_norm": 0.0005821063532494009, - "learning_rate": 0.0001999997937795139, - "loss": 46.0, - "step": 8465 - }, - { - "epoch": 0.6472848213773725, - "grad_norm": 0.0033516576513648033, - "learning_rate": 0.0001999997937307303, - "loss": 46.0, - "step": 8466 - }, - { - "epoch": 0.6473612783607623, - "grad_norm": 0.00267504109069705, - "learning_rate": 0.00019999979368194094, - "loss": 46.0, - "step": 8467 - }, - { - "epoch": 0.6474377353441519, - "grad_norm": 0.0011814562603831291, - "learning_rate": 0.00019999979363314582, - "loss": 46.0, - "step": 8468 - }, - { - "epoch": 0.6475141923275417, - "grad_norm": 0.001345281838439405, - "learning_rate": 0.0001999997935843449, - "loss": 46.0, - "step": 8469 - }, - { - "epoch": 0.6475906493109315, - "grad_norm": 0.0007618293166160583, - "learning_rate": 0.00019999979353553827, - "loss": 46.0, - "step": 8470 - }, - { - "epoch": 0.6476671062943211, - "grad_norm": 0.010481813922524452, - "learning_rate": 0.00019999979348672584, - "loss": 46.0, - "step": 8471 - }, - { - "epoch": 0.6477435632777109, - "grad_norm": 0.0016783365281298757, - "learning_rate": 0.00019999979343790763, - "loss": 46.0, - "step": 8472 - }, - { - "epoch": 0.6478200202611006, - "grad_norm": 0.0021542124450206757, - "learning_rate": 0.00019999979338908365, - "loss": 46.0, - "step": 8473 - }, - { - "epoch": 0.6478964772444903, - "grad_norm": 0.001814293791539967, - "learning_rate": 0.0001999997933402539, - "loss": 46.0, - "step": 8474 - }, - { - "epoch": 0.64797293422788, - "grad_norm": 0.003838587086647749, - "learning_rate": 0.00019999979329141842, - "loss": 46.0, - "step": 8475 - }, - { - "epoch": 0.6480493912112698, - "grad_norm": 0.0007807528018020093, - "learning_rate": 0.00019999979324257711, - "loss": 46.0, - "step": 8476 - }, - { - "epoch": 0.6481258481946595, - "grad_norm": 0.0013903791550546885, - "learning_rate": 0.00019999979319373007, - "loss": 46.0, - "step": 8477 - }, - { - "epoch": 0.6482023051780492, - "grad_norm": 0.0014009339502081275, - "learning_rate": 0.00019999979314487724, - "loss": 46.0, - "step": 8478 - }, - { - "epoch": 0.6482787621614389, - "grad_norm": 0.0008330792188644409, - "learning_rate": 0.00019999979309601865, - "loss": 46.0, - "step": 8479 - }, - { - "epoch": 0.6483552191448286, - "grad_norm": 0.008371387608349323, - "learning_rate": 0.0001999997930471543, - "loss": 46.0, - "step": 8480 - }, - { - "epoch": 0.6484316761282184, - "grad_norm": 0.0049489461816847324, - "learning_rate": 0.00019999979299828417, - "loss": 46.0, - "step": 8481 - }, - { - "epoch": 0.648508133111608, - "grad_norm": 0.004436735063791275, - "learning_rate": 0.00019999979294940828, - "loss": 46.0, - "step": 8482 - }, - { - "epoch": 0.6485845900949978, - "grad_norm": 0.0019072848372161388, - "learning_rate": 0.00019999979290052662, - "loss": 46.0, - "step": 8483 - }, - { - "epoch": 0.6486610470783876, - "grad_norm": 0.0015642609214410186, - "learning_rate": 0.00019999979285163918, - "loss": 46.0, - "step": 8484 - }, - { - "epoch": 0.6487375040617772, - "grad_norm": 0.002694055438041687, - "learning_rate": 0.00019999979280274597, - "loss": 46.0, - "step": 8485 - }, - { - "epoch": 0.648813961045167, - "grad_norm": 0.0026235098484903574, - "learning_rate": 0.000199999792753847, - "loss": 46.0, - "step": 8486 - }, - { - "epoch": 0.6488904180285567, - "grad_norm": 0.00028394997934810817, - "learning_rate": 0.00019999979270494224, - "loss": 46.0, - "step": 8487 - }, - { - "epoch": 0.6489668750119464, - "grad_norm": 0.0022435025312006474, - "learning_rate": 0.00019999979265603176, - "loss": 46.0, - "step": 8488 - }, - { - "epoch": 0.6490433319953361, - "grad_norm": 0.001888152677565813, - "learning_rate": 0.00019999979260711546, - "loss": 46.0, - "step": 8489 - }, - { - "epoch": 0.6491197889787258, - "grad_norm": 0.004005166701972485, - "learning_rate": 0.00019999979255819338, - "loss": 46.0, - "step": 8490 - }, - { - "epoch": 0.6491962459621156, - "grad_norm": 0.003759379032999277, - "learning_rate": 0.0001999997925092656, - "loss": 46.0, - "step": 8491 - }, - { - "epoch": 0.6492727029455053, - "grad_norm": 0.0005304914084263146, - "learning_rate": 0.00019999979246033197, - "loss": 46.0, - "step": 8492 - }, - { - "epoch": 0.649349159928895, - "grad_norm": 0.0012637015897780657, - "learning_rate": 0.00019999979241139263, - "loss": 46.0, - "step": 8493 - }, - { - "epoch": 0.6494256169122847, - "grad_norm": 0.0011224878253415227, - "learning_rate": 0.00019999979236244749, - "loss": 46.0, - "step": 8494 - }, - { - "epoch": 0.6495020738956745, - "grad_norm": 0.005963853560388088, - "learning_rate": 0.0001999997923134966, - "loss": 46.0, - "step": 8495 - }, - { - "epoch": 0.6495785308790641, - "grad_norm": 0.0006357973325066268, - "learning_rate": 0.00019999979226453994, - "loss": 46.0, - "step": 8496 - }, - { - "epoch": 0.6496549878624539, - "grad_norm": 0.0005483375862240791, - "learning_rate": 0.0001999997922155775, - "loss": 46.0, - "step": 8497 - }, - { - "epoch": 0.6497314448458436, - "grad_norm": 0.001385939191095531, - "learning_rate": 0.0001999997921666093, - "loss": 46.0, - "step": 8498 - }, - { - "epoch": 0.6498079018292333, - "grad_norm": 0.0025952893774956465, - "learning_rate": 0.0001999997921176353, - "loss": 46.0, - "step": 8499 - }, - { - "epoch": 0.6498843588126231, - "grad_norm": 0.0007776801940053701, - "learning_rate": 0.00019999979206865556, - "loss": 46.0, - "step": 8500 - }, - { - "epoch": 0.6499608157960127, - "grad_norm": 0.0005324630183167756, - "learning_rate": 0.00019999979201967003, - "loss": 46.0, - "step": 8501 - }, - { - "epoch": 0.6500372727794025, - "grad_norm": 0.0015339070232585073, - "learning_rate": 0.00019999979197067876, - "loss": 46.0, - "step": 8502 - }, - { - "epoch": 0.6501137297627922, - "grad_norm": 0.0013700415147468448, - "learning_rate": 0.0001999997919216817, - "loss": 46.0, - "step": 8503 - }, - { - "epoch": 0.6501901867461819, - "grad_norm": 0.0020882831886410713, - "learning_rate": 0.00019999979187267886, - "loss": 46.0, - "step": 8504 - }, - { - "epoch": 0.6502666437295717, - "grad_norm": 0.0018810388864949346, - "learning_rate": 0.0001999997918236703, - "loss": 46.0, - "step": 8505 - }, - { - "epoch": 0.6503431007129614, - "grad_norm": 0.0012970364186912775, - "learning_rate": 0.0001999997917746559, - "loss": 46.0, - "step": 8506 - }, - { - "epoch": 0.6504195576963511, - "grad_norm": 0.0009746933355927467, - "learning_rate": 0.00019999979172563576, - "loss": 46.0, - "step": 8507 - }, - { - "epoch": 0.6504960146797408, - "grad_norm": 0.005081235896795988, - "learning_rate": 0.00019999979167660987, - "loss": 46.0, - "step": 8508 - }, - { - "epoch": 0.6505724716631305, - "grad_norm": 0.001871288288384676, - "learning_rate": 0.0001999997916275782, - "loss": 46.0, - "step": 8509 - }, - { - "epoch": 0.6506489286465202, - "grad_norm": 0.002059999154880643, - "learning_rate": 0.00019999979157854077, - "loss": 46.0, - "step": 8510 - }, - { - "epoch": 0.65072538562991, - "grad_norm": 0.002449559746310115, - "learning_rate": 0.00019999979152949754, - "loss": 46.0, - "step": 8511 - }, - { - "epoch": 0.6508018426132997, - "grad_norm": 0.0018699516076594591, - "learning_rate": 0.00019999979148044856, - "loss": 46.0, - "step": 8512 - }, - { - "epoch": 0.6508782995966894, - "grad_norm": 0.0009525343193672597, - "learning_rate": 0.00019999979143139383, - "loss": 46.0, - "step": 8513 - }, - { - "epoch": 0.6509547565800792, - "grad_norm": 0.0006265377742238343, - "learning_rate": 0.0001999997913823333, - "loss": 46.0, - "step": 8514 - }, - { - "epoch": 0.6510312135634688, - "grad_norm": 0.000944600673392415, - "learning_rate": 0.000199999791333267, - "loss": 46.0, - "step": 8515 - }, - { - "epoch": 0.6511076705468586, - "grad_norm": 0.0008313765865750611, - "learning_rate": 0.00019999979128419495, - "loss": 46.0, - "step": 8516 - }, - { - "epoch": 0.6511841275302483, - "grad_norm": 0.0014036077773198485, - "learning_rate": 0.0001999997912351171, - "loss": 46.0, - "step": 8517 - }, - { - "epoch": 0.651260584513638, - "grad_norm": 0.0011903411941602826, - "learning_rate": 0.00019999979118603354, - "loss": 46.0, - "step": 8518 - }, - { - "epoch": 0.6513370414970278, - "grad_norm": 0.0015697837807238102, - "learning_rate": 0.00019999979113694417, - "loss": 46.0, - "step": 8519 - }, - { - "epoch": 0.6514134984804174, - "grad_norm": 0.0015175606822595, - "learning_rate": 0.00019999979108784903, - "loss": 46.0, - "step": 8520 - }, - { - "epoch": 0.6514899554638072, - "grad_norm": 0.0007969412254169583, - "learning_rate": 0.00019999979103874812, - "loss": 46.0, - "step": 8521 - }, - { - "epoch": 0.6515664124471969, - "grad_norm": 0.0008238257141783834, - "learning_rate": 0.00019999979098964143, - "loss": 46.0, - "step": 8522 - }, - { - "epoch": 0.6516428694305866, - "grad_norm": 0.005502438638359308, - "learning_rate": 0.000199999790940529, - "loss": 46.0, - "step": 8523 - }, - { - "epoch": 0.6517193264139763, - "grad_norm": 0.000979154952801764, - "learning_rate": 0.0001999997908914108, - "loss": 46.0, - "step": 8524 - }, - { - "epoch": 0.6517957833973661, - "grad_norm": 0.009840414859354496, - "learning_rate": 0.0001999997908422868, - "loss": 46.0, - "step": 8525 - }, - { - "epoch": 0.6518722403807558, - "grad_norm": 0.0016871714033186436, - "learning_rate": 0.00019999979079315705, - "loss": 46.0, - "step": 8526 - }, - { - "epoch": 0.6519486973641455, - "grad_norm": 0.004615066573023796, - "learning_rate": 0.00019999979074402152, - "loss": 46.0, - "step": 8527 - }, - { - "epoch": 0.6520251543475352, - "grad_norm": 0.0008247296791523695, - "learning_rate": 0.00019999979069488025, - "loss": 46.0, - "step": 8528 - }, - { - "epoch": 0.6521016113309249, - "grad_norm": 0.0007270098431035876, - "learning_rate": 0.00019999979064573318, - "loss": 46.0, - "step": 8529 - }, - { - "epoch": 0.6521780683143147, - "grad_norm": 0.002789735794067383, - "learning_rate": 0.00019999979059658033, - "loss": 46.0, - "step": 8530 - }, - { - "epoch": 0.6522545252977043, - "grad_norm": 0.0013686614111065865, - "learning_rate": 0.00019999979054742176, - "loss": 46.0, - "step": 8531 - }, - { - "epoch": 0.6523309822810941, - "grad_norm": 0.0030278346966952085, - "learning_rate": 0.0001999997904982574, - "loss": 46.0, - "step": 8532 - }, - { - "epoch": 0.6524074392644839, - "grad_norm": 0.0013672002824023366, - "learning_rate": 0.00019999979044908725, - "loss": 46.0, - "step": 8533 - }, - { - "epoch": 0.6524838962478735, - "grad_norm": 0.002181177493184805, - "learning_rate": 0.00019999979039991134, - "loss": 46.0, - "step": 8534 - }, - { - "epoch": 0.6525603532312633, - "grad_norm": 0.0010011212434619665, - "learning_rate": 0.00019999979035072965, - "loss": 46.0, - "step": 8535 - }, - { - "epoch": 0.652636810214653, - "grad_norm": 0.0024739818181842566, - "learning_rate": 0.00019999979030154222, - "loss": 46.0, - "step": 8536 - }, - { - "epoch": 0.6527132671980427, - "grad_norm": 0.0017216530395671725, - "learning_rate": 0.00019999979025234899, - "loss": 46.0, - "step": 8537 - }, - { - "epoch": 0.6527897241814324, - "grad_norm": 0.0016360745066776872, - "learning_rate": 0.00019999979020315, - "loss": 46.0, - "step": 8538 - }, - { - "epoch": 0.6528661811648221, - "grad_norm": 0.0016927840188145638, - "learning_rate": 0.00019999979015394525, - "loss": 46.0, - "step": 8539 - }, - { - "epoch": 0.6529426381482119, - "grad_norm": 0.001046431134454906, - "learning_rate": 0.0001999997901047347, - "loss": 46.0, - "step": 8540 - }, - { - "epoch": 0.6530190951316016, - "grad_norm": 0.0011743007926270366, - "learning_rate": 0.00019999979005551842, - "loss": 46.0, - "step": 8541 - }, - { - "epoch": 0.6530955521149913, - "grad_norm": 0.0008669368689879775, - "learning_rate": 0.00019999979000629635, - "loss": 46.0, - "step": 8542 - }, - { - "epoch": 0.653172009098381, - "grad_norm": 0.00373827968724072, - "learning_rate": 0.00019999978995706853, - "loss": 46.0, - "step": 8543 - }, - { - "epoch": 0.6532484660817708, - "grad_norm": 0.0005279045435599983, - "learning_rate": 0.0001999997899078349, - "loss": 46.0, - "step": 8544 - }, - { - "epoch": 0.6533249230651604, - "grad_norm": 0.0005049153114669025, - "learning_rate": 0.00019999978985859554, - "loss": 46.0, - "step": 8545 - }, - { - "epoch": 0.6534013800485502, - "grad_norm": 0.00194987328723073, - "learning_rate": 0.0001999997898093504, - "loss": 46.0, - "step": 8546 - }, - { - "epoch": 0.65347783703194, - "grad_norm": 0.0013528362615033984, - "learning_rate": 0.0001999997897600995, - "loss": 46.0, - "step": 8547 - }, - { - "epoch": 0.6535542940153296, - "grad_norm": 0.00036748481215909123, - "learning_rate": 0.0001999997897108428, - "loss": 46.0, - "step": 8548 - }, - { - "epoch": 0.6536307509987194, - "grad_norm": 0.003745422698557377, - "learning_rate": 0.00019999978966158037, - "loss": 46.0, - "step": 8549 - }, - { - "epoch": 0.653707207982109, - "grad_norm": 0.00045087194303050637, - "learning_rate": 0.00019999978961231216, - "loss": 46.0, - "step": 8550 - }, - { - "epoch": 0.6537836649654988, - "grad_norm": 0.0016072599682956934, - "learning_rate": 0.00019999978956303815, - "loss": 46.0, - "step": 8551 - }, - { - "epoch": 0.6538601219488885, - "grad_norm": 0.001266929553821683, - "learning_rate": 0.00019999978951375843, - "loss": 46.0, - "step": 8552 - }, - { - "epoch": 0.6539365789322782, - "grad_norm": 0.0007900830241851509, - "learning_rate": 0.00019999978946447287, - "loss": 46.0, - "step": 8553 - }, - { - "epoch": 0.654013035915668, - "grad_norm": 0.0005708296084776521, - "learning_rate": 0.00019999978941518157, - "loss": 46.0, - "step": 8554 - }, - { - "epoch": 0.6540894928990577, - "grad_norm": 0.006334260571748018, - "learning_rate": 0.00019999978936588453, - "loss": 46.0, - "step": 8555 - }, - { - "epoch": 0.6541659498824474, - "grad_norm": 0.0016598799265921116, - "learning_rate": 0.00019999978931658168, - "loss": 46.0, - "step": 8556 - }, - { - "epoch": 0.6542424068658371, - "grad_norm": 0.0004519044596236199, - "learning_rate": 0.0001999997892672731, - "loss": 46.0, - "step": 8557 - }, - { - "epoch": 0.6543188638492269, - "grad_norm": 0.0003759263490792364, - "learning_rate": 0.0001999997892179587, - "loss": 46.0, - "step": 8558 - }, - { - "epoch": 0.6543953208326165, - "grad_norm": 0.002499691443517804, - "learning_rate": 0.00019999978916863855, - "loss": 46.0, - "step": 8559 - }, - { - "epoch": 0.6544717778160063, - "grad_norm": 0.0007688610348850489, - "learning_rate": 0.00019999978911931264, - "loss": 46.0, - "step": 8560 - }, - { - "epoch": 0.654548234799396, - "grad_norm": 0.0012953185942023993, - "learning_rate": 0.00019999978906998095, - "loss": 46.0, - "step": 8561 - }, - { - "epoch": 0.6546246917827857, - "grad_norm": 0.0012482163729146123, - "learning_rate": 0.0001999997890206435, - "loss": 46.0, - "step": 8562 - }, - { - "epoch": 0.6547011487661755, - "grad_norm": 0.0009858411503955722, - "learning_rate": 0.0001999997889713003, - "loss": 46.0, - "step": 8563 - }, - { - "epoch": 0.6547776057495651, - "grad_norm": 0.001155849196948111, - "learning_rate": 0.00019999978892195128, - "loss": 46.0, - "step": 8564 - }, - { - "epoch": 0.6548540627329549, - "grad_norm": 0.00048214488197118044, - "learning_rate": 0.00019999978887259653, - "loss": 46.0, - "step": 8565 - }, - { - "epoch": 0.6549305197163446, - "grad_norm": 0.0011347491526976228, - "learning_rate": 0.000199999788823236, - "loss": 46.0, - "step": 8566 - }, - { - "epoch": 0.6550069766997343, - "grad_norm": 0.004829889163374901, - "learning_rate": 0.0001999997887738697, - "loss": 46.0, - "step": 8567 - }, - { - "epoch": 0.655083433683124, - "grad_norm": 0.0015248326817527413, - "learning_rate": 0.00019999978872449763, - "loss": 46.0, - "step": 8568 - }, - { - "epoch": 0.6551598906665137, - "grad_norm": 0.0005274652503430843, - "learning_rate": 0.00019999978867511978, - "loss": 46.0, - "step": 8569 - }, - { - "epoch": 0.6552363476499035, - "grad_norm": 0.0014829428400844336, - "learning_rate": 0.0001999997886257362, - "loss": 46.0, - "step": 8570 - }, - { - "epoch": 0.6553128046332932, - "grad_norm": 0.0006693553877994418, - "learning_rate": 0.0001999997885763468, - "loss": 46.0, - "step": 8571 - }, - { - "epoch": 0.6553892616166829, - "grad_norm": 0.0012639377964660525, - "learning_rate": 0.00019999978852695165, - "loss": 46.0, - "step": 8572 - }, - { - "epoch": 0.6554657186000726, - "grad_norm": 0.010828603059053421, - "learning_rate": 0.00019999978847755074, - "loss": 46.0, - "step": 8573 - }, - { - "epoch": 0.6555421755834624, - "grad_norm": 0.0010106988484039903, - "learning_rate": 0.00019999978842814405, - "loss": 46.0, - "step": 8574 - }, - { - "epoch": 0.655618632566852, - "grad_norm": 0.0007190277683548629, - "learning_rate": 0.0001999997883787316, - "loss": 46.0, - "step": 8575 - }, - { - "epoch": 0.6556950895502418, - "grad_norm": 0.0035418092738837004, - "learning_rate": 0.0001999997883293134, - "loss": 46.0, - "step": 8576 - }, - { - "epoch": 0.6557715465336316, - "grad_norm": 0.0019862360786646605, - "learning_rate": 0.00019999978827988938, - "loss": 46.0, - "step": 8577 - }, - { - "epoch": 0.6558480035170212, - "grad_norm": 0.0008121223654597998, - "learning_rate": 0.00019999978823045963, - "loss": 46.0, - "step": 8578 - }, - { - "epoch": 0.655924460500411, - "grad_norm": 0.00486897025257349, - "learning_rate": 0.0001999997881810241, - "loss": 46.0, - "step": 8579 - }, - { - "epoch": 0.6560009174838006, - "grad_norm": 0.0011067648883908987, - "learning_rate": 0.0001999997881315828, - "loss": 46.0, - "step": 8580 - }, - { - "epoch": 0.6560773744671904, - "grad_norm": 0.0010947034461423755, - "learning_rate": 0.0001999997880821357, - "loss": 46.0, - "step": 8581 - }, - { - "epoch": 0.6561538314505801, - "grad_norm": 0.005246949382126331, - "learning_rate": 0.00019999978803268288, - "loss": 46.0, - "step": 8582 - }, - { - "epoch": 0.6562302884339698, - "grad_norm": 0.0006006269832141697, - "learning_rate": 0.00019999978798322426, - "loss": 46.0, - "step": 8583 - }, - { - "epoch": 0.6563067454173596, - "grad_norm": 0.0011809815187007189, - "learning_rate": 0.0001999997879337599, - "loss": 46.0, - "step": 8584 - }, - { - "epoch": 0.6563832024007493, - "grad_norm": 0.0010445114457979798, - "learning_rate": 0.00019999978788428973, - "loss": 46.0, - "step": 8585 - }, - { - "epoch": 0.656459659384139, - "grad_norm": 0.0049142492935061455, - "learning_rate": 0.00019999978783481382, - "loss": 46.0, - "step": 8586 - }, - { - "epoch": 0.6565361163675287, - "grad_norm": 0.0014443404506891966, - "learning_rate": 0.00019999978778533213, - "loss": 46.0, - "step": 8587 - }, - { - "epoch": 0.6566125733509185, - "grad_norm": 0.0005549098714254797, - "learning_rate": 0.00019999978773584467, - "loss": 46.0, - "step": 8588 - }, - { - "epoch": 0.6566890303343081, - "grad_norm": 0.002379411133006215, - "learning_rate": 0.00019999978768635144, - "loss": 46.0, - "step": 8589 - }, - { - "epoch": 0.6567654873176979, - "grad_norm": 0.0010705901077017188, - "learning_rate": 0.00019999978763685246, - "loss": 46.0, - "step": 8590 - }, - { - "epoch": 0.6568419443010876, - "grad_norm": 0.0005772141157649457, - "learning_rate": 0.00019999978758734768, - "loss": 46.0, - "step": 8591 - }, - { - "epoch": 0.6569184012844773, - "grad_norm": 0.0047366805374622345, - "learning_rate": 0.00019999978753783713, - "loss": 46.0, - "step": 8592 - }, - { - "epoch": 0.6569948582678671, - "grad_norm": 0.0015679936623200774, - "learning_rate": 0.00019999978748832083, - "loss": 46.0, - "step": 8593 - }, - { - "epoch": 0.6570713152512567, - "grad_norm": 0.0009844069136306643, - "learning_rate": 0.00019999978743879876, - "loss": 46.0, - "step": 8594 - }, - { - "epoch": 0.6571477722346465, - "grad_norm": 0.0007176010985858738, - "learning_rate": 0.0001999997873892709, - "loss": 46.0, - "step": 8595 - }, - { - "epoch": 0.6572242292180362, - "grad_norm": 0.0012983622727915645, - "learning_rate": 0.0001999997873397373, - "loss": 46.0, - "step": 8596 - }, - { - "epoch": 0.6573006862014259, - "grad_norm": 0.0005127433687448502, - "learning_rate": 0.00019999978729019793, - "loss": 46.0, - "step": 8597 - }, - { - "epoch": 0.6573771431848157, - "grad_norm": 0.0007021460914984345, - "learning_rate": 0.00019999978724065276, - "loss": 46.0, - "step": 8598 - }, - { - "epoch": 0.6574536001682053, - "grad_norm": 0.00133537407964468, - "learning_rate": 0.00019999978719110185, - "loss": 46.0, - "step": 8599 - }, - { - "epoch": 0.6575300571515951, - "grad_norm": 0.004666220396757126, - "learning_rate": 0.00019999978714154513, - "loss": 46.0, - "step": 8600 - }, - { - "epoch": 0.6576065141349848, - "grad_norm": 0.0004905962850898504, - "learning_rate": 0.0001999997870919827, - "loss": 46.0, - "step": 8601 - }, - { - "epoch": 0.6576829711183745, - "grad_norm": 0.002547111362218857, - "learning_rate": 0.00019999978704241447, - "loss": 46.0, - "step": 8602 - }, - { - "epoch": 0.6577594281017642, - "grad_norm": 0.001990128308534622, - "learning_rate": 0.00019999978699284046, - "loss": 46.0, - "step": 8603 - }, - { - "epoch": 0.657835885085154, - "grad_norm": 0.0022737362887710333, - "learning_rate": 0.00019999978694326068, - "loss": 46.0, - "step": 8604 - }, - { - "epoch": 0.6579123420685437, - "grad_norm": 0.004663367755711079, - "learning_rate": 0.00019999978689367516, - "loss": 46.0, - "step": 8605 - }, - { - "epoch": 0.6579887990519334, - "grad_norm": 0.0008131011272780597, - "learning_rate": 0.00019999978684408386, - "loss": 46.0, - "step": 8606 - }, - { - "epoch": 0.6580652560353232, - "grad_norm": 0.0016348271165043116, - "learning_rate": 0.00019999978679448676, - "loss": 46.0, - "step": 8607 - }, - { - "epoch": 0.6581417130187128, - "grad_norm": 0.0009887042688205838, - "learning_rate": 0.00019999978674488392, - "loss": 46.0, - "step": 8608 - }, - { - "epoch": 0.6582181700021026, - "grad_norm": 0.008708581328392029, - "learning_rate": 0.0001999997866952753, - "loss": 46.0, - "step": 8609 - }, - { - "epoch": 0.6582946269854922, - "grad_norm": 0.0010642745764926076, - "learning_rate": 0.00019999978664566093, - "loss": 46.0, - "step": 8610 - }, - { - "epoch": 0.658371083968882, - "grad_norm": 0.0014296063454821706, - "learning_rate": 0.00019999978659604074, - "loss": 46.0, - "step": 8611 - }, - { - "epoch": 0.6584475409522718, - "grad_norm": 0.006288490723818541, - "learning_rate": 0.00019999978654641485, - "loss": 46.0, - "step": 8612 - }, - { - "epoch": 0.6585239979356614, - "grad_norm": 0.0026276526041328907, - "learning_rate": 0.00019999978649678314, - "loss": 46.0, - "step": 8613 - }, - { - "epoch": 0.6586004549190512, - "grad_norm": 0.0018835540395230055, - "learning_rate": 0.00019999978644714568, - "loss": 46.0, - "step": 8614 - }, - { - "epoch": 0.6586769119024409, - "grad_norm": 0.0016152536263689399, - "learning_rate": 0.00019999978639750245, - "loss": 46.0, - "step": 8615 - }, - { - "epoch": 0.6587533688858306, - "grad_norm": 0.0010026829550042748, - "learning_rate": 0.00019999978634785347, - "loss": 46.0, - "step": 8616 - }, - { - "epoch": 0.6588298258692203, - "grad_norm": 0.001898156013339758, - "learning_rate": 0.0001999997862981987, - "loss": 46.0, - "step": 8617 - }, - { - "epoch": 0.6589062828526101, - "grad_norm": 0.0030765426345169544, - "learning_rate": 0.00019999978624853814, - "loss": 46.0, - "step": 8618 - }, - { - "epoch": 0.6589827398359998, - "grad_norm": 0.0014737654710188508, - "learning_rate": 0.00019999978619887184, - "loss": 46.0, - "step": 8619 - }, - { - "epoch": 0.6590591968193895, - "grad_norm": 0.0020420304499566555, - "learning_rate": 0.00019999978614919974, - "loss": 46.0, - "step": 8620 - }, - { - "epoch": 0.6591356538027792, - "grad_norm": 0.0014412233140319586, - "learning_rate": 0.00019999978609952187, - "loss": 46.0, - "step": 8621 - }, - { - "epoch": 0.6592121107861689, - "grad_norm": 0.0011346039827913046, - "learning_rate": 0.00019999978604983828, - "loss": 46.0, - "step": 8622 - }, - { - "epoch": 0.6592885677695587, - "grad_norm": 0.004264628514647484, - "learning_rate": 0.00019999978600014889, - "loss": 46.0, - "step": 8623 - }, - { - "epoch": 0.6593650247529483, - "grad_norm": 0.0006827763863839209, - "learning_rate": 0.00019999978595045372, - "loss": 46.0, - "step": 8624 - }, - { - "epoch": 0.6594414817363381, - "grad_norm": 0.0021015778183937073, - "learning_rate": 0.00019999978590075278, - "loss": 46.0, - "step": 8625 - }, - { - "epoch": 0.6595179387197279, - "grad_norm": 0.0024254578165709972, - "learning_rate": 0.0001999997858510461, - "loss": 46.0, - "step": 8626 - }, - { - "epoch": 0.6595943957031175, - "grad_norm": 0.024874890223145485, - "learning_rate": 0.00019999978580133364, - "loss": 46.0, - "step": 8627 - }, - { - "epoch": 0.6596708526865073, - "grad_norm": 0.001570564811117947, - "learning_rate": 0.0001999997857516154, - "loss": 46.0, - "step": 8628 - }, - { - "epoch": 0.6597473096698969, - "grad_norm": 0.0005236054421402514, - "learning_rate": 0.0001999997857018914, - "loss": 46.0, - "step": 8629 - }, - { - "epoch": 0.6598237666532867, - "grad_norm": 0.001178934471681714, - "learning_rate": 0.00019999978565216162, - "loss": 46.0, - "step": 8630 - }, - { - "epoch": 0.6599002236366764, - "grad_norm": 0.000714669527951628, - "learning_rate": 0.0001999997856024261, - "loss": 46.0, - "step": 8631 - }, - { - "epoch": 0.6599766806200661, - "grad_norm": 0.0004759739385917783, - "learning_rate": 0.00019999978555268477, - "loss": 46.0, - "step": 8632 - }, - { - "epoch": 0.6600531376034559, - "grad_norm": 0.0013500095810741186, - "learning_rate": 0.00019999978550293767, - "loss": 46.0, - "step": 8633 - }, - { - "epoch": 0.6601295945868456, - "grad_norm": 0.005612598732113838, - "learning_rate": 0.00019999978545318483, - "loss": 46.0, - "step": 8634 - }, - { - "epoch": 0.6602060515702353, - "grad_norm": 0.0008059635874815285, - "learning_rate": 0.0001999997854034262, - "loss": 46.0, - "step": 8635 - }, - { - "epoch": 0.660282508553625, - "grad_norm": 0.0010862040799111128, - "learning_rate": 0.00019999978535366182, - "loss": 46.0, - "step": 8636 - }, - { - "epoch": 0.6603589655370148, - "grad_norm": 0.001186320441775024, - "learning_rate": 0.00019999978530389165, - "loss": 46.0, - "step": 8637 - }, - { - "epoch": 0.6604354225204044, - "grad_norm": 0.0012938138097524643, - "learning_rate": 0.00019999978525411574, - "loss": 46.0, - "step": 8638 - }, - { - "epoch": 0.6605118795037942, - "grad_norm": 0.0003736805228982121, - "learning_rate": 0.00019999978520433403, - "loss": 46.0, - "step": 8639 - }, - { - "epoch": 0.6605883364871838, - "grad_norm": 0.000617126643192023, - "learning_rate": 0.00019999978515454657, - "loss": 46.0, - "step": 8640 - }, - { - "epoch": 0.6606647934705736, - "grad_norm": 0.00038321109605021775, - "learning_rate": 0.0001999997851047533, - "loss": 46.0, - "step": 8641 - }, - { - "epoch": 0.6607412504539634, - "grad_norm": 0.0010610792087391019, - "learning_rate": 0.0001999997850549543, - "loss": 46.0, - "step": 8642 - }, - { - "epoch": 0.660817707437353, - "grad_norm": 0.0003566528612282127, - "learning_rate": 0.00019999978500514953, - "loss": 46.0, - "step": 8643 - }, - { - "epoch": 0.6608941644207428, - "grad_norm": 0.0018905200995504856, - "learning_rate": 0.000199999784955339, - "loss": 46.0, - "step": 8644 - }, - { - "epoch": 0.6609706214041325, - "grad_norm": 0.0006439249846152961, - "learning_rate": 0.00019999978490552268, - "loss": 46.0, - "step": 8645 - }, - { - "epoch": 0.6610470783875222, - "grad_norm": 0.0014772162539884448, - "learning_rate": 0.00019999978485570058, - "loss": 46.0, - "step": 8646 - }, - { - "epoch": 0.661123535370912, - "grad_norm": 0.000992226880043745, - "learning_rate": 0.00019999978480587273, - "loss": 46.0, - "step": 8647 - }, - { - "epoch": 0.6611999923543017, - "grad_norm": 0.0009126553777605295, - "learning_rate": 0.0001999997847560391, - "loss": 46.0, - "step": 8648 - }, - { - "epoch": 0.6612764493376914, - "grad_norm": 0.009299108758568764, - "learning_rate": 0.0001999997847061997, - "loss": 46.0, - "step": 8649 - }, - { - "epoch": 0.6613529063210811, - "grad_norm": 0.0009371150517836213, - "learning_rate": 0.00019999978465635456, - "loss": 46.0, - "step": 8650 - }, - { - "epoch": 0.6614293633044708, - "grad_norm": 0.0017397486371919513, - "learning_rate": 0.00019999978460650362, - "loss": 46.0, - "step": 8651 - }, - { - "epoch": 0.6615058202878605, - "grad_norm": 0.0018773350166156888, - "learning_rate": 0.00019999978455664688, - "loss": 46.0, - "step": 8652 - }, - { - "epoch": 0.6615822772712503, - "grad_norm": 0.001606195350177586, - "learning_rate": 0.00019999978450678443, - "loss": 46.0, - "step": 8653 - }, - { - "epoch": 0.66165873425464, - "grad_norm": 0.0018110673408955336, - "learning_rate": 0.0001999997844569162, - "loss": 46.0, - "step": 8654 - }, - { - "epoch": 0.6617351912380297, - "grad_norm": 0.0011335978051647544, - "learning_rate": 0.0001999997844070422, - "loss": 46.0, - "step": 8655 - }, - { - "epoch": 0.6618116482214195, - "grad_norm": 0.00031011944520287216, - "learning_rate": 0.0001999997843571624, - "loss": 46.0, - "step": 8656 - }, - { - "epoch": 0.6618881052048091, - "grad_norm": 0.0009702396928332746, - "learning_rate": 0.00019999978430727686, - "loss": 46.0, - "step": 8657 - }, - { - "epoch": 0.6619645621881989, - "grad_norm": 0.004137062001973391, - "learning_rate": 0.00019999978425738554, - "loss": 46.0, - "step": 8658 - }, - { - "epoch": 0.6620410191715885, - "grad_norm": 0.0056906575337052345, - "learning_rate": 0.00019999978420748846, - "loss": 46.0, - "step": 8659 - }, - { - "epoch": 0.6621174761549783, - "grad_norm": 0.002599082188680768, - "learning_rate": 0.00019999978415758557, - "loss": 46.0, - "step": 8660 - }, - { - "epoch": 0.662193933138368, - "grad_norm": 0.0013367700157687068, - "learning_rate": 0.00019999978410767697, - "loss": 46.0, - "step": 8661 - }, - { - "epoch": 0.6622703901217577, - "grad_norm": 0.0017191254300996661, - "learning_rate": 0.00019999978405776256, - "loss": 46.0, - "step": 8662 - }, - { - "epoch": 0.6623468471051475, - "grad_norm": 0.002444988815113902, - "learning_rate": 0.00019999978400784242, - "loss": 46.0, - "step": 8663 - }, - { - "epoch": 0.6624233040885372, - "grad_norm": 0.005234445445239544, - "learning_rate": 0.00019999978395791645, - "loss": 46.0, - "step": 8664 - }, - { - "epoch": 0.6624997610719269, - "grad_norm": 0.0013008839450776577, - "learning_rate": 0.00019999978390798474, - "loss": 46.0, - "step": 8665 - }, - { - "epoch": 0.6625762180553166, - "grad_norm": 0.004277431406080723, - "learning_rate": 0.00019999978385804729, - "loss": 46.0, - "step": 8666 - }, - { - "epoch": 0.6626526750387064, - "grad_norm": 0.0008872722391970456, - "learning_rate": 0.00019999978380810405, - "loss": 46.0, - "step": 8667 - }, - { - "epoch": 0.662729132022096, - "grad_norm": 0.0009823297150433064, - "learning_rate": 0.00019999978375815502, - "loss": 46.0, - "step": 8668 - }, - { - "epoch": 0.6628055890054858, - "grad_norm": 0.0015522469766438007, - "learning_rate": 0.00019999978370820024, - "loss": 46.0, - "step": 8669 - }, - { - "epoch": 0.6628820459888755, - "grad_norm": 0.0016563733806833625, - "learning_rate": 0.0001999997836582397, - "loss": 46.0, - "step": 8670 - }, - { - "epoch": 0.6629585029722652, - "grad_norm": 0.0034450446255505085, - "learning_rate": 0.0001999997836082734, - "loss": 46.0, - "step": 8671 - }, - { - "epoch": 0.663034959955655, - "grad_norm": 0.002170608378946781, - "learning_rate": 0.00019999978355830127, - "loss": 46.0, - "step": 8672 - }, - { - "epoch": 0.6631114169390446, - "grad_norm": 0.002843655413016677, - "learning_rate": 0.00019999978350832343, - "loss": 46.0, - "step": 8673 - }, - { - "epoch": 0.6631878739224344, - "grad_norm": 0.0008554289815947413, - "learning_rate": 0.00019999978345833978, - "loss": 46.0, - "step": 8674 - }, - { - "epoch": 0.6632643309058242, - "grad_norm": 0.001418053638190031, - "learning_rate": 0.0001999997834083504, - "loss": 46.0, - "step": 8675 - }, - { - "epoch": 0.6633407878892138, - "grad_norm": 0.001097179250791669, - "learning_rate": 0.0001999997833583552, - "loss": 46.0, - "step": 8676 - }, - { - "epoch": 0.6634172448726036, - "grad_norm": 0.0012433599913492799, - "learning_rate": 0.00019999978330835426, - "loss": 46.0, - "step": 8677 - }, - { - "epoch": 0.6634937018559933, - "grad_norm": 0.0006016894476488233, - "learning_rate": 0.00019999978325834755, - "loss": 46.0, - "step": 8678 - }, - { - "epoch": 0.663570158839383, - "grad_norm": 0.00232914206571877, - "learning_rate": 0.0001999997832083351, - "loss": 46.0, - "step": 8679 - }, - { - "epoch": 0.6636466158227727, - "grad_norm": 0.0014480187091976404, - "learning_rate": 0.00019999978315831686, - "loss": 46.0, - "step": 8680 - }, - { - "epoch": 0.6637230728061624, - "grad_norm": 0.0008242285111919045, - "learning_rate": 0.00019999978310829283, - "loss": 46.0, - "step": 8681 - }, - { - "epoch": 0.6637995297895521, - "grad_norm": 0.0009306977153755724, - "learning_rate": 0.00019999978305826303, - "loss": 46.0, - "step": 8682 - }, - { - "epoch": 0.6638759867729419, - "grad_norm": 0.0011325350496917963, - "learning_rate": 0.0001999997830082275, - "loss": 46.0, - "step": 8683 - }, - { - "epoch": 0.6639524437563316, - "grad_norm": 0.0006435626419261098, - "learning_rate": 0.00019999978295818615, - "loss": 46.0, - "step": 8684 - }, - { - "epoch": 0.6640289007397213, - "grad_norm": 0.001619804766960442, - "learning_rate": 0.00019999978290813908, - "loss": 46.0, - "step": 8685 - }, - { - "epoch": 0.6641053577231111, - "grad_norm": 0.0015315627679228783, - "learning_rate": 0.00019999978285808624, - "loss": 46.0, - "step": 8686 - }, - { - "epoch": 0.6641818147065007, - "grad_norm": 0.0012504010228440166, - "learning_rate": 0.00019999978280802757, - "loss": 46.0, - "step": 8687 - }, - { - "epoch": 0.6642582716898905, - "grad_norm": 0.0029816979076713324, - "learning_rate": 0.00019999978275796315, - "loss": 46.0, - "step": 8688 - }, - { - "epoch": 0.6643347286732803, - "grad_norm": 0.037114787846803665, - "learning_rate": 0.00019999978270789299, - "loss": 46.0, - "step": 8689 - }, - { - "epoch": 0.6644111856566699, - "grad_norm": 0.002542545786127448, - "learning_rate": 0.00019999978265781708, - "loss": 46.0, - "step": 8690 - }, - { - "epoch": 0.6644876426400597, - "grad_norm": 0.0014440712984651327, - "learning_rate": 0.00019999978260773537, - "loss": 46.0, - "step": 8691 - }, - { - "epoch": 0.6645640996234493, - "grad_norm": 0.0018695704638957977, - "learning_rate": 0.00019999978255764788, - "loss": 46.0, - "step": 8692 - }, - { - "epoch": 0.6646405566068391, - "grad_norm": 0.00045301203499548137, - "learning_rate": 0.00019999978250755462, - "loss": 46.0, - "step": 8693 - }, - { - "epoch": 0.6647170135902288, - "grad_norm": 0.0008245119825005531, - "learning_rate": 0.00019999978245745562, - "loss": 46.0, - "step": 8694 - }, - { - "epoch": 0.6647934705736185, - "grad_norm": 0.0021519893780350685, - "learning_rate": 0.00019999978240735082, - "loss": 46.0, - "step": 8695 - }, - { - "epoch": 0.6648699275570082, - "grad_norm": 0.002213072497397661, - "learning_rate": 0.00019999978235724027, - "loss": 46.0, - "step": 8696 - }, - { - "epoch": 0.664946384540398, - "grad_norm": 0.0012607017997652292, - "learning_rate": 0.00019999978230712394, - "loss": 46.0, - "step": 8697 - }, - { - "epoch": 0.6650228415237877, - "grad_norm": 0.004147354979068041, - "learning_rate": 0.00019999978225700184, - "loss": 46.0, - "step": 8698 - }, - { - "epoch": 0.6650992985071774, - "grad_norm": 0.004091346170753241, - "learning_rate": 0.00019999978220687397, - "loss": 46.0, - "step": 8699 - }, - { - "epoch": 0.6651757554905671, - "grad_norm": 0.0015534093836322427, - "learning_rate": 0.00019999978215674033, - "loss": 46.0, - "step": 8700 - }, - { - "epoch": 0.6652522124739568, - "grad_norm": 0.0006063755135983229, - "learning_rate": 0.00019999978210660094, - "loss": 46.0, - "step": 8701 - }, - { - "epoch": 0.6653286694573466, - "grad_norm": 0.0021936220582574606, - "learning_rate": 0.00019999978205645575, - "loss": 46.0, - "step": 8702 - }, - { - "epoch": 0.6654051264407362, - "grad_norm": 0.0005755945458076894, - "learning_rate": 0.00019999978200630484, - "loss": 46.0, - "step": 8703 - }, - { - "epoch": 0.665481583424126, - "grad_norm": 0.002572490368038416, - "learning_rate": 0.0001999997819561481, - "loss": 46.0, - "step": 8704 - }, - { - "epoch": 0.6655580404075158, - "grad_norm": 0.0002919635735452175, - "learning_rate": 0.00019999978190598562, - "loss": 46.0, - "step": 8705 - }, - { - "epoch": 0.6656344973909054, - "grad_norm": 0.0014955428196117282, - "learning_rate": 0.00019999978185581736, - "loss": 46.0, - "step": 8706 - }, - { - "epoch": 0.6657109543742952, - "grad_norm": 0.0015673916786909103, - "learning_rate": 0.00019999978180564333, - "loss": 46.0, - "step": 8707 - }, - { - "epoch": 0.6657874113576849, - "grad_norm": 0.0006684870459139347, - "learning_rate": 0.00019999978175546355, - "loss": 46.0, - "step": 8708 - }, - { - "epoch": 0.6658638683410746, - "grad_norm": 0.001302677090279758, - "learning_rate": 0.00019999978170527798, - "loss": 46.0, - "step": 8709 - }, - { - "epoch": 0.6659403253244643, - "grad_norm": 0.001877059112302959, - "learning_rate": 0.00019999978165508665, - "loss": 46.0, - "step": 8710 - }, - { - "epoch": 0.666016782307854, - "grad_norm": 0.002077690325677395, - "learning_rate": 0.00019999978160488958, - "loss": 46.0, - "step": 8711 - }, - { - "epoch": 0.6660932392912438, - "grad_norm": 0.0006872828234918416, - "learning_rate": 0.0001999997815546867, - "loss": 46.0, - "step": 8712 - }, - { - "epoch": 0.6661696962746335, - "grad_norm": 0.0012548677623271942, - "learning_rate": 0.00019999978150447807, - "loss": 46.0, - "step": 8713 - }, - { - "epoch": 0.6662461532580232, - "grad_norm": 0.0009545075590722263, - "learning_rate": 0.00019999978145426365, - "loss": 46.0, - "step": 8714 - }, - { - "epoch": 0.6663226102414129, - "grad_norm": 0.001718643237836659, - "learning_rate": 0.00019999978140404346, - "loss": 46.0, - "step": 8715 - }, - { - "epoch": 0.6663990672248027, - "grad_norm": 0.0006353132193908095, - "learning_rate": 0.00019999978135381752, - "loss": 46.0, - "step": 8716 - }, - { - "epoch": 0.6664755242081923, - "grad_norm": 0.001213490148074925, - "learning_rate": 0.00019999978130358581, - "loss": 46.0, - "step": 8717 - }, - { - "epoch": 0.6665519811915821, - "grad_norm": 0.0008136135875247419, - "learning_rate": 0.00019999978125334833, - "loss": 46.0, - "step": 8718 - }, - { - "epoch": 0.6666284381749719, - "grad_norm": 0.001238768920302391, - "learning_rate": 0.00019999978120310505, - "loss": 46.0, - "step": 8719 - }, - { - "epoch": 0.6667048951583615, - "grad_norm": 0.001272666617296636, - "learning_rate": 0.00019999978115285604, - "loss": 46.0, - "step": 8720 - }, - { - "epoch": 0.6667813521417513, - "grad_norm": 0.0008869747398421168, - "learning_rate": 0.00019999978110260124, - "loss": 46.0, - "step": 8721 - }, - { - "epoch": 0.6668578091251409, - "grad_norm": 0.0006244395044632256, - "learning_rate": 0.00019999978105234066, - "loss": 46.0, - "step": 8722 - }, - { - "epoch": 0.6669342661085307, - "grad_norm": 0.0008299718028865755, - "learning_rate": 0.00019999978100207434, - "loss": 46.0, - "step": 8723 - }, - { - "epoch": 0.6670107230919204, - "grad_norm": 0.001618312206119299, - "learning_rate": 0.00019999978095180224, - "loss": 46.0, - "step": 8724 - }, - { - "epoch": 0.6670871800753101, - "grad_norm": 0.0037032710388302803, - "learning_rate": 0.00019999978090152437, - "loss": 46.0, - "step": 8725 - }, - { - "epoch": 0.6671636370586999, - "grad_norm": 0.006620650179684162, - "learning_rate": 0.00019999978085124073, - "loss": 46.0, - "step": 8726 - }, - { - "epoch": 0.6672400940420896, - "grad_norm": 0.0008105770102702081, - "learning_rate": 0.0001999997808009513, - "loss": 46.0, - "step": 8727 - }, - { - "epoch": 0.6673165510254793, - "grad_norm": 0.001030712272040546, - "learning_rate": 0.00019999978075065612, - "loss": 46.0, - "step": 8728 - }, - { - "epoch": 0.667393008008869, - "grad_norm": 0.0026812171563506126, - "learning_rate": 0.00019999978070035521, - "loss": 46.0, - "step": 8729 - }, - { - "epoch": 0.6674694649922587, - "grad_norm": 0.0029469558503478765, - "learning_rate": 0.00019999978065004848, - "loss": 46.0, - "step": 8730 - }, - { - "epoch": 0.6675459219756484, - "grad_norm": 0.0011797200422734022, - "learning_rate": 0.000199999780599736, - "loss": 46.0, - "step": 8731 - }, - { - "epoch": 0.6676223789590382, - "grad_norm": 0.0009308723965659738, - "learning_rate": 0.00019999978054941774, - "loss": 46.0, - "step": 8732 - }, - { - "epoch": 0.6676988359424278, - "grad_norm": 0.0008208405924960971, - "learning_rate": 0.0001999997804990937, - "loss": 46.0, - "step": 8733 - }, - { - "epoch": 0.6677752929258176, - "grad_norm": 0.001317824237048626, - "learning_rate": 0.0001999997804487639, - "loss": 46.0, - "step": 8734 - }, - { - "epoch": 0.6678517499092074, - "grad_norm": 0.0006504436605609953, - "learning_rate": 0.00019999978039842833, - "loss": 46.0, - "step": 8735 - }, - { - "epoch": 0.667928206892597, - "grad_norm": 0.002607166301459074, - "learning_rate": 0.000199999780348087, - "loss": 46.0, - "step": 8736 - }, - { - "epoch": 0.6680046638759868, - "grad_norm": 0.0026411728467792273, - "learning_rate": 0.00019999978029773988, - "loss": 46.0, - "step": 8737 - }, - { - "epoch": 0.6680811208593765, - "grad_norm": 0.0009394661756232381, - "learning_rate": 0.000199999780247387, - "loss": 46.0, - "step": 8738 - }, - { - "epoch": 0.6681575778427662, - "grad_norm": 0.0008242005715146661, - "learning_rate": 0.0001999997801970284, - "loss": 46.0, - "step": 8739 - }, - { - "epoch": 0.668234034826156, - "grad_norm": 0.0014916467480361462, - "learning_rate": 0.00019999978014666398, - "loss": 46.0, - "step": 8740 - }, - { - "epoch": 0.6683104918095456, - "grad_norm": 0.0021101636812090874, - "learning_rate": 0.0001999997800962938, - "loss": 46.0, - "step": 8741 - }, - { - "epoch": 0.6683869487929354, - "grad_norm": 0.0018937013810500503, - "learning_rate": 0.00019999978004591783, - "loss": 46.0, - "step": 8742 - }, - { - "epoch": 0.6684634057763251, - "grad_norm": 0.0007638661772944033, - "learning_rate": 0.00019999977999553612, - "loss": 46.0, - "step": 8743 - }, - { - "epoch": 0.6685398627597148, - "grad_norm": 0.0008760401979088783, - "learning_rate": 0.00019999977994514863, - "loss": 46.0, - "step": 8744 - }, - { - "epoch": 0.6686163197431045, - "grad_norm": 0.0012413676595315337, - "learning_rate": 0.00019999977989475535, - "loss": 46.0, - "step": 8745 - }, - { - "epoch": 0.6686927767264943, - "grad_norm": 0.000798044609837234, - "learning_rate": 0.00019999977984435635, - "loss": 46.0, - "step": 8746 - }, - { - "epoch": 0.668769233709884, - "grad_norm": 0.005128706805408001, - "learning_rate": 0.00019999977979395155, - "loss": 46.0, - "step": 8747 - }, - { - "epoch": 0.6688456906932737, - "grad_norm": 0.004137082956731319, - "learning_rate": 0.00019999977974354097, - "loss": 46.0, - "step": 8748 - }, - { - "epoch": 0.6689221476766635, - "grad_norm": 0.0022486187517642975, - "learning_rate": 0.00019999977969312462, - "loss": 46.0, - "step": 8749 - }, - { - "epoch": 0.6689986046600531, - "grad_norm": 0.001786054577678442, - "learning_rate": 0.00019999977964270252, - "loss": 46.0, - "step": 8750 - }, - { - "epoch": 0.6690750616434429, - "grad_norm": 0.0018166654044762254, - "learning_rate": 0.00019999977959227465, - "loss": 46.0, - "step": 8751 - }, - { - "epoch": 0.6691515186268325, - "grad_norm": 0.0010487374383956194, - "learning_rate": 0.00019999977954184098, - "loss": 46.0, - "step": 8752 - }, - { - "epoch": 0.6692279756102223, - "grad_norm": 0.0004923665546812117, - "learning_rate": 0.00019999977949140157, - "loss": 46.0, - "step": 8753 - }, - { - "epoch": 0.669304432593612, - "grad_norm": 0.0011399348732084036, - "learning_rate": 0.0001999997794409564, - "loss": 46.0, - "step": 8754 - }, - { - "epoch": 0.6693808895770017, - "grad_norm": 0.002236952306702733, - "learning_rate": 0.00019999977939050542, - "loss": 46.0, - "step": 8755 - }, - { - "epoch": 0.6694573465603915, - "grad_norm": 0.001290624146349728, - "learning_rate": 0.00019999977934004873, - "loss": 46.0, - "step": 8756 - }, - { - "epoch": 0.6695338035437812, - "grad_norm": 0.001181522966362536, - "learning_rate": 0.00019999977928958622, - "loss": 46.0, - "step": 8757 - }, - { - "epoch": 0.6696102605271709, - "grad_norm": 0.0014388780109584332, - "learning_rate": 0.00019999977923911794, - "loss": 46.0, - "step": 8758 - }, - { - "epoch": 0.6696867175105606, - "grad_norm": 0.002978184726089239, - "learning_rate": 0.0001999997791886439, - "loss": 46.0, - "step": 8759 - }, - { - "epoch": 0.6697631744939503, - "grad_norm": 0.00031134928576648235, - "learning_rate": 0.0001999997791381641, - "loss": 46.0, - "step": 8760 - }, - { - "epoch": 0.66983963147734, - "grad_norm": 0.0006983547355048358, - "learning_rate": 0.00019999977908767856, - "loss": 46.0, - "step": 8761 - }, - { - "epoch": 0.6699160884607298, - "grad_norm": 0.0008624501642771065, - "learning_rate": 0.0001999997790371872, - "loss": 46.0, - "step": 8762 - }, - { - "epoch": 0.6699925454441195, - "grad_norm": 0.001591513748280704, - "learning_rate": 0.0001999997789866901, - "loss": 46.0, - "step": 8763 - }, - { - "epoch": 0.6700690024275092, - "grad_norm": 0.0013368689687922597, - "learning_rate": 0.00019999977893618722, - "loss": 46.0, - "step": 8764 - }, - { - "epoch": 0.670145459410899, - "grad_norm": 0.0015445018652826548, - "learning_rate": 0.00019999977888567858, - "loss": 46.0, - "step": 8765 - }, - { - "epoch": 0.6702219163942886, - "grad_norm": 0.004002527333796024, - "learning_rate": 0.00019999977883516413, - "loss": 46.0, - "step": 8766 - }, - { - "epoch": 0.6702983733776784, - "grad_norm": 0.005256397649645805, - "learning_rate": 0.00019999977878464397, - "loss": 46.0, - "step": 8767 - }, - { - "epoch": 0.6703748303610682, - "grad_norm": 0.0008160103461705148, - "learning_rate": 0.00019999977873411798, - "loss": 46.0, - "step": 8768 - }, - { - "epoch": 0.6704512873444578, - "grad_norm": 0.002868467476218939, - "learning_rate": 0.00019999977868358627, - "loss": 46.0, - "step": 8769 - }, - { - "epoch": 0.6705277443278476, - "grad_norm": 0.000999022158794105, - "learning_rate": 0.0001999997786330488, - "loss": 46.0, - "step": 8770 - }, - { - "epoch": 0.6706042013112372, - "grad_norm": 0.0007276691612787545, - "learning_rate": 0.0001999997785825055, - "loss": 46.0, - "step": 8771 - }, - { - "epoch": 0.670680658294627, - "grad_norm": 0.005667826626449823, - "learning_rate": 0.00019999977853195648, - "loss": 46.0, - "step": 8772 - }, - { - "epoch": 0.6707571152780167, - "grad_norm": 0.0006296706269495189, - "learning_rate": 0.00019999977848140168, - "loss": 46.0, - "step": 8773 - }, - { - "epoch": 0.6708335722614064, - "grad_norm": 0.0021969785448163748, - "learning_rate": 0.0001999997784308411, - "loss": 46.0, - "step": 8774 - }, - { - "epoch": 0.6709100292447961, - "grad_norm": 0.001086077420040965, - "learning_rate": 0.00019999977838027478, - "loss": 46.0, - "step": 8775 - }, - { - "epoch": 0.6709864862281859, - "grad_norm": 0.0003675601037684828, - "learning_rate": 0.00019999977832970266, - "loss": 46.0, - "step": 8776 - }, - { - "epoch": 0.6710629432115756, - "grad_norm": 0.0008748634718358517, - "learning_rate": 0.00019999977827912476, - "loss": 46.0, - "step": 8777 - }, - { - "epoch": 0.6711394001949653, - "grad_norm": 0.003663973417133093, - "learning_rate": 0.00019999977822854112, - "loss": 46.0, - "step": 8778 - }, - { - "epoch": 0.6712158571783551, - "grad_norm": 0.006362304091453552, - "learning_rate": 0.00019999977817795168, - "loss": 46.0, - "step": 8779 - }, - { - "epoch": 0.6712923141617447, - "grad_norm": 0.01718462072312832, - "learning_rate": 0.00019999977812735652, - "loss": 46.0, - "step": 8780 - }, - { - "epoch": 0.6713687711451345, - "grad_norm": 0.006038782652467489, - "learning_rate": 0.00019999977807675555, - "loss": 46.0, - "step": 8781 - }, - { - "epoch": 0.6714452281285241, - "grad_norm": 0.0007000819896347821, - "learning_rate": 0.0001999997780261488, - "loss": 46.0, - "step": 8782 - }, - { - "epoch": 0.6715216851119139, - "grad_norm": 0.0009150332189165056, - "learning_rate": 0.0001999997779755363, - "loss": 46.0, - "step": 8783 - }, - { - "epoch": 0.6715981420953037, - "grad_norm": 0.0010293731465935707, - "learning_rate": 0.00019999977792491805, - "loss": 46.0, - "step": 8784 - }, - { - "epoch": 0.6716745990786933, - "grad_norm": 0.003607455873861909, - "learning_rate": 0.000199999777874294, - "loss": 46.0, - "step": 8785 - }, - { - "epoch": 0.6717510560620831, - "grad_norm": 0.001263849320821464, - "learning_rate": 0.0001999997778236642, - "loss": 46.0, - "step": 8786 - }, - { - "epoch": 0.6718275130454728, - "grad_norm": 0.001157538965344429, - "learning_rate": 0.00019999977777302862, - "loss": 46.0, - "step": 8787 - }, - { - "epoch": 0.6719039700288625, - "grad_norm": 0.01405640970915556, - "learning_rate": 0.00019999977772238727, - "loss": 46.0, - "step": 8788 - }, - { - "epoch": 0.6719804270122522, - "grad_norm": 0.001509845140390098, - "learning_rate": 0.00019999977767174018, - "loss": 46.0, - "step": 8789 - }, - { - "epoch": 0.672056883995642, - "grad_norm": 0.004036091733723879, - "learning_rate": 0.00019999977762108725, - "loss": 46.0, - "step": 8790 - }, - { - "epoch": 0.6721333409790317, - "grad_norm": 0.0017574878875166178, - "learning_rate": 0.00019999977757042864, - "loss": 46.0, - "step": 8791 - }, - { - "epoch": 0.6722097979624214, - "grad_norm": 0.004809914156794548, - "learning_rate": 0.00019999977751976417, - "loss": 46.0, - "step": 8792 - }, - { - "epoch": 0.6722862549458111, - "grad_norm": 0.0014662700705230236, - "learning_rate": 0.000199999777469094, - "loss": 46.0, - "step": 8793 - }, - { - "epoch": 0.6723627119292008, - "grad_norm": 0.0010704055894166231, - "learning_rate": 0.00019999977741841805, - "loss": 46.0, - "step": 8794 - }, - { - "epoch": 0.6724391689125906, - "grad_norm": 0.0007873719441704452, - "learning_rate": 0.0001999997773677363, - "loss": 46.0, - "step": 8795 - }, - { - "epoch": 0.6725156258959802, - "grad_norm": 0.0006021240260452032, - "learning_rate": 0.0001999997773170488, - "loss": 46.0, - "step": 8796 - }, - { - "epoch": 0.67259208287937, - "grad_norm": 0.0010698512196540833, - "learning_rate": 0.00019999977726635552, - "loss": 46.0, - "step": 8797 - }, - { - "epoch": 0.6726685398627598, - "grad_norm": 0.0013258474646136165, - "learning_rate": 0.0001999997772156565, - "loss": 46.0, - "step": 8798 - }, - { - "epoch": 0.6727449968461494, - "grad_norm": 0.005320283584296703, - "learning_rate": 0.0001999997771649517, - "loss": 46.0, - "step": 8799 - }, - { - "epoch": 0.6728214538295392, - "grad_norm": 0.000826517934910953, - "learning_rate": 0.0001999997771142411, - "loss": 46.0, - "step": 8800 - }, - { - "epoch": 0.6728979108129288, - "grad_norm": 0.0025309508200734854, - "learning_rate": 0.00019999977706352474, - "loss": 46.0, - "step": 8801 - }, - { - "epoch": 0.6729743677963186, - "grad_norm": 0.0007077916525304317, - "learning_rate": 0.00019999977701280264, - "loss": 46.0, - "step": 8802 - }, - { - "epoch": 0.6730508247797083, - "grad_norm": 0.0010015856241807342, - "learning_rate": 0.00019999977696207475, - "loss": 46.0, - "step": 8803 - }, - { - "epoch": 0.673127281763098, - "grad_norm": 0.0018026576144620776, - "learning_rate": 0.00019999977691134108, - "loss": 46.0, - "step": 8804 - }, - { - "epoch": 0.6732037387464878, - "grad_norm": 0.0009123882045969367, - "learning_rate": 0.00019999977686060167, - "loss": 46.0, - "step": 8805 - }, - { - "epoch": 0.6732801957298775, - "grad_norm": 0.0013137912610545754, - "learning_rate": 0.00019999977680985648, - "loss": 46.0, - "step": 8806 - }, - { - "epoch": 0.6733566527132672, - "grad_norm": 0.0033961795270442963, - "learning_rate": 0.0001999997767591055, - "loss": 46.0, - "step": 8807 - }, - { - "epoch": 0.6734331096966569, - "grad_norm": 0.0008632354438304901, - "learning_rate": 0.00019999977670834875, - "loss": 46.0, - "step": 8808 - }, - { - "epoch": 0.6735095666800467, - "grad_norm": 0.0010651986813172698, - "learning_rate": 0.00019999977665758625, - "loss": 46.0, - "step": 8809 - }, - { - "epoch": 0.6735860236634363, - "grad_norm": 0.001592569868080318, - "learning_rate": 0.00019999977660681796, - "loss": 46.0, - "step": 8810 - }, - { - "epoch": 0.6736624806468261, - "grad_norm": 0.007569534238427877, - "learning_rate": 0.00019999977655604394, - "loss": 46.0, - "step": 8811 - }, - { - "epoch": 0.6737389376302158, - "grad_norm": 0.0007504120003432035, - "learning_rate": 0.00019999977650526413, - "loss": 46.0, - "step": 8812 - }, - { - "epoch": 0.6738153946136055, - "grad_norm": 0.005444258451461792, - "learning_rate": 0.00019999977645447853, - "loss": 46.0, - "step": 8813 - }, - { - "epoch": 0.6738918515969953, - "grad_norm": 0.0027945602778345346, - "learning_rate": 0.00019999977640368719, - "loss": 46.0, - "step": 8814 - }, - { - "epoch": 0.6739683085803849, - "grad_norm": 0.0004073910531587899, - "learning_rate": 0.00019999977635289006, - "loss": 46.0, - "step": 8815 - }, - { - "epoch": 0.6740447655637747, - "grad_norm": 0.00032433439628221095, - "learning_rate": 0.0001999997763020872, - "loss": 46.0, - "step": 8816 - }, - { - "epoch": 0.6741212225471644, - "grad_norm": 0.005800970364362001, - "learning_rate": 0.0001999997762512785, - "loss": 46.0, - "step": 8817 - }, - { - "epoch": 0.6741976795305541, - "grad_norm": 0.004759421572089195, - "learning_rate": 0.00019999977620046409, - "loss": 46.0, - "step": 8818 - }, - { - "epoch": 0.6742741365139439, - "grad_norm": 0.0019921069033443928, - "learning_rate": 0.0001999997761496439, - "loss": 46.0, - "step": 8819 - }, - { - "epoch": 0.6743505934973336, - "grad_norm": 0.0009454063256271183, - "learning_rate": 0.00019999977609881794, - "loss": 46.0, - "step": 8820 - }, - { - "epoch": 0.6744270504807233, - "grad_norm": 0.0005759777850471437, - "learning_rate": 0.00019999977604798617, - "loss": 46.0, - "step": 8821 - }, - { - "epoch": 0.674503507464113, - "grad_norm": 0.006274321582168341, - "learning_rate": 0.00019999977599714867, - "loss": 46.0, - "step": 8822 - }, - { - "epoch": 0.6745799644475027, - "grad_norm": 0.0047287954948842525, - "learning_rate": 0.00019999977594630539, - "loss": 46.0, - "step": 8823 - }, - { - "epoch": 0.6746564214308924, - "grad_norm": 0.004649490118026733, - "learning_rate": 0.00019999977589545636, - "loss": 46.0, - "step": 8824 - }, - { - "epoch": 0.6747328784142822, - "grad_norm": 0.002104192739352584, - "learning_rate": 0.00019999977584460156, - "loss": 46.0, - "step": 8825 - }, - { - "epoch": 0.6748093353976719, - "grad_norm": 0.0007555439369753003, - "learning_rate": 0.00019999977579374093, - "loss": 46.0, - "step": 8826 - }, - { - "epoch": 0.6748857923810616, - "grad_norm": 0.0009805556619539857, - "learning_rate": 0.0001999997757428746, - "loss": 46.0, - "step": 8827 - }, - { - "epoch": 0.6749622493644514, - "grad_norm": 0.0012776469811797142, - "learning_rate": 0.00019999977569200249, - "loss": 46.0, - "step": 8828 - }, - { - "epoch": 0.675038706347841, - "grad_norm": 0.0030522739980369806, - "learning_rate": 0.00019999977564112456, - "loss": 46.0, - "step": 8829 - }, - { - "epoch": 0.6751151633312308, - "grad_norm": 0.0009938946459442377, - "learning_rate": 0.00019999977559024092, - "loss": 46.0, - "step": 8830 - }, - { - "epoch": 0.6751916203146204, - "grad_norm": 0.0013739357236772776, - "learning_rate": 0.0001999997755393515, - "loss": 46.0, - "step": 8831 - }, - { - "epoch": 0.6752680772980102, - "grad_norm": 0.0009283007238991559, - "learning_rate": 0.0001999997754884563, - "loss": 46.0, - "step": 8832 - }, - { - "epoch": 0.6753445342814, - "grad_norm": 0.0009329275926575065, - "learning_rate": 0.00019999977543755533, - "loss": 46.0, - "step": 8833 - }, - { - "epoch": 0.6754209912647896, - "grad_norm": 0.000396784336771816, - "learning_rate": 0.00019999977538664857, - "loss": 46.0, - "step": 8834 - }, - { - "epoch": 0.6754974482481794, - "grad_norm": 0.0003582837525755167, - "learning_rate": 0.00019999977533573606, - "loss": 46.0, - "step": 8835 - }, - { - "epoch": 0.6755739052315691, - "grad_norm": 0.0007325966143980622, - "learning_rate": 0.00019999977528481776, - "loss": 46.0, - "step": 8836 - }, - { - "epoch": 0.6756503622149588, - "grad_norm": 0.0003932283434551209, - "learning_rate": 0.00019999977523389376, - "loss": 46.0, - "step": 8837 - }, - { - "epoch": 0.6757268191983485, - "grad_norm": 0.0005122474394738674, - "learning_rate": 0.0001999997751829639, - "loss": 46.0, - "step": 8838 - }, - { - "epoch": 0.6758032761817383, - "grad_norm": 0.0011652312241494656, - "learning_rate": 0.00019999977513202833, - "loss": 46.0, - "step": 8839 - }, - { - "epoch": 0.675879733165128, - "grad_norm": 0.0018340147798880935, - "learning_rate": 0.00019999977508108695, - "loss": 46.0, - "step": 8840 - }, - { - "epoch": 0.6759561901485177, - "grad_norm": 0.0009766505099833012, - "learning_rate": 0.00019999977503013986, - "loss": 46.0, - "step": 8841 - }, - { - "epoch": 0.6760326471319074, - "grad_norm": 0.0013186981668695807, - "learning_rate": 0.00019999977497918696, - "loss": 46.0, - "step": 8842 - }, - { - "epoch": 0.6761091041152971, - "grad_norm": 0.0016813150141388178, - "learning_rate": 0.00019999977492822827, - "loss": 46.0, - "step": 8843 - }, - { - "epoch": 0.6761855610986869, - "grad_norm": 0.0020165392197668552, - "learning_rate": 0.00019999977487726386, - "loss": 46.0, - "step": 8844 - }, - { - "epoch": 0.6762620180820765, - "grad_norm": 0.0019567101262509823, - "learning_rate": 0.00019999977482629364, - "loss": 46.0, - "step": 8845 - }, - { - "epoch": 0.6763384750654663, - "grad_norm": 0.004047292750328779, - "learning_rate": 0.00019999977477531768, - "loss": 46.0, - "step": 8846 - }, - { - "epoch": 0.676414932048856, - "grad_norm": 0.0005070011247880757, - "learning_rate": 0.00019999977472433592, - "loss": 46.0, - "step": 8847 - }, - { - "epoch": 0.6764913890322457, - "grad_norm": 0.0006757982773706317, - "learning_rate": 0.0001999997746733484, - "loss": 46.0, - "step": 8848 - }, - { - "epoch": 0.6765678460156355, - "grad_norm": 0.0025839151348918676, - "learning_rate": 0.00019999977462235513, - "loss": 46.0, - "step": 8849 - }, - { - "epoch": 0.6766443029990252, - "grad_norm": 0.004773588385432959, - "learning_rate": 0.00019999977457135608, - "loss": 46.0, - "step": 8850 - }, - { - "epoch": 0.6767207599824149, - "grad_norm": 0.0009126248769462109, - "learning_rate": 0.00019999977452035125, - "loss": 46.0, - "step": 8851 - }, - { - "epoch": 0.6767972169658046, - "grad_norm": 0.0008942832937464118, - "learning_rate": 0.00019999977446934065, - "loss": 46.0, - "step": 8852 - }, - { - "epoch": 0.6768736739491943, - "grad_norm": 0.0007386683719232678, - "learning_rate": 0.0001999997744183243, - "loss": 46.0, - "step": 8853 - }, - { - "epoch": 0.676950130932584, - "grad_norm": 0.0030498644337058067, - "learning_rate": 0.00019999977436730218, - "loss": 46.0, - "step": 8854 - }, - { - "epoch": 0.6770265879159738, - "grad_norm": 0.0010214113863185048, - "learning_rate": 0.0001999997743162743, - "loss": 46.0, - "step": 8855 - }, - { - "epoch": 0.6771030448993635, - "grad_norm": 0.0022191761527210474, - "learning_rate": 0.0001999997742652406, - "loss": 46.0, - "step": 8856 - }, - { - "epoch": 0.6771795018827532, - "grad_norm": 0.0016494735609740019, - "learning_rate": 0.00019999977421420118, - "loss": 46.0, - "step": 8857 - }, - { - "epoch": 0.677255958866143, - "grad_norm": 0.00178253254853189, - "learning_rate": 0.00019999977416315597, - "loss": 46.0, - "step": 8858 - }, - { - "epoch": 0.6773324158495326, - "grad_norm": 0.0026668112259358168, - "learning_rate": 0.000199999774112105, - "loss": 46.0, - "step": 8859 - }, - { - "epoch": 0.6774088728329224, - "grad_norm": 0.001375620486214757, - "learning_rate": 0.00019999977406104824, - "loss": 46.0, - "step": 8860 - }, - { - "epoch": 0.677485329816312, - "grad_norm": 0.0012475703842937946, - "learning_rate": 0.0001999997740099857, - "loss": 46.0, - "step": 8861 - }, - { - "epoch": 0.6775617867997018, - "grad_norm": 0.0014985468005761504, - "learning_rate": 0.00019999977395891746, - "loss": 46.0, - "step": 8862 - }, - { - "epoch": 0.6776382437830916, - "grad_norm": 0.0013834508135914803, - "learning_rate": 0.00019999977390784338, - "loss": 46.0, - "step": 8863 - }, - { - "epoch": 0.6777147007664812, - "grad_norm": 0.000844665861222893, - "learning_rate": 0.00019999977385676355, - "loss": 46.0, - "step": 8864 - }, - { - "epoch": 0.677791157749871, - "grad_norm": 0.0010332379024475813, - "learning_rate": 0.00019999977380567797, - "loss": 46.0, - "step": 8865 - }, - { - "epoch": 0.6778676147332607, - "grad_norm": 0.00038638152182102203, - "learning_rate": 0.0001999997737545866, - "loss": 46.0, - "step": 8866 - }, - { - "epoch": 0.6779440717166504, - "grad_norm": 0.0012070524971932173, - "learning_rate": 0.00019999977370348945, - "loss": 46.0, - "step": 8867 - }, - { - "epoch": 0.6780205287000401, - "grad_norm": 0.0006206673569977283, - "learning_rate": 0.00019999977365238656, - "loss": 46.0, - "step": 8868 - }, - { - "epoch": 0.6780969856834299, - "grad_norm": 0.006029623094946146, - "learning_rate": 0.0001999997736012779, - "loss": 46.0, - "step": 8869 - }, - { - "epoch": 0.6781734426668196, - "grad_norm": 0.00040032059769146144, - "learning_rate": 0.00019999977355016345, - "loss": 46.0, - "step": 8870 - }, - { - "epoch": 0.6782498996502093, - "grad_norm": 0.0024517986457794905, - "learning_rate": 0.00019999977349904327, - "loss": 46.0, - "step": 8871 - }, - { - "epoch": 0.678326356633599, - "grad_norm": 0.0011035199277102947, - "learning_rate": 0.00019999977344791728, - "loss": 46.0, - "step": 8872 - }, - { - "epoch": 0.6784028136169887, - "grad_norm": 0.0012842753203585744, - "learning_rate": 0.00019999977339678552, - "loss": 46.0, - "step": 8873 - }, - { - "epoch": 0.6784792706003785, - "grad_norm": 0.0007621884578838944, - "learning_rate": 0.000199999773345648, - "loss": 46.0, - "step": 8874 - }, - { - "epoch": 0.6785557275837681, - "grad_norm": 0.0011028656736016273, - "learning_rate": 0.0001999997732945047, - "loss": 46.0, - "step": 8875 - }, - { - "epoch": 0.6786321845671579, - "grad_norm": 0.0025248867459595203, - "learning_rate": 0.00019999977324335565, - "loss": 46.0, - "step": 8876 - }, - { - "epoch": 0.6787086415505477, - "grad_norm": 0.002693133894354105, - "learning_rate": 0.00019999977319220085, - "loss": 46.0, - "step": 8877 - }, - { - "epoch": 0.6787850985339373, - "grad_norm": 0.0029521293472498655, - "learning_rate": 0.00019999977314104022, - "loss": 46.0, - "step": 8878 - }, - { - "epoch": 0.6788615555173271, - "grad_norm": 0.005102098919451237, - "learning_rate": 0.00019999977308987385, - "loss": 46.0, - "step": 8879 - }, - { - "epoch": 0.6789380125007168, - "grad_norm": 0.0018173924181610346, - "learning_rate": 0.00019999977303870173, - "loss": 46.0, - "step": 8880 - }, - { - "epoch": 0.6790144694841065, - "grad_norm": 0.0009427020559087396, - "learning_rate": 0.0001999997729875238, - "loss": 46.0, - "step": 8881 - }, - { - "epoch": 0.6790909264674962, - "grad_norm": 0.0032207202166318893, - "learning_rate": 0.00019999977293634017, - "loss": 46.0, - "step": 8882 - }, - { - "epoch": 0.6791673834508859, - "grad_norm": 0.003478718688711524, - "learning_rate": 0.00019999977288515073, - "loss": 46.0, - "step": 8883 - }, - { - "epoch": 0.6792438404342757, - "grad_norm": 0.0006191230495460331, - "learning_rate": 0.0001999997728339555, - "loss": 46.0, - "step": 8884 - }, - { - "epoch": 0.6793202974176654, - "grad_norm": 0.0007372137042693794, - "learning_rate": 0.0001999997727827545, - "loss": 46.0, - "step": 8885 - }, - { - "epoch": 0.6793967544010551, - "grad_norm": 0.0017002734821289778, - "learning_rate": 0.00019999977273154777, - "loss": 46.0, - "step": 8886 - }, - { - "epoch": 0.6794732113844448, - "grad_norm": 0.0017562441062182188, - "learning_rate": 0.00019999977268033524, - "loss": 46.0, - "step": 8887 - }, - { - "epoch": 0.6795496683678346, - "grad_norm": 0.001531782909296453, - "learning_rate": 0.00019999977262911696, - "loss": 46.0, - "step": 8888 - }, - { - "epoch": 0.6796261253512242, - "grad_norm": 0.0022313494700938463, - "learning_rate": 0.0001999997725778929, - "loss": 46.0, - "step": 8889 - }, - { - "epoch": 0.679702582334614, - "grad_norm": 0.00450655585154891, - "learning_rate": 0.00019999977252666308, - "loss": 46.0, - "step": 8890 - }, - { - "epoch": 0.6797790393180037, - "grad_norm": 0.0012120966566726565, - "learning_rate": 0.00019999977247542745, - "loss": 46.0, - "step": 8891 - }, - { - "epoch": 0.6798554963013934, - "grad_norm": 0.0006009733187966049, - "learning_rate": 0.00019999977242418608, - "loss": 46.0, - "step": 8892 - }, - { - "epoch": 0.6799319532847832, - "grad_norm": 0.008062581531703472, - "learning_rate": 0.00019999977237293896, - "loss": 46.0, - "step": 8893 - }, - { - "epoch": 0.6800084102681728, - "grad_norm": 0.0005266519146971405, - "learning_rate": 0.00019999977232168606, - "loss": 46.0, - "step": 8894 - }, - { - "epoch": 0.6800848672515626, - "grad_norm": 0.0008005801355466247, - "learning_rate": 0.0001999997722704274, - "loss": 46.0, - "step": 8895 - }, - { - "epoch": 0.6801613242349523, - "grad_norm": 0.000989048508927226, - "learning_rate": 0.00019999977221916293, - "loss": 46.0, - "step": 8896 - }, - { - "epoch": 0.680237781218342, - "grad_norm": 0.001267099753022194, - "learning_rate": 0.00019999977216789272, - "loss": 46.0, - "step": 8897 - }, - { - "epoch": 0.6803142382017318, - "grad_norm": 0.002835105871781707, - "learning_rate": 0.00019999977211661673, - "loss": 46.0, - "step": 8898 - }, - { - "epoch": 0.6803906951851215, - "grad_norm": 0.0022383895702660084, - "learning_rate": 0.00019999977206533497, - "loss": 46.0, - "step": 8899 - }, - { - "epoch": 0.6804671521685112, - "grad_norm": 0.0006995209841988981, - "learning_rate": 0.00019999977201404744, - "loss": 46.0, - "step": 8900 - }, - { - "epoch": 0.6805436091519009, - "grad_norm": 0.00225619925186038, - "learning_rate": 0.00019999977196275416, - "loss": 46.0, - "step": 8901 - }, - { - "epoch": 0.6806200661352906, - "grad_norm": 0.0011576348915696144, - "learning_rate": 0.0001999997719114551, - "loss": 46.0, - "step": 8902 - }, - { - "epoch": 0.6806965231186803, - "grad_norm": 0.0013492741854861379, - "learning_rate": 0.00019999977186015025, - "loss": 46.0, - "step": 8903 - }, - { - "epoch": 0.6807729801020701, - "grad_norm": 0.005938329268246889, - "learning_rate": 0.00019999977180883965, - "loss": 46.0, - "step": 8904 - }, - { - "epoch": 0.6808494370854598, - "grad_norm": 0.004824361763894558, - "learning_rate": 0.00019999977175752328, - "loss": 46.0, - "step": 8905 - }, - { - "epoch": 0.6809258940688495, - "grad_norm": 0.0023215790279209614, - "learning_rate": 0.00019999977170620113, - "loss": 46.0, - "step": 8906 - }, - { - "epoch": 0.6810023510522393, - "grad_norm": 0.004163461271673441, - "learning_rate": 0.00019999977165487324, - "loss": 46.0, - "step": 8907 - }, - { - "epoch": 0.6810788080356289, - "grad_norm": 0.0015719932271167636, - "learning_rate": 0.00019999977160353955, - "loss": 46.0, - "step": 8908 - }, - { - "epoch": 0.6811552650190187, - "grad_norm": 0.0004061298677697778, - "learning_rate": 0.0001999997715522001, - "loss": 46.0, - "step": 8909 - }, - { - "epoch": 0.6812317220024084, - "grad_norm": 0.0009165135561488569, - "learning_rate": 0.0001999997715008549, - "loss": 46.0, - "step": 8910 - }, - { - "epoch": 0.6813081789857981, - "grad_norm": 0.0004233189392834902, - "learning_rate": 0.0001999997714495039, - "loss": 46.0, - "step": 8911 - }, - { - "epoch": 0.6813846359691879, - "grad_norm": 0.0029324786737561226, - "learning_rate": 0.00019999977139814715, - "loss": 46.0, - "step": 8912 - }, - { - "epoch": 0.6814610929525775, - "grad_norm": 0.0029001713264733553, - "learning_rate": 0.00019999977134678462, - "loss": 46.0, - "step": 8913 - }, - { - "epoch": 0.6815375499359673, - "grad_norm": 0.0070222048088908195, - "learning_rate": 0.00019999977129541634, - "loss": 46.0, - "step": 8914 - }, - { - "epoch": 0.681614006919357, - "grad_norm": 0.003727530362084508, - "learning_rate": 0.00019999977124404226, - "loss": 46.0, - "step": 8915 - }, - { - "epoch": 0.6816904639027467, - "grad_norm": 0.0010502394288778305, - "learning_rate": 0.00019999977119266243, - "loss": 46.0, - "step": 8916 - }, - { - "epoch": 0.6817669208861364, - "grad_norm": 0.0012994263088330626, - "learning_rate": 0.00019999977114127683, - "loss": 46.0, - "step": 8917 - }, - { - "epoch": 0.6818433778695262, - "grad_norm": 0.0008250034297816455, - "learning_rate": 0.00019999977108988546, - "loss": 46.0, - "step": 8918 - }, - { - "epoch": 0.6819198348529159, - "grad_norm": 0.0052488106302917, - "learning_rate": 0.0001999997710384883, - "loss": 46.0, - "step": 8919 - }, - { - "epoch": 0.6819962918363056, - "grad_norm": 0.0008516021771356463, - "learning_rate": 0.00019999977098708537, - "loss": 46.0, - "step": 8920 - }, - { - "epoch": 0.6820727488196954, - "grad_norm": 0.0012623557122424245, - "learning_rate": 0.00019999977093567673, - "loss": 46.0, - "step": 8921 - }, - { - "epoch": 0.682149205803085, - "grad_norm": 0.0024201348423957825, - "learning_rate": 0.00019999977088426226, - "loss": 46.0, - "step": 8922 - }, - { - "epoch": 0.6822256627864748, - "grad_norm": 0.001519442885182798, - "learning_rate": 0.00019999977083284205, - "loss": 46.0, - "step": 8923 - }, - { - "epoch": 0.6823021197698644, - "grad_norm": 0.0008020659442991018, - "learning_rate": 0.00019999977078141606, - "loss": 46.0, - "step": 8924 - }, - { - "epoch": 0.6823785767532542, - "grad_norm": 0.001165450899861753, - "learning_rate": 0.00019999977072998428, - "loss": 46.0, - "step": 8925 - }, - { - "epoch": 0.682455033736644, - "grad_norm": 0.0011818933999165893, - "learning_rate": 0.00019999977067854677, - "loss": 46.0, - "step": 8926 - }, - { - "epoch": 0.6825314907200336, - "grad_norm": 0.0005220667808316648, - "learning_rate": 0.00019999977062710347, - "loss": 46.0, - "step": 8927 - }, - { - "epoch": 0.6826079477034234, - "grad_norm": 0.007757478393614292, - "learning_rate": 0.00019999977057565439, - "loss": 46.0, - "step": 8928 - }, - { - "epoch": 0.6826844046868131, - "grad_norm": 0.0008062376291491091, - "learning_rate": 0.00019999977052419956, - "loss": 46.0, - "step": 8929 - }, - { - "epoch": 0.6827608616702028, - "grad_norm": 0.001988654024899006, - "learning_rate": 0.00019999977047273893, - "loss": 46.0, - "step": 8930 - }, - { - "epoch": 0.6828373186535925, - "grad_norm": 0.006442064419388771, - "learning_rate": 0.00019999977042127256, - "loss": 46.0, - "step": 8931 - }, - { - "epoch": 0.6829137756369822, - "grad_norm": 0.00390633475035429, - "learning_rate": 0.00019999977036980042, - "loss": 46.0, - "step": 8932 - }, - { - "epoch": 0.682990232620372, - "grad_norm": 0.0003620848001446575, - "learning_rate": 0.0001999997703183225, - "loss": 46.0, - "step": 8933 - }, - { - "epoch": 0.6830666896037617, - "grad_norm": 0.0018519113073125482, - "learning_rate": 0.00019999977026683883, - "loss": 46.0, - "step": 8934 - }, - { - "epoch": 0.6831431465871514, - "grad_norm": 0.00046496186405420303, - "learning_rate": 0.00019999977021534937, - "loss": 46.0, - "step": 8935 - }, - { - "epoch": 0.6832196035705411, - "grad_norm": 0.001361010828986764, - "learning_rate": 0.00019999977016385415, - "loss": 46.0, - "step": 8936 - }, - { - "epoch": 0.6832960605539309, - "grad_norm": 0.0026377395261079073, - "learning_rate": 0.00019999977011235317, - "loss": 46.0, - "step": 8937 - }, - { - "epoch": 0.6833725175373205, - "grad_norm": 0.0013580245431512594, - "learning_rate": 0.00019999977006084638, - "loss": 46.0, - "step": 8938 - }, - { - "epoch": 0.6834489745207103, - "grad_norm": 0.0012145794462412596, - "learning_rate": 0.00019999977000933385, - "loss": 46.0, - "step": 8939 - }, - { - "epoch": 0.6835254315041001, - "grad_norm": 0.0017297255108132958, - "learning_rate": 0.00019999976995781554, - "loss": 46.0, - "step": 8940 - }, - { - "epoch": 0.6836018884874897, - "grad_norm": 0.0010634642094373703, - "learning_rate": 0.0001999997699062915, - "loss": 46.0, - "step": 8941 - }, - { - "epoch": 0.6836783454708795, - "grad_norm": 0.0006453762180171907, - "learning_rate": 0.00019999976985476164, - "loss": 46.0, - "step": 8942 - }, - { - "epoch": 0.6837548024542691, - "grad_norm": 0.002811832819133997, - "learning_rate": 0.00019999976980322604, - "loss": 46.0, - "step": 8943 - }, - { - "epoch": 0.6838312594376589, - "grad_norm": 0.0016413466073572636, - "learning_rate": 0.00019999976975168467, - "loss": 46.0, - "step": 8944 - }, - { - "epoch": 0.6839077164210486, - "grad_norm": 0.00123019446618855, - "learning_rate": 0.0001999997697001375, - "loss": 46.0, - "step": 8945 - }, - { - "epoch": 0.6839841734044383, - "grad_norm": 0.0018856578972190619, - "learning_rate": 0.0001999997696485846, - "loss": 46.0, - "step": 8946 - }, - { - "epoch": 0.684060630387828, - "grad_norm": 0.0008849318255670369, - "learning_rate": 0.00019999976959702588, - "loss": 46.0, - "step": 8947 - }, - { - "epoch": 0.6841370873712178, - "grad_norm": 0.0006442017038352787, - "learning_rate": 0.00019999976954546145, - "loss": 46.0, - "step": 8948 - }, - { - "epoch": 0.6842135443546075, - "grad_norm": 0.0009710351005196571, - "learning_rate": 0.00019999976949389123, - "loss": 46.0, - "step": 8949 - }, - { - "epoch": 0.6842900013379972, - "grad_norm": 0.0006140318582765758, - "learning_rate": 0.00019999976944231522, - "loss": 46.0, - "step": 8950 - }, - { - "epoch": 0.684366458321387, - "grad_norm": 0.0018044919706881046, - "learning_rate": 0.00019999976939073346, - "loss": 46.0, - "step": 8951 - }, - { - "epoch": 0.6844429153047766, - "grad_norm": 0.007445411756634712, - "learning_rate": 0.00019999976933914593, - "loss": 46.0, - "step": 8952 - }, - { - "epoch": 0.6845193722881664, - "grad_norm": 0.0022069085389375687, - "learning_rate": 0.00019999976928755263, - "loss": 46.0, - "step": 8953 - }, - { - "epoch": 0.684595829271556, - "grad_norm": 0.0034716997761279345, - "learning_rate": 0.00019999976923595355, - "loss": 46.0, - "step": 8954 - }, - { - "epoch": 0.6846722862549458, - "grad_norm": 0.00046213821042329073, - "learning_rate": 0.0001999997691843487, - "loss": 46.0, - "step": 8955 - }, - { - "epoch": 0.6847487432383356, - "grad_norm": 0.002667208667844534, - "learning_rate": 0.00019999976913273807, - "loss": 46.0, - "step": 8956 - }, - { - "epoch": 0.6848252002217252, - "grad_norm": 0.0014564176090061665, - "learning_rate": 0.00019999976908112173, - "loss": 46.0, - "step": 8957 - }, - { - "epoch": 0.684901657205115, - "grad_norm": 0.001079651527106762, - "learning_rate": 0.00019999976902949955, - "loss": 46.0, - "step": 8958 - }, - { - "epoch": 0.6849781141885047, - "grad_norm": 0.002777957124635577, - "learning_rate": 0.00019999976897787166, - "loss": 46.0, - "step": 8959 - }, - { - "epoch": 0.6850545711718944, - "grad_norm": 0.00052559346659109, - "learning_rate": 0.00019999976892623794, - "loss": 46.0, - "step": 8960 - }, - { - "epoch": 0.6851310281552841, - "grad_norm": 0.0010202423436567187, - "learning_rate": 0.00019999976887459848, - "loss": 46.0, - "step": 8961 - }, - { - "epoch": 0.6852074851386738, - "grad_norm": 0.0005036952206864953, - "learning_rate": 0.00019999976882295327, - "loss": 46.0, - "step": 8962 - }, - { - "epoch": 0.6852839421220636, - "grad_norm": 0.01150734256953001, - "learning_rate": 0.00019999976877130225, - "loss": 46.0, - "step": 8963 - }, - { - "epoch": 0.6853603991054533, - "grad_norm": 0.0013397192815318704, - "learning_rate": 0.0001999997687196455, - "loss": 46.0, - "step": 8964 - }, - { - "epoch": 0.685436856088843, - "grad_norm": 0.0007708031916990876, - "learning_rate": 0.00019999976866798296, - "loss": 46.0, - "step": 8965 - }, - { - "epoch": 0.6855133130722327, - "grad_norm": 0.015683762729167938, - "learning_rate": 0.00019999976861631463, - "loss": 46.0, - "step": 8966 - }, - { - "epoch": 0.6855897700556225, - "grad_norm": 0.005421179346740246, - "learning_rate": 0.00019999976856464055, - "loss": 46.0, - "step": 8967 - }, - { - "epoch": 0.6856662270390121, - "grad_norm": 0.001229059649631381, - "learning_rate": 0.00019999976851296073, - "loss": 46.0, - "step": 8968 - }, - { - "epoch": 0.6857426840224019, - "grad_norm": 0.0017709207022562623, - "learning_rate": 0.0001999997684612751, - "loss": 46.0, - "step": 8969 - }, - { - "epoch": 0.6858191410057917, - "grad_norm": 0.001023811404593289, - "learning_rate": 0.00019999976840958373, - "loss": 46.0, - "step": 8970 - }, - { - "epoch": 0.6858955979891813, - "grad_norm": 0.0012827676255255938, - "learning_rate": 0.00019999976835788656, - "loss": 46.0, - "step": 8971 - }, - { - "epoch": 0.6859720549725711, - "grad_norm": 0.0009215387399308383, - "learning_rate": 0.00019999976830618364, - "loss": 46.0, - "step": 8972 - }, - { - "epoch": 0.6860485119559607, - "grad_norm": 0.0006364753935486078, - "learning_rate": 0.00019999976825447495, - "loss": 46.0, - "step": 8973 - }, - { - "epoch": 0.6861249689393505, - "grad_norm": 0.004577720537781715, - "learning_rate": 0.00019999976820276049, - "loss": 46.0, - "step": 8974 - }, - { - "epoch": 0.6862014259227402, - "grad_norm": 0.003056132700294256, - "learning_rate": 0.00019999976815104025, - "loss": 46.0, - "step": 8975 - }, - { - "epoch": 0.6862778829061299, - "grad_norm": 0.0026472159661352634, - "learning_rate": 0.00019999976809931424, - "loss": 46.0, - "step": 8976 - }, - { - "epoch": 0.6863543398895197, - "grad_norm": 0.0005493867793120444, - "learning_rate": 0.00019999976804758248, - "loss": 46.0, - "step": 8977 - }, - { - "epoch": 0.6864307968729094, - "grad_norm": 0.0009548880043439567, - "learning_rate": 0.00019999976799584495, - "loss": 46.0, - "step": 8978 - }, - { - "epoch": 0.6865072538562991, - "grad_norm": 0.0005695368745364249, - "learning_rate": 0.00019999976794410164, - "loss": 46.0, - "step": 8979 - }, - { - "epoch": 0.6865837108396888, - "grad_norm": 0.0012504211626946926, - "learning_rate": 0.00019999976789235256, - "loss": 46.0, - "step": 8980 - }, - { - "epoch": 0.6866601678230786, - "grad_norm": 0.0008027313160710037, - "learning_rate": 0.0001999997678405977, - "loss": 46.0, - "step": 8981 - }, - { - "epoch": 0.6867366248064682, - "grad_norm": 0.0011888340814039111, - "learning_rate": 0.0001999997677888371, - "loss": 46.0, - "step": 8982 - }, - { - "epoch": 0.686813081789858, - "grad_norm": 0.0024902618024498224, - "learning_rate": 0.00019999976773707072, - "loss": 46.0, - "step": 8983 - }, - { - "epoch": 0.6868895387732477, - "grad_norm": 0.0011751026613637805, - "learning_rate": 0.00019999976768529854, - "loss": 46.0, - "step": 8984 - }, - { - "epoch": 0.6869659957566374, - "grad_norm": 0.00040528629324398935, - "learning_rate": 0.00019999976763352063, - "loss": 46.0, - "step": 8985 - }, - { - "epoch": 0.6870424527400272, - "grad_norm": 0.000988964457064867, - "learning_rate": 0.00019999976758173693, - "loss": 46.0, - "step": 8986 - }, - { - "epoch": 0.6871189097234168, - "grad_norm": 0.017140034586191177, - "learning_rate": 0.00019999976752994747, - "loss": 46.0, - "step": 8987 - }, - { - "epoch": 0.6871953667068066, - "grad_norm": 0.00042539279093034565, - "learning_rate": 0.00019999976747815223, - "loss": 46.0, - "step": 8988 - }, - { - "epoch": 0.6872718236901963, - "grad_norm": 0.0017999152187258005, - "learning_rate": 0.00019999976742635122, - "loss": 46.0, - "step": 8989 - }, - { - "epoch": 0.687348280673586, - "grad_norm": 0.0004646909364964813, - "learning_rate": 0.00019999976737454444, - "loss": 46.0, - "step": 8990 - }, - { - "epoch": 0.6874247376569758, - "grad_norm": 0.0021967217326164246, - "learning_rate": 0.00019999976732273188, - "loss": 46.0, - "step": 8991 - }, - { - "epoch": 0.6875011946403654, - "grad_norm": 0.0008120503625832498, - "learning_rate": 0.0001999997672709136, - "loss": 46.0, - "step": 8992 - }, - { - "epoch": 0.6875776516237552, - "grad_norm": 0.00040943597559817135, - "learning_rate": 0.0001999997672190895, - "loss": 46.0, - "step": 8993 - }, - { - "epoch": 0.6876541086071449, - "grad_norm": 0.001688003190793097, - "learning_rate": 0.00019999976716725964, - "loss": 46.0, - "step": 8994 - }, - { - "epoch": 0.6877305655905346, - "grad_norm": 0.000609360751695931, - "learning_rate": 0.00019999976711542402, - "loss": 46.0, - "step": 8995 - }, - { - "epoch": 0.6878070225739243, - "grad_norm": 0.0012643372174352407, - "learning_rate": 0.00019999976706358265, - "loss": 46.0, - "step": 8996 - }, - { - "epoch": 0.6878834795573141, - "grad_norm": 0.002649756846949458, - "learning_rate": 0.00019999976701173548, - "loss": 46.0, - "step": 8997 - }, - { - "epoch": 0.6879599365407038, - "grad_norm": 0.0009037270210683346, - "learning_rate": 0.00019999976695988256, - "loss": 46.0, - "step": 8998 - }, - { - "epoch": 0.6880363935240935, - "grad_norm": 0.001698114792816341, - "learning_rate": 0.00019999976690802384, - "loss": 46.0, - "step": 8999 - }, - { - "epoch": 0.6881128505074833, - "grad_norm": 0.0005454815691336989, - "learning_rate": 0.0001999997668561594, - "loss": 46.0, - "step": 9000 - }, - { - "epoch": 0.6881893074908729, - "grad_norm": 0.0011061440454795957, - "learning_rate": 0.00019999976680428917, - "loss": 46.0, - "step": 9001 - }, - { - "epoch": 0.6882657644742627, - "grad_norm": 0.0007159507949836552, - "learning_rate": 0.00019999976675241316, - "loss": 46.0, - "step": 9002 - }, - { - "epoch": 0.6883422214576523, - "grad_norm": 0.004143567755818367, - "learning_rate": 0.00019999976670053137, - "loss": 46.0, - "step": 9003 - }, - { - "epoch": 0.6884186784410421, - "grad_norm": 0.0030515280086547136, - "learning_rate": 0.00019999976664864381, - "loss": 46.0, - "step": 9004 - }, - { - "epoch": 0.6884951354244319, - "grad_norm": 0.0023069658782333136, - "learning_rate": 0.00019999976659675054, - "loss": 46.0, - "step": 9005 - }, - { - "epoch": 0.6885715924078215, - "grad_norm": 0.002587481401860714, - "learning_rate": 0.00019999976654485143, - "loss": 46.0, - "step": 9006 - }, - { - "epoch": 0.6886480493912113, - "grad_norm": 0.0004480658390093595, - "learning_rate": 0.00019999976649294658, - "loss": 46.0, - "step": 9007 - }, - { - "epoch": 0.688724506374601, - "grad_norm": 0.006968831643462181, - "learning_rate": 0.00019999976644103596, - "loss": 46.0, - "step": 9008 - }, - { - "epoch": 0.6888009633579907, - "grad_norm": 0.00040929080569185317, - "learning_rate": 0.00019999976638911956, - "loss": 46.0, - "step": 9009 - }, - { - "epoch": 0.6888774203413804, - "grad_norm": 0.0018370988545939326, - "learning_rate": 0.0001999997663371974, - "loss": 46.0, - "step": 9010 - }, - { - "epoch": 0.6889538773247702, - "grad_norm": 0.0027835245709866285, - "learning_rate": 0.00019999976628526947, - "loss": 46.0, - "step": 9011 - }, - { - "epoch": 0.6890303343081599, - "grad_norm": 0.00038406450767070055, - "learning_rate": 0.00019999976623333575, - "loss": 46.0, - "step": 9012 - }, - { - "epoch": 0.6891067912915496, - "grad_norm": 0.0006710010347887874, - "learning_rate": 0.0001999997661813963, - "loss": 46.0, - "step": 9013 - }, - { - "epoch": 0.6891832482749393, - "grad_norm": 0.003953936044126749, - "learning_rate": 0.00019999976612945102, - "loss": 46.0, - "step": 9014 - }, - { - "epoch": 0.689259705258329, - "grad_norm": 0.001941786496900022, - "learning_rate": 0.00019999976607750004, - "loss": 46.0, - "step": 9015 - }, - { - "epoch": 0.6893361622417188, - "grad_norm": 0.0009593384456820786, - "learning_rate": 0.00019999976602554326, - "loss": 46.0, - "step": 9016 - }, - { - "epoch": 0.6894126192251084, - "grad_norm": 0.0019117763731628656, - "learning_rate": 0.0001999997659735807, - "loss": 46.0, - "step": 9017 - }, - { - "epoch": 0.6894890762084982, - "grad_norm": 0.009926487691700459, - "learning_rate": 0.0001999997659216124, - "loss": 46.0, - "step": 9018 - }, - { - "epoch": 0.689565533191888, - "grad_norm": 0.0008937942329794168, - "learning_rate": 0.00019999976586963832, - "loss": 46.0, - "step": 9019 - }, - { - "epoch": 0.6896419901752776, - "grad_norm": 0.0005348284030333161, - "learning_rate": 0.00019999976581765844, - "loss": 46.0, - "step": 9020 - }, - { - "epoch": 0.6897184471586674, - "grad_norm": 0.0007350753294304013, - "learning_rate": 0.00019999976576567284, - "loss": 46.0, - "step": 9021 - }, - { - "epoch": 0.6897949041420571, - "grad_norm": 0.0008200214942917228, - "learning_rate": 0.00019999976571368145, - "loss": 46.0, - "step": 9022 - }, - { - "epoch": 0.6898713611254468, - "grad_norm": 0.0008654919802211225, - "learning_rate": 0.00019999976566168428, - "loss": 46.0, - "step": 9023 - }, - { - "epoch": 0.6899478181088365, - "grad_norm": 0.0009617213509045541, - "learning_rate": 0.00019999976560968133, - "loss": 46.0, - "step": 9024 - }, - { - "epoch": 0.6900242750922262, - "grad_norm": 0.002543957205489278, - "learning_rate": 0.00019999976555767264, - "loss": 46.0, - "step": 9025 - }, - { - "epoch": 0.690100732075616, - "grad_norm": 0.000933670555241406, - "learning_rate": 0.00019999976550565815, - "loss": 46.0, - "step": 9026 - }, - { - "epoch": 0.6901771890590057, - "grad_norm": 0.0017495944630354643, - "learning_rate": 0.0001999997654536379, - "loss": 46.0, - "step": 9027 - }, - { - "epoch": 0.6902536460423954, - "grad_norm": 0.00033723912201821804, - "learning_rate": 0.0001999997654016119, - "loss": 46.0, - "step": 9028 - }, - { - "epoch": 0.6903301030257851, - "grad_norm": 0.0016255220398306847, - "learning_rate": 0.00019999976534958012, - "loss": 46.0, - "step": 9029 - }, - { - "epoch": 0.6904065600091749, - "grad_norm": 0.0008925366564653814, - "learning_rate": 0.00019999976529754256, - "loss": 46.0, - "step": 9030 - }, - { - "epoch": 0.6904830169925645, - "grad_norm": 0.0007710131467320025, - "learning_rate": 0.00019999976524549923, - "loss": 46.0, - "step": 9031 - }, - { - "epoch": 0.6905594739759543, - "grad_norm": 0.0025350386276841164, - "learning_rate": 0.00019999976519345015, - "loss": 46.0, - "step": 9032 - }, - { - "epoch": 0.690635930959344, - "grad_norm": 0.007017096038907766, - "learning_rate": 0.0001999997651413953, - "loss": 46.0, - "step": 9033 - }, - { - "epoch": 0.6907123879427337, - "grad_norm": 0.0012645142851397395, - "learning_rate": 0.00019999976508933465, - "loss": 46.0, - "step": 9034 - }, - { - "epoch": 0.6907888449261235, - "grad_norm": 0.0017301932675763965, - "learning_rate": 0.00019999976503726828, - "loss": 46.0, - "step": 9035 - }, - { - "epoch": 0.6908653019095131, - "grad_norm": 0.001873756991699338, - "learning_rate": 0.00019999976498519611, - "loss": 46.0, - "step": 9036 - }, - { - "epoch": 0.6909417588929029, - "grad_norm": 0.0015052204253152013, - "learning_rate": 0.00019999976493311817, - "loss": 46.0, - "step": 9037 - }, - { - "epoch": 0.6910182158762926, - "grad_norm": 0.0005426730494946241, - "learning_rate": 0.00019999976488103448, - "loss": 46.0, - "step": 9038 - }, - { - "epoch": 0.6910946728596823, - "grad_norm": 0.0009391424828208983, - "learning_rate": 0.000199999764828945, - "loss": 46.0, - "step": 9039 - }, - { - "epoch": 0.691171129843072, - "grad_norm": 0.0008600588771514595, - "learning_rate": 0.00019999976477684975, - "loss": 46.0, - "step": 9040 - }, - { - "epoch": 0.6912475868264618, - "grad_norm": 0.005597707349807024, - "learning_rate": 0.00019999976472474872, - "loss": 46.0, - "step": 9041 - }, - { - "epoch": 0.6913240438098515, - "grad_norm": 0.0007188781164586544, - "learning_rate": 0.00019999976467264193, - "loss": 46.0, - "step": 9042 - }, - { - "epoch": 0.6914005007932412, - "grad_norm": 0.0011846552370116115, - "learning_rate": 0.00019999976462052938, - "loss": 46.0, - "step": 9043 - }, - { - "epoch": 0.6914769577766309, - "grad_norm": 0.0013120861258357763, - "learning_rate": 0.00019999976456841107, - "loss": 46.0, - "step": 9044 - }, - { - "epoch": 0.6915534147600206, - "grad_norm": 0.0013271295465528965, - "learning_rate": 0.00019999976451628697, - "loss": 46.0, - "step": 9045 - }, - { - "epoch": 0.6916298717434104, - "grad_norm": 0.002557573840022087, - "learning_rate": 0.0001999997644641571, - "loss": 46.0, - "step": 9046 - }, - { - "epoch": 0.6917063287268, - "grad_norm": 0.0007955206092447042, - "learning_rate": 0.00019999976441202147, - "loss": 46.0, - "step": 9047 - }, - { - "epoch": 0.6917827857101898, - "grad_norm": 0.008305965922772884, - "learning_rate": 0.00019999976435988007, - "loss": 46.0, - "step": 9048 - }, - { - "epoch": 0.6918592426935796, - "grad_norm": 0.0017414254834875464, - "learning_rate": 0.0001999997643077329, - "loss": 46.0, - "step": 9049 - }, - { - "epoch": 0.6919356996769692, - "grad_norm": 0.0029711704701185226, - "learning_rate": 0.00019999976425557996, - "loss": 46.0, - "step": 9050 - }, - { - "epoch": 0.692012156660359, - "grad_norm": 0.0006545691285282373, - "learning_rate": 0.00019999976420342127, - "loss": 46.0, - "step": 9051 - }, - { - "epoch": 0.6920886136437487, - "grad_norm": 0.0005934067303314805, - "learning_rate": 0.00019999976415125675, - "loss": 46.0, - "step": 9052 - }, - { - "epoch": 0.6921650706271384, - "grad_norm": 0.0013071164721623063, - "learning_rate": 0.00019999976409908654, - "loss": 46.0, - "step": 9053 - }, - { - "epoch": 0.6922415276105282, - "grad_norm": 0.0006706046406179667, - "learning_rate": 0.0001999997640469105, - "loss": 46.0, - "step": 9054 - }, - { - "epoch": 0.6923179845939178, - "grad_norm": 0.005196481943130493, - "learning_rate": 0.00019999976399472872, - "loss": 46.0, - "step": 9055 - }, - { - "epoch": 0.6923944415773076, - "grad_norm": 0.0010553019819781184, - "learning_rate": 0.00019999976394254116, - "loss": 46.0, - "step": 9056 - }, - { - "epoch": 0.6924708985606973, - "grad_norm": 0.0028756125830113888, - "learning_rate": 0.00019999976389034783, - "loss": 46.0, - "step": 9057 - }, - { - "epoch": 0.692547355544087, - "grad_norm": 0.004338743630796671, - "learning_rate": 0.00019999976383814873, - "loss": 46.0, - "step": 9058 - }, - { - "epoch": 0.6926238125274767, - "grad_norm": 0.0021503502503037453, - "learning_rate": 0.00019999976378594388, - "loss": 46.0, - "step": 9059 - }, - { - "epoch": 0.6927002695108665, - "grad_norm": 0.00505692046135664, - "learning_rate": 0.00019999976373373323, - "loss": 46.0, - "step": 9060 - }, - { - "epoch": 0.6927767264942561, - "grad_norm": 0.0008375363540835679, - "learning_rate": 0.00019999976368151684, - "loss": 46.0, - "step": 9061 - }, - { - "epoch": 0.6928531834776459, - "grad_norm": 0.000604130094870925, - "learning_rate": 0.00019999976362929467, - "loss": 46.0, - "step": 9062 - }, - { - "epoch": 0.6929296404610356, - "grad_norm": 0.00730582932010293, - "learning_rate": 0.00019999976357706672, - "loss": 46.0, - "step": 9063 - }, - { - "epoch": 0.6930060974444253, - "grad_norm": 0.0010035594459623098, - "learning_rate": 0.000199999763524833, - "loss": 46.0, - "step": 9064 - }, - { - "epoch": 0.6930825544278151, - "grad_norm": 0.0008801851072348654, - "learning_rate": 0.00019999976347259352, - "loss": 46.0, - "step": 9065 - }, - { - "epoch": 0.6931590114112047, - "grad_norm": 0.000766176322940737, - "learning_rate": 0.00019999976342034828, - "loss": 46.0, - "step": 9066 - }, - { - "epoch": 0.6932354683945945, - "grad_norm": 0.0018666146788746119, - "learning_rate": 0.00019999976336809727, - "loss": 46.0, - "step": 9067 - }, - { - "epoch": 0.6933119253779843, - "grad_norm": 0.002000202191993594, - "learning_rate": 0.00019999976331584046, - "loss": 46.0, - "step": 9068 - }, - { - "epoch": 0.6933883823613739, - "grad_norm": 0.007076604757457972, - "learning_rate": 0.0001999997632635779, - "loss": 46.0, - "step": 9069 - }, - { - "epoch": 0.6934648393447637, - "grad_norm": 0.0018796473741531372, - "learning_rate": 0.00019999976321130957, - "loss": 46.0, - "step": 9070 - }, - { - "epoch": 0.6935412963281534, - "grad_norm": 0.0034080531913787127, - "learning_rate": 0.00019999976315903547, - "loss": 46.0, - "step": 9071 - }, - { - "epoch": 0.6936177533115431, - "grad_norm": 0.0007652313215658069, - "learning_rate": 0.00019999976310675562, - "loss": 46.0, - "step": 9072 - }, - { - "epoch": 0.6936942102949328, - "grad_norm": 0.026359155774116516, - "learning_rate": 0.00019999976305446997, - "loss": 46.0, - "step": 9073 - }, - { - "epoch": 0.6937706672783225, - "grad_norm": 0.0022392745595425367, - "learning_rate": 0.00019999976300217858, - "loss": 46.0, - "step": 9074 - }, - { - "epoch": 0.6938471242617122, - "grad_norm": 0.0008756750612519681, - "learning_rate": 0.0001999997629498814, - "loss": 46.0, - "step": 9075 - }, - { - "epoch": 0.693923581245102, - "grad_norm": 0.0017607525223866105, - "learning_rate": 0.00019999976289757844, - "loss": 46.0, - "step": 9076 - }, - { - "epoch": 0.6940000382284917, - "grad_norm": 0.0018702505622059107, - "learning_rate": 0.00019999976284526972, - "loss": 46.0, - "step": 9077 - }, - { - "epoch": 0.6940764952118814, - "grad_norm": 0.0010729047935456038, - "learning_rate": 0.00019999976279295526, - "loss": 46.0, - "step": 9078 - }, - { - "epoch": 0.6941529521952712, - "grad_norm": 0.0004929465940222144, - "learning_rate": 0.00019999976274063502, - "loss": 46.0, - "step": 9079 - }, - { - "epoch": 0.6942294091786608, - "grad_norm": 0.020234225317835808, - "learning_rate": 0.000199999762688309, - "loss": 46.0, - "step": 9080 - }, - { - "epoch": 0.6943058661620506, - "grad_norm": 0.000776316097471863, - "learning_rate": 0.00019999976263597718, - "loss": 46.0, - "step": 9081 - }, - { - "epoch": 0.6943823231454403, - "grad_norm": 0.0022005108185112476, - "learning_rate": 0.00019999976258363962, - "loss": 46.0, - "step": 9082 - }, - { - "epoch": 0.69445878012883, - "grad_norm": 0.001105863368138671, - "learning_rate": 0.0001999997625312963, - "loss": 46.0, - "step": 9083 - }, - { - "epoch": 0.6945352371122198, - "grad_norm": 0.005786404013633728, - "learning_rate": 0.0001999997624789472, - "loss": 46.0, - "step": 9084 - }, - { - "epoch": 0.6946116940956094, - "grad_norm": 0.0029700950253754854, - "learning_rate": 0.0001999997624265923, - "loss": 46.0, - "step": 9085 - }, - { - "epoch": 0.6946881510789992, - "grad_norm": 0.0004369357484392822, - "learning_rate": 0.0001999997623742317, - "loss": 46.0, - "step": 9086 - }, - { - "epoch": 0.6947646080623889, - "grad_norm": 0.0018057163106277585, - "learning_rate": 0.00019999976232186527, - "loss": 46.0, - "step": 9087 - }, - { - "epoch": 0.6948410650457786, - "grad_norm": 0.001960674999281764, - "learning_rate": 0.00019999976226949307, - "loss": 46.0, - "step": 9088 - }, - { - "epoch": 0.6949175220291683, - "grad_norm": 0.004416829440742731, - "learning_rate": 0.00019999976221711513, - "loss": 46.0, - "step": 9089 - }, - { - "epoch": 0.6949939790125581, - "grad_norm": 0.0008817437337711453, - "learning_rate": 0.0001999997621647314, - "loss": 46.0, - "step": 9090 - }, - { - "epoch": 0.6950704359959478, - "grad_norm": 0.002371092326939106, - "learning_rate": 0.00019999976211234195, - "loss": 46.0, - "step": 9091 - }, - { - "epoch": 0.6951468929793375, - "grad_norm": 0.0018999106250703335, - "learning_rate": 0.0001999997620599467, - "loss": 46.0, - "step": 9092 - }, - { - "epoch": 0.6952233499627272, - "grad_norm": 0.0011110103223472834, - "learning_rate": 0.00019999976200754565, - "loss": 46.0, - "step": 9093 - }, - { - "epoch": 0.6952998069461169, - "grad_norm": 0.0004885838716290891, - "learning_rate": 0.00019999976195513884, - "loss": 46.0, - "step": 9094 - }, - { - "epoch": 0.6953762639295067, - "grad_norm": 0.003622288117185235, - "learning_rate": 0.0001999997619027263, - "loss": 46.0, - "step": 9095 - }, - { - "epoch": 0.6954527209128963, - "grad_norm": 0.0008509297040291131, - "learning_rate": 0.00019999976185030796, - "loss": 46.0, - "step": 9096 - }, - { - "epoch": 0.6955291778962861, - "grad_norm": 0.0036821668036282063, - "learning_rate": 0.00019999976179788386, - "loss": 46.0, - "step": 9097 - }, - { - "epoch": 0.6956056348796759, - "grad_norm": 0.0013152624014765024, - "learning_rate": 0.00019999976174545398, - "loss": 46.0, - "step": 9098 - }, - { - "epoch": 0.6956820918630655, - "grad_norm": 0.0007734769606031477, - "learning_rate": 0.00019999976169301836, - "loss": 46.0, - "step": 9099 - }, - { - "epoch": 0.6957585488464553, - "grad_norm": 0.0007958488422445953, - "learning_rate": 0.0001999997616405769, - "loss": 46.0, - "step": 9100 - }, - { - "epoch": 0.695835005829845, - "grad_norm": 0.0016632117331027985, - "learning_rate": 0.00019999976158812974, - "loss": 46.0, - "step": 9101 - }, - { - "epoch": 0.6959114628132347, - "grad_norm": 0.0016646585427224636, - "learning_rate": 0.0001999997615356768, - "loss": 46.0, - "step": 9102 - }, - { - "epoch": 0.6959879197966244, - "grad_norm": 0.000806248455774039, - "learning_rate": 0.00019999976148321808, - "loss": 46.0, - "step": 9103 - }, - { - "epoch": 0.6960643767800141, - "grad_norm": 0.0009553435957059264, - "learning_rate": 0.0001999997614307536, - "loss": 46.0, - "step": 9104 - }, - { - "epoch": 0.6961408337634039, - "grad_norm": 0.0034235992934554815, - "learning_rate": 0.0001999997613782833, - "loss": 46.0, - "step": 9105 - }, - { - "epoch": 0.6962172907467936, - "grad_norm": 0.0015158647438511252, - "learning_rate": 0.0001999997613258073, - "loss": 46.0, - "step": 9106 - }, - { - "epoch": 0.6962937477301833, - "grad_norm": 0.002113425638526678, - "learning_rate": 0.00019999976127332549, - "loss": 46.0, - "step": 9107 - }, - { - "epoch": 0.696370204713573, - "grad_norm": 0.003075334010645747, - "learning_rate": 0.00019999976122083793, - "loss": 46.0, - "step": 9108 - }, - { - "epoch": 0.6964466616969628, - "grad_norm": 0.0024250419810414314, - "learning_rate": 0.00019999976116834458, - "loss": 46.0, - "step": 9109 - }, - { - "epoch": 0.6965231186803524, - "grad_norm": 0.001397660467773676, - "learning_rate": 0.00019999976111584547, - "loss": 46.0, - "step": 9110 - }, - { - "epoch": 0.6965995756637422, - "grad_norm": 0.002280839253216982, - "learning_rate": 0.00019999976106334062, - "loss": 46.0, - "step": 9111 - }, - { - "epoch": 0.696676032647132, - "grad_norm": 0.0025200205855071545, - "learning_rate": 0.00019999976101082995, - "loss": 46.0, - "step": 9112 - }, - { - "epoch": 0.6967524896305216, - "grad_norm": 0.003927274141460657, - "learning_rate": 0.00019999976095831355, - "loss": 46.0, - "step": 9113 - }, - { - "epoch": 0.6968289466139114, - "grad_norm": 0.001499826554208994, - "learning_rate": 0.00019999976090579136, - "loss": 46.0, - "step": 9114 - }, - { - "epoch": 0.696905403597301, - "grad_norm": 0.005215655546635389, - "learning_rate": 0.00019999976085326342, - "loss": 46.0, - "step": 9115 - }, - { - "epoch": 0.6969818605806908, - "grad_norm": 0.0010160794481635094, - "learning_rate": 0.0001999997608007297, - "loss": 46.0, - "step": 9116 - }, - { - "epoch": 0.6970583175640805, - "grad_norm": 0.00043587910477072, - "learning_rate": 0.0001999997607481902, - "loss": 46.0, - "step": 9117 - }, - { - "epoch": 0.6971347745474702, - "grad_norm": 0.021117571741342545, - "learning_rate": 0.00019999976069564492, - "loss": 46.0, - "step": 9118 - }, - { - "epoch": 0.69721123153086, - "grad_norm": 0.0010300461435690522, - "learning_rate": 0.0001999997606430939, - "loss": 46.0, - "step": 9119 - }, - { - "epoch": 0.6972876885142497, - "grad_norm": 0.0002722138015087694, - "learning_rate": 0.0001999997605905371, - "loss": 46.0, - "step": 9120 - }, - { - "epoch": 0.6973641454976394, - "grad_norm": 0.0016803985927253962, - "learning_rate": 0.00019999976053797452, - "loss": 46.0, - "step": 9121 - }, - { - "epoch": 0.6974406024810291, - "grad_norm": 0.0006275622290559113, - "learning_rate": 0.0001999997604854062, - "loss": 46.0, - "step": 9122 - }, - { - "epoch": 0.6975170594644188, - "grad_norm": 0.002100080018863082, - "learning_rate": 0.0001999997604328321, - "loss": 46.0, - "step": 9123 - }, - { - "epoch": 0.6975935164478085, - "grad_norm": 0.002103107515722513, - "learning_rate": 0.00019999976038025222, - "loss": 46.0, - "step": 9124 - }, - { - "epoch": 0.6976699734311983, - "grad_norm": 0.0014708503149449825, - "learning_rate": 0.00019999976032766657, - "loss": 46.0, - "step": 9125 - }, - { - "epoch": 0.697746430414588, - "grad_norm": 0.015570555813610554, - "learning_rate": 0.00019999976027507515, - "loss": 46.0, - "step": 9126 - }, - { - "epoch": 0.6978228873979777, - "grad_norm": 0.0009374309447593987, - "learning_rate": 0.00019999976022247795, - "loss": 46.0, - "step": 9127 - }, - { - "epoch": 0.6978993443813675, - "grad_norm": 0.001767691457644105, - "learning_rate": 0.000199999760169875, - "loss": 46.0, - "step": 9128 - }, - { - "epoch": 0.6979758013647571, - "grad_norm": 0.00047994355554692447, - "learning_rate": 0.0001999997601172663, - "loss": 46.0, - "step": 9129 - }, - { - "epoch": 0.6980522583481469, - "grad_norm": 0.0019308087648823857, - "learning_rate": 0.00019999976006465178, - "loss": 46.0, - "step": 9130 - }, - { - "epoch": 0.6981287153315366, - "grad_norm": 0.0012461959850043058, - "learning_rate": 0.00019999976001203152, - "loss": 46.0, - "step": 9131 - }, - { - "epoch": 0.6982051723149263, - "grad_norm": 0.0004380648897495121, - "learning_rate": 0.00019999975995940548, - "loss": 46.0, - "step": 9132 - }, - { - "epoch": 0.698281629298316, - "grad_norm": 0.0007305003819055855, - "learning_rate": 0.00019999975990677368, - "loss": 46.0, - "step": 9133 - }, - { - "epoch": 0.6983580862817057, - "grad_norm": 0.0006421058787964284, - "learning_rate": 0.0001999997598541361, - "loss": 46.0, - "step": 9134 - }, - { - "epoch": 0.6984345432650955, - "grad_norm": 0.0012774879578500986, - "learning_rate": 0.00019999975980149274, - "loss": 46.0, - "step": 9135 - }, - { - "epoch": 0.6985110002484852, - "grad_norm": 0.008576181717216969, - "learning_rate": 0.00019999975974884367, - "loss": 46.0, - "step": 9136 - }, - { - "epoch": 0.6985874572318749, - "grad_norm": 0.0006904984475113451, - "learning_rate": 0.00019999975969618876, - "loss": 46.0, - "step": 9137 - }, - { - "epoch": 0.6986639142152646, - "grad_norm": 0.0010577266803011298, - "learning_rate": 0.00019999975964352811, - "loss": 46.0, - "step": 9138 - }, - { - "epoch": 0.6987403711986544, - "grad_norm": 0.0019243519054725766, - "learning_rate": 0.0001999997595908617, - "loss": 46.0, - "step": 9139 - }, - { - "epoch": 0.698816828182044, - "grad_norm": 0.001724465168081224, - "learning_rate": 0.0001999997595381895, - "loss": 46.0, - "step": 9140 - }, - { - "epoch": 0.6988932851654338, - "grad_norm": 0.0019569003488868475, - "learning_rate": 0.00019999975948551156, - "loss": 46.0, - "step": 9141 - }, - { - "epoch": 0.6989697421488236, - "grad_norm": 0.0033014847431331873, - "learning_rate": 0.00019999975943282782, - "loss": 46.0, - "step": 9142 - }, - { - "epoch": 0.6990461991322132, - "grad_norm": 0.0007960941875353456, - "learning_rate": 0.00019999975938013833, - "loss": 46.0, - "step": 9143 - }, - { - "epoch": 0.699122656115603, - "grad_norm": 0.009571060538291931, - "learning_rate": 0.00019999975932744307, - "loss": 46.0, - "step": 9144 - }, - { - "epoch": 0.6991991130989926, - "grad_norm": 0.0039282264187932014, - "learning_rate": 0.000199999759274742, - "loss": 46.0, - "step": 9145 - }, - { - "epoch": 0.6992755700823824, - "grad_norm": 0.0022329073399305344, - "learning_rate": 0.0001999997592220352, - "loss": 46.0, - "step": 9146 - }, - { - "epoch": 0.6993520270657722, - "grad_norm": 0.001143449218943715, - "learning_rate": 0.00019999975916932264, - "loss": 46.0, - "step": 9147 - }, - { - "epoch": 0.6994284840491618, - "grad_norm": 0.0013885560911148787, - "learning_rate": 0.0001999997591166043, - "loss": 46.0, - "step": 9148 - }, - { - "epoch": 0.6995049410325516, - "grad_norm": 0.004010376520454884, - "learning_rate": 0.00019999975906388019, - "loss": 46.0, - "step": 9149 - }, - { - "epoch": 0.6995813980159413, - "grad_norm": 0.013738351874053478, - "learning_rate": 0.00019999975901115028, - "loss": 46.0, - "step": 9150 - }, - { - "epoch": 0.699657854999331, - "grad_norm": 0.002059882739558816, - "learning_rate": 0.00019999975895841466, - "loss": 46.0, - "step": 9151 - }, - { - "epoch": 0.6997343119827207, - "grad_norm": 0.000560998625587672, - "learning_rate": 0.00019999975890567322, - "loss": 46.0, - "step": 9152 - }, - { - "epoch": 0.6998107689661105, - "grad_norm": 0.0019092325819656253, - "learning_rate": 0.00019999975885292602, - "loss": 46.0, - "step": 9153 - }, - { - "epoch": 0.6998872259495001, - "grad_norm": 0.00193807203322649, - "learning_rate": 0.00019999975880017305, - "loss": 46.0, - "step": 9154 - }, - { - "epoch": 0.6999636829328899, - "grad_norm": 0.0005407581920735538, - "learning_rate": 0.0001999997587474143, - "loss": 46.0, - "step": 9155 - }, - { - "epoch": 0.7000401399162796, - "grad_norm": 0.0022905785590410233, - "learning_rate": 0.00019999975869464982, - "loss": 46.0, - "step": 9156 - }, - { - "epoch": 0.7001165968996693, - "grad_norm": 0.0008564593154005706, - "learning_rate": 0.00019999975864187956, - "loss": 46.0, - "step": 9157 - }, - { - "epoch": 0.7001930538830591, - "grad_norm": 0.00206946418620646, - "learning_rate": 0.00019999975858910353, - "loss": 46.0, - "step": 9158 - }, - { - "epoch": 0.7002695108664487, - "grad_norm": 0.0030055183451622725, - "learning_rate": 0.00019999975853632172, - "loss": 46.0, - "step": 9159 - }, - { - "epoch": 0.7003459678498385, - "grad_norm": 0.00043971664854325354, - "learning_rate": 0.00019999975848353414, - "loss": 46.0, - "step": 9160 - }, - { - "epoch": 0.7004224248332283, - "grad_norm": 0.0006825425662100315, - "learning_rate": 0.00019999975843074078, - "loss": 46.0, - "step": 9161 - }, - { - "epoch": 0.7004988818166179, - "grad_norm": 0.001231030561029911, - "learning_rate": 0.00019999975837794168, - "loss": 46.0, - "step": 9162 - }, - { - "epoch": 0.7005753388000077, - "grad_norm": 0.0006522739422507584, - "learning_rate": 0.00019999975832513678, - "loss": 46.0, - "step": 9163 - }, - { - "epoch": 0.7006517957833973, - "grad_norm": 0.0005894287023693323, - "learning_rate": 0.00019999975827232613, - "loss": 46.0, - "step": 9164 - }, - { - "epoch": 0.7007282527667871, - "grad_norm": 0.0002801214868668467, - "learning_rate": 0.00019999975821950971, - "loss": 46.0, - "step": 9165 - }, - { - "epoch": 0.7008047097501768, - "grad_norm": 0.0009813535725697875, - "learning_rate": 0.00019999975816668752, - "loss": 46.0, - "step": 9166 - }, - { - "epoch": 0.7008811667335665, - "grad_norm": 0.002128161024302244, - "learning_rate": 0.00019999975811385952, - "loss": 46.0, - "step": 9167 - }, - { - "epoch": 0.7009576237169562, - "grad_norm": 0.005774526856839657, - "learning_rate": 0.0001999997580610258, - "loss": 46.0, - "step": 9168 - }, - { - "epoch": 0.701034080700346, - "grad_norm": 0.00047978476504795253, - "learning_rate": 0.00019999975800818632, - "loss": 46.0, - "step": 9169 - }, - { - "epoch": 0.7011105376837357, - "grad_norm": 0.0010162268299609423, - "learning_rate": 0.00019999975795534104, - "loss": 46.0, - "step": 9170 - }, - { - "epoch": 0.7011869946671254, - "grad_norm": 0.0006295115454122424, - "learning_rate": 0.00019999975790249, - "loss": 46.0, - "step": 9171 - }, - { - "epoch": 0.7012634516505152, - "grad_norm": 0.004258931614458561, - "learning_rate": 0.0001999997578496332, - "loss": 46.0, - "step": 9172 - }, - { - "epoch": 0.7013399086339048, - "grad_norm": 0.0014116342645138502, - "learning_rate": 0.00019999975779677059, - "loss": 46.0, - "step": 9173 - }, - { - "epoch": 0.7014163656172946, - "grad_norm": 0.0009487050119787455, - "learning_rate": 0.00019999975774390226, - "loss": 46.0, - "step": 9174 - }, - { - "epoch": 0.7014928226006842, - "grad_norm": 0.0024084998294711113, - "learning_rate": 0.00019999975769102813, - "loss": 46.0, - "step": 9175 - }, - { - "epoch": 0.701569279584074, - "grad_norm": 0.006702831480652094, - "learning_rate": 0.00019999975763814823, - "loss": 46.0, - "step": 9176 - }, - { - "epoch": 0.7016457365674638, - "grad_norm": 0.004745201673358679, - "learning_rate": 0.00019999975758526258, - "loss": 46.0, - "step": 9177 - }, - { - "epoch": 0.7017221935508534, - "grad_norm": 0.0005848890286870301, - "learning_rate": 0.00019999975753237114, - "loss": 46.0, - "step": 9178 - }, - { - "epoch": 0.7017986505342432, - "grad_norm": 0.0007510999566875398, - "learning_rate": 0.00019999975747947394, - "loss": 46.0, - "step": 9179 - }, - { - "epoch": 0.7018751075176329, - "grad_norm": 0.004274856299161911, - "learning_rate": 0.00019999975742657097, - "loss": 46.0, - "step": 9180 - }, - { - "epoch": 0.7019515645010226, - "grad_norm": 0.001858519040979445, - "learning_rate": 0.00019999975737366223, - "loss": 46.0, - "step": 9181 - }, - { - "epoch": 0.7020280214844123, - "grad_norm": 0.001513935625553131, - "learning_rate": 0.00019999975732074775, - "loss": 46.0, - "step": 9182 - }, - { - "epoch": 0.7021044784678021, - "grad_norm": 0.0012599901529029012, - "learning_rate": 0.00019999975726782746, - "loss": 46.0, - "step": 9183 - }, - { - "epoch": 0.7021809354511918, - "grad_norm": 0.0012123690685257316, - "learning_rate": 0.0001999997572149014, - "loss": 46.0, - "step": 9184 - }, - { - "epoch": 0.7022573924345815, - "grad_norm": 0.0011055892100557685, - "learning_rate": 0.00019999975716196962, - "loss": 46.0, - "step": 9185 - }, - { - "epoch": 0.7023338494179712, - "grad_norm": 0.0008655319106765091, - "learning_rate": 0.00019999975710903204, - "loss": 46.0, - "step": 9186 - }, - { - "epoch": 0.7024103064013609, - "grad_norm": 0.001162463566288352, - "learning_rate": 0.00019999975705608866, - "loss": 46.0, - "step": 9187 - }, - { - "epoch": 0.7024867633847507, - "grad_norm": 0.0008038083324208856, - "learning_rate": 0.00019999975700313956, - "loss": 46.0, - "step": 9188 - }, - { - "epoch": 0.7025632203681403, - "grad_norm": 0.0011236275313422084, - "learning_rate": 0.00019999975695018468, - "loss": 46.0, - "step": 9189 - }, - { - "epoch": 0.7026396773515301, - "grad_norm": 0.0024578431621193886, - "learning_rate": 0.00019999975689722398, - "loss": 46.0, - "step": 9190 - }, - { - "epoch": 0.7027161343349199, - "grad_norm": 0.0011110417544841766, - "learning_rate": 0.0001999997568442576, - "loss": 46.0, - "step": 9191 - }, - { - "epoch": 0.7027925913183095, - "grad_norm": 0.0006508385413326323, - "learning_rate": 0.00019999975679128537, - "loss": 46.0, - "step": 9192 - }, - { - "epoch": 0.7028690483016993, - "grad_norm": 0.0013018023455515504, - "learning_rate": 0.0001999997567383074, - "loss": 46.0, - "step": 9193 - }, - { - "epoch": 0.7029455052850889, - "grad_norm": 0.002450330648571253, - "learning_rate": 0.00019999975668532366, - "loss": 46.0, - "step": 9194 - }, - { - "epoch": 0.7030219622684787, - "grad_norm": 0.0014332069549709558, - "learning_rate": 0.00019999975663233415, - "loss": 46.0, - "step": 9195 - }, - { - "epoch": 0.7030984192518684, - "grad_norm": 0.0004257578984834254, - "learning_rate": 0.00019999975657933886, - "loss": 46.0, - "step": 9196 - }, - { - "epoch": 0.7031748762352581, - "grad_norm": 0.002065675798803568, - "learning_rate": 0.00019999975652633783, - "loss": 46.0, - "step": 9197 - }, - { - "epoch": 0.7032513332186479, - "grad_norm": 0.0013056508032605052, - "learning_rate": 0.000199999756473331, - "loss": 46.0, - "step": 9198 - }, - { - "epoch": 0.7033277902020376, - "grad_norm": 0.005665769334882498, - "learning_rate": 0.00019999975642031844, - "loss": 46.0, - "step": 9199 - }, - { - "epoch": 0.7034042471854273, - "grad_norm": 0.0012516052229329944, - "learning_rate": 0.00019999975636730006, - "loss": 46.0, - "step": 9200 - }, - { - "epoch": 0.703480704168817, - "grad_norm": 0.0014841699739918113, - "learning_rate": 0.00019999975631427593, - "loss": 46.0, - "step": 9201 - }, - { - "epoch": 0.7035571611522068, - "grad_norm": 0.005775256548076868, - "learning_rate": 0.00019999975626124603, - "loss": 46.0, - "step": 9202 - }, - { - "epoch": 0.7036336181355964, - "grad_norm": 0.002213146770372987, - "learning_rate": 0.00019999975620821036, - "loss": 46.0, - "step": 9203 - }, - { - "epoch": 0.7037100751189862, - "grad_norm": 0.0024645323865115643, - "learning_rate": 0.00019999975615516894, - "loss": 46.0, - "step": 9204 - }, - { - "epoch": 0.7037865321023759, - "grad_norm": 0.0006344622815959156, - "learning_rate": 0.00019999975610212174, - "loss": 46.0, - "step": 9205 - }, - { - "epoch": 0.7038629890857656, - "grad_norm": 0.0006042519817128778, - "learning_rate": 0.00019999975604906878, - "loss": 46.0, - "step": 9206 - }, - { - "epoch": 0.7039394460691554, - "grad_norm": 0.0026298025622963905, - "learning_rate": 0.00019999975599601, - "loss": 46.0, - "step": 9207 - }, - { - "epoch": 0.704015903052545, - "grad_norm": 0.0004056597244925797, - "learning_rate": 0.00019999975594294552, - "loss": 46.0, - "step": 9208 - }, - { - "epoch": 0.7040923600359348, - "grad_norm": 0.0016110679134726524, - "learning_rate": 0.00019999975588987524, - "loss": 46.0, - "step": 9209 - }, - { - "epoch": 0.7041688170193245, - "grad_norm": 0.0079397764056921, - "learning_rate": 0.0001999997558367992, - "loss": 46.0, - "step": 9210 - }, - { - "epoch": 0.7042452740027142, - "grad_norm": 0.0010373565601184964, - "learning_rate": 0.00019999975578371734, - "loss": 46.0, - "step": 9211 - }, - { - "epoch": 0.704321730986104, - "grad_norm": 0.0010001541813835502, - "learning_rate": 0.00019999975573062976, - "loss": 46.0, - "step": 9212 - }, - { - "epoch": 0.7043981879694937, - "grad_norm": 0.0020129969343543053, - "learning_rate": 0.0001999997556775364, - "loss": 46.0, - "step": 9213 - }, - { - "epoch": 0.7044746449528834, - "grad_norm": 0.001528335502371192, - "learning_rate": 0.00019999975562443728, - "loss": 46.0, - "step": 9214 - }, - { - "epoch": 0.7045511019362731, - "grad_norm": 0.0021050376817584038, - "learning_rate": 0.00019999975557133238, - "loss": 46.0, - "step": 9215 - }, - { - "epoch": 0.7046275589196628, - "grad_norm": 0.0008230862440541387, - "learning_rate": 0.0001999997555182217, - "loss": 46.0, - "step": 9216 - }, - { - "epoch": 0.7047040159030525, - "grad_norm": 0.0007504072273150086, - "learning_rate": 0.0001999997554651053, - "loss": 46.0, - "step": 9217 - }, - { - "epoch": 0.7047804728864423, - "grad_norm": 0.0009644035599194467, - "learning_rate": 0.00019999975541198307, - "loss": 46.0, - "step": 9218 - }, - { - "epoch": 0.704856929869832, - "grad_norm": 0.007080076262354851, - "learning_rate": 0.0001999997553588551, - "loss": 46.0, - "step": 9219 - }, - { - "epoch": 0.7049333868532217, - "grad_norm": 0.0008152625523507595, - "learning_rate": 0.00019999975530572134, - "loss": 46.0, - "step": 9220 - }, - { - "epoch": 0.7050098438366115, - "grad_norm": 0.0013208973687142134, - "learning_rate": 0.00019999975525258185, - "loss": 46.0, - "step": 9221 - }, - { - "epoch": 0.7050863008200011, - "grad_norm": 0.0008160319412127137, - "learning_rate": 0.00019999975519943654, - "loss": 46.0, - "step": 9222 - }, - { - "epoch": 0.7051627578033909, - "grad_norm": 0.030889302492141724, - "learning_rate": 0.0001999997551462855, - "loss": 46.0, - "step": 9223 - }, - { - "epoch": 0.7052392147867805, - "grad_norm": 0.001989106647670269, - "learning_rate": 0.0001999997550931287, - "loss": 46.0, - "step": 9224 - }, - { - "epoch": 0.7053156717701703, - "grad_norm": 0.0018548467196524143, - "learning_rate": 0.00019999975503996612, - "loss": 46.0, - "step": 9225 - }, - { - "epoch": 0.70539212875356, - "grad_norm": 0.0035306604113429785, - "learning_rate": 0.00019999975498679774, - "loss": 46.0, - "step": 9226 - }, - { - "epoch": 0.7054685857369497, - "grad_norm": 0.001718108425848186, - "learning_rate": 0.00019999975493362358, - "loss": 46.0, - "step": 9227 - }, - { - "epoch": 0.7055450427203395, - "grad_norm": 0.004146223422139883, - "learning_rate": 0.0001999997548804437, - "loss": 46.0, - "step": 9228 - }, - { - "epoch": 0.7056214997037292, - "grad_norm": 0.0005129209021106362, - "learning_rate": 0.00019999975482725804, - "loss": 46.0, - "step": 9229 - }, - { - "epoch": 0.7056979566871189, - "grad_norm": 0.0012858293484896421, - "learning_rate": 0.0001999997547740666, - "loss": 46.0, - "step": 9230 - }, - { - "epoch": 0.7057744136705086, - "grad_norm": 0.0014148908667266369, - "learning_rate": 0.00019999975472086937, - "loss": 46.0, - "step": 9231 - }, - { - "epoch": 0.7058508706538984, - "grad_norm": 0.0011945606674998999, - "learning_rate": 0.0001999997546676664, - "loss": 46.0, - "step": 9232 - }, - { - "epoch": 0.705927327637288, - "grad_norm": 0.003520740196108818, - "learning_rate": 0.00019999975461445767, - "loss": 46.0, - "step": 9233 - }, - { - "epoch": 0.7060037846206778, - "grad_norm": 0.0057270401157438755, - "learning_rate": 0.00019999975456124313, - "loss": 46.0, - "step": 9234 - }, - { - "epoch": 0.7060802416040675, - "grad_norm": 0.0006096501601859927, - "learning_rate": 0.00019999975450802287, - "loss": 46.0, - "step": 9235 - }, - { - "epoch": 0.7061566985874572, - "grad_norm": 0.0007158907828852534, - "learning_rate": 0.0001999997544547968, - "loss": 46.0, - "step": 9236 - }, - { - "epoch": 0.706233155570847, - "grad_norm": 0.006918021943420172, - "learning_rate": 0.00019999975440156497, - "loss": 46.0, - "step": 9237 - }, - { - "epoch": 0.7063096125542366, - "grad_norm": 0.0007008587708696723, - "learning_rate": 0.00019999975434832737, - "loss": 46.0, - "step": 9238 - }, - { - "epoch": 0.7063860695376264, - "grad_norm": 0.0029186594765633345, - "learning_rate": 0.000199999754295084, - "loss": 46.0, - "step": 9239 - }, - { - "epoch": 0.7064625265210162, - "grad_norm": 0.0011055928189307451, - "learning_rate": 0.0001999997542418349, - "loss": 46.0, - "step": 9240 - }, - { - "epoch": 0.7065389835044058, - "grad_norm": 0.0013629210880026221, - "learning_rate": 0.00019999975418858, - "loss": 46.0, - "step": 9241 - }, - { - "epoch": 0.7066154404877956, - "grad_norm": 0.008726016618311405, - "learning_rate": 0.00019999975413531932, - "loss": 46.0, - "step": 9242 - }, - { - "epoch": 0.7066918974711853, - "grad_norm": 0.012779823504388332, - "learning_rate": 0.00019999975408205284, - "loss": 46.0, - "step": 9243 - }, - { - "epoch": 0.706768354454575, - "grad_norm": 0.003101082518696785, - "learning_rate": 0.00019999975402878065, - "loss": 46.0, - "step": 9244 - }, - { - "epoch": 0.7068448114379647, - "grad_norm": 0.0008287486271001399, - "learning_rate": 0.00019999975397550266, - "loss": 46.0, - "step": 9245 - }, - { - "epoch": 0.7069212684213544, - "grad_norm": 0.0012902788585051894, - "learning_rate": 0.00019999975392221892, - "loss": 46.0, - "step": 9246 - }, - { - "epoch": 0.7069977254047441, - "grad_norm": 0.0008837492787279189, - "learning_rate": 0.0001999997538689294, - "loss": 46.0, - "step": 9247 - }, - { - "epoch": 0.7070741823881339, - "grad_norm": 0.0036821383982896805, - "learning_rate": 0.0001999997538156341, - "loss": 46.0, - "step": 9248 - }, - { - "epoch": 0.7071506393715236, - "grad_norm": 0.0007667118334211409, - "learning_rate": 0.00019999975376233303, - "loss": 46.0, - "step": 9249 - }, - { - "epoch": 0.7072270963549133, - "grad_norm": 0.0017325804801657796, - "learning_rate": 0.00019999975370902623, - "loss": 46.0, - "step": 9250 - }, - { - "epoch": 0.7073035533383031, - "grad_norm": 0.00407186197116971, - "learning_rate": 0.00019999975365571362, - "loss": 46.0, - "step": 9251 - }, - { - "epoch": 0.7073800103216927, - "grad_norm": 0.0011923268903046846, - "learning_rate": 0.00019999975360239524, - "loss": 46.0, - "step": 9252 - }, - { - "epoch": 0.7074564673050825, - "grad_norm": 0.002721782075241208, - "learning_rate": 0.00019999975354907112, - "loss": 46.0, - "step": 9253 - }, - { - "epoch": 0.7075329242884723, - "grad_norm": 0.0014553384389728308, - "learning_rate": 0.00019999975349574122, - "loss": 46.0, - "step": 9254 - }, - { - "epoch": 0.7076093812718619, - "grad_norm": 0.0015644950326532125, - "learning_rate": 0.00019999975344240555, - "loss": 46.0, - "step": 9255 - }, - { - "epoch": 0.7076858382552517, - "grad_norm": 0.0011328462278470397, - "learning_rate": 0.0001999997533890641, - "loss": 46.0, - "step": 9256 - }, - { - "epoch": 0.7077622952386413, - "grad_norm": 0.0014063374837860465, - "learning_rate": 0.00019999975333571688, - "loss": 46.0, - "step": 9257 - }, - { - "epoch": 0.7078387522220311, - "grad_norm": 0.001097632572054863, - "learning_rate": 0.00019999975328236392, - "loss": 46.0, - "step": 9258 - }, - { - "epoch": 0.7079152092054208, - "grad_norm": 0.0024974090047180653, - "learning_rate": 0.00019999975322900515, - "loss": 46.0, - "step": 9259 - }, - { - "epoch": 0.7079916661888105, - "grad_norm": 0.0012440483551472425, - "learning_rate": 0.0001999997531756406, - "loss": 46.0, - "step": 9260 - }, - { - "epoch": 0.7080681231722002, - "grad_norm": 0.0024394432548433542, - "learning_rate": 0.00019999975312227032, - "loss": 46.0, - "step": 9261 - }, - { - "epoch": 0.70814458015559, - "grad_norm": 0.008031205274164677, - "learning_rate": 0.00019999975306889427, - "loss": 46.0, - "step": 9262 - }, - { - "epoch": 0.7082210371389797, - "grad_norm": 0.004843798466026783, - "learning_rate": 0.00019999975301551243, - "loss": 46.0, - "step": 9263 - }, - { - "epoch": 0.7082974941223694, - "grad_norm": 0.0010757792042568326, - "learning_rate": 0.00019999975296212483, - "loss": 46.0, - "step": 9264 - }, - { - "epoch": 0.7083739511057591, - "grad_norm": 0.0013805648777633905, - "learning_rate": 0.00019999975290873148, - "loss": 46.0, - "step": 9265 - }, - { - "epoch": 0.7084504080891488, - "grad_norm": 0.0012855901150032878, - "learning_rate": 0.00019999975285533235, - "loss": 46.0, - "step": 9266 - }, - { - "epoch": 0.7085268650725386, - "grad_norm": 0.002793646417558193, - "learning_rate": 0.00019999975280192742, - "loss": 46.0, - "step": 9267 - }, - { - "epoch": 0.7086033220559282, - "grad_norm": 0.0009424712625332177, - "learning_rate": 0.00019999975274851675, - "loss": 46.0, - "step": 9268 - }, - { - "epoch": 0.708679779039318, - "grad_norm": 0.00178941257763654, - "learning_rate": 0.0001999997526951003, - "loss": 46.0, - "step": 9269 - }, - { - "epoch": 0.7087562360227078, - "grad_norm": 0.0018387021264061332, - "learning_rate": 0.0001999997526416781, - "loss": 46.0, - "step": 9270 - }, - { - "epoch": 0.7088326930060974, - "grad_norm": 0.0013985419645905495, - "learning_rate": 0.0001999997525882501, - "loss": 46.0, - "step": 9271 - }, - { - "epoch": 0.7089091499894872, - "grad_norm": 0.0015780134126543999, - "learning_rate": 0.00019999975253481633, - "loss": 46.0, - "step": 9272 - }, - { - "epoch": 0.7089856069728769, - "grad_norm": 0.0007537794299423695, - "learning_rate": 0.00019999975248137682, - "loss": 46.0, - "step": 9273 - }, - { - "epoch": 0.7090620639562666, - "grad_norm": 0.0018102899193763733, - "learning_rate": 0.00019999975242793153, - "loss": 46.0, - "step": 9274 - }, - { - "epoch": 0.7091385209396563, - "grad_norm": 0.0011644712649285793, - "learning_rate": 0.00019999975237448045, - "loss": 46.0, - "step": 9275 - }, - { - "epoch": 0.709214977923046, - "grad_norm": 0.0007414617575705051, - "learning_rate": 0.00019999975232102361, - "loss": 46.0, - "step": 9276 - }, - { - "epoch": 0.7092914349064358, - "grad_norm": 0.0005451389588415623, - "learning_rate": 0.00019999975226756104, - "loss": 46.0, - "step": 9277 - }, - { - "epoch": 0.7093678918898255, - "grad_norm": 0.0007670748163945973, - "learning_rate": 0.00019999975221409266, - "loss": 46.0, - "step": 9278 - }, - { - "epoch": 0.7094443488732152, - "grad_norm": 0.002322677057236433, - "learning_rate": 0.0001999997521606185, - "loss": 46.0, - "step": 9279 - }, - { - "epoch": 0.7095208058566049, - "grad_norm": 0.0007842218619771302, - "learning_rate": 0.0001999997521071386, - "loss": 46.0, - "step": 9280 - }, - { - "epoch": 0.7095972628399947, - "grad_norm": 0.0007776504498906434, - "learning_rate": 0.0001999997520536529, - "loss": 46.0, - "step": 9281 - }, - { - "epoch": 0.7096737198233843, - "grad_norm": 0.0037316500674933195, - "learning_rate": 0.0001999997520001615, - "loss": 46.0, - "step": 9282 - }, - { - "epoch": 0.7097501768067741, - "grad_norm": 0.0015603293431922793, - "learning_rate": 0.00019999975194666424, - "loss": 46.0, - "step": 9283 - }, - { - "epoch": 0.7098266337901639, - "grad_norm": 0.003073350293561816, - "learning_rate": 0.00019999975189316125, - "loss": 46.0, - "step": 9284 - }, - { - "epoch": 0.7099030907735535, - "grad_norm": 0.000701656099408865, - "learning_rate": 0.0001999997518396525, - "loss": 46.0, - "step": 9285 - }, - { - "epoch": 0.7099795477569433, - "grad_norm": 0.0029347811359912157, - "learning_rate": 0.00019999975178613797, - "loss": 46.0, - "step": 9286 - }, - { - "epoch": 0.7100560047403329, - "grad_norm": 0.004827440250664949, - "learning_rate": 0.00019999975173261766, - "loss": 46.0, - "step": 9287 - }, - { - "epoch": 0.7101324617237227, - "grad_norm": 0.003475607605651021, - "learning_rate": 0.0001999997516790916, - "loss": 46.0, - "step": 9288 - }, - { - "epoch": 0.7102089187071124, - "grad_norm": 0.0017068135784938931, - "learning_rate": 0.00019999975162555977, - "loss": 46.0, - "step": 9289 - }, - { - "epoch": 0.7102853756905021, - "grad_norm": 0.001960651483386755, - "learning_rate": 0.00019999975157202217, - "loss": 46.0, - "step": 9290 - }, - { - "epoch": 0.7103618326738919, - "grad_norm": 0.0010313292732462287, - "learning_rate": 0.0001999997515184788, - "loss": 46.0, - "step": 9291 - }, - { - "epoch": 0.7104382896572816, - "grad_norm": 0.0017544184811413288, - "learning_rate": 0.00019999975146492964, - "loss": 46.0, - "step": 9292 - }, - { - "epoch": 0.7105147466406713, - "grad_norm": 0.0026029690634459257, - "learning_rate": 0.0001999997514113747, - "loss": 46.0, - "step": 9293 - }, - { - "epoch": 0.710591203624061, - "grad_norm": 0.0009716750937514007, - "learning_rate": 0.00019999975135781407, - "loss": 46.0, - "step": 9294 - }, - { - "epoch": 0.7106676606074507, - "grad_norm": 0.0003008923085872084, - "learning_rate": 0.0001999997513042476, - "loss": 46.0, - "step": 9295 - }, - { - "epoch": 0.7107441175908404, - "grad_norm": 0.0020217609126120806, - "learning_rate": 0.00019999975125067535, - "loss": 46.0, - "step": 9296 - }, - { - "epoch": 0.7108205745742302, - "grad_norm": 0.0019697891548275948, - "learning_rate": 0.00019999975119709739, - "loss": 46.0, - "step": 9297 - }, - { - "epoch": 0.7108970315576199, - "grad_norm": 0.0009867234621196985, - "learning_rate": 0.00019999975114351362, - "loss": 46.0, - "step": 9298 - }, - { - "epoch": 0.7109734885410096, - "grad_norm": 0.001355454558506608, - "learning_rate": 0.00019999975108992408, - "loss": 46.0, - "step": 9299 - }, - { - "epoch": 0.7110499455243994, - "grad_norm": 0.00173101422842592, - "learning_rate": 0.00019999975103632877, - "loss": 46.0, - "step": 9300 - }, - { - "epoch": 0.711126402507789, - "grad_norm": 0.0006403400329872966, - "learning_rate": 0.0001999997509827277, - "loss": 46.0, - "step": 9301 - }, - { - "epoch": 0.7112028594911788, - "grad_norm": 0.0008197331335395575, - "learning_rate": 0.00019999975092912088, - "loss": 46.0, - "step": 9302 - }, - { - "epoch": 0.7112793164745685, - "grad_norm": 0.0034717696253210306, - "learning_rate": 0.00019999975087550828, - "loss": 46.0, - "step": 9303 - }, - { - "epoch": 0.7113557734579582, - "grad_norm": 0.0008460204699076712, - "learning_rate": 0.0001999997508218899, - "loss": 46.0, - "step": 9304 - }, - { - "epoch": 0.711432230441348, - "grad_norm": 0.003026822814717889, - "learning_rate": 0.00019999975076826575, - "loss": 46.0, - "step": 9305 - }, - { - "epoch": 0.7115086874247376, - "grad_norm": 0.003876257222145796, - "learning_rate": 0.00019999975071463585, - "loss": 46.0, - "step": 9306 - }, - { - "epoch": 0.7115851444081274, - "grad_norm": 0.0005720927729271352, - "learning_rate": 0.00019999975066100015, - "loss": 46.0, - "step": 9307 - }, - { - "epoch": 0.7116616013915171, - "grad_norm": 0.0005936491652391851, - "learning_rate": 0.0001999997506073587, - "loss": 46.0, - "step": 9308 - }, - { - "epoch": 0.7117380583749068, - "grad_norm": 0.0007157169166021049, - "learning_rate": 0.00019999975055371146, - "loss": 46.0, - "step": 9309 - }, - { - "epoch": 0.7118145153582965, - "grad_norm": 0.0006979728350415826, - "learning_rate": 0.00019999975050005847, - "loss": 46.0, - "step": 9310 - }, - { - "epoch": 0.7118909723416863, - "grad_norm": 0.005947049241513014, - "learning_rate": 0.0001999997504463997, - "loss": 46.0, - "step": 9311 - }, - { - "epoch": 0.711967429325076, - "grad_norm": 0.0008080280967988074, - "learning_rate": 0.00019999975039273517, - "loss": 46.0, - "step": 9312 - }, - { - "epoch": 0.7120438863084657, - "grad_norm": 0.01812349632382393, - "learning_rate": 0.00019999975033906488, - "loss": 46.0, - "step": 9313 - }, - { - "epoch": 0.7121203432918555, - "grad_norm": 0.0006871453369967639, - "learning_rate": 0.0001999997502853888, - "loss": 46.0, - "step": 9314 - }, - { - "epoch": 0.7121968002752451, - "grad_norm": 0.007056377362459898, - "learning_rate": 0.00019999975023170697, - "loss": 46.0, - "step": 9315 - }, - { - "epoch": 0.7122732572586349, - "grad_norm": 0.0011114017106592655, - "learning_rate": 0.00019999975017801936, - "loss": 46.0, - "step": 9316 - }, - { - "epoch": 0.7123497142420245, - "grad_norm": 0.0007559072109870613, - "learning_rate": 0.00019999975012432596, - "loss": 46.0, - "step": 9317 - }, - { - "epoch": 0.7124261712254143, - "grad_norm": 0.0008278271416202188, - "learning_rate": 0.0001999997500706268, - "loss": 46.0, - "step": 9318 - }, - { - "epoch": 0.7125026282088041, - "grad_norm": 0.001481667859479785, - "learning_rate": 0.0001999997500169219, - "loss": 46.0, - "step": 9319 - }, - { - "epoch": 0.7125790851921937, - "grad_norm": 0.0004982512909919024, - "learning_rate": 0.0001999997499632112, - "loss": 46.0, - "step": 9320 - }, - { - "epoch": 0.7126555421755835, - "grad_norm": 0.0022554430179297924, - "learning_rate": 0.00019999974990949474, - "loss": 46.0, - "step": 9321 - }, - { - "epoch": 0.7127319991589732, - "grad_norm": 0.007220863830298185, - "learning_rate": 0.00019999974985577252, - "loss": 46.0, - "step": 9322 - }, - { - "epoch": 0.7128084561423629, - "grad_norm": 0.0027372404001653194, - "learning_rate": 0.00019999974980204453, - "loss": 46.0, - "step": 9323 - }, - { - "epoch": 0.7128849131257526, - "grad_norm": 0.0011329575208947062, - "learning_rate": 0.00019999974974831077, - "loss": 46.0, - "step": 9324 - }, - { - "epoch": 0.7129613701091423, - "grad_norm": 0.002743436023592949, - "learning_rate": 0.00019999974969457123, - "loss": 46.0, - "step": 9325 - }, - { - "epoch": 0.713037827092532, - "grad_norm": 0.0008240920142270625, - "learning_rate": 0.00019999974964082592, - "loss": 46.0, - "step": 9326 - }, - { - "epoch": 0.7131142840759218, - "grad_norm": 0.0025407446082681417, - "learning_rate": 0.00019999974958707483, - "loss": 46.0, - "step": 9327 - }, - { - "epoch": 0.7131907410593115, - "grad_norm": 0.0006714505143463612, - "learning_rate": 0.000199999749533318, - "loss": 46.0, - "step": 9328 - }, - { - "epoch": 0.7132671980427012, - "grad_norm": 0.0024302825331687927, - "learning_rate": 0.0001999997494795554, - "loss": 46.0, - "step": 9329 - }, - { - "epoch": 0.713343655026091, - "grad_norm": 0.0012964991619810462, - "learning_rate": 0.000199999749425787, - "loss": 46.0, - "step": 9330 - }, - { - "epoch": 0.7134201120094806, - "grad_norm": 0.0013611806789413095, - "learning_rate": 0.00019999974937201284, - "loss": 46.0, - "step": 9331 - }, - { - "epoch": 0.7134965689928704, - "grad_norm": 0.002300181658938527, - "learning_rate": 0.00019999974931823294, - "loss": 46.0, - "step": 9332 - }, - { - "epoch": 0.7135730259762602, - "grad_norm": 0.0010458847973495722, - "learning_rate": 0.00019999974926444725, - "loss": 46.0, - "step": 9333 - }, - { - "epoch": 0.7136494829596498, - "grad_norm": 0.0011069714091718197, - "learning_rate": 0.00019999974921065578, - "loss": 46.0, - "step": 9334 - }, - { - "epoch": 0.7137259399430396, - "grad_norm": 0.0025952591095119715, - "learning_rate": 0.00019999974915685856, - "loss": 46.0, - "step": 9335 - }, - { - "epoch": 0.7138023969264292, - "grad_norm": 0.007478729356080294, - "learning_rate": 0.00019999974910305554, - "loss": 46.0, - "step": 9336 - }, - { - "epoch": 0.713878853909819, - "grad_norm": 0.0013561181258410215, - "learning_rate": 0.00019999974904924678, - "loss": 46.0, - "step": 9337 - }, - { - "epoch": 0.7139553108932087, - "grad_norm": 0.0025987953413277864, - "learning_rate": 0.00019999974899543224, - "loss": 46.0, - "step": 9338 - }, - { - "epoch": 0.7140317678765984, - "grad_norm": 0.001967592863366008, - "learning_rate": 0.00019999974894161193, - "loss": 46.0, - "step": 9339 - }, - { - "epoch": 0.7141082248599881, - "grad_norm": 0.0010752426460385323, - "learning_rate": 0.00019999974888778584, - "loss": 46.0, - "step": 9340 - }, - { - "epoch": 0.7141846818433779, - "grad_norm": 0.0007942437077872455, - "learning_rate": 0.000199999748833954, - "loss": 46.0, - "step": 9341 - }, - { - "epoch": 0.7142611388267676, - "grad_norm": 0.001369924983009696, - "learning_rate": 0.00019999974878011638, - "loss": 46.0, - "step": 9342 - }, - { - "epoch": 0.7143375958101573, - "grad_norm": 0.0008023129776120186, - "learning_rate": 0.00019999974872627298, - "loss": 46.0, - "step": 9343 - }, - { - "epoch": 0.7144140527935471, - "grad_norm": 0.002127670682966709, - "learning_rate": 0.00019999974867242383, - "loss": 46.0, - "step": 9344 - }, - { - "epoch": 0.7144905097769367, - "grad_norm": 0.001691145938821137, - "learning_rate": 0.0001999997486185689, - "loss": 46.0, - "step": 9345 - }, - { - "epoch": 0.7145669667603265, - "grad_norm": 0.0008705343352630734, - "learning_rate": 0.00019999974856470823, - "loss": 46.0, - "step": 9346 - }, - { - "epoch": 0.7146434237437161, - "grad_norm": 0.0011588613269850612, - "learning_rate": 0.00019999974851084176, - "loss": 46.0, - "step": 9347 - }, - { - "epoch": 0.7147198807271059, - "grad_norm": 0.0005489863106049597, - "learning_rate": 0.00019999974845696952, - "loss": 46.0, - "step": 9348 - }, - { - "epoch": 0.7147963377104957, - "grad_norm": 0.0039374688640236855, - "learning_rate": 0.00019999974840309153, - "loss": 46.0, - "step": 9349 - }, - { - "epoch": 0.7148727946938853, - "grad_norm": 0.0008682627812959254, - "learning_rate": 0.00019999974834920776, - "loss": 46.0, - "step": 9350 - }, - { - "epoch": 0.7149492516772751, - "grad_norm": 0.0016881709452718496, - "learning_rate": 0.0001999997482953182, - "loss": 46.0, - "step": 9351 - }, - { - "epoch": 0.7150257086606648, - "grad_norm": 0.001021580072119832, - "learning_rate": 0.00019999974824142292, - "loss": 46.0, - "step": 9352 - }, - { - "epoch": 0.7151021656440545, - "grad_norm": 0.002316372701898217, - "learning_rate": 0.0001999997481875218, - "loss": 46.0, - "step": 9353 - }, - { - "epoch": 0.7151786226274442, - "grad_norm": 0.0009586425148881972, - "learning_rate": 0.00019999974813361498, - "loss": 46.0, - "step": 9354 - }, - { - "epoch": 0.7152550796108339, - "grad_norm": 0.0009738823282532394, - "learning_rate": 0.00019999974807970237, - "loss": 46.0, - "step": 9355 - }, - { - "epoch": 0.7153315365942237, - "grad_norm": 0.0012651185970753431, - "learning_rate": 0.00019999974802578397, - "loss": 46.0, - "step": 9356 - }, - { - "epoch": 0.7154079935776134, - "grad_norm": 0.005341680254787207, - "learning_rate": 0.00019999974797185982, - "loss": 46.0, - "step": 9357 - }, - { - "epoch": 0.7154844505610031, - "grad_norm": 0.004591882228851318, - "learning_rate": 0.0001999997479179299, - "loss": 46.0, - "step": 9358 - }, - { - "epoch": 0.7155609075443928, - "grad_norm": 0.0059111532755196095, - "learning_rate": 0.00019999974786399417, - "loss": 46.0, - "step": 9359 - }, - { - "epoch": 0.7156373645277826, - "grad_norm": 0.0009546828223392367, - "learning_rate": 0.00019999974781005273, - "loss": 46.0, - "step": 9360 - }, - { - "epoch": 0.7157138215111722, - "grad_norm": 0.0016816677525639534, - "learning_rate": 0.00019999974775610548, - "loss": 46.0, - "step": 9361 - }, - { - "epoch": 0.715790278494562, - "grad_norm": 0.0035789271350950003, - "learning_rate": 0.00019999974770215247, - "loss": 46.0, - "step": 9362 - }, - { - "epoch": 0.7158667354779518, - "grad_norm": 0.0019321265863254666, - "learning_rate": 0.0001999997476481937, - "loss": 46.0, - "step": 9363 - }, - { - "epoch": 0.7159431924613414, - "grad_norm": 0.003660425543785095, - "learning_rate": 0.00019999974759422914, - "loss": 46.0, - "step": 9364 - }, - { - "epoch": 0.7160196494447312, - "grad_norm": 0.001513624913059175, - "learning_rate": 0.00019999974754025885, - "loss": 46.0, - "step": 9365 - }, - { - "epoch": 0.7160961064281208, - "grad_norm": 0.002380894962698221, - "learning_rate": 0.00019999974748628277, - "loss": 46.0, - "step": 9366 - }, - { - "epoch": 0.7161725634115106, - "grad_norm": 0.0006959422025829554, - "learning_rate": 0.0001999997474323009, - "loss": 46.0, - "step": 9367 - }, - { - "epoch": 0.7162490203949003, - "grad_norm": 0.000567566545214504, - "learning_rate": 0.00019999974737831328, - "loss": 46.0, - "step": 9368 - }, - { - "epoch": 0.71632547737829, - "grad_norm": 0.0010751682566478848, - "learning_rate": 0.0001999997473243199, - "loss": 46.0, - "step": 9369 - }, - { - "epoch": 0.7164019343616798, - "grad_norm": 0.00132025929633528, - "learning_rate": 0.00019999974727032073, - "loss": 46.0, - "step": 9370 - }, - { - "epoch": 0.7164783913450695, - "grad_norm": 0.0022320637945085764, - "learning_rate": 0.0001999997472163158, - "loss": 46.0, - "step": 9371 - }, - { - "epoch": 0.7165548483284592, - "grad_norm": 0.0006569395773112774, - "learning_rate": 0.00019999974716230508, - "loss": 46.0, - "step": 9372 - }, - { - "epoch": 0.7166313053118489, - "grad_norm": 0.0011366188991814852, - "learning_rate": 0.00019999974710828864, - "loss": 46.0, - "step": 9373 - }, - { - "epoch": 0.7167077622952387, - "grad_norm": 0.0009507978102192283, - "learning_rate": 0.0001999997470542664, - "loss": 46.0, - "step": 9374 - }, - { - "epoch": 0.7167842192786283, - "grad_norm": 0.0014681183965876698, - "learning_rate": 0.00019999974700023838, - "loss": 46.0, - "step": 9375 - }, - { - "epoch": 0.7168606762620181, - "grad_norm": 0.0011173589155077934, - "learning_rate": 0.00019999974694620462, - "loss": 46.0, - "step": 9376 - }, - { - "epoch": 0.7169371332454078, - "grad_norm": 0.0025795476976782084, - "learning_rate": 0.00019999974689216505, - "loss": 46.0, - "step": 9377 - }, - { - "epoch": 0.7170135902287975, - "grad_norm": 0.007265380583703518, - "learning_rate": 0.00019999974683811974, - "loss": 46.0, - "step": 9378 - }, - { - "epoch": 0.7170900472121873, - "grad_norm": 0.0008986399625428021, - "learning_rate": 0.0001999997467840687, - "loss": 46.0, - "step": 9379 - }, - { - "epoch": 0.7171665041955769, - "grad_norm": 0.005378580652177334, - "learning_rate": 0.0001999997467300118, - "loss": 46.0, - "step": 9380 - }, - { - "epoch": 0.7172429611789667, - "grad_norm": 0.000935537158511579, - "learning_rate": 0.00019999974667594917, - "loss": 46.0, - "step": 9381 - }, - { - "epoch": 0.7173194181623564, - "grad_norm": 0.0021799695678055286, - "learning_rate": 0.0001999997466218808, - "loss": 46.0, - "step": 9382 - }, - { - "epoch": 0.7173958751457461, - "grad_norm": 0.0007234615040943027, - "learning_rate": 0.00019999974656780662, - "loss": 46.0, - "step": 9383 - }, - { - "epoch": 0.7174723321291359, - "grad_norm": 0.0031444921623915434, - "learning_rate": 0.0001999997465137267, - "loss": 46.0, - "step": 9384 - }, - { - "epoch": 0.7175487891125256, - "grad_norm": 0.0008040877874009311, - "learning_rate": 0.000199999746459641, - "loss": 46.0, - "step": 9385 - }, - { - "epoch": 0.7176252460959153, - "grad_norm": 0.0009112692205235362, - "learning_rate": 0.0001999997464055495, - "loss": 46.0, - "step": 9386 - }, - { - "epoch": 0.717701703079305, - "grad_norm": 0.0024245227687060833, - "learning_rate": 0.0001999997463514523, - "loss": 46.0, - "step": 9387 - }, - { - "epoch": 0.7177781600626947, - "grad_norm": 0.003240007907152176, - "learning_rate": 0.00019999974629734924, - "loss": 46.0, - "step": 9388 - }, - { - "epoch": 0.7178546170460844, - "grad_norm": 0.0009320462704636157, - "learning_rate": 0.00019999974624324048, - "loss": 46.0, - "step": 9389 - }, - { - "epoch": 0.7179310740294742, - "grad_norm": 0.0011084995931014419, - "learning_rate": 0.00019999974618912594, - "loss": 46.0, - "step": 9390 - }, - { - "epoch": 0.7180075310128639, - "grad_norm": 0.0008703387575224042, - "learning_rate": 0.0001999997461350056, - "loss": 46.0, - "step": 9391 - }, - { - "epoch": 0.7180839879962536, - "grad_norm": 0.003441119333729148, - "learning_rate": 0.00019999974608087953, - "loss": 46.0, - "step": 9392 - }, - { - "epoch": 0.7181604449796434, - "grad_norm": 0.004153742920607328, - "learning_rate": 0.00019999974602674767, - "loss": 46.0, - "step": 9393 - }, - { - "epoch": 0.718236901963033, - "grad_norm": 0.0016669102478772402, - "learning_rate": 0.00019999974597261004, - "loss": 46.0, - "step": 9394 - }, - { - "epoch": 0.7183133589464228, - "grad_norm": 0.001168415299616754, - "learning_rate": 0.00019999974591846664, - "loss": 46.0, - "step": 9395 - }, - { - "epoch": 0.7183898159298124, - "grad_norm": 0.0010830056853592396, - "learning_rate": 0.00019999974586431746, - "loss": 46.0, - "step": 9396 - }, - { - "epoch": 0.7184662729132022, - "grad_norm": 0.0005991084035485983, - "learning_rate": 0.00019999974581016254, - "loss": 46.0, - "step": 9397 - }, - { - "epoch": 0.718542729896592, - "grad_norm": 0.0016091811703518033, - "learning_rate": 0.00019999974575600184, - "loss": 46.0, - "step": 9398 - }, - { - "epoch": 0.7186191868799816, - "grad_norm": 0.0005256792646832764, - "learning_rate": 0.00019999974570183534, - "loss": 46.0, - "step": 9399 - }, - { - "epoch": 0.7186956438633714, - "grad_norm": 0.0030196753796190023, - "learning_rate": 0.00019999974564766313, - "loss": 46.0, - "step": 9400 - }, - { - "epoch": 0.7187721008467611, - "grad_norm": 0.0011832899181172252, - "learning_rate": 0.0001999997455934851, - "loss": 46.0, - "step": 9401 - }, - { - "epoch": 0.7188485578301508, - "grad_norm": 0.0023695374839007854, - "learning_rate": 0.00019999974553930132, - "loss": 46.0, - "step": 9402 - }, - { - "epoch": 0.7189250148135405, - "grad_norm": 0.0012815966038033366, - "learning_rate": 0.0001999997454851118, - "loss": 46.0, - "step": 9403 - }, - { - "epoch": 0.7190014717969303, - "grad_norm": 0.0008994894451461732, - "learning_rate": 0.00019999974543091645, - "loss": 46.0, - "step": 9404 - }, - { - "epoch": 0.71907792878032, - "grad_norm": 0.0035863597877323627, - "learning_rate": 0.00019999974537671537, - "loss": 46.0, - "step": 9405 - }, - { - "epoch": 0.7191543857637097, - "grad_norm": 0.0020268044900149107, - "learning_rate": 0.0001999997453225085, - "loss": 46.0, - "step": 9406 - }, - { - "epoch": 0.7192308427470994, - "grad_norm": 0.0016784875188022852, - "learning_rate": 0.00019999974526829588, - "loss": 46.0, - "step": 9407 - }, - { - "epoch": 0.7193072997304891, - "grad_norm": 0.005460328422486782, - "learning_rate": 0.00019999974521407748, - "loss": 46.0, - "step": 9408 - }, - { - "epoch": 0.7193837567138789, - "grad_norm": 0.0005024038255214691, - "learning_rate": 0.0001999997451598533, - "loss": 46.0, - "step": 9409 - }, - { - "epoch": 0.7194602136972685, - "grad_norm": 0.000776283850427717, - "learning_rate": 0.00019999974510562335, - "loss": 46.0, - "step": 9410 - }, - { - "epoch": 0.7195366706806583, - "grad_norm": 0.0005845642299391329, - "learning_rate": 0.00019999974505138766, - "loss": 46.0, - "step": 9411 - }, - { - "epoch": 0.7196131276640481, - "grad_norm": 0.001419172971509397, - "learning_rate": 0.0001999997449971462, - "loss": 46.0, - "step": 9412 - }, - { - "epoch": 0.7196895846474377, - "grad_norm": 0.002445223508402705, - "learning_rate": 0.00019999974494289892, - "loss": 46.0, - "step": 9413 - }, - { - "epoch": 0.7197660416308275, - "grad_norm": 0.006486148573458195, - "learning_rate": 0.00019999974488864593, - "loss": 46.0, - "step": 9414 - }, - { - "epoch": 0.7198424986142172, - "grad_norm": 0.0013122617965564132, - "learning_rate": 0.00019999974483438711, - "loss": 46.0, - "step": 9415 - }, - { - "epoch": 0.7199189555976069, - "grad_norm": 0.0008856218191795051, - "learning_rate": 0.00019999974478012258, - "loss": 46.0, - "step": 9416 - }, - { - "epoch": 0.7199954125809966, - "grad_norm": 0.0012524225749075413, - "learning_rate": 0.00019999974472585224, - "loss": 46.0, - "step": 9417 - }, - { - "epoch": 0.7200718695643863, - "grad_norm": 0.0009493715479038656, - "learning_rate": 0.00019999974467157613, - "loss": 46.0, - "step": 9418 - }, - { - "epoch": 0.720148326547776, - "grad_norm": 0.0013101128861308098, - "learning_rate": 0.0001999997446172943, - "loss": 46.0, - "step": 9419 - }, - { - "epoch": 0.7202247835311658, - "grad_norm": 0.002712149638682604, - "learning_rate": 0.00019999974456300668, - "loss": 46.0, - "step": 9420 - }, - { - "epoch": 0.7203012405145555, - "grad_norm": 0.0015899811405688524, - "learning_rate": 0.00019999974450871327, - "loss": 46.0, - "step": 9421 - }, - { - "epoch": 0.7203776974979452, - "grad_norm": 0.002752808853983879, - "learning_rate": 0.00019999974445441407, - "loss": 46.0, - "step": 9422 - }, - { - "epoch": 0.720454154481335, - "grad_norm": 0.0015156165463849902, - "learning_rate": 0.00019999974440010915, - "loss": 46.0, - "step": 9423 - }, - { - "epoch": 0.7205306114647246, - "grad_norm": 0.00050859444309026, - "learning_rate": 0.00019999974434579842, - "loss": 46.0, - "step": 9424 - }, - { - "epoch": 0.7206070684481144, - "grad_norm": 0.0006458958378061652, - "learning_rate": 0.00019999974429148196, - "loss": 46.0, - "step": 9425 - }, - { - "epoch": 0.720683525431504, - "grad_norm": 0.0008962931460700929, - "learning_rate": 0.00019999974423715971, - "loss": 46.0, - "step": 9426 - }, - { - "epoch": 0.7207599824148938, - "grad_norm": 0.0012574514839798212, - "learning_rate": 0.00019999974418283167, - "loss": 46.0, - "step": 9427 - }, - { - "epoch": 0.7208364393982836, - "grad_norm": 0.0061298212967813015, - "learning_rate": 0.00019999974412849788, - "loss": 46.0, - "step": 9428 - }, - { - "epoch": 0.7209128963816732, - "grad_norm": 0.0022923534270375967, - "learning_rate": 0.00019999974407415835, - "loss": 46.0, - "step": 9429 - }, - { - "epoch": 0.720989353365063, - "grad_norm": 0.0011839504586532712, - "learning_rate": 0.000199999744019813, - "loss": 46.0, - "step": 9430 - }, - { - "epoch": 0.7210658103484527, - "grad_norm": 0.0017941249534487724, - "learning_rate": 0.0001999997439654619, - "loss": 46.0, - "step": 9431 - }, - { - "epoch": 0.7211422673318424, - "grad_norm": 0.012152974493801594, - "learning_rate": 0.00019999974391110505, - "loss": 46.0, - "step": 9432 - }, - { - "epoch": 0.7212187243152322, - "grad_norm": 0.0014832686865702271, - "learning_rate": 0.00019999974385674242, - "loss": 46.0, - "step": 9433 - }, - { - "epoch": 0.7212951812986219, - "grad_norm": 0.0010577032808214426, - "learning_rate": 0.00019999974380237402, - "loss": 46.0, - "step": 9434 - }, - { - "epoch": 0.7213716382820116, - "grad_norm": 0.006536453031003475, - "learning_rate": 0.00019999974374799984, - "loss": 46.0, - "step": 9435 - }, - { - "epoch": 0.7214480952654013, - "grad_norm": 0.0027409642934799194, - "learning_rate": 0.0001999997436936199, - "loss": 46.0, - "step": 9436 - }, - { - "epoch": 0.721524552248791, - "grad_norm": 0.0027364809066057205, - "learning_rate": 0.0001999997436392342, - "loss": 46.0, - "step": 9437 - }, - { - "epoch": 0.7216010092321807, - "grad_norm": 0.0019776425324380398, - "learning_rate": 0.00019999974358484267, - "loss": 46.0, - "step": 9438 - }, - { - "epoch": 0.7216774662155705, - "grad_norm": 0.0006617572507821023, - "learning_rate": 0.00019999974353044543, - "loss": 46.0, - "step": 9439 - }, - { - "epoch": 0.7217539231989601, - "grad_norm": 0.0010409303940832615, - "learning_rate": 0.00019999974347604242, - "loss": 46.0, - "step": 9440 - }, - { - "epoch": 0.7218303801823499, - "grad_norm": 0.0015258465427905321, - "learning_rate": 0.00019999974342163363, - "loss": 46.0, - "step": 9441 - }, - { - "epoch": 0.7219068371657397, - "grad_norm": 0.0007176099461503327, - "learning_rate": 0.00019999974336721907, - "loss": 46.0, - "step": 9442 - }, - { - "epoch": 0.7219832941491293, - "grad_norm": 0.003987742122262716, - "learning_rate": 0.00019999974331279873, - "loss": 46.0, - "step": 9443 - }, - { - "epoch": 0.7220597511325191, - "grad_norm": 0.001661333255469799, - "learning_rate": 0.00019999974325837262, - "loss": 46.0, - "step": 9444 - }, - { - "epoch": 0.7221362081159088, - "grad_norm": 0.0009441061993129551, - "learning_rate": 0.00019999974320394077, - "loss": 46.0, - "step": 9445 - }, - { - "epoch": 0.7222126650992985, - "grad_norm": 0.0009892223170027137, - "learning_rate": 0.00019999974314950314, - "loss": 46.0, - "step": 9446 - }, - { - "epoch": 0.7222891220826883, - "grad_norm": 0.0009868749184533954, - "learning_rate": 0.00019999974309505974, - "loss": 46.0, - "step": 9447 - }, - { - "epoch": 0.7223655790660779, - "grad_norm": 0.0004595013742800802, - "learning_rate": 0.00019999974304061056, - "loss": 46.0, - "step": 9448 - }, - { - "epoch": 0.7224420360494677, - "grad_norm": 0.0015255939215421677, - "learning_rate": 0.0001999997429861556, - "loss": 46.0, - "step": 9449 - }, - { - "epoch": 0.7225184930328574, - "grad_norm": 0.003566581755876541, - "learning_rate": 0.0001999997429316949, - "loss": 46.0, - "step": 9450 - }, - { - "epoch": 0.7225949500162471, - "grad_norm": 0.0010437725577503443, - "learning_rate": 0.0001999997428772284, - "loss": 46.0, - "step": 9451 - }, - { - "epoch": 0.7226714069996368, - "grad_norm": 0.0011425354750826955, - "learning_rate": 0.00019999974282275615, - "loss": 46.0, - "step": 9452 - }, - { - "epoch": 0.7227478639830266, - "grad_norm": 0.0015790280885994434, - "learning_rate": 0.00019999974276827814, - "loss": 46.0, - "step": 9453 - }, - { - "epoch": 0.7228243209664162, - "grad_norm": 0.002870062133297324, - "learning_rate": 0.00019999974271379432, - "loss": 46.0, - "step": 9454 - }, - { - "epoch": 0.722900777949806, - "grad_norm": 0.0038742884062230587, - "learning_rate": 0.00019999974265930476, - "loss": 46.0, - "step": 9455 - }, - { - "epoch": 0.7229772349331957, - "grad_norm": 0.0011135550448670983, - "learning_rate": 0.00019999974260480943, - "loss": 46.0, - "step": 9456 - }, - { - "epoch": 0.7230536919165854, - "grad_norm": 0.0004728232743218541, - "learning_rate": 0.00019999974255030834, - "loss": 46.0, - "step": 9457 - }, - { - "epoch": 0.7231301488999752, - "grad_norm": 0.00045553603558801115, - "learning_rate": 0.00019999974249580146, - "loss": 46.0, - "step": 9458 - }, - { - "epoch": 0.7232066058833648, - "grad_norm": 0.0006534690619446337, - "learning_rate": 0.0001999997424412888, - "loss": 46.0, - "step": 9459 - }, - { - "epoch": 0.7232830628667546, - "grad_norm": 0.00052379269618541, - "learning_rate": 0.0001999997423867704, - "loss": 46.0, - "step": 9460 - }, - { - "epoch": 0.7233595198501444, - "grad_norm": 0.000713361834641546, - "learning_rate": 0.00019999974233224623, - "loss": 46.0, - "step": 9461 - }, - { - "epoch": 0.723435976833534, - "grad_norm": 0.001291573396883905, - "learning_rate": 0.00019999974227771628, - "loss": 46.0, - "step": 9462 - }, - { - "epoch": 0.7235124338169238, - "grad_norm": 0.0012073114048689604, - "learning_rate": 0.00019999974222318056, - "loss": 46.0, - "step": 9463 - }, - { - "epoch": 0.7235888908003135, - "grad_norm": 0.0005278547760099173, - "learning_rate": 0.0001999997421686391, - "loss": 46.0, - "step": 9464 - }, - { - "epoch": 0.7236653477837032, - "grad_norm": 0.0012327833101153374, - "learning_rate": 0.00019999974211409182, - "loss": 46.0, - "step": 9465 - }, - { - "epoch": 0.7237418047670929, - "grad_norm": 0.0005570958601310849, - "learning_rate": 0.0001999997420595388, - "loss": 46.0, - "step": 9466 - }, - { - "epoch": 0.7238182617504826, - "grad_norm": 0.005379240959882736, - "learning_rate": 0.00019999974200498, - "loss": 46.0, - "step": 9467 - }, - { - "epoch": 0.7238947187338723, - "grad_norm": 0.003918815404176712, - "learning_rate": 0.00019999974195041543, - "loss": 46.0, - "step": 9468 - }, - { - "epoch": 0.7239711757172621, - "grad_norm": 0.0003865146136377007, - "learning_rate": 0.0001999997418958451, - "loss": 46.0, - "step": 9469 - }, - { - "epoch": 0.7240476327006518, - "grad_norm": 0.0012673111632466316, - "learning_rate": 0.000199999741841269, - "loss": 46.0, - "step": 9470 - }, - { - "epoch": 0.7241240896840415, - "grad_norm": 0.0009129495010711253, - "learning_rate": 0.0001999997417866871, - "loss": 46.0, - "step": 9471 - }, - { - "epoch": 0.7242005466674313, - "grad_norm": 0.002420772099867463, - "learning_rate": 0.00019999974173209948, - "loss": 46.0, - "step": 9472 - }, - { - "epoch": 0.7242770036508209, - "grad_norm": 0.0008737086900509894, - "learning_rate": 0.00019999974167750605, - "loss": 46.0, - "step": 9473 - }, - { - "epoch": 0.7243534606342107, - "grad_norm": 0.0014347423566505313, - "learning_rate": 0.00019999974162290688, - "loss": 46.0, - "step": 9474 - }, - { - "epoch": 0.7244299176176004, - "grad_norm": 0.0052198376506567, - "learning_rate": 0.00019999974156830193, - "loss": 46.0, - "step": 9475 - }, - { - "epoch": 0.7245063746009901, - "grad_norm": 0.0011726554948836565, - "learning_rate": 0.0001999997415136912, - "loss": 46.0, - "step": 9476 - }, - { - "epoch": 0.7245828315843799, - "grad_norm": 0.0011018315562978387, - "learning_rate": 0.00019999974145907471, - "loss": 46.0, - "step": 9477 - }, - { - "epoch": 0.7246592885677695, - "grad_norm": 0.0013048032997176051, - "learning_rate": 0.00019999974140445245, - "loss": 46.0, - "step": 9478 - }, - { - "epoch": 0.7247357455511593, - "grad_norm": 0.000518255343195051, - "learning_rate": 0.00019999974134982443, - "loss": 46.0, - "step": 9479 - }, - { - "epoch": 0.724812202534549, - "grad_norm": 0.0013148487778380513, - "learning_rate": 0.00019999974129519062, - "loss": 46.0, - "step": 9480 - }, - { - "epoch": 0.7248886595179387, - "grad_norm": 0.0003296970680821687, - "learning_rate": 0.00019999974124055106, - "loss": 46.0, - "step": 9481 - }, - { - "epoch": 0.7249651165013284, - "grad_norm": 0.0005345446988940239, - "learning_rate": 0.00019999974118590572, - "loss": 46.0, - "step": 9482 - }, - { - "epoch": 0.7250415734847182, - "grad_norm": 0.0007220385014079511, - "learning_rate": 0.0001999997411312546, - "loss": 46.0, - "step": 9483 - }, - { - "epoch": 0.7251180304681079, - "grad_norm": 0.0019289315678179264, - "learning_rate": 0.00019999974107659773, - "loss": 46.0, - "step": 9484 - }, - { - "epoch": 0.7251944874514976, - "grad_norm": 0.004281338769942522, - "learning_rate": 0.00019999974102193508, - "loss": 46.0, - "step": 9485 - }, - { - "epoch": 0.7252709444348874, - "grad_norm": 0.003049361752346158, - "learning_rate": 0.00019999974096726665, - "loss": 46.0, - "step": 9486 - }, - { - "epoch": 0.725347401418277, - "grad_norm": 0.004174086730927229, - "learning_rate": 0.00019999974091259248, - "loss": 46.0, - "step": 9487 - }, - { - "epoch": 0.7254238584016668, - "grad_norm": 0.0007800482562743127, - "learning_rate": 0.00019999974085791253, - "loss": 46.0, - "step": 9488 - }, - { - "epoch": 0.7255003153850564, - "grad_norm": 0.00039280750206671655, - "learning_rate": 0.0001999997408032268, - "loss": 46.0, - "step": 9489 - }, - { - "epoch": 0.7255767723684462, - "grad_norm": 0.0008713495335541666, - "learning_rate": 0.0001999997407485353, - "loss": 46.0, - "step": 9490 - }, - { - "epoch": 0.725653229351836, - "grad_norm": 0.002624642802402377, - "learning_rate": 0.00019999974069383804, - "loss": 46.0, - "step": 9491 - }, - { - "epoch": 0.7257296863352256, - "grad_norm": 0.0021717939525842667, - "learning_rate": 0.000199999740639135, - "loss": 46.0, - "step": 9492 - }, - { - "epoch": 0.7258061433186154, - "grad_norm": 0.0008572438382543623, - "learning_rate": 0.00019999974058442622, - "loss": 46.0, - "step": 9493 - }, - { - "epoch": 0.7258826003020051, - "grad_norm": 0.0007696160464547575, - "learning_rate": 0.00019999974052971166, - "loss": 46.0, - "step": 9494 - }, - { - "epoch": 0.7259590572853948, - "grad_norm": 0.0011571563081815839, - "learning_rate": 0.0001999997404749913, - "loss": 46.0, - "step": 9495 - }, - { - "epoch": 0.7260355142687845, - "grad_norm": 0.0063188462518155575, - "learning_rate": 0.0001999997404202652, - "loss": 46.0, - "step": 9496 - }, - { - "epoch": 0.7261119712521742, - "grad_norm": 0.0014230284141376615, - "learning_rate": 0.00019999974036553333, - "loss": 46.0, - "step": 9497 - }, - { - "epoch": 0.726188428235564, - "grad_norm": 0.0006551842670887709, - "learning_rate": 0.00019999974031079568, - "loss": 46.0, - "step": 9498 - }, - { - "epoch": 0.7262648852189537, - "grad_norm": 0.004869310650974512, - "learning_rate": 0.00019999974025605222, - "loss": 46.0, - "step": 9499 - }, - { - "epoch": 0.7263413422023434, - "grad_norm": 0.0010433326242491603, - "learning_rate": 0.00019999974020130305, - "loss": 46.0, - "step": 9500 - }, - { - "epoch": 0.7264177991857331, - "grad_norm": 0.0059239184483885765, - "learning_rate": 0.0001999997401465481, - "loss": 46.0, - "step": 9501 - }, - { - "epoch": 0.7264942561691229, - "grad_norm": 0.0012868397170677781, - "learning_rate": 0.00019999974009178738, - "loss": 46.0, - "step": 9502 - }, - { - "epoch": 0.7265707131525125, - "grad_norm": 0.0005700111505575478, - "learning_rate": 0.00019999974003702086, - "loss": 46.0, - "step": 9503 - }, - { - "epoch": 0.7266471701359023, - "grad_norm": 0.0010297095868736506, - "learning_rate": 0.00019999973998224862, - "loss": 46.0, - "step": 9504 - }, - { - "epoch": 0.7267236271192921, - "grad_norm": 0.0006494633853435516, - "learning_rate": 0.00019999973992747058, - "loss": 46.0, - "step": 9505 - }, - { - "epoch": 0.7268000841026817, - "grad_norm": 0.0012249941937625408, - "learning_rate": 0.00019999973987268677, - "loss": 46.0, - "step": 9506 - }, - { - "epoch": 0.7268765410860715, - "grad_norm": 0.0019425624050199986, - "learning_rate": 0.0001999997398178972, - "loss": 46.0, - "step": 9507 - }, - { - "epoch": 0.7269529980694611, - "grad_norm": 0.0015153740532696247, - "learning_rate": 0.00019999973976310184, - "loss": 46.0, - "step": 9508 - }, - { - "epoch": 0.7270294550528509, - "grad_norm": 0.0012835317756980658, - "learning_rate": 0.00019999973970830074, - "loss": 46.0, - "step": 9509 - }, - { - "epoch": 0.7271059120362406, - "grad_norm": 0.0008624490583315492, - "learning_rate": 0.00019999973965349386, - "loss": 46.0, - "step": 9510 - }, - { - "epoch": 0.7271823690196303, - "grad_norm": 0.0003058585280086845, - "learning_rate": 0.0001999997395986812, - "loss": 46.0, - "step": 9511 - }, - { - "epoch": 0.72725882600302, - "grad_norm": 0.000640116457361728, - "learning_rate": 0.00019999973954386277, - "loss": 46.0, - "step": 9512 - }, - { - "epoch": 0.7273352829864098, - "grad_norm": 0.004966209176927805, - "learning_rate": 0.00019999973948903857, - "loss": 46.0, - "step": 9513 - }, - { - "epoch": 0.7274117399697995, - "grad_norm": 0.0007462220964953303, - "learning_rate": 0.00019999973943420863, - "loss": 46.0, - "step": 9514 - }, - { - "epoch": 0.7274881969531892, - "grad_norm": 0.0009834475349634886, - "learning_rate": 0.0001999997393793729, - "loss": 46.0, - "step": 9515 - }, - { - "epoch": 0.727564653936579, - "grad_norm": 0.0015895604155957699, - "learning_rate": 0.0001999997393245314, - "loss": 46.0, - "step": 9516 - }, - { - "epoch": 0.7276411109199686, - "grad_norm": 0.003138477448374033, - "learning_rate": 0.00019999973926968415, - "loss": 46.0, - "step": 9517 - }, - { - "epoch": 0.7277175679033584, - "grad_norm": 0.0010540066286921501, - "learning_rate": 0.0001999997392148311, - "loss": 46.0, - "step": 9518 - }, - { - "epoch": 0.727794024886748, - "grad_norm": 0.0012817922979593277, - "learning_rate": 0.0001999997391599723, - "loss": 46.0, - "step": 9519 - }, - { - "epoch": 0.7278704818701378, - "grad_norm": 0.0017212582752108574, - "learning_rate": 0.00019999973910510773, - "loss": 46.0, - "step": 9520 - }, - { - "epoch": 0.7279469388535276, - "grad_norm": 0.0005717522581107914, - "learning_rate": 0.00019999973905023737, - "loss": 46.0, - "step": 9521 - }, - { - "epoch": 0.7280233958369172, - "grad_norm": 0.000797246175352484, - "learning_rate": 0.00019999973899536126, - "loss": 46.0, - "step": 9522 - }, - { - "epoch": 0.728099852820307, - "grad_norm": 0.0013025507796555758, - "learning_rate": 0.00019999973894047938, - "loss": 46.0, - "step": 9523 - }, - { - "epoch": 0.7281763098036967, - "grad_norm": 0.005301415454596281, - "learning_rate": 0.0001999997388855917, - "loss": 46.0, - "step": 9524 - }, - { - "epoch": 0.7282527667870864, - "grad_norm": 0.0012863590382039547, - "learning_rate": 0.0001999997388306983, - "loss": 46.0, - "step": 9525 - }, - { - "epoch": 0.7283292237704762, - "grad_norm": 0.0009101226460188627, - "learning_rate": 0.0001999997387757991, - "loss": 46.0, - "step": 9526 - }, - { - "epoch": 0.7284056807538658, - "grad_norm": 0.0009201113716699183, - "learning_rate": 0.00019999973872089416, - "loss": 46.0, - "step": 9527 - }, - { - "epoch": 0.7284821377372556, - "grad_norm": 0.0028858466539531946, - "learning_rate": 0.0001999997386659834, - "loss": 46.0, - "step": 9528 - }, - { - "epoch": 0.7285585947206453, - "grad_norm": 0.00046491200919263065, - "learning_rate": 0.0001999997386110669, - "loss": 46.0, - "step": 9529 - }, - { - "epoch": 0.728635051704035, - "grad_norm": 0.0006526696961373091, - "learning_rate": 0.00019999973855614465, - "loss": 46.0, - "step": 9530 - }, - { - "epoch": 0.7287115086874247, - "grad_norm": 0.0004188350576441735, - "learning_rate": 0.00019999973850121658, - "loss": 46.0, - "step": 9531 - }, - { - "epoch": 0.7287879656708145, - "grad_norm": 0.008840947411954403, - "learning_rate": 0.0001999997384462828, - "loss": 46.0, - "step": 9532 - }, - { - "epoch": 0.7288644226542041, - "grad_norm": 0.0023184798192232847, - "learning_rate": 0.0001999997383913432, - "loss": 46.0, - "step": 9533 - }, - { - "epoch": 0.7289408796375939, - "grad_norm": 0.00394762959331274, - "learning_rate": 0.00019999973833639785, - "loss": 46.0, - "step": 9534 - }, - { - "epoch": 0.7290173366209837, - "grad_norm": 0.0009867673506960273, - "learning_rate": 0.00019999973828144674, - "loss": 46.0, - "step": 9535 - }, - { - "epoch": 0.7290937936043733, - "grad_norm": 0.00379791553132236, - "learning_rate": 0.00019999973822648986, - "loss": 46.0, - "step": 9536 - }, - { - "epoch": 0.7291702505877631, - "grad_norm": 0.0010544833494350314, - "learning_rate": 0.0001999997381715272, - "loss": 46.0, - "step": 9537 - }, - { - "epoch": 0.7292467075711527, - "grad_norm": 0.0007636480731889606, - "learning_rate": 0.00019999973811655875, - "loss": 46.0, - "step": 9538 - }, - { - "epoch": 0.7293231645545425, - "grad_norm": 0.0008713103015907109, - "learning_rate": 0.00019999973806158455, - "loss": 46.0, - "step": 9539 - }, - { - "epoch": 0.7293996215379323, - "grad_norm": 0.001506561879068613, - "learning_rate": 0.00019999973800660458, - "loss": 46.0, - "step": 9540 - }, - { - "epoch": 0.7294760785213219, - "grad_norm": 0.0005990981007926166, - "learning_rate": 0.0001999997379516189, - "loss": 46.0, - "step": 9541 - }, - { - "epoch": 0.7295525355047117, - "grad_norm": 0.0004713807429652661, - "learning_rate": 0.00019999973789662737, - "loss": 46.0, - "step": 9542 - }, - { - "epoch": 0.7296289924881014, - "grad_norm": 0.0010668407194316387, - "learning_rate": 0.00019999973784163007, - "loss": 46.0, - "step": 9543 - }, - { - "epoch": 0.7297054494714911, - "grad_norm": 0.003675210988149047, - "learning_rate": 0.00019999973778662703, - "loss": 46.0, - "step": 9544 - }, - { - "epoch": 0.7297819064548808, - "grad_norm": 0.0007254960364662111, - "learning_rate": 0.00019999973773161825, - "loss": 46.0, - "step": 9545 - }, - { - "epoch": 0.7298583634382706, - "grad_norm": 0.0009725384297780693, - "learning_rate": 0.00019999973767660366, - "loss": 46.0, - "step": 9546 - }, - { - "epoch": 0.7299348204216602, - "grad_norm": 0.0052064149640500546, - "learning_rate": 0.0001999997376215833, - "loss": 46.0, - "step": 9547 - }, - { - "epoch": 0.73001127740505, - "grad_norm": 0.000842469627968967, - "learning_rate": 0.00019999973756655717, - "loss": 46.0, - "step": 9548 - }, - { - "epoch": 0.7300877343884397, - "grad_norm": 0.0023045376874506474, - "learning_rate": 0.0001999997375115253, - "loss": 46.0, - "step": 9549 - }, - { - "epoch": 0.7301641913718294, - "grad_norm": 0.0016457663150504231, - "learning_rate": 0.00019999973745648763, - "loss": 46.0, - "step": 9550 - }, - { - "epoch": 0.7302406483552192, - "grad_norm": 0.00207923143170774, - "learning_rate": 0.0001999997374014442, - "loss": 46.0, - "step": 9551 - }, - { - "epoch": 0.7303171053386088, - "grad_norm": 0.0013082396471872926, - "learning_rate": 0.000199999737346395, - "loss": 46.0, - "step": 9552 - }, - { - "epoch": 0.7303935623219986, - "grad_norm": 0.0026289978995919228, - "learning_rate": 0.00019999973729134003, - "loss": 46.0, - "step": 9553 - }, - { - "epoch": 0.7304700193053884, - "grad_norm": 0.0012528073275461793, - "learning_rate": 0.0001999997372362793, - "loss": 46.0, - "step": 9554 - }, - { - "epoch": 0.730546476288778, - "grad_norm": 0.003209579735994339, - "learning_rate": 0.0001999997371812128, - "loss": 46.0, - "step": 9555 - }, - { - "epoch": 0.7306229332721678, - "grad_norm": 0.0030588265508413315, - "learning_rate": 0.0001999997371261405, - "loss": 46.0, - "step": 9556 - }, - { - "epoch": 0.7306993902555574, - "grad_norm": 0.0012199330376461148, - "learning_rate": 0.00019999973707106246, - "loss": 46.0, - "step": 9557 - }, - { - "epoch": 0.7307758472389472, - "grad_norm": 0.00031229775049723685, - "learning_rate": 0.00019999973701597865, - "loss": 46.0, - "step": 9558 - }, - { - "epoch": 0.7308523042223369, - "grad_norm": 0.014457184821367264, - "learning_rate": 0.00019999973696088906, - "loss": 46.0, - "step": 9559 - }, - { - "epoch": 0.7309287612057266, - "grad_norm": 0.003756424644961953, - "learning_rate": 0.00019999973690579373, - "loss": 46.0, - "step": 9560 - }, - { - "epoch": 0.7310052181891163, - "grad_norm": 0.000609002890996635, - "learning_rate": 0.0001999997368506926, - "loss": 46.0, - "step": 9561 - }, - { - "epoch": 0.7310816751725061, - "grad_norm": 0.0040803165175020695, - "learning_rate": 0.0001999997367955857, - "loss": 46.0, - "step": 9562 - }, - { - "epoch": 0.7311581321558958, - "grad_norm": 0.0038511387538164854, - "learning_rate": 0.00019999973674047304, - "loss": 46.0, - "step": 9563 - }, - { - "epoch": 0.7312345891392855, - "grad_norm": 0.00056040333583951, - "learning_rate": 0.00019999973668535459, - "loss": 46.0, - "step": 9564 - }, - { - "epoch": 0.7313110461226753, - "grad_norm": 0.0032748880330473185, - "learning_rate": 0.00019999973663023039, - "loss": 46.0, - "step": 9565 - }, - { - "epoch": 0.7313875031060649, - "grad_norm": 0.0007885222439654171, - "learning_rate": 0.00019999973657510044, - "loss": 46.0, - "step": 9566 - }, - { - "epoch": 0.7314639600894547, - "grad_norm": 0.0008641025051474571, - "learning_rate": 0.0001999997365199647, - "loss": 46.0, - "step": 9567 - }, - { - "epoch": 0.7315404170728443, - "grad_norm": 0.01447670254856348, - "learning_rate": 0.00019999973646482317, - "loss": 46.0, - "step": 9568 - }, - { - "epoch": 0.7316168740562341, - "grad_norm": 0.000940754369366914, - "learning_rate": 0.0001999997364096759, - "loss": 46.0, - "step": 9569 - }, - { - "epoch": 0.7316933310396239, - "grad_norm": 0.0031623272225260735, - "learning_rate": 0.00019999973635452284, - "loss": 46.0, - "step": 9570 - }, - { - "epoch": 0.7317697880230135, - "grad_norm": 0.0012078157160431147, - "learning_rate": 0.00019999973629936403, - "loss": 46.0, - "step": 9571 - }, - { - "epoch": 0.7318462450064033, - "grad_norm": 0.0011179299326613545, - "learning_rate": 0.00019999973624419944, - "loss": 46.0, - "step": 9572 - }, - { - "epoch": 0.731922701989793, - "grad_norm": 0.001043855445459485, - "learning_rate": 0.00019999973618902908, - "loss": 46.0, - "step": 9573 - }, - { - "epoch": 0.7319991589731827, - "grad_norm": 0.0009011423098854721, - "learning_rate": 0.00019999973613385298, - "loss": 46.0, - "step": 9574 - }, - { - "epoch": 0.7320756159565724, - "grad_norm": 0.002464559394866228, - "learning_rate": 0.00019999973607867107, - "loss": 46.0, - "step": 9575 - }, - { - "epoch": 0.7321520729399622, - "grad_norm": 0.006772432941943407, - "learning_rate": 0.00019999973602348342, - "loss": 46.0, - "step": 9576 - }, - { - "epoch": 0.7322285299233519, - "grad_norm": 0.003458540653809905, - "learning_rate": 0.00019999973596828997, - "loss": 46.0, - "step": 9577 - }, - { - "epoch": 0.7323049869067416, - "grad_norm": 0.0009902992751449347, - "learning_rate": 0.00019999973591309077, - "loss": 46.0, - "step": 9578 - }, - { - "epoch": 0.7323814438901313, - "grad_norm": 0.0011761031346395612, - "learning_rate": 0.0001999997358578858, - "loss": 46.0, - "step": 9579 - }, - { - "epoch": 0.732457900873521, - "grad_norm": 0.0012663932284340262, - "learning_rate": 0.00019999973580267505, - "loss": 46.0, - "step": 9580 - }, - { - "epoch": 0.7325343578569108, - "grad_norm": 0.0015342329861596227, - "learning_rate": 0.0001999997357474585, - "loss": 46.0, - "step": 9581 - }, - { - "epoch": 0.7326108148403004, - "grad_norm": 0.002045195549726486, - "learning_rate": 0.00019999973569223624, - "loss": 46.0, - "step": 9582 - }, - { - "epoch": 0.7326872718236902, - "grad_norm": 0.001635118154808879, - "learning_rate": 0.0001999997356370082, - "loss": 46.0, - "step": 9583 - }, - { - "epoch": 0.73276372880708, - "grad_norm": 0.0013396850554272532, - "learning_rate": 0.00019999973558177439, - "loss": 46.0, - "step": 9584 - }, - { - "epoch": 0.7328401857904696, - "grad_norm": 0.00120553991291672, - "learning_rate": 0.0001999997355265348, - "loss": 46.0, - "step": 9585 - }, - { - "epoch": 0.7329166427738594, - "grad_norm": 0.0008809276041574776, - "learning_rate": 0.00019999973547128944, - "loss": 46.0, - "step": 9586 - }, - { - "epoch": 0.732993099757249, - "grad_norm": 0.002531450940296054, - "learning_rate": 0.00019999973541603828, - "loss": 46.0, - "step": 9587 - }, - { - "epoch": 0.7330695567406388, - "grad_norm": 0.00129566655959934, - "learning_rate": 0.0001999997353607814, - "loss": 46.0, - "step": 9588 - }, - { - "epoch": 0.7331460137240285, - "grad_norm": 0.0023907015565782785, - "learning_rate": 0.00019999973530551875, - "loss": 46.0, - "step": 9589 - }, - { - "epoch": 0.7332224707074182, - "grad_norm": 0.0010989034781232476, - "learning_rate": 0.0001999997352502503, - "loss": 46.0, - "step": 9590 - }, - { - "epoch": 0.733298927690808, - "grad_norm": 0.0020173180382698774, - "learning_rate": 0.0001999997351949761, - "loss": 46.0, - "step": 9591 - }, - { - "epoch": 0.7333753846741977, - "grad_norm": 0.0014055392239242792, - "learning_rate": 0.0001999997351396961, - "loss": 46.0, - "step": 9592 - }, - { - "epoch": 0.7334518416575874, - "grad_norm": 0.0010951553704217076, - "learning_rate": 0.00019999973508441035, - "loss": 46.0, - "step": 9593 - }, - { - "epoch": 0.7335282986409771, - "grad_norm": 0.003435376100242138, - "learning_rate": 0.00019999973502911886, - "loss": 46.0, - "step": 9594 - }, - { - "epoch": 0.7336047556243669, - "grad_norm": 0.002582656918093562, - "learning_rate": 0.0001999997349738216, - "loss": 46.0, - "step": 9595 - }, - { - "epoch": 0.7336812126077565, - "grad_norm": 0.0032408363185822964, - "learning_rate": 0.0001999997349185185, - "loss": 46.0, - "step": 9596 - }, - { - "epoch": 0.7337576695911463, - "grad_norm": 0.001116379862651229, - "learning_rate": 0.0001999997348632097, - "loss": 46.0, - "step": 9597 - }, - { - "epoch": 0.733834126574536, - "grad_norm": 0.0016293646767735481, - "learning_rate": 0.0001999997348078951, - "loss": 46.0, - "step": 9598 - }, - { - "epoch": 0.7339105835579257, - "grad_norm": 0.0011346143437549472, - "learning_rate": 0.00019999973475257475, - "loss": 46.0, - "step": 9599 - }, - { - "epoch": 0.7339870405413155, - "grad_norm": 0.001031039864756167, - "learning_rate": 0.0001999997346972486, - "loss": 46.0, - "step": 9600 - }, - { - "epoch": 0.7340634975247051, - "grad_norm": 0.0014087734743952751, - "learning_rate": 0.0001999997346419167, - "loss": 46.0, - "step": 9601 - }, - { - "epoch": 0.7341399545080949, - "grad_norm": 0.001699765445664525, - "learning_rate": 0.00019999973458657903, - "loss": 46.0, - "step": 9602 - }, - { - "epoch": 0.7342164114914846, - "grad_norm": 0.0037602009251713753, - "learning_rate": 0.0001999997345312356, - "loss": 46.0, - "step": 9603 - }, - { - "epoch": 0.7342928684748743, - "grad_norm": 0.0031309055630117655, - "learning_rate": 0.00019999973447588638, - "loss": 46.0, - "step": 9604 - }, - { - "epoch": 0.734369325458264, - "grad_norm": 0.001099355984479189, - "learning_rate": 0.0001999997344205314, - "loss": 46.0, - "step": 9605 - }, - { - "epoch": 0.7344457824416538, - "grad_norm": 0.0019889497198164463, - "learning_rate": 0.00019999973436517066, - "loss": 46.0, - "step": 9606 - }, - { - "epoch": 0.7345222394250435, - "grad_norm": 0.001312068896368146, - "learning_rate": 0.00019999973430980412, - "loss": 46.0, - "step": 9607 - }, - { - "epoch": 0.7345986964084332, - "grad_norm": 0.0009766578441485763, - "learning_rate": 0.00019999973425443183, - "loss": 46.0, - "step": 9608 - }, - { - "epoch": 0.7346751533918229, - "grad_norm": 0.0015836225356906652, - "learning_rate": 0.0001999997341990538, - "loss": 46.0, - "step": 9609 - }, - { - "epoch": 0.7347516103752126, - "grad_norm": 0.005435020197182894, - "learning_rate": 0.00019999973414366998, - "loss": 46.0, - "step": 9610 - }, - { - "epoch": 0.7348280673586024, - "grad_norm": 0.0008086469606496394, - "learning_rate": 0.00019999973408828036, - "loss": 46.0, - "step": 9611 - }, - { - "epoch": 0.734904524341992, - "grad_norm": 0.0009257084457203746, - "learning_rate": 0.000199999734032885, - "loss": 46.0, - "step": 9612 - }, - { - "epoch": 0.7349809813253818, - "grad_norm": 0.006190138403326273, - "learning_rate": 0.00019999973397748387, - "loss": 46.0, - "step": 9613 - }, - { - "epoch": 0.7350574383087716, - "grad_norm": 0.0014728892128914595, - "learning_rate": 0.00019999973392207697, - "loss": 46.0, - "step": 9614 - }, - { - "epoch": 0.7351338952921612, - "grad_norm": 0.0016258620889857411, - "learning_rate": 0.0001999997338666643, - "loss": 46.0, - "step": 9615 - }, - { - "epoch": 0.735210352275551, - "grad_norm": 0.0003624729870352894, - "learning_rate": 0.00019999973381124587, - "loss": 46.0, - "step": 9616 - }, - { - "epoch": 0.7352868092589407, - "grad_norm": 0.0011338959448039532, - "learning_rate": 0.00019999973375582164, - "loss": 46.0, - "step": 9617 - }, - { - "epoch": 0.7353632662423304, - "grad_norm": 0.0010770147200673819, - "learning_rate": 0.00019999973370039164, - "loss": 46.0, - "step": 9618 - }, - { - "epoch": 0.7354397232257202, - "grad_norm": 0.00017523637507110834, - "learning_rate": 0.0001999997336449559, - "loss": 46.0, - "step": 9619 - }, - { - "epoch": 0.7355161802091098, - "grad_norm": 0.001183304819278419, - "learning_rate": 0.00019999973358951438, - "loss": 46.0, - "step": 9620 - }, - { - "epoch": 0.7355926371924996, - "grad_norm": 0.0023626259062439203, - "learning_rate": 0.00019999973353406709, - "loss": 46.0, - "step": 9621 - }, - { - "epoch": 0.7356690941758893, - "grad_norm": 0.0006327573210000992, - "learning_rate": 0.00019999973347861402, - "loss": 46.0, - "step": 9622 - }, - { - "epoch": 0.735745551159279, - "grad_norm": 0.0005722071509808302, - "learning_rate": 0.0001999997334231552, - "loss": 46.0, - "step": 9623 - }, - { - "epoch": 0.7358220081426687, - "grad_norm": 0.0014855496119707823, - "learning_rate": 0.00019999973336769062, - "loss": 46.0, - "step": 9624 - }, - { - "epoch": 0.7358984651260585, - "grad_norm": 0.017810819670557976, - "learning_rate": 0.00019999973331222024, - "loss": 46.0, - "step": 9625 - }, - { - "epoch": 0.7359749221094481, - "grad_norm": 0.0010145645355805755, - "learning_rate": 0.0001999997332567441, - "loss": 46.0, - "step": 9626 - }, - { - "epoch": 0.7360513790928379, - "grad_norm": 0.0006681100931018591, - "learning_rate": 0.0001999997332012622, - "loss": 46.0, - "step": 9627 - }, - { - "epoch": 0.7361278360762276, - "grad_norm": 0.0016074924496933818, - "learning_rate": 0.00019999973314577453, - "loss": 46.0, - "step": 9628 - }, - { - "epoch": 0.7362042930596173, - "grad_norm": 0.0013465629890561104, - "learning_rate": 0.00019999973309028107, - "loss": 46.0, - "step": 9629 - }, - { - "epoch": 0.7362807500430071, - "grad_norm": 0.00095395214157179, - "learning_rate": 0.00019999973303478185, - "loss": 46.0, - "step": 9630 - }, - { - "epoch": 0.7363572070263967, - "grad_norm": 0.0012761947000399232, - "learning_rate": 0.00019999973297927688, - "loss": 46.0, - "step": 9631 - }, - { - "epoch": 0.7364336640097865, - "grad_norm": 0.004218399990350008, - "learning_rate": 0.00019999973292376613, - "loss": 46.0, - "step": 9632 - }, - { - "epoch": 0.7365101209931763, - "grad_norm": 0.0012143789790570736, - "learning_rate": 0.0001999997328682496, - "loss": 46.0, - "step": 9633 - }, - { - "epoch": 0.7365865779765659, - "grad_norm": 0.0026241170708090067, - "learning_rate": 0.0001999997328127273, - "loss": 46.0, - "step": 9634 - }, - { - "epoch": 0.7366630349599557, - "grad_norm": 0.0006415036623366177, - "learning_rate": 0.00019999973275719923, - "loss": 46.0, - "step": 9635 - }, - { - "epoch": 0.7367394919433454, - "grad_norm": 0.009661107324063778, - "learning_rate": 0.00019999973270166542, - "loss": 46.0, - "step": 9636 - }, - { - "epoch": 0.7368159489267351, - "grad_norm": 0.0006895619444549084, - "learning_rate": 0.0001999997326461258, - "loss": 46.0, - "step": 9637 - }, - { - "epoch": 0.7368924059101248, - "grad_norm": 0.010084619745612144, - "learning_rate": 0.00019999973259058045, - "loss": 46.0, - "step": 9638 - }, - { - "epoch": 0.7369688628935145, - "grad_norm": 0.0026864074170589447, - "learning_rate": 0.0001999997325350293, - "loss": 46.0, - "step": 9639 - }, - { - "epoch": 0.7370453198769042, - "grad_norm": 0.0007298939162865281, - "learning_rate": 0.0001999997324794724, - "loss": 46.0, - "step": 9640 - }, - { - "epoch": 0.737121776860294, - "grad_norm": 0.0010473444126546383, - "learning_rate": 0.0001999997324239097, - "loss": 46.0, - "step": 9641 - }, - { - "epoch": 0.7371982338436837, - "grad_norm": 0.002117954893037677, - "learning_rate": 0.00019999973236834126, - "loss": 46.0, - "step": 9642 - }, - { - "epoch": 0.7372746908270734, - "grad_norm": 0.001089767785742879, - "learning_rate": 0.00019999973231276706, - "loss": 46.0, - "step": 9643 - }, - { - "epoch": 0.7373511478104632, - "grad_norm": 0.007818181067705154, - "learning_rate": 0.00019999973225718706, - "loss": 46.0, - "step": 9644 - }, - { - "epoch": 0.7374276047938528, - "grad_norm": 0.0018394073704257607, - "learning_rate": 0.00019999973220160132, - "loss": 46.0, - "step": 9645 - }, - { - "epoch": 0.7375040617772426, - "grad_norm": 0.0010977019555866718, - "learning_rate": 0.00019999973214600977, - "loss": 46.0, - "step": 9646 - }, - { - "epoch": 0.7375805187606324, - "grad_norm": 0.0007522209198214114, - "learning_rate": 0.00019999973209041248, - "loss": 46.0, - "step": 9647 - }, - { - "epoch": 0.737656975744022, - "grad_norm": 0.0010153116891160607, - "learning_rate": 0.00019999973203480942, - "loss": 46.0, - "step": 9648 - }, - { - "epoch": 0.7377334327274118, - "grad_norm": 0.0008870738674886525, - "learning_rate": 0.00019999973197920058, - "loss": 46.0, - "step": 9649 - }, - { - "epoch": 0.7378098897108014, - "grad_norm": 0.0005648533115163445, - "learning_rate": 0.00019999973192358597, - "loss": 46.0, - "step": 9650 - }, - { - "epoch": 0.7378863466941912, - "grad_norm": 0.0011166848707944155, - "learning_rate": 0.0001999997318679656, - "loss": 46.0, - "step": 9651 - }, - { - "epoch": 0.7379628036775809, - "grad_norm": 0.005908910185098648, - "learning_rate": 0.00019999973181233945, - "loss": 46.0, - "step": 9652 - }, - { - "epoch": 0.7380392606609706, - "grad_norm": 0.0004542939132079482, - "learning_rate": 0.00019999973175670755, - "loss": 46.0, - "step": 9653 - }, - { - "epoch": 0.7381157176443603, - "grad_norm": 0.0005001468816772103, - "learning_rate": 0.00019999973170106987, - "loss": 46.0, - "step": 9654 - }, - { - "epoch": 0.7381921746277501, - "grad_norm": 0.002464883727952838, - "learning_rate": 0.00019999973164542642, - "loss": 46.0, - "step": 9655 - }, - { - "epoch": 0.7382686316111398, - "grad_norm": 0.0045583778992295265, - "learning_rate": 0.0001999997315897772, - "loss": 46.0, - "step": 9656 - }, - { - "epoch": 0.7383450885945295, - "grad_norm": 0.0022312586661428213, - "learning_rate": 0.00019999973153412223, - "loss": 46.0, - "step": 9657 - }, - { - "epoch": 0.7384215455779192, - "grad_norm": 0.004471499938517809, - "learning_rate": 0.00019999973147846145, - "loss": 46.0, - "step": 9658 - }, - { - "epoch": 0.7384980025613089, - "grad_norm": 0.00023420136130880564, - "learning_rate": 0.0001999997314227949, - "loss": 46.0, - "step": 9659 - }, - { - "epoch": 0.7385744595446987, - "grad_norm": 0.0015983417397364974, - "learning_rate": 0.00019999973136712262, - "loss": 46.0, - "step": 9660 - }, - { - "epoch": 0.7386509165280883, - "grad_norm": 0.0041715409606695175, - "learning_rate": 0.00019999973131144456, - "loss": 46.0, - "step": 9661 - }, - { - "epoch": 0.7387273735114781, - "grad_norm": 0.006407590117305517, - "learning_rate": 0.0001999997312557607, - "loss": 46.0, - "step": 9662 - }, - { - "epoch": 0.7388038304948679, - "grad_norm": 0.001083396840840578, - "learning_rate": 0.0001999997312000711, - "loss": 46.0, - "step": 9663 - }, - { - "epoch": 0.7388802874782575, - "grad_norm": 0.0015013846568763256, - "learning_rate": 0.00019999973114437572, - "loss": 46.0, - "step": 9664 - }, - { - "epoch": 0.7389567444616473, - "grad_norm": 0.0031238263472914696, - "learning_rate": 0.0001999997310886746, - "loss": 46.0, - "step": 9665 - }, - { - "epoch": 0.739033201445037, - "grad_norm": 0.0012393043143674731, - "learning_rate": 0.00019999973103296766, - "loss": 46.0, - "step": 9666 - }, - { - "epoch": 0.7391096584284267, - "grad_norm": 0.0024985058698803186, - "learning_rate": 0.00019999973097725498, - "loss": 46.0, - "step": 9667 - }, - { - "epoch": 0.7391861154118164, - "grad_norm": 0.008887705393135548, - "learning_rate": 0.00019999973092153653, - "loss": 46.0, - "step": 9668 - }, - { - "epoch": 0.7392625723952061, - "grad_norm": 0.0010518525959923863, - "learning_rate": 0.0001999997308658123, - "loss": 46.0, - "step": 9669 - }, - { - "epoch": 0.7393390293785959, - "grad_norm": 0.0009578320314176381, - "learning_rate": 0.0001999997308100823, - "loss": 46.0, - "step": 9670 - }, - { - "epoch": 0.7394154863619856, - "grad_norm": 0.004931585397571325, - "learning_rate": 0.00019999973075434654, - "loss": 46.0, - "step": 9671 - }, - { - "epoch": 0.7394919433453753, - "grad_norm": 0.00102918257471174, - "learning_rate": 0.00019999973069860502, - "loss": 46.0, - "step": 9672 - }, - { - "epoch": 0.739568400328765, - "grad_norm": 0.003197824815288186, - "learning_rate": 0.00019999973064285773, - "loss": 46.0, - "step": 9673 - }, - { - "epoch": 0.7396448573121548, - "grad_norm": 0.0030899906996637583, - "learning_rate": 0.00019999973058710467, - "loss": 46.0, - "step": 9674 - }, - { - "epoch": 0.7397213142955444, - "grad_norm": 0.0005368997808545828, - "learning_rate": 0.00019999973053134583, - "loss": 46.0, - "step": 9675 - }, - { - "epoch": 0.7397977712789342, - "grad_norm": 0.0014108498580753803, - "learning_rate": 0.00019999973047558122, - "loss": 46.0, - "step": 9676 - }, - { - "epoch": 0.739874228262324, - "grad_norm": 0.0004695046809501946, - "learning_rate": 0.0001999997304198108, - "loss": 46.0, - "step": 9677 - }, - { - "epoch": 0.7399506852457136, - "grad_norm": 0.0023242507595568895, - "learning_rate": 0.00019999973036403468, - "loss": 46.0, - "step": 9678 - }, - { - "epoch": 0.7400271422291034, - "grad_norm": 0.0005327652324922383, - "learning_rate": 0.00019999973030825278, - "loss": 46.0, - "step": 9679 - }, - { - "epoch": 0.740103599212493, - "grad_norm": 0.0008565311436541378, - "learning_rate": 0.00019999973025246507, - "loss": 46.0, - "step": 9680 - }, - { - "epoch": 0.7401800561958828, - "grad_norm": 0.002024158136919141, - "learning_rate": 0.00019999973019667162, - "loss": 46.0, - "step": 9681 - }, - { - "epoch": 0.7402565131792725, - "grad_norm": 0.0007599971140734851, - "learning_rate": 0.0001999997301408724, - "loss": 46.0, - "step": 9682 - }, - { - "epoch": 0.7403329701626622, - "grad_norm": 0.001449097995646298, - "learning_rate": 0.0001999997300850674, - "loss": 46.0, - "step": 9683 - }, - { - "epoch": 0.740409427146052, - "grad_norm": 0.0006649132119491696, - "learning_rate": 0.00019999973002925666, - "loss": 46.0, - "step": 9684 - }, - { - "epoch": 0.7404858841294417, - "grad_norm": 0.002705745631828904, - "learning_rate": 0.0001999997299734401, - "loss": 46.0, - "step": 9685 - }, - { - "epoch": 0.7405623411128314, - "grad_norm": 0.0006340504041872919, - "learning_rate": 0.0001999997299176178, - "loss": 46.0, - "step": 9686 - }, - { - "epoch": 0.7406387980962211, - "grad_norm": 0.0028744409792125225, - "learning_rate": 0.00019999972986178973, - "loss": 46.0, - "step": 9687 - }, - { - "epoch": 0.7407152550796108, - "grad_norm": 0.0009442242444492877, - "learning_rate": 0.0001999997298059559, - "loss": 46.0, - "step": 9688 - }, - { - "epoch": 0.7407917120630005, - "grad_norm": 0.0015868137124925852, - "learning_rate": 0.00019999972975011626, - "loss": 46.0, - "step": 9689 - }, - { - "epoch": 0.7408681690463903, - "grad_norm": 0.00102346227504313, - "learning_rate": 0.00019999972969427088, - "loss": 46.0, - "step": 9690 - }, - { - "epoch": 0.74094462602978, - "grad_norm": 0.0021459476556628942, - "learning_rate": 0.00019999972963841972, - "loss": 46.0, - "step": 9691 - }, - { - "epoch": 0.7410210830131697, - "grad_norm": 0.005106296390295029, - "learning_rate": 0.00019999972958256281, - "loss": 46.0, - "step": 9692 - }, - { - "epoch": 0.7410975399965595, - "grad_norm": 0.001213806332089007, - "learning_rate": 0.0001999997295267001, - "loss": 46.0, - "step": 9693 - }, - { - "epoch": 0.7411739969799491, - "grad_norm": 0.0004491073777899146, - "learning_rate": 0.0001999997294708317, - "loss": 46.0, - "step": 9694 - }, - { - "epoch": 0.7412504539633389, - "grad_norm": 0.0002139282296411693, - "learning_rate": 0.00019999972941495746, - "loss": 46.0, - "step": 9695 - }, - { - "epoch": 0.7413269109467286, - "grad_norm": 0.0006848351331427693, - "learning_rate": 0.00019999972935907744, - "loss": 46.0, - "step": 9696 - }, - { - "epoch": 0.7414033679301183, - "grad_norm": 0.012589477933943272, - "learning_rate": 0.00019999972930319167, - "loss": 46.0, - "step": 9697 - }, - { - "epoch": 0.7414798249135081, - "grad_norm": 0.005463951267302036, - "learning_rate": 0.00019999972924730015, - "loss": 46.0, - "step": 9698 - }, - { - "epoch": 0.7415562818968977, - "grad_norm": 0.0004043464723508805, - "learning_rate": 0.00019999972919140284, - "loss": 46.0, - "step": 9699 - }, - { - "epoch": 0.7416327388802875, - "grad_norm": 0.0008798484923318028, - "learning_rate": 0.00019999972913549975, - "loss": 46.0, - "step": 9700 - }, - { - "epoch": 0.7417091958636772, - "grad_norm": 0.0009397157118655741, - "learning_rate": 0.00019999972907959094, - "loss": 46.0, - "step": 9701 - }, - { - "epoch": 0.7417856528470669, - "grad_norm": 0.0006381947896443307, - "learning_rate": 0.0001999997290236763, - "loss": 46.0, - "step": 9702 - }, - { - "epoch": 0.7418621098304566, - "grad_norm": 0.06752842664718628, - "learning_rate": 0.00019999972896775592, - "loss": 46.0, - "step": 9703 - }, - { - "epoch": 0.7419385668138464, - "grad_norm": 0.001153147080913186, - "learning_rate": 0.00019999972891182976, - "loss": 46.0, - "step": 9704 - }, - { - "epoch": 0.742015023797236, - "grad_norm": 0.011964002624154091, - "learning_rate": 0.00019999972885589783, - "loss": 46.0, - "step": 9705 - }, - { - "epoch": 0.7420914807806258, - "grad_norm": 0.0015160597395151854, - "learning_rate": 0.00019999972879996015, - "loss": 46.0, - "step": 9706 - }, - { - "epoch": 0.7421679377640156, - "grad_norm": 0.0010189347667619586, - "learning_rate": 0.00019999972874401668, - "loss": 46.0, - "step": 9707 - }, - { - "epoch": 0.7422443947474052, - "grad_norm": 0.0024226978421211243, - "learning_rate": 0.00019999972868806745, - "loss": 46.0, - "step": 9708 - }, - { - "epoch": 0.742320851730795, - "grad_norm": 0.0012253199238330126, - "learning_rate": 0.00019999972863211246, - "loss": 46.0, - "step": 9709 - }, - { - "epoch": 0.7423973087141846, - "grad_norm": 0.0007613091729581356, - "learning_rate": 0.0001999997285761517, - "loss": 46.0, - "step": 9710 - }, - { - "epoch": 0.7424737656975744, - "grad_norm": 0.0007229610346257687, - "learning_rate": 0.00019999972852018517, - "loss": 46.0, - "step": 9711 - }, - { - "epoch": 0.7425502226809642, - "grad_norm": 0.0011592023074626923, - "learning_rate": 0.00019999972846421283, - "loss": 46.0, - "step": 9712 - }, - { - "epoch": 0.7426266796643538, - "grad_norm": 0.0009097720612771809, - "learning_rate": 0.00019999972840823476, - "loss": 46.0, - "step": 9713 - }, - { - "epoch": 0.7427031366477436, - "grad_norm": 0.0038393689319491386, - "learning_rate": 0.0001999997283522509, - "loss": 46.0, - "step": 9714 - }, - { - "epoch": 0.7427795936311333, - "grad_norm": 0.003027722705155611, - "learning_rate": 0.0001999997282962613, - "loss": 46.0, - "step": 9715 - }, - { - "epoch": 0.742856050614523, - "grad_norm": 0.0016874130815267563, - "learning_rate": 0.0001999997282402659, - "loss": 46.0, - "step": 9716 - }, - { - "epoch": 0.7429325075979127, - "grad_norm": 0.0004744026518892497, - "learning_rate": 0.00019999972818426475, - "loss": 46.0, - "step": 9717 - }, - { - "epoch": 0.7430089645813025, - "grad_norm": 0.000717241142410785, - "learning_rate": 0.00019999972812825782, - "loss": 46.0, - "step": 9718 - }, - { - "epoch": 0.7430854215646922, - "grad_norm": 0.001474284683354199, - "learning_rate": 0.00019999972807224512, - "loss": 46.0, - "step": 9719 - }, - { - "epoch": 0.7431618785480819, - "grad_norm": 0.008101926185190678, - "learning_rate": 0.00019999972801622667, - "loss": 46.0, - "step": 9720 - }, - { - "epoch": 0.7432383355314716, - "grad_norm": 0.0013627730077132583, - "learning_rate": 0.00019999972796020242, - "loss": 46.0, - "step": 9721 - }, - { - "epoch": 0.7433147925148613, - "grad_norm": 0.001118904328905046, - "learning_rate": 0.00019999972790417242, - "loss": 46.0, - "step": 9722 - }, - { - "epoch": 0.7433912494982511, - "grad_norm": 0.003855801885947585, - "learning_rate": 0.00019999972784813665, - "loss": 46.0, - "step": 9723 - }, - { - "epoch": 0.7434677064816407, - "grad_norm": 0.0007810776005499065, - "learning_rate": 0.00019999972779209514, - "loss": 46.0, - "step": 9724 - }, - { - "epoch": 0.7435441634650305, - "grad_norm": 0.004293626639991999, - "learning_rate": 0.0001999997277360478, - "loss": 46.0, - "step": 9725 - }, - { - "epoch": 0.7436206204484203, - "grad_norm": 0.004228371661156416, - "learning_rate": 0.0001999997276799947, - "loss": 46.0, - "step": 9726 - }, - { - "epoch": 0.7436970774318099, - "grad_norm": 0.0007387892110273242, - "learning_rate": 0.00019999972762393587, - "loss": 46.0, - "step": 9727 - }, - { - "epoch": 0.7437735344151997, - "grad_norm": 0.006005508359521627, - "learning_rate": 0.00019999972756787123, - "loss": 46.0, - "step": 9728 - }, - { - "epoch": 0.7438499913985893, - "grad_norm": 0.00045582870370708406, - "learning_rate": 0.00019999972751180085, - "loss": 46.0, - "step": 9729 - }, - { - "epoch": 0.7439264483819791, - "grad_norm": 0.0075628506019711494, - "learning_rate": 0.0001999997274557247, - "loss": 46.0, - "step": 9730 - }, - { - "epoch": 0.7440029053653688, - "grad_norm": 0.0009257580968551338, - "learning_rate": 0.0001999997273996428, - "loss": 46.0, - "step": 9731 - }, - { - "epoch": 0.7440793623487585, - "grad_norm": 0.003533395240083337, - "learning_rate": 0.0001999997273435551, - "loss": 46.0, - "step": 9732 - }, - { - "epoch": 0.7441558193321482, - "grad_norm": 0.0004218671820126474, - "learning_rate": 0.0001999997272874616, - "loss": 46.0, - "step": 9733 - }, - { - "epoch": 0.744232276315538, - "grad_norm": 0.0024166631046682596, - "learning_rate": 0.0001999997272313624, - "loss": 46.0, - "step": 9734 - }, - { - "epoch": 0.7443087332989277, - "grad_norm": 0.0003856563998851925, - "learning_rate": 0.00019999972717525737, - "loss": 46.0, - "step": 9735 - }, - { - "epoch": 0.7443851902823174, - "grad_norm": 0.0014682149048894644, - "learning_rate": 0.0001999997271191466, - "loss": 46.0, - "step": 9736 - }, - { - "epoch": 0.7444616472657072, - "grad_norm": 0.0009955930290743709, - "learning_rate": 0.00019999972706303006, - "loss": 46.0, - "step": 9737 - }, - { - "epoch": 0.7445381042490968, - "grad_norm": 0.005137870088219643, - "learning_rate": 0.00019999972700690774, - "loss": 46.0, - "step": 9738 - }, - { - "epoch": 0.7446145612324866, - "grad_norm": 0.002106881234794855, - "learning_rate": 0.00019999972695077965, - "loss": 46.0, - "step": 9739 - }, - { - "epoch": 0.7446910182158762, - "grad_norm": 0.01727037876844406, - "learning_rate": 0.0001999997268946458, - "loss": 46.0, - "step": 9740 - }, - { - "epoch": 0.744767475199266, - "grad_norm": 0.0017578959232196212, - "learning_rate": 0.00019999972683850618, - "loss": 46.0, - "step": 9741 - }, - { - "epoch": 0.7448439321826558, - "grad_norm": 0.00151543109677732, - "learning_rate": 0.0001999997267823608, - "loss": 46.0, - "step": 9742 - }, - { - "epoch": 0.7449203891660454, - "grad_norm": 0.0031912578269839287, - "learning_rate": 0.0001999997267262096, - "loss": 46.0, - "step": 9743 - }, - { - "epoch": 0.7449968461494352, - "grad_norm": 0.002707613632082939, - "learning_rate": 0.00019999972667005268, - "loss": 46.0, - "step": 9744 - }, - { - "epoch": 0.7450733031328249, - "grad_norm": 0.001352560007944703, - "learning_rate": 0.00019999972661388998, - "loss": 46.0, - "step": 9745 - }, - { - "epoch": 0.7451497601162146, - "grad_norm": 0.0010036175372079015, - "learning_rate": 0.0001999997265577215, - "loss": 46.0, - "step": 9746 - }, - { - "epoch": 0.7452262170996043, - "grad_norm": 0.00435328995808959, - "learning_rate": 0.00019999972650154728, - "loss": 46.0, - "step": 9747 - }, - { - "epoch": 0.7453026740829941, - "grad_norm": 0.0018448621267452836, - "learning_rate": 0.00019999972644536726, - "loss": 46.0, - "step": 9748 - }, - { - "epoch": 0.7453791310663838, - "grad_norm": 0.0012686033733189106, - "learning_rate": 0.00019999972638918146, - "loss": 46.0, - "step": 9749 - }, - { - "epoch": 0.7454555880497735, - "grad_norm": 0.0008322701323777437, - "learning_rate": 0.00019999972633298992, - "loss": 46.0, - "step": 9750 - }, - { - "epoch": 0.7455320450331632, - "grad_norm": 0.0006211692816577852, - "learning_rate": 0.00019999972627679263, - "loss": 46.0, - "step": 9751 - }, - { - "epoch": 0.7456085020165529, - "grad_norm": 0.0003974427527282387, - "learning_rate": 0.00019999972622058954, - "loss": 46.0, - "step": 9752 - }, - { - "epoch": 0.7456849589999427, - "grad_norm": 0.0022102068178355694, - "learning_rate": 0.00019999972616438068, - "loss": 46.0, - "step": 9753 - }, - { - "epoch": 0.7457614159833323, - "grad_norm": 0.0013326670741662383, - "learning_rate": 0.00019999972610816607, - "loss": 46.0, - "step": 9754 - }, - { - "epoch": 0.7458378729667221, - "grad_norm": 0.001092038583010435, - "learning_rate": 0.0001999997260519457, - "loss": 46.0, - "step": 9755 - }, - { - "epoch": 0.7459143299501119, - "grad_norm": 0.0013066742103546858, - "learning_rate": 0.0001999997259957195, - "loss": 46.0, - "step": 9756 - }, - { - "epoch": 0.7459907869335015, - "grad_norm": 0.002295650076121092, - "learning_rate": 0.00019999972593948758, - "loss": 46.0, - "step": 9757 - }, - { - "epoch": 0.7460672439168913, - "grad_norm": 0.00035254412796348333, - "learning_rate": 0.00019999972588324988, - "loss": 46.0, - "step": 9758 - }, - { - "epoch": 0.7461437009002809, - "grad_norm": 0.002182136755436659, - "learning_rate": 0.0001999997258270064, - "loss": 46.0, - "step": 9759 - }, - { - "epoch": 0.7462201578836707, - "grad_norm": 0.0008121103746816516, - "learning_rate": 0.00019999972577075715, - "loss": 46.0, - "step": 9760 - }, - { - "epoch": 0.7462966148670604, - "grad_norm": 0.0012518854346126318, - "learning_rate": 0.00019999972571450213, - "loss": 46.0, - "step": 9761 - }, - { - "epoch": 0.7463730718504501, - "grad_norm": 0.0006228789570741355, - "learning_rate": 0.00019999972565824136, - "loss": 46.0, - "step": 9762 - }, - { - "epoch": 0.7464495288338399, - "grad_norm": 0.001432525459676981, - "learning_rate": 0.00019999972560197482, - "loss": 46.0, - "step": 9763 - }, - { - "epoch": 0.7465259858172296, - "grad_norm": 0.0011126719182357192, - "learning_rate": 0.0001999997255457025, - "loss": 46.0, - "step": 9764 - }, - { - "epoch": 0.7466024428006193, - "grad_norm": 0.002074331045150757, - "learning_rate": 0.00019999972548942442, - "loss": 46.0, - "step": 9765 - }, - { - "epoch": 0.746678899784009, - "grad_norm": 0.002931632800027728, - "learning_rate": 0.00019999972543314053, - "loss": 46.0, - "step": 9766 - }, - { - "epoch": 0.7467553567673988, - "grad_norm": 0.012228799052536488, - "learning_rate": 0.0001999997253768509, - "loss": 46.0, - "step": 9767 - }, - { - "epoch": 0.7468318137507884, - "grad_norm": 0.0009336150833405554, - "learning_rate": 0.0001999997253205555, - "loss": 46.0, - "step": 9768 - }, - { - "epoch": 0.7469082707341782, - "grad_norm": 0.00102690898347646, - "learning_rate": 0.00019999972526425435, - "loss": 46.0, - "step": 9769 - }, - { - "epoch": 0.7469847277175679, - "grad_norm": 0.0003415764949750155, - "learning_rate": 0.0001999997252079474, - "loss": 46.0, - "step": 9770 - }, - { - "epoch": 0.7470611847009576, - "grad_norm": 0.002098718425258994, - "learning_rate": 0.0001999997251516347, - "loss": 46.0, - "step": 9771 - }, - { - "epoch": 0.7471376416843474, - "grad_norm": 0.00022140626970212907, - "learning_rate": 0.00019999972509531625, - "loss": 46.0, - "step": 9772 - }, - { - "epoch": 0.747214098667737, - "grad_norm": 0.0008160950383171439, - "learning_rate": 0.00019999972503899197, - "loss": 46.0, - "step": 9773 - }, - { - "epoch": 0.7472905556511268, - "grad_norm": 0.0030269743874669075, - "learning_rate": 0.00019999972498266198, - "loss": 46.0, - "step": 9774 - }, - { - "epoch": 0.7473670126345165, - "grad_norm": 0.0037928048986941576, - "learning_rate": 0.00019999972492632618, - "loss": 46.0, - "step": 9775 - }, - { - "epoch": 0.7474434696179062, - "grad_norm": 0.001923106610774994, - "learning_rate": 0.00019999972486998461, - "loss": 46.0, - "step": 9776 - }, - { - "epoch": 0.747519926601296, - "grad_norm": 0.0006949757807888091, - "learning_rate": 0.00019999972481363733, - "loss": 46.0, - "step": 9777 - }, - { - "epoch": 0.7475963835846857, - "grad_norm": 0.0025377334095537663, - "learning_rate": 0.00019999972475728424, - "loss": 46.0, - "step": 9778 - }, - { - "epoch": 0.7476728405680754, - "grad_norm": 0.001489555579610169, - "learning_rate": 0.00019999972470092535, - "loss": 46.0, - "step": 9779 - }, - { - "epoch": 0.7477492975514651, - "grad_norm": 0.0019057902973145247, - "learning_rate": 0.0001999997246445607, - "loss": 46.0, - "step": 9780 - }, - { - "epoch": 0.7478257545348548, - "grad_norm": 0.0005225634085945785, - "learning_rate": 0.00019999972458819033, - "loss": 46.0, - "step": 9781 - }, - { - "epoch": 0.7479022115182445, - "grad_norm": 0.0012569152750074863, - "learning_rate": 0.00019999972453181415, - "loss": 46.0, - "step": 9782 - }, - { - "epoch": 0.7479786685016343, - "grad_norm": 0.0033967429772019386, - "learning_rate": 0.00019999972447543222, - "loss": 46.0, - "step": 9783 - }, - { - "epoch": 0.748055125485024, - "grad_norm": 0.0016485553933307528, - "learning_rate": 0.00019999972441904452, - "loss": 46.0, - "step": 9784 - }, - { - "epoch": 0.7481315824684137, - "grad_norm": 0.0008147096959874034, - "learning_rate": 0.00019999972436265102, - "loss": 46.0, - "step": 9785 - }, - { - "epoch": 0.7482080394518035, - "grad_norm": 0.0026291979011148214, - "learning_rate": 0.00019999972430625177, - "loss": 46.0, - "step": 9786 - }, - { - "epoch": 0.7482844964351931, - "grad_norm": 0.0017728663515299559, - "learning_rate": 0.00019999972424984678, - "loss": 46.0, - "step": 9787 - }, - { - "epoch": 0.7483609534185829, - "grad_norm": 0.0016757167177274823, - "learning_rate": 0.00019999972419343598, - "loss": 46.0, - "step": 9788 - }, - { - "epoch": 0.7484374104019725, - "grad_norm": 0.0026348098181188107, - "learning_rate": 0.0001999997241370194, - "loss": 46.0, - "step": 9789 - }, - { - "epoch": 0.7485138673853623, - "grad_norm": 0.025955254212021828, - "learning_rate": 0.0001999997240805971, - "loss": 46.0, - "step": 9790 - }, - { - "epoch": 0.7485903243687521, - "grad_norm": 0.0026988142635673285, - "learning_rate": 0.000199999724024169, - "loss": 46.0, - "step": 9791 - }, - { - "epoch": 0.7486667813521417, - "grad_norm": 0.0017523474525660276, - "learning_rate": 0.00019999972396773515, - "loss": 46.0, - "step": 9792 - }, - { - "epoch": 0.7487432383355315, - "grad_norm": 0.0005799881764687598, - "learning_rate": 0.0001999997239112955, - "loss": 46.0, - "step": 9793 - }, - { - "epoch": 0.7488196953189212, - "grad_norm": 0.0014589010970667005, - "learning_rate": 0.0001999997238548501, - "loss": 46.0, - "step": 9794 - }, - { - "epoch": 0.7488961523023109, - "grad_norm": 0.0022163703106343746, - "learning_rate": 0.00019999972379839892, - "loss": 46.0, - "step": 9795 - }, - { - "epoch": 0.7489726092857006, - "grad_norm": 0.001036088913679123, - "learning_rate": 0.00019999972374194197, - "loss": 46.0, - "step": 9796 - }, - { - "epoch": 0.7490490662690904, - "grad_norm": 0.001170091680251062, - "learning_rate": 0.00019999972368547927, - "loss": 46.0, - "step": 9797 - }, - { - "epoch": 0.74912552325248, - "grad_norm": 0.006794528104364872, - "learning_rate": 0.0001999997236290108, - "loss": 46.0, - "step": 9798 - }, - { - "epoch": 0.7492019802358698, - "grad_norm": 0.002051667310297489, - "learning_rate": 0.00019999972357253654, - "loss": 46.0, - "step": 9799 - }, - { - "epoch": 0.7492784372192595, - "grad_norm": 0.000676435069181025, - "learning_rate": 0.00019999972351605655, - "loss": 46.0, - "step": 9800 - }, - { - "epoch": 0.7493548942026492, - "grad_norm": 0.00033562039607204497, - "learning_rate": 0.00019999972345957075, - "loss": 46.0, - "step": 9801 - }, - { - "epoch": 0.749431351186039, - "grad_norm": 0.0004428086685948074, - "learning_rate": 0.0001999997234030792, - "loss": 46.0, - "step": 9802 - }, - { - "epoch": 0.7495078081694286, - "grad_norm": 0.001040892326273024, - "learning_rate": 0.00019999972334658184, - "loss": 46.0, - "step": 9803 - }, - { - "epoch": 0.7495842651528184, - "grad_norm": 0.0007352923857979476, - "learning_rate": 0.00019999972329007876, - "loss": 46.0, - "step": 9804 - }, - { - "epoch": 0.7496607221362082, - "grad_norm": 0.0024205262307077646, - "learning_rate": 0.00019999972323356987, - "loss": 46.0, - "step": 9805 - }, - { - "epoch": 0.7497371791195978, - "grad_norm": 0.0021224578376859426, - "learning_rate": 0.00019999972317705526, - "loss": 46.0, - "step": 9806 - }, - { - "epoch": 0.7498136361029876, - "grad_norm": 0.0049773999489843845, - "learning_rate": 0.00019999972312053485, - "loss": 46.0, - "step": 9807 - }, - { - "epoch": 0.7498900930863773, - "grad_norm": 0.0021834401413798332, - "learning_rate": 0.00019999972306400867, - "loss": 46.0, - "step": 9808 - }, - { - "epoch": 0.749966550069767, - "grad_norm": 0.01164508331567049, - "learning_rate": 0.00019999972300747674, - "loss": 46.0, - "step": 9809 - }, - { - "epoch": 0.7500430070531567, - "grad_norm": 0.004073922988027334, - "learning_rate": 0.00019999972295093902, - "loss": 46.0, - "step": 9810 - }, - { - "epoch": 0.7501194640365464, - "grad_norm": 0.0008712432463653386, - "learning_rate": 0.00019999972289439554, - "loss": 46.0, - "step": 9811 - }, - { - "epoch": 0.7501959210199362, - "grad_norm": 0.0016555271577090025, - "learning_rate": 0.0001999997228378463, - "loss": 46.0, - "step": 9812 - }, - { - "epoch": 0.7502723780033259, - "grad_norm": 0.0003782497951760888, - "learning_rate": 0.00019999972278129127, - "loss": 46.0, - "step": 9813 - }, - { - "epoch": 0.7503488349867156, - "grad_norm": 0.0021239907946437597, - "learning_rate": 0.00019999972272473045, - "loss": 46.0, - "step": 9814 - }, - { - "epoch": 0.7504252919701053, - "grad_norm": 0.0009066574275493622, - "learning_rate": 0.00019999972266816388, - "loss": 46.0, - "step": 9815 - }, - { - "epoch": 0.7505017489534951, - "grad_norm": 0.0007880771881900728, - "learning_rate": 0.0001999997226115916, - "loss": 46.0, - "step": 9816 - }, - { - "epoch": 0.7505782059368847, - "grad_norm": 0.000987007631920278, - "learning_rate": 0.00019999972255501348, - "loss": 46.0, - "step": 9817 - }, - { - "epoch": 0.7506546629202745, - "grad_norm": 0.0022335790563374758, - "learning_rate": 0.00019999972249842962, - "loss": 46.0, - "step": 9818 - }, - { - "epoch": 0.7507311199036641, - "grad_norm": 0.0009881261503323913, - "learning_rate": 0.00019999972244183996, - "loss": 46.0, - "step": 9819 - }, - { - "epoch": 0.7508075768870539, - "grad_norm": 0.0028058013413101435, - "learning_rate": 0.00019999972238524455, - "loss": 46.0, - "step": 9820 - }, - { - "epoch": 0.7508840338704437, - "grad_norm": 0.0009051478118635714, - "learning_rate": 0.0001999997223286434, - "loss": 46.0, - "step": 9821 - }, - { - "epoch": 0.7509604908538333, - "grad_norm": 0.0011644193436950445, - "learning_rate": 0.00019999972227203644, - "loss": 46.0, - "step": 9822 - }, - { - "epoch": 0.7510369478372231, - "grad_norm": 0.003027724800631404, - "learning_rate": 0.00019999972221542372, - "loss": 46.0, - "step": 9823 - }, - { - "epoch": 0.7511134048206128, - "grad_norm": 0.0034554265439510345, - "learning_rate": 0.00019999972215880524, - "loss": 46.0, - "step": 9824 - }, - { - "epoch": 0.7511898618040025, - "grad_norm": 0.0034576591569930315, - "learning_rate": 0.000199999722102181, - "loss": 46.0, - "step": 9825 - }, - { - "epoch": 0.7512663187873923, - "grad_norm": 0.0009959448361769319, - "learning_rate": 0.00019999972204555095, - "loss": 46.0, - "step": 9826 - }, - { - "epoch": 0.751342775770782, - "grad_norm": 0.0002472540072631091, - "learning_rate": 0.00019999972198891515, - "loss": 46.0, - "step": 9827 - }, - { - "epoch": 0.7514192327541717, - "grad_norm": 0.0032396509777754545, - "learning_rate": 0.0001999997219322736, - "loss": 46.0, - "step": 9828 - }, - { - "epoch": 0.7514956897375614, - "grad_norm": 0.0009097022120840847, - "learning_rate": 0.00019999972187562624, - "loss": 46.0, - "step": 9829 - }, - { - "epoch": 0.7515721467209511, - "grad_norm": 0.0012322955299168825, - "learning_rate": 0.00019999972181897318, - "loss": 46.0, - "step": 9830 - }, - { - "epoch": 0.7516486037043408, - "grad_norm": 0.001780449179932475, - "learning_rate": 0.00019999972176231427, - "loss": 46.0, - "step": 9831 - }, - { - "epoch": 0.7517250606877306, - "grad_norm": 0.0020478214137256145, - "learning_rate": 0.00019999972170564964, - "loss": 46.0, - "step": 9832 - }, - { - "epoch": 0.7518015176711202, - "grad_norm": 0.0005412310129031539, - "learning_rate": 0.00019999972164897926, - "loss": 46.0, - "step": 9833 - }, - { - "epoch": 0.75187797465451, - "grad_norm": 0.0031512987334281206, - "learning_rate": 0.00019999972159230308, - "loss": 46.0, - "step": 9834 - }, - { - "epoch": 0.7519544316378998, - "grad_norm": 0.0008630980155430734, - "learning_rate": 0.0001999997215356211, - "loss": 46.0, - "step": 9835 - }, - { - "epoch": 0.7520308886212894, - "grad_norm": 0.0009712826577015221, - "learning_rate": 0.0001999997214789334, - "loss": 46.0, - "step": 9836 - }, - { - "epoch": 0.7521073456046792, - "grad_norm": 0.0009332008776254952, - "learning_rate": 0.0001999997214222399, - "loss": 46.0, - "step": 9837 - }, - { - "epoch": 0.7521838025880689, - "grad_norm": 0.0010059881024062634, - "learning_rate": 0.00019999972136554065, - "loss": 46.0, - "step": 9838 - }, - { - "epoch": 0.7522602595714586, - "grad_norm": 0.0036950900685042143, - "learning_rate": 0.0001999997213088356, - "loss": 46.0, - "step": 9839 - }, - { - "epoch": 0.7523367165548484, - "grad_norm": 0.0007250310736708343, - "learning_rate": 0.00019999972125212483, - "loss": 46.0, - "step": 9840 - }, - { - "epoch": 0.752413173538238, - "grad_norm": 0.004591570235788822, - "learning_rate": 0.00019999972119540824, - "loss": 46.0, - "step": 9841 - }, - { - "epoch": 0.7524896305216278, - "grad_norm": 0.0012448506895452738, - "learning_rate": 0.00019999972113868592, - "loss": 46.0, - "step": 9842 - }, - { - "epoch": 0.7525660875050175, - "grad_norm": 0.00035542011028155684, - "learning_rate": 0.00019999972108195778, - "loss": 46.0, - "step": 9843 - }, - { - "epoch": 0.7526425444884072, - "grad_norm": 0.0013621420366689563, - "learning_rate": 0.00019999972102522392, - "loss": 46.0, - "step": 9844 - }, - { - "epoch": 0.7527190014717969, - "grad_norm": 0.0008436488569714129, - "learning_rate": 0.0001999997209684843, - "loss": 46.0, - "step": 9845 - }, - { - "epoch": 0.7527954584551867, - "grad_norm": 0.0010542816016823053, - "learning_rate": 0.00019999972091173888, - "loss": 46.0, - "step": 9846 - }, - { - "epoch": 0.7528719154385763, - "grad_norm": 0.006132427137345076, - "learning_rate": 0.0001999997208549877, - "loss": 46.0, - "step": 9847 - }, - { - "epoch": 0.7529483724219661, - "grad_norm": 0.0012286645360291004, - "learning_rate": 0.00019999972079823075, - "loss": 46.0, - "step": 9848 - }, - { - "epoch": 0.7530248294053559, - "grad_norm": 0.000861355394590646, - "learning_rate": 0.00019999972074146802, - "loss": 46.0, - "step": 9849 - }, - { - "epoch": 0.7531012863887455, - "grad_norm": 0.0007934892200864851, - "learning_rate": 0.00019999972068469952, - "loss": 46.0, - "step": 9850 - }, - { - "epoch": 0.7531777433721353, - "grad_norm": 0.003642365802079439, - "learning_rate": 0.00019999972062792525, - "loss": 46.0, - "step": 9851 - }, - { - "epoch": 0.7532542003555249, - "grad_norm": 0.0037795936223119497, - "learning_rate": 0.00019999972057114525, - "loss": 46.0, - "step": 9852 - }, - { - "epoch": 0.7533306573389147, - "grad_norm": 0.0013161622919142246, - "learning_rate": 0.00019999972051435943, - "loss": 46.0, - "step": 9853 - }, - { - "epoch": 0.7534071143223044, - "grad_norm": 0.0008625526097603142, - "learning_rate": 0.00019999972045756787, - "loss": 46.0, - "step": 9854 - }, - { - "epoch": 0.7534835713056941, - "grad_norm": 0.0017944211140275002, - "learning_rate": 0.00019999972040077053, - "loss": 46.0, - "step": 9855 - }, - { - "epoch": 0.7535600282890839, - "grad_norm": 0.0017904648557305336, - "learning_rate": 0.0001999997203439674, - "loss": 46.0, - "step": 9856 - }, - { - "epoch": 0.7536364852724736, - "grad_norm": 0.0009710281156003475, - "learning_rate": 0.00019999972028715855, - "loss": 46.0, - "step": 9857 - }, - { - "epoch": 0.7537129422558633, - "grad_norm": 0.003670301754027605, - "learning_rate": 0.0001999997202303439, - "loss": 46.0, - "step": 9858 - }, - { - "epoch": 0.753789399239253, - "grad_norm": 0.001092874095775187, - "learning_rate": 0.00019999972017352348, - "loss": 46.0, - "step": 9859 - }, - { - "epoch": 0.7538658562226427, - "grad_norm": 0.0006195945898070931, - "learning_rate": 0.0001999997201166973, - "loss": 46.0, - "step": 9860 - }, - { - "epoch": 0.7539423132060324, - "grad_norm": 0.0008521394338458776, - "learning_rate": 0.00019999972005986532, - "loss": 46.0, - "step": 9861 - }, - { - "epoch": 0.7540187701894222, - "grad_norm": 0.0012158494209870696, - "learning_rate": 0.0001999997200030276, - "loss": 46.0, - "step": 9862 - }, - { - "epoch": 0.7540952271728119, - "grad_norm": 0.0013173875631764531, - "learning_rate": 0.00019999971994618412, - "loss": 46.0, - "step": 9863 - }, - { - "epoch": 0.7541716841562016, - "grad_norm": 0.001688745222054422, - "learning_rate": 0.00019999971988933485, - "loss": 46.0, - "step": 9864 - }, - { - "epoch": 0.7542481411395914, - "grad_norm": 0.004590890370309353, - "learning_rate": 0.0001999997198324798, - "loss": 46.0, - "step": 9865 - }, - { - "epoch": 0.754324598122981, - "grad_norm": 0.0009763687266968191, - "learning_rate": 0.000199999719775619, - "loss": 46.0, - "step": 9866 - }, - { - "epoch": 0.7544010551063708, - "grad_norm": 0.006379557307809591, - "learning_rate": 0.00019999971971875244, - "loss": 46.0, - "step": 9867 - }, - { - "epoch": 0.7544775120897605, - "grad_norm": 0.001985793700441718, - "learning_rate": 0.0001999997196618801, - "loss": 46.0, - "step": 9868 - }, - { - "epoch": 0.7545539690731502, - "grad_norm": 0.0010254301596432924, - "learning_rate": 0.000199999719605002, - "loss": 46.0, - "step": 9869 - }, - { - "epoch": 0.75463042605654, - "grad_norm": 0.0014939854154363275, - "learning_rate": 0.0001999997195481181, - "loss": 46.0, - "step": 9870 - }, - { - "epoch": 0.7547068830399296, - "grad_norm": 0.0033801430836319923, - "learning_rate": 0.00019999971949122847, - "loss": 46.0, - "step": 9871 - }, - { - "epoch": 0.7547833400233194, - "grad_norm": 0.0013937511248514056, - "learning_rate": 0.00019999971943433304, - "loss": 46.0, - "step": 9872 - }, - { - "epoch": 0.7548597970067091, - "grad_norm": 0.0010351731907576323, - "learning_rate": 0.00019999971937743186, - "loss": 46.0, - "step": 9873 - }, - { - "epoch": 0.7549362539900988, - "grad_norm": 0.0011348961852490902, - "learning_rate": 0.0001999997193205249, - "loss": 46.0, - "step": 9874 - }, - { - "epoch": 0.7550127109734885, - "grad_norm": 0.0009345394209958613, - "learning_rate": 0.00019999971926361218, - "loss": 46.0, - "step": 9875 - }, - { - "epoch": 0.7550891679568783, - "grad_norm": 0.000685446837451309, - "learning_rate": 0.00019999971920669368, - "loss": 46.0, - "step": 9876 - }, - { - "epoch": 0.755165624940268, - "grad_norm": 0.00556287681683898, - "learning_rate": 0.0001999997191497694, - "loss": 46.0, - "step": 9877 - }, - { - "epoch": 0.7552420819236577, - "grad_norm": 0.00029644460300914943, - "learning_rate": 0.00019999971909283936, - "loss": 46.0, - "step": 9878 - }, - { - "epoch": 0.7553185389070475, - "grad_norm": 0.0007393202395178378, - "learning_rate": 0.00019999971903590357, - "loss": 46.0, - "step": 9879 - }, - { - "epoch": 0.7553949958904371, - "grad_norm": 0.000838323263451457, - "learning_rate": 0.000199999718978962, - "loss": 46.0, - "step": 9880 - }, - { - "epoch": 0.7554714528738269, - "grad_norm": 0.0008017085492610931, - "learning_rate": 0.00019999971892201463, - "loss": 46.0, - "step": 9881 - }, - { - "epoch": 0.7555479098572165, - "grad_norm": 0.0038079828955233097, - "learning_rate": 0.00019999971886506152, - "loss": 46.0, - "step": 9882 - }, - { - "epoch": 0.7556243668406063, - "grad_norm": 0.0025249787140637636, - "learning_rate": 0.00019999971880810263, - "loss": 46.0, - "step": 9883 - }, - { - "epoch": 0.7557008238239961, - "grad_norm": 0.001586354337632656, - "learning_rate": 0.000199999718751138, - "loss": 46.0, - "step": 9884 - }, - { - "epoch": 0.7557772808073857, - "grad_norm": 0.003486678237095475, - "learning_rate": 0.0001999997186941676, - "loss": 46.0, - "step": 9885 - }, - { - "epoch": 0.7558537377907755, - "grad_norm": 0.0005069439648650587, - "learning_rate": 0.00019999971863719142, - "loss": 46.0, - "step": 9886 - }, - { - "epoch": 0.7559301947741652, - "grad_norm": 0.0019650235772132874, - "learning_rate": 0.00019999971858020944, - "loss": 46.0, - "step": 9887 - }, - { - "epoch": 0.7560066517575549, - "grad_norm": 0.0006303850677795708, - "learning_rate": 0.0001999997185232217, - "loss": 46.0, - "step": 9888 - }, - { - "epoch": 0.7560831087409446, - "grad_norm": 0.002826918847858906, - "learning_rate": 0.0001999997184662282, - "loss": 46.0, - "step": 9889 - }, - { - "epoch": 0.7561595657243343, - "grad_norm": 0.0014048358425498009, - "learning_rate": 0.00019999971840922894, - "loss": 46.0, - "step": 9890 - }, - { - "epoch": 0.756236022707724, - "grad_norm": 0.0015641612699255347, - "learning_rate": 0.0001999997183522239, - "loss": 46.0, - "step": 9891 - }, - { - "epoch": 0.7563124796911138, - "grad_norm": 0.001855909125879407, - "learning_rate": 0.0001999997182952131, - "loss": 46.0, - "step": 9892 - }, - { - "epoch": 0.7563889366745035, - "grad_norm": 0.003341649891808629, - "learning_rate": 0.00019999971823819653, - "loss": 46.0, - "step": 9893 - }, - { - "epoch": 0.7564653936578932, - "grad_norm": 0.002157648093998432, - "learning_rate": 0.00019999971818117417, - "loss": 46.0, - "step": 9894 - }, - { - "epoch": 0.756541850641283, - "grad_norm": 0.0012987025547772646, - "learning_rate": 0.00019999971812414605, - "loss": 46.0, - "step": 9895 - }, - { - "epoch": 0.7566183076246726, - "grad_norm": 0.0004675733798649162, - "learning_rate": 0.00019999971806711214, - "loss": 46.0, - "step": 9896 - }, - { - "epoch": 0.7566947646080624, - "grad_norm": 0.0015582921914756298, - "learning_rate": 0.0001999997180100725, - "loss": 46.0, - "step": 9897 - }, - { - "epoch": 0.7567712215914522, - "grad_norm": 0.0011863448889926076, - "learning_rate": 0.00019999971795302707, - "loss": 46.0, - "step": 9898 - }, - { - "epoch": 0.7568476785748418, - "grad_norm": 0.0015999755123630166, - "learning_rate": 0.00019999971789597587, - "loss": 46.0, - "step": 9899 - }, - { - "epoch": 0.7569241355582316, - "grad_norm": 0.0007188166491687298, - "learning_rate": 0.00019999971783891894, - "loss": 46.0, - "step": 9900 - }, - { - "epoch": 0.7570005925416212, - "grad_norm": 0.000480824412079528, - "learning_rate": 0.0001999997177818562, - "loss": 46.0, - "step": 9901 - }, - { - "epoch": 0.757077049525011, - "grad_norm": 0.004491476342082024, - "learning_rate": 0.0001999997177247877, - "loss": 46.0, - "step": 9902 - }, - { - "epoch": 0.7571535065084007, - "grad_norm": 0.0012539024464786053, - "learning_rate": 0.00019999971766771342, - "loss": 46.0, - "step": 9903 - }, - { - "epoch": 0.7572299634917904, - "grad_norm": 0.0012809648178517818, - "learning_rate": 0.00019999971761063337, - "loss": 46.0, - "step": 9904 - }, - { - "epoch": 0.7573064204751802, - "grad_norm": 0.002720392309129238, - "learning_rate": 0.00019999971755354755, - "loss": 46.0, - "step": 9905 - }, - { - "epoch": 0.7573828774585699, - "grad_norm": 0.0015013209776952863, - "learning_rate": 0.000199999717496456, - "loss": 46.0, - "step": 9906 - }, - { - "epoch": 0.7574593344419596, - "grad_norm": 0.0018568446394056082, - "learning_rate": 0.00019999971743935865, - "loss": 46.0, - "step": 9907 - }, - { - "epoch": 0.7575357914253493, - "grad_norm": 0.0007769854855723679, - "learning_rate": 0.0001999997173822555, - "loss": 46.0, - "step": 9908 - }, - { - "epoch": 0.7576122484087391, - "grad_norm": 0.0007741964654996991, - "learning_rate": 0.00019999971732514662, - "loss": 46.0, - "step": 9909 - }, - { - "epoch": 0.7576887053921287, - "grad_norm": 0.0008315082523040473, - "learning_rate": 0.00019999971726803196, - "loss": 46.0, - "step": 9910 - }, - { - "epoch": 0.7577651623755185, - "grad_norm": 0.000988107523880899, - "learning_rate": 0.00019999971721091156, - "loss": 46.0, - "step": 9911 - }, - { - "epoch": 0.7578416193589081, - "grad_norm": 0.004938120488077402, - "learning_rate": 0.00019999971715378533, - "loss": 46.0, - "step": 9912 - }, - { - "epoch": 0.7579180763422979, - "grad_norm": 0.0009589770343154669, - "learning_rate": 0.0001999997170966534, - "loss": 46.0, - "step": 9913 - }, - { - "epoch": 0.7579945333256877, - "grad_norm": 0.0013909179251641035, - "learning_rate": 0.00019999971703951562, - "loss": 46.0, - "step": 9914 - }, - { - "epoch": 0.7580709903090773, - "grad_norm": 0.0011247647926211357, - "learning_rate": 0.00019999971698237215, - "loss": 46.0, - "step": 9915 - }, - { - "epoch": 0.7581474472924671, - "grad_norm": 0.0031595658510923386, - "learning_rate": 0.00019999971692522288, - "loss": 46.0, - "step": 9916 - }, - { - "epoch": 0.7582239042758568, - "grad_norm": 0.0028955473098903894, - "learning_rate": 0.00019999971686806783, - "loss": 46.0, - "step": 9917 - }, - { - "epoch": 0.7583003612592465, - "grad_norm": 0.004822517279535532, - "learning_rate": 0.000199999716810907, - "loss": 46.0, - "step": 9918 - }, - { - "epoch": 0.7583768182426363, - "grad_norm": 0.0007866416126489639, - "learning_rate": 0.00019999971675374042, - "loss": 46.0, - "step": 9919 - }, - { - "epoch": 0.7584532752260259, - "grad_norm": 0.0013370139058679342, - "learning_rate": 0.00019999971669656808, - "loss": 46.0, - "step": 9920 - }, - { - "epoch": 0.7585297322094157, - "grad_norm": 0.005823432933539152, - "learning_rate": 0.00019999971663938997, - "loss": 46.0, - "step": 9921 - }, - { - "epoch": 0.7586061891928054, - "grad_norm": 0.0009634061716496944, - "learning_rate": 0.00019999971658220606, - "loss": 46.0, - "step": 9922 - }, - { - "epoch": 0.7586826461761951, - "grad_norm": 0.0007240860722959042, - "learning_rate": 0.0001999997165250164, - "loss": 46.0, - "step": 9923 - }, - { - "epoch": 0.7587591031595848, - "grad_norm": 0.0006135416333563626, - "learning_rate": 0.000199999716467821, - "loss": 46.0, - "step": 9924 - }, - { - "epoch": 0.7588355601429746, - "grad_norm": 0.00035883820964954793, - "learning_rate": 0.00019999971641061976, - "loss": 46.0, - "step": 9925 - }, - { - "epoch": 0.7589120171263642, - "grad_norm": 0.0023508856538683176, - "learning_rate": 0.0001999997163534128, - "loss": 46.0, - "step": 9926 - }, - { - "epoch": 0.758988474109754, - "grad_norm": 0.003199746599420905, - "learning_rate": 0.00019999971629620006, - "loss": 46.0, - "step": 9927 - }, - { - "epoch": 0.7590649310931438, - "grad_norm": 0.0014498098753392696, - "learning_rate": 0.00019999971623898156, - "loss": 46.0, - "step": 9928 - }, - { - "epoch": 0.7591413880765334, - "grad_norm": 0.0010453228605911136, - "learning_rate": 0.00019999971618175728, - "loss": 46.0, - "step": 9929 - }, - { - "epoch": 0.7592178450599232, - "grad_norm": 0.0013948811683803797, - "learning_rate": 0.00019999971612452724, - "loss": 46.0, - "step": 9930 - }, - { - "epoch": 0.7592943020433128, - "grad_norm": 0.002305847592651844, - "learning_rate": 0.00019999971606729142, - "loss": 46.0, - "step": 9931 - }, - { - "epoch": 0.7593707590267026, - "grad_norm": 0.0024088099598884583, - "learning_rate": 0.00019999971601004983, - "loss": 46.0, - "step": 9932 - }, - { - "epoch": 0.7594472160100924, - "grad_norm": 0.0008411537273786962, - "learning_rate": 0.00019999971595280246, - "loss": 46.0, - "step": 9933 - }, - { - "epoch": 0.759523672993482, - "grad_norm": 0.0027810793835669756, - "learning_rate": 0.00019999971589554935, - "loss": 46.0, - "step": 9934 - }, - { - "epoch": 0.7596001299768718, - "grad_norm": 0.0029860492795705795, - "learning_rate": 0.00019999971583829047, - "loss": 46.0, - "step": 9935 - }, - { - "epoch": 0.7596765869602615, - "grad_norm": 0.0019378914730623364, - "learning_rate": 0.00019999971578102578, - "loss": 46.0, - "step": 9936 - }, - { - "epoch": 0.7597530439436512, - "grad_norm": 0.0011977998074144125, - "learning_rate": 0.00019999971572375538, - "loss": 46.0, - "step": 9937 - }, - { - "epoch": 0.7598295009270409, - "grad_norm": 0.001180144026875496, - "learning_rate": 0.00019999971566647914, - "loss": 46.0, - "step": 9938 - }, - { - "epoch": 0.7599059579104307, - "grad_norm": 0.0007810101960785687, - "learning_rate": 0.0001999997156091972, - "loss": 46.0, - "step": 9939 - }, - { - "epoch": 0.7599824148938203, - "grad_norm": 0.0006825368036516011, - "learning_rate": 0.00019999971555190947, - "loss": 46.0, - "step": 9940 - }, - { - "epoch": 0.7600588718772101, - "grad_norm": 0.0009707232820801437, - "learning_rate": 0.00019999971549461594, - "loss": 46.0, - "step": 9941 - }, - { - "epoch": 0.7601353288605998, - "grad_norm": 0.0010974803008139133, - "learning_rate": 0.00019999971543731667, - "loss": 46.0, - "step": 9942 - }, - { - "epoch": 0.7602117858439895, - "grad_norm": 0.003200179897248745, - "learning_rate": 0.0001999997153800116, - "loss": 46.0, - "step": 9943 - }, - { - "epoch": 0.7602882428273793, - "grad_norm": 0.0021847127936780453, - "learning_rate": 0.00019999971532270078, - "loss": 46.0, - "step": 9944 - }, - { - "epoch": 0.7603646998107689, - "grad_norm": 0.0005863320548087358, - "learning_rate": 0.00019999971526538421, - "loss": 46.0, - "step": 9945 - }, - { - "epoch": 0.7604411567941587, - "grad_norm": 0.008221834897994995, - "learning_rate": 0.00019999971520806185, - "loss": 46.0, - "step": 9946 - }, - { - "epoch": 0.7605176137775485, - "grad_norm": 0.0010193337220698595, - "learning_rate": 0.0001999997151507337, - "loss": 46.0, - "step": 9947 - }, - { - "epoch": 0.7605940707609381, - "grad_norm": 0.0005310768028721213, - "learning_rate": 0.00019999971509339983, - "loss": 46.0, - "step": 9948 - }, - { - "epoch": 0.7606705277443279, - "grad_norm": 0.001330150873400271, - "learning_rate": 0.00019999971503606014, - "loss": 46.0, - "step": 9949 - }, - { - "epoch": 0.7607469847277176, - "grad_norm": 0.001468959730118513, - "learning_rate": 0.00019999971497871474, - "loss": 46.0, - "step": 9950 - }, - { - "epoch": 0.7608234417111073, - "grad_norm": 0.000902983476407826, - "learning_rate": 0.00019999971492136353, - "loss": 46.0, - "step": 9951 - }, - { - "epoch": 0.760899898694497, - "grad_norm": 0.0020855721086263657, - "learning_rate": 0.00019999971486400655, - "loss": 46.0, - "step": 9952 - }, - { - "epoch": 0.7609763556778867, - "grad_norm": 0.0006067457725293934, - "learning_rate": 0.0001999997148066438, - "loss": 46.0, - "step": 9953 - }, - { - "epoch": 0.7610528126612764, - "grad_norm": 0.0014846770791336894, - "learning_rate": 0.00019999971474927528, - "loss": 46.0, - "step": 9954 - }, - { - "epoch": 0.7611292696446662, - "grad_norm": 0.0006153883296065032, - "learning_rate": 0.000199999714691901, - "loss": 46.0, - "step": 9955 - }, - { - "epoch": 0.7612057266280559, - "grad_norm": 0.003313641529530287, - "learning_rate": 0.00019999971463452093, - "loss": 46.0, - "step": 9956 - }, - { - "epoch": 0.7612821836114456, - "grad_norm": 0.007295271381735802, - "learning_rate": 0.00019999971457713514, - "loss": 46.0, - "step": 9957 - }, - { - "epoch": 0.7613586405948354, - "grad_norm": 0.0059340219013392925, - "learning_rate": 0.00019999971451974355, - "loss": 46.0, - "step": 9958 - }, - { - "epoch": 0.761435097578225, - "grad_norm": 0.0018647159449756145, - "learning_rate": 0.00019999971446234619, - "loss": 46.0, - "step": 9959 - }, - { - "epoch": 0.7615115545616148, - "grad_norm": 0.0006109130918048322, - "learning_rate": 0.00019999971440494305, - "loss": 46.0, - "step": 9960 - }, - { - "epoch": 0.7615880115450044, - "grad_norm": 0.0020299204625189304, - "learning_rate": 0.00019999971434753416, - "loss": 46.0, - "step": 9961 - }, - { - "epoch": 0.7616644685283942, - "grad_norm": 0.0012731908354908228, - "learning_rate": 0.0001999997142901195, - "loss": 46.0, - "step": 9962 - }, - { - "epoch": 0.761740925511784, - "grad_norm": 0.011232506483793259, - "learning_rate": 0.00019999971423269902, - "loss": 46.0, - "step": 9963 - }, - { - "epoch": 0.7618173824951736, - "grad_norm": 0.0007498626364395022, - "learning_rate": 0.00019999971417527284, - "loss": 46.0, - "step": 9964 - }, - { - "epoch": 0.7618938394785634, - "grad_norm": 0.006138013210147619, - "learning_rate": 0.00019999971411784086, - "loss": 46.0, - "step": 9965 - }, - { - "epoch": 0.7619702964619531, - "grad_norm": 0.0006603064830414951, - "learning_rate": 0.0001999997140604031, - "loss": 46.0, - "step": 9966 - }, - { - "epoch": 0.7620467534453428, - "grad_norm": 0.0010145963169634342, - "learning_rate": 0.00019999971400295961, - "loss": 46.0, - "step": 9967 - }, - { - "epoch": 0.7621232104287325, - "grad_norm": 0.0020180607680231333, - "learning_rate": 0.00019999971394551032, - "loss": 46.0, - "step": 9968 - }, - { - "epoch": 0.7621996674121223, - "grad_norm": 0.0018007459584623575, - "learning_rate": 0.00019999971388805525, - "loss": 46.0, - "step": 9969 - }, - { - "epoch": 0.762276124395512, - "grad_norm": 0.0006252261810004711, - "learning_rate": 0.00019999971383059443, - "loss": 46.0, - "step": 9970 - }, - { - "epoch": 0.7623525813789017, - "grad_norm": 0.005264383740723133, - "learning_rate": 0.00019999971377312786, - "loss": 46.0, - "step": 9971 - }, - { - "epoch": 0.7624290383622914, - "grad_norm": 0.0010963367531076074, - "learning_rate": 0.00019999971371565547, - "loss": 46.0, - "step": 9972 - }, - { - "epoch": 0.7625054953456811, - "grad_norm": 0.000961070996709168, - "learning_rate": 0.00019999971365817736, - "loss": 46.0, - "step": 9973 - }, - { - "epoch": 0.7625819523290709, - "grad_norm": 0.0021992179099470377, - "learning_rate": 0.00019999971360069345, - "loss": 46.0, - "step": 9974 - }, - { - "epoch": 0.7626584093124605, - "grad_norm": 0.0020298175513744354, - "learning_rate": 0.00019999971354320376, - "loss": 46.0, - "step": 9975 - }, - { - "epoch": 0.7627348662958503, - "grad_norm": 0.00126090447884053, - "learning_rate": 0.00019999971348570833, - "loss": 46.0, - "step": 9976 - }, - { - "epoch": 0.7628113232792401, - "grad_norm": 0.0012229454005137086, - "learning_rate": 0.00019999971342820713, - "loss": 46.0, - "step": 9977 - }, - { - "epoch": 0.7628877802626297, - "grad_norm": 0.0005309944390319288, - "learning_rate": 0.00019999971337070015, - "loss": 46.0, - "step": 9978 - }, - { - "epoch": 0.7629642372460195, - "grad_norm": 0.000510432873852551, - "learning_rate": 0.0001999997133131874, - "loss": 46.0, - "step": 9979 - }, - { - "epoch": 0.7630406942294092, - "grad_norm": 0.002291478216648102, - "learning_rate": 0.00019999971325566888, - "loss": 46.0, - "step": 9980 - }, - { - "epoch": 0.7631171512127989, - "grad_norm": 0.002825989620760083, - "learning_rate": 0.00019999971319814458, - "loss": 46.0, - "step": 9981 - }, - { - "epoch": 0.7631936081961886, - "grad_norm": 0.005555382464081049, - "learning_rate": 0.0001999997131406145, - "loss": 46.0, - "step": 9982 - }, - { - "epoch": 0.7632700651795783, - "grad_norm": 0.0030556845013052225, - "learning_rate": 0.00019999971308307872, - "loss": 46.0, - "step": 9983 - }, - { - "epoch": 0.763346522162968, - "grad_norm": 0.0011028972221538424, - "learning_rate": 0.0001999997130255371, - "loss": 46.0, - "step": 9984 - }, - { - "epoch": 0.7634229791463578, - "grad_norm": 0.0009719515219330788, - "learning_rate": 0.00019999971296798976, - "loss": 46.0, - "step": 9985 - }, - { - "epoch": 0.7634994361297475, - "grad_norm": 0.0018168498063459992, - "learning_rate": 0.0001999997129104366, - "loss": 46.0, - "step": 9986 - }, - { - "epoch": 0.7635758931131372, - "grad_norm": 0.0030886325985193253, - "learning_rate": 0.0001999997128528777, - "loss": 46.0, - "step": 9987 - }, - { - "epoch": 0.763652350096527, - "grad_norm": 0.0023773168213665485, - "learning_rate": 0.00019999971279531306, - "loss": 46.0, - "step": 9988 - }, - { - "epoch": 0.7637288070799166, - "grad_norm": 0.0010059482883661985, - "learning_rate": 0.00019999971273774257, - "loss": 46.0, - "step": 9989 - }, - { - "epoch": 0.7638052640633064, - "grad_norm": 0.00400012219324708, - "learning_rate": 0.00019999971268016637, - "loss": 46.0, - "step": 9990 - }, - { - "epoch": 0.763881721046696, - "grad_norm": 0.0010531320003792644, - "learning_rate": 0.0001999997126225844, - "loss": 46.0, - "step": 9991 - }, - { - "epoch": 0.7639581780300858, - "grad_norm": 0.005792735144495964, - "learning_rate": 0.00019999971256499667, - "loss": 46.0, - "step": 9992 - }, - { - "epoch": 0.7640346350134756, - "grad_norm": 0.0011541342828422785, - "learning_rate": 0.00019999971250740312, - "loss": 46.0, - "step": 9993 - }, - { - "epoch": 0.7641110919968652, - "grad_norm": 0.004007813986390829, - "learning_rate": 0.00019999971244980382, - "loss": 46.0, - "step": 9994 - }, - { - "epoch": 0.764187548980255, - "grad_norm": 0.0011837673373520374, - "learning_rate": 0.00019999971239219877, - "loss": 46.0, - "step": 9995 - }, - { - "epoch": 0.7642640059636447, - "grad_norm": 0.0006242874660529196, - "learning_rate": 0.00019999971233458793, - "loss": 46.0, - "step": 9996 - }, - { - "epoch": 0.7643404629470344, - "grad_norm": 0.0004229389887768775, - "learning_rate": 0.00019999971227697134, - "loss": 46.0, - "step": 9997 - }, - { - "epoch": 0.7644169199304242, - "grad_norm": 0.0009345015860162675, - "learning_rate": 0.00019999971221934898, - "loss": 46.0, - "step": 9998 - }, - { - "epoch": 0.7644933769138139, - "grad_norm": 0.0008252540719695389, - "learning_rate": 0.00019999971216172084, - "loss": 46.0, - "step": 9999 - }, - { - "epoch": 0.7645698338972036, - "grad_norm": 0.0016681652050465345, - "learning_rate": 0.00019999971210408695, - "loss": 46.0, - "step": 10000 - }, - { - "epoch": 0.7646462908805933, - "grad_norm": 0.0009806439047679305, - "learning_rate": 0.00019999971204644724, - "loss": 46.0, - "step": 10001 - }, - { - "epoch": 0.764722747863983, - "grad_norm": 0.0029184853192418814, - "learning_rate": 0.0001999997119888018, - "loss": 46.0, - "step": 10002 - }, - { - "epoch": 0.7647992048473727, - "grad_norm": 0.002406930085271597, - "learning_rate": 0.00019999971193115058, - "loss": 46.0, - "step": 10003 - }, - { - "epoch": 0.7648756618307625, - "grad_norm": 0.0013237128732725978, - "learning_rate": 0.00019999971187349363, - "loss": 46.0, - "step": 10004 - }, - { - "epoch": 0.7649521188141521, - "grad_norm": 0.0009538425947539508, - "learning_rate": 0.00019999971181583085, - "loss": 46.0, - "step": 10005 - }, - { - "epoch": 0.7650285757975419, - "grad_norm": 0.0014584185555577278, - "learning_rate": 0.00019999971175816233, - "loss": 46.0, - "step": 10006 - }, - { - "epoch": 0.7651050327809317, - "grad_norm": 0.001428199466317892, - "learning_rate": 0.00019999971170048803, - "loss": 46.0, - "step": 10007 - }, - { - "epoch": 0.7651814897643213, - "grad_norm": 0.001289650797843933, - "learning_rate": 0.00019999971164280796, - "loss": 46.0, - "step": 10008 - }, - { - "epoch": 0.7652579467477111, - "grad_norm": 0.001247707405127585, - "learning_rate": 0.00019999971158512215, - "loss": 46.0, - "step": 10009 - }, - { - "epoch": 0.7653344037311008, - "grad_norm": 0.003696620464324951, - "learning_rate": 0.00019999971152743053, - "loss": 46.0, - "step": 10010 - }, - { - "epoch": 0.7654108607144905, - "grad_norm": 0.000639944220893085, - "learning_rate": 0.00019999971146973317, - "loss": 46.0, - "step": 10011 - }, - { - "epoch": 0.7654873176978803, - "grad_norm": 0.0026720098685473204, - "learning_rate": 0.00019999971141203006, - "loss": 46.0, - "step": 10012 - }, - { - "epoch": 0.7655637746812699, - "grad_norm": 0.0007816367433406413, - "learning_rate": 0.00019999971135432112, - "loss": 46.0, - "step": 10013 - }, - { - "epoch": 0.7656402316646597, - "grad_norm": 0.001667184173129499, - "learning_rate": 0.00019999971129660643, - "loss": 46.0, - "step": 10014 - }, - { - "epoch": 0.7657166886480494, - "grad_norm": 0.0010481046047061682, - "learning_rate": 0.00019999971123888598, - "loss": 46.0, - "step": 10015 - }, - { - "epoch": 0.7657931456314391, - "grad_norm": 0.0035507171414792538, - "learning_rate": 0.0001999997111811598, - "loss": 46.0, - "step": 10016 - }, - { - "epoch": 0.7658696026148288, - "grad_norm": 0.010692774318158627, - "learning_rate": 0.0001999997111234278, - "loss": 46.0, - "step": 10017 - }, - { - "epoch": 0.7659460595982186, - "grad_norm": 0.0014783920487388968, - "learning_rate": 0.00019999971106569002, - "loss": 46.0, - "step": 10018 - }, - { - "epoch": 0.7660225165816082, - "grad_norm": 0.0014465571148321033, - "learning_rate": 0.0001999997110079465, - "loss": 46.0, - "step": 10019 - }, - { - "epoch": 0.766098973564998, - "grad_norm": 0.0019483668729662895, - "learning_rate": 0.0001999997109501972, - "loss": 46.0, - "step": 10020 - }, - { - "epoch": 0.7661754305483877, - "grad_norm": 0.0007734148530289531, - "learning_rate": 0.00019999971089244213, - "loss": 46.0, - "step": 10021 - }, - { - "epoch": 0.7662518875317774, - "grad_norm": 0.0006514661945402622, - "learning_rate": 0.00019999971083468129, - "loss": 46.0, - "step": 10022 - }, - { - "epoch": 0.7663283445151672, - "grad_norm": 0.0006578920874744654, - "learning_rate": 0.0001999997107769147, - "loss": 46.0, - "step": 10023 - }, - { - "epoch": 0.7664048014985568, - "grad_norm": 0.0010734456591308117, - "learning_rate": 0.00019999971071914233, - "loss": 46.0, - "step": 10024 - }, - { - "epoch": 0.7664812584819466, - "grad_norm": 0.0026550416368991137, - "learning_rate": 0.0001999997106613642, - "loss": 46.0, - "step": 10025 - }, - { - "epoch": 0.7665577154653364, - "grad_norm": 0.001050087041221559, - "learning_rate": 0.00019999971060358026, - "loss": 46.0, - "step": 10026 - }, - { - "epoch": 0.766634172448726, - "grad_norm": 0.0006363586871884763, - "learning_rate": 0.0001999997105457906, - "loss": 46.0, - "step": 10027 - }, - { - "epoch": 0.7667106294321158, - "grad_norm": 0.001123207388445735, - "learning_rate": 0.00019999971048799515, - "loss": 46.0, - "step": 10028 - }, - { - "epoch": 0.7667870864155055, - "grad_norm": 0.0021201076451689005, - "learning_rate": 0.00019999971043019392, - "loss": 46.0, - "step": 10029 - }, - { - "epoch": 0.7668635433988952, - "grad_norm": 0.0008306613890454173, - "learning_rate": 0.0001999997103723869, - "loss": 46.0, - "step": 10030 - }, - { - "epoch": 0.7669400003822849, - "grad_norm": 0.0012050301302224398, - "learning_rate": 0.0001999997103145742, - "loss": 46.0, - "step": 10031 - }, - { - "epoch": 0.7670164573656746, - "grad_norm": 0.0010074771707877517, - "learning_rate": 0.00019999971025675564, - "loss": 46.0, - "step": 10032 - }, - { - "epoch": 0.7670929143490643, - "grad_norm": 0.011882204562425613, - "learning_rate": 0.00019999971019893137, - "loss": 46.0, - "step": 10033 - }, - { - "epoch": 0.7671693713324541, - "grad_norm": 0.005684808362275362, - "learning_rate": 0.00019999971014110127, - "loss": 46.0, - "step": 10034 - }, - { - "epoch": 0.7672458283158438, - "grad_norm": 0.0023781689815223217, - "learning_rate": 0.00019999971008326543, - "loss": 46.0, - "step": 10035 - }, - { - "epoch": 0.7673222852992335, - "grad_norm": 0.01235031895339489, - "learning_rate": 0.00019999971002542384, - "loss": 46.0, - "step": 10036 - }, - { - "epoch": 0.7673987422826233, - "grad_norm": 0.001249405206181109, - "learning_rate": 0.00019999970996757648, - "loss": 46.0, - "step": 10037 - }, - { - "epoch": 0.7674751992660129, - "grad_norm": 0.0008550244965590537, - "learning_rate": 0.0001999997099097233, - "loss": 46.0, - "step": 10038 - }, - { - "epoch": 0.7675516562494027, - "grad_norm": 0.0011739303590729833, - "learning_rate": 0.0001999997098518644, - "loss": 46.0, - "step": 10039 - }, - { - "epoch": 0.7676281132327925, - "grad_norm": 0.0013601342216134071, - "learning_rate": 0.00019999970979399972, - "loss": 46.0, - "step": 10040 - }, - { - "epoch": 0.7677045702161821, - "grad_norm": 0.0012962105683982372, - "learning_rate": 0.00019999970973612924, - "loss": 46.0, - "step": 10041 - }, - { - "epoch": 0.7677810271995719, - "grad_norm": 0.008770779706537724, - "learning_rate": 0.00019999970967825303, - "loss": 46.0, - "step": 10042 - }, - { - "epoch": 0.7678574841829615, - "grad_norm": 0.0013921101344749331, - "learning_rate": 0.00019999970962037106, - "loss": 46.0, - "step": 10043 - }, - { - "epoch": 0.7679339411663513, - "grad_norm": 0.0010800652671605349, - "learning_rate": 0.00019999970956248325, - "loss": 46.0, - "step": 10044 - }, - { - "epoch": 0.768010398149741, - "grad_norm": 0.0005175610422156751, - "learning_rate": 0.00019999970950458976, - "loss": 46.0, - "step": 10045 - }, - { - "epoch": 0.7680868551331307, - "grad_norm": 0.00039670823025517166, - "learning_rate": 0.00019999970944669043, - "loss": 46.0, - "step": 10046 - }, - { - "epoch": 0.7681633121165204, - "grad_norm": 0.002805059775710106, - "learning_rate": 0.00019999970938878536, - "loss": 46.0, - "step": 10047 - }, - { - "epoch": 0.7682397690999102, - "grad_norm": 0.0008304428192786872, - "learning_rate": 0.00019999970933087452, - "loss": 46.0, - "step": 10048 - }, - { - "epoch": 0.7683162260832999, - "grad_norm": 0.0008633029647171497, - "learning_rate": 0.00019999970927295793, - "loss": 46.0, - "step": 10049 - }, - { - "epoch": 0.7683926830666896, - "grad_norm": 0.0013363687321543694, - "learning_rate": 0.00019999970921503554, - "loss": 46.0, - "step": 10050 - }, - { - "epoch": 0.7684691400500793, - "grad_norm": 0.0007525449036620557, - "learning_rate": 0.0001999997091571074, - "loss": 46.0, - "step": 10051 - }, - { - "epoch": 0.768545597033469, - "grad_norm": 0.002808792283758521, - "learning_rate": 0.00019999970909917347, - "loss": 46.0, - "step": 10052 - }, - { - "epoch": 0.7686220540168588, - "grad_norm": 0.0007879525073803961, - "learning_rate": 0.0001999997090412338, - "loss": 46.0, - "step": 10053 - }, - { - "epoch": 0.7686985110002484, - "grad_norm": 0.001690176664851606, - "learning_rate": 0.00019999970898328836, - "loss": 46.0, - "step": 10054 - }, - { - "epoch": 0.7687749679836382, - "grad_norm": 0.004503827542066574, - "learning_rate": 0.0001999997089253371, - "loss": 46.0, - "step": 10055 - }, - { - "epoch": 0.768851424967028, - "grad_norm": 0.00091963866725564, - "learning_rate": 0.0001999997088673801, - "loss": 46.0, - "step": 10056 - }, - { - "epoch": 0.7689278819504176, - "grad_norm": 0.0014203267637640238, - "learning_rate": 0.00019999970880941735, - "loss": 46.0, - "step": 10057 - }, - { - "epoch": 0.7690043389338074, - "grad_norm": 0.003359684254974127, - "learning_rate": 0.00019999970875144883, - "loss": 46.0, - "step": 10058 - }, - { - "epoch": 0.7690807959171971, - "grad_norm": 0.0010892476420849562, - "learning_rate": 0.0001999997086934745, - "loss": 46.0, - "step": 10059 - }, - { - "epoch": 0.7691572529005868, - "grad_norm": 0.000888099311850965, - "learning_rate": 0.0001999997086354944, - "loss": 46.0, - "step": 10060 - }, - { - "epoch": 0.7692337098839765, - "grad_norm": 0.001151447999291122, - "learning_rate": 0.0001999997085775086, - "loss": 46.0, - "step": 10061 - }, - { - "epoch": 0.7693101668673662, - "grad_norm": 0.0020006056874990463, - "learning_rate": 0.00019999970851951698, - "loss": 46.0, - "step": 10062 - }, - { - "epoch": 0.769386623850756, - "grad_norm": 0.0014457236975431442, - "learning_rate": 0.0001999997084615196, - "loss": 46.0, - "step": 10063 - }, - { - "epoch": 0.7694630808341457, - "grad_norm": 0.0010294867679476738, - "learning_rate": 0.00019999970840351645, - "loss": 46.0, - "step": 10064 - }, - { - "epoch": 0.7695395378175354, - "grad_norm": 0.0005748987314291298, - "learning_rate": 0.00019999970834550754, - "loss": 46.0, - "step": 10065 - }, - { - "epoch": 0.7696159948009251, - "grad_norm": 0.001188112422823906, - "learning_rate": 0.00019999970828749283, - "loss": 46.0, - "step": 10066 - }, - { - "epoch": 0.7696924517843149, - "grad_norm": 0.0028988078702241182, - "learning_rate": 0.00019999970822947238, - "loss": 46.0, - "step": 10067 - }, - { - "epoch": 0.7697689087677045, - "grad_norm": 0.0007686301250942051, - "learning_rate": 0.00019999970817144615, - "loss": 46.0, - "step": 10068 - }, - { - "epoch": 0.7698453657510943, - "grad_norm": 0.0038459505885839462, - "learning_rate": 0.00019999970811341414, - "loss": 46.0, - "step": 10069 - }, - { - "epoch": 0.7699218227344841, - "grad_norm": 0.0004141342651564628, - "learning_rate": 0.00019999970805537637, - "loss": 46.0, - "step": 10070 - }, - { - "epoch": 0.7699982797178737, - "grad_norm": 0.0011534850345924497, - "learning_rate": 0.00019999970799733285, - "loss": 46.0, - "step": 10071 - }, - { - "epoch": 0.7700747367012635, - "grad_norm": 0.0019190703751519322, - "learning_rate": 0.00019999970793928355, - "loss": 46.0, - "step": 10072 - }, - { - "epoch": 0.7701511936846531, - "grad_norm": 0.003268068889155984, - "learning_rate": 0.00019999970788122845, - "loss": 46.0, - "step": 10073 - }, - { - "epoch": 0.7702276506680429, - "grad_norm": 0.0019710457418113947, - "learning_rate": 0.0001999997078231676, - "loss": 46.0, - "step": 10074 - }, - { - "epoch": 0.7703041076514326, - "grad_norm": 0.0041036042384803295, - "learning_rate": 0.00019999970776510102, - "loss": 46.0, - "step": 10075 - }, - { - "epoch": 0.7703805646348223, - "grad_norm": 0.0012877883855253458, - "learning_rate": 0.00019999970770702863, - "loss": 46.0, - "step": 10076 - }, - { - "epoch": 0.7704570216182121, - "grad_norm": 0.0023239273577928543, - "learning_rate": 0.00019999970764895047, - "loss": 46.0, - "step": 10077 - }, - { - "epoch": 0.7705334786016018, - "grad_norm": 0.002815867541357875, - "learning_rate": 0.00019999970759086654, - "loss": 46.0, - "step": 10078 - }, - { - "epoch": 0.7706099355849915, - "grad_norm": 0.0008432841277681291, - "learning_rate": 0.00019999970753277685, - "loss": 46.0, - "step": 10079 - }, - { - "epoch": 0.7706863925683812, - "grad_norm": 0.00033620651811361313, - "learning_rate": 0.00019999970747468137, - "loss": 46.0, - "step": 10080 - }, - { - "epoch": 0.770762849551771, - "grad_norm": 0.0012031936785206199, - "learning_rate": 0.00019999970741658017, - "loss": 46.0, - "step": 10081 - }, - { - "epoch": 0.7708393065351606, - "grad_norm": 0.000884325650986284, - "learning_rate": 0.00019999970735847317, - "loss": 46.0, - "step": 10082 - }, - { - "epoch": 0.7709157635185504, - "grad_norm": 0.0004033608711324632, - "learning_rate": 0.00019999970730036036, - "loss": 46.0, - "step": 10083 - }, - { - "epoch": 0.77099222050194, - "grad_norm": 0.0015430657658725977, - "learning_rate": 0.00019999970724224184, - "loss": 46.0, - "step": 10084 - }, - { - "epoch": 0.7710686774853298, - "grad_norm": 0.0012954644626006484, - "learning_rate": 0.00019999970718411755, - "loss": 46.0, - "step": 10085 - }, - { - "epoch": 0.7711451344687196, - "grad_norm": 0.0052140625193715096, - "learning_rate": 0.00019999970712598745, - "loss": 46.0, - "step": 10086 - }, - { - "epoch": 0.7712215914521092, - "grad_norm": 0.0019443245837464929, - "learning_rate": 0.0001999997070678516, - "loss": 46.0, - "step": 10087 - }, - { - "epoch": 0.771298048435499, - "grad_norm": 0.0048497579991817474, - "learning_rate": 0.00019999970700970997, - "loss": 46.0, - "step": 10088 - }, - { - "epoch": 0.7713745054188887, - "grad_norm": 0.0006835806998424232, - "learning_rate": 0.0001999997069515626, - "loss": 46.0, - "step": 10089 - }, - { - "epoch": 0.7714509624022784, - "grad_norm": 0.0033077120315283537, - "learning_rate": 0.00019999970689340944, - "loss": 46.0, - "step": 10090 - }, - { - "epoch": 0.7715274193856682, - "grad_norm": 0.001092192716896534, - "learning_rate": 0.0001999997068352505, - "loss": 46.0, - "step": 10091 - }, - { - "epoch": 0.7716038763690578, - "grad_norm": 0.004216737579554319, - "learning_rate": 0.00019999970677708583, - "loss": 46.0, - "step": 10092 - }, - { - "epoch": 0.7716803333524476, - "grad_norm": 0.001018659444525838, - "learning_rate": 0.00019999970671891534, - "loss": 46.0, - "step": 10093 - }, - { - "epoch": 0.7717567903358373, - "grad_norm": 0.002728919265791774, - "learning_rate": 0.00019999970666073911, - "loss": 46.0, - "step": 10094 - }, - { - "epoch": 0.771833247319227, - "grad_norm": 0.003599038114771247, - "learning_rate": 0.00019999970660255714, - "loss": 46.0, - "step": 10095 - }, - { - "epoch": 0.7719097043026167, - "grad_norm": 0.0007787556387484074, - "learning_rate": 0.00019999970654436934, - "loss": 46.0, - "step": 10096 - }, - { - "epoch": 0.7719861612860065, - "grad_norm": 0.0020864014513790607, - "learning_rate": 0.00019999970648617582, - "loss": 46.0, - "step": 10097 - }, - { - "epoch": 0.7720626182693962, - "grad_norm": 0.0010055698221549392, - "learning_rate": 0.0001999997064279765, - "loss": 46.0, - "step": 10098 - }, - { - "epoch": 0.7721390752527859, - "grad_norm": 0.000786547374445945, - "learning_rate": 0.0001999997063697714, - "loss": 46.0, - "step": 10099 - }, - { - "epoch": 0.7722155322361757, - "grad_norm": 0.0012661585351452231, - "learning_rate": 0.00019999970631156058, - "loss": 46.0, - "step": 10100 - }, - { - "epoch": 0.7722919892195653, - "grad_norm": 0.0011497859377413988, - "learning_rate": 0.00019999970625334394, - "loss": 46.0, - "step": 10101 - }, - { - "epoch": 0.7723684462029551, - "grad_norm": 0.004212136846035719, - "learning_rate": 0.00019999970619512153, - "loss": 46.0, - "step": 10102 - }, - { - "epoch": 0.7724449031863447, - "grad_norm": 0.0006643040687777102, - "learning_rate": 0.00019999970613689342, - "loss": 46.0, - "step": 10103 - }, - { - "epoch": 0.7725213601697345, - "grad_norm": 0.0018662265501916409, - "learning_rate": 0.00019999970607865946, - "loss": 46.0, - "step": 10104 - }, - { - "epoch": 0.7725978171531243, - "grad_norm": 0.0005415039486251771, - "learning_rate": 0.00019999970602041977, - "loss": 46.0, - "step": 10105 - }, - { - "epoch": 0.7726742741365139, - "grad_norm": 0.0031976881437003613, - "learning_rate": 0.0001999997059621743, - "loss": 46.0, - "step": 10106 - }, - { - "epoch": 0.7727507311199037, - "grad_norm": 0.015071452595293522, - "learning_rate": 0.00019999970590392306, - "loss": 46.0, - "step": 10107 - }, - { - "epoch": 0.7728271881032934, - "grad_norm": 0.0006291063036769629, - "learning_rate": 0.00019999970584566606, - "loss": 46.0, - "step": 10108 - }, - { - "epoch": 0.7729036450866831, - "grad_norm": 0.0007333066314458847, - "learning_rate": 0.0001999997057874033, - "loss": 46.0, - "step": 10109 - }, - { - "epoch": 0.7729801020700728, - "grad_norm": 0.003073690924793482, - "learning_rate": 0.00019999970572913474, - "loss": 46.0, - "step": 10110 - }, - { - "epoch": 0.7730565590534626, - "grad_norm": 0.0013207502197474241, - "learning_rate": 0.00019999970567086042, - "loss": 46.0, - "step": 10111 - }, - { - "epoch": 0.7731330160368522, - "grad_norm": 0.0030079903081059456, - "learning_rate": 0.00019999970561258032, - "loss": 46.0, - "step": 10112 - }, - { - "epoch": 0.773209473020242, - "grad_norm": 0.0005014106864109635, - "learning_rate": 0.00019999970555429448, - "loss": 46.0, - "step": 10113 - }, - { - "epoch": 0.7732859300036317, - "grad_norm": 0.00029658337007276714, - "learning_rate": 0.00019999970549600287, - "loss": 46.0, - "step": 10114 - }, - { - "epoch": 0.7733623869870214, - "grad_norm": 0.0013163614785298705, - "learning_rate": 0.00019999970543770548, - "loss": 46.0, - "step": 10115 - }, - { - "epoch": 0.7734388439704112, - "grad_norm": 0.0018276681657880545, - "learning_rate": 0.00019999970537940232, - "loss": 46.0, - "step": 10116 - }, - { - "epoch": 0.7735153009538008, - "grad_norm": 0.004299021791666746, - "learning_rate": 0.00019999970532109338, - "loss": 46.0, - "step": 10117 - }, - { - "epoch": 0.7735917579371906, - "grad_norm": 0.0011009597219526768, - "learning_rate": 0.00019999970526277867, - "loss": 46.0, - "step": 10118 - }, - { - "epoch": 0.7736682149205804, - "grad_norm": 0.000331989664118737, - "learning_rate": 0.00019999970520445822, - "loss": 46.0, - "step": 10119 - }, - { - "epoch": 0.77374467190397, - "grad_norm": 0.00110848608892411, - "learning_rate": 0.000199999705146132, - "loss": 46.0, - "step": 10120 - }, - { - "epoch": 0.7738211288873598, - "grad_norm": 0.0003543139318935573, - "learning_rate": 0.00019999970508779996, - "loss": 46.0, - "step": 10121 - }, - { - "epoch": 0.7738975858707494, - "grad_norm": 0.0002889388124458492, - "learning_rate": 0.0001999997050294622, - "loss": 46.0, - "step": 10122 - }, - { - "epoch": 0.7739740428541392, - "grad_norm": 0.0030940936412662268, - "learning_rate": 0.00019999970497111864, - "loss": 46.0, - "step": 10123 - }, - { - "epoch": 0.7740504998375289, - "grad_norm": 0.0007068327395245433, - "learning_rate": 0.00019999970491276932, - "loss": 46.0, - "step": 10124 - }, - { - "epoch": 0.7741269568209186, - "grad_norm": 0.0019399757729843259, - "learning_rate": 0.00019999970485441422, - "loss": 46.0, - "step": 10125 - }, - { - "epoch": 0.7742034138043083, - "grad_norm": 0.0028171653393656015, - "learning_rate": 0.00019999970479605338, - "loss": 46.0, - "step": 10126 - }, - { - "epoch": 0.7742798707876981, - "grad_norm": 0.001109176897443831, - "learning_rate": 0.00019999970473768674, - "loss": 46.0, - "step": 10127 - }, - { - "epoch": 0.7743563277710878, - "grad_norm": 0.0022520788479596376, - "learning_rate": 0.00019999970467931435, - "loss": 46.0, - "step": 10128 - }, - { - "epoch": 0.7744327847544775, - "grad_norm": 0.004060881678014994, - "learning_rate": 0.0001999997046209362, - "loss": 46.0, - "step": 10129 - }, - { - "epoch": 0.7745092417378673, - "grad_norm": 0.0006325824069790542, - "learning_rate": 0.00019999970456255226, - "loss": 46.0, - "step": 10130 - }, - { - "epoch": 0.7745856987212569, - "grad_norm": 0.009161490947008133, - "learning_rate": 0.00019999970450416258, - "loss": 46.0, - "step": 10131 - }, - { - "epoch": 0.7746621557046467, - "grad_norm": 0.0016569256549701095, - "learning_rate": 0.00019999970444576707, - "loss": 46.0, - "step": 10132 - }, - { - "epoch": 0.7747386126880363, - "grad_norm": 0.0018305061385035515, - "learning_rate": 0.00019999970438736584, - "loss": 46.0, - "step": 10133 - }, - { - "epoch": 0.7748150696714261, - "grad_norm": 0.000851231103297323, - "learning_rate": 0.00019999970432895884, - "loss": 46.0, - "step": 10134 - }, - { - "epoch": 0.7748915266548159, - "grad_norm": 0.0010570806916803122, - "learning_rate": 0.00019999970427054604, - "loss": 46.0, - "step": 10135 - }, - { - "epoch": 0.7749679836382055, - "grad_norm": 0.0009224657551385462, - "learning_rate": 0.00019999970421212752, - "loss": 46.0, - "step": 10136 - }, - { - "epoch": 0.7750444406215953, - "grad_norm": 0.0005287441308610141, - "learning_rate": 0.00019999970415370317, - "loss": 46.0, - "step": 10137 - }, - { - "epoch": 0.775120897604985, - "grad_norm": 0.0024804340209811926, - "learning_rate": 0.00019999970409527307, - "loss": 46.0, - "step": 10138 - }, - { - "epoch": 0.7751973545883747, - "grad_norm": 0.0013349383370950818, - "learning_rate": 0.00019999970403683723, - "loss": 46.0, - "step": 10139 - }, - { - "epoch": 0.7752738115717644, - "grad_norm": 0.0006791549385525286, - "learning_rate": 0.0001999997039783956, - "loss": 46.0, - "step": 10140 - }, - { - "epoch": 0.7753502685551542, - "grad_norm": 0.002364273415878415, - "learning_rate": 0.0001999997039199482, - "loss": 46.0, - "step": 10141 - }, - { - "epoch": 0.7754267255385439, - "grad_norm": 0.003655507694929838, - "learning_rate": 0.00019999970386149502, - "loss": 46.0, - "step": 10142 - }, - { - "epoch": 0.7755031825219336, - "grad_norm": 0.0025535959284752607, - "learning_rate": 0.0001999997038030361, - "loss": 46.0, - "step": 10143 - }, - { - "epoch": 0.7755796395053233, - "grad_norm": 0.0012152957497164607, - "learning_rate": 0.00019999970374457137, - "loss": 46.0, - "step": 10144 - }, - { - "epoch": 0.775656096488713, - "grad_norm": 0.0010598603403195739, - "learning_rate": 0.0001999997036861009, - "loss": 46.0, - "step": 10145 - }, - { - "epoch": 0.7757325534721028, - "grad_norm": 0.0007946204277686775, - "learning_rate": 0.00019999970362762466, - "loss": 46.0, - "step": 10146 - }, - { - "epoch": 0.7758090104554924, - "grad_norm": 0.0011012312024831772, - "learning_rate": 0.00019999970356914264, - "loss": 46.0, - "step": 10147 - }, - { - "epoch": 0.7758854674388822, - "grad_norm": 0.0010971761075779796, - "learning_rate": 0.00019999970351065486, - "loss": 46.0, - "step": 10148 - }, - { - "epoch": 0.775961924422272, - "grad_norm": 0.002020234940573573, - "learning_rate": 0.00019999970345216132, - "loss": 46.0, - "step": 10149 - }, - { - "epoch": 0.7760383814056616, - "grad_norm": 0.003866098588332534, - "learning_rate": 0.000199999703393662, - "loss": 46.0, - "step": 10150 - }, - { - "epoch": 0.7761148383890514, - "grad_norm": 0.0005179843283258379, - "learning_rate": 0.0001999997033351569, - "loss": 46.0, - "step": 10151 - }, - { - "epoch": 0.776191295372441, - "grad_norm": 0.0011416098568588495, - "learning_rate": 0.00019999970327664603, - "loss": 46.0, - "step": 10152 - }, - { - "epoch": 0.7762677523558308, - "grad_norm": 0.0037071979604661465, - "learning_rate": 0.00019999970321812942, - "loss": 46.0, - "step": 10153 - }, - { - "epoch": 0.7763442093392205, - "grad_norm": 0.0022465926595032215, - "learning_rate": 0.000199999703159607, - "loss": 46.0, - "step": 10154 - }, - { - "epoch": 0.7764206663226102, - "grad_norm": 0.000830504868645221, - "learning_rate": 0.00019999970310107882, - "loss": 46.0, - "step": 10155 - }, - { - "epoch": 0.776497123306, - "grad_norm": 0.002061625011265278, - "learning_rate": 0.00019999970304254488, - "loss": 46.0, - "step": 10156 - }, - { - "epoch": 0.7765735802893897, - "grad_norm": 0.0021929454524070024, - "learning_rate": 0.00019999970298400518, - "loss": 46.0, - "step": 10157 - }, - { - "epoch": 0.7766500372727794, - "grad_norm": 0.00038512024912051857, - "learning_rate": 0.00019999970292545972, - "loss": 46.0, - "step": 10158 - }, - { - "epoch": 0.7767264942561691, - "grad_norm": 0.0007712931255809963, - "learning_rate": 0.00019999970286690847, - "loss": 46.0, - "step": 10159 - }, - { - "epoch": 0.7768029512395589, - "grad_norm": 0.0007853909046389163, - "learning_rate": 0.00019999970280835144, - "loss": 46.0, - "step": 10160 - }, - { - "epoch": 0.7768794082229485, - "grad_norm": 0.0010165219428017735, - "learning_rate": 0.00019999970274978867, - "loss": 46.0, - "step": 10161 - }, - { - "epoch": 0.7769558652063383, - "grad_norm": 0.0006136196898296475, - "learning_rate": 0.0001999997026912201, - "loss": 46.0, - "step": 10162 - }, - { - "epoch": 0.777032322189728, - "grad_norm": 0.0019868165254592896, - "learning_rate": 0.00019999970263264577, - "loss": 46.0, - "step": 10163 - }, - { - "epoch": 0.7771087791731177, - "grad_norm": 0.0005031025502830744, - "learning_rate": 0.00019999970257406568, - "loss": 46.0, - "step": 10164 - }, - { - "epoch": 0.7771852361565075, - "grad_norm": 0.00151635252404958, - "learning_rate": 0.00019999970251547984, - "loss": 46.0, - "step": 10165 - }, - { - "epoch": 0.7772616931398971, - "grad_norm": 0.0004623220593202859, - "learning_rate": 0.00019999970245688817, - "loss": 46.0, - "step": 10166 - }, - { - "epoch": 0.7773381501232869, - "grad_norm": 0.0013860411709174514, - "learning_rate": 0.00019999970239829078, - "loss": 46.0, - "step": 10167 - }, - { - "epoch": 0.7774146071066766, - "grad_norm": 0.0015587200177833438, - "learning_rate": 0.00019999970233968762, - "loss": 46.0, - "step": 10168 - }, - { - "epoch": 0.7774910640900663, - "grad_norm": 0.0007903753430582583, - "learning_rate": 0.00019999970228107866, - "loss": 46.0, - "step": 10169 - }, - { - "epoch": 0.7775675210734561, - "grad_norm": 0.003197759622707963, - "learning_rate": 0.00019999970222246395, - "loss": 46.0, - "step": 10170 - }, - { - "epoch": 0.7776439780568458, - "grad_norm": 0.005163267254829407, - "learning_rate": 0.00019999970216384347, - "loss": 46.0, - "step": 10171 - }, - { - "epoch": 0.7777204350402355, - "grad_norm": 0.0005579793942160904, - "learning_rate": 0.00019999970210521722, - "loss": 46.0, - "step": 10172 - }, - { - "epoch": 0.7777968920236252, - "grad_norm": 0.0018871133215725422, - "learning_rate": 0.0001999997020465852, - "loss": 46.0, - "step": 10173 - }, - { - "epoch": 0.7778733490070149, - "grad_norm": 0.001524643157608807, - "learning_rate": 0.00019999970198794742, - "loss": 46.0, - "step": 10174 - }, - { - "epoch": 0.7779498059904046, - "grad_norm": 0.005308306775987148, - "learning_rate": 0.00019999970192930387, - "loss": 46.0, - "step": 10175 - }, - { - "epoch": 0.7780262629737944, - "grad_norm": 0.0004678885452449322, - "learning_rate": 0.00019999970187065453, - "loss": 46.0, - "step": 10176 - }, - { - "epoch": 0.778102719957184, - "grad_norm": 0.001828834880143404, - "learning_rate": 0.00019999970181199943, - "loss": 46.0, - "step": 10177 - }, - { - "epoch": 0.7781791769405738, - "grad_norm": 0.0013521984219551086, - "learning_rate": 0.00019999970175333857, - "loss": 46.0, - "step": 10178 - }, - { - "epoch": 0.7782556339239636, - "grad_norm": 0.001654301187954843, - "learning_rate": 0.00019999970169467193, - "loss": 46.0, - "step": 10179 - }, - { - "epoch": 0.7783320909073532, - "grad_norm": 0.0006634952733293176, - "learning_rate": 0.00019999970163599954, - "loss": 46.0, - "step": 10180 - }, - { - "epoch": 0.778408547890743, - "grad_norm": 0.005076178349554539, - "learning_rate": 0.00019999970157732135, - "loss": 46.0, - "step": 10181 - }, - { - "epoch": 0.7784850048741326, - "grad_norm": 0.0007074660388752818, - "learning_rate": 0.00019999970151863742, - "loss": 46.0, - "step": 10182 - }, - { - "epoch": 0.7785614618575224, - "grad_norm": 0.000865244772285223, - "learning_rate": 0.00019999970145994768, - "loss": 46.0, - "step": 10183 - }, - { - "epoch": 0.7786379188409122, - "grad_norm": 0.0011318132746964693, - "learning_rate": 0.0001999997014012522, - "loss": 46.0, - "step": 10184 - }, - { - "epoch": 0.7787143758243018, - "grad_norm": 0.0006942532490938902, - "learning_rate": 0.00019999970134255098, - "loss": 46.0, - "step": 10185 - }, - { - "epoch": 0.7787908328076916, - "grad_norm": 0.0020117023959755898, - "learning_rate": 0.00019999970128384392, - "loss": 46.0, - "step": 10186 - }, - { - "epoch": 0.7788672897910813, - "grad_norm": 0.000562444212846458, - "learning_rate": 0.00019999970122513115, - "loss": 46.0, - "step": 10187 - }, - { - "epoch": 0.778943746774471, - "grad_norm": 0.0008933614590205252, - "learning_rate": 0.00019999970116641258, - "loss": 46.0, - "step": 10188 - }, - { - "epoch": 0.7790202037578607, - "grad_norm": 0.0021899689454585314, - "learning_rate": 0.00019999970110768826, - "loss": 46.0, - "step": 10189 - }, - { - "epoch": 0.7790966607412505, - "grad_norm": 0.0010486440733075142, - "learning_rate": 0.00019999970104895814, - "loss": 46.0, - "step": 10190 - }, - { - "epoch": 0.7791731177246402, - "grad_norm": 0.0009347231825813651, - "learning_rate": 0.00019999970099022227, - "loss": 46.0, - "step": 10191 - }, - { - "epoch": 0.7792495747080299, - "grad_norm": 0.002102338243275881, - "learning_rate": 0.00019999970093148063, - "loss": 46.0, - "step": 10192 - }, - { - "epoch": 0.7793260316914196, - "grad_norm": 0.0018903817981481552, - "learning_rate": 0.00019999970087273324, - "loss": 46.0, - "step": 10193 - }, - { - "epoch": 0.7794024886748093, - "grad_norm": 0.0010162407997995615, - "learning_rate": 0.00019999970081398006, - "loss": 46.0, - "step": 10194 - }, - { - "epoch": 0.7794789456581991, - "grad_norm": 0.0005231364048086107, - "learning_rate": 0.0001999997007552211, - "loss": 46.0, - "step": 10195 - }, - { - "epoch": 0.7795554026415887, - "grad_norm": 0.0015077412826940417, - "learning_rate": 0.0001999997006964564, - "loss": 46.0, - "step": 10196 - }, - { - "epoch": 0.7796318596249785, - "grad_norm": 0.00283474731259048, - "learning_rate": 0.0001999997006376859, - "loss": 46.0, - "step": 10197 - }, - { - "epoch": 0.7797083166083683, - "grad_norm": 0.002034452510997653, - "learning_rate": 0.00019999970057890963, - "loss": 46.0, - "step": 10198 - }, - { - "epoch": 0.7797847735917579, - "grad_norm": 0.0038028694689273834, - "learning_rate": 0.00019999970052012763, - "loss": 46.0, - "step": 10199 - }, - { - "epoch": 0.7798612305751477, - "grad_norm": 0.0016110887518152595, - "learning_rate": 0.00019999970046133983, - "loss": 46.0, - "step": 10200 - }, - { - "epoch": 0.7799376875585374, - "grad_norm": 0.0029270027298480272, - "learning_rate": 0.00019999970040254628, - "loss": 46.0, - "step": 10201 - }, - { - "epoch": 0.7800141445419271, - "grad_norm": 0.0013306328328326344, - "learning_rate": 0.00019999970034374694, - "loss": 46.0, - "step": 10202 - }, - { - "epoch": 0.7800906015253168, - "grad_norm": 0.0011153396917507052, - "learning_rate": 0.00019999970028494182, - "loss": 46.0, - "step": 10203 - }, - { - "epoch": 0.7801670585087065, - "grad_norm": 0.001664671115577221, - "learning_rate": 0.00019999970022613095, - "loss": 46.0, - "step": 10204 - }, - { - "epoch": 0.7802435154920963, - "grad_norm": 0.001390974037349224, - "learning_rate": 0.0001999997001673143, - "loss": 46.0, - "step": 10205 - }, - { - "epoch": 0.780319972475486, - "grad_norm": 0.0008468308369629085, - "learning_rate": 0.0001999997001084919, - "loss": 46.0, - "step": 10206 - }, - { - "epoch": 0.7803964294588757, - "grad_norm": 0.0034670219756662846, - "learning_rate": 0.0001999997000496637, - "loss": 46.0, - "step": 10207 - }, - { - "epoch": 0.7804728864422654, - "grad_norm": 0.0007849931134842336, - "learning_rate": 0.00019999969999082978, - "loss": 46.0, - "step": 10208 - }, - { - "epoch": 0.7805493434256552, - "grad_norm": 0.005797098390758038, - "learning_rate": 0.00019999969993199005, - "loss": 46.0, - "step": 10209 - }, - { - "epoch": 0.7806258004090448, - "grad_norm": 0.0027351134922355413, - "learning_rate": 0.00019999969987314454, - "loss": 46.0, - "step": 10210 - }, - { - "epoch": 0.7807022573924346, - "grad_norm": 0.004473397042602301, - "learning_rate": 0.00019999969981429331, - "loss": 46.0, - "step": 10211 - }, - { - "epoch": 0.7807787143758244, - "grad_norm": 0.0014184268657118082, - "learning_rate": 0.00019999969975543626, - "loss": 46.0, - "step": 10212 - }, - { - "epoch": 0.780855171359214, - "grad_norm": 0.0017004070105031133, - "learning_rate": 0.0001999996996965735, - "loss": 46.0, - "step": 10213 - }, - { - "epoch": 0.7809316283426038, - "grad_norm": 0.0006384268635883927, - "learning_rate": 0.00019999969963770494, - "loss": 46.0, - "step": 10214 - }, - { - "epoch": 0.7810080853259934, - "grad_norm": 0.0009152454440481961, - "learning_rate": 0.00019999969957883057, - "loss": 46.0, - "step": 10215 - }, - { - "epoch": 0.7810845423093832, - "grad_norm": 0.0047762952744960785, - "learning_rate": 0.00019999969951995045, - "loss": 46.0, - "step": 10216 - }, - { - "epoch": 0.7811609992927729, - "grad_norm": 0.0029480753000825644, - "learning_rate": 0.00019999969946106458, - "loss": 46.0, - "step": 10217 - }, - { - "epoch": 0.7812374562761626, - "grad_norm": 0.0013304011663421988, - "learning_rate": 0.00019999969940217297, - "loss": 46.0, - "step": 10218 - }, - { - "epoch": 0.7813139132595524, - "grad_norm": 0.00200113607570529, - "learning_rate": 0.00019999969934327553, - "loss": 46.0, - "step": 10219 - }, - { - "epoch": 0.7813903702429421, - "grad_norm": 0.0030732930172234774, - "learning_rate": 0.00019999969928437237, - "loss": 46.0, - "step": 10220 - }, - { - "epoch": 0.7814668272263318, - "grad_norm": 0.0006126696243882179, - "learning_rate": 0.0001999996992254634, - "loss": 46.0, - "step": 10221 - }, - { - "epoch": 0.7815432842097215, - "grad_norm": 0.0005815611802972853, - "learning_rate": 0.00019999969916654868, - "loss": 46.0, - "step": 10222 - }, - { - "epoch": 0.7816197411931112, - "grad_norm": 0.0032430393621325493, - "learning_rate": 0.0001999996991076282, - "loss": 46.0, - "step": 10223 - }, - { - "epoch": 0.7816961981765009, - "grad_norm": 0.0007613180205225945, - "learning_rate": 0.00019999969904870192, - "loss": 46.0, - "step": 10224 - }, - { - "epoch": 0.7817726551598907, - "grad_norm": 0.005472178105264902, - "learning_rate": 0.0001999996989897699, - "loss": 46.0, - "step": 10225 - }, - { - "epoch": 0.7818491121432803, - "grad_norm": 0.006193201057612896, - "learning_rate": 0.0001999996989308321, - "loss": 46.0, - "step": 10226 - }, - { - "epoch": 0.7819255691266701, - "grad_norm": 0.004918051417917013, - "learning_rate": 0.00019999969887188855, - "loss": 46.0, - "step": 10227 - }, - { - "epoch": 0.7820020261100599, - "grad_norm": 0.0014459715457633138, - "learning_rate": 0.0001999996988129392, - "loss": 46.0, - "step": 10228 - }, - { - "epoch": 0.7820784830934495, - "grad_norm": 0.0005648498190566897, - "learning_rate": 0.00019999969875398408, - "loss": 46.0, - "step": 10229 - }, - { - "epoch": 0.7821549400768393, - "grad_norm": 0.0006629560375586152, - "learning_rate": 0.00019999969869502322, - "loss": 46.0, - "step": 10230 - }, - { - "epoch": 0.782231397060229, - "grad_norm": 0.0026891680900007486, - "learning_rate": 0.00019999969863605655, - "loss": 46.0, - "step": 10231 - }, - { - "epoch": 0.7823078540436187, - "grad_norm": 0.004870935343205929, - "learning_rate": 0.00019999969857708414, - "loss": 46.0, - "step": 10232 - }, - { - "epoch": 0.7823843110270084, - "grad_norm": 0.0006571028498001397, - "learning_rate": 0.00019999969851810595, - "loss": 46.0, - "step": 10233 - }, - { - "epoch": 0.7824607680103981, - "grad_norm": 0.0005809029680676758, - "learning_rate": 0.00019999969845912202, - "loss": 46.0, - "step": 10234 - }, - { - "epoch": 0.7825372249937879, - "grad_norm": 0.0005806274130009115, - "learning_rate": 0.00019999969840013226, - "loss": 46.0, - "step": 10235 - }, - { - "epoch": 0.7826136819771776, - "grad_norm": 0.0031394949182868004, - "learning_rate": 0.00019999969834113678, - "loss": 46.0, - "step": 10236 - }, - { - "epoch": 0.7826901389605673, - "grad_norm": 0.004411129746586084, - "learning_rate": 0.0001999996982821355, - "loss": 46.0, - "step": 10237 - }, - { - "epoch": 0.782766595943957, - "grad_norm": 0.0006739192176610231, - "learning_rate": 0.00019999969822312848, - "loss": 46.0, - "step": 10238 - }, - { - "epoch": 0.7828430529273468, - "grad_norm": 0.007350389380007982, - "learning_rate": 0.00019999969816411568, - "loss": 46.0, - "step": 10239 - }, - { - "epoch": 0.7829195099107364, - "grad_norm": 0.0009272333700209856, - "learning_rate": 0.0001999996981050971, - "loss": 46.0, - "step": 10240 - }, - { - "epoch": 0.7829959668941262, - "grad_norm": 0.001023614197038114, - "learning_rate": 0.00019999969804607276, - "loss": 46.0, - "step": 10241 - }, - { - "epoch": 0.783072423877516, - "grad_norm": 0.0007502685184590518, - "learning_rate": 0.00019999969798704264, - "loss": 46.0, - "step": 10242 - }, - { - "epoch": 0.7831488808609056, - "grad_norm": 0.0005459418753162026, - "learning_rate": 0.00019999969792800678, - "loss": 46.0, - "step": 10243 - }, - { - "epoch": 0.7832253378442954, - "grad_norm": 0.0034213298931717873, - "learning_rate": 0.00019999969786896514, - "loss": 46.0, - "step": 10244 - }, - { - "epoch": 0.783301794827685, - "grad_norm": 0.0033136524725705385, - "learning_rate": 0.00019999969780991773, - "loss": 46.0, - "step": 10245 - }, - { - "epoch": 0.7833782518110748, - "grad_norm": 0.0016369274817407131, - "learning_rate": 0.0001999996977508645, - "loss": 46.0, - "step": 10246 - }, - { - "epoch": 0.7834547087944645, - "grad_norm": 0.004078489728271961, - "learning_rate": 0.00019999969769180558, - "loss": 46.0, - "step": 10247 - }, - { - "epoch": 0.7835311657778542, - "grad_norm": 0.0018563922494649887, - "learning_rate": 0.00019999969763274082, - "loss": 46.0, - "step": 10248 - }, - { - "epoch": 0.783607622761244, - "grad_norm": 0.001281578792259097, - "learning_rate": 0.00019999969757367034, - "loss": 46.0, - "step": 10249 - }, - { - "epoch": 0.7836840797446337, - "grad_norm": 0.00502871535718441, - "learning_rate": 0.0001999996975145941, - "loss": 46.0, - "step": 10250 - }, - { - "epoch": 0.7837605367280234, - "grad_norm": 0.0013296377146616578, - "learning_rate": 0.00019999969745551204, - "loss": 46.0, - "step": 10251 - }, - { - "epoch": 0.7838369937114131, - "grad_norm": 0.02295924723148346, - "learning_rate": 0.00019999969739642424, - "loss": 46.0, - "step": 10252 - }, - { - "epoch": 0.7839134506948028, - "grad_norm": 0.0017064297571778297, - "learning_rate": 0.00019999969733733067, - "loss": 46.0, - "step": 10253 - }, - { - "epoch": 0.7839899076781925, - "grad_norm": 0.005058642942458391, - "learning_rate": 0.00019999969727823132, - "loss": 46.0, - "step": 10254 - }, - { - "epoch": 0.7840663646615823, - "grad_norm": 0.0006025419570505619, - "learning_rate": 0.0001999996972191262, - "loss": 46.0, - "step": 10255 - }, - { - "epoch": 0.784142821644972, - "grad_norm": 0.0011545703746378422, - "learning_rate": 0.0001999996971600153, - "loss": 46.0, - "step": 10256 - }, - { - "epoch": 0.7842192786283617, - "grad_norm": 0.002726361621171236, - "learning_rate": 0.00019999969710089865, - "loss": 46.0, - "step": 10257 - }, - { - "epoch": 0.7842957356117515, - "grad_norm": 0.004911541007459164, - "learning_rate": 0.00019999969704177623, - "loss": 46.0, - "step": 10258 - }, - { - "epoch": 0.7843721925951411, - "grad_norm": 0.0012584724463522434, - "learning_rate": 0.00019999969698264805, - "loss": 46.0, - "step": 10259 - }, - { - "epoch": 0.7844486495785309, - "grad_norm": 0.0026344831567257643, - "learning_rate": 0.0001999996969235141, - "loss": 46.0, - "step": 10260 - }, - { - "epoch": 0.7845251065619206, - "grad_norm": 0.005456394515931606, - "learning_rate": 0.00019999969686437433, - "loss": 46.0, - "step": 10261 - }, - { - "epoch": 0.7846015635453103, - "grad_norm": 0.0013098577037453651, - "learning_rate": 0.00019999969680522885, - "loss": 46.0, - "step": 10262 - }, - { - "epoch": 0.7846780205287001, - "grad_norm": 0.002115991897881031, - "learning_rate": 0.0001999996967460776, - "loss": 46.0, - "step": 10263 - }, - { - "epoch": 0.7847544775120897, - "grad_norm": 0.008396445773541927, - "learning_rate": 0.00019999969668692055, - "loss": 46.0, - "step": 10264 - }, - { - "epoch": 0.7848309344954795, - "grad_norm": 0.0008707360248081386, - "learning_rate": 0.00019999969662775775, - "loss": 46.0, - "step": 10265 - }, - { - "epoch": 0.7849073914788692, - "grad_norm": 0.003703216090798378, - "learning_rate": 0.00019999969656858918, - "loss": 46.0, - "step": 10266 - }, - { - "epoch": 0.7849838484622589, - "grad_norm": 0.00292229070328176, - "learning_rate": 0.0001999996965094148, - "loss": 46.0, - "step": 10267 - }, - { - "epoch": 0.7850603054456486, - "grad_norm": 0.0012993005802854896, - "learning_rate": 0.00019999969645023472, - "loss": 46.0, - "step": 10268 - }, - { - "epoch": 0.7851367624290384, - "grad_norm": 0.0010618198430165648, - "learning_rate": 0.00019999969639104882, - "loss": 46.0, - "step": 10269 - }, - { - "epoch": 0.785213219412428, - "grad_norm": 0.001083186361938715, - "learning_rate": 0.00019999969633185716, - "loss": 46.0, - "step": 10270 - }, - { - "epoch": 0.7852896763958178, - "grad_norm": 0.0007116859778761864, - "learning_rate": 0.00019999969627265975, - "loss": 46.0, - "step": 10271 - }, - { - "epoch": 0.7853661333792076, - "grad_norm": 0.0013811420649290085, - "learning_rate": 0.00019999969621345654, - "loss": 46.0, - "step": 10272 - }, - { - "epoch": 0.7854425903625972, - "grad_norm": 0.0006520661991089582, - "learning_rate": 0.00019999969615424758, - "loss": 46.0, - "step": 10273 - }, - { - "epoch": 0.785519047345987, - "grad_norm": 0.0034866214264184237, - "learning_rate": 0.00019999969609503287, - "loss": 46.0, - "step": 10274 - }, - { - "epoch": 0.7855955043293766, - "grad_norm": 0.0006679798243567348, - "learning_rate": 0.00019999969603581234, - "loss": 46.0, - "step": 10275 - }, - { - "epoch": 0.7856719613127664, - "grad_norm": 0.0013419750612229109, - "learning_rate": 0.0001999996959765861, - "loss": 46.0, - "step": 10276 - }, - { - "epoch": 0.7857484182961562, - "grad_norm": 0.0017901909304782748, - "learning_rate": 0.00019999969591735404, - "loss": 46.0, - "step": 10277 - }, - { - "epoch": 0.7858248752795458, - "grad_norm": 0.008774053305387497, - "learning_rate": 0.0001999996958581162, - "loss": 46.0, - "step": 10278 - }, - { - "epoch": 0.7859013322629356, - "grad_norm": 0.0010192327899858356, - "learning_rate": 0.00019999969579887267, - "loss": 46.0, - "step": 10279 - }, - { - "epoch": 0.7859777892463253, - "grad_norm": 0.0008977089892141521, - "learning_rate": 0.0001999996957396233, - "loss": 46.0, - "step": 10280 - }, - { - "epoch": 0.786054246229715, - "grad_norm": 0.0007017637835815549, - "learning_rate": 0.00019999969568036815, - "loss": 46.0, - "step": 10281 - }, - { - "epoch": 0.7861307032131047, - "grad_norm": 0.002642162377014756, - "learning_rate": 0.0001999996956211073, - "loss": 46.0, - "step": 10282 - }, - { - "epoch": 0.7862071601964944, - "grad_norm": 0.001508566434495151, - "learning_rate": 0.00019999969556184062, - "loss": 46.0, - "step": 10283 - }, - { - "epoch": 0.7862836171798842, - "grad_norm": 0.0007882867357693613, - "learning_rate": 0.0001999996955025682, - "loss": 46.0, - "step": 10284 - }, - { - "epoch": 0.7863600741632739, - "grad_norm": 0.0009465863695368171, - "learning_rate": 0.00019999969544329, - "loss": 46.0, - "step": 10285 - }, - { - "epoch": 0.7864365311466636, - "grad_norm": 0.005636065267026424, - "learning_rate": 0.00019999969538400601, - "loss": 46.0, - "step": 10286 - }, - { - "epoch": 0.7865129881300533, - "grad_norm": 0.0009969661477953196, - "learning_rate": 0.0001999996953247163, - "loss": 46.0, - "step": 10287 - }, - { - "epoch": 0.7865894451134431, - "grad_norm": 0.0012537481961771846, - "learning_rate": 0.0001999996952654208, - "loss": 46.0, - "step": 10288 - }, - { - "epoch": 0.7866659020968327, - "grad_norm": 0.0007035795715637505, - "learning_rate": 0.00019999969520611953, - "loss": 46.0, - "step": 10289 - }, - { - "epoch": 0.7867423590802225, - "grad_norm": 0.0008786415564827621, - "learning_rate": 0.00019999969514681248, - "loss": 46.0, - "step": 10290 - }, - { - "epoch": 0.7868188160636123, - "grad_norm": 0.0008917577797546983, - "learning_rate": 0.00019999969508749965, - "loss": 46.0, - "step": 10291 - }, - { - "epoch": 0.7868952730470019, - "grad_norm": 0.009642994031310081, - "learning_rate": 0.00019999969502818108, - "loss": 46.0, - "step": 10292 - }, - { - "epoch": 0.7869717300303917, - "grad_norm": 0.0035529329907149076, - "learning_rate": 0.00019999969496885673, - "loss": 46.0, - "step": 10293 - }, - { - "epoch": 0.7870481870137813, - "grad_norm": 0.0007940498762764037, - "learning_rate": 0.00019999969490952662, - "loss": 46.0, - "step": 10294 - }, - { - "epoch": 0.7871246439971711, - "grad_norm": 0.002146205399185419, - "learning_rate": 0.00019999969485019073, - "loss": 46.0, - "step": 10295 - }, - { - "epoch": 0.7872011009805608, - "grad_norm": 0.0012229300336912274, - "learning_rate": 0.00019999969479084906, - "loss": 46.0, - "step": 10296 - }, - { - "epoch": 0.7872775579639505, - "grad_norm": 0.0012784464051946998, - "learning_rate": 0.00019999969473150162, - "loss": 46.0, - "step": 10297 - }, - { - "epoch": 0.7873540149473403, - "grad_norm": 0.00109715829603374, - "learning_rate": 0.00019999969467214844, - "loss": 46.0, - "step": 10298 - }, - { - "epoch": 0.78743047193073, - "grad_norm": 0.004224192816764116, - "learning_rate": 0.00019999969461278948, - "loss": 46.0, - "step": 10299 - }, - { - "epoch": 0.7875069289141197, - "grad_norm": 0.0013706808676943183, - "learning_rate": 0.00019999969455342472, - "loss": 46.0, - "step": 10300 - }, - { - "epoch": 0.7875833858975094, - "grad_norm": 0.0010997304925695062, - "learning_rate": 0.00019999969449405422, - "loss": 46.0, - "step": 10301 - }, - { - "epoch": 0.7876598428808992, - "grad_norm": 0.003662483301013708, - "learning_rate": 0.00019999969443467794, - "loss": 46.0, - "step": 10302 - }, - { - "epoch": 0.7877362998642888, - "grad_norm": 0.0007888961117714643, - "learning_rate": 0.0001999996943752959, - "loss": 46.0, - "step": 10303 - }, - { - "epoch": 0.7878127568476786, - "grad_norm": 0.0008473096531815827, - "learning_rate": 0.00019999969431590807, - "loss": 46.0, - "step": 10304 - }, - { - "epoch": 0.7878892138310682, - "grad_norm": 0.0004846773808822036, - "learning_rate": 0.0001999996942565145, - "loss": 46.0, - "step": 10305 - }, - { - "epoch": 0.787965670814458, - "grad_norm": 0.0019808681681752205, - "learning_rate": 0.00019999969419711512, - "loss": 46.0, - "step": 10306 - }, - { - "epoch": 0.7880421277978478, - "grad_norm": 0.0046212985180318356, - "learning_rate": 0.00019999969413771003, - "loss": 46.0, - "step": 10307 - }, - { - "epoch": 0.7881185847812374, - "grad_norm": 0.0014043668052181602, - "learning_rate": 0.00019999969407829911, - "loss": 46.0, - "step": 10308 - }, - { - "epoch": 0.7881950417646272, - "grad_norm": 0.0004871142446063459, - "learning_rate": 0.00019999969401888245, - "loss": 46.0, - "step": 10309 - }, - { - "epoch": 0.7882714987480169, - "grad_norm": 0.0039797076024115086, - "learning_rate": 0.00019999969395946004, - "loss": 46.0, - "step": 10310 - }, - { - "epoch": 0.7883479557314066, - "grad_norm": 0.0009994247229769826, - "learning_rate": 0.0001999996939000318, - "loss": 46.0, - "step": 10311 - }, - { - "epoch": 0.7884244127147964, - "grad_norm": 0.0012315146159380674, - "learning_rate": 0.00019999969384059784, - "loss": 46.0, - "step": 10312 - }, - { - "epoch": 0.7885008696981861, - "grad_norm": 0.0006372160860337317, - "learning_rate": 0.00019999969378115811, - "loss": 46.0, - "step": 10313 - }, - { - "epoch": 0.7885773266815758, - "grad_norm": 0.0007672370411455631, - "learning_rate": 0.0001999996937217126, - "loss": 46.0, - "step": 10314 - }, - { - "epoch": 0.7886537836649655, - "grad_norm": 0.0013643394922837615, - "learning_rate": 0.00019999969366226133, - "loss": 46.0, - "step": 10315 - }, - { - "epoch": 0.7887302406483552, - "grad_norm": 0.0003806533932220191, - "learning_rate": 0.00019999969360280425, - "loss": 46.0, - "step": 10316 - }, - { - "epoch": 0.7888066976317449, - "grad_norm": 0.002345460932701826, - "learning_rate": 0.00019999969354334146, - "loss": 46.0, - "step": 10317 - }, - { - "epoch": 0.7888831546151347, - "grad_norm": 0.0020158749539405107, - "learning_rate": 0.00019999969348387286, - "loss": 46.0, - "step": 10318 - }, - { - "epoch": 0.7889596115985243, - "grad_norm": 0.0008241494651883841, - "learning_rate": 0.00019999969342439852, - "loss": 46.0, - "step": 10319 - }, - { - "epoch": 0.7890360685819141, - "grad_norm": 0.005219350568950176, - "learning_rate": 0.00019999969336491837, - "loss": 46.0, - "step": 10320 - }, - { - "epoch": 0.7891125255653039, - "grad_norm": 0.002630602801218629, - "learning_rate": 0.00019999969330543248, - "loss": 46.0, - "step": 10321 - }, - { - "epoch": 0.7891889825486935, - "grad_norm": 0.0018792249029502273, - "learning_rate": 0.00019999969324594082, - "loss": 46.0, - "step": 10322 - }, - { - "epoch": 0.7892654395320833, - "grad_norm": 0.007390186656266451, - "learning_rate": 0.00019999969318644338, - "loss": 46.0, - "step": 10323 - }, - { - "epoch": 0.7893418965154729, - "grad_norm": 0.0010319760767742991, - "learning_rate": 0.00019999969312694017, - "loss": 46.0, - "step": 10324 - }, - { - "epoch": 0.7894183534988627, - "grad_norm": 0.002190877916291356, - "learning_rate": 0.0001999996930674312, - "loss": 46.0, - "step": 10325 - }, - { - "epoch": 0.7894948104822525, - "grad_norm": 0.0005150993238203228, - "learning_rate": 0.00019999969300791645, - "loss": 46.0, - "step": 10326 - }, - { - "epoch": 0.7895712674656421, - "grad_norm": 0.0010977080091834068, - "learning_rate": 0.00019999969294839595, - "loss": 46.0, - "step": 10327 - }, - { - "epoch": 0.7896477244490319, - "grad_norm": 0.007419740781188011, - "learning_rate": 0.00019999969288886967, - "loss": 46.0, - "step": 10328 - }, - { - "epoch": 0.7897241814324216, - "grad_norm": 0.0008296669693663716, - "learning_rate": 0.00019999969282933762, - "loss": 46.0, - "step": 10329 - }, - { - "epoch": 0.7898006384158113, - "grad_norm": 0.006710233632475138, - "learning_rate": 0.0001999996927697998, - "loss": 46.0, - "step": 10330 - }, - { - "epoch": 0.789877095399201, - "grad_norm": 0.008683396503329277, - "learning_rate": 0.0001999996927102562, - "loss": 46.0, - "step": 10331 - }, - { - "epoch": 0.7899535523825908, - "grad_norm": 0.0016000174218788743, - "learning_rate": 0.00019999969265070683, - "loss": 46.0, - "step": 10332 - }, - { - "epoch": 0.7900300093659804, - "grad_norm": 0.0019550633151084185, - "learning_rate": 0.00019999969259115171, - "loss": 46.0, - "step": 10333 - }, - { - "epoch": 0.7901064663493702, - "grad_norm": 0.0013147916179150343, - "learning_rate": 0.0001999996925315908, - "loss": 46.0, - "step": 10334 - }, - { - "epoch": 0.7901829233327599, - "grad_norm": 0.0022051376290619373, - "learning_rate": 0.00019999969247202413, - "loss": 46.0, - "step": 10335 - }, - { - "epoch": 0.7902593803161496, - "grad_norm": 0.0014337224420160055, - "learning_rate": 0.0001999996924124517, - "loss": 46.0, - "step": 10336 - }, - { - "epoch": 0.7903358372995394, - "grad_norm": 0.004368383903056383, - "learning_rate": 0.00019999969235287349, - "loss": 46.0, - "step": 10337 - }, - { - "epoch": 0.790412294282929, - "grad_norm": 0.007982072420418262, - "learning_rate": 0.00019999969229328953, - "loss": 46.0, - "step": 10338 - }, - { - "epoch": 0.7904887512663188, - "grad_norm": 0.0023622328881174326, - "learning_rate": 0.0001999996922336998, - "loss": 46.0, - "step": 10339 - }, - { - "epoch": 0.7905652082497086, - "grad_norm": 0.006284235045313835, - "learning_rate": 0.00019999969217410427, - "loss": 46.0, - "step": 10340 - }, - { - "epoch": 0.7906416652330982, - "grad_norm": 0.019389530643820763, - "learning_rate": 0.00019999969211450296, - "loss": 46.0, - "step": 10341 - }, - { - "epoch": 0.790718122216488, - "grad_norm": 0.0011169412173330784, - "learning_rate": 0.00019999969205489591, - "loss": 46.0, - "step": 10342 - }, - { - "epoch": 0.7907945791998777, - "grad_norm": 0.0009455361287109554, - "learning_rate": 0.0001999996919952831, - "loss": 46.0, - "step": 10343 - }, - { - "epoch": 0.7908710361832674, - "grad_norm": 0.0007107259007170796, - "learning_rate": 0.0001999996919356645, - "loss": 46.0, - "step": 10344 - }, - { - "epoch": 0.7909474931666571, - "grad_norm": 0.0011275571305304766, - "learning_rate": 0.00019999969187604015, - "loss": 46.0, - "step": 10345 - }, - { - "epoch": 0.7910239501500468, - "grad_norm": 0.0015255759935826063, - "learning_rate": 0.00019999969181641, - "loss": 46.0, - "step": 10346 - }, - { - "epoch": 0.7911004071334365, - "grad_norm": 0.003709301119670272, - "learning_rate": 0.0001999996917567741, - "loss": 46.0, - "step": 10347 - }, - { - "epoch": 0.7911768641168263, - "grad_norm": 0.0009050164953805506, - "learning_rate": 0.00019999969169713243, - "loss": 46.0, - "step": 10348 - }, - { - "epoch": 0.791253321100216, - "grad_norm": 0.0017321555642411113, - "learning_rate": 0.000199999691637485, - "loss": 46.0, - "step": 10349 - }, - { - "epoch": 0.7913297780836057, - "grad_norm": 0.005666300188750029, - "learning_rate": 0.00019999969157783178, - "loss": 46.0, - "step": 10350 - }, - { - "epoch": 0.7914062350669955, - "grad_norm": 0.0011831017909571528, - "learning_rate": 0.00019999969151817282, - "loss": 46.0, - "step": 10351 - }, - { - "epoch": 0.7914826920503851, - "grad_norm": 0.0010023321956396103, - "learning_rate": 0.00019999969145850807, - "loss": 46.0, - "step": 10352 - }, - { - "epoch": 0.7915591490337749, - "grad_norm": 0.0016831184038892388, - "learning_rate": 0.0001999996913988375, - "loss": 46.0, - "step": 10353 - }, - { - "epoch": 0.7916356060171645, - "grad_norm": 0.0014527245657518506, - "learning_rate": 0.00019999969133916126, - "loss": 46.0, - "step": 10354 - }, - { - "epoch": 0.7917120630005543, - "grad_norm": 0.008887079544365406, - "learning_rate": 0.00019999969127947918, - "loss": 46.0, - "step": 10355 - }, - { - "epoch": 0.7917885199839441, - "grad_norm": 0.0027604512870311737, - "learning_rate": 0.00019999969121979136, - "loss": 46.0, - "step": 10356 - }, - { - "epoch": 0.7918649769673337, - "grad_norm": 0.003108508652076125, - "learning_rate": 0.0001999996911600978, - "loss": 46.0, - "step": 10357 - }, - { - "epoch": 0.7919414339507235, - "grad_norm": 0.0009689987055025995, - "learning_rate": 0.0001999996911003984, - "loss": 46.0, - "step": 10358 - }, - { - "epoch": 0.7920178909341132, - "grad_norm": 0.0038455866742879152, - "learning_rate": 0.00019999969104069328, - "loss": 46.0, - "step": 10359 - }, - { - "epoch": 0.7920943479175029, - "grad_norm": 0.0010078534251078963, - "learning_rate": 0.00019999969098098236, - "loss": 46.0, - "step": 10360 - }, - { - "epoch": 0.7921708049008926, - "grad_norm": 0.0011107171885669231, - "learning_rate": 0.0001999996909212657, - "loss": 46.0, - "step": 10361 - }, - { - "epoch": 0.7922472618842824, - "grad_norm": 0.0013087345287203789, - "learning_rate": 0.00019999969086154323, - "loss": 46.0, - "step": 10362 - }, - { - "epoch": 0.792323718867672, - "grad_norm": 0.0017957051750272512, - "learning_rate": 0.00019999969080181505, - "loss": 46.0, - "step": 10363 - }, - { - "epoch": 0.7924001758510618, - "grad_norm": 0.0013963307719677687, - "learning_rate": 0.00019999969074208104, - "loss": 46.0, - "step": 10364 - }, - { - "epoch": 0.7924766328344515, - "grad_norm": 0.0005416147178038955, - "learning_rate": 0.0001999996906823413, - "loss": 46.0, - "step": 10365 - }, - { - "epoch": 0.7925530898178412, - "grad_norm": 0.00043462254689075053, - "learning_rate": 0.00019999969062259578, - "loss": 46.0, - "step": 10366 - }, - { - "epoch": 0.792629546801231, - "grad_norm": 0.0008399094804190099, - "learning_rate": 0.00019999969056284448, - "loss": 46.0, - "step": 10367 - }, - { - "epoch": 0.7927060037846206, - "grad_norm": 0.005973747931420803, - "learning_rate": 0.00019999969050308743, - "loss": 46.0, - "step": 10368 - }, - { - "epoch": 0.7927824607680104, - "grad_norm": 0.0009809896582737565, - "learning_rate": 0.0001999996904433246, - "loss": 46.0, - "step": 10369 - }, - { - "epoch": 0.7928589177514002, - "grad_norm": 0.002539858454838395, - "learning_rate": 0.000199999690383556, - "loss": 46.0, - "step": 10370 - }, - { - "epoch": 0.7929353747347898, - "grad_norm": 0.0010078662307932973, - "learning_rate": 0.00019999969032378164, - "loss": 46.0, - "step": 10371 - }, - { - "epoch": 0.7930118317181796, - "grad_norm": 0.003399464301764965, - "learning_rate": 0.0001999996902640015, - "loss": 46.0, - "step": 10372 - }, - { - "epoch": 0.7930882887015693, - "grad_norm": 0.0008202534518204629, - "learning_rate": 0.00019999969020421558, - "loss": 46.0, - "step": 10373 - }, - { - "epoch": 0.793164745684959, - "grad_norm": 0.0017233005492016673, - "learning_rate": 0.00019999969014442392, - "loss": 46.0, - "step": 10374 - }, - { - "epoch": 0.7932412026683487, - "grad_norm": 0.010492541827261448, - "learning_rate": 0.00019999969008462648, - "loss": 46.0, - "step": 10375 - }, - { - "epoch": 0.7933176596517384, - "grad_norm": 0.0009122971096076071, - "learning_rate": 0.00019999969002482324, - "loss": 46.0, - "step": 10376 - }, - { - "epoch": 0.7933941166351282, - "grad_norm": 0.0024784451816231012, - "learning_rate": 0.0001999996899650143, - "loss": 46.0, - "step": 10377 - }, - { - "epoch": 0.7934705736185179, - "grad_norm": 0.004751408938318491, - "learning_rate": 0.0001999996899051995, - "loss": 46.0, - "step": 10378 - }, - { - "epoch": 0.7935470306019076, - "grad_norm": 0.0006340565741993487, - "learning_rate": 0.000199999689845379, - "loss": 46.0, - "step": 10379 - }, - { - "epoch": 0.7936234875852973, - "grad_norm": 0.0019558495841920376, - "learning_rate": 0.00019999968978555267, - "loss": 46.0, - "step": 10380 - }, - { - "epoch": 0.7936999445686871, - "grad_norm": 0.006155429407954216, - "learning_rate": 0.00019999968972572062, - "loss": 46.0, - "step": 10381 - }, - { - "epoch": 0.7937764015520767, - "grad_norm": 0.003428464289754629, - "learning_rate": 0.0001999996896658828, - "loss": 46.0, - "step": 10382 - }, - { - "epoch": 0.7938528585354665, - "grad_norm": 0.0026111796032637358, - "learning_rate": 0.0001999996896060392, - "loss": 46.0, - "step": 10383 - }, - { - "epoch": 0.7939293155188561, - "grad_norm": 0.000979086384177208, - "learning_rate": 0.00019999968954618983, - "loss": 46.0, - "step": 10384 - }, - { - "epoch": 0.7940057725022459, - "grad_norm": 0.0015180292539298534, - "learning_rate": 0.0001999996894863347, - "loss": 46.0, - "step": 10385 - }, - { - "epoch": 0.7940822294856357, - "grad_norm": 0.004844078794121742, - "learning_rate": 0.00019999968942647378, - "loss": 46.0, - "step": 10386 - }, - { - "epoch": 0.7941586864690253, - "grad_norm": 0.0005243421765044332, - "learning_rate": 0.0001999996893666071, - "loss": 46.0, - "step": 10387 - }, - { - "epoch": 0.7942351434524151, - "grad_norm": 0.0013097668997943401, - "learning_rate": 0.00019999968930673465, - "loss": 46.0, - "step": 10388 - }, - { - "epoch": 0.7943116004358048, - "grad_norm": 0.0018528528744354844, - "learning_rate": 0.00019999968924685644, - "loss": 46.0, - "step": 10389 - }, - { - "epoch": 0.7943880574191945, - "grad_norm": 0.0007080174400471151, - "learning_rate": 0.00019999968918697243, - "loss": 46.0, - "step": 10390 - }, - { - "epoch": 0.7944645144025843, - "grad_norm": 0.0022853994742035866, - "learning_rate": 0.0001999996891270827, - "loss": 46.0, - "step": 10391 - }, - { - "epoch": 0.794540971385974, - "grad_norm": 0.000591819582041353, - "learning_rate": 0.00019999968906718715, - "loss": 46.0, - "step": 10392 - }, - { - "epoch": 0.7946174283693637, - "grad_norm": 0.00037622853415086865, - "learning_rate": 0.00019999968900728584, - "loss": 46.0, - "step": 10393 - }, - { - "epoch": 0.7946938853527534, - "grad_norm": 0.0004591784963849932, - "learning_rate": 0.0001999996889473788, - "loss": 46.0, - "step": 10394 - }, - { - "epoch": 0.7947703423361431, - "grad_norm": 0.001266356324777007, - "learning_rate": 0.00019999968888746597, - "loss": 46.0, - "step": 10395 - }, - { - "epoch": 0.7948467993195328, - "grad_norm": 0.001246437313966453, - "learning_rate": 0.00019999968882754735, - "loss": 46.0, - "step": 10396 - }, - { - "epoch": 0.7949232563029226, - "grad_norm": 0.0015925847692415118, - "learning_rate": 0.00019999968876762298, - "loss": 46.0, - "step": 10397 - }, - { - "epoch": 0.7949997132863122, - "grad_norm": 0.0017261621542274952, - "learning_rate": 0.00019999968870769284, - "loss": 46.0, - "step": 10398 - }, - { - "epoch": 0.795076170269702, - "grad_norm": 0.001677903113886714, - "learning_rate": 0.00019999968864775692, - "loss": 46.0, - "step": 10399 - }, - { - "epoch": 0.7951526272530918, - "grad_norm": 0.0011187755735591054, - "learning_rate": 0.00019999968858781526, - "loss": 46.0, - "step": 10400 - }, - { - "epoch": 0.7952290842364814, - "grad_norm": 0.0026313646230846643, - "learning_rate": 0.0001999996885278678, - "loss": 46.0, - "step": 10401 - }, - { - "epoch": 0.7953055412198712, - "grad_norm": 0.0035825709346681833, - "learning_rate": 0.0001999996884679146, - "loss": 46.0, - "step": 10402 - }, - { - "epoch": 0.7953819982032609, - "grad_norm": 0.0006700589437969029, - "learning_rate": 0.00019999968840795558, - "loss": 46.0, - "step": 10403 - }, - { - "epoch": 0.7954584551866506, - "grad_norm": 0.0045277769677340984, - "learning_rate": 0.00019999968834799082, - "loss": 46.0, - "step": 10404 - }, - { - "epoch": 0.7955349121700404, - "grad_norm": 0.001390729914419353, - "learning_rate": 0.00019999968828802032, - "loss": 46.0, - "step": 10405 - }, - { - "epoch": 0.79561136915343, - "grad_norm": 0.001328416052274406, - "learning_rate": 0.000199999688228044, - "loss": 46.0, - "step": 10406 - }, - { - "epoch": 0.7956878261368198, - "grad_norm": 0.0007779764127917588, - "learning_rate": 0.00019999968816806194, - "loss": 46.0, - "step": 10407 - }, - { - "epoch": 0.7957642831202095, - "grad_norm": 0.0014895581407472491, - "learning_rate": 0.0001999996881080741, - "loss": 46.0, - "step": 10408 - }, - { - "epoch": 0.7958407401035992, - "grad_norm": 0.0009545899811200798, - "learning_rate": 0.0001999996880480805, - "loss": 46.0, - "step": 10409 - }, - { - "epoch": 0.7959171970869889, - "grad_norm": 0.00044253855594433844, - "learning_rate": 0.0001999996879880811, - "loss": 46.0, - "step": 10410 - }, - { - "epoch": 0.7959936540703787, - "grad_norm": 0.0026480432134121656, - "learning_rate": 0.00019999968792807596, - "loss": 46.0, - "step": 10411 - }, - { - "epoch": 0.7960701110537683, - "grad_norm": 0.0008752365247346461, - "learning_rate": 0.00019999968786806505, - "loss": 46.0, - "step": 10412 - }, - { - "epoch": 0.7961465680371581, - "grad_norm": 0.0005672427942045033, - "learning_rate": 0.00019999968780804836, - "loss": 46.0, - "step": 10413 - }, - { - "epoch": 0.7962230250205478, - "grad_norm": 0.0009923962643370032, - "learning_rate": 0.00019999968774802592, - "loss": 46.0, - "step": 10414 - }, - { - "epoch": 0.7962994820039375, - "grad_norm": 0.0012087089708074927, - "learning_rate": 0.00019999968768799771, - "loss": 46.0, - "step": 10415 - }, - { - "epoch": 0.7963759389873273, - "grad_norm": 0.004050011280924082, - "learning_rate": 0.00019999968762796368, - "loss": 46.0, - "step": 10416 - }, - { - "epoch": 0.7964523959707169, - "grad_norm": 0.0016919528134167194, - "learning_rate": 0.00019999968756792392, - "loss": 46.0, - "step": 10417 - }, - { - "epoch": 0.7965288529541067, - "grad_norm": 0.0013214662903919816, - "learning_rate": 0.00019999968750787842, - "loss": 46.0, - "step": 10418 - }, - { - "epoch": 0.7966053099374965, - "grad_norm": 0.002011113567277789, - "learning_rate": 0.00019999968744782712, - "loss": 46.0, - "step": 10419 - }, - { - "epoch": 0.7966817669208861, - "grad_norm": 0.003301904071122408, - "learning_rate": 0.00019999968738777004, - "loss": 46.0, - "step": 10420 - }, - { - "epoch": 0.7967582239042759, - "grad_norm": 0.0023058359511196613, - "learning_rate": 0.0001999996873277072, - "loss": 46.0, - "step": 10421 - }, - { - "epoch": 0.7968346808876656, - "grad_norm": 0.00020906612917315215, - "learning_rate": 0.00019999968726763857, - "loss": 46.0, - "step": 10422 - }, - { - "epoch": 0.7969111378710553, - "grad_norm": 0.004945096559822559, - "learning_rate": 0.00019999968720756418, - "loss": 46.0, - "step": 10423 - }, - { - "epoch": 0.796987594854445, - "grad_norm": 0.0030127139762043953, - "learning_rate": 0.00019999968714748406, - "loss": 46.0, - "step": 10424 - }, - { - "epoch": 0.7970640518378347, - "grad_norm": 0.00133426277898252, - "learning_rate": 0.00019999968708739815, - "loss": 46.0, - "step": 10425 - }, - { - "epoch": 0.7971405088212244, - "grad_norm": 0.0004186853184364736, - "learning_rate": 0.00019999968702730646, - "loss": 46.0, - "step": 10426 - }, - { - "epoch": 0.7972169658046142, - "grad_norm": 0.0022164955735206604, - "learning_rate": 0.000199999686967209, - "loss": 46.0, - "step": 10427 - }, - { - "epoch": 0.7972934227880039, - "grad_norm": 0.0007602505502291024, - "learning_rate": 0.0001999996869071058, - "loss": 46.0, - "step": 10428 - }, - { - "epoch": 0.7973698797713936, - "grad_norm": 0.0011165005853399634, - "learning_rate": 0.00019999968684699678, - "loss": 46.0, - "step": 10429 - }, - { - "epoch": 0.7974463367547834, - "grad_norm": 0.001903340220451355, - "learning_rate": 0.00019999968678688202, - "loss": 46.0, - "step": 10430 - }, - { - "epoch": 0.797522793738173, - "grad_norm": 0.0011740203481167555, - "learning_rate": 0.00019999968672676147, - "loss": 46.0, - "step": 10431 - }, - { - "epoch": 0.7975992507215628, - "grad_norm": 0.003760506631806493, - "learning_rate": 0.00019999968666663517, - "loss": 46.0, - "step": 10432 - }, - { - "epoch": 0.7976757077049526, - "grad_norm": 0.0007388813537545502, - "learning_rate": 0.0001999996866065031, - "loss": 46.0, - "step": 10433 - }, - { - "epoch": 0.7977521646883422, - "grad_norm": 0.000699860043823719, - "learning_rate": 0.00019999968654636524, - "loss": 46.0, - "step": 10434 - }, - { - "epoch": 0.797828621671732, - "grad_norm": 0.0009509455412626266, - "learning_rate": 0.00019999968648622165, - "loss": 46.0, - "step": 10435 - }, - { - "epoch": 0.7979050786551216, - "grad_norm": 0.0013718181289732456, - "learning_rate": 0.00019999968642607225, - "loss": 46.0, - "step": 10436 - }, - { - "epoch": 0.7979815356385114, - "grad_norm": 0.0018047906924039125, - "learning_rate": 0.0001999996863659171, - "loss": 46.0, - "step": 10437 - }, - { - "epoch": 0.7980579926219011, - "grad_norm": 0.000929017725866288, - "learning_rate": 0.0001999996863057562, - "loss": 46.0, - "step": 10438 - }, - { - "epoch": 0.7981344496052908, - "grad_norm": 0.007691532839089632, - "learning_rate": 0.0001999996862455895, - "loss": 46.0, - "step": 10439 - }, - { - "epoch": 0.7982109065886805, - "grad_norm": 0.0008329108823090792, - "learning_rate": 0.00019999968618541705, - "loss": 46.0, - "step": 10440 - }, - { - "epoch": 0.7982873635720703, - "grad_norm": 0.0005319429328665137, - "learning_rate": 0.0001999996861252388, - "loss": 46.0, - "step": 10441 - }, - { - "epoch": 0.79836382055546, - "grad_norm": 0.0009459342109039426, - "learning_rate": 0.0001999996860650548, - "loss": 46.0, - "step": 10442 - }, - { - "epoch": 0.7984402775388497, - "grad_norm": 0.0009394192602485418, - "learning_rate": 0.00019999968600486505, - "loss": 46.0, - "step": 10443 - }, - { - "epoch": 0.7985167345222395, - "grad_norm": 0.002740062540397048, - "learning_rate": 0.00019999968594466952, - "loss": 46.0, - "step": 10444 - }, - { - "epoch": 0.7985931915056291, - "grad_norm": 0.001409469055943191, - "learning_rate": 0.0001999996858844682, - "loss": 46.0, - "step": 10445 - }, - { - "epoch": 0.7986696484890189, - "grad_norm": 0.004490251187235117, - "learning_rate": 0.00019999968582426112, - "loss": 46.0, - "step": 10446 - }, - { - "epoch": 0.7987461054724085, - "grad_norm": 0.0017022996908053756, - "learning_rate": 0.0001999996857640483, - "loss": 46.0, - "step": 10447 - }, - { - "epoch": 0.7988225624557983, - "grad_norm": 0.001921234535984695, - "learning_rate": 0.00019999968570382968, - "loss": 46.0, - "step": 10448 - }, - { - "epoch": 0.7988990194391881, - "grad_norm": 0.0009661572403274477, - "learning_rate": 0.00019999968564360528, - "loss": 46.0, - "step": 10449 - }, - { - "epoch": 0.7989754764225777, - "grad_norm": 0.0013993048341944814, - "learning_rate": 0.00019999968558337514, - "loss": 46.0, - "step": 10450 - }, - { - "epoch": 0.7990519334059675, - "grad_norm": 0.001107872580178082, - "learning_rate": 0.0001999996855231392, - "loss": 46.0, - "step": 10451 - }, - { - "epoch": 0.7991283903893572, - "grad_norm": 0.0006258583744056523, - "learning_rate": 0.0001999996854628975, - "loss": 46.0, - "step": 10452 - }, - { - "epoch": 0.7992048473727469, - "grad_norm": 0.0011497348314151168, - "learning_rate": 0.00019999968540265005, - "loss": 46.0, - "step": 10453 - }, - { - "epoch": 0.7992813043561366, - "grad_norm": 0.015417809598147869, - "learning_rate": 0.00019999968534239684, - "loss": 46.0, - "step": 10454 - }, - { - "epoch": 0.7993577613395263, - "grad_norm": 0.0012206232640892267, - "learning_rate": 0.0001999996852821378, - "loss": 46.0, - "step": 10455 - }, - { - "epoch": 0.7994342183229161, - "grad_norm": 0.0011034088674932718, - "learning_rate": 0.00019999968522187305, - "loss": 46.0, - "step": 10456 - }, - { - "epoch": 0.7995106753063058, - "grad_norm": 0.0007100504008121789, - "learning_rate": 0.00019999968516160252, - "loss": 46.0, - "step": 10457 - }, - { - "epoch": 0.7995871322896955, - "grad_norm": 0.0025360225699841976, - "learning_rate": 0.0001999996851013262, - "loss": 46.0, - "step": 10458 - }, - { - "epoch": 0.7996635892730852, - "grad_norm": 0.0007951929001137614, - "learning_rate": 0.00019999968504104412, - "loss": 46.0, - "step": 10459 - }, - { - "epoch": 0.799740046256475, - "grad_norm": 0.0011786696268245578, - "learning_rate": 0.00019999968498075627, - "loss": 46.0, - "step": 10460 - }, - { - "epoch": 0.7998165032398646, - "grad_norm": 0.004398555960506201, - "learning_rate": 0.00019999968492046265, - "loss": 46.0, - "step": 10461 - }, - { - "epoch": 0.7998929602232544, - "grad_norm": 0.0007128039142116904, - "learning_rate": 0.00019999968486016328, - "loss": 46.0, - "step": 10462 - }, - { - "epoch": 0.7999694172066442, - "grad_norm": 0.0024774540215730667, - "learning_rate": 0.00019999968479985812, - "loss": 46.0, - "step": 10463 - }, - { - "epoch": 0.8000458741900338, - "grad_norm": 0.007371298037469387, - "learning_rate": 0.0001999996847395472, - "loss": 46.0, - "step": 10464 - }, - { - "epoch": 0.8001223311734236, - "grad_norm": 0.0011722599156200886, - "learning_rate": 0.0001999996846792305, - "loss": 46.0, - "step": 10465 - }, - { - "epoch": 0.8001987881568132, - "grad_norm": 0.00029976724181324244, - "learning_rate": 0.000199999684618908, - "loss": 46.0, - "step": 10466 - }, - { - "epoch": 0.800275245140203, - "grad_norm": 0.0010954602621495724, - "learning_rate": 0.0001999996845585798, - "loss": 46.0, - "step": 10467 - }, - { - "epoch": 0.8003517021235927, - "grad_norm": 0.0011874493211507797, - "learning_rate": 0.00019999968449824578, - "loss": 46.0, - "step": 10468 - }, - { - "epoch": 0.8004281591069824, - "grad_norm": 0.0016141465166583657, - "learning_rate": 0.00019999968443790603, - "loss": 46.0, - "step": 10469 - }, - { - "epoch": 0.8005046160903722, - "grad_norm": 0.0012601014459505677, - "learning_rate": 0.00019999968437756048, - "loss": 46.0, - "step": 10470 - }, - { - "epoch": 0.8005810730737619, - "grad_norm": 0.0020059049129486084, - "learning_rate": 0.00019999968431720917, - "loss": 46.0, - "step": 10471 - }, - { - "epoch": 0.8006575300571516, - "grad_norm": 0.0005422209505923092, - "learning_rate": 0.00019999968425685207, - "loss": 46.0, - "step": 10472 - }, - { - "epoch": 0.8007339870405413, - "grad_norm": 0.0010006512748077512, - "learning_rate": 0.00019999968419648925, - "loss": 46.0, - "step": 10473 - }, - { - "epoch": 0.8008104440239311, - "grad_norm": 0.0040566627867519855, - "learning_rate": 0.0001999996841361206, - "loss": 46.0, - "step": 10474 - }, - { - "epoch": 0.8008869010073207, - "grad_norm": 0.0007985107367858291, - "learning_rate": 0.00019999968407574624, - "loss": 46.0, - "step": 10475 - }, - { - "epoch": 0.8009633579907105, - "grad_norm": 0.0014343177899718285, - "learning_rate": 0.00019999968401536607, - "loss": 46.0, - "step": 10476 - }, - { - "epoch": 0.8010398149741002, - "grad_norm": 0.00050419254694134, - "learning_rate": 0.00019999968395498013, - "loss": 46.0, - "step": 10477 - }, - { - "epoch": 0.8011162719574899, - "grad_norm": 0.0014299041358754039, - "learning_rate": 0.00019999968389458844, - "loss": 46.0, - "step": 10478 - }, - { - "epoch": 0.8011927289408797, - "grad_norm": 0.0025366810150444508, - "learning_rate": 0.00019999968383419095, - "loss": 46.0, - "step": 10479 - }, - { - "epoch": 0.8012691859242693, - "grad_norm": 0.004242269322276115, - "learning_rate": 0.00019999968377378772, - "loss": 46.0, - "step": 10480 - }, - { - "epoch": 0.8013456429076591, - "grad_norm": 0.0012129811802878976, - "learning_rate": 0.0001999996837133787, - "loss": 46.0, - "step": 10481 - }, - { - "epoch": 0.8014220998910488, - "grad_norm": 0.003892101813107729, - "learning_rate": 0.00019999968365296396, - "loss": 46.0, - "step": 10482 - }, - { - "epoch": 0.8014985568744385, - "grad_norm": 0.0007331674569286406, - "learning_rate": 0.00019999968359254343, - "loss": 46.0, - "step": 10483 - }, - { - "epoch": 0.8015750138578283, - "grad_norm": 0.0009749920573085546, - "learning_rate": 0.00019999968353211708, - "loss": 46.0, - "step": 10484 - }, - { - "epoch": 0.8016514708412179, - "grad_norm": 0.009521284140646458, - "learning_rate": 0.000199999683471685, - "loss": 46.0, - "step": 10485 - }, - { - "epoch": 0.8017279278246077, - "grad_norm": 0.0014507188461720943, - "learning_rate": 0.00019999968341124716, - "loss": 46.0, - "step": 10486 - }, - { - "epoch": 0.8018043848079974, - "grad_norm": 0.00235563050955534, - "learning_rate": 0.00019999968335080354, - "loss": 46.0, - "step": 10487 - }, - { - "epoch": 0.8018808417913871, - "grad_norm": 0.010525641962885857, - "learning_rate": 0.00019999968329035411, - "loss": 46.0, - "step": 10488 - }, - { - "epoch": 0.8019572987747768, - "grad_norm": 0.00044382206397131085, - "learning_rate": 0.00019999968322989897, - "loss": 46.0, - "step": 10489 - }, - { - "epoch": 0.8020337557581666, - "grad_norm": 0.0006507197394967079, - "learning_rate": 0.00019999968316943803, - "loss": 46.0, - "step": 10490 - }, - { - "epoch": 0.8021102127415562, - "grad_norm": 0.0005255329888314009, - "learning_rate": 0.00019999968310897135, - "loss": 46.0, - "step": 10491 - }, - { - "epoch": 0.802186669724946, - "grad_norm": 0.0008625674527138472, - "learning_rate": 0.00019999968304849889, - "loss": 46.0, - "step": 10492 - }, - { - "epoch": 0.8022631267083358, - "grad_norm": 0.0030411214102059603, - "learning_rate": 0.00019999968298802063, - "loss": 46.0, - "step": 10493 - }, - { - "epoch": 0.8023395836917254, - "grad_norm": 0.0010720710270106792, - "learning_rate": 0.00019999968292753662, - "loss": 46.0, - "step": 10494 - }, - { - "epoch": 0.8024160406751152, - "grad_norm": 0.003892915789037943, - "learning_rate": 0.00019999968286704684, - "loss": 46.0, - "step": 10495 - }, - { - "epoch": 0.8024924976585048, - "grad_norm": 0.0009724826668389142, - "learning_rate": 0.0001999996828065513, - "loss": 46.0, - "step": 10496 - }, - { - "epoch": 0.8025689546418946, - "grad_norm": 0.0009155191364698112, - "learning_rate": 0.00019999968274604998, - "loss": 46.0, - "step": 10497 - }, - { - "epoch": 0.8026454116252844, - "grad_norm": 0.007622969336807728, - "learning_rate": 0.0001999996826855429, - "loss": 46.0, - "step": 10498 - }, - { - "epoch": 0.802721868608674, - "grad_norm": 0.0004769606457557529, - "learning_rate": 0.00019999968262503004, - "loss": 46.0, - "step": 10499 - }, - { - "epoch": 0.8027983255920638, - "grad_norm": 0.0014206409687176347, - "learning_rate": 0.00019999968256451142, - "loss": 46.0, - "step": 10500 - }, - { - "epoch": 0.8028747825754535, - "grad_norm": 0.0012016526889055967, - "learning_rate": 0.00019999968250398702, - "loss": 46.0, - "step": 10501 - }, - { - "epoch": 0.8029512395588432, - "grad_norm": 0.0016582179814577103, - "learning_rate": 0.00019999968244345686, - "loss": 46.0, - "step": 10502 - }, - { - "epoch": 0.8030276965422329, - "grad_norm": 0.0010918057523667812, - "learning_rate": 0.00019999968238292091, - "loss": 46.0, - "step": 10503 - }, - { - "epoch": 0.8031041535256227, - "grad_norm": 0.0010731570655480027, - "learning_rate": 0.00019999968232237923, - "loss": 46.0, - "step": 10504 - }, - { - "epoch": 0.8031806105090123, - "grad_norm": 0.0007092529558576643, - "learning_rate": 0.00019999968226183174, - "loss": 46.0, - "step": 10505 - }, - { - "epoch": 0.8032570674924021, - "grad_norm": 0.0007144889677874744, - "learning_rate": 0.00019999968220127848, - "loss": 46.0, - "step": 10506 - }, - { - "epoch": 0.8033335244757918, - "grad_norm": 0.0019835426937788725, - "learning_rate": 0.0001999996821407195, - "loss": 46.0, - "step": 10507 - }, - { - "epoch": 0.8034099814591815, - "grad_norm": 0.0013765008188784122, - "learning_rate": 0.00019999968208015472, - "loss": 46.0, - "step": 10508 - }, - { - "epoch": 0.8034864384425713, - "grad_norm": 0.003526052925735712, - "learning_rate": 0.00019999968201958417, - "loss": 46.0, - "step": 10509 - }, - { - "epoch": 0.8035628954259609, - "grad_norm": 0.01059807650744915, - "learning_rate": 0.00019999968195900784, - "loss": 46.0, - "step": 10510 - }, - { - "epoch": 0.8036393524093507, - "grad_norm": 0.00045217445585876703, - "learning_rate": 0.00019999968189842574, - "loss": 46.0, - "step": 10511 - }, - { - "epoch": 0.8037158093927405, - "grad_norm": 0.0024424255825579166, - "learning_rate": 0.0001999996818378379, - "loss": 46.0, - "step": 10512 - }, - { - "epoch": 0.8037922663761301, - "grad_norm": 0.0009442882146686316, - "learning_rate": 0.00019999968177724427, - "loss": 46.0, - "step": 10513 - }, - { - "epoch": 0.8038687233595199, - "grad_norm": 0.0008289010147564113, - "learning_rate": 0.00019999968171664488, - "loss": 46.0, - "step": 10514 - }, - { - "epoch": 0.8039451803429095, - "grad_norm": 0.0006760719697922468, - "learning_rate": 0.0001999996816560397, - "loss": 46.0, - "step": 10515 - }, - { - "epoch": 0.8040216373262993, - "grad_norm": 0.0017378043849021196, - "learning_rate": 0.00019999968159542877, - "loss": 46.0, - "step": 10516 - }, - { - "epoch": 0.804098094309689, - "grad_norm": 0.0046377889811992645, - "learning_rate": 0.00019999968153481206, - "loss": 46.0, - "step": 10517 - }, - { - "epoch": 0.8041745512930787, - "grad_norm": 0.0007898385520093143, - "learning_rate": 0.00019999968147418957, - "loss": 46.0, - "step": 10518 - }, - { - "epoch": 0.8042510082764684, - "grad_norm": 0.00434258533641696, - "learning_rate": 0.00019999968141356137, - "loss": 46.0, - "step": 10519 - }, - { - "epoch": 0.8043274652598582, - "grad_norm": 0.0014697652077302337, - "learning_rate": 0.00019999968135292733, - "loss": 46.0, - "step": 10520 - }, - { - "epoch": 0.8044039222432479, - "grad_norm": 0.001740574254654348, - "learning_rate": 0.00019999968129228755, - "loss": 46.0, - "step": 10521 - }, - { - "epoch": 0.8044803792266376, - "grad_norm": 0.000758209964260459, - "learning_rate": 0.000199999681231642, - "loss": 46.0, - "step": 10522 - }, - { - "epoch": 0.8045568362100274, - "grad_norm": 0.001774105941876769, - "learning_rate": 0.00019999968117099067, - "loss": 46.0, - "step": 10523 - }, - { - "epoch": 0.804633293193417, - "grad_norm": 0.001304331235587597, - "learning_rate": 0.00019999968111033357, - "loss": 46.0, - "step": 10524 - }, - { - "epoch": 0.8047097501768068, - "grad_norm": 0.0006807984900660813, - "learning_rate": 0.0001999996810496707, - "loss": 46.0, - "step": 10525 - }, - { - "epoch": 0.8047862071601964, - "grad_norm": 0.0006133479764685035, - "learning_rate": 0.00019999968098900208, - "loss": 46.0, - "step": 10526 - }, - { - "epoch": 0.8048626641435862, - "grad_norm": 0.0009403585572727025, - "learning_rate": 0.0001999996809283277, - "loss": 46.0, - "step": 10527 - }, - { - "epoch": 0.804939121126976, - "grad_norm": 0.0006824696902185678, - "learning_rate": 0.00019999968086764752, - "loss": 46.0, - "step": 10528 - }, - { - "epoch": 0.8050155781103656, - "grad_norm": 0.001689778408035636, - "learning_rate": 0.00019999968080696155, - "loss": 46.0, - "step": 10529 - }, - { - "epoch": 0.8050920350937554, - "grad_norm": 0.0009304852574132383, - "learning_rate": 0.00019999968074626987, - "loss": 46.0, - "step": 10530 - }, - { - "epoch": 0.8051684920771451, - "grad_norm": 0.0010523987002670765, - "learning_rate": 0.0001999996806855724, - "loss": 46.0, - "step": 10531 - }, - { - "epoch": 0.8052449490605348, - "grad_norm": 0.0015985339414328337, - "learning_rate": 0.00019999968062486912, - "loss": 46.0, - "step": 10532 - }, - { - "epoch": 0.8053214060439245, - "grad_norm": 0.0005110496422275901, - "learning_rate": 0.00019999968056416014, - "loss": 46.0, - "step": 10533 - }, - { - "epoch": 0.8053978630273143, - "grad_norm": 0.0008613889222033322, - "learning_rate": 0.00019999968050344534, - "loss": 46.0, - "step": 10534 - }, - { - "epoch": 0.805474320010704, - "grad_norm": 0.0006032785749994218, - "learning_rate": 0.00019999968044272478, - "loss": 46.0, - "step": 10535 - }, - { - "epoch": 0.8055507769940937, - "grad_norm": 0.0021466233301907778, - "learning_rate": 0.00019999968038199846, - "loss": 46.0, - "step": 10536 - }, - { - "epoch": 0.8056272339774834, - "grad_norm": 0.0006493675173260272, - "learning_rate": 0.00019999968032126636, - "loss": 46.0, - "step": 10537 - }, - { - "epoch": 0.8057036909608731, - "grad_norm": 0.00239465176127851, - "learning_rate": 0.00019999968026052848, - "loss": 46.0, - "step": 10538 - }, - { - "epoch": 0.8057801479442629, - "grad_norm": 0.000779195106588304, - "learning_rate": 0.00019999968019978486, - "loss": 46.0, - "step": 10539 - }, - { - "epoch": 0.8058566049276525, - "grad_norm": 0.005790887400507927, - "learning_rate": 0.00019999968013903544, - "loss": 46.0, - "step": 10540 - }, - { - "epoch": 0.8059330619110423, - "grad_norm": 0.00028756994288414717, - "learning_rate": 0.0001999996800782803, - "loss": 46.0, - "step": 10541 - }, - { - "epoch": 0.8060095188944321, - "grad_norm": 0.0033163148909807205, - "learning_rate": 0.00019999968001751937, - "loss": 46.0, - "step": 10542 - }, - { - "epoch": 0.8060859758778217, - "grad_norm": 0.0010280621936544776, - "learning_rate": 0.00019999967995675263, - "loss": 46.0, - "step": 10543 - }, - { - "epoch": 0.8061624328612115, - "grad_norm": 0.0008250966784544289, - "learning_rate": 0.00019999967989598014, - "loss": 46.0, - "step": 10544 - }, - { - "epoch": 0.8062388898446012, - "grad_norm": 0.0018860476557165384, - "learning_rate": 0.0001999996798352019, - "loss": 46.0, - "step": 10545 - }, - { - "epoch": 0.8063153468279909, - "grad_norm": 0.00048766477266326547, - "learning_rate": 0.0001999996797744179, - "loss": 46.0, - "step": 10546 - }, - { - "epoch": 0.8063918038113806, - "grad_norm": 0.0006458511925302446, - "learning_rate": 0.0001999996797136281, - "loss": 46.0, - "step": 10547 - }, - { - "epoch": 0.8064682607947703, - "grad_norm": 0.0010112955933436751, - "learning_rate": 0.00019999967965283254, - "loss": 46.0, - "step": 10548 - }, - { - "epoch": 0.8065447177781601, - "grad_norm": 0.0039372690953314304, - "learning_rate": 0.00019999967959203122, - "loss": 46.0, - "step": 10549 - }, - { - "epoch": 0.8066211747615498, - "grad_norm": 0.0015946135390549898, - "learning_rate": 0.00019999967953122412, - "loss": 46.0, - "step": 10550 - }, - { - "epoch": 0.8066976317449395, - "grad_norm": 0.0023373279254883528, - "learning_rate": 0.00019999967947041124, - "loss": 46.0, - "step": 10551 - }, - { - "epoch": 0.8067740887283292, - "grad_norm": 0.003955328371375799, - "learning_rate": 0.0001999996794095926, - "loss": 46.0, - "step": 10552 - }, - { - "epoch": 0.806850545711719, - "grad_norm": 0.0017770793056115508, - "learning_rate": 0.00019999967934876823, - "loss": 46.0, - "step": 10553 - }, - { - "epoch": 0.8069270026951086, - "grad_norm": 0.0029069725424051285, - "learning_rate": 0.00019999967928793804, - "loss": 46.0, - "step": 10554 - }, - { - "epoch": 0.8070034596784984, - "grad_norm": 0.0012229240965098143, - "learning_rate": 0.00019999967922710207, - "loss": 46.0, - "step": 10555 - }, - { - "epoch": 0.807079916661888, - "grad_norm": 0.0008900503744371235, - "learning_rate": 0.0001999996791662604, - "loss": 46.0, - "step": 10556 - }, - { - "epoch": 0.8071563736452778, - "grad_norm": 0.000807922100648284, - "learning_rate": 0.00019999967910541288, - "loss": 46.0, - "step": 10557 - }, - { - "epoch": 0.8072328306286676, - "grad_norm": 0.00023124992731027305, - "learning_rate": 0.00019999967904455964, - "loss": 46.0, - "step": 10558 - }, - { - "epoch": 0.8073092876120572, - "grad_norm": 0.0033947890624403954, - "learning_rate": 0.0001999996789837006, - "loss": 46.0, - "step": 10559 - }, - { - "epoch": 0.807385744595447, - "grad_norm": 0.0008910118485800922, - "learning_rate": 0.00019999967892283583, - "loss": 46.0, - "step": 10560 - }, - { - "epoch": 0.8074622015788367, - "grad_norm": 0.011881954967975616, - "learning_rate": 0.00019999967886196528, - "loss": 46.0, - "step": 10561 - }, - { - "epoch": 0.8075386585622264, - "grad_norm": 0.0012975921854376793, - "learning_rate": 0.00019999967880108895, - "loss": 46.0, - "step": 10562 - }, - { - "epoch": 0.8076151155456162, - "grad_norm": 0.011599060148000717, - "learning_rate": 0.00019999967874020685, - "loss": 46.0, - "step": 10563 - }, - { - "epoch": 0.8076915725290059, - "grad_norm": 0.017311716452240944, - "learning_rate": 0.00019999967867931898, - "loss": 46.0, - "step": 10564 - }, - { - "epoch": 0.8077680295123956, - "grad_norm": 0.004300176165997982, - "learning_rate": 0.00019999967861842534, - "loss": 46.0, - "step": 10565 - }, - { - "epoch": 0.8078444864957853, - "grad_norm": 0.001962413312867284, - "learning_rate": 0.00019999967855752592, - "loss": 46.0, - "step": 10566 - }, - { - "epoch": 0.807920943479175, - "grad_norm": 0.0010369386291131377, - "learning_rate": 0.00019999967849662072, - "loss": 46.0, - "step": 10567 - }, - { - "epoch": 0.8079974004625647, - "grad_norm": 0.0007597673102281988, - "learning_rate": 0.00019999967843570979, - "loss": 46.0, - "step": 10568 - }, - { - "epoch": 0.8080738574459545, - "grad_norm": 0.0006968869711272418, - "learning_rate": 0.00019999967837479307, - "loss": 46.0, - "step": 10569 - }, - { - "epoch": 0.8081503144293442, - "grad_norm": 0.0018348144367337227, - "learning_rate": 0.0001999996783138706, - "loss": 46.0, - "step": 10570 - }, - { - "epoch": 0.8082267714127339, - "grad_norm": 0.0006603803485631943, - "learning_rate": 0.00019999967825294236, - "loss": 46.0, - "step": 10571 - }, - { - "epoch": 0.8083032283961237, - "grad_norm": 0.0015955101698637009, - "learning_rate": 0.00019999967819200832, - "loss": 46.0, - "step": 10572 - }, - { - "epoch": 0.8083796853795133, - "grad_norm": 0.00055325822904706, - "learning_rate": 0.0001999996781310685, - "loss": 46.0, - "step": 10573 - }, - { - "epoch": 0.8084561423629031, - "grad_norm": 0.0011604411993175745, - "learning_rate": 0.00019999967807012294, - "loss": 46.0, - "step": 10574 - }, - { - "epoch": 0.8085325993462928, - "grad_norm": 0.0025593554601073265, - "learning_rate": 0.00019999967800917161, - "loss": 46.0, - "step": 10575 - }, - { - "epoch": 0.8086090563296825, - "grad_norm": 0.0004935767501592636, - "learning_rate": 0.00019999967794821451, - "loss": 46.0, - "step": 10576 - }, - { - "epoch": 0.8086855133130723, - "grad_norm": 0.002077204640954733, - "learning_rate": 0.00019999967788725164, - "loss": 46.0, - "step": 10577 - }, - { - "epoch": 0.8087619702964619, - "grad_norm": 0.0006066091009415686, - "learning_rate": 0.00019999967782628302, - "loss": 46.0, - "step": 10578 - }, - { - "epoch": 0.8088384272798517, - "grad_norm": 0.0005193352117203176, - "learning_rate": 0.00019999967776530858, - "loss": 46.0, - "step": 10579 - }, - { - "epoch": 0.8089148842632414, - "grad_norm": 0.0038145852740854025, - "learning_rate": 0.0001999996777043284, - "loss": 46.0, - "step": 10580 - }, - { - "epoch": 0.8089913412466311, - "grad_norm": 0.0009923079051077366, - "learning_rate": 0.00019999967764334247, - "loss": 46.0, - "step": 10581 - }, - { - "epoch": 0.8090677982300208, - "grad_norm": 0.0008851053426042199, - "learning_rate": 0.00019999967758235074, - "loss": 46.0, - "step": 10582 - }, - { - "epoch": 0.8091442552134106, - "grad_norm": 0.002756862435489893, - "learning_rate": 0.00019999967752135325, - "loss": 46.0, - "step": 10583 - }, - { - "epoch": 0.8092207121968003, - "grad_norm": 0.0009387233876623213, - "learning_rate": 0.00019999967746035, - "loss": 46.0, - "step": 10584 - }, - { - "epoch": 0.80929716918019, - "grad_norm": 0.0013590651797130704, - "learning_rate": 0.000199999677399341, - "loss": 46.0, - "step": 10585 - }, - { - "epoch": 0.8093736261635797, - "grad_norm": 0.006492145359516144, - "learning_rate": 0.00019999967733832618, - "loss": 46.0, - "step": 10586 - }, - { - "epoch": 0.8094500831469694, - "grad_norm": 0.0019619599916040897, - "learning_rate": 0.00019999967727730563, - "loss": 46.0, - "step": 10587 - }, - { - "epoch": 0.8095265401303592, - "grad_norm": 0.0008877667132765055, - "learning_rate": 0.00019999967721627928, - "loss": 46.0, - "step": 10588 - }, - { - "epoch": 0.8096029971137488, - "grad_norm": 0.0017375691095367074, - "learning_rate": 0.00019999967715524718, - "loss": 46.0, - "step": 10589 - }, - { - "epoch": 0.8096794540971386, - "grad_norm": 0.005117062479257584, - "learning_rate": 0.00019999967709420928, - "loss": 46.0, - "step": 10590 - }, - { - "epoch": 0.8097559110805284, - "grad_norm": 0.0035905144177377224, - "learning_rate": 0.00019999967703316564, - "loss": 46.0, - "step": 10591 - }, - { - "epoch": 0.809832368063918, - "grad_norm": 0.00300415325909853, - "learning_rate": 0.00019999967697211624, - "loss": 46.0, - "step": 10592 - }, - { - "epoch": 0.8099088250473078, - "grad_norm": 0.000932666240260005, - "learning_rate": 0.00019999967691106105, - "loss": 46.0, - "step": 10593 - }, - { - "epoch": 0.8099852820306975, - "grad_norm": 0.0014116576639935374, - "learning_rate": 0.0001999996768500001, - "loss": 46.0, - "step": 10594 - }, - { - "epoch": 0.8100617390140872, - "grad_norm": 0.0007366169593296945, - "learning_rate": 0.00019999967678893337, - "loss": 46.0, - "step": 10595 - }, - { - "epoch": 0.8101381959974769, - "grad_norm": 0.002435180824249983, - "learning_rate": 0.0001999996767278609, - "loss": 46.0, - "step": 10596 - }, - { - "epoch": 0.8102146529808666, - "grad_norm": 0.0018935957923531532, - "learning_rate": 0.0001999996766667826, - "loss": 46.0, - "step": 10597 - }, - { - "epoch": 0.8102911099642564, - "grad_norm": 0.0006910284864716232, - "learning_rate": 0.00019999967660569857, - "loss": 46.0, - "step": 10598 - }, - { - "epoch": 0.8103675669476461, - "grad_norm": 0.001545571954920888, - "learning_rate": 0.0001999996765446088, - "loss": 46.0, - "step": 10599 - }, - { - "epoch": 0.8104440239310358, - "grad_norm": 0.000960881938226521, - "learning_rate": 0.00019999967648351322, - "loss": 46.0, - "step": 10600 - }, - { - "epoch": 0.8105204809144255, - "grad_norm": 0.0007635874208062887, - "learning_rate": 0.0001999996764224119, - "loss": 46.0, - "step": 10601 - }, - { - "epoch": 0.8105969378978153, - "grad_norm": 0.0011450700694695115, - "learning_rate": 0.0001999996763613048, - "loss": 46.0, - "step": 10602 - }, - { - "epoch": 0.8106733948812049, - "grad_norm": 0.002021638909354806, - "learning_rate": 0.0001999996763001919, - "loss": 46.0, - "step": 10603 - }, - { - "epoch": 0.8107498518645947, - "grad_norm": 0.0009661731892265379, - "learning_rate": 0.00019999967623907325, - "loss": 46.0, - "step": 10604 - }, - { - "epoch": 0.8108263088479845, - "grad_norm": 0.0011234754929319024, - "learning_rate": 0.00019999967617794883, - "loss": 46.0, - "step": 10605 - }, - { - "epoch": 0.8109027658313741, - "grad_norm": 0.0013114104513078928, - "learning_rate": 0.00019999967611681867, - "loss": 46.0, - "step": 10606 - }, - { - "epoch": 0.8109792228147639, - "grad_norm": 0.0027648042887449265, - "learning_rate": 0.00019999967605568268, - "loss": 46.0, - "step": 10607 - }, - { - "epoch": 0.8110556797981535, - "grad_norm": 0.008482534438371658, - "learning_rate": 0.00019999967599454096, - "loss": 46.0, - "step": 10608 - }, - { - "epoch": 0.8111321367815433, - "grad_norm": 0.000731699401512742, - "learning_rate": 0.00019999967593339348, - "loss": 46.0, - "step": 10609 - }, - { - "epoch": 0.811208593764933, - "grad_norm": 0.0006798803224228323, - "learning_rate": 0.00019999967587224022, - "loss": 46.0, - "step": 10610 - }, - { - "epoch": 0.8112850507483227, - "grad_norm": 0.0024888545740395784, - "learning_rate": 0.0001999996758110812, - "loss": 46.0, - "step": 10611 - }, - { - "epoch": 0.8113615077317125, - "grad_norm": 0.0004918433842249215, - "learning_rate": 0.00019999967574991639, - "loss": 46.0, - "step": 10612 - }, - { - "epoch": 0.8114379647151022, - "grad_norm": 0.0011814339086413383, - "learning_rate": 0.0001999996756887458, - "loss": 46.0, - "step": 10613 - }, - { - "epoch": 0.8115144216984919, - "grad_norm": 0.0029570814222097397, - "learning_rate": 0.00019999967562756948, - "loss": 46.0, - "step": 10614 - }, - { - "epoch": 0.8115908786818816, - "grad_norm": 0.0005436950013972819, - "learning_rate": 0.00019999967556638736, - "loss": 46.0, - "step": 10615 - }, - { - "epoch": 0.8116673356652713, - "grad_norm": 0.0006963139167055488, - "learning_rate": 0.00019999967550519946, - "loss": 46.0, - "step": 10616 - }, - { - "epoch": 0.811743792648661, - "grad_norm": 0.00921026524156332, - "learning_rate": 0.00019999967544400584, - "loss": 46.0, - "step": 10617 - }, - { - "epoch": 0.8118202496320508, - "grad_norm": 0.0007407678058370948, - "learning_rate": 0.0001999996753828064, - "loss": 46.0, - "step": 10618 - }, - { - "epoch": 0.8118967066154404, - "grad_norm": 0.0011966772144660354, - "learning_rate": 0.00019999967532160123, - "loss": 46.0, - "step": 10619 - }, - { - "epoch": 0.8119731635988302, - "grad_norm": 0.0012969464296475053, - "learning_rate": 0.00019999967526039027, - "loss": 46.0, - "step": 10620 - }, - { - "epoch": 0.81204962058222, - "grad_norm": 0.0015509049408137798, - "learning_rate": 0.00019999967519917356, - "loss": 46.0, - "step": 10621 - }, - { - "epoch": 0.8121260775656096, - "grad_norm": 0.0048314714804291725, - "learning_rate": 0.00019999967513795105, - "loss": 46.0, - "step": 10622 - }, - { - "epoch": 0.8122025345489994, - "grad_norm": 0.0007092577288858593, - "learning_rate": 0.00019999967507672276, - "loss": 46.0, - "step": 10623 - }, - { - "epoch": 0.8122789915323891, - "grad_norm": 0.0033081660512834787, - "learning_rate": 0.00019999967501548876, - "loss": 46.0, - "step": 10624 - }, - { - "epoch": 0.8123554485157788, - "grad_norm": 0.0013496287865564227, - "learning_rate": 0.00019999967495424895, - "loss": 46.0, - "step": 10625 - }, - { - "epoch": 0.8124319054991685, - "grad_norm": 0.003140041371807456, - "learning_rate": 0.00019999967489300335, - "loss": 46.0, - "step": 10626 - }, - { - "epoch": 0.8125083624825582, - "grad_norm": 0.000635093601886183, - "learning_rate": 0.00019999967483175202, - "loss": 46.0, - "step": 10627 - }, - { - "epoch": 0.812584819465948, - "grad_norm": 0.0047507272101938725, - "learning_rate": 0.0001999996747704949, - "loss": 46.0, - "step": 10628 - }, - { - "epoch": 0.8126612764493377, - "grad_norm": 0.0018666168907657266, - "learning_rate": 0.00019999967470923203, - "loss": 46.0, - "step": 10629 - }, - { - "epoch": 0.8127377334327274, - "grad_norm": 0.0006282232934609056, - "learning_rate": 0.00019999967464796336, - "loss": 46.0, - "step": 10630 - }, - { - "epoch": 0.8128141904161171, - "grad_norm": 0.0007475577294826508, - "learning_rate": 0.00019999967458668897, - "loss": 46.0, - "step": 10631 - }, - { - "epoch": 0.8128906473995069, - "grad_norm": 0.0014038359513506293, - "learning_rate": 0.00019999967452540875, - "loss": 46.0, - "step": 10632 - }, - { - "epoch": 0.8129671043828965, - "grad_norm": 0.0006167926476337016, - "learning_rate": 0.0001999996744641228, - "loss": 46.0, - "step": 10633 - }, - { - "epoch": 0.8130435613662863, - "grad_norm": 0.007281303871423006, - "learning_rate": 0.00019999967440283105, - "loss": 46.0, - "step": 10634 - }, - { - "epoch": 0.8131200183496761, - "grad_norm": 0.004556306637823582, - "learning_rate": 0.00019999967434153356, - "loss": 46.0, - "step": 10635 - }, - { - "epoch": 0.8131964753330657, - "grad_norm": 0.0009176505845971406, - "learning_rate": 0.0001999996742802303, - "loss": 46.0, - "step": 10636 - }, - { - "epoch": 0.8132729323164555, - "grad_norm": 0.0008387612760998309, - "learning_rate": 0.00019999967421892127, - "loss": 46.0, - "step": 10637 - }, - { - "epoch": 0.8133493892998451, - "grad_norm": 0.00030251938733272254, - "learning_rate": 0.00019999967415760644, - "loss": 46.0, - "step": 10638 - }, - { - "epoch": 0.8134258462832349, - "grad_norm": 0.003133848076686263, - "learning_rate": 0.00019999967409628586, - "loss": 46.0, - "step": 10639 - }, - { - "epoch": 0.8135023032666246, - "grad_norm": 0.001175959245301783, - "learning_rate": 0.00019999967403495954, - "loss": 46.0, - "step": 10640 - }, - { - "epoch": 0.8135787602500143, - "grad_norm": 0.0019240969559177756, - "learning_rate": 0.0001999996739736274, - "loss": 46.0, - "step": 10641 - }, - { - "epoch": 0.8136552172334041, - "grad_norm": 0.0012036896077916026, - "learning_rate": 0.00019999967391228952, - "loss": 46.0, - "step": 10642 - }, - { - "epoch": 0.8137316742167938, - "grad_norm": 0.0007384893833659589, - "learning_rate": 0.00019999967385094588, - "loss": 46.0, - "step": 10643 - }, - { - "epoch": 0.8138081312001835, - "grad_norm": 0.0023318736348301172, - "learning_rate": 0.00019999967378959646, - "loss": 46.0, - "step": 10644 - }, - { - "epoch": 0.8138845881835732, - "grad_norm": 0.003980925772339106, - "learning_rate": 0.00019999967372824124, - "loss": 46.0, - "step": 10645 - }, - { - "epoch": 0.8139610451669629, - "grad_norm": 0.0014317728346213698, - "learning_rate": 0.00019999967366688028, - "loss": 46.0, - "step": 10646 - }, - { - "epoch": 0.8140375021503526, - "grad_norm": 0.00195276306476444, - "learning_rate": 0.00019999967360551354, - "loss": 46.0, - "step": 10647 - }, - { - "epoch": 0.8141139591337424, - "grad_norm": 0.002367905108258128, - "learning_rate": 0.00019999967354414105, - "loss": 46.0, - "step": 10648 - }, - { - "epoch": 0.814190416117132, - "grad_norm": 0.00027691235300153494, - "learning_rate": 0.00019999967348276277, - "loss": 46.0, - "step": 10649 - }, - { - "epoch": 0.8142668731005218, - "grad_norm": 0.0026305727660655975, - "learning_rate": 0.00019999967342137874, - "loss": 46.0, - "step": 10650 - }, - { - "epoch": 0.8143433300839116, - "grad_norm": 0.008274825289845467, - "learning_rate": 0.00019999967335998894, - "loss": 46.0, - "step": 10651 - }, - { - "epoch": 0.8144197870673012, - "grad_norm": 0.0010693571530282497, - "learning_rate": 0.00019999967329859336, - "loss": 46.0, - "step": 10652 - }, - { - "epoch": 0.814496244050691, - "grad_norm": 0.0006609584670513868, - "learning_rate": 0.000199999673237192, - "loss": 46.0, - "step": 10653 - }, - { - "epoch": 0.8145727010340807, - "grad_norm": 0.0014914325438439846, - "learning_rate": 0.00019999967317578488, - "loss": 46.0, - "step": 10654 - }, - { - "epoch": 0.8146491580174704, - "grad_norm": 0.0011444612173363566, - "learning_rate": 0.000199999673114372, - "loss": 46.0, - "step": 10655 - }, - { - "epoch": 0.8147256150008602, - "grad_norm": 0.0005577148986048996, - "learning_rate": 0.00019999967305295332, - "loss": 46.0, - "step": 10656 - }, - { - "epoch": 0.8148020719842498, - "grad_norm": 0.0010017806198447943, - "learning_rate": 0.0001999996729915289, - "loss": 46.0, - "step": 10657 - }, - { - "epoch": 0.8148785289676396, - "grad_norm": 0.002900887979194522, - "learning_rate": 0.0001999996729300987, - "loss": 46.0, - "step": 10658 - }, - { - "epoch": 0.8149549859510293, - "grad_norm": 0.005152315832674503, - "learning_rate": 0.00019999967286866275, - "loss": 46.0, - "step": 10659 - }, - { - "epoch": 0.815031442934419, - "grad_norm": 0.0006522878538817167, - "learning_rate": 0.000199999672807221, - "loss": 46.0, - "step": 10660 - }, - { - "epoch": 0.8151078999178087, - "grad_norm": 0.0017765983939170837, - "learning_rate": 0.00019999967274577353, - "loss": 46.0, - "step": 10661 - }, - { - "epoch": 0.8151843569011985, - "grad_norm": 0.0010508804116398096, - "learning_rate": 0.00019999967268432024, - "loss": 46.0, - "step": 10662 - }, - { - "epoch": 0.8152608138845882, - "grad_norm": 0.0006718711229041219, - "learning_rate": 0.00019999967262286118, - "loss": 46.0, - "step": 10663 - }, - { - "epoch": 0.8153372708679779, - "grad_norm": 0.00036310870200395584, - "learning_rate": 0.0001999996725613964, - "loss": 46.0, - "step": 10664 - }, - { - "epoch": 0.8154137278513677, - "grad_norm": 0.0056223091669380665, - "learning_rate": 0.0001999996724999258, - "loss": 46.0, - "step": 10665 - }, - { - "epoch": 0.8154901848347573, - "grad_norm": 0.0005277669406495988, - "learning_rate": 0.00019999967243844945, - "loss": 46.0, - "step": 10666 - }, - { - "epoch": 0.8155666418181471, - "grad_norm": 0.006589485332369804, - "learning_rate": 0.00019999967237696733, - "loss": 46.0, - "step": 10667 - }, - { - "epoch": 0.8156430988015367, - "grad_norm": 0.0006965463981032372, - "learning_rate": 0.00019999967231547943, - "loss": 46.0, - "step": 10668 - }, - { - "epoch": 0.8157195557849265, - "grad_norm": 0.0004891663556918502, - "learning_rate": 0.00019999967225398576, - "loss": 46.0, - "step": 10669 - }, - { - "epoch": 0.8157960127683163, - "grad_norm": 0.0007956529152579606, - "learning_rate": 0.00019999967219248635, - "loss": 46.0, - "step": 10670 - }, - { - "epoch": 0.8158724697517059, - "grad_norm": 0.0005942249554209411, - "learning_rate": 0.00019999967213098113, - "loss": 46.0, - "step": 10671 - }, - { - "epoch": 0.8159489267350957, - "grad_norm": 0.0014059175737202168, - "learning_rate": 0.00019999967206947016, - "loss": 46.0, - "step": 10672 - }, - { - "epoch": 0.8160253837184854, - "grad_norm": 0.006083428859710693, - "learning_rate": 0.00019999967200795346, - "loss": 46.0, - "step": 10673 - }, - { - "epoch": 0.8161018407018751, - "grad_norm": 0.0006601167260669172, - "learning_rate": 0.00019999967194643095, - "loss": 46.0, - "step": 10674 - }, - { - "epoch": 0.8161782976852648, - "grad_norm": 0.0009683783282525837, - "learning_rate": 0.00019999967188490266, - "loss": 46.0, - "step": 10675 - }, - { - "epoch": 0.8162547546686546, - "grad_norm": 0.0008553531370125711, - "learning_rate": 0.00019999967182336863, - "loss": 46.0, - "step": 10676 - }, - { - "epoch": 0.8163312116520443, - "grad_norm": 0.0009982092306017876, - "learning_rate": 0.0001999996717618288, - "loss": 46.0, - "step": 10677 - }, - { - "epoch": 0.816407668635434, - "grad_norm": 0.008247574791312218, - "learning_rate": 0.00019999967170028322, - "loss": 46.0, - "step": 10678 - }, - { - "epoch": 0.8164841256188237, - "grad_norm": 0.00040816611726768315, - "learning_rate": 0.00019999967163873185, - "loss": 46.0, - "step": 10679 - }, - { - "epoch": 0.8165605826022134, - "grad_norm": 0.0005039381212554872, - "learning_rate": 0.00019999967157717475, - "loss": 46.0, - "step": 10680 - }, - { - "epoch": 0.8166370395856032, - "grad_norm": 0.0011511798948049545, - "learning_rate": 0.00019999967151561183, - "loss": 46.0, - "step": 10681 - }, - { - "epoch": 0.8167134965689928, - "grad_norm": 0.0011001338716596365, - "learning_rate": 0.00019999967145404316, - "loss": 46.0, - "step": 10682 - }, - { - "epoch": 0.8167899535523826, - "grad_norm": 0.0010085070971399546, - "learning_rate": 0.00019999967139246874, - "loss": 46.0, - "step": 10683 - }, - { - "epoch": 0.8168664105357724, - "grad_norm": 0.0016138849314302206, - "learning_rate": 0.00019999967133088855, - "loss": 46.0, - "step": 10684 - }, - { - "epoch": 0.816942867519162, - "grad_norm": 0.0036172098480165005, - "learning_rate": 0.0001999996712693026, - "loss": 46.0, - "step": 10685 - }, - { - "epoch": 0.8170193245025518, - "grad_norm": 0.005899055395275354, - "learning_rate": 0.00019999967120771082, - "loss": 46.0, - "step": 10686 - }, - { - "epoch": 0.8170957814859414, - "grad_norm": 0.002268416341394186, - "learning_rate": 0.00019999967114611331, - "loss": 46.0, - "step": 10687 - }, - { - "epoch": 0.8171722384693312, - "grad_norm": 0.0010936105391010642, - "learning_rate": 0.00019999967108451006, - "loss": 46.0, - "step": 10688 - }, - { - "epoch": 0.8172486954527209, - "grad_norm": 0.0028971240390092134, - "learning_rate": 0.00019999967102290098, - "loss": 46.0, - "step": 10689 - }, - { - "epoch": 0.8173251524361106, - "grad_norm": 0.0011245894711464643, - "learning_rate": 0.00019999967096128617, - "loss": 46.0, - "step": 10690 - }, - { - "epoch": 0.8174016094195004, - "grad_norm": 0.0011584118474274874, - "learning_rate": 0.0001999996708996656, - "loss": 46.0, - "step": 10691 - }, - { - "epoch": 0.8174780664028901, - "grad_norm": 0.0005201590829528868, - "learning_rate": 0.00019999967083803922, - "loss": 46.0, - "step": 10692 - }, - { - "epoch": 0.8175545233862798, - "grad_norm": 0.0021525018382817507, - "learning_rate": 0.00019999967077640712, - "loss": 46.0, - "step": 10693 - }, - { - "epoch": 0.8176309803696695, - "grad_norm": 0.0022422950714826584, - "learning_rate": 0.00019999967071476923, - "loss": 46.0, - "step": 10694 - }, - { - "epoch": 0.8177074373530593, - "grad_norm": 0.0012664056848734617, - "learning_rate": 0.00019999967065312556, - "loss": 46.0, - "step": 10695 - }, - { - "epoch": 0.8177838943364489, - "grad_norm": 0.001776279415935278, - "learning_rate": 0.00019999967059147611, - "loss": 46.0, - "step": 10696 - }, - { - "epoch": 0.8178603513198387, - "grad_norm": 0.00044295971747487783, - "learning_rate": 0.00019999967052982092, - "loss": 46.0, - "step": 10697 - }, - { - "epoch": 0.8179368083032283, - "grad_norm": 0.0013613871997222304, - "learning_rate": 0.00019999967046815993, - "loss": 46.0, - "step": 10698 - }, - { - "epoch": 0.8180132652866181, - "grad_norm": 0.007926327176392078, - "learning_rate": 0.0001999996704064932, - "loss": 46.0, - "step": 10699 - }, - { - "epoch": 0.8180897222700079, - "grad_norm": 0.0012601747876033187, - "learning_rate": 0.0001999996703448207, - "loss": 46.0, - "step": 10700 - }, - { - "epoch": 0.8181661792533975, - "grad_norm": 0.0035691813100129366, - "learning_rate": 0.0001999996702831424, - "loss": 46.0, - "step": 10701 - }, - { - "epoch": 0.8182426362367873, - "grad_norm": 0.001062513911165297, - "learning_rate": 0.00019999967022145832, - "loss": 46.0, - "step": 10702 - }, - { - "epoch": 0.818319093220177, - "grad_norm": 0.0005897298105992377, - "learning_rate": 0.00019999967015976852, - "loss": 46.0, - "step": 10703 - }, - { - "epoch": 0.8183955502035667, - "grad_norm": 0.0011999744456261396, - "learning_rate": 0.00019999967009807294, - "loss": 46.0, - "step": 10704 - }, - { - "epoch": 0.8184720071869565, - "grad_norm": 0.0008478070958517492, - "learning_rate": 0.0001999996700363716, - "loss": 46.0, - "step": 10705 - }, - { - "epoch": 0.8185484641703462, - "grad_norm": 0.0010699164122343063, - "learning_rate": 0.00019999966997466447, - "loss": 46.0, - "step": 10706 - }, - { - "epoch": 0.8186249211537359, - "grad_norm": 0.0023799254558980465, - "learning_rate": 0.00019999966991295155, - "loss": 46.0, - "step": 10707 - }, - { - "epoch": 0.8187013781371256, - "grad_norm": 0.0009910025401040912, - "learning_rate": 0.00019999966985123288, - "loss": 46.0, - "step": 10708 - }, - { - "epoch": 0.8187778351205153, - "grad_norm": 0.0004909580457024276, - "learning_rate": 0.00019999966978950844, - "loss": 46.0, - "step": 10709 - }, - { - "epoch": 0.818854292103905, - "grad_norm": 0.000413259316701442, - "learning_rate": 0.00019999966972777825, - "loss": 46.0, - "step": 10710 - }, - { - "epoch": 0.8189307490872948, - "grad_norm": 0.002241626149043441, - "learning_rate": 0.00019999966966604226, - "loss": 46.0, - "step": 10711 - }, - { - "epoch": 0.8190072060706844, - "grad_norm": 0.00330294668674469, - "learning_rate": 0.0001999996696043005, - "loss": 46.0, - "step": 10712 - }, - { - "epoch": 0.8190836630540742, - "grad_norm": 0.002269588178023696, - "learning_rate": 0.000199999669542553, - "loss": 46.0, - "step": 10713 - }, - { - "epoch": 0.819160120037464, - "grad_norm": 0.0020843122620135546, - "learning_rate": 0.0001999996694807997, - "loss": 46.0, - "step": 10714 - }, - { - "epoch": 0.8192365770208536, - "grad_norm": 0.001133975456468761, - "learning_rate": 0.00019999966941904065, - "loss": 46.0, - "step": 10715 - }, - { - "epoch": 0.8193130340042434, - "grad_norm": 0.0030970703810453415, - "learning_rate": 0.00019999966935727584, - "loss": 46.0, - "step": 10716 - }, - { - "epoch": 0.819389490987633, - "grad_norm": 0.0015342723345384002, - "learning_rate": 0.00019999966929550524, - "loss": 46.0, - "step": 10717 - }, - { - "epoch": 0.8194659479710228, - "grad_norm": 0.0004633585922420025, - "learning_rate": 0.0001999996692337289, - "loss": 46.0, - "step": 10718 - }, - { - "epoch": 0.8195424049544126, - "grad_norm": 0.001683680689893663, - "learning_rate": 0.00019999966917194674, - "loss": 46.0, - "step": 10719 - }, - { - "epoch": 0.8196188619378022, - "grad_norm": 0.0003087381483055651, - "learning_rate": 0.00019999966911015885, - "loss": 46.0, - "step": 10720 - }, - { - "epoch": 0.819695318921192, - "grad_norm": 0.001977110281586647, - "learning_rate": 0.00019999966904836518, - "loss": 46.0, - "step": 10721 - }, - { - "epoch": 0.8197717759045817, - "grad_norm": 0.01460284274071455, - "learning_rate": 0.00019999966898656574, - "loss": 46.0, - "step": 10722 - }, - { - "epoch": 0.8198482328879714, - "grad_norm": 0.0020444500260055065, - "learning_rate": 0.00019999966892476052, - "loss": 46.0, - "step": 10723 - }, - { - "epoch": 0.8199246898713611, - "grad_norm": 0.0009977342560887337, - "learning_rate": 0.00019999966886294956, - "loss": 46.0, - "step": 10724 - }, - { - "epoch": 0.8200011468547509, - "grad_norm": 0.0009037595009431243, - "learning_rate": 0.0001999996688011328, - "loss": 46.0, - "step": 10725 - }, - { - "epoch": 0.8200776038381405, - "grad_norm": 0.0007984795374795794, - "learning_rate": 0.0001999996687393103, - "loss": 46.0, - "step": 10726 - }, - { - "epoch": 0.8201540608215303, - "grad_norm": 0.0008109110058285296, - "learning_rate": 0.00019999966867748198, - "loss": 46.0, - "step": 10727 - }, - { - "epoch": 0.82023051780492, - "grad_norm": 0.0009472433011978865, - "learning_rate": 0.00019999966861564795, - "loss": 46.0, - "step": 10728 - }, - { - "epoch": 0.8203069747883097, - "grad_norm": 0.00224841246381402, - "learning_rate": 0.0001999996685538081, - "loss": 46.0, - "step": 10729 - }, - { - "epoch": 0.8203834317716995, - "grad_norm": 0.00199357932433486, - "learning_rate": 0.0001999996684919625, - "loss": 46.0, - "step": 10730 - }, - { - "epoch": 0.8204598887550891, - "grad_norm": 0.013042778708040714, - "learning_rate": 0.00019999966843011114, - "loss": 46.0, - "step": 10731 - }, - { - "epoch": 0.8205363457384789, - "grad_norm": 0.0010406305082142353, - "learning_rate": 0.00019999966836825402, - "loss": 46.0, - "step": 10732 - }, - { - "epoch": 0.8206128027218687, - "grad_norm": 0.000800828798674047, - "learning_rate": 0.00019999966830639112, - "loss": 46.0, - "step": 10733 - }, - { - "epoch": 0.8206892597052583, - "grad_norm": 0.0023250156082212925, - "learning_rate": 0.00019999966824452243, - "loss": 46.0, - "step": 10734 - }, - { - "epoch": 0.8207657166886481, - "grad_norm": 0.0008186423219740391, - "learning_rate": 0.00019999966818264799, - "loss": 46.0, - "step": 10735 - }, - { - "epoch": 0.8208421736720378, - "grad_norm": 0.0007703353185206652, - "learning_rate": 0.00019999966812076777, - "loss": 46.0, - "step": 10736 - }, - { - "epoch": 0.8209186306554275, - "grad_norm": 0.0005793662858195603, - "learning_rate": 0.0001999996680588818, - "loss": 46.0, - "step": 10737 - }, - { - "epoch": 0.8209950876388172, - "grad_norm": 0.0016379449516534805, - "learning_rate": 0.00019999966799699005, - "loss": 46.0, - "step": 10738 - }, - { - "epoch": 0.8210715446222069, - "grad_norm": 0.0008216226706281304, - "learning_rate": 0.00019999966793509254, - "loss": 46.0, - "step": 10739 - }, - { - "epoch": 0.8211480016055966, - "grad_norm": 0.0015827440656721592, - "learning_rate": 0.00019999966787318926, - "loss": 46.0, - "step": 10740 - }, - { - "epoch": 0.8212244585889864, - "grad_norm": 0.0004435455775819719, - "learning_rate": 0.0001999996678112802, - "loss": 46.0, - "step": 10741 - }, - { - "epoch": 0.8213009155723761, - "grad_norm": 0.003094179555773735, - "learning_rate": 0.00019999966774936537, - "loss": 46.0, - "step": 10742 - }, - { - "epoch": 0.8213773725557658, - "grad_norm": 0.005434252321720123, - "learning_rate": 0.00019999966768744477, - "loss": 46.0, - "step": 10743 - }, - { - "epoch": 0.8214538295391556, - "grad_norm": 0.0007276431424543262, - "learning_rate": 0.0001999996676255184, - "loss": 46.0, - "step": 10744 - }, - { - "epoch": 0.8215302865225452, - "grad_norm": 0.015207141637802124, - "learning_rate": 0.00019999966756358624, - "loss": 46.0, - "step": 10745 - }, - { - "epoch": 0.821606743505935, - "grad_norm": 0.0004469765699468553, - "learning_rate": 0.00019999966750164835, - "loss": 46.0, - "step": 10746 - }, - { - "epoch": 0.8216832004893246, - "grad_norm": 0.0018683691741898656, - "learning_rate": 0.00019999966743970465, - "loss": 46.0, - "step": 10747 - }, - { - "epoch": 0.8217596574727144, - "grad_norm": 0.0024904152378439903, - "learning_rate": 0.0001999996673777552, - "loss": 46.0, - "step": 10748 - }, - { - "epoch": 0.8218361144561042, - "grad_norm": 0.0008083500433713198, - "learning_rate": 0.0001999996673158, - "loss": 46.0, - "step": 10749 - }, - { - "epoch": 0.8219125714394938, - "grad_norm": 0.0014286641962826252, - "learning_rate": 0.00019999966725383904, - "loss": 46.0, - "step": 10750 - }, - { - "epoch": 0.8219890284228836, - "grad_norm": 0.002709595952183008, - "learning_rate": 0.00019999966719187227, - "loss": 46.0, - "step": 10751 - }, - { - "epoch": 0.8220654854062733, - "grad_norm": 0.0018716215854510665, - "learning_rate": 0.00019999966712989974, - "loss": 46.0, - "step": 10752 - }, - { - "epoch": 0.822141942389663, - "grad_norm": 0.0027791548054665327, - "learning_rate": 0.00019999966706792143, - "loss": 46.0, - "step": 10753 - }, - { - "epoch": 0.8222183993730527, - "grad_norm": 0.0019235272193327546, - "learning_rate": 0.00019999966700593737, - "loss": 46.0, - "step": 10754 - }, - { - "epoch": 0.8222948563564425, - "grad_norm": 0.0007547183195129037, - "learning_rate": 0.00019999966694394757, - "loss": 46.0, - "step": 10755 - }, - { - "epoch": 0.8223713133398322, - "grad_norm": 0.0007512698648497462, - "learning_rate": 0.00019999966688195197, - "loss": 46.0, - "step": 10756 - }, - { - "epoch": 0.8224477703232219, - "grad_norm": 0.0010405313223600388, - "learning_rate": 0.0001999996668199506, - "loss": 46.0, - "step": 10757 - }, - { - "epoch": 0.8225242273066116, - "grad_norm": 0.0031745682936161757, - "learning_rate": 0.00019999966675794345, - "loss": 46.0, - "step": 10758 - }, - { - "epoch": 0.8226006842900013, - "grad_norm": 0.002374935895204544, - "learning_rate": 0.00019999966669593055, - "loss": 46.0, - "step": 10759 - }, - { - "epoch": 0.8226771412733911, - "grad_norm": 0.0010144548723474145, - "learning_rate": 0.00019999966663391186, - "loss": 46.0, - "step": 10760 - }, - { - "epoch": 0.8227535982567807, - "grad_norm": 0.005825607106089592, - "learning_rate": 0.00019999966657188742, - "loss": 46.0, - "step": 10761 - }, - { - "epoch": 0.8228300552401705, - "grad_norm": 0.0011752373538911343, - "learning_rate": 0.0001999996665098572, - "loss": 46.0, - "step": 10762 - }, - { - "epoch": 0.8229065122235603, - "grad_norm": 0.002835820661857724, - "learning_rate": 0.0001999996664478212, - "loss": 46.0, - "step": 10763 - }, - { - "epoch": 0.8229829692069499, - "grad_norm": 0.0052048806101083755, - "learning_rate": 0.00019999966638577945, - "loss": 46.0, - "step": 10764 - }, - { - "epoch": 0.8230594261903397, - "grad_norm": 0.0040244534611701965, - "learning_rate": 0.00019999966632373192, - "loss": 46.0, - "step": 10765 - }, - { - "epoch": 0.8231358831737294, - "grad_norm": 0.0019150939770042896, - "learning_rate": 0.00019999966626167863, - "loss": 46.0, - "step": 10766 - }, - { - "epoch": 0.8232123401571191, - "grad_norm": 0.005140490364283323, - "learning_rate": 0.00019999966619961955, - "loss": 46.0, - "step": 10767 - }, - { - "epoch": 0.8232887971405088, - "grad_norm": 0.0002480422263033688, - "learning_rate": 0.00019999966613755472, - "loss": 46.0, - "step": 10768 - }, - { - "epoch": 0.8233652541238985, - "grad_norm": 0.001252923859283328, - "learning_rate": 0.00019999966607548412, - "loss": 46.0, - "step": 10769 - }, - { - "epoch": 0.8234417111072883, - "grad_norm": 0.013171762228012085, - "learning_rate": 0.00019999966601340775, - "loss": 46.0, - "step": 10770 - }, - { - "epoch": 0.823518168090678, - "grad_norm": 0.0011907482985407114, - "learning_rate": 0.0001999996659513256, - "loss": 46.0, - "step": 10771 - }, - { - "epoch": 0.8235946250740677, - "grad_norm": 0.004106555134057999, - "learning_rate": 0.0001999996658892377, - "loss": 46.0, - "step": 10772 - }, - { - "epoch": 0.8236710820574574, - "grad_norm": 0.0010761055164039135, - "learning_rate": 0.000199999665827144, - "loss": 46.0, - "step": 10773 - }, - { - "epoch": 0.8237475390408472, - "grad_norm": 0.0006391893839463592, - "learning_rate": 0.00019999966576504457, - "loss": 46.0, - "step": 10774 - }, - { - "epoch": 0.8238239960242368, - "grad_norm": 0.00033305137185379863, - "learning_rate": 0.00019999966570293935, - "loss": 46.0, - "step": 10775 - }, - { - "epoch": 0.8239004530076266, - "grad_norm": 0.0011488631134852767, - "learning_rate": 0.00019999966564082837, - "loss": 46.0, - "step": 10776 - }, - { - "epoch": 0.8239769099910164, - "grad_norm": 0.0009050683584064245, - "learning_rate": 0.0001999996655787116, - "loss": 46.0, - "step": 10777 - }, - { - "epoch": 0.824053366974406, - "grad_norm": 0.00082078215200454, - "learning_rate": 0.0001999996655165891, - "loss": 46.0, - "step": 10778 - }, - { - "epoch": 0.8241298239577958, - "grad_norm": 0.0022648824378848076, - "learning_rate": 0.0001999996654544608, - "loss": 46.0, - "step": 10779 - }, - { - "epoch": 0.8242062809411854, - "grad_norm": 0.012408829294145107, - "learning_rate": 0.0001999996653923267, - "loss": 46.0, - "step": 10780 - }, - { - "epoch": 0.8242827379245752, - "grad_norm": 0.001504181302152574, - "learning_rate": 0.00019999966533018685, - "loss": 46.0, - "step": 10781 - }, - { - "epoch": 0.8243591949079649, - "grad_norm": 0.0017542120767757297, - "learning_rate": 0.00019999966526804125, - "loss": 46.0, - "step": 10782 - }, - { - "epoch": 0.8244356518913546, - "grad_norm": 0.002071010647341609, - "learning_rate": 0.00019999966520588988, - "loss": 46.0, - "step": 10783 - }, - { - "epoch": 0.8245121088747444, - "grad_norm": 0.001121000968851149, - "learning_rate": 0.00019999966514373273, - "loss": 46.0, - "step": 10784 - }, - { - "epoch": 0.8245885658581341, - "grad_norm": 0.00761817954480648, - "learning_rate": 0.0001999996650815698, - "loss": 46.0, - "step": 10785 - }, - { - "epoch": 0.8246650228415238, - "grad_norm": 0.00100034533534199, - "learning_rate": 0.00019999966501940114, - "loss": 46.0, - "step": 10786 - }, - { - "epoch": 0.8247414798249135, - "grad_norm": 0.0016872057458385825, - "learning_rate": 0.0001999996649572267, - "loss": 46.0, - "step": 10787 - }, - { - "epoch": 0.8248179368083032, - "grad_norm": 0.0004919639322906733, - "learning_rate": 0.00019999966489504646, - "loss": 46.0, - "step": 10788 - }, - { - "epoch": 0.8248943937916929, - "grad_norm": 0.003121000248938799, - "learning_rate": 0.00019999966483286047, - "loss": 46.0, - "step": 10789 - }, - { - "epoch": 0.8249708507750827, - "grad_norm": 0.0013548723654821515, - "learning_rate": 0.0001999996647706687, - "loss": 46.0, - "step": 10790 - }, - { - "epoch": 0.8250473077584723, - "grad_norm": 0.001999072963371873, - "learning_rate": 0.00019999966470847117, - "loss": 46.0, - "step": 10791 - }, - { - "epoch": 0.8251237647418621, - "grad_norm": 0.0009184314985759556, - "learning_rate": 0.0001999996646462679, - "loss": 46.0, - "step": 10792 - }, - { - "epoch": 0.8252002217252519, - "grad_norm": 0.0008142598089762032, - "learning_rate": 0.0001999996645840588, - "loss": 46.0, - "step": 10793 - }, - { - "epoch": 0.8252766787086415, - "grad_norm": 0.00563464593142271, - "learning_rate": 0.00019999966452184399, - "loss": 46.0, - "step": 10794 - }, - { - "epoch": 0.8253531356920313, - "grad_norm": 0.0011567280162125826, - "learning_rate": 0.00019999966445962336, - "loss": 46.0, - "step": 10795 - }, - { - "epoch": 0.825429592675421, - "grad_norm": 0.001093081315048039, - "learning_rate": 0.00019999966439739698, - "loss": 46.0, - "step": 10796 - }, - { - "epoch": 0.8255060496588107, - "grad_norm": 0.011824234388768673, - "learning_rate": 0.00019999966433516484, - "loss": 46.0, - "step": 10797 - }, - { - "epoch": 0.8255825066422005, - "grad_norm": 0.00643664225935936, - "learning_rate": 0.00019999966427292694, - "loss": 46.0, - "step": 10798 - }, - { - "epoch": 0.8256589636255901, - "grad_norm": 0.001147162402048707, - "learning_rate": 0.00019999966421068325, - "loss": 46.0, - "step": 10799 - }, - { - "epoch": 0.8257354206089799, - "grad_norm": 0.0072053843177855015, - "learning_rate": 0.00019999966414843375, - "loss": 46.0, - "step": 10800 - }, - { - "epoch": 0.8258118775923696, - "grad_norm": 0.0027590978424996138, - "learning_rate": 0.00019999966408617854, - "loss": 46.0, - "step": 10801 - }, - { - "epoch": 0.8258883345757593, - "grad_norm": 0.0019405200146138668, - "learning_rate": 0.00019999966402391755, - "loss": 46.0, - "step": 10802 - }, - { - "epoch": 0.825964791559149, - "grad_norm": 0.0010096848709508777, - "learning_rate": 0.0001999996639616508, - "loss": 46.0, - "step": 10803 - }, - { - "epoch": 0.8260412485425388, - "grad_norm": 0.0007796257850714028, - "learning_rate": 0.00019999966389937828, - "loss": 46.0, - "step": 10804 - }, - { - "epoch": 0.8261177055259284, - "grad_norm": 0.003733782097697258, - "learning_rate": 0.00019999966383709995, - "loss": 46.0, - "step": 10805 - }, - { - "epoch": 0.8261941625093182, - "grad_norm": 0.001648182631470263, - "learning_rate": 0.00019999966377481587, - "loss": 46.0, - "step": 10806 - }, - { - "epoch": 0.826270619492708, - "grad_norm": 0.0006677035125903785, - "learning_rate": 0.00019999966371252604, - "loss": 46.0, - "step": 10807 - }, - { - "epoch": 0.8263470764760976, - "grad_norm": 0.0012601150665432215, - "learning_rate": 0.0001999996636502304, - "loss": 46.0, - "step": 10808 - }, - { - "epoch": 0.8264235334594874, - "grad_norm": 0.0005845642881467938, - "learning_rate": 0.00019999966358792907, - "loss": 46.0, - "step": 10809 - }, - { - "epoch": 0.826499990442877, - "grad_norm": 0.0007311388035304844, - "learning_rate": 0.00019999966352562192, - "loss": 46.0, - "step": 10810 - }, - { - "epoch": 0.8265764474262668, - "grad_norm": 0.0030390354804694653, - "learning_rate": 0.00019999966346330897, - "loss": 46.0, - "step": 10811 - }, - { - "epoch": 0.8266529044096566, - "grad_norm": 0.0011061836266890168, - "learning_rate": 0.00019999966340099028, - "loss": 46.0, - "step": 10812 - }, - { - "epoch": 0.8267293613930462, - "grad_norm": 0.0008378637721762061, - "learning_rate": 0.0001999996633386658, - "loss": 46.0, - "step": 10813 - }, - { - "epoch": 0.826805818376436, - "grad_norm": 0.001671507372520864, - "learning_rate": 0.0001999996632763356, - "loss": 46.0, - "step": 10814 - }, - { - "epoch": 0.8268822753598257, - "grad_norm": 0.0006811353377997875, - "learning_rate": 0.0001999996632139996, - "loss": 46.0, - "step": 10815 - }, - { - "epoch": 0.8269587323432154, - "grad_norm": 0.0005022775731049478, - "learning_rate": 0.00019999966315165785, - "loss": 46.0, - "step": 10816 - }, - { - "epoch": 0.8270351893266051, - "grad_norm": 0.002233087783679366, - "learning_rate": 0.00019999966308931031, - "loss": 46.0, - "step": 10817 - }, - { - "epoch": 0.8271116463099948, - "grad_norm": 0.0010761056328192353, - "learning_rate": 0.00019999966302695698, - "loss": 46.0, - "step": 10818 - }, - { - "epoch": 0.8271881032933845, - "grad_norm": 0.0008984995656646788, - "learning_rate": 0.00019999966296459793, - "loss": 46.0, - "step": 10819 - }, - { - "epoch": 0.8272645602767743, - "grad_norm": 0.0017700594617053866, - "learning_rate": 0.00019999966290223307, - "loss": 46.0, - "step": 10820 - }, - { - "epoch": 0.827341017260164, - "grad_norm": 0.0027520039584487677, - "learning_rate": 0.00019999966283986247, - "loss": 46.0, - "step": 10821 - }, - { - "epoch": 0.8274174742435537, - "grad_norm": 0.0024779317900538445, - "learning_rate": 0.0001999996627774861, - "loss": 46.0, - "step": 10822 - }, - { - "epoch": 0.8274939312269435, - "grad_norm": 0.0005166740738786757, - "learning_rate": 0.00019999966271510393, - "loss": 46.0, - "step": 10823 - }, - { - "epoch": 0.8275703882103331, - "grad_norm": 0.0012331565376371145, - "learning_rate": 0.000199999662652716, - "loss": 46.0, - "step": 10824 - }, - { - "epoch": 0.8276468451937229, - "grad_norm": 0.0009226638940162957, - "learning_rate": 0.0001999996625903223, - "loss": 46.0, - "step": 10825 - }, - { - "epoch": 0.8277233021771127, - "grad_norm": 0.0009670732542872429, - "learning_rate": 0.00019999966252792284, - "loss": 46.0, - "step": 10826 - }, - { - "epoch": 0.8277997591605023, - "grad_norm": 0.00636687595397234, - "learning_rate": 0.00019999966246551763, - "loss": 46.0, - "step": 10827 - }, - { - "epoch": 0.8278762161438921, - "grad_norm": 0.003914323635399342, - "learning_rate": 0.00019999966240310664, - "loss": 46.0, - "step": 10828 - }, - { - "epoch": 0.8279526731272817, - "grad_norm": 0.0007384219206869602, - "learning_rate": 0.00019999966234068986, - "loss": 46.0, - "step": 10829 - }, - { - "epoch": 0.8280291301106715, - "grad_norm": 0.0010801958851516247, - "learning_rate": 0.0001999996622782673, - "loss": 46.0, - "step": 10830 - }, - { - "epoch": 0.8281055870940612, - "grad_norm": 0.0011917497031390667, - "learning_rate": 0.00019999966221583902, - "loss": 46.0, - "step": 10831 - }, - { - "epoch": 0.8281820440774509, - "grad_norm": 0.006292206700891256, - "learning_rate": 0.0001999996621534049, - "loss": 46.0, - "step": 10832 - }, - { - "epoch": 0.8282585010608406, - "grad_norm": 0.006909584626555443, - "learning_rate": 0.00019999966209096508, - "loss": 46.0, - "step": 10833 - }, - { - "epoch": 0.8283349580442304, - "grad_norm": 0.00443050405010581, - "learning_rate": 0.00019999966202851948, - "loss": 46.0, - "step": 10834 - }, - { - "epoch": 0.8284114150276201, - "grad_norm": 0.002551465993747115, - "learning_rate": 0.00019999966196606808, - "loss": 46.0, - "step": 10835 - }, - { - "epoch": 0.8284878720110098, - "grad_norm": 0.00040333790821023285, - "learning_rate": 0.00019999966190361094, - "loss": 46.0, - "step": 10836 - }, - { - "epoch": 0.8285643289943996, - "grad_norm": 0.0009858980774879456, - "learning_rate": 0.000199999661841148, - "loss": 46.0, - "step": 10837 - }, - { - "epoch": 0.8286407859777892, - "grad_norm": 0.0018890650244429708, - "learning_rate": 0.0001999996617786793, - "loss": 46.0, - "step": 10838 - }, - { - "epoch": 0.828717242961179, - "grad_norm": 0.0012757593067362905, - "learning_rate": 0.00019999966171620483, - "loss": 46.0, - "step": 10839 - }, - { - "epoch": 0.8287936999445686, - "grad_norm": 0.0037090429104864597, - "learning_rate": 0.00019999966165372461, - "loss": 46.0, - "step": 10840 - }, - { - "epoch": 0.8288701569279584, - "grad_norm": 0.00043109917896799743, - "learning_rate": 0.0001999996615912386, - "loss": 46.0, - "step": 10841 - }, - { - "epoch": 0.8289466139113482, - "grad_norm": 0.0008832283201627433, - "learning_rate": 0.00019999966152874684, - "loss": 46.0, - "step": 10842 - }, - { - "epoch": 0.8290230708947378, - "grad_norm": 0.005266947206109762, - "learning_rate": 0.00019999966146624928, - "loss": 46.0, - "step": 10843 - }, - { - "epoch": 0.8290995278781276, - "grad_norm": 0.0007477087783627212, - "learning_rate": 0.00019999966140374597, - "loss": 46.0, - "step": 10844 - }, - { - "epoch": 0.8291759848615173, - "grad_norm": 0.0017315337900072336, - "learning_rate": 0.0001999996613412369, - "loss": 46.0, - "step": 10845 - }, - { - "epoch": 0.829252441844907, - "grad_norm": 0.00042855628998950124, - "learning_rate": 0.00019999966127872204, - "loss": 46.0, - "step": 10846 - }, - { - "epoch": 0.8293288988282967, - "grad_norm": 0.002416914328932762, - "learning_rate": 0.00019999966121620144, - "loss": 46.0, - "step": 10847 - }, - { - "epoch": 0.8294053558116864, - "grad_norm": 0.0008425017585977912, - "learning_rate": 0.00019999966115367504, - "loss": 46.0, - "step": 10848 - }, - { - "epoch": 0.8294818127950762, - "grad_norm": 0.0006613319274038076, - "learning_rate": 0.00019999966109114287, - "loss": 46.0, - "step": 10849 - }, - { - "epoch": 0.8295582697784659, - "grad_norm": 0.0006813930813223124, - "learning_rate": 0.00019999966102860495, - "loss": 46.0, - "step": 10850 - }, - { - "epoch": 0.8296347267618556, - "grad_norm": 0.0009660691721364856, - "learning_rate": 0.00019999966096606126, - "loss": 46.0, - "step": 10851 - }, - { - "epoch": 0.8297111837452453, - "grad_norm": 0.001846193103119731, - "learning_rate": 0.0001999996609035118, - "loss": 46.0, - "step": 10852 - }, - { - "epoch": 0.8297876407286351, - "grad_norm": 0.0004581077373586595, - "learning_rate": 0.00019999966084095655, - "loss": 46.0, - "step": 10853 - }, - { - "epoch": 0.8298640977120247, - "grad_norm": 0.0007270847563631833, - "learning_rate": 0.00019999966077839556, - "loss": 46.0, - "step": 10854 - }, - { - "epoch": 0.8299405546954145, - "grad_norm": 0.003207017667591572, - "learning_rate": 0.00019999966071582875, - "loss": 46.0, - "step": 10855 - }, - { - "epoch": 0.8300170116788043, - "grad_norm": 0.0007058214396238327, - "learning_rate": 0.00019999966065325624, - "loss": 46.0, - "step": 10856 - }, - { - "epoch": 0.8300934686621939, - "grad_norm": 0.0005304698133841157, - "learning_rate": 0.00019999966059067788, - "loss": 46.0, - "step": 10857 - }, - { - "epoch": 0.8301699256455837, - "grad_norm": 0.0007276951801031828, - "learning_rate": 0.00019999966052809383, - "loss": 46.0, - "step": 10858 - }, - { - "epoch": 0.8302463826289733, - "grad_norm": 0.014067895710468292, - "learning_rate": 0.00019999966046550398, - "loss": 46.0, - "step": 10859 - }, - { - "epoch": 0.8303228396123631, - "grad_norm": 0.0038100602105259895, - "learning_rate": 0.00019999966040290835, - "loss": 46.0, - "step": 10860 - }, - { - "epoch": 0.8303992965957528, - "grad_norm": 0.0006950997631065547, - "learning_rate": 0.00019999966034030698, - "loss": 46.0, - "step": 10861 - }, - { - "epoch": 0.8304757535791425, - "grad_norm": 0.0005952907376922667, - "learning_rate": 0.0001999996602776998, - "loss": 46.0, - "step": 10862 - }, - { - "epoch": 0.8305522105625323, - "grad_norm": 0.0015130196698009968, - "learning_rate": 0.00019999966021508686, - "loss": 46.0, - "step": 10863 - }, - { - "epoch": 0.830628667545922, - "grad_norm": 0.001703130081295967, - "learning_rate": 0.00019999966015246817, - "loss": 46.0, - "step": 10864 - }, - { - "epoch": 0.8307051245293117, - "grad_norm": 0.0018298194045200944, - "learning_rate": 0.0001999996600898437, - "loss": 46.0, - "step": 10865 - }, - { - "epoch": 0.8307815815127014, - "grad_norm": 0.000880430219694972, - "learning_rate": 0.00019999966002721346, - "loss": 46.0, - "step": 10866 - }, - { - "epoch": 0.8308580384960912, - "grad_norm": 0.0017140473937615752, - "learning_rate": 0.00019999965996457745, - "loss": 46.0, - "step": 10867 - }, - { - "epoch": 0.8309344954794808, - "grad_norm": 0.002834649058058858, - "learning_rate": 0.0001999996599019357, - "loss": 46.0, - "step": 10868 - }, - { - "epoch": 0.8310109524628706, - "grad_norm": 0.0008349040290340781, - "learning_rate": 0.00019999965983928816, - "loss": 46.0, - "step": 10869 - }, - { - "epoch": 0.8310874094462603, - "grad_norm": 0.0014007369754835963, - "learning_rate": 0.00019999965977663482, - "loss": 46.0, - "step": 10870 - }, - { - "epoch": 0.83116386642965, - "grad_norm": 0.0007154003251343966, - "learning_rate": 0.00019999965971397572, - "loss": 46.0, - "step": 10871 - }, - { - "epoch": 0.8312403234130398, - "grad_norm": 0.0011900861281901598, - "learning_rate": 0.00019999965965131086, - "loss": 46.0, - "step": 10872 - }, - { - "epoch": 0.8313167803964294, - "grad_norm": 0.0006056425627321005, - "learning_rate": 0.00019999965958864024, - "loss": 46.0, - "step": 10873 - }, - { - "epoch": 0.8313932373798192, - "grad_norm": 0.001429014722816646, - "learning_rate": 0.00019999965952596384, - "loss": 46.0, - "step": 10874 - }, - { - "epoch": 0.8314696943632089, - "grad_norm": 0.001696994062513113, - "learning_rate": 0.00019999965946328167, - "loss": 46.0, - "step": 10875 - }, - { - "epoch": 0.8315461513465986, - "grad_norm": 0.0015285846311599016, - "learning_rate": 0.00019999965940059375, - "loss": 46.0, - "step": 10876 - }, - { - "epoch": 0.8316226083299884, - "grad_norm": 0.014070739969611168, - "learning_rate": 0.00019999965933790005, - "loss": 46.0, - "step": 10877 - }, - { - "epoch": 0.831699065313378, - "grad_norm": 0.0012685131514444947, - "learning_rate": 0.00019999965927520056, - "loss": 46.0, - "step": 10878 - }, - { - "epoch": 0.8317755222967678, - "grad_norm": 0.001588876242749393, - "learning_rate": 0.00019999965921249535, - "loss": 46.0, - "step": 10879 - }, - { - "epoch": 0.8318519792801575, - "grad_norm": 0.0022325317841023207, - "learning_rate": 0.00019999965914978434, - "loss": 46.0, - "step": 10880 - }, - { - "epoch": 0.8319284362635472, - "grad_norm": 0.0030568428337574005, - "learning_rate": 0.00019999965908706755, - "loss": 46.0, - "step": 10881 - }, - { - "epoch": 0.8320048932469369, - "grad_norm": 0.0018602528143674135, - "learning_rate": 0.00019999965902434502, - "loss": 46.0, - "step": 10882 - }, - { - "epoch": 0.8320813502303267, - "grad_norm": 0.003781463485211134, - "learning_rate": 0.00019999965896161669, - "loss": 46.0, - "step": 10883 - }, - { - "epoch": 0.8321578072137163, - "grad_norm": 0.001798297162167728, - "learning_rate": 0.00019999965889888258, - "loss": 46.0, - "step": 10884 - }, - { - "epoch": 0.8322342641971061, - "grad_norm": 0.001416568411514163, - "learning_rate": 0.00019999965883614273, - "loss": 46.0, - "step": 10885 - }, - { - "epoch": 0.8323107211804959, - "grad_norm": 0.0013424010248854756, - "learning_rate": 0.0001999996587733971, - "loss": 46.0, - "step": 10886 - }, - { - "epoch": 0.8323871781638855, - "grad_norm": 0.0008791974978521466, - "learning_rate": 0.00019999965871064573, - "loss": 46.0, - "step": 10887 - }, - { - "epoch": 0.8324636351472753, - "grad_norm": 0.0017793815350160003, - "learning_rate": 0.00019999965864788856, - "loss": 46.0, - "step": 10888 - }, - { - "epoch": 0.8325400921306649, - "grad_norm": 0.0031849441584199667, - "learning_rate": 0.00019999965858512558, - "loss": 46.0, - "step": 10889 - }, - { - "epoch": 0.8326165491140547, - "grad_norm": 0.0033617066219449043, - "learning_rate": 0.00019999965852235692, - "loss": 46.0, - "step": 10890 - }, - { - "epoch": 0.8326930060974445, - "grad_norm": 0.0019287305185571313, - "learning_rate": 0.00019999965845958243, - "loss": 46.0, - "step": 10891 - }, - { - "epoch": 0.8327694630808341, - "grad_norm": 0.0017537970561534166, - "learning_rate": 0.0001999996583968022, - "loss": 46.0, - "step": 10892 - }, - { - "epoch": 0.8328459200642239, - "grad_norm": 0.0013972556916996837, - "learning_rate": 0.00019999965833401617, - "loss": 46.0, - "step": 10893 - }, - { - "epoch": 0.8329223770476136, - "grad_norm": 0.002614380093291402, - "learning_rate": 0.0001999996582712244, - "loss": 46.0, - "step": 10894 - }, - { - "epoch": 0.8329988340310033, - "grad_norm": 0.0013157302746549249, - "learning_rate": 0.00019999965820842683, - "loss": 46.0, - "step": 10895 - }, - { - "epoch": 0.833075291014393, - "grad_norm": 0.003121002810075879, - "learning_rate": 0.00019999965814562352, - "loss": 46.0, - "step": 10896 - }, - { - "epoch": 0.8331517479977828, - "grad_norm": 0.0009520279709249735, - "learning_rate": 0.00019999965808281442, - "loss": 46.0, - "step": 10897 - }, - { - "epoch": 0.8332282049811724, - "grad_norm": 0.000802506459876895, - "learning_rate": 0.00019999965801999957, - "loss": 46.0, - "step": 10898 - }, - { - "epoch": 0.8333046619645622, - "grad_norm": 0.0016850921092554927, - "learning_rate": 0.00019999965795717891, - "loss": 46.0, - "step": 10899 - }, - { - "epoch": 0.8333811189479519, - "grad_norm": 0.002314093057066202, - "learning_rate": 0.00019999965789435254, - "loss": 46.0, - "step": 10900 - }, - { - "epoch": 0.8334575759313416, - "grad_norm": 0.0005904283025301993, - "learning_rate": 0.00019999965783152037, - "loss": 46.0, - "step": 10901 - }, - { - "epoch": 0.8335340329147314, - "grad_norm": 0.001012057880870998, - "learning_rate": 0.00019999965776868242, - "loss": 46.0, - "step": 10902 - }, - { - "epoch": 0.833610489898121, - "grad_norm": 0.0033467928878962994, - "learning_rate": 0.00019999965770583873, - "loss": 46.0, - "step": 10903 - }, - { - "epoch": 0.8336869468815108, - "grad_norm": 0.0008123525767587125, - "learning_rate": 0.0001999996576429892, - "loss": 46.0, - "step": 10904 - }, - { - "epoch": 0.8337634038649006, - "grad_norm": 0.002545438939705491, - "learning_rate": 0.000199999657580134, - "loss": 46.0, - "step": 10905 - }, - { - "epoch": 0.8338398608482902, - "grad_norm": 0.003804426407441497, - "learning_rate": 0.000199999657517273, - "loss": 46.0, - "step": 10906 - }, - { - "epoch": 0.83391631783168, - "grad_norm": 0.0008537026587873697, - "learning_rate": 0.0001999996574544062, - "loss": 46.0, - "step": 10907 - }, - { - "epoch": 0.8339927748150697, - "grad_norm": 0.0052169146947562695, - "learning_rate": 0.00019999965739153365, - "loss": 46.0, - "step": 10908 - }, - { - "epoch": 0.8340692317984594, - "grad_norm": 0.000630547758191824, - "learning_rate": 0.00019999965732865531, - "loss": 46.0, - "step": 10909 - }, - { - "epoch": 0.8341456887818491, - "grad_norm": 0.0006478666327893734, - "learning_rate": 0.00019999965726577124, - "loss": 46.0, - "step": 10910 - }, - { - "epoch": 0.8342221457652388, - "grad_norm": 0.0011280137114226818, - "learning_rate": 0.00019999965720288136, - "loss": 46.0, - "step": 10911 - }, - { - "epoch": 0.8342986027486285, - "grad_norm": 0.0007642895798198879, - "learning_rate": 0.00019999965713998573, - "loss": 46.0, - "step": 10912 - }, - { - "epoch": 0.8343750597320183, - "grad_norm": 0.0017754929140210152, - "learning_rate": 0.00019999965707708433, - "loss": 46.0, - "step": 10913 - }, - { - "epoch": 0.834451516715408, - "grad_norm": 0.0012570193503051996, - "learning_rate": 0.00019999965701417716, - "loss": 46.0, - "step": 10914 - }, - { - "epoch": 0.8345279736987977, - "grad_norm": 0.007381140254437923, - "learning_rate": 0.00019999965695126422, - "loss": 46.0, - "step": 10915 - }, - { - "epoch": 0.8346044306821875, - "grad_norm": 0.0016811782261356711, - "learning_rate": 0.0001999996568883455, - "loss": 46.0, - "step": 10916 - }, - { - "epoch": 0.8346808876655771, - "grad_norm": 0.0025787365157157183, - "learning_rate": 0.00019999965682542103, - "loss": 46.0, - "step": 10917 - }, - { - "epoch": 0.8347573446489669, - "grad_norm": 0.002313877223059535, - "learning_rate": 0.00019999965676249077, - "loss": 46.0, - "step": 10918 - }, - { - "epoch": 0.8348338016323565, - "grad_norm": 0.000879984232597053, - "learning_rate": 0.00019999965669955473, - "loss": 46.0, - "step": 10919 - }, - { - "epoch": 0.8349102586157463, - "grad_norm": 0.0037839370779693127, - "learning_rate": 0.00019999965663661294, - "loss": 46.0, - "step": 10920 - }, - { - "epoch": 0.8349867155991361, - "grad_norm": 0.00028221411048434675, - "learning_rate": 0.00019999965657366538, - "loss": 46.0, - "step": 10921 - }, - { - "epoch": 0.8350631725825257, - "grad_norm": 0.002603070577606559, - "learning_rate": 0.00019999965651071205, - "loss": 46.0, - "step": 10922 - }, - { - "epoch": 0.8351396295659155, - "grad_norm": 0.0010584990959614515, - "learning_rate": 0.00019999965644775297, - "loss": 46.0, - "step": 10923 - }, - { - "epoch": 0.8352160865493052, - "grad_norm": 0.0024331440217792988, - "learning_rate": 0.00019999965638478812, - "loss": 46.0, - "step": 10924 - }, - { - "epoch": 0.8352925435326949, - "grad_norm": 0.0020674951374530792, - "learning_rate": 0.00019999965632181747, - "loss": 46.0, - "step": 10925 - }, - { - "epoch": 0.8353690005160846, - "grad_norm": 0.0007721393485553563, - "learning_rate": 0.00019999965625884107, - "loss": 46.0, - "step": 10926 - }, - { - "epoch": 0.8354454574994744, - "grad_norm": 0.005738811567425728, - "learning_rate": 0.0001999996561958589, - "loss": 46.0, - "step": 10927 - }, - { - "epoch": 0.8355219144828641, - "grad_norm": 0.0010936089092865586, - "learning_rate": 0.00019999965613287098, - "loss": 46.0, - "step": 10928 - }, - { - "epoch": 0.8355983714662538, - "grad_norm": 0.0014200042933225632, - "learning_rate": 0.00019999965606987724, - "loss": 46.0, - "step": 10929 - }, - { - "epoch": 0.8356748284496435, - "grad_norm": 0.0028505187947303057, - "learning_rate": 0.00019999965600687775, - "loss": 46.0, - "step": 10930 - }, - { - "epoch": 0.8357512854330332, - "grad_norm": 0.003321971744298935, - "learning_rate": 0.0001999996559438725, - "loss": 46.0, - "step": 10931 - }, - { - "epoch": 0.835827742416423, - "grad_norm": 0.00102518021594733, - "learning_rate": 0.0001999996558808615, - "loss": 46.0, - "step": 10932 - }, - { - "epoch": 0.8359041993998126, - "grad_norm": 0.0007095005712471902, - "learning_rate": 0.0001999996558178447, - "loss": 46.0, - "step": 10933 - }, - { - "epoch": 0.8359806563832024, - "grad_norm": 0.0009429772035218775, - "learning_rate": 0.00019999965575482213, - "loss": 46.0, - "step": 10934 - }, - { - "epoch": 0.8360571133665922, - "grad_norm": 0.0004319078288972378, - "learning_rate": 0.0001999996556917938, - "loss": 46.0, - "step": 10935 - }, - { - "epoch": 0.8361335703499818, - "grad_norm": 0.0011591074289754033, - "learning_rate": 0.00019999965562875972, - "loss": 46.0, - "step": 10936 - }, - { - "epoch": 0.8362100273333716, - "grad_norm": 0.0014750271802768111, - "learning_rate": 0.00019999965556571984, - "loss": 46.0, - "step": 10937 - }, - { - "epoch": 0.8362864843167613, - "grad_norm": 0.0008080319967120886, - "learning_rate": 0.0001999996555026742, - "loss": 46.0, - "step": 10938 - }, - { - "epoch": 0.836362941300151, - "grad_norm": 0.0018655186286196113, - "learning_rate": 0.0001999996554396228, - "loss": 46.0, - "step": 10939 - }, - { - "epoch": 0.8364393982835407, - "grad_norm": 0.0009518834995105863, - "learning_rate": 0.00019999965537656562, - "loss": 46.0, - "step": 10940 - }, - { - "epoch": 0.8365158552669304, - "grad_norm": 0.0027765887789428234, - "learning_rate": 0.00019999965531350267, - "loss": 46.0, - "step": 10941 - }, - { - "epoch": 0.8365923122503202, - "grad_norm": 0.008856285363435745, - "learning_rate": 0.00019999965525043396, - "loss": 46.0, - "step": 10942 - }, - { - "epoch": 0.8366687692337099, - "grad_norm": 0.0009769691387191415, - "learning_rate": 0.0001999996551873595, - "loss": 46.0, - "step": 10943 - }, - { - "epoch": 0.8367452262170996, - "grad_norm": 0.001627656165510416, - "learning_rate": 0.00019999965512427923, - "loss": 46.0, - "step": 10944 - }, - { - "epoch": 0.8368216832004893, - "grad_norm": 0.0008649210794828832, - "learning_rate": 0.0001999996550611932, - "loss": 46.0, - "step": 10945 - }, - { - "epoch": 0.8368981401838791, - "grad_norm": 0.0009970496175810695, - "learning_rate": 0.0001999996549981014, - "loss": 46.0, - "step": 10946 - }, - { - "epoch": 0.8369745971672687, - "grad_norm": 0.0019105859100818634, - "learning_rate": 0.00019999965493500382, - "loss": 46.0, - "step": 10947 - }, - { - "epoch": 0.8370510541506585, - "grad_norm": 0.003146178089082241, - "learning_rate": 0.0001999996548719005, - "loss": 46.0, - "step": 10948 - }, - { - "epoch": 0.8371275111340482, - "grad_norm": 0.0014504408463835716, - "learning_rate": 0.0001999996548087914, - "loss": 46.0, - "step": 10949 - }, - { - "epoch": 0.8372039681174379, - "grad_norm": 0.0009967993246391416, - "learning_rate": 0.00019999965474567653, - "loss": 46.0, - "step": 10950 - }, - { - "epoch": 0.8372804251008277, - "grad_norm": 0.001014632172882557, - "learning_rate": 0.0001999996546825559, - "loss": 46.0, - "step": 10951 - }, - { - "epoch": 0.8373568820842173, - "grad_norm": 0.0010587768629193306, - "learning_rate": 0.00019999965461942946, - "loss": 46.0, - "step": 10952 - }, - { - "epoch": 0.8374333390676071, - "grad_norm": 0.003790617687627673, - "learning_rate": 0.0001999996545562973, - "loss": 46.0, - "step": 10953 - }, - { - "epoch": 0.8375097960509968, - "grad_norm": 0.001789894886314869, - "learning_rate": 0.00019999965449315937, - "loss": 46.0, - "step": 10954 - }, - { - "epoch": 0.8375862530343865, - "grad_norm": 0.0018467131303623319, - "learning_rate": 0.00019999965443001565, - "loss": 46.0, - "step": 10955 - }, - { - "epoch": 0.8376627100177763, - "grad_norm": 0.0013756228145211935, - "learning_rate": 0.00019999965436686613, - "loss": 46.0, - "step": 10956 - }, - { - "epoch": 0.837739167001166, - "grad_norm": 0.00186286517418921, - "learning_rate": 0.0001999996543037109, - "loss": 46.0, - "step": 10957 - }, - { - "epoch": 0.8378156239845557, - "grad_norm": 0.0010459361365064979, - "learning_rate": 0.00019999965424054986, - "loss": 46.0, - "step": 10958 - }, - { - "epoch": 0.8378920809679454, - "grad_norm": 0.005547845270484686, - "learning_rate": 0.00019999965417738307, - "loss": 46.0, - "step": 10959 - }, - { - "epoch": 0.8379685379513351, - "grad_norm": 0.0012270724400877953, - "learning_rate": 0.0001999996541142105, - "loss": 46.0, - "step": 10960 - }, - { - "epoch": 0.8380449949347248, - "grad_norm": 0.0010208089370280504, - "learning_rate": 0.00019999965405103219, - "loss": 46.0, - "step": 10961 - }, - { - "epoch": 0.8381214519181146, - "grad_norm": 0.0015101042808964849, - "learning_rate": 0.00019999965398784805, - "loss": 46.0, - "step": 10962 - }, - { - "epoch": 0.8381979089015043, - "grad_norm": 0.00341527396813035, - "learning_rate": 0.00019999965392465818, - "loss": 46.0, - "step": 10963 - }, - { - "epoch": 0.838274365884894, - "grad_norm": 0.0078042359091341496, - "learning_rate": 0.00019999965386146253, - "loss": 46.0, - "step": 10964 - }, - { - "epoch": 0.8383508228682838, - "grad_norm": 0.000756177119910717, - "learning_rate": 0.00019999965379826113, - "loss": 46.0, - "step": 10965 - }, - { - "epoch": 0.8384272798516734, - "grad_norm": 0.0030392338521778584, - "learning_rate": 0.00019999965373505396, - "loss": 46.0, - "step": 10966 - }, - { - "epoch": 0.8385037368350632, - "grad_norm": 0.0022330654319375753, - "learning_rate": 0.000199999653671841, - "loss": 46.0, - "step": 10967 - }, - { - "epoch": 0.838580193818453, - "grad_norm": 0.0009384745499119163, - "learning_rate": 0.00019999965360862227, - "loss": 46.0, - "step": 10968 - }, - { - "epoch": 0.8386566508018426, - "grad_norm": 0.0008377086487598717, - "learning_rate": 0.00019999965354539778, - "loss": 46.0, - "step": 10969 - }, - { - "epoch": 0.8387331077852324, - "grad_norm": 0.007736325729638338, - "learning_rate": 0.00019999965348216754, - "loss": 46.0, - "step": 10970 - }, - { - "epoch": 0.838809564768622, - "grad_norm": 0.0012282304232940078, - "learning_rate": 0.0001999996534189315, - "loss": 46.0, - "step": 10971 - }, - { - "epoch": 0.8388860217520118, - "grad_norm": 0.001501261256635189, - "learning_rate": 0.0001999996533556897, - "loss": 46.0, - "step": 10972 - }, - { - "epoch": 0.8389624787354015, - "grad_norm": 0.0007189962780103087, - "learning_rate": 0.00019999965329244214, - "loss": 46.0, - "step": 10973 - }, - { - "epoch": 0.8390389357187912, - "grad_norm": 0.0009197764447890222, - "learning_rate": 0.00019999965322918878, - "loss": 46.0, - "step": 10974 - }, - { - "epoch": 0.8391153927021809, - "grad_norm": 0.0011863221880048513, - "learning_rate": 0.0001999996531659297, - "loss": 46.0, - "step": 10975 - }, - { - "epoch": 0.8391918496855707, - "grad_norm": 0.0017392291920259595, - "learning_rate": 0.0001999996531026648, - "loss": 46.0, - "step": 10976 - }, - { - "epoch": 0.8392683066689604, - "grad_norm": 0.0011469278251752257, - "learning_rate": 0.00019999965303939417, - "loss": 46.0, - "step": 10977 - }, - { - "epoch": 0.8393447636523501, - "grad_norm": 0.000676978612318635, - "learning_rate": 0.00019999965297611775, - "loss": 46.0, - "step": 10978 - }, - { - "epoch": 0.8394212206357398, - "grad_norm": 0.0029597305692732334, - "learning_rate": 0.00019999965291283558, - "loss": 46.0, - "step": 10979 - }, - { - "epoch": 0.8394976776191295, - "grad_norm": 0.002962978556752205, - "learning_rate": 0.0001999996528495476, - "loss": 46.0, - "step": 10980 - }, - { - "epoch": 0.8395741346025193, - "grad_norm": 0.007286848500370979, - "learning_rate": 0.0001999996527862539, - "loss": 46.0, - "step": 10981 - }, - { - "epoch": 0.8396505915859089, - "grad_norm": 0.000952498521655798, - "learning_rate": 0.0001999996527229544, - "loss": 46.0, - "step": 10982 - }, - { - "epoch": 0.8397270485692987, - "grad_norm": 0.000845861854031682, - "learning_rate": 0.00019999965265964914, - "loss": 46.0, - "step": 10983 - }, - { - "epoch": 0.8398035055526885, - "grad_norm": 0.0031289902981370687, - "learning_rate": 0.00019999965259633807, - "loss": 46.0, - "step": 10984 - }, - { - "epoch": 0.8398799625360781, - "grad_norm": 0.0004959868383593857, - "learning_rate": 0.00019999965253302132, - "loss": 46.0, - "step": 10985 - }, - { - "epoch": 0.8399564195194679, - "grad_norm": 0.002900330815464258, - "learning_rate": 0.0001999996524696987, - "loss": 46.0, - "step": 10986 - }, - { - "epoch": 0.8400328765028576, - "grad_norm": 0.001075619482435286, - "learning_rate": 0.0001999996524063704, - "loss": 46.0, - "step": 10987 - }, - { - "epoch": 0.8401093334862473, - "grad_norm": 0.0013092114822939038, - "learning_rate": 0.00019999965234303625, - "loss": 46.0, - "step": 10988 - }, - { - "epoch": 0.840185790469637, - "grad_norm": 0.0011865614214912057, - "learning_rate": 0.0001999996522796964, - "loss": 46.0, - "step": 10989 - }, - { - "epoch": 0.8402622474530267, - "grad_norm": 0.00552182924002409, - "learning_rate": 0.00019999965221635075, - "loss": 46.0, - "step": 10990 - }, - { - "epoch": 0.8403387044364165, - "grad_norm": 0.000710930849891156, - "learning_rate": 0.00019999965215299935, - "loss": 46.0, - "step": 10991 - }, - { - "epoch": 0.8404151614198062, - "grad_norm": 0.0005091328639537096, - "learning_rate": 0.00019999965208964215, - "loss": 46.0, - "step": 10992 - }, - { - "epoch": 0.8404916184031959, - "grad_norm": 0.006268938537687063, - "learning_rate": 0.00019999965202627918, - "loss": 46.0, - "step": 10993 - }, - { - "epoch": 0.8405680753865856, - "grad_norm": 0.0009775500511750579, - "learning_rate": 0.00019999965196291046, - "loss": 46.0, - "step": 10994 - }, - { - "epoch": 0.8406445323699754, - "grad_norm": 0.0009568944224156439, - "learning_rate": 0.00019999965189953595, - "loss": 46.0, - "step": 10995 - }, - { - "epoch": 0.840720989353365, - "grad_norm": 0.000858356012031436, - "learning_rate": 0.0001999996518361557, - "loss": 46.0, - "step": 10996 - }, - { - "epoch": 0.8407974463367548, - "grad_norm": 0.0009429023484699428, - "learning_rate": 0.00019999965177276967, - "loss": 46.0, - "step": 10997 - }, - { - "epoch": 0.8408739033201446, - "grad_norm": 0.0012169571127742529, - "learning_rate": 0.00019999965170937784, - "loss": 46.0, - "step": 10998 - }, - { - "epoch": 0.8409503603035342, - "grad_norm": 0.0035304888151586056, - "learning_rate": 0.00019999965164598028, - "loss": 46.0, - "step": 10999 - }, - { - "epoch": 0.841026817286924, - "grad_norm": 0.0006491626263596117, - "learning_rate": 0.00019999965158257695, - "loss": 46.0, - "step": 11000 - }, - { - "epoch": 0.8411032742703136, - "grad_norm": 0.000558013969566673, - "learning_rate": 0.00019999965151916782, - "loss": 46.0, - "step": 11001 - }, - { - "epoch": 0.8411797312537034, - "grad_norm": 0.0011213729158043861, - "learning_rate": 0.00019999965145575294, - "loss": 46.0, - "step": 11002 - }, - { - "epoch": 0.8412561882370931, - "grad_norm": 0.0007211608462966979, - "learning_rate": 0.0001999996513923323, - "loss": 46.0, - "step": 11003 - }, - { - "epoch": 0.8413326452204828, - "grad_norm": 0.0010748731438070536, - "learning_rate": 0.00019999965132890587, - "loss": 46.0, - "step": 11004 - }, - { - "epoch": 0.8414091022038725, - "grad_norm": 0.00047332001850008965, - "learning_rate": 0.0001999996512654737, - "loss": 46.0, - "step": 11005 - }, - { - "epoch": 0.8414855591872623, - "grad_norm": 0.0031150744762271643, - "learning_rate": 0.00019999965120203573, - "loss": 46.0, - "step": 11006 - }, - { - "epoch": 0.841562016170652, - "grad_norm": 0.0020026129204779863, - "learning_rate": 0.000199999651138592, - "loss": 46.0, - "step": 11007 - }, - { - "epoch": 0.8416384731540417, - "grad_norm": 0.003529926994815469, - "learning_rate": 0.00019999965107514252, - "loss": 46.0, - "step": 11008 - }, - { - "epoch": 0.8417149301374315, - "grad_norm": 0.0034201047383248806, - "learning_rate": 0.00019999965101168723, - "loss": 46.0, - "step": 11009 - }, - { - "epoch": 0.8417913871208211, - "grad_norm": 0.0007661358686164021, - "learning_rate": 0.0001999996509482262, - "loss": 46.0, - "step": 11010 - }, - { - "epoch": 0.8418678441042109, - "grad_norm": 0.0006424448220059276, - "learning_rate": 0.0001999996508847594, - "loss": 46.0, - "step": 11011 - }, - { - "epoch": 0.8419443010876005, - "grad_norm": 0.0010926688555628061, - "learning_rate": 0.00019999965082128683, - "loss": 46.0, - "step": 11012 - }, - { - "epoch": 0.8420207580709903, - "grad_norm": 0.000931408314500004, - "learning_rate": 0.00019999965075780847, - "loss": 46.0, - "step": 11013 - }, - { - "epoch": 0.8420972150543801, - "grad_norm": 0.0004905432579107583, - "learning_rate": 0.00019999965069432434, - "loss": 46.0, - "step": 11014 - }, - { - "epoch": 0.8421736720377697, - "grad_norm": 0.0017119819531217217, - "learning_rate": 0.00019999965063083447, - "loss": 46.0, - "step": 11015 - }, - { - "epoch": 0.8422501290211595, - "grad_norm": 0.0004936507903039455, - "learning_rate": 0.0001999996505673388, - "loss": 46.0, - "step": 11016 - }, - { - "epoch": 0.8423265860045492, - "grad_norm": 0.004982360173016787, - "learning_rate": 0.00019999965050383737, - "loss": 46.0, - "step": 11017 - }, - { - "epoch": 0.8424030429879389, - "grad_norm": 0.001137531828135252, - "learning_rate": 0.0001999996504403302, - "loss": 46.0, - "step": 11018 - }, - { - "epoch": 0.8424794999713286, - "grad_norm": 0.004952119197696447, - "learning_rate": 0.00019999965037681726, - "loss": 46.0, - "step": 11019 - }, - { - "epoch": 0.8425559569547183, - "grad_norm": 0.0004241155693307519, - "learning_rate": 0.0001999996503132985, - "loss": 46.0, - "step": 11020 - }, - { - "epoch": 0.8426324139381081, - "grad_norm": 0.0018657136242836714, - "learning_rate": 0.000199999650249774, - "loss": 46.0, - "step": 11021 - }, - { - "epoch": 0.8427088709214978, - "grad_norm": 0.0008627755451016128, - "learning_rate": 0.00019999965018624373, - "loss": 46.0, - "step": 11022 - }, - { - "epoch": 0.8427853279048875, - "grad_norm": 0.00034224175033159554, - "learning_rate": 0.0001999996501227077, - "loss": 46.0, - "step": 11023 - }, - { - "epoch": 0.8428617848882772, - "grad_norm": 0.0003670469159260392, - "learning_rate": 0.0001999996500591659, - "loss": 46.0, - "step": 11024 - }, - { - "epoch": 0.842938241871667, - "grad_norm": 0.0015784529969096184, - "learning_rate": 0.0001999996499956183, - "loss": 46.0, - "step": 11025 - }, - { - "epoch": 0.8430146988550566, - "grad_norm": 0.0021029640920460224, - "learning_rate": 0.00019999964993206497, - "loss": 46.0, - "step": 11026 - }, - { - "epoch": 0.8430911558384464, - "grad_norm": 0.009369504638016224, - "learning_rate": 0.00019999964986850584, - "loss": 46.0, - "step": 11027 - }, - { - "epoch": 0.8431676128218362, - "grad_norm": 0.0016328691272065043, - "learning_rate": 0.00019999964980494094, - "loss": 46.0, - "step": 11028 - }, - { - "epoch": 0.8432440698052258, - "grad_norm": 0.0029586199671030045, - "learning_rate": 0.00019999964974137032, - "loss": 46.0, - "step": 11029 - }, - { - "epoch": 0.8433205267886156, - "grad_norm": 0.001808575470931828, - "learning_rate": 0.00019999964967779387, - "loss": 46.0, - "step": 11030 - }, - { - "epoch": 0.8433969837720052, - "grad_norm": 0.0016076787142083049, - "learning_rate": 0.00019999964961421167, - "loss": 46.0, - "step": 11031 - }, - { - "epoch": 0.843473440755395, - "grad_norm": 0.00102371780667454, - "learning_rate": 0.0001999996495506237, - "loss": 46.0, - "step": 11032 - }, - { - "epoch": 0.8435498977387847, - "grad_norm": 0.0009408775367774069, - "learning_rate": 0.00019999964948703, - "loss": 46.0, - "step": 11033 - }, - { - "epoch": 0.8436263547221744, - "grad_norm": 0.004709445405751467, - "learning_rate": 0.00019999964942343044, - "loss": 46.0, - "step": 11034 - }, - { - "epoch": 0.8437028117055642, - "grad_norm": 0.0003644133103080094, - "learning_rate": 0.0001999996493598252, - "loss": 46.0, - "step": 11035 - }, - { - "epoch": 0.8437792686889539, - "grad_norm": 0.0007538195350207388, - "learning_rate": 0.00019999964929621415, - "loss": 46.0, - "step": 11036 - }, - { - "epoch": 0.8438557256723436, - "grad_norm": 0.004987620282918215, - "learning_rate": 0.00019999964923259734, - "loss": 46.0, - "step": 11037 - }, - { - "epoch": 0.8439321826557333, - "grad_norm": 0.003025482874363661, - "learning_rate": 0.00019999964916897475, - "loss": 46.0, - "step": 11038 - }, - { - "epoch": 0.8440086396391231, - "grad_norm": 0.0007983259274624288, - "learning_rate": 0.0001999996491053464, - "loss": 46.0, - "step": 11039 - }, - { - "epoch": 0.8440850966225127, - "grad_norm": 0.0033262609504163265, - "learning_rate": 0.0001999996490417123, - "loss": 46.0, - "step": 11040 - }, - { - "epoch": 0.8441615536059025, - "grad_norm": 0.0013376990100368857, - "learning_rate": 0.0001999996489780724, - "loss": 46.0, - "step": 11041 - }, - { - "epoch": 0.8442380105892922, - "grad_norm": 0.000613740470726043, - "learning_rate": 0.00019999964891442672, - "loss": 46.0, - "step": 11042 - }, - { - "epoch": 0.8443144675726819, - "grad_norm": 0.0005987072945572436, - "learning_rate": 0.00019999964885077532, - "loss": 46.0, - "step": 11043 - }, - { - "epoch": 0.8443909245560717, - "grad_norm": 0.0016557784983888268, - "learning_rate": 0.00019999964878711813, - "loss": 46.0, - "step": 11044 - }, - { - "epoch": 0.8444673815394613, - "grad_norm": 0.003649662947282195, - "learning_rate": 0.00019999964872345513, - "loss": 46.0, - "step": 11045 - }, - { - "epoch": 0.8445438385228511, - "grad_norm": 0.005795789882540703, - "learning_rate": 0.00019999964865978641, - "loss": 46.0, - "step": 11046 - }, - { - "epoch": 0.8446202955062408, - "grad_norm": 0.0015953467227518559, - "learning_rate": 0.0001999996485961119, - "loss": 46.0, - "step": 11047 - }, - { - "epoch": 0.8446967524896305, - "grad_norm": 0.02038198709487915, - "learning_rate": 0.00019999964853243164, - "loss": 46.0, - "step": 11048 - }, - { - "epoch": 0.8447732094730203, - "grad_norm": 0.00900246761739254, - "learning_rate": 0.0001999996484687456, - "loss": 46.0, - "step": 11049 - }, - { - "epoch": 0.8448496664564099, - "grad_norm": 0.0009198650950565934, - "learning_rate": 0.0001999996484050538, - "loss": 46.0, - "step": 11050 - }, - { - "epoch": 0.8449261234397997, - "grad_norm": 0.004574987571686506, - "learning_rate": 0.00019999964834135618, - "loss": 46.0, - "step": 11051 - }, - { - "epoch": 0.8450025804231894, - "grad_norm": 0.002760454546660185, - "learning_rate": 0.00019999964827765283, - "loss": 46.0, - "step": 11052 - }, - { - "epoch": 0.8450790374065791, - "grad_norm": 0.0028988949488848448, - "learning_rate": 0.00019999964821394372, - "loss": 46.0, - "step": 11053 - }, - { - "epoch": 0.8451554943899688, - "grad_norm": 0.0010574010666459799, - "learning_rate": 0.00019999964815022882, - "loss": 46.0, - "step": 11054 - }, - { - "epoch": 0.8452319513733586, - "grad_norm": 0.0006346292793750763, - "learning_rate": 0.00019999964808650815, - "loss": 46.0, - "step": 11055 - }, - { - "epoch": 0.8453084083567483, - "grad_norm": 0.0007920832140371203, - "learning_rate": 0.00019999964802278172, - "loss": 46.0, - "step": 11056 - }, - { - "epoch": 0.845384865340138, - "grad_norm": 0.005531759932637215, - "learning_rate": 0.00019999964795904953, - "loss": 46.0, - "step": 11057 - }, - { - "epoch": 0.8454613223235278, - "grad_norm": 0.0012296109925955534, - "learning_rate": 0.00019999964789531153, - "loss": 46.0, - "step": 11058 - }, - { - "epoch": 0.8455377793069174, - "grad_norm": 0.0041253287345170975, - "learning_rate": 0.00019999964783156782, - "loss": 46.0, - "step": 11059 - }, - { - "epoch": 0.8456142362903072, - "grad_norm": 0.0007868785760365427, - "learning_rate": 0.00019999964776781833, - "loss": 46.0, - "step": 11060 - }, - { - "epoch": 0.8456906932736968, - "grad_norm": 0.003124518319964409, - "learning_rate": 0.00019999964770406304, - "loss": 46.0, - "step": 11061 - }, - { - "epoch": 0.8457671502570866, - "grad_norm": 0.0015684838872402906, - "learning_rate": 0.00019999964764030198, - "loss": 46.0, - "step": 11062 - }, - { - "epoch": 0.8458436072404764, - "grad_norm": 0.0009703235700726509, - "learning_rate": 0.00019999964757653514, - "loss": 46.0, - "step": 11063 - }, - { - "epoch": 0.845920064223866, - "grad_norm": 0.001825161511078477, - "learning_rate": 0.00019999964751276256, - "loss": 46.0, - "step": 11064 - }, - { - "epoch": 0.8459965212072558, - "grad_norm": 0.0023887422867119312, - "learning_rate": 0.0001999996474489842, - "loss": 46.0, - "step": 11065 - }, - { - "epoch": 0.8460729781906455, - "grad_norm": 0.0004532198654487729, - "learning_rate": 0.0001999996473852001, - "loss": 46.0, - "step": 11066 - }, - { - "epoch": 0.8461494351740352, - "grad_norm": 0.0016221263213083148, - "learning_rate": 0.0001999996473214102, - "loss": 46.0, - "step": 11067 - }, - { - "epoch": 0.8462258921574249, - "grad_norm": 0.000907170819118619, - "learning_rate": 0.00019999964725761452, - "loss": 46.0, - "step": 11068 - }, - { - "epoch": 0.8463023491408147, - "grad_norm": 0.0017541300039738417, - "learning_rate": 0.0001999996471938131, - "loss": 46.0, - "step": 11069 - }, - { - "epoch": 0.8463788061242044, - "grad_norm": 0.0010579756926745176, - "learning_rate": 0.0001999996471300059, - "loss": 46.0, - "step": 11070 - }, - { - "epoch": 0.8464552631075941, - "grad_norm": 0.0019380139419808984, - "learning_rate": 0.0001999996470661929, - "loss": 46.0, - "step": 11071 - }, - { - "epoch": 0.8465317200909838, - "grad_norm": 0.0015164641663432121, - "learning_rate": 0.00019999964700237417, - "loss": 46.0, - "step": 11072 - }, - { - "epoch": 0.8466081770743735, - "grad_norm": 0.0033727106638252735, - "learning_rate": 0.00019999964693854965, - "loss": 46.0, - "step": 11073 - }, - { - "epoch": 0.8466846340577633, - "grad_norm": 0.0005675025749951601, - "learning_rate": 0.0001999996468747194, - "loss": 46.0, - "step": 11074 - }, - { - "epoch": 0.8467610910411529, - "grad_norm": 0.0031972487922757864, - "learning_rate": 0.00019999964681088336, - "loss": 46.0, - "step": 11075 - }, - { - "epoch": 0.8468375480245427, - "grad_norm": 0.001838152064010501, - "learning_rate": 0.00019999964674704152, - "loss": 46.0, - "step": 11076 - }, - { - "epoch": 0.8469140050079325, - "grad_norm": 0.0014730638358741999, - "learning_rate": 0.00019999964668319394, - "loss": 46.0, - "step": 11077 - }, - { - "epoch": 0.8469904619913221, - "grad_norm": 0.0008131054346449673, - "learning_rate": 0.00019999964661934056, - "loss": 46.0, - "step": 11078 - }, - { - "epoch": 0.8470669189747119, - "grad_norm": 0.005546481814235449, - "learning_rate": 0.00019999964655548146, - "loss": 46.0, - "step": 11079 - }, - { - "epoch": 0.8471433759581015, - "grad_norm": 0.00039659501635469496, - "learning_rate": 0.00019999964649161655, - "loss": 46.0, - "step": 11080 - }, - { - "epoch": 0.8472198329414913, - "grad_norm": 0.0009099820163100958, - "learning_rate": 0.00019999964642774588, - "loss": 46.0, - "step": 11081 - }, - { - "epoch": 0.847296289924881, - "grad_norm": 0.0009537608711980283, - "learning_rate": 0.00019999964636386946, - "loss": 46.0, - "step": 11082 - }, - { - "epoch": 0.8473727469082707, - "grad_norm": 0.0013637441443279386, - "learning_rate": 0.0001999996462999872, - "loss": 46.0, - "step": 11083 - }, - { - "epoch": 0.8474492038916605, - "grad_norm": 0.0018717522034421563, - "learning_rate": 0.00019999964623609927, - "loss": 46.0, - "step": 11084 - }, - { - "epoch": 0.8475256608750502, - "grad_norm": 0.0009365485166199505, - "learning_rate": 0.00019999964617220553, - "loss": 46.0, - "step": 11085 - }, - { - "epoch": 0.8476021178584399, - "grad_norm": 0.00041334074921905994, - "learning_rate": 0.000199999646108306, - "loss": 46.0, - "step": 11086 - }, - { - "epoch": 0.8476785748418296, - "grad_norm": 0.001404003007337451, - "learning_rate": 0.00019999964604440072, - "loss": 46.0, - "step": 11087 - }, - { - "epoch": 0.8477550318252194, - "grad_norm": 0.006241766735911369, - "learning_rate": 0.0001999996459804897, - "loss": 46.0, - "step": 11088 - }, - { - "epoch": 0.847831488808609, - "grad_norm": 0.0035682839807122946, - "learning_rate": 0.00019999964591657285, - "loss": 46.0, - "step": 11089 - }, - { - "epoch": 0.8479079457919988, - "grad_norm": 0.0008588465279899538, - "learning_rate": 0.00019999964585265024, - "loss": 46.0, - "step": 11090 - }, - { - "epoch": 0.8479844027753884, - "grad_norm": 0.0008802936063148081, - "learning_rate": 0.0001999996457887219, - "loss": 46.0, - "step": 11091 - }, - { - "epoch": 0.8480608597587782, - "grad_norm": 0.000702622055541724, - "learning_rate": 0.00019999964572478776, - "loss": 46.0, - "step": 11092 - }, - { - "epoch": 0.848137316742168, - "grad_norm": 0.0009303967235609889, - "learning_rate": 0.00019999964566084786, - "loss": 46.0, - "step": 11093 - }, - { - "epoch": 0.8482137737255576, - "grad_norm": 0.0006240560906007886, - "learning_rate": 0.00019999964559690218, - "loss": 46.0, - "step": 11094 - }, - { - "epoch": 0.8482902307089474, - "grad_norm": 0.0005857710493728518, - "learning_rate": 0.00019999964553295074, - "loss": 46.0, - "step": 11095 - }, - { - "epoch": 0.8483666876923371, - "grad_norm": 0.00160969328135252, - "learning_rate": 0.00019999964546899354, - "loss": 46.0, - "step": 11096 - }, - { - "epoch": 0.8484431446757268, - "grad_norm": 0.0006553277489729226, - "learning_rate": 0.00019999964540503057, - "loss": 46.0, - "step": 11097 - }, - { - "epoch": 0.8485196016591166, - "grad_norm": 0.0014790561981499195, - "learning_rate": 0.0001999996453410618, - "loss": 46.0, - "step": 11098 - }, - { - "epoch": 0.8485960586425063, - "grad_norm": 0.003423925954848528, - "learning_rate": 0.0001999996452770873, - "loss": 46.0, - "step": 11099 - }, - { - "epoch": 0.848672515625896, - "grad_norm": 0.0011463325936347246, - "learning_rate": 0.000199999645213107, - "loss": 46.0, - "step": 11100 - }, - { - "epoch": 0.8487489726092857, - "grad_norm": 0.00351628172211349, - "learning_rate": 0.00019999964514912094, - "loss": 46.0, - "step": 11101 - }, - { - "epoch": 0.8488254295926754, - "grad_norm": 0.0008316357852891088, - "learning_rate": 0.00019999964508512913, - "loss": 46.0, - "step": 11102 - }, - { - "epoch": 0.8489018865760651, - "grad_norm": 0.0015575755387544632, - "learning_rate": 0.00019999964502113153, - "loss": 46.0, - "step": 11103 - }, - { - "epoch": 0.8489783435594549, - "grad_norm": 0.005252194125205278, - "learning_rate": 0.00019999964495712817, - "loss": 46.0, - "step": 11104 - }, - { - "epoch": 0.8490548005428445, - "grad_norm": 0.0025980074424296618, - "learning_rate": 0.00019999964489311904, - "loss": 46.0, - "step": 11105 - }, - { - "epoch": 0.8491312575262343, - "grad_norm": 0.0012850436614826322, - "learning_rate": 0.00019999964482910414, - "loss": 46.0, - "step": 11106 - }, - { - "epoch": 0.8492077145096241, - "grad_norm": 0.004240087699145079, - "learning_rate": 0.00019999964476508347, - "loss": 46.0, - "step": 11107 - }, - { - "epoch": 0.8492841714930137, - "grad_norm": 0.00125785858836025, - "learning_rate": 0.00019999964470105702, - "loss": 46.0, - "step": 11108 - }, - { - "epoch": 0.8493606284764035, - "grad_norm": 0.001575937494635582, - "learning_rate": 0.0001999996446370248, - "loss": 46.0, - "step": 11109 - }, - { - "epoch": 0.8494370854597931, - "grad_norm": 0.0029926816932857037, - "learning_rate": 0.00019999964457298683, - "loss": 46.0, - "step": 11110 - }, - { - "epoch": 0.8495135424431829, - "grad_norm": 0.0015967695508152246, - "learning_rate": 0.0001999996445089431, - "loss": 46.0, - "step": 11111 - }, - { - "epoch": 0.8495899994265727, - "grad_norm": 0.002085303422063589, - "learning_rate": 0.00019999964444489357, - "loss": 46.0, - "step": 11112 - }, - { - "epoch": 0.8496664564099623, - "grad_norm": 0.007869662716984749, - "learning_rate": 0.00019999964438083826, - "loss": 46.0, - "step": 11113 - }, - { - "epoch": 0.8497429133933521, - "grad_norm": 0.01517061423510313, - "learning_rate": 0.00019999964431677723, - "loss": 46.0, - "step": 11114 - }, - { - "epoch": 0.8498193703767418, - "grad_norm": 0.0007287377375178039, - "learning_rate": 0.0001999996442527104, - "loss": 46.0, - "step": 11115 - }, - { - "epoch": 0.8498958273601315, - "grad_norm": 0.0007352664833888412, - "learning_rate": 0.00019999964418863778, - "loss": 46.0, - "step": 11116 - }, - { - "epoch": 0.8499722843435212, - "grad_norm": 0.0016861286712810397, - "learning_rate": 0.00019999964412455943, - "loss": 46.0, - "step": 11117 - }, - { - "epoch": 0.850048741326911, - "grad_norm": 0.0006984604406170547, - "learning_rate": 0.0001999996440604753, - "loss": 46.0, - "step": 11118 - }, - { - "epoch": 0.8501251983103006, - "grad_norm": 0.0010811056708917022, - "learning_rate": 0.00019999964399638537, - "loss": 46.0, - "step": 11119 - }, - { - "epoch": 0.8502016552936904, - "grad_norm": 0.000977845280431211, - "learning_rate": 0.00019999964393228973, - "loss": 46.0, - "step": 11120 - }, - { - "epoch": 0.8502781122770801, - "grad_norm": 0.0015058178687468171, - "learning_rate": 0.00019999964386818825, - "loss": 46.0, - "step": 11121 - }, - { - "epoch": 0.8503545692604698, - "grad_norm": 0.0035912117455154657, - "learning_rate": 0.00019999964380408106, - "loss": 46.0, - "step": 11122 - }, - { - "epoch": 0.8504310262438596, - "grad_norm": 0.0021168177481740713, - "learning_rate": 0.00019999964373996806, - "loss": 46.0, - "step": 11123 - }, - { - "epoch": 0.8505074832272492, - "grad_norm": 0.0013354021357372403, - "learning_rate": 0.00019999964367584932, - "loss": 46.0, - "step": 11124 - }, - { - "epoch": 0.850583940210639, - "grad_norm": 0.0007549573783762753, - "learning_rate": 0.00019999964361172478, - "loss": 46.0, - "step": 11125 - }, - { - "epoch": 0.8506603971940288, - "grad_norm": 0.0015961505705490708, - "learning_rate": 0.0001999996435475945, - "loss": 46.0, - "step": 11126 - }, - { - "epoch": 0.8507368541774184, - "grad_norm": 0.0022256355732679367, - "learning_rate": 0.00019999964348345843, - "loss": 46.0, - "step": 11127 - }, - { - "epoch": 0.8508133111608082, - "grad_norm": 0.0009828897891566157, - "learning_rate": 0.0001999996434193166, - "loss": 46.0, - "step": 11128 - }, - { - "epoch": 0.8508897681441979, - "grad_norm": 0.004272941034287214, - "learning_rate": 0.00019999964335516902, - "loss": 46.0, - "step": 11129 - }, - { - "epoch": 0.8509662251275876, - "grad_norm": 0.0005844312254339457, - "learning_rate": 0.00019999964329101564, - "loss": 46.0, - "step": 11130 - }, - { - "epoch": 0.8510426821109773, - "grad_norm": 0.0011244529159739614, - "learning_rate": 0.00019999964322685648, - "loss": 46.0, - "step": 11131 - }, - { - "epoch": 0.851119139094367, - "grad_norm": 0.0015478788409382105, - "learning_rate": 0.0001999996431626916, - "loss": 46.0, - "step": 11132 - }, - { - "epoch": 0.8511955960777567, - "grad_norm": 0.005694926716387272, - "learning_rate": 0.00019999964309852094, - "loss": 46.0, - "step": 11133 - }, - { - "epoch": 0.8512720530611465, - "grad_norm": 0.001463546184822917, - "learning_rate": 0.00019999964303434446, - "loss": 46.0, - "step": 11134 - }, - { - "epoch": 0.8513485100445362, - "grad_norm": 0.003945269156247377, - "learning_rate": 0.00019999964297016224, - "loss": 46.0, - "step": 11135 - }, - { - "epoch": 0.8514249670279259, - "grad_norm": 0.0008292829734273255, - "learning_rate": 0.00019999964290597427, - "loss": 46.0, - "step": 11136 - }, - { - "epoch": 0.8515014240113157, - "grad_norm": 0.0009785188594833016, - "learning_rate": 0.00019999964284178053, - "loss": 46.0, - "step": 11137 - }, - { - "epoch": 0.8515778809947053, - "grad_norm": 0.0005827966961078346, - "learning_rate": 0.000199999642777581, - "loss": 46.0, - "step": 11138 - }, - { - "epoch": 0.8516543379780951, - "grad_norm": 0.0020266349893063307, - "learning_rate": 0.00019999964271337568, - "loss": 46.0, - "step": 11139 - }, - { - "epoch": 0.8517307949614848, - "grad_norm": 0.0021023531444370747, - "learning_rate": 0.00019999964264916462, - "loss": 46.0, - "step": 11140 - }, - { - "epoch": 0.8518072519448745, - "grad_norm": 0.0008576546097174287, - "learning_rate": 0.00019999964258494778, - "loss": 46.0, - "step": 11141 - }, - { - "epoch": 0.8518837089282643, - "grad_norm": 0.0013772320235148072, - "learning_rate": 0.00019999964252072518, - "loss": 46.0, - "step": 11142 - }, - { - "epoch": 0.8519601659116539, - "grad_norm": 0.0018279260257259011, - "learning_rate": 0.0001999996424564968, - "loss": 46.0, - "step": 11143 - }, - { - "epoch": 0.8520366228950437, - "grad_norm": 0.0017454697517678142, - "learning_rate": 0.00019999964239226267, - "loss": 46.0, - "step": 11144 - }, - { - "epoch": 0.8521130798784334, - "grad_norm": 0.001503758248873055, - "learning_rate": 0.00019999964232802277, - "loss": 46.0, - "step": 11145 - }, - { - "epoch": 0.8521895368618231, - "grad_norm": 0.001236366922967136, - "learning_rate": 0.0001999996422637771, - "loss": 46.0, - "step": 11146 - }, - { - "epoch": 0.8522659938452128, - "grad_norm": 0.0011803583474829793, - "learning_rate": 0.00019999964219952565, - "loss": 46.0, - "step": 11147 - }, - { - "epoch": 0.8523424508286026, - "grad_norm": 0.0004411491972859949, - "learning_rate": 0.00019999964213526843, - "loss": 46.0, - "step": 11148 - }, - { - "epoch": 0.8524189078119923, - "grad_norm": 0.0015384680591523647, - "learning_rate": 0.00019999964207100543, - "loss": 46.0, - "step": 11149 - }, - { - "epoch": 0.852495364795382, - "grad_norm": 0.0007610170287080109, - "learning_rate": 0.00019999964200673667, - "loss": 46.0, - "step": 11150 - }, - { - "epoch": 0.8525718217787717, - "grad_norm": 0.0011325698578730226, - "learning_rate": 0.00019999964194246215, - "loss": 46.0, - "step": 11151 - }, - { - "epoch": 0.8526482787621614, - "grad_norm": 0.0018241963116452098, - "learning_rate": 0.00019999964187818187, - "loss": 46.0, - "step": 11152 - }, - { - "epoch": 0.8527247357455512, - "grad_norm": 0.0009631628054194152, - "learning_rate": 0.00019999964181389578, - "loss": 46.0, - "step": 11153 - }, - { - "epoch": 0.8528011927289408, - "grad_norm": 0.005874826572835445, - "learning_rate": 0.00019999964174960395, - "loss": 46.0, - "step": 11154 - }, - { - "epoch": 0.8528776497123306, - "grad_norm": 0.0007128996076062322, - "learning_rate": 0.00019999964168530634, - "loss": 46.0, - "step": 11155 - }, - { - "epoch": 0.8529541066957204, - "grad_norm": 0.00024089418002404273, - "learning_rate": 0.00019999964162100296, - "loss": 46.0, - "step": 11156 - }, - { - "epoch": 0.85303056367911, - "grad_norm": 0.0044003985822200775, - "learning_rate": 0.0001999996415566938, - "loss": 46.0, - "step": 11157 - }, - { - "epoch": 0.8531070206624998, - "grad_norm": 0.000694796210154891, - "learning_rate": 0.0001999996414923789, - "loss": 46.0, - "step": 11158 - }, - { - "epoch": 0.8531834776458895, - "grad_norm": 0.0005199249135330319, - "learning_rate": 0.00019999964142805823, - "loss": 46.0, - "step": 11159 - }, - { - "epoch": 0.8532599346292792, - "grad_norm": 0.0031427121721208096, - "learning_rate": 0.00019999964136373178, - "loss": 46.0, - "step": 11160 - }, - { - "epoch": 0.8533363916126689, - "grad_norm": 0.00044588546734303236, - "learning_rate": 0.00019999964129939954, - "loss": 46.0, - "step": 11161 - }, - { - "epoch": 0.8534128485960586, - "grad_norm": 0.002548330230638385, - "learning_rate": 0.00019999964123506157, - "loss": 46.0, - "step": 11162 - }, - { - "epoch": 0.8534893055794484, - "grad_norm": 0.0016298653790727258, - "learning_rate": 0.0001999996411707178, - "loss": 46.0, - "step": 11163 - }, - { - "epoch": 0.8535657625628381, - "grad_norm": 0.0025588199496269226, - "learning_rate": 0.00019999964110636826, - "loss": 46.0, - "step": 11164 - }, - { - "epoch": 0.8536422195462278, - "grad_norm": 0.0007251531351357698, - "learning_rate": 0.00019999964104201298, - "loss": 46.0, - "step": 11165 - }, - { - "epoch": 0.8537186765296175, - "grad_norm": 0.0007614085334353149, - "learning_rate": 0.0001999996409776519, - "loss": 46.0, - "step": 11166 - }, - { - "epoch": 0.8537951335130073, - "grad_norm": 0.0009560105972923338, - "learning_rate": 0.00019999964091328506, - "loss": 46.0, - "step": 11167 - }, - { - "epoch": 0.8538715904963969, - "grad_norm": 0.00316926883533597, - "learning_rate": 0.00019999964084891245, - "loss": 46.0, - "step": 11168 - }, - { - "epoch": 0.8539480474797867, - "grad_norm": 0.0007053738227114081, - "learning_rate": 0.00019999964078453407, - "loss": 46.0, - "step": 11169 - }, - { - "epoch": 0.8540245044631765, - "grad_norm": 0.001694960636086762, - "learning_rate": 0.00019999964072014992, - "loss": 46.0, - "step": 11170 - }, - { - "epoch": 0.8541009614465661, - "grad_norm": 0.0009114079875871539, - "learning_rate": 0.00019999964065576, - "loss": 46.0, - "step": 11171 - }, - { - "epoch": 0.8541774184299559, - "grad_norm": 0.0010451360139995813, - "learning_rate": 0.00019999964059136432, - "loss": 46.0, - "step": 11172 - }, - { - "epoch": 0.8542538754133455, - "grad_norm": 0.0018855428788810968, - "learning_rate": 0.00019999964052696287, - "loss": 46.0, - "step": 11173 - }, - { - "epoch": 0.8543303323967353, - "grad_norm": 0.0038219306152313948, - "learning_rate": 0.00019999964046255565, - "loss": 46.0, - "step": 11174 - }, - { - "epoch": 0.854406789380125, - "grad_norm": 0.0015638156328350306, - "learning_rate": 0.00019999964039814266, - "loss": 46.0, - "step": 11175 - }, - { - "epoch": 0.8544832463635147, - "grad_norm": 0.0005671442486345768, - "learning_rate": 0.0001999996403337239, - "loss": 46.0, - "step": 11176 - }, - { - "epoch": 0.8545597033469045, - "grad_norm": 0.0006012267549522221, - "learning_rate": 0.00019999964026929935, - "loss": 46.0, - "step": 11177 - }, - { - "epoch": 0.8546361603302942, - "grad_norm": 0.002317205537110567, - "learning_rate": 0.00019999964020486907, - "loss": 46.0, - "step": 11178 - }, - { - "epoch": 0.8547126173136839, - "grad_norm": 0.0008620276930741966, - "learning_rate": 0.00019999964014043298, - "loss": 46.0, - "step": 11179 - }, - { - "epoch": 0.8547890742970736, - "grad_norm": 0.011933354660868645, - "learning_rate": 0.00019999964007599115, - "loss": 46.0, - "step": 11180 - }, - { - "epoch": 0.8548655312804633, - "grad_norm": 0.0005438775406219065, - "learning_rate": 0.00019999964001154354, - "loss": 46.0, - "step": 11181 - }, - { - "epoch": 0.854941988263853, - "grad_norm": 0.00038960689562372863, - "learning_rate": 0.00019999963994709016, - "loss": 46.0, - "step": 11182 - }, - { - "epoch": 0.8550184452472428, - "grad_norm": 0.00303665385581553, - "learning_rate": 0.000199999639882631, - "loss": 46.0, - "step": 11183 - }, - { - "epoch": 0.8550949022306324, - "grad_norm": 0.0013340001460164785, - "learning_rate": 0.0001999996398181661, - "loss": 46.0, - "step": 11184 - }, - { - "epoch": 0.8551713592140222, - "grad_norm": 0.0007283794111572206, - "learning_rate": 0.0001999996397536954, - "loss": 46.0, - "step": 11185 - }, - { - "epoch": 0.855247816197412, - "grad_norm": 0.0010545131517574191, - "learning_rate": 0.00019999963968921893, - "loss": 46.0, - "step": 11186 - }, - { - "epoch": 0.8553242731808016, - "grad_norm": 0.002913212403655052, - "learning_rate": 0.00019999963962473671, - "loss": 46.0, - "step": 11187 - }, - { - "epoch": 0.8554007301641914, - "grad_norm": 0.0012835368979722261, - "learning_rate": 0.00019999963956024872, - "loss": 46.0, - "step": 11188 - }, - { - "epoch": 0.8554771871475811, - "grad_norm": 0.0009773829951882362, - "learning_rate": 0.00019999963949575496, - "loss": 46.0, - "step": 11189 - }, - { - "epoch": 0.8555536441309708, - "grad_norm": 0.002376238815486431, - "learning_rate": 0.00019999963943125542, - "loss": 46.0, - "step": 11190 - }, - { - "epoch": 0.8556301011143606, - "grad_norm": 0.008335355669260025, - "learning_rate": 0.00019999963936675013, - "loss": 46.0, - "step": 11191 - }, - { - "epoch": 0.8557065580977502, - "grad_norm": 0.00114829174708575, - "learning_rate": 0.00019999963930223904, - "loss": 46.0, - "step": 11192 - }, - { - "epoch": 0.85578301508114, - "grad_norm": 0.0018514475086703897, - "learning_rate": 0.00019999963923772218, - "loss": 46.0, - "step": 11193 - }, - { - "epoch": 0.8558594720645297, - "grad_norm": 0.0006004343740642071, - "learning_rate": 0.00019999963917319958, - "loss": 46.0, - "step": 11194 - }, - { - "epoch": 0.8559359290479194, - "grad_norm": 0.0010789603693410754, - "learning_rate": 0.0001999996391086712, - "loss": 46.0, - "step": 11195 - }, - { - "epoch": 0.8560123860313091, - "grad_norm": 0.00040760572301223874, - "learning_rate": 0.00019999963904413705, - "loss": 46.0, - "step": 11196 - }, - { - "epoch": 0.8560888430146989, - "grad_norm": 0.0021404919680207968, - "learning_rate": 0.00019999963897959712, - "loss": 46.0, - "step": 11197 - }, - { - "epoch": 0.8561652999980885, - "grad_norm": 0.0016223929123952985, - "learning_rate": 0.00019999963891505145, - "loss": 46.0, - "step": 11198 - }, - { - "epoch": 0.8562417569814783, - "grad_norm": 0.0014355364255607128, - "learning_rate": 0.00019999963885049997, - "loss": 46.0, - "step": 11199 - }, - { - "epoch": 0.8563182139648681, - "grad_norm": 0.0005289613036438823, - "learning_rate": 0.00019999963878594276, - "loss": 46.0, - "step": 11200 - }, - { - "epoch": 0.8563946709482577, - "grad_norm": 0.00042334923637099564, - "learning_rate": 0.00019999963872137974, - "loss": 46.0, - "step": 11201 - }, - { - "epoch": 0.8564711279316475, - "grad_norm": 0.00808667205274105, - "learning_rate": 0.00019999963865681097, - "loss": 46.0, - "step": 11202 - }, - { - "epoch": 0.8565475849150371, - "grad_norm": 0.0019520564237609506, - "learning_rate": 0.00019999963859223643, - "loss": 46.0, - "step": 11203 - }, - { - "epoch": 0.8566240418984269, - "grad_norm": 0.005279670935124159, - "learning_rate": 0.00019999963852765612, - "loss": 46.0, - "step": 11204 - }, - { - "epoch": 0.8567004988818167, - "grad_norm": 0.0017014906043186784, - "learning_rate": 0.00019999963846307006, - "loss": 46.0, - "step": 11205 - }, - { - "epoch": 0.8567769558652063, - "grad_norm": 0.0012440314749255776, - "learning_rate": 0.00019999963839847823, - "loss": 46.0, - "step": 11206 - }, - { - "epoch": 0.8568534128485961, - "grad_norm": 0.0011593655217438936, - "learning_rate": 0.0001999996383338806, - "loss": 46.0, - "step": 11207 - }, - { - "epoch": 0.8569298698319858, - "grad_norm": 0.0005625446792691946, - "learning_rate": 0.00019999963826927721, - "loss": 46.0, - "step": 11208 - }, - { - "epoch": 0.8570063268153755, - "grad_norm": 0.001205519656650722, - "learning_rate": 0.00019999963820466806, - "loss": 46.0, - "step": 11209 - }, - { - "epoch": 0.8570827837987652, - "grad_norm": 0.002979275770485401, - "learning_rate": 0.0001999996381400531, - "loss": 46.0, - "step": 11210 - }, - { - "epoch": 0.8571592407821549, - "grad_norm": 0.0053017837926745415, - "learning_rate": 0.00019999963807543244, - "loss": 46.0, - "step": 11211 - }, - { - "epoch": 0.8572356977655446, - "grad_norm": 0.001378338783979416, - "learning_rate": 0.00019999963801080596, - "loss": 46.0, - "step": 11212 - }, - { - "epoch": 0.8573121547489344, - "grad_norm": 0.0009639773052185774, - "learning_rate": 0.00019999963794617375, - "loss": 46.0, - "step": 11213 - }, - { - "epoch": 0.8573886117323241, - "grad_norm": 0.000531878205947578, - "learning_rate": 0.00019999963788153573, - "loss": 46.0, - "step": 11214 - }, - { - "epoch": 0.8574650687157138, - "grad_norm": 0.002438716823235154, - "learning_rate": 0.00019999963781689196, - "loss": 46.0, - "step": 11215 - }, - { - "epoch": 0.8575415256991036, - "grad_norm": 0.0016653784550726414, - "learning_rate": 0.00019999963775224245, - "loss": 46.0, - "step": 11216 - }, - { - "epoch": 0.8576179826824932, - "grad_norm": 0.0018046468030661345, - "learning_rate": 0.0001999996376875871, - "loss": 46.0, - "step": 11217 - }, - { - "epoch": 0.857694439665883, - "grad_norm": 0.001049616257660091, - "learning_rate": 0.00019999963762292605, - "loss": 46.0, - "step": 11218 - }, - { - "epoch": 0.8577708966492728, - "grad_norm": 0.0015249074203893542, - "learning_rate": 0.0001999996375582592, - "loss": 46.0, - "step": 11219 - }, - { - "epoch": 0.8578473536326624, - "grad_norm": 0.0028940935153514147, - "learning_rate": 0.00019999963749358656, - "loss": 46.0, - "step": 11220 - }, - { - "epoch": 0.8579238106160522, - "grad_norm": 0.001310089137405157, - "learning_rate": 0.00019999963742890818, - "loss": 46.0, - "step": 11221 - }, - { - "epoch": 0.8580002675994418, - "grad_norm": 0.0037354696542024612, - "learning_rate": 0.00019999963736422403, - "loss": 46.0, - "step": 11222 - }, - { - "epoch": 0.8580767245828316, - "grad_norm": 0.009329929947853088, - "learning_rate": 0.00019999963729953408, - "loss": 46.0, - "step": 11223 - }, - { - "epoch": 0.8581531815662213, - "grad_norm": 0.0006990203983150423, - "learning_rate": 0.0001999996372348384, - "loss": 46.0, - "step": 11224 - }, - { - "epoch": 0.858229638549611, - "grad_norm": 0.0011387696722522378, - "learning_rate": 0.00019999963717013693, - "loss": 46.0, - "step": 11225 - }, - { - "epoch": 0.8583060955330007, - "grad_norm": 0.0014611412771046162, - "learning_rate": 0.0001999996371054297, - "loss": 46.0, - "step": 11226 - }, - { - "epoch": 0.8583825525163905, - "grad_norm": 0.0030784413684159517, - "learning_rate": 0.0001999996370407167, - "loss": 46.0, - "step": 11227 - }, - { - "epoch": 0.8584590094997802, - "grad_norm": 0.0019194413907825947, - "learning_rate": 0.00019999963697599793, - "loss": 46.0, - "step": 11228 - }, - { - "epoch": 0.8585354664831699, - "grad_norm": 0.0009426483884453773, - "learning_rate": 0.00019999963691127336, - "loss": 46.0, - "step": 11229 - }, - { - "epoch": 0.8586119234665597, - "grad_norm": 0.0016573058674111962, - "learning_rate": 0.00019999963684654305, - "loss": 46.0, - "step": 11230 - }, - { - "epoch": 0.8586883804499493, - "grad_norm": 0.000970787659753114, - "learning_rate": 0.00019999963678180697, - "loss": 46.0, - "step": 11231 - }, - { - "epoch": 0.8587648374333391, - "grad_norm": 0.0022142468951642513, - "learning_rate": 0.0001999996367170651, - "loss": 46.0, - "step": 11232 - }, - { - "epoch": 0.8588412944167287, - "grad_norm": 0.0033295524772256613, - "learning_rate": 0.0001999996366523175, - "loss": 46.0, - "step": 11233 - }, - { - "epoch": 0.8589177514001185, - "grad_norm": 0.002869711024686694, - "learning_rate": 0.00019999963658756412, - "loss": 46.0, - "step": 11234 - }, - { - "epoch": 0.8589942083835083, - "grad_norm": 0.0017400338547304273, - "learning_rate": 0.00019999963652280494, - "loss": 46.0, - "step": 11235 - }, - { - "epoch": 0.8590706653668979, - "grad_norm": 0.004591533448547125, - "learning_rate": 0.00019999963645804002, - "loss": 46.0, - "step": 11236 - }, - { - "epoch": 0.8591471223502877, - "grad_norm": 0.0020419990178197622, - "learning_rate": 0.00019999963639326932, - "loss": 46.0, - "step": 11237 - }, - { - "epoch": 0.8592235793336774, - "grad_norm": 0.0014866740675643086, - "learning_rate": 0.00019999963632849288, - "loss": 46.0, - "step": 11238 - }, - { - "epoch": 0.8593000363170671, - "grad_norm": 0.0024214345030486584, - "learning_rate": 0.00019999963626371063, - "loss": 46.0, - "step": 11239 - }, - { - "epoch": 0.8593764933004568, - "grad_norm": 0.0003167380637023598, - "learning_rate": 0.0001999996361989226, - "loss": 46.0, - "step": 11240 - }, - { - "epoch": 0.8594529502838466, - "grad_norm": 0.0008764404919929802, - "learning_rate": 0.00019999963613412885, - "loss": 46.0, - "step": 11241 - }, - { - "epoch": 0.8595294072672363, - "grad_norm": 0.0015325612621381879, - "learning_rate": 0.0001999996360693293, - "loss": 46.0, - "step": 11242 - }, - { - "epoch": 0.859605864250626, - "grad_norm": 0.006553499028086662, - "learning_rate": 0.00019999963600452397, - "loss": 46.0, - "step": 11243 - }, - { - "epoch": 0.8596823212340157, - "grad_norm": 0.0015160355251282454, - "learning_rate": 0.00019999963593971288, - "loss": 46.0, - "step": 11244 - }, - { - "epoch": 0.8597587782174054, - "grad_norm": 0.0014304619980975986, - "learning_rate": 0.00019999963587489603, - "loss": 46.0, - "step": 11245 - }, - { - "epoch": 0.8598352352007952, - "grad_norm": 0.001085482188500464, - "learning_rate": 0.00019999963581007345, - "loss": 46.0, - "step": 11246 - }, - { - "epoch": 0.8599116921841848, - "grad_norm": 0.0038877027109265327, - "learning_rate": 0.00019999963574524502, - "loss": 46.0, - "step": 11247 - }, - { - "epoch": 0.8599881491675746, - "grad_norm": 0.001028078026138246, - "learning_rate": 0.00019999963568041086, - "loss": 46.0, - "step": 11248 - }, - { - "epoch": 0.8600646061509644, - "grad_norm": 0.0021918523125350475, - "learning_rate": 0.00019999963561557094, - "loss": 46.0, - "step": 11249 - }, - { - "epoch": 0.860141063134354, - "grad_norm": 0.0009138314635492861, - "learning_rate": 0.00019999963555072524, - "loss": 46.0, - "step": 11250 - }, - { - "epoch": 0.8602175201177438, - "grad_norm": 0.0034649111330509186, - "learning_rate": 0.00019999963548587374, - "loss": 46.0, - "step": 11251 - }, - { - "epoch": 0.8602939771011334, - "grad_norm": 0.007519397884607315, - "learning_rate": 0.00019999963542101652, - "loss": 46.0, - "step": 11252 - }, - { - "epoch": 0.8603704340845232, - "grad_norm": 0.0009737313375808299, - "learning_rate": 0.0001999996353561535, - "loss": 46.0, - "step": 11253 - }, - { - "epoch": 0.860446891067913, - "grad_norm": 0.006674016825854778, - "learning_rate": 0.00019999963529128474, - "loss": 46.0, - "step": 11254 - }, - { - "epoch": 0.8605233480513026, - "grad_norm": 0.0019541995134204626, - "learning_rate": 0.00019999963522641018, - "loss": 46.0, - "step": 11255 - }, - { - "epoch": 0.8605998050346924, - "grad_norm": 0.0005965818418189883, - "learning_rate": 0.00019999963516152987, - "loss": 46.0, - "step": 11256 - }, - { - "epoch": 0.8606762620180821, - "grad_norm": 0.0006793156499043107, - "learning_rate": 0.00019999963509664378, - "loss": 46.0, - "step": 11257 - }, - { - "epoch": 0.8607527190014718, - "grad_norm": 0.0008767470135353506, - "learning_rate": 0.00019999963503175192, - "loss": 46.0, - "step": 11258 - }, - { - "epoch": 0.8608291759848615, - "grad_norm": 0.0008187260827980936, - "learning_rate": 0.0001999996349668543, - "loss": 46.0, - "step": 11259 - }, - { - "epoch": 0.8609056329682513, - "grad_norm": 0.005235785618424416, - "learning_rate": 0.0001999996349019509, - "loss": 46.0, - "step": 11260 - }, - { - "epoch": 0.8609820899516409, - "grad_norm": 0.0013601203681901097, - "learning_rate": 0.00019999963483704173, - "loss": 46.0, - "step": 11261 - }, - { - "epoch": 0.8610585469350307, - "grad_norm": 0.0025896672159433365, - "learning_rate": 0.0001999996347721268, - "loss": 46.0, - "step": 11262 - }, - { - "epoch": 0.8611350039184203, - "grad_norm": 0.001386967720463872, - "learning_rate": 0.00019999963470720608, - "loss": 46.0, - "step": 11263 - }, - { - "epoch": 0.8612114609018101, - "grad_norm": 0.0002685583021957427, - "learning_rate": 0.0001999996346422796, - "loss": 46.0, - "step": 11264 - }, - { - "epoch": 0.8612879178851999, - "grad_norm": 0.010421804152429104, - "learning_rate": 0.0001999996345773474, - "loss": 46.0, - "step": 11265 - }, - { - "epoch": 0.8613643748685895, - "grad_norm": 0.005579240154474974, - "learning_rate": 0.00019999963451240938, - "loss": 46.0, - "step": 11266 - }, - { - "epoch": 0.8614408318519793, - "grad_norm": 0.0008963888976722956, - "learning_rate": 0.00019999963444746559, - "loss": 46.0, - "step": 11267 - }, - { - "epoch": 0.861517288835369, - "grad_norm": 0.000862924731336534, - "learning_rate": 0.00019999963438251602, - "loss": 46.0, - "step": 11268 - }, - { - "epoch": 0.8615937458187587, - "grad_norm": 0.0012422333238646388, - "learning_rate": 0.0001999996343175607, - "loss": 46.0, - "step": 11269 - }, - { - "epoch": 0.8616702028021485, - "grad_norm": 0.01048423070460558, - "learning_rate": 0.00019999963425259965, - "loss": 46.0, - "step": 11270 - }, - { - "epoch": 0.8617466597855382, - "grad_norm": 0.0011771812569350004, - "learning_rate": 0.00019999963418763277, - "loss": 46.0, - "step": 11271 - }, - { - "epoch": 0.8618231167689279, - "grad_norm": 0.0005978705012239516, - "learning_rate": 0.00019999963412266014, - "loss": 46.0, - "step": 11272 - }, - { - "epoch": 0.8618995737523176, - "grad_norm": 0.0022190730087459087, - "learning_rate": 0.00019999963405768173, - "loss": 46.0, - "step": 11273 - }, - { - "epoch": 0.8619760307357073, - "grad_norm": 0.0024961759336292744, - "learning_rate": 0.00019999963399269758, - "loss": 46.0, - "step": 11274 - }, - { - "epoch": 0.862052487719097, - "grad_norm": 0.0005841923411935568, - "learning_rate": 0.00019999963392770763, - "loss": 46.0, - "step": 11275 - }, - { - "epoch": 0.8621289447024868, - "grad_norm": 0.0007702383445575833, - "learning_rate": 0.00019999963386271193, - "loss": 46.0, - "step": 11276 - }, - { - "epoch": 0.8622054016858764, - "grad_norm": 0.0006330618634819984, - "learning_rate": 0.00019999963379771046, - "loss": 46.0, - "step": 11277 - }, - { - "epoch": 0.8622818586692662, - "grad_norm": 0.004083727952092886, - "learning_rate": 0.00019999963373270322, - "loss": 46.0, - "step": 11278 - }, - { - "epoch": 0.862358315652656, - "grad_norm": 0.0007187266601249576, - "learning_rate": 0.0001999996336676902, - "loss": 46.0, - "step": 11279 - }, - { - "epoch": 0.8624347726360456, - "grad_norm": 0.009648003615438938, - "learning_rate": 0.0001999996336026714, - "loss": 46.0, - "step": 11280 - }, - { - "epoch": 0.8625112296194354, - "grad_norm": 0.0017131298081949353, - "learning_rate": 0.00019999963353764687, - "loss": 46.0, - "step": 11281 - }, - { - "epoch": 0.862587686602825, - "grad_norm": 0.0006826784228906035, - "learning_rate": 0.00019999963347261653, - "loss": 46.0, - "step": 11282 - }, - { - "epoch": 0.8626641435862148, - "grad_norm": 0.002370436443015933, - "learning_rate": 0.00019999963340758042, - "loss": 46.0, - "step": 11283 - }, - { - "epoch": 0.8627406005696046, - "grad_norm": 0.0073270793072879314, - "learning_rate": 0.00019999963334253856, - "loss": 46.0, - "step": 11284 - }, - { - "epoch": 0.8628170575529942, - "grad_norm": 0.0020656436681747437, - "learning_rate": 0.00019999963327749093, - "loss": 46.0, - "step": 11285 - }, - { - "epoch": 0.862893514536384, - "grad_norm": 0.0022376365959644318, - "learning_rate": 0.00019999963321243756, - "loss": 46.0, - "step": 11286 - }, - { - "epoch": 0.8629699715197737, - "grad_norm": 0.0009171944693662226, - "learning_rate": 0.00019999963314737838, - "loss": 46.0, - "step": 11287 - }, - { - "epoch": 0.8630464285031634, - "grad_norm": 0.0007794692646712065, - "learning_rate": 0.00019999963308231343, - "loss": 46.0, - "step": 11288 - }, - { - "epoch": 0.8631228854865531, - "grad_norm": 0.003385237418115139, - "learning_rate": 0.0001999996330172427, - "loss": 46.0, - "step": 11289 - }, - { - "epoch": 0.8631993424699429, - "grad_norm": 0.0006665576947852969, - "learning_rate": 0.00019999963295216623, - "loss": 46.0, - "step": 11290 - }, - { - "epoch": 0.8632757994533325, - "grad_norm": 0.0030527880880981684, - "learning_rate": 0.000199999632887084, - "loss": 46.0, - "step": 11291 - }, - { - "epoch": 0.8633522564367223, - "grad_norm": 0.005277436226606369, - "learning_rate": 0.000199999632821996, - "loss": 46.0, - "step": 11292 - }, - { - "epoch": 0.863428713420112, - "grad_norm": 0.0018622911302372813, - "learning_rate": 0.0001999996327569022, - "loss": 46.0, - "step": 11293 - }, - { - "epoch": 0.8635051704035017, - "grad_norm": 0.0007191606564447284, - "learning_rate": 0.00019999963269180264, - "loss": 46.0, - "step": 11294 - }, - { - "epoch": 0.8635816273868915, - "grad_norm": 0.0006722731050103903, - "learning_rate": 0.0001999996326266973, - "loss": 46.0, - "step": 11295 - }, - { - "epoch": 0.8636580843702811, - "grad_norm": 0.0009683463722467422, - "learning_rate": 0.00019999963256158622, - "loss": 46.0, - "step": 11296 - }, - { - "epoch": 0.8637345413536709, - "grad_norm": 0.0009882425656542182, - "learning_rate": 0.00019999963249646936, - "loss": 46.0, - "step": 11297 - }, - { - "epoch": 0.8638109983370607, - "grad_norm": 0.0004792765248566866, - "learning_rate": 0.0001999996324313467, - "loss": 46.0, - "step": 11298 - }, - { - "epoch": 0.8638874553204503, - "grad_norm": 0.007468396332114935, - "learning_rate": 0.00019999963236621833, - "loss": 46.0, - "step": 11299 - }, - { - "epoch": 0.8639639123038401, - "grad_norm": 0.0005085182492621243, - "learning_rate": 0.00019999963230108415, - "loss": 46.0, - "step": 11300 - }, - { - "epoch": 0.8640403692872298, - "grad_norm": 0.0005188824143260717, - "learning_rate": 0.0001999996322359442, - "loss": 46.0, - "step": 11301 - }, - { - "epoch": 0.8641168262706195, - "grad_norm": 0.0004394619318190962, - "learning_rate": 0.00019999963217079848, - "loss": 46.0, - "step": 11302 - }, - { - "epoch": 0.8641932832540092, - "grad_norm": 0.004168111830949783, - "learning_rate": 0.000199999632105647, - "loss": 46.0, - "step": 11303 - }, - { - "epoch": 0.8642697402373989, - "grad_norm": 0.0009028415661305189, - "learning_rate": 0.00019999963204048976, - "loss": 46.0, - "step": 11304 - }, - { - "epoch": 0.8643461972207886, - "grad_norm": 0.0008435260970145464, - "learning_rate": 0.00019999963197532675, - "loss": 46.0, - "step": 11305 - }, - { - "epoch": 0.8644226542041784, - "grad_norm": 0.002249644137918949, - "learning_rate": 0.00019999963191015793, - "loss": 46.0, - "step": 11306 - }, - { - "epoch": 0.8644991111875681, - "grad_norm": 0.0014918898232281208, - "learning_rate": 0.0001999996318449834, - "loss": 46.0, - "step": 11307 - }, - { - "epoch": 0.8645755681709578, - "grad_norm": 0.0006185891688801348, - "learning_rate": 0.00019999963177980305, - "loss": 46.0, - "step": 11308 - }, - { - "epoch": 0.8646520251543476, - "grad_norm": 0.004601870197802782, - "learning_rate": 0.00019999963171461697, - "loss": 46.0, - "step": 11309 - }, - { - "epoch": 0.8647284821377372, - "grad_norm": 0.0011978121474385262, - "learning_rate": 0.0001999996316494251, - "loss": 46.0, - "step": 11310 - }, - { - "epoch": 0.864804939121127, - "grad_norm": 0.0011850838782265782, - "learning_rate": 0.00019999963158422746, - "loss": 46.0, - "step": 11311 - }, - { - "epoch": 0.8648813961045166, - "grad_norm": 0.000607462483458221, - "learning_rate": 0.00019999963151902405, - "loss": 46.0, - "step": 11312 - }, - { - "epoch": 0.8649578530879064, - "grad_norm": 0.001027140417136252, - "learning_rate": 0.00019999963145381488, - "loss": 46.0, - "step": 11313 - }, - { - "epoch": 0.8650343100712962, - "grad_norm": 0.002121127210557461, - "learning_rate": 0.00019999963138859995, - "loss": 46.0, - "step": 11314 - }, - { - "epoch": 0.8651107670546858, - "grad_norm": 0.003903008298948407, - "learning_rate": 0.00019999963132337923, - "loss": 46.0, - "step": 11315 - }, - { - "epoch": 0.8651872240380756, - "grad_norm": 0.0011306352680549026, - "learning_rate": 0.00019999963125815273, - "loss": 46.0, - "step": 11316 - }, - { - "epoch": 0.8652636810214653, - "grad_norm": 0.006063655484467745, - "learning_rate": 0.00019999963119292046, - "loss": 46.0, - "step": 11317 - }, - { - "epoch": 0.865340138004855, - "grad_norm": 0.0003820773563347757, - "learning_rate": 0.00019999963112768244, - "loss": 46.0, - "step": 11318 - }, - { - "epoch": 0.8654165949882447, - "grad_norm": 0.0009221856016665697, - "learning_rate": 0.00019999963106243865, - "loss": 46.0, - "step": 11319 - }, - { - "epoch": 0.8654930519716345, - "grad_norm": 0.0034840903244912624, - "learning_rate": 0.0001999996309971891, - "loss": 46.0, - "step": 11320 - }, - { - "epoch": 0.8655695089550242, - "grad_norm": 0.0037149747367948294, - "learning_rate": 0.00019999963093193378, - "loss": 46.0, - "step": 11321 - }, - { - "epoch": 0.8656459659384139, - "grad_norm": 0.0015489269280806184, - "learning_rate": 0.00019999963086667267, - "loss": 46.0, - "step": 11322 - }, - { - "epoch": 0.8657224229218036, - "grad_norm": 0.0005270421970635653, - "learning_rate": 0.00019999963080140581, - "loss": 46.0, - "step": 11323 - }, - { - "epoch": 0.8657988799051933, - "grad_norm": 0.002188637852668762, - "learning_rate": 0.00019999963073613316, - "loss": 46.0, - "step": 11324 - }, - { - "epoch": 0.8658753368885831, - "grad_norm": 0.0012622360372915864, - "learning_rate": 0.00019999963067085475, - "loss": 46.0, - "step": 11325 - }, - { - "epoch": 0.8659517938719727, - "grad_norm": 0.00045606374624185264, - "learning_rate": 0.00019999963060557058, - "loss": 46.0, - "step": 11326 - }, - { - "epoch": 0.8660282508553625, - "grad_norm": 0.001388874021358788, - "learning_rate": 0.00019999963054028063, - "loss": 46.0, - "step": 11327 - }, - { - "epoch": 0.8661047078387523, - "grad_norm": 0.0024835302028805017, - "learning_rate": 0.0001999996304749849, - "loss": 46.0, - "step": 11328 - }, - { - "epoch": 0.8661811648221419, - "grad_norm": 0.0007556821801699698, - "learning_rate": 0.00019999963040968343, - "loss": 46.0, - "step": 11329 - }, - { - "epoch": 0.8662576218055317, - "grad_norm": 0.001214748015627265, - "learning_rate": 0.00019999963034437616, - "loss": 46.0, - "step": 11330 - }, - { - "epoch": 0.8663340787889214, - "grad_norm": 0.0018981675384566188, - "learning_rate": 0.00019999963027906315, - "loss": 46.0, - "step": 11331 - }, - { - "epoch": 0.8664105357723111, - "grad_norm": 0.0024927256163209677, - "learning_rate": 0.00019999963021374436, - "loss": 46.0, - "step": 11332 - }, - { - "epoch": 0.8664869927557008, - "grad_norm": 0.0017383882077410817, - "learning_rate": 0.0001999996301484198, - "loss": 46.0, - "step": 11333 - }, - { - "epoch": 0.8665634497390905, - "grad_norm": 0.00047205790178850293, - "learning_rate": 0.00019999963008308943, - "loss": 46.0, - "step": 11334 - }, - { - "epoch": 0.8666399067224803, - "grad_norm": 0.0007868478423915803, - "learning_rate": 0.00019999963001775335, - "loss": 46.0, - "step": 11335 - }, - { - "epoch": 0.86671636370587, - "grad_norm": 0.0018248240230605006, - "learning_rate": 0.0001999996299524115, - "loss": 46.0, - "step": 11336 - }, - { - "epoch": 0.8667928206892597, - "grad_norm": 0.001080757356248796, - "learning_rate": 0.00019999962988706383, - "loss": 46.0, - "step": 11337 - }, - { - "epoch": 0.8668692776726494, - "grad_norm": 0.0012168008834123611, - "learning_rate": 0.00019999962982171043, - "loss": 46.0, - "step": 11338 - }, - { - "epoch": 0.8669457346560392, - "grad_norm": 0.0025758317206054926, - "learning_rate": 0.00019999962975635125, - "loss": 46.0, - "step": 11339 - }, - { - "epoch": 0.8670221916394288, - "grad_norm": 0.003014654852449894, - "learning_rate": 0.00019999962969098628, - "loss": 46.0, - "step": 11340 - }, - { - "epoch": 0.8670986486228186, - "grad_norm": 0.001204540953040123, - "learning_rate": 0.00019999962962561555, - "loss": 46.0, - "step": 11341 - }, - { - "epoch": 0.8671751056062083, - "grad_norm": 0.0004886524984613061, - "learning_rate": 0.00019999962956023909, - "loss": 46.0, - "step": 11342 - }, - { - "epoch": 0.867251562589598, - "grad_norm": 0.0012577750021591783, - "learning_rate": 0.00019999962949485682, - "loss": 46.0, - "step": 11343 - }, - { - "epoch": 0.8673280195729878, - "grad_norm": 0.001744294073432684, - "learning_rate": 0.0001999996294294688, - "loss": 46.0, - "step": 11344 - }, - { - "epoch": 0.8674044765563774, - "grad_norm": 0.0018252218142151833, - "learning_rate": 0.00019999962936407498, - "loss": 46.0, - "step": 11345 - }, - { - "epoch": 0.8674809335397672, - "grad_norm": 0.0011272367555648088, - "learning_rate": 0.00019999962929867545, - "loss": 46.0, - "step": 11346 - }, - { - "epoch": 0.867557390523157, - "grad_norm": 0.0003052992688026279, - "learning_rate": 0.00019999962923327008, - "loss": 46.0, - "step": 11347 - }, - { - "epoch": 0.8676338475065466, - "grad_norm": 0.0009380700648762286, - "learning_rate": 0.00019999962916785897, - "loss": 46.0, - "step": 11348 - }, - { - "epoch": 0.8677103044899364, - "grad_norm": 0.0056645493023097515, - "learning_rate": 0.0001999996291024421, - "loss": 46.0, - "step": 11349 - }, - { - "epoch": 0.8677867614733261, - "grad_norm": 0.003473804797977209, - "learning_rate": 0.00019999962903701946, - "loss": 46.0, - "step": 11350 - }, - { - "epoch": 0.8678632184567158, - "grad_norm": 0.004365224391222, - "learning_rate": 0.00019999962897159106, - "loss": 46.0, - "step": 11351 - }, - { - "epoch": 0.8679396754401055, - "grad_norm": 0.0015035831602290273, - "learning_rate": 0.00019999962890615686, - "loss": 46.0, - "step": 11352 - }, - { - "epoch": 0.8680161324234952, - "grad_norm": 0.0015193622093647718, - "learning_rate": 0.00019999962884071693, - "loss": 46.0, - "step": 11353 - }, - { - "epoch": 0.8680925894068849, - "grad_norm": 0.0034061369951814413, - "learning_rate": 0.00019999962877527118, - "loss": 46.0, - "step": 11354 - }, - { - "epoch": 0.8681690463902747, - "grad_norm": 0.0014110647607594728, - "learning_rate": 0.00019999962870981971, - "loss": 46.0, - "step": 11355 - }, - { - "epoch": 0.8682455033736644, - "grad_norm": 0.0010795751586556435, - "learning_rate": 0.00019999962864436244, - "loss": 46.0, - "step": 11356 - }, - { - "epoch": 0.8683219603570541, - "grad_norm": 0.00052928231889382, - "learning_rate": 0.00019999962857889943, - "loss": 46.0, - "step": 11357 - }, - { - "epoch": 0.8683984173404439, - "grad_norm": 0.00542057491838932, - "learning_rate": 0.0001999996285134306, - "loss": 46.0, - "step": 11358 - }, - { - "epoch": 0.8684748743238335, - "grad_norm": 0.0006601628265343606, - "learning_rate": 0.00019999962844795602, - "loss": 46.0, - "step": 11359 - }, - { - "epoch": 0.8685513313072233, - "grad_norm": 0.012000552378594875, - "learning_rate": 0.0001999996283824757, - "loss": 46.0, - "step": 11360 - }, - { - "epoch": 0.868627788290613, - "grad_norm": 0.0008332718862220645, - "learning_rate": 0.0001999996283169896, - "loss": 46.0, - "step": 11361 - }, - { - "epoch": 0.8687042452740027, - "grad_norm": 0.0010078441118821502, - "learning_rate": 0.00019999962825149772, - "loss": 46.0, - "step": 11362 - }, - { - "epoch": 0.8687807022573925, - "grad_norm": 0.0005545616731978953, - "learning_rate": 0.00019999962818600007, - "loss": 46.0, - "step": 11363 - }, - { - "epoch": 0.8688571592407821, - "grad_norm": 0.001575276255607605, - "learning_rate": 0.00019999962812049664, - "loss": 46.0, - "step": 11364 - }, - { - "epoch": 0.8689336162241719, - "grad_norm": 0.0008677774458192289, - "learning_rate": 0.00019999962805498746, - "loss": 46.0, - "step": 11365 - }, - { - "epoch": 0.8690100732075616, - "grad_norm": 0.0012390395859256387, - "learning_rate": 0.0001999996279894725, - "loss": 46.0, - "step": 11366 - }, - { - "epoch": 0.8690865301909513, - "grad_norm": 0.0005452482146210968, - "learning_rate": 0.00019999962792395176, - "loss": 46.0, - "step": 11367 - }, - { - "epoch": 0.869162987174341, - "grad_norm": 0.0007087248377501965, - "learning_rate": 0.0001999996278584253, - "loss": 46.0, - "step": 11368 - }, - { - "epoch": 0.8692394441577308, - "grad_norm": 0.0018589590908959508, - "learning_rate": 0.00019999962779289302, - "loss": 46.0, - "step": 11369 - }, - { - "epoch": 0.8693159011411205, - "grad_norm": 0.0007085982942953706, - "learning_rate": 0.00019999962772735498, - "loss": 46.0, - "step": 11370 - }, - { - "epoch": 0.8693923581245102, - "grad_norm": 0.008411173708736897, - "learning_rate": 0.0001999996276618112, - "loss": 46.0, - "step": 11371 - }, - { - "epoch": 0.8694688151079, - "grad_norm": 0.0010668684262782335, - "learning_rate": 0.00019999962759626163, - "loss": 46.0, - "step": 11372 - }, - { - "epoch": 0.8695452720912896, - "grad_norm": 0.0014721545157954097, - "learning_rate": 0.0001999996275307063, - "loss": 46.0, - "step": 11373 - }, - { - "epoch": 0.8696217290746794, - "grad_norm": 0.0007569894660264254, - "learning_rate": 0.00019999962746514518, - "loss": 46.0, - "step": 11374 - }, - { - "epoch": 0.869698186058069, - "grad_norm": 0.001771575422026217, - "learning_rate": 0.0001999996273995783, - "loss": 46.0, - "step": 11375 - }, - { - "epoch": 0.8697746430414588, - "grad_norm": 0.0004106743144802749, - "learning_rate": 0.00019999962733400565, - "loss": 46.0, - "step": 11376 - }, - { - "epoch": 0.8698511000248486, - "grad_norm": 0.0017075147479772568, - "learning_rate": 0.0001999996272684272, - "loss": 46.0, - "step": 11377 - }, - { - "epoch": 0.8699275570082382, - "grad_norm": 0.0007278526318259537, - "learning_rate": 0.00019999962720284304, - "loss": 46.0, - "step": 11378 - }, - { - "epoch": 0.870004013991628, - "grad_norm": 0.00116728525608778, - "learning_rate": 0.00019999962713725307, - "loss": 46.0, - "step": 11379 - }, - { - "epoch": 0.8700804709750177, - "grad_norm": 0.0018945814808830619, - "learning_rate": 0.00019999962707165737, - "loss": 46.0, - "step": 11380 - }, - { - "epoch": 0.8701569279584074, - "grad_norm": 0.0013922067591920495, - "learning_rate": 0.00019999962700605585, - "loss": 46.0, - "step": 11381 - }, - { - "epoch": 0.8702333849417971, - "grad_norm": 0.0015502232126891613, - "learning_rate": 0.00019999962694044858, - "loss": 46.0, - "step": 11382 - }, - { - "epoch": 0.8703098419251868, - "grad_norm": 0.0017657580319792032, - "learning_rate": 0.00019999962687483556, - "loss": 46.0, - "step": 11383 - }, - { - "epoch": 0.8703862989085766, - "grad_norm": 0.0009936497081071138, - "learning_rate": 0.00019999962680921672, - "loss": 46.0, - "step": 11384 - }, - { - "epoch": 0.8704627558919663, - "grad_norm": 0.0018627546960487962, - "learning_rate": 0.00019999962674359216, - "loss": 46.0, - "step": 11385 - }, - { - "epoch": 0.870539212875356, - "grad_norm": 0.0011218518484383821, - "learning_rate": 0.00019999962667796182, - "loss": 46.0, - "step": 11386 - }, - { - "epoch": 0.8706156698587457, - "grad_norm": 0.0012299276422709227, - "learning_rate": 0.00019999962661232571, - "loss": 46.0, - "step": 11387 - }, - { - "epoch": 0.8706921268421355, - "grad_norm": 0.0016529296990484, - "learning_rate": 0.00019999962654668383, - "loss": 46.0, - "step": 11388 - }, - { - "epoch": 0.8707685838255251, - "grad_norm": 0.000581545929890126, - "learning_rate": 0.00019999962648103615, - "loss": 46.0, - "step": 11389 - }, - { - "epoch": 0.8708450408089149, - "grad_norm": 0.000293553777737543, - "learning_rate": 0.00019999962641538275, - "loss": 46.0, - "step": 11390 - }, - { - "epoch": 0.8709214977923047, - "grad_norm": 0.002803260926157236, - "learning_rate": 0.00019999962634972355, - "loss": 46.0, - "step": 11391 - }, - { - "epoch": 0.8709979547756943, - "grad_norm": 0.0007345350459218025, - "learning_rate": 0.0001999996262840586, - "loss": 46.0, - "step": 11392 - }, - { - "epoch": 0.8710744117590841, - "grad_norm": 0.001073502586223185, - "learning_rate": 0.00019999962621838785, - "loss": 46.0, - "step": 11393 - }, - { - "epoch": 0.8711508687424737, - "grad_norm": 0.0007213017670437694, - "learning_rate": 0.00019999962615271138, - "loss": 46.0, - "step": 11394 - }, - { - "epoch": 0.8712273257258635, - "grad_norm": 0.0008859593071974814, - "learning_rate": 0.0001999996260870291, - "loss": 46.0, - "step": 11395 - }, - { - "epoch": 0.8713037827092532, - "grad_norm": 0.0007252567447721958, - "learning_rate": 0.00019999962602134107, - "loss": 46.0, - "step": 11396 - }, - { - "epoch": 0.8713802396926429, - "grad_norm": 0.0010628559393808246, - "learning_rate": 0.00019999962595564725, - "loss": 46.0, - "step": 11397 - }, - { - "epoch": 0.8714566966760326, - "grad_norm": 0.0016480223275721073, - "learning_rate": 0.00019999962588994767, - "loss": 46.0, - "step": 11398 - }, - { - "epoch": 0.8715331536594224, - "grad_norm": 0.0021526247728616, - "learning_rate": 0.00019999962582424233, - "loss": 46.0, - "step": 11399 - }, - { - "epoch": 0.8716096106428121, - "grad_norm": 0.0008695118012838066, - "learning_rate": 0.00019999962575853122, - "loss": 46.0, - "step": 11400 - }, - { - "epoch": 0.8716860676262018, - "grad_norm": 0.000764188589528203, - "learning_rate": 0.0001999996256928143, - "loss": 46.0, - "step": 11401 - }, - { - "epoch": 0.8717625246095916, - "grad_norm": 0.001357584842480719, - "learning_rate": 0.00019999962562709166, - "loss": 46.0, - "step": 11402 - }, - { - "epoch": 0.8718389815929812, - "grad_norm": 0.0009459906723350286, - "learning_rate": 0.00019999962556136326, - "loss": 46.0, - "step": 11403 - }, - { - "epoch": 0.871915438576371, - "grad_norm": 0.0006871233927085996, - "learning_rate": 0.00019999962549562905, - "loss": 46.0, - "step": 11404 - }, - { - "epoch": 0.8719918955597606, - "grad_norm": 0.000523898343089968, - "learning_rate": 0.00019999962542988908, - "loss": 46.0, - "step": 11405 - }, - { - "epoch": 0.8720683525431504, - "grad_norm": 0.0005584381869994104, - "learning_rate": 0.00019999962536414336, - "loss": 46.0, - "step": 11406 - }, - { - "epoch": 0.8721448095265402, - "grad_norm": 0.001396314473822713, - "learning_rate": 0.00019999962529839186, - "loss": 46.0, - "step": 11407 - }, - { - "epoch": 0.8722212665099298, - "grad_norm": 0.0014026289572939277, - "learning_rate": 0.00019999962523263457, - "loss": 46.0, - "step": 11408 - }, - { - "epoch": 0.8722977234933196, - "grad_norm": 0.0014972700737416744, - "learning_rate": 0.00019999962516687155, - "loss": 46.0, - "step": 11409 - }, - { - "epoch": 0.8723741804767093, - "grad_norm": 0.0042268093675374985, - "learning_rate": 0.00019999962510110274, - "loss": 46.0, - "step": 11410 - }, - { - "epoch": 0.872450637460099, - "grad_norm": 0.0016035304870456457, - "learning_rate": 0.00019999962503532818, - "loss": 46.0, - "step": 11411 - }, - { - "epoch": 0.8725270944434887, - "grad_norm": 0.000624780310317874, - "learning_rate": 0.00019999962496954781, - "loss": 46.0, - "step": 11412 - }, - { - "epoch": 0.8726035514268784, - "grad_norm": 0.000314099044771865, - "learning_rate": 0.00019999962490376168, - "loss": 46.0, - "step": 11413 - }, - { - "epoch": 0.8726800084102682, - "grad_norm": 0.0017387644620612264, - "learning_rate": 0.0001999996248379698, - "loss": 46.0, - "step": 11414 - }, - { - "epoch": 0.8727564653936579, - "grad_norm": 0.0011003370163962245, - "learning_rate": 0.00019999962477217214, - "loss": 46.0, - "step": 11415 - }, - { - "epoch": 0.8728329223770476, - "grad_norm": 0.002337732817977667, - "learning_rate": 0.00019999962470636871, - "loss": 46.0, - "step": 11416 - }, - { - "epoch": 0.8729093793604373, - "grad_norm": 0.0011581971775740385, - "learning_rate": 0.0001999996246405595, - "loss": 46.0, - "step": 11417 - }, - { - "epoch": 0.8729858363438271, - "grad_norm": 0.0002553130907472223, - "learning_rate": 0.00019999962457474454, - "loss": 46.0, - "step": 11418 - }, - { - "epoch": 0.8730622933272167, - "grad_norm": 0.0037137335166335106, - "learning_rate": 0.00019999962450892382, - "loss": 46.0, - "step": 11419 - }, - { - "epoch": 0.8731387503106065, - "grad_norm": 0.0021945994812995195, - "learning_rate": 0.00019999962444309732, - "loss": 46.0, - "step": 11420 - }, - { - "epoch": 0.8732152072939963, - "grad_norm": 0.000980915268883109, - "learning_rate": 0.00019999962437726503, - "loss": 46.0, - "step": 11421 - }, - { - "epoch": 0.8732916642773859, - "grad_norm": 0.011972527951002121, - "learning_rate": 0.000199999624311427, - "loss": 46.0, - "step": 11422 - }, - { - "epoch": 0.8733681212607757, - "grad_norm": 0.0011863175313919783, - "learning_rate": 0.00019999962424558317, - "loss": 46.0, - "step": 11423 - }, - { - "epoch": 0.8734445782441653, - "grad_norm": 0.001201195758767426, - "learning_rate": 0.00019999962417973358, - "loss": 46.0, - "step": 11424 - }, - { - "epoch": 0.8735210352275551, - "grad_norm": 0.001934562111273408, - "learning_rate": 0.00019999962411387822, - "loss": 46.0, - "step": 11425 - }, - { - "epoch": 0.8735974922109448, - "grad_norm": 0.0010991540038958192, - "learning_rate": 0.00019999962404801714, - "loss": 46.0, - "step": 11426 - }, - { - "epoch": 0.8736739491943345, - "grad_norm": 0.0019634398631751537, - "learning_rate": 0.00019999962398215023, - "loss": 46.0, - "step": 11427 - }, - { - "epoch": 0.8737504061777243, - "grad_norm": 0.00113029929343611, - "learning_rate": 0.00019999962391627758, - "loss": 46.0, - "step": 11428 - }, - { - "epoch": 0.873826863161114, - "grad_norm": 0.0027148055378347635, - "learning_rate": 0.00019999962385039915, - "loss": 46.0, - "step": 11429 - }, - { - "epoch": 0.8739033201445037, - "grad_norm": 0.0009400181588716805, - "learning_rate": 0.00019999962378451492, - "loss": 46.0, - "step": 11430 - }, - { - "epoch": 0.8739797771278934, - "grad_norm": 0.000677421863656491, - "learning_rate": 0.00019999962371862497, - "loss": 46.0, - "step": 11431 - }, - { - "epoch": 0.8740562341112832, - "grad_norm": 0.0012904831673949957, - "learning_rate": 0.00019999962365272925, - "loss": 46.0, - "step": 11432 - }, - { - "epoch": 0.8741326910946728, - "grad_norm": 0.000991379958577454, - "learning_rate": 0.00019999962358682773, - "loss": 46.0, - "step": 11433 - }, - { - "epoch": 0.8742091480780626, - "grad_norm": 0.00108067668043077, - "learning_rate": 0.00019999962352092046, - "loss": 46.0, - "step": 11434 - }, - { - "epoch": 0.8742856050614523, - "grad_norm": 0.00024224857043009251, - "learning_rate": 0.0001999996234550074, - "loss": 46.0, - "step": 11435 - }, - { - "epoch": 0.874362062044842, - "grad_norm": 0.0020552342757582664, - "learning_rate": 0.00019999962338908858, - "loss": 46.0, - "step": 11436 - }, - { - "epoch": 0.8744385190282318, - "grad_norm": 0.002490504877641797, - "learning_rate": 0.000199999623323164, - "loss": 46.0, - "step": 11437 - }, - { - "epoch": 0.8745149760116214, - "grad_norm": 0.003215127857401967, - "learning_rate": 0.00019999962325723366, - "loss": 46.0, - "step": 11438 - }, - { - "epoch": 0.8745914329950112, - "grad_norm": 0.0010782964527606964, - "learning_rate": 0.00019999962319129752, - "loss": 46.0, - "step": 11439 - }, - { - "epoch": 0.874667889978401, - "grad_norm": 0.002760393312200904, - "learning_rate": 0.00019999962312535562, - "loss": 46.0, - "step": 11440 - }, - { - "epoch": 0.8747443469617906, - "grad_norm": 0.008062370121479034, - "learning_rate": 0.00019999962305940796, - "loss": 46.0, - "step": 11441 - }, - { - "epoch": 0.8748208039451804, - "grad_norm": 0.0013652686029672623, - "learning_rate": 0.0001999996229934545, - "loss": 46.0, - "step": 11442 - }, - { - "epoch": 0.87489726092857, - "grad_norm": 0.0006283313850872219, - "learning_rate": 0.00019999962292749533, - "loss": 46.0, - "step": 11443 - }, - { - "epoch": 0.8749737179119598, - "grad_norm": 0.0018913072999566793, - "learning_rate": 0.00019999962286153036, - "loss": 46.0, - "step": 11444 - }, - { - "epoch": 0.8750501748953495, - "grad_norm": 0.0015715928748250008, - "learning_rate": 0.0001999996227955596, - "loss": 46.0, - "step": 11445 - }, - { - "epoch": 0.8751266318787392, - "grad_norm": 0.0009337567607872188, - "learning_rate": 0.0001999996227295831, - "loss": 46.0, - "step": 11446 - }, - { - "epoch": 0.8752030888621289, - "grad_norm": 0.0008720124023966491, - "learning_rate": 0.00019999962266360082, - "loss": 46.0, - "step": 11447 - }, - { - "epoch": 0.8752795458455187, - "grad_norm": 0.0028761515859514475, - "learning_rate": 0.00019999962259761278, - "loss": 46.0, - "step": 11448 - }, - { - "epoch": 0.8753560028289084, - "grad_norm": 0.0021358646918088198, - "learning_rate": 0.00019999962253161897, - "loss": 46.0, - "step": 11449 - }, - { - "epoch": 0.8754324598122981, - "grad_norm": 0.0011539262486621737, - "learning_rate": 0.00019999962246561935, - "loss": 46.0, - "step": 11450 - }, - { - "epoch": 0.8755089167956879, - "grad_norm": 0.0015491455560550094, - "learning_rate": 0.00019999962239961402, - "loss": 46.0, - "step": 11451 - }, - { - "epoch": 0.8755853737790775, - "grad_norm": 0.0028538417536765337, - "learning_rate": 0.00019999962233360288, - "loss": 46.0, - "step": 11452 - }, - { - "epoch": 0.8756618307624673, - "grad_norm": 0.0008607959607616067, - "learning_rate": 0.000199999622267586, - "loss": 46.0, - "step": 11453 - }, - { - "epoch": 0.8757382877458569, - "grad_norm": 0.0011676391586661339, - "learning_rate": 0.00019999962220156335, - "loss": 46.0, - "step": 11454 - }, - { - "epoch": 0.8758147447292467, - "grad_norm": 0.0004308215866331011, - "learning_rate": 0.0001999996221355349, - "loss": 46.0, - "step": 11455 - }, - { - "epoch": 0.8758912017126365, - "grad_norm": 0.0004989307490177453, - "learning_rate": 0.0001999996220695007, - "loss": 46.0, - "step": 11456 - }, - { - "epoch": 0.8759676586960261, - "grad_norm": 0.0005106266471557319, - "learning_rate": 0.00019999962200346072, - "loss": 46.0, - "step": 11457 - }, - { - "epoch": 0.8760441156794159, - "grad_norm": 0.0008801653166301548, - "learning_rate": 0.00019999962193741497, - "loss": 46.0, - "step": 11458 - }, - { - "epoch": 0.8761205726628056, - "grad_norm": 0.0011956244707107544, - "learning_rate": 0.00019999962187136348, - "loss": 46.0, - "step": 11459 - }, - { - "epoch": 0.8761970296461953, - "grad_norm": 0.0030010908376425505, - "learning_rate": 0.00019999962180530619, - "loss": 46.0, - "step": 11460 - }, - { - "epoch": 0.876273486629585, - "grad_norm": 0.0036698300391435623, - "learning_rate": 0.00019999962173924312, - "loss": 46.0, - "step": 11461 - }, - { - "epoch": 0.8763499436129748, - "grad_norm": 0.0029740636236965656, - "learning_rate": 0.00019999962167317433, - "loss": 46.0, - "step": 11462 - }, - { - "epoch": 0.8764264005963645, - "grad_norm": 0.0015087099745869637, - "learning_rate": 0.00019999962160709972, - "loss": 46.0, - "step": 11463 - }, - { - "epoch": 0.8765028575797542, - "grad_norm": 0.0025338975246995687, - "learning_rate": 0.00019999962154101936, - "loss": 46.0, - "step": 11464 - }, - { - "epoch": 0.8765793145631439, - "grad_norm": 0.00045561863225884736, - "learning_rate": 0.0001999996214749332, - "loss": 46.0, - "step": 11465 - }, - { - "epoch": 0.8766557715465336, - "grad_norm": 0.0019635034259408712, - "learning_rate": 0.00019999962140884132, - "loss": 46.0, - "step": 11466 - }, - { - "epoch": 0.8767322285299234, - "grad_norm": 0.000976012903265655, - "learning_rate": 0.00019999962134274366, - "loss": 46.0, - "step": 11467 - }, - { - "epoch": 0.876808685513313, - "grad_norm": 0.0024508091155439615, - "learning_rate": 0.00019999962127664023, - "loss": 46.0, - "step": 11468 - }, - { - "epoch": 0.8768851424967028, - "grad_norm": 0.0013388919178396463, - "learning_rate": 0.000199999621210531, - "loss": 46.0, - "step": 11469 - }, - { - "epoch": 0.8769615994800926, - "grad_norm": 0.0010337402345612645, - "learning_rate": 0.00019999962114441603, - "loss": 46.0, - "step": 11470 - }, - { - "epoch": 0.8770380564634822, - "grad_norm": 0.0010264250449836254, - "learning_rate": 0.0001999996210782953, - "loss": 46.0, - "step": 11471 - }, - { - "epoch": 0.877114513446872, - "grad_norm": 0.0011336618335917592, - "learning_rate": 0.00019999962101216877, - "loss": 46.0, - "step": 11472 - }, - { - "epoch": 0.8771909704302617, - "grad_norm": 0.0020531604532152414, - "learning_rate": 0.0001999996209460365, - "loss": 46.0, - "step": 11473 - }, - { - "epoch": 0.8772674274136514, - "grad_norm": 0.002687373897060752, - "learning_rate": 0.00019999962087989843, - "loss": 46.0, - "step": 11474 - }, - { - "epoch": 0.8773438843970411, - "grad_norm": 0.0014271598774939775, - "learning_rate": 0.00019999962081375462, - "loss": 46.0, - "step": 11475 - }, - { - "epoch": 0.8774203413804308, - "grad_norm": 0.0007046546088531613, - "learning_rate": 0.000199999620747605, - "loss": 46.0, - "step": 11476 - }, - { - "epoch": 0.8774967983638206, - "grad_norm": 0.0013938278425484896, - "learning_rate": 0.00019999962068144967, - "loss": 46.0, - "step": 11477 - }, - { - "epoch": 0.8775732553472103, - "grad_norm": 0.0018538323929533362, - "learning_rate": 0.0001999996206152885, - "loss": 46.0, - "step": 11478 - }, - { - "epoch": 0.8776497123306, - "grad_norm": 0.0005432175239548087, - "learning_rate": 0.00019999962054912163, - "loss": 46.0, - "step": 11479 - }, - { - "epoch": 0.8777261693139897, - "grad_norm": 0.0021084975451231003, - "learning_rate": 0.00019999962048294895, - "loss": 46.0, - "step": 11480 - }, - { - "epoch": 0.8778026262973795, - "grad_norm": 0.004460969474166632, - "learning_rate": 0.00019999962041677052, - "loss": 46.0, - "step": 11481 - }, - { - "epoch": 0.8778790832807691, - "grad_norm": 0.006277133245021105, - "learning_rate": 0.0001999996203505863, - "loss": 46.0, - "step": 11482 - }, - { - "epoch": 0.8779555402641589, - "grad_norm": 0.0010204113787040114, - "learning_rate": 0.00019999962028439635, - "loss": 46.0, - "step": 11483 - }, - { - "epoch": 0.8780319972475485, - "grad_norm": 0.0017665511695668101, - "learning_rate": 0.00019999962021820058, - "loss": 46.0, - "step": 11484 - }, - { - "epoch": 0.8781084542309383, - "grad_norm": 0.0009677269263193011, - "learning_rate": 0.00019999962015199906, - "loss": 46.0, - "step": 11485 - }, - { - "epoch": 0.8781849112143281, - "grad_norm": 0.0011772130383178592, - "learning_rate": 0.0001999996200857918, - "loss": 46.0, - "step": 11486 - }, - { - "epoch": 0.8782613681977177, - "grad_norm": 0.0035982343833893538, - "learning_rate": 0.0001999996200195787, - "loss": 46.0, - "step": 11487 - }, - { - "epoch": 0.8783378251811075, - "grad_norm": 0.0003681460511870682, - "learning_rate": 0.00019999961995335988, - "loss": 46.0, - "step": 11488 - }, - { - "epoch": 0.8784142821644972, - "grad_norm": 0.010014706291258335, - "learning_rate": 0.0001999996198871353, - "loss": 46.0, - "step": 11489 - }, - { - "epoch": 0.8784907391478869, - "grad_norm": 0.0012948894873261452, - "learning_rate": 0.0001999996198209049, - "loss": 46.0, - "step": 11490 - }, - { - "epoch": 0.8785671961312767, - "grad_norm": 0.0012301455717533827, - "learning_rate": 0.00019999961975466878, - "loss": 46.0, - "step": 11491 - }, - { - "epoch": 0.8786436531146664, - "grad_norm": 0.0006541036418639123, - "learning_rate": 0.0001999996196884269, - "loss": 46.0, - "step": 11492 - }, - { - "epoch": 0.8787201100980561, - "grad_norm": 0.0022805549670010805, - "learning_rate": 0.00019999961962217922, - "loss": 46.0, - "step": 11493 - }, - { - "epoch": 0.8787965670814458, - "grad_norm": 0.0005298719624988735, - "learning_rate": 0.00019999961955592576, - "loss": 46.0, - "step": 11494 - }, - { - "epoch": 0.8788730240648355, - "grad_norm": 0.0003304577840026468, - "learning_rate": 0.00019999961948966654, - "loss": 46.0, - "step": 11495 - }, - { - "epoch": 0.8789494810482252, - "grad_norm": 0.0008061114349402487, - "learning_rate": 0.00019999961942340156, - "loss": 46.0, - "step": 11496 - }, - { - "epoch": 0.879025938031615, - "grad_norm": 0.003267186926677823, - "learning_rate": 0.00019999961935713084, - "loss": 46.0, - "step": 11497 - }, - { - "epoch": 0.8791023950150046, - "grad_norm": 0.0009649894782342017, - "learning_rate": 0.0001999996192908543, - "loss": 46.0, - "step": 11498 - }, - { - "epoch": 0.8791788519983944, - "grad_norm": 0.0018604411743581295, - "learning_rate": 0.00019999961922457203, - "loss": 46.0, - "step": 11499 - }, - { - "epoch": 0.8792553089817842, - "grad_norm": 0.001565420301631093, - "learning_rate": 0.00019999961915828396, - "loss": 46.0, - "step": 11500 - }, - { - "epoch": 0.8793317659651738, - "grad_norm": 0.0007320715230889618, - "learning_rate": 0.00019999961909199012, - "loss": 46.0, - "step": 11501 - }, - { - "epoch": 0.8794082229485636, - "grad_norm": 0.001505018793977797, - "learning_rate": 0.00019999961902569054, - "loss": 46.0, - "step": 11502 - }, - { - "epoch": 0.8794846799319533, - "grad_norm": 0.0018255822360515594, - "learning_rate": 0.00019999961895938518, - "loss": 46.0, - "step": 11503 - }, - { - "epoch": 0.879561136915343, - "grad_norm": 0.0010151483584195375, - "learning_rate": 0.00019999961889307404, - "loss": 46.0, - "step": 11504 - }, - { - "epoch": 0.8796375938987328, - "grad_norm": 0.000422679033363238, - "learning_rate": 0.00019999961882675714, - "loss": 46.0, - "step": 11505 - }, - { - "epoch": 0.8797140508821224, - "grad_norm": 0.011802221648395061, - "learning_rate": 0.00019999961876043446, - "loss": 46.0, - "step": 11506 - }, - { - "epoch": 0.8797905078655122, - "grad_norm": 0.0008850020822137594, - "learning_rate": 0.000199999618694106, - "loss": 46.0, - "step": 11507 - }, - { - "epoch": 0.8798669648489019, - "grad_norm": 0.001157082268036902, - "learning_rate": 0.00019999961862777178, - "loss": 46.0, - "step": 11508 - }, - { - "epoch": 0.8799434218322916, - "grad_norm": 0.0022078221663832664, - "learning_rate": 0.00019999961856143178, - "loss": 46.0, - "step": 11509 - }, - { - "epoch": 0.8800198788156813, - "grad_norm": 0.0013158532092347741, - "learning_rate": 0.00019999961849508603, - "loss": 46.0, - "step": 11510 - }, - { - "epoch": 0.8800963357990711, - "grad_norm": 0.00198129890486598, - "learning_rate": 0.0001999996184287345, - "loss": 46.0, - "step": 11511 - }, - { - "epoch": 0.8801727927824607, - "grad_norm": 0.00289785023778677, - "learning_rate": 0.00019999961836237722, - "loss": 46.0, - "step": 11512 - }, - { - "epoch": 0.8802492497658505, - "grad_norm": 0.001195678603835404, - "learning_rate": 0.00019999961829601418, - "loss": 46.0, - "step": 11513 - }, - { - "epoch": 0.8803257067492402, - "grad_norm": 0.0008867033175192773, - "learning_rate": 0.00019999961822964534, - "loss": 46.0, - "step": 11514 - }, - { - "epoch": 0.8804021637326299, - "grad_norm": 0.0015654033049941063, - "learning_rate": 0.00019999961816327073, - "loss": 46.0, - "step": 11515 - }, - { - "epoch": 0.8804786207160197, - "grad_norm": 0.00294181308709085, - "learning_rate": 0.00019999961809689037, - "loss": 46.0, - "step": 11516 - }, - { - "epoch": 0.8805550776994093, - "grad_norm": 0.0011055272771045566, - "learning_rate": 0.0001999996180305042, - "loss": 46.0, - "step": 11517 - }, - { - "epoch": 0.8806315346827991, - "grad_norm": 0.0006200874340720475, - "learning_rate": 0.0001999996179641123, - "loss": 46.0, - "step": 11518 - }, - { - "epoch": 0.8807079916661888, - "grad_norm": 0.0015877856640145183, - "learning_rate": 0.00019999961789771465, - "loss": 46.0, - "step": 11519 - }, - { - "epoch": 0.8807844486495785, - "grad_norm": 0.00277850404381752, - "learning_rate": 0.00019999961783131117, - "loss": 46.0, - "step": 11520 - }, - { - "epoch": 0.8808609056329683, - "grad_norm": 0.0005010172608308494, - "learning_rate": 0.00019999961776490197, - "loss": 46.0, - "step": 11521 - }, - { - "epoch": 0.880937362616358, - "grad_norm": 0.00411998899653554, - "learning_rate": 0.00019999961769848697, - "loss": 46.0, - "step": 11522 - }, - { - "epoch": 0.8810138195997477, - "grad_norm": 0.0008906330913305283, - "learning_rate": 0.00019999961763206623, - "loss": 46.0, - "step": 11523 - }, - { - "epoch": 0.8810902765831374, - "grad_norm": 0.0031794554088264704, - "learning_rate": 0.0001999996175656397, - "loss": 46.0, - "step": 11524 - }, - { - "epoch": 0.8811667335665271, - "grad_norm": 0.000812284357380122, - "learning_rate": 0.00019999961749920741, - "loss": 46.0, - "step": 11525 - }, - { - "epoch": 0.8812431905499168, - "grad_norm": 0.0008765440434217453, - "learning_rate": 0.00019999961743276935, - "loss": 46.0, - "step": 11526 - }, - { - "epoch": 0.8813196475333066, - "grad_norm": 0.009660166688263416, - "learning_rate": 0.0001999996173663255, - "loss": 46.0, - "step": 11527 - }, - { - "epoch": 0.8813961045166963, - "grad_norm": 0.0019364120671525598, - "learning_rate": 0.0001999996172998759, - "loss": 46.0, - "step": 11528 - }, - { - "epoch": 0.881472561500086, - "grad_norm": 0.010289045050740242, - "learning_rate": 0.0001999996172334205, - "loss": 46.0, - "step": 11529 - }, - { - "epoch": 0.8815490184834758, - "grad_norm": 0.0009243633830919862, - "learning_rate": 0.00019999961716695938, - "loss": 46.0, - "step": 11530 - }, - { - "epoch": 0.8816254754668654, - "grad_norm": 0.0007554742624051869, - "learning_rate": 0.0001999996171004925, - "loss": 46.0, - "step": 11531 - }, - { - "epoch": 0.8817019324502552, - "grad_norm": 0.000755980028770864, - "learning_rate": 0.0001999996170340198, - "loss": 46.0, - "step": 11532 - }, - { - "epoch": 0.881778389433645, - "grad_norm": 0.0024595565628260374, - "learning_rate": 0.0001999996169675413, - "loss": 46.0, - "step": 11533 - }, - { - "epoch": 0.8818548464170346, - "grad_norm": 0.0010758370626717806, - "learning_rate": 0.00019999961690105714, - "loss": 46.0, - "step": 11534 - }, - { - "epoch": 0.8819313034004244, - "grad_norm": 0.0009160145418718457, - "learning_rate": 0.00019999961683456711, - "loss": 46.0, - "step": 11535 - }, - { - "epoch": 0.882007760383814, - "grad_norm": 0.0012632475700229406, - "learning_rate": 0.00019999961676807137, - "loss": 46.0, - "step": 11536 - }, - { - "epoch": 0.8820842173672038, - "grad_norm": 0.0011564275482669473, - "learning_rate": 0.00019999961670156985, - "loss": 46.0, - "step": 11537 - }, - { - "epoch": 0.8821606743505935, - "grad_norm": 0.0015774703351780772, - "learning_rate": 0.00019999961663506256, - "loss": 46.0, - "step": 11538 - }, - { - "epoch": 0.8822371313339832, - "grad_norm": 0.001014707493595779, - "learning_rate": 0.00019999961656854947, - "loss": 46.0, - "step": 11539 - }, - { - "epoch": 0.8823135883173729, - "grad_norm": 0.001826371531933546, - "learning_rate": 0.00019999961650203063, - "loss": 46.0, - "step": 11540 - }, - { - "epoch": 0.8823900453007627, - "grad_norm": 0.0008806149126030505, - "learning_rate": 0.00019999961643550601, - "loss": 46.0, - "step": 11541 - }, - { - "epoch": 0.8824665022841524, - "grad_norm": 0.0020287863444536924, - "learning_rate": 0.00019999961636897566, - "loss": 46.0, - "step": 11542 - }, - { - "epoch": 0.8825429592675421, - "grad_norm": 0.008840177208185196, - "learning_rate": 0.00019999961630243952, - "loss": 46.0, - "step": 11543 - }, - { - "epoch": 0.8826194162509318, - "grad_norm": 0.0022187193389981985, - "learning_rate": 0.0001999996162358976, - "loss": 46.0, - "step": 11544 - }, - { - "epoch": 0.8826958732343215, - "grad_norm": 0.000950249785091728, - "learning_rate": 0.0001999996161693499, - "loss": 46.0, - "step": 11545 - }, - { - "epoch": 0.8827723302177113, - "grad_norm": 0.0014825165271759033, - "learning_rate": 0.00019999961610279646, - "loss": 46.0, - "step": 11546 - }, - { - "epoch": 0.8828487872011009, - "grad_norm": 0.0013281184947118163, - "learning_rate": 0.00019999961603623723, - "loss": 46.0, - "step": 11547 - }, - { - "epoch": 0.8829252441844907, - "grad_norm": 0.0014837002381682396, - "learning_rate": 0.00019999961596967226, - "loss": 46.0, - "step": 11548 - }, - { - "epoch": 0.8830017011678805, - "grad_norm": 0.0020646746270358562, - "learning_rate": 0.0001999996159031015, - "loss": 46.0, - "step": 11549 - }, - { - "epoch": 0.8830781581512701, - "grad_norm": 0.004786214325577021, - "learning_rate": 0.00019999961583652494, - "loss": 46.0, - "step": 11550 - }, - { - "epoch": 0.8831546151346599, - "grad_norm": 0.0009910727385431528, - "learning_rate": 0.00019999961576994265, - "loss": 46.0, - "step": 11551 - }, - { - "epoch": 0.8832310721180496, - "grad_norm": 0.004271871875971556, - "learning_rate": 0.00019999961570335456, - "loss": 46.0, - "step": 11552 - }, - { - "epoch": 0.8833075291014393, - "grad_norm": 0.0004526250122580677, - "learning_rate": 0.00019999961563676075, - "loss": 46.0, - "step": 11553 - }, - { - "epoch": 0.883383986084829, - "grad_norm": 0.00037091010017320514, - "learning_rate": 0.00019999961557016114, - "loss": 46.0, - "step": 11554 - }, - { - "epoch": 0.8834604430682187, - "grad_norm": 0.0019659800454974174, - "learning_rate": 0.00019999961550355578, - "loss": 46.0, - "step": 11555 - }, - { - "epoch": 0.8835369000516085, - "grad_norm": 0.003968382719904184, - "learning_rate": 0.00019999961543694462, - "loss": 46.0, - "step": 11556 - }, - { - "epoch": 0.8836133570349982, - "grad_norm": 0.0007019545300863683, - "learning_rate": 0.00019999961537032769, - "loss": 46.0, - "step": 11557 - }, - { - "epoch": 0.8836898140183879, - "grad_norm": 0.00446880841627717, - "learning_rate": 0.000199999615303705, - "loss": 46.0, - "step": 11558 - }, - { - "epoch": 0.8837662710017776, - "grad_norm": 0.0005624861223623157, - "learning_rate": 0.00019999961523707656, - "loss": 46.0, - "step": 11559 - }, - { - "epoch": 0.8838427279851674, - "grad_norm": 0.001459189341403544, - "learning_rate": 0.00019999961517044233, - "loss": 46.0, - "step": 11560 - }, - { - "epoch": 0.883919184968557, - "grad_norm": 0.0010422280756756663, - "learning_rate": 0.00019999961510380233, - "loss": 46.0, - "step": 11561 - }, - { - "epoch": 0.8839956419519468, - "grad_norm": 0.0006864677416160703, - "learning_rate": 0.00019999961503715656, - "loss": 46.0, - "step": 11562 - }, - { - "epoch": 0.8840720989353366, - "grad_norm": 0.0005981221911497414, - "learning_rate": 0.00019999961497050504, - "loss": 46.0, - "step": 11563 - }, - { - "epoch": 0.8841485559187262, - "grad_norm": 0.0005547473556362092, - "learning_rate": 0.00019999961490384772, - "loss": 46.0, - "step": 11564 - }, - { - "epoch": 0.884225012902116, - "grad_norm": 0.001367216114886105, - "learning_rate": 0.00019999961483718466, - "loss": 46.0, - "step": 11565 - }, - { - "epoch": 0.8843014698855056, - "grad_norm": 0.0006109584355726838, - "learning_rate": 0.00019999961477051582, - "loss": 46.0, - "step": 11566 - }, - { - "epoch": 0.8843779268688954, - "grad_norm": 0.0009721220703795552, - "learning_rate": 0.0001999996147038412, - "loss": 46.0, - "step": 11567 - }, - { - "epoch": 0.8844543838522851, - "grad_norm": 0.0004529489378910512, - "learning_rate": 0.00019999961463716082, - "loss": 46.0, - "step": 11568 - }, - { - "epoch": 0.8845308408356748, - "grad_norm": 0.001516729942522943, - "learning_rate": 0.00019999961457047466, - "loss": 46.0, - "step": 11569 - }, - { - "epoch": 0.8846072978190646, - "grad_norm": 0.0006887958734296262, - "learning_rate": 0.00019999961450378276, - "loss": 46.0, - "step": 11570 - }, - { - "epoch": 0.8846837548024543, - "grad_norm": 0.0011303386418148875, - "learning_rate": 0.00019999961443708505, - "loss": 46.0, - "step": 11571 - }, - { - "epoch": 0.884760211785844, - "grad_norm": 0.0038444004021584988, - "learning_rate": 0.0001999996143703816, - "loss": 46.0, - "step": 11572 - }, - { - "epoch": 0.8848366687692337, - "grad_norm": 0.037785839289426804, - "learning_rate": 0.00019999961430367238, - "loss": 46.0, - "step": 11573 - }, - { - "epoch": 0.8849131257526234, - "grad_norm": 0.0011691901599988341, - "learning_rate": 0.00019999961423695738, - "loss": 46.0, - "step": 11574 - }, - { - "epoch": 0.8849895827360131, - "grad_norm": 0.0053506395779550076, - "learning_rate": 0.00019999961417023663, - "loss": 46.0, - "step": 11575 - }, - { - "epoch": 0.8850660397194029, - "grad_norm": 0.0003868041967507452, - "learning_rate": 0.0001999996141035101, - "loss": 46.0, - "step": 11576 - }, - { - "epoch": 0.8851424967027925, - "grad_norm": 0.0008278048480860889, - "learning_rate": 0.00019999961403677777, - "loss": 46.0, - "step": 11577 - }, - { - "epoch": 0.8852189536861823, - "grad_norm": 0.0013785305200144649, - "learning_rate": 0.0001999996139700397, - "loss": 46.0, - "step": 11578 - }, - { - "epoch": 0.8852954106695721, - "grad_norm": 0.0004402343765832484, - "learning_rate": 0.00019999961390329584, - "loss": 46.0, - "step": 11579 - }, - { - "epoch": 0.8853718676529617, - "grad_norm": 0.00043451273813843727, - "learning_rate": 0.00019999961383654623, - "loss": 46.0, - "step": 11580 - }, - { - "epoch": 0.8854483246363515, - "grad_norm": 0.0015124910278245807, - "learning_rate": 0.00019999961376979087, - "loss": 46.0, - "step": 11581 - }, - { - "epoch": 0.8855247816197412, - "grad_norm": 0.0009297903161495924, - "learning_rate": 0.0001999996137030297, - "loss": 46.0, - "step": 11582 - }, - { - "epoch": 0.8856012386031309, - "grad_norm": 0.00126979174092412, - "learning_rate": 0.00019999961363626278, - "loss": 46.0, - "step": 11583 - }, - { - "epoch": 0.8856776955865207, - "grad_norm": 0.0021595198195427656, - "learning_rate": 0.00019999961356949008, - "loss": 46.0, - "step": 11584 - }, - { - "epoch": 0.8857541525699103, - "grad_norm": 0.0016694475198164582, - "learning_rate": 0.00019999961350271163, - "loss": 46.0, - "step": 11585 - }, - { - "epoch": 0.8858306095533001, - "grad_norm": 0.003402827540412545, - "learning_rate": 0.0001999996134359274, - "loss": 46.0, - "step": 11586 - }, - { - "epoch": 0.8859070665366898, - "grad_norm": 0.002378500299528241, - "learning_rate": 0.0001999996133691374, - "loss": 46.0, - "step": 11587 - }, - { - "epoch": 0.8859835235200795, - "grad_norm": 0.0005293606664054096, - "learning_rate": 0.00019999961330234163, - "loss": 46.0, - "step": 11588 - }, - { - "epoch": 0.8860599805034692, - "grad_norm": 0.001085847383365035, - "learning_rate": 0.0001999996132355401, - "loss": 46.0, - "step": 11589 - }, - { - "epoch": 0.886136437486859, - "grad_norm": 0.0016055391170084476, - "learning_rate": 0.00019999961316873277, - "loss": 46.0, - "step": 11590 - }, - { - "epoch": 0.8862128944702486, - "grad_norm": 0.001958891050890088, - "learning_rate": 0.0001999996131019197, - "loss": 46.0, - "step": 11591 - }, - { - "epoch": 0.8862893514536384, - "grad_norm": 0.0007393836276605725, - "learning_rate": 0.00019999961303510087, - "loss": 46.0, - "step": 11592 - }, - { - "epoch": 0.8863658084370282, - "grad_norm": 0.0020159322302788496, - "learning_rate": 0.00019999961296827623, - "loss": 46.0, - "step": 11593 - }, - { - "epoch": 0.8864422654204178, - "grad_norm": 0.001277353148907423, - "learning_rate": 0.00019999961290144587, - "loss": 46.0, - "step": 11594 - }, - { - "epoch": 0.8865187224038076, - "grad_norm": 0.0013615416828542948, - "learning_rate": 0.00019999961283460969, - "loss": 46.0, - "step": 11595 - }, - { - "epoch": 0.8865951793871972, - "grad_norm": 0.0003089065430685878, - "learning_rate": 0.00019999961276776778, - "loss": 46.0, - "step": 11596 - }, - { - "epoch": 0.886671636370587, - "grad_norm": 0.0004677719553001225, - "learning_rate": 0.0001999996127009201, - "loss": 46.0, - "step": 11597 - }, - { - "epoch": 0.8867480933539768, - "grad_norm": 0.0008280706242658198, - "learning_rate": 0.00019999961263406663, - "loss": 46.0, - "step": 11598 - }, - { - "epoch": 0.8868245503373664, - "grad_norm": 0.00181598833296448, - "learning_rate": 0.00019999961256720737, - "loss": 46.0, - "step": 11599 - }, - { - "epoch": 0.8869010073207562, - "grad_norm": 0.002929729875177145, - "learning_rate": 0.00019999961250034238, - "loss": 46.0, - "step": 11600 - }, - { - "epoch": 0.8869774643041459, - "grad_norm": 0.00047074060421437025, - "learning_rate": 0.0001999996124334716, - "loss": 46.0, - "step": 11601 - }, - { - "epoch": 0.8870539212875356, - "grad_norm": 0.0009137978195212781, - "learning_rate": 0.00019999961236659506, - "loss": 46.0, - "step": 11602 - }, - { - "epoch": 0.8871303782709253, - "grad_norm": 0.0007314422400668263, - "learning_rate": 0.00019999961229971277, - "loss": 46.0, - "step": 11603 - }, - { - "epoch": 0.8872068352543151, - "grad_norm": 0.0011030100286006927, - "learning_rate": 0.00019999961223282465, - "loss": 46.0, - "step": 11604 - }, - { - "epoch": 0.8872832922377047, - "grad_norm": 0.00048081023851409554, - "learning_rate": 0.00019999961216593081, - "loss": 46.0, - "step": 11605 - }, - { - "epoch": 0.8873597492210945, - "grad_norm": 0.003363759955391288, - "learning_rate": 0.0001999996120990312, - "loss": 46.0, - "step": 11606 - }, - { - "epoch": 0.8874362062044842, - "grad_norm": 0.0019067656248807907, - "learning_rate": 0.00019999961203212582, - "loss": 46.0, - "step": 11607 - }, - { - "epoch": 0.8875126631878739, - "grad_norm": 0.004441200289875269, - "learning_rate": 0.00019999961196521464, - "loss": 46.0, - "step": 11608 - }, - { - "epoch": 0.8875891201712637, - "grad_norm": 0.00021467953047249466, - "learning_rate": 0.00019999961189829773, - "loss": 46.0, - "step": 11609 - }, - { - "epoch": 0.8876655771546533, - "grad_norm": 0.0012116761645302176, - "learning_rate": 0.00019999961183137503, - "loss": 46.0, - "step": 11610 - }, - { - "epoch": 0.8877420341380431, - "grad_norm": 0.002231928054243326, - "learning_rate": 0.00019999961176444658, - "loss": 46.0, - "step": 11611 - }, - { - "epoch": 0.8878184911214329, - "grad_norm": 0.00046282069524750113, - "learning_rate": 0.00019999961169751233, - "loss": 46.0, - "step": 11612 - }, - { - "epoch": 0.8878949481048225, - "grad_norm": 0.0018814190989360213, - "learning_rate": 0.00019999961163057233, - "loss": 46.0, - "step": 11613 - }, - { - "epoch": 0.8879714050882123, - "grad_norm": 0.008047036826610565, - "learning_rate": 0.00019999961156362653, - "loss": 46.0, - "step": 11614 - }, - { - "epoch": 0.8880478620716019, - "grad_norm": 0.001296870643272996, - "learning_rate": 0.00019999961149667499, - "loss": 46.0, - "step": 11615 - }, - { - "epoch": 0.8881243190549917, - "grad_norm": 0.0011320675257593393, - "learning_rate": 0.0001999996114297177, - "loss": 46.0, - "step": 11616 - }, - { - "epoch": 0.8882007760383814, - "grad_norm": 0.0011050697648897767, - "learning_rate": 0.0001999996113627546, - "loss": 46.0, - "step": 11617 - }, - { - "epoch": 0.8882772330217711, - "grad_norm": 0.001434743870049715, - "learning_rate": 0.00019999961129578577, - "loss": 46.0, - "step": 11618 - }, - { - "epoch": 0.8883536900051608, - "grad_norm": 0.0028370614163577557, - "learning_rate": 0.00019999961122881113, - "loss": 46.0, - "step": 11619 - }, - { - "epoch": 0.8884301469885506, - "grad_norm": 0.0010172122856602073, - "learning_rate": 0.00019999961116183075, - "loss": 46.0, - "step": 11620 - }, - { - "epoch": 0.8885066039719403, - "grad_norm": 0.002100787591189146, - "learning_rate": 0.0001999996110948446, - "loss": 46.0, - "step": 11621 - }, - { - "epoch": 0.88858306095533, - "grad_norm": 0.001675075851380825, - "learning_rate": 0.00019999961102785266, - "loss": 46.0, - "step": 11622 - }, - { - "epoch": 0.8886595179387198, - "grad_norm": 0.0018334781052544713, - "learning_rate": 0.00019999961096085495, - "loss": 46.0, - "step": 11623 - }, - { - "epoch": 0.8887359749221094, - "grad_norm": 0.000838214298710227, - "learning_rate": 0.00019999961089385148, - "loss": 46.0, - "step": 11624 - }, - { - "epoch": 0.8888124319054992, - "grad_norm": 0.0004943794338032603, - "learning_rate": 0.00019999961082684225, - "loss": 46.0, - "step": 11625 - }, - { - "epoch": 0.8888888888888888, - "grad_norm": 0.001850284170359373, - "learning_rate": 0.00019999961075982726, - "loss": 46.0, - "step": 11626 - }, - { - "epoch": 0.8889653458722786, - "grad_norm": 0.004019557498395443, - "learning_rate": 0.00019999961069280646, - "loss": 46.0, - "step": 11627 - }, - { - "epoch": 0.8890418028556684, - "grad_norm": 0.004177835304290056, - "learning_rate": 0.0001999996106257799, - "loss": 46.0, - "step": 11628 - }, - { - "epoch": 0.889118259839058, - "grad_norm": 0.002234953921288252, - "learning_rate": 0.0001999996105587476, - "loss": 46.0, - "step": 11629 - }, - { - "epoch": 0.8891947168224478, - "grad_norm": 0.004928039386868477, - "learning_rate": 0.0001999996104917095, - "loss": 46.0, - "step": 11630 - }, - { - "epoch": 0.8892711738058375, - "grad_norm": 0.002008287003263831, - "learning_rate": 0.00019999961042466567, - "loss": 46.0, - "step": 11631 - }, - { - "epoch": 0.8893476307892272, - "grad_norm": 0.0008996453252620995, - "learning_rate": 0.00019999961035761603, - "loss": 46.0, - "step": 11632 - }, - { - "epoch": 0.889424087772617, - "grad_norm": 0.002622775500640273, - "learning_rate": 0.00019999961029056062, - "loss": 46.0, - "step": 11633 - }, - { - "epoch": 0.8895005447560067, - "grad_norm": 0.0016102243680506945, - "learning_rate": 0.00019999961022349946, - "loss": 46.0, - "step": 11634 - }, - { - "epoch": 0.8895770017393964, - "grad_norm": 0.000692696834448725, - "learning_rate": 0.00019999961015643253, - "loss": 46.0, - "step": 11635 - }, - { - "epoch": 0.8896534587227861, - "grad_norm": 0.0012462381273508072, - "learning_rate": 0.00019999961008935983, - "loss": 46.0, - "step": 11636 - }, - { - "epoch": 0.8897299157061758, - "grad_norm": 0.006560949143022299, - "learning_rate": 0.00019999961002228135, - "loss": 46.0, - "step": 11637 - }, - { - "epoch": 0.8898063726895655, - "grad_norm": 0.0011459467932581902, - "learning_rate": 0.00019999960995519713, - "loss": 46.0, - "step": 11638 - }, - { - "epoch": 0.8898828296729553, - "grad_norm": 0.0008364997338503599, - "learning_rate": 0.00019999960988810713, - "loss": 46.0, - "step": 11639 - }, - { - "epoch": 0.8899592866563449, - "grad_norm": 0.0014103270368650556, - "learning_rate": 0.00019999960982101134, - "loss": 46.0, - "step": 11640 - }, - { - "epoch": 0.8900357436397347, - "grad_norm": 0.0010833328124135733, - "learning_rate": 0.0001999996097539098, - "loss": 46.0, - "step": 11641 - }, - { - "epoch": 0.8901122006231245, - "grad_norm": 0.0007472778088413179, - "learning_rate": 0.00019999960968680248, - "loss": 46.0, - "step": 11642 - }, - { - "epoch": 0.8901886576065141, - "grad_norm": 0.0022222402039915323, - "learning_rate": 0.00019999960961968939, - "loss": 46.0, - "step": 11643 - }, - { - "epoch": 0.8902651145899039, - "grad_norm": 0.0004993730690330267, - "learning_rate": 0.00019999960955257052, - "loss": 46.0, - "step": 11644 - }, - { - "epoch": 0.8903415715732935, - "grad_norm": 0.007205368485301733, - "learning_rate": 0.00019999960948544588, - "loss": 46.0, - "step": 11645 - }, - { - "epoch": 0.8904180285566833, - "grad_norm": 0.0006561383488588035, - "learning_rate": 0.0001999996094183155, - "loss": 46.0, - "step": 11646 - }, - { - "epoch": 0.890494485540073, - "grad_norm": 0.005198752973228693, - "learning_rate": 0.00019999960935117934, - "loss": 46.0, - "step": 11647 - }, - { - "epoch": 0.8905709425234627, - "grad_norm": 0.0007756437989883125, - "learning_rate": 0.00019999960928403741, - "loss": 46.0, - "step": 11648 - }, - { - "epoch": 0.8906473995068525, - "grad_norm": 0.004213810432702303, - "learning_rate": 0.00019999960921688968, - "loss": 46.0, - "step": 11649 - }, - { - "epoch": 0.8907238564902422, - "grad_norm": 0.0007201172411441803, - "learning_rate": 0.0001999996091497362, - "loss": 46.0, - "step": 11650 - }, - { - "epoch": 0.8908003134736319, - "grad_norm": 0.0017625095788389444, - "learning_rate": 0.00019999960908257698, - "loss": 46.0, - "step": 11651 - }, - { - "epoch": 0.8908767704570216, - "grad_norm": 0.001020937692373991, - "learning_rate": 0.00019999960901541196, - "loss": 46.0, - "step": 11652 - }, - { - "epoch": 0.8909532274404114, - "grad_norm": 0.0006207632832229137, - "learning_rate": 0.00019999960894824116, - "loss": 46.0, - "step": 11653 - }, - { - "epoch": 0.891029684423801, - "grad_norm": 0.011001263745129108, - "learning_rate": 0.00019999960888106465, - "loss": 46.0, - "step": 11654 - }, - { - "epoch": 0.8911061414071908, - "grad_norm": 0.0006805288139730692, - "learning_rate": 0.00019999960881388228, - "loss": 46.0, - "step": 11655 - }, - { - "epoch": 0.8911825983905804, - "grad_norm": 0.0016909033292904496, - "learning_rate": 0.00019999960874669421, - "loss": 46.0, - "step": 11656 - }, - { - "epoch": 0.8912590553739702, - "grad_norm": 0.0006358883692882955, - "learning_rate": 0.00019999960867950035, - "loss": 46.0, - "step": 11657 - }, - { - "epoch": 0.89133551235736, - "grad_norm": 0.0017439447110518813, - "learning_rate": 0.00019999960861230074, - "loss": 46.0, - "step": 11658 - }, - { - "epoch": 0.8914119693407496, - "grad_norm": 0.0032483956310898066, - "learning_rate": 0.00019999960854509533, - "loss": 46.0, - "step": 11659 - }, - { - "epoch": 0.8914884263241394, - "grad_norm": 0.0015791610348969698, - "learning_rate": 0.00019999960847788417, - "loss": 46.0, - "step": 11660 - }, - { - "epoch": 0.8915648833075291, - "grad_norm": 0.0009216535254381597, - "learning_rate": 0.00019999960841066722, - "loss": 46.0, - "step": 11661 - }, - { - "epoch": 0.8916413402909188, - "grad_norm": 0.0004906052490696311, - "learning_rate": 0.00019999960834344451, - "loss": 46.0, - "step": 11662 - }, - { - "epoch": 0.8917177972743086, - "grad_norm": 0.0006257439381442964, - "learning_rate": 0.000199999608276216, - "loss": 46.0, - "step": 11663 - }, - { - "epoch": 0.8917942542576983, - "grad_norm": 0.006156679708510637, - "learning_rate": 0.00019999960820898182, - "loss": 46.0, - "step": 11664 - }, - { - "epoch": 0.891870711241088, - "grad_norm": 0.001959191868081689, - "learning_rate": 0.00019999960814174177, - "loss": 46.0, - "step": 11665 - }, - { - "epoch": 0.8919471682244777, - "grad_norm": 0.0006653974414803088, - "learning_rate": 0.000199999608074496, - "loss": 46.0, - "step": 11666 - }, - { - "epoch": 0.8920236252078674, - "grad_norm": 0.0007571681635454297, - "learning_rate": 0.00019999960800724443, - "loss": 46.0, - "step": 11667 - }, - { - "epoch": 0.8921000821912571, - "grad_norm": 0.000815580424387008, - "learning_rate": 0.0001999996079399871, - "loss": 46.0, - "step": 11668 - }, - { - "epoch": 0.8921765391746469, - "grad_norm": 0.0005262845079414546, - "learning_rate": 0.00019999960787272402, - "loss": 46.0, - "step": 11669 - }, - { - "epoch": 0.8922529961580365, - "grad_norm": 0.0008712175185792148, - "learning_rate": 0.00019999960780545516, - "loss": 46.0, - "step": 11670 - }, - { - "epoch": 0.8923294531414263, - "grad_norm": 0.0016177580691874027, - "learning_rate": 0.00019999960773818052, - "loss": 46.0, - "step": 11671 - }, - { - "epoch": 0.8924059101248161, - "grad_norm": 0.005622413009405136, - "learning_rate": 0.0001999996076709001, - "loss": 46.0, - "step": 11672 - }, - { - "epoch": 0.8924823671082057, - "grad_norm": 0.0005647125071845949, - "learning_rate": 0.00019999960760361395, - "loss": 46.0, - "step": 11673 - }, - { - "epoch": 0.8925588240915955, - "grad_norm": 0.0008469205931760371, - "learning_rate": 0.000199999607536322, - "loss": 46.0, - "step": 11674 - }, - { - "epoch": 0.8926352810749851, - "grad_norm": 0.001232604612596333, - "learning_rate": 0.0001999996074690243, - "loss": 46.0, - "step": 11675 - }, - { - "epoch": 0.8927117380583749, - "grad_norm": 0.0005928436294198036, - "learning_rate": 0.00019999960740172082, - "loss": 46.0, - "step": 11676 - }, - { - "epoch": 0.8927881950417647, - "grad_norm": 0.0054781511425971985, - "learning_rate": 0.00019999960733441157, - "loss": 46.0, - "step": 11677 - }, - { - "epoch": 0.8928646520251543, - "grad_norm": 0.0027831760235130787, - "learning_rate": 0.00019999960726709655, - "loss": 46.0, - "step": 11678 - }, - { - "epoch": 0.8929411090085441, - "grad_norm": 0.0008409085567109287, - "learning_rate": 0.00019999960719977575, - "loss": 46.0, - "step": 11679 - }, - { - "epoch": 0.8930175659919338, - "grad_norm": 0.0005970362108200788, - "learning_rate": 0.0001999996071324492, - "loss": 46.0, - "step": 11680 - }, - { - "epoch": 0.8930940229753235, - "grad_norm": 0.0013522456865757704, - "learning_rate": 0.0001999996070651169, - "loss": 46.0, - "step": 11681 - }, - { - "epoch": 0.8931704799587132, - "grad_norm": 0.002104173880070448, - "learning_rate": 0.00019999960699777878, - "loss": 46.0, - "step": 11682 - }, - { - "epoch": 0.893246936942103, - "grad_norm": 0.0014125931775197387, - "learning_rate": 0.0001999996069304349, - "loss": 46.0, - "step": 11683 - }, - { - "epoch": 0.8933233939254926, - "grad_norm": 0.0009136369335465133, - "learning_rate": 0.00019999960686308528, - "loss": 46.0, - "step": 11684 - }, - { - "epoch": 0.8933998509088824, - "grad_norm": 0.0018417006358504295, - "learning_rate": 0.0001999996067957299, - "loss": 46.0, - "step": 11685 - }, - { - "epoch": 0.8934763078922721, - "grad_norm": 0.0033245659433305264, - "learning_rate": 0.0001999996067283687, - "loss": 46.0, - "step": 11686 - }, - { - "epoch": 0.8935527648756618, - "grad_norm": 0.0010629085591062903, - "learning_rate": 0.00019999960666100176, - "loss": 46.0, - "step": 11687 - }, - { - "epoch": 0.8936292218590516, - "grad_norm": 0.0011256822617724538, - "learning_rate": 0.00019999960659362905, - "loss": 46.0, - "step": 11688 - }, - { - "epoch": 0.8937056788424412, - "grad_norm": 0.0003571473353076726, - "learning_rate": 0.00019999960652625055, - "loss": 46.0, - "step": 11689 - }, - { - "epoch": 0.893782135825831, - "grad_norm": 0.002232632366940379, - "learning_rate": 0.00019999960645886633, - "loss": 46.0, - "step": 11690 - }, - { - "epoch": 0.8938585928092208, - "grad_norm": 0.003224589629098773, - "learning_rate": 0.00019999960639147628, - "loss": 46.0, - "step": 11691 - }, - { - "epoch": 0.8939350497926104, - "grad_norm": 0.0012783812126144767, - "learning_rate": 0.0001999996063240805, - "loss": 46.0, - "step": 11692 - }, - { - "epoch": 0.8940115067760002, - "grad_norm": 0.003401727182790637, - "learning_rate": 0.00019999960625667894, - "loss": 46.0, - "step": 11693 - }, - { - "epoch": 0.8940879637593899, - "grad_norm": 0.0010558475041761994, - "learning_rate": 0.00019999960618927162, - "loss": 46.0, - "step": 11694 - }, - { - "epoch": 0.8941644207427796, - "grad_norm": 0.0017676184652373195, - "learning_rate": 0.00019999960612185854, - "loss": 46.0, - "step": 11695 - }, - { - "epoch": 0.8942408777261693, - "grad_norm": 0.005570777226239443, - "learning_rate": 0.00019999960605443965, - "loss": 46.0, - "step": 11696 - }, - { - "epoch": 0.894317334709559, - "grad_norm": 0.005142435431480408, - "learning_rate": 0.000199999605987015, - "loss": 46.0, - "step": 11697 - }, - { - "epoch": 0.8943937916929487, - "grad_norm": 0.0008430549642071128, - "learning_rate": 0.0001999996059195846, - "loss": 46.0, - "step": 11698 - }, - { - "epoch": 0.8944702486763385, - "grad_norm": 0.0010345809860154986, - "learning_rate": 0.00019999960585214842, - "loss": 46.0, - "step": 11699 - }, - { - "epoch": 0.8945467056597282, - "grad_norm": 0.0023996320087462664, - "learning_rate": 0.0001999996057847065, - "loss": 46.0, - "step": 11700 - }, - { - "epoch": 0.8946231626431179, - "grad_norm": 0.008272090926766396, - "learning_rate": 0.00019999960571725876, - "loss": 46.0, - "step": 11701 - }, - { - "epoch": 0.8946996196265077, - "grad_norm": 0.0006001748261041939, - "learning_rate": 0.00019999960564980529, - "loss": 46.0, - "step": 11702 - }, - { - "epoch": 0.8947760766098973, - "grad_norm": 0.0004926119581796229, - "learning_rate": 0.00019999960558234604, - "loss": 46.0, - "step": 11703 - }, - { - "epoch": 0.8948525335932871, - "grad_norm": 0.00027394964126870036, - "learning_rate": 0.000199999605514881, - "loss": 46.0, - "step": 11704 - }, - { - "epoch": 0.8949289905766769, - "grad_norm": 0.001660117064602673, - "learning_rate": 0.00019999960544741022, - "loss": 46.0, - "step": 11705 - }, - { - "epoch": 0.8950054475600665, - "grad_norm": 0.009312672540545464, - "learning_rate": 0.00019999960537993365, - "loss": 46.0, - "step": 11706 - }, - { - "epoch": 0.8950819045434563, - "grad_norm": 0.001353538129478693, - "learning_rate": 0.00019999960531245133, - "loss": 46.0, - "step": 11707 - }, - { - "epoch": 0.8951583615268459, - "grad_norm": 0.00027032053912989795, - "learning_rate": 0.00019999960524496322, - "loss": 46.0, - "step": 11708 - }, - { - "epoch": 0.8952348185102357, - "grad_norm": 0.002126175444573164, - "learning_rate": 0.00019999960517746933, - "loss": 46.0, - "step": 11709 - }, - { - "epoch": 0.8953112754936254, - "grad_norm": 0.0014371026773005724, - "learning_rate": 0.0001999996051099697, - "loss": 46.0, - "step": 11710 - }, - { - "epoch": 0.8953877324770151, - "grad_norm": 0.002477489411830902, - "learning_rate": 0.00019999960504246428, - "loss": 46.0, - "step": 11711 - }, - { - "epoch": 0.8954641894604048, - "grad_norm": 0.0011744264047592878, - "learning_rate": 0.0001999996049749531, - "loss": 46.0, - "step": 11712 - }, - { - "epoch": 0.8955406464437946, - "grad_norm": 0.001536023337393999, - "learning_rate": 0.00019999960490743617, - "loss": 46.0, - "step": 11713 - }, - { - "epoch": 0.8956171034271843, - "grad_norm": 0.001650497317314148, - "learning_rate": 0.00019999960483991345, - "loss": 46.0, - "step": 11714 - }, - { - "epoch": 0.895693560410574, - "grad_norm": 0.001094701001420617, - "learning_rate": 0.00019999960477238497, - "loss": 46.0, - "step": 11715 - }, - { - "epoch": 0.8957700173939637, - "grad_norm": 0.0008322000503540039, - "learning_rate": 0.0001999996047048507, - "loss": 46.0, - "step": 11716 - }, - { - "epoch": 0.8958464743773534, - "grad_norm": 0.007792453747242689, - "learning_rate": 0.00019999960463731067, - "loss": 46.0, - "step": 11717 - }, - { - "epoch": 0.8959229313607432, - "grad_norm": 0.0010417783632874489, - "learning_rate": 0.00019999960456976488, - "loss": 46.0, - "step": 11718 - }, - { - "epoch": 0.8959993883441328, - "grad_norm": 0.008582200855016708, - "learning_rate": 0.0001999996045022133, - "loss": 46.0, - "step": 11719 - }, - { - "epoch": 0.8960758453275226, - "grad_norm": 0.0038891613949090242, - "learning_rate": 0.00019999960443465597, - "loss": 46.0, - "step": 11720 - }, - { - "epoch": 0.8961523023109124, - "grad_norm": 0.0007805288769304752, - "learning_rate": 0.00019999960436709285, - "loss": 46.0, - "step": 11721 - }, - { - "epoch": 0.896228759294302, - "grad_norm": 0.00013388553634285927, - "learning_rate": 0.000199999604299524, - "loss": 46.0, - "step": 11722 - }, - { - "epoch": 0.8963052162776918, - "grad_norm": 0.013632994145154953, - "learning_rate": 0.00019999960423194935, - "loss": 46.0, - "step": 11723 - }, - { - "epoch": 0.8963816732610815, - "grad_norm": 0.0011647128267213702, - "learning_rate": 0.00019999960416436895, - "loss": 46.0, - "step": 11724 - }, - { - "epoch": 0.8964581302444712, - "grad_norm": 0.006610801909118891, - "learning_rate": 0.0001999996040967828, - "loss": 46.0, - "step": 11725 - }, - { - "epoch": 0.896534587227861, - "grad_norm": 0.002584711881354451, - "learning_rate": 0.0001999996040291908, - "loss": 46.0, - "step": 11726 - }, - { - "epoch": 0.8966110442112506, - "grad_norm": 0.0007482058135792613, - "learning_rate": 0.0001999996039615931, - "loss": 46.0, - "step": 11727 - }, - { - "epoch": 0.8966875011946404, - "grad_norm": 0.00039417415973730385, - "learning_rate": 0.0001999996038939896, - "loss": 46.0, - "step": 11728 - }, - { - "epoch": 0.8967639581780301, - "grad_norm": 0.0006242976523935795, - "learning_rate": 0.00019999960382638036, - "loss": 46.0, - "step": 11729 - }, - { - "epoch": 0.8968404151614198, - "grad_norm": 0.0009961030445992947, - "learning_rate": 0.0001999996037587653, - "loss": 46.0, - "step": 11730 - }, - { - "epoch": 0.8969168721448095, - "grad_norm": 0.001067009405232966, - "learning_rate": 0.0001999996036911445, - "loss": 46.0, - "step": 11731 - }, - { - "epoch": 0.8969933291281993, - "grad_norm": 0.008063516579568386, - "learning_rate": 0.00019999960362351794, - "loss": 46.0, - "step": 11732 - }, - { - "epoch": 0.8970697861115889, - "grad_norm": 0.004624022636562586, - "learning_rate": 0.00019999960355588563, - "loss": 46.0, - "step": 11733 - }, - { - "epoch": 0.8971462430949787, - "grad_norm": 0.0015413794899359345, - "learning_rate": 0.00019999960348824751, - "loss": 46.0, - "step": 11734 - }, - { - "epoch": 0.8972227000783685, - "grad_norm": 0.0008905536960810423, - "learning_rate": 0.00019999960342060363, - "loss": 46.0, - "step": 11735 - }, - { - "epoch": 0.8972991570617581, - "grad_norm": 0.0008385450346395373, - "learning_rate": 0.000199999603352954, - "loss": 46.0, - "step": 11736 - }, - { - "epoch": 0.8973756140451479, - "grad_norm": 0.0007168976007960737, - "learning_rate": 0.00019999960328529856, - "loss": 46.0, - "step": 11737 - }, - { - "epoch": 0.8974520710285375, - "grad_norm": 0.002199511043727398, - "learning_rate": 0.0001999996032176374, - "loss": 46.0, - "step": 11738 - }, - { - "epoch": 0.8975285280119273, - "grad_norm": 0.0013263440923765302, - "learning_rate": 0.00019999960314997045, - "loss": 46.0, - "step": 11739 - }, - { - "epoch": 0.897604984995317, - "grad_norm": 0.0005957005196250975, - "learning_rate": 0.0001999996030822977, - "loss": 46.0, - "step": 11740 - }, - { - "epoch": 0.8976814419787067, - "grad_norm": 0.0005969987832941115, - "learning_rate": 0.00019999960301461922, - "loss": 46.0, - "step": 11741 - }, - { - "epoch": 0.8977578989620965, - "grad_norm": 0.0008814689936116338, - "learning_rate": 0.00019999960294693497, - "loss": 46.0, - "step": 11742 - }, - { - "epoch": 0.8978343559454862, - "grad_norm": 0.0024248005356639624, - "learning_rate": 0.00019999960287924492, - "loss": 46.0, - "step": 11743 - }, - { - "epoch": 0.8979108129288759, - "grad_norm": 0.020057395100593567, - "learning_rate": 0.00019999960281154913, - "loss": 46.0, - "step": 11744 - }, - { - "epoch": 0.8979872699122656, - "grad_norm": 0.00041233684169128537, - "learning_rate": 0.00019999960274384756, - "loss": 46.0, - "step": 11745 - }, - { - "epoch": 0.8980637268956553, - "grad_norm": 0.0044325231574475765, - "learning_rate": 0.00019999960267614022, - "loss": 46.0, - "step": 11746 - }, - { - "epoch": 0.898140183879045, - "grad_norm": 0.0007187029696069658, - "learning_rate": 0.00019999960260842713, - "loss": 46.0, - "step": 11747 - }, - { - "epoch": 0.8982166408624348, - "grad_norm": 0.004973837174475193, - "learning_rate": 0.0001999996025407082, - "loss": 46.0, - "step": 11748 - }, - { - "epoch": 0.8982930978458245, - "grad_norm": 0.0007522447849623859, - "learning_rate": 0.0001999996024729836, - "loss": 46.0, - "step": 11749 - }, - { - "epoch": 0.8983695548292142, - "grad_norm": 0.005311708431690931, - "learning_rate": 0.00019999960240525314, - "loss": 46.0, - "step": 11750 - }, - { - "epoch": 0.898446011812604, - "grad_norm": 0.0008693598210811615, - "learning_rate": 0.000199999602337517, - "loss": 46.0, - "step": 11751 - }, - { - "epoch": 0.8985224687959936, - "grad_norm": 0.0028668921440839767, - "learning_rate": 0.00019999960226977503, - "loss": 46.0, - "step": 11752 - }, - { - "epoch": 0.8985989257793834, - "grad_norm": 0.002102965721860528, - "learning_rate": 0.00019999960220202728, - "loss": 46.0, - "step": 11753 - }, - { - "epoch": 0.8986753827627731, - "grad_norm": 0.0024905819445848465, - "learning_rate": 0.0001999996021342738, - "loss": 46.0, - "step": 11754 - }, - { - "epoch": 0.8987518397461628, - "grad_norm": 0.0023743496276438236, - "learning_rate": 0.00019999960206651453, - "loss": 46.0, - "step": 11755 - }, - { - "epoch": 0.8988282967295526, - "grad_norm": 0.0034692531917244196, - "learning_rate": 0.0001999996019987495, - "loss": 46.0, - "step": 11756 - }, - { - "epoch": 0.8989047537129422, - "grad_norm": 0.0008326807292178273, - "learning_rate": 0.00019999960193097871, - "loss": 46.0, - "step": 11757 - }, - { - "epoch": 0.898981210696332, - "grad_norm": 0.0012438230914995074, - "learning_rate": 0.00019999960186320212, - "loss": 46.0, - "step": 11758 - }, - { - "epoch": 0.8990576676797217, - "grad_norm": 0.0006790123879909515, - "learning_rate": 0.00019999960179541978, - "loss": 46.0, - "step": 11759 - }, - { - "epoch": 0.8991341246631114, - "grad_norm": 0.0044629559852182865, - "learning_rate": 0.0001999996017276317, - "loss": 46.0, - "step": 11760 - }, - { - "epoch": 0.8992105816465011, - "grad_norm": 0.0008896850631572306, - "learning_rate": 0.00019999960165983778, - "loss": 46.0, - "step": 11761 - }, - { - "epoch": 0.8992870386298909, - "grad_norm": 0.0016496492316946387, - "learning_rate": 0.00019999960159203814, - "loss": 46.0, - "step": 11762 - }, - { - "epoch": 0.8993634956132806, - "grad_norm": 0.0018810631008818746, - "learning_rate": 0.0001999996015242327, - "loss": 46.0, - "step": 11763 - }, - { - "epoch": 0.8994399525966703, - "grad_norm": 0.002070453716441989, - "learning_rate": 0.00019999960145642153, - "loss": 46.0, - "step": 11764 - }, - { - "epoch": 0.8995164095800601, - "grad_norm": 0.0038174346555024385, - "learning_rate": 0.00019999960138860457, - "loss": 46.0, - "step": 11765 - }, - { - "epoch": 0.8995928665634497, - "grad_norm": 0.0037040775641798973, - "learning_rate": 0.00019999960132078184, - "loss": 46.0, - "step": 11766 - }, - { - "epoch": 0.8996693235468395, - "grad_norm": 0.0005760633503086865, - "learning_rate": 0.00019999960125295334, - "loss": 46.0, - "step": 11767 - }, - { - "epoch": 0.8997457805302291, - "grad_norm": 0.001522030564956367, - "learning_rate": 0.00019999960118511907, - "loss": 46.0, - "step": 11768 - }, - { - "epoch": 0.8998222375136189, - "grad_norm": 0.0015499969013035297, - "learning_rate": 0.00019999960111727905, - "loss": 46.0, - "step": 11769 - }, - { - "epoch": 0.8998986944970087, - "grad_norm": 0.0011950712651014328, - "learning_rate": 0.00019999960104943325, - "loss": 46.0, - "step": 11770 - }, - { - "epoch": 0.8999751514803983, - "grad_norm": 0.0011982048163190484, - "learning_rate": 0.00019999960098158166, - "loss": 46.0, - "step": 11771 - }, - { - "epoch": 0.9000516084637881, - "grad_norm": 0.0005346702528186142, - "learning_rate": 0.00019999960091372432, - "loss": 46.0, - "step": 11772 - }, - { - "epoch": 0.9001280654471778, - "grad_norm": 0.0011791560100391507, - "learning_rate": 0.0001999996008458612, - "loss": 46.0, - "step": 11773 - }, - { - "epoch": 0.9002045224305675, - "grad_norm": 0.002212457824498415, - "learning_rate": 0.00019999960077799232, - "loss": 46.0, - "step": 11774 - }, - { - "epoch": 0.9002809794139572, - "grad_norm": 0.0005708799581043422, - "learning_rate": 0.00019999960071011766, - "loss": 46.0, - "step": 11775 - }, - { - "epoch": 0.9003574363973469, - "grad_norm": 0.0013216623337939382, - "learning_rate": 0.00019999960064223725, - "loss": 46.0, - "step": 11776 - }, - { - "epoch": 0.9004338933807366, - "grad_norm": 0.0006090451497584581, - "learning_rate": 0.00019999960057435104, - "loss": 46.0, - "step": 11777 - }, - { - "epoch": 0.9005103503641264, - "grad_norm": 0.0013295685639604926, - "learning_rate": 0.0001999996005064591, - "loss": 46.0, - "step": 11778 - }, - { - "epoch": 0.9005868073475161, - "grad_norm": 0.0014774148585274816, - "learning_rate": 0.00019999960043856136, - "loss": 46.0, - "step": 11779 - }, - { - "epoch": 0.9006632643309058, - "grad_norm": 0.00426880456507206, - "learning_rate": 0.00019999960037065786, - "loss": 46.0, - "step": 11780 - }, - { - "epoch": 0.9007397213142956, - "grad_norm": 0.0018135968130081892, - "learning_rate": 0.00019999960030274858, - "loss": 46.0, - "step": 11781 - }, - { - "epoch": 0.9008161782976852, - "grad_norm": 0.0010952582815662026, - "learning_rate": 0.00019999960023483354, - "loss": 46.0, - "step": 11782 - }, - { - "epoch": 0.900892635281075, - "grad_norm": 0.0005496160010807216, - "learning_rate": 0.00019999960016691274, - "loss": 46.0, - "step": 11783 - }, - { - "epoch": 0.9009690922644648, - "grad_norm": 0.015186472795903683, - "learning_rate": 0.00019999960009898615, - "loss": 46.0, - "step": 11784 - }, - { - "epoch": 0.9010455492478544, - "grad_norm": 0.001273954170756042, - "learning_rate": 0.00019999960003105384, - "loss": 46.0, - "step": 11785 - }, - { - "epoch": 0.9011220062312442, - "grad_norm": 0.0008249178063124418, - "learning_rate": 0.00019999959996311567, - "loss": 46.0, - "step": 11786 - }, - { - "epoch": 0.9011984632146338, - "grad_norm": 0.00112240226007998, - "learning_rate": 0.0001999995998951718, - "loss": 46.0, - "step": 11787 - }, - { - "epoch": 0.9012749201980236, - "grad_norm": 0.0011776859173551202, - "learning_rate": 0.00019999959982722217, - "loss": 46.0, - "step": 11788 - }, - { - "epoch": 0.9013513771814133, - "grad_norm": 0.0015126215294003487, - "learning_rate": 0.00019999959975926674, - "loss": 46.0, - "step": 11789 - }, - { - "epoch": 0.901427834164803, - "grad_norm": 0.001288728672079742, - "learning_rate": 0.00019999959969130556, - "loss": 46.0, - "step": 11790 - }, - { - "epoch": 0.9015042911481927, - "grad_norm": 0.0008634102996438742, - "learning_rate": 0.00019999959962333858, - "loss": 46.0, - "step": 11791 - }, - { - "epoch": 0.9015807481315825, - "grad_norm": 0.004667697940021753, - "learning_rate": 0.00019999959955536585, - "loss": 46.0, - "step": 11792 - }, - { - "epoch": 0.9016572051149722, - "grad_norm": 0.003966478165239096, - "learning_rate": 0.00019999959948738732, - "loss": 46.0, - "step": 11793 - }, - { - "epoch": 0.9017336620983619, - "grad_norm": 0.003953876905143261, - "learning_rate": 0.00019999959941940305, - "loss": 46.0, - "step": 11794 - }, - { - "epoch": 0.9018101190817517, - "grad_norm": 0.0011055568465963006, - "learning_rate": 0.000199999599351413, - "loss": 46.0, - "step": 11795 - }, - { - "epoch": 0.9018865760651413, - "grad_norm": 0.0010162289254367352, - "learning_rate": 0.0001999995992834172, - "loss": 46.0, - "step": 11796 - }, - { - "epoch": 0.9019630330485311, - "grad_norm": 0.000930530484765768, - "learning_rate": 0.00019999959921541562, - "loss": 46.0, - "step": 11797 - }, - { - "epoch": 0.9020394900319207, - "grad_norm": 0.0034277592785656452, - "learning_rate": 0.00019999959914740828, - "loss": 46.0, - "step": 11798 - }, - { - "epoch": 0.9021159470153105, - "grad_norm": 0.0017827311530709267, - "learning_rate": 0.00019999959907939516, - "loss": 46.0, - "step": 11799 - }, - { - "epoch": 0.9021924039987003, - "grad_norm": 0.0007436890155076981, - "learning_rate": 0.00019999959901137628, - "loss": 46.0, - "step": 11800 - }, - { - "epoch": 0.9022688609820899, - "grad_norm": 0.0025699662510305643, - "learning_rate": 0.00019999959894335161, - "loss": 46.0, - "step": 11801 - }, - { - "epoch": 0.9023453179654797, - "grad_norm": 0.0004999426892027259, - "learning_rate": 0.00019999959887532118, - "loss": 46.0, - "step": 11802 - }, - { - "epoch": 0.9024217749488694, - "grad_norm": 0.001900402596220374, - "learning_rate": 0.00019999959880728497, - "loss": 46.0, - "step": 11803 - }, - { - "epoch": 0.9024982319322591, - "grad_norm": 0.0009961911709979177, - "learning_rate": 0.00019999959873924302, - "loss": 46.0, - "step": 11804 - }, - { - "epoch": 0.9025746889156488, - "grad_norm": 0.002137347124516964, - "learning_rate": 0.0001999995986711953, - "loss": 46.0, - "step": 11805 - }, - { - "epoch": 0.9026511458990385, - "grad_norm": 0.0026213969103991985, - "learning_rate": 0.00019999959860314177, - "loss": 46.0, - "step": 11806 - }, - { - "epoch": 0.9027276028824283, - "grad_norm": 0.001633918029256165, - "learning_rate": 0.0001999995985350825, - "loss": 46.0, - "step": 11807 - }, - { - "epoch": 0.902804059865818, - "grad_norm": 0.0010500723728910089, - "learning_rate": 0.00019999959846701745, - "loss": 46.0, - "step": 11808 - }, - { - "epoch": 0.9028805168492077, - "grad_norm": 0.003920499235391617, - "learning_rate": 0.00019999959839894665, - "loss": 46.0, - "step": 11809 - }, - { - "epoch": 0.9029569738325974, - "grad_norm": 0.002261185785755515, - "learning_rate": 0.00019999959833087006, - "loss": 46.0, - "step": 11810 - }, - { - "epoch": 0.9030334308159872, - "grad_norm": 0.0009962526382878423, - "learning_rate": 0.00019999959826278772, - "loss": 46.0, - "step": 11811 - }, - { - "epoch": 0.9031098877993768, - "grad_norm": 0.004018166568130255, - "learning_rate": 0.0001999995981946996, - "loss": 46.0, - "step": 11812 - }, - { - "epoch": 0.9031863447827666, - "grad_norm": 0.0011788710253313184, - "learning_rate": 0.0001999995981266057, - "loss": 46.0, - "step": 11813 - }, - { - "epoch": 0.9032628017661564, - "grad_norm": 0.0008982756990008056, - "learning_rate": 0.00019999959805850603, - "loss": 46.0, - "step": 11814 - }, - { - "epoch": 0.903339258749546, - "grad_norm": 0.0012094053672626615, - "learning_rate": 0.0001999995979904006, - "loss": 46.0, - "step": 11815 - }, - { - "epoch": 0.9034157157329358, - "grad_norm": 0.0008921209955587983, - "learning_rate": 0.00019999959792228942, - "loss": 46.0, - "step": 11816 - }, - { - "epoch": 0.9034921727163254, - "grad_norm": 0.0032483821269124746, - "learning_rate": 0.00019999959785417244, - "loss": 46.0, - "step": 11817 - }, - { - "epoch": 0.9035686296997152, - "grad_norm": 0.013966198079288006, - "learning_rate": 0.00019999959778604969, - "loss": 46.0, - "step": 11818 - }, - { - "epoch": 0.903645086683105, - "grad_norm": 0.0007771443924866617, - "learning_rate": 0.00019999959771792119, - "loss": 46.0, - "step": 11819 - }, - { - "epoch": 0.9037215436664946, - "grad_norm": 0.0014522760175168514, - "learning_rate": 0.0001999995976497869, - "loss": 46.0, - "step": 11820 - }, - { - "epoch": 0.9037980006498844, - "grad_norm": 0.0007052583387121558, - "learning_rate": 0.00019999959758164687, - "loss": 46.0, - "step": 11821 - }, - { - "epoch": 0.9038744576332741, - "grad_norm": 0.001760429935529828, - "learning_rate": 0.00019999959751350105, - "loss": 46.0, - "step": 11822 - }, - { - "epoch": 0.9039509146166638, - "grad_norm": 0.001570952939800918, - "learning_rate": 0.00019999959744534945, - "loss": 46.0, - "step": 11823 - }, - { - "epoch": 0.9040273716000535, - "grad_norm": 0.0005313638248480856, - "learning_rate": 0.0001999995973771921, - "loss": 46.0, - "step": 11824 - }, - { - "epoch": 0.9041038285834433, - "grad_norm": 0.004057899583131075, - "learning_rate": 0.000199999597309029, - "loss": 46.0, - "step": 11825 - }, - { - "epoch": 0.9041802855668329, - "grad_norm": 0.0005371117731556296, - "learning_rate": 0.0001999995972408601, - "loss": 46.0, - "step": 11826 - }, - { - "epoch": 0.9042567425502227, - "grad_norm": 0.0006020376458764076, - "learning_rate": 0.00019999959717268543, - "loss": 46.0, - "step": 11827 - }, - { - "epoch": 0.9043331995336124, - "grad_norm": 0.004249220713973045, - "learning_rate": 0.000199999597104505, - "loss": 46.0, - "step": 11828 - }, - { - "epoch": 0.9044096565170021, - "grad_norm": 0.0011660712771117687, - "learning_rate": 0.0001999995970363188, - "loss": 46.0, - "step": 11829 - }, - { - "epoch": 0.9044861135003919, - "grad_norm": 0.0050075482577085495, - "learning_rate": 0.0001999995969681268, - "loss": 46.0, - "step": 11830 - }, - { - "epoch": 0.9045625704837815, - "grad_norm": 0.001171441632322967, - "learning_rate": 0.00019999959689992908, - "loss": 46.0, - "step": 11831 - }, - { - "epoch": 0.9046390274671713, - "grad_norm": 0.0019744911696761847, - "learning_rate": 0.00019999959683172556, - "loss": 46.0, - "step": 11832 - }, - { - "epoch": 0.904715484450561, - "grad_norm": 0.0023193301167339087, - "learning_rate": 0.00019999959676351628, - "loss": 46.0, - "step": 11833 - }, - { - "epoch": 0.9047919414339507, - "grad_norm": 0.0021487916819751263, - "learning_rate": 0.0001999995966953012, - "loss": 46.0, - "step": 11834 - }, - { - "epoch": 0.9048683984173405, - "grad_norm": 0.0007565730484202504, - "learning_rate": 0.00019999959662708042, - "loss": 46.0, - "step": 11835 - }, - { - "epoch": 0.9049448554007302, - "grad_norm": 0.0021633936557918787, - "learning_rate": 0.00019999959655885382, - "loss": 46.0, - "step": 11836 - }, - { - "epoch": 0.9050213123841199, - "grad_norm": 0.0017209731740877032, - "learning_rate": 0.00019999959649062148, - "loss": 46.0, - "step": 11837 - }, - { - "epoch": 0.9050977693675096, - "grad_norm": 0.007892737165093422, - "learning_rate": 0.00019999959642238334, - "loss": 46.0, - "step": 11838 - }, - { - "epoch": 0.9051742263508993, - "grad_norm": 0.0010190209141001105, - "learning_rate": 0.00019999959635413946, - "loss": 46.0, - "step": 11839 - }, - { - "epoch": 0.905250683334289, - "grad_norm": 0.0011227353243157268, - "learning_rate": 0.00019999959628588977, - "loss": 46.0, - "step": 11840 - }, - { - "epoch": 0.9053271403176788, - "grad_norm": 0.0023314955178648233, - "learning_rate": 0.00019999959621763434, - "loss": 46.0, - "step": 11841 - }, - { - "epoch": 0.9054035973010685, - "grad_norm": 0.0015706990379840136, - "learning_rate": 0.00019999959614937313, - "loss": 46.0, - "step": 11842 - }, - { - "epoch": 0.9054800542844582, - "grad_norm": 0.0009050035150721669, - "learning_rate": 0.00019999959608110615, - "loss": 46.0, - "step": 11843 - }, - { - "epoch": 0.905556511267848, - "grad_norm": 0.0073762615211308, - "learning_rate": 0.00019999959601283343, - "loss": 46.0, - "step": 11844 - }, - { - "epoch": 0.9056329682512376, - "grad_norm": 0.0005101165734231472, - "learning_rate": 0.0001999995959445549, - "loss": 46.0, - "step": 11845 - }, - { - "epoch": 0.9057094252346274, - "grad_norm": 0.0012179447803646326, - "learning_rate": 0.00019999959587627063, - "loss": 46.0, - "step": 11846 - }, - { - "epoch": 0.905785882218017, - "grad_norm": 0.0008587847696617246, - "learning_rate": 0.00019999959580798056, - "loss": 46.0, - "step": 11847 - }, - { - "epoch": 0.9058623392014068, - "grad_norm": 0.0013332751113921404, - "learning_rate": 0.00019999959573968474, - "loss": 46.0, - "step": 11848 - }, - { - "epoch": 0.9059387961847966, - "grad_norm": 0.0009754467755556107, - "learning_rate": 0.00019999959567138314, - "loss": 46.0, - "step": 11849 - }, - { - "epoch": 0.9060152531681862, - "grad_norm": 0.0006352203781716526, - "learning_rate": 0.0001999995956030758, - "loss": 46.0, - "step": 11850 - }, - { - "epoch": 0.906091710151576, - "grad_norm": 0.0004887196118943393, - "learning_rate": 0.00019999959553476267, - "loss": 46.0, - "step": 11851 - }, - { - "epoch": 0.9061681671349657, - "grad_norm": 0.004350871779024601, - "learning_rate": 0.00019999959546644375, - "loss": 46.0, - "step": 11852 - }, - { - "epoch": 0.9062446241183554, - "grad_norm": 0.0017469220329076052, - "learning_rate": 0.0001999995953981191, - "loss": 46.0, - "step": 11853 - }, - { - "epoch": 0.9063210811017451, - "grad_norm": 0.0005943359574303031, - "learning_rate": 0.00019999959532978866, - "loss": 46.0, - "step": 11854 - }, - { - "epoch": 0.9063975380851349, - "grad_norm": 0.004706950858235359, - "learning_rate": 0.00019999959526145246, - "loss": 46.0, - "step": 11855 - }, - { - "epoch": 0.9064739950685246, - "grad_norm": 0.0005976425018161535, - "learning_rate": 0.00019999959519311048, - "loss": 46.0, - "step": 11856 - }, - { - "epoch": 0.9065504520519143, - "grad_norm": 0.001496283570304513, - "learning_rate": 0.00019999959512476272, - "loss": 46.0, - "step": 11857 - }, - { - "epoch": 0.906626909035304, - "grad_norm": 0.002717680297791958, - "learning_rate": 0.00019999959505640922, - "loss": 46.0, - "step": 11858 - }, - { - "epoch": 0.9067033660186937, - "grad_norm": 0.002634023316204548, - "learning_rate": 0.00019999959498804995, - "loss": 46.0, - "step": 11859 - }, - { - "epoch": 0.9067798230020835, - "grad_norm": 0.0014204670442268252, - "learning_rate": 0.00019999959491968488, - "loss": 46.0, - "step": 11860 - }, - { - "epoch": 0.9068562799854731, - "grad_norm": 0.0014687500661239028, - "learning_rate": 0.00019999959485131406, - "loss": 46.0, - "step": 11861 - }, - { - "epoch": 0.9069327369688629, - "grad_norm": 0.006440106779336929, - "learning_rate": 0.00019999959478293747, - "loss": 46.0, - "step": 11862 - }, - { - "epoch": 0.9070091939522527, - "grad_norm": 0.0011966609163209796, - "learning_rate": 0.0001999995947145551, - "loss": 46.0, - "step": 11863 - }, - { - "epoch": 0.9070856509356423, - "grad_norm": 0.0011822740780189633, - "learning_rate": 0.00019999959464616696, - "loss": 46.0, - "step": 11864 - }, - { - "epoch": 0.9071621079190321, - "grad_norm": 0.0030144276097416878, - "learning_rate": 0.00019999959457777305, - "loss": 46.0, - "step": 11865 - }, - { - "epoch": 0.9072385649024218, - "grad_norm": 0.000799511035438627, - "learning_rate": 0.00019999959450937337, - "loss": 46.0, - "step": 11866 - }, - { - "epoch": 0.9073150218858115, - "grad_norm": 0.0014971902128309011, - "learning_rate": 0.00019999959444096793, - "loss": 46.0, - "step": 11867 - }, - { - "epoch": 0.9073914788692012, - "grad_norm": 0.0023763212375342846, - "learning_rate": 0.00019999959437255673, - "loss": 46.0, - "step": 11868 - }, - { - "epoch": 0.9074679358525909, - "grad_norm": 0.0034972601570189, - "learning_rate": 0.00019999959430413978, - "loss": 46.0, - "step": 11869 - }, - { - "epoch": 0.9075443928359807, - "grad_norm": 0.0026538101956248283, - "learning_rate": 0.000199999594235717, - "loss": 46.0, - "step": 11870 - }, - { - "epoch": 0.9076208498193704, - "grad_norm": 0.0020829658024013042, - "learning_rate": 0.0001999995941672885, - "loss": 46.0, - "step": 11871 - }, - { - "epoch": 0.9076973068027601, - "grad_norm": 0.0010882171336561441, - "learning_rate": 0.0001999995940988542, - "loss": 46.0, - "step": 11872 - }, - { - "epoch": 0.9077737637861498, - "grad_norm": 0.001266672508791089, - "learning_rate": 0.00019999959403041415, - "loss": 46.0, - "step": 11873 - }, - { - "epoch": 0.9078502207695396, - "grad_norm": 0.010262281633913517, - "learning_rate": 0.00019999959396196833, - "loss": 46.0, - "step": 11874 - }, - { - "epoch": 0.9079266777529292, - "grad_norm": 0.0014136049430817366, - "learning_rate": 0.00019999959389351672, - "loss": 46.0, - "step": 11875 - }, - { - "epoch": 0.908003134736319, - "grad_norm": 0.0016992302844300866, - "learning_rate": 0.00019999959382505935, - "loss": 46.0, - "step": 11876 - }, - { - "epoch": 0.9080795917197086, - "grad_norm": 0.0009087096550501883, - "learning_rate": 0.0001999995937565962, - "loss": 46.0, - "step": 11877 - }, - { - "epoch": 0.9081560487030984, - "grad_norm": 0.0008233482367359102, - "learning_rate": 0.00019999959368812733, - "loss": 46.0, - "step": 11878 - }, - { - "epoch": 0.9082325056864882, - "grad_norm": 0.0004427512758411467, - "learning_rate": 0.00019999959361965264, - "loss": 46.0, - "step": 11879 - }, - { - "epoch": 0.9083089626698778, - "grad_norm": 0.0008080598781816661, - "learning_rate": 0.00019999959355117218, - "loss": 46.0, - "step": 11880 - }, - { - "epoch": 0.9083854196532676, - "grad_norm": 0.0011526867747306824, - "learning_rate": 0.00019999959348268598, - "loss": 46.0, - "step": 11881 - }, - { - "epoch": 0.9084618766366573, - "grad_norm": 0.0012742433464154601, - "learning_rate": 0.000199999593414194, - "loss": 46.0, - "step": 11882 - }, - { - "epoch": 0.908538333620047, - "grad_norm": 0.0008148119668476284, - "learning_rate": 0.00019999959334569625, - "loss": 46.0, - "step": 11883 - }, - { - "epoch": 0.9086147906034368, - "grad_norm": 0.0019154173787683249, - "learning_rate": 0.00019999959327719272, - "loss": 46.0, - "step": 11884 - }, - { - "epoch": 0.9086912475868265, - "grad_norm": 0.0005655597778968513, - "learning_rate": 0.00019999959320868342, - "loss": 46.0, - "step": 11885 - }, - { - "epoch": 0.9087677045702162, - "grad_norm": 0.0006353000644594431, - "learning_rate": 0.00019999959314016835, - "loss": 46.0, - "step": 11886 - }, - { - "epoch": 0.9088441615536059, - "grad_norm": 0.0008929286850616336, - "learning_rate": 0.00019999959307164756, - "loss": 46.0, - "step": 11887 - }, - { - "epoch": 0.9089206185369956, - "grad_norm": 0.00280123483389616, - "learning_rate": 0.00019999959300312097, - "loss": 46.0, - "step": 11888 - }, - { - "epoch": 0.9089970755203853, - "grad_norm": 0.0006641016807407141, - "learning_rate": 0.00019999959293458857, - "loss": 46.0, - "step": 11889 - }, - { - "epoch": 0.9090735325037751, - "grad_norm": 0.0028449599631130695, - "learning_rate": 0.00019999959286605044, - "loss": 46.0, - "step": 11890 - }, - { - "epoch": 0.9091499894871647, - "grad_norm": 0.002026725560426712, - "learning_rate": 0.00019999959279750655, - "loss": 46.0, - "step": 11891 - }, - { - "epoch": 0.9092264464705545, - "grad_norm": 0.002606593305245042, - "learning_rate": 0.00019999959272895687, - "loss": 46.0, - "step": 11892 - }, - { - "epoch": 0.9093029034539443, - "grad_norm": 0.0005219989689067006, - "learning_rate": 0.00019999959266040143, - "loss": 46.0, - "step": 11893 - }, - { - "epoch": 0.9093793604373339, - "grad_norm": 0.0028256692457944155, - "learning_rate": 0.0001999995925918402, - "loss": 46.0, - "step": 11894 - }, - { - "epoch": 0.9094558174207237, - "grad_norm": 0.002417660551145673, - "learning_rate": 0.0001999995925232732, - "loss": 46.0, - "step": 11895 - }, - { - "epoch": 0.9095322744041134, - "grad_norm": 0.0008055018843151629, - "learning_rate": 0.00019999959245470045, - "loss": 46.0, - "step": 11896 - }, - { - "epoch": 0.9096087313875031, - "grad_norm": 0.000683775229845196, - "learning_rate": 0.00019999959238612195, - "loss": 46.0, - "step": 11897 - }, - { - "epoch": 0.9096851883708928, - "grad_norm": 0.0032831078860908747, - "learning_rate": 0.00019999959231753765, - "loss": 46.0, - "step": 11898 - }, - { - "epoch": 0.9097616453542825, - "grad_norm": 0.0011444385163486004, - "learning_rate": 0.00019999959224894758, - "loss": 46.0, - "step": 11899 - }, - { - "epoch": 0.9098381023376723, - "grad_norm": 0.003476099343970418, - "learning_rate": 0.00019999959218035173, - "loss": 46.0, - "step": 11900 - }, - { - "epoch": 0.909914559321062, - "grad_norm": 0.0008008545846678317, - "learning_rate": 0.00019999959211175014, - "loss": 46.0, - "step": 11901 - }, - { - "epoch": 0.9099910163044517, - "grad_norm": 0.0021953631658107042, - "learning_rate": 0.00019999959204314278, - "loss": 46.0, - "step": 11902 - }, - { - "epoch": 0.9100674732878414, - "grad_norm": 0.0002964919258374721, - "learning_rate": 0.0001999995919745296, - "loss": 46.0, - "step": 11903 - }, - { - "epoch": 0.9101439302712312, - "grad_norm": 0.001034496701322496, - "learning_rate": 0.00019999959190591073, - "loss": 46.0, - "step": 11904 - }, - { - "epoch": 0.9102203872546208, - "grad_norm": 0.0013356952695176005, - "learning_rate": 0.00019999959183728604, - "loss": 46.0, - "step": 11905 - }, - { - "epoch": 0.9102968442380106, - "grad_norm": 0.02589714340865612, - "learning_rate": 0.00019999959176865558, - "loss": 46.0, - "step": 11906 - }, - { - "epoch": 0.9103733012214003, - "grad_norm": 0.0020736269652843475, - "learning_rate": 0.00019999959170001938, - "loss": 46.0, - "step": 11907 - }, - { - "epoch": 0.91044975820479, - "grad_norm": 0.005621438380330801, - "learning_rate": 0.0001999995916313774, - "loss": 46.0, - "step": 11908 - }, - { - "epoch": 0.9105262151881798, - "grad_norm": 0.0028752838261425495, - "learning_rate": 0.00019999959156272965, - "loss": 46.0, - "step": 11909 - }, - { - "epoch": 0.9106026721715694, - "grad_norm": 0.006113926414400339, - "learning_rate": 0.00019999959149407612, - "loss": 46.0, - "step": 11910 - }, - { - "epoch": 0.9106791291549592, - "grad_norm": 0.0011571331415325403, - "learning_rate": 0.00019999959142541683, - "loss": 46.0, - "step": 11911 - }, - { - "epoch": 0.910755586138349, - "grad_norm": 0.0034599846694618464, - "learning_rate": 0.00019999959135675176, - "loss": 46.0, - "step": 11912 - }, - { - "epoch": 0.9108320431217386, - "grad_norm": 0.0006363982101902366, - "learning_rate": 0.0001999995912880809, - "loss": 46.0, - "step": 11913 - }, - { - "epoch": 0.9109085001051284, - "grad_norm": 0.002225884236395359, - "learning_rate": 0.00019999959121940432, - "loss": 46.0, - "step": 11914 - }, - { - "epoch": 0.9109849570885181, - "grad_norm": 0.01591384783387184, - "learning_rate": 0.00019999959115072193, - "loss": 46.0, - "step": 11915 - }, - { - "epoch": 0.9110614140719078, - "grad_norm": 0.0013474604347720742, - "learning_rate": 0.0001999995910820338, - "loss": 46.0, - "step": 11916 - }, - { - "epoch": 0.9111378710552975, - "grad_norm": 0.0008312687277793884, - "learning_rate": 0.00019999959101333988, - "loss": 46.0, - "step": 11917 - }, - { - "epoch": 0.9112143280386872, - "grad_norm": 0.0008586280164308846, - "learning_rate": 0.0001999995909446402, - "loss": 46.0, - "step": 11918 - }, - { - "epoch": 0.9112907850220769, - "grad_norm": 0.0019733973313122988, - "learning_rate": 0.00019999959087593476, - "loss": 46.0, - "step": 11919 - }, - { - "epoch": 0.9113672420054667, - "grad_norm": 0.005005836486816406, - "learning_rate": 0.00019999959080722353, - "loss": 46.0, - "step": 11920 - }, - { - "epoch": 0.9114436989888564, - "grad_norm": 0.0009364400175400078, - "learning_rate": 0.00019999959073850653, - "loss": 46.0, - "step": 11921 - }, - { - "epoch": 0.9115201559722461, - "grad_norm": 0.002472240710631013, - "learning_rate": 0.00019999959066978378, - "loss": 46.0, - "step": 11922 - }, - { - "epoch": 0.9115966129556359, - "grad_norm": 0.0018947651842609048, - "learning_rate": 0.00019999959060105525, - "loss": 46.0, - "step": 11923 - }, - { - "epoch": 0.9116730699390255, - "grad_norm": 0.0008839494548738003, - "learning_rate": 0.00019999959053232095, - "loss": 46.0, - "step": 11924 - }, - { - "epoch": 0.9117495269224153, - "grad_norm": 0.0013301677536219358, - "learning_rate": 0.00019999959046358088, - "loss": 46.0, - "step": 11925 - }, - { - "epoch": 0.911825983905805, - "grad_norm": 0.0009192051948048174, - "learning_rate": 0.00019999959039483504, - "loss": 46.0, - "step": 11926 - }, - { - "epoch": 0.9119024408891947, - "grad_norm": 0.000963798665907234, - "learning_rate": 0.00019999959032608345, - "loss": 46.0, - "step": 11927 - }, - { - "epoch": 0.9119788978725845, - "grad_norm": 0.000848840456455946, - "learning_rate": 0.00019999959025732606, - "loss": 46.0, - "step": 11928 - }, - { - "epoch": 0.9120553548559741, - "grad_norm": 0.00038978931843303144, - "learning_rate": 0.00019999959018856292, - "loss": 46.0, - "step": 11929 - }, - { - "epoch": 0.9121318118393639, - "grad_norm": 0.00507147703319788, - "learning_rate": 0.000199999590119794, - "loss": 46.0, - "step": 11930 - }, - { - "epoch": 0.9122082688227536, - "grad_norm": 0.0007379365270026028, - "learning_rate": 0.00019999959005101932, - "loss": 46.0, - "step": 11931 - }, - { - "epoch": 0.9122847258061433, - "grad_norm": 0.0013775257393717766, - "learning_rate": 0.00019999958998223887, - "loss": 46.0, - "step": 11932 - }, - { - "epoch": 0.912361182789533, - "grad_norm": 0.0009456261759623885, - "learning_rate": 0.00019999958991345263, - "loss": 46.0, - "step": 11933 - }, - { - "epoch": 0.9124376397729228, - "grad_norm": 0.0014599458081647754, - "learning_rate": 0.00019999958984466066, - "loss": 46.0, - "step": 11934 - }, - { - "epoch": 0.9125140967563125, - "grad_norm": 0.011240809224545956, - "learning_rate": 0.00019999958977586288, - "loss": 46.0, - "step": 11935 - }, - { - "epoch": 0.9125905537397022, - "grad_norm": 0.0004105728876311332, - "learning_rate": 0.00019999958970705936, - "loss": 46.0, - "step": 11936 - }, - { - "epoch": 0.9126670107230919, - "grad_norm": 0.0007375653367489576, - "learning_rate": 0.00019999958963825006, - "loss": 46.0, - "step": 11937 - }, - { - "epoch": 0.9127434677064816, - "grad_norm": 0.0009479140280745924, - "learning_rate": 0.000199999589569435, - "loss": 46.0, - "step": 11938 - }, - { - "epoch": 0.9128199246898714, - "grad_norm": 0.001707016839645803, - "learning_rate": 0.00019999958950061414, - "loss": 46.0, - "step": 11939 - }, - { - "epoch": 0.912896381673261, - "grad_norm": 0.0024091594386845827, - "learning_rate": 0.00019999958943178753, - "loss": 46.0, - "step": 11940 - }, - { - "epoch": 0.9129728386566508, - "grad_norm": 0.009356711991131306, - "learning_rate": 0.00019999958936295516, - "loss": 46.0, - "step": 11941 - }, - { - "epoch": 0.9130492956400406, - "grad_norm": 0.001243910868652165, - "learning_rate": 0.00019999958929411702, - "loss": 46.0, - "step": 11942 - }, - { - "epoch": 0.9131257526234302, - "grad_norm": 0.000702616642229259, - "learning_rate": 0.00019999958922527309, - "loss": 46.0, - "step": 11943 - }, - { - "epoch": 0.91320220960682, - "grad_norm": 0.002289367141202092, - "learning_rate": 0.0001999995891564234, - "loss": 46.0, - "step": 11944 - }, - { - "epoch": 0.9132786665902097, - "grad_norm": 0.0012534311972558498, - "learning_rate": 0.00019999958908756795, - "loss": 46.0, - "step": 11945 - }, - { - "epoch": 0.9133551235735994, - "grad_norm": 0.0011242461623623967, - "learning_rate": 0.00019999958901870671, - "loss": 46.0, - "step": 11946 - }, - { - "epoch": 0.9134315805569891, - "grad_norm": 0.00039433466736227274, - "learning_rate": 0.0001999995889498397, - "loss": 46.0, - "step": 11947 - }, - { - "epoch": 0.9135080375403788, - "grad_norm": 0.003199234139174223, - "learning_rate": 0.00019999958888096696, - "loss": 46.0, - "step": 11948 - }, - { - "epoch": 0.9135844945237686, - "grad_norm": 0.0010197182418778539, - "learning_rate": 0.00019999958881208844, - "loss": 46.0, - "step": 11949 - }, - { - "epoch": 0.9136609515071583, - "grad_norm": 0.0025528890546411276, - "learning_rate": 0.0001999995887432041, - "loss": 46.0, - "step": 11950 - }, - { - "epoch": 0.913737408490548, - "grad_norm": 0.0021053338423371315, - "learning_rate": 0.00019999958867431404, - "loss": 46.0, - "step": 11951 - }, - { - "epoch": 0.9138138654739377, - "grad_norm": 0.0011797461193054914, - "learning_rate": 0.0001999995886054182, - "loss": 46.0, - "step": 11952 - }, - { - "epoch": 0.9138903224573275, - "grad_norm": 0.0015162909403443336, - "learning_rate": 0.00019999958853651658, - "loss": 46.0, - "step": 11953 - }, - { - "epoch": 0.9139667794407171, - "grad_norm": 0.004422291181981564, - "learning_rate": 0.00019999958846760922, - "loss": 46.0, - "step": 11954 - }, - { - "epoch": 0.9140432364241069, - "grad_norm": 0.0011769947595894337, - "learning_rate": 0.00019999958839869605, - "loss": 46.0, - "step": 11955 - }, - { - "epoch": 0.9141196934074967, - "grad_norm": 0.0025571975857019424, - "learning_rate": 0.00019999958832977714, - "loss": 46.0, - "step": 11956 - }, - { - "epoch": 0.9141961503908863, - "grad_norm": 0.00062129448633641, - "learning_rate": 0.00019999958826085246, - "loss": 46.0, - "step": 11957 - }, - { - "epoch": 0.9142726073742761, - "grad_norm": 0.0020329910330474377, - "learning_rate": 0.000199999588191922, - "loss": 46.0, - "step": 11958 - }, - { - "epoch": 0.9143490643576657, - "grad_norm": 0.001169761293567717, - "learning_rate": 0.00019999958812298577, - "loss": 46.0, - "step": 11959 - }, - { - "epoch": 0.9144255213410555, - "grad_norm": 0.002692839130759239, - "learning_rate": 0.00019999958805404377, - "loss": 46.0, - "step": 11960 - }, - { - "epoch": 0.9145019783244452, - "grad_norm": 0.0006688666180707514, - "learning_rate": 0.000199999587985096, - "loss": 46.0, - "step": 11961 - }, - { - "epoch": 0.9145784353078349, - "grad_norm": 0.0013408127706497908, - "learning_rate": 0.00019999958791614247, - "loss": 46.0, - "step": 11962 - }, - { - "epoch": 0.9146548922912247, - "grad_norm": 0.00237915082834661, - "learning_rate": 0.00019999958784718314, - "loss": 46.0, - "step": 11963 - }, - { - "epoch": 0.9147313492746144, - "grad_norm": 0.0014154831878840923, - "learning_rate": 0.00019999958777821807, - "loss": 46.0, - "step": 11964 - }, - { - "epoch": 0.9148078062580041, - "grad_norm": 0.0015954760601744056, - "learning_rate": 0.00019999958770924723, - "loss": 46.0, - "step": 11965 - }, - { - "epoch": 0.9148842632413938, - "grad_norm": 0.005710669793188572, - "learning_rate": 0.0001999995876402706, - "loss": 46.0, - "step": 11966 - }, - { - "epoch": 0.9149607202247836, - "grad_norm": 0.0010667579481378198, - "learning_rate": 0.00019999958757128825, - "loss": 46.0, - "step": 11967 - }, - { - "epoch": 0.9150371772081732, - "grad_norm": 0.0015672474401071668, - "learning_rate": 0.0001999995875023001, - "loss": 46.0, - "step": 11968 - }, - { - "epoch": 0.915113634191563, - "grad_norm": 0.0029464783146977425, - "learning_rate": 0.00019999958743330618, - "loss": 46.0, - "step": 11969 - }, - { - "epoch": 0.9151900911749526, - "grad_norm": 0.004860200919210911, - "learning_rate": 0.0001999995873643065, - "loss": 46.0, - "step": 11970 - }, - { - "epoch": 0.9152665481583424, - "grad_norm": 0.0027938722632825375, - "learning_rate": 0.000199999587295301, - "loss": 46.0, - "step": 11971 - }, - { - "epoch": 0.9153430051417322, - "grad_norm": 0.005090589635074139, - "learning_rate": 0.00019999958722628978, - "loss": 46.0, - "step": 11972 - }, - { - "epoch": 0.9154194621251218, - "grad_norm": 0.001975435996428132, - "learning_rate": 0.00019999958715727277, - "loss": 46.0, - "step": 11973 - }, - { - "epoch": 0.9154959191085116, - "grad_norm": 0.00369916670024395, - "learning_rate": 0.00019999958708825002, - "loss": 46.0, - "step": 11974 - }, - { - "epoch": 0.9155723760919013, - "grad_norm": 0.0041347891092300415, - "learning_rate": 0.00019999958701922145, - "loss": 46.0, - "step": 11975 - }, - { - "epoch": 0.915648833075291, - "grad_norm": 0.001029394450597465, - "learning_rate": 0.00019999958695018718, - "loss": 46.0, - "step": 11976 - }, - { - "epoch": 0.9157252900586808, - "grad_norm": 0.005500374361872673, - "learning_rate": 0.00019999958688114708, - "loss": 46.0, - "step": 11977 - }, - { - "epoch": 0.9158017470420704, - "grad_norm": 0.0014091175980865955, - "learning_rate": 0.00019999958681210124, - "loss": 46.0, - "step": 11978 - }, - { - "epoch": 0.9158782040254602, - "grad_norm": 0.0011467932490631938, - "learning_rate": 0.00019999958674304962, - "loss": 46.0, - "step": 11979 - }, - { - "epoch": 0.9159546610088499, - "grad_norm": 0.0007589886081404984, - "learning_rate": 0.00019999958667399223, - "loss": 46.0, - "step": 11980 - }, - { - "epoch": 0.9160311179922396, - "grad_norm": 0.0007121781236492097, - "learning_rate": 0.00019999958660492907, - "loss": 46.0, - "step": 11981 - }, - { - "epoch": 0.9161075749756293, - "grad_norm": 0.007561048958450556, - "learning_rate": 0.00019999958653586016, - "loss": 46.0, - "step": 11982 - }, - { - "epoch": 0.9161840319590191, - "grad_norm": 0.008352844044566154, - "learning_rate": 0.00019999958646678545, - "loss": 46.0, - "step": 11983 - }, - { - "epoch": 0.9162604889424087, - "grad_norm": 0.001725569716654718, - "learning_rate": 0.00019999958639770496, - "loss": 46.0, - "step": 11984 - }, - { - "epoch": 0.9163369459257985, - "grad_norm": 0.003644380485638976, - "learning_rate": 0.00019999958632861876, - "loss": 46.0, - "step": 11985 - }, - { - "epoch": 0.9164134029091883, - "grad_norm": 0.0012136718723922968, - "learning_rate": 0.00019999958625952676, - "loss": 46.0, - "step": 11986 - }, - { - "epoch": 0.9164898598925779, - "grad_norm": 0.0022887163795530796, - "learning_rate": 0.00019999958619042898, - "loss": 46.0, - "step": 11987 - }, - { - "epoch": 0.9165663168759677, - "grad_norm": 0.011860202997922897, - "learning_rate": 0.00019999958612132543, - "loss": 46.0, - "step": 11988 - }, - { - "epoch": 0.9166427738593573, - "grad_norm": 0.0006480338633991778, - "learning_rate": 0.00019999958605221614, - "loss": 46.0, - "step": 11989 - }, - { - "epoch": 0.9167192308427471, - "grad_norm": 0.0028399424627423286, - "learning_rate": 0.00019999958598310104, - "loss": 46.0, - "step": 11990 - }, - { - "epoch": 0.9167956878261369, - "grad_norm": 0.0005744567606598139, - "learning_rate": 0.00019999958591398017, - "loss": 46.0, - "step": 11991 - }, - { - "epoch": 0.9168721448095265, - "grad_norm": 0.0009081577882170677, - "learning_rate": 0.00019999958584485355, - "loss": 46.0, - "step": 11992 - }, - { - "epoch": 0.9169486017929163, - "grad_norm": 0.0006073405966162682, - "learning_rate": 0.0001999995857757212, - "loss": 46.0, - "step": 11993 - }, - { - "epoch": 0.917025058776306, - "grad_norm": 0.0006281262612901628, - "learning_rate": 0.00019999958570658303, - "loss": 46.0, - "step": 11994 - }, - { - "epoch": 0.9171015157596957, - "grad_norm": 0.0022540143691003323, - "learning_rate": 0.0001999995856374391, - "loss": 46.0, - "step": 11995 - }, - { - "epoch": 0.9171779727430854, - "grad_norm": 0.008779694326221943, - "learning_rate": 0.0001999995855682894, - "loss": 46.0, - "step": 11996 - }, - { - "epoch": 0.9172544297264752, - "grad_norm": 0.0010701692663133144, - "learning_rate": 0.00019999958549913392, - "loss": 46.0, - "step": 11997 - }, - { - "epoch": 0.9173308867098648, - "grad_norm": 0.0010692435316741467, - "learning_rate": 0.0001999995854299727, - "loss": 46.0, - "step": 11998 - }, - { - "epoch": 0.9174073436932546, - "grad_norm": 0.00120479182805866, - "learning_rate": 0.0001999995853608057, - "loss": 46.0, - "step": 11999 - }, - { - "epoch": 0.9174838006766443, - "grad_norm": 0.0023994368966668844, - "learning_rate": 0.00019999958529163292, - "loss": 46.0, - "step": 12000 - }, - { - "epoch": 0.917560257660034, - "grad_norm": 0.003430549055337906, - "learning_rate": 0.00019999958522245437, - "loss": 46.0, - "step": 12001 - }, - { - "epoch": 0.9176367146434238, - "grad_norm": 0.0007764084148220718, - "learning_rate": 0.00019999958515327007, - "loss": 46.0, - "step": 12002 - }, - { - "epoch": 0.9177131716268134, - "grad_norm": 0.0011081916745752096, - "learning_rate": 0.00019999958508408, - "loss": 46.0, - "step": 12003 - }, - { - "epoch": 0.9177896286102032, - "grad_norm": 0.0006723771221004426, - "learning_rate": 0.00019999958501488413, - "loss": 46.0, - "step": 12004 - }, - { - "epoch": 0.917866085593593, - "grad_norm": 0.0004039190534967929, - "learning_rate": 0.00019999958494568252, - "loss": 46.0, - "step": 12005 - }, - { - "epoch": 0.9179425425769826, - "grad_norm": 0.002531598322093487, - "learning_rate": 0.00019999958487647513, - "loss": 46.0, - "step": 12006 - }, - { - "epoch": 0.9180189995603724, - "grad_norm": 0.001886343234218657, - "learning_rate": 0.00019999958480726196, - "loss": 46.0, - "step": 12007 - }, - { - "epoch": 0.918095456543762, - "grad_norm": 0.0007136269705370069, - "learning_rate": 0.000199999584738043, - "loss": 46.0, - "step": 12008 - }, - { - "epoch": 0.9181719135271518, - "grad_norm": 0.0016071625286713243, - "learning_rate": 0.00019999958466881832, - "loss": 46.0, - "step": 12009 - }, - { - "epoch": 0.9182483705105415, - "grad_norm": 0.0032566061709076166, - "learning_rate": 0.00019999958459958786, - "loss": 46.0, - "step": 12010 - }, - { - "epoch": 0.9183248274939312, - "grad_norm": 0.0015204158844426274, - "learning_rate": 0.0001999995845303516, - "loss": 46.0, - "step": 12011 - }, - { - "epoch": 0.918401284477321, - "grad_norm": 0.000433815032010898, - "learning_rate": 0.0001999995844611096, - "loss": 46.0, - "step": 12012 - }, - { - "epoch": 0.9184777414607107, - "grad_norm": 0.0011306294472888112, - "learning_rate": 0.00019999958439186183, - "loss": 46.0, - "step": 12013 - }, - { - "epoch": 0.9185541984441004, - "grad_norm": 0.0009235804900527, - "learning_rate": 0.00019999958432260828, - "loss": 46.0, - "step": 12014 - }, - { - "epoch": 0.9186306554274901, - "grad_norm": 0.0018773576011881232, - "learning_rate": 0.00019999958425334896, - "loss": 46.0, - "step": 12015 - }, - { - "epoch": 0.9187071124108799, - "grad_norm": 0.0010949296411126852, - "learning_rate": 0.00019999958418408386, - "loss": 46.0, - "step": 12016 - }, - { - "epoch": 0.9187835693942695, - "grad_norm": 0.0010235997615382075, - "learning_rate": 0.00019999958411481302, - "loss": 46.0, - "step": 12017 - }, - { - "epoch": 0.9188600263776593, - "grad_norm": 0.002141745062544942, - "learning_rate": 0.00019999958404553637, - "loss": 46.0, - "step": 12018 - }, - { - "epoch": 0.9189364833610489, - "grad_norm": 0.0003547972009982914, - "learning_rate": 0.000199999583976254, - "loss": 46.0, - "step": 12019 - }, - { - "epoch": 0.9190129403444387, - "grad_norm": 0.002120896242558956, - "learning_rate": 0.00019999958390696585, - "loss": 46.0, - "step": 12020 - }, - { - "epoch": 0.9190893973278285, - "grad_norm": 0.005077350419014692, - "learning_rate": 0.0001999995838376719, - "loss": 46.0, - "step": 12021 - }, - { - "epoch": 0.9191658543112181, - "grad_norm": 0.00373087078332901, - "learning_rate": 0.0001999995837683722, - "loss": 46.0, - "step": 12022 - }, - { - "epoch": 0.9192423112946079, - "grad_norm": 0.0008808451821096241, - "learning_rate": 0.00019999958369906672, - "loss": 46.0, - "step": 12023 - }, - { - "epoch": 0.9193187682779976, - "grad_norm": 0.0010077588958665729, - "learning_rate": 0.0001999995836297555, - "loss": 46.0, - "step": 12024 - }, - { - "epoch": 0.9193952252613873, - "grad_norm": 0.0006262955139391124, - "learning_rate": 0.00019999958356043846, - "loss": 46.0, - "step": 12025 - }, - { - "epoch": 0.919471682244777, - "grad_norm": 0.0011571147479116917, - "learning_rate": 0.0001999995834911157, - "loss": 46.0, - "step": 12026 - }, - { - "epoch": 0.9195481392281668, - "grad_norm": 0.001044245669618249, - "learning_rate": 0.00019999958342178714, - "loss": 46.0, - "step": 12027 - }, - { - "epoch": 0.9196245962115565, - "grad_norm": 0.0031220833770930767, - "learning_rate": 0.00019999958335245284, - "loss": 46.0, - "step": 12028 - }, - { - "epoch": 0.9197010531949462, - "grad_norm": 0.0018916547996923327, - "learning_rate": 0.00019999958328311275, - "loss": 46.0, - "step": 12029 - }, - { - "epoch": 0.9197775101783359, - "grad_norm": 0.0017111971974372864, - "learning_rate": 0.00019999958321376688, - "loss": 46.0, - "step": 12030 - }, - { - "epoch": 0.9198539671617256, - "grad_norm": 0.0009090888779610395, - "learning_rate": 0.00019999958314441524, - "loss": 46.0, - "step": 12031 - }, - { - "epoch": 0.9199304241451154, - "grad_norm": 0.0014227371430024505, - "learning_rate": 0.00019999958307505785, - "loss": 46.0, - "step": 12032 - }, - { - "epoch": 0.920006881128505, - "grad_norm": 0.0024583826307207346, - "learning_rate": 0.0001999995830056947, - "loss": 46.0, - "step": 12033 - }, - { - "epoch": 0.9200833381118948, - "grad_norm": 0.004276834893971682, - "learning_rate": 0.00019999958293632575, - "loss": 46.0, - "step": 12034 - }, - { - "epoch": 0.9201597950952846, - "grad_norm": 0.004093404859304428, - "learning_rate": 0.00019999958286695107, - "loss": 46.0, - "step": 12035 - }, - { - "epoch": 0.9202362520786742, - "grad_norm": 0.00042168053914792836, - "learning_rate": 0.00019999958279757056, - "loss": 46.0, - "step": 12036 - }, - { - "epoch": 0.920312709062064, - "grad_norm": 0.0009360152180306613, - "learning_rate": 0.00019999958272818433, - "loss": 46.0, - "step": 12037 - }, - { - "epoch": 0.9203891660454536, - "grad_norm": 0.002095120958983898, - "learning_rate": 0.00019999958265879233, - "loss": 46.0, - "step": 12038 - }, - { - "epoch": 0.9204656230288434, - "grad_norm": 0.001412541838362813, - "learning_rate": 0.00019999958258939455, - "loss": 46.0, - "step": 12039 - }, - { - "epoch": 0.9205420800122331, - "grad_norm": 0.002074155490845442, - "learning_rate": 0.00019999958251999098, - "loss": 46.0, - "step": 12040 - }, - { - "epoch": 0.9206185369956228, - "grad_norm": 0.0013546498958021402, - "learning_rate": 0.00019999958245058168, - "loss": 46.0, - "step": 12041 - }, - { - "epoch": 0.9206949939790126, - "grad_norm": 0.00046360664418898523, - "learning_rate": 0.0001999995823811666, - "loss": 46.0, - "step": 12042 - }, - { - "epoch": 0.9207714509624023, - "grad_norm": 0.0016613882035017014, - "learning_rate": 0.00019999958231174572, - "loss": 46.0, - "step": 12043 - }, - { - "epoch": 0.920847907945792, - "grad_norm": 0.002969025168567896, - "learning_rate": 0.00019999958224231908, - "loss": 46.0, - "step": 12044 - }, - { - "epoch": 0.9209243649291817, - "grad_norm": 0.0013480096822604537, - "learning_rate": 0.0001999995821728867, - "loss": 46.0, - "step": 12045 - }, - { - "epoch": 0.9210008219125715, - "grad_norm": 0.00027013677754439414, - "learning_rate": 0.00019999958210344853, - "loss": 46.0, - "step": 12046 - }, - { - "epoch": 0.9210772788959611, - "grad_norm": 0.010235710069537163, - "learning_rate": 0.00019999958203400456, - "loss": 46.0, - "step": 12047 - }, - { - "epoch": 0.9211537358793509, - "grad_norm": 0.0003867988125421107, - "learning_rate": 0.00019999958196455486, - "loss": 46.0, - "step": 12048 - }, - { - "epoch": 0.9212301928627405, - "grad_norm": 0.003273334354162216, - "learning_rate": 0.0001999995818950994, - "loss": 46.0, - "step": 12049 - }, - { - "epoch": 0.9213066498461303, - "grad_norm": 0.0009624137892387807, - "learning_rate": 0.00019999958182563815, - "loss": 46.0, - "step": 12050 - }, - { - "epoch": 0.9213831068295201, - "grad_norm": 0.0034911471884697676, - "learning_rate": 0.00019999958175617112, - "loss": 46.0, - "step": 12051 - }, - { - "epoch": 0.9214595638129097, - "grad_norm": 0.0014526054728776217, - "learning_rate": 0.00019999958168669834, - "loss": 46.0, - "step": 12052 - }, - { - "epoch": 0.9215360207962995, - "grad_norm": 0.003096493659541011, - "learning_rate": 0.0001999995816172198, - "loss": 46.0, - "step": 12053 - }, - { - "epoch": 0.9216124777796892, - "grad_norm": 0.0016938558546826243, - "learning_rate": 0.00019999958154773547, - "loss": 46.0, - "step": 12054 - }, - { - "epoch": 0.9216889347630789, - "grad_norm": 0.0010012559359893203, - "learning_rate": 0.00019999958147824538, - "loss": 46.0, - "step": 12055 - }, - { - "epoch": 0.9217653917464687, - "grad_norm": 0.0010494185844436288, - "learning_rate": 0.0001999995814087495, - "loss": 46.0, - "step": 12056 - }, - { - "epoch": 0.9218418487298584, - "grad_norm": 0.003322313306853175, - "learning_rate": 0.00019999958133924787, - "loss": 46.0, - "step": 12057 - }, - { - "epoch": 0.9219183057132481, - "grad_norm": 0.001662363763898611, - "learning_rate": 0.00019999958126974048, - "loss": 46.0, - "step": 12058 - }, - { - "epoch": 0.9219947626966378, - "grad_norm": 0.001989088486880064, - "learning_rate": 0.00019999958120022732, - "loss": 46.0, - "step": 12059 - }, - { - "epoch": 0.9220712196800275, - "grad_norm": 0.000909341499209404, - "learning_rate": 0.00019999958113070835, - "loss": 46.0, - "step": 12060 - }, - { - "epoch": 0.9221476766634172, - "grad_norm": 0.0027445184532552958, - "learning_rate": 0.00019999958106118367, - "loss": 46.0, - "step": 12061 - }, - { - "epoch": 0.922224133646807, - "grad_norm": 0.0008998824050650001, - "learning_rate": 0.00019999958099165316, - "loss": 46.0, - "step": 12062 - }, - { - "epoch": 0.9223005906301966, - "grad_norm": 0.002928989240899682, - "learning_rate": 0.00019999958092211694, - "loss": 46.0, - "step": 12063 - }, - { - "epoch": 0.9223770476135864, - "grad_norm": 0.002669268986210227, - "learning_rate": 0.00019999958085257488, - "loss": 46.0, - "step": 12064 - }, - { - "epoch": 0.9224535045969762, - "grad_norm": 0.0023691777605563402, - "learning_rate": 0.0001999995807830271, - "loss": 46.0, - "step": 12065 - }, - { - "epoch": 0.9225299615803658, - "grad_norm": 0.0011108532780781388, - "learning_rate": 0.00019999958071347356, - "loss": 46.0, - "step": 12066 - }, - { - "epoch": 0.9226064185637556, - "grad_norm": 0.0008968866313807666, - "learning_rate": 0.00019999958064391424, - "loss": 46.0, - "step": 12067 - }, - { - "epoch": 0.9226828755471453, - "grad_norm": 0.0010244551813229918, - "learning_rate": 0.00019999958057434914, - "loss": 46.0, - "step": 12068 - }, - { - "epoch": 0.922759332530535, - "grad_norm": 0.001125934300944209, - "learning_rate": 0.00019999958050477827, - "loss": 46.0, - "step": 12069 - }, - { - "epoch": 0.9228357895139248, - "grad_norm": 0.00163006572984159, - "learning_rate": 0.00019999958043520163, - "loss": 46.0, - "step": 12070 - }, - { - "epoch": 0.9229122464973144, - "grad_norm": 0.0016496857861056924, - "learning_rate": 0.00019999958036561924, - "loss": 46.0, - "step": 12071 - }, - { - "epoch": 0.9229887034807042, - "grad_norm": 0.005408870987594128, - "learning_rate": 0.00019999958029603106, - "loss": 46.0, - "step": 12072 - }, - { - "epoch": 0.9230651604640939, - "grad_norm": 0.0012288877042010427, - "learning_rate": 0.00019999958022643712, - "loss": 46.0, - "step": 12073 - }, - { - "epoch": 0.9231416174474836, - "grad_norm": 0.006090317387133837, - "learning_rate": 0.0001999995801568374, - "loss": 46.0, - "step": 12074 - }, - { - "epoch": 0.9232180744308733, - "grad_norm": 0.005107733886688948, - "learning_rate": 0.00019999958008723193, - "loss": 46.0, - "step": 12075 - }, - { - "epoch": 0.9232945314142631, - "grad_norm": 0.003579028183594346, - "learning_rate": 0.00019999958001762065, - "loss": 46.0, - "step": 12076 - }, - { - "epoch": 0.9233709883976527, - "grad_norm": 0.0010870733531191945, - "learning_rate": 0.00019999957994800365, - "loss": 46.0, - "step": 12077 - }, - { - "epoch": 0.9234474453810425, - "grad_norm": 0.0013418698217719793, - "learning_rate": 0.00019999957987838085, - "loss": 46.0, - "step": 12078 - }, - { - "epoch": 0.9235239023644322, - "grad_norm": 0.00044725221232511103, - "learning_rate": 0.0001999995798087523, - "loss": 46.0, - "step": 12079 - }, - { - "epoch": 0.9236003593478219, - "grad_norm": 0.003975850064307451, - "learning_rate": 0.00019999957973911795, - "loss": 46.0, - "step": 12080 - }, - { - "epoch": 0.9236768163312117, - "grad_norm": 0.0017167160985991359, - "learning_rate": 0.00019999957966947786, - "loss": 46.0, - "step": 12081 - }, - { - "epoch": 0.9237532733146013, - "grad_norm": 0.005557652097195387, - "learning_rate": 0.000199999579599832, - "loss": 46.0, - "step": 12082 - }, - { - "epoch": 0.9238297302979911, - "grad_norm": 0.0005236169672571123, - "learning_rate": 0.00019999957953018035, - "loss": 46.0, - "step": 12083 - }, - { - "epoch": 0.9239061872813809, - "grad_norm": 0.0010544555261731148, - "learning_rate": 0.00019999957946052293, - "loss": 46.0, - "step": 12084 - }, - { - "epoch": 0.9239826442647705, - "grad_norm": 0.0007437972817569971, - "learning_rate": 0.00019999957939085977, - "loss": 46.0, - "step": 12085 - }, - { - "epoch": 0.9240591012481603, - "grad_norm": 0.0011064991122111678, - "learning_rate": 0.0001999995793211908, - "loss": 46.0, - "step": 12086 - }, - { - "epoch": 0.92413555823155, - "grad_norm": 0.0014206628547981381, - "learning_rate": 0.0001999995792515161, - "loss": 46.0, - "step": 12087 - }, - { - "epoch": 0.9242120152149397, - "grad_norm": 0.0007029398693703115, - "learning_rate": 0.00019999957918183562, - "loss": 46.0, - "step": 12088 - }, - { - "epoch": 0.9242884721983294, - "grad_norm": 0.0015322756953537464, - "learning_rate": 0.00019999957911214937, - "loss": 46.0, - "step": 12089 - }, - { - "epoch": 0.9243649291817191, - "grad_norm": 0.00215447461232543, - "learning_rate": 0.00019999957904245737, - "loss": 46.0, - "step": 12090 - }, - { - "epoch": 0.9244413861651088, - "grad_norm": 0.0012567087542265654, - "learning_rate": 0.00019999957897275957, - "loss": 46.0, - "step": 12091 - }, - { - "epoch": 0.9245178431484986, - "grad_norm": 0.0009863495361059904, - "learning_rate": 0.000199999578903056, - "loss": 46.0, - "step": 12092 - }, - { - "epoch": 0.9245943001318883, - "grad_norm": 0.0005452686455100775, - "learning_rate": 0.00019999957883334664, - "loss": 46.0, - "step": 12093 - }, - { - "epoch": 0.924670757115278, - "grad_norm": 0.001038632821291685, - "learning_rate": 0.00019999957876363155, - "loss": 46.0, - "step": 12094 - }, - { - "epoch": 0.9247472140986678, - "grad_norm": 0.0009294984047301114, - "learning_rate": 0.00019999957869391068, - "loss": 46.0, - "step": 12095 - }, - { - "epoch": 0.9248236710820574, - "grad_norm": 0.001106839976273477, - "learning_rate": 0.00019999957862418404, - "loss": 46.0, - "step": 12096 - }, - { - "epoch": 0.9249001280654472, - "grad_norm": 0.0019431129330769181, - "learning_rate": 0.00019999957855445163, - "loss": 46.0, - "step": 12097 - }, - { - "epoch": 0.924976585048837, - "grad_norm": 0.002364834537729621, - "learning_rate": 0.00019999957848471347, - "loss": 46.0, - "step": 12098 - }, - { - "epoch": 0.9250530420322266, - "grad_norm": 0.001127138384617865, - "learning_rate": 0.00019999957841496953, - "loss": 46.0, - "step": 12099 - }, - { - "epoch": 0.9251294990156164, - "grad_norm": 0.0011678830487653613, - "learning_rate": 0.0001999995783452198, - "loss": 46.0, - "step": 12100 - }, - { - "epoch": 0.925205955999006, - "grad_norm": 0.0008586464100517333, - "learning_rate": 0.00019999957827546432, - "loss": 46.0, - "step": 12101 - }, - { - "epoch": 0.9252824129823958, - "grad_norm": 0.0038136092480272055, - "learning_rate": 0.00019999957820570306, - "loss": 46.0, - "step": 12102 - }, - { - "epoch": 0.9253588699657855, - "grad_norm": 0.0007142901886254549, - "learning_rate": 0.000199999578135936, - "loss": 46.0, - "step": 12103 - }, - { - "epoch": 0.9254353269491752, - "grad_norm": 0.00046255378401838243, - "learning_rate": 0.00019999957806616324, - "loss": 46.0, - "step": 12104 - }, - { - "epoch": 0.925511783932565, - "grad_norm": 0.0012896499829366803, - "learning_rate": 0.00019999957799638466, - "loss": 46.0, - "step": 12105 - }, - { - "epoch": 0.9255882409159547, - "grad_norm": 0.0020871255546808243, - "learning_rate": 0.00019999957792660031, - "loss": 46.0, - "step": 12106 - }, - { - "epoch": 0.9256646978993444, - "grad_norm": 0.0013764593750238419, - "learning_rate": 0.00019999957785681022, - "loss": 46.0, - "step": 12107 - }, - { - "epoch": 0.9257411548827341, - "grad_norm": 0.0007337106508202851, - "learning_rate": 0.00019999957778701435, - "loss": 46.0, - "step": 12108 - }, - { - "epoch": 0.9258176118661238, - "grad_norm": 0.0021013757213950157, - "learning_rate": 0.00019999957771721269, - "loss": 46.0, - "step": 12109 - }, - { - "epoch": 0.9258940688495135, - "grad_norm": 0.002680626232177019, - "learning_rate": 0.0001999995776474053, - "loss": 46.0, - "step": 12110 - }, - { - "epoch": 0.9259705258329033, - "grad_norm": 0.0012728364672511816, - "learning_rate": 0.0001999995775775921, - "loss": 46.0, - "step": 12111 - }, - { - "epoch": 0.9260469828162929, - "grad_norm": 0.0010688784532248974, - "learning_rate": 0.00019999957750777315, - "loss": 46.0, - "step": 12112 - }, - { - "epoch": 0.9261234397996827, - "grad_norm": 0.0008296940359286964, - "learning_rate": 0.00019999957743794844, - "loss": 46.0, - "step": 12113 - }, - { - "epoch": 0.9261998967830725, - "grad_norm": 0.00026726440410129726, - "learning_rate": 0.00019999957736811794, - "loss": 46.0, - "step": 12114 - }, - { - "epoch": 0.9262763537664621, - "grad_norm": 0.009986361488699913, - "learning_rate": 0.00019999957729828168, - "loss": 46.0, - "step": 12115 - }, - { - "epoch": 0.9263528107498519, - "grad_norm": 0.010265923105180264, - "learning_rate": 0.00019999957722843968, - "loss": 46.0, - "step": 12116 - }, - { - "epoch": 0.9264292677332416, - "grad_norm": 0.0009398945840075612, - "learning_rate": 0.00019999957715859185, - "loss": 46.0, - "step": 12117 - }, - { - "epoch": 0.9265057247166313, - "grad_norm": 0.0010210475884377956, - "learning_rate": 0.00019999957708873828, - "loss": 46.0, - "step": 12118 - }, - { - "epoch": 0.926582181700021, - "grad_norm": 0.002771566156297922, - "learning_rate": 0.00019999957701887896, - "loss": 46.0, - "step": 12119 - }, - { - "epoch": 0.9266586386834107, - "grad_norm": 0.0021849293261766434, - "learning_rate": 0.00019999957694901387, - "loss": 46.0, - "step": 12120 - }, - { - "epoch": 0.9267350956668005, - "grad_norm": 0.00046759648830629885, - "learning_rate": 0.00019999957687914297, - "loss": 46.0, - "step": 12121 - }, - { - "epoch": 0.9268115526501902, - "grad_norm": 0.0012820098781958222, - "learning_rate": 0.00019999957680926633, - "loss": 46.0, - "step": 12122 - }, - { - "epoch": 0.9268880096335799, - "grad_norm": 0.004446228034794331, - "learning_rate": 0.0001999995767393839, - "loss": 46.0, - "step": 12123 - }, - { - "epoch": 0.9269644666169696, - "grad_norm": 0.000262592569924891, - "learning_rate": 0.0001999995766694957, - "loss": 46.0, - "step": 12124 - }, - { - "epoch": 0.9270409236003594, - "grad_norm": 0.0006439832504838705, - "learning_rate": 0.00019999957659960177, - "loss": 46.0, - "step": 12125 - }, - { - "epoch": 0.927117380583749, - "grad_norm": 0.0006937747821211815, - "learning_rate": 0.00019999957652970204, - "loss": 46.0, - "step": 12126 - }, - { - "epoch": 0.9271938375671388, - "grad_norm": 0.0019572488963603973, - "learning_rate": 0.00019999957645979656, - "loss": 46.0, - "step": 12127 - }, - { - "epoch": 0.9272702945505286, - "grad_norm": 0.0122261643409729, - "learning_rate": 0.00019999957638988528, - "loss": 46.0, - "step": 12128 - }, - { - "epoch": 0.9273467515339182, - "grad_norm": 0.0009052679524756968, - "learning_rate": 0.00019999957631996825, - "loss": 46.0, - "step": 12129 - }, - { - "epoch": 0.927423208517308, - "grad_norm": 0.0015283863758668303, - "learning_rate": 0.00019999957625004545, - "loss": 46.0, - "step": 12130 - }, - { - "epoch": 0.9274996655006976, - "grad_norm": 0.0026827522087842226, - "learning_rate": 0.0001999995761801169, - "loss": 46.0, - "step": 12131 - }, - { - "epoch": 0.9275761224840874, - "grad_norm": 0.0002819252840708941, - "learning_rate": 0.00019999957611018253, - "loss": 46.0, - "step": 12132 - }, - { - "epoch": 0.9276525794674771, - "grad_norm": 0.0007557621574960649, - "learning_rate": 0.00019999957604024244, - "loss": 46.0, - "step": 12133 - }, - { - "epoch": 0.9277290364508668, - "grad_norm": 0.0011717005399987102, - "learning_rate": 0.00019999957597029657, - "loss": 46.0, - "step": 12134 - }, - { - "epoch": 0.9278054934342566, - "grad_norm": 0.0007860861369408667, - "learning_rate": 0.00019999957590034493, - "loss": 46.0, - "step": 12135 - }, - { - "epoch": 0.9278819504176463, - "grad_norm": 0.004998686723411083, - "learning_rate": 0.00019999957583038752, - "loss": 46.0, - "step": 12136 - }, - { - "epoch": 0.927958407401036, - "grad_norm": 0.004086183849722147, - "learning_rate": 0.00019999957576042433, - "loss": 46.0, - "step": 12137 - }, - { - "epoch": 0.9280348643844257, - "grad_norm": 0.0010672503849491477, - "learning_rate": 0.00019999957569045534, - "loss": 46.0, - "step": 12138 - }, - { - "epoch": 0.9281113213678154, - "grad_norm": 0.0057138013653457165, - "learning_rate": 0.0001999995756204806, - "loss": 46.0, - "step": 12139 - }, - { - "epoch": 0.9281877783512051, - "grad_norm": 0.0020797853358089924, - "learning_rate": 0.00019999957555050016, - "loss": 46.0, - "step": 12140 - }, - { - "epoch": 0.9282642353345949, - "grad_norm": 0.00601800624281168, - "learning_rate": 0.00019999957548051388, - "loss": 46.0, - "step": 12141 - }, - { - "epoch": 0.9283406923179846, - "grad_norm": 0.002383144572377205, - "learning_rate": 0.00019999957541052182, - "loss": 46.0, - "step": 12142 - }, - { - "epoch": 0.9284171493013743, - "grad_norm": 0.011489329859614372, - "learning_rate": 0.00019999957534052402, - "loss": 46.0, - "step": 12143 - }, - { - "epoch": 0.9284936062847641, - "grad_norm": 0.004554878454655409, - "learning_rate": 0.00019999957527052048, - "loss": 46.0, - "step": 12144 - }, - { - "epoch": 0.9285700632681537, - "grad_norm": 0.0009480147855356336, - "learning_rate": 0.0001999995752005111, - "loss": 46.0, - "step": 12145 - }, - { - "epoch": 0.9286465202515435, - "grad_norm": 0.002283143810927868, - "learning_rate": 0.000199999575130496, - "loss": 46.0, - "step": 12146 - }, - { - "epoch": 0.9287229772349332, - "grad_norm": 0.006962282117456198, - "learning_rate": 0.00019999957506047512, - "loss": 46.0, - "step": 12147 - }, - { - "epoch": 0.9287994342183229, - "grad_norm": 0.0014182819286361337, - "learning_rate": 0.00019999957499044845, - "loss": 46.0, - "step": 12148 - }, - { - "epoch": 0.9288758912017127, - "grad_norm": 0.0007593846530653536, - "learning_rate": 0.00019999957492041606, - "loss": 46.0, - "step": 12149 - }, - { - "epoch": 0.9289523481851023, - "grad_norm": 0.02158031240105629, - "learning_rate": 0.00019999957485037788, - "loss": 46.0, - "step": 12150 - }, - { - "epoch": 0.9290288051684921, - "grad_norm": 0.0009281465318053961, - "learning_rate": 0.00019999957478033392, - "loss": 46.0, - "step": 12151 - }, - { - "epoch": 0.9291052621518818, - "grad_norm": 0.00036368679138831794, - "learning_rate": 0.00019999957471028416, - "loss": 46.0, - "step": 12152 - }, - { - "epoch": 0.9291817191352715, - "grad_norm": 0.006944100372493267, - "learning_rate": 0.0001999995746402287, - "loss": 46.0, - "step": 12153 - }, - { - "epoch": 0.9292581761186612, - "grad_norm": 0.002146515529602766, - "learning_rate": 0.0001999995745701674, - "loss": 46.0, - "step": 12154 - }, - { - "epoch": 0.929334633102051, - "grad_norm": 0.0015281970845535398, - "learning_rate": 0.00019999957450010037, - "loss": 46.0, - "step": 12155 - }, - { - "epoch": 0.9294110900854406, - "grad_norm": 0.001936238957569003, - "learning_rate": 0.00019999957443002757, - "loss": 46.0, - "step": 12156 - }, - { - "epoch": 0.9294875470688304, - "grad_norm": 0.002263550413772464, - "learning_rate": 0.00019999957435994897, - "loss": 46.0, - "step": 12157 - }, - { - "epoch": 0.9295640040522202, - "grad_norm": 0.001114763435907662, - "learning_rate": 0.00019999957428986465, - "loss": 46.0, - "step": 12158 - }, - { - "epoch": 0.9296404610356098, - "grad_norm": 0.0018859588308259845, - "learning_rate": 0.00019999957421977453, - "loss": 46.0, - "step": 12159 - }, - { - "epoch": 0.9297169180189996, - "grad_norm": 0.0029676223639398813, - "learning_rate": 0.00019999957414967864, - "loss": 46.0, - "step": 12160 - }, - { - "epoch": 0.9297933750023892, - "grad_norm": 0.0008736909367144108, - "learning_rate": 0.000199999574079577, - "loss": 46.0, - "step": 12161 - }, - { - "epoch": 0.929869831985779, - "grad_norm": 0.000999182229861617, - "learning_rate": 0.00019999957400946956, - "loss": 46.0, - "step": 12162 - }, - { - "epoch": 0.9299462889691688, - "grad_norm": 0.0012130794348195195, - "learning_rate": 0.00019999957393935638, - "loss": 46.0, - "step": 12163 - }, - { - "epoch": 0.9300227459525584, - "grad_norm": 0.0031265621073544025, - "learning_rate": 0.00019999957386923742, - "loss": 46.0, - "step": 12164 - }, - { - "epoch": 0.9300992029359482, - "grad_norm": 0.00073995441198349, - "learning_rate": 0.00019999957379911268, - "loss": 46.0, - "step": 12165 - }, - { - "epoch": 0.9301756599193379, - "grad_norm": 0.005015944130718708, - "learning_rate": 0.0001999995737289822, - "loss": 46.0, - "step": 12166 - }, - { - "epoch": 0.9302521169027276, - "grad_norm": 0.0020655386615544558, - "learning_rate": 0.00019999957365884592, - "loss": 46.0, - "step": 12167 - }, - { - "epoch": 0.9303285738861173, - "grad_norm": 0.006768765859305859, - "learning_rate": 0.00019999957358870387, - "loss": 46.0, - "step": 12168 - }, - { - "epoch": 0.930405030869507, - "grad_norm": 0.002902182750403881, - "learning_rate": 0.00019999957351855607, - "loss": 46.0, - "step": 12169 - }, - { - "epoch": 0.9304814878528967, - "grad_norm": 0.0009677843190729618, - "learning_rate": 0.0001999995734484025, - "loss": 46.0, - "step": 12170 - }, - { - "epoch": 0.9305579448362865, - "grad_norm": 0.0009656138718128204, - "learning_rate": 0.00019999957337824315, - "loss": 46.0, - "step": 12171 - }, - { - "epoch": 0.9306344018196762, - "grad_norm": 0.0026252625975757837, - "learning_rate": 0.00019999957330807803, - "loss": 46.0, - "step": 12172 - }, - { - "epoch": 0.9307108588030659, - "grad_norm": 0.0032303030602633953, - "learning_rate": 0.00019999957323790714, - "loss": 46.0, - "step": 12173 - }, - { - "epoch": 0.9307873157864557, - "grad_norm": 0.0008673305273987353, - "learning_rate": 0.00019999957316773048, - "loss": 46.0, - "step": 12174 - }, - { - "epoch": 0.9308637727698453, - "grad_norm": 0.0017201764276251197, - "learning_rate": 0.00019999957309754806, - "loss": 46.0, - "step": 12175 - }, - { - "epoch": 0.9309402297532351, - "grad_norm": 0.0009072452085092664, - "learning_rate": 0.00019999957302735988, - "loss": 46.0, - "step": 12176 - }, - { - "epoch": 0.9310166867366249, - "grad_norm": 0.0005744668887928128, - "learning_rate": 0.0001999995729571659, - "loss": 46.0, - "step": 12177 - }, - { - "epoch": 0.9310931437200145, - "grad_norm": 0.0011844061082229018, - "learning_rate": 0.00019999957288696619, - "loss": 46.0, - "step": 12178 - }, - { - "epoch": 0.9311696007034043, - "grad_norm": 0.0006330212927423418, - "learning_rate": 0.00019999957281676065, - "loss": 46.0, - "step": 12179 - }, - { - "epoch": 0.9312460576867939, - "grad_norm": 0.001336416695266962, - "learning_rate": 0.0001999995727465494, - "loss": 46.0, - "step": 12180 - }, - { - "epoch": 0.9313225146701837, - "grad_norm": 0.0029856860637664795, - "learning_rate": 0.00019999957267633235, - "loss": 46.0, - "step": 12181 - }, - { - "epoch": 0.9313989716535734, - "grad_norm": 0.004477132577449083, - "learning_rate": 0.00019999957260610955, - "loss": 46.0, - "step": 12182 - }, - { - "epoch": 0.9314754286369631, - "grad_norm": 0.0030429819598793983, - "learning_rate": 0.00019999957253588098, - "loss": 46.0, - "step": 12183 - }, - { - "epoch": 0.9315518856203528, - "grad_norm": 0.0019116413313895464, - "learning_rate": 0.00019999957246564663, - "loss": 46.0, - "step": 12184 - }, - { - "epoch": 0.9316283426037426, - "grad_norm": 0.0015499474247917533, - "learning_rate": 0.0001999995723954065, - "loss": 46.0, - "step": 12185 - }, - { - "epoch": 0.9317047995871323, - "grad_norm": 0.002475033514201641, - "learning_rate": 0.0001999995723251606, - "loss": 46.0, - "step": 12186 - }, - { - "epoch": 0.931781256570522, - "grad_norm": 0.0020716821309179068, - "learning_rate": 0.00019999957225490893, - "loss": 46.0, - "step": 12187 - }, - { - "epoch": 0.9318577135539118, - "grad_norm": 0.002118935575708747, - "learning_rate": 0.00019999957218465154, - "loss": 46.0, - "step": 12188 - }, - { - "epoch": 0.9319341705373014, - "grad_norm": 0.0021088302601128817, - "learning_rate": 0.0001999995721143883, - "loss": 46.0, - "step": 12189 - }, - { - "epoch": 0.9320106275206912, - "grad_norm": 0.0006917533464729786, - "learning_rate": 0.00019999957204411937, - "loss": 46.0, - "step": 12190 - }, - { - "epoch": 0.9320870845040808, - "grad_norm": 0.0017022602260112762, - "learning_rate": 0.0001999995719738446, - "loss": 46.0, - "step": 12191 - }, - { - "epoch": 0.9321635414874706, - "grad_norm": 0.0008511582273058593, - "learning_rate": 0.0001999995719035641, - "loss": 46.0, - "step": 12192 - }, - { - "epoch": 0.9322399984708604, - "grad_norm": 0.0029274257831275463, - "learning_rate": 0.00019999957183327785, - "loss": 46.0, - "step": 12193 - }, - { - "epoch": 0.93231645545425, - "grad_norm": 0.000857875740621239, - "learning_rate": 0.00019999957176298577, - "loss": 46.0, - "step": 12194 - }, - { - "epoch": 0.9323929124376398, - "grad_norm": 0.0006062813336029649, - "learning_rate": 0.00019999957169268798, - "loss": 46.0, - "step": 12195 - }, - { - "epoch": 0.9324693694210295, - "grad_norm": 0.0007906821556389332, - "learning_rate": 0.00019999957162238438, - "loss": 46.0, - "step": 12196 - }, - { - "epoch": 0.9325458264044192, - "grad_norm": 0.0007957852212712169, - "learning_rate": 0.00019999957155207506, - "loss": 46.0, - "step": 12197 - }, - { - "epoch": 0.932622283387809, - "grad_norm": 0.0011687454534694552, - "learning_rate": 0.0001999995714817599, - "loss": 46.0, - "step": 12198 - }, - { - "epoch": 0.9326987403711987, - "grad_norm": 0.0018368266755715013, - "learning_rate": 0.00019999957141143902, - "loss": 46.0, - "step": 12199 - }, - { - "epoch": 0.9327751973545884, - "grad_norm": 0.0008439357043243945, - "learning_rate": 0.00019999957134111238, - "loss": 46.0, - "step": 12200 - }, - { - "epoch": 0.9328516543379781, - "grad_norm": 0.0007795752608217299, - "learning_rate": 0.00019999957127077992, - "loss": 46.0, - "step": 12201 - }, - { - "epoch": 0.9329281113213678, - "grad_norm": 0.0008637941791675985, - "learning_rate": 0.00019999957120044176, - "loss": 46.0, - "step": 12202 - }, - { - "epoch": 0.9330045683047575, - "grad_norm": 0.0037607033737003803, - "learning_rate": 0.00019999957113009774, - "loss": 46.0, - "step": 12203 - }, - { - "epoch": 0.9330810252881473, - "grad_norm": 0.0033118084538728, - "learning_rate": 0.000199999571059748, - "loss": 46.0, - "step": 12204 - }, - { - "epoch": 0.9331574822715369, - "grad_norm": 0.0006602016510441899, - "learning_rate": 0.00019999957098939253, - "loss": 46.0, - "step": 12205 - }, - { - "epoch": 0.9332339392549267, - "grad_norm": 0.0010778333526104689, - "learning_rate": 0.00019999957091903126, - "loss": 46.0, - "step": 12206 - }, - { - "epoch": 0.9333103962383165, - "grad_norm": 0.0034906051587313414, - "learning_rate": 0.0001999995708486642, - "loss": 46.0, - "step": 12207 - }, - { - "epoch": 0.9333868532217061, - "grad_norm": 0.0019480622140690684, - "learning_rate": 0.00019999957077829138, - "loss": 46.0, - "step": 12208 - }, - { - "epoch": 0.9334633102050959, - "grad_norm": 0.0034833564423024654, - "learning_rate": 0.0001999995707079128, - "loss": 46.0, - "step": 12209 - }, - { - "epoch": 0.9335397671884855, - "grad_norm": 0.0009912169771268964, - "learning_rate": 0.00019999957063752843, - "loss": 46.0, - "step": 12210 - }, - { - "epoch": 0.9336162241718753, - "grad_norm": 0.0009244707180187106, - "learning_rate": 0.0001999995705671383, - "loss": 46.0, - "step": 12211 - }, - { - "epoch": 0.933692681155265, - "grad_norm": 0.0034028750378638506, - "learning_rate": 0.00019999957049674242, - "loss": 46.0, - "step": 12212 - }, - { - "epoch": 0.9337691381386547, - "grad_norm": 0.0011907349107787013, - "learning_rate": 0.00019999957042634073, - "loss": 46.0, - "step": 12213 - }, - { - "epoch": 0.9338455951220445, - "grad_norm": 0.002029647119343281, - "learning_rate": 0.00019999957035593332, - "loss": 46.0, - "step": 12214 - }, - { - "epoch": 0.9339220521054342, - "grad_norm": 0.0009335228241980076, - "learning_rate": 0.0001999995702855201, - "loss": 46.0, - "step": 12215 - }, - { - "epoch": 0.9339985090888239, - "grad_norm": 0.0028611645102500916, - "learning_rate": 0.00019999957021510115, - "loss": 46.0, - "step": 12216 - }, - { - "epoch": 0.9340749660722136, - "grad_norm": 0.0008055506623350084, - "learning_rate": 0.00019999957014467642, - "loss": 46.0, - "step": 12217 - }, - { - "epoch": 0.9341514230556034, - "grad_norm": 0.0021064123138785362, - "learning_rate": 0.00019999957007424591, - "loss": 46.0, - "step": 12218 - }, - { - "epoch": 0.934227880038993, - "grad_norm": 0.0012956928694620728, - "learning_rate": 0.00019999957000380964, - "loss": 46.0, - "step": 12219 - }, - { - "epoch": 0.9343043370223828, - "grad_norm": 0.002445425372570753, - "learning_rate": 0.00019999956993336756, - "loss": 46.0, - "step": 12220 - }, - { - "epoch": 0.9343807940057725, - "grad_norm": 0.005571737419813871, - "learning_rate": 0.00019999956986291973, - "loss": 46.0, - "step": 12221 - }, - { - "epoch": 0.9344572509891622, - "grad_norm": 0.0006551257101818919, - "learning_rate": 0.00019999956979246613, - "loss": 46.0, - "step": 12222 - }, - { - "epoch": 0.934533707972552, - "grad_norm": 0.0006578739848919213, - "learning_rate": 0.0001999995697220068, - "loss": 46.0, - "step": 12223 - }, - { - "epoch": 0.9346101649559416, - "grad_norm": 0.0012671825243160129, - "learning_rate": 0.00019999956965154167, - "loss": 46.0, - "step": 12224 - }, - { - "epoch": 0.9346866219393314, - "grad_norm": 0.0017036617500707507, - "learning_rate": 0.00019999956958107075, - "loss": 46.0, - "step": 12225 - }, - { - "epoch": 0.9347630789227211, - "grad_norm": 0.002068986650556326, - "learning_rate": 0.00019999956951059411, - "loss": 46.0, - "step": 12226 - }, - { - "epoch": 0.9348395359061108, - "grad_norm": 0.00040822228766046464, - "learning_rate": 0.00019999956944011168, - "loss": 46.0, - "step": 12227 - }, - { - "epoch": 0.9349159928895006, - "grad_norm": 0.0007027806714177132, - "learning_rate": 0.00019999956936962346, - "loss": 46.0, - "step": 12228 - }, - { - "epoch": 0.9349924498728903, - "grad_norm": 0.005172847304493189, - "learning_rate": 0.00019999956929912948, - "loss": 46.0, - "step": 12229 - }, - { - "epoch": 0.93506890685628, - "grad_norm": 0.0010630633914843202, - "learning_rate": 0.00019999956922862975, - "loss": 46.0, - "step": 12230 - }, - { - "epoch": 0.9351453638396697, - "grad_norm": 0.0009215231402777135, - "learning_rate": 0.00019999956915812424, - "loss": 46.0, - "step": 12231 - }, - { - "epoch": 0.9352218208230594, - "grad_norm": 0.0010831003310158849, - "learning_rate": 0.00019999956908761294, - "loss": 46.0, - "step": 12232 - }, - { - "epoch": 0.9352982778064491, - "grad_norm": 0.002291974378749728, - "learning_rate": 0.0001999995690170959, - "loss": 46.0, - "step": 12233 - }, - { - "epoch": 0.9353747347898389, - "grad_norm": 0.0016022274503484368, - "learning_rate": 0.0001999995689465731, - "loss": 46.0, - "step": 12234 - }, - { - "epoch": 0.9354511917732286, - "grad_norm": 0.0019400925375521183, - "learning_rate": 0.0001999995688760445, - "loss": 46.0, - "step": 12235 - }, - { - "epoch": 0.9355276487566183, - "grad_norm": 0.0031868666410446167, - "learning_rate": 0.00019999956880551012, - "loss": 46.0, - "step": 12236 - }, - { - "epoch": 0.9356041057400081, - "grad_norm": 0.0010111619485542178, - "learning_rate": 0.00019999956873497, - "loss": 46.0, - "step": 12237 - }, - { - "epoch": 0.9356805627233977, - "grad_norm": 0.0007766479975543916, - "learning_rate": 0.00019999956866442408, - "loss": 46.0, - "step": 12238 - }, - { - "epoch": 0.9357570197067875, - "grad_norm": 0.0019978678319603205, - "learning_rate": 0.00019999956859387245, - "loss": 46.0, - "step": 12239 - }, - { - "epoch": 0.9358334766901771, - "grad_norm": 0.0013941236538812518, - "learning_rate": 0.00019999956852331498, - "loss": 46.0, - "step": 12240 - }, - { - "epoch": 0.9359099336735669, - "grad_norm": 0.0013099727220833302, - "learning_rate": 0.0001999995684527518, - "loss": 46.0, - "step": 12241 - }, - { - "epoch": 0.9359863906569567, - "grad_norm": 0.0008953211945481598, - "learning_rate": 0.00019999956838218278, - "loss": 46.0, - "step": 12242 - }, - { - "epoch": 0.9360628476403463, - "grad_norm": 0.001701546716503799, - "learning_rate": 0.00019999956831160805, - "loss": 46.0, - "step": 12243 - }, - { - "epoch": 0.9361393046237361, - "grad_norm": 0.002749634673818946, - "learning_rate": 0.00019999956824102755, - "loss": 46.0, - "step": 12244 - }, - { - "epoch": 0.9362157616071258, - "grad_norm": 0.0016872003907337785, - "learning_rate": 0.00019999956817044124, - "loss": 46.0, - "step": 12245 - }, - { - "epoch": 0.9362922185905155, - "grad_norm": 0.0015682270750403404, - "learning_rate": 0.00019999956809984922, - "loss": 46.0, - "step": 12246 - }, - { - "epoch": 0.9363686755739052, - "grad_norm": 0.0013069581473246217, - "learning_rate": 0.00019999956802925137, - "loss": 46.0, - "step": 12247 - }, - { - "epoch": 0.936445132557295, - "grad_norm": 0.0005373494932428002, - "learning_rate": 0.00019999956795864777, - "loss": 46.0, - "step": 12248 - }, - { - "epoch": 0.9365215895406847, - "grad_norm": 0.006909727584570646, - "learning_rate": 0.00019999956788803843, - "loss": 46.0, - "step": 12249 - }, - { - "epoch": 0.9365980465240744, - "grad_norm": 0.002061674138531089, - "learning_rate": 0.00019999956781742328, - "loss": 46.0, - "step": 12250 - }, - { - "epoch": 0.9366745035074641, - "grad_norm": 0.0009205021196976304, - "learning_rate": 0.00019999956774680236, - "loss": 46.0, - "step": 12251 - }, - { - "epoch": 0.9367509604908538, - "grad_norm": 0.0012277847854420543, - "learning_rate": 0.00019999956767617573, - "loss": 46.0, - "step": 12252 - }, - { - "epoch": 0.9368274174742436, - "grad_norm": 0.0014238427393138409, - "learning_rate": 0.0001999995676055433, - "loss": 46.0, - "step": 12253 - }, - { - "epoch": 0.9369038744576332, - "grad_norm": 0.004767351783812046, - "learning_rate": 0.00019999956753490508, - "loss": 46.0, - "step": 12254 - }, - { - "epoch": 0.936980331441023, - "grad_norm": 0.005136764608323574, - "learning_rate": 0.0001999995674642611, - "loss": 46.0, - "step": 12255 - }, - { - "epoch": 0.9370567884244128, - "grad_norm": 0.0005738785257562995, - "learning_rate": 0.00019999956739361134, - "loss": 46.0, - "step": 12256 - }, - { - "epoch": 0.9371332454078024, - "grad_norm": 0.001265040598809719, - "learning_rate": 0.0001999995673229558, - "loss": 46.0, - "step": 12257 - }, - { - "epoch": 0.9372097023911922, - "grad_norm": 0.0011224690824747086, - "learning_rate": 0.00019999956725229453, - "loss": 46.0, - "step": 12258 - }, - { - "epoch": 0.9372861593745819, - "grad_norm": 0.003243668470531702, - "learning_rate": 0.00019999956718162748, - "loss": 46.0, - "step": 12259 - }, - { - "epoch": 0.9373626163579716, - "grad_norm": 0.0005513266660273075, - "learning_rate": 0.00019999956711095465, - "loss": 46.0, - "step": 12260 - }, - { - "epoch": 0.9374390733413613, - "grad_norm": 0.007840940728783607, - "learning_rate": 0.00019999956704027606, - "loss": 46.0, - "step": 12261 - }, - { - "epoch": 0.937515530324751, - "grad_norm": 0.002623951528221369, - "learning_rate": 0.00019999956696959168, - "loss": 46.0, - "step": 12262 - }, - { - "epoch": 0.9375919873081408, - "grad_norm": 0.00418903399258852, - "learning_rate": 0.00019999956689890157, - "loss": 46.0, - "step": 12263 - }, - { - "epoch": 0.9376684442915305, - "grad_norm": 0.006563264410942793, - "learning_rate": 0.00019999956682820565, - "loss": 46.0, - "step": 12264 - }, - { - "epoch": 0.9377449012749202, - "grad_norm": 0.0010737586999312043, - "learning_rate": 0.00019999956675750396, - "loss": 46.0, - "step": 12265 - }, - { - "epoch": 0.9378213582583099, - "grad_norm": 0.0031286252196878195, - "learning_rate": 0.00019999956668679652, - "loss": 46.0, - "step": 12266 - }, - { - "epoch": 0.9378978152416997, - "grad_norm": 0.0010336136911064386, - "learning_rate": 0.0001999995666160833, - "loss": 46.0, - "step": 12267 - }, - { - "epoch": 0.9379742722250893, - "grad_norm": 0.0005840155063197017, - "learning_rate": 0.00019999956654536433, - "loss": 46.0, - "step": 12268 - }, - { - "epoch": 0.9380507292084791, - "grad_norm": 0.0002955625532194972, - "learning_rate": 0.00019999956647463957, - "loss": 46.0, - "step": 12269 - }, - { - "epoch": 0.9381271861918687, - "grad_norm": 0.0015363151906058192, - "learning_rate": 0.00019999956640390907, - "loss": 46.0, - "step": 12270 - }, - { - "epoch": 0.9382036431752585, - "grad_norm": 0.001137645565904677, - "learning_rate": 0.00019999956633317276, - "loss": 46.0, - "step": 12271 - }, - { - "epoch": 0.9382801001586483, - "grad_norm": 0.001029876060783863, - "learning_rate": 0.0001999995662624307, - "loss": 46.0, - "step": 12272 - }, - { - "epoch": 0.9383565571420379, - "grad_norm": 0.0008531315252184868, - "learning_rate": 0.0001999995661916829, - "loss": 46.0, - "step": 12273 - }, - { - "epoch": 0.9384330141254277, - "grad_norm": 0.0009198343614116311, - "learning_rate": 0.0001999995661209293, - "loss": 46.0, - "step": 12274 - }, - { - "epoch": 0.9385094711088174, - "grad_norm": 0.001144756213761866, - "learning_rate": 0.0001999995660501699, - "loss": 46.0, - "step": 12275 - }, - { - "epoch": 0.9385859280922071, - "grad_norm": 0.000733811582904309, - "learning_rate": 0.00019999956597940478, - "loss": 46.0, - "step": 12276 - }, - { - "epoch": 0.9386623850755969, - "grad_norm": 0.001582833705469966, - "learning_rate": 0.00019999956590863386, - "loss": 46.0, - "step": 12277 - }, - { - "epoch": 0.9387388420589866, - "grad_norm": 0.0004762540338560939, - "learning_rate": 0.0001999995658378572, - "loss": 46.0, - "step": 12278 - }, - { - "epoch": 0.9388152990423763, - "grad_norm": 0.0022855489514768124, - "learning_rate": 0.00019999956576707476, - "loss": 46.0, - "step": 12279 - }, - { - "epoch": 0.938891756025766, - "grad_norm": 0.0014094322687014937, - "learning_rate": 0.00019999956569628652, - "loss": 46.0, - "step": 12280 - }, - { - "epoch": 0.9389682130091557, - "grad_norm": 0.0006910916999913752, - "learning_rate": 0.00019999956562549254, - "loss": 46.0, - "step": 12281 - }, - { - "epoch": 0.9390446699925454, - "grad_norm": 0.0011549191549420357, - "learning_rate": 0.00019999956555469278, - "loss": 46.0, - "step": 12282 - }, - { - "epoch": 0.9391211269759352, - "grad_norm": 0.0006775555666536093, - "learning_rate": 0.00019999956548388728, - "loss": 46.0, - "step": 12283 - }, - { - "epoch": 0.9391975839593248, - "grad_norm": 0.0043896036222577095, - "learning_rate": 0.00019999956541307597, - "loss": 46.0, - "step": 12284 - }, - { - "epoch": 0.9392740409427146, - "grad_norm": 0.0014866306446492672, - "learning_rate": 0.00019999956534225892, - "loss": 46.0, - "step": 12285 - }, - { - "epoch": 0.9393504979261044, - "grad_norm": 0.0008481458062306046, - "learning_rate": 0.00019999956527143607, - "loss": 46.0, - "step": 12286 - }, - { - "epoch": 0.939426954909494, - "grad_norm": 0.0006656827754341066, - "learning_rate": 0.00019999956520060747, - "loss": 46.0, - "step": 12287 - }, - { - "epoch": 0.9395034118928838, - "grad_norm": 0.0012219274649396539, - "learning_rate": 0.0001999995651297731, - "loss": 46.0, - "step": 12288 - }, - { - "epoch": 0.9395798688762735, - "grad_norm": 0.0010271830251440406, - "learning_rate": 0.000199999565058933, - "loss": 46.0, - "step": 12289 - }, - { - "epoch": 0.9396563258596632, - "grad_norm": 0.00046428918722085655, - "learning_rate": 0.00019999956498808704, - "loss": 46.0, - "step": 12290 - }, - { - "epoch": 0.939732782843053, - "grad_norm": 0.0013342913007363677, - "learning_rate": 0.00019999956491723538, - "loss": 46.0, - "step": 12291 - }, - { - "epoch": 0.9398092398264426, - "grad_norm": 0.0012896527769044042, - "learning_rate": 0.00019999956484637794, - "loss": 46.0, - "step": 12292 - }, - { - "epoch": 0.9398856968098324, - "grad_norm": 0.01178223267197609, - "learning_rate": 0.0001999995647755147, - "loss": 46.0, - "step": 12293 - }, - { - "epoch": 0.9399621537932221, - "grad_norm": 0.0011867908760905266, - "learning_rate": 0.00019999956470464572, - "loss": 46.0, - "step": 12294 - }, - { - "epoch": 0.9400386107766118, - "grad_norm": 0.0029271161183714867, - "learning_rate": 0.00019999956463377097, - "loss": 46.0, - "step": 12295 - }, - { - "epoch": 0.9401150677600015, - "grad_norm": 0.0008796355105005205, - "learning_rate": 0.00019999956456289044, - "loss": 46.0, - "step": 12296 - }, - { - "epoch": 0.9401915247433913, - "grad_norm": 0.0010045936796814203, - "learning_rate": 0.00019999956449200413, - "loss": 46.0, - "step": 12297 - }, - { - "epoch": 0.9402679817267809, - "grad_norm": 0.0005430988967418671, - "learning_rate": 0.00019999956442111208, - "loss": 46.0, - "step": 12298 - }, - { - "epoch": 0.9403444387101707, - "grad_norm": 0.0020924268756061792, - "learning_rate": 0.00019999956435021426, - "loss": 46.0, - "step": 12299 - }, - { - "epoch": 0.9404208956935605, - "grad_norm": 0.0008773243171162903, - "learning_rate": 0.00019999956427931064, - "loss": 46.0, - "step": 12300 - }, - { - "epoch": 0.9404973526769501, - "grad_norm": 0.006207504775375128, - "learning_rate": 0.00019999956420840127, - "loss": 46.0, - "step": 12301 - }, - { - "epoch": 0.9405738096603399, - "grad_norm": 0.017069486901164055, - "learning_rate": 0.00019999956413748612, - "loss": 46.0, - "step": 12302 - }, - { - "epoch": 0.9406502666437295, - "grad_norm": 0.0011084326542913914, - "learning_rate": 0.0001999995640665652, - "loss": 46.0, - "step": 12303 - }, - { - "epoch": 0.9407267236271193, - "grad_norm": 0.00774594210088253, - "learning_rate": 0.00019999956399563852, - "loss": 46.0, - "step": 12304 - }, - { - "epoch": 0.940803180610509, - "grad_norm": 0.0011596804251894355, - "learning_rate": 0.00019999956392470608, - "loss": 46.0, - "step": 12305 - }, - { - "epoch": 0.9408796375938987, - "grad_norm": 0.0007955761393532157, - "learning_rate": 0.00019999956385376787, - "loss": 46.0, - "step": 12306 - }, - { - "epoch": 0.9409560945772885, - "grad_norm": 0.0007006413070484996, - "learning_rate": 0.00019999956378282389, - "loss": 46.0, - "step": 12307 - }, - { - "epoch": 0.9410325515606782, - "grad_norm": 0.0021923750173300505, - "learning_rate": 0.0001999995637118741, - "loss": 46.0, - "step": 12308 - }, - { - "epoch": 0.9411090085440679, - "grad_norm": 0.001045380369760096, - "learning_rate": 0.00019999956364091857, - "loss": 46.0, - "step": 12309 - }, - { - "epoch": 0.9411854655274576, - "grad_norm": 0.0004303822643123567, - "learning_rate": 0.00019999956356995727, - "loss": 46.0, - "step": 12310 - }, - { - "epoch": 0.9412619225108473, - "grad_norm": 0.0014972623903304338, - "learning_rate": 0.00019999956349899022, - "loss": 46.0, - "step": 12311 - }, - { - "epoch": 0.941338379494237, - "grad_norm": 0.00041206591413356364, - "learning_rate": 0.00019999956342801737, - "loss": 46.0, - "step": 12312 - }, - { - "epoch": 0.9414148364776268, - "grad_norm": 0.0027731028385460377, - "learning_rate": 0.00019999956335703875, - "loss": 46.0, - "step": 12313 - }, - { - "epoch": 0.9414912934610165, - "grad_norm": 0.010360583662986755, - "learning_rate": 0.00019999956328605438, - "loss": 46.0, - "step": 12314 - }, - { - "epoch": 0.9415677504444062, - "grad_norm": 0.004714208655059338, - "learning_rate": 0.00019999956321506423, - "loss": 46.0, - "step": 12315 - }, - { - "epoch": 0.941644207427796, - "grad_norm": 0.0022166899871081114, - "learning_rate": 0.00019999956314406834, - "loss": 46.0, - "step": 12316 - }, - { - "epoch": 0.9417206644111856, - "grad_norm": 0.0011904215207323432, - "learning_rate": 0.00019999956307306666, - "loss": 46.0, - "step": 12317 - }, - { - "epoch": 0.9417971213945754, - "grad_norm": 0.0035876703914254904, - "learning_rate": 0.0001999995630020592, - "loss": 46.0, - "step": 12318 - }, - { - "epoch": 0.9418735783779651, - "grad_norm": 0.003940943628549576, - "learning_rate": 0.00019999956293104596, - "loss": 46.0, - "step": 12319 - }, - { - "epoch": 0.9419500353613548, - "grad_norm": 0.0007792055839672685, - "learning_rate": 0.000199999562860027, - "loss": 46.0, - "step": 12320 - }, - { - "epoch": 0.9420264923447446, - "grad_norm": 0.006641231942921877, - "learning_rate": 0.00019999956278900222, - "loss": 46.0, - "step": 12321 - }, - { - "epoch": 0.9421029493281342, - "grad_norm": 0.007249461021274328, - "learning_rate": 0.0001999995627179717, - "loss": 46.0, - "step": 12322 - }, - { - "epoch": 0.942179406311524, - "grad_norm": 0.0008969199261628091, - "learning_rate": 0.0001999995626469354, - "loss": 46.0, - "step": 12323 - }, - { - "epoch": 0.9422558632949137, - "grad_norm": 0.0011842675739899278, - "learning_rate": 0.0001999995625758933, - "loss": 46.0, - "step": 12324 - }, - { - "epoch": 0.9423323202783034, - "grad_norm": 0.0007077105692587793, - "learning_rate": 0.00019999956250484546, - "loss": 46.0, - "step": 12325 - }, - { - "epoch": 0.9424087772616931, - "grad_norm": 0.0017282310873270035, - "learning_rate": 0.00019999956243379186, - "loss": 46.0, - "step": 12326 - }, - { - "epoch": 0.9424852342450829, - "grad_norm": 0.0009200253407470882, - "learning_rate": 0.00019999956236273247, - "loss": 46.0, - "step": 12327 - }, - { - "epoch": 0.9425616912284726, - "grad_norm": 0.0015863269800320268, - "learning_rate": 0.00019999956229166735, - "loss": 46.0, - "step": 12328 - }, - { - "epoch": 0.9426381482118623, - "grad_norm": 0.002071093302220106, - "learning_rate": 0.00019999956222059643, - "loss": 46.0, - "step": 12329 - }, - { - "epoch": 0.9427146051952521, - "grad_norm": 0.0011401161318644881, - "learning_rate": 0.00019999956214951974, - "loss": 46.0, - "step": 12330 - }, - { - "epoch": 0.9427910621786417, - "grad_norm": 0.0006816425593569875, - "learning_rate": 0.00019999956207843728, - "loss": 46.0, - "step": 12331 - }, - { - "epoch": 0.9428675191620315, - "grad_norm": 0.001684514805674553, - "learning_rate": 0.00019999956200734907, - "loss": 46.0, - "step": 12332 - }, - { - "epoch": 0.9429439761454211, - "grad_norm": 0.004684693645685911, - "learning_rate": 0.00019999956193625506, - "loss": 46.0, - "step": 12333 - }, - { - "epoch": 0.9430204331288109, - "grad_norm": 0.0007201653206720948, - "learning_rate": 0.0001999995618651553, - "loss": 46.0, - "step": 12334 - }, - { - "epoch": 0.9430968901122007, - "grad_norm": 0.0011610996443778276, - "learning_rate": 0.00019999956179404978, - "loss": 46.0, - "step": 12335 - }, - { - "epoch": 0.9431733470955903, - "grad_norm": 0.0022103332448750734, - "learning_rate": 0.00019999956172293845, - "loss": 46.0, - "step": 12336 - }, - { - "epoch": 0.9432498040789801, - "grad_norm": 0.001221153186634183, - "learning_rate": 0.0001999995616518214, - "loss": 46.0, - "step": 12337 - }, - { - "epoch": 0.9433262610623698, - "grad_norm": 0.0012803459540009499, - "learning_rate": 0.00019999956158069855, - "loss": 46.0, - "step": 12338 - }, - { - "epoch": 0.9434027180457595, - "grad_norm": 0.00155855983030051, - "learning_rate": 0.00019999956150956993, - "loss": 46.0, - "step": 12339 - }, - { - "epoch": 0.9434791750291492, - "grad_norm": 0.0004491157305892557, - "learning_rate": 0.00019999956143843556, - "loss": 46.0, - "step": 12340 - }, - { - "epoch": 0.9435556320125389, - "grad_norm": 0.003105051815509796, - "learning_rate": 0.00019999956136729542, - "loss": 46.0, - "step": 12341 - }, - { - "epoch": 0.9436320889959287, - "grad_norm": 0.0007455964805558324, - "learning_rate": 0.00019999956129614947, - "loss": 46.0, - "step": 12342 - }, - { - "epoch": 0.9437085459793184, - "grad_norm": 0.0008121364517137408, - "learning_rate": 0.00019999956122499778, - "loss": 46.0, - "step": 12343 - }, - { - "epoch": 0.9437850029627081, - "grad_norm": 0.0010009503457695246, - "learning_rate": 0.00019999956115384032, - "loss": 46.0, - "step": 12344 - }, - { - "epoch": 0.9438614599460978, - "grad_norm": 0.003333230270072818, - "learning_rate": 0.0001999995610826771, - "loss": 46.0, - "step": 12345 - }, - { - "epoch": 0.9439379169294876, - "grad_norm": 0.003051307750865817, - "learning_rate": 0.0001999995610115081, - "loss": 46.0, - "step": 12346 - }, - { - "epoch": 0.9440143739128772, - "grad_norm": 0.0004749305371660739, - "learning_rate": 0.00019999956094033335, - "loss": 46.0, - "step": 12347 - }, - { - "epoch": 0.944090830896267, - "grad_norm": 0.0007354088011197746, - "learning_rate": 0.00019999956086915282, - "loss": 46.0, - "step": 12348 - }, - { - "epoch": 0.9441672878796568, - "grad_norm": 0.0005137072876095772, - "learning_rate": 0.00019999956079796652, - "loss": 46.0, - "step": 12349 - }, - { - "epoch": 0.9442437448630464, - "grad_norm": 0.0006483962060883641, - "learning_rate": 0.00019999956072677441, - "loss": 46.0, - "step": 12350 - }, - { - "epoch": 0.9443202018464362, - "grad_norm": 0.0018804912688210607, - "learning_rate": 0.0001999995606555766, - "loss": 46.0, - "step": 12351 - }, - { - "epoch": 0.9443966588298258, - "grad_norm": 0.0014351321151480079, - "learning_rate": 0.00019999956058437297, - "loss": 46.0, - "step": 12352 - }, - { - "epoch": 0.9444731158132156, - "grad_norm": 0.0006286718999035656, - "learning_rate": 0.00019999956051316357, - "loss": 46.0, - "step": 12353 - }, - { - "epoch": 0.9445495727966053, - "grad_norm": 0.001512345508672297, - "learning_rate": 0.0001999995604419484, - "loss": 46.0, - "step": 12354 - }, - { - "epoch": 0.944626029779995, - "grad_norm": 0.0006150231347419322, - "learning_rate": 0.00019999956037072752, - "loss": 46.0, - "step": 12355 - }, - { - "epoch": 0.9447024867633848, - "grad_norm": 0.0012920809676870704, - "learning_rate": 0.0001999995602995008, - "loss": 46.0, - "step": 12356 - }, - { - "epoch": 0.9447789437467745, - "grad_norm": 0.0013207169249653816, - "learning_rate": 0.00019999956022826836, - "loss": 46.0, - "step": 12357 - }, - { - "epoch": 0.9448554007301642, - "grad_norm": 0.006703382823616266, - "learning_rate": 0.00019999956015703013, - "loss": 46.0, - "step": 12358 - }, - { - "epoch": 0.9449318577135539, - "grad_norm": 0.008000780828297138, - "learning_rate": 0.00019999956008578612, - "loss": 46.0, - "step": 12359 - }, - { - "epoch": 0.9450083146969437, - "grad_norm": 0.0021756684873253107, - "learning_rate": 0.00019999956001453634, - "loss": 46.0, - "step": 12360 - }, - { - "epoch": 0.9450847716803333, - "grad_norm": 0.001456227619200945, - "learning_rate": 0.0001999995599432808, - "loss": 46.0, - "step": 12361 - }, - { - "epoch": 0.9451612286637231, - "grad_norm": 0.0035714353434741497, - "learning_rate": 0.0001999995598720195, - "loss": 46.0, - "step": 12362 - }, - { - "epoch": 0.9452376856471127, - "grad_norm": 0.0011776788160204887, - "learning_rate": 0.00019999955980075243, - "loss": 46.0, - "step": 12363 - }, - { - "epoch": 0.9453141426305025, - "grad_norm": 0.0006154084112495184, - "learning_rate": 0.00019999955972947956, - "loss": 46.0, - "step": 12364 - }, - { - "epoch": 0.9453905996138923, - "grad_norm": 0.0017240220913663507, - "learning_rate": 0.00019999955965820093, - "loss": 46.0, - "step": 12365 - }, - { - "epoch": 0.9454670565972819, - "grad_norm": 0.0017344050575047731, - "learning_rate": 0.0001999995595869166, - "loss": 46.0, - "step": 12366 - }, - { - "epoch": 0.9455435135806717, - "grad_norm": 0.00508534163236618, - "learning_rate": 0.00019999955951562642, - "loss": 46.0, - "step": 12367 - }, - { - "epoch": 0.9456199705640614, - "grad_norm": 0.000537124287802726, - "learning_rate": 0.00019999955944433048, - "loss": 46.0, - "step": 12368 - }, - { - "epoch": 0.9456964275474511, - "grad_norm": 0.0002548867487348616, - "learning_rate": 0.0001999995593730288, - "loss": 46.0, - "step": 12369 - }, - { - "epoch": 0.9457728845308409, - "grad_norm": 0.0026585611049085855, - "learning_rate": 0.00019999955930172133, - "loss": 46.0, - "step": 12370 - }, - { - "epoch": 0.9458493415142305, - "grad_norm": 0.002630931092426181, - "learning_rate": 0.0001999995592304081, - "loss": 46.0, - "step": 12371 - }, - { - "epoch": 0.9459257984976203, - "grad_norm": 0.0030295723117887974, - "learning_rate": 0.0001999995591590891, - "loss": 46.0, - "step": 12372 - }, - { - "epoch": 0.94600225548101, - "grad_norm": 0.0005532665527425706, - "learning_rate": 0.0001999995590877643, - "loss": 46.0, - "step": 12373 - }, - { - "epoch": 0.9460787124643997, - "grad_norm": 0.0017418792704120278, - "learning_rate": 0.00019999955901643378, - "loss": 46.0, - "step": 12374 - }, - { - "epoch": 0.9461551694477894, - "grad_norm": 0.0006412502261810005, - "learning_rate": 0.00019999955894509748, - "loss": 46.0, - "step": 12375 - }, - { - "epoch": 0.9462316264311792, - "grad_norm": 0.0010607157601043582, - "learning_rate": 0.0001999995588737554, - "loss": 46.0, - "step": 12376 - }, - { - "epoch": 0.9463080834145688, - "grad_norm": 0.0014223423786461353, - "learning_rate": 0.00019999955880240755, - "loss": 46.0, - "step": 12377 - }, - { - "epoch": 0.9463845403979586, - "grad_norm": 0.00037162474473007023, - "learning_rate": 0.0001999995587310539, - "loss": 46.0, - "step": 12378 - }, - { - "epoch": 0.9464609973813484, - "grad_norm": 0.0007151460740715265, - "learning_rate": 0.00019999955865969454, - "loss": 46.0, - "step": 12379 - }, - { - "epoch": 0.946537454364738, - "grad_norm": 0.0018177909078076482, - "learning_rate": 0.00019999955858832937, - "loss": 46.0, - "step": 12380 - }, - { - "epoch": 0.9466139113481278, - "grad_norm": 0.0011734383879229426, - "learning_rate": 0.00019999955851695845, - "loss": 46.0, - "step": 12381 - }, - { - "epoch": 0.9466903683315174, - "grad_norm": 0.0010106373811140656, - "learning_rate": 0.00019999955844558174, - "loss": 46.0, - "step": 12382 - }, - { - "epoch": 0.9467668253149072, - "grad_norm": 0.0010133297182619572, - "learning_rate": 0.00019999955837419927, - "loss": 46.0, - "step": 12383 - }, - { - "epoch": 0.946843282298297, - "grad_norm": 0.0007849809480831027, - "learning_rate": 0.00019999955830281104, - "loss": 46.0, - "step": 12384 - }, - { - "epoch": 0.9469197392816866, - "grad_norm": 0.004273064434528351, - "learning_rate": 0.00019999955823141706, - "loss": 46.0, - "step": 12385 - }, - { - "epoch": 0.9469961962650764, - "grad_norm": 0.0011846947018057108, - "learning_rate": 0.00019999955816001728, - "loss": 46.0, - "step": 12386 - }, - { - "epoch": 0.9470726532484661, - "grad_norm": 0.002637983998283744, - "learning_rate": 0.00019999955808861172, - "loss": 46.0, - "step": 12387 - }, - { - "epoch": 0.9471491102318558, - "grad_norm": 0.0005716111045330763, - "learning_rate": 0.0001999995580172004, - "loss": 46.0, - "step": 12388 - }, - { - "epoch": 0.9472255672152455, - "grad_norm": 0.0006461918819695711, - "learning_rate": 0.00019999955794578332, - "loss": 46.0, - "step": 12389 - }, - { - "epoch": 0.9473020241986353, - "grad_norm": 0.002386166714131832, - "learning_rate": 0.0001999995578743605, - "loss": 46.0, - "step": 12390 - }, - { - "epoch": 0.947378481182025, - "grad_norm": 0.000282397901173681, - "learning_rate": 0.00019999955780293185, - "loss": 46.0, - "step": 12391 - }, - { - "epoch": 0.9474549381654147, - "grad_norm": 0.0017616646364331245, - "learning_rate": 0.00019999955773149745, - "loss": 46.0, - "step": 12392 - }, - { - "epoch": 0.9475313951488044, - "grad_norm": 0.0009565998334437609, - "learning_rate": 0.00019999955766005729, - "loss": 46.0, - "step": 12393 - }, - { - "epoch": 0.9476078521321941, - "grad_norm": 0.009229855611920357, - "learning_rate": 0.00019999955758861137, - "loss": 46.0, - "step": 12394 - }, - { - "epoch": 0.9476843091155839, - "grad_norm": 0.0028080889023840427, - "learning_rate": 0.00019999955751715966, - "loss": 46.0, - "step": 12395 - }, - { - "epoch": 0.9477607660989735, - "grad_norm": 0.000975471397396177, - "learning_rate": 0.0001999995574457022, - "loss": 46.0, - "step": 12396 - }, - { - "epoch": 0.9478372230823633, - "grad_norm": 0.0013243749272078276, - "learning_rate": 0.00019999955737423893, - "loss": 46.0, - "step": 12397 - }, - { - "epoch": 0.947913680065753, - "grad_norm": 0.0006749395979568362, - "learning_rate": 0.00019999955730276995, - "loss": 46.0, - "step": 12398 - }, - { - "epoch": 0.9479901370491427, - "grad_norm": 0.0018248808337375522, - "learning_rate": 0.00019999955723129517, - "loss": 46.0, - "step": 12399 - }, - { - "epoch": 0.9480665940325325, - "grad_norm": 0.000542721594683826, - "learning_rate": 0.00019999955715981464, - "loss": 46.0, - "step": 12400 - }, - { - "epoch": 0.9481430510159221, - "grad_norm": 0.0011014159535989165, - "learning_rate": 0.00019999955708832832, - "loss": 46.0, - "step": 12401 - }, - { - "epoch": 0.9482195079993119, - "grad_norm": 0.0021464594174176455, - "learning_rate": 0.00019999955701683621, - "loss": 46.0, - "step": 12402 - }, - { - "epoch": 0.9482959649827016, - "grad_norm": 0.004897886421531439, - "learning_rate": 0.0001999995569453384, - "loss": 46.0, - "step": 12403 - }, - { - "epoch": 0.9483724219660913, - "grad_norm": 0.0016293496591970325, - "learning_rate": 0.00019999955687383475, - "loss": 46.0, - "step": 12404 - }, - { - "epoch": 0.948448878949481, - "grad_norm": 0.0016750118229538202, - "learning_rate": 0.00019999955680232535, - "loss": 46.0, - "step": 12405 - }, - { - "epoch": 0.9485253359328708, - "grad_norm": 0.0031048518139868975, - "learning_rate": 0.00019999955673081018, - "loss": 46.0, - "step": 12406 - }, - { - "epoch": 0.9486017929162605, - "grad_norm": 0.010521024465560913, - "learning_rate": 0.00019999955665928924, - "loss": 46.0, - "step": 12407 - }, - { - "epoch": 0.9486782498996502, - "grad_norm": 0.0007901581702753901, - "learning_rate": 0.00019999955658776255, - "loss": 46.0, - "step": 12408 - }, - { - "epoch": 0.94875470688304, - "grad_norm": 0.0008239212911576033, - "learning_rate": 0.0001999995565162301, - "loss": 46.0, - "step": 12409 - }, - { - "epoch": 0.9488311638664296, - "grad_norm": 0.0004952264134772122, - "learning_rate": 0.00019999955644469183, - "loss": 46.0, - "step": 12410 - }, - { - "epoch": 0.9489076208498194, - "grad_norm": 0.007967387326061726, - "learning_rate": 0.00019999955637314782, - "loss": 46.0, - "step": 12411 - }, - { - "epoch": 0.948984077833209, - "grad_norm": 0.0026131724007427692, - "learning_rate": 0.00019999955630159804, - "loss": 46.0, - "step": 12412 - }, - { - "epoch": 0.9490605348165988, - "grad_norm": 0.0010560484370216727, - "learning_rate": 0.00019999955623004252, - "loss": 46.0, - "step": 12413 - }, - { - "epoch": 0.9491369917999886, - "grad_norm": 0.0011084924917668104, - "learning_rate": 0.0001999995561584812, - "loss": 46.0, - "step": 12414 - }, - { - "epoch": 0.9492134487833782, - "grad_norm": 0.0009459021966904402, - "learning_rate": 0.00019999955608691409, - "loss": 46.0, - "step": 12415 - }, - { - "epoch": 0.949289905766768, - "grad_norm": 0.0019901844207197428, - "learning_rate": 0.00019999955601534124, - "loss": 46.0, - "step": 12416 - }, - { - "epoch": 0.9493663627501577, - "grad_norm": 0.000977960298769176, - "learning_rate": 0.00019999955594376262, - "loss": 46.0, - "step": 12417 - }, - { - "epoch": 0.9494428197335474, - "grad_norm": 0.0002895413781516254, - "learning_rate": 0.0001999995558721782, - "loss": 46.0, - "step": 12418 - }, - { - "epoch": 0.9495192767169371, - "grad_norm": 0.0006636523175984621, - "learning_rate": 0.00019999955580058806, - "loss": 46.0, - "step": 12419 - }, - { - "epoch": 0.9495957337003269, - "grad_norm": 0.001335454755462706, - "learning_rate": 0.00019999955572899212, - "loss": 46.0, - "step": 12420 - }, - { - "epoch": 0.9496721906837166, - "grad_norm": 0.000873195007443428, - "learning_rate": 0.0001999995556573904, - "loss": 46.0, - "step": 12421 - }, - { - "epoch": 0.9497486476671063, - "grad_norm": 0.0046980599872767925, - "learning_rate": 0.00019999955558578294, - "loss": 46.0, - "step": 12422 - }, - { - "epoch": 0.949825104650496, - "grad_norm": 0.00045450410107150674, - "learning_rate": 0.00019999955551416968, - "loss": 46.0, - "step": 12423 - }, - { - "epoch": 0.9499015616338857, - "grad_norm": 0.0032560836989432573, - "learning_rate": 0.00019999955544255067, - "loss": 46.0, - "step": 12424 - }, - { - "epoch": 0.9499780186172755, - "grad_norm": 0.0008326891111209989, - "learning_rate": 0.0001999995553709259, - "loss": 46.0, - "step": 12425 - }, - { - "epoch": 0.9500544756006651, - "grad_norm": 0.0008003672119230032, - "learning_rate": 0.00019999955529929534, - "loss": 46.0, - "step": 12426 - }, - { - "epoch": 0.9501309325840549, - "grad_norm": 0.0005340115167200565, - "learning_rate": 0.000199999555227659, - "loss": 46.0, - "step": 12427 - }, - { - "epoch": 0.9502073895674447, - "grad_norm": 0.0011407658457756042, - "learning_rate": 0.00019999955515601694, - "loss": 46.0, - "step": 12428 - }, - { - "epoch": 0.9502838465508343, - "grad_norm": 0.0012825331650674343, - "learning_rate": 0.00019999955508436906, - "loss": 46.0, - "step": 12429 - }, - { - "epoch": 0.9503603035342241, - "grad_norm": 0.002244018716737628, - "learning_rate": 0.00019999955501271544, - "loss": 46.0, - "step": 12430 - }, - { - "epoch": 0.9504367605176138, - "grad_norm": 0.002747810911387205, - "learning_rate": 0.00019999955494105605, - "loss": 46.0, - "step": 12431 - }, - { - "epoch": 0.9505132175010035, - "grad_norm": 0.000954570947214961, - "learning_rate": 0.00019999955486939088, - "loss": 46.0, - "step": 12432 - }, - { - "epoch": 0.9505896744843932, - "grad_norm": 0.001188487745821476, - "learning_rate": 0.00019999955479771994, - "loss": 46.0, - "step": 12433 - }, - { - "epoch": 0.9506661314677829, - "grad_norm": 0.0026089854072779417, - "learning_rate": 0.00019999955472604322, - "loss": 46.0, - "step": 12434 - }, - { - "epoch": 0.9507425884511727, - "grad_norm": 0.0009334481437690556, - "learning_rate": 0.00019999955465436076, - "loss": 46.0, - "step": 12435 - }, - { - "epoch": 0.9508190454345624, - "grad_norm": 0.0011899399105459452, - "learning_rate": 0.0001999995545826725, - "loss": 46.0, - "step": 12436 - }, - { - "epoch": 0.9508955024179521, - "grad_norm": 0.0013193285558372736, - "learning_rate": 0.00019999955451097847, - "loss": 46.0, - "step": 12437 - }, - { - "epoch": 0.9509719594013418, - "grad_norm": 0.0012993119889870286, - "learning_rate": 0.00019999955443927872, - "loss": 46.0, - "step": 12438 - }, - { - "epoch": 0.9510484163847316, - "grad_norm": 0.0010145334526896477, - "learning_rate": 0.00019999955436757314, - "loss": 46.0, - "step": 12439 - }, - { - "epoch": 0.9511248733681212, - "grad_norm": 0.0008084794972091913, - "learning_rate": 0.00019999955429586183, - "loss": 46.0, - "step": 12440 - }, - { - "epoch": 0.951201330351511, - "grad_norm": 0.0010774113470688462, - "learning_rate": 0.0001999995542241447, - "loss": 46.0, - "step": 12441 - }, - { - "epoch": 0.9512777873349006, - "grad_norm": 0.004175710491836071, - "learning_rate": 0.00019999955415242186, - "loss": 46.0, - "step": 12442 - }, - { - "epoch": 0.9513542443182904, - "grad_norm": 0.00113750493619591, - "learning_rate": 0.00019999955408069324, - "loss": 46.0, - "step": 12443 - }, - { - "epoch": 0.9514307013016802, - "grad_norm": 0.001192736905068159, - "learning_rate": 0.00019999955400895885, - "loss": 46.0, - "step": 12444 - }, - { - "epoch": 0.9515071582850698, - "grad_norm": 0.0011997545370832086, - "learning_rate": 0.00019999955393721868, - "loss": 46.0, - "step": 12445 - }, - { - "epoch": 0.9515836152684596, - "grad_norm": 0.001010085572488606, - "learning_rate": 0.00019999955386547274, - "loss": 46.0, - "step": 12446 - }, - { - "epoch": 0.9516600722518493, - "grad_norm": 0.0008382750093005598, - "learning_rate": 0.00019999955379372103, - "loss": 46.0, - "step": 12447 - }, - { - "epoch": 0.951736529235239, - "grad_norm": 0.0012769688619300723, - "learning_rate": 0.00019999955372196354, - "loss": 46.0, - "step": 12448 - }, - { - "epoch": 0.9518129862186288, - "grad_norm": 0.0012173093855381012, - "learning_rate": 0.00019999955365020028, - "loss": 46.0, - "step": 12449 - }, - { - "epoch": 0.9518894432020185, - "grad_norm": 0.0008360305218957365, - "learning_rate": 0.00019999955357843127, - "loss": 46.0, - "step": 12450 - }, - { - "epoch": 0.9519659001854082, - "grad_norm": 0.000583948043640703, - "learning_rate": 0.00019999955350665646, - "loss": 46.0, - "step": 12451 - }, - { - "epoch": 0.9520423571687979, - "grad_norm": 0.01885116659104824, - "learning_rate": 0.00019999955343487594, - "loss": 46.0, - "step": 12452 - }, - { - "epoch": 0.9521188141521876, - "grad_norm": 0.0010129577713087201, - "learning_rate": 0.0001999995533630896, - "loss": 46.0, - "step": 12453 - }, - { - "epoch": 0.9521952711355773, - "grad_norm": 0.012866014614701271, - "learning_rate": 0.00019999955329129748, - "loss": 46.0, - "step": 12454 - }, - { - "epoch": 0.9522717281189671, - "grad_norm": 0.006403828505426645, - "learning_rate": 0.00019999955321949964, - "loss": 46.0, - "step": 12455 - }, - { - "epoch": 0.9523481851023567, - "grad_norm": 0.0012411960633471608, - "learning_rate": 0.00019999955314769602, - "loss": 46.0, - "step": 12456 - }, - { - "epoch": 0.9524246420857465, - "grad_norm": 0.004769875202327967, - "learning_rate": 0.0001999995530758866, - "loss": 46.0, - "step": 12457 - }, - { - "epoch": 0.9525010990691363, - "grad_norm": 0.0012672748416662216, - "learning_rate": 0.00019999955300407143, - "loss": 46.0, - "step": 12458 - }, - { - "epoch": 0.9525775560525259, - "grad_norm": 0.0007318942225538194, - "learning_rate": 0.0001999995529322505, - "loss": 46.0, - "step": 12459 - }, - { - "epoch": 0.9526540130359157, - "grad_norm": 0.007978350855410099, - "learning_rate": 0.00019999955286042375, - "loss": 46.0, - "step": 12460 - }, - { - "epoch": 0.9527304700193054, - "grad_norm": 0.0011821523075923324, - "learning_rate": 0.0001999995527885913, - "loss": 46.0, - "step": 12461 - }, - { - "epoch": 0.9528069270026951, - "grad_norm": 0.0014150685165077448, - "learning_rate": 0.00019999955271675303, - "loss": 46.0, - "step": 12462 - }, - { - "epoch": 0.9528833839860849, - "grad_norm": 0.004567085299640894, - "learning_rate": 0.000199999552644909, - "loss": 46.0, - "step": 12463 - }, - { - "epoch": 0.9529598409694745, - "grad_norm": 0.00717273959890008, - "learning_rate": 0.00019999955257305922, - "loss": 46.0, - "step": 12464 - }, - { - "epoch": 0.9530362979528643, - "grad_norm": 0.0015309195732697845, - "learning_rate": 0.00019999955250120366, - "loss": 46.0, - "step": 12465 - }, - { - "epoch": 0.953112754936254, - "grad_norm": 0.01209405530244112, - "learning_rate": 0.0001999995524293423, - "loss": 46.0, - "step": 12466 - }, - { - "epoch": 0.9531892119196437, - "grad_norm": 0.0038007544353604317, - "learning_rate": 0.00019999955235747524, - "loss": 46.0, - "step": 12467 - }, - { - "epoch": 0.9532656689030334, - "grad_norm": 0.0006017562700435519, - "learning_rate": 0.00019999955228560236, - "loss": 46.0, - "step": 12468 - }, - { - "epoch": 0.9533421258864232, - "grad_norm": 0.0009332410409115255, - "learning_rate": 0.00019999955221372372, - "loss": 46.0, - "step": 12469 - }, - { - "epoch": 0.9534185828698128, - "grad_norm": 0.0012395849917083979, - "learning_rate": 0.00019999955214183932, - "loss": 46.0, - "step": 12470 - }, - { - "epoch": 0.9534950398532026, - "grad_norm": 0.0014149347553029656, - "learning_rate": 0.00019999955206994913, - "loss": 46.0, - "step": 12471 - }, - { - "epoch": 0.9535714968365923, - "grad_norm": 0.0031176290940493345, - "learning_rate": 0.0001999995519980532, - "loss": 46.0, - "step": 12472 - }, - { - "epoch": 0.953647953819982, - "grad_norm": 0.008678838610649109, - "learning_rate": 0.0001999995519261515, - "loss": 46.0, - "step": 12473 - }, - { - "epoch": 0.9537244108033718, - "grad_norm": 0.000764036609325558, - "learning_rate": 0.000199999551854244, - "loss": 46.0, - "step": 12474 - }, - { - "epoch": 0.9538008677867614, - "grad_norm": 0.0007010448607616127, - "learning_rate": 0.00019999955178233073, - "loss": 46.0, - "step": 12475 - }, - { - "epoch": 0.9538773247701512, - "grad_norm": 0.005646991543471813, - "learning_rate": 0.00019999955171041172, - "loss": 46.0, - "step": 12476 - }, - { - "epoch": 0.953953781753541, - "grad_norm": 0.0020706935320049524, - "learning_rate": 0.00019999955163848695, - "loss": 46.0, - "step": 12477 - }, - { - "epoch": 0.9540302387369306, - "grad_norm": 0.0028199441730976105, - "learning_rate": 0.00019999955156655637, - "loss": 46.0, - "step": 12478 - }, - { - "epoch": 0.9541066957203204, - "grad_norm": 0.002688672160729766, - "learning_rate": 0.00019999955149462, - "loss": 46.0, - "step": 12479 - }, - { - "epoch": 0.9541831527037101, - "grad_norm": 0.0019149164436385036, - "learning_rate": 0.0001999995514226779, - "loss": 46.0, - "step": 12480 - }, - { - "epoch": 0.9542596096870998, - "grad_norm": 0.004896718543022871, - "learning_rate": 0.00019999955135073004, - "loss": 46.0, - "step": 12481 - }, - { - "epoch": 0.9543360666704895, - "grad_norm": 0.0018319346709176898, - "learning_rate": 0.00019999955127877642, - "loss": 46.0, - "step": 12482 - }, - { - "epoch": 0.9544125236538792, - "grad_norm": 0.001564774545840919, - "learning_rate": 0.00019999955120681703, - "loss": 46.0, - "step": 12483 - }, - { - "epoch": 0.954488980637269, - "grad_norm": 0.0007748169591650367, - "learning_rate": 0.00019999955113485186, - "loss": 46.0, - "step": 12484 - }, - { - "epoch": 0.9545654376206587, - "grad_norm": 0.016518425196409225, - "learning_rate": 0.0001999995510628809, - "loss": 46.0, - "step": 12485 - }, - { - "epoch": 0.9546418946040484, - "grad_norm": 0.0025407939683645964, - "learning_rate": 0.00019999955099090418, - "loss": 46.0, - "step": 12486 - }, - { - "epoch": 0.9547183515874381, - "grad_norm": 0.0009022768936119974, - "learning_rate": 0.0001999995509189217, - "loss": 46.0, - "step": 12487 - }, - { - "epoch": 0.9547948085708279, - "grad_norm": 0.0006046746857464314, - "learning_rate": 0.00019999955084693344, - "loss": 46.0, - "step": 12488 - }, - { - "epoch": 0.9548712655542175, - "grad_norm": 0.0035914992913603783, - "learning_rate": 0.0001999995507749394, - "loss": 46.0, - "step": 12489 - }, - { - "epoch": 0.9549477225376073, - "grad_norm": 0.0009309165761806071, - "learning_rate": 0.0001999995507029396, - "loss": 46.0, - "step": 12490 - }, - { - "epoch": 0.955024179520997, - "grad_norm": 0.0028376539703458548, - "learning_rate": 0.00019999955063093404, - "loss": 46.0, - "step": 12491 - }, - { - "epoch": 0.9551006365043867, - "grad_norm": 0.0013215922517701983, - "learning_rate": 0.0001999995505589227, - "loss": 46.0, - "step": 12492 - }, - { - "epoch": 0.9551770934877765, - "grad_norm": 0.0007341204909607768, - "learning_rate": 0.0001999995504869056, - "loss": 46.0, - "step": 12493 - }, - { - "epoch": 0.9552535504711661, - "grad_norm": 0.0008842049282975495, - "learning_rate": 0.00019999955041488275, - "loss": 46.0, - "step": 12494 - }, - { - "epoch": 0.9553300074545559, - "grad_norm": 0.0005446483264677227, - "learning_rate": 0.0001999995503428541, - "loss": 46.0, - "step": 12495 - }, - { - "epoch": 0.9554064644379456, - "grad_norm": 0.004784720484167337, - "learning_rate": 0.00019999955027081968, - "loss": 46.0, - "step": 12496 - }, - { - "epoch": 0.9554829214213353, - "grad_norm": 0.0022217079531401396, - "learning_rate": 0.0001999995501987795, - "loss": 46.0, - "step": 12497 - }, - { - "epoch": 0.955559378404725, - "grad_norm": 0.0006384294247254729, - "learning_rate": 0.00019999955012673355, - "loss": 46.0, - "step": 12498 - }, - { - "epoch": 0.9556358353881148, - "grad_norm": 0.000698225514497608, - "learning_rate": 0.0001999995500546818, - "loss": 46.0, - "step": 12499 - }, - { - "epoch": 0.9557122923715045, - "grad_norm": 0.0022178853396326303, - "learning_rate": 0.00019999954998262432, - "loss": 46.0, - "step": 12500 - }, - { - "epoch": 0.9557887493548942, - "grad_norm": 0.0015768782468512654, - "learning_rate": 0.00019999954991056106, - "loss": 46.0, - "step": 12501 - }, - { - "epoch": 0.9558652063382839, - "grad_norm": 0.001325781224295497, - "learning_rate": 0.00019999954983849203, - "loss": 46.0, - "step": 12502 - }, - { - "epoch": 0.9559416633216736, - "grad_norm": 0.001395052415318787, - "learning_rate": 0.00019999954976641723, - "loss": 46.0, - "step": 12503 - }, - { - "epoch": 0.9560181203050634, - "grad_norm": 0.0008487051236443222, - "learning_rate": 0.00019999954969433667, - "loss": 46.0, - "step": 12504 - }, - { - "epoch": 0.956094577288453, - "grad_norm": 0.00044739051372744143, - "learning_rate": 0.00019999954962225035, - "loss": 46.0, - "step": 12505 - }, - { - "epoch": 0.9561710342718428, - "grad_norm": 0.0011321419151499867, - "learning_rate": 0.00019999954955015822, - "loss": 46.0, - "step": 12506 - }, - { - "epoch": 0.9562474912552326, - "grad_norm": 0.0014656601706519723, - "learning_rate": 0.00019999954947806035, - "loss": 46.0, - "step": 12507 - }, - { - "epoch": 0.9563239482386222, - "grad_norm": 0.001833436544984579, - "learning_rate": 0.0001999995494059567, - "loss": 46.0, - "step": 12508 - }, - { - "epoch": 0.956400405222012, - "grad_norm": 0.0014022770337760448, - "learning_rate": 0.0001999995493338473, - "loss": 46.0, - "step": 12509 - }, - { - "epoch": 0.9564768622054017, - "grad_norm": 0.0027344259433448315, - "learning_rate": 0.0001999995492617321, - "loss": 46.0, - "step": 12510 - }, - { - "epoch": 0.9565533191887914, - "grad_norm": 0.001271241344511509, - "learning_rate": 0.00019999954918961118, - "loss": 46.0, - "step": 12511 - }, - { - "epoch": 0.9566297761721811, - "grad_norm": 0.003318602219223976, - "learning_rate": 0.00019999954911748444, - "loss": 46.0, - "step": 12512 - }, - { - "epoch": 0.9567062331555708, - "grad_norm": 0.0015493493992835283, - "learning_rate": 0.00019999954904535193, - "loss": 46.0, - "step": 12513 - }, - { - "epoch": 0.9567826901389606, - "grad_norm": 0.0019136162009090185, - "learning_rate": 0.0001999995489732137, - "loss": 46.0, - "step": 12514 - }, - { - "epoch": 0.9568591471223503, - "grad_norm": 0.0011575878597795963, - "learning_rate": 0.00019999954890106966, - "loss": 46.0, - "step": 12515 - }, - { - "epoch": 0.95693560410574, - "grad_norm": 0.0031630985904484987, - "learning_rate": 0.00019999954882891986, - "loss": 46.0, - "step": 12516 - }, - { - "epoch": 0.9570120610891297, - "grad_norm": 0.0006764872232452035, - "learning_rate": 0.00019999954875676428, - "loss": 46.0, - "step": 12517 - }, - { - "epoch": 0.9570885180725195, - "grad_norm": 0.0009080265881493688, - "learning_rate": 0.00019999954868460295, - "loss": 46.0, - "step": 12518 - }, - { - "epoch": 0.9571649750559091, - "grad_norm": 0.0010366225615143776, - "learning_rate": 0.00019999954861243583, - "loss": 46.0, - "step": 12519 - }, - { - "epoch": 0.9572414320392989, - "grad_norm": 0.001006502308882773, - "learning_rate": 0.00019999954854026296, - "loss": 46.0, - "step": 12520 - }, - { - "epoch": 0.9573178890226887, - "grad_norm": 0.0032326679211109877, - "learning_rate": 0.0001999995484680843, - "loss": 46.0, - "step": 12521 - }, - { - "epoch": 0.9573943460060783, - "grad_norm": 0.003837198717519641, - "learning_rate": 0.0001999995483958999, - "loss": 46.0, - "step": 12522 - }, - { - "epoch": 0.9574708029894681, - "grad_norm": 0.0007604262791574001, - "learning_rate": 0.0001999995483237097, - "loss": 46.0, - "step": 12523 - }, - { - "epoch": 0.9575472599728577, - "grad_norm": 0.0005066022858954966, - "learning_rate": 0.00019999954825151373, - "loss": 46.0, - "step": 12524 - }, - { - "epoch": 0.9576237169562475, - "grad_norm": 0.002728054765611887, - "learning_rate": 0.00019999954817931202, - "loss": 46.0, - "step": 12525 - }, - { - "epoch": 0.9577001739396372, - "grad_norm": 0.0008250894607044756, - "learning_rate": 0.00019999954810710454, - "loss": 46.0, - "step": 12526 - }, - { - "epoch": 0.9577766309230269, - "grad_norm": 0.0007391558028757572, - "learning_rate": 0.00019999954803489125, - "loss": 46.0, - "step": 12527 - }, - { - "epoch": 0.9578530879064167, - "grad_norm": 0.0015902697341516614, - "learning_rate": 0.00019999954796267222, - "loss": 46.0, - "step": 12528 - }, - { - "epoch": 0.9579295448898064, - "grad_norm": 0.004454911686480045, - "learning_rate": 0.00019999954789044741, - "loss": 46.0, - "step": 12529 - }, - { - "epoch": 0.9580060018731961, - "grad_norm": 0.0015787851298227906, - "learning_rate": 0.00019999954781821686, - "loss": 46.0, - "step": 12530 - }, - { - "epoch": 0.9580824588565858, - "grad_norm": 0.004207047168165445, - "learning_rate": 0.0001999995477459805, - "loss": 46.0, - "step": 12531 - }, - { - "epoch": 0.9581589158399756, - "grad_norm": 0.003194646444171667, - "learning_rate": 0.0001999995476737384, - "loss": 46.0, - "step": 12532 - }, - { - "epoch": 0.9582353728233652, - "grad_norm": 0.0010382853215560317, - "learning_rate": 0.00019999954760149051, - "loss": 46.0, - "step": 12533 - }, - { - "epoch": 0.958311829806755, - "grad_norm": 0.0005894839996472001, - "learning_rate": 0.0001999995475292369, - "loss": 46.0, - "step": 12534 - }, - { - "epoch": 0.9583882867901447, - "grad_norm": 0.0008184568723663688, - "learning_rate": 0.00019999954745697745, - "loss": 46.0, - "step": 12535 - }, - { - "epoch": 0.9584647437735344, - "grad_norm": 0.0004331482632551342, - "learning_rate": 0.00019999954738471226, - "loss": 46.0, - "step": 12536 - }, - { - "epoch": 0.9585412007569242, - "grad_norm": 0.0005351289291866124, - "learning_rate": 0.0001999995473124413, - "loss": 46.0, - "step": 12537 - }, - { - "epoch": 0.9586176577403138, - "grad_norm": 0.0005616553826257586, - "learning_rate": 0.00019999954724016455, - "loss": 46.0, - "step": 12538 - }, - { - "epoch": 0.9586941147237036, - "grad_norm": 0.0013907364336773753, - "learning_rate": 0.00019999954716788207, - "loss": 46.0, - "step": 12539 - }, - { - "epoch": 0.9587705717070933, - "grad_norm": 0.0023821413051337004, - "learning_rate": 0.0001999995470955938, - "loss": 46.0, - "step": 12540 - }, - { - "epoch": 0.958847028690483, - "grad_norm": 0.0005421300302259624, - "learning_rate": 0.00019999954702329978, - "loss": 46.0, - "step": 12541 - }, - { - "epoch": 0.9589234856738728, - "grad_norm": 0.0013058108743280172, - "learning_rate": 0.00019999954695099997, - "loss": 46.0, - "step": 12542 - }, - { - "epoch": 0.9589999426572624, - "grad_norm": 0.0006744128768332303, - "learning_rate": 0.00019999954687869437, - "loss": 46.0, - "step": 12543 - }, - { - "epoch": 0.9590763996406522, - "grad_norm": 0.0012755459174513817, - "learning_rate": 0.00019999954680638307, - "loss": 46.0, - "step": 12544 - }, - { - "epoch": 0.9591528566240419, - "grad_norm": 0.0013100079959258437, - "learning_rate": 0.00019999954673406595, - "loss": 46.0, - "step": 12545 - }, - { - "epoch": 0.9592293136074316, - "grad_norm": 0.001862332341261208, - "learning_rate": 0.00019999954666174305, - "loss": 46.0, - "step": 12546 - }, - { - "epoch": 0.9593057705908213, - "grad_norm": 0.001624559867195785, - "learning_rate": 0.0001999995465894144, - "loss": 46.0, - "step": 12547 - }, - { - "epoch": 0.9593822275742111, - "grad_norm": 0.0007144755800254643, - "learning_rate": 0.00019999954651707998, - "loss": 46.0, - "step": 12548 - }, - { - "epoch": 0.9594586845576007, - "grad_norm": 0.0039685554802417755, - "learning_rate": 0.00019999954644473977, - "loss": 46.0, - "step": 12549 - }, - { - "epoch": 0.9595351415409905, - "grad_norm": 0.006061859428882599, - "learning_rate": 0.00019999954637239383, - "loss": 46.0, - "step": 12550 - }, - { - "epoch": 0.9596115985243803, - "grad_norm": 0.003831411711871624, - "learning_rate": 0.0001999995463000421, - "loss": 46.0, - "step": 12551 - }, - { - "epoch": 0.9596880555077699, - "grad_norm": 0.002832041122019291, - "learning_rate": 0.0001999995462276846, - "loss": 46.0, - "step": 12552 - }, - { - "epoch": 0.9597645124911597, - "grad_norm": 0.0012886578915640712, - "learning_rate": 0.00019999954615532132, - "loss": 46.0, - "step": 12553 - }, - { - "epoch": 0.9598409694745493, - "grad_norm": 0.003579822601750493, - "learning_rate": 0.0001999995460829523, - "loss": 46.0, - "step": 12554 - }, - { - "epoch": 0.9599174264579391, - "grad_norm": 0.0016573643079027534, - "learning_rate": 0.00019999954601057746, - "loss": 46.0, - "step": 12555 - }, - { - "epoch": 0.9599938834413289, - "grad_norm": 0.00741255609318614, - "learning_rate": 0.0001999995459381969, - "loss": 46.0, - "step": 12556 - }, - { - "epoch": 0.9600703404247185, - "grad_norm": 0.005340883042663336, - "learning_rate": 0.00019999954586581055, - "loss": 46.0, - "step": 12557 - }, - { - "epoch": 0.9601467974081083, - "grad_norm": 0.0013376573333516717, - "learning_rate": 0.00019999954579341843, - "loss": 46.0, - "step": 12558 - }, - { - "epoch": 0.960223254391498, - "grad_norm": 0.0012810935731977224, - "learning_rate": 0.00019999954572102056, - "loss": 46.0, - "step": 12559 - }, - { - "epoch": 0.9602997113748877, - "grad_norm": 0.006521451752632856, - "learning_rate": 0.0001999995456486169, - "loss": 46.0, - "step": 12560 - }, - { - "epoch": 0.9603761683582774, - "grad_norm": 0.001418704865500331, - "learning_rate": 0.00019999954557620747, - "loss": 46.0, - "step": 12561 - }, - { - "epoch": 0.9604526253416672, - "grad_norm": 0.006944640539586544, - "learning_rate": 0.00019999954550379228, - "loss": 46.0, - "step": 12562 - }, - { - "epoch": 0.9605290823250568, - "grad_norm": 0.0013539890060201287, - "learning_rate": 0.0001999995454313713, - "loss": 46.0, - "step": 12563 - }, - { - "epoch": 0.9606055393084466, - "grad_norm": 0.0010226204758509994, - "learning_rate": 0.00019999954535894458, - "loss": 46.0, - "step": 12564 - }, - { - "epoch": 0.9606819962918363, - "grad_norm": 0.0019486999372020364, - "learning_rate": 0.0001999995452865121, - "loss": 46.0, - "step": 12565 - }, - { - "epoch": 0.960758453275226, - "grad_norm": 0.0009200394852086902, - "learning_rate": 0.0001999995452140738, - "loss": 46.0, - "step": 12566 - }, - { - "epoch": 0.9608349102586158, - "grad_norm": 0.0016339634312316775, - "learning_rate": 0.00019999954514162975, - "loss": 46.0, - "step": 12567 - }, - { - "epoch": 0.9609113672420054, - "grad_norm": 0.005042479373514652, - "learning_rate": 0.00019999954506917994, - "loss": 46.0, - "step": 12568 - }, - { - "epoch": 0.9609878242253952, - "grad_norm": 0.007448961026966572, - "learning_rate": 0.00019999954499672437, - "loss": 46.0, - "step": 12569 - }, - { - "epoch": 0.961064281208785, - "grad_norm": 0.0008751733112148941, - "learning_rate": 0.00019999954492426301, - "loss": 46.0, - "step": 12570 - }, - { - "epoch": 0.9611407381921746, - "grad_norm": 0.004641556181013584, - "learning_rate": 0.00019999954485179592, - "loss": 46.0, - "step": 12571 - }, - { - "epoch": 0.9612171951755644, - "grad_norm": 0.012658430263400078, - "learning_rate": 0.00019999954477932302, - "loss": 46.0, - "step": 12572 - }, - { - "epoch": 0.961293652158954, - "grad_norm": 0.0006108662346377969, - "learning_rate": 0.00019999954470684435, - "loss": 46.0, - "step": 12573 - }, - { - "epoch": 0.9613701091423438, - "grad_norm": 0.0007247099420055747, - "learning_rate": 0.00019999954463435993, - "loss": 46.0, - "step": 12574 - }, - { - "epoch": 0.9614465661257335, - "grad_norm": 0.0007045581587590277, - "learning_rate": 0.00019999954456186974, - "loss": 46.0, - "step": 12575 - }, - { - "epoch": 0.9615230231091232, - "grad_norm": 0.0021125529892742634, - "learning_rate": 0.00019999954448937378, - "loss": 46.0, - "step": 12576 - }, - { - "epoch": 0.961599480092513, - "grad_norm": 0.0011173088569194078, - "learning_rate": 0.00019999954441687204, - "loss": 46.0, - "step": 12577 - }, - { - "epoch": 0.9616759370759027, - "grad_norm": 0.001079391222447157, - "learning_rate": 0.00019999954434436453, - "loss": 46.0, - "step": 12578 - }, - { - "epoch": 0.9617523940592924, - "grad_norm": 0.004714257083833218, - "learning_rate": 0.00019999954427185124, - "loss": 46.0, - "step": 12579 - }, - { - "epoch": 0.9618288510426821, - "grad_norm": 0.0009884992614388466, - "learning_rate": 0.0001999995441993322, - "loss": 46.0, - "step": 12580 - }, - { - "epoch": 0.9619053080260719, - "grad_norm": 0.0007750187651254237, - "learning_rate": 0.00019999954412680738, - "loss": 46.0, - "step": 12581 - }, - { - "epoch": 0.9619817650094615, - "grad_norm": 0.000613724288996309, - "learning_rate": 0.0001999995440542768, - "loss": 46.0, - "step": 12582 - }, - { - "epoch": 0.9620582219928513, - "grad_norm": 0.0011867601424455643, - "learning_rate": 0.00019999954398174045, - "loss": 46.0, - "step": 12583 - }, - { - "epoch": 0.9621346789762409, - "grad_norm": 0.005172676872462034, - "learning_rate": 0.00019999954390919835, - "loss": 46.0, - "step": 12584 - }, - { - "epoch": 0.9622111359596307, - "grad_norm": 0.003148970426991582, - "learning_rate": 0.00019999954383665046, - "loss": 46.0, - "step": 12585 - }, - { - "epoch": 0.9622875929430205, - "grad_norm": 0.0015872607473284006, - "learning_rate": 0.00019999954376409679, - "loss": 46.0, - "step": 12586 - }, - { - "epoch": 0.9623640499264101, - "grad_norm": 0.0004961260710842907, - "learning_rate": 0.00019999954369153734, - "loss": 46.0, - "step": 12587 - }, - { - "epoch": 0.9624405069097999, - "grad_norm": 0.0038851506542414427, - "learning_rate": 0.00019999954361897215, - "loss": 46.0, - "step": 12588 - }, - { - "epoch": 0.9625169638931896, - "grad_norm": 0.0035033011808991432, - "learning_rate": 0.0001999995435464012, - "loss": 46.0, - "step": 12589 - }, - { - "epoch": 0.9625934208765793, - "grad_norm": 0.0013847750378772616, - "learning_rate": 0.00019999954347382442, - "loss": 46.0, - "step": 12590 - }, - { - "epoch": 0.962669877859969, - "grad_norm": 0.000358601741027087, - "learning_rate": 0.00019999954340124194, - "loss": 46.0, - "step": 12591 - }, - { - "epoch": 0.9627463348433588, - "grad_norm": 0.0013354334514588118, - "learning_rate": 0.00019999954332865368, - "loss": 46.0, - "step": 12592 - }, - { - "epoch": 0.9628227918267485, - "grad_norm": 0.0005576562252826989, - "learning_rate": 0.0001999995432560596, - "loss": 46.0, - "step": 12593 - }, - { - "epoch": 0.9628992488101382, - "grad_norm": 0.014594055712223053, - "learning_rate": 0.00019999954318345982, - "loss": 46.0, - "step": 12594 - }, - { - "epoch": 0.9629757057935279, - "grad_norm": 0.0014263063203543425, - "learning_rate": 0.00019999954311085422, - "loss": 46.0, - "step": 12595 - }, - { - "epoch": 0.9630521627769176, - "grad_norm": 0.0002965877647511661, - "learning_rate": 0.00019999954303824287, - "loss": 46.0, - "step": 12596 - }, - { - "epoch": 0.9631286197603074, - "grad_norm": 0.0016289090272039175, - "learning_rate": 0.00019999954296562574, - "loss": 46.0, - "step": 12597 - }, - { - "epoch": 0.963205076743697, - "grad_norm": 0.0012613135622814298, - "learning_rate": 0.00019999954289300284, - "loss": 46.0, - "step": 12598 - }, - { - "epoch": 0.9632815337270868, - "grad_norm": 0.0008691719849593937, - "learning_rate": 0.00019999954282037417, - "loss": 46.0, - "step": 12599 - }, - { - "epoch": 0.9633579907104766, - "grad_norm": 0.00310651701875031, - "learning_rate": 0.00019999954274773973, - "loss": 46.0, - "step": 12600 - }, - { - "epoch": 0.9634344476938662, - "grad_norm": 0.000629986054264009, - "learning_rate": 0.00019999954267509954, - "loss": 46.0, - "step": 12601 - }, - { - "epoch": 0.963510904677256, - "grad_norm": 0.0020488090813159943, - "learning_rate": 0.00019999954260245358, - "loss": 46.0, - "step": 12602 - }, - { - "epoch": 0.9635873616606456, - "grad_norm": 0.0006753480993211269, - "learning_rate": 0.00019999954252980184, - "loss": 46.0, - "step": 12603 - }, - { - "epoch": 0.9636638186440354, - "grad_norm": 0.0006479251314885914, - "learning_rate": 0.00019999954245714433, - "loss": 46.0, - "step": 12604 - }, - { - "epoch": 0.9637402756274251, - "grad_norm": 0.0009644933161325753, - "learning_rate": 0.00019999954238448104, - "loss": 46.0, - "step": 12605 - }, - { - "epoch": 0.9638167326108148, - "grad_norm": 0.0007472362485714257, - "learning_rate": 0.000199999542311812, - "loss": 46.0, - "step": 12606 - }, - { - "epoch": 0.9638931895942046, - "grad_norm": 0.0012204254744574428, - "learning_rate": 0.00019999954223913718, - "loss": 46.0, - "step": 12607 - }, - { - "epoch": 0.9639696465775943, - "grad_norm": 0.001085710246115923, - "learning_rate": 0.00019999954216645658, - "loss": 46.0, - "step": 12608 - }, - { - "epoch": 0.964046103560984, - "grad_norm": 0.0029610525816679, - "learning_rate": 0.00019999954209377023, - "loss": 46.0, - "step": 12609 - }, - { - "epoch": 0.9641225605443737, - "grad_norm": 0.0007604038692079484, - "learning_rate": 0.0001999995420210781, - "loss": 46.0, - "step": 12610 - }, - { - "epoch": 0.9641990175277635, - "grad_norm": 0.0009824297158047557, - "learning_rate": 0.0001999995419483802, - "loss": 46.0, - "step": 12611 - }, - { - "epoch": 0.9642754745111531, - "grad_norm": 0.0010285725584253669, - "learning_rate": 0.00019999954187567657, - "loss": 46.0, - "step": 12612 - }, - { - "epoch": 0.9643519314945429, - "grad_norm": 0.01939595490694046, - "learning_rate": 0.00019999954180296712, - "loss": 46.0, - "step": 12613 - }, - { - "epoch": 0.9644283884779326, - "grad_norm": 0.008662334643304348, - "learning_rate": 0.0001999995417302519, - "loss": 46.0, - "step": 12614 - }, - { - "epoch": 0.9645048454613223, - "grad_norm": 0.00046927505172789097, - "learning_rate": 0.00019999954165753091, - "loss": 46.0, - "step": 12615 - }, - { - "epoch": 0.9645813024447121, - "grad_norm": 0.004104883410036564, - "learning_rate": 0.00019999954158480418, - "loss": 46.0, - "step": 12616 - }, - { - "epoch": 0.9646577594281017, - "grad_norm": 0.0008808405254967511, - "learning_rate": 0.00019999954151207167, - "loss": 46.0, - "step": 12617 - }, - { - "epoch": 0.9647342164114915, - "grad_norm": 0.0020914131309837103, - "learning_rate": 0.00019999954143933338, - "loss": 46.0, - "step": 12618 - }, - { - "epoch": 0.9648106733948812, - "grad_norm": 0.0008840143564157188, - "learning_rate": 0.00019999954136658933, - "loss": 46.0, - "step": 12619 - }, - { - "epoch": 0.9648871303782709, - "grad_norm": 0.0013798632426187396, - "learning_rate": 0.00019999954129383952, - "loss": 46.0, - "step": 12620 - }, - { - "epoch": 0.9649635873616607, - "grad_norm": 0.0010010011028498411, - "learning_rate": 0.00019999954122108392, - "loss": 46.0, - "step": 12621 - }, - { - "epoch": 0.9650400443450504, - "grad_norm": 0.0016264660516753793, - "learning_rate": 0.00019999954114832254, - "loss": 46.0, - "step": 12622 - }, - { - "epoch": 0.9651165013284401, - "grad_norm": 0.0007057492621243, - "learning_rate": 0.00019999954107555542, - "loss": 46.0, - "step": 12623 - }, - { - "epoch": 0.9651929583118298, - "grad_norm": 0.00040417982381768525, - "learning_rate": 0.00019999954100278252, - "loss": 46.0, - "step": 12624 - }, - { - "epoch": 0.9652694152952195, - "grad_norm": 0.0018019978888332844, - "learning_rate": 0.00019999954093000385, - "loss": 46.0, - "step": 12625 - }, - { - "epoch": 0.9653458722786092, - "grad_norm": 0.0008747209794819355, - "learning_rate": 0.00019999954085721944, - "loss": 46.0, - "step": 12626 - }, - { - "epoch": 0.965422329261999, - "grad_norm": 0.0005741056520491838, - "learning_rate": 0.00019999954078442925, - "loss": 46.0, - "step": 12627 - }, - { - "epoch": 0.9654987862453887, - "grad_norm": 0.002164460252970457, - "learning_rate": 0.00019999954071163326, - "loss": 46.0, - "step": 12628 - }, - { - "epoch": 0.9655752432287784, - "grad_norm": 0.0007059042691253126, - "learning_rate": 0.0001999995406388315, - "loss": 46.0, - "step": 12629 - }, - { - "epoch": 0.9656517002121682, - "grad_norm": 0.0008277862798422575, - "learning_rate": 0.00019999954056602398, - "loss": 46.0, - "step": 12630 - }, - { - "epoch": 0.9657281571955578, - "grad_norm": 0.00032410197309218347, - "learning_rate": 0.0001999995404932107, - "loss": 46.0, - "step": 12631 - }, - { - "epoch": 0.9658046141789476, - "grad_norm": 0.0015394572401419282, - "learning_rate": 0.00019999954042039164, - "loss": 46.0, - "step": 12632 - }, - { - "epoch": 0.9658810711623372, - "grad_norm": 0.0048556337133049965, - "learning_rate": 0.00019999954034756681, - "loss": 46.0, - "step": 12633 - }, - { - "epoch": 0.965957528145727, - "grad_norm": 0.0011588882189244032, - "learning_rate": 0.00019999954027473624, - "loss": 46.0, - "step": 12634 - }, - { - "epoch": 0.9660339851291168, - "grad_norm": 0.0005232239491306245, - "learning_rate": 0.0001999995402018999, - "loss": 46.0, - "step": 12635 - }, - { - "epoch": 0.9661104421125064, - "grad_norm": 0.004094188567250967, - "learning_rate": 0.00019999954012905774, - "loss": 46.0, - "step": 12636 - }, - { - "epoch": 0.9661868990958962, - "grad_norm": 0.004282247740775347, - "learning_rate": 0.00019999954005620984, - "loss": 46.0, - "step": 12637 - }, - { - "epoch": 0.9662633560792859, - "grad_norm": 0.001113204751163721, - "learning_rate": 0.00019999953998335617, - "loss": 46.0, - "step": 12638 - }, - { - "epoch": 0.9663398130626756, - "grad_norm": 0.0012121719773858786, - "learning_rate": 0.0001999995399104967, - "loss": 46.0, - "step": 12639 - }, - { - "epoch": 0.9664162700460653, - "grad_norm": 0.0010293213417753577, - "learning_rate": 0.00019999953983763154, - "loss": 46.0, - "step": 12640 - }, - { - "epoch": 0.9664927270294551, - "grad_norm": 0.0015262255910784006, - "learning_rate": 0.00019999953976476052, - "loss": 46.0, - "step": 12641 - }, - { - "epoch": 0.9665691840128448, - "grad_norm": 0.0011345866369083524, - "learning_rate": 0.0001999995396918838, - "loss": 46.0, - "step": 12642 - }, - { - "epoch": 0.9666456409962345, - "grad_norm": 0.0023789769038558006, - "learning_rate": 0.00019999953961900125, - "loss": 46.0, - "step": 12643 - }, - { - "epoch": 0.9667220979796242, - "grad_norm": 0.00601214962080121, - "learning_rate": 0.000199999539546113, - "loss": 46.0, - "step": 12644 - }, - { - "epoch": 0.9667985549630139, - "grad_norm": 0.0010593831539154053, - "learning_rate": 0.00019999953947321894, - "loss": 46.0, - "step": 12645 - }, - { - "epoch": 0.9668750119464037, - "grad_norm": 0.001333468477241695, - "learning_rate": 0.0001999995394003191, - "loss": 46.0, - "step": 12646 - }, - { - "epoch": 0.9669514689297933, - "grad_norm": 0.0004939683130942285, - "learning_rate": 0.0001999995393274135, - "loss": 46.0, - "step": 12647 - }, - { - "epoch": 0.9670279259131831, - "grad_norm": 0.003756856080144644, - "learning_rate": 0.00019999953925450216, - "loss": 46.0, - "step": 12648 - }, - { - "epoch": 0.9671043828965729, - "grad_norm": 0.000527061230968684, - "learning_rate": 0.000199999539181585, - "loss": 46.0, - "step": 12649 - }, - { - "epoch": 0.9671808398799625, - "grad_norm": 0.0013170158490538597, - "learning_rate": 0.0001999995391086621, - "loss": 46.0, - "step": 12650 - }, - { - "epoch": 0.9672572968633523, - "grad_norm": 0.0006425055908039212, - "learning_rate": 0.00019999953903573344, - "loss": 46.0, - "step": 12651 - }, - { - "epoch": 0.967333753846742, - "grad_norm": 0.0015387942548841238, - "learning_rate": 0.00019999953896279897, - "loss": 46.0, - "step": 12652 - }, - { - "epoch": 0.9674102108301317, - "grad_norm": 0.0023090660106390715, - "learning_rate": 0.00019999953888985878, - "loss": 46.0, - "step": 12653 - }, - { - "epoch": 0.9674866678135214, - "grad_norm": 0.0005368542042560875, - "learning_rate": 0.00019999953881691277, - "loss": 46.0, - "step": 12654 - }, - { - "epoch": 0.9675631247969111, - "grad_norm": 0.0006658294587396085, - "learning_rate": 0.00019999953874396106, - "loss": 46.0, - "step": 12655 - }, - { - "epoch": 0.9676395817803009, - "grad_norm": 0.006444654893130064, - "learning_rate": 0.00019999953867100355, - "loss": 46.0, - "step": 12656 - }, - { - "epoch": 0.9677160387636906, - "grad_norm": 0.000931173621211201, - "learning_rate": 0.00019999953859804024, - "loss": 46.0, - "step": 12657 - }, - { - "epoch": 0.9677924957470803, - "grad_norm": 0.0013718197587877512, - "learning_rate": 0.00019999953852507116, - "loss": 46.0, - "step": 12658 - }, - { - "epoch": 0.96786895273047, - "grad_norm": 0.0008891511242836714, - "learning_rate": 0.00019999953845209633, - "loss": 46.0, - "step": 12659 - }, - { - "epoch": 0.9679454097138598, - "grad_norm": 0.0009287648135796189, - "learning_rate": 0.00019999953837911572, - "loss": 46.0, - "step": 12660 - }, - { - "epoch": 0.9680218666972494, - "grad_norm": 0.0005668742815032601, - "learning_rate": 0.00019999953830612937, - "loss": 46.0, - "step": 12661 - }, - { - "epoch": 0.9680983236806392, - "grad_norm": 0.0014932118356227875, - "learning_rate": 0.00019999953823313725, - "loss": 46.0, - "step": 12662 - }, - { - "epoch": 0.968174780664029, - "grad_norm": 0.001959705725312233, - "learning_rate": 0.00019999953816013936, - "loss": 46.0, - "step": 12663 - }, - { - "epoch": 0.9682512376474186, - "grad_norm": 0.0032820964697748423, - "learning_rate": 0.00019999953808713566, - "loss": 46.0, - "step": 12664 - }, - { - "epoch": 0.9683276946308084, - "grad_norm": 0.0005027010920457542, - "learning_rate": 0.00019999953801412622, - "loss": 46.0, - "step": 12665 - }, - { - "epoch": 0.968404151614198, - "grad_norm": 0.000707439670804888, - "learning_rate": 0.000199999537941111, - "loss": 46.0, - "step": 12666 - }, - { - "epoch": 0.9684806085975878, - "grad_norm": 0.0026558099780231714, - "learning_rate": 0.00019999953786809, - "loss": 46.0, - "step": 12667 - }, - { - "epoch": 0.9685570655809775, - "grad_norm": 0.002453891560435295, - "learning_rate": 0.00019999953779506325, - "loss": 46.0, - "step": 12668 - }, - { - "epoch": 0.9686335225643672, - "grad_norm": 0.001021756324917078, - "learning_rate": 0.00019999953772203071, - "loss": 46.0, - "step": 12669 - }, - { - "epoch": 0.968709979547757, - "grad_norm": 0.0033573920372873545, - "learning_rate": 0.00019999953764899243, - "loss": 46.0, - "step": 12670 - }, - { - "epoch": 0.9687864365311467, - "grad_norm": 0.006432279944419861, - "learning_rate": 0.00019999953757594838, - "loss": 46.0, - "step": 12671 - }, - { - "epoch": 0.9688628935145364, - "grad_norm": 0.0007216979865916073, - "learning_rate": 0.00019999953750289855, - "loss": 46.0, - "step": 12672 - }, - { - "epoch": 0.9689393504979261, - "grad_norm": 0.004251041915267706, - "learning_rate": 0.00019999953742984295, - "loss": 46.0, - "step": 12673 - }, - { - "epoch": 0.9690158074813158, - "grad_norm": 0.0021378234960138798, - "learning_rate": 0.0001999995373567816, - "loss": 46.0, - "step": 12674 - }, - { - "epoch": 0.9690922644647055, - "grad_norm": 0.0002845959970727563, - "learning_rate": 0.00019999953728371445, - "loss": 46.0, - "step": 12675 - }, - { - "epoch": 0.9691687214480953, - "grad_norm": 0.0016557008493691683, - "learning_rate": 0.00019999953721064153, - "loss": 46.0, - "step": 12676 - }, - { - "epoch": 0.9692451784314849, - "grad_norm": 0.0042579397559165955, - "learning_rate": 0.00019999953713756286, - "loss": 46.0, - "step": 12677 - }, - { - "epoch": 0.9693216354148747, - "grad_norm": 0.0010371737880632281, - "learning_rate": 0.00019999953706447841, - "loss": 46.0, - "step": 12678 - }, - { - "epoch": 0.9693980923982645, - "grad_norm": 0.00591895030811429, - "learning_rate": 0.0001999995369913882, - "loss": 46.0, - "step": 12679 - }, - { - "epoch": 0.9694745493816541, - "grad_norm": 0.00045719597255811095, - "learning_rate": 0.0001999995369182922, - "loss": 46.0, - "step": 12680 - }, - { - "epoch": 0.9695510063650439, - "grad_norm": 0.002347303321585059, - "learning_rate": 0.00019999953684519045, - "loss": 46.0, - "step": 12681 - }, - { - "epoch": 0.9696274633484336, - "grad_norm": 0.003925171680748463, - "learning_rate": 0.0001999995367720829, - "loss": 46.0, - "step": 12682 - }, - { - "epoch": 0.9697039203318233, - "grad_norm": 0.0010088527342304587, - "learning_rate": 0.00019999953669896963, - "loss": 46.0, - "step": 12683 - }, - { - "epoch": 0.969780377315213, - "grad_norm": 0.005125761032104492, - "learning_rate": 0.00019999953662585055, - "loss": 46.0, - "step": 12684 - }, - { - "epoch": 0.9698568342986027, - "grad_norm": 0.002419125521555543, - "learning_rate": 0.00019999953655272572, - "loss": 46.0, - "step": 12685 - }, - { - "epoch": 0.9699332912819925, - "grad_norm": 0.0014551643980666995, - "learning_rate": 0.00019999953647959512, - "loss": 46.0, - "step": 12686 - }, - { - "epoch": 0.9700097482653822, - "grad_norm": 0.0025780065916478634, - "learning_rate": 0.00019999953640645877, - "loss": 46.0, - "step": 12687 - }, - { - "epoch": 0.9700862052487719, - "grad_norm": 0.0015552312834188342, - "learning_rate": 0.00019999953633331662, - "loss": 46.0, - "step": 12688 - }, - { - "epoch": 0.9701626622321616, - "grad_norm": 0.0022849254310131073, - "learning_rate": 0.0001999995362601687, - "loss": 46.0, - "step": 12689 - }, - { - "epoch": 0.9702391192155514, - "grad_norm": 0.0013499159831553698, - "learning_rate": 0.000199999536187015, - "loss": 46.0, - "step": 12690 - }, - { - "epoch": 0.970315576198941, - "grad_norm": 0.0007311037043109536, - "learning_rate": 0.0001999995361138556, - "loss": 46.0, - "step": 12691 - }, - { - "epoch": 0.9703920331823308, - "grad_norm": 0.01081959716975689, - "learning_rate": 0.00019999953604069034, - "loss": 46.0, - "step": 12692 - }, - { - "epoch": 0.9704684901657206, - "grad_norm": 0.0008478965610265732, - "learning_rate": 0.00019999953596751936, - "loss": 46.0, - "step": 12693 - }, - { - "epoch": 0.9705449471491102, - "grad_norm": 0.003631509840488434, - "learning_rate": 0.0001999995358943426, - "loss": 46.0, - "step": 12694 - }, - { - "epoch": 0.9706214041325, - "grad_norm": 0.005794324912130833, - "learning_rate": 0.00019999953582116006, - "loss": 46.0, - "step": 12695 - }, - { - "epoch": 0.9706978611158896, - "grad_norm": 0.0017354452284052968, - "learning_rate": 0.00019999953574797178, - "loss": 46.0, - "step": 12696 - }, - { - "epoch": 0.9707743180992794, - "grad_norm": 0.001040946808643639, - "learning_rate": 0.0001999995356747777, - "loss": 46.0, - "step": 12697 - }, - { - "epoch": 0.9708507750826691, - "grad_norm": 0.0013337313430383801, - "learning_rate": 0.00019999953560157787, - "loss": 46.0, - "step": 12698 - }, - { - "epoch": 0.9709272320660588, - "grad_norm": 0.0008756202296353877, - "learning_rate": 0.00019999953552837227, - "loss": 46.0, - "step": 12699 - }, - { - "epoch": 0.9710036890494486, - "grad_norm": 0.0015684895915910602, - "learning_rate": 0.0001999995354551609, - "loss": 46.0, - "step": 12700 - }, - { - "epoch": 0.9710801460328383, - "grad_norm": 0.0011862721294164658, - "learning_rate": 0.00019999953538194377, - "loss": 46.0, - "step": 12701 - }, - { - "epoch": 0.971156603016228, - "grad_norm": 0.007497219834476709, - "learning_rate": 0.00019999953530872082, - "loss": 46.0, - "step": 12702 - }, - { - "epoch": 0.9712330599996177, - "grad_norm": 0.0016313801752403378, - "learning_rate": 0.00019999953523549215, - "loss": 46.0, - "step": 12703 - }, - { - "epoch": 0.9713095169830074, - "grad_norm": 0.003111917292699218, - "learning_rate": 0.00019999953516225768, - "loss": 46.0, - "step": 12704 - }, - { - "epoch": 0.9713859739663971, - "grad_norm": 0.0006332274642772973, - "learning_rate": 0.00019999953508901747, - "loss": 46.0, - "step": 12705 - }, - { - "epoch": 0.9714624309497869, - "grad_norm": 0.0022243268322199583, - "learning_rate": 0.00019999953501577148, - "loss": 46.0, - "step": 12706 - }, - { - "epoch": 0.9715388879331766, - "grad_norm": 0.008653603494167328, - "learning_rate": 0.00019999953494251972, - "loss": 46.0, - "step": 12707 - }, - { - "epoch": 0.9716153449165663, - "grad_norm": 0.0011655132984742522, - "learning_rate": 0.00019999953486926218, - "loss": 46.0, - "step": 12708 - }, - { - "epoch": 0.9716918018999561, - "grad_norm": 0.002224584110081196, - "learning_rate": 0.0001999995347959989, - "loss": 46.0, - "step": 12709 - }, - { - "epoch": 0.9717682588833457, - "grad_norm": 0.0027827557642012835, - "learning_rate": 0.00019999953472272982, - "loss": 46.0, - "step": 12710 - }, - { - "epoch": 0.9718447158667355, - "grad_norm": 0.002156534232199192, - "learning_rate": 0.00019999953464945496, - "loss": 46.0, - "step": 12711 - }, - { - "epoch": 0.9719211728501252, - "grad_norm": 0.0018375476356595755, - "learning_rate": 0.00019999953457617436, - "loss": 46.0, - "step": 12712 - }, - { - "epoch": 0.9719976298335149, - "grad_norm": 0.001420433516614139, - "learning_rate": 0.000199999534502888, - "loss": 46.0, - "step": 12713 - }, - { - "epoch": 0.9720740868169047, - "grad_norm": 0.001599422306753695, - "learning_rate": 0.00019999953442959584, - "loss": 46.0, - "step": 12714 - }, - { - "epoch": 0.9721505438002943, - "grad_norm": 0.0008550670463591814, - "learning_rate": 0.00019999953435629792, - "loss": 46.0, - "step": 12715 - }, - { - "epoch": 0.9722270007836841, - "grad_norm": 0.0008036239305511117, - "learning_rate": 0.00019999953428299425, - "loss": 46.0, - "step": 12716 - }, - { - "epoch": 0.9723034577670738, - "grad_norm": 0.0008184924954548478, - "learning_rate": 0.0001999995342096848, - "loss": 46.0, - "step": 12717 - }, - { - "epoch": 0.9723799147504635, - "grad_norm": 0.0019802553579211235, - "learning_rate": 0.00019999953413636957, - "loss": 46.0, - "step": 12718 - }, - { - "epoch": 0.9724563717338532, - "grad_norm": 0.0005797239718958735, - "learning_rate": 0.00019999953406304855, - "loss": 46.0, - "step": 12719 - }, - { - "epoch": 0.972532828717243, - "grad_norm": 0.0009108522208407521, - "learning_rate": 0.00019999953398972182, - "loss": 46.0, - "step": 12720 - }, - { - "epoch": 0.9726092857006327, - "grad_norm": 0.002286546630784869, - "learning_rate": 0.00019999953391638928, - "loss": 46.0, - "step": 12721 - }, - { - "epoch": 0.9726857426840224, - "grad_norm": 0.001663442817516625, - "learning_rate": 0.00019999953384305098, - "loss": 46.0, - "step": 12722 - }, - { - "epoch": 0.9727621996674122, - "grad_norm": 0.0009871290531009436, - "learning_rate": 0.0001999995337697069, - "loss": 46.0, - "step": 12723 - }, - { - "epoch": 0.9728386566508018, - "grad_norm": 0.0026840870268642902, - "learning_rate": 0.00019999953369635707, - "loss": 46.0, - "step": 12724 - }, - { - "epoch": 0.9729151136341916, - "grad_norm": 0.0010838746093213558, - "learning_rate": 0.00019999953362300144, - "loss": 46.0, - "step": 12725 - }, - { - "epoch": 0.9729915706175812, - "grad_norm": 0.0013851015828549862, - "learning_rate": 0.00019999953354964006, - "loss": 46.0, - "step": 12726 - }, - { - "epoch": 0.973068027600971, - "grad_norm": 0.0010990941664204001, - "learning_rate": 0.00019999953347627292, - "loss": 46.0, - "step": 12727 - }, - { - "epoch": 0.9731444845843608, - "grad_norm": 0.0007844744250178337, - "learning_rate": 0.0001999995334029, - "loss": 46.0, - "step": 12728 - }, - { - "epoch": 0.9732209415677504, - "grad_norm": 0.0009142103954218328, - "learning_rate": 0.0001999995333295213, - "loss": 46.0, - "step": 12729 - }, - { - "epoch": 0.9732973985511402, - "grad_norm": 0.0035712618846446276, - "learning_rate": 0.00019999953325613686, - "loss": 46.0, - "step": 12730 - }, - { - "epoch": 0.9733738555345299, - "grad_norm": 0.002294503152370453, - "learning_rate": 0.00019999953318274662, - "loss": 46.0, - "step": 12731 - }, - { - "epoch": 0.9734503125179196, - "grad_norm": 0.007922537624835968, - "learning_rate": 0.00019999953310935063, - "loss": 46.0, - "step": 12732 - }, - { - "epoch": 0.9735267695013093, - "grad_norm": 0.0007058531045913696, - "learning_rate": 0.00019999953303594884, - "loss": 46.0, - "step": 12733 - }, - { - "epoch": 0.973603226484699, - "grad_norm": 0.005658953450620174, - "learning_rate": 0.00019999953296254134, - "loss": 46.0, - "step": 12734 - }, - { - "epoch": 0.9736796834680888, - "grad_norm": 0.0011385917896404862, - "learning_rate": 0.00019999953288912803, - "loss": 46.0, - "step": 12735 - }, - { - "epoch": 0.9737561404514785, - "grad_norm": 0.0032692786771804094, - "learning_rate": 0.00019999953281570895, - "loss": 46.0, - "step": 12736 - }, - { - "epoch": 0.9738325974348682, - "grad_norm": 0.0006996606243774295, - "learning_rate": 0.00019999953274228412, - "loss": 46.0, - "step": 12737 - }, - { - "epoch": 0.9739090544182579, - "grad_norm": 0.0013061792124062777, - "learning_rate": 0.00019999953266885346, - "loss": 46.0, - "step": 12738 - }, - { - "epoch": 0.9739855114016477, - "grad_norm": 0.000654165749438107, - "learning_rate": 0.00019999953259541712, - "loss": 46.0, - "step": 12739 - }, - { - "epoch": 0.9740619683850373, - "grad_norm": 0.0006127366214059293, - "learning_rate": 0.00019999953252197497, - "loss": 46.0, - "step": 12740 - }, - { - "epoch": 0.9741384253684271, - "grad_norm": 0.0013876823941245675, - "learning_rate": 0.00019999953244852702, - "loss": 46.0, - "step": 12741 - }, - { - "epoch": 0.9742148823518169, - "grad_norm": 0.00219386862590909, - "learning_rate": 0.00019999953237507333, - "loss": 46.0, - "step": 12742 - }, - { - "epoch": 0.9742913393352065, - "grad_norm": 0.0007976947817951441, - "learning_rate": 0.0001999995323016139, - "loss": 46.0, - "step": 12743 - }, - { - "epoch": 0.9743677963185963, - "grad_norm": 0.0011126977624371648, - "learning_rate": 0.00019999953222814865, - "loss": 46.0, - "step": 12744 - }, - { - "epoch": 0.9744442533019859, - "grad_norm": 0.001341003691777587, - "learning_rate": 0.00019999953215467766, - "loss": 46.0, - "step": 12745 - }, - { - "epoch": 0.9745207102853757, - "grad_norm": 0.0020163992885500193, - "learning_rate": 0.00019999953208120087, - "loss": 46.0, - "step": 12746 - }, - { - "epoch": 0.9745971672687654, - "grad_norm": 0.0019039264880120754, - "learning_rate": 0.00019999953200771834, - "loss": 46.0, - "step": 12747 - }, - { - "epoch": 0.9746736242521551, - "grad_norm": 0.0010318623390048742, - "learning_rate": 0.00019999953193423006, - "loss": 46.0, - "step": 12748 - }, - { - "epoch": 0.9747500812355449, - "grad_norm": 0.0017280089668929577, - "learning_rate": 0.00019999953186073595, - "loss": 46.0, - "step": 12749 - }, - { - "epoch": 0.9748265382189346, - "grad_norm": 0.0007378591108135879, - "learning_rate": 0.00019999953178723612, - "loss": 46.0, - "step": 12750 - }, - { - "epoch": 0.9749029952023243, - "grad_norm": 0.0013012961717322469, - "learning_rate": 0.0001999995317137305, - "loss": 46.0, - "step": 12751 - }, - { - "epoch": 0.974979452185714, - "grad_norm": 0.0008283415227197111, - "learning_rate": 0.0001999995316402191, - "loss": 46.0, - "step": 12752 - }, - { - "epoch": 0.9750559091691038, - "grad_norm": 0.0007211164338514209, - "learning_rate": 0.00019999953156670195, - "loss": 46.0, - "step": 12753 - }, - { - "epoch": 0.9751323661524934, - "grad_norm": 0.0016468371031805873, - "learning_rate": 0.00019999953149317902, - "loss": 46.0, - "step": 12754 - }, - { - "epoch": 0.9752088231358832, - "grad_norm": 0.0018547163344919682, - "learning_rate": 0.00019999953141965033, - "loss": 46.0, - "step": 12755 - }, - { - "epoch": 0.9752852801192728, - "grad_norm": 0.001210667658597231, - "learning_rate": 0.00019999953134611586, - "loss": 46.0, - "step": 12756 - }, - { - "epoch": 0.9753617371026626, - "grad_norm": 0.005310072097927332, - "learning_rate": 0.00019999953127257562, - "loss": 46.0, - "step": 12757 - }, - { - "epoch": 0.9754381940860524, - "grad_norm": 0.003956550732254982, - "learning_rate": 0.00019999953119902964, - "loss": 46.0, - "step": 12758 - }, - { - "epoch": 0.975514651069442, - "grad_norm": 0.0024545593187212944, - "learning_rate": 0.00019999953112547787, - "loss": 46.0, - "step": 12759 - }, - { - "epoch": 0.9755911080528318, - "grad_norm": 0.0032583093270659447, - "learning_rate": 0.00019999953105192031, - "loss": 46.0, - "step": 12760 - }, - { - "epoch": 0.9756675650362215, - "grad_norm": 0.0008235130808316171, - "learning_rate": 0.000199999530978357, - "loss": 46.0, - "step": 12761 - }, - { - "epoch": 0.9757440220196112, - "grad_norm": 0.002267898293212056, - "learning_rate": 0.00019999953090478793, - "loss": 46.0, - "step": 12762 - }, - { - "epoch": 0.975820479003001, - "grad_norm": 0.0022681127302348614, - "learning_rate": 0.00019999953083121307, - "loss": 46.0, - "step": 12763 - }, - { - "epoch": 0.9758969359863907, - "grad_norm": 0.0011638295836746693, - "learning_rate": 0.00019999953075763247, - "loss": 46.0, - "step": 12764 - }, - { - "epoch": 0.9759733929697804, - "grad_norm": 0.007835728116333485, - "learning_rate": 0.0001999995306840461, - "loss": 46.0, - "step": 12765 - }, - { - "epoch": 0.9760498499531701, - "grad_norm": 0.0005981057765893638, - "learning_rate": 0.00019999953061045392, - "loss": 46.0, - "step": 12766 - }, - { - "epoch": 0.9761263069365598, - "grad_norm": 0.002416054019704461, - "learning_rate": 0.000199999530536856, - "loss": 46.0, - "step": 12767 - }, - { - "epoch": 0.9762027639199495, - "grad_norm": 0.003757932223379612, - "learning_rate": 0.0001999995304632523, - "loss": 46.0, - "step": 12768 - }, - { - "epoch": 0.9762792209033393, - "grad_norm": 0.003490697592496872, - "learning_rate": 0.00019999953038964284, - "loss": 46.0, - "step": 12769 - }, - { - "epoch": 0.976355677886729, - "grad_norm": 0.001993812620639801, - "learning_rate": 0.00019999953031602758, - "loss": 46.0, - "step": 12770 - }, - { - "epoch": 0.9764321348701187, - "grad_norm": 0.004712095018476248, - "learning_rate": 0.00019999953024240662, - "loss": 46.0, - "step": 12771 - }, - { - "epoch": 0.9765085918535085, - "grad_norm": 0.0052865780889987946, - "learning_rate": 0.0001999995301687798, - "loss": 46.0, - "step": 12772 - }, - { - "epoch": 0.9765850488368981, - "grad_norm": 0.0007005177903920412, - "learning_rate": 0.00019999953009514727, - "loss": 46.0, - "step": 12773 - }, - { - "epoch": 0.9766615058202879, - "grad_norm": 0.001018381561152637, - "learning_rate": 0.00019999953002150896, - "loss": 46.0, - "step": 12774 - }, - { - "epoch": 0.9767379628036775, - "grad_norm": 0.0016934463055804372, - "learning_rate": 0.00019999952994786488, - "loss": 46.0, - "step": 12775 - }, - { - "epoch": 0.9768144197870673, - "grad_norm": 0.013121184892952442, - "learning_rate": 0.00019999952987421503, - "loss": 46.0, - "step": 12776 - }, - { - "epoch": 0.976890876770457, - "grad_norm": 0.0019209851743653417, - "learning_rate": 0.0001999995298005594, - "loss": 46.0, - "step": 12777 - }, - { - "epoch": 0.9769673337538467, - "grad_norm": 0.0007861430640332401, - "learning_rate": 0.00019999952972689803, - "loss": 46.0, - "step": 12778 - }, - { - "epoch": 0.9770437907372365, - "grad_norm": 0.0008459283271804452, - "learning_rate": 0.00019999952965323085, - "loss": 46.0, - "step": 12779 - }, - { - "epoch": 0.9771202477206262, - "grad_norm": 0.0008801782387308776, - "learning_rate": 0.00019999952957955793, - "loss": 46.0, - "step": 12780 - }, - { - "epoch": 0.9771967047040159, - "grad_norm": 0.0008988889749161899, - "learning_rate": 0.00019999952950587924, - "loss": 46.0, - "step": 12781 - }, - { - "epoch": 0.9772731616874056, - "grad_norm": 0.0008732814458198845, - "learning_rate": 0.00019999952943219475, - "loss": 46.0, - "step": 12782 - }, - { - "epoch": 0.9773496186707954, - "grad_norm": 0.001274761976674199, - "learning_rate": 0.00019999952935850453, - "loss": 46.0, - "step": 12783 - }, - { - "epoch": 0.977426075654185, - "grad_norm": 0.0013002234045416117, - "learning_rate": 0.00019999952928480852, - "loss": 46.0, - "step": 12784 - }, - { - "epoch": 0.9775025326375748, - "grad_norm": 0.001604813733138144, - "learning_rate": 0.00019999952921110673, - "loss": 46.0, - "step": 12785 - }, - { - "epoch": 0.9775789896209645, - "grad_norm": 0.0005631727981381118, - "learning_rate": 0.0001999995291373992, - "loss": 46.0, - "step": 12786 - }, - { - "epoch": 0.9776554466043542, - "grad_norm": 0.001688444404862821, - "learning_rate": 0.0001999995290636859, - "loss": 46.0, - "step": 12787 - }, - { - "epoch": 0.977731903587744, - "grad_norm": 0.000580216699745506, - "learning_rate": 0.0001999995289899668, - "loss": 46.0, - "step": 12788 - }, - { - "epoch": 0.9778083605711336, - "grad_norm": 0.001351632410660386, - "learning_rate": 0.00019999952891624193, - "loss": 46.0, - "step": 12789 - }, - { - "epoch": 0.9778848175545234, - "grad_norm": 0.01026106532663107, - "learning_rate": 0.0001999995288425113, - "loss": 46.0, - "step": 12790 - }, - { - "epoch": 0.9779612745379131, - "grad_norm": 0.0030651770066469908, - "learning_rate": 0.00019999952876877493, - "loss": 46.0, - "step": 12791 - }, - { - "epoch": 0.9780377315213028, - "grad_norm": 0.001269552856683731, - "learning_rate": 0.00019999952869503276, - "loss": 46.0, - "step": 12792 - }, - { - "epoch": 0.9781141885046926, - "grad_norm": 0.0006460731965489686, - "learning_rate": 0.00019999952862128484, - "loss": 46.0, - "step": 12793 - }, - { - "epoch": 0.9781906454880823, - "grad_norm": 0.0023850095458328724, - "learning_rate": 0.00019999952854753112, - "loss": 46.0, - "step": 12794 - }, - { - "epoch": 0.978267102471472, - "grad_norm": 0.0010806452482938766, - "learning_rate": 0.00019999952847377165, - "loss": 46.0, - "step": 12795 - }, - { - "epoch": 0.9783435594548617, - "grad_norm": 0.0018902196316048503, - "learning_rate": 0.00019999952840000642, - "loss": 46.0, - "step": 12796 - }, - { - "epoch": 0.9784200164382514, - "grad_norm": 0.0007955400506034493, - "learning_rate": 0.0001999995283262354, - "loss": 46.0, - "step": 12797 - }, - { - "epoch": 0.9784964734216411, - "grad_norm": 0.001599586452357471, - "learning_rate": 0.00019999952825245862, - "loss": 46.0, - "step": 12798 - }, - { - "epoch": 0.9785729304050309, - "grad_norm": 0.00045560463331639767, - "learning_rate": 0.00019999952817867608, - "loss": 46.0, - "step": 12799 - }, - { - "epoch": 0.9786493873884206, - "grad_norm": 0.0010739242425188422, - "learning_rate": 0.00019999952810488775, - "loss": 46.0, - "step": 12800 - }, - { - "epoch": 0.9787258443718103, - "grad_norm": 0.0008624702459201217, - "learning_rate": 0.00019999952803109367, - "loss": 46.0, - "step": 12801 - }, - { - "epoch": 0.9788023013552001, - "grad_norm": 0.0006362621206790209, - "learning_rate": 0.0001999995279572938, - "loss": 46.0, - "step": 12802 - }, - { - "epoch": 0.9788787583385897, - "grad_norm": 0.0010870335390791297, - "learning_rate": 0.0001999995278834882, - "loss": 46.0, - "step": 12803 - }, - { - "epoch": 0.9789552153219795, - "grad_norm": 0.0011031472822651267, - "learning_rate": 0.0001999995278096768, - "loss": 46.0, - "step": 12804 - }, - { - "epoch": 0.9790316723053691, - "grad_norm": 0.0018994042184203863, - "learning_rate": 0.00019999952773585964, - "loss": 46.0, - "step": 12805 - }, - { - "epoch": 0.9791081292887589, - "grad_norm": 0.0002852639590855688, - "learning_rate": 0.0001999995276620367, - "loss": 46.0, - "step": 12806 - }, - { - "epoch": 0.9791845862721487, - "grad_norm": 0.0026290160603821278, - "learning_rate": 0.00019999952758820798, - "loss": 46.0, - "step": 12807 - }, - { - "epoch": 0.9792610432555383, - "grad_norm": 0.001884467783384025, - "learning_rate": 0.0001999995275143735, - "loss": 46.0, - "step": 12808 - }, - { - "epoch": 0.9793375002389281, - "grad_norm": 0.019786866381764412, - "learning_rate": 0.00019999952744053327, - "loss": 46.0, - "step": 12809 - }, - { - "epoch": 0.9794139572223178, - "grad_norm": 0.0008956074598245323, - "learning_rate": 0.00019999952736668726, - "loss": 46.0, - "step": 12810 - }, - { - "epoch": 0.9794904142057075, - "grad_norm": 0.0010076133767142892, - "learning_rate": 0.00019999952729283547, - "loss": 46.0, - "step": 12811 - }, - { - "epoch": 0.9795668711890972, - "grad_norm": 0.0008984691230580211, - "learning_rate": 0.00019999952721897791, - "loss": 46.0, - "step": 12812 - }, - { - "epoch": 0.979643328172487, - "grad_norm": 0.0017784673254936934, - "learning_rate": 0.0001999995271451146, - "loss": 46.0, - "step": 12813 - }, - { - "epoch": 0.9797197851558767, - "grad_norm": 0.001371965860016644, - "learning_rate": 0.00019999952707124553, - "loss": 46.0, - "step": 12814 - }, - { - "epoch": 0.9797962421392664, - "grad_norm": 0.001625708187930286, - "learning_rate": 0.00019999952699737065, - "loss": 46.0, - "step": 12815 - }, - { - "epoch": 0.9798726991226561, - "grad_norm": 0.017182547599077225, - "learning_rate": 0.00019999952692349005, - "loss": 46.0, - "step": 12816 - }, - { - "epoch": 0.9799491561060458, - "grad_norm": 0.00389941711910069, - "learning_rate": 0.00019999952684960362, - "loss": 46.0, - "step": 12817 - }, - { - "epoch": 0.9800256130894356, - "grad_norm": 0.006198948714882135, - "learning_rate": 0.00019999952677571145, - "loss": 46.0, - "step": 12818 - }, - { - "epoch": 0.9801020700728252, - "grad_norm": 0.0012636421015486121, - "learning_rate": 0.00019999952670181353, - "loss": 46.0, - "step": 12819 - }, - { - "epoch": 0.980178527056215, - "grad_norm": 0.0007669960032217205, - "learning_rate": 0.0001999995266279098, - "loss": 46.0, - "step": 12820 - }, - { - "epoch": 0.9802549840396048, - "grad_norm": 0.002090106951072812, - "learning_rate": 0.00019999952655400035, - "loss": 46.0, - "step": 12821 - }, - { - "epoch": 0.9803314410229944, - "grad_norm": 0.0005873393383808434, - "learning_rate": 0.00019999952648008508, - "loss": 46.0, - "step": 12822 - }, - { - "epoch": 0.9804078980063842, - "grad_norm": 0.003474652534350753, - "learning_rate": 0.0001999995264061641, - "loss": 46.0, - "step": 12823 - }, - { - "epoch": 0.9804843549897739, - "grad_norm": 0.0016198530793190002, - "learning_rate": 0.0001999995263322373, - "loss": 46.0, - "step": 12824 - }, - { - "epoch": 0.9805608119731636, - "grad_norm": 0.0009181445930153131, - "learning_rate": 0.00019999952625830475, - "loss": 46.0, - "step": 12825 - }, - { - "epoch": 0.9806372689565533, - "grad_norm": 0.006141019985079765, - "learning_rate": 0.00019999952618436644, - "loss": 46.0, - "step": 12826 - }, - { - "epoch": 0.980713725939943, - "grad_norm": 0.001359390327706933, - "learning_rate": 0.00019999952611042234, - "loss": 46.0, - "step": 12827 - }, - { - "epoch": 0.9807901829233328, - "grad_norm": 0.003163157496601343, - "learning_rate": 0.00019999952603647249, - "loss": 46.0, - "step": 12828 - }, - { - "epoch": 0.9808666399067225, - "grad_norm": 0.0006966502987779677, - "learning_rate": 0.00019999952596251683, - "loss": 46.0, - "step": 12829 - }, - { - "epoch": 0.9809430968901122, - "grad_norm": 0.0005184897454455495, - "learning_rate": 0.00019999952588855543, - "loss": 46.0, - "step": 12830 - }, - { - "epoch": 0.9810195538735019, - "grad_norm": 0.002286858158186078, - "learning_rate": 0.00019999952581458826, - "loss": 46.0, - "step": 12831 - }, - { - "epoch": 0.9810960108568917, - "grad_norm": 0.004737162962555885, - "learning_rate": 0.00019999952574061532, - "loss": 46.0, - "step": 12832 - }, - { - "epoch": 0.9811724678402813, - "grad_norm": 0.000951636815443635, - "learning_rate": 0.00019999952566663662, - "loss": 46.0, - "step": 12833 - }, - { - "epoch": 0.9812489248236711, - "grad_norm": 0.001245854771696031, - "learning_rate": 0.00019999952559265213, - "loss": 46.0, - "step": 12834 - }, - { - "epoch": 0.9813253818070607, - "grad_norm": 0.010222101584076881, - "learning_rate": 0.0001999995255186619, - "loss": 46.0, - "step": 12835 - }, - { - "epoch": 0.9814018387904505, - "grad_norm": 0.0003897756105288863, - "learning_rate": 0.00019999952544466588, - "loss": 46.0, - "step": 12836 - }, - { - "epoch": 0.9814782957738403, - "grad_norm": 0.001301162177696824, - "learning_rate": 0.0001999995253706641, - "loss": 46.0, - "step": 12837 - }, - { - "epoch": 0.9815547527572299, - "grad_norm": 0.0008635082631371915, - "learning_rate": 0.00019999952529665654, - "loss": 46.0, - "step": 12838 - }, - { - "epoch": 0.9816312097406197, - "grad_norm": 0.002238568617030978, - "learning_rate": 0.0001999995252226432, - "loss": 46.0, - "step": 12839 - }, - { - "epoch": 0.9817076667240094, - "grad_norm": 0.0009574776631779969, - "learning_rate": 0.0001999995251486241, - "loss": 46.0, - "step": 12840 - }, - { - "epoch": 0.9817841237073991, - "grad_norm": 0.0015107898507267237, - "learning_rate": 0.00019999952507459925, - "loss": 46.0, - "step": 12841 - }, - { - "epoch": 0.9818605806907889, - "grad_norm": 0.006809369195252657, - "learning_rate": 0.00019999952500056862, - "loss": 46.0, - "step": 12842 - }, - { - "epoch": 0.9819370376741786, - "grad_norm": 0.001495785778388381, - "learning_rate": 0.0001999995249265322, - "loss": 46.0, - "step": 12843 - }, - { - "epoch": 0.9820134946575683, - "grad_norm": 0.0007589769084006548, - "learning_rate": 0.00019999952485249005, - "loss": 46.0, - "step": 12844 - }, - { - "epoch": 0.982089951640958, - "grad_norm": 0.0015061625745147467, - "learning_rate": 0.00019999952477844208, - "loss": 46.0, - "step": 12845 - }, - { - "epoch": 0.9821664086243477, - "grad_norm": 0.001059127040207386, - "learning_rate": 0.00019999952470438838, - "loss": 46.0, - "step": 12846 - }, - { - "epoch": 0.9822428656077374, - "grad_norm": 0.0019423268968239427, - "learning_rate": 0.00019999952463032892, - "loss": 46.0, - "step": 12847 - }, - { - "epoch": 0.9823193225911272, - "grad_norm": 0.002396803814917803, - "learning_rate": 0.00019999952455626365, - "loss": 46.0, - "step": 12848 - }, - { - "epoch": 0.9823957795745168, - "grad_norm": 0.0012942602625116706, - "learning_rate": 0.00019999952448219264, - "loss": 46.0, - "step": 12849 - }, - { - "epoch": 0.9824722365579066, - "grad_norm": 0.001291621127165854, - "learning_rate": 0.00019999952440811586, - "loss": 46.0, - "step": 12850 - }, - { - "epoch": 0.9825486935412964, - "grad_norm": 0.015728944912552834, - "learning_rate": 0.00019999952433403327, - "loss": 46.0, - "step": 12851 - }, - { - "epoch": 0.982625150524686, - "grad_norm": 0.002567258197814226, - "learning_rate": 0.00019999952425994497, - "loss": 46.0, - "step": 12852 - }, - { - "epoch": 0.9827016075080758, - "grad_norm": 0.0008253243868239224, - "learning_rate": 0.00019999952418585086, - "loss": 46.0, - "step": 12853 - }, - { - "epoch": 0.9827780644914655, - "grad_norm": 0.0002872333861887455, - "learning_rate": 0.00019999952411175098, - "loss": 46.0, - "step": 12854 - }, - { - "epoch": 0.9828545214748552, - "grad_norm": 0.0011262326734140515, - "learning_rate": 0.00019999952403764536, - "loss": 46.0, - "step": 12855 - }, - { - "epoch": 0.982930978458245, - "grad_norm": 0.0009918413124978542, - "learning_rate": 0.00019999952396353393, - "loss": 46.0, - "step": 12856 - }, - { - "epoch": 0.9830074354416346, - "grad_norm": 0.0010470869019627571, - "learning_rate": 0.0001999995238894168, - "loss": 46.0, - "step": 12857 - }, - { - "epoch": 0.9830838924250244, - "grad_norm": 0.001586166792549193, - "learning_rate": 0.00019999952381529384, - "loss": 46.0, - "step": 12858 - }, - { - "epoch": 0.9831603494084141, - "grad_norm": 0.004364318680018187, - "learning_rate": 0.0001999995237411651, - "loss": 46.0, - "step": 12859 - }, - { - "epoch": 0.9832368063918038, - "grad_norm": 0.0009643954690545797, - "learning_rate": 0.00019999952366703063, - "loss": 46.0, - "step": 12860 - }, - { - "epoch": 0.9833132633751935, - "grad_norm": 0.0022769563365727663, - "learning_rate": 0.0001999995235928904, - "loss": 46.0, - "step": 12861 - }, - { - "epoch": 0.9833897203585833, - "grad_norm": 0.0022856772411614656, - "learning_rate": 0.00019999952351874435, - "loss": 46.0, - "step": 12862 - }, - { - "epoch": 0.983466177341973, - "grad_norm": 0.0018426288152113557, - "learning_rate": 0.00019999952344459257, - "loss": 46.0, - "step": 12863 - }, - { - "epoch": 0.9835426343253627, - "grad_norm": 0.005390865262597799, - "learning_rate": 0.00019999952337043498, - "loss": 46.0, - "step": 12864 - }, - { - "epoch": 0.9836190913087524, - "grad_norm": 0.0012018068227916956, - "learning_rate": 0.00019999952329627168, - "loss": 46.0, - "step": 12865 - }, - { - "epoch": 0.9836955482921421, - "grad_norm": 0.0018065322656184435, - "learning_rate": 0.00019999952322210257, - "loss": 46.0, - "step": 12866 - }, - { - "epoch": 0.9837720052755319, - "grad_norm": 0.0006826574099250138, - "learning_rate": 0.0001999995231479277, - "loss": 46.0, - "step": 12867 - }, - { - "epoch": 0.9838484622589215, - "grad_norm": 0.009405352175235748, - "learning_rate": 0.00019999952307374707, - "loss": 46.0, - "step": 12868 - }, - { - "epoch": 0.9839249192423113, - "grad_norm": 0.0009445718606002629, - "learning_rate": 0.00019999952299956065, - "loss": 46.0, - "step": 12869 - }, - { - "epoch": 0.984001376225701, - "grad_norm": 0.0024388430174440145, - "learning_rate": 0.00019999952292536847, - "loss": 46.0, - "step": 12870 - }, - { - "epoch": 0.9840778332090907, - "grad_norm": 0.0004667453467845917, - "learning_rate": 0.00019999952285117053, - "loss": 46.0, - "step": 12871 - }, - { - "epoch": 0.9841542901924805, - "grad_norm": 0.0019605106208473444, - "learning_rate": 0.0001999995227769668, - "loss": 46.0, - "step": 12872 - }, - { - "epoch": 0.9842307471758702, - "grad_norm": 0.0015231982106342912, - "learning_rate": 0.00019999952270275732, - "loss": 46.0, - "step": 12873 - }, - { - "epoch": 0.9843072041592599, - "grad_norm": 0.0011590765789151192, - "learning_rate": 0.00019999952262854208, - "loss": 46.0, - "step": 12874 - }, - { - "epoch": 0.9843836611426496, - "grad_norm": 0.0013544122921302915, - "learning_rate": 0.00019999952255432104, - "loss": 46.0, - "step": 12875 - }, - { - "epoch": 0.9844601181260393, - "grad_norm": 0.007144773378968239, - "learning_rate": 0.00019999952248009426, - "loss": 46.0, - "step": 12876 - }, - { - "epoch": 0.984536575109429, - "grad_norm": 0.0005381340743042529, - "learning_rate": 0.00019999952240586167, - "loss": 46.0, - "step": 12877 - }, - { - "epoch": 0.9846130320928188, - "grad_norm": 0.007375964429229498, - "learning_rate": 0.00019999952233162337, - "loss": 46.0, - "step": 12878 - }, - { - "epoch": 0.9846894890762085, - "grad_norm": 0.004662943538278341, - "learning_rate": 0.00019999952225737926, - "loss": 46.0, - "step": 12879 - }, - { - "epoch": 0.9847659460595982, - "grad_norm": 0.0012037480482831597, - "learning_rate": 0.00019999952218312936, - "loss": 46.0, - "step": 12880 - }, - { - "epoch": 0.984842403042988, - "grad_norm": 0.0010006591910496354, - "learning_rate": 0.00019999952210887373, - "loss": 46.0, - "step": 12881 - }, - { - "epoch": 0.9849188600263776, - "grad_norm": 0.0024163899943232536, - "learning_rate": 0.00019999952203461234, - "loss": 46.0, - "step": 12882 - }, - { - "epoch": 0.9849953170097674, - "grad_norm": 0.0008803425589576364, - "learning_rate": 0.00019999952196034514, - "loss": 46.0, - "step": 12883 - }, - { - "epoch": 0.9850717739931572, - "grad_norm": 0.0007207217277027667, - "learning_rate": 0.0001999995218860722, - "loss": 46.0, - "step": 12884 - }, - { - "epoch": 0.9851482309765468, - "grad_norm": 0.0009455467225052416, - "learning_rate": 0.00019999952181179348, - "loss": 46.0, - "step": 12885 - }, - { - "epoch": 0.9852246879599366, - "grad_norm": 0.0005324353114701807, - "learning_rate": 0.00019999952173750898, - "loss": 46.0, - "step": 12886 - }, - { - "epoch": 0.9853011449433262, - "grad_norm": 0.004103631712496281, - "learning_rate": 0.00019999952166321872, - "loss": 46.0, - "step": 12887 - }, - { - "epoch": 0.985377601926716, - "grad_norm": 0.0023021239321678877, - "learning_rate": 0.0001999995215889227, - "loss": 46.0, - "step": 12888 - }, - { - "epoch": 0.9854540589101057, - "grad_norm": 0.004153313580900431, - "learning_rate": 0.00019999952151462092, - "loss": 46.0, - "step": 12889 - }, - { - "epoch": 0.9855305158934954, - "grad_norm": 0.0007513034506700933, - "learning_rate": 0.00019999952144031334, - "loss": 46.0, - "step": 12890 - }, - { - "epoch": 0.9856069728768851, - "grad_norm": 0.0020820791833102703, - "learning_rate": 0.000199999521366, - "loss": 46.0, - "step": 12891 - }, - { - "epoch": 0.9856834298602749, - "grad_norm": 0.0007823725463822484, - "learning_rate": 0.0001999995212916809, - "loss": 46.0, - "step": 12892 - }, - { - "epoch": 0.9857598868436646, - "grad_norm": 0.001226668944582343, - "learning_rate": 0.00019999952121735603, - "loss": 46.0, - "step": 12893 - }, - { - "epoch": 0.9858363438270543, - "grad_norm": 0.00089498411398381, - "learning_rate": 0.00019999952114302537, - "loss": 46.0, - "step": 12894 - }, - { - "epoch": 0.9859128008104441, - "grad_norm": 0.001373312552459538, - "learning_rate": 0.00019999952106868895, - "loss": 46.0, - "step": 12895 - }, - { - "epoch": 0.9859892577938337, - "grad_norm": 0.0038569029420614243, - "learning_rate": 0.00019999952099434678, - "loss": 46.0, - "step": 12896 - }, - { - "epoch": 0.9860657147772235, - "grad_norm": 0.005535216070711613, - "learning_rate": 0.00019999952091999884, - "loss": 46.0, - "step": 12897 - }, - { - "epoch": 0.9861421717606131, - "grad_norm": 0.0007432788843289018, - "learning_rate": 0.00019999952084564512, - "loss": 46.0, - "step": 12898 - }, - { - "epoch": 0.9862186287440029, - "grad_norm": 0.0024454211816191673, - "learning_rate": 0.0001999995207712856, - "loss": 46.0, - "step": 12899 - }, - { - "epoch": 0.9862950857273927, - "grad_norm": 0.0028503509238362312, - "learning_rate": 0.00019999952069692036, - "loss": 46.0, - "step": 12900 - }, - { - "epoch": 0.9863715427107823, - "grad_norm": 0.0021564550697803497, - "learning_rate": 0.00019999952062254935, - "loss": 46.0, - "step": 12901 - }, - { - "epoch": 0.9864479996941721, - "grad_norm": 0.002211756305769086, - "learning_rate": 0.0001999995205481725, - "loss": 46.0, - "step": 12902 - }, - { - "epoch": 0.9865244566775618, - "grad_norm": 0.0018920553848147392, - "learning_rate": 0.00019999952047378996, - "loss": 46.0, - "step": 12903 - }, - { - "epoch": 0.9866009136609515, - "grad_norm": 0.0025991916190832853, - "learning_rate": 0.00019999952039940162, - "loss": 46.0, - "step": 12904 - }, - { - "epoch": 0.9866773706443412, - "grad_norm": 0.000843998568598181, - "learning_rate": 0.00019999952032500752, - "loss": 46.0, - "step": 12905 - }, - { - "epoch": 0.9867538276277309, - "grad_norm": 0.0016047506360337138, - "learning_rate": 0.00019999952025060762, - "loss": 46.0, - "step": 12906 - }, - { - "epoch": 0.9868302846111207, - "grad_norm": 0.0007460570195689797, - "learning_rate": 0.000199999520176202, - "loss": 46.0, - "step": 12907 - }, - { - "epoch": 0.9869067415945104, - "grad_norm": 0.004684058949351311, - "learning_rate": 0.00019999952010179057, - "loss": 46.0, - "step": 12908 - }, - { - "epoch": 0.9869831985779001, - "grad_norm": 0.006778838112950325, - "learning_rate": 0.0001999995200273734, - "loss": 46.0, - "step": 12909 - }, - { - "epoch": 0.9870596555612898, - "grad_norm": 0.0008412369061261415, - "learning_rate": 0.00019999951995295043, - "loss": 46.0, - "step": 12910 - }, - { - "epoch": 0.9871361125446796, - "grad_norm": 0.004750713240355253, - "learning_rate": 0.0001999995198785217, - "loss": 46.0, - "step": 12911 - }, - { - "epoch": 0.9872125695280692, - "grad_norm": 0.0012419220292940736, - "learning_rate": 0.0001999995198040872, - "loss": 46.0, - "step": 12912 - }, - { - "epoch": 0.987289026511459, - "grad_norm": 0.005220053251832724, - "learning_rate": 0.00019999951972964695, - "loss": 46.0, - "step": 12913 - }, - { - "epoch": 0.9873654834948488, - "grad_norm": 0.0013945535756647587, - "learning_rate": 0.00019999951965520092, - "loss": 46.0, - "step": 12914 - }, - { - "epoch": 0.9874419404782384, - "grad_norm": 0.0021088188514113426, - "learning_rate": 0.0001999995195807491, - "loss": 46.0, - "step": 12915 - }, - { - "epoch": 0.9875183974616282, - "grad_norm": 0.00351086538285017, - "learning_rate": 0.00019999951950629155, - "loss": 46.0, - "step": 12916 - }, - { - "epoch": 0.9875948544450178, - "grad_norm": 0.0007880371995270252, - "learning_rate": 0.00019999951943182822, - "loss": 46.0, - "step": 12917 - }, - { - "epoch": 0.9876713114284076, - "grad_norm": 0.0011672667460516095, - "learning_rate": 0.0001999995193573591, - "loss": 46.0, - "step": 12918 - }, - { - "epoch": 0.9877477684117973, - "grad_norm": 0.01388244517147541, - "learning_rate": 0.0001999995192828842, - "loss": 46.0, - "step": 12919 - }, - { - "epoch": 0.987824225395187, - "grad_norm": 0.002916842233389616, - "learning_rate": 0.00019999951920840356, - "loss": 46.0, - "step": 12920 - }, - { - "epoch": 0.9879006823785768, - "grad_norm": 0.002946749795228243, - "learning_rate": 0.00019999951913391714, - "loss": 46.0, - "step": 12921 - }, - { - "epoch": 0.9879771393619665, - "grad_norm": 0.0003848739725071937, - "learning_rate": 0.00019999951905942497, - "loss": 46.0, - "step": 12922 - }, - { - "epoch": 0.9880535963453562, - "grad_norm": 0.000876496487762779, - "learning_rate": 0.000199999518984927, - "loss": 46.0, - "step": 12923 - }, - { - "epoch": 0.9881300533287459, - "grad_norm": 0.0003296476788818836, - "learning_rate": 0.00019999951891042328, - "loss": 46.0, - "step": 12924 - }, - { - "epoch": 0.9882065103121357, - "grad_norm": 0.002046069363132119, - "learning_rate": 0.0001999995188359138, - "loss": 46.0, - "step": 12925 - }, - { - "epoch": 0.9882829672955253, - "grad_norm": 0.0017047772416844964, - "learning_rate": 0.0001999995187613985, - "loss": 46.0, - "step": 12926 - }, - { - "epoch": 0.9883594242789151, - "grad_norm": 0.00694029126316309, - "learning_rate": 0.0001999995186868775, - "loss": 46.0, - "step": 12927 - }, - { - "epoch": 0.9884358812623047, - "grad_norm": 0.002277107909321785, - "learning_rate": 0.00019999951861235068, - "loss": 46.0, - "step": 12928 - }, - { - "epoch": 0.9885123382456945, - "grad_norm": 0.0008195373229682446, - "learning_rate": 0.00019999951853781812, - "loss": 46.0, - "step": 12929 - }, - { - "epoch": 0.9885887952290843, - "grad_norm": 0.001289637410081923, - "learning_rate": 0.00019999951846327976, - "loss": 46.0, - "step": 12930 - }, - { - "epoch": 0.9886652522124739, - "grad_norm": 0.0011808108538389206, - "learning_rate": 0.00019999951838873563, - "loss": 46.0, - "step": 12931 - }, - { - "epoch": 0.9887417091958637, - "grad_norm": 0.0015951062086969614, - "learning_rate": 0.00019999951831418575, - "loss": 46.0, - "step": 12932 - }, - { - "epoch": 0.9888181661792534, - "grad_norm": 0.0011097746901214123, - "learning_rate": 0.0001999995182396301, - "loss": 46.0, - "step": 12933 - }, - { - "epoch": 0.9888946231626431, - "grad_norm": 0.00202350364997983, - "learning_rate": 0.00019999951816506868, - "loss": 46.0, - "step": 12934 - }, - { - "epoch": 0.9889710801460329, - "grad_norm": 0.0007234001532196999, - "learning_rate": 0.00019999951809050148, - "loss": 46.0, - "step": 12935 - }, - { - "epoch": 0.9890475371294225, - "grad_norm": 0.004777347669005394, - "learning_rate": 0.00019999951801592854, - "loss": 46.0, - "step": 12936 - }, - { - "epoch": 0.9891239941128123, - "grad_norm": 0.0010399366728961468, - "learning_rate": 0.00019999951794134982, - "loss": 46.0, - "step": 12937 - }, - { - "epoch": 0.989200451096202, - "grad_norm": 0.016312703490257263, - "learning_rate": 0.0001999995178667653, - "loss": 46.0, - "step": 12938 - }, - { - "epoch": 0.9892769080795917, - "grad_norm": 0.0014975982485339046, - "learning_rate": 0.00019999951779217504, - "loss": 46.0, - "step": 12939 - }, - { - "epoch": 0.9893533650629814, - "grad_norm": 0.0010700864950194955, - "learning_rate": 0.00019999951771757898, - "loss": 46.0, - "step": 12940 - }, - { - "epoch": 0.9894298220463712, - "grad_norm": 0.0015709844883531332, - "learning_rate": 0.0001999995176429772, - "loss": 46.0, - "step": 12941 - }, - { - "epoch": 0.9895062790297608, - "grad_norm": 0.0007254788069985807, - "learning_rate": 0.00019999951756836964, - "loss": 46.0, - "step": 12942 - }, - { - "epoch": 0.9895827360131506, - "grad_norm": 0.0008481065160594881, - "learning_rate": 0.00019999951749375628, - "loss": 46.0, - "step": 12943 - }, - { - "epoch": 0.9896591929965404, - "grad_norm": 0.005952953360974789, - "learning_rate": 0.00019999951741913715, - "loss": 46.0, - "step": 12944 - }, - { - "epoch": 0.98973564997993, - "grad_norm": 0.0022617268841713667, - "learning_rate": 0.00019999951734451225, - "loss": 46.0, - "step": 12945 - }, - { - "epoch": 0.9898121069633198, - "grad_norm": 0.000859533145558089, - "learning_rate": 0.00019999951726988163, - "loss": 46.0, - "step": 12946 - }, - { - "epoch": 0.9898885639467094, - "grad_norm": 0.0007730942452326417, - "learning_rate": 0.0001999995171952452, - "loss": 46.0, - "step": 12947 - }, - { - "epoch": 0.9899650209300992, - "grad_norm": 0.0013047890970483422, - "learning_rate": 0.000199999517120603, - "loss": 46.0, - "step": 12948 - }, - { - "epoch": 0.990041477913489, - "grad_norm": 0.002887323033064604, - "learning_rate": 0.00019999951704595504, - "loss": 46.0, - "step": 12949 - }, - { - "epoch": 0.9901179348968786, - "grad_norm": 0.012278687208890915, - "learning_rate": 0.0001999995169713013, - "loss": 46.0, - "step": 12950 - }, - { - "epoch": 0.9901943918802684, - "grad_norm": 0.00662717130035162, - "learning_rate": 0.0001999995168966418, - "loss": 46.0, - "step": 12951 - }, - { - "epoch": 0.9902708488636581, - "grad_norm": 0.00028733929502777755, - "learning_rate": 0.00019999951682197654, - "loss": 46.0, - "step": 12952 - }, - { - "epoch": 0.9903473058470478, - "grad_norm": 0.0008203035686165094, - "learning_rate": 0.00019999951674730548, - "loss": 46.0, - "step": 12953 - }, - { - "epoch": 0.9904237628304375, - "grad_norm": 0.0008644326007924974, - "learning_rate": 0.0001999995166726287, - "loss": 46.0, - "step": 12954 - }, - { - "epoch": 0.9905002198138273, - "grad_norm": 0.0007698223926126957, - "learning_rate": 0.0001999995165979461, - "loss": 46.0, - "step": 12955 - }, - { - "epoch": 0.990576676797217, - "grad_norm": 0.0032603333238512278, - "learning_rate": 0.00019999951652325778, - "loss": 46.0, - "step": 12956 - }, - { - "epoch": 0.9906531337806067, - "grad_norm": 0.0008211845415644348, - "learning_rate": 0.00019999951644856365, - "loss": 46.0, - "step": 12957 - }, - { - "epoch": 0.9907295907639964, - "grad_norm": 0.001471293973736465, - "learning_rate": 0.00019999951637386375, - "loss": 46.0, - "step": 12958 - }, - { - "epoch": 0.9908060477473861, - "grad_norm": 0.0007434911094605923, - "learning_rate": 0.0001999995162991581, - "loss": 46.0, - "step": 12959 - }, - { - "epoch": 0.9908825047307759, - "grad_norm": 0.0005880248500034213, - "learning_rate": 0.00019999951622444665, - "loss": 46.0, - "step": 12960 - }, - { - "epoch": 0.9909589617141655, - "grad_norm": 0.0018656188622117043, - "learning_rate": 0.00019999951614972948, - "loss": 46.0, - "step": 12961 - }, - { - "epoch": 0.9910354186975553, - "grad_norm": 0.0009604257065802813, - "learning_rate": 0.0001999995160750065, - "loss": 46.0, - "step": 12962 - }, - { - "epoch": 0.991111875680945, - "grad_norm": 0.0029789903201162815, - "learning_rate": 0.00019999951600027776, - "loss": 46.0, - "step": 12963 - }, - { - "epoch": 0.9911883326643347, - "grad_norm": 0.0010843200143426657, - "learning_rate": 0.00019999951592554328, - "loss": 46.0, - "step": 12964 - }, - { - "epoch": 0.9912647896477245, - "grad_norm": 0.0012757823569700122, - "learning_rate": 0.000199999515850803, - "loss": 46.0, - "step": 12965 - }, - { - "epoch": 0.9913412466311141, - "grad_norm": 0.0004343360778875649, - "learning_rate": 0.00019999951577605695, - "loss": 46.0, - "step": 12966 - }, - { - "epoch": 0.9914177036145039, - "grad_norm": 0.0005968990735709667, - "learning_rate": 0.00019999951570130514, - "loss": 46.0, - "step": 12967 - }, - { - "epoch": 0.9914941605978936, - "grad_norm": 0.0009484199108555913, - "learning_rate": 0.00019999951562654756, - "loss": 46.0, - "step": 12968 - }, - { - "epoch": 0.9915706175812833, - "grad_norm": 0.0032046486157923937, - "learning_rate": 0.0001999995155517842, - "loss": 46.0, - "step": 12969 - }, - { - "epoch": 0.991647074564673, - "grad_norm": 0.0008889741147868335, - "learning_rate": 0.00019999951547701507, - "loss": 46.0, - "step": 12970 - }, - { - "epoch": 0.9917235315480628, - "grad_norm": 0.002994607901200652, - "learning_rate": 0.0001999995154022402, - "loss": 46.0, - "step": 12971 - }, - { - "epoch": 0.9917999885314525, - "grad_norm": 0.001974208513274789, - "learning_rate": 0.00019999951532745955, - "loss": 46.0, - "step": 12972 - }, - { - "epoch": 0.9918764455148422, - "grad_norm": 0.0020898994989693165, - "learning_rate": 0.0001999995152526731, - "loss": 46.0, - "step": 12973 - }, - { - "epoch": 0.991952902498232, - "grad_norm": 0.00042279413901269436, - "learning_rate": 0.00019999951517788093, - "loss": 46.0, - "step": 12974 - }, - { - "epoch": 0.9920293594816216, - "grad_norm": 0.0012046464253216982, - "learning_rate": 0.00019999951510308296, - "loss": 46.0, - "step": 12975 - }, - { - "epoch": 0.9921058164650114, - "grad_norm": 0.0009185183444060385, - "learning_rate": 0.00019999951502827921, - "loss": 46.0, - "step": 12976 - }, - { - "epoch": 0.992182273448401, - "grad_norm": 0.001820918289013207, - "learning_rate": 0.0001999995149534697, - "loss": 46.0, - "step": 12977 - }, - { - "epoch": 0.9922587304317908, - "grad_norm": 0.0009391932399012148, - "learning_rate": 0.00019999951487865444, - "loss": 46.0, - "step": 12978 - }, - { - "epoch": 0.9923351874151806, - "grad_norm": 0.0007493648445233703, - "learning_rate": 0.0001999995148038334, - "loss": 46.0, - "step": 12979 - }, - { - "epoch": 0.9924116443985702, - "grad_norm": 0.008705347776412964, - "learning_rate": 0.00019999951472900656, - "loss": 46.0, - "step": 12980 - }, - { - "epoch": 0.99248810138196, - "grad_norm": 0.007132470607757568, - "learning_rate": 0.00019999951465417398, - "loss": 46.0, - "step": 12981 - }, - { - "epoch": 0.9925645583653497, - "grad_norm": 0.0007482675136998296, - "learning_rate": 0.00019999951457933562, - "loss": 46.0, - "step": 12982 - }, - { - "epoch": 0.9926410153487394, - "grad_norm": 0.005840052384883165, - "learning_rate": 0.0001999995145044915, - "loss": 46.0, - "step": 12983 - }, - { - "epoch": 0.9927174723321291, - "grad_norm": 0.0007353696273639798, - "learning_rate": 0.0001999995144296416, - "loss": 46.0, - "step": 12984 - }, - { - "epoch": 0.9927939293155189, - "grad_norm": 0.0005675255670212209, - "learning_rate": 0.00019999951435478594, - "loss": 46.0, - "step": 12985 - }, - { - "epoch": 0.9928703862989086, - "grad_norm": 0.0013363094767555594, - "learning_rate": 0.00019999951427992452, - "loss": 46.0, - "step": 12986 - }, - { - "epoch": 0.9929468432822983, - "grad_norm": 0.0011422872776165605, - "learning_rate": 0.00019999951420505733, - "loss": 46.0, - "step": 12987 - }, - { - "epoch": 0.993023300265688, - "grad_norm": 0.000899986713193357, - "learning_rate": 0.00019999951413018433, - "loss": 46.0, - "step": 12988 - }, - { - "epoch": 0.9930997572490777, - "grad_norm": 0.001702246954664588, - "learning_rate": 0.00019999951405530561, - "loss": 46.0, - "step": 12989 - }, - { - "epoch": 0.9931762142324675, - "grad_norm": 0.0013333183014765382, - "learning_rate": 0.0001999995139804211, - "loss": 46.0, - "step": 12990 - }, - { - "epoch": 0.9932526712158571, - "grad_norm": 0.0005960882408544421, - "learning_rate": 0.00019999951390553084, - "loss": 46.0, - "step": 12991 - }, - { - "epoch": 0.9933291281992469, - "grad_norm": 0.0015765996649861336, - "learning_rate": 0.00019999951383063477, - "loss": 46.0, - "step": 12992 - }, - { - "epoch": 0.9934055851826367, - "grad_norm": 0.001805830281227827, - "learning_rate": 0.00019999951375573296, - "loss": 46.0, - "step": 12993 - }, - { - "epoch": 0.9934820421660263, - "grad_norm": 0.0014324636431410909, - "learning_rate": 0.00019999951368082535, - "loss": 46.0, - "step": 12994 - }, - { - "epoch": 0.9935584991494161, - "grad_norm": 0.0017241350142285228, - "learning_rate": 0.00019999951360591203, - "loss": 46.0, - "step": 12995 - }, - { - "epoch": 0.9936349561328058, - "grad_norm": 0.004104727879166603, - "learning_rate": 0.0001999995135309929, - "loss": 46.0, - "step": 12996 - }, - { - "epoch": 0.9937114131161955, - "grad_norm": 0.000588090973906219, - "learning_rate": 0.00019999951345606797, - "loss": 46.0, - "step": 12997 - }, - { - "epoch": 0.9937878700995852, - "grad_norm": 0.0009377379319630563, - "learning_rate": 0.00019999951338113735, - "loss": 46.0, - "step": 12998 - }, - { - "epoch": 0.9938643270829749, - "grad_norm": 0.00223111012019217, - "learning_rate": 0.00019999951330620087, - "loss": 46.0, - "step": 12999 - }, - { - "epoch": 0.9939407840663647, - "grad_norm": 0.0014962464338168502, - "learning_rate": 0.0001999995132312587, - "loss": 46.0, - "step": 13000 - }, - { - "epoch": 0.9940172410497544, - "grad_norm": 0.0025067706592381, - "learning_rate": 0.0001999995131563107, - "loss": 46.0, - "step": 13001 - }, - { - "epoch": 0.9940936980331441, - "grad_norm": 0.0007811136892996728, - "learning_rate": 0.00019999951308135696, - "loss": 46.0, - "step": 13002 - }, - { - "epoch": 0.9941701550165338, - "grad_norm": 0.0003123429778497666, - "learning_rate": 0.00019999951300639745, - "loss": 46.0, - "step": 13003 - }, - { - "epoch": 0.9942466119999236, - "grad_norm": 0.0007085145334713161, - "learning_rate": 0.00019999951293143219, - "loss": 46.0, - "step": 13004 - }, - { - "epoch": 0.9943230689833132, - "grad_norm": 0.0009960256284102798, - "learning_rate": 0.00019999951285646112, - "loss": 46.0, - "step": 13005 - }, - { - "epoch": 0.994399525966703, - "grad_norm": 0.001250269589945674, - "learning_rate": 0.0001999995127814843, - "loss": 46.0, - "step": 13006 - }, - { - "epoch": 0.9944759829500927, - "grad_norm": 0.0006877474370412529, - "learning_rate": 0.00019999951270650173, - "loss": 46.0, - "step": 13007 - }, - { - "epoch": 0.9945524399334824, - "grad_norm": 0.00038921754457987845, - "learning_rate": 0.00019999951263151338, - "loss": 46.0, - "step": 13008 - }, - { - "epoch": 0.9946288969168722, - "grad_norm": 0.006857279222458601, - "learning_rate": 0.00019999951255651922, - "loss": 46.0, - "step": 13009 - }, - { - "epoch": 0.9947053539002618, - "grad_norm": 0.0006055301637388766, - "learning_rate": 0.00019999951248151935, - "loss": 46.0, - "step": 13010 - }, - { - "epoch": 0.9947818108836516, - "grad_norm": 0.004898465238511562, - "learning_rate": 0.00019999951240651367, - "loss": 46.0, - "step": 13011 - }, - { - "epoch": 0.9948582678670413, - "grad_norm": 0.0009586650994606316, - "learning_rate": 0.00019999951233150225, - "loss": 46.0, - "step": 13012 - }, - { - "epoch": 0.994934724850431, - "grad_norm": 0.005516965873539448, - "learning_rate": 0.00019999951225648503, - "loss": 46.0, - "step": 13013 - }, - { - "epoch": 0.9950111818338208, - "grad_norm": 0.0011231043608859181, - "learning_rate": 0.00019999951218146206, - "loss": 46.0, - "step": 13014 - }, - { - "epoch": 0.9950876388172105, - "grad_norm": 0.000755190965719521, - "learning_rate": 0.00019999951210643332, - "loss": 46.0, - "step": 13015 - }, - { - "epoch": 0.9951640958006002, - "grad_norm": 0.001508427201770246, - "learning_rate": 0.0001999995120313988, - "loss": 46.0, - "step": 13016 - }, - { - "epoch": 0.9952405527839899, - "grad_norm": 0.0011397219495847821, - "learning_rate": 0.0001999995119563585, - "loss": 46.0, - "step": 13017 - }, - { - "epoch": 0.9953170097673796, - "grad_norm": 0.0012535445857793093, - "learning_rate": 0.00019999951188131245, - "loss": 46.0, - "step": 13018 - }, - { - "epoch": 0.9953934667507693, - "grad_norm": 0.00167899951338768, - "learning_rate": 0.00019999951180626064, - "loss": 46.0, - "step": 13019 - }, - { - "epoch": 0.9954699237341591, - "grad_norm": 0.0012216983595862985, - "learning_rate": 0.00019999951173120306, - "loss": 46.0, - "step": 13020 - }, - { - "epoch": 0.9955463807175488, - "grad_norm": 0.0009084090124815702, - "learning_rate": 0.00019999951165613968, - "loss": 46.0, - "step": 13021 - }, - { - "epoch": 0.9956228377009385, - "grad_norm": 0.0032085846178233624, - "learning_rate": 0.00019999951158107055, - "loss": 46.0, - "step": 13022 - }, - { - "epoch": 0.9956992946843283, - "grad_norm": 0.0009710604790598154, - "learning_rate": 0.00019999951150599565, - "loss": 46.0, - "step": 13023 - }, - { - "epoch": 0.9957757516677179, - "grad_norm": 0.0004906139802187681, - "learning_rate": 0.000199999511430915, - "loss": 46.0, - "step": 13024 - }, - { - "epoch": 0.9958522086511077, - "grad_norm": 0.0015517802676185966, - "learning_rate": 0.00019999951135582852, - "loss": 46.0, - "step": 13025 - }, - { - "epoch": 0.9959286656344974, - "grad_norm": 0.0008460269309580326, - "learning_rate": 0.00019999951128073633, - "loss": 46.0, - "step": 13026 - }, - { - "epoch": 0.9960051226178871, - "grad_norm": 0.003025216283276677, - "learning_rate": 0.00019999951120563836, - "loss": 46.0, - "step": 13027 - }, - { - "epoch": 0.9960815796012769, - "grad_norm": 0.0029430841095745564, - "learning_rate": 0.00019999951113053462, - "loss": 46.0, - "step": 13028 - }, - { - "epoch": 0.9961580365846665, - "grad_norm": 0.0014440554659813643, - "learning_rate": 0.0001999995110554251, - "loss": 46.0, - "step": 13029 - }, - { - "epoch": 0.9962344935680563, - "grad_norm": 0.0015573466662317514, - "learning_rate": 0.0001999995109803098, - "loss": 46.0, - "step": 13030 - }, - { - "epoch": 0.996310950551446, - "grad_norm": 0.0021596390288323164, - "learning_rate": 0.00019999951090518875, - "loss": 46.0, - "step": 13031 - }, - { - "epoch": 0.9963874075348357, - "grad_norm": 0.003103324444964528, - "learning_rate": 0.00019999951083006195, - "loss": 46.0, - "step": 13032 - }, - { - "epoch": 0.9964638645182254, - "grad_norm": 0.0006132619455456734, - "learning_rate": 0.00019999951075492934, - "loss": 46.0, - "step": 13033 - }, - { - "epoch": 0.9965403215016152, - "grad_norm": 0.0036434493958950043, - "learning_rate": 0.00019999951067979096, - "loss": 46.0, - "step": 13034 - }, - { - "epoch": 0.9966167784850049, - "grad_norm": 0.005200007930397987, - "learning_rate": 0.00019999951060464683, - "loss": 46.0, - "step": 13035 - }, - { - "epoch": 0.9966932354683946, - "grad_norm": 0.0008730081608518958, - "learning_rate": 0.00019999951052949695, - "loss": 46.0, - "step": 13036 - }, - { - "epoch": 0.9967696924517843, - "grad_norm": 0.000802867638412863, - "learning_rate": 0.00019999951045434128, - "loss": 46.0, - "step": 13037 - }, - { - "epoch": 0.996846149435174, - "grad_norm": 0.0015725247794762254, - "learning_rate": 0.0001999995103791798, - "loss": 46.0, - "step": 13038 - }, - { - "epoch": 0.9969226064185638, - "grad_norm": 0.0009133379790000618, - "learning_rate": 0.0001999995103040126, - "loss": 46.0, - "step": 13039 - }, - { - "epoch": 0.9969990634019534, - "grad_norm": 0.0016108219278976321, - "learning_rate": 0.00019999951022883961, - "loss": 46.0, - "step": 13040 - }, - { - "epoch": 0.9970755203853432, - "grad_norm": 0.0011954944347962737, - "learning_rate": 0.00019999951015366087, - "loss": 46.0, - "step": 13041 - }, - { - "epoch": 0.997151977368733, - "grad_norm": 0.0008768549305386841, - "learning_rate": 0.00019999951007847636, - "loss": 46.0, - "step": 13042 - }, - { - "epoch": 0.9972284343521226, - "grad_norm": 0.0012309534940868616, - "learning_rate": 0.00019999951000328607, - "loss": 46.0, - "step": 13043 - }, - { - "epoch": 0.9973048913355124, - "grad_norm": 0.0010584152769297361, - "learning_rate": 0.00019999950992809004, - "loss": 46.0, - "step": 13044 - }, - { - "epoch": 0.9973813483189021, - "grad_norm": 0.0006732388865202665, - "learning_rate": 0.00019999950985288817, - "loss": 46.0, - "step": 13045 - }, - { - "epoch": 0.9974578053022918, - "grad_norm": 0.0024629042018204927, - "learning_rate": 0.0001999995097776806, - "loss": 46.0, - "step": 13046 - }, - { - "epoch": 0.9975342622856815, - "grad_norm": 0.0008411156595684588, - "learning_rate": 0.0001999995097024672, - "loss": 46.0, - "step": 13047 - }, - { - "epoch": 0.9976107192690712, - "grad_norm": 0.006107902619987726, - "learning_rate": 0.00019999950962724808, - "loss": 46.0, - "step": 13048 - }, - { - "epoch": 0.997687176252461, - "grad_norm": 0.0011961539275944233, - "learning_rate": 0.00019999950955202318, - "loss": 46.0, - "step": 13049 - }, - { - "epoch": 0.9977636332358507, - "grad_norm": 0.0010772492969408631, - "learning_rate": 0.0001999995094767925, - "loss": 46.0, - "step": 13050 - }, - { - "epoch": 0.9978400902192404, - "grad_norm": 0.0015731386374682188, - "learning_rate": 0.00019999950940155606, - "loss": 46.0, - "step": 13051 - }, - { - "epoch": 0.9979165472026301, - "grad_norm": 0.0006390456692315638, - "learning_rate": 0.00019999950932631384, - "loss": 46.0, - "step": 13052 - }, - { - "epoch": 0.9979930041860199, - "grad_norm": 0.0008038360392674804, - "learning_rate": 0.00019999950925106584, - "loss": 46.0, - "step": 13053 - }, - { - "epoch": 0.9980694611694095, - "grad_norm": 0.0027667242102324963, - "learning_rate": 0.0001999995091758121, - "loss": 46.0, - "step": 13054 - }, - { - "epoch": 0.9981459181527993, - "grad_norm": 0.0018887505866587162, - "learning_rate": 0.00019999950910055262, - "loss": 46.0, - "step": 13055 - }, - { - "epoch": 0.998222375136189, - "grad_norm": 0.001135754631832242, - "learning_rate": 0.0001999995090252873, - "loss": 46.0, - "step": 13056 - }, - { - "epoch": 0.9982988321195787, - "grad_norm": 0.0006053533870726824, - "learning_rate": 0.00019999950895001624, - "loss": 46.0, - "step": 13057 - }, - { - "epoch": 0.9983752891029685, - "grad_norm": 0.0021726926788687706, - "learning_rate": 0.0001999995088747394, - "loss": 46.0, - "step": 13058 - }, - { - "epoch": 0.9984517460863581, - "grad_norm": 0.0007626822334714234, - "learning_rate": 0.00019999950879945682, - "loss": 46.0, - "step": 13059 - }, - { - "epoch": 0.9985282030697479, - "grad_norm": 0.014977667480707169, - "learning_rate": 0.00019999950872416844, - "loss": 46.0, - "step": 13060 - }, - { - "epoch": 0.9986046600531376, - "grad_norm": 0.0008592166122980416, - "learning_rate": 0.00019999950864887432, - "loss": 46.0, - "step": 13061 - }, - { - "epoch": 0.9986811170365273, - "grad_norm": 0.004858432337641716, - "learning_rate": 0.00019999950857357442, - "loss": 46.0, - "step": 13062 - }, - { - "epoch": 0.998757574019917, - "grad_norm": 0.0010461441706866026, - "learning_rate": 0.00019999950849826871, - "loss": 46.0, - "step": 13063 - }, - { - "epoch": 0.9988340310033068, - "grad_norm": 0.0016068416880443692, - "learning_rate": 0.00019999950842295727, - "loss": 46.0, - "step": 13064 - }, - { - "epoch": 0.9989104879866965, - "grad_norm": 0.0017768868710845709, - "learning_rate": 0.00019999950834764007, - "loss": 46.0, - "step": 13065 - }, - { - "epoch": 0.9989869449700862, - "grad_norm": 0.001869536703452468, - "learning_rate": 0.00019999950827231708, - "loss": 46.0, - "step": 13066 - }, - { - "epoch": 0.9990634019534759, - "grad_norm": 0.0028397131245583296, - "learning_rate": 0.0001999995081969883, - "loss": 46.0, - "step": 13067 - }, - { - "epoch": 0.9991398589368656, - "grad_norm": 0.006175707560032606, - "learning_rate": 0.00019999950812165377, - "loss": 46.0, - "step": 13068 - }, - { - "epoch": 0.9992163159202554, - "grad_norm": 0.0005917723174206913, - "learning_rate": 0.0001999995080463135, - "loss": 46.0, - "step": 13069 - }, - { - "epoch": 0.999292772903645, - "grad_norm": 0.001969987526535988, - "learning_rate": 0.00019999950797096742, - "loss": 46.0, - "step": 13070 - }, - { - "epoch": 0.9993692298870348, - "grad_norm": 0.0016098202904686332, - "learning_rate": 0.00019999950789561561, - "loss": 46.0, - "step": 13071 - }, - { - "epoch": 0.9994456868704246, - "grad_norm": 0.002032308839261532, - "learning_rate": 0.000199999507820258, - "loss": 46.0, - "step": 13072 - }, - { - "epoch": 0.9995221438538142, - "grad_norm": 0.003767122281715274, - "learning_rate": 0.00019999950774489463, - "loss": 46.0, - "step": 13073 - }, - { - "epoch": 0.999598600837204, - "grad_norm": 0.0013361971359699965, - "learning_rate": 0.0001999995076695255, - "loss": 46.0, - "step": 13074 - }, - { - "epoch": 0.9996750578205937, - "grad_norm": 0.0003991764970123768, - "learning_rate": 0.00019999950759415057, - "loss": 46.0, - "step": 13075 - }, - { - "epoch": 0.9997515148039834, - "grad_norm": 0.0013790132943540812, - "learning_rate": 0.0001999995075187699, - "loss": 46.0, - "step": 13076 - }, - { - "epoch": 0.9998279717873731, - "grad_norm": 0.004516522400081158, - "learning_rate": 0.00019999950744338345, - "loss": 46.0, - "step": 13077 - }, - { - "epoch": 0.9999044287707628, - "grad_norm": 0.0006289720768108964, - "learning_rate": 0.00019999950736799123, - "loss": 46.0, - "step": 13078 - }, - { - "epoch": 0.9999808857541526, - "grad_norm": 0.000514974060934037, - "learning_rate": 0.00019999950729259323, - "loss": 46.0, - "step": 13079 - }, - { - "epoch": 0.9999808857541526, - "eval_loss": 11.5, - "eval_runtime": 31.2973, - "eval_samples_per_second": 175.99, - "eval_steps_per_second": 87.995, - "step": 13079 - }, - { - "epoch": 1.0000573427375423, - "grad_norm": 0.0016246383311226964, - "learning_rate": 0.0001999995072171895, - "loss": 46.0, - "step": 13080 - }, - { - "epoch": 1.000133799720932, - "grad_norm": 0.0010487493127584457, - "learning_rate": 0.00019999950714177995, - "loss": 46.0, - "step": 13081 - }, - { - "epoch": 1.0002102567043216, - "grad_norm": 0.015009724535048008, - "learning_rate": 0.00019999950706636467, - "loss": 46.0, - "step": 13082 - }, - { - "epoch": 1.0002867136877114, - "grad_norm": 0.0030437775421887636, - "learning_rate": 0.00019999950699094358, - "loss": 46.0, - "step": 13083 - }, - { - "epoch": 1.0003631706711011, - "grad_norm": 0.003403489477932453, - "learning_rate": 0.00019999950691551677, - "loss": 46.0, - "step": 13084 - }, - { - "epoch": 1.000439627654491, - "grad_norm": 0.0054220883175730705, - "learning_rate": 0.00019999950684008416, - "loss": 46.0, - "step": 13085 - }, - { - "epoch": 1.0005160846378807, - "grad_norm": 0.0017335679149255157, - "learning_rate": 0.00019999950676464578, - "loss": 46.0, - "step": 13086 - }, - { - "epoch": 1.0005925416212704, - "grad_norm": 0.0010313049424439669, - "learning_rate": 0.00019999950668920163, - "loss": 46.0, - "step": 13087 - }, - { - "epoch": 1.00066899860466, - "grad_norm": 0.006911544594913721, - "learning_rate": 0.00019999950661375173, - "loss": 46.0, - "step": 13088 - }, - { - "epoch": 1.0007454555880497, - "grad_norm": 0.002955998294055462, - "learning_rate": 0.00019999950653829606, - "loss": 46.0, - "step": 13089 - }, - { - "epoch": 1.0008219125714395, - "grad_norm": 0.001319556962698698, - "learning_rate": 0.0001999995064628346, - "loss": 46.0, - "step": 13090 - }, - { - "epoch": 1.0008983695548292, - "grad_norm": 0.0020879770163446665, - "learning_rate": 0.00019999950638736736, - "loss": 46.0, - "step": 13091 - }, - { - "epoch": 1.000974826538219, - "grad_norm": 0.00044731638627126813, - "learning_rate": 0.00019999950631189437, - "loss": 46.0, - "step": 13092 - }, - { - "epoch": 1.0010512835216085, - "grad_norm": 0.0010726484470069408, - "learning_rate": 0.00019999950623641563, - "loss": 46.0, - "step": 13093 - }, - { - "epoch": 1.0011277405049983, - "grad_norm": 0.00800649356096983, - "learning_rate": 0.0001999995061609311, - "loss": 46.0, - "step": 13094 - }, - { - "epoch": 1.001204197488388, - "grad_norm": 0.000554498634301126, - "learning_rate": 0.0001999995060854408, - "loss": 46.0, - "step": 13095 - }, - { - "epoch": 1.0012806544717778, - "grad_norm": 0.0008250930113717914, - "learning_rate": 0.00019999950600994471, - "loss": 46.0, - "step": 13096 - }, - { - "epoch": 1.0013571114551676, - "grad_norm": 0.0049845450557768345, - "learning_rate": 0.00019999950593444288, - "loss": 46.0, - "step": 13097 - }, - { - "epoch": 1.0014335684385574, - "grad_norm": 0.0013318548444658518, - "learning_rate": 0.00019999950585893527, - "loss": 46.0, - "step": 13098 - }, - { - "epoch": 1.001510025421947, - "grad_norm": 0.0008032101904973388, - "learning_rate": 0.0001999995057834219, - "loss": 46.0, - "step": 13099 - }, - { - "epoch": 1.0015864824053367, - "grad_norm": 0.0021659035701304674, - "learning_rate": 0.00019999950570790277, - "loss": 46.0, - "step": 13100 - }, - { - "epoch": 1.0016629393887264, - "grad_norm": 0.0013529403368011117, - "learning_rate": 0.00019999950563237784, - "loss": 46.0, - "step": 13101 - }, - { - "epoch": 1.0017393963721162, - "grad_norm": 0.0020392779260873795, - "learning_rate": 0.00019999950555684717, - "loss": 46.0, - "step": 13102 - }, - { - "epoch": 1.001815853355506, - "grad_norm": 0.0012425929307937622, - "learning_rate": 0.00019999950548131072, - "loss": 46.0, - "step": 13103 - }, - { - "epoch": 1.0018923103388955, - "grad_norm": 0.015269949100911617, - "learning_rate": 0.00019999950540576847, - "loss": 46.0, - "step": 13104 - }, - { - "epoch": 1.0019687673222852, - "grad_norm": 0.0005601951852440834, - "learning_rate": 0.0001999995053302205, - "loss": 46.0, - "step": 13105 - }, - { - "epoch": 1.002045224305675, - "grad_norm": 0.0028679859824478626, - "learning_rate": 0.00019999950525466674, - "loss": 46.0, - "step": 13106 - }, - { - "epoch": 1.0021216812890648, - "grad_norm": 0.0007895689341239631, - "learning_rate": 0.0001999995051791072, - "loss": 46.0, - "step": 13107 - }, - { - "epoch": 1.0021981382724545, - "grad_norm": 0.0011641173623502254, - "learning_rate": 0.00019999950510354189, - "loss": 46.0, - "step": 13108 - }, - { - "epoch": 1.0022745952558443, - "grad_norm": 0.0010575753403827548, - "learning_rate": 0.00019999950502797083, - "loss": 46.0, - "step": 13109 - }, - { - "epoch": 1.0023510522392338, - "grad_norm": 0.0012260411167517304, - "learning_rate": 0.000199999504952394, - "loss": 46.0, - "step": 13110 - }, - { - "epoch": 1.0024275092226236, - "grad_norm": 0.0023909483570605516, - "learning_rate": 0.00019999950487681139, - "loss": 46.0, - "step": 13111 - }, - { - "epoch": 1.0025039662060133, - "grad_norm": 0.00043188981362618506, - "learning_rate": 0.000199999504801223, - "loss": 46.0, - "step": 13112 - }, - { - "epoch": 1.002580423189403, - "grad_norm": 0.0014382440131157637, - "learning_rate": 0.00019999950472562885, - "loss": 46.0, - "step": 13113 - }, - { - "epoch": 1.0026568801727929, - "grad_norm": 0.0017269690288230777, - "learning_rate": 0.00019999950465002893, - "loss": 46.0, - "step": 13114 - }, - { - "epoch": 1.0027333371561824, - "grad_norm": 0.002471608342602849, - "learning_rate": 0.00019999950457442325, - "loss": 46.0, - "step": 13115 - }, - { - "epoch": 1.0028097941395722, - "grad_norm": 0.0006464608595706522, - "learning_rate": 0.0001999995044988118, - "loss": 46.0, - "step": 13116 - }, - { - "epoch": 1.002886251122962, - "grad_norm": 0.0016540633514523506, - "learning_rate": 0.00019999950442319456, - "loss": 46.0, - "step": 13117 - }, - { - "epoch": 1.0029627081063517, - "grad_norm": 0.0014798224437981844, - "learning_rate": 0.00019999950434757157, - "loss": 46.0, - "step": 13118 - }, - { - "epoch": 1.0030391650897414, - "grad_norm": 0.002955448580905795, - "learning_rate": 0.0001999995042719428, - "loss": 46.0, - "step": 13119 - }, - { - "epoch": 1.0031156220731312, - "grad_norm": 0.0012454278767108917, - "learning_rate": 0.00019999950419630826, - "loss": 46.0, - "step": 13120 - }, - { - "epoch": 1.0031920790565207, - "grad_norm": 0.0008011875906959176, - "learning_rate": 0.00019999950412066797, - "loss": 46.0, - "step": 13121 - }, - { - "epoch": 1.0032685360399105, - "grad_norm": 0.0013702984433621168, - "learning_rate": 0.00019999950404502191, - "loss": 46.0, - "step": 13122 - }, - { - "epoch": 1.0033449930233003, - "grad_norm": 0.0023872603196650743, - "learning_rate": 0.00019999950396937005, - "loss": 46.0, - "step": 13123 - }, - { - "epoch": 1.00342145000669, - "grad_norm": 0.002514572348445654, - "learning_rate": 0.00019999950389371245, - "loss": 46.0, - "step": 13124 - }, - { - "epoch": 1.0034979069900798, - "grad_norm": 0.002194985980167985, - "learning_rate": 0.00019999950381804907, - "loss": 46.0, - "step": 13125 - }, - { - "epoch": 1.0035743639734693, - "grad_norm": 0.0006772606866434216, - "learning_rate": 0.0001999995037423799, - "loss": 46.0, - "step": 13126 - }, - { - "epoch": 1.003650820956859, - "grad_norm": 0.0011174071114510298, - "learning_rate": 0.00019999950366670496, - "loss": 46.0, - "step": 13127 - }, - { - "epoch": 1.0037272779402489, - "grad_norm": 0.00036141840973868966, - "learning_rate": 0.00019999950359102432, - "loss": 46.0, - "step": 13128 - }, - { - "epoch": 1.0038037349236386, - "grad_norm": 0.001346914446912706, - "learning_rate": 0.00019999950351533782, - "loss": 46.0, - "step": 13129 - }, - { - "epoch": 1.0038801919070284, - "grad_norm": 0.0006093299016356468, - "learning_rate": 0.00019999950343964562, - "loss": 46.0, - "step": 13130 - }, - { - "epoch": 1.0039566488904181, - "grad_norm": 0.0038393668364733458, - "learning_rate": 0.0001999995033639476, - "loss": 46.0, - "step": 13131 - }, - { - "epoch": 1.0040331058738077, - "grad_norm": 0.0022960868664085865, - "learning_rate": 0.00019999950328824384, - "loss": 46.0, - "step": 13132 - }, - { - "epoch": 1.0041095628571974, - "grad_norm": 0.0014905341668054461, - "learning_rate": 0.0001999995032125343, - "loss": 46.0, - "step": 13133 - }, - { - "epoch": 1.0041860198405872, - "grad_norm": 0.0013899640180170536, - "learning_rate": 0.000199999503136819, - "loss": 46.0, - "step": 13134 - }, - { - "epoch": 1.004262476823977, - "grad_norm": 0.0014219989534467459, - "learning_rate": 0.00019999950306109793, - "loss": 46.0, - "step": 13135 - }, - { - "epoch": 1.0043389338073667, - "grad_norm": 0.012358224019408226, - "learning_rate": 0.0001999995029853711, - "loss": 46.0, - "step": 13136 - }, - { - "epoch": 1.0044153907907563, - "grad_norm": 0.0007344927871599793, - "learning_rate": 0.00019999950290963846, - "loss": 46.0, - "step": 13137 - }, - { - "epoch": 1.004491847774146, - "grad_norm": 0.002723429352045059, - "learning_rate": 0.00019999950283390008, - "loss": 46.0, - "step": 13138 - }, - { - "epoch": 1.0045683047575358, - "grad_norm": 0.003247004933655262, - "learning_rate": 0.0001999995027581559, - "loss": 46.0, - "step": 13139 - }, - { - "epoch": 1.0046447617409255, - "grad_norm": 0.0010048558469861746, - "learning_rate": 0.000199999502682406, - "loss": 46.0, - "step": 13140 - }, - { - "epoch": 1.0047212187243153, - "grad_norm": 0.0008429336594417691, - "learning_rate": 0.0001999995026066503, - "loss": 46.0, - "step": 13141 - }, - { - "epoch": 1.004797675707705, - "grad_norm": 0.0013114274479448795, - "learning_rate": 0.00019999950253088886, - "loss": 46.0, - "step": 13142 - }, - { - "epoch": 1.0048741326910946, - "grad_norm": 0.005130447447299957, - "learning_rate": 0.0001999995024551216, - "loss": 46.0, - "step": 13143 - }, - { - "epoch": 1.0049505896744844, - "grad_norm": 0.0170944444835186, - "learning_rate": 0.00019999950237934862, - "loss": 46.0, - "step": 13144 - }, - { - "epoch": 1.0050270466578741, - "grad_norm": 0.0013413139386102557, - "learning_rate": 0.00019999950230356985, - "loss": 46.0, - "step": 13145 - }, - { - "epoch": 1.0051035036412639, - "grad_norm": 0.0005037693190388381, - "learning_rate": 0.00019999950222778531, - "loss": 46.0, - "step": 13146 - }, - { - "epoch": 1.0051799606246536, - "grad_norm": 0.0011129215126857162, - "learning_rate": 0.000199999502151995, - "loss": 46.0, - "step": 13147 - }, - { - "epoch": 1.0052564176080432, - "grad_norm": 0.001147244474850595, - "learning_rate": 0.00019999950207619894, - "loss": 46.0, - "step": 13148 - }, - { - "epoch": 1.005332874591433, - "grad_norm": 0.0036876939702779055, - "learning_rate": 0.00019999950200039708, - "loss": 46.0, - "step": 13149 - }, - { - "epoch": 1.0054093315748227, - "grad_norm": 0.0028461748734116554, - "learning_rate": 0.00019999950192458945, - "loss": 46.0, - "step": 13150 - }, - { - "epoch": 1.0054857885582125, - "grad_norm": 0.0005899151437915862, - "learning_rate": 0.00019999950184877604, - "loss": 46.0, - "step": 13151 - }, - { - "epoch": 1.0055622455416022, - "grad_norm": 0.00482617923989892, - "learning_rate": 0.00019999950177295692, - "loss": 46.0, - "step": 13152 - }, - { - "epoch": 1.0056387025249918, - "grad_norm": 0.003291077446192503, - "learning_rate": 0.00019999950169713197, - "loss": 46.0, - "step": 13153 - }, - { - "epoch": 1.0057151595083815, - "grad_norm": 0.0009646614198572934, - "learning_rate": 0.0001999995016213013, - "loss": 46.0, - "step": 13154 - }, - { - "epoch": 1.0057916164917713, - "grad_norm": 0.0016465745866298676, - "learning_rate": 0.00019999950154546482, - "loss": 46.0, - "step": 13155 - }, - { - "epoch": 1.005868073475161, - "grad_norm": 0.0007563810795545578, - "learning_rate": 0.00019999950146962258, - "loss": 46.0, - "step": 13156 - }, - { - "epoch": 1.0059445304585508, - "grad_norm": 0.0047136046923696995, - "learning_rate": 0.00019999950139377458, - "loss": 46.0, - "step": 13157 - }, - { - "epoch": 1.0060209874419406, - "grad_norm": 0.006196219008415937, - "learning_rate": 0.00019999950131792082, - "loss": 46.0, - "step": 13158 - }, - { - "epoch": 1.0060974444253301, - "grad_norm": 0.008243533782660961, - "learning_rate": 0.00019999950124206128, - "loss": 46.0, - "step": 13159 - }, - { - "epoch": 1.0061739014087199, - "grad_norm": 0.0009325897553935647, - "learning_rate": 0.00019999950116619597, - "loss": 46.0, - "step": 13160 - }, - { - "epoch": 1.0062503583921096, - "grad_norm": 0.0012052697129547596, - "learning_rate": 0.00019999950109032488, - "loss": 46.0, - "step": 13161 - }, - { - "epoch": 1.0063268153754994, - "grad_norm": 0.0006250923033803701, - "learning_rate": 0.00019999950101444802, - "loss": 46.0, - "step": 13162 - }, - { - "epoch": 1.0064032723588892, - "grad_norm": 0.0007843942730687559, - "learning_rate": 0.00019999950093856542, - "loss": 46.0, - "step": 13163 - }, - { - "epoch": 1.0064797293422787, - "grad_norm": 0.0008219070732593536, - "learning_rate": 0.00019999950086267704, - "loss": 46.0, - "step": 13164 - }, - { - "epoch": 1.0065561863256685, - "grad_norm": 0.0008874632185325027, - "learning_rate": 0.00019999950078678286, - "loss": 46.0, - "step": 13165 - }, - { - "epoch": 1.0066326433090582, - "grad_norm": 0.0010848186211660504, - "learning_rate": 0.00019999950071088293, - "loss": 46.0, - "step": 13166 - }, - { - "epoch": 1.006709100292448, - "grad_norm": 0.0026227065827697515, - "learning_rate": 0.00019999950063497726, - "loss": 46.0, - "step": 13167 - }, - { - "epoch": 1.0067855572758377, - "grad_norm": 0.0010908190160989761, - "learning_rate": 0.0001999995005590658, - "loss": 46.0, - "step": 13168 - }, - { - "epoch": 1.0068620142592275, - "grad_norm": 0.005107661709189415, - "learning_rate": 0.00019999950048314852, - "loss": 46.0, - "step": 13169 - }, - { - "epoch": 1.006938471242617, - "grad_norm": 0.0012098741717636585, - "learning_rate": 0.00019999950040722552, - "loss": 46.0, - "step": 13170 - }, - { - "epoch": 1.0070149282260068, - "grad_norm": 0.0022341248113662004, - "learning_rate": 0.00019999950033129676, - "loss": 46.0, - "step": 13171 - }, - { - "epoch": 1.0070913852093966, - "grad_norm": 0.002104635816067457, - "learning_rate": 0.0001999995002553622, - "loss": 46.0, - "step": 13172 - }, - { - "epoch": 1.0071678421927863, - "grad_norm": 0.0008151038200594485, - "learning_rate": 0.00019999950017942188, - "loss": 46.0, - "step": 13173 - }, - { - "epoch": 1.007244299176176, - "grad_norm": 0.0011404327815398574, - "learning_rate": 0.00019999950010347582, - "loss": 46.0, - "step": 13174 - }, - { - "epoch": 1.0073207561595656, - "grad_norm": 0.001159876468591392, - "learning_rate": 0.00019999950002752396, - "loss": 46.0, - "step": 13175 - }, - { - "epoch": 1.0073972131429554, - "grad_norm": 0.0019495253218337893, - "learning_rate": 0.00019999949995156633, - "loss": 46.0, - "step": 13176 - }, - { - "epoch": 1.0074736701263451, - "grad_norm": 0.0019082440994679928, - "learning_rate": 0.00019999949987560295, - "loss": 46.0, - "step": 13177 - }, - { - "epoch": 1.007550127109735, - "grad_norm": 0.0011245633941143751, - "learning_rate": 0.00019999949979963378, - "loss": 46.0, - "step": 13178 - }, - { - "epoch": 1.0076265840931247, - "grad_norm": 0.0007572720642201602, - "learning_rate": 0.00019999949972365885, - "loss": 46.0, - "step": 13179 - }, - { - "epoch": 1.0077030410765144, - "grad_norm": 0.001434324774891138, - "learning_rate": 0.00019999949964767818, - "loss": 46.0, - "step": 13180 - }, - { - "epoch": 1.007779498059904, - "grad_norm": 0.0024929086212068796, - "learning_rate": 0.00019999949957169168, - "loss": 46.0, - "step": 13181 - }, - { - "epoch": 1.0078559550432937, - "grad_norm": 0.0015253580641001463, - "learning_rate": 0.00019999949949569946, - "loss": 46.0, - "step": 13182 - }, - { - "epoch": 1.0079324120266835, - "grad_norm": 0.01165300328284502, - "learning_rate": 0.00019999949941970144, - "loss": 46.0, - "step": 13183 - }, - { - "epoch": 1.0080088690100732, - "grad_norm": 0.0026303925551474094, - "learning_rate": 0.00019999949934369765, - "loss": 46.0, - "step": 13184 - }, - { - "epoch": 1.008085325993463, - "grad_norm": 0.001410634838975966, - "learning_rate": 0.00019999949926768814, - "loss": 46.0, - "step": 13185 - }, - { - "epoch": 1.0081617829768525, - "grad_norm": 0.0008460862445645034, - "learning_rate": 0.0001999994991916728, - "loss": 46.0, - "step": 13186 - }, - { - "epoch": 1.0082382399602423, - "grad_norm": 0.0015290474984794855, - "learning_rate": 0.0001999994991156517, - "loss": 46.0, - "step": 13187 - }, - { - "epoch": 1.008314696943632, - "grad_norm": 0.009030179120600224, - "learning_rate": 0.00019999949903962488, - "loss": 46.0, - "step": 13188 - }, - { - "epoch": 1.0083911539270218, - "grad_norm": 0.0005173471290618181, - "learning_rate": 0.00019999949896359223, - "loss": 46.0, - "step": 13189 - }, - { - "epoch": 1.0084676109104116, - "grad_norm": 0.015396231785416603, - "learning_rate": 0.00019999949888755385, - "loss": 46.0, - "step": 13190 - }, - { - "epoch": 1.0085440678938014, - "grad_norm": 0.001245530555024743, - "learning_rate": 0.00019999949881150967, - "loss": 46.0, - "step": 13191 - }, - { - "epoch": 1.008620524877191, - "grad_norm": 0.0016823558835312724, - "learning_rate": 0.00019999949873545974, - "loss": 46.0, - "step": 13192 - }, - { - "epoch": 1.0086969818605807, - "grad_norm": 0.0013860964681953192, - "learning_rate": 0.00019999949865940405, - "loss": 46.0, - "step": 13193 - }, - { - "epoch": 1.0087734388439704, - "grad_norm": 0.0006061822059564292, - "learning_rate": 0.00019999949858334257, - "loss": 46.0, - "step": 13194 - }, - { - "epoch": 1.0088498958273602, - "grad_norm": 0.0014511196641251445, - "learning_rate": 0.00019999949850727533, - "loss": 46.0, - "step": 13195 - }, - { - "epoch": 1.00892635281075, - "grad_norm": 0.012013912200927734, - "learning_rate": 0.0001999994984312023, - "loss": 46.0, - "step": 13196 - }, - { - "epoch": 1.0090028097941395, - "grad_norm": 0.0006593851139768958, - "learning_rate": 0.00019999949835512352, - "loss": 46.0, - "step": 13197 - }, - { - "epoch": 1.0090792667775292, - "grad_norm": 0.001966786803677678, - "learning_rate": 0.00019999949827903898, - "loss": 46.0, - "step": 13198 - }, - { - "epoch": 1.009155723760919, - "grad_norm": 0.0030168199446052313, - "learning_rate": 0.00019999949820294867, - "loss": 46.0, - "step": 13199 - }, - { - "epoch": 1.0092321807443088, - "grad_norm": 0.0005610954249277711, - "learning_rate": 0.00019999949812685256, - "loss": 46.0, - "step": 13200 - }, - { - "epoch": 1.0093086377276985, - "grad_norm": 0.004524660762399435, - "learning_rate": 0.00019999949805075073, - "loss": 46.0, - "step": 13201 - }, - { - "epoch": 1.0093850947110883, - "grad_norm": 0.0009732217877171934, - "learning_rate": 0.00019999949797464307, - "loss": 46.0, - "step": 13202 - }, - { - "epoch": 1.0094615516944778, - "grad_norm": 0.0009303790284320712, - "learning_rate": 0.0001999994978985297, - "loss": 46.0, - "step": 13203 - }, - { - "epoch": 1.0095380086778676, - "grad_norm": 0.0014858862850815058, - "learning_rate": 0.00019999949782241054, - "loss": 46.0, - "step": 13204 - }, - { - "epoch": 1.0096144656612573, - "grad_norm": 0.013205749914050102, - "learning_rate": 0.0001999994977462856, - "loss": 46.0, - "step": 13205 - }, - { - "epoch": 1.009690922644647, - "grad_norm": 0.0023184148594737053, - "learning_rate": 0.0001999994976701549, - "loss": 46.0, - "step": 13206 - }, - { - "epoch": 1.0097673796280369, - "grad_norm": 0.0007458882755599916, - "learning_rate": 0.00019999949759401842, - "loss": 46.0, - "step": 13207 - }, - { - "epoch": 1.0098438366114264, - "grad_norm": 0.0008488466846756637, - "learning_rate": 0.0001999994975178762, - "loss": 46.0, - "step": 13208 - }, - { - "epoch": 1.0099202935948162, - "grad_norm": 0.0012341069523245096, - "learning_rate": 0.00019999949744172816, - "loss": 46.0, - "step": 13209 - }, - { - "epoch": 1.009996750578206, - "grad_norm": 0.0029805428348481655, - "learning_rate": 0.00019999949736557437, - "loss": 46.0, - "step": 13210 - }, - { - "epoch": 1.0100732075615957, - "grad_norm": 0.000832177815027535, - "learning_rate": 0.00019999949728941483, - "loss": 46.0, - "step": 13211 - }, - { - "epoch": 1.0101496645449854, - "grad_norm": 0.0015722366515547037, - "learning_rate": 0.00019999949721324952, - "loss": 46.0, - "step": 13212 - }, - { - "epoch": 1.0102261215283752, - "grad_norm": 0.00213468074798584, - "learning_rate": 0.0001999994971370784, - "loss": 46.0, - "step": 13213 - }, - { - "epoch": 1.0103025785117647, - "grad_norm": 0.000951695314142853, - "learning_rate": 0.00019999949706090155, - "loss": 46.0, - "step": 13214 - }, - { - "epoch": 1.0103790354951545, - "grad_norm": 0.0011788548436015844, - "learning_rate": 0.00019999949698471894, - "loss": 46.0, - "step": 13215 - }, - { - "epoch": 1.0104554924785443, - "grad_norm": 0.0008633707766421139, - "learning_rate": 0.00019999949690853054, - "loss": 46.0, - "step": 13216 - }, - { - "epoch": 1.010531949461934, - "grad_norm": 0.002745748497545719, - "learning_rate": 0.00019999949683233633, - "loss": 46.0, - "step": 13217 - }, - { - "epoch": 1.0106084064453238, - "grad_norm": 0.0008100612321868539, - "learning_rate": 0.0001999994967561364, - "loss": 46.0, - "step": 13218 - }, - { - "epoch": 1.0106848634287133, - "grad_norm": 0.0012199390912428498, - "learning_rate": 0.00019999949667993068, - "loss": 46.0, - "step": 13219 - }, - { - "epoch": 1.010761320412103, - "grad_norm": 0.0004941097577102482, - "learning_rate": 0.00019999949660371921, - "loss": 46.0, - "step": 13220 - }, - { - "epoch": 1.0108377773954929, - "grad_norm": 0.0009790302719920874, - "learning_rate": 0.000199999496527502, - "loss": 46.0, - "step": 13221 - }, - { - "epoch": 1.0109142343788826, - "grad_norm": 0.0009613094734959304, - "learning_rate": 0.00019999949645127898, - "loss": 46.0, - "step": 13222 - }, - { - "epoch": 1.0109906913622724, - "grad_norm": 0.0006092821713536978, - "learning_rate": 0.0001999994963750502, - "loss": 46.0, - "step": 13223 - }, - { - "epoch": 1.011067148345662, - "grad_norm": 0.0016754708485677838, - "learning_rate": 0.00019999949629881562, - "loss": 46.0, - "step": 13224 - }, - { - "epoch": 1.0111436053290517, - "grad_norm": 0.0004980962257832289, - "learning_rate": 0.0001999994962225753, - "loss": 46.0, - "step": 13225 - }, - { - "epoch": 1.0112200623124414, - "grad_norm": 0.0007395015563815832, - "learning_rate": 0.00019999949614632923, - "loss": 46.0, - "step": 13226 - }, - { - "epoch": 1.0112965192958312, - "grad_norm": 0.0007635345100425184, - "learning_rate": 0.00019999949607007734, - "loss": 46.0, - "step": 13227 - }, - { - "epoch": 1.011372976279221, - "grad_norm": 0.000687832129187882, - "learning_rate": 0.0001999994959938197, - "loss": 46.0, - "step": 13228 - }, - { - "epoch": 1.0114494332626107, - "grad_norm": 0.0007053890149109066, - "learning_rate": 0.0001999994959175563, - "loss": 46.0, - "step": 13229 - }, - { - "epoch": 1.0115258902460003, - "grad_norm": 0.0016934218583628535, - "learning_rate": 0.00019999949584128713, - "loss": 46.0, - "step": 13230 - }, - { - "epoch": 1.01160234722939, - "grad_norm": 0.00045626488281413913, - "learning_rate": 0.0001999994957650122, - "loss": 46.0, - "step": 13231 - }, - { - "epoch": 1.0116788042127798, - "grad_norm": 0.0004521886585280299, - "learning_rate": 0.00019999949568873148, - "loss": 46.0, - "step": 13232 - }, - { - "epoch": 1.0117552611961695, - "grad_norm": 0.0012220500502735376, - "learning_rate": 0.00019999949561244499, - "loss": 46.0, - "step": 13233 - }, - { - "epoch": 1.0118317181795593, - "grad_norm": 0.0003151035634800792, - "learning_rate": 0.00019999949553615277, - "loss": 46.0, - "step": 13234 - }, - { - "epoch": 1.0119081751629488, - "grad_norm": 0.003835513722151518, - "learning_rate": 0.00019999949545985475, - "loss": 46.0, - "step": 13235 - }, - { - "epoch": 1.0119846321463386, - "grad_norm": 0.005143537186086178, - "learning_rate": 0.00019999949538355096, - "loss": 46.0, - "step": 13236 - }, - { - "epoch": 1.0120610891297284, - "grad_norm": 0.0028145890682935715, - "learning_rate": 0.0001999994953072414, - "loss": 46.0, - "step": 13237 - }, - { - "epoch": 1.0121375461131181, - "grad_norm": 0.00553949223831296, - "learning_rate": 0.00019999949523092606, - "loss": 46.0, - "step": 13238 - }, - { - "epoch": 1.0122140030965079, - "grad_norm": 0.0038574826903641224, - "learning_rate": 0.00019999949515460497, - "loss": 46.0, - "step": 13239 - }, - { - "epoch": 1.0122904600798976, - "grad_norm": 0.0019628724548965693, - "learning_rate": 0.00019999949507827812, - "loss": 46.0, - "step": 13240 - }, - { - "epoch": 1.0123669170632872, - "grad_norm": 0.0009287242428399622, - "learning_rate": 0.0001999994950019455, - "loss": 46.0, - "step": 13241 - }, - { - "epoch": 1.012443374046677, - "grad_norm": 0.0003255531773902476, - "learning_rate": 0.00019999949492560708, - "loss": 46.0, - "step": 13242 - }, - { - "epoch": 1.0125198310300667, - "grad_norm": 0.000495244050398469, - "learning_rate": 0.0001999994948492629, - "loss": 46.0, - "step": 13243 - }, - { - "epoch": 1.0125962880134565, - "grad_norm": 0.003493133233860135, - "learning_rate": 0.00019999949477291296, - "loss": 46.0, - "step": 13244 - }, - { - "epoch": 1.0126727449968462, - "grad_norm": 0.0012319928500801325, - "learning_rate": 0.00019999949469655723, - "loss": 46.0, - "step": 13245 - }, - { - "epoch": 1.0127492019802358, - "grad_norm": 0.008995313197374344, - "learning_rate": 0.00019999949462019576, - "loss": 46.0, - "step": 13246 - }, - { - "epoch": 1.0128256589636255, - "grad_norm": 0.0009984509088099003, - "learning_rate": 0.00019999949454382852, - "loss": 46.0, - "step": 13247 - }, - { - "epoch": 1.0129021159470153, - "grad_norm": 0.001639196532778442, - "learning_rate": 0.0001999994944674555, - "loss": 46.0, - "step": 13248 - }, - { - "epoch": 1.012978572930405, - "grad_norm": 0.0011900431709364057, - "learning_rate": 0.0001999994943910767, - "loss": 46.0, - "step": 13249 - }, - { - "epoch": 1.0130550299137948, - "grad_norm": 0.00172789569478482, - "learning_rate": 0.00019999949431469215, - "loss": 46.0, - "step": 13250 - }, - { - "epoch": 1.0131314868971846, - "grad_norm": 0.0007873403374105692, - "learning_rate": 0.0001999994942383018, - "loss": 46.0, - "step": 13251 - }, - { - "epoch": 1.0132079438805741, - "grad_norm": 0.0007505659596063197, - "learning_rate": 0.0001999994941619057, - "loss": 46.0, - "step": 13252 - }, - { - "epoch": 1.0132844008639639, - "grad_norm": 0.0012867376208305359, - "learning_rate": 0.00019999949408550384, - "loss": 46.0, - "step": 13253 - }, - { - "epoch": 1.0133608578473536, - "grad_norm": 0.00035619662958197296, - "learning_rate": 0.0001999994940090962, - "loss": 46.0, - "step": 13254 - }, - { - "epoch": 1.0134373148307434, - "grad_norm": 0.0005880212993361056, - "learning_rate": 0.0001999994939326828, - "loss": 46.0, - "step": 13255 - }, - { - "epoch": 1.0135137718141332, - "grad_norm": 0.0019242139533162117, - "learning_rate": 0.00019999949385626363, - "loss": 46.0, - "step": 13256 - }, - { - "epoch": 1.0135902287975227, - "grad_norm": 0.0005807740380987525, - "learning_rate": 0.00019999949377983865, - "loss": 46.0, - "step": 13257 - }, - { - "epoch": 1.0136666857809125, - "grad_norm": 0.002704866463318467, - "learning_rate": 0.00019999949370340798, - "loss": 46.0, - "step": 13258 - }, - { - "epoch": 1.0137431427643022, - "grad_norm": 0.0008134253439493477, - "learning_rate": 0.00019999949362697149, - "loss": 46.0, - "step": 13259 - }, - { - "epoch": 1.013819599747692, - "grad_norm": 0.0030122932512313128, - "learning_rate": 0.00019999949355052922, - "loss": 46.0, - "step": 13260 - }, - { - "epoch": 1.0138960567310817, - "grad_norm": 0.0007891881396062672, - "learning_rate": 0.0001999994934740812, - "loss": 46.0, - "step": 13261 - }, - { - "epoch": 1.0139725137144715, - "grad_norm": 0.0003403721784707159, - "learning_rate": 0.00019999949339762744, - "loss": 46.0, - "step": 13262 - }, - { - "epoch": 1.014048970697861, - "grad_norm": 0.0008272282429970801, - "learning_rate": 0.00019999949332116785, - "loss": 46.0, - "step": 13263 - }, - { - "epoch": 1.0141254276812508, - "grad_norm": 0.0017194539541378617, - "learning_rate": 0.0001999994932447025, - "loss": 46.0, - "step": 13264 - }, - { - "epoch": 1.0142018846646406, - "grad_norm": 0.0006940565654076636, - "learning_rate": 0.0001999994931682314, - "loss": 46.0, - "step": 13265 - }, - { - "epoch": 1.0142783416480303, - "grad_norm": 0.0007200895925052464, - "learning_rate": 0.00019999949309175454, - "loss": 46.0, - "step": 13266 - }, - { - "epoch": 1.01435479863142, - "grad_norm": 0.003316490910947323, - "learning_rate": 0.0001999994930152719, - "loss": 46.0, - "step": 13267 - }, - { - "epoch": 1.0144312556148096, - "grad_norm": 0.017602525651454926, - "learning_rate": 0.00019999949293878348, - "loss": 46.0, - "step": 13268 - }, - { - "epoch": 1.0145077125981994, - "grad_norm": 0.001880124444141984, - "learning_rate": 0.0001999994928622893, - "loss": 46.0, - "step": 13269 - }, - { - "epoch": 1.0145841695815891, - "grad_norm": 0.0006637490005232394, - "learning_rate": 0.00019999949278578938, - "loss": 46.0, - "step": 13270 - }, - { - "epoch": 1.014660626564979, - "grad_norm": 0.0006572098936885595, - "learning_rate": 0.00019999949270928363, - "loss": 46.0, - "step": 13271 - }, - { - "epoch": 1.0147370835483687, - "grad_norm": 0.0007339658332057297, - "learning_rate": 0.0001999994926327722, - "loss": 46.0, - "step": 13272 - }, - { - "epoch": 1.0148135405317584, - "grad_norm": 0.002393106697127223, - "learning_rate": 0.0001999994925562549, - "loss": 46.0, - "step": 13273 - }, - { - "epoch": 1.014889997515148, - "grad_norm": 0.00019928716938011348, - "learning_rate": 0.00019999949247973187, - "loss": 46.0, - "step": 13274 - }, - { - "epoch": 1.0149664544985377, - "grad_norm": 0.0009872004156932235, - "learning_rate": 0.0001999994924032031, - "loss": 46.0, - "step": 13275 - }, - { - "epoch": 1.0150429114819275, - "grad_norm": 0.003996880725026131, - "learning_rate": 0.00019999949232666852, - "loss": 46.0, - "step": 13276 - }, - { - "epoch": 1.0151193684653173, - "grad_norm": 0.0033545023761689663, - "learning_rate": 0.00019999949225012819, - "loss": 46.0, - "step": 13277 - }, - { - "epoch": 1.015195825448707, - "grad_norm": 0.0006512168911285698, - "learning_rate": 0.00019999949217358208, - "loss": 46.0, - "step": 13278 - }, - { - "epoch": 1.0152722824320966, - "grad_norm": 0.00299880956299603, - "learning_rate": 0.00019999949209703022, - "loss": 46.0, - "step": 13279 - }, - { - "epoch": 1.0153487394154863, - "grad_norm": 0.008543360978364944, - "learning_rate": 0.00019999949202047256, - "loss": 46.0, - "step": 13280 - }, - { - "epoch": 1.015425196398876, - "grad_norm": 0.003519149264320731, - "learning_rate": 0.00019999949194390916, - "loss": 46.0, - "step": 13281 - }, - { - "epoch": 1.0155016533822658, - "grad_norm": 0.0004817867302335799, - "learning_rate": 0.00019999949186733998, - "loss": 46.0, - "step": 13282 - }, - { - "epoch": 1.0155781103656556, - "grad_norm": 0.004248388111591339, - "learning_rate": 0.00019999949179076503, - "loss": 46.0, - "step": 13283 - }, - { - "epoch": 1.0156545673490451, - "grad_norm": 0.0012315039057284594, - "learning_rate": 0.0001999994917141843, - "loss": 46.0, - "step": 13284 - }, - { - "epoch": 1.015731024332435, - "grad_norm": 0.0005654659471474588, - "learning_rate": 0.00019999949163759781, - "loss": 46.0, - "step": 13285 - }, - { - "epoch": 1.0158074813158247, - "grad_norm": 0.0018208416877314448, - "learning_rate": 0.00019999949156100554, - "loss": 46.0, - "step": 13286 - }, - { - "epoch": 1.0158839382992144, - "grad_norm": 0.0006994802388362586, - "learning_rate": 0.00019999949148440753, - "loss": 46.0, - "step": 13287 - }, - { - "epoch": 1.0159603952826042, - "grad_norm": 0.004237137734889984, - "learning_rate": 0.00019999949140780374, - "loss": 46.0, - "step": 13288 - }, - { - "epoch": 1.016036852265994, - "grad_norm": 0.0010248580947518349, - "learning_rate": 0.00019999949133119418, - "loss": 46.0, - "step": 13289 - }, - { - "epoch": 1.0161133092493835, - "grad_norm": 0.01344903465360403, - "learning_rate": 0.00019999949125457884, - "loss": 46.0, - "step": 13290 - }, - { - "epoch": 1.0161897662327732, - "grad_norm": 0.0041988142766058445, - "learning_rate": 0.00019999949117795773, - "loss": 46.0, - "step": 13291 - }, - { - "epoch": 1.016266223216163, - "grad_norm": 0.0003930846869479865, - "learning_rate": 0.00019999949110133085, - "loss": 46.0, - "step": 13292 - }, - { - "epoch": 1.0163426801995528, - "grad_norm": 0.0017059551319107413, - "learning_rate": 0.0001999994910246982, - "loss": 46.0, - "step": 13293 - }, - { - "epoch": 1.0164191371829425, - "grad_norm": 0.0024940853472799063, - "learning_rate": 0.0001999994909480598, - "loss": 46.0, - "step": 13294 - }, - { - "epoch": 1.016495594166332, - "grad_norm": 0.01116472389549017, - "learning_rate": 0.0001999994908714156, - "loss": 46.0, - "step": 13295 - }, - { - "epoch": 1.0165720511497218, - "grad_norm": 0.0014680898748338223, - "learning_rate": 0.00019999949079476566, - "loss": 46.0, - "step": 13296 - }, - { - "epoch": 1.0166485081331116, - "grad_norm": 0.000381072866730392, - "learning_rate": 0.00019999949071810994, - "loss": 46.0, - "step": 13297 - }, - { - "epoch": 1.0167249651165013, - "grad_norm": 0.0006147310486994684, - "learning_rate": 0.00019999949064144844, - "loss": 46.0, - "step": 13298 - }, - { - "epoch": 1.016801422099891, - "grad_norm": 0.0005943061551079154, - "learning_rate": 0.00019999949056478117, - "loss": 46.0, - "step": 13299 - }, - { - "epoch": 1.0168778790832809, - "grad_norm": 0.0005558032426051795, - "learning_rate": 0.00019999949048810813, - "loss": 46.0, - "step": 13300 - }, - { - "epoch": 1.0169543360666704, - "grad_norm": 0.003409595927223563, - "learning_rate": 0.00019999949041142934, - "loss": 46.0, - "step": 13301 - }, - { - "epoch": 1.0170307930500602, - "grad_norm": 0.0012485369807109237, - "learning_rate": 0.00019999949033474478, - "loss": 46.0, - "step": 13302 - }, - { - "epoch": 1.01710725003345, - "grad_norm": 0.0011931437766179442, - "learning_rate": 0.00019999949025805442, - "loss": 46.0, - "step": 13303 - }, - { - "epoch": 1.0171837070168397, - "grad_norm": 0.0007169584278017282, - "learning_rate": 0.00019999949018135833, - "loss": 46.0, - "step": 13304 - }, - { - "epoch": 1.0172601640002294, - "grad_norm": 0.0010900953784585, - "learning_rate": 0.00019999949010465645, - "loss": 46.0, - "step": 13305 - }, - { - "epoch": 1.017336620983619, - "grad_norm": 0.000607198104262352, - "learning_rate": 0.0001999994900279488, - "loss": 46.0, - "step": 13306 - }, - { - "epoch": 1.0174130779670087, - "grad_norm": 0.001631378778256476, - "learning_rate": 0.0001999994899512354, - "loss": 46.0, - "step": 13307 - }, - { - "epoch": 1.0174895349503985, - "grad_norm": 0.0014307759702205658, - "learning_rate": 0.0001999994898745162, - "loss": 46.0, - "step": 13308 - }, - { - "epoch": 1.0175659919337883, - "grad_norm": 0.0016589186852797866, - "learning_rate": 0.00019999948979779124, - "loss": 46.0, - "step": 13309 - }, - { - "epoch": 1.017642448917178, - "grad_norm": 0.0007353859255090356, - "learning_rate": 0.00019999948972106052, - "loss": 46.0, - "step": 13310 - }, - { - "epoch": 1.0177189059005678, - "grad_norm": 0.0008505401201546192, - "learning_rate": 0.00019999948964432402, - "loss": 46.0, - "step": 13311 - }, - { - "epoch": 1.0177953628839573, - "grad_norm": 0.0011174505343660712, - "learning_rate": 0.00019999948956758175, - "loss": 46.0, - "step": 13312 - }, - { - "epoch": 1.017871819867347, - "grad_norm": 0.0049111987464129925, - "learning_rate": 0.00019999948949083374, - "loss": 46.0, - "step": 13313 - }, - { - "epoch": 1.0179482768507369, - "grad_norm": 0.0017285100184381008, - "learning_rate": 0.00019999948941407992, - "loss": 46.0, - "step": 13314 - }, - { - "epoch": 1.0180247338341266, - "grad_norm": 0.004865770693868399, - "learning_rate": 0.00019999948933732036, - "loss": 46.0, - "step": 13315 - }, - { - "epoch": 1.0181011908175164, - "grad_norm": 0.0007946233381517231, - "learning_rate": 0.000199999489260555, - "loss": 46.0, - "step": 13316 - }, - { - "epoch": 1.018177647800906, - "grad_norm": 0.0014810017310082912, - "learning_rate": 0.0001999994891837839, - "loss": 46.0, - "step": 13317 - }, - { - "epoch": 1.0182541047842957, - "grad_norm": 0.000969009764958173, - "learning_rate": 0.000199999489107007, - "loss": 46.0, - "step": 13318 - }, - { - "epoch": 1.0183305617676854, - "grad_norm": 0.0005195743287913501, - "learning_rate": 0.00019999948903022435, - "loss": 46.0, - "step": 13319 - }, - { - "epoch": 1.0184070187510752, - "grad_norm": 0.0011100205592811108, - "learning_rate": 0.00019999948895343595, - "loss": 46.0, - "step": 13320 - }, - { - "epoch": 1.018483475734465, - "grad_norm": 0.000961338693741709, - "learning_rate": 0.00019999948887664175, - "loss": 46.0, - "step": 13321 - }, - { - "epoch": 1.0185599327178547, - "grad_norm": 0.0011881001992151141, - "learning_rate": 0.0001999994887998418, - "loss": 46.0, - "step": 13322 - }, - { - "epoch": 1.0186363897012443, - "grad_norm": 0.00022212143812794238, - "learning_rate": 0.00019999948872303607, - "loss": 46.0, - "step": 13323 - }, - { - "epoch": 1.018712846684634, - "grad_norm": 0.000817084452137351, - "learning_rate": 0.00019999948864622458, - "loss": 46.0, - "step": 13324 - }, - { - "epoch": 1.0187893036680238, - "grad_norm": 0.0029416445177048445, - "learning_rate": 0.0001999994885694073, - "loss": 46.0, - "step": 13325 - }, - { - "epoch": 1.0188657606514135, - "grad_norm": 0.0013460393529385328, - "learning_rate": 0.00019999948849258427, - "loss": 46.0, - "step": 13326 - }, - { - "epoch": 1.0189422176348033, - "grad_norm": 0.001525513012893498, - "learning_rate": 0.00019999948841575545, - "loss": 46.0, - "step": 13327 - }, - { - "epoch": 1.0190186746181928, - "grad_norm": 0.0045488192699849606, - "learning_rate": 0.00019999948833892092, - "loss": 46.0, - "step": 13328 - }, - { - "epoch": 1.0190951316015826, - "grad_norm": 0.0013691463973373175, - "learning_rate": 0.00019999948826208055, - "loss": 46.0, - "step": 13329 - }, - { - "epoch": 1.0191715885849724, - "grad_norm": 0.0034634231124073267, - "learning_rate": 0.00019999948818523442, - "loss": 46.0, - "step": 13330 - }, - { - "epoch": 1.0192480455683621, - "grad_norm": 0.00534837506711483, - "learning_rate": 0.00019999948810838256, - "loss": 46.0, - "step": 13331 - }, - { - "epoch": 1.0193245025517519, - "grad_norm": 0.0017972958739846945, - "learning_rate": 0.00019999948803152488, - "loss": 46.0, - "step": 13332 - }, - { - "epoch": 1.0194009595351416, - "grad_norm": 0.0009605927625671029, - "learning_rate": 0.00019999948795466148, - "loss": 46.0, - "step": 13333 - }, - { - "epoch": 1.0194774165185312, - "grad_norm": 0.0020247125066816807, - "learning_rate": 0.00019999948787779228, - "loss": 46.0, - "step": 13334 - }, - { - "epoch": 1.019553873501921, - "grad_norm": 0.0022912139538675547, - "learning_rate": 0.00019999948780091733, - "loss": 46.0, - "step": 13335 - }, - { - "epoch": 1.0196303304853107, - "grad_norm": 0.003716285340487957, - "learning_rate": 0.00019999948772403658, - "loss": 46.0, - "step": 13336 - }, - { - "epoch": 1.0197067874687005, - "grad_norm": 0.0010237148962914944, - "learning_rate": 0.00019999948764715008, - "loss": 46.0, - "step": 13337 - }, - { - "epoch": 1.0197832444520902, - "grad_norm": 0.0006742149125784636, - "learning_rate": 0.00019999948757025782, - "loss": 46.0, - "step": 13338 - }, - { - "epoch": 1.0198597014354798, - "grad_norm": 0.0009522874606773257, - "learning_rate": 0.00019999948749335977, - "loss": 46.0, - "step": 13339 - }, - { - "epoch": 1.0199361584188695, - "grad_norm": 0.0011698524467647076, - "learning_rate": 0.00019999948741645599, - "loss": 46.0, - "step": 13340 - }, - { - "epoch": 1.0200126154022593, - "grad_norm": 0.0035062176175415516, - "learning_rate": 0.0001999994873395464, - "loss": 46.0, - "step": 13341 - }, - { - "epoch": 1.020089072385649, - "grad_norm": 0.002914591459557414, - "learning_rate": 0.00019999948726263104, - "loss": 46.0, - "step": 13342 - }, - { - "epoch": 1.0201655293690388, - "grad_norm": 0.019517388194799423, - "learning_rate": 0.00019999948718570995, - "loss": 46.0, - "step": 13343 - }, - { - "epoch": 1.0202419863524286, - "grad_norm": 0.0014743616338819265, - "learning_rate": 0.00019999948710878304, - "loss": 46.0, - "step": 13344 - }, - { - "epoch": 1.0203184433358181, - "grad_norm": 0.002358692465350032, - "learning_rate": 0.00019999948703185042, - "loss": 46.0, - "step": 13345 - }, - { - "epoch": 1.0203949003192079, - "grad_norm": 0.0009612751891836524, - "learning_rate": 0.00019999948695491196, - "loss": 46.0, - "step": 13346 - }, - { - "epoch": 1.0204713573025976, - "grad_norm": 0.0011470431927591562, - "learning_rate": 0.00019999948687796776, - "loss": 46.0, - "step": 13347 - }, - { - "epoch": 1.0205478142859874, - "grad_norm": 0.0013494361191987991, - "learning_rate": 0.0001999994868010178, - "loss": 46.0, - "step": 13348 - }, - { - "epoch": 1.0206242712693772, - "grad_norm": 0.000935558695346117, - "learning_rate": 0.0001999994867240621, - "loss": 46.0, - "step": 13349 - }, - { - "epoch": 1.0207007282527667, - "grad_norm": 0.0006529902457259595, - "learning_rate": 0.00019999948664710057, - "loss": 46.0, - "step": 13350 - }, - { - "epoch": 1.0207771852361565, - "grad_norm": 0.0035267509520053864, - "learning_rate": 0.0001999994865701333, - "loss": 46.0, - "step": 13351 - }, - { - "epoch": 1.0208536422195462, - "grad_norm": 0.0013048126129433513, - "learning_rate": 0.00019999948649316026, - "loss": 46.0, - "step": 13352 - }, - { - "epoch": 1.020930099202936, - "grad_norm": 0.013325194828212261, - "learning_rate": 0.00019999948641618147, - "loss": 46.0, - "step": 13353 - }, - { - "epoch": 1.0210065561863257, - "grad_norm": 0.000999304698780179, - "learning_rate": 0.00019999948633919688, - "loss": 46.0, - "step": 13354 - }, - { - "epoch": 1.0210830131697153, - "grad_norm": 0.0035504517145454884, - "learning_rate": 0.00019999948626220652, - "loss": 46.0, - "step": 13355 - }, - { - "epoch": 1.021159470153105, - "grad_norm": 0.00038704698090441525, - "learning_rate": 0.0001999994861852104, - "loss": 46.0, - "step": 13356 - }, - { - "epoch": 1.0212359271364948, - "grad_norm": 0.0003472507814876735, - "learning_rate": 0.00019999948610820853, - "loss": 46.0, - "step": 13357 - }, - { - "epoch": 1.0213123841198846, - "grad_norm": 0.0015108231455087662, - "learning_rate": 0.00019999948603120088, - "loss": 46.0, - "step": 13358 - }, - { - "epoch": 1.0213888411032743, - "grad_norm": 0.0012180168414488435, - "learning_rate": 0.00019999948595418742, - "loss": 46.0, - "step": 13359 - }, - { - "epoch": 1.021465298086664, - "grad_norm": 0.004298059735447168, - "learning_rate": 0.00019999948587716825, - "loss": 46.0, - "step": 13360 - }, - { - "epoch": 1.0215417550700536, - "grad_norm": 0.0006079935701563954, - "learning_rate": 0.00019999948580014327, - "loss": 46.0, - "step": 13361 - }, - { - "epoch": 1.0216182120534434, - "grad_norm": 0.009199530817568302, - "learning_rate": 0.00019999948572311252, - "loss": 46.0, - "step": 13362 - }, - { - "epoch": 1.0216946690368331, - "grad_norm": 0.0019859843887388706, - "learning_rate": 0.00019999948564607603, - "loss": 46.0, - "step": 13363 - }, - { - "epoch": 1.021771126020223, - "grad_norm": 0.0011677610455080867, - "learning_rate": 0.00019999948556903376, - "loss": 46.0, - "step": 13364 - }, - { - "epoch": 1.0218475830036127, - "grad_norm": 0.0007705016760155559, - "learning_rate": 0.00019999948549198572, - "loss": 46.0, - "step": 13365 - }, - { - "epoch": 1.0219240399870022, - "grad_norm": 0.002353726653382182, - "learning_rate": 0.0001999994854149319, - "loss": 46.0, - "step": 13366 - }, - { - "epoch": 1.022000496970392, - "grad_norm": 0.000638998462818563, - "learning_rate": 0.00019999948533787234, - "loss": 46.0, - "step": 13367 - }, - { - "epoch": 1.0220769539537817, - "grad_norm": 0.0008855049381963909, - "learning_rate": 0.00019999948526080695, - "loss": 46.0, - "step": 13368 - }, - { - "epoch": 1.0221534109371715, - "grad_norm": 0.0015744090778753161, - "learning_rate": 0.00019999948518373585, - "loss": 46.0, - "step": 13369 - }, - { - "epoch": 1.0222298679205613, - "grad_norm": 0.0008924994617700577, - "learning_rate": 0.00019999948510665897, - "loss": 46.0, - "step": 13370 - }, - { - "epoch": 1.022306324903951, - "grad_norm": 0.0006267475546337664, - "learning_rate": 0.00019999948502957628, - "loss": 46.0, - "step": 13371 - }, - { - "epoch": 1.0223827818873406, - "grad_norm": 0.0006603386136703193, - "learning_rate": 0.00019999948495248786, - "loss": 46.0, - "step": 13372 - }, - { - "epoch": 1.0224592388707303, - "grad_norm": 0.0005316564347594976, - "learning_rate": 0.00019999948487539365, - "loss": 46.0, - "step": 13373 - }, - { - "epoch": 1.02253569585412, - "grad_norm": 0.0010533621534705162, - "learning_rate": 0.00019999948479829368, - "loss": 46.0, - "step": 13374 - }, - { - "epoch": 1.0226121528375098, - "grad_norm": 0.005168898031115532, - "learning_rate": 0.00019999948472118796, - "loss": 46.0, - "step": 13375 - }, - { - "epoch": 1.0226886098208996, - "grad_norm": 0.0019177552312612534, - "learning_rate": 0.00019999948464407646, - "loss": 46.0, - "step": 13376 - }, - { - "epoch": 1.0227650668042891, - "grad_norm": 0.0007928262930363417, - "learning_rate": 0.00019999948456695917, - "loss": 46.0, - "step": 13377 - }, - { - "epoch": 1.022841523787679, - "grad_norm": 0.001959329703822732, - "learning_rate": 0.00019999948448983613, - "loss": 46.0, - "step": 13378 - }, - { - "epoch": 1.0229179807710687, - "grad_norm": 0.0010179789969697595, - "learning_rate": 0.00019999948441270731, - "loss": 46.0, - "step": 13379 - }, - { - "epoch": 1.0229944377544584, - "grad_norm": 0.0007430230616591871, - "learning_rate": 0.00019999948433557273, - "loss": 46.0, - "step": 13380 - }, - { - "epoch": 1.0230708947378482, - "grad_norm": 0.00038856040919199586, - "learning_rate": 0.00019999948425843237, - "loss": 46.0, - "step": 13381 - }, - { - "epoch": 1.023147351721238, - "grad_norm": 0.002925224369391799, - "learning_rate": 0.00019999948418128626, - "loss": 46.0, - "step": 13382 - }, - { - "epoch": 1.0232238087046275, - "grad_norm": 0.008724424056708813, - "learning_rate": 0.00019999948410413435, - "loss": 46.0, - "step": 13383 - }, - { - "epoch": 1.0233002656880172, - "grad_norm": 0.0011514967773109674, - "learning_rate": 0.0001999994840269767, - "loss": 46.0, - "step": 13384 - }, - { - "epoch": 1.023376722671407, - "grad_norm": 0.0021016153041273355, - "learning_rate": 0.00019999948394981327, - "loss": 46.0, - "step": 13385 - }, - { - "epoch": 1.0234531796547968, - "grad_norm": 0.0003936931607313454, - "learning_rate": 0.00019999948387264407, - "loss": 46.0, - "step": 13386 - }, - { - "epoch": 1.0235296366381865, - "grad_norm": 0.0008404591935686767, - "learning_rate": 0.00019999948379546912, - "loss": 46.0, - "step": 13387 - }, - { - "epoch": 1.023606093621576, - "grad_norm": 0.005145175848156214, - "learning_rate": 0.00019999948371828837, - "loss": 46.0, - "step": 13388 - }, - { - "epoch": 1.0236825506049658, - "grad_norm": 0.0010951557196676731, - "learning_rate": 0.00019999948364110182, - "loss": 46.0, - "step": 13389 - }, - { - "epoch": 1.0237590075883556, - "grad_norm": 0.0028131543658673763, - "learning_rate": 0.00019999948356390958, - "loss": 46.0, - "step": 13390 - }, - { - "epoch": 1.0238354645717453, - "grad_norm": 0.0021544003393501043, - "learning_rate": 0.00019999948348671152, - "loss": 46.0, - "step": 13391 - }, - { - "epoch": 1.023911921555135, - "grad_norm": 0.0024513881653547287, - "learning_rate": 0.0001999994834095077, - "loss": 46.0, - "step": 13392 - }, - { - "epoch": 1.0239883785385249, - "grad_norm": 0.0012055312981829047, - "learning_rate": 0.0001999994833322981, - "loss": 46.0, - "step": 13393 - }, - { - "epoch": 1.0240648355219144, - "grad_norm": 0.0018854618538171053, - "learning_rate": 0.00019999948325508275, - "loss": 46.0, - "step": 13394 - }, - { - "epoch": 1.0241412925053042, - "grad_norm": 0.0010407872032374144, - "learning_rate": 0.00019999948317786165, - "loss": 46.0, - "step": 13395 - }, - { - "epoch": 1.024217749488694, - "grad_norm": 0.005921361036598682, - "learning_rate": 0.00019999948310063474, - "loss": 46.0, - "step": 13396 - }, - { - "epoch": 1.0242942064720837, - "grad_norm": 0.0005181310698390007, - "learning_rate": 0.00019999948302340209, - "loss": 46.0, - "step": 13397 - }, - { - "epoch": 1.0243706634554735, - "grad_norm": 0.000706980936229229, - "learning_rate": 0.00019999948294616363, - "loss": 46.0, - "step": 13398 - }, - { - "epoch": 1.024447120438863, - "grad_norm": 0.0009316926589235663, - "learning_rate": 0.00019999948286891943, - "loss": 46.0, - "step": 13399 - }, - { - "epoch": 1.0245235774222528, - "grad_norm": 0.001712973229587078, - "learning_rate": 0.00019999948279166946, - "loss": 46.0, - "step": 13400 - }, - { - "epoch": 1.0246000344056425, - "grad_norm": 0.0007629567408002913, - "learning_rate": 0.00019999948271441374, - "loss": 46.0, - "step": 13401 - }, - { - "epoch": 1.0246764913890323, - "grad_norm": 0.000793912447988987, - "learning_rate": 0.00019999948263715222, - "loss": 46.0, - "step": 13402 - }, - { - "epoch": 1.024752948372422, - "grad_norm": 0.0046837893314659595, - "learning_rate": 0.00019999948255988492, - "loss": 46.0, - "step": 13403 - }, - { - "epoch": 1.0248294053558118, - "grad_norm": 0.0014919465174898505, - "learning_rate": 0.00019999948248261185, - "loss": 46.0, - "step": 13404 - }, - { - "epoch": 1.0249058623392013, - "grad_norm": 0.00041987377335317433, - "learning_rate": 0.00019999948240533307, - "loss": 46.0, - "step": 13405 - }, - { - "epoch": 1.024982319322591, - "grad_norm": 0.0010832062689587474, - "learning_rate": 0.00019999948232804845, - "loss": 46.0, - "step": 13406 - }, - { - "epoch": 1.0250587763059809, - "grad_norm": 0.00044721554149873555, - "learning_rate": 0.0001999994822507581, - "loss": 46.0, - "step": 13407 - }, - { - "epoch": 1.0251352332893706, - "grad_norm": 0.0005603706231340766, - "learning_rate": 0.00019999948217346196, - "loss": 46.0, - "step": 13408 - }, - { - "epoch": 1.0252116902727604, - "grad_norm": 0.002323447959497571, - "learning_rate": 0.00019999948209616005, - "loss": 46.0, - "step": 13409 - }, - { - "epoch": 1.02528814725615, - "grad_norm": 0.000664880673866719, - "learning_rate": 0.0001999994820188524, - "loss": 46.0, - "step": 13410 - }, - { - "epoch": 1.0253646042395397, - "grad_norm": 0.002324435394257307, - "learning_rate": 0.00019999948194153897, - "loss": 46.0, - "step": 13411 - }, - { - "epoch": 1.0254410612229294, - "grad_norm": 0.0006417757831513882, - "learning_rate": 0.00019999948186421977, - "loss": 46.0, - "step": 13412 - }, - { - "epoch": 1.0255175182063192, - "grad_norm": 0.0009849769994616508, - "learning_rate": 0.00019999948178689477, - "loss": 46.0, - "step": 13413 - }, - { - "epoch": 1.025593975189709, - "grad_norm": 0.008826961740851402, - "learning_rate": 0.00019999948170956405, - "loss": 46.0, - "step": 13414 - }, - { - "epoch": 1.0256704321730985, - "grad_norm": 0.00046254077460616827, - "learning_rate": 0.00019999948163222753, - "loss": 46.0, - "step": 13415 - }, - { - "epoch": 1.0257468891564883, - "grad_norm": 0.0015090582892298698, - "learning_rate": 0.00019999948155488526, - "loss": 46.0, - "step": 13416 - }, - { - "epoch": 1.025823346139878, - "grad_norm": 0.0014890835154801607, - "learning_rate": 0.00019999948147753717, - "loss": 46.0, - "step": 13417 - }, - { - "epoch": 1.0258998031232678, - "grad_norm": 0.0030610349494963884, - "learning_rate": 0.00019999948140018335, - "loss": 46.0, - "step": 13418 - }, - { - "epoch": 1.0259762601066575, - "grad_norm": 0.0007048352854326367, - "learning_rate": 0.00019999948132282377, - "loss": 46.0, - "step": 13419 - }, - { - "epoch": 1.0260527170900473, - "grad_norm": 0.0008247590158134699, - "learning_rate": 0.0001999994812454584, - "loss": 46.0, - "step": 13420 - }, - { - "epoch": 1.0261291740734368, - "grad_norm": 0.001291550463065505, - "learning_rate": 0.0001999994811680873, - "loss": 46.0, - "step": 13421 - }, - { - "epoch": 1.0262056310568266, - "grad_norm": 0.008750289678573608, - "learning_rate": 0.0001999994810907104, - "loss": 46.0, - "step": 13422 - }, - { - "epoch": 1.0262820880402164, - "grad_norm": 0.00046774253132753074, - "learning_rate": 0.00019999948101332771, - "loss": 46.0, - "step": 13423 - }, - { - "epoch": 1.0263585450236061, - "grad_norm": 0.0005651692044921219, - "learning_rate": 0.0001999994809359393, - "loss": 46.0, - "step": 13424 - }, - { - "epoch": 1.026435002006996, - "grad_norm": 0.0030300442595034838, - "learning_rate": 0.0001999994808585451, - "loss": 46.0, - "step": 13425 - }, - { - "epoch": 1.0265114589903854, - "grad_norm": 0.0016867343802005053, - "learning_rate": 0.0001999994807811451, - "loss": 46.0, - "step": 13426 - }, - { - "epoch": 1.0265879159737752, - "grad_norm": 0.0009941098978742957, - "learning_rate": 0.00019999948070373934, - "loss": 46.0, - "step": 13427 - }, - { - "epoch": 1.026664372957165, - "grad_norm": 0.006308149080723524, - "learning_rate": 0.0001999994806263278, - "loss": 46.0, - "step": 13428 - }, - { - "epoch": 1.0267408299405547, - "grad_norm": 0.008601488545536995, - "learning_rate": 0.00019999948054891053, - "loss": 46.0, - "step": 13429 - }, - { - "epoch": 1.0268172869239445, - "grad_norm": 0.000377606600522995, - "learning_rate": 0.0001999994804714875, - "loss": 46.0, - "step": 13430 - }, - { - "epoch": 1.0268937439073342, - "grad_norm": 0.0009216060279868543, - "learning_rate": 0.00019999948039405865, - "loss": 46.0, - "step": 13431 - }, - { - "epoch": 1.0269702008907238, - "grad_norm": 0.004727725405246019, - "learning_rate": 0.00019999948031662406, - "loss": 46.0, - "step": 13432 - }, - { - "epoch": 1.0270466578741135, - "grad_norm": 0.002066485583782196, - "learning_rate": 0.0001999994802391837, - "loss": 46.0, - "step": 13433 - }, - { - "epoch": 1.0271231148575033, - "grad_norm": 0.0011169894132763147, - "learning_rate": 0.00019999948016173757, - "loss": 46.0, - "step": 13434 - }, - { - "epoch": 1.027199571840893, - "grad_norm": 0.002267979783937335, - "learning_rate": 0.00019999948008428566, - "loss": 46.0, - "step": 13435 - }, - { - "epoch": 1.0272760288242828, - "grad_norm": 0.0012321372050791979, - "learning_rate": 0.00019999948000682798, - "loss": 46.0, - "step": 13436 - }, - { - "epoch": 1.0273524858076724, - "grad_norm": 0.0006722132093273103, - "learning_rate": 0.00019999947992936455, - "loss": 46.0, - "step": 13437 - }, - { - "epoch": 1.0274289427910621, - "grad_norm": 0.0029457705095410347, - "learning_rate": 0.00019999947985189533, - "loss": 46.0, - "step": 13438 - }, - { - "epoch": 1.0275053997744519, - "grad_norm": 0.000724061974324286, - "learning_rate": 0.00019999947977442035, - "loss": 46.0, - "step": 13439 - }, - { - "epoch": 1.0275818567578416, - "grad_norm": 0.0013748782221227884, - "learning_rate": 0.0001999994796969396, - "loss": 46.0, - "step": 13440 - }, - { - "epoch": 1.0276583137412314, - "grad_norm": 0.00034714952926151454, - "learning_rate": 0.0001999994796194531, - "loss": 46.0, - "step": 13441 - }, - { - "epoch": 1.0277347707246212, - "grad_norm": 0.0020628017373383045, - "learning_rate": 0.0001999994795419608, - "loss": 46.0, - "step": 13442 - }, - { - "epoch": 1.0278112277080107, - "grad_norm": 0.00254381587728858, - "learning_rate": 0.0001999994794644627, - "loss": 46.0, - "step": 13443 - }, - { - "epoch": 1.0278876846914005, - "grad_norm": 0.0014569807099178433, - "learning_rate": 0.0001999994793869589, - "loss": 46.0, - "step": 13444 - }, - { - "epoch": 1.0279641416747902, - "grad_norm": 0.0006399656413123012, - "learning_rate": 0.0001999994793094493, - "loss": 46.0, - "step": 13445 - }, - { - "epoch": 1.02804059865818, - "grad_norm": 0.005551642272621393, - "learning_rate": 0.00019999947923193394, - "loss": 46.0, - "step": 13446 - }, - { - "epoch": 1.0281170556415697, - "grad_norm": 0.0009210671414621174, - "learning_rate": 0.0001999994791544128, - "loss": 46.0, - "step": 13447 - }, - { - "epoch": 1.0281935126249593, - "grad_norm": 0.0012172829592600465, - "learning_rate": 0.0001999994790768859, - "loss": 46.0, - "step": 13448 - }, - { - "epoch": 1.028269969608349, - "grad_norm": 0.0009926899801939726, - "learning_rate": 0.0001999994789993532, - "loss": 46.0, - "step": 13449 - }, - { - "epoch": 1.0283464265917388, - "grad_norm": 0.008660213090479374, - "learning_rate": 0.0001999994789218148, - "loss": 46.0, - "step": 13450 - }, - { - "epoch": 1.0284228835751286, - "grad_norm": 0.0016274573281407356, - "learning_rate": 0.00019999947884427054, - "loss": 46.0, - "step": 13451 - }, - { - "epoch": 1.0284993405585183, - "grad_norm": 0.0007088819984346628, - "learning_rate": 0.00019999947876672057, - "loss": 46.0, - "step": 13452 - }, - { - "epoch": 1.028575797541908, - "grad_norm": 0.0013249666662886739, - "learning_rate": 0.00019999947868916483, - "loss": 46.0, - "step": 13453 - }, - { - "epoch": 1.0286522545252976, - "grad_norm": 0.0014633532846346498, - "learning_rate": 0.0001999994786116033, - "loss": 46.0, - "step": 13454 - }, - { - "epoch": 1.0287287115086874, - "grad_norm": 0.0028745331801474094, - "learning_rate": 0.000199999478534036, - "loss": 46.0, - "step": 13455 - }, - { - "epoch": 1.0288051684920771, - "grad_norm": 0.0043944101780653, - "learning_rate": 0.00019999947845646295, - "loss": 46.0, - "step": 13456 - }, - { - "epoch": 1.028881625475467, - "grad_norm": 0.0012361662229523063, - "learning_rate": 0.0001999994783788841, - "loss": 46.0, - "step": 13457 - }, - { - "epoch": 1.0289580824588567, - "grad_norm": 0.016332441940903664, - "learning_rate": 0.00019999947830129952, - "loss": 46.0, - "step": 13458 - }, - { - "epoch": 1.0290345394422462, - "grad_norm": 0.003959211986511946, - "learning_rate": 0.00019999947822370913, - "loss": 46.0, - "step": 13459 - }, - { - "epoch": 1.029110996425636, - "grad_norm": 0.003940400201827288, - "learning_rate": 0.00019999947814611303, - "loss": 46.0, - "step": 13460 - }, - { - "epoch": 1.0291874534090257, - "grad_norm": 0.0007510966970585287, - "learning_rate": 0.0001999994780685111, - "loss": 46.0, - "step": 13461 - }, - { - "epoch": 1.0292639103924155, - "grad_norm": 0.001795685151591897, - "learning_rate": 0.00019999947799090344, - "loss": 46.0, - "step": 13462 - }, - { - "epoch": 1.0293403673758053, - "grad_norm": 0.0020094038918614388, - "learning_rate": 0.00019999947791328996, - "loss": 46.0, - "step": 13463 - }, - { - "epoch": 1.029416824359195, - "grad_norm": 0.0009090380626730621, - "learning_rate": 0.00019999947783567076, - "loss": 46.0, - "step": 13464 - }, - { - "epoch": 1.0294932813425846, - "grad_norm": 0.0010834747226908803, - "learning_rate": 0.00019999947775804577, - "loss": 46.0, - "step": 13465 - }, - { - "epoch": 1.0295697383259743, - "grad_norm": 0.00221562129445374, - "learning_rate": 0.00019999947768041502, - "loss": 46.0, - "step": 13466 - }, - { - "epoch": 1.029646195309364, - "grad_norm": 0.0038680091965943575, - "learning_rate": 0.00019999947760277847, - "loss": 46.0, - "step": 13467 - }, - { - "epoch": 1.0297226522927538, - "grad_norm": 0.0021474480163306, - "learning_rate": 0.00019999947752513618, - "loss": 46.0, - "step": 13468 - }, - { - "epoch": 1.0297991092761436, - "grad_norm": 0.001359705813229084, - "learning_rate": 0.00019999947744748811, - "loss": 46.0, - "step": 13469 - }, - { - "epoch": 1.0298755662595331, - "grad_norm": 0.002832723781466484, - "learning_rate": 0.0001999994773698343, - "loss": 46.0, - "step": 13470 - }, - { - "epoch": 1.029952023242923, - "grad_norm": 0.001258998061530292, - "learning_rate": 0.00019999947729217472, - "loss": 46.0, - "step": 13471 - }, - { - "epoch": 1.0300284802263127, - "grad_norm": 0.0026318260934203863, - "learning_rate": 0.00019999947721450933, - "loss": 46.0, - "step": 13472 - }, - { - "epoch": 1.0301049372097024, - "grad_norm": 0.00022138512576930225, - "learning_rate": 0.00019999947713683817, - "loss": 46.0, - "step": 13473 - }, - { - "epoch": 1.0301813941930922, - "grad_norm": 0.0032568189781159163, - "learning_rate": 0.0001999994770591613, - "loss": 46.0, - "step": 13474 - }, - { - "epoch": 1.030257851176482, - "grad_norm": 0.0010018192697316408, - "learning_rate": 0.0001999994769814786, - "loss": 46.0, - "step": 13475 - }, - { - "epoch": 1.0303343081598715, - "grad_norm": 0.00046532382839359343, - "learning_rate": 0.00019999947690379016, - "loss": 46.0, - "step": 13476 - }, - { - "epoch": 1.0304107651432612, - "grad_norm": 0.000701883458532393, - "learning_rate": 0.00019999947682609594, - "loss": 46.0, - "step": 13477 - }, - { - "epoch": 1.030487222126651, - "grad_norm": 0.006138428580015898, - "learning_rate": 0.00019999947674839594, - "loss": 46.0, - "step": 13478 - }, - { - "epoch": 1.0305636791100408, - "grad_norm": 0.002016335027292371, - "learning_rate": 0.0001999994766706902, - "loss": 46.0, - "step": 13479 - }, - { - "epoch": 1.0306401360934305, - "grad_norm": 0.0007726931944489479, - "learning_rate": 0.00019999947659297864, - "loss": 46.0, - "step": 13480 - }, - { - "epoch": 1.03071659307682, - "grad_norm": 0.002231534570455551, - "learning_rate": 0.00019999947651526135, - "loss": 46.0, - "step": 13481 - }, - { - "epoch": 1.0307930500602098, - "grad_norm": 0.0018946636701002717, - "learning_rate": 0.0001999994764375383, - "loss": 46.0, - "step": 13482 - }, - { - "epoch": 1.0308695070435996, - "grad_norm": 0.00237390142865479, - "learning_rate": 0.00019999947635980947, - "loss": 46.0, - "step": 13483 - }, - { - "epoch": 1.0309459640269893, - "grad_norm": 0.0006835960666649044, - "learning_rate": 0.00019999947628207486, - "loss": 46.0, - "step": 13484 - }, - { - "epoch": 1.031022421010379, - "grad_norm": 0.0016942026559263468, - "learning_rate": 0.00019999947620433448, - "loss": 46.0, - "step": 13485 - }, - { - "epoch": 1.0310988779937686, - "grad_norm": 0.002672442002221942, - "learning_rate": 0.00019999947612658834, - "loss": 46.0, - "step": 13486 - }, - { - "epoch": 1.0311753349771584, - "grad_norm": 0.0019218804081901908, - "learning_rate": 0.0001999994760488364, - "loss": 46.0, - "step": 13487 - }, - { - "epoch": 1.0312517919605482, - "grad_norm": 0.0005724010989069939, - "learning_rate": 0.00019999947597107873, - "loss": 46.0, - "step": 13488 - }, - { - "epoch": 1.031328248943938, - "grad_norm": 0.01681784726679325, - "learning_rate": 0.00019999947589331528, - "loss": 46.0, - "step": 13489 - }, - { - "epoch": 1.0314047059273277, - "grad_norm": 0.0030642254278063774, - "learning_rate": 0.00019999947581554608, - "loss": 46.0, - "step": 13490 - }, - { - "epoch": 1.0314811629107175, - "grad_norm": 0.0012512184912338853, - "learning_rate": 0.00019999947573777106, - "loss": 46.0, - "step": 13491 - }, - { - "epoch": 1.031557619894107, - "grad_norm": 0.001450938987545669, - "learning_rate": 0.0001999994756599903, - "loss": 46.0, - "step": 13492 - }, - { - "epoch": 1.0316340768774968, - "grad_norm": 0.001800187979824841, - "learning_rate": 0.0001999994755822038, - "loss": 46.0, - "step": 13493 - }, - { - "epoch": 1.0317105338608865, - "grad_norm": 0.0008096079109236598, - "learning_rate": 0.0001999994755044115, - "loss": 46.0, - "step": 13494 - }, - { - "epoch": 1.0317869908442763, - "grad_norm": 0.006515221670269966, - "learning_rate": 0.0001999994754266134, - "loss": 46.0, - "step": 13495 - }, - { - "epoch": 1.031863447827666, - "grad_norm": 0.0010154324118047953, - "learning_rate": 0.0001999994753488096, - "loss": 46.0, - "step": 13496 - }, - { - "epoch": 1.0319399048110556, - "grad_norm": 0.010628794319927692, - "learning_rate": 0.00019999947527099999, - "loss": 46.0, - "step": 13497 - }, - { - "epoch": 1.0320163617944453, - "grad_norm": 0.005647844634950161, - "learning_rate": 0.00019999947519318457, - "loss": 46.0, - "step": 13498 - }, - { - "epoch": 1.032092818777835, - "grad_norm": 0.001832620589993894, - "learning_rate": 0.00019999947511536347, - "loss": 46.0, - "step": 13499 - }, - { - "epoch": 1.0321692757612249, - "grad_norm": 0.0016396817518398166, - "learning_rate": 0.00019999947503753654, - "loss": 46.0, - "step": 13500 - }, - { - "epoch": 1.0322457327446146, - "grad_norm": 0.00513737415894866, - "learning_rate": 0.00019999947495970383, - "loss": 46.0, - "step": 13501 - }, - { - "epoch": 1.0323221897280044, - "grad_norm": 0.0011204457841813564, - "learning_rate": 0.00019999947488186538, - "loss": 46.0, - "step": 13502 - }, - { - "epoch": 1.032398646711394, - "grad_norm": 0.0009296801290474832, - "learning_rate": 0.00019999947480402118, - "loss": 46.0, - "step": 13503 - }, - { - "epoch": 1.0324751036947837, - "grad_norm": 0.0006482386379502714, - "learning_rate": 0.00019999947472617118, - "loss": 46.0, - "step": 13504 - }, - { - "epoch": 1.0325515606781734, - "grad_norm": 0.004908979404717684, - "learning_rate": 0.0001999994746483154, - "loss": 46.0, - "step": 13505 - }, - { - "epoch": 1.0326280176615632, - "grad_norm": 0.0009123889612965286, - "learning_rate": 0.00019999947457045386, - "loss": 46.0, - "step": 13506 - }, - { - "epoch": 1.032704474644953, - "grad_norm": 0.0045522041618824005, - "learning_rate": 0.00019999947449258657, - "loss": 46.0, - "step": 13507 - }, - { - "epoch": 1.0327809316283425, - "grad_norm": 0.0005408361903391778, - "learning_rate": 0.0001999994744147135, - "loss": 46.0, - "step": 13508 - }, - { - "epoch": 1.0328573886117323, - "grad_norm": 0.000514801824465394, - "learning_rate": 0.00019999947433683467, - "loss": 46.0, - "step": 13509 - }, - { - "epoch": 1.032933845595122, - "grad_norm": 0.0005614846013486385, - "learning_rate": 0.00019999947425895006, - "loss": 46.0, - "step": 13510 - }, - { - "epoch": 1.0330103025785118, - "grad_norm": 0.0024432719219475985, - "learning_rate": 0.00019999947418105967, - "loss": 46.0, - "step": 13511 - }, - { - "epoch": 1.0330867595619015, - "grad_norm": 0.0012248132843524218, - "learning_rate": 0.00019999947410316354, - "loss": 46.0, - "step": 13512 - }, - { - "epoch": 1.0331632165452913, - "grad_norm": 0.0020208240021020174, - "learning_rate": 0.00019999947402526164, - "loss": 46.0, - "step": 13513 - }, - { - "epoch": 1.0332396735286808, - "grad_norm": 0.0003774506039917469, - "learning_rate": 0.00019999947394735393, - "loss": 46.0, - "step": 13514 - }, - { - "epoch": 1.0333161305120706, - "grad_norm": 0.0009529279777780175, - "learning_rate": 0.00019999947386944048, - "loss": 46.0, - "step": 13515 - }, - { - "epoch": 1.0333925874954604, - "grad_norm": 0.001439249492250383, - "learning_rate": 0.00019999947379152123, - "loss": 46.0, - "step": 13516 - }, - { - "epoch": 1.0334690444788501, - "grad_norm": 0.0006790959741920233, - "learning_rate": 0.00019999947371359623, - "loss": 46.0, - "step": 13517 - }, - { - "epoch": 1.03354550146224, - "grad_norm": 0.0010327092604711652, - "learning_rate": 0.00019999947363566549, - "loss": 46.0, - "step": 13518 - }, - { - "epoch": 1.0336219584456294, - "grad_norm": 0.0011972899083048105, - "learning_rate": 0.00019999947355772894, - "loss": 46.0, - "step": 13519 - }, - { - "epoch": 1.0336984154290192, - "grad_norm": 0.004191542509943247, - "learning_rate": 0.00019999947347978662, - "loss": 46.0, - "step": 13520 - }, - { - "epoch": 1.033774872412409, - "grad_norm": 0.0007471860735677183, - "learning_rate": 0.00019999947340183858, - "loss": 46.0, - "step": 13521 - }, - { - "epoch": 1.0338513293957987, - "grad_norm": 0.0002676327130757272, - "learning_rate": 0.00019999947332388472, - "loss": 46.0, - "step": 13522 - }, - { - "epoch": 1.0339277863791885, - "grad_norm": 0.0010614984203130007, - "learning_rate": 0.0001999994732459251, - "loss": 46.0, - "step": 13523 - }, - { - "epoch": 1.0340042433625782, - "grad_norm": 0.0027158628217875957, - "learning_rate": 0.00019999947316795972, - "loss": 46.0, - "step": 13524 - }, - { - "epoch": 1.0340807003459678, - "grad_norm": 0.0010642081033438444, - "learning_rate": 0.00019999947308998856, - "loss": 46.0, - "step": 13525 - }, - { - "epoch": 1.0341571573293575, - "grad_norm": 0.0011307941749691963, - "learning_rate": 0.00019999947301201166, - "loss": 46.0, - "step": 13526 - }, - { - "epoch": 1.0342336143127473, - "grad_norm": 0.0034480879548937082, - "learning_rate": 0.00019999947293402896, - "loss": 46.0, - "step": 13527 - }, - { - "epoch": 1.034310071296137, - "grad_norm": 0.005681936163455248, - "learning_rate": 0.0001999994728560405, - "loss": 46.0, - "step": 13528 - }, - { - "epoch": 1.0343865282795268, - "grad_norm": 0.001056214328855276, - "learning_rate": 0.00019999947277804628, - "loss": 46.0, - "step": 13529 - }, - { - "epoch": 1.0344629852629164, - "grad_norm": 0.006874502636492252, - "learning_rate": 0.00019999947270004625, - "loss": 46.0, - "step": 13530 - }, - { - "epoch": 1.0345394422463061, - "grad_norm": 0.0016898735193535686, - "learning_rate": 0.0001999994726220405, - "loss": 46.0, - "step": 13531 - }, - { - "epoch": 1.0346158992296959, - "grad_norm": 0.0019312641816213727, - "learning_rate": 0.00019999947254402897, - "loss": 46.0, - "step": 13532 - }, - { - "epoch": 1.0346923562130856, - "grad_norm": 0.000637902005109936, - "learning_rate": 0.00019999947246601165, - "loss": 46.0, - "step": 13533 - }, - { - "epoch": 1.0347688131964754, - "grad_norm": 0.00584821030497551, - "learning_rate": 0.00019999947238798858, - "loss": 46.0, - "step": 13534 - }, - { - "epoch": 1.034845270179865, - "grad_norm": 0.0010185374412685633, - "learning_rate": 0.00019999947230995975, - "loss": 46.0, - "step": 13535 - }, - { - "epoch": 1.0349217271632547, - "grad_norm": 0.0011475110659375787, - "learning_rate": 0.00019999947223192513, - "loss": 46.0, - "step": 13536 - }, - { - "epoch": 1.0349981841466445, - "grad_norm": 0.0015137637965381145, - "learning_rate": 0.00019999947215388475, - "loss": 46.0, - "step": 13537 - }, - { - "epoch": 1.0350746411300342, - "grad_norm": 0.002390458947047591, - "learning_rate": 0.0001999994720758386, - "loss": 46.0, - "step": 13538 - }, - { - "epoch": 1.035151098113424, - "grad_norm": 0.0007472530705854297, - "learning_rate": 0.0001999994719977867, - "loss": 46.0, - "step": 13539 - }, - { - "epoch": 1.0352275550968137, - "grad_norm": 0.0023285613860934973, - "learning_rate": 0.00019999947191972898, - "loss": 46.0, - "step": 13540 - }, - { - "epoch": 1.0353040120802033, - "grad_norm": 0.0013641807017847896, - "learning_rate": 0.00019999947184166553, - "loss": 46.0, - "step": 13541 - }, - { - "epoch": 1.035380469063593, - "grad_norm": 0.0007180684478953481, - "learning_rate": 0.00019999947176359628, - "loss": 46.0, - "step": 13542 - }, - { - "epoch": 1.0354569260469828, - "grad_norm": 0.0014429963193833828, - "learning_rate": 0.00019999947168552126, - "loss": 46.0, - "step": 13543 - }, - { - "epoch": 1.0355333830303726, - "grad_norm": 0.0010568283032625914, - "learning_rate": 0.0001999994716074405, - "loss": 46.0, - "step": 13544 - }, - { - "epoch": 1.0356098400137623, - "grad_norm": 0.0009685680270195007, - "learning_rate": 0.00019999947152935397, - "loss": 46.0, - "step": 13545 - }, - { - "epoch": 1.0356862969971519, - "grad_norm": 0.0007094335742294788, - "learning_rate": 0.00019999947145126165, - "loss": 46.0, - "step": 13546 - }, - { - "epoch": 1.0357627539805416, - "grad_norm": 0.0008215317502617836, - "learning_rate": 0.00019999947137316356, - "loss": 46.0, - "step": 13547 - }, - { - "epoch": 1.0358392109639314, - "grad_norm": 0.0006712996400892735, - "learning_rate": 0.00019999947129505975, - "loss": 46.0, - "step": 13548 - }, - { - "epoch": 1.0359156679473212, - "grad_norm": 0.0013593286275863647, - "learning_rate": 0.00019999947121695014, - "loss": 46.0, - "step": 13549 - }, - { - "epoch": 1.035992124930711, - "grad_norm": 0.001702674780972302, - "learning_rate": 0.00019999947113883473, - "loss": 46.0, - "step": 13550 - }, - { - "epoch": 1.0360685819141007, - "grad_norm": 0.008379071019589901, - "learning_rate": 0.00019999947106071357, - "loss": 46.0, - "step": 13551 - }, - { - "epoch": 1.0361450388974902, - "grad_norm": 0.006596278864890337, - "learning_rate": 0.00019999947098258664, - "loss": 46.0, - "step": 13552 - }, - { - "epoch": 1.03622149588088, - "grad_norm": 0.0026221596635878086, - "learning_rate": 0.00019999947090445396, - "loss": 46.0, - "step": 13553 - }, - { - "epoch": 1.0362979528642697, - "grad_norm": 0.0005150894867256284, - "learning_rate": 0.00019999947082631548, - "loss": 46.0, - "step": 13554 - }, - { - "epoch": 1.0363744098476595, - "grad_norm": 0.0010322367306798697, - "learning_rate": 0.00019999947074817126, - "loss": 46.0, - "step": 13555 - }, - { - "epoch": 1.0364508668310493, - "grad_norm": 0.001431052340194583, - "learning_rate": 0.00019999947067002126, - "loss": 46.0, - "step": 13556 - }, - { - "epoch": 1.0365273238144388, - "grad_norm": 0.002187419915571809, - "learning_rate": 0.0001999994705918655, - "loss": 46.0, - "step": 13557 - }, - { - "epoch": 1.0366037807978286, - "grad_norm": 0.00047102628741413355, - "learning_rate": 0.00019999947051370395, - "loss": 46.0, - "step": 13558 - }, - { - "epoch": 1.0366802377812183, - "grad_norm": 0.002510851714760065, - "learning_rate": 0.00019999947043553663, - "loss": 46.0, - "step": 13559 - }, - { - "epoch": 1.036756694764608, - "grad_norm": 0.0009445094037801027, - "learning_rate": 0.00019999947035736354, - "loss": 46.0, - "step": 13560 - }, - { - "epoch": 1.0368331517479978, - "grad_norm": 0.0013167369179427624, - "learning_rate": 0.00019999947027918467, - "loss": 46.0, - "step": 13561 - }, - { - "epoch": 1.0369096087313876, - "grad_norm": 0.0020650813821703196, - "learning_rate": 0.00019999947020100006, - "loss": 46.0, - "step": 13562 - }, - { - "epoch": 1.0369860657147771, - "grad_norm": 0.0011621693847700953, - "learning_rate": 0.00019999947012280968, - "loss": 46.0, - "step": 13563 - }, - { - "epoch": 1.037062522698167, - "grad_norm": 0.0004166075377725065, - "learning_rate": 0.00019999947004461352, - "loss": 46.0, - "step": 13564 - }, - { - "epoch": 1.0371389796815567, - "grad_norm": 0.0010638912208378315, - "learning_rate": 0.0001999994699664116, - "loss": 46.0, - "step": 13565 - }, - { - "epoch": 1.0372154366649464, - "grad_norm": 0.0005540067795664072, - "learning_rate": 0.00019999946988820392, - "loss": 46.0, - "step": 13566 - }, - { - "epoch": 1.0372918936483362, - "grad_norm": 0.0022389935329556465, - "learning_rate": 0.00019999946980999044, - "loss": 46.0, - "step": 13567 - }, - { - "epoch": 1.0373683506317257, - "grad_norm": 0.00045577631681226194, - "learning_rate": 0.0001999994697317712, - "loss": 46.0, - "step": 13568 - }, - { - "epoch": 1.0374448076151155, - "grad_norm": 0.0014733450952917337, - "learning_rate": 0.0001999994696535462, - "loss": 46.0, - "step": 13569 - }, - { - "epoch": 1.0375212645985052, - "grad_norm": 0.0006990301189944148, - "learning_rate": 0.00019999946957531542, - "loss": 46.0, - "step": 13570 - }, - { - "epoch": 1.037597721581895, - "grad_norm": 0.0008845215197652578, - "learning_rate": 0.00019999946949707885, - "loss": 46.0, - "step": 13571 - }, - { - "epoch": 1.0376741785652848, - "grad_norm": 0.006686788517981768, - "learning_rate": 0.00019999946941883656, - "loss": 46.0, - "step": 13572 - }, - { - "epoch": 1.0377506355486745, - "grad_norm": 0.00149017374496907, - "learning_rate": 0.00019999946934058847, - "loss": 46.0, - "step": 13573 - }, - { - "epoch": 1.037827092532064, - "grad_norm": 0.0012148062232881784, - "learning_rate": 0.0001999994692623346, - "loss": 46.0, - "step": 13574 - }, - { - "epoch": 1.0379035495154538, - "grad_norm": 0.0005045654252171516, - "learning_rate": 0.000199999469184075, - "loss": 46.0, - "step": 13575 - }, - { - "epoch": 1.0379800064988436, - "grad_norm": 0.001179555431008339, - "learning_rate": 0.0001999994691058096, - "loss": 46.0, - "step": 13576 - }, - { - "epoch": 1.0380564634822333, - "grad_norm": 0.00018153031123802066, - "learning_rate": 0.00019999946902753845, - "loss": 46.0, - "step": 13577 - }, - { - "epoch": 1.038132920465623, - "grad_norm": 0.0008283884380944073, - "learning_rate": 0.00019999946894926152, - "loss": 46.0, - "step": 13578 - }, - { - "epoch": 1.0382093774490126, - "grad_norm": 0.0009526748908683658, - "learning_rate": 0.0001999994688709788, - "loss": 46.0, - "step": 13579 - }, - { - "epoch": 1.0382858344324024, - "grad_norm": 0.0004294952377676964, - "learning_rate": 0.00019999946879269034, - "loss": 46.0, - "step": 13580 - }, - { - "epoch": 1.0383622914157922, - "grad_norm": 0.0006923009641468525, - "learning_rate": 0.0001999994687143961, - "loss": 46.0, - "step": 13581 - }, - { - "epoch": 1.038438748399182, - "grad_norm": 0.0028068027459084988, - "learning_rate": 0.0001999994686360961, - "loss": 46.0, - "step": 13582 - }, - { - "epoch": 1.0385152053825717, - "grad_norm": 0.0006975121796131134, - "learning_rate": 0.0001999994685577903, - "loss": 46.0, - "step": 13583 - }, - { - "epoch": 1.0385916623659615, - "grad_norm": 0.001403622212819755, - "learning_rate": 0.00019999946847947876, - "loss": 46.0, - "step": 13584 - }, - { - "epoch": 1.038668119349351, - "grad_norm": 0.0008122682920657098, - "learning_rate": 0.00019999946840116147, - "loss": 46.0, - "step": 13585 - }, - { - "epoch": 1.0387445763327408, - "grad_norm": 0.000543102971278131, - "learning_rate": 0.00019999946832283838, - "loss": 46.0, - "step": 13586 - }, - { - "epoch": 1.0388210333161305, - "grad_norm": 0.011759413406252861, - "learning_rate": 0.0001999994682445095, - "loss": 46.0, - "step": 13587 - }, - { - "epoch": 1.0388974902995203, - "grad_norm": 0.0027144672349095345, - "learning_rate": 0.0001999994681661749, - "loss": 46.0, - "step": 13588 - }, - { - "epoch": 1.03897394728291, - "grad_norm": 0.006971284281462431, - "learning_rate": 0.0001999994680878345, - "loss": 46.0, - "step": 13589 - }, - { - "epoch": 1.0390504042662996, - "grad_norm": 0.004064250737428665, - "learning_rate": 0.00019999946800948834, - "loss": 46.0, - "step": 13590 - }, - { - "epoch": 1.0391268612496893, - "grad_norm": 0.001461939886212349, - "learning_rate": 0.00019999946793113638, - "loss": 46.0, - "step": 13591 - }, - { - "epoch": 1.039203318233079, - "grad_norm": 0.0006457848357968032, - "learning_rate": 0.0001999994678527787, - "loss": 46.0, - "step": 13592 - }, - { - "epoch": 1.0392797752164689, - "grad_norm": 0.0010811688844114542, - "learning_rate": 0.0001999994677744152, - "loss": 46.0, - "step": 13593 - }, - { - "epoch": 1.0393562321998586, - "grad_norm": 0.006942572072148323, - "learning_rate": 0.00019999946769604597, - "loss": 46.0, - "step": 13594 - }, - { - "epoch": 1.0394326891832484, - "grad_norm": 0.000904568936675787, - "learning_rate": 0.00019999946761767098, - "loss": 46.0, - "step": 13595 - }, - { - "epoch": 1.039509146166638, - "grad_norm": 0.0005813430761918426, - "learning_rate": 0.00019999946753929018, - "loss": 46.0, - "step": 13596 - }, - { - "epoch": 1.0395856031500277, - "grad_norm": 0.001077859429642558, - "learning_rate": 0.00019999946746090364, - "loss": 46.0, - "step": 13597 - }, - { - "epoch": 1.0396620601334174, - "grad_norm": 0.0039033321663737297, - "learning_rate": 0.00019999946738251132, - "loss": 46.0, - "step": 13598 - }, - { - "epoch": 1.0397385171168072, - "grad_norm": 0.001438381033949554, - "learning_rate": 0.00019999946730411323, - "loss": 46.0, - "step": 13599 - }, - { - "epoch": 1.039814974100197, - "grad_norm": 0.011141834780573845, - "learning_rate": 0.00019999946722570937, - "loss": 46.0, - "step": 13600 - }, - { - "epoch": 1.0398914310835865, - "grad_norm": 0.021015118807554245, - "learning_rate": 0.00019999946714729973, - "loss": 46.0, - "step": 13601 - }, - { - "epoch": 1.0399678880669763, - "grad_norm": 0.00244560232385993, - "learning_rate": 0.00019999946706888432, - "loss": 46.0, - "step": 13602 - }, - { - "epoch": 1.040044345050366, - "grad_norm": 0.0006874041282571852, - "learning_rate": 0.0001999994669904632, - "loss": 46.0, - "step": 13603 - }, - { - "epoch": 1.0401208020337558, - "grad_norm": 0.0007708551711402833, - "learning_rate": 0.00019999946691203624, - "loss": 46.0, - "step": 13604 - }, - { - "epoch": 1.0401972590171455, - "grad_norm": 0.0008431808673776686, - "learning_rate": 0.00019999946683360353, - "loss": 46.0, - "step": 13605 - }, - { - "epoch": 1.0402737160005353, - "grad_norm": 0.0008816320332698524, - "learning_rate": 0.00019999946675516506, - "loss": 46.0, - "step": 13606 - }, - { - "epoch": 1.0403501729839248, - "grad_norm": 0.0035544056445360184, - "learning_rate": 0.00019999946667672083, - "loss": 46.0, - "step": 13607 - }, - { - "epoch": 1.0404266299673146, - "grad_norm": 0.0009899329161271453, - "learning_rate": 0.0001999994665982708, - "loss": 46.0, - "step": 13608 - }, - { - "epoch": 1.0405030869507044, - "grad_norm": 0.0028448542580008507, - "learning_rate": 0.00019999946651981501, - "loss": 46.0, - "step": 13609 - }, - { - "epoch": 1.0405795439340941, - "grad_norm": 0.0007092030718922615, - "learning_rate": 0.00019999946644135344, - "loss": 46.0, - "step": 13610 - }, - { - "epoch": 1.040656000917484, - "grad_norm": 0.002470811130478978, - "learning_rate": 0.00019999946636288613, - "loss": 46.0, - "step": 13611 - }, - { - "epoch": 1.0407324579008734, - "grad_norm": 0.003848015097901225, - "learning_rate": 0.00019999946628441304, - "loss": 46.0, - "step": 13612 - }, - { - "epoch": 1.0408089148842632, - "grad_norm": 0.0022235563956201077, - "learning_rate": 0.0001999994662059342, - "loss": 46.0, - "step": 13613 - }, - { - "epoch": 1.040885371867653, - "grad_norm": 0.0016022929921746254, - "learning_rate": 0.00019999946612744957, - "loss": 46.0, - "step": 13614 - }, - { - "epoch": 1.0409618288510427, - "grad_norm": 0.0015884211752563715, - "learning_rate": 0.00019999946604895916, - "loss": 46.0, - "step": 13615 - }, - { - "epoch": 1.0410382858344325, - "grad_norm": 0.0031271979678422213, - "learning_rate": 0.000199999465970463, - "loss": 46.0, - "step": 13616 - }, - { - "epoch": 1.0411147428178222, - "grad_norm": 0.004668507259339094, - "learning_rate": 0.00019999946589196104, - "loss": 46.0, - "step": 13617 - }, - { - "epoch": 1.0411911998012118, - "grad_norm": 0.0006736156647093594, - "learning_rate": 0.00019999946581345334, - "loss": 46.0, - "step": 13618 - }, - { - "epoch": 1.0412676567846015, - "grad_norm": 0.0006623711669817567, - "learning_rate": 0.00019999946573493987, - "loss": 46.0, - "step": 13619 - }, - { - "epoch": 1.0413441137679913, - "grad_norm": 0.0010033831931650639, - "learning_rate": 0.00019999946565642062, - "loss": 46.0, - "step": 13620 - }, - { - "epoch": 1.041420570751381, - "grad_norm": 0.00031629044678993523, - "learning_rate": 0.00019999946557789562, - "loss": 46.0, - "step": 13621 - }, - { - "epoch": 1.0414970277347708, - "grad_norm": 0.0015300210798159242, - "learning_rate": 0.00019999946549936482, - "loss": 46.0, - "step": 13622 - }, - { - "epoch": 1.0415734847181604, - "grad_norm": 0.002132994355633855, - "learning_rate": 0.00019999946542082828, - "loss": 46.0, - "step": 13623 - }, - { - "epoch": 1.0416499417015501, - "grad_norm": 0.002365228720009327, - "learning_rate": 0.00019999946534228594, - "loss": 46.0, - "step": 13624 - }, - { - "epoch": 1.0417263986849399, - "grad_norm": 0.004612469580024481, - "learning_rate": 0.00019999946526373785, - "loss": 46.0, - "step": 13625 - }, - { - "epoch": 1.0418028556683296, - "grad_norm": 0.00740779796615243, - "learning_rate": 0.000199999465185184, - "loss": 46.0, - "step": 13626 - }, - { - "epoch": 1.0418793126517194, - "grad_norm": 0.0011563485022634268, - "learning_rate": 0.00019999946510662435, - "loss": 46.0, - "step": 13627 - }, - { - "epoch": 1.041955769635109, - "grad_norm": 0.0008899495005607605, - "learning_rate": 0.00019999946502805894, - "loss": 46.0, - "step": 13628 - }, - { - "epoch": 1.0420322266184987, - "grad_norm": 0.003935758024454117, - "learning_rate": 0.00019999946494948779, - "loss": 46.0, - "step": 13629 - }, - { - "epoch": 1.0421086836018885, - "grad_norm": 0.0012259908253327012, - "learning_rate": 0.00019999946487091083, - "loss": 46.0, - "step": 13630 - }, - { - "epoch": 1.0421851405852782, - "grad_norm": 0.0007822164334356785, - "learning_rate": 0.00019999946479232813, - "loss": 46.0, - "step": 13631 - }, - { - "epoch": 1.042261597568668, - "grad_norm": 0.0005279415054246783, - "learning_rate": 0.00019999946471373965, - "loss": 46.0, - "step": 13632 - }, - { - "epoch": 1.0423380545520577, - "grad_norm": 0.0008386989939026535, - "learning_rate": 0.0001999994646351454, - "loss": 46.0, - "step": 13633 - }, - { - "epoch": 1.0424145115354473, - "grad_norm": 0.0028680155519396067, - "learning_rate": 0.00019999946455654538, - "loss": 46.0, - "step": 13634 - }, - { - "epoch": 1.042490968518837, - "grad_norm": 0.0035199858248233795, - "learning_rate": 0.00019999946447793958, - "loss": 46.0, - "step": 13635 - }, - { - "epoch": 1.0425674255022268, - "grad_norm": 0.000766197161283344, - "learning_rate": 0.00019999946439932804, - "loss": 46.0, - "step": 13636 - }, - { - "epoch": 1.0426438824856166, - "grad_norm": 0.00039460984407924116, - "learning_rate": 0.00019999946432071073, - "loss": 46.0, - "step": 13637 - }, - { - "epoch": 1.0427203394690063, - "grad_norm": 0.0006706234416924417, - "learning_rate": 0.0001999994642420876, - "loss": 46.0, - "step": 13638 - }, - { - "epoch": 1.0427967964523959, - "grad_norm": 0.00034934974974021316, - "learning_rate": 0.00019999946416345875, - "loss": 46.0, - "step": 13639 - }, - { - "epoch": 1.0428732534357856, - "grad_norm": 0.0050964984111487865, - "learning_rate": 0.00019999946408482414, - "loss": 46.0, - "step": 13640 - }, - { - "epoch": 1.0429497104191754, - "grad_norm": 0.001377904205583036, - "learning_rate": 0.0001999994640061837, - "loss": 46.0, - "step": 13641 - }, - { - "epoch": 1.0430261674025652, - "grad_norm": 0.002273187041282654, - "learning_rate": 0.00019999946392753755, - "loss": 46.0, - "step": 13642 - }, - { - "epoch": 1.043102624385955, - "grad_norm": 0.006097042933106422, - "learning_rate": 0.0001999994638488856, - "loss": 46.0, - "step": 13643 - }, - { - "epoch": 1.0431790813693447, - "grad_norm": 0.002980379853397608, - "learning_rate": 0.00019999946377022786, - "loss": 46.0, - "step": 13644 - }, - { - "epoch": 1.0432555383527342, - "grad_norm": 0.002494513988494873, - "learning_rate": 0.0001999994636915644, - "loss": 46.0, - "step": 13645 - }, - { - "epoch": 1.043331995336124, - "grad_norm": 0.0011300815967842937, - "learning_rate": 0.00019999946361289514, - "loss": 46.0, - "step": 13646 - }, - { - "epoch": 1.0434084523195137, - "grad_norm": 0.0005182846216484904, - "learning_rate": 0.00019999946353422012, - "loss": 46.0, - "step": 13647 - }, - { - "epoch": 1.0434849093029035, - "grad_norm": 0.00644265441223979, - "learning_rate": 0.00019999946345553932, - "loss": 46.0, - "step": 13648 - }, - { - "epoch": 1.0435613662862933, - "grad_norm": 0.005353832151740789, - "learning_rate": 0.00019999946337685278, - "loss": 46.0, - "step": 13649 - }, - { - "epoch": 1.0436378232696828, - "grad_norm": 0.0018770033493638039, - "learning_rate": 0.00019999946329816046, - "loss": 46.0, - "step": 13650 - }, - { - "epoch": 1.0437142802530726, - "grad_norm": 0.0013232798082754016, - "learning_rate": 0.00019999946321946235, - "loss": 46.0, - "step": 13651 - }, - { - "epoch": 1.0437907372364623, - "grad_norm": 0.0009263827814720571, - "learning_rate": 0.00019999946314075849, - "loss": 46.0, - "step": 13652 - }, - { - "epoch": 1.043867194219852, - "grad_norm": 0.0035563067067414522, - "learning_rate": 0.00019999946306204885, - "loss": 46.0, - "step": 13653 - }, - { - "epoch": 1.0439436512032418, - "grad_norm": 0.005331353284418583, - "learning_rate": 0.00019999946298333344, - "loss": 46.0, - "step": 13654 - }, - { - "epoch": 1.0440201081866316, - "grad_norm": 0.001637037261389196, - "learning_rate": 0.00019999946290461223, - "loss": 46.0, - "step": 13655 - }, - { - "epoch": 1.0440965651700211, - "grad_norm": 0.0008109994232654572, - "learning_rate": 0.0001999994628258853, - "loss": 46.0, - "step": 13656 - }, - { - "epoch": 1.044173022153411, - "grad_norm": 0.004891208838671446, - "learning_rate": 0.0001999994627471526, - "loss": 46.0, - "step": 13657 - }, - { - "epoch": 1.0442494791368007, - "grad_norm": 0.004336840473115444, - "learning_rate": 0.00019999946266841413, - "loss": 46.0, - "step": 13658 - }, - { - "epoch": 1.0443259361201904, - "grad_norm": 0.0029382656794041395, - "learning_rate": 0.00019999946258966985, - "loss": 46.0, - "step": 13659 - }, - { - "epoch": 1.0444023931035802, - "grad_norm": 0.0004281848669052124, - "learning_rate": 0.00019999946251091983, - "loss": 46.0, - "step": 13660 - }, - { - "epoch": 1.0444788500869697, - "grad_norm": 0.00045410022721625865, - "learning_rate": 0.00019999946243216403, - "loss": 46.0, - "step": 13661 - }, - { - "epoch": 1.0445553070703595, - "grad_norm": 0.0012513186084106565, - "learning_rate": 0.00019999946235340247, - "loss": 46.0, - "step": 13662 - }, - { - "epoch": 1.0446317640537492, - "grad_norm": 0.0007714648963883519, - "learning_rate": 0.00019999946227463515, - "loss": 46.0, - "step": 13663 - }, - { - "epoch": 1.044708221037139, - "grad_norm": 0.0010255188681185246, - "learning_rate": 0.00019999946219586203, - "loss": 46.0, - "step": 13664 - }, - { - "epoch": 1.0447846780205288, - "grad_norm": 0.0007217352977022529, - "learning_rate": 0.00019999946211708317, - "loss": 46.0, - "step": 13665 - }, - { - "epoch": 1.0448611350039183, - "grad_norm": 0.003992976620793343, - "learning_rate": 0.00019999946203829854, - "loss": 46.0, - "step": 13666 - }, - { - "epoch": 1.044937591987308, - "grad_norm": 0.012705071829259396, - "learning_rate": 0.00019999946195950813, - "loss": 46.0, - "step": 13667 - }, - { - "epoch": 1.0450140489706978, - "grad_norm": 0.009934223257005215, - "learning_rate": 0.00019999946188071195, - "loss": 46.0, - "step": 13668 - }, - { - "epoch": 1.0450905059540876, - "grad_norm": 0.003383256494998932, - "learning_rate": 0.00019999946180191, - "loss": 46.0, - "step": 13669 - }, - { - "epoch": 1.0451669629374774, - "grad_norm": 0.0009678475908003747, - "learning_rate": 0.0001999994617231023, - "loss": 46.0, - "step": 13670 - }, - { - "epoch": 1.0452434199208671, - "grad_norm": 0.0013291980139911175, - "learning_rate": 0.0001999994616442888, - "loss": 46.0, - "step": 13671 - }, - { - "epoch": 1.0453198769042567, - "grad_norm": 0.0007834390853531659, - "learning_rate": 0.00019999946156546954, - "loss": 46.0, - "step": 13672 - }, - { - "epoch": 1.0453963338876464, - "grad_norm": 0.004882368259131908, - "learning_rate": 0.0001999994614866445, - "loss": 46.0, - "step": 13673 - }, - { - "epoch": 1.0454727908710362, - "grad_norm": 0.005486754700541496, - "learning_rate": 0.00019999946140781372, - "loss": 46.0, - "step": 13674 - }, - { - "epoch": 1.045549247854426, - "grad_norm": 0.0036457020323723555, - "learning_rate": 0.00019999946132897716, - "loss": 46.0, - "step": 13675 - }, - { - "epoch": 1.0456257048378157, - "grad_norm": 0.0005810261936858296, - "learning_rate": 0.0001999994612501348, - "loss": 46.0, - "step": 13676 - }, - { - "epoch": 1.0457021618212052, - "grad_norm": 0.0019671572372317314, - "learning_rate": 0.00019999946117128673, - "loss": 46.0, - "step": 13677 - }, - { - "epoch": 1.045778618804595, - "grad_norm": 0.0008908930467441678, - "learning_rate": 0.00019999946109243284, - "loss": 46.0, - "step": 13678 - }, - { - "epoch": 1.0458550757879848, - "grad_norm": 0.001128511969000101, - "learning_rate": 0.00019999946101357318, - "loss": 46.0, - "step": 13679 - }, - { - "epoch": 1.0459315327713745, - "grad_norm": 0.0010615490609779954, - "learning_rate": 0.0001999994609347078, - "loss": 46.0, - "step": 13680 - }, - { - "epoch": 1.0460079897547643, - "grad_norm": 0.001050188671797514, - "learning_rate": 0.00019999946085583661, - "loss": 46.0, - "step": 13681 - }, - { - "epoch": 1.046084446738154, - "grad_norm": 0.0023927355650812387, - "learning_rate": 0.00019999946077695966, - "loss": 46.0, - "step": 13682 - }, - { - "epoch": 1.0461609037215436, - "grad_norm": 0.0008692205883562565, - "learning_rate": 0.00019999946069807693, - "loss": 46.0, - "step": 13683 - }, - { - "epoch": 1.0462373607049333, - "grad_norm": 0.008289349265396595, - "learning_rate": 0.00019999946061918843, - "loss": 46.0, - "step": 13684 - }, - { - "epoch": 1.046313817688323, - "grad_norm": 0.001233914983458817, - "learning_rate": 0.00019999946054029418, - "loss": 46.0, - "step": 13685 - }, - { - "epoch": 1.0463902746717129, - "grad_norm": 0.002012679586187005, - "learning_rate": 0.00019999946046139416, - "loss": 46.0, - "step": 13686 - }, - { - "epoch": 1.0464667316551026, - "grad_norm": 0.00028954027220606804, - "learning_rate": 0.00019999946038248836, - "loss": 46.0, - "step": 13687 - }, - { - "epoch": 1.0465431886384922, - "grad_norm": 0.007841721177101135, - "learning_rate": 0.0001999994603035768, - "loss": 46.0, - "step": 13688 - }, - { - "epoch": 1.046619645621882, - "grad_norm": 0.0012884187744930387, - "learning_rate": 0.00019999946022465946, - "loss": 46.0, - "step": 13689 - }, - { - "epoch": 1.0466961026052717, - "grad_norm": 0.004484914243221283, - "learning_rate": 0.00019999946014573637, - "loss": 46.0, - "step": 13690 - }, - { - "epoch": 1.0467725595886614, - "grad_norm": 0.0009876226540654898, - "learning_rate": 0.00019999946006680748, - "loss": 46.0, - "step": 13691 - }, - { - "epoch": 1.0468490165720512, - "grad_norm": 0.0017544401343911886, - "learning_rate": 0.00019999945998787282, - "loss": 46.0, - "step": 13692 - }, - { - "epoch": 1.046925473555441, - "grad_norm": 0.001603399170562625, - "learning_rate": 0.0001999994599089324, - "loss": 46.0, - "step": 13693 - }, - { - "epoch": 1.0470019305388305, - "grad_norm": 0.0014639829751104116, - "learning_rate": 0.00019999945982998623, - "loss": 46.0, - "step": 13694 - }, - { - "epoch": 1.0470783875222203, - "grad_norm": 0.0008471490582451224, - "learning_rate": 0.00019999945975103427, - "loss": 46.0, - "step": 13695 - }, - { - "epoch": 1.04715484450561, - "grad_norm": 0.003734071273356676, - "learning_rate": 0.00019999945967207655, - "loss": 46.0, - "step": 13696 - }, - { - "epoch": 1.0472313014889998, - "grad_norm": 0.013389692641794682, - "learning_rate": 0.00019999945959311305, - "loss": 46.0, - "step": 13697 - }, - { - "epoch": 1.0473077584723895, - "grad_norm": 0.00100217224098742, - "learning_rate": 0.0001999994595141438, - "loss": 46.0, - "step": 13698 - }, - { - "epoch": 1.047384215455779, - "grad_norm": 0.0021251337602734566, - "learning_rate": 0.00019999945943516875, - "loss": 46.0, - "step": 13699 - }, - { - "epoch": 1.0474606724391688, - "grad_norm": 0.001842330559156835, - "learning_rate": 0.00019999945935618798, - "loss": 46.0, - "step": 13700 - }, - { - "epoch": 1.0475371294225586, - "grad_norm": 0.000933229923248291, - "learning_rate": 0.0001999994592772014, - "loss": 46.0, - "step": 13701 - }, - { - "epoch": 1.0476135864059484, - "grad_norm": 0.0006351879565045238, - "learning_rate": 0.00019999945919820907, - "loss": 46.0, - "step": 13702 - }, - { - "epoch": 1.0476900433893381, - "grad_norm": 0.001479899394325912, - "learning_rate": 0.00019999945911921096, - "loss": 46.0, - "step": 13703 - }, - { - "epoch": 1.047766500372728, - "grad_norm": 0.0015070316148921847, - "learning_rate": 0.00019999945904020707, - "loss": 46.0, - "step": 13704 - }, - { - "epoch": 1.0478429573561174, - "grad_norm": 0.001733145210891962, - "learning_rate": 0.00019999945896119744, - "loss": 46.0, - "step": 13705 - }, - { - "epoch": 1.0479194143395072, - "grad_norm": 0.0010924865491688251, - "learning_rate": 0.000199999458882182, - "loss": 46.0, - "step": 13706 - }, - { - "epoch": 1.047995871322897, - "grad_norm": 0.0009933550609275699, - "learning_rate": 0.00019999945880316082, - "loss": 46.0, - "step": 13707 - }, - { - "epoch": 1.0480723283062867, - "grad_norm": 0.003499661572277546, - "learning_rate": 0.00019999945872413387, - "loss": 46.0, - "step": 13708 - }, - { - "epoch": 1.0481487852896765, - "grad_norm": 0.001802286715246737, - "learning_rate": 0.00019999945864510114, - "loss": 46.0, - "step": 13709 - }, - { - "epoch": 1.048225242273066, - "grad_norm": 0.004978041164577007, - "learning_rate": 0.00019999945856606264, - "loss": 46.0, - "step": 13710 - }, - { - "epoch": 1.0483016992564558, - "grad_norm": 0.0018099646549671888, - "learning_rate": 0.00019999945848701836, - "loss": 46.0, - "step": 13711 - }, - { - "epoch": 1.0483781562398455, - "grad_norm": 0.001607256941497326, - "learning_rate": 0.00019999945840796834, - "loss": 46.0, - "step": 13712 - }, - { - "epoch": 1.0484546132232353, - "grad_norm": 0.0012299392838031054, - "learning_rate": 0.00019999945832891255, - "loss": 46.0, - "step": 13713 - }, - { - "epoch": 1.048531070206625, - "grad_norm": 0.0019190805032849312, - "learning_rate": 0.00019999945824985096, - "loss": 46.0, - "step": 13714 - }, - { - "epoch": 1.0486075271900148, - "grad_norm": 0.0013620478566735983, - "learning_rate": 0.00019999945817078361, - "loss": 46.0, - "step": 13715 - }, - { - "epoch": 1.0486839841734044, - "grad_norm": 0.0010639394167810678, - "learning_rate": 0.00019999945809171053, - "loss": 46.0, - "step": 13716 - }, - { - "epoch": 1.0487604411567941, - "grad_norm": 0.002282089786604047, - "learning_rate": 0.00019999945801263164, - "loss": 46.0, - "step": 13717 - }, - { - "epoch": 1.0488368981401839, - "grad_norm": 0.0007381599280051887, - "learning_rate": 0.000199999457933547, - "loss": 46.0, - "step": 13718 - }, - { - "epoch": 1.0489133551235736, - "grad_norm": 0.0020886831916868687, - "learning_rate": 0.00019999945785445657, - "loss": 46.0, - "step": 13719 - }, - { - "epoch": 1.0489898121069634, - "grad_norm": 0.0005917309317737818, - "learning_rate": 0.00019999945777536036, - "loss": 46.0, - "step": 13720 - }, - { - "epoch": 1.049066269090353, - "grad_norm": 0.002708891173824668, - "learning_rate": 0.0001999994576962584, - "loss": 46.0, - "step": 13721 - }, - { - "epoch": 1.0491427260737427, - "grad_norm": 0.001117902691476047, - "learning_rate": 0.0001999994576171507, - "loss": 46.0, - "step": 13722 - }, - { - "epoch": 1.0492191830571325, - "grad_norm": 0.002134914742782712, - "learning_rate": 0.00019999945753803718, - "loss": 46.0, - "step": 13723 - }, - { - "epoch": 1.0492956400405222, - "grad_norm": 0.0005679893074557185, - "learning_rate": 0.00019999945745891794, - "loss": 46.0, - "step": 13724 - }, - { - "epoch": 1.049372097023912, - "grad_norm": 0.0011671686079353094, - "learning_rate": 0.0001999994573797929, - "loss": 46.0, - "step": 13725 - }, - { - "epoch": 1.0494485540073017, - "grad_norm": 0.0013793562538921833, - "learning_rate": 0.0001999994573006621, - "loss": 46.0, - "step": 13726 - }, - { - "epoch": 1.0495250109906913, - "grad_norm": 0.009731952100992203, - "learning_rate": 0.0001999994572215255, - "loss": 46.0, - "step": 13727 - }, - { - "epoch": 1.049601467974081, - "grad_norm": 0.009387202560901642, - "learning_rate": 0.0001999994571423832, - "loss": 46.0, - "step": 13728 - }, - { - "epoch": 1.0496779249574708, - "grad_norm": 0.0028804149478673935, - "learning_rate": 0.00019999945706323505, - "loss": 46.0, - "step": 13729 - }, - { - "epoch": 1.0497543819408606, - "grad_norm": 0.0013176982756704092, - "learning_rate": 0.00019999945698408116, - "loss": 46.0, - "step": 13730 - }, - { - "epoch": 1.0498308389242503, - "grad_norm": 0.0037029809318482876, - "learning_rate": 0.00019999945690492152, - "loss": 46.0, - "step": 13731 - }, - { - "epoch": 1.0499072959076399, - "grad_norm": 0.0010358724975958467, - "learning_rate": 0.0001999994568257561, - "loss": 46.0, - "step": 13732 - }, - { - "epoch": 1.0499837528910296, - "grad_norm": 0.004200923256576061, - "learning_rate": 0.0001999994567465849, - "loss": 46.0, - "step": 13733 - }, - { - "epoch": 1.0500602098744194, - "grad_norm": 0.0006061145686544478, - "learning_rate": 0.00019999945666740793, - "loss": 46.0, - "step": 13734 - }, - { - "epoch": 1.0501366668578092, - "grad_norm": 0.0010573656763881445, - "learning_rate": 0.00019999945658822523, - "loss": 46.0, - "step": 13735 - }, - { - "epoch": 1.050213123841199, - "grad_norm": 0.0007051253342069685, - "learning_rate": 0.0001999994565090367, - "loss": 46.0, - "step": 13736 - }, - { - "epoch": 1.0502895808245887, - "grad_norm": 0.0010889818659052253, - "learning_rate": 0.00019999945642984246, - "loss": 46.0, - "step": 13737 - }, - { - "epoch": 1.0503660378079782, - "grad_norm": 0.0014793929876759648, - "learning_rate": 0.0001999994563506424, - "loss": 46.0, - "step": 13738 - }, - { - "epoch": 1.050442494791368, - "grad_norm": 0.000877274782396853, - "learning_rate": 0.0001999994562714366, - "loss": 46.0, - "step": 13739 - }, - { - "epoch": 1.0505189517747577, - "grad_norm": 0.005213289987295866, - "learning_rate": 0.00019999945619222502, - "loss": 46.0, - "step": 13740 - }, - { - "epoch": 1.0505954087581475, - "grad_norm": 0.0015559031162410975, - "learning_rate": 0.00019999945611300768, - "loss": 46.0, - "step": 13741 - }, - { - "epoch": 1.0506718657415373, - "grad_norm": 0.0010985876433551311, - "learning_rate": 0.00019999945603378457, - "loss": 46.0, - "step": 13742 - }, - { - "epoch": 1.0507483227249268, - "grad_norm": 0.001478587044402957, - "learning_rate": 0.00019999945595455568, - "loss": 46.0, - "step": 13743 - }, - { - "epoch": 1.0508247797083166, - "grad_norm": 0.0006309555028565228, - "learning_rate": 0.000199999455875321, - "loss": 46.0, - "step": 13744 - }, - { - "epoch": 1.0509012366917063, - "grad_norm": 0.004757370334118605, - "learning_rate": 0.0001999994557960806, - "loss": 46.0, - "step": 13745 - }, - { - "epoch": 1.050977693675096, - "grad_norm": 0.004981026519089937, - "learning_rate": 0.0001999994557168344, - "loss": 46.0, - "step": 13746 - }, - { - "epoch": 1.0510541506584858, - "grad_norm": 0.0008750940323807299, - "learning_rate": 0.00019999945563758243, - "loss": 46.0, - "step": 13747 - }, - { - "epoch": 1.0511306076418756, - "grad_norm": 0.0010408051311969757, - "learning_rate": 0.0001999994555583247, - "loss": 46.0, - "step": 13748 - }, - { - "epoch": 1.0512070646252651, - "grad_norm": 0.0021473548840731382, - "learning_rate": 0.0001999994554790612, - "loss": 46.0, - "step": 13749 - }, - { - "epoch": 1.051283521608655, - "grad_norm": 0.0005332798464223742, - "learning_rate": 0.00019999945539979193, - "loss": 46.0, - "step": 13750 - }, - { - "epoch": 1.0513599785920447, - "grad_norm": 0.0025775327812880278, - "learning_rate": 0.00019999945532051688, - "loss": 46.0, - "step": 13751 - }, - { - "epoch": 1.0514364355754344, - "grad_norm": 0.014799004420638084, - "learning_rate": 0.0001999994552412361, - "loss": 46.0, - "step": 13752 - }, - { - "epoch": 1.0515128925588242, - "grad_norm": 0.002512461505830288, - "learning_rate": 0.00019999945516194952, - "loss": 46.0, - "step": 13753 - }, - { - "epoch": 1.0515893495422137, - "grad_norm": 0.011496616527438164, - "learning_rate": 0.00019999945508265715, - "loss": 46.0, - "step": 13754 - }, - { - "epoch": 1.0516658065256035, - "grad_norm": 0.0006701357197016478, - "learning_rate": 0.00019999945500335904, - "loss": 46.0, - "step": 13755 - }, - { - "epoch": 1.0517422635089932, - "grad_norm": 0.00208885595202446, - "learning_rate": 0.00019999945492405515, - "loss": 46.0, - "step": 13756 - }, - { - "epoch": 1.051818720492383, - "grad_norm": 0.0008675071294419467, - "learning_rate": 0.00019999945484474547, - "loss": 46.0, - "step": 13757 - }, - { - "epoch": 1.0518951774757728, - "grad_norm": 0.0010657686507329345, - "learning_rate": 0.00019999945476543006, - "loss": 46.0, - "step": 13758 - }, - { - "epoch": 1.0519716344591623, - "grad_norm": 0.0007977762725204229, - "learning_rate": 0.00019999945468610885, - "loss": 46.0, - "step": 13759 - }, - { - "epoch": 1.052048091442552, - "grad_norm": 0.0009384271106682718, - "learning_rate": 0.0001999994546067819, - "loss": 46.0, - "step": 13760 - }, - { - "epoch": 1.0521245484259418, - "grad_norm": 0.0009604932856746018, - "learning_rate": 0.00019999945452744915, - "loss": 46.0, - "step": 13761 - }, - { - "epoch": 1.0522010054093316, - "grad_norm": 0.0028188966680318117, - "learning_rate": 0.00019999945444811065, - "loss": 46.0, - "step": 13762 - }, - { - "epoch": 1.0522774623927214, - "grad_norm": 0.004643087275326252, - "learning_rate": 0.0001999994543687664, - "loss": 46.0, - "step": 13763 - }, - { - "epoch": 1.0523539193761111, - "grad_norm": 0.0006218428025022149, - "learning_rate": 0.00019999945428941635, - "loss": 46.0, - "step": 13764 - }, - { - "epoch": 1.0524303763595007, - "grad_norm": 0.007419173140078783, - "learning_rate": 0.00019999945421006053, - "loss": 46.0, - "step": 13765 - }, - { - "epoch": 1.0525068333428904, - "grad_norm": 0.00118886714335531, - "learning_rate": 0.00019999945413069897, - "loss": 46.0, - "step": 13766 - }, - { - "epoch": 1.0525832903262802, - "grad_norm": 0.00042946290341205895, - "learning_rate": 0.0001999994540513316, - "loss": 46.0, - "step": 13767 - }, - { - "epoch": 1.05265974730967, - "grad_norm": 0.0006862473674118519, - "learning_rate": 0.00019999945397195846, - "loss": 46.0, - "step": 13768 - }, - { - "epoch": 1.0527362042930597, - "grad_norm": 0.00039609955274499953, - "learning_rate": 0.00019999945389257958, - "loss": 46.0, - "step": 13769 - }, - { - "epoch": 1.0528126612764492, - "grad_norm": 0.0013059170451015234, - "learning_rate": 0.00019999945381319492, - "loss": 46.0, - "step": 13770 - }, - { - "epoch": 1.052889118259839, - "grad_norm": 0.00047861848725005984, - "learning_rate": 0.00019999945373380448, - "loss": 46.0, - "step": 13771 - }, - { - "epoch": 1.0529655752432288, - "grad_norm": 0.0002761677314992994, - "learning_rate": 0.00019999945365440828, - "loss": 46.0, - "step": 13772 - }, - { - "epoch": 1.0530420322266185, - "grad_norm": 0.0013266659807413816, - "learning_rate": 0.00019999945357500632, - "loss": 46.0, - "step": 13773 - }, - { - "epoch": 1.0531184892100083, - "grad_norm": 0.0019480689661577344, - "learning_rate": 0.0001999994534955986, - "loss": 46.0, - "step": 13774 - }, - { - "epoch": 1.053194946193398, - "grad_norm": 0.001726043876260519, - "learning_rate": 0.0001999994534161851, - "loss": 46.0, - "step": 13775 - }, - { - "epoch": 1.0532714031767876, - "grad_norm": 0.0012734434567391872, - "learning_rate": 0.0001999994533367658, - "loss": 46.0, - "step": 13776 - }, - { - "epoch": 1.0533478601601773, - "grad_norm": 0.0017384429229423404, - "learning_rate": 0.00019999945325734075, - "loss": 46.0, - "step": 13777 - }, - { - "epoch": 1.053424317143567, - "grad_norm": 0.0011614080285653472, - "learning_rate": 0.00019999945317790996, - "loss": 46.0, - "step": 13778 - }, - { - "epoch": 1.0535007741269569, - "grad_norm": 0.0009231917792931199, - "learning_rate": 0.00019999945309847337, - "loss": 46.0, - "step": 13779 - }, - { - "epoch": 1.0535772311103466, - "grad_norm": 0.0010538025526329875, - "learning_rate": 0.000199999453019031, - "loss": 46.0, - "step": 13780 - }, - { - "epoch": 1.0536536880937362, - "grad_norm": 0.00501682935282588, - "learning_rate": 0.00019999945293958286, - "loss": 46.0, - "step": 13781 - }, - { - "epoch": 1.053730145077126, - "grad_norm": 0.002531415782868862, - "learning_rate": 0.000199999452860129, - "loss": 46.0, - "step": 13782 - }, - { - "epoch": 1.0538066020605157, - "grad_norm": 0.0007454706355929375, - "learning_rate": 0.00019999945278066932, - "loss": 46.0, - "step": 13783 - }, - { - "epoch": 1.0538830590439054, - "grad_norm": 0.004449669271707535, - "learning_rate": 0.0001999994527012039, - "loss": 46.0, - "step": 13784 - }, - { - "epoch": 1.0539595160272952, - "grad_norm": 0.0010712840594351292, - "learning_rate": 0.00019999945262173268, - "loss": 46.0, - "step": 13785 - }, - { - "epoch": 1.054035973010685, - "grad_norm": 0.002205733209848404, - "learning_rate": 0.00019999945254225572, - "loss": 46.0, - "step": 13786 - }, - { - "epoch": 1.0541124299940745, - "grad_norm": 0.001104493043385446, - "learning_rate": 0.00019999945246277297, - "loss": 46.0, - "step": 13787 - }, - { - "epoch": 1.0541888869774643, - "grad_norm": 0.0007203186396509409, - "learning_rate": 0.00019999945238328447, - "loss": 46.0, - "step": 13788 - }, - { - "epoch": 1.054265343960854, - "grad_norm": 0.002434931695461273, - "learning_rate": 0.0001999994523037902, - "loss": 46.0, - "step": 13789 - }, - { - "epoch": 1.0543418009442438, - "grad_norm": 0.0015634875744581223, - "learning_rate": 0.00019999945222429015, - "loss": 46.0, - "step": 13790 - }, - { - "epoch": 1.0544182579276336, - "grad_norm": 0.0012279485817998648, - "learning_rate": 0.00019999945214478433, - "loss": 46.0, - "step": 13791 - }, - { - "epoch": 1.054494714911023, - "grad_norm": 0.0005795487086288631, - "learning_rate": 0.00019999945206527274, - "loss": 46.0, - "step": 13792 - }, - { - "epoch": 1.0545711718944129, - "grad_norm": 0.0017037650104612112, - "learning_rate": 0.00019999945198575538, - "loss": 46.0, - "step": 13793 - }, - { - "epoch": 1.0546476288778026, - "grad_norm": 0.004591943230479956, - "learning_rate": 0.00019999945190623226, - "loss": 46.0, - "step": 13794 - }, - { - "epoch": 1.0547240858611924, - "grad_norm": 0.0007427557138726115, - "learning_rate": 0.00019999945182670335, - "loss": 46.0, - "step": 13795 - }, - { - "epoch": 1.0548005428445821, - "grad_norm": 0.0005532317445613444, - "learning_rate": 0.0001999994517471687, - "loss": 46.0, - "step": 13796 - }, - { - "epoch": 1.0548769998279717, - "grad_norm": 0.00041654807864688337, - "learning_rate": 0.00019999945166762826, - "loss": 46.0, - "step": 13797 - }, - { - "epoch": 1.0549534568113614, - "grad_norm": 0.0021832596976310015, - "learning_rate": 0.00019999945158808205, - "loss": 46.0, - "step": 13798 - }, - { - "epoch": 1.0550299137947512, - "grad_norm": 0.0008539077243767679, - "learning_rate": 0.00019999945150853007, - "loss": 46.0, - "step": 13799 - }, - { - "epoch": 1.055106370778141, - "grad_norm": 0.0006561922491528094, - "learning_rate": 0.00019999945142897235, - "loss": 46.0, - "step": 13800 - }, - { - "epoch": 1.0551828277615307, - "grad_norm": 0.00170541659463197, - "learning_rate": 0.00019999945134940882, - "loss": 46.0, - "step": 13801 - }, - { - "epoch": 1.0552592847449205, - "grad_norm": 0.009879326447844505, - "learning_rate": 0.00019999945126983955, - "loss": 46.0, - "step": 13802 - }, - { - "epoch": 1.05533574172831, - "grad_norm": 0.00253881374374032, - "learning_rate": 0.00019999945119026448, - "loss": 46.0, - "step": 13803 - }, - { - "epoch": 1.0554121987116998, - "grad_norm": 0.004980718716979027, - "learning_rate": 0.0001999994511106837, - "loss": 46.0, - "step": 13804 - }, - { - "epoch": 1.0554886556950895, - "grad_norm": 0.0040793651714921, - "learning_rate": 0.0001999994510310971, - "loss": 46.0, - "step": 13805 - }, - { - "epoch": 1.0555651126784793, - "grad_norm": 0.0013489745324477553, - "learning_rate": 0.00019999945095150473, - "loss": 46.0, - "step": 13806 - }, - { - "epoch": 1.055641569661869, - "grad_norm": 0.0007245230954140425, - "learning_rate": 0.0001999994508719066, - "loss": 46.0, - "step": 13807 - }, - { - "epoch": 1.0557180266452586, - "grad_norm": 0.0034304941073060036, - "learning_rate": 0.00019999945079230268, - "loss": 46.0, - "step": 13808 - }, - { - "epoch": 1.0557944836286484, - "grad_norm": 0.0015015583485364914, - "learning_rate": 0.00019999945071269305, - "loss": 46.0, - "step": 13809 - }, - { - "epoch": 1.0558709406120381, - "grad_norm": 0.022670798003673553, - "learning_rate": 0.0001999994506330776, - "loss": 46.0, - "step": 13810 - }, - { - "epoch": 1.0559473975954279, - "grad_norm": 0.0004668761102948338, - "learning_rate": 0.00019999945055345638, - "loss": 46.0, - "step": 13811 - }, - { - "epoch": 1.0560238545788176, - "grad_norm": 0.0020051011815667152, - "learning_rate": 0.0001999994504738294, - "loss": 46.0, - "step": 13812 - }, - { - "epoch": 1.0561003115622074, - "grad_norm": 0.0006668036221526563, - "learning_rate": 0.00019999945039419665, - "loss": 46.0, - "step": 13813 - }, - { - "epoch": 1.056176768545597, - "grad_norm": 0.0025760773569345474, - "learning_rate": 0.00019999945031455812, - "loss": 46.0, - "step": 13814 - }, - { - "epoch": 1.0562532255289867, - "grad_norm": 0.002122888108715415, - "learning_rate": 0.00019999945023491385, - "loss": 46.0, - "step": 13815 - }, - { - "epoch": 1.0563296825123765, - "grad_norm": 0.0006113943527452648, - "learning_rate": 0.00019999945015526383, - "loss": 46.0, - "step": 13816 - }, - { - "epoch": 1.0564061394957662, - "grad_norm": 0.004621585365384817, - "learning_rate": 0.000199999450075608, - "loss": 46.0, - "step": 13817 - }, - { - "epoch": 1.056482596479156, - "grad_norm": 0.0005622573080472648, - "learning_rate": 0.0001999994499959464, - "loss": 46.0, - "step": 13818 - }, - { - "epoch": 1.0565590534625455, - "grad_norm": 0.0010672372300177813, - "learning_rate": 0.00019999944991627903, - "loss": 46.0, - "step": 13819 - }, - { - "epoch": 1.0566355104459353, - "grad_norm": 0.0035669091157615185, - "learning_rate": 0.0001999994498366059, - "loss": 46.0, - "step": 13820 - }, - { - "epoch": 1.056711967429325, - "grad_norm": 0.0020136700477451086, - "learning_rate": 0.00019999944975692698, - "loss": 46.0, - "step": 13821 - }, - { - "epoch": 1.0567884244127148, - "grad_norm": 0.002263610251247883, - "learning_rate": 0.00019999944967724232, - "loss": 46.0, - "step": 13822 - }, - { - "epoch": 1.0568648813961046, - "grad_norm": 0.0007125269039534032, - "learning_rate": 0.0001999994495975519, - "loss": 46.0, - "step": 13823 - }, - { - "epoch": 1.0569413383794943, - "grad_norm": 0.0004161091346759349, - "learning_rate": 0.00019999944951785569, - "loss": 46.0, - "step": 13824 - }, - { - "epoch": 1.0570177953628839, - "grad_norm": 0.0005981248104944825, - "learning_rate": 0.0001999994494381537, - "loss": 46.0, - "step": 13825 - }, - { - "epoch": 1.0570942523462736, - "grad_norm": 0.0007105072145350277, - "learning_rate": 0.00019999944935844595, - "loss": 46.0, - "step": 13826 - }, - { - "epoch": 1.0571707093296634, - "grad_norm": 0.006244134623557329, - "learning_rate": 0.00019999944927873243, - "loss": 46.0, - "step": 13827 - }, - { - "epoch": 1.0572471663130532, - "grad_norm": 0.000786393997259438, - "learning_rate": 0.00019999944919901316, - "loss": 46.0, - "step": 13828 - }, - { - "epoch": 1.057323623296443, - "grad_norm": 0.0021577521692961454, - "learning_rate": 0.00019999944911928809, - "loss": 46.0, - "step": 13829 - }, - { - "epoch": 1.0574000802798325, - "grad_norm": 0.0015617961762472987, - "learning_rate": 0.00019999944903955727, - "loss": 46.0, - "step": 13830 - }, - { - "epoch": 1.0574765372632222, - "grad_norm": 0.00087475631153211, - "learning_rate": 0.00019999944895982068, - "loss": 46.0, - "step": 13831 - }, - { - "epoch": 1.057552994246612, - "grad_norm": 0.0037890796083956957, - "learning_rate": 0.0001999994488800783, - "loss": 46.0, - "step": 13832 - }, - { - "epoch": 1.0576294512300017, - "grad_norm": 0.0032942306715995073, - "learning_rate": 0.0001999994488003302, - "loss": 46.0, - "step": 13833 - }, - { - "epoch": 1.0577059082133915, - "grad_norm": 0.0011341753415763378, - "learning_rate": 0.0001999994487205763, - "loss": 46.0, - "step": 13834 - }, - { - "epoch": 1.0577823651967813, - "grad_norm": 0.00255045248195529, - "learning_rate": 0.0001999994486408166, - "loss": 46.0, - "step": 13835 - }, - { - "epoch": 1.0578588221801708, - "grad_norm": 0.0007576187490485609, - "learning_rate": 0.00019999944856105114, - "loss": 46.0, - "step": 13836 - }, - { - "epoch": 1.0579352791635606, - "grad_norm": 0.0030599667225033045, - "learning_rate": 0.00019999944848127994, - "loss": 46.0, - "step": 13837 - }, - { - "epoch": 1.0580117361469503, - "grad_norm": 0.0007769312942400575, - "learning_rate": 0.000199999448401503, - "loss": 46.0, - "step": 13838 - }, - { - "epoch": 1.05808819313034, - "grad_norm": 0.003815098898485303, - "learning_rate": 0.0001999994483217202, - "loss": 46.0, - "step": 13839 - }, - { - "epoch": 1.0581646501137298, - "grad_norm": 0.0030538265127688646, - "learning_rate": 0.0001999994482419317, - "loss": 46.0, - "step": 13840 - }, - { - "epoch": 1.0582411070971194, - "grad_norm": 0.0007212893688119948, - "learning_rate": 0.0001999994481621374, - "loss": 46.0, - "step": 13841 - }, - { - "epoch": 1.0583175640805091, - "grad_norm": 0.0012864685850217938, - "learning_rate": 0.00019999944808233737, - "loss": 46.0, - "step": 13842 - }, - { - "epoch": 1.058394021063899, - "grad_norm": 0.0011859576916322112, - "learning_rate": 0.00019999944800253152, - "loss": 46.0, - "step": 13843 - }, - { - "epoch": 1.0584704780472887, - "grad_norm": 0.001851612119935453, - "learning_rate": 0.00019999944792271993, - "loss": 46.0, - "step": 13844 - }, - { - "epoch": 1.0585469350306784, - "grad_norm": 0.0010871737031266093, - "learning_rate": 0.00019999944784290257, - "loss": 46.0, - "step": 13845 - }, - { - "epoch": 1.0586233920140682, - "grad_norm": 0.0018016340909525752, - "learning_rate": 0.00019999944776307943, - "loss": 46.0, - "step": 13846 - }, - { - "epoch": 1.0586998489974577, - "grad_norm": 0.0023099295794963837, - "learning_rate": 0.00019999944768325052, - "loss": 46.0, - "step": 13847 - }, - { - "epoch": 1.0587763059808475, - "grad_norm": 0.0021356171928346157, - "learning_rate": 0.00019999944760341583, - "loss": 46.0, - "step": 13848 - }, - { - "epoch": 1.0588527629642372, - "grad_norm": 0.0010564925614744425, - "learning_rate": 0.0001999994475235754, - "loss": 46.0, - "step": 13849 - }, - { - "epoch": 1.058929219947627, - "grad_norm": 0.004075905773788691, - "learning_rate": 0.00019999944744372917, - "loss": 46.0, - "step": 13850 - }, - { - "epoch": 1.0590056769310168, - "grad_norm": 0.0004121228994335979, - "learning_rate": 0.0001999994473638772, - "loss": 46.0, - "step": 13851 - }, - { - "epoch": 1.0590821339144063, - "grad_norm": 0.0009071128442883492, - "learning_rate": 0.00019999944728401944, - "loss": 46.0, - "step": 13852 - }, - { - "epoch": 1.059158590897796, - "grad_norm": 0.000884173670783639, - "learning_rate": 0.00019999944720415594, - "loss": 46.0, - "step": 13853 - }, - { - "epoch": 1.0592350478811858, - "grad_norm": 0.0018070179503411055, - "learning_rate": 0.00019999944712428665, - "loss": 46.0, - "step": 13854 - }, - { - "epoch": 1.0593115048645756, - "grad_norm": 0.001158177386969328, - "learning_rate": 0.00019999944704441157, - "loss": 46.0, - "step": 13855 - }, - { - "epoch": 1.0593879618479654, - "grad_norm": 0.002257951535284519, - "learning_rate": 0.00019999944696453076, - "loss": 46.0, - "step": 13856 - }, - { - "epoch": 1.0594644188313551, - "grad_norm": 0.0010425011860206723, - "learning_rate": 0.00019999944688464414, - "loss": 46.0, - "step": 13857 - }, - { - "epoch": 1.0595408758147447, - "grad_norm": 0.0020225043408572674, - "learning_rate": 0.00019999944680475177, - "loss": 46.0, - "step": 13858 - }, - { - "epoch": 1.0596173327981344, - "grad_norm": 0.000977058312855661, - "learning_rate": 0.00019999944672485364, - "loss": 46.0, - "step": 13859 - }, - { - "epoch": 1.0596937897815242, - "grad_norm": 0.007345973514020443, - "learning_rate": 0.00019999944664494975, - "loss": 46.0, - "step": 13860 - }, - { - "epoch": 1.059770246764914, - "grad_norm": 0.0009060087613761425, - "learning_rate": 0.00019999944656504007, - "loss": 46.0, - "step": 13861 - }, - { - "epoch": 1.0598467037483037, - "grad_norm": 0.0014056694926694036, - "learning_rate": 0.0001999994464851246, - "loss": 46.0, - "step": 13862 - }, - { - "epoch": 1.0599231607316932, - "grad_norm": 0.0012392988428473473, - "learning_rate": 0.00019999944640520338, - "loss": 46.0, - "step": 13863 - }, - { - "epoch": 1.059999617715083, - "grad_norm": 0.009871658869087696, - "learning_rate": 0.0001999994463252764, - "loss": 46.0, - "step": 13864 - }, - { - "epoch": 1.0600760746984728, - "grad_norm": 0.0010715080425143242, - "learning_rate": 0.00019999944624534365, - "loss": 46.0, - "step": 13865 - }, - { - "epoch": 1.0601525316818625, - "grad_norm": 0.0026220898143947124, - "learning_rate": 0.00019999944616540513, - "loss": 46.0, - "step": 13866 - }, - { - "epoch": 1.0602289886652523, - "grad_norm": 0.000647886423394084, - "learning_rate": 0.00019999944608546083, - "loss": 46.0, - "step": 13867 - }, - { - "epoch": 1.060305445648642, - "grad_norm": 0.0003339028626214713, - "learning_rate": 0.00019999944600551078, - "loss": 46.0, - "step": 13868 - }, - { - "epoch": 1.0603819026320316, - "grad_norm": 0.0009584177169017494, - "learning_rate": 0.00019999944592555494, - "loss": 46.0, - "step": 13869 - }, - { - "epoch": 1.0604583596154213, - "grad_norm": 0.0011102084536105394, - "learning_rate": 0.00019999944584559335, - "loss": 46.0, - "step": 13870 - }, - { - "epoch": 1.060534816598811, - "grad_norm": 0.002412444679066539, - "learning_rate": 0.00019999944576562596, - "loss": 46.0, - "step": 13871 - }, - { - "epoch": 1.0606112735822009, - "grad_norm": 0.0015999983297660947, - "learning_rate": 0.00019999944568565282, - "loss": 46.0, - "step": 13872 - }, - { - "epoch": 1.0606877305655906, - "grad_norm": 0.0015892237424850464, - "learning_rate": 0.0001999994456056739, - "loss": 46.0, - "step": 13873 - }, - { - "epoch": 1.0607641875489802, - "grad_norm": 0.0005685322685167193, - "learning_rate": 0.00019999944552568922, - "loss": 46.0, - "step": 13874 - }, - { - "epoch": 1.06084064453237, - "grad_norm": 0.0012501970632001758, - "learning_rate": 0.00019999944544569877, - "loss": 46.0, - "step": 13875 - }, - { - "epoch": 1.0609171015157597, - "grad_norm": 0.0011882673716172576, - "learning_rate": 0.00019999944536570256, - "loss": 46.0, - "step": 13876 - }, - { - "epoch": 1.0609935584991494, - "grad_norm": 0.0011230031959712505, - "learning_rate": 0.00019999944528570059, - "loss": 46.0, - "step": 13877 - }, - { - "epoch": 1.0610700154825392, - "grad_norm": 0.0005707996897399426, - "learning_rate": 0.0001999994452056928, - "loss": 46.0, - "step": 13878 - }, - { - "epoch": 1.061146472465929, - "grad_norm": 0.0005501255509443581, - "learning_rate": 0.00019999944512567928, - "loss": 46.0, - "step": 13879 - }, - { - "epoch": 1.0612229294493185, - "grad_norm": 0.0024445634335279465, - "learning_rate": 0.00019999944504566, - "loss": 46.0, - "step": 13880 - }, - { - "epoch": 1.0612993864327083, - "grad_norm": 0.0014774432638660073, - "learning_rate": 0.00019999944496563492, - "loss": 46.0, - "step": 13881 - }, - { - "epoch": 1.061375843416098, - "grad_norm": 0.0016902334755286574, - "learning_rate": 0.0001999994448856041, - "loss": 46.0, - "step": 13882 - }, - { - "epoch": 1.0614523003994878, - "grad_norm": 0.003523385850712657, - "learning_rate": 0.00019999944480556748, - "loss": 46.0, - "step": 13883 - }, - { - "epoch": 1.0615287573828776, - "grad_norm": 0.0012848739279434085, - "learning_rate": 0.00019999944472552512, - "loss": 46.0, - "step": 13884 - }, - { - "epoch": 1.061605214366267, - "grad_norm": 0.0045204805210232735, - "learning_rate": 0.00019999944464547698, - "loss": 46.0, - "step": 13885 - }, - { - "epoch": 1.0616816713496569, - "grad_norm": 0.0009408780606463552, - "learning_rate": 0.00019999944456542304, - "loss": 46.0, - "step": 13886 - }, - { - "epoch": 1.0617581283330466, - "grad_norm": 0.001686860341578722, - "learning_rate": 0.00019999944448536336, - "loss": 46.0, - "step": 13887 - }, - { - "epoch": 1.0618345853164364, - "grad_norm": 0.0042866868898272514, - "learning_rate": 0.00019999944440529793, - "loss": 46.0, - "step": 13888 - }, - { - "epoch": 1.0619110422998261, - "grad_norm": 0.000781725684646517, - "learning_rate": 0.0001999994443252267, - "loss": 46.0, - "step": 13889 - }, - { - "epoch": 1.0619874992832157, - "grad_norm": 0.0012019751593470573, - "learning_rate": 0.0001999994442451497, - "loss": 46.0, - "step": 13890 - }, - { - "epoch": 1.0620639562666054, - "grad_norm": 0.0017092638881877065, - "learning_rate": 0.00019999944416506694, - "loss": 46.0, - "step": 13891 - }, - { - "epoch": 1.0621404132499952, - "grad_norm": 0.0011406762059777975, - "learning_rate": 0.00019999944408497842, - "loss": 46.0, - "step": 13892 - }, - { - "epoch": 1.062216870233385, - "grad_norm": 0.005513755138963461, - "learning_rate": 0.0001999994440048841, - "loss": 46.0, - "step": 13893 - }, - { - "epoch": 1.0622933272167747, - "grad_norm": 0.001303394790738821, - "learning_rate": 0.00019999944392478405, - "loss": 46.0, - "step": 13894 - }, - { - "epoch": 1.0623697842001645, - "grad_norm": 0.0011032327311113477, - "learning_rate": 0.0001999994438446782, - "loss": 46.0, - "step": 13895 - }, - { - "epoch": 1.062446241183554, - "grad_norm": 0.0010128116700798273, - "learning_rate": 0.00019999944376456662, - "loss": 46.0, - "step": 13896 - }, - { - "epoch": 1.0625226981669438, - "grad_norm": 0.001919205067679286, - "learning_rate": 0.00019999944368444923, - "loss": 46.0, - "step": 13897 - }, - { - "epoch": 1.0625991551503335, - "grad_norm": 0.008968381211161613, - "learning_rate": 0.00019999944360432606, - "loss": 46.0, - "step": 13898 - }, - { - "epoch": 1.0626756121337233, - "grad_norm": 0.0024009079206734896, - "learning_rate": 0.00019999944352419718, - "loss": 46.0, - "step": 13899 - }, - { - "epoch": 1.062752069117113, - "grad_norm": 0.00323579553514719, - "learning_rate": 0.0001999994434440625, - "loss": 46.0, - "step": 13900 - }, - { - "epoch": 1.0628285261005026, - "grad_norm": 0.0005370948347263038, - "learning_rate": 0.000199999443363922, - "loss": 46.0, - "step": 13901 - }, - { - "epoch": 1.0629049830838924, - "grad_norm": 0.0016549831489101052, - "learning_rate": 0.0001999994432837758, - "loss": 46.0, - "step": 13902 - }, - { - "epoch": 1.0629814400672821, - "grad_norm": 0.0028080902993679047, - "learning_rate": 0.0001999994432036238, - "loss": 46.0, - "step": 13903 - }, - { - "epoch": 1.0630578970506719, - "grad_norm": 0.002084887120872736, - "learning_rate": 0.00019999944312346606, - "loss": 46.0, - "step": 13904 - }, - { - "epoch": 1.0631343540340616, - "grad_norm": 0.0009972843108698726, - "learning_rate": 0.0001999994430433025, - "loss": 46.0, - "step": 13905 - }, - { - "epoch": 1.0632108110174514, - "grad_norm": 0.0006952547118999064, - "learning_rate": 0.0001999994429631332, - "loss": 46.0, - "step": 13906 - }, - { - "epoch": 1.063287268000841, - "grad_norm": 0.0023219604045152664, - "learning_rate": 0.00019999944288295814, - "loss": 46.0, - "step": 13907 - }, - { - "epoch": 1.0633637249842307, - "grad_norm": 0.0016072430880740285, - "learning_rate": 0.0001999994428027773, - "loss": 46.0, - "step": 13908 - }, - { - "epoch": 1.0634401819676205, - "grad_norm": 0.0006178435869514942, - "learning_rate": 0.00019999944272259068, - "loss": 46.0, - "step": 13909 - }, - { - "epoch": 1.0635166389510102, - "grad_norm": 0.0014596887631341815, - "learning_rate": 0.00019999944264239831, - "loss": 46.0, - "step": 13910 - }, - { - "epoch": 1.0635930959344, - "grad_norm": 0.0007467910181730986, - "learning_rate": 0.00019999944256220018, - "loss": 46.0, - "step": 13911 - }, - { - "epoch": 1.0636695529177895, - "grad_norm": 0.0003401925787329674, - "learning_rate": 0.00019999944248199624, - "loss": 46.0, - "step": 13912 - }, - { - "epoch": 1.0637460099011793, - "grad_norm": 0.0006558423629030585, - "learning_rate": 0.00019999944240178656, - "loss": 46.0, - "step": 13913 - }, - { - "epoch": 1.063822466884569, - "grad_norm": 0.005127561744302511, - "learning_rate": 0.0001999994423215711, - "loss": 46.0, - "step": 13914 - }, - { - "epoch": 1.0638989238679588, - "grad_norm": 0.0004356475837994367, - "learning_rate": 0.00019999944224134987, - "loss": 46.0, - "step": 13915 - }, - { - "epoch": 1.0639753808513486, - "grad_norm": 0.0006102005718275905, - "learning_rate": 0.00019999944216112287, - "loss": 46.0, - "step": 13916 - }, - { - "epoch": 1.0640518378347383, - "grad_norm": 0.010889898054301739, - "learning_rate": 0.00019999944208089012, - "loss": 46.0, - "step": 13917 - }, - { - "epoch": 1.0641282948181279, - "grad_norm": 0.005070434417575598, - "learning_rate": 0.0001999994420006516, - "loss": 46.0, - "step": 13918 - }, - { - "epoch": 1.0642047518015176, - "grad_norm": 0.010388080030679703, - "learning_rate": 0.0001999994419204073, - "loss": 46.0, - "step": 13919 - }, - { - "epoch": 1.0642812087849074, - "grad_norm": 0.0015205597737804055, - "learning_rate": 0.00019999944184015723, - "loss": 46.0, - "step": 13920 - }, - { - "epoch": 1.0643576657682972, - "grad_norm": 0.0013914147857576609, - "learning_rate": 0.00019999944175990139, - "loss": 46.0, - "step": 13921 - }, - { - "epoch": 1.064434122751687, - "grad_norm": 0.0008867255528457463, - "learning_rate": 0.00019999944167963974, - "loss": 46.0, - "step": 13922 - }, - { - "epoch": 1.0645105797350765, - "grad_norm": 0.0012207137187942863, - "learning_rate": 0.00019999944159937235, - "loss": 46.0, - "step": 13923 - }, - { - "epoch": 1.0645870367184662, - "grad_norm": 0.0053460123017430305, - "learning_rate": 0.00019999944151909922, - "loss": 46.0, - "step": 13924 - }, - { - "epoch": 1.064663493701856, - "grad_norm": 0.00437916861847043, - "learning_rate": 0.0001999994414388203, - "loss": 46.0, - "step": 13925 - }, - { - "epoch": 1.0647399506852457, - "grad_norm": 0.0012223742669448256, - "learning_rate": 0.00019999944135853562, - "loss": 46.0, - "step": 13926 - }, - { - "epoch": 1.0648164076686355, - "grad_norm": 0.0006850723293609917, - "learning_rate": 0.00019999944127824514, - "loss": 46.0, - "step": 13927 - }, - { - "epoch": 1.064892864652025, - "grad_norm": 0.002215558895841241, - "learning_rate": 0.00019999944119794894, - "loss": 46.0, - "step": 13928 - }, - { - "epoch": 1.0649693216354148, - "grad_norm": 0.002368104411289096, - "learning_rate": 0.00019999944111764693, - "loss": 46.0, - "step": 13929 - }, - { - "epoch": 1.0650457786188046, - "grad_norm": 0.0043951282277703285, - "learning_rate": 0.00019999944103733913, - "loss": 46.0, - "step": 13930 - }, - { - "epoch": 1.0651222356021943, - "grad_norm": 0.002747457241639495, - "learning_rate": 0.0001999994409570256, - "loss": 46.0, - "step": 13931 - }, - { - "epoch": 1.065198692585584, - "grad_norm": 0.0009195831953547895, - "learning_rate": 0.0001999994408767063, - "loss": 46.0, - "step": 13932 - }, - { - "epoch": 1.0652751495689738, - "grad_norm": 0.0004528456483967602, - "learning_rate": 0.00019999944079638121, - "loss": 46.0, - "step": 13933 - }, - { - "epoch": 1.0653516065523634, - "grad_norm": 0.0024599151220172644, - "learning_rate": 0.0001999994407160504, - "loss": 46.0, - "step": 13934 - }, - { - "epoch": 1.0654280635357531, - "grad_norm": 0.0012116293655708432, - "learning_rate": 0.00019999944063571378, - "loss": 46.0, - "step": 13935 - }, - { - "epoch": 1.065504520519143, - "grad_norm": 0.001292297849431634, - "learning_rate": 0.0001999994405553714, - "loss": 46.0, - "step": 13936 - }, - { - "epoch": 1.0655809775025327, - "grad_norm": 0.0027012350037693977, - "learning_rate": 0.00019999944047502323, - "loss": 46.0, - "step": 13937 - }, - { - "epoch": 1.0656574344859224, - "grad_norm": 0.002673080191016197, - "learning_rate": 0.00019999944039466932, - "loss": 46.0, - "step": 13938 - }, - { - "epoch": 1.065733891469312, - "grad_norm": 0.0005960490670986474, - "learning_rate": 0.0001999994403143096, - "loss": 46.0, - "step": 13939 - }, - { - "epoch": 1.0658103484527017, - "grad_norm": 0.000893922580871731, - "learning_rate": 0.00019999944023394415, - "loss": 46.0, - "step": 13940 - }, - { - "epoch": 1.0658868054360915, - "grad_norm": 0.0030416965018957853, - "learning_rate": 0.00019999944015357293, - "loss": 46.0, - "step": 13941 - }, - { - "epoch": 1.0659632624194813, - "grad_norm": 0.0006262491806410253, - "learning_rate": 0.00019999944007319592, - "loss": 46.0, - "step": 13942 - }, - { - "epoch": 1.066039719402871, - "grad_norm": 0.0006563802598975599, - "learning_rate": 0.00019999943999281317, - "loss": 46.0, - "step": 13943 - }, - { - "epoch": 1.0661161763862608, - "grad_norm": 0.0006599270273000002, - "learning_rate": 0.00019999943991242462, - "loss": 46.0, - "step": 13944 - }, - { - "epoch": 1.0661926333696503, - "grad_norm": 0.002420566976070404, - "learning_rate": 0.0001999994398320303, - "loss": 46.0, - "step": 13945 - }, - { - "epoch": 1.06626909035304, - "grad_norm": 0.0007779768202453852, - "learning_rate": 0.00019999943975163023, - "loss": 46.0, - "step": 13946 - }, - { - "epoch": 1.0663455473364298, - "grad_norm": 0.001933785737492144, - "learning_rate": 0.0001999994396712244, - "loss": 46.0, - "step": 13947 - }, - { - "epoch": 1.0664220043198196, - "grad_norm": 0.001142044086009264, - "learning_rate": 0.00019999943959081277, - "loss": 46.0, - "step": 13948 - }, - { - "epoch": 1.0664984613032094, - "grad_norm": 0.0010103305103257298, - "learning_rate": 0.00019999943951039536, - "loss": 46.0, - "step": 13949 - }, - { - "epoch": 1.066574918286599, - "grad_norm": 0.0038067286368459463, - "learning_rate": 0.00019999943942997225, - "loss": 46.0, - "step": 13950 - }, - { - "epoch": 1.0666513752699887, - "grad_norm": 0.0010027054231613874, - "learning_rate": 0.00019999943934954329, - "loss": 46.0, - "step": 13951 - }, - { - "epoch": 1.0667278322533784, - "grad_norm": 0.0021999781019985676, - "learning_rate": 0.0001999994392691086, - "loss": 46.0, - "step": 13952 - }, - { - "epoch": 1.0668042892367682, - "grad_norm": 0.0010803771438077092, - "learning_rate": 0.00019999943918866815, - "loss": 46.0, - "step": 13953 - }, - { - "epoch": 1.066880746220158, - "grad_norm": 0.003673393977805972, - "learning_rate": 0.00019999943910822192, - "loss": 46.0, - "step": 13954 - }, - { - "epoch": 1.0669572032035477, - "grad_norm": 0.0018245381070300937, - "learning_rate": 0.0001999994390277699, - "loss": 46.0, - "step": 13955 - }, - { - "epoch": 1.0670336601869372, - "grad_norm": 0.000570030533708632, - "learning_rate": 0.0001999994389473121, - "loss": 46.0, - "step": 13956 - }, - { - "epoch": 1.067110117170327, - "grad_norm": 0.0035906797274947166, - "learning_rate": 0.0001999994388668486, - "loss": 46.0, - "step": 13957 - }, - { - "epoch": 1.0671865741537168, - "grad_norm": 0.0031293847132474184, - "learning_rate": 0.00019999943878637927, - "loss": 46.0, - "step": 13958 - }, - { - "epoch": 1.0672630311371065, - "grad_norm": 0.012548170052468777, - "learning_rate": 0.00019999943870590417, - "loss": 46.0, - "step": 13959 - }, - { - "epoch": 1.0673394881204963, - "grad_norm": 0.0019971649162471294, - "learning_rate": 0.00019999943862542333, - "loss": 46.0, - "step": 13960 - }, - { - "epoch": 1.0674159451038858, - "grad_norm": 0.001103279646486044, - "learning_rate": 0.00019999943854493671, - "loss": 46.0, - "step": 13961 - }, - { - "epoch": 1.0674924020872756, - "grad_norm": 0.001971827819943428, - "learning_rate": 0.00019999943846444433, - "loss": 46.0, - "step": 13962 - }, - { - "epoch": 1.0675688590706653, - "grad_norm": 0.0006237890338525176, - "learning_rate": 0.00019999943838394616, - "loss": 46.0, - "step": 13963 - }, - { - "epoch": 1.067645316054055, - "grad_norm": 0.0047789085656404495, - "learning_rate": 0.00019999943830344225, - "loss": 46.0, - "step": 13964 - }, - { - "epoch": 1.0677217730374449, - "grad_norm": 0.0009100134484469891, - "learning_rate": 0.00019999943822293257, - "loss": 46.0, - "step": 13965 - }, - { - "epoch": 1.0677982300208346, - "grad_norm": 0.0016656355001032352, - "learning_rate": 0.0001999994381424171, - "loss": 46.0, - "step": 13966 - }, - { - "epoch": 1.0678746870042242, - "grad_norm": 0.002566606504842639, - "learning_rate": 0.00019999943806189586, - "loss": 46.0, - "step": 13967 - }, - { - "epoch": 1.067951143987614, - "grad_norm": 0.0009039713768288493, - "learning_rate": 0.00019999943798136883, - "loss": 46.0, - "step": 13968 - }, - { - "epoch": 1.0680276009710037, - "grad_norm": 0.00263177789747715, - "learning_rate": 0.00019999943790083606, - "loss": 46.0, - "step": 13969 - }, - { - "epoch": 1.0681040579543934, - "grad_norm": 0.00045178047730587423, - "learning_rate": 0.00019999943782029753, - "loss": 46.0, - "step": 13970 - }, - { - "epoch": 1.0681805149377832, - "grad_norm": 0.000992119312286377, - "learning_rate": 0.0001999994377397532, - "loss": 46.0, - "step": 13971 - }, - { - "epoch": 1.0682569719211727, - "grad_norm": 0.0010817080037668347, - "learning_rate": 0.00019999943765920312, - "loss": 46.0, - "step": 13972 - }, - { - "epoch": 1.0683334289045625, - "grad_norm": 0.002219643909484148, - "learning_rate": 0.00019999943757864727, - "loss": 46.0, - "step": 13973 - }, - { - "epoch": 1.0684098858879523, - "grad_norm": 0.006232650950551033, - "learning_rate": 0.00019999943749808566, - "loss": 46.0, - "step": 13974 - }, - { - "epoch": 1.068486342871342, - "grad_norm": 0.0008987795445136726, - "learning_rate": 0.00019999943741751824, - "loss": 46.0, - "step": 13975 - }, - { - "epoch": 1.0685627998547318, - "grad_norm": 0.007680040318518877, - "learning_rate": 0.0001999994373369451, - "loss": 46.0, - "step": 13976 - }, - { - "epoch": 1.0686392568381216, - "grad_norm": 0.0015218533808365464, - "learning_rate": 0.00019999943725636617, - "loss": 46.0, - "step": 13977 - }, - { - "epoch": 1.068715713821511, - "grad_norm": 0.001155595644377172, - "learning_rate": 0.00019999943717578146, - "loss": 46.0, - "step": 13978 - }, - { - "epoch": 1.0687921708049009, - "grad_norm": 0.0014901005197316408, - "learning_rate": 0.000199999437095191, - "loss": 46.0, - "step": 13979 - }, - { - "epoch": 1.0688686277882906, - "grad_norm": 0.008625698275864124, - "learning_rate": 0.00019999943701459475, - "loss": 46.0, - "step": 13980 - }, - { - "epoch": 1.0689450847716804, - "grad_norm": 0.005076452158391476, - "learning_rate": 0.00019999943693399278, - "loss": 46.0, - "step": 13981 - }, - { - "epoch": 1.0690215417550701, - "grad_norm": 0.0022425646893680096, - "learning_rate": 0.00019999943685338498, - "loss": 46.0, - "step": 13982 - }, - { - "epoch": 1.0690979987384597, - "grad_norm": 0.007618380710482597, - "learning_rate": 0.00019999943677277143, - "loss": 46.0, - "step": 13983 - }, - { - "epoch": 1.0691744557218494, - "grad_norm": 0.0003344593569636345, - "learning_rate": 0.0001999994366921521, - "loss": 46.0, - "step": 13984 - }, - { - "epoch": 1.0692509127052392, - "grad_norm": 0.0008194109541364014, - "learning_rate": 0.00019999943661152704, - "loss": 46.0, - "step": 13985 - }, - { - "epoch": 1.069327369688629, - "grad_norm": 0.0005785655812360346, - "learning_rate": 0.00019999943653089617, - "loss": 46.0, - "step": 13986 - }, - { - "epoch": 1.0694038266720187, - "grad_norm": 0.0063834101893007755, - "learning_rate": 0.00019999943645025953, - "loss": 46.0, - "step": 13987 - }, - { - "epoch": 1.0694802836554085, - "grad_norm": 0.0012502353638410568, - "learning_rate": 0.00019999943636961717, - "loss": 46.0, - "step": 13988 - }, - { - "epoch": 1.069556740638798, - "grad_norm": 0.0014219083823263645, - "learning_rate": 0.000199999436288969, - "loss": 46.0, - "step": 13989 - }, - { - "epoch": 1.0696331976221878, - "grad_norm": 0.0020772735588252544, - "learning_rate": 0.00019999943620831504, - "loss": 46.0, - "step": 13990 - }, - { - "epoch": 1.0697096546055775, - "grad_norm": 0.0006585913361050189, - "learning_rate": 0.00019999943612765536, - "loss": 46.0, - "step": 13991 - }, - { - "epoch": 1.0697861115889673, - "grad_norm": 0.0013312958180904388, - "learning_rate": 0.0001999994360469899, - "loss": 46.0, - "step": 13992 - }, - { - "epoch": 1.069862568572357, - "grad_norm": 0.003061628434807062, - "learning_rate": 0.00019999943596631865, - "loss": 46.0, - "step": 13993 - }, - { - "epoch": 1.0699390255557466, - "grad_norm": 0.0015037890989333391, - "learning_rate": 0.00019999943588564162, - "loss": 46.0, - "step": 13994 - }, - { - "epoch": 1.0700154825391364, - "grad_norm": 0.001705870614387095, - "learning_rate": 0.00019999943580495885, - "loss": 46.0, - "step": 13995 - }, - { - "epoch": 1.0700919395225261, - "grad_norm": 0.0010505091631785035, - "learning_rate": 0.00019999943572427032, - "loss": 46.0, - "step": 13996 - }, - { - "epoch": 1.0701683965059159, - "grad_norm": 0.0013376179849728942, - "learning_rate": 0.00019999943564357598, - "loss": 46.0, - "step": 13997 - }, - { - "epoch": 1.0702448534893056, - "grad_norm": 0.000491575978230685, - "learning_rate": 0.0001999994355628759, - "loss": 46.0, - "step": 13998 - }, - { - "epoch": 1.0703213104726954, - "grad_norm": 0.0008970610215328634, - "learning_rate": 0.00019999943548217004, - "loss": 46.0, - "step": 13999 - }, - { - "epoch": 1.070397767456085, - "grad_norm": 0.0007986085838638246, - "learning_rate": 0.0001999994354014584, - "loss": 46.0, - "step": 14000 - }, - { - "epoch": 1.0704742244394747, - "grad_norm": 0.005095066037029028, - "learning_rate": 0.000199999435320741, - "loss": 46.0, - "step": 14001 - }, - { - "epoch": 1.0705506814228645, - "grad_norm": 0.0006853225640952587, - "learning_rate": 0.00019999943524001785, - "loss": 46.0, - "step": 14002 - }, - { - "epoch": 1.0706271384062542, - "grad_norm": 0.0014812907902523875, - "learning_rate": 0.00019999943515928891, - "loss": 46.0, - "step": 14003 - }, - { - "epoch": 1.070703595389644, - "grad_norm": 0.0007540222723037004, - "learning_rate": 0.0001999994350785542, - "loss": 46.0, - "step": 14004 - }, - { - "epoch": 1.0707800523730335, - "grad_norm": 0.0016401633620262146, - "learning_rate": 0.00019999943499781372, - "loss": 46.0, - "step": 14005 - }, - { - "epoch": 1.0708565093564233, - "grad_norm": 0.0012590049300342798, - "learning_rate": 0.00019999943491706747, - "loss": 46.0, - "step": 14006 - }, - { - "epoch": 1.070932966339813, - "grad_norm": 0.010890955105423927, - "learning_rate": 0.00019999943483631547, - "loss": 46.0, - "step": 14007 - }, - { - "epoch": 1.0710094233232028, - "grad_norm": 0.0007714315433986485, - "learning_rate": 0.0001999994347555577, - "loss": 46.0, - "step": 14008 - }, - { - "epoch": 1.0710858803065926, - "grad_norm": 0.0007708133780397475, - "learning_rate": 0.00019999943467479415, - "loss": 46.0, - "step": 14009 - }, - { - "epoch": 1.0711623372899823, - "grad_norm": 0.0004983276594430208, - "learning_rate": 0.00019999943459402482, - "loss": 46.0, - "step": 14010 - }, - { - "epoch": 1.0712387942733719, - "grad_norm": 0.0007447259267792106, - "learning_rate": 0.00019999943451324973, - "loss": 46.0, - "step": 14011 - }, - { - "epoch": 1.0713152512567616, - "grad_norm": 0.004360907711088657, - "learning_rate": 0.00019999943443246886, - "loss": 46.0, - "step": 14012 - }, - { - "epoch": 1.0713917082401514, - "grad_norm": 0.0011546981986612082, - "learning_rate": 0.00019999943435168222, - "loss": 46.0, - "step": 14013 - }, - { - "epoch": 1.0714681652235412, - "grad_norm": 0.004667202010750771, - "learning_rate": 0.00019999943427088983, - "loss": 46.0, - "step": 14014 - }, - { - "epoch": 1.071544622206931, - "grad_norm": 0.002899933373555541, - "learning_rate": 0.00019999943419009167, - "loss": 46.0, - "step": 14015 - }, - { - "epoch": 1.0716210791903205, - "grad_norm": 0.002609699033200741, - "learning_rate": 0.0001999994341092877, - "loss": 46.0, - "step": 14016 - }, - { - "epoch": 1.0716975361737102, - "grad_norm": 0.0009058783762156963, - "learning_rate": 0.00019999943402847803, - "loss": 46.0, - "step": 14017 - }, - { - "epoch": 1.0717739931571, - "grad_norm": 0.0005103914299979806, - "learning_rate": 0.00019999943394766257, - "loss": 46.0, - "step": 14018 - }, - { - "epoch": 1.0718504501404897, - "grad_norm": 0.0012112745316699147, - "learning_rate": 0.0001999994338668413, - "loss": 46.0, - "step": 14019 - }, - { - "epoch": 1.0719269071238795, - "grad_norm": 0.0012157124001532793, - "learning_rate": 0.0001999994337860143, - "loss": 46.0, - "step": 14020 - }, - { - "epoch": 1.0720033641072693, - "grad_norm": 0.005270917434245348, - "learning_rate": 0.00019999943370518152, - "loss": 46.0, - "step": 14021 - }, - { - "epoch": 1.0720798210906588, - "grad_norm": 0.0014197340933606029, - "learning_rate": 0.00019999943362434294, - "loss": 46.0, - "step": 14022 - }, - { - "epoch": 1.0721562780740486, - "grad_norm": 0.0008277800516225398, - "learning_rate": 0.00019999943354349862, - "loss": 46.0, - "step": 14023 - }, - { - "epoch": 1.0722327350574383, - "grad_norm": 0.0012856264365836978, - "learning_rate": 0.00019999943346264853, - "loss": 46.0, - "step": 14024 - }, - { - "epoch": 1.072309192040828, - "grad_norm": 0.0010629127500578761, - "learning_rate": 0.00019999943338179269, - "loss": 46.0, - "step": 14025 - }, - { - "epoch": 1.0723856490242178, - "grad_norm": 0.0011625560000538826, - "learning_rate": 0.00019999943330093104, - "loss": 46.0, - "step": 14026 - }, - { - "epoch": 1.0724621060076074, - "grad_norm": 0.0010466283420100808, - "learning_rate": 0.00019999943322006366, - "loss": 46.0, - "step": 14027 - }, - { - "epoch": 1.0725385629909971, - "grad_norm": 0.0007803745684213936, - "learning_rate": 0.00019999943313919047, - "loss": 46.0, - "step": 14028 - }, - { - "epoch": 1.072615019974387, - "grad_norm": 0.0005933826323598623, - "learning_rate": 0.00019999943305831154, - "loss": 46.0, - "step": 14029 - }, - { - "epoch": 1.0726914769577767, - "grad_norm": 0.0007032674620859325, - "learning_rate": 0.0001999994329774268, - "loss": 46.0, - "step": 14030 - }, - { - "epoch": 1.0727679339411664, - "grad_norm": 0.0032815239392220974, - "learning_rate": 0.00019999943289653632, - "loss": 46.0, - "step": 14031 - }, - { - "epoch": 1.072844390924556, - "grad_norm": 0.0029132612980902195, - "learning_rate": 0.0001999994328156401, - "loss": 46.0, - "step": 14032 - }, - { - "epoch": 1.0729208479079457, - "grad_norm": 0.001137091894634068, - "learning_rate": 0.00019999943273473806, - "loss": 46.0, - "step": 14033 - }, - { - "epoch": 1.0729973048913355, - "grad_norm": 0.0009258515201508999, - "learning_rate": 0.00019999943265383026, - "loss": 46.0, - "step": 14034 - }, - { - "epoch": 1.0730737618747253, - "grad_norm": 0.0021736854687333107, - "learning_rate": 0.00019999943257291674, - "loss": 46.0, - "step": 14035 - }, - { - "epoch": 1.073150218858115, - "grad_norm": 0.00047871595597825944, - "learning_rate": 0.0001999994324919974, - "loss": 46.0, - "step": 14036 - }, - { - "epoch": 1.0732266758415048, - "grad_norm": 0.0007176236249506474, - "learning_rate": 0.00019999943241107227, - "loss": 46.0, - "step": 14037 - }, - { - "epoch": 1.0733031328248943, - "grad_norm": 0.0057198782451450825, - "learning_rate": 0.00019999943233014143, - "loss": 46.0, - "step": 14038 - }, - { - "epoch": 1.073379589808284, - "grad_norm": 0.00047919657663442194, - "learning_rate": 0.0001999994322492048, - "loss": 46.0, - "step": 14039 - }, - { - "epoch": 1.0734560467916738, - "grad_norm": 0.0011984321754425764, - "learning_rate": 0.0001999994321682624, - "loss": 46.0, - "step": 14040 - }, - { - "epoch": 1.0735325037750636, - "grad_norm": 0.0012020234717056155, - "learning_rate": 0.00019999943208731422, - "loss": 46.0, - "step": 14041 - }, - { - "epoch": 1.0736089607584534, - "grad_norm": 0.0012850005878135562, - "learning_rate": 0.00019999943200636028, - "loss": 46.0, - "step": 14042 - }, - { - "epoch": 1.073685417741843, - "grad_norm": 0.001039457623846829, - "learning_rate": 0.00019999943192540055, - "loss": 46.0, - "step": 14043 - }, - { - "epoch": 1.0737618747252327, - "grad_norm": 0.0015597695019096136, - "learning_rate": 0.00019999943184443507, - "loss": 46.0, - "step": 14044 - }, - { - "epoch": 1.0738383317086224, - "grad_norm": 0.0021370318718254566, - "learning_rate": 0.00019999943176346384, - "loss": 46.0, - "step": 14045 - }, - { - "epoch": 1.0739147886920122, - "grad_norm": 0.0011766912648454309, - "learning_rate": 0.00019999943168248682, - "loss": 46.0, - "step": 14046 - }, - { - "epoch": 1.073991245675402, - "grad_norm": 0.004398695193231106, - "learning_rate": 0.00019999943160150401, - "loss": 46.0, - "step": 14047 - }, - { - "epoch": 1.0740677026587917, - "grad_norm": 0.0015525203198194504, - "learning_rate": 0.00019999943152051547, - "loss": 46.0, - "step": 14048 - }, - { - "epoch": 1.0741441596421812, - "grad_norm": 0.0021211395505815744, - "learning_rate": 0.00019999943143952115, - "loss": 46.0, - "step": 14049 - }, - { - "epoch": 1.074220616625571, - "grad_norm": 0.0017788648838177323, - "learning_rate": 0.00019999943135852103, - "loss": 46.0, - "step": 14050 - }, - { - "epoch": 1.0742970736089608, - "grad_norm": 0.009273013100028038, - "learning_rate": 0.00019999943127751516, - "loss": 46.0, - "step": 14051 - }, - { - "epoch": 1.0743735305923505, - "grad_norm": 0.0010953949531540275, - "learning_rate": 0.00019999943119650352, - "loss": 46.0, - "step": 14052 - }, - { - "epoch": 1.0744499875757403, - "grad_norm": 0.00035844757803715765, - "learning_rate": 0.0001999994311154861, - "loss": 46.0, - "step": 14053 - }, - { - "epoch": 1.0745264445591298, - "grad_norm": 0.0010386542417109013, - "learning_rate": 0.00019999943103446294, - "loss": 46.0, - "step": 14054 - }, - { - "epoch": 1.0746029015425196, - "grad_norm": 0.0019077634206041694, - "learning_rate": 0.000199999430953434, - "loss": 46.0, - "step": 14055 - }, - { - "epoch": 1.0746793585259093, - "grad_norm": 0.014803217723965645, - "learning_rate": 0.0001999994308723993, - "loss": 46.0, - "step": 14056 - }, - { - "epoch": 1.074755815509299, - "grad_norm": 0.0065595852211117744, - "learning_rate": 0.00019999943079135882, - "loss": 46.0, - "step": 14057 - }, - { - "epoch": 1.0748322724926889, - "grad_norm": 0.00048247791710309684, - "learning_rate": 0.00019999943071031254, - "loss": 46.0, - "step": 14058 - }, - { - "epoch": 1.0749087294760784, - "grad_norm": 0.0014820725191384554, - "learning_rate": 0.00019999943062926052, - "loss": 46.0, - "step": 14059 - }, - { - "epoch": 1.0749851864594682, - "grad_norm": 0.0007685491000302136, - "learning_rate": 0.00019999943054820274, - "loss": 46.0, - "step": 14060 - }, - { - "epoch": 1.075061643442858, - "grad_norm": 0.0010234734509140253, - "learning_rate": 0.00019999943046713917, - "loss": 46.0, - "step": 14061 - }, - { - "epoch": 1.0751381004262477, - "grad_norm": 0.010823758319020271, - "learning_rate": 0.00019999943038606985, - "loss": 46.0, - "step": 14062 - }, - { - "epoch": 1.0752145574096375, - "grad_norm": 0.011111236177384853, - "learning_rate": 0.00019999943030499476, - "loss": 46.0, - "step": 14063 - }, - { - "epoch": 1.0752910143930272, - "grad_norm": 0.004077092744410038, - "learning_rate": 0.0001999994302239139, - "loss": 46.0, - "step": 14064 - }, - { - "epoch": 1.0753674713764168, - "grad_norm": 0.0010589115554466844, - "learning_rate": 0.00019999943014282722, - "loss": 46.0, - "step": 14065 - }, - { - "epoch": 1.0754439283598065, - "grad_norm": 0.0036376789212226868, - "learning_rate": 0.00019999943006173483, - "loss": 46.0, - "step": 14066 - }, - { - "epoch": 1.0755203853431963, - "grad_norm": 0.004602636676281691, - "learning_rate": 0.00019999942998063665, - "loss": 46.0, - "step": 14067 - }, - { - "epoch": 1.075596842326586, - "grad_norm": 0.0030424189753830433, - "learning_rate": 0.0001999994298995327, - "loss": 46.0, - "step": 14068 - }, - { - "epoch": 1.0756732993099758, - "grad_norm": 0.001877476810477674, - "learning_rate": 0.00019999942981842298, - "loss": 46.0, - "step": 14069 - }, - { - "epoch": 1.0757497562933653, - "grad_norm": 0.0006097674486227334, - "learning_rate": 0.0001999994297373075, - "loss": 46.0, - "step": 14070 - }, - { - "epoch": 1.075826213276755, - "grad_norm": 0.0006602031062357128, - "learning_rate": 0.00019999942965618622, - "loss": 46.0, - "step": 14071 - }, - { - "epoch": 1.0759026702601449, - "grad_norm": 0.0026901427190750837, - "learning_rate": 0.00019999942957505922, - "loss": 46.0, - "step": 14072 - }, - { - "epoch": 1.0759791272435346, - "grad_norm": 0.0005531682982109487, - "learning_rate": 0.00019999942949392642, - "loss": 46.0, - "step": 14073 - }, - { - "epoch": 1.0760555842269244, - "grad_norm": 0.0009872823720797896, - "learning_rate": 0.00019999942941278787, - "loss": 46.0, - "step": 14074 - }, - { - "epoch": 1.0761320412103141, - "grad_norm": 0.0010065939277410507, - "learning_rate": 0.00019999942933164353, - "loss": 46.0, - "step": 14075 - }, - { - "epoch": 1.0762084981937037, - "grad_norm": 0.004798435606062412, - "learning_rate": 0.0001999994292504934, - "loss": 46.0, - "step": 14076 - }, - { - "epoch": 1.0762849551770934, - "grad_norm": 0.0008685302454978228, - "learning_rate": 0.00019999942916933754, - "loss": 46.0, - "step": 14077 - }, - { - "epoch": 1.0763614121604832, - "grad_norm": 0.0015082533936947584, - "learning_rate": 0.0001999994290881759, - "loss": 46.0, - "step": 14078 - }, - { - "epoch": 1.076437869143873, - "grad_norm": 0.006136310752481222, - "learning_rate": 0.0001999994290070085, - "loss": 46.0, - "step": 14079 - }, - { - "epoch": 1.0765143261272627, - "grad_norm": 0.0010586829157546163, - "learning_rate": 0.0001999994289258353, - "loss": 46.0, - "step": 14080 - }, - { - "epoch": 1.0765907831106523, - "grad_norm": 0.002820434281602502, - "learning_rate": 0.00019999942884465637, - "loss": 46.0, - "step": 14081 - }, - { - "epoch": 1.076667240094042, - "grad_norm": 0.0017263127956539392, - "learning_rate": 0.00019999942876347163, - "loss": 46.0, - "step": 14082 - }, - { - "epoch": 1.0767436970774318, - "grad_norm": 0.0010990806622430682, - "learning_rate": 0.00019999942868228115, - "loss": 46.0, - "step": 14083 - }, - { - "epoch": 1.0768201540608215, - "grad_norm": 0.0011168146738782525, - "learning_rate": 0.0001999994286010849, - "loss": 46.0, - "step": 14084 - }, - { - "epoch": 1.0768966110442113, - "grad_norm": 0.005691499914973974, - "learning_rate": 0.00019999942851988287, - "loss": 46.0, - "step": 14085 - }, - { - "epoch": 1.076973068027601, - "grad_norm": 0.0013034566072747111, - "learning_rate": 0.00019999942843867507, - "loss": 46.0, - "step": 14086 - }, - { - "epoch": 1.0770495250109906, - "grad_norm": 0.001498423283919692, - "learning_rate": 0.0001999994283574615, - "loss": 46.0, - "step": 14087 - }, - { - "epoch": 1.0771259819943804, - "grad_norm": 0.002232861705124378, - "learning_rate": 0.00019999942827624216, - "loss": 46.0, - "step": 14088 - }, - { - "epoch": 1.0772024389777701, - "grad_norm": 0.006442929618060589, - "learning_rate": 0.00019999942819501704, - "loss": 46.0, - "step": 14089 - }, - { - "epoch": 1.0772788959611599, - "grad_norm": 0.0007409933023154736, - "learning_rate": 0.00019999942811378617, - "loss": 46.0, - "step": 14090 - }, - { - "epoch": 1.0773553529445496, - "grad_norm": 0.0011904374696314335, - "learning_rate": 0.00019999942803254953, - "loss": 46.0, - "step": 14091 - }, - { - "epoch": 1.0774318099279392, - "grad_norm": 0.012527956627309322, - "learning_rate": 0.00019999942795130712, - "loss": 46.0, - "step": 14092 - }, - { - "epoch": 1.077508266911329, - "grad_norm": 0.0034234528429806232, - "learning_rate": 0.00019999942787005893, - "loss": 46.0, - "step": 14093 - }, - { - "epoch": 1.0775847238947187, - "grad_norm": 0.002114152768626809, - "learning_rate": 0.000199999427788805, - "loss": 46.0, - "step": 14094 - }, - { - "epoch": 1.0776611808781085, - "grad_norm": 0.004000728949904442, - "learning_rate": 0.00019999942770754526, - "loss": 46.0, - "step": 14095 - }, - { - "epoch": 1.0777376378614982, - "grad_norm": 0.004133324604481459, - "learning_rate": 0.00019999942762627976, - "loss": 46.0, - "step": 14096 - }, - { - "epoch": 1.077814094844888, - "grad_norm": 0.0016991019947454333, - "learning_rate": 0.0001999994275450085, - "loss": 46.0, - "step": 14097 - }, - { - "epoch": 1.0778905518282775, - "grad_norm": 0.0006427473272196949, - "learning_rate": 0.00019999942746373148, - "loss": 46.0, - "step": 14098 - }, - { - "epoch": 1.0779670088116673, - "grad_norm": 0.0022385362535715103, - "learning_rate": 0.00019999942738244868, - "loss": 46.0, - "step": 14099 - }, - { - "epoch": 1.078043465795057, - "grad_norm": 0.00048014981439337134, - "learning_rate": 0.0001999994273011601, - "loss": 46.0, - "step": 14100 - }, - { - "epoch": 1.0781199227784468, - "grad_norm": 0.0008626414928585291, - "learning_rate": 0.0001999994272198658, - "loss": 46.0, - "step": 14101 - }, - { - "epoch": 1.0781963797618366, - "grad_norm": 0.0013959245989099145, - "learning_rate": 0.00019999942713856567, - "loss": 46.0, - "step": 14102 - }, - { - "epoch": 1.0782728367452261, - "grad_norm": 0.0007344413315877318, - "learning_rate": 0.00019999942705725977, - "loss": 46.0, - "step": 14103 - }, - { - "epoch": 1.0783492937286159, - "grad_norm": 0.0024672059807926416, - "learning_rate": 0.00019999942697594816, - "loss": 46.0, - "step": 14104 - }, - { - "epoch": 1.0784257507120056, - "grad_norm": 0.0009161752532236278, - "learning_rate": 0.00019999942689463072, - "loss": 46.0, - "step": 14105 - }, - { - "epoch": 1.0785022076953954, - "grad_norm": 0.0011544388253241777, - "learning_rate": 0.00019999942681330754, - "loss": 46.0, - "step": 14106 - }, - { - "epoch": 1.0785786646787852, - "grad_norm": 0.0035547870211303234, - "learning_rate": 0.0001999994267319786, - "loss": 46.0, - "step": 14107 - }, - { - "epoch": 1.078655121662175, - "grad_norm": 0.003957994747906923, - "learning_rate": 0.00019999942665064387, - "loss": 46.0, - "step": 14108 - }, - { - "epoch": 1.0787315786455645, - "grad_norm": 0.0004292784142307937, - "learning_rate": 0.00019999942656930337, - "loss": 46.0, - "step": 14109 - }, - { - "epoch": 1.0788080356289542, - "grad_norm": 0.0008417435456067324, - "learning_rate": 0.0001999994264879571, - "loss": 46.0, - "step": 14110 - }, - { - "epoch": 1.078884492612344, - "grad_norm": 0.001191833638586104, - "learning_rate": 0.00019999942640660506, - "loss": 46.0, - "step": 14111 - }, - { - "epoch": 1.0789609495957337, - "grad_norm": 0.0007495269528590143, - "learning_rate": 0.0001999994263252473, - "loss": 46.0, - "step": 14112 - }, - { - "epoch": 1.0790374065791235, - "grad_norm": 0.0006646562251262367, - "learning_rate": 0.00019999942624388372, - "loss": 46.0, - "step": 14113 - }, - { - "epoch": 1.079113863562513, - "grad_norm": 0.0009241909720003605, - "learning_rate": 0.00019999942616251434, - "loss": 46.0, - "step": 14114 - }, - { - "epoch": 1.0791903205459028, - "grad_norm": 0.0022906421218067408, - "learning_rate": 0.00019999942608113925, - "loss": 46.0, - "step": 14115 - }, - { - "epoch": 1.0792667775292926, - "grad_norm": 0.0015027154004201293, - "learning_rate": 0.00019999942599975836, - "loss": 46.0, - "step": 14116 - }, - { - "epoch": 1.0793432345126823, - "grad_norm": 0.0005942407879047096, - "learning_rate": 0.00019999942591837172, - "loss": 46.0, - "step": 14117 - }, - { - "epoch": 1.079419691496072, - "grad_norm": 0.0018964800983667374, - "learning_rate": 0.0001999994258369793, - "loss": 46.0, - "step": 14118 - }, - { - "epoch": 1.0794961484794618, - "grad_norm": 0.0037390545476228, - "learning_rate": 0.0001999994257555811, - "loss": 46.0, - "step": 14119 - }, - { - "epoch": 1.0795726054628514, - "grad_norm": 0.0034300799015909433, - "learning_rate": 0.00019999942567417716, - "loss": 46.0, - "step": 14120 - }, - { - "epoch": 1.0796490624462411, - "grad_norm": 0.0009461540030315518, - "learning_rate": 0.00019999942559276743, - "loss": 46.0, - "step": 14121 - }, - { - "epoch": 1.079725519429631, - "grad_norm": 0.0010551678715273738, - "learning_rate": 0.00019999942551135192, - "loss": 46.0, - "step": 14122 - }, - { - "epoch": 1.0798019764130207, - "grad_norm": 0.0015043900348246098, - "learning_rate": 0.00019999942542993067, - "loss": 46.0, - "step": 14123 - }, - { - "epoch": 1.0798784333964104, - "grad_norm": 0.011879284866154194, - "learning_rate": 0.00019999942534850362, - "loss": 46.0, - "step": 14124 - }, - { - "epoch": 1.0799548903798, - "grad_norm": 0.0015790078323334455, - "learning_rate": 0.00019999942526707082, - "loss": 46.0, - "step": 14125 - }, - { - "epoch": 1.0800313473631897, - "grad_norm": 0.0008118515834212303, - "learning_rate": 0.00019999942518563225, - "loss": 46.0, - "step": 14126 - }, - { - "epoch": 1.0801078043465795, - "grad_norm": 0.0010812345426529646, - "learning_rate": 0.0001999994251041879, - "loss": 46.0, - "step": 14127 - }, - { - "epoch": 1.0801842613299693, - "grad_norm": 0.0014956349041312933, - "learning_rate": 0.0001999994250227378, - "loss": 46.0, - "step": 14128 - }, - { - "epoch": 1.080260718313359, - "grad_norm": 0.0059073143638670444, - "learning_rate": 0.0001999994249412819, - "loss": 46.0, - "step": 14129 - }, - { - "epoch": 1.0803371752967488, - "grad_norm": 0.006054010707885027, - "learning_rate": 0.00019999942485982025, - "loss": 46.0, - "step": 14130 - }, - { - "epoch": 1.0804136322801383, - "grad_norm": 0.001565291895531118, - "learning_rate": 0.00019999942477835284, - "loss": 46.0, - "step": 14131 - }, - { - "epoch": 1.080490089263528, - "grad_norm": 0.001726433401927352, - "learning_rate": 0.00019999942469687965, - "loss": 46.0, - "step": 14132 - }, - { - "epoch": 1.0805665462469178, - "grad_norm": 0.00841713696718216, - "learning_rate": 0.0001999994246154007, - "loss": 46.0, - "step": 14133 - }, - { - "epoch": 1.0806430032303076, - "grad_norm": 0.0015397778479382396, - "learning_rate": 0.00019999942453391596, - "loss": 46.0, - "step": 14134 - }, - { - "epoch": 1.0807194602136974, - "grad_norm": 0.0026642829179763794, - "learning_rate": 0.00019999942445242546, - "loss": 46.0, - "step": 14135 - }, - { - "epoch": 1.080795917197087, - "grad_norm": 0.0017836529295891523, - "learning_rate": 0.0001999994243709292, - "loss": 46.0, - "step": 14136 - }, - { - "epoch": 1.0808723741804767, - "grad_norm": 0.001951273763552308, - "learning_rate": 0.00019999942428942715, - "loss": 46.0, - "step": 14137 - }, - { - "epoch": 1.0809488311638664, - "grad_norm": 0.0010551137384027243, - "learning_rate": 0.00019999942420791933, - "loss": 46.0, - "step": 14138 - }, - { - "epoch": 1.0810252881472562, - "grad_norm": 0.00663244491443038, - "learning_rate": 0.00019999942412640578, - "loss": 46.0, - "step": 14139 - }, - { - "epoch": 1.081101745130646, - "grad_norm": 0.002032683929428458, - "learning_rate": 0.0001999994240448864, - "loss": 46.0, - "step": 14140 - }, - { - "epoch": 1.0811782021140357, - "grad_norm": 0.0005868744920007885, - "learning_rate": 0.00019999942396336132, - "loss": 46.0, - "step": 14141 - }, - { - "epoch": 1.0812546590974252, - "grad_norm": 0.0022092147264629602, - "learning_rate": 0.00019999942388183043, - "loss": 46.0, - "step": 14142 - }, - { - "epoch": 1.081331116080815, - "grad_norm": 0.003351550316438079, - "learning_rate": 0.00019999942380029376, - "loss": 46.0, - "step": 14143 - }, - { - "epoch": 1.0814075730642048, - "grad_norm": 0.0007190115284174681, - "learning_rate": 0.00019999942371875132, - "loss": 46.0, - "step": 14144 - }, - { - "epoch": 1.0814840300475945, - "grad_norm": 0.0009946164209395647, - "learning_rate": 0.00019999942363720314, - "loss": 46.0, - "step": 14145 - }, - { - "epoch": 1.0815604870309843, - "grad_norm": 0.0005807209527119994, - "learning_rate": 0.00019999942355564918, - "loss": 46.0, - "step": 14146 - }, - { - "epoch": 1.0816369440143738, - "grad_norm": 0.004871238023042679, - "learning_rate": 0.00019999942347408945, - "loss": 46.0, - "step": 14147 - }, - { - "epoch": 1.0817134009977636, - "grad_norm": 0.000403507990995422, - "learning_rate": 0.00019999942339252394, - "loss": 46.0, - "step": 14148 - }, - { - "epoch": 1.0817898579811533, - "grad_norm": 0.001539861666969955, - "learning_rate": 0.0001999994233109527, - "loss": 46.0, - "step": 14149 - }, - { - "epoch": 1.081866314964543, - "grad_norm": 0.0012488304637372494, - "learning_rate": 0.00019999942322937564, - "loss": 46.0, - "step": 14150 - }, - { - "epoch": 1.0819427719479329, - "grad_norm": 0.0035214044619351625, - "learning_rate": 0.00019999942314779284, - "loss": 46.0, - "step": 14151 - }, - { - "epoch": 1.0820192289313226, - "grad_norm": 0.004506431985646486, - "learning_rate": 0.00019999942306620424, - "loss": 46.0, - "step": 14152 - }, - { - "epoch": 1.0820956859147122, - "grad_norm": 0.014677898958325386, - "learning_rate": 0.00019999942298460992, - "loss": 46.0, - "step": 14153 - }, - { - "epoch": 1.082172142898102, - "grad_norm": 0.0026089835446327925, - "learning_rate": 0.00019999942290300978, - "loss": 46.0, - "step": 14154 - }, - { - "epoch": 1.0822485998814917, - "grad_norm": 0.0006066914065741003, - "learning_rate": 0.0001999994228214039, - "loss": 46.0, - "step": 14155 - }, - { - "epoch": 1.0823250568648815, - "grad_norm": 0.0045068091712892056, - "learning_rate": 0.00019999942273979225, - "loss": 46.0, - "step": 14156 - }, - { - "epoch": 1.0824015138482712, - "grad_norm": 0.0010878955945372581, - "learning_rate": 0.0001999994226581748, - "loss": 46.0, - "step": 14157 - }, - { - "epoch": 1.0824779708316608, - "grad_norm": 0.001035849447362125, - "learning_rate": 0.00019999942257655163, - "loss": 46.0, - "step": 14158 - }, - { - "epoch": 1.0825544278150505, - "grad_norm": 0.0008703760686330497, - "learning_rate": 0.00019999942249492264, - "loss": 46.0, - "step": 14159 - }, - { - "epoch": 1.0826308847984403, - "grad_norm": 0.004856419749557972, - "learning_rate": 0.0001999994224132879, - "loss": 46.0, - "step": 14160 - }, - { - "epoch": 1.08270734178183, - "grad_norm": 0.0011521837441250682, - "learning_rate": 0.0001999994223316474, - "loss": 46.0, - "step": 14161 - }, - { - "epoch": 1.0827837987652198, - "grad_norm": 0.00044389814138412476, - "learning_rate": 0.00019999942225000115, - "loss": 46.0, - "step": 14162 - }, - { - "epoch": 1.0828602557486093, - "grad_norm": 0.0030648878309875727, - "learning_rate": 0.0001999994221683491, - "loss": 46.0, - "step": 14163 - }, - { - "epoch": 1.082936712731999, - "grad_norm": 0.0009003924205899239, - "learning_rate": 0.0001999994220866913, - "loss": 46.0, - "step": 14164 - }, - { - "epoch": 1.0830131697153889, - "grad_norm": 0.006464676465839148, - "learning_rate": 0.0001999994220050277, - "loss": 46.0, - "step": 14165 - }, - { - "epoch": 1.0830896266987786, - "grad_norm": 0.003375225467607379, - "learning_rate": 0.00019999942192335833, - "loss": 46.0, - "step": 14166 - }, - { - "epoch": 1.0831660836821684, - "grad_norm": 0.0016525730025023222, - "learning_rate": 0.00019999942184168324, - "loss": 46.0, - "step": 14167 - }, - { - "epoch": 1.0832425406655581, - "grad_norm": 0.0007938342751003802, - "learning_rate": 0.00019999942176000235, - "loss": 46.0, - "step": 14168 - }, - { - "epoch": 1.0833189976489477, - "grad_norm": 0.0006924756453372538, - "learning_rate": 0.00019999942167831569, - "loss": 46.0, - "step": 14169 - }, - { - "epoch": 1.0833954546323374, - "grad_norm": 0.002503820229321718, - "learning_rate": 0.00019999942159662325, - "loss": 46.0, - "step": 14170 - }, - { - "epoch": 1.0834719116157272, - "grad_norm": 0.0006507441867142916, - "learning_rate": 0.00019999942151492506, - "loss": 46.0, - "step": 14171 - }, - { - "epoch": 1.083548368599117, - "grad_norm": 0.013293273746967316, - "learning_rate": 0.00019999942143322108, - "loss": 46.0, - "step": 14172 - }, - { - "epoch": 1.0836248255825067, - "grad_norm": 0.009504067711532116, - "learning_rate": 0.00019999942135151135, - "loss": 46.0, - "step": 14173 - }, - { - "epoch": 1.0837012825658963, - "grad_norm": 0.019417013972997665, - "learning_rate": 0.00019999942126979584, - "loss": 46.0, - "step": 14174 - }, - { - "epoch": 1.083777739549286, - "grad_norm": 0.0016870127292349935, - "learning_rate": 0.00019999942118807456, - "loss": 46.0, - "step": 14175 - }, - { - "epoch": 1.0838541965326758, - "grad_norm": 0.0017147115431725979, - "learning_rate": 0.00019999942110634754, - "loss": 46.0, - "step": 14176 - }, - { - "epoch": 1.0839306535160655, - "grad_norm": 0.003814919851720333, - "learning_rate": 0.00019999942102461472, - "loss": 46.0, - "step": 14177 - }, - { - "epoch": 1.0840071104994553, - "grad_norm": 0.002881775377318263, - "learning_rate": 0.00019999942094287612, - "loss": 46.0, - "step": 14178 - }, - { - "epoch": 1.084083567482845, - "grad_norm": 0.002160491421818733, - "learning_rate": 0.00019999942086113177, - "loss": 46.0, - "step": 14179 - }, - { - "epoch": 1.0841600244662346, - "grad_norm": 0.015525463968515396, - "learning_rate": 0.00019999942077938166, - "loss": 46.0, - "step": 14180 - }, - { - "epoch": 1.0842364814496244, - "grad_norm": 0.0011640343582257628, - "learning_rate": 0.00019999942069762576, - "loss": 46.0, - "step": 14181 - }, - { - "epoch": 1.0843129384330141, - "grad_norm": 0.001267034444026649, - "learning_rate": 0.0001999994206158641, - "loss": 46.0, - "step": 14182 - }, - { - "epoch": 1.084389395416404, - "grad_norm": 0.0023554738145321608, - "learning_rate": 0.00019999942053409666, - "loss": 46.0, - "step": 14183 - }, - { - "epoch": 1.0844658523997937, - "grad_norm": 0.00042725124512799084, - "learning_rate": 0.00019999942045232345, - "loss": 46.0, - "step": 14184 - }, - { - "epoch": 1.0845423093831832, - "grad_norm": 0.0007296939729712903, - "learning_rate": 0.00019999942037054452, - "loss": 46.0, - "step": 14185 - }, - { - "epoch": 1.084618766366573, - "grad_norm": 0.0006084716296754777, - "learning_rate": 0.00019999942028875976, - "loss": 46.0, - "step": 14186 - }, - { - "epoch": 1.0846952233499627, - "grad_norm": 0.005822858773171902, - "learning_rate": 0.00019999942020696926, - "loss": 46.0, - "step": 14187 - }, - { - "epoch": 1.0847716803333525, - "grad_norm": 0.0015475107356905937, - "learning_rate": 0.00019999942012517295, - "loss": 46.0, - "step": 14188 - }, - { - "epoch": 1.0848481373167422, - "grad_norm": 0.0009447706979699433, - "learning_rate": 0.00019999942004337093, - "loss": 46.0, - "step": 14189 - }, - { - "epoch": 1.0849245943001318, - "grad_norm": 0.003099769353866577, - "learning_rate": 0.0001999994199615631, - "loss": 46.0, - "step": 14190 - }, - { - "epoch": 1.0850010512835215, - "grad_norm": 0.0008022661786526442, - "learning_rate": 0.0001999994198797495, - "loss": 46.0, - "step": 14191 - }, - { - "epoch": 1.0850775082669113, - "grad_norm": 0.0009172812569886446, - "learning_rate": 0.00019999941979793016, - "loss": 46.0, - "step": 14192 - }, - { - "epoch": 1.085153965250301, - "grad_norm": 0.0014615554828196764, - "learning_rate": 0.00019999941971610502, - "loss": 46.0, - "step": 14193 - }, - { - "epoch": 1.0852304222336908, - "grad_norm": 0.0009250359726138413, - "learning_rate": 0.00019999941963427413, - "loss": 46.0, - "step": 14194 - }, - { - "epoch": 1.0853068792170806, - "grad_norm": 0.0008587997872382402, - "learning_rate": 0.00019999941955243746, - "loss": 46.0, - "step": 14195 - }, - { - "epoch": 1.0853833362004701, - "grad_norm": 0.0009336332441307604, - "learning_rate": 0.00019999941947059505, - "loss": 46.0, - "step": 14196 - }, - { - "epoch": 1.0854597931838599, - "grad_norm": 0.0009571525151841342, - "learning_rate": 0.00019999941938874684, - "loss": 46.0, - "step": 14197 - }, - { - "epoch": 1.0855362501672496, - "grad_norm": 0.0025165551342070103, - "learning_rate": 0.00019999941930689286, - "loss": 46.0, - "step": 14198 - }, - { - "epoch": 1.0856127071506394, - "grad_norm": 0.0033422091510146856, - "learning_rate": 0.00019999941922503313, - "loss": 46.0, - "step": 14199 - }, - { - "epoch": 1.0856891641340292, - "grad_norm": 0.0013745806645601988, - "learning_rate": 0.0001999994191431676, - "loss": 46.0, - "step": 14200 - }, - { - "epoch": 1.0857656211174187, - "grad_norm": 0.00043467935756780207, - "learning_rate": 0.00019999941906129635, - "loss": 46.0, - "step": 14201 - }, - { - "epoch": 1.0858420781008085, - "grad_norm": 0.0010825492208823562, - "learning_rate": 0.0001999994189794193, - "loss": 46.0, - "step": 14202 - }, - { - "epoch": 1.0859185350841982, - "grad_norm": 0.0031072243582457304, - "learning_rate": 0.00019999941889753647, - "loss": 46.0, - "step": 14203 - }, - { - "epoch": 1.085994992067588, - "grad_norm": 0.0006837751134298742, - "learning_rate": 0.00019999941881564787, - "loss": 46.0, - "step": 14204 - }, - { - "epoch": 1.0860714490509777, - "grad_norm": 0.0008400525548495352, - "learning_rate": 0.00019999941873375353, - "loss": 46.0, - "step": 14205 - }, - { - "epoch": 1.0861479060343675, - "grad_norm": 0.003376114647835493, - "learning_rate": 0.00019999941865185339, - "loss": 46.0, - "step": 14206 - }, - { - "epoch": 1.086224363017757, - "grad_norm": 0.0009143169154413044, - "learning_rate": 0.0001999994185699475, - "loss": 46.0, - "step": 14207 - }, - { - "epoch": 1.0863008200011468, - "grad_norm": 0.001540070166811347, - "learning_rate": 0.00019999941848803583, - "loss": 46.0, - "step": 14208 - }, - { - "epoch": 1.0863772769845366, - "grad_norm": 0.000881193729583174, - "learning_rate": 0.00019999941840611837, - "loss": 46.0, - "step": 14209 - }, - { - "epoch": 1.0864537339679263, - "grad_norm": 0.0014265382196754217, - "learning_rate": 0.0001999994183241952, - "loss": 46.0, - "step": 14210 - }, - { - "epoch": 1.086530190951316, - "grad_norm": 0.0006530576501972973, - "learning_rate": 0.0001999994182422662, - "loss": 46.0, - "step": 14211 - }, - { - "epoch": 1.0866066479347056, - "grad_norm": 0.0011609486537054181, - "learning_rate": 0.00019999941816033147, - "loss": 46.0, - "step": 14212 - }, - { - "epoch": 1.0866831049180954, - "grad_norm": 0.00048583035822957754, - "learning_rate": 0.00019999941807839097, - "loss": 46.0, - "step": 14213 - }, - { - "epoch": 1.0867595619014851, - "grad_norm": 0.0005649944650940597, - "learning_rate": 0.00019999941799644466, - "loss": 46.0, - "step": 14214 - }, - { - "epoch": 1.086836018884875, - "grad_norm": 0.0011280899634584785, - "learning_rate": 0.0001999994179144926, - "loss": 46.0, - "step": 14215 - }, - { - "epoch": 1.0869124758682647, - "grad_norm": 0.005256341770291328, - "learning_rate": 0.0001999994178325348, - "loss": 46.0, - "step": 14216 - }, - { - "epoch": 1.0869889328516544, - "grad_norm": 0.0026654910761862993, - "learning_rate": 0.00019999941775057122, - "loss": 46.0, - "step": 14217 - }, - { - "epoch": 1.087065389835044, - "grad_norm": 0.0005470442119985819, - "learning_rate": 0.00019999941766860185, - "loss": 46.0, - "step": 14218 - }, - { - "epoch": 1.0871418468184337, - "grad_norm": 0.0008824032265692949, - "learning_rate": 0.0001999994175866267, - "loss": 46.0, - "step": 14219 - }, - { - "epoch": 1.0872183038018235, - "grad_norm": 0.001202802057377994, - "learning_rate": 0.0001999994175046458, - "loss": 46.0, - "step": 14220 - }, - { - "epoch": 1.0872947607852133, - "grad_norm": 0.0006799806142225862, - "learning_rate": 0.00019999941742265915, - "loss": 46.0, - "step": 14221 - }, - { - "epoch": 1.087371217768603, - "grad_norm": 0.006414244882762432, - "learning_rate": 0.00019999941734066668, - "loss": 46.0, - "step": 14222 - }, - { - "epoch": 1.0874476747519926, - "grad_norm": 0.0012257150374352932, - "learning_rate": 0.0001999994172586685, - "loss": 46.0, - "step": 14223 - }, - { - "epoch": 1.0875241317353823, - "grad_norm": 0.0013132316526025534, - "learning_rate": 0.00019999941717666452, - "loss": 46.0, - "step": 14224 - }, - { - "epoch": 1.087600588718772, - "grad_norm": 0.0031813608948141336, - "learning_rate": 0.00019999941709465476, - "loss": 46.0, - "step": 14225 - }, - { - "epoch": 1.0876770457021618, - "grad_norm": 0.0008511891937814653, - "learning_rate": 0.00019999941701263926, - "loss": 46.0, - "step": 14226 - }, - { - "epoch": 1.0877535026855516, - "grad_norm": 0.001913353567942977, - "learning_rate": 0.00019999941693061795, - "loss": 46.0, - "step": 14227 - }, - { - "epoch": 1.0878299596689414, - "grad_norm": 0.0020528610330075026, - "learning_rate": 0.0001999994168485909, - "loss": 46.0, - "step": 14228 - }, - { - "epoch": 1.087906416652331, - "grad_norm": 0.011435126885771751, - "learning_rate": 0.00019999941676655808, - "loss": 46.0, - "step": 14229 - }, - { - "epoch": 1.0879828736357207, - "grad_norm": 0.0023141042329370975, - "learning_rate": 0.00019999941668451948, - "loss": 46.0, - "step": 14230 - }, - { - "epoch": 1.0880593306191104, - "grad_norm": 0.002171831438317895, - "learning_rate": 0.00019999941660247514, - "loss": 46.0, - "step": 14231 - }, - { - "epoch": 1.0881357876025002, - "grad_norm": 0.005008396692574024, - "learning_rate": 0.000199999416520425, - "loss": 46.0, - "step": 14232 - }, - { - "epoch": 1.08821224458589, - "grad_norm": 0.0005496269441209733, - "learning_rate": 0.00019999941643836908, - "loss": 46.0, - "step": 14233 - }, - { - "epoch": 1.0882887015692795, - "grad_norm": 0.0019480016781017184, - "learning_rate": 0.00019999941635630744, - "loss": 46.0, - "step": 14234 - }, - { - "epoch": 1.0883651585526692, - "grad_norm": 0.0005285037914291024, - "learning_rate": 0.00019999941627423998, - "loss": 46.0, - "step": 14235 - }, - { - "epoch": 1.088441615536059, - "grad_norm": 0.0030830376781523228, - "learning_rate": 0.00019999941619216677, - "loss": 46.0, - "step": 14236 - }, - { - "epoch": 1.0885180725194488, - "grad_norm": 0.0007691544597037137, - "learning_rate": 0.0001999994161100878, - "loss": 46.0, - "step": 14237 - }, - { - "epoch": 1.0885945295028385, - "grad_norm": 0.0013154344633221626, - "learning_rate": 0.00019999941602800303, - "loss": 46.0, - "step": 14238 - }, - { - "epoch": 1.0886709864862283, - "grad_norm": 0.0021835200022906065, - "learning_rate": 0.00019999941594591253, - "loss": 46.0, - "step": 14239 - }, - { - "epoch": 1.0887474434696178, - "grad_norm": 0.0011576777324080467, - "learning_rate": 0.00019999941586381622, - "loss": 46.0, - "step": 14240 - }, - { - "epoch": 1.0888239004530076, - "grad_norm": 0.0003792611532844603, - "learning_rate": 0.00019999941578171417, - "loss": 46.0, - "step": 14241 - }, - { - "epoch": 1.0889003574363973, - "grad_norm": 0.0009405299206264317, - "learning_rate": 0.00019999941569960635, - "loss": 46.0, - "step": 14242 - }, - { - "epoch": 1.088976814419787, - "grad_norm": 0.0006741826073266566, - "learning_rate": 0.00019999941561749275, - "loss": 46.0, - "step": 14243 - }, - { - "epoch": 1.0890532714031769, - "grad_norm": 0.004020855762064457, - "learning_rate": 0.0001999994155353734, - "loss": 46.0, - "step": 14244 - }, - { - "epoch": 1.0891297283865664, - "grad_norm": 0.000288337905658409, - "learning_rate": 0.00019999941545324824, - "loss": 46.0, - "step": 14245 - }, - { - "epoch": 1.0892061853699562, - "grad_norm": 0.0007682301802560687, - "learning_rate": 0.00019999941537111735, - "loss": 46.0, - "step": 14246 - }, - { - "epoch": 1.089282642353346, - "grad_norm": 0.0005076015368103981, - "learning_rate": 0.00019999941528898066, - "loss": 46.0, - "step": 14247 - }, - { - "epoch": 1.0893590993367357, - "grad_norm": 0.0020966168958693743, - "learning_rate": 0.00019999941520683822, - "loss": 46.0, - "step": 14248 - }, - { - "epoch": 1.0894355563201255, - "grad_norm": 0.0007726805051788688, - "learning_rate": 0.00019999941512469002, - "loss": 46.0, - "step": 14249 - }, - { - "epoch": 1.0895120133035152, - "grad_norm": 0.0025917242746800184, - "learning_rate": 0.00019999941504253603, - "loss": 46.0, - "step": 14250 - }, - { - "epoch": 1.0895884702869048, - "grad_norm": 0.00030111370142549276, - "learning_rate": 0.00019999941496037628, - "loss": 46.0, - "step": 14251 - }, - { - "epoch": 1.0896649272702945, - "grad_norm": 0.0038051693700253963, - "learning_rate": 0.00019999941487821077, - "loss": 46.0, - "step": 14252 - }, - { - "epoch": 1.0897413842536843, - "grad_norm": 0.0014256901340559125, - "learning_rate": 0.00019999941479603947, - "loss": 46.0, - "step": 14253 - }, - { - "epoch": 1.089817841237074, - "grad_norm": 0.0007533254683949053, - "learning_rate": 0.00019999941471386242, - "loss": 46.0, - "step": 14254 - }, - { - "epoch": 1.0898942982204638, - "grad_norm": 0.004037715960294008, - "learning_rate": 0.00019999941463167957, - "loss": 46.0, - "step": 14255 - }, - { - "epoch": 1.0899707552038533, - "grad_norm": 0.008206482045352459, - "learning_rate": 0.00019999941454949097, - "loss": 46.0, - "step": 14256 - }, - { - "epoch": 1.090047212187243, - "grad_norm": 0.0006208682316355407, - "learning_rate": 0.00019999941446729663, - "loss": 46.0, - "step": 14257 - }, - { - "epoch": 1.0901236691706329, - "grad_norm": 0.000864917878061533, - "learning_rate": 0.00019999941438509646, - "loss": 46.0, - "step": 14258 - }, - { - "epoch": 1.0902001261540226, - "grad_norm": 0.0031939952168613672, - "learning_rate": 0.00019999941430289057, - "loss": 46.0, - "step": 14259 - }, - { - "epoch": 1.0902765831374124, - "grad_norm": 0.0006800976698286831, - "learning_rate": 0.00019999941422067888, - "loss": 46.0, - "step": 14260 - }, - { - "epoch": 1.0903530401208021, - "grad_norm": 0.0012649616692215204, - "learning_rate": 0.00019999941413846145, - "loss": 46.0, - "step": 14261 - }, - { - "epoch": 1.0904294971041917, - "grad_norm": 0.0011895967181771994, - "learning_rate": 0.00019999941405623824, - "loss": 46.0, - "step": 14262 - }, - { - "epoch": 1.0905059540875814, - "grad_norm": 0.0017825077520683408, - "learning_rate": 0.00019999941397400925, - "loss": 46.0, - "step": 14263 - }, - { - "epoch": 1.0905824110709712, - "grad_norm": 0.001082477392628789, - "learning_rate": 0.0001999994138917745, - "loss": 46.0, - "step": 14264 - }, - { - "epoch": 1.090658868054361, - "grad_norm": 0.0014291038969531655, - "learning_rate": 0.00019999941380953397, - "loss": 46.0, - "step": 14265 - }, - { - "epoch": 1.0907353250377507, - "grad_norm": 0.002422285731881857, - "learning_rate": 0.00019999941372728767, - "loss": 46.0, - "step": 14266 - }, - { - "epoch": 1.0908117820211403, - "grad_norm": 0.0010261788265779614, - "learning_rate": 0.00019999941364503562, - "loss": 46.0, - "step": 14267 - }, - { - "epoch": 1.09088823900453, - "grad_norm": 0.0014432704774662852, - "learning_rate": 0.0001999994135627778, - "loss": 46.0, - "step": 14268 - }, - { - "epoch": 1.0909646959879198, - "grad_norm": 0.009557048790156841, - "learning_rate": 0.0001999994134805142, - "loss": 46.0, - "step": 14269 - }, - { - "epoch": 1.0910411529713095, - "grad_norm": 0.001137599116191268, - "learning_rate": 0.0001999994133982448, - "loss": 46.0, - "step": 14270 - }, - { - "epoch": 1.0911176099546993, - "grad_norm": 0.002625234192237258, - "learning_rate": 0.0001999994133159697, - "loss": 46.0, - "step": 14271 - }, - { - "epoch": 1.091194066938089, - "grad_norm": 0.0010089966235682368, - "learning_rate": 0.00019999941323368877, - "loss": 46.0, - "step": 14272 - }, - { - "epoch": 1.0912705239214786, - "grad_norm": 0.0017524787690490484, - "learning_rate": 0.00019999941315140208, - "loss": 46.0, - "step": 14273 - }, - { - "epoch": 1.0913469809048684, - "grad_norm": 0.0013489775592461228, - "learning_rate": 0.00019999941306910962, - "loss": 46.0, - "step": 14274 - }, - { - "epoch": 1.0914234378882581, - "grad_norm": 0.012160534970462322, - "learning_rate": 0.0001999994129868114, - "loss": 46.0, - "step": 14275 - }, - { - "epoch": 1.091499894871648, - "grad_norm": 0.0024522680323570967, - "learning_rate": 0.00019999941290450743, - "loss": 46.0, - "step": 14276 - }, - { - "epoch": 1.0915763518550377, - "grad_norm": 0.0019066098611801863, - "learning_rate": 0.00019999941282219767, - "loss": 46.0, - "step": 14277 - }, - { - "epoch": 1.0916528088384272, - "grad_norm": 0.003900516778230667, - "learning_rate": 0.00019999941273988214, - "loss": 46.0, - "step": 14278 - }, - { - "epoch": 1.091729265821817, - "grad_norm": 0.004702542442828417, - "learning_rate": 0.00019999941265756084, - "loss": 46.0, - "step": 14279 - }, - { - "epoch": 1.0918057228052067, - "grad_norm": 0.003026853082701564, - "learning_rate": 0.0001999994125752338, - "loss": 46.0, - "step": 14280 - }, - { - "epoch": 1.0918821797885965, - "grad_norm": 0.0005396785563789308, - "learning_rate": 0.00019999941249290094, - "loss": 46.0, - "step": 14281 - }, - { - "epoch": 1.0919586367719862, - "grad_norm": 0.0006609810516238213, - "learning_rate": 0.00019999941241056235, - "loss": 46.0, - "step": 14282 - }, - { - "epoch": 1.092035093755376, - "grad_norm": 0.0009154931176453829, - "learning_rate": 0.00019999941232821798, - "loss": 46.0, - "step": 14283 - }, - { - "epoch": 1.0921115507387655, - "grad_norm": 0.003077547065913677, - "learning_rate": 0.00019999941224586783, - "loss": 46.0, - "step": 14284 - }, - { - "epoch": 1.0921880077221553, - "grad_norm": 0.0012425247114151716, - "learning_rate": 0.0001999994121635119, - "loss": 46.0, - "step": 14285 - }, - { - "epoch": 1.092264464705545, - "grad_norm": 0.0006585618830285966, - "learning_rate": 0.00019999941208115023, - "loss": 46.0, - "step": 14286 - }, - { - "epoch": 1.0923409216889348, - "grad_norm": 0.0006826651515439153, - "learning_rate": 0.0001999994119987828, - "loss": 46.0, - "step": 14287 - }, - { - "epoch": 1.0924173786723246, - "grad_norm": 0.0012108294758945704, - "learning_rate": 0.00019999941191640956, - "loss": 46.0, - "step": 14288 - }, - { - "epoch": 1.0924938356557141, - "grad_norm": 0.011197608895599842, - "learning_rate": 0.00019999941183403058, - "loss": 46.0, - "step": 14289 - }, - { - "epoch": 1.0925702926391039, - "grad_norm": 0.007502470165491104, - "learning_rate": 0.0001999994117516458, - "loss": 46.0, - "step": 14290 - }, - { - "epoch": 1.0926467496224936, - "grad_norm": 0.017458975315093994, - "learning_rate": 0.0001999994116692553, - "loss": 46.0, - "step": 14291 - }, - { - "epoch": 1.0927232066058834, - "grad_norm": 0.0008480674587190151, - "learning_rate": 0.000199999411586859, - "loss": 46.0, - "step": 14292 - }, - { - "epoch": 1.0927996635892732, - "grad_norm": 0.002832941245287657, - "learning_rate": 0.00019999941150445691, - "loss": 46.0, - "step": 14293 - }, - { - "epoch": 1.092876120572663, - "grad_norm": 0.005975499749183655, - "learning_rate": 0.0001999994114220491, - "loss": 46.0, - "step": 14294 - }, - { - "epoch": 1.0929525775560525, - "grad_norm": 0.003008169587701559, - "learning_rate": 0.00019999941133963547, - "loss": 46.0, - "step": 14295 - }, - { - "epoch": 1.0930290345394422, - "grad_norm": 0.001098928158171475, - "learning_rate": 0.0001999994112572161, - "loss": 46.0, - "step": 14296 - }, - { - "epoch": 1.093105491522832, - "grad_norm": 0.0005411769379861653, - "learning_rate": 0.00019999941117479096, - "loss": 46.0, - "step": 14297 - }, - { - "epoch": 1.0931819485062217, - "grad_norm": 0.0016416804865002632, - "learning_rate": 0.00019999941109236004, - "loss": 46.0, - "step": 14298 - }, - { - "epoch": 1.0932584054896115, - "grad_norm": 0.0010592266917228699, - "learning_rate": 0.00019999941100992336, - "loss": 46.0, - "step": 14299 - }, - { - "epoch": 1.093334862473001, - "grad_norm": 0.002182136056944728, - "learning_rate": 0.0001999994109274809, - "loss": 46.0, - "step": 14300 - }, - { - "epoch": 1.0934113194563908, - "grad_norm": 0.0008108122856356204, - "learning_rate": 0.00019999941084503268, - "loss": 46.0, - "step": 14301 - }, - { - "epoch": 1.0934877764397806, - "grad_norm": 0.003072872292250395, - "learning_rate": 0.00019999941076257868, - "loss": 46.0, - "step": 14302 - }, - { - "epoch": 1.0935642334231703, - "grad_norm": 0.00027640291955322027, - "learning_rate": 0.00019999941068011895, - "loss": 46.0, - "step": 14303 - }, - { - "epoch": 1.09364069040656, - "grad_norm": 0.005797708407044411, - "learning_rate": 0.0001999994105976534, - "loss": 46.0, - "step": 14304 - }, - { - "epoch": 1.0937171473899496, - "grad_norm": 0.002766105579212308, - "learning_rate": 0.0001999994105151821, - "loss": 46.0, - "step": 14305 - }, - { - "epoch": 1.0937936043733394, - "grad_norm": 0.0011098323157057166, - "learning_rate": 0.00019999941043270501, - "loss": 46.0, - "step": 14306 - }, - { - "epoch": 1.0938700613567292, - "grad_norm": 0.0007261222344823182, - "learning_rate": 0.0001999994103502222, - "loss": 46.0, - "step": 14307 - }, - { - "epoch": 1.093946518340119, - "grad_norm": 0.0014111533528193831, - "learning_rate": 0.00019999941026773357, - "loss": 46.0, - "step": 14308 - }, - { - "epoch": 1.0940229753235087, - "grad_norm": 0.0020265085622668266, - "learning_rate": 0.0001999994101852392, - "loss": 46.0, - "step": 14309 - }, - { - "epoch": 1.0940994323068984, - "grad_norm": 0.0014865364646539092, - "learning_rate": 0.00019999941010273906, - "loss": 46.0, - "step": 14310 - }, - { - "epoch": 1.094175889290288, - "grad_norm": 0.0005349652492441237, - "learning_rate": 0.00019999941002023315, - "loss": 46.0, - "step": 14311 - }, - { - "epoch": 1.0942523462736777, - "grad_norm": 0.0008251930703409016, - "learning_rate": 0.00019999940993772146, - "loss": 46.0, - "step": 14312 - }, - { - "epoch": 1.0943288032570675, - "grad_norm": 0.0005950809572823346, - "learning_rate": 0.000199999409855204, - "loss": 46.0, - "step": 14313 - }, - { - "epoch": 1.0944052602404573, - "grad_norm": 0.004502730909734964, - "learning_rate": 0.00019999940977268076, - "loss": 46.0, - "step": 14314 - }, - { - "epoch": 1.094481717223847, - "grad_norm": 0.001285791746340692, - "learning_rate": 0.00019999940969015175, - "loss": 46.0, - "step": 14315 - }, - { - "epoch": 1.0945581742072366, - "grad_norm": 0.0010087513364851475, - "learning_rate": 0.000199999409607617, - "loss": 46.0, - "step": 14316 - }, - { - "epoch": 1.0946346311906263, - "grad_norm": 0.0014537750976160169, - "learning_rate": 0.00019999940952507647, - "loss": 46.0, - "step": 14317 - }, - { - "epoch": 1.094711088174016, - "grad_norm": 0.0007825226639397442, - "learning_rate": 0.00019999940944253017, - "loss": 46.0, - "step": 14318 - }, - { - "epoch": 1.0947875451574058, - "grad_norm": 0.0012522010365501046, - "learning_rate": 0.0001999994093599781, - "loss": 46.0, - "step": 14319 - }, - { - "epoch": 1.0948640021407956, - "grad_norm": 0.0015893217641860247, - "learning_rate": 0.00019999940927742027, - "loss": 46.0, - "step": 14320 - }, - { - "epoch": 1.0949404591241851, - "grad_norm": 0.0011654389090836048, - "learning_rate": 0.00019999940919485665, - "loss": 46.0, - "step": 14321 - }, - { - "epoch": 1.095016916107575, - "grad_norm": 0.0013971192529425025, - "learning_rate": 0.00019999940911228728, - "loss": 46.0, - "step": 14322 - }, - { - "epoch": 1.0950933730909647, - "grad_norm": 0.0007000422920100391, - "learning_rate": 0.0001999994090297121, - "loss": 46.0, - "step": 14323 - }, - { - "epoch": 1.0951698300743544, - "grad_norm": 0.004778878763318062, - "learning_rate": 0.00019999940894713117, - "loss": 46.0, - "step": 14324 - }, - { - "epoch": 1.0952462870577442, - "grad_norm": 0.0007299938006326556, - "learning_rate": 0.0001999994088645445, - "loss": 46.0, - "step": 14325 - }, - { - "epoch": 1.095322744041134, - "grad_norm": 0.0012658416526392102, - "learning_rate": 0.00019999940878195205, - "loss": 46.0, - "step": 14326 - }, - { - "epoch": 1.0953992010245235, - "grad_norm": 0.001013493980281055, - "learning_rate": 0.00019999940869935384, - "loss": 46.0, - "step": 14327 - }, - { - "epoch": 1.0954756580079132, - "grad_norm": 0.0020321214105933905, - "learning_rate": 0.0001999994086167498, - "loss": 46.0, - "step": 14328 - }, - { - "epoch": 1.095552114991303, - "grad_norm": 0.0014854243490844965, - "learning_rate": 0.00019999940853414005, - "loss": 46.0, - "step": 14329 - }, - { - "epoch": 1.0956285719746928, - "grad_norm": 0.0029417916666716337, - "learning_rate": 0.00019999940845152452, - "loss": 46.0, - "step": 14330 - }, - { - "epoch": 1.0957050289580825, - "grad_norm": 0.0057570659555494785, - "learning_rate": 0.00019999940836890322, - "loss": 46.0, - "step": 14331 - }, - { - "epoch": 1.095781485941472, - "grad_norm": 0.0041068303398787975, - "learning_rate": 0.00019999940828627615, - "loss": 46.0, - "step": 14332 - }, - { - "epoch": 1.0958579429248618, - "grad_norm": 0.0025582164525985718, - "learning_rate": 0.0001999994082036433, - "loss": 46.0, - "step": 14333 - }, - { - "epoch": 1.0959343999082516, - "grad_norm": 0.0004594225320033729, - "learning_rate": 0.00019999940812100468, - "loss": 46.0, - "step": 14334 - }, - { - "epoch": 1.0960108568916413, - "grad_norm": 0.007679132744669914, - "learning_rate": 0.0001999994080383603, - "loss": 46.0, - "step": 14335 - }, - { - "epoch": 1.096087313875031, - "grad_norm": 0.0033311406150460243, - "learning_rate": 0.00019999940795571014, - "loss": 46.0, - "step": 14336 - }, - { - "epoch": 1.0961637708584209, - "grad_norm": 0.002244324656203389, - "learning_rate": 0.00019999940787305423, - "loss": 46.0, - "step": 14337 - }, - { - "epoch": 1.0962402278418104, - "grad_norm": 0.002362343482673168, - "learning_rate": 0.0001999994077903925, - "loss": 46.0, - "step": 14338 - }, - { - "epoch": 1.0963166848252002, - "grad_norm": 0.0023635150864720345, - "learning_rate": 0.00019999940770772508, - "loss": 46.0, - "step": 14339 - }, - { - "epoch": 1.09639314180859, - "grad_norm": 0.0016179463127627969, - "learning_rate": 0.00019999940762505184, - "loss": 46.0, - "step": 14340 - }, - { - "epoch": 1.0964695987919797, - "grad_norm": 0.0005304119549691677, - "learning_rate": 0.00019999940754237284, - "loss": 46.0, - "step": 14341 - }, - { - "epoch": 1.0965460557753695, - "grad_norm": 0.0058393292129039764, - "learning_rate": 0.00019999940745968808, - "loss": 46.0, - "step": 14342 - }, - { - "epoch": 1.096622512758759, - "grad_norm": 0.004538960754871368, - "learning_rate": 0.00019999940737699753, - "loss": 46.0, - "step": 14343 - }, - { - "epoch": 1.0966989697421488, - "grad_norm": 0.0038289304357022047, - "learning_rate": 0.00019999940729430123, - "loss": 46.0, - "step": 14344 - }, - { - "epoch": 1.0967754267255385, - "grad_norm": 0.001197592355310917, - "learning_rate": 0.00019999940721159915, - "loss": 46.0, - "step": 14345 - }, - { - "epoch": 1.0968518837089283, - "grad_norm": 0.0008619913132861257, - "learning_rate": 0.0001999994071288913, - "loss": 46.0, - "step": 14346 - }, - { - "epoch": 1.096928340692318, - "grad_norm": 0.0010824321070685983, - "learning_rate": 0.0001999994070461777, - "loss": 46.0, - "step": 14347 - }, - { - "epoch": 1.0970047976757078, - "grad_norm": 0.0008617830462753773, - "learning_rate": 0.00019999940696345831, - "loss": 46.0, - "step": 14348 - }, - { - "epoch": 1.0970812546590973, - "grad_norm": 0.0014405191177502275, - "learning_rate": 0.00019999940688073315, - "loss": 46.0, - "step": 14349 - }, - { - "epoch": 1.097157711642487, - "grad_norm": 0.0021947685163468122, - "learning_rate": 0.00019999940679800223, - "loss": 46.0, - "step": 14350 - }, - { - "epoch": 1.0972341686258769, - "grad_norm": 0.0004924390814267099, - "learning_rate": 0.00019999940671526555, - "loss": 46.0, - "step": 14351 - }, - { - "epoch": 1.0973106256092666, - "grad_norm": 0.0009457893902435899, - "learning_rate": 0.00019999940663252308, - "loss": 46.0, - "step": 14352 - }, - { - "epoch": 1.0973870825926564, - "grad_norm": 0.001278024516068399, - "learning_rate": 0.00019999940654977482, - "loss": 46.0, - "step": 14353 - }, - { - "epoch": 1.097463539576046, - "grad_norm": 0.04226132109761238, - "learning_rate": 0.00019999940646702084, - "loss": 46.0, - "step": 14354 - }, - { - "epoch": 1.0975399965594357, - "grad_norm": 0.0015559179009869695, - "learning_rate": 0.00019999940638426103, - "loss": 46.0, - "step": 14355 - }, - { - "epoch": 1.0976164535428254, - "grad_norm": 0.005004845093935728, - "learning_rate": 0.0001999994063014955, - "loss": 46.0, - "step": 14356 - }, - { - "epoch": 1.0976929105262152, - "grad_norm": 0.0012471572263166308, - "learning_rate": 0.0001999994062187242, - "loss": 46.0, - "step": 14357 - }, - { - "epoch": 1.097769367509605, - "grad_norm": 0.0009397243848070502, - "learning_rate": 0.00019999940613594713, - "loss": 46.0, - "step": 14358 - }, - { - "epoch": 1.0978458244929947, - "grad_norm": 0.00046177723561413586, - "learning_rate": 0.00019999940605316429, - "loss": 46.0, - "step": 14359 - }, - { - "epoch": 1.0979222814763843, - "grad_norm": 0.0020263090264052153, - "learning_rate": 0.00019999940597037566, - "loss": 46.0, - "step": 14360 - }, - { - "epoch": 1.097998738459774, - "grad_norm": 0.0009293400798924267, - "learning_rate": 0.00019999940588758127, - "loss": 46.0, - "step": 14361 - }, - { - "epoch": 1.0980751954431638, - "grad_norm": 0.0010265014134347439, - "learning_rate": 0.0001999994058047811, - "loss": 46.0, - "step": 14362 - }, - { - "epoch": 1.0981516524265535, - "grad_norm": 0.003921338357031345, - "learning_rate": 0.00019999940572197516, - "loss": 46.0, - "step": 14363 - }, - { - "epoch": 1.0982281094099433, - "grad_norm": 0.0016031806590035558, - "learning_rate": 0.0001999994056391635, - "loss": 46.0, - "step": 14364 - }, - { - "epoch": 1.0983045663933328, - "grad_norm": 0.002569162053987384, - "learning_rate": 0.00019999940555634601, - "loss": 46.0, - "step": 14365 - }, - { - "epoch": 1.0983810233767226, - "grad_norm": 0.000720013864338398, - "learning_rate": 0.0001999994054735228, - "loss": 46.0, - "step": 14366 - }, - { - "epoch": 1.0984574803601124, - "grad_norm": 0.002361343940719962, - "learning_rate": 0.00019999940539069377, - "loss": 46.0, - "step": 14367 - }, - { - "epoch": 1.0985339373435021, - "grad_norm": 0.000996338319964707, - "learning_rate": 0.00019999940530785902, - "loss": 46.0, - "step": 14368 - }, - { - "epoch": 1.098610394326892, - "grad_norm": 0.0010644549038261175, - "learning_rate": 0.00019999940522501846, - "loss": 46.0, - "step": 14369 - }, - { - "epoch": 1.0986868513102817, - "grad_norm": 0.0018155085854232311, - "learning_rate": 0.00019999940514217216, - "loss": 46.0, - "step": 14370 - }, - { - "epoch": 1.0987633082936712, - "grad_norm": 0.0008115923847071826, - "learning_rate": 0.0001999994050593201, - "loss": 46.0, - "step": 14371 - }, - { - "epoch": 1.098839765277061, - "grad_norm": 0.001588737010024488, - "learning_rate": 0.00019999940497646224, - "loss": 46.0, - "step": 14372 - }, - { - "epoch": 1.0989162222604507, - "grad_norm": 0.0013079143827781081, - "learning_rate": 0.0001999994048935986, - "loss": 46.0, - "step": 14373 - }, - { - "epoch": 1.0989926792438405, - "grad_norm": 0.0016037931200116873, - "learning_rate": 0.0001999994048107292, - "loss": 46.0, - "step": 14374 - }, - { - "epoch": 1.0990691362272302, - "grad_norm": 0.0015312456525862217, - "learning_rate": 0.00019999940472785406, - "loss": 46.0, - "step": 14375 - }, - { - "epoch": 1.0991455932106198, - "grad_norm": 0.004522494971752167, - "learning_rate": 0.00019999940464497312, - "loss": 46.0, - "step": 14376 - }, - { - "epoch": 1.0992220501940095, - "grad_norm": 0.0021111182868480682, - "learning_rate": 0.0001999994045620864, - "loss": 46.0, - "step": 14377 - }, - { - "epoch": 1.0992985071773993, - "grad_norm": 0.0008027053554542363, - "learning_rate": 0.00019999940447919397, - "loss": 46.0, - "step": 14378 - }, - { - "epoch": 1.099374964160789, - "grad_norm": 0.0007732062367722392, - "learning_rate": 0.00019999940439629571, - "loss": 46.0, - "step": 14379 - }, - { - "epoch": 1.0994514211441788, - "grad_norm": 0.001535898307338357, - "learning_rate": 0.0001999994043133917, - "loss": 46.0, - "step": 14380 - }, - { - "epoch": 1.0995278781275686, - "grad_norm": 0.0068520293571054935, - "learning_rate": 0.00019999940423048193, - "loss": 46.0, - "step": 14381 - }, - { - "epoch": 1.0996043351109581, - "grad_norm": 0.002485432429239154, - "learning_rate": 0.0001999994041475664, - "loss": 46.0, - "step": 14382 - }, - { - "epoch": 1.0996807920943479, - "grad_norm": 0.001896600821055472, - "learning_rate": 0.00019999940406464507, - "loss": 46.0, - "step": 14383 - }, - { - "epoch": 1.0997572490777376, - "grad_norm": 0.001922485651448369, - "learning_rate": 0.000199999403981718, - "loss": 46.0, - "step": 14384 - }, - { - "epoch": 1.0998337060611274, - "grad_norm": 0.002252592472359538, - "learning_rate": 0.00019999940389878513, - "loss": 46.0, - "step": 14385 - }, - { - "epoch": 1.0999101630445172, - "grad_norm": 0.0007943834061734378, - "learning_rate": 0.0001999994038158465, - "loss": 46.0, - "step": 14386 - }, - { - "epoch": 1.0999866200279067, - "grad_norm": 0.002882985631003976, - "learning_rate": 0.00019999940373290214, - "loss": 46.0, - "step": 14387 - }, - { - "epoch": 1.1000630770112965, - "grad_norm": 0.0019011124968528748, - "learning_rate": 0.00019999940364995194, - "loss": 46.0, - "step": 14388 - }, - { - "epoch": 1.1001395339946862, - "grad_norm": 0.0033900458365678787, - "learning_rate": 0.00019999940356699603, - "loss": 46.0, - "step": 14389 - }, - { - "epoch": 1.100215990978076, - "grad_norm": 0.0015945249469950795, - "learning_rate": 0.00019999940348403432, - "loss": 46.0, - "step": 14390 - }, - { - "epoch": 1.1002924479614657, - "grad_norm": 0.0021856757812201977, - "learning_rate": 0.00019999940340106686, - "loss": 46.0, - "step": 14391 - }, - { - "epoch": 1.1003689049448555, - "grad_norm": 0.00046254173503257334, - "learning_rate": 0.00019999940331809362, - "loss": 46.0, - "step": 14392 - }, - { - "epoch": 1.100445361928245, - "grad_norm": 0.0010494188172742724, - "learning_rate": 0.0001999994032351146, - "loss": 46.0, - "step": 14393 - }, - { - "epoch": 1.1005218189116348, - "grad_norm": 0.0008125773165374994, - "learning_rate": 0.0001999994031521298, - "loss": 46.0, - "step": 14394 - }, - { - "epoch": 1.1005982758950246, - "grad_norm": 0.001067085424438119, - "learning_rate": 0.00019999940306913926, - "loss": 46.0, - "step": 14395 - }, - { - "epoch": 1.1006747328784143, - "grad_norm": 0.0013594641350209713, - "learning_rate": 0.00019999940298614296, - "loss": 46.0, - "step": 14396 - }, - { - "epoch": 1.100751189861804, - "grad_norm": 0.0024234233424067497, - "learning_rate": 0.00019999940290314088, - "loss": 46.0, - "step": 14397 - }, - { - "epoch": 1.1008276468451936, - "grad_norm": 0.0005430607707239687, - "learning_rate": 0.00019999940282013304, - "loss": 46.0, - "step": 14398 - }, - { - "epoch": 1.1009041038285834, - "grad_norm": 0.0011346307583153248, - "learning_rate": 0.0001999994027371194, - "loss": 46.0, - "step": 14399 - }, - { - "epoch": 1.1009805608119732, - "grad_norm": 0.019765105098485947, - "learning_rate": 0.0001999994026541, - "loss": 46.0, - "step": 14400 - }, - { - "epoch": 1.101057017795363, - "grad_norm": 0.0009854196105152369, - "learning_rate": 0.00019999940257107483, - "loss": 46.0, - "step": 14401 - }, - { - "epoch": 1.1011334747787527, - "grad_norm": 0.0010334094986319542, - "learning_rate": 0.0001999994024880439, - "loss": 46.0, - "step": 14402 - }, - { - "epoch": 1.1012099317621424, - "grad_norm": 0.0016409477684646845, - "learning_rate": 0.00019999940240500718, - "loss": 46.0, - "step": 14403 - }, - { - "epoch": 1.101286388745532, - "grad_norm": 0.004089921712875366, - "learning_rate": 0.00019999940232196472, - "loss": 46.0, - "step": 14404 - }, - { - "epoch": 1.1013628457289217, - "grad_norm": 0.0032342243939638138, - "learning_rate": 0.00019999940223891649, - "loss": 46.0, - "step": 14405 - }, - { - "epoch": 1.1014393027123115, - "grad_norm": 0.0013204023707658052, - "learning_rate": 0.00019999940215586248, - "loss": 46.0, - "step": 14406 - }, - { - "epoch": 1.1015157596957013, - "grad_norm": 0.0014779911143705249, - "learning_rate": 0.0001999994020728027, - "loss": 46.0, - "step": 14407 - }, - { - "epoch": 1.101592216679091, - "grad_norm": 0.0029300181195139885, - "learning_rate": 0.00019999940198973715, - "loss": 46.0, - "step": 14408 - }, - { - "epoch": 1.1016686736624806, - "grad_norm": 0.008498414419591427, - "learning_rate": 0.00019999940190666582, - "loss": 46.0, - "step": 14409 - }, - { - "epoch": 1.1017451306458703, - "grad_norm": 0.0032741765026003122, - "learning_rate": 0.00019999940182358872, - "loss": 46.0, - "step": 14410 - }, - { - "epoch": 1.10182158762926, - "grad_norm": 0.0019324973691254854, - "learning_rate": 0.00019999940174050587, - "loss": 46.0, - "step": 14411 - }, - { - "epoch": 1.1018980446126498, - "grad_norm": 0.0022848667576909065, - "learning_rate": 0.00019999940165741725, - "loss": 46.0, - "step": 14412 - }, - { - "epoch": 1.1019745015960396, - "grad_norm": 0.0010458447504788637, - "learning_rate": 0.00019999940157432283, - "loss": 46.0, - "step": 14413 - }, - { - "epoch": 1.1020509585794294, - "grad_norm": 0.0016525558894500136, - "learning_rate": 0.00019999940149122267, - "loss": 46.0, - "step": 14414 - }, - { - "epoch": 1.102127415562819, - "grad_norm": 0.0020195390097796917, - "learning_rate": 0.00019999940140811675, - "loss": 46.0, - "step": 14415 - }, - { - "epoch": 1.1022038725462087, - "grad_norm": 0.003891491098329425, - "learning_rate": 0.00019999940132500504, - "loss": 46.0, - "step": 14416 - }, - { - "epoch": 1.1022803295295984, - "grad_norm": 0.0005561204743571579, - "learning_rate": 0.00019999940124188753, - "loss": 46.0, - "step": 14417 - }, - { - "epoch": 1.1023567865129882, - "grad_norm": 0.0009384403238072991, - "learning_rate": 0.0001999994011587643, - "loss": 46.0, - "step": 14418 - }, - { - "epoch": 1.102433243496378, - "grad_norm": 0.0015144681092351675, - "learning_rate": 0.0001999994010756353, - "loss": 46.0, - "step": 14419 - }, - { - "epoch": 1.1025097004797675, - "grad_norm": 0.0014589742058888078, - "learning_rate": 0.00019999940099250048, - "loss": 46.0, - "step": 14420 - }, - { - "epoch": 1.1025861574631572, - "grad_norm": 0.0035015870817005634, - "learning_rate": 0.00019999940090935996, - "loss": 46.0, - "step": 14421 - }, - { - "epoch": 1.102662614446547, - "grad_norm": 0.0008231739629991353, - "learning_rate": 0.00019999940082621366, - "loss": 46.0, - "step": 14422 - }, - { - "epoch": 1.1027390714299368, - "grad_norm": 0.0009577129385434091, - "learning_rate": 0.00019999940074306153, - "loss": 46.0, - "step": 14423 - }, - { - "epoch": 1.1028155284133265, - "grad_norm": 0.0007714586681686342, - "learning_rate": 0.00019999940065990369, - "loss": 46.0, - "step": 14424 - }, - { - "epoch": 1.1028919853967163, - "grad_norm": 0.0008904701680876315, - "learning_rate": 0.00019999940057674004, - "loss": 46.0, - "step": 14425 - }, - { - "epoch": 1.1029684423801058, - "grad_norm": 0.003224317217245698, - "learning_rate": 0.00019999940049357065, - "loss": 46.0, - "step": 14426 - }, - { - "epoch": 1.1030448993634956, - "grad_norm": 0.0016535185277462006, - "learning_rate": 0.00019999940041039545, - "loss": 46.0, - "step": 14427 - }, - { - "epoch": 1.1031213563468854, - "grad_norm": 0.0012804112629964948, - "learning_rate": 0.00019999940032721454, - "loss": 46.0, - "step": 14428 - }, - { - "epoch": 1.1031978133302751, - "grad_norm": 0.0022527556866407394, - "learning_rate": 0.00019999940024402783, - "loss": 46.0, - "step": 14429 - }, - { - "epoch": 1.1032742703136649, - "grad_norm": 0.002016973914578557, - "learning_rate": 0.00019999940016083537, - "loss": 46.0, - "step": 14430 - }, - { - "epoch": 1.1033507272970544, - "grad_norm": 0.000759428832679987, - "learning_rate": 0.0001999994000776371, - "loss": 46.0, - "step": 14431 - }, - { - "epoch": 1.1034271842804442, - "grad_norm": 0.0037978622131049633, - "learning_rate": 0.00019999939999443308, - "loss": 46.0, - "step": 14432 - }, - { - "epoch": 1.103503641263834, - "grad_norm": 0.0011195025872439146, - "learning_rate": 0.0001999993999112233, - "loss": 46.0, - "step": 14433 - }, - { - "epoch": 1.1035800982472237, - "grad_norm": 0.0005680260364897549, - "learning_rate": 0.00019999939982800775, - "loss": 46.0, - "step": 14434 - }, - { - "epoch": 1.1036565552306135, - "grad_norm": 0.001830149907618761, - "learning_rate": 0.00019999939974478642, - "loss": 46.0, - "step": 14435 - }, - { - "epoch": 1.103733012214003, - "grad_norm": 0.013707278296351433, - "learning_rate": 0.00019999939966155932, - "loss": 46.0, - "step": 14436 - }, - { - "epoch": 1.1038094691973928, - "grad_norm": 0.0010511912405490875, - "learning_rate": 0.00019999939957832648, - "loss": 46.0, - "step": 14437 - }, - { - "epoch": 1.1038859261807825, - "grad_norm": 0.005906635895371437, - "learning_rate": 0.0001999993994950878, - "loss": 46.0, - "step": 14438 - }, - { - "epoch": 1.1039623831641723, - "grad_norm": 0.003412651363760233, - "learning_rate": 0.00019999939941184344, - "loss": 46.0, - "step": 14439 - }, - { - "epoch": 1.104038840147562, - "grad_norm": 0.0021113960538059473, - "learning_rate": 0.00019999939932859327, - "loss": 46.0, - "step": 14440 - }, - { - "epoch": 1.1041152971309518, - "grad_norm": 0.0010071038268506527, - "learning_rate": 0.00019999939924533733, - "loss": 46.0, - "step": 14441 - }, - { - "epoch": 1.1041917541143413, - "grad_norm": 0.0021579128224402666, - "learning_rate": 0.00019999939916207562, - "loss": 46.0, - "step": 14442 - }, - { - "epoch": 1.104268211097731, - "grad_norm": 0.007896361872553825, - "learning_rate": 0.00019999939907880813, - "loss": 46.0, - "step": 14443 - }, - { - "epoch": 1.1043446680811209, - "grad_norm": 0.0011740727350115776, - "learning_rate": 0.00019999939899553487, - "loss": 46.0, - "step": 14444 - }, - { - "epoch": 1.1044211250645106, - "grad_norm": 0.0008192333043552935, - "learning_rate": 0.00019999939891225584, - "loss": 46.0, - "step": 14445 - }, - { - "epoch": 1.1044975820479004, - "grad_norm": 0.00061931146774441, - "learning_rate": 0.00019999939882897106, - "loss": 46.0, - "step": 14446 - }, - { - "epoch": 1.10457403903129, - "grad_norm": 0.002528889337554574, - "learning_rate": 0.0001999993987456805, - "loss": 46.0, - "step": 14447 - }, - { - "epoch": 1.1046504960146797, - "grad_norm": 0.0010466751409694552, - "learning_rate": 0.00019999939866238419, - "loss": 46.0, - "step": 14448 - }, - { - "epoch": 1.1047269529980694, - "grad_norm": 0.0010379026643931866, - "learning_rate": 0.0001999993985790821, - "loss": 46.0, - "step": 14449 - }, - { - "epoch": 1.1048034099814592, - "grad_norm": 0.001050668186508119, - "learning_rate": 0.00019999939849577421, - "loss": 46.0, - "step": 14450 - }, - { - "epoch": 1.104879866964849, - "grad_norm": 0.0033809831365942955, - "learning_rate": 0.00019999939841246057, - "loss": 46.0, - "step": 14451 - }, - { - "epoch": 1.1049563239482385, - "grad_norm": 0.000771119084674865, - "learning_rate": 0.00019999939832914118, - "loss": 46.0, - "step": 14452 - }, - { - "epoch": 1.1050327809316283, - "grad_norm": 0.003728782758116722, - "learning_rate": 0.00019999939824581598, - "loss": 46.0, - "step": 14453 - }, - { - "epoch": 1.105109237915018, - "grad_norm": 0.002532541286200285, - "learning_rate": 0.00019999939816248505, - "loss": 46.0, - "step": 14454 - }, - { - "epoch": 1.1051856948984078, - "grad_norm": 0.000899175473023206, - "learning_rate": 0.00019999939807914833, - "loss": 46.0, - "step": 14455 - }, - { - "epoch": 1.1052621518817975, - "grad_norm": 0.0028807383496314287, - "learning_rate": 0.00019999939799580588, - "loss": 46.0, - "step": 14456 - }, - { - "epoch": 1.1053386088651873, - "grad_norm": 0.00307116424664855, - "learning_rate": 0.00019999939791245762, - "loss": 46.0, - "step": 14457 - }, - { - "epoch": 1.1054150658485768, - "grad_norm": 0.0006544000934809446, - "learning_rate": 0.00019999939782910358, - "loss": 46.0, - "step": 14458 - }, - { - "epoch": 1.1054915228319666, - "grad_norm": 0.0010353238321840763, - "learning_rate": 0.00019999939774574378, - "loss": 46.0, - "step": 14459 - }, - { - "epoch": 1.1055679798153564, - "grad_norm": 0.0013588287401944399, - "learning_rate": 0.00019999939766237823, - "loss": 46.0, - "step": 14460 - }, - { - "epoch": 1.1056444367987461, - "grad_norm": 0.015461375936865807, - "learning_rate": 0.00019999939757900693, - "loss": 46.0, - "step": 14461 - }, - { - "epoch": 1.105720893782136, - "grad_norm": 0.020595377311110497, - "learning_rate": 0.0001999993974956298, - "loss": 46.0, - "step": 14462 - }, - { - "epoch": 1.1057973507655254, - "grad_norm": 0.0017215183470398188, - "learning_rate": 0.00019999939741224693, - "loss": 46.0, - "step": 14463 - }, - { - "epoch": 1.1058738077489152, - "grad_norm": 0.0008422309765592217, - "learning_rate": 0.0001999993973288583, - "loss": 46.0, - "step": 14464 - }, - { - "epoch": 1.105950264732305, - "grad_norm": 0.005359334871172905, - "learning_rate": 0.0001999993972454639, - "loss": 46.0, - "step": 14465 - }, - { - "epoch": 1.1060267217156947, - "grad_norm": 0.0011890229070559144, - "learning_rate": 0.00019999939716206373, - "loss": 46.0, - "step": 14466 - }, - { - "epoch": 1.1061031786990845, - "grad_norm": 0.003403395414352417, - "learning_rate": 0.0001999993970786578, - "loss": 46.0, - "step": 14467 - }, - { - "epoch": 1.1061796356824742, - "grad_norm": 0.004768850281834602, - "learning_rate": 0.00019999939699524605, - "loss": 46.0, - "step": 14468 - }, - { - "epoch": 1.1062560926658638, - "grad_norm": 0.0024453282821923494, - "learning_rate": 0.00019999939691182857, - "loss": 46.0, - "step": 14469 - }, - { - "epoch": 1.1063325496492535, - "grad_norm": 0.030039310455322266, - "learning_rate": 0.0001999993968284053, - "loss": 46.0, - "step": 14470 - }, - { - "epoch": 1.1064090066326433, - "grad_norm": 0.0004251937789376825, - "learning_rate": 0.0001999993967449763, - "loss": 46.0, - "step": 14471 - }, - { - "epoch": 1.106485463616033, - "grad_norm": 0.0009537142468616366, - "learning_rate": 0.00019999939666154147, - "loss": 46.0, - "step": 14472 - }, - { - "epoch": 1.1065619205994228, - "grad_norm": 0.0007189179304987192, - "learning_rate": 0.00019999939657810095, - "loss": 46.0, - "step": 14473 - }, - { - "epoch": 1.1066383775828124, - "grad_norm": 0.0009143691859208047, - "learning_rate": 0.0001999993964946546, - "loss": 46.0, - "step": 14474 - }, - { - "epoch": 1.1067148345662021, - "grad_norm": 0.0005709800170734525, - "learning_rate": 0.0001999993964112025, - "loss": 46.0, - "step": 14475 - }, - { - "epoch": 1.1067912915495919, - "grad_norm": 0.001992768608033657, - "learning_rate": 0.00019999939632774462, - "loss": 46.0, - "step": 14476 - }, - { - "epoch": 1.1068677485329816, - "grad_norm": 0.0008526010788045824, - "learning_rate": 0.00019999939624428098, - "loss": 46.0, - "step": 14477 - }, - { - "epoch": 1.1069442055163714, - "grad_norm": 0.000938041543122381, - "learning_rate": 0.0001999993961608116, - "loss": 46.0, - "step": 14478 - }, - { - "epoch": 1.1070206624997612, - "grad_norm": 0.0035874040331691504, - "learning_rate": 0.0001999993960773364, - "loss": 46.0, - "step": 14479 - }, - { - "epoch": 1.1070971194831507, - "grad_norm": 0.0009905361803248525, - "learning_rate": 0.00019999939599385543, - "loss": 46.0, - "step": 14480 - }, - { - "epoch": 1.1071735764665405, - "grad_norm": 0.0015274417819455266, - "learning_rate": 0.00019999939591036872, - "loss": 46.0, - "step": 14481 - }, - { - "epoch": 1.1072500334499302, - "grad_norm": 0.0014322145143523812, - "learning_rate": 0.00019999939582687623, - "loss": 46.0, - "step": 14482 - }, - { - "epoch": 1.10732649043332, - "grad_norm": 0.0005529415211640298, - "learning_rate": 0.00019999939574337798, - "loss": 46.0, - "step": 14483 - }, - { - "epoch": 1.1074029474167097, - "grad_norm": 0.000556205166503787, - "learning_rate": 0.00019999939565987394, - "loss": 46.0, - "step": 14484 - }, - { - "epoch": 1.1074794044000993, - "grad_norm": 0.00035779859172180295, - "learning_rate": 0.00019999939557636417, - "loss": 46.0, - "step": 14485 - }, - { - "epoch": 1.107555861383489, - "grad_norm": 0.0037278644740581512, - "learning_rate": 0.00019999939549284862, - "loss": 46.0, - "step": 14486 - }, - { - "epoch": 1.1076323183668788, - "grad_norm": 0.0003472397511359304, - "learning_rate": 0.00019999939540932726, - "loss": 46.0, - "step": 14487 - }, - { - "epoch": 1.1077087753502686, - "grad_norm": 0.0032691857777535915, - "learning_rate": 0.00019999939532580014, - "loss": 46.0, - "step": 14488 - }, - { - "epoch": 1.1077852323336583, - "grad_norm": 0.0006711469613946974, - "learning_rate": 0.00019999939524226727, - "loss": 46.0, - "step": 14489 - }, - { - "epoch": 1.107861689317048, - "grad_norm": 0.0008515675435774028, - "learning_rate": 0.00019999939515872862, - "loss": 46.0, - "step": 14490 - }, - { - "epoch": 1.1079381463004376, - "grad_norm": 0.0011870659654960036, - "learning_rate": 0.0001999993950751842, - "loss": 46.0, - "step": 14491 - }, - { - "epoch": 1.1080146032838274, - "grad_norm": 0.0009953827830031514, - "learning_rate": 0.000199999394991634, - "loss": 46.0, - "step": 14492 - }, - { - "epoch": 1.1080910602672172, - "grad_norm": 0.0007408856763504446, - "learning_rate": 0.00019999939490807807, - "loss": 46.0, - "step": 14493 - }, - { - "epoch": 1.108167517250607, - "grad_norm": 0.0003907881327904761, - "learning_rate": 0.00019999939482451636, - "loss": 46.0, - "step": 14494 - }, - { - "epoch": 1.1082439742339967, - "grad_norm": 0.005370740778744221, - "learning_rate": 0.00019999939474094885, - "loss": 46.0, - "step": 14495 - }, - { - "epoch": 1.1083204312173862, - "grad_norm": 0.003085968317463994, - "learning_rate": 0.00019999939465737562, - "loss": 46.0, - "step": 14496 - }, - { - "epoch": 1.108396888200776, - "grad_norm": 0.0015167782548815012, - "learning_rate": 0.0001999993945737966, - "loss": 46.0, - "step": 14497 - }, - { - "epoch": 1.1084733451841657, - "grad_norm": 0.0006963883060961962, - "learning_rate": 0.00019999939449021179, - "loss": 46.0, - "step": 14498 - }, - { - "epoch": 1.1085498021675555, - "grad_norm": 0.0033322281669825315, - "learning_rate": 0.00019999939440662123, - "loss": 46.0, - "step": 14499 - }, - { - "epoch": 1.1086262591509453, - "grad_norm": 0.0028548554982990026, - "learning_rate": 0.0001999993943230249, - "loss": 46.0, - "step": 14500 - }, - { - "epoch": 1.108702716134335, - "grad_norm": 0.00474637420848012, - "learning_rate": 0.00019999939423942279, - "loss": 46.0, - "step": 14501 - }, - { - "epoch": 1.1087791731177246, - "grad_norm": 0.0037463712505996227, - "learning_rate": 0.00019999939415581491, - "loss": 46.0, - "step": 14502 - }, - { - "epoch": 1.1088556301011143, - "grad_norm": 0.00046203105011954904, - "learning_rate": 0.00019999939407220127, - "loss": 46.0, - "step": 14503 - }, - { - "epoch": 1.108932087084504, - "grad_norm": 0.0007735805120319128, - "learning_rate": 0.00019999939398858185, - "loss": 46.0, - "step": 14504 - }, - { - "epoch": 1.1090085440678938, - "grad_norm": 0.0018727260176092386, - "learning_rate": 0.00019999939390495666, - "loss": 46.0, - "step": 14505 - }, - { - "epoch": 1.1090850010512836, - "grad_norm": 0.0008407104178331792, - "learning_rate": 0.0001999993938213257, - "loss": 46.0, - "step": 14506 - }, - { - "epoch": 1.1091614580346731, - "grad_norm": 0.0007268865592777729, - "learning_rate": 0.00019999939373768899, - "loss": 46.0, - "step": 14507 - }, - { - "epoch": 1.109237915018063, - "grad_norm": 0.0015527295181527734, - "learning_rate": 0.00019999939365404647, - "loss": 46.0, - "step": 14508 - }, - { - "epoch": 1.1093143720014527, - "grad_norm": 0.00415242463350296, - "learning_rate": 0.00019999939357039824, - "loss": 46.0, - "step": 14509 - }, - { - "epoch": 1.1093908289848424, - "grad_norm": 0.0024282834492623806, - "learning_rate": 0.00019999939348674419, - "loss": 46.0, - "step": 14510 - }, - { - "epoch": 1.1094672859682322, - "grad_norm": 0.0038267658092081547, - "learning_rate": 0.00019999939340308438, - "loss": 46.0, - "step": 14511 - }, - { - "epoch": 1.109543742951622, - "grad_norm": 0.0013181344838812947, - "learning_rate": 0.0001999993933194188, - "loss": 46.0, - "step": 14512 - }, - { - "epoch": 1.1096201999350115, - "grad_norm": 0.0017209653742611408, - "learning_rate": 0.00019999939323574748, - "loss": 46.0, - "step": 14513 - }, - { - "epoch": 1.1096966569184012, - "grad_norm": 0.0015143873170018196, - "learning_rate": 0.00019999939315207038, - "loss": 46.0, - "step": 14514 - }, - { - "epoch": 1.109773113901791, - "grad_norm": 0.0033869887702167034, - "learning_rate": 0.00019999939306838748, - "loss": 46.0, - "step": 14515 - }, - { - "epoch": 1.1098495708851808, - "grad_norm": 0.07970468699932098, - "learning_rate": 0.00019999939298469884, - "loss": 46.0, - "step": 14516 - }, - { - "epoch": 1.1099260278685705, - "grad_norm": 0.0011768367839977145, - "learning_rate": 0.00019999939290100445, - "loss": 46.0, - "step": 14517 - }, - { - "epoch": 1.11000248485196, - "grad_norm": 0.0009448364726267755, - "learning_rate": 0.00019999939281730423, - "loss": 46.0, - "step": 14518 - }, - { - "epoch": 1.1100789418353498, - "grad_norm": 0.0035344057250767946, - "learning_rate": 0.0001999993927335983, - "loss": 46.0, - "step": 14519 - }, - { - "epoch": 1.1101553988187396, - "grad_norm": 0.0007960295770317316, - "learning_rate": 0.00019999939264988656, - "loss": 46.0, - "step": 14520 - }, - { - "epoch": 1.1102318558021294, - "grad_norm": 0.0033600088208913803, - "learning_rate": 0.00019999939256616907, - "loss": 46.0, - "step": 14521 - }, - { - "epoch": 1.1103083127855191, - "grad_norm": 0.0007676472305320203, - "learning_rate": 0.0001999993924824458, - "loss": 46.0, - "step": 14522 - }, - { - "epoch": 1.1103847697689089, - "grad_norm": 0.0005052352789789438, - "learning_rate": 0.00019999939239871676, - "loss": 46.0, - "step": 14523 - }, - { - "epoch": 1.1104612267522984, - "grad_norm": 0.0006214513559825718, - "learning_rate": 0.00019999939231498198, - "loss": 46.0, - "step": 14524 - }, - { - "epoch": 1.1105376837356882, - "grad_norm": 0.006707735825330019, - "learning_rate": 0.0001999993922312414, - "loss": 46.0, - "step": 14525 - }, - { - "epoch": 1.110614140719078, - "grad_norm": 0.002007142873480916, - "learning_rate": 0.00019999939214749505, - "loss": 46.0, - "step": 14526 - }, - { - "epoch": 1.1106905977024677, - "grad_norm": 0.002970101311802864, - "learning_rate": 0.00019999939206374293, - "loss": 46.0, - "step": 14527 - }, - { - "epoch": 1.1107670546858575, - "grad_norm": 0.004510781727731228, - "learning_rate": 0.00019999939197998506, - "loss": 46.0, - "step": 14528 - }, - { - "epoch": 1.110843511669247, - "grad_norm": 0.002100117038935423, - "learning_rate": 0.00019999939189622141, - "loss": 46.0, - "step": 14529 - }, - { - "epoch": 1.1109199686526368, - "grad_norm": 0.0030245014932006598, - "learning_rate": 0.000199999391812452, - "loss": 46.0, - "step": 14530 - }, - { - "epoch": 1.1109964256360265, - "grad_norm": 0.0015051182126626372, - "learning_rate": 0.00019999939172867678, - "loss": 46.0, - "step": 14531 - }, - { - "epoch": 1.1110728826194163, - "grad_norm": 0.0005448606098070741, - "learning_rate": 0.00019999939164489584, - "loss": 46.0, - "step": 14532 - }, - { - "epoch": 1.111149339602806, - "grad_norm": 0.0005574658280238509, - "learning_rate": 0.0001999993915611091, - "loss": 46.0, - "step": 14533 - }, - { - "epoch": 1.1112257965861958, - "grad_norm": 0.000571928801946342, - "learning_rate": 0.00019999939147731662, - "loss": 46.0, - "step": 14534 - }, - { - "epoch": 1.1113022535695853, - "grad_norm": 0.002085441257804632, - "learning_rate": 0.00019999939139351836, - "loss": 46.0, - "step": 14535 - }, - { - "epoch": 1.111378710552975, - "grad_norm": 0.0004395263676997274, - "learning_rate": 0.00019999939130971433, - "loss": 46.0, - "step": 14536 - }, - { - "epoch": 1.1114551675363649, - "grad_norm": 0.0010859448229894042, - "learning_rate": 0.00019999939122590453, - "loss": 46.0, - "step": 14537 - }, - { - "epoch": 1.1115316245197546, - "grad_norm": 0.0020721242763102055, - "learning_rate": 0.00019999939114208895, - "loss": 46.0, - "step": 14538 - }, - { - "epoch": 1.1116080815031444, - "grad_norm": 0.002610336057841778, - "learning_rate": 0.0001999993910582676, - "loss": 46.0, - "step": 14539 - }, - { - "epoch": 1.111684538486534, - "grad_norm": 0.0015839739935472608, - "learning_rate": 0.00019999939097444048, - "loss": 46.0, - "step": 14540 - }, - { - "epoch": 1.1117609954699237, - "grad_norm": 0.00048155890544876456, - "learning_rate": 0.00019999939089060758, - "loss": 46.0, - "step": 14541 - }, - { - "epoch": 1.1118374524533134, - "grad_norm": 0.001979676540941, - "learning_rate": 0.00019999939080676894, - "loss": 46.0, - "step": 14542 - }, - { - "epoch": 1.1119139094367032, - "grad_norm": 0.0007771134260110557, - "learning_rate": 0.0001999993907229245, - "loss": 46.0, - "step": 14543 - }, - { - "epoch": 1.111990366420093, - "grad_norm": 0.00046945270150899887, - "learning_rate": 0.00019999939063907433, - "loss": 46.0, - "step": 14544 - }, - { - "epoch": 1.1120668234034827, - "grad_norm": 0.0010695139644667506, - "learning_rate": 0.00019999939055521834, - "loss": 46.0, - "step": 14545 - }, - { - "epoch": 1.1121432803868723, - "grad_norm": 0.002555902348831296, - "learning_rate": 0.00019999939047135663, - "loss": 46.0, - "step": 14546 - }, - { - "epoch": 1.112219737370262, - "grad_norm": 0.0034649772569537163, - "learning_rate": 0.00019999939038748915, - "loss": 46.0, - "step": 14547 - }, - { - "epoch": 1.1122961943536518, - "grad_norm": 0.0009688063291832805, - "learning_rate": 0.00019999939030361586, - "loss": 46.0, - "step": 14548 - }, - { - "epoch": 1.1123726513370416, - "grad_norm": 0.0011843398679047823, - "learning_rate": 0.0001999993902197368, - "loss": 46.0, - "step": 14549 - }, - { - "epoch": 1.1124491083204313, - "grad_norm": 0.0027196116279810667, - "learning_rate": 0.00019999939013585203, - "loss": 46.0, - "step": 14550 - }, - { - "epoch": 1.1125255653038209, - "grad_norm": 0.004095969721674919, - "learning_rate": 0.00019999939005196145, - "loss": 46.0, - "step": 14551 - }, - { - "epoch": 1.1126020222872106, - "grad_norm": 0.0020090772304683924, - "learning_rate": 0.00019999938996806508, - "loss": 46.0, - "step": 14552 - }, - { - "epoch": 1.1126784792706004, - "grad_norm": 0.002264356706291437, - "learning_rate": 0.00019999938988416295, - "loss": 46.0, - "step": 14553 - }, - { - "epoch": 1.1127549362539901, - "grad_norm": 0.0006671007722616196, - "learning_rate": 0.0001999993898002551, - "loss": 46.0, - "step": 14554 - }, - { - "epoch": 1.11283139323738, - "grad_norm": 0.0007893171277828515, - "learning_rate": 0.0001999993897163414, - "loss": 46.0, - "step": 14555 - }, - { - "epoch": 1.1129078502207697, - "grad_norm": 0.0007784123881720006, - "learning_rate": 0.00019999938963242202, - "loss": 46.0, - "step": 14556 - }, - { - "epoch": 1.1129843072041592, - "grad_norm": 0.0009553050622344017, - "learning_rate": 0.0001999993895484968, - "loss": 46.0, - "step": 14557 - }, - { - "epoch": 1.113060764187549, - "grad_norm": 0.001965997740626335, - "learning_rate": 0.00019999938946456584, - "loss": 46.0, - "step": 14558 - }, - { - "epoch": 1.1131372211709387, - "grad_norm": 0.0005890214815735817, - "learning_rate": 0.00019999938938062913, - "loss": 46.0, - "step": 14559 - }, - { - "epoch": 1.1132136781543285, - "grad_norm": 0.007633336819708347, - "learning_rate": 0.00019999938929668662, - "loss": 46.0, - "step": 14560 - }, - { - "epoch": 1.1132901351377182, - "grad_norm": 0.0012276744237169623, - "learning_rate": 0.00019999938921273834, - "loss": 46.0, - "step": 14561 - }, - { - "epoch": 1.1133665921211078, - "grad_norm": 0.0003334197390358895, - "learning_rate": 0.0001999993891287843, - "loss": 46.0, - "step": 14562 - }, - { - "epoch": 1.1134430491044975, - "grad_norm": 0.0008329752599820495, - "learning_rate": 0.00019999938904482448, - "loss": 46.0, - "step": 14563 - }, - { - "epoch": 1.1135195060878873, - "grad_norm": 0.002505991840735078, - "learning_rate": 0.0001999993889608589, - "loss": 46.0, - "step": 14564 - }, - { - "epoch": 1.113595963071277, - "grad_norm": 0.004677955061197281, - "learning_rate": 0.00019999938887688755, - "loss": 46.0, - "step": 14565 - }, - { - "epoch": 1.1136724200546668, - "grad_norm": 0.0009772832272574306, - "learning_rate": 0.00019999938879291046, - "loss": 46.0, - "step": 14566 - }, - { - "epoch": 1.1137488770380564, - "grad_norm": 0.00270912260748446, - "learning_rate": 0.00019999938870892753, - "loss": 46.0, - "step": 14567 - }, - { - "epoch": 1.1138253340214461, - "grad_norm": 0.002612000796943903, - "learning_rate": 0.0001999993886249389, - "loss": 46.0, - "step": 14568 - }, - { - "epoch": 1.1139017910048359, - "grad_norm": 0.0009519907762296498, - "learning_rate": 0.00019999938854094447, - "loss": 46.0, - "step": 14569 - }, - { - "epoch": 1.1139782479882256, - "grad_norm": 0.0007542958483099937, - "learning_rate": 0.00019999938845694426, - "loss": 46.0, - "step": 14570 - }, - { - "epoch": 1.1140547049716154, - "grad_norm": 0.0011578942649066448, - "learning_rate": 0.0001999993883729383, - "loss": 46.0, - "step": 14571 - }, - { - "epoch": 1.1141311619550052, - "grad_norm": 0.002856897423043847, - "learning_rate": 0.00019999938828892656, - "loss": 46.0, - "step": 14572 - }, - { - "epoch": 1.1142076189383947, - "grad_norm": 0.000751401181332767, - "learning_rate": 0.00019999938820490908, - "loss": 46.0, - "step": 14573 - }, - { - "epoch": 1.1142840759217845, - "grad_norm": 0.000736706075258553, - "learning_rate": 0.0001999993881208858, - "loss": 46.0, - "step": 14574 - }, - { - "epoch": 1.1143605329051742, - "grad_norm": 0.006852846127003431, - "learning_rate": 0.00019999938803685674, - "loss": 46.0, - "step": 14575 - }, - { - "epoch": 1.114436989888564, - "grad_norm": 0.0009242200758308172, - "learning_rate": 0.0001999993879528219, - "loss": 46.0, - "step": 14576 - }, - { - "epoch": 1.1145134468719538, - "grad_norm": 0.0026618086267262697, - "learning_rate": 0.00019999938786878136, - "loss": 46.0, - "step": 14577 - }, - { - "epoch": 1.1145899038553433, - "grad_norm": 0.001850903034210205, - "learning_rate": 0.00019999938778473498, - "loss": 46.0, - "step": 14578 - }, - { - "epoch": 1.114666360838733, - "grad_norm": 0.0036684207152575254, - "learning_rate": 0.00019999938770068289, - "loss": 46.0, - "step": 14579 - }, - { - "epoch": 1.1147428178221228, - "grad_norm": 0.0013093024026602507, - "learning_rate": 0.000199999387616625, - "loss": 46.0, - "step": 14580 - }, - { - "epoch": 1.1148192748055126, - "grad_norm": 0.001117786392569542, - "learning_rate": 0.00019999938753256135, - "loss": 46.0, - "step": 14581 - }, - { - "epoch": 1.1148957317889023, - "grad_norm": 0.0009027719497680664, - "learning_rate": 0.0001999993874484919, - "loss": 46.0, - "step": 14582 - }, - { - "epoch": 1.1149721887722919, - "grad_norm": 0.0017418510979041457, - "learning_rate": 0.0001999993873644167, - "loss": 46.0, - "step": 14583 - }, - { - "epoch": 1.1150486457556816, - "grad_norm": 0.003381724003702402, - "learning_rate": 0.00019999938728033572, - "loss": 46.0, - "step": 14584 - }, - { - "epoch": 1.1151251027390714, - "grad_norm": 0.004773173946887255, - "learning_rate": 0.000199999387196249, - "loss": 46.0, - "step": 14585 - }, - { - "epoch": 1.1152015597224612, - "grad_norm": 0.0015445406315848231, - "learning_rate": 0.00019999938711215648, - "loss": 46.0, - "step": 14586 - }, - { - "epoch": 1.115278016705851, - "grad_norm": 0.0010909107513725758, - "learning_rate": 0.0001999993870280582, - "loss": 46.0, - "step": 14587 - }, - { - "epoch": 1.1153544736892407, - "grad_norm": 0.0048500304110348225, - "learning_rate": 0.00019999938694395417, - "loss": 46.0, - "step": 14588 - }, - { - "epoch": 1.1154309306726302, - "grad_norm": 0.0009982322808355093, - "learning_rate": 0.00019999938685984434, - "loss": 46.0, - "step": 14589 - }, - { - "epoch": 1.11550738765602, - "grad_norm": 0.0021196540910750628, - "learning_rate": 0.00019999938677572876, - "loss": 46.0, - "step": 14590 - }, - { - "epoch": 1.1155838446394097, - "grad_norm": 0.002493401989340782, - "learning_rate": 0.0001999993866916074, - "loss": 46.0, - "step": 14591 - }, - { - "epoch": 1.1156603016227995, - "grad_norm": 0.0035793951246887445, - "learning_rate": 0.0001999993866074803, - "loss": 46.0, - "step": 14592 - }, - { - "epoch": 1.1157367586061893, - "grad_norm": 0.001330891507677734, - "learning_rate": 0.00019999938652334737, - "loss": 46.0, - "step": 14593 - }, - { - "epoch": 1.1158132155895788, - "grad_norm": 0.004177140071988106, - "learning_rate": 0.00019999938643920872, - "loss": 46.0, - "step": 14594 - }, - { - "epoch": 1.1158896725729686, - "grad_norm": 0.0014069881290197372, - "learning_rate": 0.0001999993863550643, - "loss": 46.0, - "step": 14595 - }, - { - "epoch": 1.1159661295563583, - "grad_norm": 0.0009850873611867428, - "learning_rate": 0.0001999993862709141, - "loss": 46.0, - "step": 14596 - }, - { - "epoch": 1.116042586539748, - "grad_norm": 0.0017023030668497086, - "learning_rate": 0.00019999938618675813, - "loss": 46.0, - "step": 14597 - }, - { - "epoch": 1.1161190435231378, - "grad_norm": 0.0029525081627070904, - "learning_rate": 0.0001999993861025964, - "loss": 46.0, - "step": 14598 - }, - { - "epoch": 1.1161955005065276, - "grad_norm": 0.0005272942944429815, - "learning_rate": 0.00019999938601842886, - "loss": 46.0, - "step": 14599 - }, - { - "epoch": 1.1162719574899171, - "grad_norm": 0.0010052170837298036, - "learning_rate": 0.00019999938593425558, - "loss": 46.0, - "step": 14600 - }, - { - "epoch": 1.116348414473307, - "grad_norm": 0.001506473054178059, - "learning_rate": 0.00019999938585007655, - "loss": 46.0, - "step": 14601 - }, - { - "epoch": 1.1164248714566967, - "grad_norm": 0.001979702850803733, - "learning_rate": 0.00019999938576589172, - "loss": 46.0, - "step": 14602 - }, - { - "epoch": 1.1165013284400864, - "grad_norm": 0.0018389724427834153, - "learning_rate": 0.00019999938568170114, - "loss": 46.0, - "step": 14603 - }, - { - "epoch": 1.1165777854234762, - "grad_norm": 0.0006304451962932944, - "learning_rate": 0.00019999938559750477, - "loss": 46.0, - "step": 14604 - }, - { - "epoch": 1.1166542424068657, - "grad_norm": 0.0008731254492886364, - "learning_rate": 0.00019999938551330267, - "loss": 46.0, - "step": 14605 - }, - { - "epoch": 1.1167306993902555, - "grad_norm": 0.0010184376733377576, - "learning_rate": 0.00019999938542909475, - "loss": 46.0, - "step": 14606 - }, - { - "epoch": 1.1168071563736452, - "grad_norm": 0.0070178876630961895, - "learning_rate": 0.00019999938534488108, - "loss": 46.0, - "step": 14607 - }, - { - "epoch": 1.116883613357035, - "grad_norm": 0.0007276375545188785, - "learning_rate": 0.00019999938526066164, - "loss": 46.0, - "step": 14608 - }, - { - "epoch": 1.1169600703404248, - "grad_norm": 0.0014580200659111142, - "learning_rate": 0.00019999938517643645, - "loss": 46.0, - "step": 14609 - }, - { - "epoch": 1.1170365273238145, - "grad_norm": 0.0038676836993545294, - "learning_rate": 0.00019999938509220549, - "loss": 46.0, - "step": 14610 - }, - { - "epoch": 1.117112984307204, - "grad_norm": 0.019074708223342896, - "learning_rate": 0.00019999938500796872, - "loss": 46.0, - "step": 14611 - }, - { - "epoch": 1.1171894412905938, - "grad_norm": 0.001472771167755127, - "learning_rate": 0.00019999938492372624, - "loss": 46.0, - "step": 14612 - }, - { - "epoch": 1.1172658982739836, - "grad_norm": 0.0005961598362773657, - "learning_rate": 0.00019999938483947793, - "loss": 46.0, - "step": 14613 - }, - { - "epoch": 1.1173423552573734, - "grad_norm": 0.003704892238602042, - "learning_rate": 0.00019999938475522388, - "loss": 46.0, - "step": 14614 - }, - { - "epoch": 1.1174188122407631, - "grad_norm": 0.001822069170884788, - "learning_rate": 0.00019999938467096408, - "loss": 46.0, - "step": 14615 - }, - { - "epoch": 1.1174952692241527, - "grad_norm": 0.0007231037016026676, - "learning_rate": 0.0001999993845866985, - "loss": 46.0, - "step": 14616 - }, - { - "epoch": 1.1175717262075424, - "grad_norm": 0.0031787478365004063, - "learning_rate": 0.00019999938450242712, - "loss": 46.0, - "step": 14617 - }, - { - "epoch": 1.1176481831909322, - "grad_norm": 0.0012274907203391194, - "learning_rate": 0.00019999938441814998, - "loss": 46.0, - "step": 14618 - }, - { - "epoch": 1.117724640174322, - "grad_norm": 0.0009310300811193883, - "learning_rate": 0.00019999938433386708, - "loss": 46.0, - "step": 14619 - }, - { - "epoch": 1.1178010971577117, - "grad_norm": 0.020797275006771088, - "learning_rate": 0.00019999938424957844, - "loss": 46.0, - "step": 14620 - }, - { - "epoch": 1.1178775541411015, - "grad_norm": 0.005265621468424797, - "learning_rate": 0.000199999384165284, - "loss": 46.0, - "step": 14621 - }, - { - "epoch": 1.117954011124491, - "grad_norm": 0.0006121473270468414, - "learning_rate": 0.00019999938408098378, - "loss": 46.0, - "step": 14622 - }, - { - "epoch": 1.1180304681078808, - "grad_norm": 0.0018558199517428875, - "learning_rate": 0.0001999993839966778, - "loss": 46.0, - "step": 14623 - }, - { - "epoch": 1.1181069250912705, - "grad_norm": 0.0022252004127949476, - "learning_rate": 0.00019999938391236606, - "loss": 46.0, - "step": 14624 - }, - { - "epoch": 1.1181833820746603, - "grad_norm": 0.0009050408843904734, - "learning_rate": 0.00019999938382804855, - "loss": 46.0, - "step": 14625 - }, - { - "epoch": 1.11825983905805, - "grad_norm": 0.0010619487147778273, - "learning_rate": 0.00019999938374372527, - "loss": 46.0, - "step": 14626 - }, - { - "epoch": 1.1183362960414396, - "grad_norm": 0.0008505067089572549, - "learning_rate": 0.0001999993836593962, - "loss": 46.0, - "step": 14627 - }, - { - "epoch": 1.1184127530248293, - "grad_norm": 0.007907591760158539, - "learning_rate": 0.00019999938357506138, - "loss": 46.0, - "step": 14628 - }, - { - "epoch": 1.118489210008219, - "grad_norm": 0.0020460919477045536, - "learning_rate": 0.00019999938349072078, - "loss": 46.0, - "step": 14629 - }, - { - "epoch": 1.1185656669916089, - "grad_norm": 0.0015377800446003675, - "learning_rate": 0.00019999938340637443, - "loss": 46.0, - "step": 14630 - }, - { - "epoch": 1.1186421239749986, - "grad_norm": 0.0008151123765856028, - "learning_rate": 0.0001999993833220223, - "loss": 46.0, - "step": 14631 - }, - { - "epoch": 1.1187185809583884, - "grad_norm": 0.001854747999459505, - "learning_rate": 0.00019999938323766442, - "loss": 46.0, - "step": 14632 - }, - { - "epoch": 1.118795037941778, - "grad_norm": 0.0012854618253186345, - "learning_rate": 0.00019999938315330075, - "loss": 46.0, - "step": 14633 - }, - { - "epoch": 1.1188714949251677, - "grad_norm": 0.000622910913079977, - "learning_rate": 0.00019999938306893128, - "loss": 46.0, - "step": 14634 - }, - { - "epoch": 1.1189479519085574, - "grad_norm": 0.0028844457119703293, - "learning_rate": 0.00019999938298455606, - "loss": 46.0, - "step": 14635 - }, - { - "epoch": 1.1190244088919472, - "grad_norm": 0.004551828373223543, - "learning_rate": 0.0001999993829001751, - "loss": 46.0, - "step": 14636 - }, - { - "epoch": 1.119100865875337, - "grad_norm": 0.0008320729248225689, - "learning_rate": 0.00019999938281578837, - "loss": 46.0, - "step": 14637 - }, - { - "epoch": 1.1191773228587265, - "grad_norm": 0.00041060420335270464, - "learning_rate": 0.00019999938273139583, - "loss": 46.0, - "step": 14638 - }, - { - "epoch": 1.1192537798421163, - "grad_norm": 0.002745057689025998, - "learning_rate": 0.00019999938264699755, - "loss": 46.0, - "step": 14639 - }, - { - "epoch": 1.119330236825506, - "grad_norm": 0.0022486604284495115, - "learning_rate": 0.0001999993825625935, - "loss": 46.0, - "step": 14640 - }, - { - "epoch": 1.1194066938088958, - "grad_norm": 0.0021962022874504328, - "learning_rate": 0.00019999938247818367, - "loss": 46.0, - "step": 14641 - }, - { - "epoch": 1.1194831507922856, - "grad_norm": 0.0005597159615717828, - "learning_rate": 0.00019999938239376807, - "loss": 46.0, - "step": 14642 - }, - { - "epoch": 1.1195596077756753, - "grad_norm": 0.0004508441488724202, - "learning_rate": 0.00019999938230934672, - "loss": 46.0, - "step": 14643 - }, - { - "epoch": 1.1196360647590649, - "grad_norm": 0.0014569774502888322, - "learning_rate": 0.0001999993822249196, - "loss": 46.0, - "step": 14644 - }, - { - "epoch": 1.1197125217424546, - "grad_norm": 0.008597142994403839, - "learning_rate": 0.00019999938214048667, - "loss": 46.0, - "step": 14645 - }, - { - "epoch": 1.1197889787258444, - "grad_norm": 0.004099604208022356, - "learning_rate": 0.000199999382056048, - "loss": 46.0, - "step": 14646 - }, - { - "epoch": 1.1198654357092341, - "grad_norm": 0.0025996100157499313, - "learning_rate": 0.00019999938197160354, - "loss": 46.0, - "step": 14647 - }, - { - "epoch": 1.119941892692624, - "grad_norm": 0.0016507940599694848, - "learning_rate": 0.00019999938188715335, - "loss": 46.0, - "step": 14648 - }, - { - "epoch": 1.1200183496760134, - "grad_norm": 0.0009958160808309913, - "learning_rate": 0.00019999938180269736, - "loss": 46.0, - "step": 14649 - }, - { - "epoch": 1.1200948066594032, - "grad_norm": 0.0006161693017929792, - "learning_rate": 0.0001999993817182356, - "loss": 46.0, - "step": 14650 - }, - { - "epoch": 1.120171263642793, - "grad_norm": 0.0014730727998539805, - "learning_rate": 0.0001999993816337681, - "loss": 46.0, - "step": 14651 - }, - { - "epoch": 1.1202477206261827, - "grad_norm": 0.0004374423297122121, - "learning_rate": 0.0001999993815492948, - "loss": 46.0, - "step": 14652 - }, - { - "epoch": 1.1203241776095725, - "grad_norm": 0.001653553918004036, - "learning_rate": 0.00019999938146481573, - "loss": 46.0, - "step": 14653 - }, - { - "epoch": 1.1204006345929622, - "grad_norm": 0.0005684678326360881, - "learning_rate": 0.00019999938138033093, - "loss": 46.0, - "step": 14654 - }, - { - "epoch": 1.1204770915763518, - "grad_norm": 0.004800945520401001, - "learning_rate": 0.00019999938129584033, - "loss": 46.0, - "step": 14655 - }, - { - "epoch": 1.1205535485597415, - "grad_norm": 0.0007770388037897646, - "learning_rate": 0.00019999938121134395, - "loss": 46.0, - "step": 14656 - }, - { - "epoch": 1.1206300055431313, - "grad_norm": 0.0005487881717272103, - "learning_rate": 0.00019999938112684183, - "loss": 46.0, - "step": 14657 - }, - { - "epoch": 1.120706462526521, - "grad_norm": 0.0012454709503799677, - "learning_rate": 0.0001999993810423339, - "loss": 46.0, - "step": 14658 - }, - { - "epoch": 1.1207829195099108, - "grad_norm": 0.0006934825796633959, - "learning_rate": 0.00019999938095782022, - "loss": 46.0, - "step": 14659 - }, - { - "epoch": 1.1208593764933004, - "grad_norm": 0.0016738050617277622, - "learning_rate": 0.0001999993808733008, - "loss": 46.0, - "step": 14660 - }, - { - "epoch": 1.1209358334766901, - "grad_norm": 0.0009980673203244805, - "learning_rate": 0.0001999993807887756, - "loss": 46.0, - "step": 14661 - }, - { - "epoch": 1.1210122904600799, - "grad_norm": 0.0011886612046509981, - "learning_rate": 0.0001999993807042446, - "loss": 46.0, - "step": 14662 - }, - { - "epoch": 1.1210887474434696, - "grad_norm": 0.0004915367462672293, - "learning_rate": 0.00019999938061970784, - "loss": 46.0, - "step": 14663 - }, - { - "epoch": 1.1211652044268594, - "grad_norm": 0.0027914049569517374, - "learning_rate": 0.00019999938053516533, - "loss": 46.0, - "step": 14664 - }, - { - "epoch": 1.1212416614102492, - "grad_norm": 0.0018157025333493948, - "learning_rate": 0.00019999938045061705, - "loss": 46.0, - "step": 14665 - }, - { - "epoch": 1.1213181183936387, - "grad_norm": 0.001068687648512423, - "learning_rate": 0.00019999938036606297, - "loss": 46.0, - "step": 14666 - }, - { - "epoch": 1.1213945753770285, - "grad_norm": 0.0029094095807522535, - "learning_rate": 0.00019999938028150314, - "loss": 46.0, - "step": 14667 - }, - { - "epoch": 1.1214710323604182, - "grad_norm": 0.003869979875162244, - "learning_rate": 0.00019999938019693754, - "loss": 46.0, - "step": 14668 - }, - { - "epoch": 1.121547489343808, - "grad_norm": 0.0025615976192057133, - "learning_rate": 0.00019999938011236616, - "loss": 46.0, - "step": 14669 - }, - { - "epoch": 1.1216239463271978, - "grad_norm": 0.0006732771289534867, - "learning_rate": 0.00019999938002778904, - "loss": 46.0, - "step": 14670 - }, - { - "epoch": 1.1217004033105873, - "grad_norm": 0.0014956679660826921, - "learning_rate": 0.00019999937994320612, - "loss": 46.0, - "step": 14671 - }, - { - "epoch": 1.121776860293977, - "grad_norm": 0.005253876093775034, - "learning_rate": 0.00019999937985861746, - "loss": 46.0, - "step": 14672 - }, - { - "epoch": 1.1218533172773668, - "grad_norm": 0.003075046231970191, - "learning_rate": 0.000199999379774023, - "loss": 46.0, - "step": 14673 - }, - { - "epoch": 1.1219297742607566, - "grad_norm": 0.0010864182841032743, - "learning_rate": 0.0001999993796894228, - "loss": 46.0, - "step": 14674 - }, - { - "epoch": 1.1220062312441463, - "grad_norm": 0.00045925957965664566, - "learning_rate": 0.0001999993796048168, - "loss": 46.0, - "step": 14675 - }, - { - "epoch": 1.122082688227536, - "grad_norm": 0.004406673833727837, - "learning_rate": 0.00019999937952020505, - "loss": 46.0, - "step": 14676 - }, - { - "epoch": 1.1221591452109256, - "grad_norm": 0.0021077890414744616, - "learning_rate": 0.00019999937943558752, - "loss": 46.0, - "step": 14677 - }, - { - "epoch": 1.1222356021943154, - "grad_norm": 0.0007649871986359358, - "learning_rate": 0.00019999937935096424, - "loss": 46.0, - "step": 14678 - }, - { - "epoch": 1.1223120591777052, - "grad_norm": 0.004702277481555939, - "learning_rate": 0.00019999937926633516, - "loss": 46.0, - "step": 14679 - }, - { - "epoch": 1.122388516161095, - "grad_norm": 0.0013283280422911048, - "learning_rate": 0.00019999937918170033, - "loss": 46.0, - "step": 14680 - }, - { - "epoch": 1.1224649731444847, - "grad_norm": 0.0005173981771804392, - "learning_rate": 0.00019999937909705973, - "loss": 46.0, - "step": 14681 - }, - { - "epoch": 1.1225414301278742, - "grad_norm": 0.005242025945335627, - "learning_rate": 0.00019999937901241338, - "loss": 46.0, - "step": 14682 - }, - { - "epoch": 1.122617887111264, - "grad_norm": 0.0023013725876808167, - "learning_rate": 0.0001999993789277612, - "loss": 46.0, - "step": 14683 - }, - { - "epoch": 1.1226943440946537, - "grad_norm": 0.00021133584959898144, - "learning_rate": 0.00019999937884310328, - "loss": 46.0, - "step": 14684 - }, - { - "epoch": 1.1227708010780435, - "grad_norm": 0.03928922489285469, - "learning_rate": 0.00019999937875843964, - "loss": 46.0, - "step": 14685 - }, - { - "epoch": 1.1228472580614333, - "grad_norm": 0.0007286184700205922, - "learning_rate": 0.00019999937867377018, - "loss": 46.0, - "step": 14686 - }, - { - "epoch": 1.122923715044823, - "grad_norm": 0.0019904032815247774, - "learning_rate": 0.00019999937858909496, - "loss": 46.0, - "step": 14687 - }, - { - "epoch": 1.1230001720282126, - "grad_norm": 0.0006558075547218323, - "learning_rate": 0.00019999937850441398, - "loss": 46.0, - "step": 14688 - }, - { - "epoch": 1.1230766290116023, - "grad_norm": 0.011617771349847317, - "learning_rate": 0.0001999993784197272, - "loss": 46.0, - "step": 14689 - }, - { - "epoch": 1.123153085994992, - "grad_norm": 0.0013864102074876428, - "learning_rate": 0.00019999937833503468, - "loss": 46.0, - "step": 14690 - }, - { - "epoch": 1.1232295429783818, - "grad_norm": 0.0004177753289695829, - "learning_rate": 0.0001999993782503364, - "loss": 46.0, - "step": 14691 - }, - { - "epoch": 1.1233059999617716, - "grad_norm": 0.0035847309045493603, - "learning_rate": 0.00019999937816563232, - "loss": 46.0, - "step": 14692 - }, - { - "epoch": 1.1233824569451611, - "grad_norm": 0.0009797184029594064, - "learning_rate": 0.0001999993780809225, - "loss": 46.0, - "step": 14693 - }, - { - "epoch": 1.123458913928551, - "grad_norm": 0.0007893379661254585, - "learning_rate": 0.00019999937799620687, - "loss": 46.0, - "step": 14694 - }, - { - "epoch": 1.1235353709119407, - "grad_norm": 0.0011319953482598066, - "learning_rate": 0.00019999937791148552, - "loss": 46.0, - "step": 14695 - }, - { - "epoch": 1.1236118278953304, - "grad_norm": 0.009061713702976704, - "learning_rate": 0.00019999937782675837, - "loss": 46.0, - "step": 14696 - }, - { - "epoch": 1.1236882848787202, - "grad_norm": 0.001112721860408783, - "learning_rate": 0.00019999937774202545, - "loss": 46.0, - "step": 14697 - }, - { - "epoch": 1.1237647418621097, - "grad_norm": 0.0010878619505092502, - "learning_rate": 0.00019999937765728678, - "loss": 46.0, - "step": 14698 - }, - { - "epoch": 1.1238411988454995, - "grad_norm": 0.0012675655307248235, - "learning_rate": 0.00019999937757254232, - "loss": 46.0, - "step": 14699 - }, - { - "epoch": 1.1239176558288893, - "grad_norm": 0.002784379757940769, - "learning_rate": 0.0001999993774877921, - "loss": 46.0, - "step": 14700 - }, - { - "epoch": 1.123994112812279, - "grad_norm": 0.0014011251041665673, - "learning_rate": 0.00019999937740303612, - "loss": 46.0, - "step": 14701 - }, - { - "epoch": 1.1240705697956688, - "grad_norm": 0.001019007759168744, - "learning_rate": 0.00019999937731827436, - "loss": 46.0, - "step": 14702 - }, - { - "epoch": 1.1241470267790585, - "grad_norm": 0.005811325740069151, - "learning_rate": 0.00019999937723350682, - "loss": 46.0, - "step": 14703 - }, - { - "epoch": 1.124223483762448, - "grad_norm": 0.010807032696902752, - "learning_rate": 0.00019999937714873354, - "loss": 46.0, - "step": 14704 - }, - { - "epoch": 1.1242999407458378, - "grad_norm": 0.012497744522988796, - "learning_rate": 0.00019999937706395446, - "loss": 46.0, - "step": 14705 - }, - { - "epoch": 1.1243763977292276, - "grad_norm": 0.00172276864759624, - "learning_rate": 0.0001999993769791696, - "loss": 46.0, - "step": 14706 - }, - { - "epoch": 1.1244528547126174, - "grad_norm": 0.0007320116274058819, - "learning_rate": 0.000199999376894379, - "loss": 46.0, - "step": 14707 - }, - { - "epoch": 1.1245293116960071, - "grad_norm": 0.00480728829279542, - "learning_rate": 0.00019999937680958263, - "loss": 46.0, - "step": 14708 - }, - { - "epoch": 1.1246057686793967, - "grad_norm": 0.0018890599021688104, - "learning_rate": 0.0001999993767247805, - "loss": 46.0, - "step": 14709 - }, - { - "epoch": 1.1246822256627864, - "grad_norm": 0.0009019332937896252, - "learning_rate": 0.00019999937663997257, - "loss": 46.0, - "step": 14710 - }, - { - "epoch": 1.1247586826461762, - "grad_norm": 0.0010031972778961062, - "learning_rate": 0.00019999937655515887, - "loss": 46.0, - "step": 14711 - }, - { - "epoch": 1.124835139629566, - "grad_norm": 0.001480329898186028, - "learning_rate": 0.00019999937647033943, - "loss": 46.0, - "step": 14712 - }, - { - "epoch": 1.1249115966129557, - "grad_norm": 0.000891718256752938, - "learning_rate": 0.00019999937638551422, - "loss": 46.0, - "step": 14713 - }, - { - "epoch": 1.1249880535963452, - "grad_norm": 0.006402339320629835, - "learning_rate": 0.0001999993763006832, - "loss": 46.0, - "step": 14714 - }, - { - "epoch": 1.125064510579735, - "grad_norm": 0.0023910673335194588, - "learning_rate": 0.00019999937621584645, - "loss": 46.0, - "step": 14715 - }, - { - "epoch": 1.1251409675631248, - "grad_norm": 0.0026635779067873955, - "learning_rate": 0.00019999937613100391, - "loss": 46.0, - "step": 14716 - }, - { - "epoch": 1.1252174245465145, - "grad_norm": 0.0008241531904786825, - "learning_rate": 0.0001999993760461556, - "loss": 46.0, - "step": 14717 - }, - { - "epoch": 1.1252938815299043, - "grad_norm": 0.006887366995215416, - "learning_rate": 0.00019999937596130155, - "loss": 46.0, - "step": 14718 - }, - { - "epoch": 1.125370338513294, - "grad_norm": 0.007253400515764952, - "learning_rate": 0.0001999993758764417, - "loss": 46.0, - "step": 14719 - }, - { - "epoch": 1.1254467954966836, - "grad_norm": 0.004404946696013212, - "learning_rate": 0.0001999993757915761, - "loss": 46.0, - "step": 14720 - }, - { - "epoch": 1.1255232524800733, - "grad_norm": 0.0009496635757386684, - "learning_rate": 0.00019999937570670473, - "loss": 46.0, - "step": 14721 - }, - { - "epoch": 1.125599709463463, - "grad_norm": 0.002452364657074213, - "learning_rate": 0.00019999937562182758, - "loss": 46.0, - "step": 14722 - }, - { - "epoch": 1.1256761664468529, - "grad_norm": 0.0047640809789299965, - "learning_rate": 0.00019999937553694466, - "loss": 46.0, - "step": 14723 - }, - { - "epoch": 1.1257526234302426, - "grad_norm": 0.0006687306449748576, - "learning_rate": 0.000199999375452056, - "loss": 46.0, - "step": 14724 - }, - { - "epoch": 1.1258290804136322, - "grad_norm": 0.0014523823047056794, - "learning_rate": 0.0001999993753671615, - "loss": 46.0, - "step": 14725 - }, - { - "epoch": 1.125905537397022, - "grad_norm": 0.0005080315750092268, - "learning_rate": 0.00019999937528226126, - "loss": 46.0, - "step": 14726 - }, - { - "epoch": 1.1259819943804117, - "grad_norm": 0.0014898290392011404, - "learning_rate": 0.00019999937519735527, - "loss": 46.0, - "step": 14727 - }, - { - "epoch": 1.1260584513638014, - "grad_norm": 0.0032106288708746433, - "learning_rate": 0.00019999937511244352, - "loss": 46.0, - "step": 14728 - }, - { - "epoch": 1.1261349083471912, - "grad_norm": 0.0011365648824721575, - "learning_rate": 0.00019999937502752598, - "loss": 46.0, - "step": 14729 - }, - { - "epoch": 1.126211365330581, - "grad_norm": 0.0010857610031962395, - "learning_rate": 0.0001999993749426027, - "loss": 46.0, - "step": 14730 - }, - { - "epoch": 1.1262878223139705, - "grad_norm": 0.0010345472255721688, - "learning_rate": 0.00019999937485767362, - "loss": 46.0, - "step": 14731 - }, - { - "epoch": 1.1263642792973603, - "grad_norm": 0.0025925934314727783, - "learning_rate": 0.00019999937477273877, - "loss": 46.0, - "step": 14732 - }, - { - "epoch": 1.12644073628075, - "grad_norm": 0.003173497272655368, - "learning_rate": 0.00019999937468779817, - "loss": 46.0, - "step": 14733 - }, - { - "epoch": 1.1265171932641398, - "grad_norm": 0.004784189164638519, - "learning_rate": 0.00019999937460285177, - "loss": 46.0, - "step": 14734 - }, - { - "epoch": 1.1265936502475296, - "grad_norm": 0.0006555349100381136, - "learning_rate": 0.00019999937451789962, - "loss": 46.0, - "step": 14735 - }, - { - "epoch": 1.126670107230919, - "grad_norm": 0.0005574798560701311, - "learning_rate": 0.00019999937443294173, - "loss": 46.0, - "step": 14736 - }, - { - "epoch": 1.1267465642143089, - "grad_norm": 0.0004293904057703912, - "learning_rate": 0.000199999374347978, - "loss": 46.0, - "step": 14737 - }, - { - "epoch": 1.1268230211976986, - "grad_norm": 0.002910550683736801, - "learning_rate": 0.00019999937426300857, - "loss": 46.0, - "step": 14738 - }, - { - "epoch": 1.1268994781810884, - "grad_norm": 0.005198165308684111, - "learning_rate": 0.0001999993741780333, - "loss": 46.0, - "step": 14739 - }, - { - "epoch": 1.1269759351644781, - "grad_norm": 0.0028310639318078756, - "learning_rate": 0.00019999937409305235, - "loss": 46.0, - "step": 14740 - }, - { - "epoch": 1.127052392147868, - "grad_norm": 0.0007997811189852655, - "learning_rate": 0.00019999937400806556, - "loss": 46.0, - "step": 14741 - }, - { - "epoch": 1.1271288491312574, - "grad_norm": 0.001436564140021801, - "learning_rate": 0.00019999937392307305, - "loss": 46.0, - "step": 14742 - }, - { - "epoch": 1.1272053061146472, - "grad_norm": 0.01144907996058464, - "learning_rate": 0.00019999937383807472, - "loss": 46.0, - "step": 14743 - }, - { - "epoch": 1.127281763098037, - "grad_norm": 0.0005288422107696533, - "learning_rate": 0.00019999937375307064, - "loss": 46.0, - "step": 14744 - }, - { - "epoch": 1.1273582200814267, - "grad_norm": 0.004587346687912941, - "learning_rate": 0.00019999937366806082, - "loss": 46.0, - "step": 14745 - }, - { - "epoch": 1.1274346770648165, - "grad_norm": 0.0006087794899940491, - "learning_rate": 0.0001999993735830452, - "loss": 46.0, - "step": 14746 - }, - { - "epoch": 1.127511134048206, - "grad_norm": 0.0010498225456103683, - "learning_rate": 0.0001999993734980238, - "loss": 46.0, - "step": 14747 - }, - { - "epoch": 1.1275875910315958, - "grad_norm": 0.0013251262716948986, - "learning_rate": 0.00019999937341299667, - "loss": 46.0, - "step": 14748 - }, - { - "epoch": 1.1276640480149855, - "grad_norm": 0.001476874342188239, - "learning_rate": 0.00019999937332796375, - "loss": 46.0, - "step": 14749 - }, - { - "epoch": 1.1277405049983753, - "grad_norm": 0.002560973633080721, - "learning_rate": 0.00019999937324292503, - "loss": 46.0, - "step": 14750 - }, - { - "epoch": 1.127816961981765, - "grad_norm": 0.009889591485261917, - "learning_rate": 0.00019999937315788057, - "loss": 46.0, - "step": 14751 - }, - { - "epoch": 1.1278934189651548, - "grad_norm": 0.0006871015066280961, - "learning_rate": 0.00019999937307283035, - "loss": 46.0, - "step": 14752 - }, - { - "epoch": 1.1279698759485444, - "grad_norm": 0.007508058566600084, - "learning_rate": 0.00019999937298777437, - "loss": 46.0, - "step": 14753 - }, - { - "epoch": 1.1280463329319341, - "grad_norm": 0.000655336189083755, - "learning_rate": 0.00019999937290271258, - "loss": 46.0, - "step": 14754 - }, - { - "epoch": 1.1281227899153239, - "grad_norm": 0.0004830614780075848, - "learning_rate": 0.00019999937281764502, - "loss": 46.0, - "step": 14755 - }, - { - "epoch": 1.1281992468987136, - "grad_norm": 0.0006952632684260607, - "learning_rate": 0.00019999937273257174, - "loss": 46.0, - "step": 14756 - }, - { - "epoch": 1.1282757038821034, - "grad_norm": 0.00028462946647778153, - "learning_rate": 0.00019999937264749264, - "loss": 46.0, - "step": 14757 - }, - { - "epoch": 1.128352160865493, - "grad_norm": 0.0007418460445478559, - "learning_rate": 0.0001999993725624078, - "loss": 46.0, - "step": 14758 - }, - { - "epoch": 1.1284286178488827, - "grad_norm": 0.00320740370079875, - "learning_rate": 0.00019999937247731716, - "loss": 46.0, - "step": 14759 - }, - { - "epoch": 1.1285050748322725, - "grad_norm": 0.000649608438834548, - "learning_rate": 0.00019999937239222079, - "loss": 46.0, - "step": 14760 - }, - { - "epoch": 1.1285815318156622, - "grad_norm": 0.0008807418635115027, - "learning_rate": 0.00019999937230711864, - "loss": 46.0, - "step": 14761 - }, - { - "epoch": 1.128657988799052, - "grad_norm": 0.0010668383911252022, - "learning_rate": 0.00019999937222201072, - "loss": 46.0, - "step": 14762 - }, - { - "epoch": 1.1287344457824418, - "grad_norm": 0.0008774464367888868, - "learning_rate": 0.00019999937213689703, - "loss": 46.0, - "step": 14763 - }, - { - "epoch": 1.1288109027658313, - "grad_norm": 0.0014249954838305712, - "learning_rate": 0.00019999937205177756, - "loss": 46.0, - "step": 14764 - }, - { - "epoch": 1.128887359749221, - "grad_norm": 0.0014622171875089407, - "learning_rate": 0.00019999937196665232, - "loss": 46.0, - "step": 14765 - }, - { - "epoch": 1.1289638167326108, - "grad_norm": 0.0015475729014724493, - "learning_rate": 0.0001999993718815213, - "loss": 46.0, - "step": 14766 - }, - { - "epoch": 1.1290402737160006, - "grad_norm": 0.0008325300877913833, - "learning_rate": 0.00019999937179638455, - "loss": 46.0, - "step": 14767 - }, - { - "epoch": 1.1291167306993903, - "grad_norm": 0.0060522365383803844, - "learning_rate": 0.00019999937171124202, - "loss": 46.0, - "step": 14768 - }, - { - "epoch": 1.1291931876827799, - "grad_norm": 0.007760223001241684, - "learning_rate": 0.0001999993716260937, - "loss": 46.0, - "step": 14769 - }, - { - "epoch": 1.1292696446661696, - "grad_norm": 0.0018330486491322517, - "learning_rate": 0.0001999993715409396, - "loss": 46.0, - "step": 14770 - }, - { - "epoch": 1.1293461016495594, - "grad_norm": 0.0007562093669548631, - "learning_rate": 0.00019999937145577978, - "loss": 46.0, - "step": 14771 - }, - { - "epoch": 1.1294225586329492, - "grad_norm": 0.005484177730977535, - "learning_rate": 0.00019999937137061416, - "loss": 46.0, - "step": 14772 - }, - { - "epoch": 1.129499015616339, - "grad_norm": 0.008028887212276459, - "learning_rate": 0.00019999937128544276, - "loss": 46.0, - "step": 14773 - }, - { - "epoch": 1.1295754725997287, - "grad_norm": 0.005428973585367203, - "learning_rate": 0.0001999993712002656, - "loss": 46.0, - "step": 14774 - }, - { - "epoch": 1.1296519295831182, - "grad_norm": 0.0040501998737454414, - "learning_rate": 0.00019999937111508267, - "loss": 46.0, - "step": 14775 - }, - { - "epoch": 1.129728386566508, - "grad_norm": 0.009396358393132687, - "learning_rate": 0.00019999937102989398, - "loss": 46.0, - "step": 14776 - }, - { - "epoch": 1.1298048435498977, - "grad_norm": 0.0012576605658978224, - "learning_rate": 0.0001999993709446995, - "loss": 46.0, - "step": 14777 - }, - { - "epoch": 1.1298813005332875, - "grad_norm": 0.0014676004648208618, - "learning_rate": 0.0001999993708594993, - "loss": 46.0, - "step": 14778 - }, - { - "epoch": 1.1299577575166773, - "grad_norm": 0.0005422924878075719, - "learning_rate": 0.00019999937077429326, - "loss": 46.0, - "step": 14779 - }, - { - "epoch": 1.1300342145000668, - "grad_norm": 0.004032416269183159, - "learning_rate": 0.0001999993706890815, - "loss": 46.0, - "step": 14780 - }, - { - "epoch": 1.1301106714834566, - "grad_norm": 0.0003497939324006438, - "learning_rate": 0.00019999937060386394, - "loss": 46.0, - "step": 14781 - }, - { - "epoch": 1.1301871284668463, - "grad_norm": 0.002158332848921418, - "learning_rate": 0.00019999937051864064, - "loss": 46.0, - "step": 14782 - }, - { - "epoch": 1.130263585450236, - "grad_norm": 0.0035231029614806175, - "learning_rate": 0.00019999937043341156, - "loss": 46.0, - "step": 14783 - }, - { - "epoch": 1.1303400424336258, - "grad_norm": 0.0015833841171115637, - "learning_rate": 0.0001999993703481767, - "loss": 46.0, - "step": 14784 - }, - { - "epoch": 1.1304164994170156, - "grad_norm": 0.000613283016718924, - "learning_rate": 0.00019999937026293608, - "loss": 46.0, - "step": 14785 - }, - { - "epoch": 1.1304929564004051, - "grad_norm": 0.0013062895741313696, - "learning_rate": 0.0001999993701776897, - "loss": 46.0, - "step": 14786 - }, - { - "epoch": 1.130569413383795, - "grad_norm": 0.0019704641308635473, - "learning_rate": 0.00019999937009243754, - "loss": 46.0, - "step": 14787 - }, - { - "epoch": 1.1306458703671847, - "grad_norm": 0.00115984829608351, - "learning_rate": 0.00019999937000717962, - "loss": 46.0, - "step": 14788 - }, - { - "epoch": 1.1307223273505744, - "grad_norm": 0.01768254116177559, - "learning_rate": 0.0001999993699219159, - "loss": 46.0, - "step": 14789 - }, - { - "epoch": 1.1307987843339642, - "grad_norm": 0.0011324993101879954, - "learning_rate": 0.0001999993698366464, - "loss": 46.0, - "step": 14790 - }, - { - "epoch": 1.1308752413173537, - "grad_norm": 0.0007432815618813038, - "learning_rate": 0.00019999936975137122, - "loss": 46.0, - "step": 14791 - }, - { - "epoch": 1.1309516983007435, - "grad_norm": 0.00291697820648551, - "learning_rate": 0.00019999936966609019, - "loss": 46.0, - "step": 14792 - }, - { - "epoch": 1.1310281552841333, - "grad_norm": 0.0016537210904061794, - "learning_rate": 0.0001999993695808034, - "loss": 46.0, - "step": 14793 - }, - { - "epoch": 1.131104612267523, - "grad_norm": 0.0007090248982422054, - "learning_rate": 0.00019999936949551087, - "loss": 46.0, - "step": 14794 - }, - { - "epoch": 1.1311810692509128, - "grad_norm": 0.0029994596261531115, - "learning_rate": 0.00019999936941021256, - "loss": 46.0, - "step": 14795 - }, - { - "epoch": 1.1312575262343025, - "grad_norm": 0.0015521880704909563, - "learning_rate": 0.00019999936932490849, - "loss": 46.0, - "step": 14796 - }, - { - "epoch": 1.131333983217692, - "grad_norm": 0.0008390407892875373, - "learning_rate": 0.0001999993692395986, - "loss": 46.0, - "step": 14797 - }, - { - "epoch": 1.1314104402010818, - "grad_norm": 0.0036280702333897352, - "learning_rate": 0.00019999936915428298, - "loss": 46.0, - "step": 14798 - }, - { - "epoch": 1.1314868971844716, - "grad_norm": 0.0016299005364999175, - "learning_rate": 0.00019999936906896158, - "loss": 46.0, - "step": 14799 - }, - { - "epoch": 1.1315633541678614, - "grad_norm": 0.0050012883730232716, - "learning_rate": 0.00019999936898363444, - "loss": 46.0, - "step": 14800 - }, - { - "epoch": 1.1316398111512511, - "grad_norm": 0.0006390110356733203, - "learning_rate": 0.00019999936889830152, - "loss": 46.0, - "step": 14801 - }, - { - "epoch": 1.1317162681346407, - "grad_norm": 0.002643123734742403, - "learning_rate": 0.0001999993688129628, - "loss": 46.0, - "step": 14802 - }, - { - "epoch": 1.1317927251180304, - "grad_norm": 0.002462599892169237, - "learning_rate": 0.00019999936872761834, - "loss": 46.0, - "step": 14803 - }, - { - "epoch": 1.1318691821014202, - "grad_norm": 0.0023767855018377304, - "learning_rate": 0.0001999993686422681, - "loss": 46.0, - "step": 14804 - }, - { - "epoch": 1.13194563908481, - "grad_norm": 0.009208094328641891, - "learning_rate": 0.00019999936855691206, - "loss": 46.0, - "step": 14805 - }, - { - "epoch": 1.1320220960681997, - "grad_norm": 0.0023866461124271154, - "learning_rate": 0.0001999993684715503, - "loss": 46.0, - "step": 14806 - }, - { - "epoch": 1.1320985530515895, - "grad_norm": 0.0004645321168936789, - "learning_rate": 0.00019999936838618277, - "loss": 46.0, - "step": 14807 - }, - { - "epoch": 1.132175010034979, - "grad_norm": 0.0008993617375381291, - "learning_rate": 0.00019999936830080944, - "loss": 46.0, - "step": 14808 - }, - { - "epoch": 1.1322514670183688, - "grad_norm": 0.001164270332083106, - "learning_rate": 0.00019999936821543034, - "loss": 46.0, - "step": 14809 - }, - { - "epoch": 1.1323279240017585, - "grad_norm": 0.008869229815900326, - "learning_rate": 0.00019999936813004548, - "loss": 46.0, - "step": 14810 - }, - { - "epoch": 1.1324043809851483, - "grad_norm": 0.0012709078146144748, - "learning_rate": 0.00019999936804465483, - "loss": 46.0, - "step": 14811 - }, - { - "epoch": 1.132480837968538, - "grad_norm": 0.001199951395392418, - "learning_rate": 0.00019999936795925846, - "loss": 46.0, - "step": 14812 - }, - { - "epoch": 1.1325572949519276, - "grad_norm": 0.0010795732960104942, - "learning_rate": 0.0001999993678738563, - "loss": 46.0, - "step": 14813 - }, - { - "epoch": 1.1326337519353173, - "grad_norm": 0.001172151300124824, - "learning_rate": 0.00019999936778844837, - "loss": 46.0, - "step": 14814 - }, - { - "epoch": 1.132710208918707, - "grad_norm": 0.0009043826721608639, - "learning_rate": 0.00019999936770303465, - "loss": 46.0, - "step": 14815 - }, - { - "epoch": 1.1327866659020969, - "grad_norm": 0.001527887536212802, - "learning_rate": 0.0001999993676176152, - "loss": 46.0, - "step": 14816 - }, - { - "epoch": 1.1328631228854866, - "grad_norm": 0.002399386838078499, - "learning_rate": 0.00019999936753218992, - "loss": 46.0, - "step": 14817 - }, - { - "epoch": 1.1329395798688764, - "grad_norm": 0.0062965815886855125, - "learning_rate": 0.00019999936744675894, - "loss": 46.0, - "step": 14818 - }, - { - "epoch": 1.133016036852266, - "grad_norm": 0.005739616230130196, - "learning_rate": 0.00019999936736132216, - "loss": 46.0, - "step": 14819 - }, - { - "epoch": 1.1330924938356557, - "grad_norm": 0.001907101133838296, - "learning_rate": 0.00019999936727587957, - "loss": 46.0, - "step": 14820 - }, - { - "epoch": 1.1331689508190455, - "grad_norm": 0.004465166013687849, - "learning_rate": 0.00019999936719043127, - "loss": 46.0, - "step": 14821 - }, - { - "epoch": 1.1332454078024352, - "grad_norm": 0.0007419039611704648, - "learning_rate": 0.0001999993671049772, - "loss": 46.0, - "step": 14822 - }, - { - "epoch": 1.1333218647858248, - "grad_norm": 0.00021928591013420373, - "learning_rate": 0.0001999993670195173, - "loss": 46.0, - "step": 14823 - }, - { - "epoch": 1.1333983217692145, - "grad_norm": 0.0007787608774378896, - "learning_rate": 0.00019999936693405169, - "loss": 46.0, - "step": 14824 - }, - { - "epoch": 1.1334747787526043, - "grad_norm": 0.0008408714784309268, - "learning_rate": 0.0001999993668485803, - "loss": 46.0, - "step": 14825 - }, - { - "epoch": 1.133551235735994, - "grad_norm": 0.0009409827180206776, - "learning_rate": 0.00019999936676310314, - "loss": 46.0, - "step": 14826 - }, - { - "epoch": 1.1336276927193838, - "grad_norm": 0.0008453232585452497, - "learning_rate": 0.00019999936667762017, - "loss": 46.0, - "step": 14827 - }, - { - "epoch": 1.1337041497027736, - "grad_norm": 0.0014677041908726096, - "learning_rate": 0.0001999993665921315, - "loss": 46.0, - "step": 14828 - }, - { - "epoch": 1.1337806066861633, - "grad_norm": 0.015801239758729935, - "learning_rate": 0.000199999366506637, - "loss": 46.0, - "step": 14829 - }, - { - "epoch": 1.1338570636695529, - "grad_norm": 0.0012309295125305653, - "learning_rate": 0.00019999936642113678, - "loss": 46.0, - "step": 14830 - }, - { - "epoch": 1.1339335206529426, - "grad_norm": 0.000673692615237087, - "learning_rate": 0.00019999936633563074, - "loss": 46.0, - "step": 14831 - }, - { - "epoch": 1.1340099776363324, - "grad_norm": 0.011002810671925545, - "learning_rate": 0.00019999936625011896, - "loss": 46.0, - "step": 14832 - }, - { - "epoch": 1.1340864346197221, - "grad_norm": 0.0012958996230736375, - "learning_rate": 0.0001999993661646014, - "loss": 46.0, - "step": 14833 - }, - { - "epoch": 1.1341628916031117, - "grad_norm": 0.004489955957978964, - "learning_rate": 0.00019999936607907807, - "loss": 46.0, - "step": 14834 - }, - { - "epoch": 1.1342393485865014, - "grad_norm": 0.001114579732529819, - "learning_rate": 0.000199999365993549, - "loss": 46.0, - "step": 14835 - }, - { - "epoch": 1.1343158055698912, - "grad_norm": 0.00208867690525949, - "learning_rate": 0.00019999936590801414, - "loss": 46.0, - "step": 14836 - }, - { - "epoch": 1.134392262553281, - "grad_norm": 0.00457307044416666, - "learning_rate": 0.00019999936582247352, - "loss": 46.0, - "step": 14837 - }, - { - "epoch": 1.1344687195366707, - "grad_norm": 0.0015747388824820518, - "learning_rate": 0.0001999993657369271, - "loss": 46.0, - "step": 14838 - }, - { - "epoch": 1.1345451765200605, - "grad_norm": 0.001406220137141645, - "learning_rate": 0.00019999936565137495, - "loss": 46.0, - "step": 14839 - }, - { - "epoch": 1.1346216335034502, - "grad_norm": 0.0006022056913934648, - "learning_rate": 0.000199999365565817, - "loss": 46.0, - "step": 14840 - }, - { - "epoch": 1.1346980904868398, - "grad_norm": 0.0013228778261691332, - "learning_rate": 0.0001999993654802533, - "loss": 46.0, - "step": 14841 - }, - { - "epoch": 1.1347745474702295, - "grad_norm": 0.00700360769405961, - "learning_rate": 0.0001999993653946838, - "loss": 46.0, - "step": 14842 - }, - { - "epoch": 1.1348510044536193, - "grad_norm": 0.010039308108389378, - "learning_rate": 0.00019999936530910856, - "loss": 46.0, - "step": 14843 - }, - { - "epoch": 1.134927461437009, - "grad_norm": 0.000553904683329165, - "learning_rate": 0.00019999936522352752, - "loss": 46.0, - "step": 14844 - }, - { - "epoch": 1.1350039184203986, - "grad_norm": 0.0009194056037813425, - "learning_rate": 0.00019999936513794076, - "loss": 46.0, - "step": 14845 - }, - { - "epoch": 1.1350803754037884, - "grad_norm": 0.0023718979209661484, - "learning_rate": 0.0001999993650523482, - "loss": 46.0, - "step": 14846 - }, - { - "epoch": 1.1351568323871781, - "grad_norm": 0.0012134637217968702, - "learning_rate": 0.00019999936496674988, - "loss": 46.0, - "step": 14847 - }, - { - "epoch": 1.1352332893705679, - "grad_norm": 0.0012317672371864319, - "learning_rate": 0.0001999993648811458, - "loss": 46.0, - "step": 14848 - }, - { - "epoch": 1.1353097463539576, - "grad_norm": 0.0016546202823519707, - "learning_rate": 0.00019999936479553592, - "loss": 46.0, - "step": 14849 - }, - { - "epoch": 1.1353862033373474, - "grad_norm": 0.003088191617280245, - "learning_rate": 0.00019999936470992027, - "loss": 46.0, - "step": 14850 - }, - { - "epoch": 1.135462660320737, - "grad_norm": 0.001624019700102508, - "learning_rate": 0.0001999993646242989, - "loss": 46.0, - "step": 14851 - }, - { - "epoch": 1.1355391173041267, - "grad_norm": 0.0010510454885661602, - "learning_rate": 0.0001999993645386717, - "loss": 46.0, - "step": 14852 - }, - { - "epoch": 1.1356155742875165, - "grad_norm": 0.0020558179821819067, - "learning_rate": 0.0001999993644530388, - "loss": 46.0, - "step": 14853 - }, - { - "epoch": 1.1356920312709062, - "grad_norm": 0.0007484521484002471, - "learning_rate": 0.00019999936436740004, - "loss": 46.0, - "step": 14854 - }, - { - "epoch": 1.135768488254296, - "grad_norm": 0.0004398217424750328, - "learning_rate": 0.00019999936428175555, - "loss": 46.0, - "step": 14855 - }, - { - "epoch": 1.1358449452376855, - "grad_norm": 0.002432475332170725, - "learning_rate": 0.00019999936419610531, - "loss": 46.0, - "step": 14856 - }, - { - "epoch": 1.1359214022210753, - "grad_norm": 0.03173724561929703, - "learning_rate": 0.0001999993641104493, - "loss": 46.0, - "step": 14857 - }, - { - "epoch": 1.135997859204465, - "grad_norm": 0.000692440546117723, - "learning_rate": 0.00019999936402478752, - "loss": 46.0, - "step": 14858 - }, - { - "epoch": 1.1360743161878548, - "grad_norm": 0.003717514919117093, - "learning_rate": 0.00019999936393911996, - "loss": 46.0, - "step": 14859 - }, - { - "epoch": 1.1361507731712446, - "grad_norm": 0.002513560699298978, - "learning_rate": 0.0001999993638534466, - "loss": 46.0, - "step": 14860 - }, - { - "epoch": 1.1362272301546343, - "grad_norm": 0.0017767185345292091, - "learning_rate": 0.0001999993637677675, - "loss": 46.0, - "step": 14861 - }, - { - "epoch": 1.1363036871380239, - "grad_norm": 0.003988855052739382, - "learning_rate": 0.00019999936368208268, - "loss": 46.0, - "step": 14862 - }, - { - "epoch": 1.1363801441214136, - "grad_norm": 0.0011938252719119191, - "learning_rate": 0.00019999936359639203, - "loss": 46.0, - "step": 14863 - }, - { - "epoch": 1.1364566011048034, - "grad_norm": 0.004000297747552395, - "learning_rate": 0.0001999993635106956, - "loss": 46.0, - "step": 14864 - }, - { - "epoch": 1.1365330580881932, - "grad_norm": 0.000504208030179143, - "learning_rate": 0.00019999936342499344, - "loss": 46.0, - "step": 14865 - }, - { - "epoch": 1.136609515071583, - "grad_norm": 0.0006104611675255001, - "learning_rate": 0.00019999936333928552, - "loss": 46.0, - "step": 14866 - }, - { - "epoch": 1.1366859720549725, - "grad_norm": 0.002088473876938224, - "learning_rate": 0.00019999936325357177, - "loss": 46.0, - "step": 14867 - }, - { - "epoch": 1.1367624290383622, - "grad_norm": 0.002962731057778001, - "learning_rate": 0.0001999993631678523, - "loss": 46.0, - "step": 14868 - }, - { - "epoch": 1.136838886021752, - "grad_norm": 0.001260658958926797, - "learning_rate": 0.00019999936308212702, - "loss": 46.0, - "step": 14869 - }, - { - "epoch": 1.1369153430051417, - "grad_norm": 0.00048768112901598215, - "learning_rate": 0.000199999362996396, - "loss": 46.0, - "step": 14870 - }, - { - "epoch": 1.1369917999885315, - "grad_norm": 0.0006740997778251767, - "learning_rate": 0.00019999936291065923, - "loss": 46.0, - "step": 14871 - }, - { - "epoch": 1.1370682569719213, - "grad_norm": 0.0013945174869149923, - "learning_rate": 0.00019999936282491667, - "loss": 46.0, - "step": 14872 - }, - { - "epoch": 1.1371447139553108, - "grad_norm": 0.0009060294833034277, - "learning_rate": 0.00019999936273916834, - "loss": 46.0, - "step": 14873 - }, - { - "epoch": 1.1372211709387006, - "grad_norm": 0.0005468170274980366, - "learning_rate": 0.00019999936265341424, - "loss": 46.0, - "step": 14874 - }, - { - "epoch": 1.1372976279220903, - "grad_norm": 0.0011170461075380445, - "learning_rate": 0.00019999936256765436, - "loss": 46.0, - "step": 14875 - }, - { - "epoch": 1.13737408490548, - "grad_norm": 0.0028317549731582403, - "learning_rate": 0.00019999936248188874, - "loss": 46.0, - "step": 14876 - }, - { - "epoch": 1.1374505418888698, - "grad_norm": 0.0015321020036935806, - "learning_rate": 0.00019999936239611734, - "loss": 46.0, - "step": 14877 - }, - { - "epoch": 1.1375269988722594, - "grad_norm": 0.0006664778920821846, - "learning_rate": 0.00019999936231034014, - "loss": 46.0, - "step": 14878 - }, - { - "epoch": 1.1376034558556491, - "grad_norm": 0.002765545854344964, - "learning_rate": 0.0001999993622245572, - "loss": 46.0, - "step": 14879 - }, - { - "epoch": 1.137679912839039, - "grad_norm": 0.007046765647828579, - "learning_rate": 0.00019999936213876848, - "loss": 46.0, - "step": 14880 - }, - { - "epoch": 1.1377563698224287, - "grad_norm": 0.0010540410876274109, - "learning_rate": 0.000199999362052974, - "loss": 46.0, - "step": 14881 - }, - { - "epoch": 1.1378328268058184, - "grad_norm": 0.0010335259139537811, - "learning_rate": 0.00019999936196717376, - "loss": 46.0, - "step": 14882 - }, - { - "epoch": 1.1379092837892082, - "grad_norm": 0.0007436656742356718, - "learning_rate": 0.00019999936188136772, - "loss": 46.0, - "step": 14883 - }, - { - "epoch": 1.1379857407725977, - "grad_norm": 0.0006247290875762701, - "learning_rate": 0.0001999993617955559, - "loss": 46.0, - "step": 14884 - }, - { - "epoch": 1.1380621977559875, - "grad_norm": 0.012216565199196339, - "learning_rate": 0.00019999936170973835, - "loss": 46.0, - "step": 14885 - }, - { - "epoch": 1.1381386547393773, - "grad_norm": 0.0003892823006026447, - "learning_rate": 0.00019999936162391505, - "loss": 46.0, - "step": 14886 - }, - { - "epoch": 1.138215111722767, - "grad_norm": 0.0015606341185048223, - "learning_rate": 0.00019999936153808592, - "loss": 46.0, - "step": 14887 - }, - { - "epoch": 1.1382915687061568, - "grad_norm": 0.004554451908916235, - "learning_rate": 0.00019999936145225107, - "loss": 46.0, - "step": 14888 - }, - { - "epoch": 1.1383680256895463, - "grad_norm": 0.00164883048273623, - "learning_rate": 0.00019999936136641042, - "loss": 46.0, - "step": 14889 - }, - { - "epoch": 1.138444482672936, - "grad_norm": 0.0018709847936406732, - "learning_rate": 0.000199999361280564, - "loss": 46.0, - "step": 14890 - }, - { - "epoch": 1.1385209396563258, - "grad_norm": 0.002143275924026966, - "learning_rate": 0.00019999936119471183, - "loss": 46.0, - "step": 14891 - }, - { - "epoch": 1.1385973966397156, - "grad_norm": 0.0024356436915695667, - "learning_rate": 0.00019999936110885389, - "loss": 46.0, - "step": 14892 - }, - { - "epoch": 1.1386738536231054, - "grad_norm": 0.0006338706007227302, - "learning_rate": 0.00019999936102299017, - "loss": 46.0, - "step": 14893 - }, - { - "epoch": 1.1387503106064951, - "grad_norm": 0.0005174280377104878, - "learning_rate": 0.00019999936093712068, - "loss": 46.0, - "step": 14894 - }, - { - "epoch": 1.1388267675898847, - "grad_norm": 0.02072383277118206, - "learning_rate": 0.00019999936085124542, - "loss": 46.0, - "step": 14895 - }, - { - "epoch": 1.1389032245732744, - "grad_norm": 0.0012328787706792355, - "learning_rate": 0.00019999936076536438, - "loss": 46.0, - "step": 14896 - }, - { - "epoch": 1.1389796815566642, - "grad_norm": 0.0037673902697861195, - "learning_rate": 0.00019999936067947757, - "loss": 46.0, - "step": 14897 - }, - { - "epoch": 1.139056138540054, - "grad_norm": 0.0032261626329272985, - "learning_rate": 0.00019999936059358504, - "loss": 46.0, - "step": 14898 - }, - { - "epoch": 1.1391325955234437, - "grad_norm": 0.0017686389619484544, - "learning_rate": 0.00019999936050768669, - "loss": 46.0, - "step": 14899 - }, - { - "epoch": 1.1392090525068332, - "grad_norm": 0.00190397456753999, - "learning_rate": 0.00019999936042178258, - "loss": 46.0, - "step": 14900 - }, - { - "epoch": 1.139285509490223, - "grad_norm": 0.010475380346179008, - "learning_rate": 0.0001999993603358727, - "loss": 46.0, - "step": 14901 - }, - { - "epoch": 1.1393619664736128, - "grad_norm": 0.002703100675716996, - "learning_rate": 0.00019999936024995706, - "loss": 46.0, - "step": 14902 - }, - { - "epoch": 1.1394384234570025, - "grad_norm": 0.0007554986514151096, - "learning_rate": 0.00019999936016403566, - "loss": 46.0, - "step": 14903 - }, - { - "epoch": 1.1395148804403923, - "grad_norm": 0.0015145642682909966, - "learning_rate": 0.00019999936007810846, - "loss": 46.0, - "step": 14904 - }, - { - "epoch": 1.139591337423782, - "grad_norm": 0.0011756264138966799, - "learning_rate": 0.00019999935999217552, - "loss": 46.0, - "step": 14905 - }, - { - "epoch": 1.1396677944071716, - "grad_norm": 0.003527546999976039, - "learning_rate": 0.0001999993599062368, - "loss": 46.0, - "step": 14906 - }, - { - "epoch": 1.1397442513905613, - "grad_norm": 0.0009252013405784965, - "learning_rate": 0.00019999935982029232, - "loss": 46.0, - "step": 14907 - }, - { - "epoch": 1.139820708373951, - "grad_norm": 0.002993206027895212, - "learning_rate": 0.00019999935973434205, - "loss": 46.0, - "step": 14908 - }, - { - "epoch": 1.1398971653573409, - "grad_norm": 0.0014797229086980224, - "learning_rate": 0.00019999935964838602, - "loss": 46.0, - "step": 14909 - }, - { - "epoch": 1.1399736223407306, - "grad_norm": 0.0007859288016334176, - "learning_rate": 0.0001999993595624242, - "loss": 46.0, - "step": 14910 - }, - { - "epoch": 1.1400500793241202, - "grad_norm": 0.003251680638641119, - "learning_rate": 0.00019999935947645665, - "loss": 46.0, - "step": 14911 - }, - { - "epoch": 1.14012653630751, - "grad_norm": 0.0010924285743385553, - "learning_rate": 0.00019999935939048332, - "loss": 46.0, - "step": 14912 - }, - { - "epoch": 1.1402029932908997, - "grad_norm": 0.0015300287632271647, - "learning_rate": 0.0001999993593045042, - "loss": 46.0, - "step": 14913 - }, - { - "epoch": 1.1402794502742895, - "grad_norm": 0.001611140789464116, - "learning_rate": 0.00019999935921851934, - "loss": 46.0, - "step": 14914 - }, - { - "epoch": 1.1403559072576792, - "grad_norm": 0.0006278762593865395, - "learning_rate": 0.0001999993591325287, - "loss": 46.0, - "step": 14915 - }, - { - "epoch": 1.140432364241069, - "grad_norm": 0.0013426505029201508, - "learning_rate": 0.00019999935904653227, - "loss": 46.0, - "step": 14916 - }, - { - "epoch": 1.1405088212244585, - "grad_norm": 0.0015826657181605697, - "learning_rate": 0.00019999935896053008, - "loss": 46.0, - "step": 14917 - }, - { - "epoch": 1.1405852782078483, - "grad_norm": 0.002817973028868437, - "learning_rate": 0.00019999935887452213, - "loss": 46.0, - "step": 14918 - }, - { - "epoch": 1.140661735191238, - "grad_norm": 0.0003659868089016527, - "learning_rate": 0.00019999935878850842, - "loss": 46.0, - "step": 14919 - }, - { - "epoch": 1.1407381921746278, - "grad_norm": 0.0009584235958755016, - "learning_rate": 0.0001999993587024889, - "loss": 46.0, - "step": 14920 - }, - { - "epoch": 1.1408146491580176, - "grad_norm": 0.003864546772092581, - "learning_rate": 0.00019999935861646364, - "loss": 46.0, - "step": 14921 - }, - { - "epoch": 1.140891106141407, - "grad_norm": 0.0009550279937684536, - "learning_rate": 0.00019999935853043263, - "loss": 46.0, - "step": 14922 - }, - { - "epoch": 1.1409675631247969, - "grad_norm": 0.0002660670434124768, - "learning_rate": 0.00019999935844439582, - "loss": 46.0, - "step": 14923 - }, - { - "epoch": 1.1410440201081866, - "grad_norm": 0.0017768017714843154, - "learning_rate": 0.00019999935835835327, - "loss": 46.0, - "step": 14924 - }, - { - "epoch": 1.1411204770915764, - "grad_norm": 0.0035320480819791555, - "learning_rate": 0.00019999935827230488, - "loss": 46.0, - "step": 14925 - }, - { - "epoch": 1.1411969340749661, - "grad_norm": 0.0099811851978302, - "learning_rate": 0.0001999993581862508, - "loss": 46.0, - "step": 14926 - }, - { - "epoch": 1.141273391058356, - "grad_norm": 0.0008795639732852578, - "learning_rate": 0.00019999935810019093, - "loss": 46.0, - "step": 14927 - }, - { - "epoch": 1.1413498480417454, - "grad_norm": 0.0010169106535613537, - "learning_rate": 0.00019999935801412528, - "loss": 46.0, - "step": 14928 - }, - { - "epoch": 1.1414263050251352, - "grad_norm": 0.0011053842026740313, - "learning_rate": 0.00019999935792805386, - "loss": 46.0, - "step": 14929 - }, - { - "epoch": 1.141502762008525, - "grad_norm": 0.001032682484947145, - "learning_rate": 0.00019999935784197667, - "loss": 46.0, - "step": 14930 - }, - { - "epoch": 1.1415792189919147, - "grad_norm": 0.0029977222438901663, - "learning_rate": 0.00019999935775589372, - "loss": 46.0, - "step": 14931 - }, - { - "epoch": 1.1416556759753045, - "grad_norm": 0.00665133586153388, - "learning_rate": 0.00019999935766980498, - "loss": 46.0, - "step": 14932 - }, - { - "epoch": 1.141732132958694, - "grad_norm": 0.0018333197804167867, - "learning_rate": 0.0001999993575837105, - "loss": 46.0, - "step": 14933 - }, - { - "epoch": 1.1418085899420838, - "grad_norm": 0.0008564876043237746, - "learning_rate": 0.00019999935749761023, - "loss": 46.0, - "step": 14934 - }, - { - "epoch": 1.1418850469254735, - "grad_norm": 0.0006230600411072373, - "learning_rate": 0.00019999935741150417, - "loss": 46.0, - "step": 14935 - }, - { - "epoch": 1.1419615039088633, - "grad_norm": 0.0005235027056187391, - "learning_rate": 0.00019999935732539239, - "loss": 46.0, - "step": 14936 - }, - { - "epoch": 1.142037960892253, - "grad_norm": 0.005327706225216389, - "learning_rate": 0.0001999993572392748, - "loss": 46.0, - "step": 14937 - }, - { - "epoch": 1.1421144178756428, - "grad_norm": 0.000640437996480614, - "learning_rate": 0.00019999935715315145, - "loss": 46.0, - "step": 14938 - }, - { - "epoch": 1.1421908748590324, - "grad_norm": 0.0012529651867225766, - "learning_rate": 0.00019999935706702235, - "loss": 46.0, - "step": 14939 - }, - { - "epoch": 1.1422673318424221, - "grad_norm": 0.0015473649837076664, - "learning_rate": 0.00019999935698088747, - "loss": 46.0, - "step": 14940 - }, - { - "epoch": 1.142343788825812, - "grad_norm": 0.0034063924103975296, - "learning_rate": 0.00019999935689474682, - "loss": 46.0, - "step": 14941 - }, - { - "epoch": 1.1424202458092017, - "grad_norm": 0.003460923209786415, - "learning_rate": 0.0001999993568086004, - "loss": 46.0, - "step": 14942 - }, - { - "epoch": 1.1424967027925914, - "grad_norm": 0.006585083436220884, - "learning_rate": 0.0001999993567224482, - "loss": 46.0, - "step": 14943 - }, - { - "epoch": 1.142573159775981, - "grad_norm": 0.001133933779783547, - "learning_rate": 0.00019999935663629026, - "loss": 46.0, - "step": 14944 - }, - { - "epoch": 1.1426496167593707, - "grad_norm": 0.0010075286263599992, - "learning_rate": 0.00019999935655012655, - "loss": 46.0, - "step": 14945 - }, - { - "epoch": 1.1427260737427605, - "grad_norm": 0.001493255840614438, - "learning_rate": 0.00019999935646395703, - "loss": 46.0, - "step": 14946 - }, - { - "epoch": 1.1428025307261502, - "grad_norm": 0.0006314993952400982, - "learning_rate": 0.00019999935637778177, - "loss": 46.0, - "step": 14947 - }, - { - "epoch": 1.14287898770954, - "grad_norm": 0.0011521636042743921, - "learning_rate": 0.0001999993562916007, - "loss": 46.0, - "step": 14948 - }, - { - "epoch": 1.1429554446929298, - "grad_norm": 0.005184476729482412, - "learning_rate": 0.00019999935620541393, - "loss": 46.0, - "step": 14949 - }, - { - "epoch": 1.1430319016763193, - "grad_norm": 0.0009690604638308287, - "learning_rate": 0.00019999935611922137, - "loss": 46.0, - "step": 14950 - }, - { - "epoch": 1.143108358659709, - "grad_norm": 0.0032997315283864737, - "learning_rate": 0.000199999356033023, - "loss": 46.0, - "step": 14951 - }, - { - "epoch": 1.1431848156430988, - "grad_norm": 0.0010366393253207207, - "learning_rate": 0.0001999993559468189, - "loss": 46.0, - "step": 14952 - }, - { - "epoch": 1.1432612726264886, - "grad_norm": 0.0010884796502068639, - "learning_rate": 0.000199999355860609, - "loss": 46.0, - "step": 14953 - }, - { - "epoch": 1.1433377296098783, - "grad_norm": 0.0012642787769436836, - "learning_rate": 0.00019999935577439334, - "loss": 46.0, - "step": 14954 - }, - { - "epoch": 1.1434141865932679, - "grad_norm": 0.0012666076654568315, - "learning_rate": 0.00019999935568817194, - "loss": 46.0, - "step": 14955 - }, - { - "epoch": 1.1434906435766576, - "grad_norm": 0.0025166980922222137, - "learning_rate": 0.00019999935560194475, - "loss": 46.0, - "step": 14956 - }, - { - "epoch": 1.1435671005600474, - "grad_norm": 0.0009621352655813098, - "learning_rate": 0.00019999935551571175, - "loss": 46.0, - "step": 14957 - }, - { - "epoch": 1.1436435575434372, - "grad_norm": 0.001618271810002625, - "learning_rate": 0.00019999935542947307, - "loss": 46.0, - "step": 14958 - }, - { - "epoch": 1.143720014526827, - "grad_norm": 0.0007134682964533567, - "learning_rate": 0.00019999935534322852, - "loss": 46.0, - "step": 14959 - }, - { - "epoch": 1.1437964715102167, - "grad_norm": 0.0006969352834858, - "learning_rate": 0.0001999993552569783, - "loss": 46.0, - "step": 14960 - }, - { - "epoch": 1.1438729284936062, - "grad_norm": 0.0005706926458515227, - "learning_rate": 0.00019999935517072223, - "loss": 46.0, - "step": 14961 - }, - { - "epoch": 1.143949385476996, - "grad_norm": 0.0019901113118976355, - "learning_rate": 0.00019999935508446042, - "loss": 46.0, - "step": 14962 - }, - { - "epoch": 1.1440258424603857, - "grad_norm": 0.001561901532113552, - "learning_rate": 0.00019999935499819284, - "loss": 46.0, - "step": 14963 - }, - { - "epoch": 1.1441022994437755, - "grad_norm": 0.004461059346795082, - "learning_rate": 0.0001999993549119195, - "loss": 46.0, - "step": 14964 - }, - { - "epoch": 1.144178756427165, - "grad_norm": 0.000552699260879308, - "learning_rate": 0.00019999935482564038, - "loss": 46.0, - "step": 14965 - }, - { - "epoch": 1.1442552134105548, - "grad_norm": 0.0015074230032041669, - "learning_rate": 0.0001999993547393555, - "loss": 46.0, - "step": 14966 - }, - { - "epoch": 1.1443316703939446, - "grad_norm": 0.008560001850128174, - "learning_rate": 0.00019999935465306483, - "loss": 46.0, - "step": 14967 - }, - { - "epoch": 1.1444081273773343, - "grad_norm": 0.0015120639000087976, - "learning_rate": 0.0001999993545667684, - "loss": 46.0, - "step": 14968 - }, - { - "epoch": 1.144484584360724, - "grad_norm": 0.0004665811429731548, - "learning_rate": 0.00019999935448046621, - "loss": 46.0, - "step": 14969 - }, - { - "epoch": 1.1445610413441138, - "grad_norm": 0.003878061193972826, - "learning_rate": 0.00019999935439415825, - "loss": 46.0, - "step": 14970 - }, - { - "epoch": 1.1446374983275036, - "grad_norm": 0.0005604481557384133, - "learning_rate": 0.00019999935430784453, - "loss": 46.0, - "step": 14971 - }, - { - "epoch": 1.1447139553108931, - "grad_norm": 0.0025031897239387035, - "learning_rate": 0.00019999935422152502, - "loss": 46.0, - "step": 14972 - }, - { - "epoch": 1.144790412294283, - "grad_norm": 0.0003243081155233085, - "learning_rate": 0.00019999935413519976, - "loss": 46.0, - "step": 14973 - }, - { - "epoch": 1.1448668692776727, - "grad_norm": 0.0030771391466259956, - "learning_rate": 0.00019999935404886872, - "loss": 46.0, - "step": 14974 - }, - { - "epoch": 1.1449433262610624, - "grad_norm": 0.0013102870434522629, - "learning_rate": 0.0001999993539625319, - "loss": 46.0, - "step": 14975 - }, - { - "epoch": 1.145019783244452, - "grad_norm": 0.0011032220209017396, - "learning_rate": 0.00019999935387618933, - "loss": 46.0, - "step": 14976 - }, - { - "epoch": 1.1450962402278417, - "grad_norm": 0.01567205600440502, - "learning_rate": 0.00019999935378984098, - "loss": 46.0, - "step": 14977 - }, - { - "epoch": 1.1451726972112315, - "grad_norm": 0.000965586572419852, - "learning_rate": 0.00019999935370348685, - "loss": 46.0, - "step": 14978 - }, - { - "epoch": 1.1452491541946213, - "grad_norm": 0.002214935142546892, - "learning_rate": 0.00019999935361712695, - "loss": 46.0, - "step": 14979 - }, - { - "epoch": 1.145325611178011, - "grad_norm": 0.0034965798258781433, - "learning_rate": 0.0001999993535307613, - "loss": 46.0, - "step": 14980 - }, - { - "epoch": 1.1454020681614008, - "grad_norm": 0.0009097771253436804, - "learning_rate": 0.00019999935344438988, - "loss": 46.0, - "step": 14981 - }, - { - "epoch": 1.1454785251447903, - "grad_norm": 0.0008807270787656307, - "learning_rate": 0.00019999935335801268, - "loss": 46.0, - "step": 14982 - }, - { - "epoch": 1.14555498212818, - "grad_norm": 0.0016269584884867072, - "learning_rate": 0.00019999935327162972, - "loss": 46.0, - "step": 14983 - }, - { - "epoch": 1.1456314391115698, - "grad_norm": 0.002810464473441243, - "learning_rate": 0.00019999935318524098, - "loss": 46.0, - "step": 14984 - }, - { - "epoch": 1.1457078960949596, - "grad_norm": 0.0016543669626116753, - "learning_rate": 0.0001999993530988465, - "loss": 46.0, - "step": 14985 - }, - { - "epoch": 1.1457843530783494, - "grad_norm": 0.0019010150572285056, - "learning_rate": 0.0001999993530124462, - "loss": 46.0, - "step": 14986 - }, - { - "epoch": 1.145860810061739, - "grad_norm": 0.006113330367952585, - "learning_rate": 0.00019999935292604014, - "loss": 46.0, - "step": 14987 - }, - { - "epoch": 1.1459372670451287, - "grad_norm": 0.0009544951608404517, - "learning_rate": 0.00019999935283962833, - "loss": 46.0, - "step": 14988 - }, - { - "epoch": 1.1460137240285184, - "grad_norm": 0.01300711091607809, - "learning_rate": 0.00019999935275321075, - "loss": 46.0, - "step": 14989 - }, - { - "epoch": 1.1460901810119082, - "grad_norm": 0.001527132117189467, - "learning_rate": 0.0001999993526667874, - "loss": 46.0, - "step": 14990 - }, - { - "epoch": 1.146166637995298, - "grad_norm": 0.0007927576662041247, - "learning_rate": 0.0001999993525803583, - "loss": 46.0, - "step": 14991 - }, - { - "epoch": 1.1462430949786877, - "grad_norm": 0.0008664650958962739, - "learning_rate": 0.0001999993524939234, - "loss": 46.0, - "step": 14992 - }, - { - "epoch": 1.1463195519620772, - "grad_norm": 0.003534761257469654, - "learning_rate": 0.00019999935240748272, - "loss": 46.0, - "step": 14993 - }, - { - "epoch": 1.146396008945467, - "grad_norm": 0.0017882920801639557, - "learning_rate": 0.0001999993523210363, - "loss": 46.0, - "step": 14994 - }, - { - "epoch": 1.1464724659288568, - "grad_norm": 0.0007550594164058566, - "learning_rate": 0.0001999993522345841, - "loss": 46.0, - "step": 14995 - }, - { - "epoch": 1.1465489229122465, - "grad_norm": 0.0011703919153660536, - "learning_rate": 0.00019999935214812614, - "loss": 46.0, - "step": 14996 - }, - { - "epoch": 1.1466253798956363, - "grad_norm": 0.0008181068697012961, - "learning_rate": 0.0001999993520616624, - "loss": 46.0, - "step": 14997 - }, - { - "epoch": 1.1467018368790258, - "grad_norm": 0.0012540104798972607, - "learning_rate": 0.00019999935197519288, - "loss": 46.0, - "step": 14998 - }, - { - "epoch": 1.1467782938624156, - "grad_norm": 0.0034341567661613226, - "learning_rate": 0.00019999935188871762, - "loss": 46.0, - "step": 14999 - }, - { - "epoch": 1.1468547508458053, - "grad_norm": 0.011878249235451221, - "learning_rate": 0.00019999935180223656, - "loss": 46.0, - "step": 15000 - }, - { - "epoch": 1.146931207829195, - "grad_norm": 0.0005729121621698141, - "learning_rate": 0.00019999935171574975, - "loss": 46.0, - "step": 15001 - }, - { - "epoch": 1.1470076648125849, - "grad_norm": 0.0005691108526661992, - "learning_rate": 0.00019999935162925717, - "loss": 46.0, - "step": 15002 - }, - { - "epoch": 1.1470841217959746, - "grad_norm": 0.004101775120943785, - "learning_rate": 0.00019999935154275882, - "loss": 46.0, - "step": 15003 - }, - { - "epoch": 1.1471605787793642, - "grad_norm": 0.0024868238251656294, - "learning_rate": 0.0001999993514562547, - "loss": 46.0, - "step": 15004 - }, - { - "epoch": 1.147237035762754, - "grad_norm": 0.002159201307222247, - "learning_rate": 0.0001999993513697448, - "loss": 46.0, - "step": 15005 - }, - { - "epoch": 1.1473134927461437, - "grad_norm": 0.0010787959909066558, - "learning_rate": 0.00019999935128322914, - "loss": 46.0, - "step": 15006 - }, - { - "epoch": 1.1473899497295335, - "grad_norm": 0.0008919485262595117, - "learning_rate": 0.0001999993511967077, - "loss": 46.0, - "step": 15007 - }, - { - "epoch": 1.1474664067129232, - "grad_norm": 0.004554085433483124, - "learning_rate": 0.0001999993511101805, - "loss": 46.0, - "step": 15008 - }, - { - "epoch": 1.1475428636963128, - "grad_norm": 0.0007534997421316803, - "learning_rate": 0.00019999935102364753, - "loss": 46.0, - "step": 15009 - }, - { - "epoch": 1.1476193206797025, - "grad_norm": 0.0012508330401033163, - "learning_rate": 0.0001999993509371088, - "loss": 46.0, - "step": 15010 - }, - { - "epoch": 1.1476957776630923, - "grad_norm": 0.0013874380383640528, - "learning_rate": 0.00019999935085056428, - "loss": 46.0, - "step": 15011 - }, - { - "epoch": 1.147772234646482, - "grad_norm": 0.0008722414495423436, - "learning_rate": 0.000199999350764014, - "loss": 46.0, - "step": 15012 - }, - { - "epoch": 1.1478486916298718, - "grad_norm": 0.0006791193154640496, - "learning_rate": 0.00019999935067745793, - "loss": 46.0, - "step": 15013 - }, - { - "epoch": 1.1479251486132616, - "grad_norm": 0.0013268411858007312, - "learning_rate": 0.00019999935059089615, - "loss": 46.0, - "step": 15014 - }, - { - "epoch": 1.148001605596651, - "grad_norm": 0.00099524250254035, - "learning_rate": 0.00019999935050432854, - "loss": 46.0, - "step": 15015 - }, - { - "epoch": 1.1480780625800409, - "grad_norm": 0.0029780941549688578, - "learning_rate": 0.00019999935041775516, - "loss": 46.0, - "step": 15016 - }, - { - "epoch": 1.1481545195634306, - "grad_norm": 0.002819076180458069, - "learning_rate": 0.00019999935033117604, - "loss": 46.0, - "step": 15017 - }, - { - "epoch": 1.1482309765468204, - "grad_norm": 0.0006981000187806785, - "learning_rate": 0.00019999935024459116, - "loss": 46.0, - "step": 15018 - }, - { - "epoch": 1.1483074335302101, - "grad_norm": 0.0009209704003296793, - "learning_rate": 0.00019999935015800052, - "loss": 46.0, - "step": 15019 - }, - { - "epoch": 1.1483838905135997, - "grad_norm": 0.00589301623404026, - "learning_rate": 0.00019999935007140404, - "loss": 46.0, - "step": 15020 - }, - { - "epoch": 1.1484603474969894, - "grad_norm": 0.0038544354028999805, - "learning_rate": 0.00019999934998480185, - "loss": 46.0, - "step": 15021 - }, - { - "epoch": 1.1485368044803792, - "grad_norm": 0.004403791856020689, - "learning_rate": 0.00019999934989819388, - "loss": 46.0, - "step": 15022 - }, - { - "epoch": 1.148613261463769, - "grad_norm": 0.0008765459060668945, - "learning_rate": 0.00019999934981158014, - "loss": 46.0, - "step": 15023 - }, - { - "epoch": 1.1486897184471587, - "grad_norm": 0.003624152159318328, - "learning_rate": 0.00019999934972496063, - "loss": 46.0, - "step": 15024 - }, - { - "epoch": 1.1487661754305485, - "grad_norm": 0.0012451920192688704, - "learning_rate": 0.00019999934963833534, - "loss": 46.0, - "step": 15025 - }, - { - "epoch": 1.148842632413938, - "grad_norm": 0.004042399115860462, - "learning_rate": 0.0001999993495517043, - "loss": 46.0, - "step": 15026 - }, - { - "epoch": 1.1489190893973278, - "grad_norm": 0.0003782711864914745, - "learning_rate": 0.00019999934946506747, - "loss": 46.0, - "step": 15027 - }, - { - "epoch": 1.1489955463807175, - "grad_norm": 0.000507171789649874, - "learning_rate": 0.0001999993493784249, - "loss": 46.0, - "step": 15028 - }, - { - "epoch": 1.1490720033641073, - "grad_norm": 0.002404289785772562, - "learning_rate": 0.00019999934929177654, - "loss": 46.0, - "step": 15029 - }, - { - "epoch": 1.149148460347497, - "grad_norm": 0.0005509738111868501, - "learning_rate": 0.00019999934920512239, - "loss": 46.0, - "step": 15030 - }, - { - "epoch": 1.1492249173308866, - "grad_norm": 0.01184279378503561, - "learning_rate": 0.00019999934911846249, - "loss": 46.0, - "step": 15031 - }, - { - "epoch": 1.1493013743142764, - "grad_norm": 0.0009135470609180629, - "learning_rate": 0.0001999993490317968, - "loss": 46.0, - "step": 15032 - }, - { - "epoch": 1.1493778312976661, - "grad_norm": 0.0019471608102321625, - "learning_rate": 0.00019999934894512537, - "loss": 46.0, - "step": 15033 - }, - { - "epoch": 1.149454288281056, - "grad_norm": 0.0006821328424848616, - "learning_rate": 0.00019999934885844817, - "loss": 46.0, - "step": 15034 - }, - { - "epoch": 1.1495307452644457, - "grad_norm": 0.0006921023596078157, - "learning_rate": 0.0001999993487717652, - "loss": 46.0, - "step": 15035 - }, - { - "epoch": 1.1496072022478354, - "grad_norm": 0.0015824009897187352, - "learning_rate": 0.00019999934868507647, - "loss": 46.0, - "step": 15036 - }, - { - "epoch": 1.149683659231225, - "grad_norm": 0.002302803099155426, - "learning_rate": 0.00019999934859838195, - "loss": 46.0, - "step": 15037 - }, - { - "epoch": 1.1497601162146147, - "grad_norm": 0.0007952466839924455, - "learning_rate": 0.00019999934851168164, - "loss": 46.0, - "step": 15038 - }, - { - "epoch": 1.1498365731980045, - "grad_norm": 0.0022488797549158335, - "learning_rate": 0.0001999993484249756, - "loss": 46.0, - "step": 15039 - }, - { - "epoch": 1.1499130301813942, - "grad_norm": 0.0004454887821339071, - "learning_rate": 0.0001999993483382638, - "loss": 46.0, - "step": 15040 - }, - { - "epoch": 1.149989487164784, - "grad_norm": 0.001770614180713892, - "learning_rate": 0.0001999993482515462, - "loss": 46.0, - "step": 15041 - }, - { - "epoch": 1.1500659441481735, - "grad_norm": 0.0008196835406124592, - "learning_rate": 0.00019999934816482284, - "loss": 46.0, - "step": 15042 - }, - { - "epoch": 1.1501424011315633, - "grad_norm": 0.002438412979245186, - "learning_rate": 0.0001999993480780937, - "loss": 46.0, - "step": 15043 - }, - { - "epoch": 1.150218858114953, - "grad_norm": 0.0010210862383246422, - "learning_rate": 0.00019999934799135879, - "loss": 46.0, - "step": 15044 - }, - { - "epoch": 1.1502953150983428, - "grad_norm": 0.006213759537786245, - "learning_rate": 0.00019999934790461814, - "loss": 46.0, - "step": 15045 - }, - { - "epoch": 1.1503717720817326, - "grad_norm": 0.0007969638681970537, - "learning_rate": 0.00019999934781787172, - "loss": 46.0, - "step": 15046 - }, - { - "epoch": 1.1504482290651223, - "grad_norm": 0.0006172886351123452, - "learning_rate": 0.00019999934773111947, - "loss": 46.0, - "step": 15047 - }, - { - "epoch": 1.1505246860485119, - "grad_norm": 0.0005070014740340412, - "learning_rate": 0.0001999993476443615, - "loss": 46.0, - "step": 15048 - }, - { - "epoch": 1.1506011430319016, - "grad_norm": 0.0013464834773913026, - "learning_rate": 0.00019999934755759777, - "loss": 46.0, - "step": 15049 - }, - { - "epoch": 1.1506776000152914, - "grad_norm": 0.001961823320016265, - "learning_rate": 0.00019999934747082825, - "loss": 46.0, - "step": 15050 - }, - { - "epoch": 1.1507540569986812, - "grad_norm": 0.003623869502916932, - "learning_rate": 0.00019999934738405294, - "loss": 46.0, - "step": 15051 - }, - { - "epoch": 1.150830513982071, - "grad_norm": 0.0025356500409543514, - "learning_rate": 0.0001999993472972719, - "loss": 46.0, - "step": 15052 - }, - { - "epoch": 1.1509069709654605, - "grad_norm": 0.0015827063471078873, - "learning_rate": 0.00019999934721048507, - "loss": 46.0, - "step": 15053 - }, - { - "epoch": 1.1509834279488502, - "grad_norm": 0.0007500315550714731, - "learning_rate": 0.0001999993471236925, - "loss": 46.0, - "step": 15054 - }, - { - "epoch": 1.15105988493224, - "grad_norm": 0.0035137522500008345, - "learning_rate": 0.00019999934703689412, - "loss": 46.0, - "step": 15055 - }, - { - "epoch": 1.1511363419156297, - "grad_norm": 0.000715860107447952, - "learning_rate": 0.00019999934695009, - "loss": 46.0, - "step": 15056 - }, - { - "epoch": 1.1512127988990195, - "grad_norm": 0.001137072453275323, - "learning_rate": 0.0001999993468632801, - "loss": 46.0, - "step": 15057 - }, - { - "epoch": 1.1512892558824093, - "grad_norm": 0.010411955416202545, - "learning_rate": 0.0001999993467764644, - "loss": 46.0, - "step": 15058 - }, - { - "epoch": 1.1513657128657988, - "grad_norm": 0.004144825506955385, - "learning_rate": 0.00019999934668964294, - "loss": 46.0, - "step": 15059 - }, - { - "epoch": 1.1514421698491886, - "grad_norm": 0.0005411454476416111, - "learning_rate": 0.00019999934660281575, - "loss": 46.0, - "step": 15060 - }, - { - "epoch": 1.1515186268325783, - "grad_norm": 0.0057448456063866615, - "learning_rate": 0.00019999934651598276, - "loss": 46.0, - "step": 15061 - }, - { - "epoch": 1.151595083815968, - "grad_norm": 0.009231047704815865, - "learning_rate": 0.00019999934642914402, - "loss": 46.0, - "step": 15062 - }, - { - "epoch": 1.1516715407993579, - "grad_norm": 0.00044466342660598457, - "learning_rate": 0.0001999993463422995, - "loss": 46.0, - "step": 15063 - }, - { - "epoch": 1.1517479977827474, - "grad_norm": 0.0007833400159142911, - "learning_rate": 0.0001999993462554492, - "loss": 46.0, - "step": 15064 - }, - { - "epoch": 1.1518244547661372, - "grad_norm": 0.0011725116055458784, - "learning_rate": 0.00019999934616859316, - "loss": 46.0, - "step": 15065 - }, - { - "epoch": 1.151900911749527, - "grad_norm": 0.0009265861008316278, - "learning_rate": 0.00019999934608173133, - "loss": 46.0, - "step": 15066 - }, - { - "epoch": 1.1519773687329167, - "grad_norm": 0.0018999343737959862, - "learning_rate": 0.00019999934599486375, - "loss": 46.0, - "step": 15067 - }, - { - "epoch": 1.1520538257163064, - "grad_norm": 0.003532078582793474, - "learning_rate": 0.00019999934590799037, - "loss": 46.0, - "step": 15068 - }, - { - "epoch": 1.1521302826996962, - "grad_norm": 0.0007617921219207346, - "learning_rate": 0.00019999934582111122, - "loss": 46.0, - "step": 15069 - }, - { - "epoch": 1.1522067396830857, - "grad_norm": 0.00045757039333693683, - "learning_rate": 0.00019999934573422632, - "loss": 46.0, - "step": 15070 - }, - { - "epoch": 1.1522831966664755, - "grad_norm": 0.002881947671994567, - "learning_rate": 0.00019999934564733564, - "loss": 46.0, - "step": 15071 - }, - { - "epoch": 1.1523596536498653, - "grad_norm": 0.0039932820945978165, - "learning_rate": 0.0001999993455604392, - "loss": 46.0, - "step": 15072 - }, - { - "epoch": 1.152436110633255, - "grad_norm": 0.001974108163267374, - "learning_rate": 0.00019999934547353698, - "loss": 46.0, - "step": 15073 - }, - { - "epoch": 1.1525125676166448, - "grad_norm": 0.0056515466421842575, - "learning_rate": 0.00019999934538662899, - "loss": 46.0, - "step": 15074 - }, - { - "epoch": 1.1525890246000343, - "grad_norm": 0.0034237743820995092, - "learning_rate": 0.00019999934529971525, - "loss": 46.0, - "step": 15075 - }, - { - "epoch": 1.152665481583424, - "grad_norm": 0.0010519057977944613, - "learning_rate": 0.00019999934521279573, - "loss": 46.0, - "step": 15076 - }, - { - "epoch": 1.1527419385668138, - "grad_norm": 0.000748187187127769, - "learning_rate": 0.00019999934512587045, - "loss": 46.0, - "step": 15077 - }, - { - "epoch": 1.1528183955502036, - "grad_norm": 0.002633391646668315, - "learning_rate": 0.0001999993450389394, - "loss": 46.0, - "step": 15078 - }, - { - "epoch": 1.1528948525335934, - "grad_norm": 0.0005842336686328053, - "learning_rate": 0.00019999934495200253, - "loss": 46.0, - "step": 15079 - }, - { - "epoch": 1.1529713095169831, - "grad_norm": 0.004706832114607096, - "learning_rate": 0.00019999934486505992, - "loss": 46.0, - "step": 15080 - }, - { - "epoch": 1.1530477665003727, - "grad_norm": 0.001201618812046945, - "learning_rate": 0.00019999934477811157, - "loss": 46.0, - "step": 15081 - }, - { - "epoch": 1.1531242234837624, - "grad_norm": 0.0020892757456749678, - "learning_rate": 0.00019999934469115745, - "loss": 46.0, - "step": 15082 - }, - { - "epoch": 1.1532006804671522, - "grad_norm": 0.0012676959158852696, - "learning_rate": 0.00019999934460419755, - "loss": 46.0, - "step": 15083 - }, - { - "epoch": 1.153277137450542, - "grad_norm": 0.0008154280949383974, - "learning_rate": 0.00019999934451723185, - "loss": 46.0, - "step": 15084 - }, - { - "epoch": 1.1533535944339317, - "grad_norm": 0.0011737341992557049, - "learning_rate": 0.0001999993444302604, - "loss": 46.0, - "step": 15085 - }, - { - "epoch": 1.1534300514173212, - "grad_norm": 0.0024835539516061544, - "learning_rate": 0.00019999934434328318, - "loss": 46.0, - "step": 15086 - }, - { - "epoch": 1.153506508400711, - "grad_norm": 0.0006917276768945158, - "learning_rate": 0.0001999993442563002, - "loss": 46.0, - "step": 15087 - }, - { - "epoch": 1.1535829653841008, - "grad_norm": 0.003550690831616521, - "learning_rate": 0.00019999934416931145, - "loss": 46.0, - "step": 15088 - }, - { - "epoch": 1.1536594223674905, - "grad_norm": 0.0011154643725603819, - "learning_rate": 0.00019999934408231694, - "loss": 46.0, - "step": 15089 - }, - { - "epoch": 1.1537358793508803, - "grad_norm": 0.00127691391389817, - "learning_rate": 0.00019999934399531663, - "loss": 46.0, - "step": 15090 - }, - { - "epoch": 1.15381233633427, - "grad_norm": 0.0006531690014526248, - "learning_rate": 0.00019999934390831057, - "loss": 46.0, - "step": 15091 - }, - { - "epoch": 1.1538887933176596, - "grad_norm": 0.010128902271389961, - "learning_rate": 0.00019999934382129874, - "loss": 46.0, - "step": 15092 - }, - { - "epoch": 1.1539652503010494, - "grad_norm": 0.006677805911749601, - "learning_rate": 0.00019999934373428113, - "loss": 46.0, - "step": 15093 - }, - { - "epoch": 1.1540417072844391, - "grad_norm": 0.0012262585805729032, - "learning_rate": 0.00019999934364725775, - "loss": 46.0, - "step": 15094 - }, - { - "epoch": 1.1541181642678289, - "grad_norm": 0.0006395959062501788, - "learning_rate": 0.00019999934356022863, - "loss": 46.0, - "step": 15095 - }, - { - "epoch": 1.1541946212512184, - "grad_norm": 0.0011814252939075232, - "learning_rate": 0.0001999993434731937, - "loss": 46.0, - "step": 15096 - }, - { - "epoch": 1.1542710782346082, - "grad_norm": 0.0007847616798244417, - "learning_rate": 0.00019999934338615303, - "loss": 46.0, - "step": 15097 - }, - { - "epoch": 1.154347535217998, - "grad_norm": 0.001342223258689046, - "learning_rate": 0.00019999934329910656, - "loss": 46.0, - "step": 15098 - }, - { - "epoch": 1.1544239922013877, - "grad_norm": 0.0013589911395683885, - "learning_rate": 0.0001999993432120544, - "loss": 46.0, - "step": 15099 - }, - { - "epoch": 1.1545004491847775, - "grad_norm": 0.0010037136962637305, - "learning_rate": 0.00019999934312499637, - "loss": 46.0, - "step": 15100 - }, - { - "epoch": 1.1545769061681672, - "grad_norm": 0.0028625307604670525, - "learning_rate": 0.0001999993430379326, - "loss": 46.0, - "step": 15101 - }, - { - "epoch": 1.154653363151557, - "grad_norm": 0.005549272056668997, - "learning_rate": 0.0001999993429508631, - "loss": 46.0, - "step": 15102 - }, - { - "epoch": 1.1547298201349465, - "grad_norm": 0.0011536511592566967, - "learning_rate": 0.0001999993428637878, - "loss": 46.0, - "step": 15103 - }, - { - "epoch": 1.1548062771183363, - "grad_norm": 0.0021739182993769646, - "learning_rate": 0.00019999934277670673, - "loss": 46.0, - "step": 15104 - }, - { - "epoch": 1.154882734101726, - "grad_norm": 0.0008432441391050816, - "learning_rate": 0.0001999993426896199, - "loss": 46.0, - "step": 15105 - }, - { - "epoch": 1.1549591910851158, - "grad_norm": 0.000681453850120306, - "learning_rate": 0.0001999993426025273, - "loss": 46.0, - "step": 15106 - }, - { - "epoch": 1.1550356480685053, - "grad_norm": 0.0010257838293910027, - "learning_rate": 0.0001999993425154289, - "loss": 46.0, - "step": 15107 - }, - { - "epoch": 1.155112105051895, - "grad_norm": 0.007447650656104088, - "learning_rate": 0.00019999934242832476, - "loss": 46.0, - "step": 15108 - }, - { - "epoch": 1.1551885620352849, - "grad_norm": 0.0004147738800384104, - "learning_rate": 0.00019999934234121486, - "loss": 46.0, - "step": 15109 - }, - { - "epoch": 1.1552650190186746, - "grad_norm": 0.0017721919575706124, - "learning_rate": 0.0001999993422540992, - "loss": 46.0, - "step": 15110 - }, - { - "epoch": 1.1553414760020644, - "grad_norm": 0.005435622297227383, - "learning_rate": 0.00019999934216697774, - "loss": 46.0, - "step": 15111 - }, - { - "epoch": 1.1554179329854541, - "grad_norm": 0.002470600651577115, - "learning_rate": 0.0001999993420798505, - "loss": 46.0, - "step": 15112 - }, - { - "epoch": 1.1554943899688437, - "grad_norm": 0.001980425789952278, - "learning_rate": 0.00019999934199271753, - "loss": 46.0, - "step": 15113 - }, - { - "epoch": 1.1555708469522334, - "grad_norm": 0.0009720762027427554, - "learning_rate": 0.00019999934190557874, - "loss": 46.0, - "step": 15114 - }, - { - "epoch": 1.1556473039356232, - "grad_norm": 0.000672674854286015, - "learning_rate": 0.00019999934181843423, - "loss": 46.0, - "step": 15115 - }, - { - "epoch": 1.155723760919013, - "grad_norm": 0.0014648701762780547, - "learning_rate": 0.00019999934173128394, - "loss": 46.0, - "step": 15116 - }, - { - "epoch": 1.1558002179024027, - "grad_norm": 0.005455412901937962, - "learning_rate": 0.00019999934164412786, - "loss": 46.0, - "step": 15117 - }, - { - "epoch": 1.1558766748857923, - "grad_norm": 0.0016148581635206938, - "learning_rate": 0.00019999934155696603, - "loss": 46.0, - "step": 15118 - }, - { - "epoch": 1.155953131869182, - "grad_norm": 0.006246815901249647, - "learning_rate": 0.00019999934146979842, - "loss": 46.0, - "step": 15119 - }, - { - "epoch": 1.1560295888525718, - "grad_norm": 0.0014239230658859015, - "learning_rate": 0.00019999934138262504, - "loss": 46.0, - "step": 15120 - }, - { - "epoch": 1.1561060458359615, - "grad_norm": 0.0011944967554882169, - "learning_rate": 0.00019999934129544592, - "loss": 46.0, - "step": 15121 - }, - { - "epoch": 1.1561825028193513, - "grad_norm": 0.0007652255007997155, - "learning_rate": 0.000199999341208261, - "loss": 46.0, - "step": 15122 - }, - { - "epoch": 1.156258959802741, - "grad_norm": 0.0005660508759319782, - "learning_rate": 0.0001999993411210703, - "loss": 46.0, - "step": 15123 - }, - { - "epoch": 1.1563354167861306, - "grad_norm": 0.0013085348764434457, - "learning_rate": 0.00019999934103387385, - "loss": 46.0, - "step": 15124 - }, - { - "epoch": 1.1564118737695204, - "grad_norm": 0.0021195111330598593, - "learning_rate": 0.0001999993409466716, - "loss": 46.0, - "step": 15125 - }, - { - "epoch": 1.1564883307529101, - "grad_norm": 0.0313098318874836, - "learning_rate": 0.00019999934085946364, - "loss": 46.0, - "step": 15126 - }, - { - "epoch": 1.1565647877363, - "grad_norm": 0.009222094900906086, - "learning_rate": 0.00019999934077224988, - "loss": 46.0, - "step": 15127 - }, - { - "epoch": 1.1566412447196897, - "grad_norm": 0.0013312717201188207, - "learning_rate": 0.0001999993406850303, - "loss": 46.0, - "step": 15128 - }, - { - "epoch": 1.1567177017030792, - "grad_norm": 0.0012715579941868782, - "learning_rate": 0.00019999934059780503, - "loss": 46.0, - "step": 15129 - }, - { - "epoch": 1.156794158686469, - "grad_norm": 0.0012400245759636164, - "learning_rate": 0.00019999934051057397, - "loss": 46.0, - "step": 15130 - }, - { - "epoch": 1.1568706156698587, - "grad_norm": 0.0012981371255591512, - "learning_rate": 0.0001999993404233371, - "loss": 46.0, - "step": 15131 - }, - { - "epoch": 1.1569470726532485, - "grad_norm": 0.004702863283455372, - "learning_rate": 0.0001999993403360945, - "loss": 46.0, - "step": 15132 - }, - { - "epoch": 1.1570235296366382, - "grad_norm": 0.0005895852227695286, - "learning_rate": 0.00019999934024884613, - "loss": 46.0, - "step": 15133 - }, - { - "epoch": 1.157099986620028, - "grad_norm": 0.004708996042609215, - "learning_rate": 0.00019999934016159198, - "loss": 46.0, - "step": 15134 - }, - { - "epoch": 1.1571764436034175, - "grad_norm": 0.001433222321793437, - "learning_rate": 0.00019999934007433208, - "loss": 46.0, - "step": 15135 - }, - { - "epoch": 1.1572529005868073, - "grad_norm": 0.000706484483089298, - "learning_rate": 0.00019999933998706638, - "loss": 46.0, - "step": 15136 - }, - { - "epoch": 1.157329357570197, - "grad_norm": 0.00033455152879469097, - "learning_rate": 0.00019999933989979494, - "loss": 46.0, - "step": 15137 - }, - { - "epoch": 1.1574058145535868, - "grad_norm": 0.0008692506817169487, - "learning_rate": 0.0001999993398125177, - "loss": 46.0, - "step": 15138 - }, - { - "epoch": 1.1574822715369766, - "grad_norm": 0.002206777920946479, - "learning_rate": 0.0001999993397252347, - "loss": 46.0, - "step": 15139 - }, - { - "epoch": 1.1575587285203661, - "grad_norm": 0.0025811726227402687, - "learning_rate": 0.00019999933963794594, - "loss": 46.0, - "step": 15140 - }, - { - "epoch": 1.1576351855037559, - "grad_norm": 0.0015727608697488904, - "learning_rate": 0.0001999993395506514, - "loss": 46.0, - "step": 15141 - }, - { - "epoch": 1.1577116424871456, - "grad_norm": 0.0016923267394304276, - "learning_rate": 0.00019999933946335109, - "loss": 46.0, - "step": 15142 - }, - { - "epoch": 1.1577880994705354, - "grad_norm": 0.0017254312988370657, - "learning_rate": 0.00019999933937604503, - "loss": 46.0, - "step": 15143 - }, - { - "epoch": 1.1578645564539252, - "grad_norm": 0.0008460857206955552, - "learning_rate": 0.00019999933928873317, - "loss": 46.0, - "step": 15144 - }, - { - "epoch": 1.157941013437315, - "grad_norm": 0.0011523258872330189, - "learning_rate": 0.0001999993392014156, - "loss": 46.0, - "step": 15145 - }, - { - "epoch": 1.1580174704207045, - "grad_norm": 0.0005783882807008922, - "learning_rate": 0.0001999993391140922, - "loss": 46.0, - "step": 15146 - }, - { - "epoch": 1.1580939274040942, - "grad_norm": 0.0016667454037815332, - "learning_rate": 0.00019999933902676304, - "loss": 46.0, - "step": 15147 - }, - { - "epoch": 1.158170384387484, - "grad_norm": 0.001644982141442597, - "learning_rate": 0.00019999933893942814, - "loss": 46.0, - "step": 15148 - }, - { - "epoch": 1.1582468413708737, - "grad_norm": 0.0028538370970636606, - "learning_rate": 0.00019999933885208741, - "loss": 46.0, - "step": 15149 - }, - { - "epoch": 1.1583232983542635, - "grad_norm": 0.0012513879919424653, - "learning_rate": 0.00019999933876474097, - "loss": 46.0, - "step": 15150 - }, - { - "epoch": 1.158399755337653, - "grad_norm": 0.001760341809131205, - "learning_rate": 0.00019999933867738875, - "loss": 46.0, - "step": 15151 - }, - { - "epoch": 1.1584762123210428, - "grad_norm": 0.0009005492902360857, - "learning_rate": 0.00019999933859003076, - "loss": 46.0, - "step": 15152 - }, - { - "epoch": 1.1585526693044326, - "grad_norm": 0.01334104873239994, - "learning_rate": 0.00019999933850266697, - "loss": 46.0, - "step": 15153 - }, - { - "epoch": 1.1586291262878223, - "grad_norm": 0.002504281932488084, - "learning_rate": 0.00019999933841529743, - "loss": 46.0, - "step": 15154 - }, - { - "epoch": 1.158705583271212, - "grad_norm": 0.0009778373641893268, - "learning_rate": 0.00019999933832792212, - "loss": 46.0, - "step": 15155 - }, - { - "epoch": 1.1587820402546019, - "grad_norm": 0.0006808859179727733, - "learning_rate": 0.00019999933824054104, - "loss": 46.0, - "step": 15156 - }, - { - "epoch": 1.1588584972379914, - "grad_norm": 0.0021752764005213976, - "learning_rate": 0.0001999993381531542, - "loss": 46.0, - "step": 15157 - }, - { - "epoch": 1.1589349542213812, - "grad_norm": 0.0010882660280913115, - "learning_rate": 0.0001999993380657616, - "loss": 46.0, - "step": 15158 - }, - { - "epoch": 1.159011411204771, - "grad_norm": 0.000635226140730083, - "learning_rate": 0.0001999993379783632, - "loss": 46.0, - "step": 15159 - }, - { - "epoch": 1.1590878681881607, - "grad_norm": 0.0009248583228327334, - "learning_rate": 0.00019999933789095905, - "loss": 46.0, - "step": 15160 - }, - { - "epoch": 1.1591643251715504, - "grad_norm": 0.0019124189857393503, - "learning_rate": 0.00019999933780354912, - "loss": 46.0, - "step": 15161 - }, - { - "epoch": 1.15924078215494, - "grad_norm": 0.0016719320556148887, - "learning_rate": 0.00019999933771613345, - "loss": 46.0, - "step": 15162 - }, - { - "epoch": 1.1593172391383297, - "grad_norm": 0.0021364176645874977, - "learning_rate": 0.00019999933762871198, - "loss": 46.0, - "step": 15163 - }, - { - "epoch": 1.1593936961217195, - "grad_norm": 0.0009226591791957617, - "learning_rate": 0.00019999933754128474, - "loss": 46.0, - "step": 15164 - }, - { - "epoch": 1.1594701531051093, - "grad_norm": 0.0008954951772466302, - "learning_rate": 0.00019999933745385175, - "loss": 46.0, - "step": 15165 - }, - { - "epoch": 1.159546610088499, - "grad_norm": 0.02052871510386467, - "learning_rate": 0.00019999933736641298, - "loss": 46.0, - "step": 15166 - }, - { - "epoch": 1.1596230670718888, - "grad_norm": 0.00204788101837039, - "learning_rate": 0.00019999933727896845, - "loss": 46.0, - "step": 15167 - }, - { - "epoch": 1.1596995240552783, - "grad_norm": 0.0005477396189235151, - "learning_rate": 0.00019999933719151814, - "loss": 46.0, - "step": 15168 - }, - { - "epoch": 1.159775981038668, - "grad_norm": 0.008746067062020302, - "learning_rate": 0.00019999933710406208, - "loss": 46.0, - "step": 15169 - }, - { - "epoch": 1.1598524380220578, - "grad_norm": 0.012607166543602943, - "learning_rate": 0.00019999933701660022, - "loss": 46.0, - "step": 15170 - }, - { - "epoch": 1.1599288950054476, - "grad_norm": 0.001968152355402708, - "learning_rate": 0.0001999993369291326, - "loss": 46.0, - "step": 15171 - }, - { - "epoch": 1.1600053519888374, - "grad_norm": 0.0019370965892449021, - "learning_rate": 0.00019999933684165921, - "loss": 46.0, - "step": 15172 - }, - { - "epoch": 1.160081808972227, - "grad_norm": 0.0014788837870582938, - "learning_rate": 0.00019999933675418006, - "loss": 46.0, - "step": 15173 - }, - { - "epoch": 1.1601582659556167, - "grad_norm": 0.002217283006757498, - "learning_rate": 0.00019999933666669514, - "loss": 46.0, - "step": 15174 - }, - { - "epoch": 1.1602347229390064, - "grad_norm": 0.0005758915795013309, - "learning_rate": 0.00019999933657920444, - "loss": 46.0, - "step": 15175 - }, - { - "epoch": 1.1603111799223962, - "grad_norm": 0.0012430837377905846, - "learning_rate": 0.00019999933649170797, - "loss": 46.0, - "step": 15176 - }, - { - "epoch": 1.160387636905786, - "grad_norm": 0.001995112979784608, - "learning_rate": 0.00019999933640420573, - "loss": 46.0, - "step": 15177 - }, - { - "epoch": 1.1604640938891757, - "grad_norm": 0.001302272779867053, - "learning_rate": 0.00019999933631669774, - "loss": 46.0, - "step": 15178 - }, - { - "epoch": 1.1605405508725652, - "grad_norm": 0.0006122771301306784, - "learning_rate": 0.00019999933622918397, - "loss": 46.0, - "step": 15179 - }, - { - "epoch": 1.160617007855955, - "grad_norm": 0.0008690704125910997, - "learning_rate": 0.0001999993361416644, - "loss": 46.0, - "step": 15180 - }, - { - "epoch": 1.1606934648393448, - "grad_norm": 0.003428082913160324, - "learning_rate": 0.00019999933605413913, - "loss": 46.0, - "step": 15181 - }, - { - "epoch": 1.1607699218227345, - "grad_norm": 0.0006868264172226191, - "learning_rate": 0.00019999933596660804, - "loss": 46.0, - "step": 15182 - }, - { - "epoch": 1.1608463788061243, - "grad_norm": 0.0018730575684458017, - "learning_rate": 0.00019999933587907119, - "loss": 46.0, - "step": 15183 - }, - { - "epoch": 1.1609228357895138, - "grad_norm": 0.0013282742584124207, - "learning_rate": 0.00019999933579152858, - "loss": 46.0, - "step": 15184 - }, - { - "epoch": 1.1609992927729036, - "grad_norm": 0.0035859534982591867, - "learning_rate": 0.00019999933570398018, - "loss": 46.0, - "step": 15185 - }, - { - "epoch": 1.1610757497562934, - "grad_norm": 0.000959887751378119, - "learning_rate": 0.00019999933561642603, - "loss": 46.0, - "step": 15186 - }, - { - "epoch": 1.1611522067396831, - "grad_norm": 0.0009075164562091231, - "learning_rate": 0.0001999993355288661, - "loss": 46.0, - "step": 15187 - }, - { - "epoch": 1.1612286637230729, - "grad_norm": 0.0012018652632832527, - "learning_rate": 0.00019999933544130038, - "loss": 46.0, - "step": 15188 - }, - { - "epoch": 1.1613051207064626, - "grad_norm": 0.016878055408596992, - "learning_rate": 0.0001999993353537289, - "loss": 46.0, - "step": 15189 - }, - { - "epoch": 1.1613815776898522, - "grad_norm": 0.0034380739089101553, - "learning_rate": 0.0001999993352661517, - "loss": 46.0, - "step": 15190 - }, - { - "epoch": 1.161458034673242, - "grad_norm": 0.0026983842253684998, - "learning_rate": 0.00019999933517856868, - "loss": 46.0, - "step": 15191 - }, - { - "epoch": 1.1615344916566317, - "grad_norm": 0.0031875669956207275, - "learning_rate": 0.00019999933509097991, - "loss": 46.0, - "step": 15192 - }, - { - "epoch": 1.1616109486400215, - "grad_norm": 0.0016166269779205322, - "learning_rate": 0.00019999933500338538, - "loss": 46.0, - "step": 15193 - }, - { - "epoch": 1.1616874056234112, - "grad_norm": 0.00036780114169232547, - "learning_rate": 0.00019999933491578507, - "loss": 46.0, - "step": 15194 - }, - { - "epoch": 1.1617638626068008, - "grad_norm": 0.001250248751603067, - "learning_rate": 0.00019999933482817896, - "loss": 46.0, - "step": 15195 - }, - { - "epoch": 1.1618403195901905, - "grad_norm": 0.0019274428486824036, - "learning_rate": 0.00019999933474056713, - "loss": 46.0, - "step": 15196 - }, - { - "epoch": 1.1619167765735803, - "grad_norm": 0.0012433610390871763, - "learning_rate": 0.0001999993346529495, - "loss": 46.0, - "step": 15197 - }, - { - "epoch": 1.16199323355697, - "grad_norm": 0.0017420609947293997, - "learning_rate": 0.00019999933456532612, - "loss": 46.0, - "step": 15198 - }, - { - "epoch": 1.1620696905403598, - "grad_norm": 0.0038535750936716795, - "learning_rate": 0.00019999933447769697, - "loss": 46.0, - "step": 15199 - }, - { - "epoch": 1.1621461475237496, - "grad_norm": 0.0011651928070932627, - "learning_rate": 0.00019999933439006202, - "loss": 46.0, - "step": 15200 - }, - { - "epoch": 1.162222604507139, - "grad_norm": 0.003509982954710722, - "learning_rate": 0.00019999933430242132, - "loss": 46.0, - "step": 15201 - }, - { - "epoch": 1.1622990614905289, - "grad_norm": 0.010439963079988956, - "learning_rate": 0.00019999933421477485, - "loss": 46.0, - "step": 15202 - }, - { - "epoch": 1.1623755184739186, - "grad_norm": 0.002135409275069833, - "learning_rate": 0.0001999993341271226, - "loss": 46.0, - "step": 15203 - }, - { - "epoch": 1.1624519754573084, - "grad_norm": 0.001027404796332121, - "learning_rate": 0.00019999933403946462, - "loss": 46.0, - "step": 15204 - }, - { - "epoch": 1.1625284324406981, - "grad_norm": 0.0007074300665408373, - "learning_rate": 0.00019999933395180083, - "loss": 46.0, - "step": 15205 - }, - { - "epoch": 1.1626048894240877, - "grad_norm": 0.001136760925874114, - "learning_rate": 0.00019999933386413127, - "loss": 46.0, - "step": 15206 - }, - { - "epoch": 1.1626813464074774, - "grad_norm": 0.0005381845403462648, - "learning_rate": 0.00019999933377645599, - "loss": 46.0, - "step": 15207 - }, - { - "epoch": 1.1627578033908672, - "grad_norm": 0.0016024968354031444, - "learning_rate": 0.00019999933368877487, - "loss": 46.0, - "step": 15208 - }, - { - "epoch": 1.162834260374257, - "grad_norm": 0.0007701287395320833, - "learning_rate": 0.00019999933360108805, - "loss": 46.0, - "step": 15209 - }, - { - "epoch": 1.1629107173576467, - "grad_norm": 0.0010702564613893628, - "learning_rate": 0.00019999933351339542, - "loss": 46.0, - "step": 15210 - }, - { - "epoch": 1.1629871743410365, - "grad_norm": 0.002136446302756667, - "learning_rate": 0.000199999333425697, - "loss": 46.0, - "step": 15211 - }, - { - "epoch": 1.163063631324426, - "grad_norm": 0.0020883698016405106, - "learning_rate": 0.00019999933333799286, - "loss": 46.0, - "step": 15212 - }, - { - "epoch": 1.1631400883078158, - "grad_norm": 0.002453526249155402, - "learning_rate": 0.00019999933325028294, - "loss": 46.0, - "step": 15213 - }, - { - "epoch": 1.1632165452912056, - "grad_norm": 0.0009737057844176888, - "learning_rate": 0.00019999933316256722, - "loss": 46.0, - "step": 15214 - }, - { - "epoch": 1.1632930022745953, - "grad_norm": 0.0015718861250206828, - "learning_rate": 0.00019999933307484575, - "loss": 46.0, - "step": 15215 - }, - { - "epoch": 1.163369459257985, - "grad_norm": 0.0008530642371624708, - "learning_rate": 0.0001999993329871185, - "loss": 46.0, - "step": 15216 - }, - { - "epoch": 1.1634459162413746, - "grad_norm": 0.0011651882668957114, - "learning_rate": 0.00019999933289938551, - "loss": 46.0, - "step": 15217 - }, - { - "epoch": 1.1635223732247644, - "grad_norm": 0.003901093266904354, - "learning_rate": 0.00019999933281164673, - "loss": 46.0, - "step": 15218 - }, - { - "epoch": 1.1635988302081541, - "grad_norm": 0.0011217995779588819, - "learning_rate": 0.00019999933272390216, - "loss": 46.0, - "step": 15219 - }, - { - "epoch": 1.163675287191544, - "grad_norm": 0.0007204018183983862, - "learning_rate": 0.00019999933263615188, - "loss": 46.0, - "step": 15220 - }, - { - "epoch": 1.1637517441749337, - "grad_norm": 0.0008575300453230739, - "learning_rate": 0.00019999933254839577, - "loss": 46.0, - "step": 15221 - }, - { - "epoch": 1.1638282011583234, - "grad_norm": 0.0011869131121784449, - "learning_rate": 0.0001999993324606339, - "loss": 46.0, - "step": 15222 - }, - { - "epoch": 1.163904658141713, - "grad_norm": 0.0005873568588867784, - "learning_rate": 0.0001999993323728663, - "loss": 46.0, - "step": 15223 - }, - { - "epoch": 1.1639811151251027, - "grad_norm": 0.0010079137282446027, - "learning_rate": 0.00019999933228509288, - "loss": 46.0, - "step": 15224 - }, - { - "epoch": 1.1640575721084925, - "grad_norm": 0.0026000505313277245, - "learning_rate": 0.00019999933219731373, - "loss": 46.0, - "step": 15225 - }, - { - "epoch": 1.1641340290918822, - "grad_norm": 0.0026131710037589073, - "learning_rate": 0.00019999933210952878, - "loss": 46.0, - "step": 15226 - }, - { - "epoch": 1.1642104860752718, - "grad_norm": 0.0009959496092051268, - "learning_rate": 0.00019999933202173809, - "loss": 46.0, - "step": 15227 - }, - { - "epoch": 1.1642869430586615, - "grad_norm": 0.0009695867775008082, - "learning_rate": 0.00019999933193394162, - "loss": 46.0, - "step": 15228 - }, - { - "epoch": 1.1643634000420513, - "grad_norm": 0.0026348461396992207, - "learning_rate": 0.00019999933184613937, - "loss": 46.0, - "step": 15229 - }, - { - "epoch": 1.164439857025441, - "grad_norm": 0.0005475819925777614, - "learning_rate": 0.00019999933175833136, - "loss": 46.0, - "step": 15230 - }, - { - "epoch": 1.1645163140088308, - "grad_norm": 0.0009518524166196585, - "learning_rate": 0.0001999993316705176, - "loss": 46.0, - "step": 15231 - }, - { - "epoch": 1.1645927709922206, - "grad_norm": 0.0016149907605722547, - "learning_rate": 0.000199999331582698, - "loss": 46.0, - "step": 15232 - }, - { - "epoch": 1.1646692279756103, - "grad_norm": 0.0010324977338314056, - "learning_rate": 0.0001999993314948727, - "loss": 46.0, - "step": 15233 - }, - { - "epoch": 1.1647456849589999, - "grad_norm": 0.0015724231489002705, - "learning_rate": 0.00019999933140704161, - "loss": 46.0, - "step": 15234 - }, - { - "epoch": 1.1648221419423896, - "grad_norm": 0.0008942470885813236, - "learning_rate": 0.00019999933131920476, - "loss": 46.0, - "step": 15235 - }, - { - "epoch": 1.1648985989257794, - "grad_norm": 0.0013806776842102408, - "learning_rate": 0.00019999933123136213, - "loss": 46.0, - "step": 15236 - }, - { - "epoch": 1.1649750559091692, - "grad_norm": 0.001410484779626131, - "learning_rate": 0.00019999933114351373, - "loss": 46.0, - "step": 15237 - }, - { - "epoch": 1.1650515128925587, - "grad_norm": 0.0005561124416999519, - "learning_rate": 0.00019999933105565955, - "loss": 46.0, - "step": 15238 - }, - { - "epoch": 1.1651279698759485, - "grad_norm": 0.0013579585356637836, - "learning_rate": 0.0001999993309677996, - "loss": 46.0, - "step": 15239 - }, - { - "epoch": 1.1652044268593382, - "grad_norm": 0.004409249406307936, - "learning_rate": 0.00019999933087993393, - "loss": 46.0, - "step": 15240 - }, - { - "epoch": 1.165280883842728, - "grad_norm": 0.0008311390993185341, - "learning_rate": 0.00019999933079206246, - "loss": 46.0, - "step": 15241 - }, - { - "epoch": 1.1653573408261177, - "grad_norm": 0.0010167277650907636, - "learning_rate": 0.0001999993307041852, - "loss": 46.0, - "step": 15242 - }, - { - "epoch": 1.1654337978095075, - "grad_norm": 0.001312794047407806, - "learning_rate": 0.00019999933061630218, - "loss": 46.0, - "step": 15243 - }, - { - "epoch": 1.165510254792897, - "grad_norm": 0.002374960808083415, - "learning_rate": 0.0001999993305284134, - "loss": 46.0, - "step": 15244 - }, - { - "epoch": 1.1655867117762868, - "grad_norm": 0.001582706463523209, - "learning_rate": 0.00019999933044051885, - "loss": 46.0, - "step": 15245 - }, - { - "epoch": 1.1656631687596766, - "grad_norm": 0.0022503496147692204, - "learning_rate": 0.00019999933035261852, - "loss": 46.0, - "step": 15246 - }, - { - "epoch": 1.1657396257430663, - "grad_norm": 0.001016007736325264, - "learning_rate": 0.00019999933026471244, - "loss": 46.0, - "step": 15247 - }, - { - "epoch": 1.165816082726456, - "grad_norm": 0.0012851026840507984, - "learning_rate": 0.00019999933017680058, - "loss": 46.0, - "step": 15248 - }, - { - "epoch": 1.1658925397098456, - "grad_norm": 0.0005490654148161411, - "learning_rate": 0.00019999933008888295, - "loss": 46.0, - "step": 15249 - }, - { - "epoch": 1.1659689966932354, - "grad_norm": 0.003530229674652219, - "learning_rate": 0.00019999933000095955, - "loss": 46.0, - "step": 15250 - }, - { - "epoch": 1.1660454536766252, - "grad_norm": 0.0014833117602393031, - "learning_rate": 0.00019999932991303037, - "loss": 46.0, - "step": 15251 - }, - { - "epoch": 1.166121910660015, - "grad_norm": 0.0028618767391890287, - "learning_rate": 0.00019999932982509542, - "loss": 46.0, - "step": 15252 - }, - { - "epoch": 1.1661983676434047, - "grad_norm": 0.001250334084033966, - "learning_rate": 0.00019999932973715473, - "loss": 46.0, - "step": 15253 - }, - { - "epoch": 1.1662748246267944, - "grad_norm": 0.0014182317536324263, - "learning_rate": 0.00019999932964920823, - "loss": 46.0, - "step": 15254 - }, - { - "epoch": 1.166351281610184, - "grad_norm": 0.0007662430871278048, - "learning_rate": 0.000199999329561256, - "loss": 46.0, - "step": 15255 - }, - { - "epoch": 1.1664277385935737, - "grad_norm": 0.001694596721790731, - "learning_rate": 0.00019999932947329798, - "loss": 46.0, - "step": 15256 - }, - { - "epoch": 1.1665041955769635, - "grad_norm": 0.0006246630218811333, - "learning_rate": 0.00019999932938533421, - "loss": 46.0, - "step": 15257 - }, - { - "epoch": 1.1665806525603533, - "grad_norm": 0.00046615154133178294, - "learning_rate": 0.00019999932929736463, - "loss": 46.0, - "step": 15258 - }, - { - "epoch": 1.166657109543743, - "grad_norm": 0.0010403483174741268, - "learning_rate": 0.00019999932920938932, - "loss": 46.0, - "step": 15259 - }, - { - "epoch": 1.1667335665271326, - "grad_norm": 0.0010131682502105832, - "learning_rate": 0.0001999993291214082, - "loss": 46.0, - "step": 15260 - }, - { - "epoch": 1.1668100235105223, - "grad_norm": 0.0009167783427983522, - "learning_rate": 0.00019999932903342135, - "loss": 46.0, - "step": 15261 - }, - { - "epoch": 1.166886480493912, - "grad_norm": 0.0019855392165482044, - "learning_rate": 0.00019999932894542872, - "loss": 46.0, - "step": 15262 - }, - { - "epoch": 1.1669629374773018, - "grad_norm": 0.0005558494594879448, - "learning_rate": 0.00019999932885743032, - "loss": 46.0, - "step": 15263 - }, - { - "epoch": 1.1670393944606916, - "grad_norm": 0.000635394302662462, - "learning_rate": 0.00019999932876942615, - "loss": 46.0, - "step": 15264 - }, - { - "epoch": 1.1671158514440814, - "grad_norm": 0.03312766179442406, - "learning_rate": 0.00019999932868141622, - "loss": 46.0, - "step": 15265 - }, - { - "epoch": 1.167192308427471, - "grad_norm": 0.0019652668852359056, - "learning_rate": 0.0001999993285934005, - "loss": 46.0, - "step": 15266 - }, - { - "epoch": 1.1672687654108607, - "grad_norm": 0.0008170937653630972, - "learning_rate": 0.000199999328505379, - "loss": 46.0, - "step": 15267 - }, - { - "epoch": 1.1673452223942504, - "grad_norm": 0.0009267200948670506, - "learning_rate": 0.00019999932841735177, - "loss": 46.0, - "step": 15268 - }, - { - "epoch": 1.1674216793776402, - "grad_norm": 0.001527606975287199, - "learning_rate": 0.00019999932832931875, - "loss": 46.0, - "step": 15269 - }, - { - "epoch": 1.16749813636103, - "grad_norm": 0.003069679019972682, - "learning_rate": 0.00019999932824127996, - "loss": 46.0, - "step": 15270 - }, - { - "epoch": 1.1675745933444195, - "grad_norm": 0.001260369666852057, - "learning_rate": 0.0001999993281532354, - "loss": 46.0, - "step": 15271 - }, - { - "epoch": 1.1676510503278092, - "grad_norm": 0.005360051989555359, - "learning_rate": 0.00019999932806518506, - "loss": 46.0, - "step": 15272 - }, - { - "epoch": 1.167727507311199, - "grad_norm": 0.0005134766106493771, - "learning_rate": 0.00019999932797712896, - "loss": 46.0, - "step": 15273 - }, - { - "epoch": 1.1678039642945888, - "grad_norm": 0.0006678166682831943, - "learning_rate": 0.0001999993278890671, - "loss": 46.0, - "step": 15274 - }, - { - "epoch": 1.1678804212779785, - "grad_norm": 0.0007406029617413878, - "learning_rate": 0.00019999932780099945, - "loss": 46.0, - "step": 15275 - }, - { - "epoch": 1.1679568782613683, - "grad_norm": 0.0006844205199740827, - "learning_rate": 0.00019999932771292607, - "loss": 46.0, - "step": 15276 - }, - { - "epoch": 1.1680333352447578, - "grad_norm": 0.0009097892907448113, - "learning_rate": 0.0001999993276248469, - "loss": 46.0, - "step": 15277 - }, - { - "epoch": 1.1681097922281476, - "grad_norm": 0.0005681724287569523, - "learning_rate": 0.00019999932753676195, - "loss": 46.0, - "step": 15278 - }, - { - "epoch": 1.1681862492115374, - "grad_norm": 0.010308535769581795, - "learning_rate": 0.00019999932744867125, - "loss": 46.0, - "step": 15279 - }, - { - "epoch": 1.1682627061949271, - "grad_norm": 0.00038089058944024146, - "learning_rate": 0.00019999932736057473, - "loss": 46.0, - "step": 15280 - }, - { - "epoch": 1.1683391631783169, - "grad_norm": 0.0005786475376226008, - "learning_rate": 0.0001999993272724725, - "loss": 46.0, - "step": 15281 - }, - { - "epoch": 1.1684156201617064, - "grad_norm": 0.009335752576589584, - "learning_rate": 0.0001999993271843645, - "loss": 46.0, - "step": 15282 - }, - { - "epoch": 1.1684920771450962, - "grad_norm": 0.0018412502249702811, - "learning_rate": 0.0001999993270962507, - "loss": 46.0, - "step": 15283 - }, - { - "epoch": 1.168568534128486, - "grad_norm": 0.000612473173532635, - "learning_rate": 0.00019999932700813113, - "loss": 46.0, - "step": 15284 - }, - { - "epoch": 1.1686449911118757, - "grad_norm": 0.0006646934198215604, - "learning_rate": 0.0001999993269200058, - "loss": 46.0, - "step": 15285 - }, - { - "epoch": 1.1687214480952655, - "grad_norm": 0.0009676276822574437, - "learning_rate": 0.0001999993268318747, - "loss": 46.0, - "step": 15286 - }, - { - "epoch": 1.1687979050786552, - "grad_norm": 0.0018264454556629062, - "learning_rate": 0.00019999932674373783, - "loss": 46.0, - "step": 15287 - }, - { - "epoch": 1.1688743620620448, - "grad_norm": 0.0006531497347168624, - "learning_rate": 0.0001999993266555952, - "loss": 46.0, - "step": 15288 - }, - { - "epoch": 1.1689508190454345, - "grad_norm": 0.004945653956383467, - "learning_rate": 0.00019999932656744677, - "loss": 46.0, - "step": 15289 - }, - { - "epoch": 1.1690272760288243, - "grad_norm": 0.010995850898325443, - "learning_rate": 0.0001999993264792926, - "loss": 46.0, - "step": 15290 - }, - { - "epoch": 1.169103733012214, - "grad_norm": 0.0026049462612718344, - "learning_rate": 0.00019999932639113265, - "loss": 46.0, - "step": 15291 - }, - { - "epoch": 1.1691801899956038, - "grad_norm": 0.0010155064519494772, - "learning_rate": 0.00019999932630296693, - "loss": 46.0, - "step": 15292 - }, - { - "epoch": 1.1692566469789933, - "grad_norm": 0.0028305945452302694, - "learning_rate": 0.00019999932621479546, - "loss": 46.0, - "step": 15293 - }, - { - "epoch": 1.169333103962383, - "grad_norm": 0.0050291153602302074, - "learning_rate": 0.0001999993261266182, - "loss": 46.0, - "step": 15294 - }, - { - "epoch": 1.1694095609457729, - "grad_norm": 0.0024297942873090506, - "learning_rate": 0.00019999932603843518, - "loss": 46.0, - "step": 15295 - }, - { - "epoch": 1.1694860179291626, - "grad_norm": 0.0013381576864048839, - "learning_rate": 0.0001999993259502464, - "loss": 46.0, - "step": 15296 - }, - { - "epoch": 1.1695624749125524, - "grad_norm": 0.0008254920248873532, - "learning_rate": 0.00019999932586205183, - "loss": 46.0, - "step": 15297 - }, - { - "epoch": 1.1696389318959421, - "grad_norm": 0.0008895752253010869, - "learning_rate": 0.0001999993257738515, - "loss": 46.0, - "step": 15298 - }, - { - "epoch": 1.1697153888793317, - "grad_norm": 0.0012544800993055105, - "learning_rate": 0.00019999932568564538, - "loss": 46.0, - "step": 15299 - }, - { - "epoch": 1.1697918458627214, - "grad_norm": 0.00042710232082754374, - "learning_rate": 0.0001999993255974335, - "loss": 46.0, - "step": 15300 - }, - { - "epoch": 1.1698683028461112, - "grad_norm": 0.0010928551200777292, - "learning_rate": 0.00019999932550921588, - "loss": 46.0, - "step": 15301 - }, - { - "epoch": 1.169944759829501, - "grad_norm": 0.0018931017257273197, - "learning_rate": 0.00019999932542099247, - "loss": 46.0, - "step": 15302 - }, - { - "epoch": 1.1700212168128907, - "grad_norm": 0.0011676972499117255, - "learning_rate": 0.0001999993253327633, - "loss": 46.0, - "step": 15303 - }, - { - "epoch": 1.1700976737962803, - "grad_norm": 0.0007247917237691581, - "learning_rate": 0.00019999932524452833, - "loss": 46.0, - "step": 15304 - }, - { - "epoch": 1.17017413077967, - "grad_norm": 0.001112442696467042, - "learning_rate": 0.00019999932515628763, - "loss": 46.0, - "step": 15305 - }, - { - "epoch": 1.1702505877630598, - "grad_norm": 0.0006946864305064082, - "learning_rate": 0.00019999932506804114, - "loss": 46.0, - "step": 15306 - }, - { - "epoch": 1.1703270447464496, - "grad_norm": 0.00289551867172122, - "learning_rate": 0.00019999932497978887, - "loss": 46.0, - "step": 15307 - }, - { - "epoch": 1.1704035017298393, - "grad_norm": 0.007863664999604225, - "learning_rate": 0.00019999932489153083, - "loss": 46.0, - "step": 15308 - }, - { - "epoch": 1.170479958713229, - "grad_norm": 0.004753883462399244, - "learning_rate": 0.00019999932480326704, - "loss": 46.0, - "step": 15309 - }, - { - "epoch": 1.1705564156966186, - "grad_norm": 0.004676952958106995, - "learning_rate": 0.0001999993247149975, - "loss": 46.0, - "step": 15310 - }, - { - "epoch": 1.1706328726800084, - "grad_norm": 0.0017181761795654893, - "learning_rate": 0.00019999932462672214, - "loss": 46.0, - "step": 15311 - }, - { - "epoch": 1.1707093296633981, - "grad_norm": 0.0011043314589187503, - "learning_rate": 0.00019999932453844106, - "loss": 46.0, - "step": 15312 - }, - { - "epoch": 1.170785786646788, - "grad_norm": 0.0007979258080013096, - "learning_rate": 0.00019999932445015416, - "loss": 46.0, - "step": 15313 - }, - { - "epoch": 1.1708622436301777, - "grad_norm": 0.0013872137060388923, - "learning_rate": 0.00019999932436186153, - "loss": 46.0, - "step": 15314 - }, - { - "epoch": 1.1709387006135672, - "grad_norm": 0.0009669964201748371, - "learning_rate": 0.00019999932427356313, - "loss": 46.0, - "step": 15315 - }, - { - "epoch": 1.171015157596957, - "grad_norm": 0.0008406496490351856, - "learning_rate": 0.00019999932418525895, - "loss": 46.0, - "step": 15316 - }, - { - "epoch": 1.1710916145803467, - "grad_norm": 0.000770802958868444, - "learning_rate": 0.00019999932409694898, - "loss": 46.0, - "step": 15317 - }, - { - "epoch": 1.1711680715637365, - "grad_norm": 0.0017223728355020285, - "learning_rate": 0.00019999932400863326, - "loss": 46.0, - "step": 15318 - }, - { - "epoch": 1.1712445285471262, - "grad_norm": 0.004088039044290781, - "learning_rate": 0.00019999932392031176, - "loss": 46.0, - "step": 15319 - }, - { - "epoch": 1.171320985530516, - "grad_norm": 0.0021303328685462475, - "learning_rate": 0.00019999932383198452, - "loss": 46.0, - "step": 15320 - }, - { - "epoch": 1.1713974425139055, - "grad_norm": 0.0007540914812125266, - "learning_rate": 0.00019999932374365148, - "loss": 46.0, - "step": 15321 - }, - { - "epoch": 1.1714738994972953, - "grad_norm": 0.004459446761757135, - "learning_rate": 0.0001999993236553127, - "loss": 46.0, - "step": 15322 - }, - { - "epoch": 1.171550356480685, - "grad_norm": 0.0017783168004825711, - "learning_rate": 0.0001999993235669681, - "loss": 46.0, - "step": 15323 - }, - { - "epoch": 1.1716268134640748, - "grad_norm": 0.0010124341351911426, - "learning_rate": 0.00019999932347861777, - "loss": 46.0, - "step": 15324 - }, - { - "epoch": 1.1717032704474646, - "grad_norm": 0.0007500700303353369, - "learning_rate": 0.0001999993233902617, - "loss": 46.0, - "step": 15325 - }, - { - "epoch": 1.1717797274308541, - "grad_norm": 0.0005886171711608768, - "learning_rate": 0.00019999932330189979, - "loss": 46.0, - "step": 15326 - }, - { - "epoch": 1.1718561844142439, - "grad_norm": 0.004187110345810652, - "learning_rate": 0.00019999932321353216, - "loss": 46.0, - "step": 15327 - }, - { - "epoch": 1.1719326413976336, - "grad_norm": 0.00308572081848979, - "learning_rate": 0.00019999932312515873, - "loss": 46.0, - "step": 15328 - }, - { - "epoch": 1.1720090983810234, - "grad_norm": 0.001711120828986168, - "learning_rate": 0.00019999932303677956, - "loss": 46.0, - "step": 15329 - }, - { - "epoch": 1.1720855553644132, - "grad_norm": 0.0035854196175932884, - "learning_rate": 0.0001999993229483946, - "loss": 46.0, - "step": 15330 - }, - { - "epoch": 1.172162012347803, - "grad_norm": 0.007313880138099194, - "learning_rate": 0.0001999993228600039, - "loss": 46.0, - "step": 15331 - }, - { - "epoch": 1.1722384693311925, - "grad_norm": 0.0008720503537915647, - "learning_rate": 0.0001999993227716074, - "loss": 46.0, - "step": 15332 - }, - { - "epoch": 1.1723149263145822, - "grad_norm": 0.001349899685010314, - "learning_rate": 0.00019999932268320513, - "loss": 46.0, - "step": 15333 - }, - { - "epoch": 1.172391383297972, - "grad_norm": 0.011800352483987808, - "learning_rate": 0.0001999993225947971, - "loss": 46.0, - "step": 15334 - }, - { - "epoch": 1.1724678402813618, - "grad_norm": 0.0007968770805746317, - "learning_rate": 0.0001999993225063833, - "loss": 46.0, - "step": 15335 - }, - { - "epoch": 1.1725442972647515, - "grad_norm": 0.001050457707606256, - "learning_rate": 0.00019999932241796374, - "loss": 46.0, - "step": 15336 - }, - { - "epoch": 1.172620754248141, - "grad_norm": 0.018532712012529373, - "learning_rate": 0.0001999993223295384, - "loss": 46.0, - "step": 15337 - }, - { - "epoch": 1.1726972112315308, - "grad_norm": 0.0005391244194470346, - "learning_rate": 0.00019999932224110727, - "loss": 46.0, - "step": 15338 - }, - { - "epoch": 1.1727736682149206, - "grad_norm": 0.0005859649972990155, - "learning_rate": 0.00019999932215267042, - "loss": 46.0, - "step": 15339 - }, - { - "epoch": 1.1728501251983103, - "grad_norm": 0.0023180313874036074, - "learning_rate": 0.00019999932206422777, - "loss": 46.0, - "step": 15340 - }, - { - "epoch": 1.1729265821817, - "grad_norm": 0.0014467989094555378, - "learning_rate": 0.00019999932197577934, - "loss": 46.0, - "step": 15341 - }, - { - "epoch": 1.1730030391650899, - "grad_norm": 0.009292705915868282, - "learning_rate": 0.00019999932188732516, - "loss": 46.0, - "step": 15342 - }, - { - "epoch": 1.1730794961484794, - "grad_norm": 0.001998856896534562, - "learning_rate": 0.0001999993217988652, - "loss": 46.0, - "step": 15343 - }, - { - "epoch": 1.1731559531318692, - "grad_norm": 0.0036292888689786196, - "learning_rate": 0.00019999932171039947, - "loss": 46.0, - "step": 15344 - }, - { - "epoch": 1.173232410115259, - "grad_norm": 0.005679688882082701, - "learning_rate": 0.00019999932162192798, - "loss": 46.0, - "step": 15345 - }, - { - "epoch": 1.1733088670986487, - "grad_norm": 0.0007781201275065541, - "learning_rate": 0.0001999993215334507, - "loss": 46.0, - "step": 15346 - }, - { - "epoch": 1.1733853240820384, - "grad_norm": 0.0010257113026455045, - "learning_rate": 0.00019999932144496767, - "loss": 46.0, - "step": 15347 - }, - { - "epoch": 1.173461781065428, - "grad_norm": 0.010551296174526215, - "learning_rate": 0.00019999932135647888, - "loss": 46.0, - "step": 15348 - }, - { - "epoch": 1.1735382380488177, - "grad_norm": 0.002150070620700717, - "learning_rate": 0.0001999993212679843, - "loss": 46.0, - "step": 15349 - }, - { - "epoch": 1.1736146950322075, - "grad_norm": 0.0012724722037091851, - "learning_rate": 0.00019999932117948394, - "loss": 46.0, - "step": 15350 - }, - { - "epoch": 1.1736911520155973, - "grad_norm": 0.002970330184325576, - "learning_rate": 0.00019999932109097786, - "loss": 46.0, - "step": 15351 - }, - { - "epoch": 1.173767608998987, - "grad_norm": 0.0003113442216999829, - "learning_rate": 0.00019999932100246598, - "loss": 46.0, - "step": 15352 - }, - { - "epoch": 1.1738440659823768, - "grad_norm": 0.001013904926367104, - "learning_rate": 0.0001999993209139483, - "loss": 46.0, - "step": 15353 - }, - { - "epoch": 1.1739205229657663, - "grad_norm": 0.0009218161576427519, - "learning_rate": 0.0001999993208254249, - "loss": 46.0, - "step": 15354 - }, - { - "epoch": 1.173996979949156, - "grad_norm": 0.0011633316753432155, - "learning_rate": 0.00019999932073689572, - "loss": 46.0, - "step": 15355 - }, - { - "epoch": 1.1740734369325458, - "grad_norm": 0.0016000056639313698, - "learning_rate": 0.00019999932064836075, - "loss": 46.0, - "step": 15356 - }, - { - "epoch": 1.1741498939159356, - "grad_norm": 0.0007826977525837719, - "learning_rate": 0.00019999932055982003, - "loss": 46.0, - "step": 15357 - }, - { - "epoch": 1.1742263508993251, - "grad_norm": 0.02654682844877243, - "learning_rate": 0.0001999993204712735, - "loss": 46.0, - "step": 15358 - }, - { - "epoch": 1.174302807882715, - "grad_norm": 0.0009169411496259272, - "learning_rate": 0.00019999932038272124, - "loss": 46.0, - "step": 15359 - }, - { - "epoch": 1.1743792648661047, - "grad_norm": 0.0005046732840128243, - "learning_rate": 0.00019999932029416323, - "loss": 46.0, - "step": 15360 - }, - { - "epoch": 1.1744557218494944, - "grad_norm": 0.015056449919939041, - "learning_rate": 0.00019999932020559942, - "loss": 46.0, - "step": 15361 - }, - { - "epoch": 1.1745321788328842, - "grad_norm": 0.0007006344385445118, - "learning_rate": 0.00019999932011702986, - "loss": 46.0, - "step": 15362 - }, - { - "epoch": 1.174608635816274, - "grad_norm": 0.0004977277712896466, - "learning_rate": 0.0001999993200284545, - "loss": 46.0, - "step": 15363 - }, - { - "epoch": 1.1746850927996637, - "grad_norm": 0.0022992698941379786, - "learning_rate": 0.0001999993199398734, - "loss": 46.0, - "step": 15364 - }, - { - "epoch": 1.1747615497830532, - "grad_norm": 0.0015222210204228759, - "learning_rate": 0.0001999993198512865, - "loss": 46.0, - "step": 15365 - }, - { - "epoch": 1.174838006766443, - "grad_norm": 0.0020082297269254923, - "learning_rate": 0.00019999931976269383, - "loss": 46.0, - "step": 15366 - }, - { - "epoch": 1.1749144637498328, - "grad_norm": 0.0039048115722835064, - "learning_rate": 0.00019999931967409543, - "loss": 46.0, - "step": 15367 - }, - { - "epoch": 1.1749909207332225, - "grad_norm": 0.0003600383934099227, - "learning_rate": 0.00019999931958549123, - "loss": 46.0, - "step": 15368 - }, - { - "epoch": 1.175067377716612, - "grad_norm": 0.00027772699831984937, - "learning_rate": 0.00019999931949688126, - "loss": 46.0, - "step": 15369 - }, - { - "epoch": 1.1751438347000018, - "grad_norm": 0.0011163987219333649, - "learning_rate": 0.00019999931940826554, - "loss": 46.0, - "step": 15370 - }, - { - "epoch": 1.1752202916833916, - "grad_norm": 0.009584103710949421, - "learning_rate": 0.00019999931931964404, - "loss": 46.0, - "step": 15371 - }, - { - "epoch": 1.1752967486667814, - "grad_norm": 0.0008097888203337789, - "learning_rate": 0.00019999931923101678, - "loss": 46.0, - "step": 15372 - }, - { - "epoch": 1.1753732056501711, - "grad_norm": 0.001861053635366261, - "learning_rate": 0.0001999993191423837, - "loss": 46.0, - "step": 15373 - }, - { - "epoch": 1.1754496626335609, - "grad_norm": 0.0006373692885972559, - "learning_rate": 0.00019999931905374492, - "loss": 46.0, - "step": 15374 - }, - { - "epoch": 1.1755261196169506, - "grad_norm": 0.0019834719132632017, - "learning_rate": 0.00019999931896510034, - "loss": 46.0, - "step": 15375 - }, - { - "epoch": 1.1756025766003402, - "grad_norm": 0.0006228814017958939, - "learning_rate": 0.00019999931887645, - "loss": 46.0, - "step": 15376 - }, - { - "epoch": 1.17567903358373, - "grad_norm": 0.0011599621502682567, - "learning_rate": 0.0001999993187877939, - "loss": 46.0, - "step": 15377 - }, - { - "epoch": 1.1757554905671197, - "grad_norm": 0.0009037195122800767, - "learning_rate": 0.000199999318699132, - "loss": 46.0, - "step": 15378 - }, - { - "epoch": 1.1758319475505095, - "grad_norm": 0.001184728229418397, - "learning_rate": 0.00019999931861046434, - "loss": 46.0, - "step": 15379 - }, - { - "epoch": 1.175908404533899, - "grad_norm": 0.0014615757390856743, - "learning_rate": 0.00019999931852179091, - "loss": 46.0, - "step": 15380 - }, - { - "epoch": 1.1759848615172888, - "grad_norm": 0.000806040596216917, - "learning_rate": 0.00019999931843311171, - "loss": 46.0, - "step": 15381 - }, - { - "epoch": 1.1760613185006785, - "grad_norm": 0.0049487450160086155, - "learning_rate": 0.00019999931834442677, - "loss": 46.0, - "step": 15382 - }, - { - "epoch": 1.1761377754840683, - "grad_norm": 0.0005893089110031724, - "learning_rate": 0.00019999931825573602, - "loss": 46.0, - "step": 15383 - }, - { - "epoch": 1.176214232467458, - "grad_norm": 0.0011899803066626191, - "learning_rate": 0.00019999931816703953, - "loss": 46.0, - "step": 15384 - }, - { - "epoch": 1.1762906894508478, - "grad_norm": 0.001896370085887611, - "learning_rate": 0.00019999931807833726, - "loss": 46.0, - "step": 15385 - }, - { - "epoch": 1.1763671464342373, - "grad_norm": 0.002269502729177475, - "learning_rate": 0.0001999993179896292, - "loss": 46.0, - "step": 15386 - }, - { - "epoch": 1.176443603417627, - "grad_norm": 0.0029634120874106884, - "learning_rate": 0.0001999993179009154, - "loss": 46.0, - "step": 15387 - }, - { - "epoch": 1.1765200604010169, - "grad_norm": 0.0008897338411770761, - "learning_rate": 0.00019999931781219582, - "loss": 46.0, - "step": 15388 - }, - { - "epoch": 1.1765965173844066, - "grad_norm": 0.001576107693836093, - "learning_rate": 0.00019999931772347047, - "loss": 46.0, - "step": 15389 - }, - { - "epoch": 1.1766729743677964, - "grad_norm": 0.00033984603942371905, - "learning_rate": 0.00019999931763473936, - "loss": 46.0, - "step": 15390 - }, - { - "epoch": 1.176749431351186, - "grad_norm": 0.00216719601303339, - "learning_rate": 0.00019999931754600248, - "loss": 46.0, - "step": 15391 - }, - { - "epoch": 1.1768258883345757, - "grad_norm": 0.00038681377191096544, - "learning_rate": 0.00019999931745725983, - "loss": 46.0, - "step": 15392 - }, - { - "epoch": 1.1769023453179654, - "grad_norm": 0.0017777879256755114, - "learning_rate": 0.00019999931736851137, - "loss": 46.0, - "step": 15393 - }, - { - "epoch": 1.1769788023013552, - "grad_norm": 0.0004222371499054134, - "learning_rate": 0.00019999931727975718, - "loss": 46.0, - "step": 15394 - }, - { - "epoch": 1.177055259284745, - "grad_norm": 0.0016078681219369173, - "learning_rate": 0.00019999931719099723, - "loss": 46.0, - "step": 15395 - }, - { - "epoch": 1.1771317162681347, - "grad_norm": 0.0007601361139677465, - "learning_rate": 0.0001999993171022315, - "loss": 46.0, - "step": 15396 - }, - { - "epoch": 1.1772081732515243, - "grad_norm": 0.0007044863305054605, - "learning_rate": 0.00019999931701345996, - "loss": 46.0, - "step": 15397 - }, - { - "epoch": 1.177284630234914, - "grad_norm": 0.002174082677811384, - "learning_rate": 0.00019999931692468273, - "loss": 46.0, - "step": 15398 - }, - { - "epoch": 1.1773610872183038, - "grad_norm": 0.0003637280606199056, - "learning_rate": 0.0001999993168358997, - "loss": 46.0, - "step": 15399 - }, - { - "epoch": 1.1774375442016936, - "grad_norm": 0.0012934559490531683, - "learning_rate": 0.00019999931674711085, - "loss": 46.0, - "step": 15400 - }, - { - "epoch": 1.1775140011850833, - "grad_norm": 0.0026614877860993147, - "learning_rate": 0.0001999993166583163, - "loss": 46.0, - "step": 15401 - }, - { - "epoch": 1.1775904581684729, - "grad_norm": 0.001914922846481204, - "learning_rate": 0.00019999931656951593, - "loss": 46.0, - "step": 15402 - }, - { - "epoch": 1.1776669151518626, - "grad_norm": 0.001040788134559989, - "learning_rate": 0.00019999931648070982, - "loss": 46.0, - "step": 15403 - }, - { - "epoch": 1.1777433721352524, - "grad_norm": 0.0005309241823852062, - "learning_rate": 0.00019999931639189792, - "loss": 46.0, - "step": 15404 - }, - { - "epoch": 1.1778198291186421, - "grad_norm": 0.001082441653124988, - "learning_rate": 0.00019999931630308027, - "loss": 46.0, - "step": 15405 - }, - { - "epoch": 1.177896286102032, - "grad_norm": 0.005845897365361452, - "learning_rate": 0.00019999931621425684, - "loss": 46.0, - "step": 15406 - }, - { - "epoch": 1.1779727430854217, - "grad_norm": 0.0017694375710561872, - "learning_rate": 0.00019999931612542761, - "loss": 46.0, - "step": 15407 - }, - { - "epoch": 1.1780492000688112, - "grad_norm": 0.000950800662394613, - "learning_rate": 0.00019999931603659264, - "loss": 46.0, - "step": 15408 - }, - { - "epoch": 1.178125657052201, - "grad_norm": 0.003270723158493638, - "learning_rate": 0.00019999931594775192, - "loss": 46.0, - "step": 15409 - }, - { - "epoch": 1.1782021140355907, - "grad_norm": 0.004383347928524017, - "learning_rate": 0.0001999993158589054, - "loss": 46.0, - "step": 15410 - }, - { - "epoch": 1.1782785710189805, - "grad_norm": 0.0009847903857007623, - "learning_rate": 0.00019999931577005314, - "loss": 46.0, - "step": 15411 - }, - { - "epoch": 1.1783550280023702, - "grad_norm": 0.0020054453052580357, - "learning_rate": 0.0001999993156811951, - "loss": 46.0, - "step": 15412 - }, - { - "epoch": 1.1784314849857598, - "grad_norm": 0.000637597928289324, - "learning_rate": 0.0001999993155923313, - "loss": 46.0, - "step": 15413 - }, - { - "epoch": 1.1785079419691495, - "grad_norm": 0.0009336243383586407, - "learning_rate": 0.0001999993155034617, - "loss": 46.0, - "step": 15414 - }, - { - "epoch": 1.1785843989525393, - "grad_norm": 0.0024396292865276337, - "learning_rate": 0.00019999931541458634, - "loss": 46.0, - "step": 15415 - }, - { - "epoch": 1.178660855935929, - "grad_norm": 0.00648860028013587, - "learning_rate": 0.0001999993153257052, - "loss": 46.0, - "step": 15416 - }, - { - "epoch": 1.1787373129193188, - "grad_norm": 0.0016205405117943883, - "learning_rate": 0.0001999993152368183, - "loss": 46.0, - "step": 15417 - }, - { - "epoch": 1.1788137699027086, - "grad_norm": 0.0008968322072178125, - "learning_rate": 0.00019999931514792568, - "loss": 46.0, - "step": 15418 - }, - { - "epoch": 1.1788902268860981, - "grad_norm": 0.0019769337959587574, - "learning_rate": 0.00019999931505902723, - "loss": 46.0, - "step": 15419 - }, - { - "epoch": 1.1789666838694879, - "grad_norm": 0.0015798007370904088, - "learning_rate": 0.00019999931497012303, - "loss": 46.0, - "step": 15420 - }, - { - "epoch": 1.1790431408528776, - "grad_norm": 0.001267299405299127, - "learning_rate": 0.00019999931488121306, - "loss": 46.0, - "step": 15421 - }, - { - "epoch": 1.1791195978362674, - "grad_norm": 0.0009020359138958156, - "learning_rate": 0.0001999993147922973, - "loss": 46.0, - "step": 15422 - }, - { - "epoch": 1.1791960548196572, - "grad_norm": 0.0023591441567987204, - "learning_rate": 0.0001999993147033758, - "loss": 46.0, - "step": 15423 - }, - { - "epoch": 1.1792725118030467, - "grad_norm": 0.0015569181414321065, - "learning_rate": 0.00019999931461444853, - "loss": 46.0, - "step": 15424 - }, - { - "epoch": 1.1793489687864365, - "grad_norm": 0.0011056180810555816, - "learning_rate": 0.0001999993145255155, - "loss": 46.0, - "step": 15425 - }, - { - "epoch": 1.1794254257698262, - "grad_norm": 0.0012619401095435023, - "learning_rate": 0.00019999931443657668, - "loss": 46.0, - "step": 15426 - }, - { - "epoch": 1.179501882753216, - "grad_norm": 0.003567162435501814, - "learning_rate": 0.0001999993143476321, - "loss": 46.0, - "step": 15427 - }, - { - "epoch": 1.1795783397366058, - "grad_norm": 0.012163173407316208, - "learning_rate": 0.00019999931425868173, - "loss": 46.0, - "step": 15428 - }, - { - "epoch": 1.1796547967199955, - "grad_norm": 0.0016579240327700973, - "learning_rate": 0.0001999993141697256, - "loss": 46.0, - "step": 15429 - }, - { - "epoch": 1.179731253703385, - "grad_norm": 0.000664646562654525, - "learning_rate": 0.00019999931408076372, - "loss": 46.0, - "step": 15430 - }, - { - "epoch": 1.1798077106867748, - "grad_norm": 0.0017933192430064082, - "learning_rate": 0.00019999931399179604, - "loss": 46.0, - "step": 15431 - }, - { - "epoch": 1.1798841676701646, - "grad_norm": 0.0009983469499275088, - "learning_rate": 0.0001999993139028226, - "loss": 46.0, - "step": 15432 - }, - { - "epoch": 1.1799606246535543, - "grad_norm": 0.0006373559008352458, - "learning_rate": 0.0001999993138138434, - "loss": 46.0, - "step": 15433 - }, - { - "epoch": 1.180037081636944, - "grad_norm": 0.005174398887902498, - "learning_rate": 0.00019999931372485842, - "loss": 46.0, - "step": 15434 - }, - { - "epoch": 1.1801135386203336, - "grad_norm": 0.0029068132862448692, - "learning_rate": 0.00019999931363586768, - "loss": 46.0, - "step": 15435 - }, - { - "epoch": 1.1801899956037234, - "grad_norm": 0.0005682732444256544, - "learning_rate": 0.00019999931354687119, - "loss": 46.0, - "step": 15436 - }, - { - "epoch": 1.1802664525871132, - "grad_norm": 0.0015646591782569885, - "learning_rate": 0.0001999993134578689, - "loss": 46.0, - "step": 15437 - }, - { - "epoch": 1.180342909570503, - "grad_norm": 0.00571898277848959, - "learning_rate": 0.00019999931336886083, - "loss": 46.0, - "step": 15438 - }, - { - "epoch": 1.1804193665538927, - "grad_norm": 0.0016845166683197021, - "learning_rate": 0.00019999931327984702, - "loss": 46.0, - "step": 15439 - }, - { - "epoch": 1.1804958235372824, - "grad_norm": 0.0006869092467240989, - "learning_rate": 0.00019999931319082743, - "loss": 46.0, - "step": 15440 - }, - { - "epoch": 1.180572280520672, - "grad_norm": 0.001669710618443787, - "learning_rate": 0.0001999993131018021, - "loss": 46.0, - "step": 15441 - }, - { - "epoch": 1.1806487375040617, - "grad_norm": 0.0006997182499617338, - "learning_rate": 0.00019999931301277094, - "loss": 46.0, - "step": 15442 - }, - { - "epoch": 1.1807251944874515, - "grad_norm": 0.0011786954710260034, - "learning_rate": 0.00019999931292373404, - "loss": 46.0, - "step": 15443 - }, - { - "epoch": 1.1808016514708413, - "grad_norm": 0.0031874889973551035, - "learning_rate": 0.0001999993128346914, - "loss": 46.0, - "step": 15444 - }, - { - "epoch": 1.180878108454231, - "grad_norm": 0.0019556907936930656, - "learning_rate": 0.00019999931274564293, - "loss": 46.0, - "step": 15445 - }, - { - "epoch": 1.1809545654376206, - "grad_norm": 0.0014699024613946676, - "learning_rate": 0.00019999931265658876, - "loss": 46.0, - "step": 15446 - }, - { - "epoch": 1.1810310224210103, - "grad_norm": 0.004740988835692406, - "learning_rate": 0.00019999931256752874, - "loss": 46.0, - "step": 15447 - }, - { - "epoch": 1.1811074794044, - "grad_norm": 0.002042998792603612, - "learning_rate": 0.00019999931247846302, - "loss": 46.0, - "step": 15448 - }, - { - "epoch": 1.1811839363877898, - "grad_norm": 0.0009597280877642334, - "learning_rate": 0.0001999993123893915, - "loss": 46.0, - "step": 15449 - }, - { - "epoch": 1.1812603933711796, - "grad_norm": 0.0006218540947884321, - "learning_rate": 0.00019999931230031424, - "loss": 46.0, - "step": 15450 - }, - { - "epoch": 1.1813368503545694, - "grad_norm": 0.0005163172609172761, - "learning_rate": 0.00019999931221123117, - "loss": 46.0, - "step": 15451 - }, - { - "epoch": 1.181413307337959, - "grad_norm": 0.02556164562702179, - "learning_rate": 0.00019999931212214236, - "loss": 46.0, - "step": 15452 - }, - { - "epoch": 1.1814897643213487, - "grad_norm": 0.0012519657611846924, - "learning_rate": 0.00019999931203304775, - "loss": 46.0, - "step": 15453 - }, - { - "epoch": 1.1815662213047384, - "grad_norm": 0.0013388078659772873, - "learning_rate": 0.0001999993119439474, - "loss": 46.0, - "step": 15454 - }, - { - "epoch": 1.1816426782881282, - "grad_norm": 0.0005547038745135069, - "learning_rate": 0.00019999931185484126, - "loss": 46.0, - "step": 15455 - }, - { - "epoch": 1.181719135271518, - "grad_norm": 0.001275600166991353, - "learning_rate": 0.00019999931176572936, - "loss": 46.0, - "step": 15456 - }, - { - "epoch": 1.1817955922549075, - "grad_norm": 0.0003953548439312726, - "learning_rate": 0.0001999993116766117, - "loss": 46.0, - "step": 15457 - }, - { - "epoch": 1.1818720492382973, - "grad_norm": 0.0006889212527312338, - "learning_rate": 0.00019999931158748825, - "loss": 46.0, - "step": 15458 - }, - { - "epoch": 1.181948506221687, - "grad_norm": 0.0020947728771716356, - "learning_rate": 0.00019999931149835906, - "loss": 46.0, - "step": 15459 - }, - { - "epoch": 1.1820249632050768, - "grad_norm": 0.00659290561452508, - "learning_rate": 0.00019999931140922406, - "loss": 46.0, - "step": 15460 - }, - { - "epoch": 1.1821014201884665, - "grad_norm": 0.0021299654617905617, - "learning_rate": 0.0001999993113200833, - "loss": 46.0, - "step": 15461 - }, - { - "epoch": 1.1821778771718563, - "grad_norm": 0.0005596533883363008, - "learning_rate": 0.0001999993112309368, - "loss": 46.0, - "step": 15462 - }, - { - "epoch": 1.1822543341552458, - "grad_norm": 0.00132686214055866, - "learning_rate": 0.00019999931114178453, - "loss": 46.0, - "step": 15463 - }, - { - "epoch": 1.1823307911386356, - "grad_norm": 0.0015958151780068874, - "learning_rate": 0.00019999931105262647, - "loss": 46.0, - "step": 15464 - }, - { - "epoch": 1.1824072481220254, - "grad_norm": 0.0008263010531663895, - "learning_rate": 0.00019999931096346263, - "loss": 46.0, - "step": 15465 - }, - { - "epoch": 1.1824837051054151, - "grad_norm": 0.0008682474144734442, - "learning_rate": 0.00019999931087429304, - "loss": 46.0, - "step": 15466 - }, - { - "epoch": 1.1825601620888049, - "grad_norm": 0.001114193699322641, - "learning_rate": 0.0001999993107851177, - "loss": 46.0, - "step": 15467 - }, - { - "epoch": 1.1826366190721944, - "grad_norm": 0.0018916726112365723, - "learning_rate": 0.00019999931069593653, - "loss": 46.0, - "step": 15468 - }, - { - "epoch": 1.1827130760555842, - "grad_norm": 0.010582629591226578, - "learning_rate": 0.00019999931060674965, - "loss": 46.0, - "step": 15469 - }, - { - "epoch": 1.182789533038974, - "grad_norm": 0.0010482128709554672, - "learning_rate": 0.00019999931051755697, - "loss": 46.0, - "step": 15470 - }, - { - "epoch": 1.1828659900223637, - "grad_norm": 0.0016996449558064342, - "learning_rate": 0.00019999931042835852, - "loss": 46.0, - "step": 15471 - }, - { - "epoch": 1.1829424470057535, - "grad_norm": 0.0012100361054763198, - "learning_rate": 0.00019999931033915433, - "loss": 46.0, - "step": 15472 - }, - { - "epoch": 1.1830189039891432, - "grad_norm": 0.0006674345931969583, - "learning_rate": 0.00019999931024994435, - "loss": 46.0, - "step": 15473 - }, - { - "epoch": 1.1830953609725328, - "grad_norm": 0.014526882208883762, - "learning_rate": 0.00019999931016072858, - "loss": 46.0, - "step": 15474 - }, - { - "epoch": 1.1831718179559225, - "grad_norm": 0.0011232286924496293, - "learning_rate": 0.00019999931007150707, - "loss": 46.0, - "step": 15475 - }, - { - "epoch": 1.1832482749393123, - "grad_norm": 0.000940681085921824, - "learning_rate": 0.0001999993099822798, - "loss": 46.0, - "step": 15476 - }, - { - "epoch": 1.183324731922702, - "grad_norm": 0.0012857902329415083, - "learning_rate": 0.00019999930989304674, - "loss": 46.0, - "step": 15477 - }, - { - "epoch": 1.1834011889060918, - "grad_norm": 0.008986872620880604, - "learning_rate": 0.0001999993098038079, - "loss": 46.0, - "step": 15478 - }, - { - "epoch": 1.1834776458894813, - "grad_norm": 0.0015535138081759214, - "learning_rate": 0.00019999930971456332, - "loss": 46.0, - "step": 15479 - }, - { - "epoch": 1.183554102872871, - "grad_norm": 0.0013885722728446126, - "learning_rate": 0.00019999930962531296, - "loss": 46.0, - "step": 15480 - }, - { - "epoch": 1.1836305598562609, - "grad_norm": 0.008818816393613815, - "learning_rate": 0.0001999993095360568, - "loss": 46.0, - "step": 15481 - }, - { - "epoch": 1.1837070168396506, - "grad_norm": 0.0011947766179218888, - "learning_rate": 0.0001999993094467949, - "loss": 46.0, - "step": 15482 - }, - { - "epoch": 1.1837834738230404, - "grad_norm": 0.0004282281151972711, - "learning_rate": 0.00019999930935752725, - "loss": 46.0, - "step": 15483 - }, - { - "epoch": 1.1838599308064301, - "grad_norm": 0.00035026593832299113, - "learning_rate": 0.0001999993092682538, - "loss": 46.0, - "step": 15484 - }, - { - "epoch": 1.1839363877898197, - "grad_norm": 0.0010041380301117897, - "learning_rate": 0.00019999930917897457, - "loss": 46.0, - "step": 15485 - }, - { - "epoch": 1.1840128447732094, - "grad_norm": 0.0021436784882098436, - "learning_rate": 0.00019999930908968957, - "loss": 46.0, - "step": 15486 - }, - { - "epoch": 1.1840893017565992, - "grad_norm": 0.0012605394003912807, - "learning_rate": 0.00019999930900039883, - "loss": 46.0, - "step": 15487 - }, - { - "epoch": 1.184165758739989, - "grad_norm": 0.0013191248290240765, - "learning_rate": 0.0001999993089111023, - "loss": 46.0, - "step": 15488 - }, - { - "epoch": 1.1842422157233785, - "grad_norm": 0.0004925571847707033, - "learning_rate": 0.00019999930882180002, - "loss": 46.0, - "step": 15489 - }, - { - "epoch": 1.1843186727067683, - "grad_norm": 0.0007125950651243329, - "learning_rate": 0.00019999930873249199, - "loss": 46.0, - "step": 15490 - }, - { - "epoch": 1.184395129690158, - "grad_norm": 0.0007753656827844679, - "learning_rate": 0.00019999930864317812, - "loss": 46.0, - "step": 15491 - }, - { - "epoch": 1.1844715866735478, - "grad_norm": 0.0009097495931200683, - "learning_rate": 0.00019999930855385856, - "loss": 46.0, - "step": 15492 - }, - { - "epoch": 1.1845480436569376, - "grad_norm": 0.00044157583033666015, - "learning_rate": 0.00019999930846453315, - "loss": 46.0, - "step": 15493 - }, - { - "epoch": 1.1846245006403273, - "grad_norm": 0.0010635844664648175, - "learning_rate": 0.00019999930837520205, - "loss": 46.0, - "step": 15494 - }, - { - "epoch": 1.184700957623717, - "grad_norm": 0.0007624759455211461, - "learning_rate": 0.00019999930828586512, - "loss": 46.0, - "step": 15495 - }, - { - "epoch": 1.1847774146071066, - "grad_norm": 0.0028700088150799274, - "learning_rate": 0.00019999930819652244, - "loss": 46.0, - "step": 15496 - }, - { - "epoch": 1.1848538715904964, - "grad_norm": 0.0014099212130531669, - "learning_rate": 0.000199999308107174, - "loss": 46.0, - "step": 15497 - }, - { - "epoch": 1.1849303285738861, - "grad_norm": 0.005258242599666119, - "learning_rate": 0.0001999993080178198, - "loss": 46.0, - "step": 15498 - }, - { - "epoch": 1.185006785557276, - "grad_norm": 0.0009431655053049326, - "learning_rate": 0.0001999993079284598, - "loss": 46.0, - "step": 15499 - }, - { - "epoch": 1.1850832425406654, - "grad_norm": 0.0009974036365747452, - "learning_rate": 0.00019999930783909405, - "loss": 46.0, - "step": 15500 - }, - { - "epoch": 1.1851596995240552, - "grad_norm": 0.0033718496561050415, - "learning_rate": 0.00019999930774972254, - "loss": 46.0, - "step": 15501 - }, - { - "epoch": 1.185236156507445, - "grad_norm": 0.0018586317310109735, - "learning_rate": 0.00019999930766034525, - "loss": 46.0, - "step": 15502 - }, - { - "epoch": 1.1853126134908347, - "grad_norm": 0.0027830402832478285, - "learning_rate": 0.00019999930757096216, - "loss": 46.0, - "step": 15503 - }, - { - "epoch": 1.1853890704742245, - "grad_norm": 0.0248353723436594, - "learning_rate": 0.00019999930748157335, - "loss": 46.0, - "step": 15504 - }, - { - "epoch": 1.1854655274576142, - "grad_norm": 0.0005697284359484911, - "learning_rate": 0.00019999930739217874, - "loss": 46.0, - "step": 15505 - }, - { - "epoch": 1.185541984441004, - "grad_norm": 0.00223549478687346, - "learning_rate": 0.00019999930730277838, - "loss": 46.0, - "step": 15506 - }, - { - "epoch": 1.1856184414243935, - "grad_norm": 0.002657330362126231, - "learning_rate": 0.00019999930721337222, - "loss": 46.0, - "step": 15507 - }, - { - "epoch": 1.1856948984077833, - "grad_norm": 0.0014047704171389341, - "learning_rate": 0.00019999930712396032, - "loss": 46.0, - "step": 15508 - }, - { - "epoch": 1.185771355391173, - "grad_norm": 0.0018662635702639818, - "learning_rate": 0.00019999930703454264, - "loss": 46.0, - "step": 15509 - }, - { - "epoch": 1.1858478123745628, - "grad_norm": 0.002144875703379512, - "learning_rate": 0.0001999993069451192, - "loss": 46.0, - "step": 15510 - }, - { - "epoch": 1.1859242693579524, - "grad_norm": 0.0012508862419053912, - "learning_rate": 0.00019999930685568997, - "loss": 46.0, - "step": 15511 - }, - { - "epoch": 1.1860007263413421, - "grad_norm": 0.0010779559379443526, - "learning_rate": 0.00019999930676625497, - "loss": 46.0, - "step": 15512 - }, - { - "epoch": 1.1860771833247319, - "grad_norm": 0.0007783265900798142, - "learning_rate": 0.00019999930667681423, - "loss": 46.0, - "step": 15513 - }, - { - "epoch": 1.1861536403081216, - "grad_norm": 0.0008492347551509738, - "learning_rate": 0.0001999993065873677, - "loss": 46.0, - "step": 15514 - }, - { - "epoch": 1.1862300972915114, - "grad_norm": 0.0004294106038287282, - "learning_rate": 0.0001999993064979154, - "loss": 46.0, - "step": 15515 - }, - { - "epoch": 1.1863065542749012, - "grad_norm": 0.0006817835965193808, - "learning_rate": 0.00019999930640845733, - "loss": 46.0, - "step": 15516 - }, - { - "epoch": 1.1863830112582907, - "grad_norm": 0.011327757500112057, - "learning_rate": 0.0001999993063189935, - "loss": 46.0, - "step": 15517 - }, - { - "epoch": 1.1864594682416805, - "grad_norm": 0.0007910567219369113, - "learning_rate": 0.0001999993062295239, - "loss": 46.0, - "step": 15518 - }, - { - "epoch": 1.1865359252250702, - "grad_norm": 0.0013478505425155163, - "learning_rate": 0.00019999930614004853, - "loss": 46.0, - "step": 15519 - }, - { - "epoch": 1.18661238220846, - "grad_norm": 0.001307833124883473, - "learning_rate": 0.00019999930605056737, - "loss": 46.0, - "step": 15520 - }, - { - "epoch": 1.1866888391918498, - "grad_norm": 0.0005885678110644221, - "learning_rate": 0.00019999930596108047, - "loss": 46.0, - "step": 15521 - }, - { - "epoch": 1.1867652961752393, - "grad_norm": 0.0012011270737275481, - "learning_rate": 0.0001999993058715878, - "loss": 46.0, - "step": 15522 - }, - { - "epoch": 1.186841753158629, - "grad_norm": 0.0006303370464593172, - "learning_rate": 0.00019999930578208934, - "loss": 46.0, - "step": 15523 - }, - { - "epoch": 1.1869182101420188, - "grad_norm": 0.0007026963867247105, - "learning_rate": 0.00019999930569258512, - "loss": 46.0, - "step": 15524 - }, - { - "epoch": 1.1869946671254086, - "grad_norm": 0.0029053364414721727, - "learning_rate": 0.00019999930560307515, - "loss": 46.0, - "step": 15525 - }, - { - "epoch": 1.1870711241087983, - "grad_norm": 0.00027197255985811353, - "learning_rate": 0.00019999930551355938, - "loss": 46.0, - "step": 15526 - }, - { - "epoch": 1.187147581092188, - "grad_norm": 0.0013747927732765675, - "learning_rate": 0.00019999930542403783, - "loss": 46.0, - "step": 15527 - }, - { - "epoch": 1.1872240380755776, - "grad_norm": 0.019425805658102036, - "learning_rate": 0.00019999930533451057, - "loss": 46.0, - "step": 15528 - }, - { - "epoch": 1.1873004950589674, - "grad_norm": 0.002812436316162348, - "learning_rate": 0.00019999930524497748, - "loss": 46.0, - "step": 15529 - }, - { - "epoch": 1.1873769520423572, - "grad_norm": 0.0008677076548337936, - "learning_rate": 0.00019999930515543865, - "loss": 46.0, - "step": 15530 - }, - { - "epoch": 1.187453409025747, - "grad_norm": 0.0010049112606793642, - "learning_rate": 0.00019999930506589406, - "loss": 46.0, - "step": 15531 - }, - { - "epoch": 1.1875298660091367, - "grad_norm": 0.0019073245348408818, - "learning_rate": 0.00019999930497634368, - "loss": 46.0, - "step": 15532 - }, - { - "epoch": 1.1876063229925262, - "grad_norm": 0.0046845655888319016, - "learning_rate": 0.0001999993048867875, - "loss": 46.0, - "step": 15533 - }, - { - "epoch": 1.187682779975916, - "grad_norm": 0.0016535703325644135, - "learning_rate": 0.0001999993047972256, - "loss": 46.0, - "step": 15534 - }, - { - "epoch": 1.1877592369593057, - "grad_norm": 0.0011481159599497914, - "learning_rate": 0.00019999930470765794, - "loss": 46.0, - "step": 15535 - }, - { - "epoch": 1.1878356939426955, - "grad_norm": 0.002079533413052559, - "learning_rate": 0.00019999930461808447, - "loss": 46.0, - "step": 15536 - }, - { - "epoch": 1.1879121509260853, - "grad_norm": 0.003487946232780814, - "learning_rate": 0.00019999930452850527, - "loss": 46.0, - "step": 15537 - }, - { - "epoch": 1.187988607909475, - "grad_norm": 0.000580307561904192, - "learning_rate": 0.00019999930443892025, - "loss": 46.0, - "step": 15538 - }, - { - "epoch": 1.1880650648928646, - "grad_norm": 0.0003372525388840586, - "learning_rate": 0.0001999993043493295, - "loss": 46.0, - "step": 15539 - }, - { - "epoch": 1.1881415218762543, - "grad_norm": 0.00035768383531831205, - "learning_rate": 0.00019999930425973296, - "loss": 46.0, - "step": 15540 - }, - { - "epoch": 1.188217978859644, - "grad_norm": 0.005122461821883917, - "learning_rate": 0.00019999930417013067, - "loss": 46.0, - "step": 15541 - }, - { - "epoch": 1.1882944358430338, - "grad_norm": 0.002018338069319725, - "learning_rate": 0.00019999930408052258, - "loss": 46.0, - "step": 15542 - }, - { - "epoch": 1.1883708928264236, - "grad_norm": 0.0025058388710021973, - "learning_rate": 0.00019999930399090875, - "loss": 46.0, - "step": 15543 - }, - { - "epoch": 1.1884473498098131, - "grad_norm": 0.0035456903278827667, - "learning_rate": 0.00019999930390128916, - "loss": 46.0, - "step": 15544 - }, - { - "epoch": 1.188523806793203, - "grad_norm": 0.0039965189062058926, - "learning_rate": 0.00019999930381166378, - "loss": 46.0, - "step": 15545 - }, - { - "epoch": 1.1886002637765927, - "grad_norm": 0.0009805349400267005, - "learning_rate": 0.00019999930372203263, - "loss": 46.0, - "step": 15546 - }, - { - "epoch": 1.1886767207599824, - "grad_norm": 0.0008931240881793201, - "learning_rate": 0.00019999930363239572, - "loss": 46.0, - "step": 15547 - }, - { - "epoch": 1.1887531777433722, - "grad_norm": 0.0036219563335180283, - "learning_rate": 0.00019999930354275302, - "loss": 46.0, - "step": 15548 - }, - { - "epoch": 1.188829634726762, - "grad_norm": 0.0014206778723746538, - "learning_rate": 0.00019999930345310457, - "loss": 46.0, - "step": 15549 - }, - { - "epoch": 1.1889060917101515, - "grad_norm": 0.0008011855534277856, - "learning_rate": 0.00019999930336345035, - "loss": 46.0, - "step": 15550 - }, - { - "epoch": 1.1889825486935413, - "grad_norm": 0.0005782452644780278, - "learning_rate": 0.00019999930327379035, - "loss": 46.0, - "step": 15551 - }, - { - "epoch": 1.189059005676931, - "grad_norm": 0.0016597473295405507, - "learning_rate": 0.00019999930318412458, - "loss": 46.0, - "step": 15552 - }, - { - "epoch": 1.1891354626603208, - "grad_norm": 0.0007053731824271381, - "learning_rate": 0.00019999930309445304, - "loss": 46.0, - "step": 15553 - }, - { - "epoch": 1.1892119196437105, - "grad_norm": 0.0008106050663627684, - "learning_rate": 0.00019999930300477575, - "loss": 46.0, - "step": 15554 - }, - { - "epoch": 1.1892883766271, - "grad_norm": 0.004580857697874308, - "learning_rate": 0.0001999993029150927, - "loss": 46.0, - "step": 15555 - }, - { - "epoch": 1.1893648336104898, - "grad_norm": 0.0007276267861016095, - "learning_rate": 0.00019999930282540383, - "loss": 46.0, - "step": 15556 - }, - { - "epoch": 1.1894412905938796, - "grad_norm": 0.0011253143893554807, - "learning_rate": 0.00019999930273570922, - "loss": 46.0, - "step": 15557 - }, - { - "epoch": 1.1895177475772694, - "grad_norm": 0.003680725581943989, - "learning_rate": 0.00019999930264600883, - "loss": 46.0, - "step": 15558 - }, - { - "epoch": 1.1895942045606591, - "grad_norm": 0.0020579949487000704, - "learning_rate": 0.00019999930255630268, - "loss": 46.0, - "step": 15559 - }, - { - "epoch": 1.1896706615440489, - "grad_norm": 0.0021691475994884968, - "learning_rate": 0.00019999930246659078, - "loss": 46.0, - "step": 15560 - }, - { - "epoch": 1.1897471185274384, - "grad_norm": 0.0016062804497778416, - "learning_rate": 0.0001999993023768731, - "loss": 46.0, - "step": 15561 - }, - { - "epoch": 1.1898235755108282, - "grad_norm": 0.00032115174690261483, - "learning_rate": 0.00019999930228714962, - "loss": 46.0, - "step": 15562 - }, - { - "epoch": 1.189900032494218, - "grad_norm": 0.001098536653444171, - "learning_rate": 0.0001999993021974204, - "loss": 46.0, - "step": 15563 - }, - { - "epoch": 1.1899764894776077, - "grad_norm": 0.0012571340193971992, - "learning_rate": 0.0001999993021076854, - "loss": 46.0, - "step": 15564 - }, - { - "epoch": 1.1900529464609975, - "grad_norm": 0.0016245145816355944, - "learning_rate": 0.00019999930201794466, - "loss": 46.0, - "step": 15565 - }, - { - "epoch": 1.190129403444387, - "grad_norm": 0.0012835495872423053, - "learning_rate": 0.0001999993019281981, - "loss": 46.0, - "step": 15566 - }, - { - "epoch": 1.1902058604277768, - "grad_norm": 0.0005703620845451951, - "learning_rate": 0.0001999993018384458, - "loss": 46.0, - "step": 15567 - }, - { - "epoch": 1.1902823174111665, - "grad_norm": 0.0012046168558299541, - "learning_rate": 0.00019999930174868772, - "loss": 46.0, - "step": 15568 - }, - { - "epoch": 1.1903587743945563, - "grad_norm": 0.004526298958808184, - "learning_rate": 0.00019999930165892388, - "loss": 46.0, - "step": 15569 - }, - { - "epoch": 1.190435231377946, - "grad_norm": 0.002534755039960146, - "learning_rate": 0.00019999930156915427, - "loss": 46.0, - "step": 15570 - }, - { - "epoch": 1.1905116883613358, - "grad_norm": 0.0031370320357382298, - "learning_rate": 0.0001999993014793789, - "loss": 46.0, - "step": 15571 - }, - { - "epoch": 1.1905881453447253, - "grad_norm": 0.0008780405623838305, - "learning_rate": 0.00019999930138959773, - "loss": 46.0, - "step": 15572 - }, - { - "epoch": 1.190664602328115, - "grad_norm": 0.0004659658297896385, - "learning_rate": 0.0001999993012998108, - "loss": 46.0, - "step": 15573 - }, - { - "epoch": 1.1907410593115049, - "grad_norm": 0.0009031351655721664, - "learning_rate": 0.00019999930121001813, - "loss": 46.0, - "step": 15574 - }, - { - "epoch": 1.1908175162948946, - "grad_norm": 0.0008859423687681556, - "learning_rate": 0.00019999930112021965, - "loss": 46.0, - "step": 15575 - }, - { - "epoch": 1.1908939732782844, - "grad_norm": 0.0036954539828002453, - "learning_rate": 0.00019999930103041543, - "loss": 46.0, - "step": 15576 - }, - { - "epoch": 1.190970430261674, - "grad_norm": 0.002833851845934987, - "learning_rate": 0.00019999930094060544, - "loss": 46.0, - "step": 15577 - }, - { - "epoch": 1.1910468872450637, - "grad_norm": 0.0007078189519234002, - "learning_rate": 0.00019999930085078967, - "loss": 46.0, - "step": 15578 - }, - { - "epoch": 1.1911233442284535, - "grad_norm": 0.002464585704728961, - "learning_rate": 0.00019999930076096813, - "loss": 46.0, - "step": 15579 - }, - { - "epoch": 1.1911998012118432, - "grad_norm": 0.00043234441545791924, - "learning_rate": 0.00019999930067114084, - "loss": 46.0, - "step": 15580 - }, - { - "epoch": 1.191276258195233, - "grad_norm": 0.001243620296008885, - "learning_rate": 0.00019999930058130775, - "loss": 46.0, - "step": 15581 - }, - { - "epoch": 1.1913527151786227, - "grad_norm": 0.0021027387119829655, - "learning_rate": 0.00019999930049146891, - "loss": 46.0, - "step": 15582 - }, - { - "epoch": 1.1914291721620123, - "grad_norm": 0.0007718808483332396, - "learning_rate": 0.00019999930040162428, - "loss": 46.0, - "step": 15583 - }, - { - "epoch": 1.191505629145402, - "grad_norm": 0.0051543209701776505, - "learning_rate": 0.0001999993003117739, - "loss": 46.0, - "step": 15584 - }, - { - "epoch": 1.1915820861287918, - "grad_norm": 0.003042373340576887, - "learning_rate": 0.00019999930022191774, - "loss": 46.0, - "step": 15585 - }, - { - "epoch": 1.1916585431121816, - "grad_norm": 0.0007157353684306145, - "learning_rate": 0.00019999930013205584, - "loss": 46.0, - "step": 15586 - }, - { - "epoch": 1.1917350000955713, - "grad_norm": 0.0048282877542078495, - "learning_rate": 0.0001999993000421881, - "loss": 46.0, - "step": 15587 - }, - { - "epoch": 1.1918114570789609, - "grad_norm": 0.0007894468144513667, - "learning_rate": 0.0001999992999523147, - "loss": 46.0, - "step": 15588 - }, - { - "epoch": 1.1918879140623506, - "grad_norm": 0.0008379890932701528, - "learning_rate": 0.0001999992998624354, - "loss": 46.0, - "step": 15589 - }, - { - "epoch": 1.1919643710457404, - "grad_norm": 0.001715177670121193, - "learning_rate": 0.00019999929977255042, - "loss": 46.0, - "step": 15590 - }, - { - "epoch": 1.1920408280291301, - "grad_norm": 0.0018882370786741376, - "learning_rate": 0.00019999929968265968, - "loss": 46.0, - "step": 15591 - }, - { - "epoch": 1.19211728501252, - "grad_norm": 0.0028970823623239994, - "learning_rate": 0.00019999929959276313, - "loss": 46.0, - "step": 15592 - }, - { - "epoch": 1.1921937419959097, - "grad_norm": 0.0004898144397884607, - "learning_rate": 0.00019999929950286082, - "loss": 46.0, - "step": 15593 - }, - { - "epoch": 1.1922701989792992, - "grad_norm": 0.001743406057357788, - "learning_rate": 0.00019999929941295276, - "loss": 46.0, - "step": 15594 - }, - { - "epoch": 1.192346655962689, - "grad_norm": 0.0008911142940632999, - "learning_rate": 0.0001999992993230389, - "loss": 46.0, - "step": 15595 - }, - { - "epoch": 1.1924231129460787, - "grad_norm": 0.0005246399086900055, - "learning_rate": 0.00019999929923311926, - "loss": 46.0, - "step": 15596 - }, - { - "epoch": 1.1924995699294685, - "grad_norm": 0.0019381000893190503, - "learning_rate": 0.00019999929914319385, - "loss": 46.0, - "step": 15597 - }, - { - "epoch": 1.1925760269128582, - "grad_norm": 0.0005452848272398114, - "learning_rate": 0.00019999929905326272, - "loss": 46.0, - "step": 15598 - }, - { - "epoch": 1.1926524838962478, - "grad_norm": 0.004592359997332096, - "learning_rate": 0.0001999992989633258, - "loss": 46.0, - "step": 15599 - }, - { - "epoch": 1.1927289408796375, - "grad_norm": 0.0026152986101806164, - "learning_rate": 0.00019999929887338312, - "loss": 46.0, - "step": 15600 - }, - { - "epoch": 1.1928053978630273, - "grad_norm": 0.0006257917848415673, - "learning_rate": 0.00019999929878343465, - "loss": 46.0, - "step": 15601 - }, - { - "epoch": 1.192881854846417, - "grad_norm": 0.0011624919716268778, - "learning_rate": 0.0001999992986934804, - "loss": 46.0, - "step": 15602 - }, - { - "epoch": 1.1929583118298068, - "grad_norm": 0.0005555449170060456, - "learning_rate": 0.0001999992986035204, - "loss": 46.0, - "step": 15603 - }, - { - "epoch": 1.1930347688131966, - "grad_norm": 0.0034399430733174086, - "learning_rate": 0.00019999929851355464, - "loss": 46.0, - "step": 15604 - }, - { - "epoch": 1.1931112257965861, - "grad_norm": 0.0009292967733927071, - "learning_rate": 0.0001999992984235831, - "loss": 46.0, - "step": 15605 - }, - { - "epoch": 1.1931876827799759, - "grad_norm": 0.0012462940067052841, - "learning_rate": 0.00019999929833360578, - "loss": 46.0, - "step": 15606 - }, - { - "epoch": 1.1932641397633656, - "grad_norm": 0.0008562547154724598, - "learning_rate": 0.0001999992982436227, - "loss": 46.0, - "step": 15607 - }, - { - "epoch": 1.1933405967467554, - "grad_norm": 0.0008484327117912471, - "learning_rate": 0.00019999929815363383, - "loss": 46.0, - "step": 15608 - }, - { - "epoch": 1.1934170537301452, - "grad_norm": 0.0032459385693073273, - "learning_rate": 0.00019999929806363922, - "loss": 46.0, - "step": 15609 - }, - { - "epoch": 1.1934935107135347, - "grad_norm": 0.000585326284635812, - "learning_rate": 0.00019999929797363884, - "loss": 46.0, - "step": 15610 - }, - { - "epoch": 1.1935699676969245, - "grad_norm": 0.0009977323934435844, - "learning_rate": 0.00019999929788363266, - "loss": 46.0, - "step": 15611 - }, - { - "epoch": 1.1936464246803142, - "grad_norm": 0.0008985247113741934, - "learning_rate": 0.00019999929779362076, - "loss": 46.0, - "step": 15612 - }, - { - "epoch": 1.193722881663704, - "grad_norm": 0.0008775783353485167, - "learning_rate": 0.00019999929770360303, - "loss": 46.0, - "step": 15613 - }, - { - "epoch": 1.1937993386470938, - "grad_norm": 0.0013121296651661396, - "learning_rate": 0.00019999929761357955, - "loss": 46.0, - "step": 15614 - }, - { - "epoch": 1.1938757956304835, - "grad_norm": 0.009874706156551838, - "learning_rate": 0.00019999929752355033, - "loss": 46.0, - "step": 15615 - }, - { - "epoch": 1.193952252613873, - "grad_norm": 0.002934205112978816, - "learning_rate": 0.00019999929743351534, - "loss": 46.0, - "step": 15616 - }, - { - "epoch": 1.1940287095972628, - "grad_norm": 0.0009753283229656518, - "learning_rate": 0.00019999929734347454, - "loss": 46.0, - "step": 15617 - }, - { - "epoch": 1.1941051665806526, - "grad_norm": 0.0009777714731171727, - "learning_rate": 0.000199999297253428, - "loss": 46.0, - "step": 15618 - }, - { - "epoch": 1.1941816235640423, - "grad_norm": 0.0043449425138533115, - "learning_rate": 0.0001999992971633757, - "loss": 46.0, - "step": 15619 - }, - { - "epoch": 1.1942580805474319, - "grad_norm": 0.09789062291383743, - "learning_rate": 0.00019999929707331763, - "loss": 46.0, - "step": 15620 - }, - { - "epoch": 1.1943345375308216, - "grad_norm": 0.011709102429449558, - "learning_rate": 0.00019999929698325377, - "loss": 46.0, - "step": 15621 - }, - { - "epoch": 1.1944109945142114, - "grad_norm": 0.0004991114838048816, - "learning_rate": 0.00019999929689318413, - "loss": 46.0, - "step": 15622 - }, - { - "epoch": 1.1944874514976012, - "grad_norm": 0.001007484970614314, - "learning_rate": 0.00019999929680310875, - "loss": 46.0, - "step": 15623 - }, - { - "epoch": 1.194563908480991, - "grad_norm": 0.0025881242472678423, - "learning_rate": 0.00019999929671302757, - "loss": 46.0, - "step": 15624 - }, - { - "epoch": 1.1946403654643807, - "grad_norm": 0.0012657179031521082, - "learning_rate": 0.00019999929662294067, - "loss": 46.0, - "step": 15625 - }, - { - "epoch": 1.1947168224477704, - "grad_norm": 0.0005705134244635701, - "learning_rate": 0.00019999929653284797, - "loss": 46.0, - "step": 15626 - }, - { - "epoch": 1.19479327943116, - "grad_norm": 0.0005474984645843506, - "learning_rate": 0.0001999992964427495, - "loss": 46.0, - "step": 15627 - }, - { - "epoch": 1.1948697364145497, - "grad_norm": 0.0007833965355530381, - "learning_rate": 0.00019999929635264524, - "loss": 46.0, - "step": 15628 - }, - { - "epoch": 1.1949461933979395, - "grad_norm": 0.0006912931567057967, - "learning_rate": 0.00019999929626253525, - "loss": 46.0, - "step": 15629 - }, - { - "epoch": 1.1950226503813293, - "grad_norm": 0.0016190021997317672, - "learning_rate": 0.00019999929617241946, - "loss": 46.0, - "step": 15630 - }, - { - "epoch": 1.1950991073647188, - "grad_norm": 0.0013823227491229773, - "learning_rate": 0.00019999929608229791, - "loss": 46.0, - "step": 15631 - }, - { - "epoch": 1.1951755643481086, - "grad_norm": 0.002389416564255953, - "learning_rate": 0.0001999992959921706, - "loss": 46.0, - "step": 15632 - }, - { - "epoch": 1.1952520213314983, - "grad_norm": 0.0012995239812880754, - "learning_rate": 0.00019999929590203749, - "loss": 46.0, - "step": 15633 - }, - { - "epoch": 1.195328478314888, - "grad_norm": 0.003739702282473445, - "learning_rate": 0.00019999929581189868, - "loss": 46.0, - "step": 15634 - }, - { - "epoch": 1.1954049352982778, - "grad_norm": 0.005378869362175465, - "learning_rate": 0.00019999929572175402, - "loss": 46.0, - "step": 15635 - }, - { - "epoch": 1.1954813922816676, - "grad_norm": 0.003659531706944108, - "learning_rate": 0.00019999929563160364, - "loss": 46.0, - "step": 15636 - }, - { - "epoch": 1.1955578492650574, - "grad_norm": 0.0006310660974122584, - "learning_rate": 0.00019999929554144748, - "loss": 46.0, - "step": 15637 - }, - { - "epoch": 1.195634306248447, - "grad_norm": 0.0013169602025300264, - "learning_rate": 0.00019999929545128553, - "loss": 46.0, - "step": 15638 - }, - { - "epoch": 1.1957107632318367, - "grad_norm": 0.001478321268223226, - "learning_rate": 0.00019999929536111783, - "loss": 46.0, - "step": 15639 - }, - { - "epoch": 1.1957872202152264, - "grad_norm": 0.0013395135756582022, - "learning_rate": 0.00019999929527094435, - "loss": 46.0, - "step": 15640 - }, - { - "epoch": 1.1958636771986162, - "grad_norm": 0.0013497895561158657, - "learning_rate": 0.00019999929518076513, - "loss": 46.0, - "step": 15641 - }, - { - "epoch": 1.1959401341820057, - "grad_norm": 0.0008274232386611402, - "learning_rate": 0.0001999992950905801, - "loss": 46.0, - "step": 15642 - }, - { - "epoch": 1.1960165911653955, - "grad_norm": 0.0005337708862498403, - "learning_rate": 0.00019999929500038932, - "loss": 46.0, - "step": 15643 - }, - { - "epoch": 1.1960930481487853, - "grad_norm": 0.0018749083392322063, - "learning_rate": 0.00019999929491019278, - "loss": 46.0, - "step": 15644 - }, - { - "epoch": 1.196169505132175, - "grad_norm": 0.0007693894440308213, - "learning_rate": 0.00019999929481999046, - "loss": 46.0, - "step": 15645 - }, - { - "epoch": 1.1962459621155648, - "grad_norm": 0.029752876609563828, - "learning_rate": 0.00019999929472978235, - "loss": 46.0, - "step": 15646 - }, - { - "epoch": 1.1963224190989545, - "grad_norm": 0.0005141844158060849, - "learning_rate": 0.00019999929463956852, - "loss": 46.0, - "step": 15647 - }, - { - "epoch": 1.196398876082344, - "grad_norm": 0.0018940542358905077, - "learning_rate": 0.00019999929454934888, - "loss": 46.0, - "step": 15648 - }, - { - "epoch": 1.1964753330657338, - "grad_norm": 0.005433386657387018, - "learning_rate": 0.0001999992944591235, - "loss": 46.0, - "step": 15649 - }, - { - "epoch": 1.1965517900491236, - "grad_norm": 0.0018717003986239433, - "learning_rate": 0.00019999929436889232, - "loss": 46.0, - "step": 15650 - }, - { - "epoch": 1.1966282470325134, - "grad_norm": 0.002483527874574065, - "learning_rate": 0.0001999992942786554, - "loss": 46.0, - "step": 15651 - }, - { - "epoch": 1.1967047040159031, - "grad_norm": 0.003623990109190345, - "learning_rate": 0.00019999929418841267, - "loss": 46.0, - "step": 15652 - }, - { - "epoch": 1.1967811609992927, - "grad_norm": 0.03166210651397705, - "learning_rate": 0.00019999929409816422, - "loss": 46.0, - "step": 15653 - }, - { - "epoch": 1.1968576179826824, - "grad_norm": 0.0007963934913277626, - "learning_rate": 0.00019999929400790997, - "loss": 46.0, - "step": 15654 - }, - { - "epoch": 1.1969340749660722, - "grad_norm": 0.0017357331234961748, - "learning_rate": 0.00019999929391764995, - "loss": 46.0, - "step": 15655 - }, - { - "epoch": 1.197010531949462, - "grad_norm": 0.0056830416433513165, - "learning_rate": 0.00019999929382738416, - "loss": 46.0, - "step": 15656 - }, - { - "epoch": 1.1970869889328517, - "grad_norm": 0.0014931554906070232, - "learning_rate": 0.00019999929373711262, - "loss": 46.0, - "step": 15657 - }, - { - "epoch": 1.1971634459162415, - "grad_norm": 0.009246180765330791, - "learning_rate": 0.0001999992936468353, - "loss": 46.0, - "step": 15658 - }, - { - "epoch": 1.197239902899631, - "grad_norm": 0.003291910281404853, - "learning_rate": 0.00019999929355655222, - "loss": 46.0, - "step": 15659 - }, - { - "epoch": 1.1973163598830208, - "grad_norm": 0.003330525942146778, - "learning_rate": 0.00019999929346626333, - "loss": 46.0, - "step": 15660 - }, - { - "epoch": 1.1973928168664105, - "grad_norm": 0.0008768620318733156, - "learning_rate": 0.00019999929337596872, - "loss": 46.0, - "step": 15661 - }, - { - "epoch": 1.1974692738498003, - "grad_norm": 0.0006625146488659084, - "learning_rate": 0.00019999929328566834, - "loss": 46.0, - "step": 15662 - }, - { - "epoch": 1.19754573083319, - "grad_norm": 0.0009174922597594559, - "learning_rate": 0.00019999929319536213, - "loss": 46.0, - "step": 15663 - }, - { - "epoch": 1.1976221878165796, - "grad_norm": 0.0017498661763966084, - "learning_rate": 0.0001999992931050502, - "loss": 46.0, - "step": 15664 - }, - { - "epoch": 1.1976986447999693, - "grad_norm": 0.012102453038096428, - "learning_rate": 0.0001999992930147325, - "loss": 46.0, - "step": 15665 - }, - { - "epoch": 1.197775101783359, - "grad_norm": 0.0007139602093957365, - "learning_rate": 0.00019999929292440903, - "loss": 46.0, - "step": 15666 - }, - { - "epoch": 1.1978515587667489, - "grad_norm": 0.0013389961095526814, - "learning_rate": 0.0001999992928340798, - "loss": 46.0, - "step": 15667 - }, - { - "epoch": 1.1979280157501386, - "grad_norm": 0.002476147376000881, - "learning_rate": 0.00019999929274374477, - "loss": 46.0, - "step": 15668 - }, - { - "epoch": 1.1980044727335284, - "grad_norm": 0.004321780521422625, - "learning_rate": 0.00019999929265340397, - "loss": 46.0, - "step": 15669 - }, - { - "epoch": 1.198080929716918, - "grad_norm": 0.001915768370963633, - "learning_rate": 0.00019999929256305743, - "loss": 46.0, - "step": 15670 - }, - { - "epoch": 1.1981573867003077, - "grad_norm": 0.000593613600358367, - "learning_rate": 0.0001999992924727051, - "loss": 46.0, - "step": 15671 - }, - { - "epoch": 1.1982338436836975, - "grad_norm": 0.0013915409799665213, - "learning_rate": 0.000199999292382347, - "loss": 46.0, - "step": 15672 - }, - { - "epoch": 1.1983103006670872, - "grad_norm": 0.0040787504985928535, - "learning_rate": 0.00019999929229198315, - "loss": 46.0, - "step": 15673 - }, - { - "epoch": 1.198386757650477, - "grad_norm": 0.0006661030347459018, - "learning_rate": 0.0001999992922016135, - "loss": 46.0, - "step": 15674 - }, - { - "epoch": 1.1984632146338665, - "grad_norm": 0.0006008045747876167, - "learning_rate": 0.0001999992921112381, - "loss": 46.0, - "step": 15675 - }, - { - "epoch": 1.1985396716172563, - "grad_norm": 0.010750112123787403, - "learning_rate": 0.00019999929202085695, - "loss": 46.0, - "step": 15676 - }, - { - "epoch": 1.198616128600646, - "grad_norm": 0.004843415226787329, - "learning_rate": 0.00019999929193047, - "loss": 46.0, - "step": 15677 - }, - { - "epoch": 1.1986925855840358, - "grad_norm": 0.0017501342808827758, - "learning_rate": 0.00019999929184007727, - "loss": 46.0, - "step": 15678 - }, - { - "epoch": 1.1987690425674256, - "grad_norm": 0.0016653920756652951, - "learning_rate": 0.00019999929174967883, - "loss": 46.0, - "step": 15679 - }, - { - "epoch": 1.1988454995508153, - "grad_norm": 0.001998928841203451, - "learning_rate": 0.00019999929165927455, - "loss": 46.0, - "step": 15680 - }, - { - "epoch": 1.1989219565342049, - "grad_norm": 0.0014376409817487001, - "learning_rate": 0.00019999929156886453, - "loss": 46.0, - "step": 15681 - }, - { - "epoch": 1.1989984135175946, - "grad_norm": 0.0010311650112271309, - "learning_rate": 0.00019999929147844877, - "loss": 46.0, - "step": 15682 - }, - { - "epoch": 1.1990748705009844, - "grad_norm": 0.0006080266321077943, - "learning_rate": 0.0001999992913880272, - "loss": 46.0, - "step": 15683 - }, - { - "epoch": 1.1991513274843741, - "grad_norm": 0.0011371842119842768, - "learning_rate": 0.00019999929129759986, - "loss": 46.0, - "step": 15684 - }, - { - "epoch": 1.199227784467764, - "grad_norm": 0.0012255256297066808, - "learning_rate": 0.00019999929120716677, - "loss": 46.0, - "step": 15685 - }, - { - "epoch": 1.1993042414511534, - "grad_norm": 0.003824906889349222, - "learning_rate": 0.00019999929111672794, - "loss": 46.0, - "step": 15686 - }, - { - "epoch": 1.1993806984345432, - "grad_norm": 0.0009307143627665937, - "learning_rate": 0.00019999929102628328, - "loss": 46.0, - "step": 15687 - }, - { - "epoch": 1.199457155417933, - "grad_norm": 0.004113251343369484, - "learning_rate": 0.00019999929093583287, - "loss": 46.0, - "step": 15688 - }, - { - "epoch": 1.1995336124013227, - "grad_norm": 0.0006817805697210133, - "learning_rate": 0.0001999992908453767, - "loss": 46.0, - "step": 15689 - }, - { - "epoch": 1.1996100693847125, - "grad_norm": 0.007668792735785246, - "learning_rate": 0.00019999929075491477, - "loss": 46.0, - "step": 15690 - }, - { - "epoch": 1.1996865263681022, - "grad_norm": 0.0011725733056664467, - "learning_rate": 0.00019999929066444704, - "loss": 46.0, - "step": 15691 - }, - { - "epoch": 1.1997629833514918, - "grad_norm": 0.0013711429201066494, - "learning_rate": 0.00019999929057397354, - "loss": 46.0, - "step": 15692 - }, - { - "epoch": 1.1998394403348815, - "grad_norm": 0.014447052031755447, - "learning_rate": 0.00019999929048349432, - "loss": 46.0, - "step": 15693 - }, - { - "epoch": 1.1999158973182713, - "grad_norm": 0.0028197262436151505, - "learning_rate": 0.00019999929039300927, - "loss": 46.0, - "step": 15694 - }, - { - "epoch": 1.199992354301661, - "grad_norm": 0.0005684084026142955, - "learning_rate": 0.0001999992903025185, - "loss": 46.0, - "step": 15695 - }, - { - "epoch": 1.2000688112850508, - "grad_norm": 0.0021674875169992447, - "learning_rate": 0.00019999929021202192, - "loss": 46.0, - "step": 15696 - }, - { - "epoch": 1.2001452682684404, - "grad_norm": 0.0014034503838047385, - "learning_rate": 0.0001999992901215196, - "loss": 46.0, - "step": 15697 - }, - { - "epoch": 1.2002217252518301, - "grad_norm": 0.0005880245007574558, - "learning_rate": 0.00019999929003101152, - "loss": 46.0, - "step": 15698 - }, - { - "epoch": 1.20029818223522, - "grad_norm": 0.0023854663595557213, - "learning_rate": 0.00019999928994049766, - "loss": 46.0, - "step": 15699 - }, - { - "epoch": 1.2003746392186097, - "grad_norm": 0.0008366689435206354, - "learning_rate": 0.00019999928984997802, - "loss": 46.0, - "step": 15700 - }, - { - "epoch": 1.2004510962019994, - "grad_norm": 0.0010805936763063073, - "learning_rate": 0.00019999928975945262, - "loss": 46.0, - "step": 15701 - }, - { - "epoch": 1.2005275531853892, - "grad_norm": 0.0017285803332924843, - "learning_rate": 0.00019999928966892144, - "loss": 46.0, - "step": 15702 - }, - { - "epoch": 1.2006040101687787, - "grad_norm": 0.0006802756688557565, - "learning_rate": 0.00019999928957838449, - "loss": 46.0, - "step": 15703 - }, - { - "epoch": 1.2006804671521685, - "grad_norm": 0.0007277424447238445, - "learning_rate": 0.00019999928948784176, - "loss": 46.0, - "step": 15704 - }, - { - "epoch": 1.2007569241355582, - "grad_norm": 0.0008758124895393848, - "learning_rate": 0.0001999992893972933, - "loss": 46.0, - "step": 15705 - }, - { - "epoch": 1.200833381118948, - "grad_norm": 0.0008830575970932841, - "learning_rate": 0.00019999928930673904, - "loss": 46.0, - "step": 15706 - }, - { - "epoch": 1.2009098381023378, - "grad_norm": 0.0011764157097786665, - "learning_rate": 0.00019999928921617902, - "loss": 46.0, - "step": 15707 - }, - { - "epoch": 1.2009862950857273, - "grad_norm": 0.001333172433078289, - "learning_rate": 0.00019999928912561323, - "loss": 46.0, - "step": 15708 - }, - { - "epoch": 1.201062752069117, - "grad_norm": 0.0013649067841470242, - "learning_rate": 0.00019999928903504166, - "loss": 46.0, - "step": 15709 - }, - { - "epoch": 1.2011392090525068, - "grad_norm": 0.002075194614008069, - "learning_rate": 0.00019999928894446432, - "loss": 46.0, - "step": 15710 - }, - { - "epoch": 1.2012156660358966, - "grad_norm": 0.0008259046007879078, - "learning_rate": 0.0001999992888538812, - "loss": 46.0, - "step": 15711 - }, - { - "epoch": 1.2012921230192863, - "grad_norm": 0.002426484599709511, - "learning_rate": 0.00019999928876329235, - "loss": 46.0, - "step": 15712 - }, - { - "epoch": 1.201368580002676, - "grad_norm": 0.0007950731087476015, - "learning_rate": 0.0001999992886726977, - "loss": 46.0, - "step": 15713 - }, - { - "epoch": 1.2014450369860656, - "grad_norm": 0.0007124730036593974, - "learning_rate": 0.0001999992885820973, - "loss": 46.0, - "step": 15714 - }, - { - "epoch": 1.2015214939694554, - "grad_norm": 0.0008882875554263592, - "learning_rate": 0.00019999928849149113, - "loss": 46.0, - "step": 15715 - }, - { - "epoch": 1.2015979509528452, - "grad_norm": 0.009240842424333096, - "learning_rate": 0.00019999928840087918, - "loss": 46.0, - "step": 15716 - }, - { - "epoch": 1.201674407936235, - "grad_norm": 0.0008655826677568257, - "learning_rate": 0.00019999928831026146, - "loss": 46.0, - "step": 15717 - }, - { - "epoch": 1.2017508649196247, - "grad_norm": 0.0038563332054764032, - "learning_rate": 0.00019999928821963798, - "loss": 46.0, - "step": 15718 - }, - { - "epoch": 1.2018273219030142, - "grad_norm": 0.0011683328775689006, - "learning_rate": 0.0001999992881290087, - "loss": 46.0, - "step": 15719 - }, - { - "epoch": 1.201903778886404, - "grad_norm": 0.00226473156362772, - "learning_rate": 0.0001999992880383737, - "loss": 46.0, - "step": 15720 - }, - { - "epoch": 1.2019802358697937, - "grad_norm": 0.020189478993415833, - "learning_rate": 0.0001999992879477329, - "loss": 46.0, - "step": 15721 - }, - { - "epoch": 1.2020566928531835, - "grad_norm": 0.004795554094016552, - "learning_rate": 0.00019999928785708636, - "loss": 46.0, - "step": 15722 - }, - { - "epoch": 1.2021331498365733, - "grad_norm": 0.003608362516388297, - "learning_rate": 0.00019999928776643402, - "loss": 46.0, - "step": 15723 - }, - { - "epoch": 1.202209606819963, - "grad_norm": 0.001722640939988196, - "learning_rate": 0.0001999992876757759, - "loss": 46.0, - "step": 15724 - }, - { - "epoch": 1.2022860638033526, - "grad_norm": 0.019515084102749825, - "learning_rate": 0.00019999928758511205, - "loss": 46.0, - "step": 15725 - }, - { - "epoch": 1.2023625207867423, - "grad_norm": 0.003746159840375185, - "learning_rate": 0.0001999992874944424, - "loss": 46.0, - "step": 15726 - }, - { - "epoch": 1.202438977770132, - "grad_norm": 0.0007993843173608184, - "learning_rate": 0.00019999928740376696, - "loss": 46.0, - "step": 15727 - }, - { - "epoch": 1.2025154347535219, - "grad_norm": 0.0006316457875072956, - "learning_rate": 0.0001999992873130858, - "loss": 46.0, - "step": 15728 - }, - { - "epoch": 1.2025918917369116, - "grad_norm": 0.0013367807259783149, - "learning_rate": 0.00019999928722239885, - "loss": 46.0, - "step": 15729 - }, - { - "epoch": 1.2026683487203012, - "grad_norm": 0.001325565972365439, - "learning_rate": 0.00019999928713170615, - "loss": 46.0, - "step": 15730 - }, - { - "epoch": 1.202744805703691, - "grad_norm": 0.0017381255747750401, - "learning_rate": 0.00019999928704100763, - "loss": 46.0, - "step": 15731 - }, - { - "epoch": 1.2028212626870807, - "grad_norm": 0.0007777658174745739, - "learning_rate": 0.00019999928695030338, - "loss": 46.0, - "step": 15732 - }, - { - "epoch": 1.2028977196704704, - "grad_norm": 0.0052057718858122826, - "learning_rate": 0.00019999928685959336, - "loss": 46.0, - "step": 15733 - }, - { - "epoch": 1.2029741766538602, - "grad_norm": 0.0016010685358196497, - "learning_rate": 0.00019999928676887755, - "loss": 46.0, - "step": 15734 - }, - { - "epoch": 1.20305063363725, - "grad_norm": 0.008380594663321972, - "learning_rate": 0.000199999286678156, - "loss": 46.0, - "step": 15735 - }, - { - "epoch": 1.2031270906206395, - "grad_norm": 0.0013300662394613028, - "learning_rate": 0.00019999928658742867, - "loss": 46.0, - "step": 15736 - }, - { - "epoch": 1.2032035476040293, - "grad_norm": 0.0024072073865681887, - "learning_rate": 0.00019999928649669556, - "loss": 46.0, - "step": 15737 - }, - { - "epoch": 1.203280004587419, - "grad_norm": 0.0009984906064346433, - "learning_rate": 0.0001999992864059567, - "loss": 46.0, - "step": 15738 - }, - { - "epoch": 1.2033564615708088, - "grad_norm": 0.0010214403737336397, - "learning_rate": 0.00019999928631521204, - "loss": 46.0, - "step": 15739 - }, - { - "epoch": 1.2034329185541985, - "grad_norm": 0.0011432041646912694, - "learning_rate": 0.00019999928622446163, - "loss": 46.0, - "step": 15740 - }, - { - "epoch": 1.203509375537588, - "grad_norm": 0.0008477675728499889, - "learning_rate": 0.00019999928613370545, - "loss": 46.0, - "step": 15741 - }, - { - "epoch": 1.2035858325209778, - "grad_norm": 0.0018691600998863578, - "learning_rate": 0.0001999992860429435, - "loss": 46.0, - "step": 15742 - }, - { - "epoch": 1.2036622895043676, - "grad_norm": 0.0011603889288380742, - "learning_rate": 0.00019999928595217575, - "loss": 46.0, - "step": 15743 - }, - { - "epoch": 1.2037387464877574, - "grad_norm": 0.0018194845179095864, - "learning_rate": 0.00019999928586140228, - "loss": 46.0, - "step": 15744 - }, - { - "epoch": 1.2038152034711471, - "grad_norm": 0.005773098673671484, - "learning_rate": 0.000199999285770623, - "loss": 46.0, - "step": 15745 - }, - { - "epoch": 1.2038916604545369, - "grad_norm": 0.0022576279006898403, - "learning_rate": 0.000199999285679838, - "loss": 46.0, - "step": 15746 - }, - { - "epoch": 1.2039681174379264, - "grad_norm": 0.0007069289567880332, - "learning_rate": 0.00019999928558904717, - "loss": 46.0, - "step": 15747 - }, - { - "epoch": 1.2040445744213162, - "grad_norm": 0.001302338787354529, - "learning_rate": 0.0001999992854982506, - "loss": 46.0, - "step": 15748 - }, - { - "epoch": 1.204121031404706, - "grad_norm": 0.0061085522174835205, - "learning_rate": 0.00019999928540744827, - "loss": 46.0, - "step": 15749 - }, - { - "epoch": 1.2041974883880957, - "grad_norm": 0.0017046869033947587, - "learning_rate": 0.00019999928531664018, - "loss": 46.0, - "step": 15750 - }, - { - "epoch": 1.2042739453714852, - "grad_norm": 0.003390026278793812, - "learning_rate": 0.0001999992852258263, - "loss": 46.0, - "step": 15751 - }, - { - "epoch": 1.204350402354875, - "grad_norm": 0.0013632539194077253, - "learning_rate": 0.00019999928513500664, - "loss": 46.0, - "step": 15752 - }, - { - "epoch": 1.2044268593382648, - "grad_norm": 0.003184331115335226, - "learning_rate": 0.00019999928504418123, - "loss": 46.0, - "step": 15753 - }, - { - "epoch": 1.2045033163216545, - "grad_norm": 0.0018485623877495527, - "learning_rate": 0.00019999928495335003, - "loss": 46.0, - "step": 15754 - }, - { - "epoch": 1.2045797733050443, - "grad_norm": 0.0009776364313438535, - "learning_rate": 0.0001999992848625131, - "loss": 46.0, - "step": 15755 - }, - { - "epoch": 1.204656230288434, - "grad_norm": 0.001214486313983798, - "learning_rate": 0.00019999928477167038, - "loss": 46.0, - "step": 15756 - }, - { - "epoch": 1.2047326872718238, - "grad_norm": 0.004157719202339649, - "learning_rate": 0.00019999928468082188, - "loss": 46.0, - "step": 15757 - }, - { - "epoch": 1.2048091442552133, - "grad_norm": 0.0006044196197763085, - "learning_rate": 0.0001999992845899676, - "loss": 46.0, - "step": 15758 - }, - { - "epoch": 1.204885601238603, - "grad_norm": 0.0008829439175315201, - "learning_rate": 0.00019999928449910756, - "loss": 46.0, - "step": 15759 - }, - { - "epoch": 1.2049620582219929, - "grad_norm": 0.0009564216015860438, - "learning_rate": 0.00019999928440824177, - "loss": 46.0, - "step": 15760 - }, - { - "epoch": 1.2050385152053826, - "grad_norm": 0.002625989029183984, - "learning_rate": 0.0001999992843173702, - "loss": 46.0, - "step": 15761 - }, - { - "epoch": 1.2051149721887722, - "grad_norm": 0.0009223964880220592, - "learning_rate": 0.00019999928422649287, - "loss": 46.0, - "step": 15762 - }, - { - "epoch": 1.205191429172162, - "grad_norm": 0.0006487914943136275, - "learning_rate": 0.00019999928413560976, - "loss": 46.0, - "step": 15763 - }, - { - "epoch": 1.2052678861555517, - "grad_norm": 0.0027385635767132044, - "learning_rate": 0.00019999928404472087, - "loss": 46.0, - "step": 15764 - }, - { - "epoch": 1.2053443431389415, - "grad_norm": 0.00044901890214532614, - "learning_rate": 0.00019999928395382624, - "loss": 46.0, - "step": 15765 - }, - { - "epoch": 1.2054208001223312, - "grad_norm": 0.0007760389125905931, - "learning_rate": 0.0001999992838629258, - "loss": 46.0, - "step": 15766 - }, - { - "epoch": 1.205497257105721, - "grad_norm": 0.0009632749133743346, - "learning_rate": 0.00019999928377201963, - "loss": 46.0, - "step": 15767 - }, - { - "epoch": 1.2055737140891107, - "grad_norm": 0.0009677984635345638, - "learning_rate": 0.00019999928368110768, - "loss": 46.0, - "step": 15768 - }, - { - "epoch": 1.2056501710725003, - "grad_norm": 0.0023266051430255175, - "learning_rate": 0.00019999928359018993, - "loss": 46.0, - "step": 15769 - }, - { - "epoch": 1.20572662805589, - "grad_norm": 0.0021874792873859406, - "learning_rate": 0.00019999928349926646, - "loss": 46.0, - "step": 15770 - }, - { - "epoch": 1.2058030850392798, - "grad_norm": 0.0007334948168136179, - "learning_rate": 0.00019999928340833718, - "loss": 46.0, - "step": 15771 - }, - { - "epoch": 1.2058795420226696, - "grad_norm": 0.01449010893702507, - "learning_rate": 0.00019999928331740214, - "loss": 46.0, - "step": 15772 - }, - { - "epoch": 1.205955999006059, - "grad_norm": 0.00066584930755198, - "learning_rate": 0.00019999928322646135, - "loss": 46.0, - "step": 15773 - }, - { - "epoch": 1.2060324559894489, - "grad_norm": 0.002816611435264349, - "learning_rate": 0.00019999928313551478, - "loss": 46.0, - "step": 15774 - }, - { - "epoch": 1.2061089129728386, - "grad_norm": 0.0020622105803340673, - "learning_rate": 0.00019999928304456242, - "loss": 46.0, - "step": 15775 - }, - { - "epoch": 1.2061853699562284, - "grad_norm": 0.001504861400462687, - "learning_rate": 0.00019999928295360433, - "loss": 46.0, - "step": 15776 - }, - { - "epoch": 1.2062618269396181, - "grad_norm": 0.001224591745994985, - "learning_rate": 0.00019999928286264042, - "loss": 46.0, - "step": 15777 - }, - { - "epoch": 1.206338283923008, - "grad_norm": 0.0012241953518241644, - "learning_rate": 0.0001999992827716708, - "loss": 46.0, - "step": 15778 - }, - { - "epoch": 1.2064147409063974, - "grad_norm": 0.005175963044166565, - "learning_rate": 0.00019999928268069533, - "loss": 46.0, - "step": 15779 - }, - { - "epoch": 1.2064911978897872, - "grad_norm": 0.000688257219735533, - "learning_rate": 0.00019999928258971416, - "loss": 46.0, - "step": 15780 - }, - { - "epoch": 1.206567654873177, - "grad_norm": 0.0015347795560956001, - "learning_rate": 0.0001999992824987272, - "loss": 46.0, - "step": 15781 - }, - { - "epoch": 1.2066441118565667, - "grad_norm": 0.0006922337342984974, - "learning_rate": 0.00019999928240773448, - "loss": 46.0, - "step": 15782 - }, - { - "epoch": 1.2067205688399565, - "grad_norm": 0.006682609673589468, - "learning_rate": 0.00019999928231673598, - "loss": 46.0, - "step": 15783 - }, - { - "epoch": 1.206797025823346, - "grad_norm": 0.0013085160171613097, - "learning_rate": 0.0001999992822257317, - "loss": 46.0, - "step": 15784 - }, - { - "epoch": 1.2068734828067358, - "grad_norm": 0.0023341807536780834, - "learning_rate": 0.00019999928213472167, - "loss": 46.0, - "step": 15785 - }, - { - "epoch": 1.2069499397901255, - "grad_norm": 0.0010500644566491246, - "learning_rate": 0.00019999928204370585, - "loss": 46.0, - "step": 15786 - }, - { - "epoch": 1.2070263967735153, - "grad_norm": 0.006038394756615162, - "learning_rate": 0.00019999928195268428, - "loss": 46.0, - "step": 15787 - }, - { - "epoch": 1.207102853756905, - "grad_norm": 0.0005848530563525856, - "learning_rate": 0.00019999928186165695, - "loss": 46.0, - "step": 15788 - }, - { - "epoch": 1.2071793107402948, - "grad_norm": 0.001309465034864843, - "learning_rate": 0.00019999928177062384, - "loss": 46.0, - "step": 15789 - }, - { - "epoch": 1.2072557677236844, - "grad_norm": 0.028371086344122887, - "learning_rate": 0.00019999928167958495, - "loss": 46.0, - "step": 15790 - }, - { - "epoch": 1.2073322247070741, - "grad_norm": 0.0012720482191070914, - "learning_rate": 0.00019999928158854032, - "loss": 46.0, - "step": 15791 - }, - { - "epoch": 1.207408681690464, - "grad_norm": 0.0008237158763222396, - "learning_rate": 0.0001999992814974899, - "loss": 46.0, - "step": 15792 - }, - { - "epoch": 1.2074851386738537, - "grad_norm": 0.0007866528467275202, - "learning_rate": 0.00019999928140643366, - "loss": 46.0, - "step": 15793 - }, - { - "epoch": 1.2075615956572434, - "grad_norm": 0.004056129604578018, - "learning_rate": 0.0001999992813153717, - "loss": 46.0, - "step": 15794 - }, - { - "epoch": 1.207638052640633, - "grad_norm": 0.0009275604388676584, - "learning_rate": 0.00019999928122430398, - "loss": 46.0, - "step": 15795 - }, - { - "epoch": 1.2077145096240227, - "grad_norm": 0.0013814476551488042, - "learning_rate": 0.00019999928113323049, - "loss": 46.0, - "step": 15796 - }, - { - "epoch": 1.2077909666074125, - "grad_norm": 0.001514368923380971, - "learning_rate": 0.00019999928104215121, - "loss": 46.0, - "step": 15797 - }, - { - "epoch": 1.2078674235908022, - "grad_norm": 0.0010194851784035563, - "learning_rate": 0.0001999992809510662, - "loss": 46.0, - "step": 15798 - }, - { - "epoch": 1.207943880574192, - "grad_norm": 0.000640751444734633, - "learning_rate": 0.00019999928085997538, - "loss": 46.0, - "step": 15799 - }, - { - "epoch": 1.2080203375575818, - "grad_norm": 0.0011753039434552193, - "learning_rate": 0.0001999992807688788, - "loss": 46.0, - "step": 15800 - }, - { - "epoch": 1.2080967945409713, - "grad_norm": 0.003666070057079196, - "learning_rate": 0.00019999928067777645, - "loss": 46.0, - "step": 15801 - }, - { - "epoch": 1.208173251524361, - "grad_norm": 0.0012108872178941965, - "learning_rate": 0.00019999928058666831, - "loss": 46.0, - "step": 15802 - }, - { - "epoch": 1.2082497085077508, - "grad_norm": 0.004139328841120005, - "learning_rate": 0.00019999928049555443, - "loss": 46.0, - "step": 15803 - }, - { - "epoch": 1.2083261654911406, - "grad_norm": 0.00129911326803267, - "learning_rate": 0.00019999928040443477, - "loss": 46.0, - "step": 15804 - }, - { - "epoch": 1.2084026224745303, - "grad_norm": 0.0005257275770418346, - "learning_rate": 0.00019999928031330937, - "loss": 46.0, - "step": 15805 - }, - { - "epoch": 1.2084790794579199, - "grad_norm": 0.0003099743917118758, - "learning_rate": 0.00019999928022217816, - "loss": 46.0, - "step": 15806 - }, - { - "epoch": 1.2085555364413096, - "grad_norm": 0.0007760750595480204, - "learning_rate": 0.00019999928013104121, - "loss": 46.0, - "step": 15807 - }, - { - "epoch": 1.2086319934246994, - "grad_norm": 0.003667765064164996, - "learning_rate": 0.00019999928003989846, - "loss": 46.0, - "step": 15808 - }, - { - "epoch": 1.2087084504080892, - "grad_norm": 0.0008092631469480693, - "learning_rate": 0.00019999927994874997, - "loss": 46.0, - "step": 15809 - }, - { - "epoch": 1.208784907391479, - "grad_norm": 0.002482498763129115, - "learning_rate": 0.0001999992798575957, - "loss": 46.0, - "step": 15810 - }, - { - "epoch": 1.2088613643748687, - "grad_norm": 0.00486728036776185, - "learning_rate": 0.00019999927976643565, - "loss": 46.0, - "step": 15811 - }, - { - "epoch": 1.2089378213582582, - "grad_norm": 0.003524020779877901, - "learning_rate": 0.00019999927967526983, - "loss": 46.0, - "step": 15812 - }, - { - "epoch": 1.209014278341648, - "grad_norm": 0.005361601244658232, - "learning_rate": 0.00019999927958409827, - "loss": 46.0, - "step": 15813 - }, - { - "epoch": 1.2090907353250377, - "grad_norm": 0.001418828614987433, - "learning_rate": 0.0001999992794929209, - "loss": 46.0, - "step": 15814 - }, - { - "epoch": 1.2091671923084275, - "grad_norm": 0.006174227222800255, - "learning_rate": 0.0001999992794017378, - "loss": 46.0, - "step": 15815 - }, - { - "epoch": 1.2092436492918173, - "grad_norm": 0.002934941789135337, - "learning_rate": 0.0001999992793105489, - "loss": 46.0, - "step": 15816 - }, - { - "epoch": 1.2093201062752068, - "grad_norm": 0.0010737967677414417, - "learning_rate": 0.00019999927921935423, - "loss": 46.0, - "step": 15817 - }, - { - "epoch": 1.2093965632585966, - "grad_norm": 0.022198457270860672, - "learning_rate": 0.00019999927912815383, - "loss": 46.0, - "step": 15818 - }, - { - "epoch": 1.2094730202419863, - "grad_norm": 0.024242263287305832, - "learning_rate": 0.00019999927903694762, - "loss": 46.0, - "step": 15819 - }, - { - "epoch": 1.209549477225376, - "grad_norm": 0.0035455338656902313, - "learning_rate": 0.00019999927894573567, - "loss": 46.0, - "step": 15820 - }, - { - "epoch": 1.2096259342087659, - "grad_norm": 0.0033642714843153954, - "learning_rate": 0.00019999927885451792, - "loss": 46.0, - "step": 15821 - }, - { - "epoch": 1.2097023911921556, - "grad_norm": 0.0016957588959485292, - "learning_rate": 0.0001999992787632944, - "loss": 46.0, - "step": 15822 - }, - { - "epoch": 1.2097788481755452, - "grad_norm": 0.0007913652225397527, - "learning_rate": 0.00019999927867206513, - "loss": 46.0, - "step": 15823 - }, - { - "epoch": 1.209855305158935, - "grad_norm": 0.0010948361596092582, - "learning_rate": 0.00019999927858083008, - "loss": 46.0, - "step": 15824 - }, - { - "epoch": 1.2099317621423247, - "grad_norm": 0.002287833718582988, - "learning_rate": 0.0001999992784895893, - "loss": 46.0, - "step": 15825 - }, - { - "epoch": 1.2100082191257144, - "grad_norm": 0.0016990774311125278, - "learning_rate": 0.0001999992783983427, - "loss": 46.0, - "step": 15826 - }, - { - "epoch": 1.2100846761091042, - "grad_norm": 0.0011581816943362355, - "learning_rate": 0.00019999927830709037, - "loss": 46.0, - "step": 15827 - }, - { - "epoch": 1.2101611330924937, - "grad_norm": 0.0010288050398230553, - "learning_rate": 0.00019999927821583223, - "loss": 46.0, - "step": 15828 - }, - { - "epoch": 1.2102375900758835, - "grad_norm": 0.0007644432480446994, - "learning_rate": 0.00019999927812456835, - "loss": 46.0, - "step": 15829 - }, - { - "epoch": 1.2103140470592733, - "grad_norm": 0.0010509173152968287, - "learning_rate": 0.0001999992780332987, - "loss": 46.0, - "step": 15830 - }, - { - "epoch": 1.210390504042663, - "grad_norm": 0.0006509299273602664, - "learning_rate": 0.00019999927794202323, - "loss": 46.0, - "step": 15831 - }, - { - "epoch": 1.2104669610260528, - "grad_norm": 0.0009767917217686772, - "learning_rate": 0.00019999927785074203, - "loss": 46.0, - "step": 15832 - }, - { - "epoch": 1.2105434180094425, - "grad_norm": 0.003716397564858198, - "learning_rate": 0.00019999927775945508, - "loss": 46.0, - "step": 15833 - }, - { - "epoch": 1.210619874992832, - "grad_norm": 0.0033332237508147955, - "learning_rate": 0.00019999927766816233, - "loss": 46.0, - "step": 15834 - }, - { - "epoch": 1.2106963319762218, - "grad_norm": 0.0008651018142700195, - "learning_rate": 0.00019999927757686383, - "loss": 46.0, - "step": 15835 - }, - { - "epoch": 1.2107727889596116, - "grad_norm": 0.002342688851058483, - "learning_rate": 0.00019999927748555956, - "loss": 46.0, - "step": 15836 - }, - { - "epoch": 1.2108492459430014, - "grad_norm": 0.001168787362985313, - "learning_rate": 0.00019999927739424952, - "loss": 46.0, - "step": 15837 - }, - { - "epoch": 1.2109257029263911, - "grad_norm": 0.003096845233812928, - "learning_rate": 0.00019999927730293367, - "loss": 46.0, - "step": 15838 - }, - { - "epoch": 1.2110021599097807, - "grad_norm": 0.004865511320531368, - "learning_rate": 0.0001999992772116121, - "loss": 46.0, - "step": 15839 - }, - { - "epoch": 1.2110786168931704, - "grad_norm": 0.0017508199671283364, - "learning_rate": 0.00019999927712028478, - "loss": 46.0, - "step": 15840 - }, - { - "epoch": 1.2111550738765602, - "grad_norm": 0.001202625804580748, - "learning_rate": 0.00019999927702895164, - "loss": 46.0, - "step": 15841 - }, - { - "epoch": 1.21123153085995, - "grad_norm": 0.0012115223798900843, - "learning_rate": 0.00019999927693761273, - "loss": 46.0, - "step": 15842 - }, - { - "epoch": 1.2113079878433397, - "grad_norm": 0.0060264067724347115, - "learning_rate": 0.00019999927684626807, - "loss": 46.0, - "step": 15843 - }, - { - "epoch": 1.2113844448267295, - "grad_norm": 0.0016420769970864058, - "learning_rate": 0.00019999927675491764, - "loss": 46.0, - "step": 15844 - }, - { - "epoch": 1.211460901810119, - "grad_norm": 0.0007236411329358816, - "learning_rate": 0.00019999927666356144, - "loss": 46.0, - "step": 15845 - }, - { - "epoch": 1.2115373587935088, - "grad_norm": 0.004076045472174883, - "learning_rate": 0.00019999927657219946, - "loss": 46.0, - "step": 15846 - }, - { - "epoch": 1.2116138157768985, - "grad_norm": 0.0010735122486948967, - "learning_rate": 0.0001999992764808317, - "loss": 46.0, - "step": 15847 - }, - { - "epoch": 1.2116902727602883, - "grad_norm": 0.0016806970816105604, - "learning_rate": 0.00019999927638945822, - "loss": 46.0, - "step": 15848 - }, - { - "epoch": 1.211766729743678, - "grad_norm": 0.0011726424563676119, - "learning_rate": 0.00019999927629807895, - "loss": 46.0, - "step": 15849 - }, - { - "epoch": 1.2118431867270676, - "grad_norm": 0.0009754498605616391, - "learning_rate": 0.00019999927620669388, - "loss": 46.0, - "step": 15850 - }, - { - "epoch": 1.2119196437104574, - "grad_norm": 0.0008477416122332215, - "learning_rate": 0.00019999927611530306, - "loss": 46.0, - "step": 15851 - }, - { - "epoch": 1.2119961006938471, - "grad_norm": 0.00562383234500885, - "learning_rate": 0.0001999992760239065, - "loss": 46.0, - "step": 15852 - }, - { - "epoch": 1.2120725576772369, - "grad_norm": 0.0027025260496884584, - "learning_rate": 0.00019999927593250413, - "loss": 46.0, - "step": 15853 - }, - { - "epoch": 1.2121490146606266, - "grad_norm": 0.0019046923844143748, - "learning_rate": 0.00019999927584109603, - "loss": 46.0, - "step": 15854 - }, - { - "epoch": 1.2122254716440164, - "grad_norm": 0.024615593254566193, - "learning_rate": 0.00019999927574968212, - "loss": 46.0, - "step": 15855 - }, - { - "epoch": 1.212301928627406, - "grad_norm": 0.0008551036007702351, - "learning_rate": 0.00019999927565826243, - "loss": 46.0, - "step": 15856 - }, - { - "epoch": 1.2123783856107957, - "grad_norm": 0.0010178949451074004, - "learning_rate": 0.000199999275566837, - "loss": 46.0, - "step": 15857 - }, - { - "epoch": 1.2124548425941855, - "grad_norm": 0.0010859669419005513, - "learning_rate": 0.0001999992754754058, - "loss": 46.0, - "step": 15858 - }, - { - "epoch": 1.2125312995775752, - "grad_norm": 0.0024663677904754877, - "learning_rate": 0.00019999927538396882, - "loss": 46.0, - "step": 15859 - }, - { - "epoch": 1.212607756560965, - "grad_norm": 0.0017164589371532202, - "learning_rate": 0.00019999927529252607, - "loss": 46.0, - "step": 15860 - }, - { - "epoch": 1.2126842135443545, - "grad_norm": 0.0015444419113919139, - "learning_rate": 0.00019999927520107758, - "loss": 46.0, - "step": 15861 - }, - { - "epoch": 1.2127606705277443, - "grad_norm": 0.000503760005813092, - "learning_rate": 0.00019999927510962328, - "loss": 46.0, - "step": 15862 - }, - { - "epoch": 1.212837127511134, - "grad_norm": 0.0008513595093972981, - "learning_rate": 0.0001999992750181632, - "loss": 46.0, - "step": 15863 - }, - { - "epoch": 1.2129135844945238, - "grad_norm": 0.0015667357947677374, - "learning_rate": 0.00019999927492669742, - "loss": 46.0, - "step": 15864 - }, - { - "epoch": 1.2129900414779136, - "grad_norm": 0.0013385817874222994, - "learning_rate": 0.00019999927483522583, - "loss": 46.0, - "step": 15865 - }, - { - "epoch": 1.2130664984613033, - "grad_norm": 0.0030473442748188972, - "learning_rate": 0.00019999927474374847, - "loss": 46.0, - "step": 15866 - }, - { - "epoch": 1.2131429554446929, - "grad_norm": 0.012189347296953201, - "learning_rate": 0.00019999927465226533, - "loss": 46.0, - "step": 15867 - }, - { - "epoch": 1.2132194124280826, - "grad_norm": 0.005485172383487225, - "learning_rate": 0.00019999927456077642, - "loss": 46.0, - "step": 15868 - }, - { - "epoch": 1.2132958694114724, - "grad_norm": 0.0005193467368371785, - "learning_rate": 0.00019999927446928177, - "loss": 46.0, - "step": 15869 - }, - { - "epoch": 1.2133723263948621, - "grad_norm": 0.00326589890755713, - "learning_rate": 0.00019999927437778134, - "loss": 46.0, - "step": 15870 - }, - { - "epoch": 1.213448783378252, - "grad_norm": 0.0008399309008382261, - "learning_rate": 0.0001999992742862751, - "loss": 46.0, - "step": 15871 - }, - { - "epoch": 1.2135252403616414, - "grad_norm": 0.0007985203992575407, - "learning_rate": 0.00019999927419476316, - "loss": 46.0, - "step": 15872 - }, - { - "epoch": 1.2136016973450312, - "grad_norm": 0.0007578706135973334, - "learning_rate": 0.00019999927410324538, - "loss": 46.0, - "step": 15873 - }, - { - "epoch": 1.213678154328421, - "grad_norm": 0.00224702013656497, - "learning_rate": 0.0001999992740117219, - "loss": 46.0, - "step": 15874 - }, - { - "epoch": 1.2137546113118107, - "grad_norm": 0.0006704268744215369, - "learning_rate": 0.00019999927392019262, - "loss": 46.0, - "step": 15875 - }, - { - "epoch": 1.2138310682952005, - "grad_norm": 0.0015335469506680965, - "learning_rate": 0.00019999927382865755, - "loss": 46.0, - "step": 15876 - }, - { - "epoch": 1.2139075252785902, - "grad_norm": 0.0004962708917446434, - "learning_rate": 0.00019999927373711674, - "loss": 46.0, - "step": 15877 - }, - { - "epoch": 1.2139839822619798, - "grad_norm": 0.0012502699391916394, - "learning_rate": 0.00019999927364557015, - "loss": 46.0, - "step": 15878 - }, - { - "epoch": 1.2140604392453695, - "grad_norm": 0.0037831610534340143, - "learning_rate": 0.00019999927355401778, - "loss": 46.0, - "step": 15879 - }, - { - "epoch": 1.2141368962287593, - "grad_norm": 0.0030314449686557055, - "learning_rate": 0.00019999927346245965, - "loss": 46.0, - "step": 15880 - }, - { - "epoch": 1.214213353212149, - "grad_norm": 0.001109353732317686, - "learning_rate": 0.00019999927337089574, - "loss": 46.0, - "step": 15881 - }, - { - "epoch": 1.2142898101955386, - "grad_norm": 0.0020769829861819744, - "learning_rate": 0.00019999927327932608, - "loss": 46.0, - "step": 15882 - }, - { - "epoch": 1.2143662671789284, - "grad_norm": 0.0017611763905733824, - "learning_rate": 0.00019999927318775065, - "loss": 46.0, - "step": 15883 - }, - { - "epoch": 1.2144427241623181, - "grad_norm": 0.0014140050625428557, - "learning_rate": 0.00019999927309616942, - "loss": 46.0, - "step": 15884 - }, - { - "epoch": 1.214519181145708, - "grad_norm": 0.0008226445643231273, - "learning_rate": 0.00019999927300458245, - "loss": 46.0, - "step": 15885 - }, - { - "epoch": 1.2145956381290977, - "grad_norm": 0.0018748317379504442, - "learning_rate": 0.0001999992729129897, - "loss": 46.0, - "step": 15886 - }, - { - "epoch": 1.2146720951124874, - "grad_norm": 0.0012664722744375467, - "learning_rate": 0.00019999927282139118, - "loss": 46.0, - "step": 15887 - }, - { - "epoch": 1.2147485520958772, - "grad_norm": 0.0008940821280702949, - "learning_rate": 0.00019999927272978688, - "loss": 46.0, - "step": 15888 - }, - { - "epoch": 1.2148250090792667, - "grad_norm": 0.004960604943335056, - "learning_rate": 0.00019999927263817684, - "loss": 46.0, - "step": 15889 - }, - { - "epoch": 1.2149014660626565, - "grad_norm": 0.0009728364530019462, - "learning_rate": 0.00019999927254656102, - "loss": 46.0, - "step": 15890 - }, - { - "epoch": 1.2149779230460462, - "grad_norm": 0.004020273685455322, - "learning_rate": 0.0001999992724549394, - "loss": 46.0, - "step": 15891 - }, - { - "epoch": 1.215054380029436, - "grad_norm": 0.0008358367485925555, - "learning_rate": 0.00019999927236331207, - "loss": 46.0, - "step": 15892 - }, - { - "epoch": 1.2151308370128255, - "grad_norm": 0.0017120903357863426, - "learning_rate": 0.00019999927227167894, - "loss": 46.0, - "step": 15893 - }, - { - "epoch": 1.2152072939962153, - "grad_norm": 0.0012376890517771244, - "learning_rate": 0.00019999927218004003, - "loss": 46.0, - "step": 15894 - }, - { - "epoch": 1.215283750979605, - "grad_norm": 0.0014771816786378622, - "learning_rate": 0.00019999927208839535, - "loss": 46.0, - "step": 15895 - }, - { - "epoch": 1.2153602079629948, - "grad_norm": 0.0011830950388684869, - "learning_rate": 0.00019999927199674492, - "loss": 46.0, - "step": 15896 - }, - { - "epoch": 1.2154366649463846, - "grad_norm": 0.002167725469917059, - "learning_rate": 0.00019999927190508872, - "loss": 46.0, - "step": 15897 - }, - { - "epoch": 1.2155131219297743, - "grad_norm": 0.00478803226724267, - "learning_rate": 0.00019999927181342671, - "loss": 46.0, - "step": 15898 - }, - { - "epoch": 1.215589578913164, - "grad_norm": 0.0007764878682792187, - "learning_rate": 0.00019999927172175897, - "loss": 46.0, - "step": 15899 - }, - { - "epoch": 1.2156660358965536, - "grad_norm": 0.0007188431336544454, - "learning_rate": 0.00019999927163008544, - "loss": 46.0, - "step": 15900 - }, - { - "epoch": 1.2157424928799434, - "grad_norm": 0.0004835437284782529, - "learning_rate": 0.00019999927153840617, - "loss": 46.0, - "step": 15901 - }, - { - "epoch": 1.2158189498633332, - "grad_norm": 0.0016761115984991193, - "learning_rate": 0.0001999992714467211, - "loss": 46.0, - "step": 15902 - }, - { - "epoch": 1.215895406846723, - "grad_norm": 0.004625473637133837, - "learning_rate": 0.00019999927135503026, - "loss": 46.0, - "step": 15903 - }, - { - "epoch": 1.2159718638301125, - "grad_norm": 0.0007580881356261671, - "learning_rate": 0.00019999927126333368, - "loss": 46.0, - "step": 15904 - }, - { - "epoch": 1.2160483208135022, - "grad_norm": 0.0003252858587075025, - "learning_rate": 0.0001999992711716313, - "loss": 46.0, - "step": 15905 - }, - { - "epoch": 1.216124777796892, - "grad_norm": 0.0025529228150844574, - "learning_rate": 0.00019999927107992318, - "loss": 46.0, - "step": 15906 - }, - { - "epoch": 1.2162012347802817, - "grad_norm": 0.008447384461760521, - "learning_rate": 0.00019999927098820927, - "loss": 46.0, - "step": 15907 - }, - { - "epoch": 1.2162776917636715, - "grad_norm": 0.0009964253986254334, - "learning_rate": 0.0001999992708964896, - "loss": 46.0, - "step": 15908 - }, - { - "epoch": 1.2163541487470613, - "grad_norm": 0.001493568066507578, - "learning_rate": 0.00019999927080476416, - "loss": 46.0, - "step": 15909 - }, - { - "epoch": 1.2164306057304508, - "grad_norm": 0.0016631204634904861, - "learning_rate": 0.00019999927071303293, - "loss": 46.0, - "step": 15910 - }, - { - "epoch": 1.2165070627138406, - "grad_norm": 0.0005618016002699733, - "learning_rate": 0.00019999927062129596, - "loss": 46.0, - "step": 15911 - }, - { - "epoch": 1.2165835196972303, - "grad_norm": 0.0015689197462052107, - "learning_rate": 0.0001999992705295532, - "loss": 46.0, - "step": 15912 - }, - { - "epoch": 1.21665997668062, - "grad_norm": 0.0033365788403898478, - "learning_rate": 0.00019999927043780469, - "loss": 46.0, - "step": 15913 - }, - { - "epoch": 1.2167364336640099, - "grad_norm": 0.0007817473378963768, - "learning_rate": 0.0001999992703460504, - "loss": 46.0, - "step": 15914 - }, - { - "epoch": 1.2168128906473994, - "grad_norm": 0.005644321441650391, - "learning_rate": 0.00019999927025429032, - "loss": 46.0, - "step": 15915 - }, - { - "epoch": 1.2168893476307892, - "grad_norm": 0.001306253601796925, - "learning_rate": 0.00019999927016252448, - "loss": 46.0, - "step": 15916 - }, - { - "epoch": 1.216965804614179, - "grad_norm": 0.0012445839820429683, - "learning_rate": 0.0001999992700707529, - "loss": 46.0, - "step": 15917 - }, - { - "epoch": 1.2170422615975687, - "grad_norm": 0.0006187583203427494, - "learning_rate": 0.00019999926997897553, - "loss": 46.0, - "step": 15918 - }, - { - "epoch": 1.2171187185809584, - "grad_norm": 0.0010500020580366254, - "learning_rate": 0.0001999992698871924, - "loss": 46.0, - "step": 15919 - }, - { - "epoch": 1.2171951755643482, - "grad_norm": 0.0005063980934210122, - "learning_rate": 0.0001999992697954035, - "loss": 46.0, - "step": 15920 - }, - { - "epoch": 1.2172716325477377, - "grad_norm": 0.00218991213478148, - "learning_rate": 0.0001999992697036088, - "loss": 46.0, - "step": 15921 - }, - { - "epoch": 1.2173480895311275, - "grad_norm": 0.0005154482205398381, - "learning_rate": 0.00019999926961180835, - "loss": 46.0, - "step": 15922 - }, - { - "epoch": 1.2174245465145173, - "grad_norm": 0.0011830269359052181, - "learning_rate": 0.00019999926952000215, - "loss": 46.0, - "step": 15923 - }, - { - "epoch": 1.217501003497907, - "grad_norm": 0.0032510936725884676, - "learning_rate": 0.00019999926942819017, - "loss": 46.0, - "step": 15924 - }, - { - "epoch": 1.2175774604812968, - "grad_norm": 0.002212245250120759, - "learning_rate": 0.0001999992693363724, - "loss": 46.0, - "step": 15925 - }, - { - "epoch": 1.2176539174646863, - "grad_norm": 0.0006792893982492387, - "learning_rate": 0.00019999926924454888, - "loss": 46.0, - "step": 15926 - }, - { - "epoch": 1.217730374448076, - "grad_norm": 0.00031252449844032526, - "learning_rate": 0.00019999926915271958, - "loss": 46.0, - "step": 15927 - }, - { - "epoch": 1.2178068314314658, - "grad_norm": 0.0009779686806723475, - "learning_rate": 0.00019999926906088451, - "loss": 46.0, - "step": 15928 - }, - { - "epoch": 1.2178832884148556, - "grad_norm": 0.005132970865815878, - "learning_rate": 0.0001999992689690437, - "loss": 46.0, - "step": 15929 - }, - { - "epoch": 1.2179597453982454, - "grad_norm": 0.000386443076422438, - "learning_rate": 0.00019999926887719709, - "loss": 46.0, - "step": 15930 - }, - { - "epoch": 1.2180362023816351, - "grad_norm": 0.007529250346124172, - "learning_rate": 0.00019999926878534472, - "loss": 46.0, - "step": 15931 - }, - { - "epoch": 1.2181126593650247, - "grad_norm": 0.0010429989779368043, - "learning_rate": 0.00019999926869348656, - "loss": 46.0, - "step": 15932 - }, - { - "epoch": 1.2181891163484144, - "grad_norm": 0.0007154787890613079, - "learning_rate": 0.00019999926860162268, - "loss": 46.0, - "step": 15933 - }, - { - "epoch": 1.2182655733318042, - "grad_norm": 0.0008657712023705244, - "learning_rate": 0.000199999268509753, - "loss": 46.0, - "step": 15934 - }, - { - "epoch": 1.218342030315194, - "grad_norm": 0.0021054905373603106, - "learning_rate": 0.00019999926841787752, - "loss": 46.0, - "step": 15935 - }, - { - "epoch": 1.2184184872985837, - "grad_norm": 0.0012357636587694287, - "learning_rate": 0.00019999926832599632, - "loss": 46.0, - "step": 15936 - }, - { - "epoch": 1.2184949442819732, - "grad_norm": 0.007126253563910723, - "learning_rate": 0.00019999926823410932, - "loss": 46.0, - "step": 15937 - }, - { - "epoch": 1.218571401265363, - "grad_norm": 0.0004661525599658489, - "learning_rate": 0.00019999926814221657, - "loss": 46.0, - "step": 15938 - }, - { - "epoch": 1.2186478582487528, - "grad_norm": 0.0057141780853271484, - "learning_rate": 0.00019999926805031805, - "loss": 46.0, - "step": 15939 - }, - { - "epoch": 1.2187243152321425, - "grad_norm": 0.0018218666082248092, - "learning_rate": 0.00019999926795841378, - "loss": 46.0, - "step": 15940 - }, - { - "epoch": 1.2188007722155323, - "grad_norm": 0.0008860841626301408, - "learning_rate": 0.00019999926786650368, - "loss": 46.0, - "step": 15941 - }, - { - "epoch": 1.218877229198922, - "grad_norm": 0.0005197604186832905, - "learning_rate": 0.00019999926777458787, - "loss": 46.0, - "step": 15942 - }, - { - "epoch": 1.2189536861823116, - "grad_norm": 0.0021949796937406063, - "learning_rate": 0.00019999926768266626, - "loss": 46.0, - "step": 15943 - }, - { - "epoch": 1.2190301431657014, - "grad_norm": 0.005042894743382931, - "learning_rate": 0.0001999992675907389, - "loss": 46.0, - "step": 15944 - }, - { - "epoch": 1.2191066001490911, - "grad_norm": 0.0009545614011585712, - "learning_rate": 0.00019999926749880573, - "loss": 46.0, - "step": 15945 - }, - { - "epoch": 1.2191830571324809, - "grad_norm": 0.0034013711847364902, - "learning_rate": 0.00019999926740686683, - "loss": 46.0, - "step": 15946 - }, - { - "epoch": 1.2192595141158706, - "grad_norm": 0.0008844583644531667, - "learning_rate": 0.00019999926731492214, - "loss": 46.0, - "step": 15947 - }, - { - "epoch": 1.2193359710992602, - "grad_norm": 0.010152323171496391, - "learning_rate": 0.0001999992672229717, - "loss": 46.0, - "step": 15948 - }, - { - "epoch": 1.21941242808265, - "grad_norm": 0.0011236346326768398, - "learning_rate": 0.00019999926713101546, - "loss": 46.0, - "step": 15949 - }, - { - "epoch": 1.2194888850660397, - "grad_norm": 0.001233525574207306, - "learning_rate": 0.0001999992670390535, - "loss": 46.0, - "step": 15950 - }, - { - "epoch": 1.2195653420494295, - "grad_norm": 0.0005170265794731677, - "learning_rate": 0.00019999926694708574, - "loss": 46.0, - "step": 15951 - }, - { - "epoch": 1.2196417990328192, - "grad_norm": 0.0017057224176824093, - "learning_rate": 0.0001999992668551122, - "loss": 46.0, - "step": 15952 - }, - { - "epoch": 1.219718256016209, - "grad_norm": 0.001143014756962657, - "learning_rate": 0.0001999992667631329, - "loss": 46.0, - "step": 15953 - }, - { - "epoch": 1.2197947129995985, - "grad_norm": 0.0004931085277348757, - "learning_rate": 0.00019999926667114783, - "loss": 46.0, - "step": 15954 - }, - { - "epoch": 1.2198711699829883, - "grad_norm": 0.003175918245688081, - "learning_rate": 0.00019999926657915696, - "loss": 46.0, - "step": 15955 - }, - { - "epoch": 1.219947626966378, - "grad_norm": 0.0005682227783836424, - "learning_rate": 0.00019999926648716038, - "loss": 46.0, - "step": 15956 - }, - { - "epoch": 1.2200240839497678, - "grad_norm": 0.0011381481308490038, - "learning_rate": 0.00019999926639515802, - "loss": 46.0, - "step": 15957 - }, - { - "epoch": 1.2201005409331576, - "grad_norm": 0.0007836158038116992, - "learning_rate": 0.00019999926630314985, - "loss": 46.0, - "step": 15958 - }, - { - "epoch": 1.220176997916547, - "grad_norm": 0.006223226897418499, - "learning_rate": 0.00019999926621113595, - "loss": 46.0, - "step": 15959 - }, - { - "epoch": 1.2202534548999369, - "grad_norm": 0.0034408390056341887, - "learning_rate": 0.00019999926611911627, - "loss": 46.0, - "step": 15960 - }, - { - "epoch": 1.2203299118833266, - "grad_norm": 0.0010396104771643877, - "learning_rate": 0.0001999992660270908, - "loss": 46.0, - "step": 15961 - }, - { - "epoch": 1.2204063688667164, - "grad_norm": 0.0015676913317292929, - "learning_rate": 0.00019999926593505958, - "loss": 46.0, - "step": 15962 - }, - { - "epoch": 1.2204828258501061, - "grad_norm": 0.0023836365435272455, - "learning_rate": 0.0001999992658430226, - "loss": 46.0, - "step": 15963 - }, - { - "epoch": 1.220559282833496, - "grad_norm": 0.0006649569259025156, - "learning_rate": 0.00019999926575097984, - "loss": 46.0, - "step": 15964 - }, - { - "epoch": 1.2206357398168854, - "grad_norm": 0.0008707884699106216, - "learning_rate": 0.0001999992656589313, - "loss": 46.0, - "step": 15965 - }, - { - "epoch": 1.2207121968002752, - "grad_norm": 0.0004146408464293927, - "learning_rate": 0.000199999265566877, - "loss": 46.0, - "step": 15966 - }, - { - "epoch": 1.220788653783665, - "grad_norm": 0.0031799226999282837, - "learning_rate": 0.00019999926547481693, - "loss": 46.0, - "step": 15967 - }, - { - "epoch": 1.2208651107670547, - "grad_norm": 0.0059667713940143585, - "learning_rate": 0.0001999992653827511, - "loss": 46.0, - "step": 15968 - }, - { - "epoch": 1.2209415677504445, - "grad_norm": 0.0009155656443908811, - "learning_rate": 0.00019999926529067947, - "loss": 46.0, - "step": 15969 - }, - { - "epoch": 1.221018024733834, - "grad_norm": 0.004582240246236324, - "learning_rate": 0.0001999992651986021, - "loss": 46.0, - "step": 15970 - }, - { - "epoch": 1.2210944817172238, - "grad_norm": 0.004969425965100527, - "learning_rate": 0.00019999926510651895, - "loss": 46.0, - "step": 15971 - }, - { - "epoch": 1.2211709387006136, - "grad_norm": 0.0029141867998987436, - "learning_rate": 0.00019999926501443005, - "loss": 46.0, - "step": 15972 - }, - { - "epoch": 1.2212473956840033, - "grad_norm": 0.0014684441266581416, - "learning_rate": 0.00019999926492233536, - "loss": 46.0, - "step": 15973 - }, - { - "epoch": 1.221323852667393, - "grad_norm": 0.0008456144132651389, - "learning_rate": 0.0001999992648302349, - "loss": 46.0, - "step": 15974 - }, - { - "epoch": 1.2214003096507828, - "grad_norm": 0.02805967815220356, - "learning_rate": 0.00019999926473812866, - "loss": 46.0, - "step": 15975 - }, - { - "epoch": 1.2214767666341724, - "grad_norm": 0.001692122663371265, - "learning_rate": 0.00019999926464601668, - "loss": 46.0, - "step": 15976 - }, - { - "epoch": 1.2215532236175621, - "grad_norm": 0.004000423941761255, - "learning_rate": 0.00019999926455389894, - "loss": 46.0, - "step": 15977 - }, - { - "epoch": 1.221629680600952, - "grad_norm": 0.0032135627698153257, - "learning_rate": 0.00019999926446177536, - "loss": 46.0, - "step": 15978 - }, - { - "epoch": 1.2217061375843417, - "grad_norm": 0.0023271837271749973, - "learning_rate": 0.0001999992643696461, - "loss": 46.0, - "step": 15979 - }, - { - "epoch": 1.2217825945677314, - "grad_norm": 0.0017594281816855073, - "learning_rate": 0.000199999264277511, - "loss": 46.0, - "step": 15980 - }, - { - "epoch": 1.221859051551121, - "grad_norm": 0.002090575871989131, - "learning_rate": 0.00019999926418537016, - "loss": 46.0, - "step": 15981 - }, - { - "epoch": 1.2219355085345107, - "grad_norm": 0.0010836290894076228, - "learning_rate": 0.00019999926409322355, - "loss": 46.0, - "step": 15982 - }, - { - "epoch": 1.2220119655179005, - "grad_norm": 0.0003954892745241523, - "learning_rate": 0.0001999992640010712, - "loss": 46.0, - "step": 15983 - }, - { - "epoch": 1.2220884225012902, - "grad_norm": 0.0006336772930808365, - "learning_rate": 0.00019999926390891303, - "loss": 46.0, - "step": 15984 - }, - { - "epoch": 1.22216487948468, - "grad_norm": 0.0014205609913915396, - "learning_rate": 0.00019999926381674912, - "loss": 46.0, - "step": 15985 - }, - { - "epoch": 1.2222413364680698, - "grad_norm": 0.001966850133612752, - "learning_rate": 0.0001999992637245794, - "loss": 46.0, - "step": 15986 - }, - { - "epoch": 1.2223177934514593, - "grad_norm": 0.0007035090238787234, - "learning_rate": 0.00019999926363240396, - "loss": 46.0, - "step": 15987 - }, - { - "epoch": 1.222394250434849, - "grad_norm": 0.0005249667447060347, - "learning_rate": 0.0001999992635402227, - "loss": 46.0, - "step": 15988 - }, - { - "epoch": 1.2224707074182388, - "grad_norm": 0.0007026847451925278, - "learning_rate": 0.00019999926344803573, - "loss": 46.0, - "step": 15989 - }, - { - "epoch": 1.2225471644016286, - "grad_norm": 0.003269130364060402, - "learning_rate": 0.00019999926335584296, - "loss": 46.0, - "step": 15990 - }, - { - "epoch": 1.2226236213850183, - "grad_norm": 0.008514025248587132, - "learning_rate": 0.00019999926326364444, - "loss": 46.0, - "step": 15991 - }, - { - "epoch": 1.2227000783684079, - "grad_norm": 0.0012790159089490771, - "learning_rate": 0.00019999926317144012, - "loss": 46.0, - "step": 15992 - }, - { - "epoch": 1.2227765353517976, - "grad_norm": 0.0014809739077463746, - "learning_rate": 0.00019999926307923005, - "loss": 46.0, - "step": 15993 - }, - { - "epoch": 1.2228529923351874, - "grad_norm": 0.0012516764691099524, - "learning_rate": 0.0001999992629870142, - "loss": 46.0, - "step": 15994 - }, - { - "epoch": 1.2229294493185772, - "grad_norm": 0.003328055376186967, - "learning_rate": 0.0001999992628947926, - "loss": 46.0, - "step": 15995 - }, - { - "epoch": 1.223005906301967, - "grad_norm": 0.0009070641244761646, - "learning_rate": 0.0001999992628025652, - "loss": 46.0, - "step": 15996 - }, - { - "epoch": 1.2230823632853567, - "grad_norm": 0.0011334576411172748, - "learning_rate": 0.00019999926271033208, - "loss": 46.0, - "step": 15997 - }, - { - "epoch": 1.2231588202687462, - "grad_norm": 0.0008738017058931291, - "learning_rate": 0.00019999926261809315, - "loss": 46.0, - "step": 15998 - }, - { - "epoch": 1.223235277252136, - "grad_norm": 0.0007524918182753026, - "learning_rate": 0.00019999926252584844, - "loss": 46.0, - "step": 15999 - }, - { - "epoch": 1.2233117342355257, - "grad_norm": 0.0015665973769500852, - "learning_rate": 0.00019999926243359796, - "loss": 46.0, - "step": 16000 - }, - { - "epoch": 1.2233881912189155, - "grad_norm": 0.0012447729241102934, - "learning_rate": 0.00019999926234134176, - "loss": 46.0, - "step": 16001 - }, - { - "epoch": 1.2234646482023053, - "grad_norm": 0.0008318267646245658, - "learning_rate": 0.00019999926224907973, - "loss": 46.0, - "step": 16002 - }, - { - "epoch": 1.2235411051856948, - "grad_norm": 0.00046407562331296504, - "learning_rate": 0.00019999926215681199, - "loss": 46.0, - "step": 16003 - }, - { - "epoch": 1.2236175621690846, - "grad_norm": 0.00404116977006197, - "learning_rate": 0.00019999926206453844, - "loss": 46.0, - "step": 16004 - }, - { - "epoch": 1.2236940191524743, - "grad_norm": 0.0013177929213270545, - "learning_rate": 0.00019999926197225915, - "loss": 46.0, - "step": 16005 - }, - { - "epoch": 1.223770476135864, - "grad_norm": 0.0012153750285506248, - "learning_rate": 0.00019999926187997405, - "loss": 46.0, - "step": 16006 - }, - { - "epoch": 1.2238469331192539, - "grad_norm": 0.004689784254878759, - "learning_rate": 0.0001999992617876832, - "loss": 46.0, - "step": 16007 - }, - { - "epoch": 1.2239233901026436, - "grad_norm": 0.0010637850500643253, - "learning_rate": 0.0001999992616953866, - "loss": 46.0, - "step": 16008 - }, - { - "epoch": 1.2239998470860332, - "grad_norm": 0.0007532556192018092, - "learning_rate": 0.00019999926160308421, - "loss": 46.0, - "step": 16009 - }, - { - "epoch": 1.224076304069423, - "grad_norm": 0.001208620727993548, - "learning_rate": 0.00019999926151077605, - "loss": 46.0, - "step": 16010 - }, - { - "epoch": 1.2241527610528127, - "grad_norm": 0.0009078102302737534, - "learning_rate": 0.00019999926141846212, - "loss": 46.0, - "step": 16011 - }, - { - "epoch": 1.2242292180362024, - "grad_norm": 0.0014089127071201801, - "learning_rate": 0.00019999926132614244, - "loss": 46.0, - "step": 16012 - }, - { - "epoch": 1.2243056750195922, - "grad_norm": 0.0007508535054512322, - "learning_rate": 0.000199999261233817, - "loss": 46.0, - "step": 16013 - }, - { - "epoch": 1.2243821320029817, - "grad_norm": 0.0008192521636374295, - "learning_rate": 0.00019999926114148576, - "loss": 46.0, - "step": 16014 - }, - { - "epoch": 1.2244585889863715, - "grad_norm": 0.0005333675653673708, - "learning_rate": 0.00019999926104914874, - "loss": 46.0, - "step": 16015 - }, - { - "epoch": 1.2245350459697613, - "grad_norm": 0.0017077181255444884, - "learning_rate": 0.000199999260956806, - "loss": 46.0, - "step": 16016 - }, - { - "epoch": 1.224611502953151, - "grad_norm": 0.0009263677056878805, - "learning_rate": 0.00019999926086445742, - "loss": 46.0, - "step": 16017 - }, - { - "epoch": 1.2246879599365408, - "grad_norm": 0.0008773449226282537, - "learning_rate": 0.00019999926077210312, - "loss": 46.0, - "step": 16018 - }, - { - "epoch": 1.2247644169199305, - "grad_norm": 0.0018951385281980038, - "learning_rate": 0.00019999926067974303, - "loss": 46.0, - "step": 16019 - }, - { - "epoch": 1.22484087390332, - "grad_norm": 0.0007082127267494798, - "learning_rate": 0.0001999992605873772, - "loss": 46.0, - "step": 16020 - }, - { - "epoch": 1.2249173308867098, - "grad_norm": 0.0006573075661435723, - "learning_rate": 0.00019999926049500558, - "loss": 46.0, - "step": 16021 - }, - { - "epoch": 1.2249937878700996, - "grad_norm": 0.001450154697522521, - "learning_rate": 0.00019999926040262817, - "loss": 46.0, - "step": 16022 - }, - { - "epoch": 1.2250702448534894, - "grad_norm": 0.0005226099165156484, - "learning_rate": 0.00019999926031024503, - "loss": 46.0, - "step": 16023 - }, - { - "epoch": 1.225146701836879, - "grad_norm": 0.0005636019050143659, - "learning_rate": 0.0001999992602178561, - "loss": 46.0, - "step": 16024 - }, - { - "epoch": 1.2252231588202687, - "grad_norm": 0.0010586412390694022, - "learning_rate": 0.0001999992601254614, - "loss": 46.0, - "step": 16025 - }, - { - "epoch": 1.2252996158036584, - "grad_norm": 0.000290626980131492, - "learning_rate": 0.00019999926003306094, - "loss": 46.0, - "step": 16026 - }, - { - "epoch": 1.2253760727870482, - "grad_norm": 0.0012102006003260612, - "learning_rate": 0.0001999992599406547, - "loss": 46.0, - "step": 16027 - }, - { - "epoch": 1.225452529770438, - "grad_norm": 0.0013221726985648274, - "learning_rate": 0.00019999925984824272, - "loss": 46.0, - "step": 16028 - }, - { - "epoch": 1.2255289867538277, - "grad_norm": 0.0005828901776112616, - "learning_rate": 0.00019999925975582494, - "loss": 46.0, - "step": 16029 - }, - { - "epoch": 1.2256054437372175, - "grad_norm": 0.0010008827084675431, - "learning_rate": 0.00019999925966340137, - "loss": 46.0, - "step": 16030 - }, - { - "epoch": 1.225681900720607, - "grad_norm": 0.0033580453600734472, - "learning_rate": 0.00019999925957097208, - "loss": 46.0, - "step": 16031 - }, - { - "epoch": 1.2257583577039968, - "grad_norm": 0.002998590236529708, - "learning_rate": 0.00019999925947853699, - "loss": 46.0, - "step": 16032 - }, - { - "epoch": 1.2258348146873865, - "grad_norm": 0.022819610312581062, - "learning_rate": 0.00019999925938609615, - "loss": 46.0, - "step": 16033 - }, - { - "epoch": 1.2259112716707763, - "grad_norm": 0.002703476930037141, - "learning_rate": 0.00019999925929364953, - "loss": 46.0, - "step": 16034 - }, - { - "epoch": 1.2259877286541658, - "grad_norm": 0.006659628823399544, - "learning_rate": 0.00019999925920119712, - "loss": 46.0, - "step": 16035 - }, - { - "epoch": 1.2260641856375556, - "grad_norm": 0.0012546078069135547, - "learning_rate": 0.000199999259108739, - "loss": 46.0, - "step": 16036 - }, - { - "epoch": 1.2261406426209454, - "grad_norm": 0.0009955652058124542, - "learning_rate": 0.00019999925901627506, - "loss": 46.0, - "step": 16037 - }, - { - "epoch": 1.2262170996043351, - "grad_norm": 0.0034153307788074017, - "learning_rate": 0.00019999925892380532, - "loss": 46.0, - "step": 16038 - }, - { - "epoch": 1.2262935565877249, - "grad_norm": 0.002630155300721526, - "learning_rate": 0.00019999925883132987, - "loss": 46.0, - "step": 16039 - }, - { - "epoch": 1.2263700135711146, - "grad_norm": 0.0012720349477604032, - "learning_rate": 0.00019999925873884865, - "loss": 46.0, - "step": 16040 - }, - { - "epoch": 1.2264464705545042, - "grad_norm": 0.000962450576480478, - "learning_rate": 0.00019999925864636162, - "loss": 46.0, - "step": 16041 - }, - { - "epoch": 1.226522927537894, - "grad_norm": 0.0006845833268016577, - "learning_rate": 0.00019999925855386885, - "loss": 46.0, - "step": 16042 - }, - { - "epoch": 1.2265993845212837, - "grad_norm": 0.0006671029841527343, - "learning_rate": 0.0001999992584613703, - "loss": 46.0, - "step": 16043 - }, - { - "epoch": 1.2266758415046735, - "grad_norm": 0.001622217707335949, - "learning_rate": 0.00019999925836886598, - "loss": 46.0, - "step": 16044 - }, - { - "epoch": 1.2267522984880632, - "grad_norm": 0.0038421335630118847, - "learning_rate": 0.00019999925827635592, - "loss": 46.0, - "step": 16045 - }, - { - "epoch": 1.2268287554714528, - "grad_norm": 0.000969208253081888, - "learning_rate": 0.00019999925818384008, - "loss": 46.0, - "step": 16046 - }, - { - "epoch": 1.2269052124548425, - "grad_norm": 0.0009379900875501335, - "learning_rate": 0.00019999925809131847, - "loss": 46.0, - "step": 16047 - }, - { - "epoch": 1.2269816694382323, - "grad_norm": 0.002720875898376107, - "learning_rate": 0.00019999925799879105, - "loss": 46.0, - "step": 16048 - }, - { - "epoch": 1.227058126421622, - "grad_norm": 0.0004442320205271244, - "learning_rate": 0.0001999992579062579, - "loss": 46.0, - "step": 16049 - }, - { - "epoch": 1.2271345834050118, - "grad_norm": 0.0006628509145230055, - "learning_rate": 0.00019999925781371896, - "loss": 46.0, - "step": 16050 - }, - { - "epoch": 1.2272110403884016, - "grad_norm": 0.0015220519853755832, - "learning_rate": 0.00019999925772117426, - "loss": 46.0, - "step": 16051 - }, - { - "epoch": 1.227287497371791, - "grad_norm": 0.0014778684126213193, - "learning_rate": 0.00019999925762862378, - "loss": 46.0, - "step": 16052 - }, - { - "epoch": 1.2273639543551809, - "grad_norm": 0.000373422255506739, - "learning_rate": 0.00019999925753606755, - "loss": 46.0, - "step": 16053 - }, - { - "epoch": 1.2274404113385706, - "grad_norm": 0.0007266036118380725, - "learning_rate": 0.00019999925744350553, - "loss": 46.0, - "step": 16054 - }, - { - "epoch": 1.2275168683219604, - "grad_norm": 0.0012402933789417148, - "learning_rate": 0.00019999925735093775, - "loss": 46.0, - "step": 16055 - }, - { - "epoch": 1.2275933253053501, - "grad_norm": 0.0009391485946252942, - "learning_rate": 0.0001999992572583642, - "loss": 46.0, - "step": 16056 - }, - { - "epoch": 1.2276697822887397, - "grad_norm": 0.000821779016405344, - "learning_rate": 0.0001999992571657849, - "loss": 46.0, - "step": 16057 - }, - { - "epoch": 1.2277462392721294, - "grad_norm": 0.021309809759259224, - "learning_rate": 0.0001999992570731998, - "loss": 46.0, - "step": 16058 - }, - { - "epoch": 1.2278226962555192, - "grad_norm": 0.001841805293224752, - "learning_rate": 0.00019999925698060896, - "loss": 46.0, - "step": 16059 - }, - { - "epoch": 1.227899153238909, - "grad_norm": 0.0003897084097843617, - "learning_rate": 0.00019999925688801232, - "loss": 46.0, - "step": 16060 - }, - { - "epoch": 1.2279756102222987, - "grad_norm": 0.0020316436421126127, - "learning_rate": 0.0001999992567954099, - "loss": 46.0, - "step": 16061 - }, - { - "epoch": 1.2280520672056885, - "grad_norm": 0.0007745440234430134, - "learning_rate": 0.00019999925670280178, - "loss": 46.0, - "step": 16062 - }, - { - "epoch": 1.228128524189078, - "grad_norm": 0.0015158121241256595, - "learning_rate": 0.00019999925661018785, - "loss": 46.0, - "step": 16063 - }, - { - "epoch": 1.2282049811724678, - "grad_norm": 0.0005278295138850808, - "learning_rate": 0.0001999992565175681, - "loss": 46.0, - "step": 16064 - }, - { - "epoch": 1.2282814381558576, - "grad_norm": 0.0008000683737918735, - "learning_rate": 0.00019999925642494266, - "loss": 46.0, - "step": 16065 - }, - { - "epoch": 1.2283578951392473, - "grad_norm": 0.0019807324279099703, - "learning_rate": 0.00019999925633231144, - "loss": 46.0, - "step": 16066 - }, - { - "epoch": 1.228434352122637, - "grad_norm": 0.0012882484588772058, - "learning_rate": 0.0001999992562396744, - "loss": 46.0, - "step": 16067 - }, - { - "epoch": 1.2285108091060266, - "grad_norm": 0.0006577378953807056, - "learning_rate": 0.00019999925614703164, - "loss": 46.0, - "step": 16068 - }, - { - "epoch": 1.2285872660894164, - "grad_norm": 0.0014593867817893624, - "learning_rate": 0.00019999925605438307, - "loss": 46.0, - "step": 16069 - }, - { - "epoch": 1.2286637230728061, - "grad_norm": 0.002570970682427287, - "learning_rate": 0.00019999925596172875, - "loss": 46.0, - "step": 16070 - }, - { - "epoch": 1.228740180056196, - "grad_norm": 0.0006125358049757779, - "learning_rate": 0.00019999925586906866, - "loss": 46.0, - "step": 16071 - }, - { - "epoch": 1.2288166370395857, - "grad_norm": 0.0014641006710007787, - "learning_rate": 0.00019999925577640282, - "loss": 46.0, - "step": 16072 - }, - { - "epoch": 1.2288930940229754, - "grad_norm": 0.0004070083377882838, - "learning_rate": 0.00019999925568373118, - "loss": 46.0, - "step": 16073 - }, - { - "epoch": 1.228969551006365, - "grad_norm": 0.0017690513050183654, - "learning_rate": 0.0001999992555910538, - "loss": 46.0, - "step": 16074 - }, - { - "epoch": 1.2290460079897547, - "grad_norm": 0.0005106092430651188, - "learning_rate": 0.0001999992554983706, - "loss": 46.0, - "step": 16075 - }, - { - "epoch": 1.2291224649731445, - "grad_norm": 0.0007438038592226803, - "learning_rate": 0.00019999925540568168, - "loss": 46.0, - "step": 16076 - }, - { - "epoch": 1.2291989219565342, - "grad_norm": 0.0016169827431440353, - "learning_rate": 0.00019999925531298697, - "loss": 46.0, - "step": 16077 - }, - { - "epoch": 1.229275378939924, - "grad_norm": 0.003142925212159753, - "learning_rate": 0.0001999992552202865, - "loss": 46.0, - "step": 16078 - }, - { - "epoch": 1.2293518359233135, - "grad_norm": 0.005689569283276796, - "learning_rate": 0.00019999925512758027, - "loss": 46.0, - "step": 16079 - }, - { - "epoch": 1.2294282929067033, - "grad_norm": 0.0010265561286360025, - "learning_rate": 0.00019999925503486824, - "loss": 46.0, - "step": 16080 - }, - { - "epoch": 1.229504749890093, - "grad_norm": 0.0004349384398665279, - "learning_rate": 0.00019999925494215047, - "loss": 46.0, - "step": 16081 - }, - { - "epoch": 1.2295812068734828, - "grad_norm": 0.0007582813268527389, - "learning_rate": 0.0001999992548494269, - "loss": 46.0, - "step": 16082 - }, - { - "epoch": 1.2296576638568726, - "grad_norm": 0.010492806322872639, - "learning_rate": 0.00019999925475669758, - "loss": 46.0, - "step": 16083 - }, - { - "epoch": 1.2297341208402623, - "grad_norm": 0.0022323697339743376, - "learning_rate": 0.0001999992546639625, - "loss": 46.0, - "step": 16084 - }, - { - "epoch": 1.2298105778236519, - "grad_norm": 0.004740293137729168, - "learning_rate": 0.00019999925457122163, - "loss": 46.0, - "step": 16085 - }, - { - "epoch": 1.2298870348070416, - "grad_norm": 0.004808230325579643, - "learning_rate": 0.00019999925447847502, - "loss": 46.0, - "step": 16086 - }, - { - "epoch": 1.2299634917904314, - "grad_norm": 0.0005458660889416933, - "learning_rate": 0.00019999925438572263, - "loss": 46.0, - "step": 16087 - }, - { - "epoch": 1.2300399487738212, - "grad_norm": 0.0008397402707487345, - "learning_rate": 0.00019999925429296442, - "loss": 46.0, - "step": 16088 - }, - { - "epoch": 1.230116405757211, - "grad_norm": 0.000881789019331336, - "learning_rate": 0.0001999992542002005, - "loss": 46.0, - "step": 16089 - }, - { - "epoch": 1.2301928627406005, - "grad_norm": 0.0006640120409429073, - "learning_rate": 0.00019999925410743078, - "loss": 46.0, - "step": 16090 - }, - { - "epoch": 1.2302693197239902, - "grad_norm": 0.0007411792175844312, - "learning_rate": 0.0001999992540146553, - "loss": 46.0, - "step": 16091 - }, - { - "epoch": 1.23034577670738, - "grad_norm": 0.0011047892039641738, - "learning_rate": 0.00019999925392187405, - "loss": 46.0, - "step": 16092 - }, - { - "epoch": 1.2304222336907698, - "grad_norm": 0.00347514683380723, - "learning_rate": 0.00019999925382908703, - "loss": 46.0, - "step": 16093 - }, - { - "epoch": 1.2304986906741595, - "grad_norm": 0.0007427798700518906, - "learning_rate": 0.00019999925373629426, - "loss": 46.0, - "step": 16094 - }, - { - "epoch": 1.2305751476575493, - "grad_norm": 0.0010663170833140612, - "learning_rate": 0.0001999992536434957, - "loss": 46.0, - "step": 16095 - }, - { - "epoch": 1.2306516046409388, - "grad_norm": 0.0009400284616276622, - "learning_rate": 0.00019999925355069137, - "loss": 46.0, - "step": 16096 - }, - { - "epoch": 1.2307280616243286, - "grad_norm": 0.0030948964413255453, - "learning_rate": 0.0001999992534578813, - "loss": 46.0, - "step": 16097 - }, - { - "epoch": 1.2308045186077183, - "grad_norm": 0.0014139021513983607, - "learning_rate": 0.0001999992533650654, - "loss": 46.0, - "step": 16098 - }, - { - "epoch": 1.230880975591108, - "grad_norm": 0.0008110702619887888, - "learning_rate": 0.0001999992532722438, - "loss": 46.0, - "step": 16099 - }, - { - "epoch": 1.2309574325744979, - "grad_norm": 0.0005633847904391587, - "learning_rate": 0.00019999925317941642, - "loss": 46.0, - "step": 16100 - }, - { - "epoch": 1.2310338895578874, - "grad_norm": 0.0006365786539390683, - "learning_rate": 0.00019999925308658323, - "loss": 46.0, - "step": 16101 - }, - { - "epoch": 1.2311103465412772, - "grad_norm": 0.0013632565969601274, - "learning_rate": 0.0001999992529937443, - "loss": 46.0, - "step": 16102 - }, - { - "epoch": 1.231186803524667, - "grad_norm": 0.0016504349187016487, - "learning_rate": 0.00019999925290089957, - "loss": 46.0, - "step": 16103 - }, - { - "epoch": 1.2312632605080567, - "grad_norm": 0.0015550582902505994, - "learning_rate": 0.0001999992528080491, - "loss": 46.0, - "step": 16104 - }, - { - "epoch": 1.2313397174914464, - "grad_norm": 0.0005089414771646261, - "learning_rate": 0.00019999925271519287, - "loss": 46.0, - "step": 16105 - }, - { - "epoch": 1.2314161744748362, - "grad_norm": 0.0006099442252889276, - "learning_rate": 0.00019999925262233084, - "loss": 46.0, - "step": 16106 - }, - { - "epoch": 1.2314926314582257, - "grad_norm": 0.001864170772023499, - "learning_rate": 0.00019999925252946302, - "loss": 46.0, - "step": 16107 - }, - { - "epoch": 1.2315690884416155, - "grad_norm": 0.0010190855246037245, - "learning_rate": 0.0001999992524365895, - "loss": 46.0, - "step": 16108 - }, - { - "epoch": 1.2316455454250053, - "grad_norm": 0.0019764506723731756, - "learning_rate": 0.00019999925234371016, - "loss": 46.0, - "step": 16109 - }, - { - "epoch": 1.231722002408395, - "grad_norm": 0.004341428633779287, - "learning_rate": 0.00019999925225082507, - "loss": 46.0, - "step": 16110 - }, - { - "epoch": 1.2317984593917848, - "grad_norm": 0.003148481948301196, - "learning_rate": 0.0001999992521579342, - "loss": 46.0, - "step": 16111 - }, - { - "epoch": 1.2318749163751743, - "grad_norm": 0.003891546744853258, - "learning_rate": 0.00019999925206503756, - "loss": 46.0, - "step": 16112 - }, - { - "epoch": 1.231951373358564, - "grad_norm": 0.01181508507579565, - "learning_rate": 0.00019999925197213515, - "loss": 46.0, - "step": 16113 - }, - { - "epoch": 1.2320278303419538, - "grad_norm": 0.002067372901365161, - "learning_rate": 0.000199999251879227, - "loss": 46.0, - "step": 16114 - }, - { - "epoch": 1.2321042873253436, - "grad_norm": 0.0033666668459773064, - "learning_rate": 0.00019999925178631304, - "loss": 46.0, - "step": 16115 - }, - { - "epoch": 1.2321807443087334, - "grad_norm": 0.005725543480366468, - "learning_rate": 0.00019999925169339333, - "loss": 46.0, - "step": 16116 - }, - { - "epoch": 1.2322572012921231, - "grad_norm": 0.0016691512428224087, - "learning_rate": 0.00019999925160046783, - "loss": 46.0, - "step": 16117 - }, - { - "epoch": 1.2323336582755127, - "grad_norm": 0.001649119658395648, - "learning_rate": 0.0001999992515075366, - "loss": 46.0, - "step": 16118 - }, - { - "epoch": 1.2324101152589024, - "grad_norm": 0.006222618278115988, - "learning_rate": 0.00019999925141459958, - "loss": 46.0, - "step": 16119 - }, - { - "epoch": 1.2324865722422922, - "grad_norm": 0.0028519434854388237, - "learning_rate": 0.00019999925132165678, - "loss": 46.0, - "step": 16120 - }, - { - "epoch": 1.232563029225682, - "grad_norm": 0.0006758978706784546, - "learning_rate": 0.00019999925122870824, - "loss": 46.0, - "step": 16121 - }, - { - "epoch": 1.2326394862090717, - "grad_norm": 0.00047451394493691623, - "learning_rate": 0.0001999992511357539, - "loss": 46.0, - "step": 16122 - }, - { - "epoch": 1.2327159431924612, - "grad_norm": 0.0030632740817964077, - "learning_rate": 0.0001999992510427938, - "loss": 46.0, - "step": 16123 - }, - { - "epoch": 1.232792400175851, - "grad_norm": 0.0018083520699292421, - "learning_rate": 0.00019999925094982794, - "loss": 46.0, - "step": 16124 - }, - { - "epoch": 1.2328688571592408, - "grad_norm": 0.003031206550076604, - "learning_rate": 0.0001999992508568563, - "loss": 46.0, - "step": 16125 - }, - { - "epoch": 1.2329453141426305, - "grad_norm": 0.0008447115542367101, - "learning_rate": 0.0001999992507638789, - "loss": 46.0, - "step": 16126 - }, - { - "epoch": 1.2330217711260203, - "grad_norm": 0.0005253884010016918, - "learning_rate": 0.0001999992506708957, - "loss": 46.0, - "step": 16127 - }, - { - "epoch": 1.23309822810941, - "grad_norm": 0.00044152833288535476, - "learning_rate": 0.00019999925057790678, - "loss": 46.0, - "step": 16128 - }, - { - "epoch": 1.2331746850927996, - "grad_norm": 0.0015555663267150521, - "learning_rate": 0.00019999925048491205, - "loss": 46.0, - "step": 16129 - }, - { - "epoch": 1.2332511420761894, - "grad_norm": 0.0009968159720301628, - "learning_rate": 0.00019999925039191157, - "loss": 46.0, - "step": 16130 - }, - { - "epoch": 1.2333275990595791, - "grad_norm": 0.0025702675338834524, - "learning_rate": 0.00019999925029890532, - "loss": 46.0, - "step": 16131 - }, - { - "epoch": 1.2334040560429689, - "grad_norm": 0.001372497878037393, - "learning_rate": 0.0001999992502058933, - "loss": 46.0, - "step": 16132 - }, - { - "epoch": 1.2334805130263586, - "grad_norm": 0.002881584921851754, - "learning_rate": 0.0001999992501128755, - "loss": 46.0, - "step": 16133 - }, - { - "epoch": 1.2335569700097482, - "grad_norm": 0.0025717185344547033, - "learning_rate": 0.00019999925001985193, - "loss": 46.0, - "step": 16134 - }, - { - "epoch": 1.233633426993138, - "grad_norm": 0.00585021311417222, - "learning_rate": 0.0001999992499268226, - "loss": 46.0, - "step": 16135 - }, - { - "epoch": 1.2337098839765277, - "grad_norm": 0.002392655238509178, - "learning_rate": 0.0001999992498337875, - "loss": 46.0, - "step": 16136 - }, - { - "epoch": 1.2337863409599175, - "grad_norm": 0.001367436139844358, - "learning_rate": 0.00019999924974074663, - "loss": 46.0, - "step": 16137 - }, - { - "epoch": 1.2338627979433072, - "grad_norm": 0.010383804328739643, - "learning_rate": 0.0001999992496477, - "loss": 46.0, - "step": 16138 - }, - { - "epoch": 1.233939254926697, - "grad_norm": 0.00501530896872282, - "learning_rate": 0.00019999924955464758, - "loss": 46.0, - "step": 16139 - }, - { - "epoch": 1.2340157119100865, - "grad_norm": 0.0011613249080255628, - "learning_rate": 0.0001999992494615894, - "loss": 46.0, - "step": 16140 - }, - { - "epoch": 1.2340921688934763, - "grad_norm": 0.00086108228424564, - "learning_rate": 0.00019999924936852547, - "loss": 46.0, - "step": 16141 - }, - { - "epoch": 1.234168625876866, - "grad_norm": 0.00201953062787652, - "learning_rate": 0.00019999924927545574, - "loss": 46.0, - "step": 16142 - }, - { - "epoch": 1.2342450828602558, - "grad_norm": 0.0024786172434687614, - "learning_rate": 0.00019999924918238026, - "loss": 46.0, - "step": 16143 - }, - { - "epoch": 1.2343215398436456, - "grad_norm": 0.0011446401476860046, - "learning_rate": 0.000199999249089299, - "loss": 46.0, - "step": 16144 - }, - { - "epoch": 1.234397996827035, - "grad_norm": 0.0010201006662100554, - "learning_rate": 0.000199999248996212, - "loss": 46.0, - "step": 16145 - }, - { - "epoch": 1.2344744538104249, - "grad_norm": 0.003638614434748888, - "learning_rate": 0.0001999992489031192, - "loss": 46.0, - "step": 16146 - }, - { - "epoch": 1.2345509107938146, - "grad_norm": 0.00087597721721977, - "learning_rate": 0.00019999924881002062, - "loss": 46.0, - "step": 16147 - }, - { - "epoch": 1.2346273677772044, - "grad_norm": 0.0005804141983389854, - "learning_rate": 0.00019999924871691628, - "loss": 46.0, - "step": 16148 - }, - { - "epoch": 1.2347038247605941, - "grad_norm": 0.004878742154687643, - "learning_rate": 0.0001999992486238062, - "loss": 46.0, - "step": 16149 - }, - { - "epoch": 1.234780281743984, - "grad_norm": 0.009944730438292027, - "learning_rate": 0.00019999924853069032, - "loss": 46.0, - "step": 16150 - }, - { - "epoch": 1.2348567387273734, - "grad_norm": 0.0006810511113144457, - "learning_rate": 0.0001999992484375687, - "loss": 46.0, - "step": 16151 - }, - { - "epoch": 1.2349331957107632, - "grad_norm": 0.0020122802816331387, - "learning_rate": 0.00019999924834444125, - "loss": 46.0, - "step": 16152 - }, - { - "epoch": 1.235009652694153, - "grad_norm": 0.0026216183323413134, - "learning_rate": 0.0001999992482513081, - "loss": 46.0, - "step": 16153 - }, - { - "epoch": 1.2350861096775427, - "grad_norm": 0.0010264270240440965, - "learning_rate": 0.00019999924815816914, - "loss": 46.0, - "step": 16154 - }, - { - "epoch": 1.2351625666609323, - "grad_norm": 0.0011440090602263808, - "learning_rate": 0.0001999992480650244, - "loss": 46.0, - "step": 16155 - }, - { - "epoch": 1.235239023644322, - "grad_norm": 0.0011800434440374374, - "learning_rate": 0.00019999924797187393, - "loss": 46.0, - "step": 16156 - }, - { - "epoch": 1.2353154806277118, - "grad_norm": 0.0014037586515769362, - "learning_rate": 0.00019999924787871765, - "loss": 46.0, - "step": 16157 - }, - { - "epoch": 1.2353919376111016, - "grad_norm": 0.0006325557478703558, - "learning_rate": 0.00019999924778555563, - "loss": 46.0, - "step": 16158 - }, - { - "epoch": 1.2354683945944913, - "grad_norm": 0.0014421932864934206, - "learning_rate": 0.00019999924769238783, - "loss": 46.0, - "step": 16159 - }, - { - "epoch": 1.235544851577881, - "grad_norm": 0.0009284376865252852, - "learning_rate": 0.0001999992475992143, - "loss": 46.0, - "step": 16160 - }, - { - "epoch": 1.2356213085612708, - "grad_norm": 0.001163533772341907, - "learning_rate": 0.00019999924750603495, - "loss": 46.0, - "step": 16161 - }, - { - "epoch": 1.2356977655446604, - "grad_norm": 0.0024985643103718758, - "learning_rate": 0.00019999924741284983, - "loss": 46.0, - "step": 16162 - }, - { - "epoch": 1.2357742225280501, - "grad_norm": 0.0006378112593665719, - "learning_rate": 0.00019999924731965897, - "loss": 46.0, - "step": 16163 - }, - { - "epoch": 1.23585067951144, - "grad_norm": 0.0007794823031872511, - "learning_rate": 0.00019999924722646233, - "loss": 46.0, - "step": 16164 - }, - { - "epoch": 1.2359271364948297, - "grad_norm": 0.0008109778864309192, - "learning_rate": 0.0001999992471332599, - "loss": 46.0, - "step": 16165 - }, - { - "epoch": 1.2360035934782192, - "grad_norm": 0.0007147433934733272, - "learning_rate": 0.0001999992470400517, - "loss": 46.0, - "step": 16166 - }, - { - "epoch": 1.236080050461609, - "grad_norm": 0.00229713530279696, - "learning_rate": 0.00019999924694683778, - "loss": 46.0, - "step": 16167 - }, - { - "epoch": 1.2361565074449987, - "grad_norm": 0.0012973527191206813, - "learning_rate": 0.00019999924685361803, - "loss": 46.0, - "step": 16168 - }, - { - "epoch": 1.2362329644283885, - "grad_norm": 0.0017626202898100019, - "learning_rate": 0.00019999924676039258, - "loss": 46.0, - "step": 16169 - }, - { - "epoch": 1.2363094214117782, - "grad_norm": 0.0006364518194459379, - "learning_rate": 0.0001999992466671613, - "loss": 46.0, - "step": 16170 - }, - { - "epoch": 1.236385878395168, - "grad_norm": 0.002682435791939497, - "learning_rate": 0.00019999924657392425, - "loss": 46.0, - "step": 16171 - }, - { - "epoch": 1.2364623353785575, - "grad_norm": 0.0010396478464826941, - "learning_rate": 0.00019999924648068148, - "loss": 46.0, - "step": 16172 - }, - { - "epoch": 1.2365387923619473, - "grad_norm": 0.0009788823081180453, - "learning_rate": 0.0001999992463874329, - "loss": 46.0, - "step": 16173 - }, - { - "epoch": 1.236615249345337, - "grad_norm": 0.0010156398639082909, - "learning_rate": 0.00019999924629417857, - "loss": 46.0, - "step": 16174 - }, - { - "epoch": 1.2366917063287268, - "grad_norm": 0.000712582899723202, - "learning_rate": 0.00019999924620091845, - "loss": 46.0, - "step": 16175 - }, - { - "epoch": 1.2367681633121166, - "grad_norm": 0.0018316407222300768, - "learning_rate": 0.00019999924610765256, - "loss": 46.0, - "step": 16176 - }, - { - "epoch": 1.2368446202955061, - "grad_norm": 0.0013650762848556042, - "learning_rate": 0.00019999924601438093, - "loss": 46.0, - "step": 16177 - }, - { - "epoch": 1.2369210772788959, - "grad_norm": 0.0006641428335569799, - "learning_rate": 0.00019999924592110352, - "loss": 46.0, - "step": 16178 - }, - { - "epoch": 1.2369975342622856, - "grad_norm": 0.0006430278299376369, - "learning_rate": 0.00019999924582782033, - "loss": 46.0, - "step": 16179 - }, - { - "epoch": 1.2370739912456754, - "grad_norm": 0.001062024850398302, - "learning_rate": 0.00019999924573453138, - "loss": 46.0, - "step": 16180 - }, - { - "epoch": 1.2371504482290652, - "grad_norm": 0.0004638636310119182, - "learning_rate": 0.00019999924564123665, - "loss": 46.0, - "step": 16181 - }, - { - "epoch": 1.237226905212455, - "grad_norm": 0.0006842865259386599, - "learning_rate": 0.00019999924554793617, - "loss": 46.0, - "step": 16182 - }, - { - "epoch": 1.2373033621958445, - "grad_norm": 0.000827309675514698, - "learning_rate": 0.0001999992454546299, - "loss": 46.0, - "step": 16183 - }, - { - "epoch": 1.2373798191792342, - "grad_norm": 0.000634370488114655, - "learning_rate": 0.00019999924536131785, - "loss": 46.0, - "step": 16184 - }, - { - "epoch": 1.237456276162624, - "grad_norm": 0.00038265628973022103, - "learning_rate": 0.00019999924526800005, - "loss": 46.0, - "step": 16185 - }, - { - "epoch": 1.2375327331460138, - "grad_norm": 0.0005271471454761922, - "learning_rate": 0.00019999924517467648, - "loss": 46.0, - "step": 16186 - }, - { - "epoch": 1.2376091901294035, - "grad_norm": 0.013602126389741898, - "learning_rate": 0.00019999924508134714, - "loss": 46.0, - "step": 16187 - }, - { - "epoch": 1.237685647112793, - "grad_norm": 0.0012449331115931273, - "learning_rate": 0.00019999924498801205, - "loss": 46.0, - "step": 16188 - }, - { - "epoch": 1.2377621040961828, - "grad_norm": 0.0010873565915971994, - "learning_rate": 0.00019999924489467116, - "loss": 46.0, - "step": 16189 - }, - { - "epoch": 1.2378385610795726, - "grad_norm": 0.0019398077856749296, - "learning_rate": 0.0001999992448013245, - "loss": 46.0, - "step": 16190 - }, - { - "epoch": 1.2379150180629623, - "grad_norm": 0.0007611886248923838, - "learning_rate": 0.0001999992447079721, - "loss": 46.0, - "step": 16191 - }, - { - "epoch": 1.237991475046352, - "grad_norm": 0.006044069305062294, - "learning_rate": 0.0001999992446146139, - "loss": 46.0, - "step": 16192 - }, - { - "epoch": 1.2380679320297419, - "grad_norm": 0.002332279458642006, - "learning_rate": 0.00019999924452124995, - "loss": 46.0, - "step": 16193 - }, - { - "epoch": 1.2381443890131314, - "grad_norm": 0.0009618547046557069, - "learning_rate": 0.0001999992444278802, - "loss": 46.0, - "step": 16194 - }, - { - "epoch": 1.2382208459965212, - "grad_norm": 0.0028223982080817223, - "learning_rate": 0.00019999924433450472, - "loss": 46.0, - "step": 16195 - }, - { - "epoch": 1.238297302979911, - "grad_norm": 0.0026741530746221542, - "learning_rate": 0.00019999924424112347, - "loss": 46.0, - "step": 16196 - }, - { - "epoch": 1.2383737599633007, - "grad_norm": 0.0014414218021556735, - "learning_rate": 0.00019999924414773642, - "loss": 46.0, - "step": 16197 - }, - { - "epoch": 1.2384502169466904, - "grad_norm": 0.0011779188644140959, - "learning_rate": 0.00019999924405434362, - "loss": 46.0, - "step": 16198 - }, - { - "epoch": 1.23852667393008, - "grad_norm": 0.0012218063930049539, - "learning_rate": 0.00019999924396094505, - "loss": 46.0, - "step": 16199 - }, - { - "epoch": 1.2386031309134697, - "grad_norm": 0.0026084252167493105, - "learning_rate": 0.00019999924386754068, - "loss": 46.0, - "step": 16200 - }, - { - "epoch": 1.2386795878968595, - "grad_norm": 0.0030180804897099733, - "learning_rate": 0.0001999992437741306, - "loss": 46.0, - "step": 16201 - }, - { - "epoch": 1.2387560448802493, - "grad_norm": 0.003117549931630492, - "learning_rate": 0.00019999924368071473, - "loss": 46.0, - "step": 16202 - }, - { - "epoch": 1.238832501863639, - "grad_norm": 0.0025547968689352274, - "learning_rate": 0.00019999924358729304, - "loss": 46.0, - "step": 16203 - }, - { - "epoch": 1.2389089588470288, - "grad_norm": 0.0014640373410657048, - "learning_rate": 0.00019999924349386566, - "loss": 46.0, - "step": 16204 - }, - { - "epoch": 1.2389854158304183, - "grad_norm": 0.00039131238008849323, - "learning_rate": 0.00019999924340043243, - "loss": 46.0, - "step": 16205 - }, - { - "epoch": 1.239061872813808, - "grad_norm": 0.0013695830712094903, - "learning_rate": 0.0001999992433069935, - "loss": 46.0, - "step": 16206 - }, - { - "epoch": 1.2391383297971978, - "grad_norm": 0.0011735250009223819, - "learning_rate": 0.00019999924321354877, - "loss": 46.0, - "step": 16207 - }, - { - "epoch": 1.2392147867805876, - "grad_norm": 0.001279689371585846, - "learning_rate": 0.00019999924312009827, - "loss": 46.0, - "step": 16208 - }, - { - "epoch": 1.2392912437639774, - "grad_norm": 0.0012681097723543644, - "learning_rate": 0.000199999243026642, - "loss": 46.0, - "step": 16209 - }, - { - "epoch": 1.239367700747367, - "grad_norm": 0.0022884411737322807, - "learning_rate": 0.00019999924293317997, - "loss": 46.0, - "step": 16210 - }, - { - "epoch": 1.2394441577307567, - "grad_norm": 0.005618903785943985, - "learning_rate": 0.00019999924283971217, - "loss": 46.0, - "step": 16211 - }, - { - "epoch": 1.2395206147141464, - "grad_norm": 0.001149306888692081, - "learning_rate": 0.00019999924274623858, - "loss": 46.0, - "step": 16212 - }, - { - "epoch": 1.2395970716975362, - "grad_norm": 0.0005931926425546408, - "learning_rate": 0.00019999924265275923, - "loss": 46.0, - "step": 16213 - }, - { - "epoch": 1.239673528680926, - "grad_norm": 0.0034078762400895357, - "learning_rate": 0.00019999924255927412, - "loss": 46.0, - "step": 16214 - }, - { - "epoch": 1.2397499856643157, - "grad_norm": 0.0017527914606034756, - "learning_rate": 0.00019999924246578323, - "loss": 46.0, - "step": 16215 - }, - { - "epoch": 1.2398264426477053, - "grad_norm": 0.006149209104478359, - "learning_rate": 0.00019999924237228657, - "loss": 46.0, - "step": 16216 - }, - { - "epoch": 1.239902899631095, - "grad_norm": 0.0046256110072135925, - "learning_rate": 0.00019999924227878416, - "loss": 46.0, - "step": 16217 - }, - { - "epoch": 1.2399793566144848, - "grad_norm": 0.0004790711391251534, - "learning_rate": 0.00019999924218527598, - "loss": 46.0, - "step": 16218 - }, - { - "epoch": 1.2400558135978745, - "grad_norm": 0.0002844065602403134, - "learning_rate": 0.00019999924209176202, - "loss": 46.0, - "step": 16219 - }, - { - "epoch": 1.2401322705812643, - "grad_norm": 0.00136388442479074, - "learning_rate": 0.00019999924199824227, - "loss": 46.0, - "step": 16220 - }, - { - "epoch": 1.2402087275646538, - "grad_norm": 0.001815884024836123, - "learning_rate": 0.00019999924190471676, - "loss": 46.0, - "step": 16221 - }, - { - "epoch": 1.2402851845480436, - "grad_norm": 0.0018731511663645506, - "learning_rate": 0.0001999992418111855, - "loss": 46.0, - "step": 16222 - }, - { - "epoch": 1.2403616415314334, - "grad_norm": 0.0005997954867780209, - "learning_rate": 0.00019999924171764847, - "loss": 46.0, - "step": 16223 - }, - { - "epoch": 1.2404380985148231, - "grad_norm": 0.002003435045480728, - "learning_rate": 0.00019999924162410567, - "loss": 46.0, - "step": 16224 - }, - { - "epoch": 1.2405145554982129, - "grad_norm": 0.006114141549915075, - "learning_rate": 0.00019999924153055708, - "loss": 46.0, - "step": 16225 - }, - { - "epoch": 1.2405910124816026, - "grad_norm": 0.0009145902586169541, - "learning_rate": 0.00019999924143700274, - "loss": 46.0, - "step": 16226 - }, - { - "epoch": 1.2406674694649922, - "grad_norm": 0.0016500097699463367, - "learning_rate": 0.00019999924134344262, - "loss": 46.0, - "step": 16227 - }, - { - "epoch": 1.240743926448382, - "grad_norm": 0.0005913766217418015, - "learning_rate": 0.00019999924124987673, - "loss": 46.0, - "step": 16228 - }, - { - "epoch": 1.2408203834317717, - "grad_norm": 0.0038779121823608875, - "learning_rate": 0.00019999924115630507, - "loss": 46.0, - "step": 16229 - }, - { - "epoch": 1.2408968404151615, - "grad_norm": 0.0003662245871964842, - "learning_rate": 0.00019999924106272766, - "loss": 46.0, - "step": 16230 - }, - { - "epoch": 1.2409732973985512, - "grad_norm": 0.0008042265544645488, - "learning_rate": 0.00019999924096914445, - "loss": 46.0, - "step": 16231 - }, - { - "epoch": 1.2410497543819408, - "grad_norm": 0.0015300572849810123, - "learning_rate": 0.00019999924087555547, - "loss": 46.0, - "step": 16232 - }, - { - "epoch": 1.2411262113653305, - "grad_norm": 0.0005596036207862198, - "learning_rate": 0.00019999924078196074, - "loss": 46.0, - "step": 16233 - }, - { - "epoch": 1.2412026683487203, - "grad_norm": 0.0005785702960565686, - "learning_rate": 0.00019999924068836027, - "loss": 46.0, - "step": 16234 - }, - { - "epoch": 1.24127912533211, - "grad_norm": 0.016834190115332603, - "learning_rate": 0.000199999240594754, - "loss": 46.0, - "step": 16235 - }, - { - "epoch": 1.2413555823154998, - "grad_norm": 0.002775345230475068, - "learning_rate": 0.00019999924050114194, - "loss": 46.0, - "step": 16236 - }, - { - "epoch": 1.2414320392988896, - "grad_norm": 0.0033433123026043177, - "learning_rate": 0.00019999924040752412, - "loss": 46.0, - "step": 16237 - }, - { - "epoch": 1.241508496282279, - "grad_norm": 0.0027108630165457726, - "learning_rate": 0.00019999924031390055, - "loss": 46.0, - "step": 16238 - }, - { - "epoch": 1.2415849532656689, - "grad_norm": 0.0018398347310721874, - "learning_rate": 0.00019999924022027119, - "loss": 46.0, - "step": 16239 - }, - { - "epoch": 1.2416614102490586, - "grad_norm": 0.002263805130496621, - "learning_rate": 0.0001999992401266361, - "loss": 46.0, - "step": 16240 - }, - { - "epoch": 1.2417378672324484, - "grad_norm": 0.0023306889925152063, - "learning_rate": 0.00019999924003299518, - "loss": 46.0, - "step": 16241 - }, - { - "epoch": 1.2418143242158382, - "grad_norm": 0.0023174022790044546, - "learning_rate": 0.00019999923993934852, - "loss": 46.0, - "step": 16242 - }, - { - "epoch": 1.2418907811992277, - "grad_norm": 0.0031036718282848597, - "learning_rate": 0.0001999992398456961, - "loss": 46.0, - "step": 16243 - }, - { - "epoch": 1.2419672381826175, - "grad_norm": 0.00155420892406255, - "learning_rate": 0.0001999992397520379, - "loss": 46.0, - "step": 16244 - }, - { - "epoch": 1.2420436951660072, - "grad_norm": 0.0039010613691061735, - "learning_rate": 0.00019999923965837395, - "loss": 46.0, - "step": 16245 - }, - { - "epoch": 1.242120152149397, - "grad_norm": 0.0008912440971471369, - "learning_rate": 0.00019999923956470422, - "loss": 46.0, - "step": 16246 - }, - { - "epoch": 1.2421966091327867, - "grad_norm": 0.000617080891970545, - "learning_rate": 0.0001999992394710287, - "loss": 46.0, - "step": 16247 - }, - { - "epoch": 1.2422730661161765, - "grad_norm": 0.0014821469085291028, - "learning_rate": 0.00019999923937734742, - "loss": 46.0, - "step": 16248 - }, - { - "epoch": 1.242349523099566, - "grad_norm": 0.0006066728383302689, - "learning_rate": 0.00019999923928366037, - "loss": 46.0, - "step": 16249 - }, - { - "epoch": 1.2424259800829558, - "grad_norm": 0.002139391377568245, - "learning_rate": 0.00019999923918996758, - "loss": 46.0, - "step": 16250 - }, - { - "epoch": 1.2425024370663456, - "grad_norm": 0.0019193944754078984, - "learning_rate": 0.00019999923909626898, - "loss": 46.0, - "step": 16251 - }, - { - "epoch": 1.2425788940497353, - "grad_norm": 0.008647779934108257, - "learning_rate": 0.00019999923900256464, - "loss": 46.0, - "step": 16252 - }, - { - "epoch": 1.242655351033125, - "grad_norm": 0.000417533447034657, - "learning_rate": 0.00019999923890885453, - "loss": 46.0, - "step": 16253 - }, - { - "epoch": 1.2427318080165146, - "grad_norm": 0.004375928081572056, - "learning_rate": 0.0001999992388151386, - "loss": 46.0, - "step": 16254 - }, - { - "epoch": 1.2428082649999044, - "grad_norm": 0.004379493650048971, - "learning_rate": 0.00019999923872141695, - "loss": 46.0, - "step": 16255 - }, - { - "epoch": 1.2428847219832941, - "grad_norm": 0.0010251826606690884, - "learning_rate": 0.00019999923862768952, - "loss": 46.0, - "step": 16256 - }, - { - "epoch": 1.242961178966684, - "grad_norm": 0.0006800846313126385, - "learning_rate": 0.00019999923853395633, - "loss": 46.0, - "step": 16257 - }, - { - "epoch": 1.2430376359500737, - "grad_norm": 0.002811568323522806, - "learning_rate": 0.00019999923844021735, - "loss": 46.0, - "step": 16258 - }, - { - "epoch": 1.2431140929334634, - "grad_norm": 0.0007100341026671231, - "learning_rate": 0.00019999923834647263, - "loss": 46.0, - "step": 16259 - }, - { - "epoch": 1.243190549916853, - "grad_norm": 0.0012785578146576881, - "learning_rate": 0.00019999923825272212, - "loss": 46.0, - "step": 16260 - }, - { - "epoch": 1.2432670069002427, - "grad_norm": 0.0009894692339003086, - "learning_rate": 0.00019999923815896585, - "loss": 46.0, - "step": 16261 - }, - { - "epoch": 1.2433434638836325, - "grad_norm": 0.00040542345959693193, - "learning_rate": 0.0001999992380652038, - "loss": 46.0, - "step": 16262 - }, - { - "epoch": 1.2434199208670222, - "grad_norm": 0.0017142135184258223, - "learning_rate": 0.00019999923797143598, - "loss": 46.0, - "step": 16263 - }, - { - "epoch": 1.243496377850412, - "grad_norm": 0.000728804909158498, - "learning_rate": 0.00019999923787766239, - "loss": 46.0, - "step": 16264 - }, - { - "epoch": 1.2435728348338015, - "grad_norm": 0.0009789259638637304, - "learning_rate": 0.00019999923778388302, - "loss": 46.0, - "step": 16265 - }, - { - "epoch": 1.2436492918171913, - "grad_norm": 0.001567603787407279, - "learning_rate": 0.00019999923769009793, - "loss": 46.0, - "step": 16266 - }, - { - "epoch": 1.243725748800581, - "grad_norm": 0.0006959529710002244, - "learning_rate": 0.00019999923759630704, - "loss": 46.0, - "step": 16267 - }, - { - "epoch": 1.2438022057839708, - "grad_norm": 0.0014233523979783058, - "learning_rate": 0.00019999923750251038, - "loss": 46.0, - "step": 16268 - }, - { - "epoch": 1.2438786627673606, - "grad_norm": 0.0011403318494558334, - "learning_rate": 0.00019999923740870795, - "loss": 46.0, - "step": 16269 - }, - { - "epoch": 1.2439551197507503, - "grad_norm": 0.0023819133639335632, - "learning_rate": 0.00019999923731489974, - "loss": 46.0, - "step": 16270 - }, - { - "epoch": 1.2440315767341399, - "grad_norm": 0.0026477393694221973, - "learning_rate": 0.00019999923722108576, - "loss": 46.0, - "step": 16271 - }, - { - "epoch": 1.2441080337175296, - "grad_norm": 0.0011295088334009051, - "learning_rate": 0.000199999237127266, - "loss": 46.0, - "step": 16272 - }, - { - "epoch": 1.2441844907009194, - "grad_norm": 0.0010174043709412217, - "learning_rate": 0.0001999992370334405, - "loss": 46.0, - "step": 16273 - }, - { - "epoch": 1.2442609476843092, - "grad_norm": 0.000810265657491982, - "learning_rate": 0.00019999923693960923, - "loss": 46.0, - "step": 16274 - }, - { - "epoch": 1.244337404667699, - "grad_norm": 0.0014384017558768392, - "learning_rate": 0.00019999923684577218, - "loss": 46.0, - "step": 16275 - }, - { - "epoch": 1.2444138616510885, - "grad_norm": 0.0015048125060275197, - "learning_rate": 0.00019999923675192936, - "loss": 46.0, - "step": 16276 - }, - { - "epoch": 1.2444903186344782, - "grad_norm": 0.001139685045927763, - "learning_rate": 0.00019999923665808077, - "loss": 46.0, - "step": 16277 - }, - { - "epoch": 1.244566775617868, - "grad_norm": 0.001738716964609921, - "learning_rate": 0.0001999992365642264, - "loss": 46.0, - "step": 16278 - }, - { - "epoch": 1.2446432326012578, - "grad_norm": 0.0017025260021910071, - "learning_rate": 0.00019999923647036629, - "loss": 46.0, - "step": 16279 - }, - { - "epoch": 1.2447196895846475, - "grad_norm": 0.012858227826654911, - "learning_rate": 0.0001999992363765004, - "loss": 46.0, - "step": 16280 - }, - { - "epoch": 1.2447961465680373, - "grad_norm": 0.0013719985727220774, - "learning_rate": 0.00019999923628262874, - "loss": 46.0, - "step": 16281 - }, - { - "epoch": 1.2448726035514268, - "grad_norm": 0.0010038783075287938, - "learning_rate": 0.0001999992361887513, - "loss": 46.0, - "step": 16282 - }, - { - "epoch": 1.2449490605348166, - "grad_norm": 0.0017927760491147637, - "learning_rate": 0.0001999992360948681, - "loss": 46.0, - "step": 16283 - }, - { - "epoch": 1.2450255175182063, - "grad_norm": 0.00580955296754837, - "learning_rate": 0.00019999923600097912, - "loss": 46.0, - "step": 16284 - }, - { - "epoch": 1.245101974501596, - "grad_norm": 0.004721891600638628, - "learning_rate": 0.00019999923590708436, - "loss": 46.0, - "step": 16285 - }, - { - "epoch": 1.2451784314849856, - "grad_norm": 0.0008275595610029995, - "learning_rate": 0.00019999923581318386, - "loss": 46.0, - "step": 16286 - }, - { - "epoch": 1.2452548884683754, - "grad_norm": 0.0012638730695471168, - "learning_rate": 0.00019999923571927756, - "loss": 46.0, - "step": 16287 - }, - { - "epoch": 1.2453313454517652, - "grad_norm": 0.001253433059900999, - "learning_rate": 0.00019999923562536554, - "loss": 46.0, - "step": 16288 - }, - { - "epoch": 1.245407802435155, - "grad_norm": 0.002018025843426585, - "learning_rate": 0.00019999923553144772, - "loss": 46.0, - "step": 16289 - }, - { - "epoch": 1.2454842594185447, - "grad_norm": 0.003162020118907094, - "learning_rate": 0.00019999923543752413, - "loss": 46.0, - "step": 16290 - }, - { - "epoch": 1.2455607164019344, - "grad_norm": 0.0006781438132748008, - "learning_rate": 0.00019999923534359476, - "loss": 46.0, - "step": 16291 - }, - { - "epoch": 1.2456371733853242, - "grad_norm": 0.0013762114103883505, - "learning_rate": 0.00019999923524965962, - "loss": 46.0, - "step": 16292 - }, - { - "epoch": 1.2457136303687137, - "grad_norm": 0.0008886960567906499, - "learning_rate": 0.00019999923515571873, - "loss": 46.0, - "step": 16293 - }, - { - "epoch": 1.2457900873521035, - "grad_norm": 0.009898100048303604, - "learning_rate": 0.00019999923506177207, - "loss": 46.0, - "step": 16294 - }, - { - "epoch": 1.2458665443354933, - "grad_norm": 0.0008863803232088685, - "learning_rate": 0.00019999923496781964, - "loss": 46.0, - "step": 16295 - }, - { - "epoch": 1.245943001318883, - "grad_norm": 0.0010941906366497278, - "learning_rate": 0.00019999923487386143, - "loss": 46.0, - "step": 16296 - }, - { - "epoch": 1.2460194583022726, - "grad_norm": 0.0005877632647752762, - "learning_rate": 0.00019999923477989745, - "loss": 46.0, - "step": 16297 - }, - { - "epoch": 1.2460959152856623, - "grad_norm": 0.0010235217632725835, - "learning_rate": 0.00019999923468592772, - "loss": 46.0, - "step": 16298 - }, - { - "epoch": 1.246172372269052, - "grad_norm": 0.010449610650539398, - "learning_rate": 0.00019999923459195217, - "loss": 46.0, - "step": 16299 - }, - { - "epoch": 1.2462488292524418, - "grad_norm": 0.002556039486080408, - "learning_rate": 0.00019999923449797092, - "loss": 46.0, - "step": 16300 - }, - { - "epoch": 1.2463252862358316, - "grad_norm": 0.0046071987599134445, - "learning_rate": 0.00019999923440398385, - "loss": 46.0, - "step": 16301 - }, - { - "epoch": 1.2464017432192214, - "grad_norm": 0.003930953796952963, - "learning_rate": 0.00019999923430999103, - "loss": 46.0, - "step": 16302 - }, - { - "epoch": 1.2464782002026111, - "grad_norm": 0.00046681874664500356, - "learning_rate": 0.00019999923421599243, - "loss": 46.0, - "step": 16303 - }, - { - "epoch": 1.2465546571860007, - "grad_norm": 0.0007205181173048913, - "learning_rate": 0.00019999923412198807, - "loss": 46.0, - "step": 16304 - }, - { - "epoch": 1.2466311141693904, - "grad_norm": 0.004459925461560488, - "learning_rate": 0.00019999923402797793, - "loss": 46.0, - "step": 16305 - }, - { - "epoch": 1.2467075711527802, - "grad_norm": 0.0005660425522364676, - "learning_rate": 0.00019999923393396204, - "loss": 46.0, - "step": 16306 - }, - { - "epoch": 1.24678402813617, - "grad_norm": 0.0022302453871816397, - "learning_rate": 0.00019999923383994038, - "loss": 46.0, - "step": 16307 - }, - { - "epoch": 1.2468604851195595, - "grad_norm": 0.0012974118581041694, - "learning_rate": 0.00019999923374591295, - "loss": 46.0, - "step": 16308 - }, - { - "epoch": 1.2469369421029493, - "grad_norm": 0.0009810287738218904, - "learning_rate": 0.0001999992336518797, - "loss": 46.0, - "step": 16309 - }, - { - "epoch": 1.247013399086339, - "grad_norm": 0.0011373079614713788, - "learning_rate": 0.00019999923355784076, - "loss": 46.0, - "step": 16310 - }, - { - "epoch": 1.2470898560697288, - "grad_norm": 0.0012776160147041082, - "learning_rate": 0.00019999923346379598, - "loss": 46.0, - "step": 16311 - }, - { - "epoch": 1.2471663130531185, - "grad_norm": 0.004559324588626623, - "learning_rate": 0.00019999923336974548, - "loss": 46.0, - "step": 16312 - }, - { - "epoch": 1.2472427700365083, - "grad_norm": 0.0019243181450292468, - "learning_rate": 0.0001999992332756892, - "loss": 46.0, - "step": 16313 - }, - { - "epoch": 1.2473192270198978, - "grad_norm": 0.0018928120844066143, - "learning_rate": 0.00019999923318162713, - "loss": 46.0, - "step": 16314 - }, - { - "epoch": 1.2473956840032876, - "grad_norm": 0.001409701886586845, - "learning_rate": 0.0001999992330875593, - "loss": 46.0, - "step": 16315 - }, - { - "epoch": 1.2474721409866774, - "grad_norm": 0.0011028624139726162, - "learning_rate": 0.00019999923299348572, - "loss": 46.0, - "step": 16316 - }, - { - "epoch": 1.2475485979700671, - "grad_norm": 0.006825182121247053, - "learning_rate": 0.00019999923289940635, - "loss": 46.0, - "step": 16317 - }, - { - "epoch": 1.2476250549534569, - "grad_norm": 0.0010181937832385302, - "learning_rate": 0.0001999992328053212, - "loss": 46.0, - "step": 16318 - }, - { - "epoch": 1.2477015119368464, - "grad_norm": 0.0013277959078550339, - "learning_rate": 0.00019999923271123033, - "loss": 46.0, - "step": 16319 - }, - { - "epoch": 1.2477779689202362, - "grad_norm": 0.0024626338854432106, - "learning_rate": 0.00019999923261713364, - "loss": 46.0, - "step": 16320 - }, - { - "epoch": 1.247854425903626, - "grad_norm": 0.0015709082363173366, - "learning_rate": 0.0001999992325230312, - "loss": 46.0, - "step": 16321 - }, - { - "epoch": 1.2479308828870157, - "grad_norm": 0.0012152184499427676, - "learning_rate": 0.000199999232428923, - "loss": 46.0, - "step": 16322 - }, - { - "epoch": 1.2480073398704055, - "grad_norm": 0.0012776012299582362, - "learning_rate": 0.00019999923233480902, - "loss": 46.0, - "step": 16323 - }, - { - "epoch": 1.2480837968537952, - "grad_norm": 0.0006866256590001285, - "learning_rate": 0.00019999923224068927, - "loss": 46.0, - "step": 16324 - }, - { - "epoch": 1.2481602538371848, - "grad_norm": 0.0012240622891113162, - "learning_rate": 0.00019999923214656377, - "loss": 46.0, - "step": 16325 - }, - { - "epoch": 1.2482367108205745, - "grad_norm": 0.001377153443172574, - "learning_rate": 0.00019999923205243247, - "loss": 46.0, - "step": 16326 - }, - { - "epoch": 1.2483131678039643, - "grad_norm": 0.020239299163222313, - "learning_rate": 0.00019999923195829542, - "loss": 46.0, - "step": 16327 - }, - { - "epoch": 1.248389624787354, - "grad_norm": 0.0008103779400698841, - "learning_rate": 0.0001999992318641526, - "loss": 46.0, - "step": 16328 - }, - { - "epoch": 1.2484660817707438, - "grad_norm": 0.0009015205432660878, - "learning_rate": 0.00019999923177000398, - "loss": 46.0, - "step": 16329 - }, - { - "epoch": 1.2485425387541333, - "grad_norm": 0.0009062756435014307, - "learning_rate": 0.00019999923167584961, - "loss": 46.0, - "step": 16330 - }, - { - "epoch": 1.248618995737523, - "grad_norm": 0.0006869305507279932, - "learning_rate": 0.00019999923158168947, - "loss": 46.0, - "step": 16331 - }, - { - "epoch": 1.2486954527209129, - "grad_norm": 0.0020838575437664986, - "learning_rate": 0.0001999992314875236, - "loss": 46.0, - "step": 16332 - }, - { - "epoch": 1.2487719097043026, - "grad_norm": 0.0006608259282074869, - "learning_rate": 0.0001999992313933519, - "loss": 46.0, - "step": 16333 - }, - { - "epoch": 1.2488483666876924, - "grad_norm": 0.0007762082386761904, - "learning_rate": 0.00019999923129917447, - "loss": 46.0, - "step": 16334 - }, - { - "epoch": 1.2489248236710822, - "grad_norm": 0.0011478044325485826, - "learning_rate": 0.00019999923120499124, - "loss": 46.0, - "step": 16335 - }, - { - "epoch": 1.2490012806544717, - "grad_norm": 0.005378944333642721, - "learning_rate": 0.00019999923111080228, - "loss": 46.0, - "step": 16336 - }, - { - "epoch": 1.2490777376378615, - "grad_norm": 0.006752186920493841, - "learning_rate": 0.0001999992310166075, - "loss": 46.0, - "step": 16337 - }, - { - "epoch": 1.2491541946212512, - "grad_norm": 0.004907096736133099, - "learning_rate": 0.00019999923092240698, - "loss": 46.0, - "step": 16338 - }, - { - "epoch": 1.249230651604641, - "grad_norm": 0.0009439613786526024, - "learning_rate": 0.00019999923082820068, - "loss": 46.0, - "step": 16339 - }, - { - "epoch": 1.2493071085880307, - "grad_norm": 0.0007309425855055451, - "learning_rate": 0.00019999923073398863, - "loss": 46.0, - "step": 16340 - }, - { - "epoch": 1.2493835655714203, - "grad_norm": 0.0007836582371965051, - "learning_rate": 0.00019999923063977078, - "loss": 46.0, - "step": 16341 - }, - { - "epoch": 1.24946002255481, - "grad_norm": 0.001868736813776195, - "learning_rate": 0.00019999923054554722, - "loss": 46.0, - "step": 16342 - }, - { - "epoch": 1.2495364795381998, - "grad_norm": 0.0005757371545769274, - "learning_rate": 0.00019999923045131785, - "loss": 46.0, - "step": 16343 - }, - { - "epoch": 1.2496129365215896, - "grad_norm": 0.0006243189563974738, - "learning_rate": 0.00019999923035708271, - "loss": 46.0, - "step": 16344 - }, - { - "epoch": 1.2496893935049793, - "grad_norm": 0.02133634313941002, - "learning_rate": 0.0001999992302628418, - "loss": 46.0, - "step": 16345 - }, - { - "epoch": 1.249765850488369, - "grad_norm": 0.0007615424110554159, - "learning_rate": 0.00019999923016859511, - "loss": 46.0, - "step": 16346 - }, - { - "epoch": 1.2498423074717586, - "grad_norm": 0.0019910961855202913, - "learning_rate": 0.00019999923007434268, - "loss": 46.0, - "step": 16347 - }, - { - "epoch": 1.2499187644551484, - "grad_norm": 0.0017621485749259591, - "learning_rate": 0.00019999922998008445, - "loss": 46.0, - "step": 16348 - }, - { - "epoch": 1.2499952214385381, - "grad_norm": 0.0059636677615344524, - "learning_rate": 0.00019999922988582047, - "loss": 46.0, - "step": 16349 - }, - { - "epoch": 1.250071678421928, - "grad_norm": 0.002571824472397566, - "learning_rate": 0.00019999922979155072, - "loss": 46.0, - "step": 16350 - }, - { - "epoch": 1.2501481354053177, - "grad_norm": 0.0012950774980708957, - "learning_rate": 0.0001999992296972752, - "loss": 46.0, - "step": 16351 - }, - { - "epoch": 1.2502245923887072, - "grad_norm": 0.001501788618043065, - "learning_rate": 0.00019999922960299392, - "loss": 46.0, - "step": 16352 - }, - { - "epoch": 1.250301049372097, - "grad_norm": 0.0005776687758043408, - "learning_rate": 0.00019999922950870685, - "loss": 46.0, - "step": 16353 - }, - { - "epoch": 1.2503775063554867, - "grad_norm": 0.007493641227483749, - "learning_rate": 0.000199999229414414, - "loss": 46.0, - "step": 16354 - }, - { - "epoch": 1.2504539633388765, - "grad_norm": 0.001138480263762176, - "learning_rate": 0.00019999922932011538, - "loss": 46.0, - "step": 16355 - }, - { - "epoch": 1.2505304203222662, - "grad_norm": 0.0009020980796776712, - "learning_rate": 0.00019999922922581104, - "loss": 46.0, - "step": 16356 - }, - { - "epoch": 1.250606877305656, - "grad_norm": 0.002299938118085265, - "learning_rate": 0.00019999922913150088, - "loss": 46.0, - "step": 16357 - }, - { - "epoch": 1.2506833342890455, - "grad_norm": 0.0025025885552167892, - "learning_rate": 0.000199999229037185, - "loss": 46.0, - "step": 16358 - }, - { - "epoch": 1.2507597912724353, - "grad_norm": 0.0017581349238753319, - "learning_rate": 0.0001999992289428633, - "loss": 46.0, - "step": 16359 - }, - { - "epoch": 1.250836248255825, - "grad_norm": 0.0018445730675011873, - "learning_rate": 0.00019999922884853585, - "loss": 46.0, - "step": 16360 - }, - { - "epoch": 1.2509127052392148, - "grad_norm": 0.0010989966103807092, - "learning_rate": 0.00019999922875420264, - "loss": 46.0, - "step": 16361 - }, - { - "epoch": 1.2509891622226046, - "grad_norm": 0.0010360617889091372, - "learning_rate": 0.00019999922865986366, - "loss": 46.0, - "step": 16362 - }, - { - "epoch": 1.2510656192059941, - "grad_norm": 0.004308904986828566, - "learning_rate": 0.00019999922856551888, - "loss": 46.0, - "step": 16363 - }, - { - "epoch": 1.251142076189384, - "grad_norm": 0.0013923459919169545, - "learning_rate": 0.00019999922847116836, - "loss": 46.0, - "step": 16364 - }, - { - "epoch": 1.2512185331727737, - "grad_norm": 0.0007284162566065788, - "learning_rate": 0.00019999922837681208, - "loss": 46.0, - "step": 16365 - }, - { - "epoch": 1.2512949901561634, - "grad_norm": 0.0030336237978190184, - "learning_rate": 0.00019999922828245, - "loss": 46.0, - "step": 16366 - }, - { - "epoch": 1.2513714471395532, - "grad_norm": 0.0015833905199542642, - "learning_rate": 0.00019999922818808217, - "loss": 46.0, - "step": 16367 - }, - { - "epoch": 1.251447904122943, - "grad_norm": 0.00913267768919468, - "learning_rate": 0.00019999922809370857, - "loss": 46.0, - "step": 16368 - }, - { - "epoch": 1.2515243611063325, - "grad_norm": 0.0007435894221998751, - "learning_rate": 0.0001999992279993292, - "loss": 46.0, - "step": 16369 - }, - { - "epoch": 1.2516008180897222, - "grad_norm": 0.013490833342075348, - "learning_rate": 0.00019999922790494404, - "loss": 46.0, - "step": 16370 - }, - { - "epoch": 1.251677275073112, - "grad_norm": 0.000607972324360162, - "learning_rate": 0.00019999922781055313, - "loss": 46.0, - "step": 16371 - }, - { - "epoch": 1.2517537320565018, - "grad_norm": 0.002596711041405797, - "learning_rate": 0.00019999922771615647, - "loss": 46.0, - "step": 16372 - }, - { - "epoch": 1.2518301890398913, - "grad_norm": 0.0031988900154829025, - "learning_rate": 0.000199999227621754, - "loss": 46.0, - "step": 16373 - }, - { - "epoch": 1.251906646023281, - "grad_norm": 0.002775750122964382, - "learning_rate": 0.00019999922752734578, - "loss": 46.0, - "step": 16374 - }, - { - "epoch": 1.2519831030066708, - "grad_norm": 0.0031500759068876505, - "learning_rate": 0.0001999992274329318, - "loss": 46.0, - "step": 16375 - }, - { - "epoch": 1.2520595599900606, - "grad_norm": 0.005923932418227196, - "learning_rate": 0.00019999922733851205, - "loss": 46.0, - "step": 16376 - }, - { - "epoch": 1.2521360169734503, - "grad_norm": 0.0006573188002221286, - "learning_rate": 0.00019999922724408652, - "loss": 46.0, - "step": 16377 - }, - { - "epoch": 1.25221247395684, - "grad_norm": 0.0006369486800394952, - "learning_rate": 0.00019999922714965523, - "loss": 46.0, - "step": 16378 - }, - { - "epoch": 1.2522889309402299, - "grad_norm": 0.04541425406932831, - "learning_rate": 0.00019999922705521815, - "loss": 46.0, - "step": 16379 - }, - { - "epoch": 1.2523653879236194, - "grad_norm": 0.0007402290357276797, - "learning_rate": 0.0001999992269607753, - "loss": 46.0, - "step": 16380 - }, - { - "epoch": 1.2524418449070092, - "grad_norm": 0.0026812339201569557, - "learning_rate": 0.00019999922686632672, - "loss": 46.0, - "step": 16381 - }, - { - "epoch": 1.252518301890399, - "grad_norm": 0.004508398473262787, - "learning_rate": 0.00019999922677187232, - "loss": 46.0, - "step": 16382 - }, - { - "epoch": 1.2525947588737887, - "grad_norm": 0.0007145089912228286, - "learning_rate": 0.00019999922667741219, - "loss": 46.0, - "step": 16383 - }, - { - "epoch": 1.2526712158571782, - "grad_norm": 0.001860719989053905, - "learning_rate": 0.00019999922658294627, - "loss": 46.0, - "step": 16384 - }, - { - "epoch": 1.252747672840568, - "grad_norm": 0.001473874319344759, - "learning_rate": 0.0001999992264884746, - "loss": 46.0, - "step": 16385 - }, - { - "epoch": 1.2528241298239577, - "grad_norm": 0.0007973352330736816, - "learning_rate": 0.00019999922639399716, - "loss": 46.0, - "step": 16386 - }, - { - "epoch": 1.2529005868073475, - "grad_norm": 0.001900269417092204, - "learning_rate": 0.0001999992262995139, - "loss": 46.0, - "step": 16387 - }, - { - "epoch": 1.2529770437907373, - "grad_norm": 0.0016751679359003901, - "learning_rate": 0.00019999922620502495, - "loss": 46.0, - "step": 16388 - }, - { - "epoch": 1.253053500774127, - "grad_norm": 0.0024422432761639357, - "learning_rate": 0.00019999922611053017, - "loss": 46.0, - "step": 16389 - }, - { - "epoch": 1.2531299577575168, - "grad_norm": 0.0011167627526447177, - "learning_rate": 0.00019999922601602962, - "loss": 46.0, - "step": 16390 - }, - { - "epoch": 1.2532064147409063, - "grad_norm": 0.0005226141074672341, - "learning_rate": 0.00019999922592152332, - "loss": 46.0, - "step": 16391 - }, - { - "epoch": 1.253282871724296, - "grad_norm": 0.001044654636643827, - "learning_rate": 0.00019999922582701127, - "loss": 46.0, - "step": 16392 - }, - { - "epoch": 1.2533593287076858, - "grad_norm": 0.0019333363743498921, - "learning_rate": 0.0001999992257324934, - "loss": 46.0, - "step": 16393 - }, - { - "epoch": 1.2534357856910756, - "grad_norm": 0.0014803933445364237, - "learning_rate": 0.0001999992256379698, - "loss": 46.0, - "step": 16394 - }, - { - "epoch": 1.2535122426744651, - "grad_norm": 0.00046341324923560023, - "learning_rate": 0.00019999922554344044, - "loss": 46.0, - "step": 16395 - }, - { - "epoch": 1.253588699657855, - "grad_norm": 0.003914657514542341, - "learning_rate": 0.00019999922544890528, - "loss": 46.0, - "step": 16396 - }, - { - "epoch": 1.2536651566412447, - "grad_norm": 0.0016827434301376343, - "learning_rate": 0.00019999922535436437, - "loss": 46.0, - "step": 16397 - }, - { - "epoch": 1.2537416136246344, - "grad_norm": 0.0011291344417259097, - "learning_rate": 0.00019999922525981768, - "loss": 46.0, - "step": 16398 - }, - { - "epoch": 1.2538180706080242, - "grad_norm": 0.01156060490757227, - "learning_rate": 0.00019999922516526522, - "loss": 46.0, - "step": 16399 - }, - { - "epoch": 1.253894527591414, - "grad_norm": 0.0011152446968480945, - "learning_rate": 0.00019999922507070702, - "loss": 46.0, - "step": 16400 - }, - { - "epoch": 1.2539709845748037, - "grad_norm": 0.002592396456748247, - "learning_rate": 0.00019999922497614301, - "loss": 46.0, - "step": 16401 - }, - { - "epoch": 1.2540474415581933, - "grad_norm": 0.004796263761818409, - "learning_rate": 0.00019999922488157324, - "loss": 46.0, - "step": 16402 - }, - { - "epoch": 1.254123898541583, - "grad_norm": 0.0004613221681211144, - "learning_rate": 0.00019999922478699774, - "loss": 46.0, - "step": 16403 - }, - { - "epoch": 1.2542003555249728, - "grad_norm": 0.004102225415408611, - "learning_rate": 0.00019999922469241641, - "loss": 46.0, - "step": 16404 - }, - { - "epoch": 1.2542768125083625, - "grad_norm": 0.00032626657048240304, - "learning_rate": 0.00019999922459782934, - "loss": 46.0, - "step": 16405 - }, - { - "epoch": 1.254353269491752, - "grad_norm": 0.005635222885757685, - "learning_rate": 0.0001999992245032365, - "loss": 46.0, - "step": 16406 - }, - { - "epoch": 1.2544297264751418, - "grad_norm": 0.001193670672364533, - "learning_rate": 0.0001999992244086379, - "loss": 46.0, - "step": 16407 - }, - { - "epoch": 1.2545061834585316, - "grad_norm": 0.0007480288622900844, - "learning_rate": 0.00019999922431403351, - "loss": 46.0, - "step": 16408 - }, - { - "epoch": 1.2545826404419214, - "grad_norm": 0.0004027009126730263, - "learning_rate": 0.00019999922421942338, - "loss": 46.0, - "step": 16409 - }, - { - "epoch": 1.2546590974253111, - "grad_norm": 0.0007107963901944458, - "learning_rate": 0.00019999922412480744, - "loss": 46.0, - "step": 16410 - }, - { - "epoch": 1.2547355544087009, - "grad_norm": 0.0015318809309974313, - "learning_rate": 0.00019999922403018573, - "loss": 46.0, - "step": 16411 - }, - { - "epoch": 1.2548120113920906, - "grad_norm": 0.0008197858696803451, - "learning_rate": 0.0001999992239355583, - "loss": 46.0, - "step": 16412 - }, - { - "epoch": 1.2548884683754802, - "grad_norm": 0.0007833583513274789, - "learning_rate": 0.00019999922384092506, - "loss": 46.0, - "step": 16413 - }, - { - "epoch": 1.25496492535887, - "grad_norm": 0.0025514462031424046, - "learning_rate": 0.0001999992237462861, - "loss": 46.0, - "step": 16414 - }, - { - "epoch": 1.2550413823422597, - "grad_norm": 0.0016124843386933208, - "learning_rate": 0.0001999992236516413, - "loss": 46.0, - "step": 16415 - }, - { - "epoch": 1.2551178393256495, - "grad_norm": 0.00036243052454665303, - "learning_rate": 0.00019999922355699076, - "loss": 46.0, - "step": 16416 - }, - { - "epoch": 1.255194296309039, - "grad_norm": 0.0009067085920833051, - "learning_rate": 0.00019999922346233446, - "loss": 46.0, - "step": 16417 - }, - { - "epoch": 1.2552707532924288, - "grad_norm": 0.000285412126686424, - "learning_rate": 0.00019999922336767242, - "loss": 46.0, - "step": 16418 - }, - { - "epoch": 1.2553472102758185, - "grad_norm": 0.0016339175635948777, - "learning_rate": 0.00019999922327300454, - "loss": 46.0, - "step": 16419 - }, - { - "epoch": 1.2554236672592083, - "grad_norm": 0.001466345856897533, - "learning_rate": 0.00019999922317833093, - "loss": 46.0, - "step": 16420 - }, - { - "epoch": 1.255500124242598, - "grad_norm": 0.005923457909375429, - "learning_rate": 0.00019999922308365156, - "loss": 46.0, - "step": 16421 - }, - { - "epoch": 1.2555765812259878, - "grad_norm": 0.0009513382101431489, - "learning_rate": 0.0001999992229889664, - "loss": 46.0, - "step": 16422 - }, - { - "epoch": 1.2556530382093776, - "grad_norm": 0.0007420122274197638, - "learning_rate": 0.00019999922289427549, - "loss": 46.0, - "step": 16423 - }, - { - "epoch": 1.255729495192767, - "grad_norm": 0.00023287672956939787, - "learning_rate": 0.0001999992227995788, - "loss": 46.0, - "step": 16424 - }, - { - "epoch": 1.2558059521761569, - "grad_norm": 0.01207447238266468, - "learning_rate": 0.00019999922270487632, - "loss": 46.0, - "step": 16425 - }, - { - "epoch": 1.2558824091595466, - "grad_norm": 0.0009858588455244899, - "learning_rate": 0.0001999992226101681, - "loss": 46.0, - "step": 16426 - }, - { - "epoch": 1.2559588661429364, - "grad_norm": 0.0011797369224950671, - "learning_rate": 0.0001999992225154541, - "loss": 46.0, - "step": 16427 - }, - { - "epoch": 1.256035323126326, - "grad_norm": 0.0026717614382505417, - "learning_rate": 0.00019999922242073433, - "loss": 46.0, - "step": 16428 - }, - { - "epoch": 1.2561117801097157, - "grad_norm": 0.0006869658827781677, - "learning_rate": 0.0001999992223260088, - "loss": 46.0, - "step": 16429 - }, - { - "epoch": 1.2561882370931055, - "grad_norm": 0.0010481844656169415, - "learning_rate": 0.00019999922223127748, - "loss": 46.0, - "step": 16430 - }, - { - "epoch": 1.2562646940764952, - "grad_norm": 0.000877156388014555, - "learning_rate": 0.0001999992221365404, - "loss": 46.0, - "step": 16431 - }, - { - "epoch": 1.256341151059885, - "grad_norm": 0.004499401897192001, - "learning_rate": 0.00019999922204179757, - "loss": 46.0, - "step": 16432 - }, - { - "epoch": 1.2564176080432747, - "grad_norm": 0.0030122101306915283, - "learning_rate": 0.00019999922194704898, - "loss": 46.0, - "step": 16433 - }, - { - "epoch": 1.2564940650266645, - "grad_norm": 0.00827631913125515, - "learning_rate": 0.00019999922185229456, - "loss": 46.0, - "step": 16434 - }, - { - "epoch": 1.256570522010054, - "grad_norm": 0.0010939062340185046, - "learning_rate": 0.00019999922175753442, - "loss": 46.0, - "step": 16435 - }, - { - "epoch": 1.2566469789934438, - "grad_norm": 0.002193512162193656, - "learning_rate": 0.0001999992216627685, - "loss": 46.0, - "step": 16436 - }, - { - "epoch": 1.2567234359768336, - "grad_norm": 0.0020986529998481274, - "learning_rate": 0.0001999992215679968, - "loss": 46.0, - "step": 16437 - }, - { - "epoch": 1.2567998929602233, - "grad_norm": 0.0006867117481306195, - "learning_rate": 0.00019999922147321932, - "loss": 46.0, - "step": 16438 - }, - { - "epoch": 1.2568763499436129, - "grad_norm": 0.015324806794524193, - "learning_rate": 0.00019999922137843608, - "loss": 46.0, - "step": 16439 - }, - { - "epoch": 1.2569528069270026, - "grad_norm": 0.0013696304522454739, - "learning_rate": 0.0001999992212836471, - "loss": 46.0, - "step": 16440 - }, - { - "epoch": 1.2570292639103924, - "grad_norm": 0.003059870097786188, - "learning_rate": 0.00019999922118885233, - "loss": 46.0, - "step": 16441 - }, - { - "epoch": 1.2571057208937821, - "grad_norm": 0.0016380450688302517, - "learning_rate": 0.00019999922109405178, - "loss": 46.0, - "step": 16442 - }, - { - "epoch": 1.257182177877172, - "grad_norm": 0.0006603061337955296, - "learning_rate": 0.00019999922099924548, - "loss": 46.0, - "step": 16443 - }, - { - "epoch": 1.2572586348605617, - "grad_norm": 0.003353565465658903, - "learning_rate": 0.0001999992209044334, - "loss": 46.0, - "step": 16444 - }, - { - "epoch": 1.2573350918439514, - "grad_norm": 0.0006715510971844196, - "learning_rate": 0.00019999922080961554, - "loss": 46.0, - "step": 16445 - }, - { - "epoch": 1.257411548827341, - "grad_norm": 0.0006737725343555212, - "learning_rate": 0.00019999922071479192, - "loss": 46.0, - "step": 16446 - }, - { - "epoch": 1.2574880058107307, - "grad_norm": 0.001551430788822472, - "learning_rate": 0.00019999922061996256, - "loss": 46.0, - "step": 16447 - }, - { - "epoch": 1.2575644627941205, - "grad_norm": 0.00261330115608871, - "learning_rate": 0.0001999992205251274, - "loss": 46.0, - "step": 16448 - }, - { - "epoch": 1.2576409197775102, - "grad_norm": 0.0008056391961872578, - "learning_rate": 0.00019999922043028646, - "loss": 46.0, - "step": 16449 - }, - { - "epoch": 1.2577173767608998, - "grad_norm": 0.013351313769817352, - "learning_rate": 0.00019999922033543977, - "loss": 46.0, - "step": 16450 - }, - { - "epoch": 1.2577938337442895, - "grad_norm": 0.001033368636853993, - "learning_rate": 0.00019999922024058732, - "loss": 46.0, - "step": 16451 - }, - { - "epoch": 1.2578702907276793, - "grad_norm": 0.002594680991023779, - "learning_rate": 0.00019999922014572906, - "loss": 46.0, - "step": 16452 - }, - { - "epoch": 1.257946747711069, - "grad_norm": 0.001639623544178903, - "learning_rate": 0.00019999922005086506, - "loss": 46.0, - "step": 16453 - }, - { - "epoch": 1.2580232046944588, - "grad_norm": 0.0030052606016397476, - "learning_rate": 0.00019999921995599528, - "loss": 46.0, - "step": 16454 - }, - { - "epoch": 1.2580996616778486, - "grad_norm": 0.0014609363861382008, - "learning_rate": 0.00019999921986111976, - "loss": 46.0, - "step": 16455 - }, - { - "epoch": 1.2581761186612384, - "grad_norm": 0.0023911858443170786, - "learning_rate": 0.00019999921976623846, - "loss": 46.0, - "step": 16456 - }, - { - "epoch": 1.258252575644628, - "grad_norm": 0.0013884411891922355, - "learning_rate": 0.00019999921967135136, - "loss": 46.0, - "step": 16457 - }, - { - "epoch": 1.2583290326280177, - "grad_norm": 0.0007670379709452391, - "learning_rate": 0.00019999921957645852, - "loss": 46.0, - "step": 16458 - }, - { - "epoch": 1.2584054896114074, - "grad_norm": 0.001012188266031444, - "learning_rate": 0.00019999921948155987, - "loss": 46.0, - "step": 16459 - }, - { - "epoch": 1.2584819465947972, - "grad_norm": 0.0008938998798839748, - "learning_rate": 0.0001999992193866555, - "loss": 46.0, - "step": 16460 - }, - { - "epoch": 1.2585584035781867, - "grad_norm": 0.004603705368936062, - "learning_rate": 0.00019999921929174535, - "loss": 46.0, - "step": 16461 - }, - { - "epoch": 1.2586348605615765, - "grad_norm": 0.0006540778558701277, - "learning_rate": 0.0001999992191968294, - "loss": 46.0, - "step": 16462 - }, - { - "epoch": 1.2587113175449662, - "grad_norm": 0.0017041172832250595, - "learning_rate": 0.00019999921910190773, - "loss": 46.0, - "step": 16463 - }, - { - "epoch": 1.258787774528356, - "grad_norm": 0.001365963602438569, - "learning_rate": 0.00019999921900698024, - "loss": 46.0, - "step": 16464 - }, - { - "epoch": 1.2588642315117458, - "grad_norm": 0.0024636252783238888, - "learning_rate": 0.00019999921891204704, - "loss": 46.0, - "step": 16465 - }, - { - "epoch": 1.2589406884951355, - "grad_norm": 0.0012807559687644243, - "learning_rate": 0.00019999921881710798, - "loss": 46.0, - "step": 16466 - }, - { - "epoch": 1.2590171454785253, - "grad_norm": 0.0008748608524911106, - "learning_rate": 0.00019999921872216323, - "loss": 46.0, - "step": 16467 - }, - { - "epoch": 1.2590936024619148, - "grad_norm": 0.003088687313720584, - "learning_rate": 0.00019999921862721268, - "loss": 46.0, - "step": 16468 - }, - { - "epoch": 1.2591700594453046, - "grad_norm": 0.0011356067843735218, - "learning_rate": 0.00019999921853225639, - "loss": 46.0, - "step": 16469 - }, - { - "epoch": 1.2592465164286943, - "grad_norm": 0.0016523645026609302, - "learning_rate": 0.0001999992184372943, - "loss": 46.0, - "step": 16470 - }, - { - "epoch": 1.259322973412084, - "grad_norm": 0.0014879752416163683, - "learning_rate": 0.00019999921834232644, - "loss": 46.0, - "step": 16471 - }, - { - "epoch": 1.2593994303954736, - "grad_norm": 0.0013844635104760528, - "learning_rate": 0.00019999921824735283, - "loss": 46.0, - "step": 16472 - }, - { - "epoch": 1.2594758873788634, - "grad_norm": 0.00357668474316597, - "learning_rate": 0.0001999992181523734, - "loss": 46.0, - "step": 16473 - }, - { - "epoch": 1.2595523443622532, - "grad_norm": 0.001873683650046587, - "learning_rate": 0.00019999921805738825, - "loss": 46.0, - "step": 16474 - }, - { - "epoch": 1.259628801345643, - "grad_norm": 0.022805245593190193, - "learning_rate": 0.00019999921796239734, - "loss": 46.0, - "step": 16475 - }, - { - "epoch": 1.2597052583290327, - "grad_norm": 0.0007727384218014777, - "learning_rate": 0.00019999921786740063, - "loss": 46.0, - "step": 16476 - }, - { - "epoch": 1.2597817153124224, - "grad_norm": 0.0037208418361842632, - "learning_rate": 0.00019999921777239817, - "loss": 46.0, - "step": 16477 - }, - { - "epoch": 1.2598581722958122, - "grad_norm": 0.0011424771510064602, - "learning_rate": 0.00019999921767738994, - "loss": 46.0, - "step": 16478 - }, - { - "epoch": 1.2599346292792017, - "grad_norm": 0.0008388548740185797, - "learning_rate": 0.0001999992175823759, - "loss": 46.0, - "step": 16479 - }, - { - "epoch": 1.2600110862625915, - "grad_norm": 0.0014203950995579362, - "learning_rate": 0.00019999921748735613, - "loss": 46.0, - "step": 16480 - }, - { - "epoch": 1.2600875432459813, - "grad_norm": 0.000518681132234633, - "learning_rate": 0.00019999921739233058, - "loss": 46.0, - "step": 16481 - }, - { - "epoch": 1.260164000229371, - "grad_norm": 0.007198650855571032, - "learning_rate": 0.00019999921729729926, - "loss": 46.0, - "step": 16482 - }, - { - "epoch": 1.2602404572127606, - "grad_norm": 0.0006486611091531813, - "learning_rate": 0.0001999992172022622, - "loss": 46.0, - "step": 16483 - }, - { - "epoch": 1.2603169141961503, - "grad_norm": 0.011363430880010128, - "learning_rate": 0.00019999921710721935, - "loss": 46.0, - "step": 16484 - }, - { - "epoch": 1.26039337117954, - "grad_norm": 0.00170981977134943, - "learning_rate": 0.00019999921701217073, - "loss": 46.0, - "step": 16485 - }, - { - "epoch": 1.2604698281629299, - "grad_norm": 0.004637775011360645, - "learning_rate": 0.00019999921691711632, - "loss": 46.0, - "step": 16486 - }, - { - "epoch": 1.2605462851463196, - "grad_norm": 0.003388921497389674, - "learning_rate": 0.00019999921682205615, - "loss": 46.0, - "step": 16487 - }, - { - "epoch": 1.2606227421297094, - "grad_norm": 0.0015615829033777118, - "learning_rate": 0.00019999921672699022, - "loss": 46.0, - "step": 16488 - }, - { - "epoch": 1.260699199113099, - "grad_norm": 0.0005641995812766254, - "learning_rate": 0.0001999992166319185, - "loss": 46.0, - "step": 16489 - }, - { - "epoch": 1.2607756560964887, - "grad_norm": 0.0007029682747088373, - "learning_rate": 0.00019999921653684105, - "loss": 46.0, - "step": 16490 - }, - { - "epoch": 1.2608521130798784, - "grad_norm": 0.0007309545180760324, - "learning_rate": 0.00019999921644175782, - "loss": 46.0, - "step": 16491 - }, - { - "epoch": 1.2609285700632682, - "grad_norm": 0.000503484916407615, - "learning_rate": 0.00019999921634666882, - "loss": 46.0, - "step": 16492 - }, - { - "epoch": 1.261005027046658, - "grad_norm": 0.0013351273955777287, - "learning_rate": 0.00019999921625157404, - "loss": 46.0, - "step": 16493 - }, - { - "epoch": 1.2610814840300475, - "grad_norm": 0.0012642602669075131, - "learning_rate": 0.0001999992161564735, - "loss": 46.0, - "step": 16494 - }, - { - "epoch": 1.2611579410134373, - "grad_norm": 0.002974753500893712, - "learning_rate": 0.00019999921606136717, - "loss": 46.0, - "step": 16495 - }, - { - "epoch": 1.261234397996827, - "grad_norm": 0.0011573872761800885, - "learning_rate": 0.00019999921596625507, - "loss": 46.0, - "step": 16496 - }, - { - "epoch": 1.2613108549802168, - "grad_norm": 0.027800822630524635, - "learning_rate": 0.0001999992158711372, - "loss": 46.0, - "step": 16497 - }, - { - "epoch": 1.2613873119636065, - "grad_norm": 0.001148067880421877, - "learning_rate": 0.00019999921577601356, - "loss": 46.0, - "step": 16498 - }, - { - "epoch": 1.2614637689469963, - "grad_norm": 0.0018769362941384315, - "learning_rate": 0.00019999921568088417, - "loss": 46.0, - "step": 16499 - }, - { - "epoch": 1.2615402259303858, - "grad_norm": 0.0010231048800051212, - "learning_rate": 0.000199999215585749, - "loss": 46.0, - "step": 16500 - }, - { - "epoch": 1.2616166829137756, - "grad_norm": 0.002108842832967639, - "learning_rate": 0.00019999921549060807, - "loss": 46.0, - "step": 16501 - }, - { - "epoch": 1.2616931398971654, - "grad_norm": 0.001610041013918817, - "learning_rate": 0.0001999992153954614, - "loss": 46.0, - "step": 16502 - }, - { - "epoch": 1.2617695968805551, - "grad_norm": 0.0017317309975624084, - "learning_rate": 0.0001999992153003089, - "loss": 46.0, - "step": 16503 - }, - { - "epoch": 1.2618460538639447, - "grad_norm": 0.007911157794296741, - "learning_rate": 0.00019999921520515065, - "loss": 46.0, - "step": 16504 - }, - { - "epoch": 1.2619225108473344, - "grad_norm": 0.0010015369625762105, - "learning_rate": 0.00019999921510998662, - "loss": 46.0, - "step": 16505 - }, - { - "epoch": 1.2619989678307242, - "grad_norm": 0.0024152202531695366, - "learning_rate": 0.00019999921501481687, - "loss": 46.0, - "step": 16506 - }, - { - "epoch": 1.262075424814114, - "grad_norm": 0.009238668717443943, - "learning_rate": 0.00019999921491964132, - "loss": 46.0, - "step": 16507 - }, - { - "epoch": 1.2621518817975037, - "grad_norm": 0.002034083940088749, - "learning_rate": 0.00019999921482446, - "loss": 46.0, - "step": 16508 - }, - { - "epoch": 1.2622283387808935, - "grad_norm": 0.0010638341773301363, - "learning_rate": 0.0001999992147292729, - "loss": 46.0, - "step": 16509 - }, - { - "epoch": 1.2623047957642832, - "grad_norm": 0.0013406927464529872, - "learning_rate": 0.00019999921463408003, - "loss": 46.0, - "step": 16510 - }, - { - "epoch": 1.2623812527476728, - "grad_norm": 0.001091621583327651, - "learning_rate": 0.00019999921453888142, - "loss": 46.0, - "step": 16511 - }, - { - "epoch": 1.2624577097310625, - "grad_norm": 0.0014226384228095412, - "learning_rate": 0.000199999214443677, - "loss": 46.0, - "step": 16512 - }, - { - "epoch": 1.2625341667144523, - "grad_norm": 0.007693960331380367, - "learning_rate": 0.00019999921434846684, - "loss": 46.0, - "step": 16513 - }, - { - "epoch": 1.262610623697842, - "grad_norm": 0.0028738912660628557, - "learning_rate": 0.0001999992142532509, - "loss": 46.0, - "step": 16514 - }, - { - "epoch": 1.2626870806812316, - "grad_norm": 0.0008677901932969689, - "learning_rate": 0.0001999992141580292, - "loss": 46.0, - "step": 16515 - }, - { - "epoch": 1.2627635376646213, - "grad_norm": 0.0029121434781700373, - "learning_rate": 0.0001999992140628017, - "loss": 46.0, - "step": 16516 - }, - { - "epoch": 1.262839994648011, - "grad_norm": 0.005586137063801289, - "learning_rate": 0.00019999921396756846, - "loss": 46.0, - "step": 16517 - }, - { - "epoch": 1.2629164516314009, - "grad_norm": 0.0021744370460510254, - "learning_rate": 0.00019999921387232945, - "loss": 46.0, - "step": 16518 - }, - { - "epoch": 1.2629929086147906, - "grad_norm": 0.0011308243265375495, - "learning_rate": 0.00019999921377708468, - "loss": 46.0, - "step": 16519 - }, - { - "epoch": 1.2630693655981804, - "grad_norm": 0.002094999188557267, - "learning_rate": 0.00019999921368183413, - "loss": 46.0, - "step": 16520 - }, - { - "epoch": 1.2631458225815702, - "grad_norm": 0.0010615659411996603, - "learning_rate": 0.0001999992135865778, - "loss": 46.0, - "step": 16521 - }, - { - "epoch": 1.2632222795649597, - "grad_norm": 0.0029781595803797245, - "learning_rate": 0.00019999921349131568, - "loss": 46.0, - "step": 16522 - }, - { - "epoch": 1.2632987365483495, - "grad_norm": 0.0010432334383949637, - "learning_rate": 0.00019999921339604787, - "loss": 46.0, - "step": 16523 - }, - { - "epoch": 1.2633751935317392, - "grad_norm": 0.0023265474010258913, - "learning_rate": 0.0001999992133007742, - "loss": 46.0, - "step": 16524 - }, - { - "epoch": 1.263451650515129, - "grad_norm": 0.001020620809867978, - "learning_rate": 0.0001999992132054948, - "loss": 46.0, - "step": 16525 - }, - { - "epoch": 1.2635281074985185, - "grad_norm": 0.0011690454557538033, - "learning_rate": 0.00019999921311020965, - "loss": 46.0, - "step": 16526 - }, - { - "epoch": 1.2636045644819083, - "grad_norm": 0.0007669496117159724, - "learning_rate": 0.0001999992130149187, - "loss": 46.0, - "step": 16527 - }, - { - "epoch": 1.263681021465298, - "grad_norm": 0.0033987811766564846, - "learning_rate": 0.00019999921291962198, - "loss": 46.0, - "step": 16528 - }, - { - "epoch": 1.2637574784486878, - "grad_norm": 0.0017866194248199463, - "learning_rate": 0.0001999992128243195, - "loss": 46.0, - "step": 16529 - }, - { - "epoch": 1.2638339354320776, - "grad_norm": 0.002379220211878419, - "learning_rate": 0.00019999921272901127, - "loss": 46.0, - "step": 16530 - }, - { - "epoch": 1.2639103924154673, - "grad_norm": 0.0007606594008393586, - "learning_rate": 0.00019999921263369726, - "loss": 46.0, - "step": 16531 - }, - { - "epoch": 1.263986849398857, - "grad_norm": 0.0007884561782702804, - "learning_rate": 0.0001999992125383775, - "loss": 46.0, - "step": 16532 - }, - { - "epoch": 1.2640633063822466, - "grad_norm": 0.001040615257807076, - "learning_rate": 0.0001999992124430519, - "loss": 46.0, - "step": 16533 - }, - { - "epoch": 1.2641397633656364, - "grad_norm": 0.0005108760087750852, - "learning_rate": 0.00019999921234772056, - "loss": 46.0, - "step": 16534 - }, - { - "epoch": 1.2642162203490261, - "grad_norm": 0.0006389761110767722, - "learning_rate": 0.0001999992122523835, - "loss": 46.0, - "step": 16535 - }, - { - "epoch": 1.264292677332416, - "grad_norm": 0.0016258525429293513, - "learning_rate": 0.0001999992121570406, - "loss": 46.0, - "step": 16536 - }, - { - "epoch": 1.2643691343158054, - "grad_norm": 0.0044453563168644905, - "learning_rate": 0.00019999921206169199, - "loss": 46.0, - "step": 16537 - }, - { - "epoch": 1.2644455912991952, - "grad_norm": 0.0009650862193666399, - "learning_rate": 0.0001999992119663376, - "loss": 46.0, - "step": 16538 - }, - { - "epoch": 1.264522048282585, - "grad_norm": 0.0007522368105128407, - "learning_rate": 0.0001999992118709774, - "loss": 46.0, - "step": 16539 - }, - { - "epoch": 1.2645985052659747, - "grad_norm": 0.0012720072409138083, - "learning_rate": 0.00019999921177561147, - "loss": 46.0, - "step": 16540 - }, - { - "epoch": 1.2646749622493645, - "grad_norm": 0.001897201407700777, - "learning_rate": 0.00019999921168023976, - "loss": 46.0, - "step": 16541 - }, - { - "epoch": 1.2647514192327542, - "grad_norm": 0.0023809215053915977, - "learning_rate": 0.00019999921158486228, - "loss": 46.0, - "step": 16542 - }, - { - "epoch": 1.264827876216144, - "grad_norm": 0.0017961254343390465, - "learning_rate": 0.00019999921148947905, - "loss": 46.0, - "step": 16543 - }, - { - "epoch": 1.2649043331995335, - "grad_norm": 0.0012066778726875782, - "learning_rate": 0.00019999921139409003, - "loss": 46.0, - "step": 16544 - }, - { - "epoch": 1.2649807901829233, - "grad_norm": 0.003078483510762453, - "learning_rate": 0.00019999921129869522, - "loss": 46.0, - "step": 16545 - }, - { - "epoch": 1.265057247166313, - "grad_norm": 0.010181276127696037, - "learning_rate": 0.00019999921120329465, - "loss": 46.0, - "step": 16546 - }, - { - "epoch": 1.2651337041497028, - "grad_norm": 0.007794594392180443, - "learning_rate": 0.00019999921110788833, - "loss": 46.0, - "step": 16547 - }, - { - "epoch": 1.2652101611330924, - "grad_norm": 0.0167773999273777, - "learning_rate": 0.00019999921101247626, - "loss": 46.0, - "step": 16548 - }, - { - "epoch": 1.2652866181164821, - "grad_norm": 0.0004972476162947714, - "learning_rate": 0.00019999921091705836, - "loss": 46.0, - "step": 16549 - }, - { - "epoch": 1.265363075099872, - "grad_norm": 0.001543243881314993, - "learning_rate": 0.00019999921082163472, - "loss": 46.0, - "step": 16550 - }, - { - "epoch": 1.2654395320832617, - "grad_norm": 0.0011297330493107438, - "learning_rate": 0.00019999921072620533, - "loss": 46.0, - "step": 16551 - }, - { - "epoch": 1.2655159890666514, - "grad_norm": 0.001778118428774178, - "learning_rate": 0.00019999921063077017, - "loss": 46.0, - "step": 16552 - }, - { - "epoch": 1.2655924460500412, - "grad_norm": 0.003555284347385168, - "learning_rate": 0.0001999992105353292, - "loss": 46.0, - "step": 16553 - }, - { - "epoch": 1.265668903033431, - "grad_norm": 0.0029371276032179594, - "learning_rate": 0.0001999992104398825, - "loss": 46.0, - "step": 16554 - }, - { - "epoch": 1.2657453600168205, - "grad_norm": 0.003292506095021963, - "learning_rate": 0.00019999921034443002, - "loss": 46.0, - "step": 16555 - }, - { - "epoch": 1.2658218170002102, - "grad_norm": 0.0027380939573049545, - "learning_rate": 0.00019999921024897174, - "loss": 46.0, - "step": 16556 - }, - { - "epoch": 1.2658982739836, - "grad_norm": 0.0006956110009923577, - "learning_rate": 0.00019999921015350774, - "loss": 46.0, - "step": 16557 - }, - { - "epoch": 1.2659747309669898, - "grad_norm": 0.002119495067745447, - "learning_rate": 0.00019999921005803793, - "loss": 46.0, - "step": 16558 - }, - { - "epoch": 1.2660511879503793, - "grad_norm": 0.003707997966557741, - "learning_rate": 0.00019999920996256239, - "loss": 46.0, - "step": 16559 - }, - { - "epoch": 1.266127644933769, - "grad_norm": 0.0006579714827239513, - "learning_rate": 0.00019999920986708104, - "loss": 46.0, - "step": 16560 - }, - { - "epoch": 1.2662041019171588, - "grad_norm": 0.0011696142610162497, - "learning_rate": 0.00019999920977159397, - "loss": 46.0, - "step": 16561 - }, - { - "epoch": 1.2662805589005486, - "grad_norm": 0.0007162902620621026, - "learning_rate": 0.00019999920967610107, - "loss": 46.0, - "step": 16562 - }, - { - "epoch": 1.2663570158839383, - "grad_norm": 0.0015866734320297837, - "learning_rate": 0.00019999920958060243, - "loss": 46.0, - "step": 16563 - }, - { - "epoch": 1.266433472867328, - "grad_norm": 0.0010536729823797941, - "learning_rate": 0.00019999920948509802, - "loss": 46.0, - "step": 16564 - }, - { - "epoch": 1.2665099298507179, - "grad_norm": 0.003634421853348613, - "learning_rate": 0.00019999920938958786, - "loss": 46.0, - "step": 16565 - }, - { - "epoch": 1.2665863868341074, - "grad_norm": 0.00461319275200367, - "learning_rate": 0.0001999992092940719, - "loss": 46.0, - "step": 16566 - }, - { - "epoch": 1.2666628438174972, - "grad_norm": 0.0012473630486056209, - "learning_rate": 0.0001999992091985502, - "loss": 46.0, - "step": 16567 - }, - { - "epoch": 1.266739300800887, - "grad_norm": 0.007341197691857815, - "learning_rate": 0.00019999920910302273, - "loss": 46.0, - "step": 16568 - }, - { - "epoch": 1.2668157577842767, - "grad_norm": 0.0007693300140090287, - "learning_rate": 0.00019999920900748945, - "loss": 46.0, - "step": 16569 - }, - { - "epoch": 1.2668922147676662, - "grad_norm": 0.002891362411901355, - "learning_rate": 0.00019999920891195042, - "loss": 46.0, - "step": 16570 - }, - { - "epoch": 1.266968671751056, - "grad_norm": 0.0004631927004083991, - "learning_rate": 0.00019999920881640565, - "loss": 46.0, - "step": 16571 - }, - { - "epoch": 1.2670451287344457, - "grad_norm": 0.0014737516175955534, - "learning_rate": 0.0001999992087208551, - "loss": 46.0, - "step": 16572 - }, - { - "epoch": 1.2671215857178355, - "grad_norm": 0.007577573414891958, - "learning_rate": 0.00019999920862529873, - "loss": 46.0, - "step": 16573 - }, - { - "epoch": 1.2671980427012253, - "grad_norm": 0.00051100057316944, - "learning_rate": 0.00019999920852973663, - "loss": 46.0, - "step": 16574 - }, - { - "epoch": 1.267274499684615, - "grad_norm": 0.0013006648514419794, - "learning_rate": 0.00019999920843416876, - "loss": 46.0, - "step": 16575 - }, - { - "epoch": 1.2673509566680048, - "grad_norm": 0.0004796080756932497, - "learning_rate": 0.00019999920833859512, - "loss": 46.0, - "step": 16576 - }, - { - "epoch": 1.2674274136513943, - "grad_norm": 0.007776820100843906, - "learning_rate": 0.00019999920824301573, - "loss": 46.0, - "step": 16577 - }, - { - "epoch": 1.267503870634784, - "grad_norm": 0.00033089928911067545, - "learning_rate": 0.00019999920814743055, - "loss": 46.0, - "step": 16578 - }, - { - "epoch": 1.2675803276181739, - "grad_norm": 0.0030644612852483988, - "learning_rate": 0.00019999920805183958, - "loss": 46.0, - "step": 16579 - }, - { - "epoch": 1.2676567846015636, - "grad_norm": 0.0009644554811529815, - "learning_rate": 0.00019999920795624288, - "loss": 46.0, - "step": 16580 - }, - { - "epoch": 1.2677332415849532, - "grad_norm": 0.00048184592742472887, - "learning_rate": 0.0001999992078606404, - "loss": 46.0, - "step": 16581 - }, - { - "epoch": 1.267809698568343, - "grad_norm": 0.0012677001068368554, - "learning_rate": 0.00019999920776503211, - "loss": 46.0, - "step": 16582 - }, - { - "epoch": 1.2678861555517327, - "grad_norm": 0.0009831043425947428, - "learning_rate": 0.00019999920766941811, - "loss": 46.0, - "step": 16583 - }, - { - "epoch": 1.2679626125351224, - "grad_norm": 0.0007564874831587076, - "learning_rate": 0.0001999992075737983, - "loss": 46.0, - "step": 16584 - }, - { - "epoch": 1.2680390695185122, - "grad_norm": 0.00402864022180438, - "learning_rate": 0.00019999920747817274, - "loss": 46.0, - "step": 16585 - }, - { - "epoch": 1.268115526501902, - "grad_norm": 0.0007438174216076732, - "learning_rate": 0.00019999920738254142, - "loss": 46.0, - "step": 16586 - }, - { - "epoch": 1.2681919834852917, - "grad_norm": 0.0009092689142562449, - "learning_rate": 0.00019999920728690432, - "loss": 46.0, - "step": 16587 - }, - { - "epoch": 1.2682684404686813, - "grad_norm": 0.002842287067323923, - "learning_rate": 0.00019999920719126143, - "loss": 46.0, - "step": 16588 - }, - { - "epoch": 1.268344897452071, - "grad_norm": 0.0014212834648787975, - "learning_rate": 0.0001999992070956128, - "loss": 46.0, - "step": 16589 - }, - { - "epoch": 1.2684213544354608, - "grad_norm": 0.0015836736420169473, - "learning_rate": 0.00019999920699995837, - "loss": 46.0, - "step": 16590 - }, - { - "epoch": 1.2684978114188505, - "grad_norm": 0.007913281209766865, - "learning_rate": 0.00019999920690429819, - "loss": 46.0, - "step": 16591 - }, - { - "epoch": 1.26857426840224, - "grad_norm": 0.0007570114685222507, - "learning_rate": 0.00019999920680863222, - "loss": 46.0, - "step": 16592 - }, - { - "epoch": 1.2686507253856298, - "grad_norm": 0.0006269912701100111, - "learning_rate": 0.00019999920671296052, - "loss": 46.0, - "step": 16593 - }, - { - "epoch": 1.2687271823690196, - "grad_norm": 0.001360012567602098, - "learning_rate": 0.000199999206617283, - "loss": 46.0, - "step": 16594 - }, - { - "epoch": 1.2688036393524094, - "grad_norm": 0.0006117227021604776, - "learning_rate": 0.00019999920652159978, - "loss": 46.0, - "step": 16595 - }, - { - "epoch": 1.2688800963357991, - "grad_norm": 0.0008045897702686489, - "learning_rate": 0.00019999920642591073, - "loss": 46.0, - "step": 16596 - }, - { - "epoch": 1.2689565533191889, - "grad_norm": 0.0013260227860882878, - "learning_rate": 0.00019999920633021593, - "loss": 46.0, - "step": 16597 - }, - { - "epoch": 1.2690330103025786, - "grad_norm": 0.002823147689923644, - "learning_rate": 0.00019999920623451538, - "loss": 46.0, - "step": 16598 - }, - { - "epoch": 1.2691094672859682, - "grad_norm": 0.0012219519121572375, - "learning_rate": 0.00019999920613880906, - "loss": 46.0, - "step": 16599 - }, - { - "epoch": 1.269185924269358, - "grad_norm": 0.002700086683034897, - "learning_rate": 0.00019999920604309694, - "loss": 46.0, - "step": 16600 - }, - { - "epoch": 1.2692623812527477, - "grad_norm": 0.0026994612999260426, - "learning_rate": 0.00019999920594737907, - "loss": 46.0, - "step": 16601 - }, - { - "epoch": 1.2693388382361375, - "grad_norm": 0.0013872861163690686, - "learning_rate": 0.0001999992058516554, - "loss": 46.0, - "step": 16602 - }, - { - "epoch": 1.269415295219527, - "grad_norm": 0.0008902488625608385, - "learning_rate": 0.000199999205755926, - "loss": 46.0, - "step": 16603 - }, - { - "epoch": 1.2694917522029168, - "grad_norm": 0.0019012822303920984, - "learning_rate": 0.0001999992056601908, - "loss": 46.0, - "step": 16604 - }, - { - "epoch": 1.2695682091863065, - "grad_norm": 0.0006907540373504162, - "learning_rate": 0.00019999920556444987, - "loss": 46.0, - "step": 16605 - }, - { - "epoch": 1.2696446661696963, - "grad_norm": 0.0022475533187389374, - "learning_rate": 0.00019999920546870313, - "loss": 46.0, - "step": 16606 - }, - { - "epoch": 1.269721123153086, - "grad_norm": 0.0009680610382929444, - "learning_rate": 0.00019999920537295065, - "loss": 46.0, - "step": 16607 - }, - { - "epoch": 1.2697975801364758, - "grad_norm": 0.0036767732817679644, - "learning_rate": 0.00019999920527719237, - "loss": 46.0, - "step": 16608 - }, - { - "epoch": 1.2698740371198656, - "grad_norm": 0.003197531448677182, - "learning_rate": 0.00019999920518142835, - "loss": 46.0, - "step": 16609 - }, - { - "epoch": 1.2699504941032551, - "grad_norm": 0.001533971750177443, - "learning_rate": 0.00019999920508565855, - "loss": 46.0, - "step": 16610 - }, - { - "epoch": 1.2700269510866449, - "grad_norm": 0.0074510653503239155, - "learning_rate": 0.00019999920498988297, - "loss": 46.0, - "step": 16611 - }, - { - "epoch": 1.2701034080700346, - "grad_norm": 0.001222128514200449, - "learning_rate": 0.00019999920489410165, - "loss": 46.0, - "step": 16612 - }, - { - "epoch": 1.2701798650534244, - "grad_norm": 0.005164609290659428, - "learning_rate": 0.00019999920479831453, - "loss": 46.0, - "step": 16613 - }, - { - "epoch": 1.270256322036814, - "grad_norm": 0.0008039168897084892, - "learning_rate": 0.00019999920470252164, - "loss": 46.0, - "step": 16614 - }, - { - "epoch": 1.2703327790202037, - "grad_norm": 0.007434156257659197, - "learning_rate": 0.000199999204606723, - "loss": 46.0, - "step": 16615 - }, - { - "epoch": 1.2704092360035935, - "grad_norm": 0.0014897106448188424, - "learning_rate": 0.00019999920451091858, - "loss": 46.0, - "step": 16616 - }, - { - "epoch": 1.2704856929869832, - "grad_norm": 0.0005934524815529585, - "learning_rate": 0.0001999992044151084, - "loss": 46.0, - "step": 16617 - }, - { - "epoch": 1.270562149970373, - "grad_norm": 0.0008495784713886678, - "learning_rate": 0.00019999920431929244, - "loss": 46.0, - "step": 16618 - }, - { - "epoch": 1.2706386069537627, - "grad_norm": 0.041476089507341385, - "learning_rate": 0.0001999992042234707, - "loss": 46.0, - "step": 16619 - }, - { - "epoch": 1.2707150639371523, - "grad_norm": 0.0004496494948398322, - "learning_rate": 0.00019999920412764325, - "loss": 46.0, - "step": 16620 - }, - { - "epoch": 1.270791520920542, - "grad_norm": 0.006153932306915522, - "learning_rate": 0.00019999920403180997, - "loss": 46.0, - "step": 16621 - }, - { - "epoch": 1.2708679779039318, - "grad_norm": 0.002771378494799137, - "learning_rate": 0.00019999920393597091, - "loss": 46.0, - "step": 16622 - }, - { - "epoch": 1.2709444348873216, - "grad_norm": 0.003782296320423484, - "learning_rate": 0.00019999920384012611, - "loss": 46.0, - "step": 16623 - }, - { - "epoch": 1.2710208918707113, - "grad_norm": 0.001720154657959938, - "learning_rate": 0.00019999920374427554, - "loss": 46.0, - "step": 16624 - }, - { - "epoch": 1.2710973488541009, - "grad_norm": 0.0026643683668226004, - "learning_rate": 0.0001999992036484192, - "loss": 46.0, - "step": 16625 - }, - { - "epoch": 1.2711738058374906, - "grad_norm": 0.0022460664622485638, - "learning_rate": 0.0001999992035525571, - "loss": 46.0, - "step": 16626 - }, - { - "epoch": 1.2712502628208804, - "grad_norm": 0.015322674065828323, - "learning_rate": 0.00019999920345668923, - "loss": 46.0, - "step": 16627 - }, - { - "epoch": 1.2713267198042701, - "grad_norm": 0.007272724993526936, - "learning_rate": 0.00019999920336081557, - "loss": 46.0, - "step": 16628 - }, - { - "epoch": 1.27140317678766, - "grad_norm": 0.0018612624844536185, - "learning_rate": 0.00019999920326493613, - "loss": 46.0, - "step": 16629 - }, - { - "epoch": 1.2714796337710497, - "grad_norm": 0.0007866800879128277, - "learning_rate": 0.00019999920316905097, - "loss": 46.0, - "step": 16630 - }, - { - "epoch": 1.2715560907544392, - "grad_norm": 0.005086428951472044, - "learning_rate": 0.00019999920307316, - "loss": 46.0, - "step": 16631 - }, - { - "epoch": 1.271632547737829, - "grad_norm": 0.0009069970110431314, - "learning_rate": 0.00019999920297726327, - "loss": 46.0, - "step": 16632 - }, - { - "epoch": 1.2717090047212187, - "grad_norm": 0.001986677059903741, - "learning_rate": 0.0001999992028813608, - "loss": 46.0, - "step": 16633 - }, - { - "epoch": 1.2717854617046085, - "grad_norm": 0.003573087742552161, - "learning_rate": 0.0001999992027854525, - "loss": 46.0, - "step": 16634 - }, - { - "epoch": 1.271861918687998, - "grad_norm": 0.013164808973670006, - "learning_rate": 0.00019999920268953846, - "loss": 46.0, - "step": 16635 - }, - { - "epoch": 1.2719383756713878, - "grad_norm": 0.001385536976158619, - "learning_rate": 0.0001999992025936187, - "loss": 46.0, - "step": 16636 - }, - { - "epoch": 1.2720148326547775, - "grad_norm": 0.0006866052863188088, - "learning_rate": 0.00019999920249769311, - "loss": 46.0, - "step": 16637 - }, - { - "epoch": 1.2720912896381673, - "grad_norm": 0.00137321837246418, - "learning_rate": 0.00019999920240176177, - "loss": 46.0, - "step": 16638 - }, - { - "epoch": 1.272167746621557, - "grad_norm": 0.0012887525372207165, - "learning_rate": 0.00019999920230582462, - "loss": 46.0, - "step": 16639 - }, - { - "epoch": 1.2722442036049468, - "grad_norm": 0.0023031088057905436, - "learning_rate": 0.00019999920220988175, - "loss": 46.0, - "step": 16640 - }, - { - "epoch": 1.2723206605883366, - "grad_norm": 0.002139696851372719, - "learning_rate": 0.00019999920211393311, - "loss": 46.0, - "step": 16641 - }, - { - "epoch": 1.2723971175717261, - "grad_norm": 0.005255787633359432, - "learning_rate": 0.00019999920201797867, - "loss": 46.0, - "step": 16642 - }, - { - "epoch": 1.272473574555116, - "grad_norm": 0.00397008890286088, - "learning_rate": 0.0001999992019220185, - "loss": 46.0, - "step": 16643 - }, - { - "epoch": 1.2725500315385057, - "grad_norm": 0.002200365299358964, - "learning_rate": 0.00019999920182605253, - "loss": 46.0, - "step": 16644 - }, - { - "epoch": 1.2726264885218954, - "grad_norm": 0.0005313935107551515, - "learning_rate": 0.0001999992017300808, - "loss": 46.0, - "step": 16645 - }, - { - "epoch": 1.272702945505285, - "grad_norm": 0.006090338341891766, - "learning_rate": 0.0001999992016341033, - "loss": 46.0, - "step": 16646 - }, - { - "epoch": 1.2727794024886747, - "grad_norm": 0.0013891661074012518, - "learning_rate": 0.00019999920153812, - "loss": 46.0, - "step": 16647 - }, - { - "epoch": 1.2728558594720645, - "grad_norm": 0.0023395169991999865, - "learning_rate": 0.00019999920144213098, - "loss": 46.0, - "step": 16648 - }, - { - "epoch": 1.2729323164554542, - "grad_norm": 0.002753504319116473, - "learning_rate": 0.00019999920134613616, - "loss": 46.0, - "step": 16649 - }, - { - "epoch": 1.273008773438844, - "grad_norm": 0.014427597634494305, - "learning_rate": 0.0001999992012501356, - "loss": 46.0, - "step": 16650 - }, - { - "epoch": 1.2730852304222338, - "grad_norm": 0.0012178807519376278, - "learning_rate": 0.00019999920115412926, - "loss": 46.0, - "step": 16651 - }, - { - "epoch": 1.2731616874056235, - "grad_norm": 0.001439434476196766, - "learning_rate": 0.00019999920105811714, - "loss": 46.0, - "step": 16652 - }, - { - "epoch": 1.273238144389013, - "grad_norm": 0.001111507066525519, - "learning_rate": 0.00019999920096209925, - "loss": 46.0, - "step": 16653 - }, - { - "epoch": 1.2733146013724028, - "grad_norm": 0.0007047646213322878, - "learning_rate": 0.0001999992008660756, - "loss": 46.0, - "step": 16654 - }, - { - "epoch": 1.2733910583557926, - "grad_norm": 0.007054263260215521, - "learning_rate": 0.0001999992007700462, - "loss": 46.0, - "step": 16655 - }, - { - "epoch": 1.2734675153391823, - "grad_norm": 0.0009644195670261979, - "learning_rate": 0.00019999920067401099, - "loss": 46.0, - "step": 16656 - }, - { - "epoch": 1.2735439723225719, - "grad_norm": 0.0018699506763368845, - "learning_rate": 0.00019999920057797003, - "loss": 46.0, - "step": 16657 - }, - { - "epoch": 1.2736204293059616, - "grad_norm": 0.001187073066830635, - "learning_rate": 0.0001999992004819233, - "loss": 46.0, - "step": 16658 - }, - { - "epoch": 1.2736968862893514, - "grad_norm": 0.0007307162159122527, - "learning_rate": 0.0001999992003858708, - "loss": 46.0, - "step": 16659 - }, - { - "epoch": 1.2737733432727412, - "grad_norm": 0.0028837842401117086, - "learning_rate": 0.0001999992002898125, - "loss": 46.0, - "step": 16660 - }, - { - "epoch": 1.273849800256131, - "grad_norm": 0.002315081190317869, - "learning_rate": 0.00019999920019374848, - "loss": 46.0, - "step": 16661 - }, - { - "epoch": 1.2739262572395207, - "grad_norm": 0.0014030288439244032, - "learning_rate": 0.00019999920009767868, - "loss": 46.0, - "step": 16662 - }, - { - "epoch": 1.2740027142229104, - "grad_norm": 0.0015715938061475754, - "learning_rate": 0.0001999992000016031, - "loss": 46.0, - "step": 16663 - }, - { - "epoch": 1.2740791712063, - "grad_norm": 0.0003001201548613608, - "learning_rate": 0.00019999919990552174, - "loss": 46.0, - "step": 16664 - }, - { - "epoch": 1.2741556281896897, - "grad_norm": 0.0005279534379951656, - "learning_rate": 0.00019999919980943462, - "loss": 46.0, - "step": 16665 - }, - { - "epoch": 1.2742320851730795, - "grad_norm": 0.0014168646885082126, - "learning_rate": 0.00019999919971334172, - "loss": 46.0, - "step": 16666 - }, - { - "epoch": 1.2743085421564693, - "grad_norm": 0.005152455996721983, - "learning_rate": 0.00019999919961724308, - "loss": 46.0, - "step": 16667 - }, - { - "epoch": 1.2743849991398588, - "grad_norm": 0.0014782946091145277, - "learning_rate": 0.00019999919952113865, - "loss": 46.0, - "step": 16668 - }, - { - "epoch": 1.2744614561232486, - "grad_norm": 0.001206699525937438, - "learning_rate": 0.00019999919942502846, - "loss": 46.0, - "step": 16669 - }, - { - "epoch": 1.2745379131066383, - "grad_norm": 0.0012304575648158789, - "learning_rate": 0.0001999991993289125, - "loss": 46.0, - "step": 16670 - }, - { - "epoch": 1.274614370090028, - "grad_norm": 0.0014089810429140925, - "learning_rate": 0.00019999919923279077, - "loss": 46.0, - "step": 16671 - }, - { - "epoch": 1.2746908270734179, - "grad_norm": 0.005282597150653601, - "learning_rate": 0.00019999919913666326, - "loss": 46.0, - "step": 16672 - }, - { - "epoch": 1.2747672840568076, - "grad_norm": 0.001720866421237588, - "learning_rate": 0.00019999919904052998, - "loss": 46.0, - "step": 16673 - }, - { - "epoch": 1.2748437410401974, - "grad_norm": 0.0008292746497318149, - "learning_rate": 0.00019999919894439093, - "loss": 46.0, - "step": 16674 - }, - { - "epoch": 1.274920198023587, - "grad_norm": 0.0014621547888964415, - "learning_rate": 0.00019999919884824613, - "loss": 46.0, - "step": 16675 - }, - { - "epoch": 1.2749966550069767, - "grad_norm": 0.00047751498641446233, - "learning_rate": 0.00019999919875209553, - "loss": 46.0, - "step": 16676 - }, - { - "epoch": 1.2750731119903664, - "grad_norm": 0.0038491645827889442, - "learning_rate": 0.00019999919865593919, - "loss": 46.0, - "step": 16677 - }, - { - "epoch": 1.2751495689737562, - "grad_norm": 0.0006406709435395896, - "learning_rate": 0.00019999919855977707, - "loss": 46.0, - "step": 16678 - }, - { - "epoch": 1.2752260259571457, - "grad_norm": 0.0006662397645413876, - "learning_rate": 0.0001999991984636092, - "loss": 46.0, - "step": 16679 - }, - { - "epoch": 1.2753024829405355, - "grad_norm": 0.0007661851705051959, - "learning_rate": 0.00019999919836743554, - "loss": 46.0, - "step": 16680 - }, - { - "epoch": 1.2753789399239253, - "grad_norm": 0.0015701422234997153, - "learning_rate": 0.00019999919827125607, - "loss": 46.0, - "step": 16681 - }, - { - "epoch": 1.275455396907315, - "grad_norm": 0.0017563244327902794, - "learning_rate": 0.00019999919817507088, - "loss": 46.0, - "step": 16682 - }, - { - "epoch": 1.2755318538907048, - "grad_norm": 0.000649426830932498, - "learning_rate": 0.00019999919807887993, - "loss": 46.0, - "step": 16683 - }, - { - "epoch": 1.2756083108740945, - "grad_norm": 0.0009337930241599679, - "learning_rate": 0.0001999991979826832, - "loss": 46.0, - "step": 16684 - }, - { - "epoch": 1.2756847678574843, - "grad_norm": 0.0007409833488054574, - "learning_rate": 0.00019999919788648066, - "loss": 46.0, - "step": 16685 - }, - { - "epoch": 1.2757612248408738, - "grad_norm": 0.0012036182451993227, - "learning_rate": 0.0001999991977902724, - "loss": 46.0, - "step": 16686 - }, - { - "epoch": 1.2758376818242636, - "grad_norm": 0.002025241730734706, - "learning_rate": 0.00019999919769405838, - "loss": 46.0, - "step": 16687 - }, - { - "epoch": 1.2759141388076534, - "grad_norm": 0.0032368816900998354, - "learning_rate": 0.00019999919759783853, - "loss": 46.0, - "step": 16688 - }, - { - "epoch": 1.2759905957910431, - "grad_norm": 0.0009148273384198546, - "learning_rate": 0.00019999919750161299, - "loss": 46.0, - "step": 16689 - }, - { - "epoch": 1.2760670527744327, - "grad_norm": 0.0033446482848376036, - "learning_rate": 0.0001999991974053816, - "loss": 46.0, - "step": 16690 - }, - { - "epoch": 1.2761435097578224, - "grad_norm": 0.002166281221434474, - "learning_rate": 0.0001999991973091445, - "loss": 46.0, - "step": 16691 - }, - { - "epoch": 1.2762199667412122, - "grad_norm": 0.002013568300753832, - "learning_rate": 0.0001999991972129016, - "loss": 46.0, - "step": 16692 - }, - { - "epoch": 1.276296423724602, - "grad_norm": 0.001305202255025506, - "learning_rate": 0.00019999919711665294, - "loss": 46.0, - "step": 16693 - }, - { - "epoch": 1.2763728807079917, - "grad_norm": 0.0012782372068613768, - "learning_rate": 0.0001999991970203985, - "loss": 46.0, - "step": 16694 - }, - { - "epoch": 1.2764493376913815, - "grad_norm": 0.0006937553989700973, - "learning_rate": 0.0001999991969241383, - "loss": 46.0, - "step": 16695 - }, - { - "epoch": 1.2765257946747712, - "grad_norm": 0.001616218825802207, - "learning_rate": 0.00019999919682787233, - "loss": 46.0, - "step": 16696 - }, - { - "epoch": 1.2766022516581608, - "grad_norm": 0.002052373019978404, - "learning_rate": 0.00019999919673160062, - "loss": 46.0, - "step": 16697 - }, - { - "epoch": 1.2766787086415505, - "grad_norm": 0.0010528018465265632, - "learning_rate": 0.0001999991966353231, - "loss": 46.0, - "step": 16698 - }, - { - "epoch": 1.2767551656249403, - "grad_norm": 0.0025244427379220724, - "learning_rate": 0.0001999991965390398, - "loss": 46.0, - "step": 16699 - }, - { - "epoch": 1.27683162260833, - "grad_norm": 0.0011942755663767457, - "learning_rate": 0.00019999919644275076, - "loss": 46.0, - "step": 16700 - }, - { - "epoch": 1.2769080795917196, - "grad_norm": 0.00030391293694265187, - "learning_rate": 0.00019999919634645593, - "loss": 46.0, - "step": 16701 - }, - { - "epoch": 1.2769845365751094, - "grad_norm": 0.00041188261820934713, - "learning_rate": 0.00019999919625015536, - "loss": 46.0, - "step": 16702 - }, - { - "epoch": 1.2770609935584991, - "grad_norm": 0.0017732786945998669, - "learning_rate": 0.000199999196153849, - "loss": 46.0, - "step": 16703 - }, - { - "epoch": 1.2771374505418889, - "grad_norm": 0.0050637805834412575, - "learning_rate": 0.00019999919605753687, - "loss": 46.0, - "step": 16704 - }, - { - "epoch": 1.2772139075252786, - "grad_norm": 0.006125276442617178, - "learning_rate": 0.00019999919596121898, - "loss": 46.0, - "step": 16705 - }, - { - "epoch": 1.2772903645086684, - "grad_norm": 0.0006493722321465611, - "learning_rate": 0.00019999919586489534, - "loss": 46.0, - "step": 16706 - }, - { - "epoch": 1.2773668214920582, - "grad_norm": 0.0010154986521229148, - "learning_rate": 0.0001999991957685659, - "loss": 46.0, - "step": 16707 - }, - { - "epoch": 1.2774432784754477, - "grad_norm": 0.004606302827596664, - "learning_rate": 0.0001999991956722307, - "loss": 46.0, - "step": 16708 - }, - { - "epoch": 1.2775197354588375, - "grad_norm": 0.00040772423380985856, - "learning_rate": 0.0001999991955758897, - "loss": 46.0, - "step": 16709 - }, - { - "epoch": 1.2775961924422272, - "grad_norm": 0.0010258068796247244, - "learning_rate": 0.00019999919547954295, - "loss": 46.0, - "step": 16710 - }, - { - "epoch": 1.277672649425617, - "grad_norm": 0.0014730957336723804, - "learning_rate": 0.00019999919538319047, - "loss": 46.0, - "step": 16711 - }, - { - "epoch": 1.2777491064090065, - "grad_norm": 0.0018360507674515247, - "learning_rate": 0.00019999919528683216, - "loss": 46.0, - "step": 16712 - }, - { - "epoch": 1.2778255633923963, - "grad_norm": 0.0031213832553476095, - "learning_rate": 0.00019999919519046814, - "loss": 46.0, - "step": 16713 - }, - { - "epoch": 1.277902020375786, - "grad_norm": 0.0005239760503172874, - "learning_rate": 0.00019999919509409831, - "loss": 46.0, - "step": 16714 - }, - { - "epoch": 1.2779784773591758, - "grad_norm": 0.002066908171400428, - "learning_rate": 0.00019999919499772272, - "loss": 46.0, - "step": 16715 - }, - { - "epoch": 1.2780549343425656, - "grad_norm": 0.007595262955874205, - "learning_rate": 0.00019999919490134137, - "loss": 46.0, - "step": 16716 - }, - { - "epoch": 1.2781313913259553, - "grad_norm": 0.0014205316547304392, - "learning_rate": 0.00019999919480495423, - "loss": 46.0, - "step": 16717 - }, - { - "epoch": 1.278207848309345, - "grad_norm": 0.00043894999544136226, - "learning_rate": 0.00019999919470856133, - "loss": 46.0, - "step": 16718 - }, - { - "epoch": 1.2782843052927346, - "grad_norm": 0.007440910208970308, - "learning_rate": 0.00019999919461216267, - "loss": 46.0, - "step": 16719 - }, - { - "epoch": 1.2783607622761244, - "grad_norm": 0.0008081428823061287, - "learning_rate": 0.00019999919451575823, - "loss": 46.0, - "step": 16720 - }, - { - "epoch": 1.2784372192595141, - "grad_norm": 0.00144693732727319, - "learning_rate": 0.00019999919441934805, - "loss": 46.0, - "step": 16721 - }, - { - "epoch": 1.278513676242904, - "grad_norm": 0.001247951528057456, - "learning_rate": 0.00019999919432293206, - "loss": 46.0, - "step": 16722 - }, - { - "epoch": 1.2785901332262934, - "grad_norm": 0.0061896494589746, - "learning_rate": 0.00019999919422651033, - "loss": 46.0, - "step": 16723 - }, - { - "epoch": 1.2786665902096832, - "grad_norm": 0.0013113584136590362, - "learning_rate": 0.00019999919413008283, - "loss": 46.0, - "step": 16724 - }, - { - "epoch": 1.278743047193073, - "grad_norm": 0.000813615566585213, - "learning_rate": 0.0001999991940336495, - "loss": 46.0, - "step": 16725 - }, - { - "epoch": 1.2788195041764627, - "grad_norm": 0.0031911423429846764, - "learning_rate": 0.00019999919393721047, - "loss": 46.0, - "step": 16726 - }, - { - "epoch": 1.2788959611598525, - "grad_norm": 0.0012606719974428415, - "learning_rate": 0.00019999919384076567, - "loss": 46.0, - "step": 16727 - }, - { - "epoch": 1.2789724181432423, - "grad_norm": 0.0007722353911958635, - "learning_rate": 0.00019999919374431507, - "loss": 46.0, - "step": 16728 - }, - { - "epoch": 1.279048875126632, - "grad_norm": 0.0006220923969522119, - "learning_rate": 0.00019999919364785867, - "loss": 46.0, - "step": 16729 - }, - { - "epoch": 1.2791253321100216, - "grad_norm": 0.0006802931311540306, - "learning_rate": 0.00019999919355139656, - "loss": 46.0, - "step": 16730 - }, - { - "epoch": 1.2792017890934113, - "grad_norm": 0.001854546251706779, - "learning_rate": 0.00019999919345492867, - "loss": 46.0, - "step": 16731 - }, - { - "epoch": 1.279278246076801, - "grad_norm": 0.00047614064533263445, - "learning_rate": 0.00019999919335845497, - "loss": 46.0, - "step": 16732 - }, - { - "epoch": 1.2793547030601908, - "grad_norm": 0.0013608257286250591, - "learning_rate": 0.00019999919326197556, - "loss": 46.0, - "step": 16733 - }, - { - "epoch": 1.2794311600435804, - "grad_norm": 0.0005788191920146346, - "learning_rate": 0.00019999919316549035, - "loss": 46.0, - "step": 16734 - }, - { - "epoch": 1.2795076170269701, - "grad_norm": 0.0009505003690719604, - "learning_rate": 0.00019999919306899937, - "loss": 46.0, - "step": 16735 - }, - { - "epoch": 1.27958407401036, - "grad_norm": 0.0007994129555299878, - "learning_rate": 0.00019999919297250264, - "loss": 46.0, - "step": 16736 - }, - { - "epoch": 1.2796605309937497, - "grad_norm": 0.0009163281647488475, - "learning_rate": 0.0001999991928760001, - "loss": 46.0, - "step": 16737 - }, - { - "epoch": 1.2797369879771394, - "grad_norm": 0.0012442166917026043, - "learning_rate": 0.00019999919277949183, - "loss": 46.0, - "step": 16738 - }, - { - "epoch": 1.2798134449605292, - "grad_norm": 0.0014432481257244945, - "learning_rate": 0.00019999919268297778, - "loss": 46.0, - "step": 16739 - }, - { - "epoch": 1.279889901943919, - "grad_norm": 0.0018776024226099253, - "learning_rate": 0.00019999919258645795, - "loss": 46.0, - "step": 16740 - }, - { - "epoch": 1.2799663589273085, - "grad_norm": 0.004317415878176689, - "learning_rate": 0.00019999919248993235, - "loss": 46.0, - "step": 16741 - }, - { - "epoch": 1.2800428159106982, - "grad_norm": 0.003129079006612301, - "learning_rate": 0.00019999919239340098, - "loss": 46.0, - "step": 16742 - }, - { - "epoch": 1.280119272894088, - "grad_norm": 0.003770609153434634, - "learning_rate": 0.00019999919229686386, - "loss": 46.0, - "step": 16743 - }, - { - "epoch": 1.2801957298774778, - "grad_norm": 0.007101863622665405, - "learning_rate": 0.00019999919220032095, - "loss": 46.0, - "step": 16744 - }, - { - "epoch": 1.2802721868608673, - "grad_norm": 0.0007034234004095197, - "learning_rate": 0.00019999919210377228, - "loss": 46.0, - "step": 16745 - }, - { - "epoch": 1.280348643844257, - "grad_norm": 0.004554283805191517, - "learning_rate": 0.00019999919200721787, - "loss": 46.0, - "step": 16746 - }, - { - "epoch": 1.2804251008276468, - "grad_norm": 0.0004629794566426426, - "learning_rate": 0.00019999919191065763, - "loss": 46.0, - "step": 16747 - }, - { - "epoch": 1.2805015578110366, - "grad_norm": 0.002565753413364291, - "learning_rate": 0.00019999919181409168, - "loss": 46.0, - "step": 16748 - }, - { - "epoch": 1.2805780147944263, - "grad_norm": 0.0008360464707948267, - "learning_rate": 0.00019999919171751992, - "loss": 46.0, - "step": 16749 - }, - { - "epoch": 1.280654471777816, - "grad_norm": 0.00041501910891383886, - "learning_rate": 0.00019999919162094241, - "loss": 46.0, - "step": 16750 - }, - { - "epoch": 1.2807309287612056, - "grad_norm": 0.006867795251309872, - "learning_rate": 0.0001999991915243591, - "loss": 46.0, - "step": 16751 - }, - { - "epoch": 1.2808073857445954, - "grad_norm": 0.0028635147027671337, - "learning_rate": 0.00019999919142777006, - "loss": 46.0, - "step": 16752 - }, - { - "epoch": 1.2808838427279852, - "grad_norm": 0.0019897243473678827, - "learning_rate": 0.0001999991913311752, - "loss": 46.0, - "step": 16753 - }, - { - "epoch": 1.280960299711375, - "grad_norm": 0.001525622676126659, - "learning_rate": 0.0001999991912345746, - "loss": 46.0, - "step": 16754 - }, - { - "epoch": 1.2810367566947647, - "grad_norm": 0.005283045582473278, - "learning_rate": 0.00019999919113796827, - "loss": 46.0, - "step": 16755 - }, - { - "epoch": 1.2811132136781542, - "grad_norm": 0.001264419755898416, - "learning_rate": 0.00019999919104135615, - "loss": 46.0, - "step": 16756 - }, - { - "epoch": 1.281189670661544, - "grad_norm": 0.0012440038844943047, - "learning_rate": 0.00019999919094473826, - "loss": 46.0, - "step": 16757 - }, - { - "epoch": 1.2812661276449337, - "grad_norm": 0.0007672092178836465, - "learning_rate": 0.00019999919084811457, - "loss": 46.0, - "step": 16758 - }, - { - "epoch": 1.2813425846283235, - "grad_norm": 0.00561955152079463, - "learning_rate": 0.00019999919075148513, - "loss": 46.0, - "step": 16759 - }, - { - "epoch": 1.2814190416117133, - "grad_norm": 0.0029164638835936785, - "learning_rate": 0.00019999919065484992, - "loss": 46.0, - "step": 16760 - }, - { - "epoch": 1.281495498595103, - "grad_norm": 0.0009613786824047565, - "learning_rate": 0.00019999919055820894, - "loss": 46.0, - "step": 16761 - }, - { - "epoch": 1.2815719555784926, - "grad_norm": 0.0004206848971080035, - "learning_rate": 0.00019999919046156218, - "loss": 46.0, - "step": 16762 - }, - { - "epoch": 1.2816484125618823, - "grad_norm": 0.0011929139727726579, - "learning_rate": 0.00019999919036490965, - "loss": 46.0, - "step": 16763 - }, - { - "epoch": 1.281724869545272, - "grad_norm": 0.00257196556776762, - "learning_rate": 0.00019999919026825137, - "loss": 46.0, - "step": 16764 - }, - { - "epoch": 1.2818013265286619, - "grad_norm": 0.0005610408261418343, - "learning_rate": 0.0001999991901715873, - "loss": 46.0, - "step": 16765 - }, - { - "epoch": 1.2818777835120514, - "grad_norm": 0.0007481915527023375, - "learning_rate": 0.00019999919007491747, - "loss": 46.0, - "step": 16766 - }, - { - "epoch": 1.2819542404954412, - "grad_norm": 0.0008595797698944807, - "learning_rate": 0.00019999918997824187, - "loss": 46.0, - "step": 16767 - }, - { - "epoch": 1.282030697478831, - "grad_norm": 0.0005751067074015737, - "learning_rate": 0.00019999918988156053, - "loss": 46.0, - "step": 16768 - }, - { - "epoch": 1.2821071544622207, - "grad_norm": 0.0008936603553593159, - "learning_rate": 0.00019999918978487338, - "loss": 46.0, - "step": 16769 - }, - { - "epoch": 1.2821836114456104, - "grad_norm": 0.006716905161738396, - "learning_rate": 0.00019999918968818047, - "loss": 46.0, - "step": 16770 - }, - { - "epoch": 1.2822600684290002, - "grad_norm": 0.0010301817674189806, - "learning_rate": 0.0001999991895914818, - "loss": 46.0, - "step": 16771 - }, - { - "epoch": 1.28233652541239, - "grad_norm": 0.0013083780650049448, - "learning_rate": 0.00019999918949477734, - "loss": 46.0, - "step": 16772 - }, - { - "epoch": 1.2824129823957795, - "grad_norm": 0.0007963533280417323, - "learning_rate": 0.00019999918939806716, - "loss": 46.0, - "step": 16773 - }, - { - "epoch": 1.2824894393791693, - "grad_norm": 0.0004952663439325988, - "learning_rate": 0.00019999918930135115, - "loss": 46.0, - "step": 16774 - }, - { - "epoch": 1.282565896362559, - "grad_norm": 0.0019545843824744225, - "learning_rate": 0.0001999991892046294, - "loss": 46.0, - "step": 16775 - }, - { - "epoch": 1.2826423533459488, - "grad_norm": 0.0017712283879518509, - "learning_rate": 0.00019999918910790188, - "loss": 46.0, - "step": 16776 - }, - { - "epoch": 1.2827188103293383, - "grad_norm": 0.0007526252884417772, - "learning_rate": 0.00019999918901116858, - "loss": 46.0, - "step": 16777 - }, - { - "epoch": 1.282795267312728, - "grad_norm": 0.0017684220802038908, - "learning_rate": 0.00019999918891442953, - "loss": 46.0, - "step": 16778 - }, - { - "epoch": 1.2828717242961178, - "grad_norm": 0.0010477288160473108, - "learning_rate": 0.00019999918881768468, - "loss": 46.0, - "step": 16779 - }, - { - "epoch": 1.2829481812795076, - "grad_norm": 0.008022102527320385, - "learning_rate": 0.00019999918872093408, - "loss": 46.0, - "step": 16780 - }, - { - "epoch": 1.2830246382628974, - "grad_norm": 0.000900499930139631, - "learning_rate": 0.0001999991886241777, - "loss": 46.0, - "step": 16781 - }, - { - "epoch": 1.2831010952462871, - "grad_norm": 0.002912320662289858, - "learning_rate": 0.0001999991885274156, - "loss": 46.0, - "step": 16782 - }, - { - "epoch": 1.2831775522296769, - "grad_norm": 0.000426529353717342, - "learning_rate": 0.00019999918843064768, - "loss": 46.0, - "step": 16783 - }, - { - "epoch": 1.2832540092130664, - "grad_norm": 0.0009704563417471945, - "learning_rate": 0.00019999918833387402, - "loss": 46.0, - "step": 16784 - }, - { - "epoch": 1.2833304661964562, - "grad_norm": 0.001059980597347021, - "learning_rate": 0.00019999918823709453, - "loss": 46.0, - "step": 16785 - }, - { - "epoch": 1.283406923179846, - "grad_norm": 0.0007888384279794991, - "learning_rate": 0.00019999918814030934, - "loss": 46.0, - "step": 16786 - }, - { - "epoch": 1.2834833801632357, - "grad_norm": 0.00038376240991055965, - "learning_rate": 0.00019999918804351836, - "loss": 46.0, - "step": 16787 - }, - { - "epoch": 1.2835598371466252, - "grad_norm": 0.0007514795288443565, - "learning_rate": 0.0001999991879467216, - "loss": 46.0, - "step": 16788 - }, - { - "epoch": 1.283636294130015, - "grad_norm": 0.002230469137430191, - "learning_rate": 0.00019999918784991907, - "loss": 46.0, - "step": 16789 - }, - { - "epoch": 1.2837127511134048, - "grad_norm": 0.0016391673125326633, - "learning_rate": 0.00019999918775311077, - "loss": 46.0, - "step": 16790 - }, - { - "epoch": 1.2837892080967945, - "grad_norm": 0.002981659723445773, - "learning_rate": 0.00019999918765629672, - "loss": 46.0, - "step": 16791 - }, - { - "epoch": 1.2838656650801843, - "grad_norm": 0.0009505358757451177, - "learning_rate": 0.0001999991875594769, - "loss": 46.0, - "step": 16792 - }, - { - "epoch": 1.283942122063574, - "grad_norm": 0.001292443834245205, - "learning_rate": 0.00019999918746265127, - "loss": 46.0, - "step": 16793 - }, - { - "epoch": 1.2840185790469638, - "grad_norm": 0.0011380905052646995, - "learning_rate": 0.0001999991873658199, - "loss": 46.0, - "step": 16794 - }, - { - "epoch": 1.2840950360303534, - "grad_norm": 0.0011754799634218216, - "learning_rate": 0.00019999918726898276, - "loss": 46.0, - "step": 16795 - }, - { - "epoch": 1.2841714930137431, - "grad_norm": 0.006151310168206692, - "learning_rate": 0.00019999918717213987, - "loss": 46.0, - "step": 16796 - }, - { - "epoch": 1.2842479499971329, - "grad_norm": 0.0008278985624201596, - "learning_rate": 0.00019999918707529118, - "loss": 46.0, - "step": 16797 - }, - { - "epoch": 1.2843244069805226, - "grad_norm": 0.007651502266526222, - "learning_rate": 0.00019999918697843674, - "loss": 46.0, - "step": 16798 - }, - { - "epoch": 1.2844008639639122, - "grad_norm": 0.000510715355630964, - "learning_rate": 0.0001999991868815765, - "loss": 46.0, - "step": 16799 - }, - { - "epoch": 1.284477320947302, - "grad_norm": 0.0029057872015982866, - "learning_rate": 0.00019999918678471052, - "loss": 46.0, - "step": 16800 - }, - { - "epoch": 1.2845537779306917, - "grad_norm": 0.005568479187786579, - "learning_rate": 0.00019999918668783874, - "loss": 46.0, - "step": 16801 - }, - { - "epoch": 1.2846302349140815, - "grad_norm": 0.013746390119194984, - "learning_rate": 0.0001999991865909612, - "loss": 46.0, - "step": 16802 - }, - { - "epoch": 1.2847066918974712, - "grad_norm": 0.0011699412716552615, - "learning_rate": 0.00019999918649407793, - "loss": 46.0, - "step": 16803 - }, - { - "epoch": 1.284783148880861, - "grad_norm": 0.0008903414709493518, - "learning_rate": 0.00019999918639718888, - "loss": 46.0, - "step": 16804 - }, - { - "epoch": 1.2848596058642507, - "grad_norm": 0.002062085783109069, - "learning_rate": 0.00019999918630029403, - "loss": 46.0, - "step": 16805 - }, - { - "epoch": 1.2849360628476403, - "grad_norm": 0.0009825292509049177, - "learning_rate": 0.0001999991862033934, - "loss": 46.0, - "step": 16806 - }, - { - "epoch": 1.28501251983103, - "grad_norm": 0.017173558473587036, - "learning_rate": 0.00019999918610648707, - "loss": 46.0, - "step": 16807 - }, - { - "epoch": 1.2850889768144198, - "grad_norm": 0.005197874736040831, - "learning_rate": 0.0001999991860095749, - "loss": 46.0, - "step": 16808 - }, - { - "epoch": 1.2851654337978096, - "grad_norm": 0.0014744378859177232, - "learning_rate": 0.000199999185912657, - "loss": 46.0, - "step": 16809 - }, - { - "epoch": 1.285241890781199, - "grad_norm": 0.0020845697727054358, - "learning_rate": 0.00019999918581573335, - "loss": 46.0, - "step": 16810 - }, - { - "epoch": 1.2853183477645889, - "grad_norm": 0.002136238617822528, - "learning_rate": 0.00019999918571880388, - "loss": 46.0, - "step": 16811 - }, - { - "epoch": 1.2853948047479786, - "grad_norm": 0.0004366362700238824, - "learning_rate": 0.00019999918562186865, - "loss": 46.0, - "step": 16812 - }, - { - "epoch": 1.2854712617313684, - "grad_norm": 0.0007503421511501074, - "learning_rate": 0.00019999918552492764, - "loss": 46.0, - "step": 16813 - }, - { - "epoch": 1.2855477187147581, - "grad_norm": 0.0012404288863763213, - "learning_rate": 0.00019999918542798088, - "loss": 46.0, - "step": 16814 - }, - { - "epoch": 1.285624175698148, - "grad_norm": 0.0015721333911642432, - "learning_rate": 0.00019999918533102838, - "loss": 46.0, - "step": 16815 - }, - { - "epoch": 1.2857006326815377, - "grad_norm": 0.0011280224425718188, - "learning_rate": 0.00019999918523407007, - "loss": 46.0, - "step": 16816 - }, - { - "epoch": 1.2857770896649272, - "grad_norm": 0.0013504691887646914, - "learning_rate": 0.000199999185137106, - "loss": 46.0, - "step": 16817 - }, - { - "epoch": 1.285853546648317, - "grad_norm": 0.001519326469860971, - "learning_rate": 0.00019999918504013615, - "loss": 46.0, - "step": 16818 - }, - { - "epoch": 1.2859300036317067, - "grad_norm": 0.011493152938783169, - "learning_rate": 0.00019999918494316058, - "loss": 46.0, - "step": 16819 - }, - { - "epoch": 1.2860064606150965, - "grad_norm": 0.0015346946893259883, - "learning_rate": 0.00019999918484617916, - "loss": 46.0, - "step": 16820 - }, - { - "epoch": 1.286082917598486, - "grad_norm": 0.005602580960839987, - "learning_rate": 0.00019999918474919204, - "loss": 46.0, - "step": 16821 - }, - { - "epoch": 1.2861593745818758, - "grad_norm": 0.0030354626942425966, - "learning_rate": 0.00019999918465219912, - "loss": 46.0, - "step": 16822 - }, - { - "epoch": 1.2862358315652656, - "grad_norm": 0.0030874423682689667, - "learning_rate": 0.00019999918455520043, - "loss": 46.0, - "step": 16823 - }, - { - "epoch": 1.2863122885486553, - "grad_norm": 0.00119735614862293, - "learning_rate": 0.000199999184458196, - "loss": 46.0, - "step": 16824 - }, - { - "epoch": 1.286388745532045, - "grad_norm": 0.0014501403784379363, - "learning_rate": 0.00019999918436118574, - "loss": 46.0, - "step": 16825 - }, - { - "epoch": 1.2864652025154348, - "grad_norm": 0.0013016184093430638, - "learning_rate": 0.00019999918426416978, - "loss": 46.0, - "step": 16826 - }, - { - "epoch": 1.2865416594988246, - "grad_norm": 0.00046273902989923954, - "learning_rate": 0.000199999184167148, - "loss": 46.0, - "step": 16827 - }, - { - "epoch": 1.2866181164822141, - "grad_norm": 0.0003741539840120822, - "learning_rate": 0.0001999991840701205, - "loss": 46.0, - "step": 16828 - }, - { - "epoch": 1.286694573465604, - "grad_norm": 0.0036199484020471573, - "learning_rate": 0.00019999918397308717, - "loss": 46.0, - "step": 16829 - }, - { - "epoch": 1.2867710304489937, - "grad_norm": 0.0005321458447724581, - "learning_rate": 0.0001999991838760481, - "loss": 46.0, - "step": 16830 - }, - { - "epoch": 1.2868474874323834, - "grad_norm": 0.001334922038950026, - "learning_rate": 0.00019999918377900327, - "loss": 46.0, - "step": 16831 - }, - { - "epoch": 1.286923944415773, - "grad_norm": 0.0008652047836221755, - "learning_rate": 0.00019999918368195264, - "loss": 46.0, - "step": 16832 - }, - { - "epoch": 1.2870004013991627, - "grad_norm": 0.0028909649699926376, - "learning_rate": 0.00019999918358489628, - "loss": 46.0, - "step": 16833 - }, - { - "epoch": 1.2870768583825525, - "grad_norm": 0.0012722325045615435, - "learning_rate": 0.00019999918348783413, - "loss": 46.0, - "step": 16834 - }, - { - "epoch": 1.2871533153659422, - "grad_norm": 0.0003154508594889194, - "learning_rate": 0.00019999918339076622, - "loss": 46.0, - "step": 16835 - }, - { - "epoch": 1.287229772349332, - "grad_norm": 0.0012075634440407157, - "learning_rate": 0.00019999918329369253, - "loss": 46.0, - "step": 16836 - }, - { - "epoch": 1.2873062293327218, - "grad_norm": 0.0005677743465639651, - "learning_rate": 0.00019999918319661307, - "loss": 46.0, - "step": 16837 - }, - { - "epoch": 1.2873826863161115, - "grad_norm": 0.0008385126129724085, - "learning_rate": 0.00019999918309952786, - "loss": 46.0, - "step": 16838 - }, - { - "epoch": 1.287459143299501, - "grad_norm": 0.0005314136506058276, - "learning_rate": 0.00019999918300243685, - "loss": 46.0, - "step": 16839 - }, - { - "epoch": 1.2875356002828908, - "grad_norm": 0.0015994678251445293, - "learning_rate": 0.00019999918290534006, - "loss": 46.0, - "step": 16840 - }, - { - "epoch": 1.2876120572662806, - "grad_norm": 0.0006531765102408826, - "learning_rate": 0.00019999918280823756, - "loss": 46.0, - "step": 16841 - }, - { - "epoch": 1.2876885142496703, - "grad_norm": 0.003536122152581811, - "learning_rate": 0.00019999918271112926, - "loss": 46.0, - "step": 16842 - }, - { - "epoch": 1.2877649712330599, - "grad_norm": 0.001129776705056429, - "learning_rate": 0.00019999918261401516, - "loss": 46.0, - "step": 16843 - }, - { - "epoch": 1.2878414282164496, - "grad_norm": 0.0003413664235267788, - "learning_rate": 0.00019999918251689534, - "loss": 46.0, - "step": 16844 - }, - { - "epoch": 1.2879178851998394, - "grad_norm": 0.0012691678712144494, - "learning_rate": 0.0001999991824197697, - "loss": 46.0, - "step": 16845 - }, - { - "epoch": 1.2879943421832292, - "grad_norm": 0.0020315167494118214, - "learning_rate": 0.00019999918232263832, - "loss": 46.0, - "step": 16846 - }, - { - "epoch": 1.288070799166619, - "grad_norm": 0.005115443374961615, - "learning_rate": 0.00019999918222550118, - "loss": 46.0, - "step": 16847 - }, - { - "epoch": 1.2881472561500087, - "grad_norm": 0.0011568405898287892, - "learning_rate": 0.00019999918212835826, - "loss": 46.0, - "step": 16848 - }, - { - "epoch": 1.2882237131333985, - "grad_norm": 0.02212458848953247, - "learning_rate": 0.00019999918203120957, - "loss": 46.0, - "step": 16849 - }, - { - "epoch": 1.288300170116788, - "grad_norm": 0.0038435286842286587, - "learning_rate": 0.0001999991819340551, - "loss": 46.0, - "step": 16850 - }, - { - "epoch": 1.2883766271001778, - "grad_norm": 0.0004945640102960169, - "learning_rate": 0.00019999918183689487, - "loss": 46.0, - "step": 16851 - }, - { - "epoch": 1.2884530840835675, - "grad_norm": 0.001998421037569642, - "learning_rate": 0.0001999991817397289, - "loss": 46.0, - "step": 16852 - }, - { - "epoch": 1.2885295410669573, - "grad_norm": 0.0010479297488927841, - "learning_rate": 0.00019999918164255714, - "loss": 46.0, - "step": 16853 - }, - { - "epoch": 1.2886059980503468, - "grad_norm": 0.0008820774964988232, - "learning_rate": 0.00019999918154537958, - "loss": 46.0, - "step": 16854 - }, - { - "epoch": 1.2886824550337366, - "grad_norm": 0.0012558652088046074, - "learning_rate": 0.0001999991814481963, - "loss": 46.0, - "step": 16855 - }, - { - "epoch": 1.2887589120171263, - "grad_norm": 0.0008983712759800255, - "learning_rate": 0.0001999991813510072, - "loss": 46.0, - "step": 16856 - }, - { - "epoch": 1.288835369000516, - "grad_norm": 0.0009755120845511556, - "learning_rate": 0.00019999918125381235, - "loss": 46.0, - "step": 16857 - }, - { - "epoch": 1.2889118259839059, - "grad_norm": 0.002427182625979185, - "learning_rate": 0.00019999918115661176, - "loss": 46.0, - "step": 16858 - }, - { - "epoch": 1.2889882829672956, - "grad_norm": 0.001303202356211841, - "learning_rate": 0.0001999991810594054, - "loss": 46.0, - "step": 16859 - }, - { - "epoch": 1.2890647399506854, - "grad_norm": 0.0004270741774234921, - "learning_rate": 0.00019999918096219322, - "loss": 46.0, - "step": 16860 - }, - { - "epoch": 1.289141196934075, - "grad_norm": 0.0030658901669085026, - "learning_rate": 0.0001999991808649753, - "loss": 46.0, - "step": 16861 - }, - { - "epoch": 1.2892176539174647, - "grad_norm": 0.004079016856849194, - "learning_rate": 0.00019999918076775162, - "loss": 46.0, - "step": 16862 - }, - { - "epoch": 1.2892941109008544, - "grad_norm": 0.0013592157047241926, - "learning_rate": 0.00019999918067052216, - "loss": 46.0, - "step": 16863 - }, - { - "epoch": 1.2893705678842442, - "grad_norm": 0.004121098667383194, - "learning_rate": 0.0001999991805732869, - "loss": 46.0, - "step": 16864 - }, - { - "epoch": 1.2894470248676337, - "grad_norm": 0.005282412748783827, - "learning_rate": 0.0001999991804760459, - "loss": 46.0, - "step": 16865 - }, - { - "epoch": 1.2895234818510235, - "grad_norm": 0.0012981975451111794, - "learning_rate": 0.00019999918037879916, - "loss": 46.0, - "step": 16866 - }, - { - "epoch": 1.2895999388344133, - "grad_norm": 0.0004448969557415694, - "learning_rate": 0.0001999991802815466, - "loss": 46.0, - "step": 16867 - }, - { - "epoch": 1.289676395817803, - "grad_norm": 0.001530495355837047, - "learning_rate": 0.0001999991801842883, - "loss": 46.0, - "step": 16868 - }, - { - "epoch": 1.2897528528011928, - "grad_norm": 0.0016936227912083268, - "learning_rate": 0.00019999918008702423, - "loss": 46.0, - "step": 16869 - }, - { - "epoch": 1.2898293097845825, - "grad_norm": 0.000989087508060038, - "learning_rate": 0.00019999917998975438, - "loss": 46.0, - "step": 16870 - }, - { - "epoch": 1.2899057667679723, - "grad_norm": 0.0010438649915158749, - "learning_rate": 0.00019999917989247876, - "loss": 46.0, - "step": 16871 - }, - { - "epoch": 1.2899822237513618, - "grad_norm": 0.0006205153185874224, - "learning_rate": 0.0001999991797951974, - "loss": 46.0, - "step": 16872 - }, - { - "epoch": 1.2900586807347516, - "grad_norm": 0.0044082277454435825, - "learning_rate": 0.00019999917969791022, - "loss": 46.0, - "step": 16873 - }, - { - "epoch": 1.2901351377181414, - "grad_norm": 0.0012309845769777894, - "learning_rate": 0.0001999991796006173, - "loss": 46.0, - "step": 16874 - }, - { - "epoch": 1.2902115947015311, - "grad_norm": 0.0016119048232212663, - "learning_rate": 0.00019999917950331862, - "loss": 46.0, - "step": 16875 - }, - { - "epoch": 1.2902880516849207, - "grad_norm": 0.0013921381905674934, - "learning_rate": 0.00019999917940601413, - "loss": 46.0, - "step": 16876 - }, - { - "epoch": 1.2903645086683104, - "grad_norm": 0.0010991496965289116, - "learning_rate": 0.0001999991793087039, - "loss": 46.0, - "step": 16877 - }, - { - "epoch": 1.2904409656517002, - "grad_norm": 0.0008678184822201729, - "learning_rate": 0.0001999991792113879, - "loss": 46.0, - "step": 16878 - }, - { - "epoch": 1.29051742263509, - "grad_norm": 0.0007273616502061486, - "learning_rate": 0.0001999991791140661, - "loss": 46.0, - "step": 16879 - }, - { - "epoch": 1.2905938796184797, - "grad_norm": 0.002260018140077591, - "learning_rate": 0.0001999991790167386, - "loss": 46.0, - "step": 16880 - }, - { - "epoch": 1.2906703366018695, - "grad_norm": 0.0005118157132528722, - "learning_rate": 0.0001999991789194053, - "loss": 46.0, - "step": 16881 - }, - { - "epoch": 1.290746793585259, - "grad_norm": 0.0017869130242615938, - "learning_rate": 0.0001999991788220662, - "loss": 46.0, - "step": 16882 - }, - { - "epoch": 1.2908232505686488, - "grad_norm": 0.0008558796253055334, - "learning_rate": 0.00019999917872472135, - "loss": 46.0, - "step": 16883 - }, - { - "epoch": 1.2908997075520385, - "grad_norm": 0.0007911978755146265, - "learning_rate": 0.00019999917862737073, - "loss": 46.0, - "step": 16884 - }, - { - "epoch": 1.2909761645354283, - "grad_norm": 0.002293990459293127, - "learning_rate": 0.00019999917853001433, - "loss": 46.0, - "step": 16885 - }, - { - "epoch": 1.291052621518818, - "grad_norm": 0.003016027621924877, - "learning_rate": 0.0001999991784326522, - "loss": 46.0, - "step": 16886 - }, - { - "epoch": 1.2911290785022076, - "grad_norm": 0.0010313489474356174, - "learning_rate": 0.00019999917833528428, - "loss": 46.0, - "step": 16887 - }, - { - "epoch": 1.2912055354855974, - "grad_norm": 0.001564008998684585, - "learning_rate": 0.00019999917823791056, - "loss": 46.0, - "step": 16888 - }, - { - "epoch": 1.2912819924689871, - "grad_norm": 0.001408661832101643, - "learning_rate": 0.00019999917814053107, - "loss": 46.0, - "step": 16889 - }, - { - "epoch": 1.2913584494523769, - "grad_norm": 0.0010653232457116246, - "learning_rate": 0.00019999917804314587, - "loss": 46.0, - "step": 16890 - }, - { - "epoch": 1.2914349064357666, - "grad_norm": 0.002050354378297925, - "learning_rate": 0.00019999917794575486, - "loss": 46.0, - "step": 16891 - }, - { - "epoch": 1.2915113634191564, - "grad_norm": 0.0010547222336754203, - "learning_rate": 0.00019999917784835808, - "loss": 46.0, - "step": 16892 - }, - { - "epoch": 1.291587820402546, - "grad_norm": 0.002576560713350773, - "learning_rate": 0.00019999917775095555, - "loss": 46.0, - "step": 16893 - }, - { - "epoch": 1.2916642773859357, - "grad_norm": 0.0010254953522235155, - "learning_rate": 0.00019999917765354722, - "loss": 46.0, - "step": 16894 - }, - { - "epoch": 1.2917407343693255, - "grad_norm": 0.0006489516817964613, - "learning_rate": 0.00019999917755613315, - "loss": 46.0, - "step": 16895 - }, - { - "epoch": 1.2918171913527152, - "grad_norm": 0.001210812944918871, - "learning_rate": 0.0001999991774587133, - "loss": 46.0, - "step": 16896 - }, - { - "epoch": 1.2918936483361048, - "grad_norm": 0.002377795986831188, - "learning_rate": 0.00019999917736128768, - "loss": 46.0, - "step": 16897 - }, - { - "epoch": 1.2919701053194945, - "grad_norm": 0.00047870652633719146, - "learning_rate": 0.00019999917726385628, - "loss": 46.0, - "step": 16898 - }, - { - "epoch": 1.2920465623028843, - "grad_norm": 0.0013301590224727988, - "learning_rate": 0.00019999917716641914, - "loss": 46.0, - "step": 16899 - }, - { - "epoch": 1.292123019286274, - "grad_norm": 0.0005451434990391135, - "learning_rate": 0.0001999991770689762, - "loss": 46.0, - "step": 16900 - }, - { - "epoch": 1.2921994762696638, - "grad_norm": 0.0005562631413340569, - "learning_rate": 0.00019999917697152751, - "loss": 46.0, - "step": 16901 - }, - { - "epoch": 1.2922759332530536, - "grad_norm": 0.0021114330738782883, - "learning_rate": 0.00019999917687407303, - "loss": 46.0, - "step": 16902 - }, - { - "epoch": 1.2923523902364433, - "grad_norm": 0.002595886355265975, - "learning_rate": 0.0001999991767766128, - "loss": 46.0, - "step": 16903 - }, - { - "epoch": 1.2924288472198329, - "grad_norm": 0.0013018480967730284, - "learning_rate": 0.00019999917667914678, - "loss": 46.0, - "step": 16904 - }, - { - "epoch": 1.2925053042032226, - "grad_norm": 0.003693181788548827, - "learning_rate": 0.00019999917658167503, - "loss": 46.0, - "step": 16905 - }, - { - "epoch": 1.2925817611866124, - "grad_norm": 0.0013611132744699717, - "learning_rate": 0.00019999917648419748, - "loss": 46.0, - "step": 16906 - }, - { - "epoch": 1.2926582181700021, - "grad_norm": 0.0011091484921053052, - "learning_rate": 0.00019999917638671417, - "loss": 46.0, - "step": 16907 - }, - { - "epoch": 1.2927346751533917, - "grad_norm": 0.0007690757629461586, - "learning_rate": 0.0001999991762892251, - "loss": 46.0, - "step": 16908 - }, - { - "epoch": 1.2928111321367814, - "grad_norm": 0.006611744873225689, - "learning_rate": 0.00019999917619173023, - "loss": 46.0, - "step": 16909 - }, - { - "epoch": 1.2928875891201712, - "grad_norm": 0.0007839055033400655, - "learning_rate": 0.0001999991760942296, - "loss": 46.0, - "step": 16910 - }, - { - "epoch": 1.292964046103561, - "grad_norm": 0.000695764843840152, - "learning_rate": 0.0001999991759967232, - "loss": 46.0, - "step": 16911 - }, - { - "epoch": 1.2930405030869507, - "grad_norm": 0.002346856752410531, - "learning_rate": 0.00019999917589921104, - "loss": 46.0, - "step": 16912 - }, - { - "epoch": 1.2931169600703405, - "grad_norm": 0.0006746028084307909, - "learning_rate": 0.00019999917580169313, - "loss": 46.0, - "step": 16913 - }, - { - "epoch": 1.2931934170537303, - "grad_norm": 0.0008049709722399712, - "learning_rate": 0.0001999991757041694, - "loss": 46.0, - "step": 16914 - }, - { - "epoch": 1.2932698740371198, - "grad_norm": 0.0037269913591444492, - "learning_rate": 0.00019999917560663995, - "loss": 46.0, - "step": 16915 - }, - { - "epoch": 1.2933463310205096, - "grad_norm": 0.0007973831379786134, - "learning_rate": 0.0001999991755091047, - "loss": 46.0, - "step": 16916 - }, - { - "epoch": 1.2934227880038993, - "grad_norm": 0.0016177480574697256, - "learning_rate": 0.00019999917541156369, - "loss": 46.0, - "step": 16917 - }, - { - "epoch": 1.293499244987289, - "grad_norm": 0.0010194308124482632, - "learning_rate": 0.0001999991753140169, - "loss": 46.0, - "step": 16918 - }, - { - "epoch": 1.2935757019706786, - "grad_norm": 0.00223959656432271, - "learning_rate": 0.00019999917521646435, - "loss": 46.0, - "step": 16919 - }, - { - "epoch": 1.2936521589540684, - "grad_norm": 0.0007535201730206609, - "learning_rate": 0.00019999917511890605, - "loss": 46.0, - "step": 16920 - }, - { - "epoch": 1.2937286159374581, - "grad_norm": 0.0009935133857652545, - "learning_rate": 0.00019999917502134195, - "loss": 46.0, - "step": 16921 - }, - { - "epoch": 1.293805072920848, - "grad_norm": 0.0007746369228698313, - "learning_rate": 0.0001999991749237721, - "loss": 46.0, - "step": 16922 - }, - { - "epoch": 1.2938815299042377, - "grad_norm": 0.001072959857992828, - "learning_rate": 0.00019999917482619648, - "loss": 46.0, - "step": 16923 - }, - { - "epoch": 1.2939579868876274, - "grad_norm": 0.0007886750972829759, - "learning_rate": 0.0001999991747286151, - "loss": 46.0, - "step": 16924 - }, - { - "epoch": 1.2940344438710172, - "grad_norm": 0.0013376976130530238, - "learning_rate": 0.00019999917463102792, - "loss": 46.0, - "step": 16925 - }, - { - "epoch": 1.2941109008544067, - "grad_norm": 0.0019853590056300163, - "learning_rate": 0.000199999174533435, - "loss": 46.0, - "step": 16926 - }, - { - "epoch": 1.2941873578377965, - "grad_norm": 0.0013454477302730083, - "learning_rate": 0.0001999991744358363, - "loss": 46.0, - "step": 16927 - }, - { - "epoch": 1.2942638148211862, - "grad_norm": 0.0008105112356133759, - "learning_rate": 0.0001999991743382318, - "loss": 46.0, - "step": 16928 - }, - { - "epoch": 1.294340271804576, - "grad_norm": 0.0012198216281831264, - "learning_rate": 0.00019999917424062157, - "loss": 46.0, - "step": 16929 - }, - { - "epoch": 1.2944167287879655, - "grad_norm": 0.0009669942664913833, - "learning_rate": 0.00019999917414300557, - "loss": 46.0, - "step": 16930 - }, - { - "epoch": 1.2944931857713553, - "grad_norm": 0.0006907760398462415, - "learning_rate": 0.00019999917404538378, - "loss": 46.0, - "step": 16931 - }, - { - "epoch": 1.294569642754745, - "grad_norm": 0.0011938433162868023, - "learning_rate": 0.00019999917394775626, - "loss": 46.0, - "step": 16932 - }, - { - "epoch": 1.2946460997381348, - "grad_norm": 0.002145028207451105, - "learning_rate": 0.00019999917385012293, - "loss": 46.0, - "step": 16933 - }, - { - "epoch": 1.2947225567215246, - "grad_norm": 0.0037043741904199123, - "learning_rate": 0.00019999917375248383, - "loss": 46.0, - "step": 16934 - }, - { - "epoch": 1.2947990137049143, - "grad_norm": 0.0011335520539432764, - "learning_rate": 0.00019999917365483896, - "loss": 46.0, - "step": 16935 - }, - { - "epoch": 1.294875470688304, - "grad_norm": 0.0009578640456311405, - "learning_rate": 0.00019999917355718834, - "loss": 46.0, - "step": 16936 - }, - { - "epoch": 1.2949519276716936, - "grad_norm": 0.0009613609872758389, - "learning_rate": 0.00019999917345953194, - "loss": 46.0, - "step": 16937 - }, - { - "epoch": 1.2950283846550834, - "grad_norm": 0.0034558584447950125, - "learning_rate": 0.00019999917336186978, - "loss": 46.0, - "step": 16938 - }, - { - "epoch": 1.2951048416384732, - "grad_norm": 0.0004144326376263052, - "learning_rate": 0.00019999917326420184, - "loss": 46.0, - "step": 16939 - }, - { - "epoch": 1.295181298621863, - "grad_norm": 0.005028862506151199, - "learning_rate": 0.00019999917316652815, - "loss": 46.0, - "step": 16940 - }, - { - "epoch": 1.2952577556052525, - "grad_norm": 0.0014026965945959091, - "learning_rate": 0.0001999991730688487, - "loss": 46.0, - "step": 16941 - }, - { - "epoch": 1.2953342125886422, - "grad_norm": 0.0018483380554243922, - "learning_rate": 0.00019999917297116343, - "loss": 46.0, - "step": 16942 - }, - { - "epoch": 1.295410669572032, - "grad_norm": 0.001309492508880794, - "learning_rate": 0.0001999991728734724, - "loss": 46.0, - "step": 16943 - }, - { - "epoch": 1.2954871265554218, - "grad_norm": 0.0041581010445952415, - "learning_rate": 0.00019999917277577561, - "loss": 46.0, - "step": 16944 - }, - { - "epoch": 1.2955635835388115, - "grad_norm": 0.0006954861455596983, - "learning_rate": 0.00019999917267807306, - "loss": 46.0, - "step": 16945 - }, - { - "epoch": 1.2956400405222013, - "grad_norm": 0.010062218643724918, - "learning_rate": 0.00019999917258036476, - "loss": 46.0, - "step": 16946 - }, - { - "epoch": 1.295716497505591, - "grad_norm": 0.0006993044517003, - "learning_rate": 0.00019999917248265066, - "loss": 46.0, - "step": 16947 - }, - { - "epoch": 1.2957929544889806, - "grad_norm": 0.01372715923935175, - "learning_rate": 0.0001999991723849308, - "loss": 46.0, - "step": 16948 - }, - { - "epoch": 1.2958694114723703, - "grad_norm": 0.0014995898818597198, - "learning_rate": 0.00019999917228720517, - "loss": 46.0, - "step": 16949 - }, - { - "epoch": 1.29594586845576, - "grad_norm": 0.0010075741447508335, - "learning_rate": 0.00019999917218947377, - "loss": 46.0, - "step": 16950 - }, - { - "epoch": 1.2960223254391499, - "grad_norm": 0.0011956244707107544, - "learning_rate": 0.0001999991720917366, - "loss": 46.0, - "step": 16951 - }, - { - "epoch": 1.2960987824225394, - "grad_norm": 0.001037903013639152, - "learning_rate": 0.00019999917199399367, - "loss": 46.0, - "step": 16952 - }, - { - "epoch": 1.2961752394059292, - "grad_norm": 0.0011192620731890202, - "learning_rate": 0.00019999917189624495, - "loss": 46.0, - "step": 16953 - }, - { - "epoch": 1.296251696389319, - "grad_norm": 0.004891249351203442, - "learning_rate": 0.0001999991717984905, - "loss": 46.0, - "step": 16954 - }, - { - "epoch": 1.2963281533727087, - "grad_norm": 0.0010662992717698216, - "learning_rate": 0.0001999991717007302, - "loss": 46.0, - "step": 16955 - }, - { - "epoch": 1.2964046103560984, - "grad_norm": 0.003124087816104293, - "learning_rate": 0.00019999917160296423, - "loss": 46.0, - "step": 16956 - }, - { - "epoch": 1.2964810673394882, - "grad_norm": 0.0015029701171442866, - "learning_rate": 0.00019999917150519242, - "loss": 46.0, - "step": 16957 - }, - { - "epoch": 1.296557524322878, - "grad_norm": 0.005907348357141018, - "learning_rate": 0.0001999991714074149, - "loss": 46.0, - "step": 16958 - }, - { - "epoch": 1.2966339813062675, - "grad_norm": 0.0004651906783692539, - "learning_rate": 0.00019999917130963154, - "loss": 46.0, - "step": 16959 - }, - { - "epoch": 1.2967104382896573, - "grad_norm": 0.0007895607850514352, - "learning_rate": 0.00019999917121184247, - "loss": 46.0, - "step": 16960 - }, - { - "epoch": 1.296786895273047, - "grad_norm": 0.0018282203236594796, - "learning_rate": 0.0001999991711140476, - "loss": 46.0, - "step": 16961 - }, - { - "epoch": 1.2968633522564368, - "grad_norm": 0.002710596891120076, - "learning_rate": 0.00019999917101624697, - "loss": 46.0, - "step": 16962 - }, - { - "epoch": 1.2969398092398263, - "grad_norm": 0.0019121470395475626, - "learning_rate": 0.00019999917091844055, - "loss": 46.0, - "step": 16963 - }, - { - "epoch": 1.297016266223216, - "grad_norm": 0.0026673662941902876, - "learning_rate": 0.0001999991708206284, - "loss": 46.0, - "step": 16964 - }, - { - "epoch": 1.2970927232066058, - "grad_norm": 0.0010244291042909026, - "learning_rate": 0.00019999917072281045, - "loss": 46.0, - "step": 16965 - }, - { - "epoch": 1.2971691801899956, - "grad_norm": 0.002440034644678235, - "learning_rate": 0.00019999917062498674, - "loss": 46.0, - "step": 16966 - }, - { - "epoch": 1.2972456371733854, - "grad_norm": 0.000626893131993711, - "learning_rate": 0.00019999917052715728, - "loss": 46.0, - "step": 16967 - }, - { - "epoch": 1.2973220941567751, - "grad_norm": 0.0013734777458012104, - "learning_rate": 0.00019999917042932202, - "loss": 46.0, - "step": 16968 - }, - { - "epoch": 1.297398551140165, - "grad_norm": 0.0008380646468140185, - "learning_rate": 0.000199999170331481, - "loss": 46.0, - "step": 16969 - }, - { - "epoch": 1.2974750081235544, - "grad_norm": 0.0020817413460463285, - "learning_rate": 0.0001999991702336342, - "loss": 46.0, - "step": 16970 - }, - { - "epoch": 1.2975514651069442, - "grad_norm": 0.006914359051734209, - "learning_rate": 0.00019999917013578165, - "loss": 46.0, - "step": 16971 - }, - { - "epoch": 1.297627922090334, - "grad_norm": 0.0016369130462408066, - "learning_rate": 0.00019999917003792332, - "loss": 46.0, - "step": 16972 - }, - { - "epoch": 1.2977043790737237, - "grad_norm": 0.0008318163454532623, - "learning_rate": 0.00019999916994005922, - "loss": 46.0, - "step": 16973 - }, - { - "epoch": 1.2977808360571133, - "grad_norm": 0.002666981192305684, - "learning_rate": 0.00019999916984218938, - "loss": 46.0, - "step": 16974 - }, - { - "epoch": 1.297857293040503, - "grad_norm": 0.000722285476513207, - "learning_rate": 0.00019999916974431373, - "loss": 46.0, - "step": 16975 - }, - { - "epoch": 1.2979337500238928, - "grad_norm": 0.0008893163176253438, - "learning_rate": 0.00019999916964643234, - "loss": 46.0, - "step": 16976 - }, - { - "epoch": 1.2980102070072825, - "grad_norm": 0.007473403587937355, - "learning_rate": 0.00019999916954854517, - "loss": 46.0, - "step": 16977 - }, - { - "epoch": 1.2980866639906723, - "grad_norm": 0.0036508669145405293, - "learning_rate": 0.00019999916945065223, - "loss": 46.0, - "step": 16978 - }, - { - "epoch": 1.298163120974062, - "grad_norm": 0.0019909648690372705, - "learning_rate": 0.00019999916935275352, - "loss": 46.0, - "step": 16979 - }, - { - "epoch": 1.2982395779574518, - "grad_norm": 0.005040519405156374, - "learning_rate": 0.00019999916925484904, - "loss": 46.0, - "step": 16980 - }, - { - "epoch": 1.2983160349408414, - "grad_norm": 0.0005798061029054224, - "learning_rate": 0.00019999916915693878, - "loss": 46.0, - "step": 16981 - }, - { - "epoch": 1.2983924919242311, - "grad_norm": 0.004145279061049223, - "learning_rate": 0.00019999916905902274, - "loss": 46.0, - "step": 16982 - }, - { - "epoch": 1.2984689489076209, - "grad_norm": 0.0011156536638736725, - "learning_rate": 0.00019999916896110096, - "loss": 46.0, - "step": 16983 - }, - { - "epoch": 1.2985454058910106, - "grad_norm": 0.0006065593333914876, - "learning_rate": 0.0001999991688631734, - "loss": 46.0, - "step": 16984 - }, - { - "epoch": 1.2986218628744002, - "grad_norm": 0.0014073837082833052, - "learning_rate": 0.00019999916876524009, - "loss": 46.0, - "step": 16985 - }, - { - "epoch": 1.29869831985779, - "grad_norm": 0.0006809005280956626, - "learning_rate": 0.00019999916866730099, - "loss": 46.0, - "step": 16986 - }, - { - "epoch": 1.2987747768411797, - "grad_norm": 0.001131216180510819, - "learning_rate": 0.0001999991685693561, - "loss": 46.0, - "step": 16987 - }, - { - "epoch": 1.2988512338245695, - "grad_norm": 0.0003314427740406245, - "learning_rate": 0.00019999916847140547, - "loss": 46.0, - "step": 16988 - }, - { - "epoch": 1.2989276908079592, - "grad_norm": 0.002919338643550873, - "learning_rate": 0.00019999916837344905, - "loss": 46.0, - "step": 16989 - }, - { - "epoch": 1.299004147791349, - "grad_norm": 0.004110758192837238, - "learning_rate": 0.00019999916827548688, - "loss": 46.0, - "step": 16990 - }, - { - "epoch": 1.2990806047747387, - "grad_norm": 0.00038438753108493984, - "learning_rate": 0.00019999916817751894, - "loss": 46.0, - "step": 16991 - }, - { - "epoch": 1.2991570617581283, - "grad_norm": 0.001421051798388362, - "learning_rate": 0.00019999916807954523, - "loss": 46.0, - "step": 16992 - }, - { - "epoch": 1.299233518741518, - "grad_norm": 0.0003969838726334274, - "learning_rate": 0.00019999916798156574, - "loss": 46.0, - "step": 16993 - }, - { - "epoch": 1.2993099757249078, - "grad_norm": 0.003521139267832041, - "learning_rate": 0.0001999991678835805, - "loss": 46.0, - "step": 16994 - }, - { - "epoch": 1.2993864327082976, - "grad_norm": 0.0036777553614228964, - "learning_rate": 0.00019999916778558948, - "loss": 46.0, - "step": 16995 - }, - { - "epoch": 1.299462889691687, - "grad_norm": 0.0008138920529745519, - "learning_rate": 0.00019999916768759267, - "loss": 46.0, - "step": 16996 - }, - { - "epoch": 1.2995393466750769, - "grad_norm": 0.003016428090631962, - "learning_rate": 0.00019999916758959012, - "loss": 46.0, - "step": 16997 - }, - { - "epoch": 1.2996158036584666, - "grad_norm": 0.0014221933670341969, - "learning_rate": 0.0001999991674915818, - "loss": 46.0, - "step": 16998 - }, - { - "epoch": 1.2996922606418564, - "grad_norm": 0.002771981991827488, - "learning_rate": 0.00019999916739356767, - "loss": 46.0, - "step": 16999 - }, - { - "epoch": 1.2997687176252462, - "grad_norm": 0.0006767380400560796, - "learning_rate": 0.00019999916729554782, - "loss": 46.0, - "step": 17000 - }, - { - "epoch": 1.299845174608636, - "grad_norm": 0.0018132395343855023, - "learning_rate": 0.00019999916719752218, - "loss": 46.0, - "step": 17001 - }, - { - "epoch": 1.2999216315920257, - "grad_norm": 0.002206960925832391, - "learning_rate": 0.0001999991670994908, - "loss": 46.0, - "step": 17002 - }, - { - "epoch": 1.2999980885754152, - "grad_norm": 0.0010303910821676254, - "learning_rate": 0.0001999991670014536, - "loss": 46.0, - "step": 17003 - }, - { - "epoch": 1.300074545558805, - "grad_norm": 0.00044133688788861036, - "learning_rate": 0.00019999916690341066, - "loss": 46.0, - "step": 17004 - }, - { - "epoch": 1.3001510025421947, - "grad_norm": 0.0031654739286750555, - "learning_rate": 0.00019999916680536192, - "loss": 46.0, - "step": 17005 - }, - { - "epoch": 1.3002274595255845, - "grad_norm": 0.0017270793905481696, - "learning_rate": 0.00019999916670730743, - "loss": 46.0, - "step": 17006 - }, - { - "epoch": 1.300303916508974, - "grad_norm": 0.0026358310133218765, - "learning_rate": 0.0001999991666092472, - "loss": 46.0, - "step": 17007 - }, - { - "epoch": 1.3003803734923638, - "grad_norm": 0.0010879419278353453, - "learning_rate": 0.00019999916651118117, - "loss": 46.0, - "step": 17008 - }, - { - "epoch": 1.3004568304757536, - "grad_norm": 0.0011486713774502277, - "learning_rate": 0.0001999991664131094, - "loss": 46.0, - "step": 17009 - }, - { - "epoch": 1.3005332874591433, - "grad_norm": 0.000679433869663626, - "learning_rate": 0.00019999916631503178, - "loss": 46.0, - "step": 17010 - }, - { - "epoch": 1.300609744442533, - "grad_norm": 0.003507207613438368, - "learning_rate": 0.00019999916621694848, - "loss": 46.0, - "step": 17011 - }, - { - "epoch": 1.3006862014259228, - "grad_norm": 0.0022981667425483465, - "learning_rate": 0.00019999916611885938, - "loss": 46.0, - "step": 17012 - }, - { - "epoch": 1.3007626584093124, - "grad_norm": 0.0037976689636707306, - "learning_rate": 0.00019999916602076449, - "loss": 46.0, - "step": 17013 - }, - { - "epoch": 1.3008391153927021, - "grad_norm": 0.0007833215058781207, - "learning_rate": 0.00019999916592266387, - "loss": 46.0, - "step": 17014 - }, - { - "epoch": 1.300915572376092, - "grad_norm": 0.008463768288493156, - "learning_rate": 0.00019999916582455745, - "loss": 46.0, - "step": 17015 - }, - { - "epoch": 1.3009920293594817, - "grad_norm": 0.000680348020978272, - "learning_rate": 0.00019999916572644528, - "loss": 46.0, - "step": 17016 - }, - { - "epoch": 1.3010684863428714, - "grad_norm": 0.0005476119695231318, - "learning_rate": 0.00019999916562832732, - "loss": 46.0, - "step": 17017 - }, - { - "epoch": 1.301144943326261, - "grad_norm": 0.0007602603873237967, - "learning_rate": 0.00019999916553020358, - "loss": 46.0, - "step": 17018 - }, - { - "epoch": 1.3012214003096507, - "grad_norm": 0.00031254449277184904, - "learning_rate": 0.00019999916543207412, - "loss": 46.0, - "step": 17019 - }, - { - "epoch": 1.3012978572930405, - "grad_norm": 0.0008787038386799395, - "learning_rate": 0.00019999916533393886, - "loss": 46.0, - "step": 17020 - }, - { - "epoch": 1.3013743142764302, - "grad_norm": 0.015780337154865265, - "learning_rate": 0.00019999916523579783, - "loss": 46.0, - "step": 17021 - }, - { - "epoch": 1.30145077125982, - "grad_norm": 0.0013650988694280386, - "learning_rate": 0.00019999916513765102, - "loss": 46.0, - "step": 17022 - }, - { - "epoch": 1.3015272282432098, - "grad_norm": 0.004020048771053553, - "learning_rate": 0.00019999916503949847, - "loss": 46.0, - "step": 17023 - }, - { - "epoch": 1.3016036852265993, - "grad_norm": 0.0007510558352805674, - "learning_rate": 0.00019999916494134015, - "loss": 46.0, - "step": 17024 - }, - { - "epoch": 1.301680142209989, - "grad_norm": 0.0009251527953892946, - "learning_rate": 0.00019999916484317602, - "loss": 46.0, - "step": 17025 - }, - { - "epoch": 1.3017565991933788, - "grad_norm": 0.0006468918872997165, - "learning_rate": 0.00019999916474500615, - "loss": 46.0, - "step": 17026 - }, - { - "epoch": 1.3018330561767686, - "grad_norm": 0.002583061344921589, - "learning_rate": 0.0001999991646468305, - "loss": 46.0, - "step": 17027 - }, - { - "epoch": 1.3019095131601581, - "grad_norm": 0.0011829755967482924, - "learning_rate": 0.00019999916454864911, - "loss": 46.0, - "step": 17028 - }, - { - "epoch": 1.3019859701435479, - "grad_norm": 0.0015390904154628515, - "learning_rate": 0.00019999916445046192, - "loss": 46.0, - "step": 17029 - }, - { - "epoch": 1.3020624271269376, - "grad_norm": 0.001295155263505876, - "learning_rate": 0.00019999916435226896, - "loss": 46.0, - "step": 17030 - }, - { - "epoch": 1.3021388841103274, - "grad_norm": 0.0013396060094237328, - "learning_rate": 0.00019999916425407025, - "loss": 46.0, - "step": 17031 - }, - { - "epoch": 1.3022153410937172, - "grad_norm": 0.004085657186806202, - "learning_rate": 0.00019999916415586576, - "loss": 46.0, - "step": 17032 - }, - { - "epoch": 1.302291798077107, - "grad_norm": 0.0021983031183481216, - "learning_rate": 0.00019999916405765548, - "loss": 46.0, - "step": 17033 - }, - { - "epoch": 1.3023682550604967, - "grad_norm": 0.0012879796558991075, - "learning_rate": 0.00019999916395943944, - "loss": 46.0, - "step": 17034 - }, - { - "epoch": 1.3024447120438862, - "grad_norm": 0.0009685040568001568, - "learning_rate": 0.00019999916386121767, - "loss": 46.0, - "step": 17035 - }, - { - "epoch": 1.302521169027276, - "grad_norm": 0.0006651482544839382, - "learning_rate": 0.00019999916376299011, - "loss": 46.0, - "step": 17036 - }, - { - "epoch": 1.3025976260106658, - "grad_norm": 0.0007602511323057115, - "learning_rate": 0.00019999916366475676, - "loss": 46.0, - "step": 17037 - }, - { - "epoch": 1.3026740829940555, - "grad_norm": 0.003833473427221179, - "learning_rate": 0.00019999916356651764, - "loss": 46.0, - "step": 17038 - }, - { - "epoch": 1.302750539977445, - "grad_norm": 0.0036222226917743683, - "learning_rate": 0.0001999991634682728, - "loss": 46.0, - "step": 17039 - }, - { - "epoch": 1.3028269969608348, - "grad_norm": 0.0024131571408361197, - "learning_rate": 0.00019999916337002215, - "loss": 46.0, - "step": 17040 - }, - { - "epoch": 1.3029034539442246, - "grad_norm": 0.0005607795319519937, - "learning_rate": 0.0001999991632717657, - "loss": 46.0, - "step": 17041 - }, - { - "epoch": 1.3029799109276143, - "grad_norm": 0.0035813169088214636, - "learning_rate": 0.0001999991631735035, - "loss": 46.0, - "step": 17042 - }, - { - "epoch": 1.303056367911004, - "grad_norm": 0.0017612132942304015, - "learning_rate": 0.00019999916307523557, - "loss": 46.0, - "step": 17043 - }, - { - "epoch": 1.3031328248943939, - "grad_norm": 0.0031446327921003103, - "learning_rate": 0.00019999916297696184, - "loss": 46.0, - "step": 17044 - }, - { - "epoch": 1.3032092818777836, - "grad_norm": 0.0038262035232037306, - "learning_rate": 0.00019999916287868238, - "loss": 46.0, - "step": 17045 - }, - { - "epoch": 1.3032857388611732, - "grad_norm": 0.0005556940450333059, - "learning_rate": 0.0001999991627803971, - "loss": 46.0, - "step": 17046 - }, - { - "epoch": 1.303362195844563, - "grad_norm": 0.0035446039400994778, - "learning_rate": 0.00019999916268210606, - "loss": 46.0, - "step": 17047 - }, - { - "epoch": 1.3034386528279527, - "grad_norm": 0.0019526415271684527, - "learning_rate": 0.00019999916258380926, - "loss": 46.0, - "step": 17048 - }, - { - "epoch": 1.3035151098113424, - "grad_norm": 0.002811564365401864, - "learning_rate": 0.0001999991624855067, - "loss": 46.0, - "step": 17049 - }, - { - "epoch": 1.303591566794732, - "grad_norm": 0.0012579062022268772, - "learning_rate": 0.00019999916238719835, - "loss": 46.0, - "step": 17050 - }, - { - "epoch": 1.3036680237781217, - "grad_norm": 0.0019145956030115485, - "learning_rate": 0.00019999916228888426, - "loss": 46.0, - "step": 17051 - }, - { - "epoch": 1.3037444807615115, - "grad_norm": 0.0012322311522439122, - "learning_rate": 0.00019999916219056436, - "loss": 46.0, - "step": 17052 - }, - { - "epoch": 1.3038209377449013, - "grad_norm": 0.00039088213816285133, - "learning_rate": 0.00019999916209223871, - "loss": 46.0, - "step": 17053 - }, - { - "epoch": 1.303897394728291, - "grad_norm": 0.0014892910839989781, - "learning_rate": 0.00019999916199390732, - "loss": 46.0, - "step": 17054 - }, - { - "epoch": 1.3039738517116808, - "grad_norm": 0.005496213678270578, - "learning_rate": 0.0001999991618955701, - "loss": 46.0, - "step": 17055 - }, - { - "epoch": 1.3040503086950705, - "grad_norm": 0.000989635824225843, - "learning_rate": 0.00019999916179722714, - "loss": 46.0, - "step": 17056 - }, - { - "epoch": 1.30412676567846, - "grad_norm": 0.0012343868147581816, - "learning_rate": 0.00019999916169887843, - "loss": 46.0, - "step": 17057 - }, - { - "epoch": 1.3042032226618498, - "grad_norm": 0.01438385434448719, - "learning_rate": 0.00019999916160052394, - "loss": 46.0, - "step": 17058 - }, - { - "epoch": 1.3042796796452396, - "grad_norm": 0.0009648837149143219, - "learning_rate": 0.00019999916150216366, - "loss": 46.0, - "step": 17059 - }, - { - "epoch": 1.3043561366286294, - "grad_norm": 0.0018759606173262, - "learning_rate": 0.00019999916140379763, - "loss": 46.0, - "step": 17060 - }, - { - "epoch": 1.304432593612019, - "grad_norm": 0.0007930463762022555, - "learning_rate": 0.00019999916130542582, - "loss": 46.0, - "step": 17061 - }, - { - "epoch": 1.3045090505954087, - "grad_norm": 0.0004368225345388055, - "learning_rate": 0.00019999916120704825, - "loss": 46.0, - "step": 17062 - }, - { - "epoch": 1.3045855075787984, - "grad_norm": 0.0051443264819681644, - "learning_rate": 0.00019999916110866492, - "loss": 46.0, - "step": 17063 - }, - { - "epoch": 1.3046619645621882, - "grad_norm": 0.0034380012657493353, - "learning_rate": 0.0001999991610102758, - "loss": 46.0, - "step": 17064 - }, - { - "epoch": 1.304738421545578, - "grad_norm": 0.005695089232176542, - "learning_rate": 0.00019999916091188093, - "loss": 46.0, - "step": 17065 - }, - { - "epoch": 1.3048148785289677, - "grad_norm": 0.0024394141510128975, - "learning_rate": 0.00019999916081348026, - "loss": 46.0, - "step": 17066 - }, - { - "epoch": 1.3048913355123575, - "grad_norm": 0.0006356213125400245, - "learning_rate": 0.00019999916071507387, - "loss": 46.0, - "step": 17067 - }, - { - "epoch": 1.304967792495747, - "grad_norm": 0.0007458970067091286, - "learning_rate": 0.00019999916061666167, - "loss": 46.0, - "step": 17068 - }, - { - "epoch": 1.3050442494791368, - "grad_norm": 0.0006066332571208477, - "learning_rate": 0.0001999991605182437, - "loss": 46.0, - "step": 17069 - }, - { - "epoch": 1.3051207064625265, - "grad_norm": 0.0012583734933286905, - "learning_rate": 0.00019999916041982, - "loss": 46.0, - "step": 17070 - }, - { - "epoch": 1.3051971634459163, - "grad_norm": 0.0007267092005349696, - "learning_rate": 0.0001999991603213905, - "loss": 46.0, - "step": 17071 - }, - { - "epoch": 1.3052736204293058, - "grad_norm": 0.00033966515911743045, - "learning_rate": 0.0001999991602229552, - "loss": 46.0, - "step": 17072 - }, - { - "epoch": 1.3053500774126956, - "grad_norm": 0.004110801964998245, - "learning_rate": 0.00019999916012451417, - "loss": 46.0, - "step": 17073 - }, - { - "epoch": 1.3054265343960854, - "grad_norm": 0.0015570273390039802, - "learning_rate": 0.00019999916002606737, - "loss": 46.0, - "step": 17074 - }, - { - "epoch": 1.3055029913794751, - "grad_norm": 0.0010826793732121587, - "learning_rate": 0.0001999991599276148, - "loss": 46.0, - "step": 17075 - }, - { - "epoch": 1.3055794483628649, - "grad_norm": 0.0029062326066195965, - "learning_rate": 0.00019999915982915644, - "loss": 46.0, - "step": 17076 - }, - { - "epoch": 1.3056559053462546, - "grad_norm": 0.0006346172885969281, - "learning_rate": 0.00019999915973069234, - "loss": 46.0, - "step": 17077 - }, - { - "epoch": 1.3057323623296444, - "grad_norm": 0.0013415678404271603, - "learning_rate": 0.00019999915963222247, - "loss": 46.0, - "step": 17078 - }, - { - "epoch": 1.305808819313034, - "grad_norm": 0.0010799997253343463, - "learning_rate": 0.0001999991595337468, - "loss": 46.0, - "step": 17079 - }, - { - "epoch": 1.3058852762964237, - "grad_norm": 0.0012914076214656234, - "learning_rate": 0.00019999915943526539, - "loss": 46.0, - "step": 17080 - }, - { - "epoch": 1.3059617332798135, - "grad_norm": 0.002412175526842475, - "learning_rate": 0.0001999991593367782, - "loss": 46.0, - "step": 17081 - }, - { - "epoch": 1.3060381902632032, - "grad_norm": 0.0010982458479702473, - "learning_rate": 0.00019999915923828523, - "loss": 46.0, - "step": 17082 - }, - { - "epoch": 1.3061146472465928, - "grad_norm": 0.002270329277962446, - "learning_rate": 0.00019999915913978652, - "loss": 46.0, - "step": 17083 - }, - { - "epoch": 1.3061911042299825, - "grad_norm": 0.0010062523651868105, - "learning_rate": 0.000199999159041282, - "loss": 46.0, - "step": 17084 - }, - { - "epoch": 1.3062675612133723, - "grad_norm": 0.004939588252454996, - "learning_rate": 0.00019999915894277175, - "loss": 46.0, - "step": 17085 - }, - { - "epoch": 1.306344018196762, - "grad_norm": 0.0010617101797834039, - "learning_rate": 0.0001999991588442557, - "loss": 46.0, - "step": 17086 - }, - { - "epoch": 1.3064204751801518, - "grad_norm": 0.0007467318791896105, - "learning_rate": 0.0001999991587457339, - "loss": 46.0, - "step": 17087 - }, - { - "epoch": 1.3064969321635416, - "grad_norm": 0.0021483786404132843, - "learning_rate": 0.00019999915864720631, - "loss": 46.0, - "step": 17088 - }, - { - "epoch": 1.3065733891469313, - "grad_norm": 0.0027480274438858032, - "learning_rate": 0.00019999915854867296, - "loss": 46.0, - "step": 17089 - }, - { - "epoch": 1.3066498461303209, - "grad_norm": 0.0008927136077545583, - "learning_rate": 0.00019999915845013387, - "loss": 46.0, - "step": 17090 - }, - { - "epoch": 1.3067263031137106, - "grad_norm": 0.0013238799292594194, - "learning_rate": 0.00019999915835158894, - "loss": 46.0, - "step": 17091 - }, - { - "epoch": 1.3068027600971004, - "grad_norm": 0.0004264836315996945, - "learning_rate": 0.00019999915825303833, - "loss": 46.0, - "step": 17092 - }, - { - "epoch": 1.3068792170804902, - "grad_norm": 0.011239280924201012, - "learning_rate": 0.00019999915815448188, - "loss": 46.0, - "step": 17093 - }, - { - "epoch": 1.3069556740638797, - "grad_norm": 0.002597997896373272, - "learning_rate": 0.0001999991580559197, - "loss": 46.0, - "step": 17094 - }, - { - "epoch": 1.3070321310472695, - "grad_norm": 0.0017309971153736115, - "learning_rate": 0.00019999915795735173, - "loss": 46.0, - "step": 17095 - }, - { - "epoch": 1.3071085880306592, - "grad_norm": 0.007089436985552311, - "learning_rate": 0.00019999915785877802, - "loss": 46.0, - "step": 17096 - }, - { - "epoch": 1.307185045014049, - "grad_norm": 0.001025989418849349, - "learning_rate": 0.0001999991577601985, - "loss": 46.0, - "step": 17097 - }, - { - "epoch": 1.3072615019974387, - "grad_norm": 0.013778255321085453, - "learning_rate": 0.00019999915766161322, - "loss": 46.0, - "step": 17098 - }, - { - "epoch": 1.3073379589808285, - "grad_norm": 0.001568926265463233, - "learning_rate": 0.0001999991575630222, - "loss": 46.0, - "step": 17099 - }, - { - "epoch": 1.3074144159642183, - "grad_norm": 0.0010469084372743964, - "learning_rate": 0.00019999915746442536, - "loss": 46.0, - "step": 17100 - }, - { - "epoch": 1.3074908729476078, - "grad_norm": 0.0010955313919112086, - "learning_rate": 0.00019999915736582279, - "loss": 46.0, - "step": 17101 - }, - { - "epoch": 1.3075673299309976, - "grad_norm": 0.008914983831346035, - "learning_rate": 0.00019999915726721446, - "loss": 46.0, - "step": 17102 - }, - { - "epoch": 1.3076437869143873, - "grad_norm": 0.002404893049970269, - "learning_rate": 0.0001999991571686003, - "loss": 46.0, - "step": 17103 - }, - { - "epoch": 1.307720243897777, - "grad_norm": 0.010579340159893036, - "learning_rate": 0.00019999915706998044, - "loss": 46.0, - "step": 17104 - }, - { - "epoch": 1.3077967008811666, - "grad_norm": 0.005604743491858244, - "learning_rate": 0.00019999915697135477, - "loss": 46.0, - "step": 17105 - }, - { - "epoch": 1.3078731578645564, - "grad_norm": 0.006471514236181974, - "learning_rate": 0.00019999915687272336, - "loss": 46.0, - "step": 17106 - }, - { - "epoch": 1.3079496148479461, - "grad_norm": 0.0014159809798002243, - "learning_rate": 0.00019999915677408617, - "loss": 46.0, - "step": 17107 - }, - { - "epoch": 1.308026071831336, - "grad_norm": 0.0013574615586549044, - "learning_rate": 0.0001999991566754432, - "loss": 46.0, - "step": 17108 - }, - { - "epoch": 1.3081025288147257, - "grad_norm": 0.005113136954605579, - "learning_rate": 0.00019999915657679447, - "loss": 46.0, - "step": 17109 - }, - { - "epoch": 1.3081789857981154, - "grad_norm": 0.001255523762665689, - "learning_rate": 0.00019999915647813996, - "loss": 46.0, - "step": 17110 - }, - { - "epoch": 1.3082554427815052, - "grad_norm": 0.0008455773931927979, - "learning_rate": 0.00019999915637947967, - "loss": 46.0, - "step": 17111 - }, - { - "epoch": 1.3083318997648947, - "grad_norm": 0.00045527919428423047, - "learning_rate": 0.00019999915628081361, - "loss": 46.0, - "step": 17112 - }, - { - "epoch": 1.3084083567482845, - "grad_norm": 0.005838301964104176, - "learning_rate": 0.0001999991561821418, - "loss": 46.0, - "step": 17113 - }, - { - "epoch": 1.3084848137316742, - "grad_norm": 0.00553513178601861, - "learning_rate": 0.00019999915608346424, - "loss": 46.0, - "step": 17114 - }, - { - "epoch": 1.308561270715064, - "grad_norm": 0.004545619245618582, - "learning_rate": 0.00019999915598478089, - "loss": 46.0, - "step": 17115 - }, - { - "epoch": 1.3086377276984535, - "grad_norm": 0.0017334987642243505, - "learning_rate": 0.00019999915588609176, - "loss": 46.0, - "step": 17116 - }, - { - "epoch": 1.3087141846818433, - "grad_norm": 0.0009968295926228166, - "learning_rate": 0.0001999991557873969, - "loss": 46.0, - "step": 17117 - }, - { - "epoch": 1.308790641665233, - "grad_norm": 0.0004681750142481178, - "learning_rate": 0.00019999915568869622, - "loss": 46.0, - "step": 17118 - }, - { - "epoch": 1.3088670986486228, - "grad_norm": 0.0014000753872096539, - "learning_rate": 0.00019999915558998978, - "loss": 46.0, - "step": 17119 - }, - { - "epoch": 1.3089435556320126, - "grad_norm": 0.0009876416297629476, - "learning_rate": 0.0001999991554912776, - "loss": 46.0, - "step": 17120 - }, - { - "epoch": 1.3090200126154024, - "grad_norm": 0.0016021830961108208, - "learning_rate": 0.00019999915539255963, - "loss": 46.0, - "step": 17121 - }, - { - "epoch": 1.3090964695987921, - "grad_norm": 0.006823969539254904, - "learning_rate": 0.0001999991552938359, - "loss": 46.0, - "step": 17122 - }, - { - "epoch": 1.3091729265821817, - "grad_norm": 0.001259143347851932, - "learning_rate": 0.00019999915519510638, - "loss": 46.0, - "step": 17123 - }, - { - "epoch": 1.3092493835655714, - "grad_norm": 0.0028713110368698835, - "learning_rate": 0.00019999915509637112, - "loss": 46.0, - "step": 17124 - }, - { - "epoch": 1.3093258405489612, - "grad_norm": 0.0008640232845209539, - "learning_rate": 0.00019999915499763007, - "loss": 46.0, - "step": 17125 - }, - { - "epoch": 1.309402297532351, - "grad_norm": 0.00041096031782217324, - "learning_rate": 0.00019999915489888324, - "loss": 46.0, - "step": 17126 - }, - { - "epoch": 1.3094787545157405, - "grad_norm": 0.0018318830989301205, - "learning_rate": 0.00019999915480013066, - "loss": 46.0, - "step": 17127 - }, - { - "epoch": 1.3095552114991302, - "grad_norm": 0.0029584444127976894, - "learning_rate": 0.0001999991547013723, - "loss": 46.0, - "step": 17128 - }, - { - "epoch": 1.30963166848252, - "grad_norm": 0.0010804666671901941, - "learning_rate": 0.0001999991546026082, - "loss": 46.0, - "step": 17129 - }, - { - "epoch": 1.3097081254659098, - "grad_norm": 0.0009533656993880868, - "learning_rate": 0.0001999991545038383, - "loss": 46.0, - "step": 17130 - }, - { - "epoch": 1.3097845824492995, - "grad_norm": 0.0014663183828815818, - "learning_rate": 0.00019999915440506265, - "loss": 46.0, - "step": 17131 - }, - { - "epoch": 1.3098610394326893, - "grad_norm": 0.0003289435408078134, - "learning_rate": 0.00019999915430628123, - "loss": 46.0, - "step": 17132 - }, - { - "epoch": 1.309937496416079, - "grad_norm": 0.0017163041047751904, - "learning_rate": 0.00019999915420749402, - "loss": 46.0, - "step": 17133 - }, - { - "epoch": 1.3100139533994686, - "grad_norm": 0.0010907113319262862, - "learning_rate": 0.00019999915410870105, - "loss": 46.0, - "step": 17134 - }, - { - "epoch": 1.3100904103828583, - "grad_norm": 0.004672758746892214, - "learning_rate": 0.0001999991540099023, - "loss": 46.0, - "step": 17135 - }, - { - "epoch": 1.310166867366248, - "grad_norm": 0.0004823078343179077, - "learning_rate": 0.00019999915391109778, - "loss": 46.0, - "step": 17136 - }, - { - "epoch": 1.3102433243496379, - "grad_norm": 0.016972461715340614, - "learning_rate": 0.0001999991538122875, - "loss": 46.0, - "step": 17137 - }, - { - "epoch": 1.3103197813330274, - "grad_norm": 0.0007133137551136315, - "learning_rate": 0.00019999915371347147, - "loss": 46.0, - "step": 17138 - }, - { - "epoch": 1.3103962383164172, - "grad_norm": 0.003391512203961611, - "learning_rate": 0.00019999915361464964, - "loss": 46.0, - "step": 17139 - }, - { - "epoch": 1.310472695299807, - "grad_norm": 0.0032656979747116566, - "learning_rate": 0.00019999915351582206, - "loss": 46.0, - "step": 17140 - }, - { - "epoch": 1.3105491522831967, - "grad_norm": 0.002001130487769842, - "learning_rate": 0.00019999915341698872, - "loss": 46.0, - "step": 17141 - }, - { - "epoch": 1.3106256092665864, - "grad_norm": 0.0005703952629119158, - "learning_rate": 0.0001999991533181496, - "loss": 46.0, - "step": 17142 - }, - { - "epoch": 1.3107020662499762, - "grad_norm": 0.003303299192339182, - "learning_rate": 0.00019999915321930467, - "loss": 46.0, - "step": 17143 - }, - { - "epoch": 1.3107785232333657, - "grad_norm": 0.0022562930826097727, - "learning_rate": 0.000199999153120454, - "loss": 46.0, - "step": 17144 - }, - { - "epoch": 1.3108549802167555, - "grad_norm": 0.0009510604431852698, - "learning_rate": 0.00019999915302159758, - "loss": 46.0, - "step": 17145 - }, - { - "epoch": 1.3109314372001453, - "grad_norm": 0.0012925302144140005, - "learning_rate": 0.0001999991529227354, - "loss": 46.0, - "step": 17146 - }, - { - "epoch": 1.311007894183535, - "grad_norm": 0.0010041811037808657, - "learning_rate": 0.0001999991528238674, - "loss": 46.0, - "step": 17147 - }, - { - "epoch": 1.3110843511669248, - "grad_norm": 0.0016535257454961538, - "learning_rate": 0.00019999915272499367, - "loss": 46.0, - "step": 17148 - }, - { - "epoch": 1.3111608081503143, - "grad_norm": 0.0018469089409336448, - "learning_rate": 0.00019999915262611416, - "loss": 46.0, - "step": 17149 - }, - { - "epoch": 1.311237265133704, - "grad_norm": 0.001431250129826367, - "learning_rate": 0.0001999991525272289, - "loss": 46.0, - "step": 17150 - }, - { - "epoch": 1.3113137221170938, - "grad_norm": 0.0006772562628611922, - "learning_rate": 0.00019999915242833782, - "loss": 46.0, - "step": 17151 - }, - { - "epoch": 1.3113901791004836, - "grad_norm": 0.015381153672933578, - "learning_rate": 0.00019999915232944102, - "loss": 46.0, - "step": 17152 - }, - { - "epoch": 1.3114666360838734, - "grad_norm": 0.004695856012403965, - "learning_rate": 0.00019999915223053842, - "loss": 46.0, - "step": 17153 - }, - { - "epoch": 1.3115430930672631, - "grad_norm": 0.001373407314531505, - "learning_rate": 0.00019999915213163004, - "loss": 46.0, - "step": 17154 - }, - { - "epoch": 1.3116195500506527, - "grad_norm": 0.0005978519329801202, - "learning_rate": 0.00019999915203271592, - "loss": 46.0, - "step": 17155 - }, - { - "epoch": 1.3116960070340424, - "grad_norm": 0.0014906262513250113, - "learning_rate": 0.00019999915193379602, - "loss": 46.0, - "step": 17156 - }, - { - "epoch": 1.3117724640174322, - "grad_norm": 0.001896207220852375, - "learning_rate": 0.00019999915183487038, - "loss": 46.0, - "step": 17157 - }, - { - "epoch": 1.311848921000822, - "grad_norm": 0.0012717708013951778, - "learning_rate": 0.00019999915173593894, - "loss": 46.0, - "step": 17158 - }, - { - "epoch": 1.3119253779842117, - "grad_norm": 0.0013867573579773307, - "learning_rate": 0.00019999915163700172, - "loss": 46.0, - "step": 17159 - }, - { - "epoch": 1.3120018349676013, - "grad_norm": 0.0012509622611105442, - "learning_rate": 0.00019999915153805876, - "loss": 46.0, - "step": 17160 - }, - { - "epoch": 1.312078291950991, - "grad_norm": 0.005528236739337444, - "learning_rate": 0.00019999915143911, - "loss": 46.0, - "step": 17161 - }, - { - "epoch": 1.3121547489343808, - "grad_norm": 0.0029104817658662796, - "learning_rate": 0.0001999991513401555, - "loss": 46.0, - "step": 17162 - }, - { - "epoch": 1.3122312059177705, - "grad_norm": 0.010189109481871128, - "learning_rate": 0.0001999991512411952, - "loss": 46.0, - "step": 17163 - }, - { - "epoch": 1.3123076629011603, - "grad_norm": 0.000931020185817033, - "learning_rate": 0.00019999915114222918, - "loss": 46.0, - "step": 17164 - }, - { - "epoch": 1.31238411988455, - "grad_norm": 0.0012041906593367457, - "learning_rate": 0.00019999915104325735, - "loss": 46.0, - "step": 17165 - }, - { - "epoch": 1.3124605768679396, - "grad_norm": 0.002006001304835081, - "learning_rate": 0.00019999915094427975, - "loss": 46.0, - "step": 17166 - }, - { - "epoch": 1.3125370338513294, - "grad_norm": 0.0005529527552425861, - "learning_rate": 0.0001999991508452964, - "loss": 46.0, - "step": 17167 - }, - { - "epoch": 1.3126134908347191, - "grad_norm": 0.0008109658374451101, - "learning_rate": 0.00019999915074630725, - "loss": 46.0, - "step": 17168 - }, - { - "epoch": 1.3126899478181089, - "grad_norm": 0.004001515917479992, - "learning_rate": 0.00019999915064731235, - "loss": 46.0, - "step": 17169 - }, - { - "epoch": 1.3127664048014984, - "grad_norm": 0.0010594894411042333, - "learning_rate": 0.00019999915054831169, - "loss": 46.0, - "step": 17170 - }, - { - "epoch": 1.3128428617848882, - "grad_norm": 0.003379775444045663, - "learning_rate": 0.00019999915044930527, - "loss": 46.0, - "step": 17171 - }, - { - "epoch": 1.312919318768278, - "grad_norm": 0.0011823073728010058, - "learning_rate": 0.00019999915035029305, - "loss": 46.0, - "step": 17172 - }, - { - "epoch": 1.3129957757516677, - "grad_norm": 0.0024823795538395643, - "learning_rate": 0.00019999915025127507, - "loss": 46.0, - "step": 17173 - }, - { - "epoch": 1.3130722327350575, - "grad_norm": 0.0009069254738278687, - "learning_rate": 0.00019999915015225136, - "loss": 46.0, - "step": 17174 - }, - { - "epoch": 1.3131486897184472, - "grad_norm": 0.002019510604441166, - "learning_rate": 0.0001999991500532218, - "loss": 46.0, - "step": 17175 - }, - { - "epoch": 1.313225146701837, - "grad_norm": 0.0005911023472435772, - "learning_rate": 0.00019999914995418654, - "loss": 46.0, - "step": 17176 - }, - { - "epoch": 1.3133016036852265, - "grad_norm": 0.0009922138415277004, - "learning_rate": 0.00019999914985514548, - "loss": 46.0, - "step": 17177 - }, - { - "epoch": 1.3133780606686163, - "grad_norm": 0.0013644836144521832, - "learning_rate": 0.00019999914975609863, - "loss": 46.0, - "step": 17178 - }, - { - "epoch": 1.313454517652006, - "grad_norm": 0.002014856319874525, - "learning_rate": 0.00019999914965704605, - "loss": 46.0, - "step": 17179 - }, - { - "epoch": 1.3135309746353958, - "grad_norm": 0.0006061993772163987, - "learning_rate": 0.00019999914955798768, - "loss": 46.0, - "step": 17180 - }, - { - "epoch": 1.3136074316187853, - "grad_norm": 0.00035414070589467883, - "learning_rate": 0.00019999914945892356, - "loss": 46.0, - "step": 17181 - }, - { - "epoch": 1.313683888602175, - "grad_norm": 0.00031240942189469934, - "learning_rate": 0.00019999914935985366, - "loss": 46.0, - "step": 17182 - }, - { - "epoch": 1.3137603455855649, - "grad_norm": 0.0005707665113732219, - "learning_rate": 0.00019999914926077802, - "loss": 46.0, - "step": 17183 - }, - { - "epoch": 1.3138368025689546, - "grad_norm": 0.002884496469050646, - "learning_rate": 0.00019999914916169655, - "loss": 46.0, - "step": 17184 - }, - { - "epoch": 1.3139132595523444, - "grad_norm": 0.00043921807082369924, - "learning_rate": 0.00019999914906260934, - "loss": 46.0, - "step": 17185 - }, - { - "epoch": 1.3139897165357342, - "grad_norm": 0.0010038636391982436, - "learning_rate": 0.00019999914896351637, - "loss": 46.0, - "step": 17186 - }, - { - "epoch": 1.314066173519124, - "grad_norm": 0.0007171165780164301, - "learning_rate": 0.0001999991488644176, - "loss": 46.0, - "step": 17187 - }, - { - "epoch": 1.3141426305025135, - "grad_norm": 0.0010055151069536805, - "learning_rate": 0.0001999991487653131, - "loss": 46.0, - "step": 17188 - }, - { - "epoch": 1.3142190874859032, - "grad_norm": 0.0028502445202320814, - "learning_rate": 0.00019999914866620282, - "loss": 46.0, - "step": 17189 - }, - { - "epoch": 1.314295544469293, - "grad_norm": 0.0005379535141400993, - "learning_rate": 0.00019999914856708677, - "loss": 46.0, - "step": 17190 - }, - { - "epoch": 1.3143720014526827, - "grad_norm": 0.006283284164965153, - "learning_rate": 0.0001999991484679649, - "loss": 46.0, - "step": 17191 - }, - { - "epoch": 1.3144484584360723, - "grad_norm": 0.0006603418732993305, - "learning_rate": 0.00019999914836883734, - "loss": 46.0, - "step": 17192 - }, - { - "epoch": 1.314524915419462, - "grad_norm": 0.0006712102331221104, - "learning_rate": 0.000199999148269704, - "loss": 46.0, - "step": 17193 - }, - { - "epoch": 1.3146013724028518, - "grad_norm": 0.022888246923685074, - "learning_rate": 0.00019999914817056484, - "loss": 46.0, - "step": 17194 - }, - { - "epoch": 1.3146778293862416, - "grad_norm": 0.0027552032843232155, - "learning_rate": 0.00019999914807141992, - "loss": 46.0, - "step": 17195 - }, - { - "epoch": 1.3147542863696313, - "grad_norm": 0.0013505418319255114, - "learning_rate": 0.00019999914797226925, - "loss": 46.0, - "step": 17196 - }, - { - "epoch": 1.314830743353021, - "grad_norm": 0.003091516438871622, - "learning_rate": 0.00019999914787311284, - "loss": 46.0, - "step": 17197 - }, - { - "epoch": 1.3149072003364108, - "grad_norm": 0.0006567321834154427, - "learning_rate": 0.0001999991477739506, - "loss": 46.0, - "step": 17198 - }, - { - "epoch": 1.3149836573198004, - "grad_norm": 0.0005596888950094581, - "learning_rate": 0.00019999914767478263, - "loss": 46.0, - "step": 17199 - }, - { - "epoch": 1.3150601143031901, - "grad_norm": 0.0004952656454406679, - "learning_rate": 0.00019999914757560887, - "loss": 46.0, - "step": 17200 - }, - { - "epoch": 1.31513657128658, - "grad_norm": 0.0009215171448886395, - "learning_rate": 0.00019999914747642934, - "loss": 46.0, - "step": 17201 - }, - { - "epoch": 1.3152130282699697, - "grad_norm": 0.001959695480763912, - "learning_rate": 0.00019999914737724406, - "loss": 46.0, - "step": 17202 - }, - { - "epoch": 1.3152894852533592, - "grad_norm": 0.0008185189217329025, - "learning_rate": 0.000199999147278053, - "loss": 46.0, - "step": 17203 - }, - { - "epoch": 1.315365942236749, - "grad_norm": 0.0008231941028498113, - "learning_rate": 0.00019999914717885617, - "loss": 46.0, - "step": 17204 - }, - { - "epoch": 1.3154423992201387, - "grad_norm": 0.0007910511340014637, - "learning_rate": 0.00019999914707965357, - "loss": 46.0, - "step": 17205 - }, - { - "epoch": 1.3155188562035285, - "grad_norm": 0.0017435974441468716, - "learning_rate": 0.0001999991469804452, - "loss": 46.0, - "step": 17206 - }, - { - "epoch": 1.3155953131869182, - "grad_norm": 0.0004577609652187675, - "learning_rate": 0.00019999914688123108, - "loss": 46.0, - "step": 17207 - }, - { - "epoch": 1.315671770170308, - "grad_norm": 0.0011333676520735025, - "learning_rate": 0.00019999914678201118, - "loss": 46.0, - "step": 17208 - }, - { - "epoch": 1.3157482271536978, - "grad_norm": 0.0003037470451090485, - "learning_rate": 0.00019999914668278549, - "loss": 46.0, - "step": 17209 - }, - { - "epoch": 1.3158246841370873, - "grad_norm": 0.0004673453513532877, - "learning_rate": 0.00019999914658355405, - "loss": 46.0, - "step": 17210 - }, - { - "epoch": 1.315901141120477, - "grad_norm": 0.0026928263250738382, - "learning_rate": 0.00019999914648431683, - "loss": 46.0, - "step": 17211 - }, - { - "epoch": 1.3159775981038668, - "grad_norm": 0.001289918553084135, - "learning_rate": 0.00019999914638507384, - "loss": 46.0, - "step": 17212 - }, - { - "epoch": 1.3160540550872566, - "grad_norm": 0.0018694362370297313, - "learning_rate": 0.0001999991462858251, - "loss": 46.0, - "step": 17213 - }, - { - "epoch": 1.3161305120706461, - "grad_norm": 0.0022590463049709797, - "learning_rate": 0.00019999914618657057, - "loss": 46.0, - "step": 17214 - }, - { - "epoch": 1.316206969054036, - "grad_norm": 0.001186096458695829, - "learning_rate": 0.0001999991460873103, - "loss": 46.0, - "step": 17215 - }, - { - "epoch": 1.3162834260374257, - "grad_norm": 0.000799805682618171, - "learning_rate": 0.00019999914598804424, - "loss": 46.0, - "step": 17216 - }, - { - "epoch": 1.3163598830208154, - "grad_norm": 0.0035685899201780558, - "learning_rate": 0.0001999991458887724, - "loss": 46.0, - "step": 17217 - }, - { - "epoch": 1.3164363400042052, - "grad_norm": 0.0009092389955185354, - "learning_rate": 0.00019999914578949478, - "loss": 46.0, - "step": 17218 - }, - { - "epoch": 1.316512796987595, - "grad_norm": 0.0006872125086374581, - "learning_rate": 0.0001999991456902114, - "loss": 46.0, - "step": 17219 - }, - { - "epoch": 1.3165892539709847, - "grad_norm": 0.0009588262182660401, - "learning_rate": 0.0001999991455909223, - "loss": 46.0, - "step": 17220 - }, - { - "epoch": 1.3166657109543742, - "grad_norm": 0.0010056920582428575, - "learning_rate": 0.00019999914549162737, - "loss": 46.0, - "step": 17221 - }, - { - "epoch": 1.316742167937764, - "grad_norm": 0.0012327306903898716, - "learning_rate": 0.00019999914539232673, - "loss": 46.0, - "step": 17222 - }, - { - "epoch": 1.3168186249211538, - "grad_norm": 0.017056334763765335, - "learning_rate": 0.00019999914529302023, - "loss": 46.0, - "step": 17223 - }, - { - "epoch": 1.3168950819045435, - "grad_norm": 0.00018474803073331714, - "learning_rate": 0.00019999914519370804, - "loss": 46.0, - "step": 17224 - }, - { - "epoch": 1.316971538887933, - "grad_norm": 0.0005765540990978479, - "learning_rate": 0.00019999914509439006, - "loss": 46.0, - "step": 17225 - }, - { - "epoch": 1.3170479958713228, - "grad_norm": 0.0021213290747255087, - "learning_rate": 0.00019999914499506632, - "loss": 46.0, - "step": 17226 - }, - { - "epoch": 1.3171244528547126, - "grad_norm": 0.001359756919555366, - "learning_rate": 0.00019999914489573676, - "loss": 46.0, - "step": 17227 - }, - { - "epoch": 1.3172009098381023, - "grad_norm": 0.0009897955460473895, - "learning_rate": 0.0001999991447964015, - "loss": 46.0, - "step": 17228 - }, - { - "epoch": 1.317277366821492, - "grad_norm": 0.001963048242032528, - "learning_rate": 0.00019999914469706043, - "loss": 46.0, - "step": 17229 - }, - { - "epoch": 1.3173538238048819, - "grad_norm": 0.0011945561273023486, - "learning_rate": 0.00019999914459771357, - "loss": 46.0, - "step": 17230 - }, - { - "epoch": 1.3174302807882716, - "grad_norm": 0.0017985154408961535, - "learning_rate": 0.00019999914449836097, - "loss": 46.0, - "step": 17231 - }, - { - "epoch": 1.3175067377716612, - "grad_norm": 0.0009320020908489823, - "learning_rate": 0.00019999914439900262, - "loss": 46.0, - "step": 17232 - }, - { - "epoch": 1.317583194755051, - "grad_norm": 0.00032047543209046125, - "learning_rate": 0.00019999914429963848, - "loss": 46.0, - "step": 17233 - }, - { - "epoch": 1.3176596517384407, - "grad_norm": 0.0014175066025927663, - "learning_rate": 0.00019999914420026858, - "loss": 46.0, - "step": 17234 - }, - { - "epoch": 1.3177361087218304, - "grad_norm": 0.007659487891942263, - "learning_rate": 0.00019999914410089286, - "loss": 46.0, - "step": 17235 - }, - { - "epoch": 1.31781256570522, - "grad_norm": 0.0006222569500096142, - "learning_rate": 0.00019999914400151142, - "loss": 46.0, - "step": 17236 - }, - { - "epoch": 1.3178890226886097, - "grad_norm": 0.005294390022754669, - "learning_rate": 0.00019999914390212423, - "loss": 46.0, - "step": 17237 - }, - { - "epoch": 1.3179654796719995, - "grad_norm": 0.0004505709221120924, - "learning_rate": 0.00019999914380273125, - "loss": 46.0, - "step": 17238 - }, - { - "epoch": 1.3180419366553893, - "grad_norm": 0.004525754135102034, - "learning_rate": 0.00019999914370333248, - "loss": 46.0, - "step": 17239 - }, - { - "epoch": 1.318118393638779, - "grad_norm": 0.0014177690027281642, - "learning_rate": 0.00019999914360392795, - "loss": 46.0, - "step": 17240 - }, - { - "epoch": 1.3181948506221688, - "grad_norm": 0.0018735553603619337, - "learning_rate": 0.00019999914350451764, - "loss": 46.0, - "step": 17241 - }, - { - "epoch": 1.3182713076055586, - "grad_norm": 0.0017055986681953073, - "learning_rate": 0.00019999914340510156, - "loss": 46.0, - "step": 17242 - }, - { - "epoch": 1.318347764588948, - "grad_norm": 0.0007548227440565825, - "learning_rate": 0.00019999914330567974, - "loss": 46.0, - "step": 17243 - }, - { - "epoch": 1.3184242215723379, - "grad_norm": 0.0006721281679347157, - "learning_rate": 0.00019999914320625216, - "loss": 46.0, - "step": 17244 - }, - { - "epoch": 1.3185006785557276, - "grad_norm": 0.004983718506991863, - "learning_rate": 0.00019999914310681876, - "loss": 46.0, - "step": 17245 - }, - { - "epoch": 1.3185771355391174, - "grad_norm": 0.0030971700325608253, - "learning_rate": 0.00019999914300737964, - "loss": 46.0, - "step": 17246 - }, - { - "epoch": 1.318653592522507, - "grad_norm": 0.0027738006319850683, - "learning_rate": 0.00019999914290793472, - "loss": 46.0, - "step": 17247 - }, - { - "epoch": 1.3187300495058967, - "grad_norm": 0.001320280833169818, - "learning_rate": 0.00019999914280848403, - "loss": 46.0, - "step": 17248 - }, - { - "epoch": 1.3188065064892864, - "grad_norm": 0.003970456309616566, - "learning_rate": 0.00019999914270902759, - "loss": 46.0, - "step": 17249 - }, - { - "epoch": 1.3188829634726762, - "grad_norm": 0.0009836690733209252, - "learning_rate": 0.00019999914260956537, - "loss": 46.0, - "step": 17250 - }, - { - "epoch": 1.318959420456066, - "grad_norm": 0.0006394994561560452, - "learning_rate": 0.00019999914251009736, - "loss": 46.0, - "step": 17251 - }, - { - "epoch": 1.3190358774394557, - "grad_norm": 0.0011349063133820891, - "learning_rate": 0.0001999991424106236, - "loss": 46.0, - "step": 17252 - }, - { - "epoch": 1.3191123344228455, - "grad_norm": 0.001036398345604539, - "learning_rate": 0.00019999914231114406, - "loss": 46.0, - "step": 17253 - }, - { - "epoch": 1.319188791406235, - "grad_norm": 0.0017536289524286985, - "learning_rate": 0.00019999914221165876, - "loss": 46.0, - "step": 17254 - }, - { - "epoch": 1.3192652483896248, - "grad_norm": 0.0008155809482559562, - "learning_rate": 0.0001999991421121677, - "loss": 46.0, - "step": 17255 - }, - { - "epoch": 1.3193417053730145, - "grad_norm": 0.0010370647069066763, - "learning_rate": 0.00019999914201267088, - "loss": 46.0, - "step": 17256 - }, - { - "epoch": 1.3194181623564043, - "grad_norm": 0.0043812645599246025, - "learning_rate": 0.00019999914191316828, - "loss": 46.0, - "step": 17257 - }, - { - "epoch": 1.3194946193397938, - "grad_norm": 0.0005749440169893205, - "learning_rate": 0.00019999914181365988, - "loss": 46.0, - "step": 17258 - }, - { - "epoch": 1.3195710763231836, - "grad_norm": 0.004461383447051048, - "learning_rate": 0.00019999914171414576, - "loss": 46.0, - "step": 17259 - }, - { - "epoch": 1.3196475333065734, - "grad_norm": 0.002153238747268915, - "learning_rate": 0.00019999914161462584, - "loss": 46.0, - "step": 17260 - }, - { - "epoch": 1.3197239902899631, - "grad_norm": 0.00180660595651716, - "learning_rate": 0.00019999914151510014, - "loss": 46.0, - "step": 17261 - }, - { - "epoch": 1.3198004472733529, - "grad_norm": 0.0013833047123625875, - "learning_rate": 0.00019999914141556868, - "loss": 46.0, - "step": 17262 - }, - { - "epoch": 1.3198769042567426, - "grad_norm": 0.0014423176180571318, - "learning_rate": 0.00019999914131603146, - "loss": 46.0, - "step": 17263 - }, - { - "epoch": 1.3199533612401324, - "grad_norm": 0.0005700822221115232, - "learning_rate": 0.00019999914121648848, - "loss": 46.0, - "step": 17264 - }, - { - "epoch": 1.320029818223522, - "grad_norm": 0.0008115441305562854, - "learning_rate": 0.00019999914111693971, - "loss": 46.0, - "step": 17265 - }, - { - "epoch": 1.3201062752069117, - "grad_norm": 0.004543479066342115, - "learning_rate": 0.00019999914101738518, - "loss": 46.0, - "step": 17266 - }, - { - "epoch": 1.3201827321903015, - "grad_norm": 0.00228821556083858, - "learning_rate": 0.00019999914091782487, - "loss": 46.0, - "step": 17267 - }, - { - "epoch": 1.3202591891736912, - "grad_norm": 0.0040205069817602634, - "learning_rate": 0.00019999914081825882, - "loss": 46.0, - "step": 17268 - }, - { - "epoch": 1.3203356461570808, - "grad_norm": 0.0066104792058467865, - "learning_rate": 0.00019999914071868697, - "loss": 46.0, - "step": 17269 - }, - { - "epoch": 1.3204121031404705, - "grad_norm": 0.0067365881986916065, - "learning_rate": 0.00019999914061910937, - "loss": 46.0, - "step": 17270 - }, - { - "epoch": 1.3204885601238603, - "grad_norm": 0.0016946530668064952, - "learning_rate": 0.00019999914051952597, - "loss": 46.0, - "step": 17271 - }, - { - "epoch": 1.32056501710725, - "grad_norm": 0.002005101880058646, - "learning_rate": 0.00019999914041993685, - "loss": 46.0, - "step": 17272 - }, - { - "epoch": 1.3206414740906398, - "grad_norm": 0.003516450524330139, - "learning_rate": 0.00019999914032034193, - "loss": 46.0, - "step": 17273 - }, - { - "epoch": 1.3207179310740296, - "grad_norm": 0.0017321519553661346, - "learning_rate": 0.00019999914022074123, - "loss": 46.0, - "step": 17274 - }, - { - "epoch": 1.3207943880574193, - "grad_norm": 0.004706586245447397, - "learning_rate": 0.0001999991401211348, - "loss": 46.0, - "step": 17275 - }, - { - "epoch": 1.3208708450408089, - "grad_norm": 0.0007195802172645926, - "learning_rate": 0.00019999914002152256, - "loss": 46.0, - "step": 17276 - }, - { - "epoch": 1.3209473020241986, - "grad_norm": 0.00174514704849571, - "learning_rate": 0.00019999913992190454, - "loss": 46.0, - "step": 17277 - }, - { - "epoch": 1.3210237590075884, - "grad_norm": 0.005880001001060009, - "learning_rate": 0.00019999913982228078, - "loss": 46.0, - "step": 17278 - }, - { - "epoch": 1.3211002159909782, - "grad_norm": 0.0021151211112737656, - "learning_rate": 0.00019999913972265128, - "loss": 46.0, - "step": 17279 - }, - { - "epoch": 1.3211766729743677, - "grad_norm": 0.0013669688487425447, - "learning_rate": 0.00019999913962301594, - "loss": 46.0, - "step": 17280 - }, - { - "epoch": 1.3212531299577575, - "grad_norm": 0.0009188097901642323, - "learning_rate": 0.0001999991395233749, - "loss": 46.0, - "step": 17281 - }, - { - "epoch": 1.3213295869411472, - "grad_norm": 0.0017044630367308855, - "learning_rate": 0.00019999913942372804, - "loss": 46.0, - "step": 17282 - }, - { - "epoch": 1.321406043924537, - "grad_norm": 0.0013585459673777223, - "learning_rate": 0.00019999913932407544, - "loss": 46.0, - "step": 17283 - }, - { - "epoch": 1.3214825009079267, - "grad_norm": 0.0010362127795815468, - "learning_rate": 0.00019999913922441706, - "loss": 46.0, - "step": 17284 - }, - { - "epoch": 1.3215589578913165, - "grad_norm": 0.0021890075877308846, - "learning_rate": 0.0001999991391247529, - "loss": 46.0, - "step": 17285 - }, - { - "epoch": 1.321635414874706, - "grad_norm": 0.0016954620368778706, - "learning_rate": 0.00019999913902508297, - "loss": 46.0, - "step": 17286 - }, - { - "epoch": 1.3217118718580958, - "grad_norm": 0.0016661520348861814, - "learning_rate": 0.0001999991389254073, - "loss": 46.0, - "step": 17287 - }, - { - "epoch": 1.3217883288414856, - "grad_norm": 0.0007691810606047511, - "learning_rate": 0.00019999913882572584, - "loss": 46.0, - "step": 17288 - }, - { - "epoch": 1.3218647858248753, - "grad_norm": 0.0009226563270203769, - "learning_rate": 0.00019999913872603863, - "loss": 46.0, - "step": 17289 - }, - { - "epoch": 1.321941242808265, - "grad_norm": 0.0021697941701859236, - "learning_rate": 0.0001999991386263456, - "loss": 46.0, - "step": 17290 - }, - { - "epoch": 1.3220176997916546, - "grad_norm": 0.0017355961026623845, - "learning_rate": 0.00019999913852664685, - "loss": 46.0, - "step": 17291 - }, - { - "epoch": 1.3220941567750444, - "grad_norm": 0.0017698290757834911, - "learning_rate": 0.00019999913842694232, - "loss": 46.0, - "step": 17292 - }, - { - "epoch": 1.3221706137584341, - "grad_norm": 0.004600329790264368, - "learning_rate": 0.00019999913832723201, - "loss": 46.0, - "step": 17293 - }, - { - "epoch": 1.322247070741824, - "grad_norm": 0.0003432325320318341, - "learning_rate": 0.00019999913822751593, - "loss": 46.0, - "step": 17294 - }, - { - "epoch": 1.3223235277252137, - "grad_norm": 0.006027223076671362, - "learning_rate": 0.00019999913812779408, - "loss": 46.0, - "step": 17295 - }, - { - "epoch": 1.3223999847086034, - "grad_norm": 0.0016782063758000731, - "learning_rate": 0.00019999913802806646, - "loss": 46.0, - "step": 17296 - }, - { - "epoch": 1.322476441691993, - "grad_norm": 0.0010369824012741446, - "learning_rate": 0.00019999913792833308, - "loss": 46.0, - "step": 17297 - }, - { - "epoch": 1.3225528986753827, - "grad_norm": 0.00047666847240179777, - "learning_rate": 0.0001999991378285939, - "loss": 46.0, - "step": 17298 - }, - { - "epoch": 1.3226293556587725, - "grad_norm": 0.0015767350560054183, - "learning_rate": 0.00019999913772884902, - "loss": 46.0, - "step": 17299 - }, - { - "epoch": 1.3227058126421622, - "grad_norm": 0.0014668862568214536, - "learning_rate": 0.00019999913762909832, - "loss": 46.0, - "step": 17300 - }, - { - "epoch": 1.3227822696255518, - "grad_norm": 0.0008369010174646974, - "learning_rate": 0.00019999913752934189, - "loss": 46.0, - "step": 17301 - }, - { - "epoch": 1.3228587266089415, - "grad_norm": 0.000895605597179383, - "learning_rate": 0.00019999913742957962, - "loss": 46.0, - "step": 17302 - }, - { - "epoch": 1.3229351835923313, - "grad_norm": 0.0012225806713104248, - "learning_rate": 0.00019999913732981163, - "loss": 46.0, - "step": 17303 - }, - { - "epoch": 1.323011640575721, - "grad_norm": 0.0007701747235842049, - "learning_rate": 0.00019999913723003787, - "loss": 46.0, - "step": 17304 - }, - { - "epoch": 1.3230880975591108, - "grad_norm": 0.0016745930770412087, - "learning_rate": 0.00019999913713025834, - "loss": 46.0, - "step": 17305 - }, - { - "epoch": 1.3231645545425006, - "grad_norm": 0.002976943738758564, - "learning_rate": 0.000199999137030473, - "loss": 46.0, - "step": 17306 - }, - { - "epoch": 1.3232410115258904, - "grad_norm": 0.004786735400557518, - "learning_rate": 0.00019999913693068196, - "loss": 46.0, - "step": 17307 - }, - { - "epoch": 1.32331746850928, - "grad_norm": 0.01070769689977169, - "learning_rate": 0.0001999991368308851, - "loss": 46.0, - "step": 17308 - }, - { - "epoch": 1.3233939254926697, - "grad_norm": 0.000870837306138128, - "learning_rate": 0.00019999913673108248, - "loss": 46.0, - "step": 17309 - }, - { - "epoch": 1.3234703824760594, - "grad_norm": 0.003156396560370922, - "learning_rate": 0.0001999991366312741, - "loss": 46.0, - "step": 17310 - }, - { - "epoch": 1.3235468394594492, - "grad_norm": 0.0007843500934541225, - "learning_rate": 0.00019999913653145993, - "loss": 46.0, - "step": 17311 - }, - { - "epoch": 1.3236232964428387, - "grad_norm": 0.001117914798669517, - "learning_rate": 0.00019999913643164004, - "loss": 46.0, - "step": 17312 - }, - { - "epoch": 1.3236997534262285, - "grad_norm": 0.016299236565828323, - "learning_rate": 0.00019999913633181432, - "loss": 46.0, - "step": 17313 - }, - { - "epoch": 1.3237762104096182, - "grad_norm": 0.0009847644250839949, - "learning_rate": 0.00019999913623198286, - "loss": 46.0, - "step": 17314 - }, - { - "epoch": 1.323852667393008, - "grad_norm": 0.001436815713532269, - "learning_rate": 0.00019999913613214562, - "loss": 46.0, - "step": 17315 - }, - { - "epoch": 1.3239291243763978, - "grad_norm": 0.00039300520438700914, - "learning_rate": 0.00019999913603230263, - "loss": 46.0, - "step": 17316 - }, - { - "epoch": 1.3240055813597875, - "grad_norm": 0.005245849024504423, - "learning_rate": 0.00019999913593245384, - "loss": 46.0, - "step": 17317 - }, - { - "epoch": 1.3240820383431773, - "grad_norm": 0.0007793670520186424, - "learning_rate": 0.0001999991358325993, - "loss": 46.0, - "step": 17318 - }, - { - "epoch": 1.3241584953265668, - "grad_norm": 0.0005983553710393608, - "learning_rate": 0.000199999135732739, - "loss": 46.0, - "step": 17319 - }, - { - "epoch": 1.3242349523099566, - "grad_norm": 0.001569002284668386, - "learning_rate": 0.00019999913563287293, - "loss": 46.0, - "step": 17320 - }, - { - "epoch": 1.3243114092933463, - "grad_norm": 0.001614886336028576, - "learning_rate": 0.0001999991355330011, - "loss": 46.0, - "step": 17321 - }, - { - "epoch": 1.324387866276736, - "grad_norm": 0.0013751612277701497, - "learning_rate": 0.00019999913543312348, - "loss": 46.0, - "step": 17322 - }, - { - "epoch": 1.3244643232601256, - "grad_norm": 0.0022565105464309454, - "learning_rate": 0.00019999913533324008, - "loss": 46.0, - "step": 17323 - }, - { - "epoch": 1.3245407802435154, - "grad_norm": 0.002738203154876828, - "learning_rate": 0.00019999913523335093, - "loss": 46.0, - "step": 17324 - }, - { - "epoch": 1.3246172372269052, - "grad_norm": 0.000623198866378516, - "learning_rate": 0.000199999135133456, - "loss": 46.0, - "step": 17325 - }, - { - "epoch": 1.324693694210295, - "grad_norm": 0.0009948090882971883, - "learning_rate": 0.0001999991350335553, - "loss": 46.0, - "step": 17326 - }, - { - "epoch": 1.3247701511936847, - "grad_norm": 0.00345505285076797, - "learning_rate": 0.00019999913493364883, - "loss": 46.0, - "step": 17327 - }, - { - "epoch": 1.3248466081770744, - "grad_norm": 0.0011185671901330352, - "learning_rate": 0.0001999991348337366, - "loss": 46.0, - "step": 17328 - }, - { - "epoch": 1.3249230651604642, - "grad_norm": 0.002106211381033063, - "learning_rate": 0.0001999991347338186, - "loss": 46.0, - "step": 17329 - }, - { - "epoch": 1.3249995221438537, - "grad_norm": 0.0007250930066220462, - "learning_rate": 0.00019999913463389482, - "loss": 46.0, - "step": 17330 - }, - { - "epoch": 1.3250759791272435, - "grad_norm": 0.0012351027689874172, - "learning_rate": 0.00019999913453396526, - "loss": 46.0, - "step": 17331 - }, - { - "epoch": 1.3251524361106333, - "grad_norm": 0.0006779627874493599, - "learning_rate": 0.00019999913443402996, - "loss": 46.0, - "step": 17332 - }, - { - "epoch": 1.325228893094023, - "grad_norm": 0.0008195066475309432, - "learning_rate": 0.00019999913433408888, - "loss": 46.0, - "step": 17333 - }, - { - "epoch": 1.3253053500774126, - "grad_norm": 0.0010583183029666543, - "learning_rate": 0.00019999913423414202, - "loss": 46.0, - "step": 17334 - }, - { - "epoch": 1.3253818070608023, - "grad_norm": 0.0011396575719118118, - "learning_rate": 0.0001999991341341894, - "loss": 46.0, - "step": 17335 - }, - { - "epoch": 1.325458264044192, - "grad_norm": 0.0007618501549586654, - "learning_rate": 0.000199999134034231, - "loss": 46.0, - "step": 17336 - }, - { - "epoch": 1.3255347210275819, - "grad_norm": 0.0013859316240996122, - "learning_rate": 0.00019999913393426686, - "loss": 46.0, - "step": 17337 - }, - { - "epoch": 1.3256111780109716, - "grad_norm": 0.002333087846636772, - "learning_rate": 0.00019999913383429694, - "loss": 46.0, - "step": 17338 - }, - { - "epoch": 1.3256876349943614, - "grad_norm": 0.0005865683197043836, - "learning_rate": 0.00019999913373432122, - "loss": 46.0, - "step": 17339 - }, - { - "epoch": 1.3257640919777511, - "grad_norm": 0.0016845760401338339, - "learning_rate": 0.00019999913363433978, - "loss": 46.0, - "step": 17340 - }, - { - "epoch": 1.3258405489611407, - "grad_norm": 0.0005724859656766057, - "learning_rate": 0.00019999913353435254, - "loss": 46.0, - "step": 17341 - }, - { - "epoch": 1.3259170059445304, - "grad_norm": 0.0020432285964488983, - "learning_rate": 0.00019999913343435953, - "loss": 46.0, - "step": 17342 - }, - { - "epoch": 1.3259934629279202, - "grad_norm": 0.0016204017447307706, - "learning_rate": 0.00019999913333436074, - "loss": 46.0, - "step": 17343 - }, - { - "epoch": 1.32606991991131, - "grad_norm": 0.0010332120582461357, - "learning_rate": 0.0001999991332343562, - "loss": 46.0, - "step": 17344 - }, - { - "epoch": 1.3261463768946995, - "grad_norm": 0.00043746328447014093, - "learning_rate": 0.0001999991331343459, - "loss": 46.0, - "step": 17345 - }, - { - "epoch": 1.3262228338780893, - "grad_norm": 0.0010063236113637686, - "learning_rate": 0.00019999913303432983, - "loss": 46.0, - "step": 17346 - }, - { - "epoch": 1.326299290861479, - "grad_norm": 0.004033395554870367, - "learning_rate": 0.00019999913293430798, - "loss": 46.0, - "step": 17347 - }, - { - "epoch": 1.3263757478448688, - "grad_norm": 0.0006785108125768602, - "learning_rate": 0.00019999913283428033, - "loss": 46.0, - "step": 17348 - }, - { - "epoch": 1.3264522048282585, - "grad_norm": 0.0010622142581269145, - "learning_rate": 0.00019999913273424693, - "loss": 46.0, - "step": 17349 - }, - { - "epoch": 1.3265286618116483, - "grad_norm": 0.0031055437866598368, - "learning_rate": 0.00019999913263420778, - "loss": 46.0, - "step": 17350 - }, - { - "epoch": 1.326605118795038, - "grad_norm": 0.00039451938937418163, - "learning_rate": 0.00019999913253416284, - "loss": 46.0, - "step": 17351 - }, - { - "epoch": 1.3266815757784276, - "grad_norm": 0.0008647080976516008, - "learning_rate": 0.00019999913243411215, - "loss": 46.0, - "step": 17352 - }, - { - "epoch": 1.3267580327618174, - "grad_norm": 0.001688301214016974, - "learning_rate": 0.00019999913233405568, - "loss": 46.0, - "step": 17353 - }, - { - "epoch": 1.3268344897452071, - "grad_norm": 0.0012700185179710388, - "learning_rate": 0.00019999913223399344, - "loss": 46.0, - "step": 17354 - }, - { - "epoch": 1.3269109467285969, - "grad_norm": 0.0019978515338152647, - "learning_rate": 0.00019999913213392546, - "loss": 46.0, - "step": 17355 - }, - { - "epoch": 1.3269874037119864, - "grad_norm": 0.0013060306664556265, - "learning_rate": 0.00019999913203385165, - "loss": 46.0, - "step": 17356 - }, - { - "epoch": 1.3270638606953762, - "grad_norm": 0.0015286528505384922, - "learning_rate": 0.00019999913193377212, - "loss": 46.0, - "step": 17357 - }, - { - "epoch": 1.327140317678766, - "grad_norm": 0.005105131771415472, - "learning_rate": 0.00019999913183368678, - "loss": 46.0, - "step": 17358 - }, - { - "epoch": 1.3272167746621557, - "grad_norm": 0.000762317271437496, - "learning_rate": 0.0001999991317335957, - "loss": 46.0, - "step": 17359 - }, - { - "epoch": 1.3272932316455455, - "grad_norm": 0.0011469654273241758, - "learning_rate": 0.00019999913163349885, - "loss": 46.0, - "step": 17360 - }, - { - "epoch": 1.3273696886289352, - "grad_norm": 0.002654470968991518, - "learning_rate": 0.00019999913153339623, - "loss": 46.0, - "step": 17361 - }, - { - "epoch": 1.327446145612325, - "grad_norm": 0.0003295818460173905, - "learning_rate": 0.00019999913143328783, - "loss": 46.0, - "step": 17362 - }, - { - "epoch": 1.3275226025957145, - "grad_norm": 0.0011115336092188954, - "learning_rate": 0.00019999913133317366, - "loss": 46.0, - "step": 17363 - }, - { - "epoch": 1.3275990595791043, - "grad_norm": 0.008734781295061111, - "learning_rate": 0.00019999913123305374, - "loss": 46.0, - "step": 17364 - }, - { - "epoch": 1.327675516562494, - "grad_norm": 0.003452907083556056, - "learning_rate": 0.00019999913113292802, - "loss": 46.0, - "step": 17365 - }, - { - "epoch": 1.3277519735458838, - "grad_norm": 0.0008871277095749974, - "learning_rate": 0.00019999913103279656, - "loss": 46.0, - "step": 17366 - }, - { - "epoch": 1.3278284305292734, - "grad_norm": 0.0012537275906652212, - "learning_rate": 0.00019999913093265932, - "loss": 46.0, - "step": 17367 - }, - { - "epoch": 1.3279048875126631, - "grad_norm": 0.001192585565149784, - "learning_rate": 0.0001999991308325163, - "loss": 46.0, - "step": 17368 - }, - { - "epoch": 1.3279813444960529, - "grad_norm": 0.0018154066056013107, - "learning_rate": 0.00019999913073236753, - "loss": 46.0, - "step": 17369 - }, - { - "epoch": 1.3280578014794426, - "grad_norm": 0.0013291678624227643, - "learning_rate": 0.000199999130632213, - "loss": 46.0, - "step": 17370 - }, - { - "epoch": 1.3281342584628324, - "grad_norm": 0.0005590153159573674, - "learning_rate": 0.00019999913053205266, - "loss": 46.0, - "step": 17371 - }, - { - "epoch": 1.3282107154462222, - "grad_norm": 0.0012035639956593513, - "learning_rate": 0.00019999913043188659, - "loss": 46.0, - "step": 17372 - }, - { - "epoch": 1.328287172429612, - "grad_norm": 0.0034630370792001486, - "learning_rate": 0.0001999991303317147, - "loss": 46.0, - "step": 17373 - }, - { - "epoch": 1.3283636294130015, - "grad_norm": 0.0009711846360005438, - "learning_rate": 0.00019999913023153708, - "loss": 46.0, - "step": 17374 - }, - { - "epoch": 1.3284400863963912, - "grad_norm": 0.004778137430548668, - "learning_rate": 0.00019999913013135368, - "loss": 46.0, - "step": 17375 - }, - { - "epoch": 1.328516543379781, - "grad_norm": 0.0011376874754205346, - "learning_rate": 0.00019999913003116454, - "loss": 46.0, - "step": 17376 - }, - { - "epoch": 1.3285930003631707, - "grad_norm": 0.00118243636097759, - "learning_rate": 0.0001999991299309696, - "loss": 46.0, - "step": 17377 - }, - { - "epoch": 1.3286694573465603, - "grad_norm": 0.004341739695519209, - "learning_rate": 0.0001999991298307689, - "loss": 46.0, - "step": 17378 - }, - { - "epoch": 1.32874591432995, - "grad_norm": 0.0004418192838784307, - "learning_rate": 0.00019999912973056244, - "loss": 46.0, - "step": 17379 - }, - { - "epoch": 1.3288223713133398, - "grad_norm": 0.00048639284796081483, - "learning_rate": 0.00019999912963035018, - "loss": 46.0, - "step": 17380 - }, - { - "epoch": 1.3288988282967296, - "grad_norm": 0.0012268339050933719, - "learning_rate": 0.00019999912953013217, - "loss": 46.0, - "step": 17381 - }, - { - "epoch": 1.3289752852801193, - "grad_norm": 0.005768474191427231, - "learning_rate": 0.00019999912942990838, - "loss": 46.0, - "step": 17382 - }, - { - "epoch": 1.329051742263509, - "grad_norm": 0.00395636260509491, - "learning_rate": 0.00019999912932967882, - "loss": 46.0, - "step": 17383 - }, - { - "epoch": 1.3291281992468988, - "grad_norm": 0.0019664298743009567, - "learning_rate": 0.0001999991292294435, - "loss": 46.0, - "step": 17384 - }, - { - "epoch": 1.3292046562302884, - "grad_norm": 0.0016989157302305102, - "learning_rate": 0.0001999991291292024, - "loss": 46.0, - "step": 17385 - }, - { - "epoch": 1.3292811132136781, - "grad_norm": 0.00261163292452693, - "learning_rate": 0.00019999912902895556, - "loss": 46.0, - "step": 17386 - }, - { - "epoch": 1.329357570197068, - "grad_norm": 0.002815736923366785, - "learning_rate": 0.0001999991289287029, - "loss": 46.0, - "step": 17387 - }, - { - "epoch": 1.3294340271804577, - "grad_norm": 0.0008977506658993661, - "learning_rate": 0.00019999912882844454, - "loss": 46.0, - "step": 17388 - }, - { - "epoch": 1.3295104841638472, - "grad_norm": 0.0004119258082937449, - "learning_rate": 0.00019999912872818034, - "loss": 46.0, - "step": 17389 - }, - { - "epoch": 1.329586941147237, - "grad_norm": 0.0009729574667289853, - "learning_rate": 0.00019999912862791043, - "loss": 46.0, - "step": 17390 - }, - { - "epoch": 1.3296633981306267, - "grad_norm": 0.0008385512628592551, - "learning_rate": 0.0001999991285276347, - "loss": 46.0, - "step": 17391 - }, - { - "epoch": 1.3297398551140165, - "grad_norm": 0.0010143026011064649, - "learning_rate": 0.00019999912842735325, - "loss": 46.0, - "step": 17392 - }, - { - "epoch": 1.3298163120974063, - "grad_norm": 0.0022218613885343075, - "learning_rate": 0.000199999128327066, - "loss": 46.0, - "step": 17393 - }, - { - "epoch": 1.329892769080796, - "grad_norm": 0.0006187410326674581, - "learning_rate": 0.00019999912822677297, - "loss": 46.0, - "step": 17394 - }, - { - "epoch": 1.3299692260641858, - "grad_norm": 0.001159863080829382, - "learning_rate": 0.0001999991281264742, - "loss": 46.0, - "step": 17395 - }, - { - "epoch": 1.3300456830475753, - "grad_norm": 0.003690300974994898, - "learning_rate": 0.00019999912802616963, - "loss": 46.0, - "step": 17396 - }, - { - "epoch": 1.330122140030965, - "grad_norm": 0.0008360066567547619, - "learning_rate": 0.0001999991279258593, - "loss": 46.0, - "step": 17397 - }, - { - "epoch": 1.3301985970143548, - "grad_norm": 0.004908973351120949, - "learning_rate": 0.00019999912782554322, - "loss": 46.0, - "step": 17398 - }, - { - "epoch": 1.3302750539977446, - "grad_norm": 0.0005259255995042622, - "learning_rate": 0.00019999912772522137, - "loss": 46.0, - "step": 17399 - }, - { - "epoch": 1.3303515109811341, - "grad_norm": 0.00280459295026958, - "learning_rate": 0.00019999912762489372, - "loss": 46.0, - "step": 17400 - }, - { - "epoch": 1.330427967964524, - "grad_norm": 0.00237468839623034, - "learning_rate": 0.00019999912752456035, - "loss": 46.0, - "step": 17401 - }, - { - "epoch": 1.3305044249479137, - "grad_norm": 0.0003370768390595913, - "learning_rate": 0.00019999912742422115, - "loss": 46.0, - "step": 17402 - }, - { - "epoch": 1.3305808819313034, - "grad_norm": 0.0010442129569128156, - "learning_rate": 0.0001999991273238762, - "loss": 46.0, - "step": 17403 - }, - { - "epoch": 1.3306573389146932, - "grad_norm": 0.0016302593285217881, - "learning_rate": 0.0001999991272235255, - "loss": 46.0, - "step": 17404 - }, - { - "epoch": 1.330733795898083, - "grad_norm": 0.0019970976281911135, - "learning_rate": 0.00019999912712316903, - "loss": 46.0, - "step": 17405 - }, - { - "epoch": 1.3308102528814727, - "grad_norm": 0.0015073231188580394, - "learning_rate": 0.0001999991270228068, - "loss": 46.0, - "step": 17406 - }, - { - "epoch": 1.3308867098648622, - "grad_norm": 0.003061566036194563, - "learning_rate": 0.00019999912692243875, - "loss": 46.0, - "step": 17407 - }, - { - "epoch": 1.330963166848252, - "grad_norm": 0.003795386990532279, - "learning_rate": 0.000199999126822065, - "loss": 46.0, - "step": 17408 - }, - { - "epoch": 1.3310396238316418, - "grad_norm": 0.0045354003086686134, - "learning_rate": 0.0001999991267216854, - "loss": 46.0, - "step": 17409 - }, - { - "epoch": 1.3311160808150315, - "grad_norm": 0.000849285046570003, - "learning_rate": 0.00019999912662130008, - "loss": 46.0, - "step": 17410 - }, - { - "epoch": 1.331192537798421, - "grad_norm": 0.0013559279032051563, - "learning_rate": 0.000199999126520909, - "loss": 46.0, - "step": 17411 - }, - { - "epoch": 1.3312689947818108, - "grad_norm": 0.0004952421295456588, - "learning_rate": 0.00019999912642051215, - "loss": 46.0, - "step": 17412 - }, - { - "epoch": 1.3313454517652006, - "grad_norm": 0.0004372815601527691, - "learning_rate": 0.00019999912632010948, - "loss": 46.0, - "step": 17413 - }, - { - "epoch": 1.3314219087485903, - "grad_norm": 0.003960534930229187, - "learning_rate": 0.0001999991262197011, - "loss": 46.0, - "step": 17414 - }, - { - "epoch": 1.33149836573198, - "grad_norm": 0.0007412740960717201, - "learning_rate": 0.00019999912611928694, - "loss": 46.0, - "step": 17415 - }, - { - "epoch": 1.3315748227153699, - "grad_norm": 0.00043966012890450656, - "learning_rate": 0.000199999126018867, - "loss": 46.0, - "step": 17416 - }, - { - "epoch": 1.3316512796987594, - "grad_norm": 0.015806972980499268, - "learning_rate": 0.00019999912591844128, - "loss": 46.0, - "step": 17417 - }, - { - "epoch": 1.3317277366821492, - "grad_norm": 0.0011110759805887938, - "learning_rate": 0.00019999912581800979, - "loss": 46.0, - "step": 17418 - }, - { - "epoch": 1.331804193665539, - "grad_norm": 0.0011276552686467767, - "learning_rate": 0.00019999912571757252, - "loss": 46.0, - "step": 17419 - }, - { - "epoch": 1.3318806506489287, - "grad_norm": 0.0028547486290335655, - "learning_rate": 0.0001999991256171295, - "loss": 46.0, - "step": 17420 - }, - { - "epoch": 1.3319571076323184, - "grad_norm": 0.005724901333451271, - "learning_rate": 0.00019999912551668073, - "loss": 46.0, - "step": 17421 - }, - { - "epoch": 1.332033564615708, - "grad_norm": 0.001001560129225254, - "learning_rate": 0.00019999912541622617, - "loss": 46.0, - "step": 17422 - }, - { - "epoch": 1.3321100215990977, - "grad_norm": 0.011771823279559612, - "learning_rate": 0.00019999912531576587, - "loss": 46.0, - "step": 17423 - }, - { - "epoch": 1.3321864785824875, - "grad_norm": 0.0005923372809775174, - "learning_rate": 0.00019999912521529977, - "loss": 46.0, - "step": 17424 - }, - { - "epoch": 1.3322629355658773, - "grad_norm": 0.0007965003605931997, - "learning_rate": 0.0001999991251148279, - "loss": 46.0, - "step": 17425 - }, - { - "epoch": 1.332339392549267, - "grad_norm": 0.0034073968417942524, - "learning_rate": 0.00019999912501435027, - "loss": 46.0, - "step": 17426 - }, - { - "epoch": 1.3324158495326568, - "grad_norm": 0.0006852917140349746, - "learning_rate": 0.00019999912491386684, - "loss": 46.0, - "step": 17427 - }, - { - "epoch": 1.3324923065160463, - "grad_norm": 0.0002649962261784822, - "learning_rate": 0.00019999912481337767, - "loss": 46.0, - "step": 17428 - }, - { - "epoch": 1.332568763499436, - "grad_norm": 0.00125643250066787, - "learning_rate": 0.00019999912471288273, - "loss": 46.0, - "step": 17429 - }, - { - "epoch": 1.3326452204828259, - "grad_norm": 0.008375952951610088, - "learning_rate": 0.000199999124612382, - "loss": 46.0, - "step": 17430 - }, - { - "epoch": 1.3327216774662156, - "grad_norm": 0.002601398155093193, - "learning_rate": 0.00019999912451187552, - "loss": 46.0, - "step": 17431 - }, - { - "epoch": 1.3327981344496052, - "grad_norm": 0.002572769531980157, - "learning_rate": 0.00019999912441136329, - "loss": 46.0, - "step": 17432 - }, - { - "epoch": 1.332874591432995, - "grad_norm": 0.007171506993472576, - "learning_rate": 0.00019999912431084528, - "loss": 46.0, - "step": 17433 - }, - { - "epoch": 1.3329510484163847, - "grad_norm": 0.0010910426499322057, - "learning_rate": 0.00019999912421032147, - "loss": 46.0, - "step": 17434 - }, - { - "epoch": 1.3330275053997744, - "grad_norm": 0.001661907765083015, - "learning_rate": 0.0001999991241097919, - "loss": 46.0, - "step": 17435 - }, - { - "epoch": 1.3331039623831642, - "grad_norm": 0.004399863071739674, - "learning_rate": 0.00019999912400925658, - "loss": 46.0, - "step": 17436 - }, - { - "epoch": 1.333180419366554, - "grad_norm": 0.0007236532401293516, - "learning_rate": 0.00019999912390871548, - "loss": 46.0, - "step": 17437 - }, - { - "epoch": 1.3332568763499437, - "grad_norm": 0.0009657396003603935, - "learning_rate": 0.0001999991238081686, - "loss": 46.0, - "step": 17438 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.0004946592380292714, - "learning_rate": 0.00019999912370761598, - "loss": 46.0, - "step": 17439 - }, - { - "epoch": 1.333409790316723, - "grad_norm": 0.0009828481124714017, - "learning_rate": 0.00019999912360705758, - "loss": 46.0, - "step": 17440 - }, - { - "epoch": 1.3334862473001128, - "grad_norm": 0.0016624336130917072, - "learning_rate": 0.00019999912350649338, - "loss": 46.0, - "step": 17441 - }, - { - "epoch": 1.3335627042835025, - "grad_norm": 0.0024372676853090525, - "learning_rate": 0.00019999912340592344, - "loss": 46.0, - "step": 17442 - }, - { - "epoch": 1.333639161266892, - "grad_norm": 0.004044575151056051, - "learning_rate": 0.00019999912330534772, - "loss": 46.0, - "step": 17443 - }, - { - "epoch": 1.3337156182502818, - "grad_norm": 0.002666729735210538, - "learning_rate": 0.00019999912320476623, - "loss": 46.0, - "step": 17444 - }, - { - "epoch": 1.3337920752336716, - "grad_norm": 0.0012696458725258708, - "learning_rate": 0.000199999123104179, - "loss": 46.0, - "step": 17445 - }, - { - "epoch": 1.3338685322170614, - "grad_norm": 0.0024716819170862436, - "learning_rate": 0.00019999912300358593, - "loss": 46.0, - "step": 17446 - }, - { - "epoch": 1.3339449892004511, - "grad_norm": 0.017424043267965317, - "learning_rate": 0.00019999912290298718, - "loss": 46.0, - "step": 17447 - }, - { - "epoch": 1.3340214461838409, - "grad_norm": 0.0008413971518166363, - "learning_rate": 0.0001999991228023826, - "loss": 46.0, - "step": 17448 - }, - { - "epoch": 1.3340979031672306, - "grad_norm": 0.0008915279759094119, - "learning_rate": 0.00019999912270177227, - "loss": 46.0, - "step": 17449 - }, - { - "epoch": 1.3341743601506202, - "grad_norm": 0.0019343190360814333, - "learning_rate": 0.00019999912260115616, - "loss": 46.0, - "step": 17450 - }, - { - "epoch": 1.33425081713401, - "grad_norm": 0.0018069229554384947, - "learning_rate": 0.0001999991225005343, - "loss": 46.0, - "step": 17451 - }, - { - "epoch": 1.3343272741173997, - "grad_norm": 0.005727724637836218, - "learning_rate": 0.00019999912239990666, - "loss": 46.0, - "step": 17452 - }, - { - "epoch": 1.3344037311007895, - "grad_norm": 0.0010423107305541635, - "learning_rate": 0.00019999912229927324, - "loss": 46.0, - "step": 17453 - }, - { - "epoch": 1.334480188084179, - "grad_norm": 0.0004155821225140244, - "learning_rate": 0.00019999912219863407, - "loss": 46.0, - "step": 17454 - }, - { - "epoch": 1.3345566450675688, - "grad_norm": 0.0008572808001190424, - "learning_rate": 0.00019999912209798913, - "loss": 46.0, - "step": 17455 - }, - { - "epoch": 1.3346331020509585, - "grad_norm": 0.0019973027519881725, - "learning_rate": 0.0001999991219973384, - "loss": 46.0, - "step": 17456 - }, - { - "epoch": 1.3347095590343483, - "grad_norm": 0.0005126166506670415, - "learning_rate": 0.0001999991218966819, - "loss": 46.0, - "step": 17457 - }, - { - "epoch": 1.334786016017738, - "grad_norm": 0.0015972558176144958, - "learning_rate": 0.00019999912179601966, - "loss": 46.0, - "step": 17458 - }, - { - "epoch": 1.3348624730011278, - "grad_norm": 0.003226458327844739, - "learning_rate": 0.00019999912169535162, - "loss": 46.0, - "step": 17459 - }, - { - "epoch": 1.3349389299845176, - "grad_norm": 0.0010951668955385685, - "learning_rate": 0.00019999912159467784, - "loss": 46.0, - "step": 17460 - }, - { - "epoch": 1.3350153869679071, - "grad_norm": 0.0008140777354128659, - "learning_rate": 0.00019999912149399826, - "loss": 46.0, - "step": 17461 - }, - { - "epoch": 1.3350918439512969, - "grad_norm": 0.0005094542866572738, - "learning_rate": 0.00019999912139331293, - "loss": 46.0, - "step": 17462 - }, - { - "epoch": 1.3351683009346866, - "grad_norm": 0.0013758456334471703, - "learning_rate": 0.00019999912129262183, - "loss": 46.0, - "step": 17463 - }, - { - "epoch": 1.3352447579180764, - "grad_norm": 0.002713556634262204, - "learning_rate": 0.00019999912119192495, - "loss": 46.0, - "step": 17464 - }, - { - "epoch": 1.335321214901466, - "grad_norm": 0.0011309538967907429, - "learning_rate": 0.0001999991210912223, - "loss": 46.0, - "step": 17465 - }, - { - "epoch": 1.3353976718848557, - "grad_norm": 0.0012767646694555879, - "learning_rate": 0.0001999991209905139, - "loss": 46.0, - "step": 17466 - }, - { - "epoch": 1.3354741288682455, - "grad_norm": 0.001357071683742106, - "learning_rate": 0.00019999912088979974, - "loss": 46.0, - "step": 17467 - }, - { - "epoch": 1.3355505858516352, - "grad_norm": 0.009967680089175701, - "learning_rate": 0.00019999912078907977, - "loss": 46.0, - "step": 17468 - }, - { - "epoch": 1.335627042835025, - "grad_norm": 0.0008076328667812049, - "learning_rate": 0.00019999912068835405, - "loss": 46.0, - "step": 17469 - }, - { - "epoch": 1.3357034998184147, - "grad_norm": 0.001356467604637146, - "learning_rate": 0.00019999912058762256, - "loss": 46.0, - "step": 17470 - }, - { - "epoch": 1.3357799568018045, - "grad_norm": 0.0007080992218106985, - "learning_rate": 0.0001999991204868853, - "loss": 46.0, - "step": 17471 - }, - { - "epoch": 1.335856413785194, - "grad_norm": 0.0005376964691095054, - "learning_rate": 0.00019999912038614227, - "loss": 46.0, - "step": 17472 - }, - { - "epoch": 1.3359328707685838, - "grad_norm": 0.0011065345024690032, - "learning_rate": 0.00019999912028539348, - "loss": 46.0, - "step": 17473 - }, - { - "epoch": 1.3360093277519736, - "grad_norm": 0.0005677349399775267, - "learning_rate": 0.0001999991201846389, - "loss": 46.0, - "step": 17474 - }, - { - "epoch": 1.3360857847353633, - "grad_norm": 0.0005541481077671051, - "learning_rate": 0.00019999912008387857, - "loss": 46.0, - "step": 17475 - }, - { - "epoch": 1.3361622417187529, - "grad_norm": 0.0010768037755042315, - "learning_rate": 0.00019999911998311247, - "loss": 46.0, - "step": 17476 - }, - { - "epoch": 1.3362386987021426, - "grad_norm": 0.0036428302992135286, - "learning_rate": 0.0001999991198823406, - "loss": 46.0, - "step": 17477 - }, - { - "epoch": 1.3363151556855324, - "grad_norm": 0.0006980306934565306, - "learning_rate": 0.00019999911978156294, - "loss": 46.0, - "step": 17478 - }, - { - "epoch": 1.3363916126689221, - "grad_norm": 0.004891026299446821, - "learning_rate": 0.00019999911968077952, - "loss": 46.0, - "step": 17479 - }, - { - "epoch": 1.336468069652312, - "grad_norm": 0.0007515577599406242, - "learning_rate": 0.00019999911957999035, - "loss": 46.0, - "step": 17480 - }, - { - "epoch": 1.3365445266357017, - "grad_norm": 0.0016693056095391512, - "learning_rate": 0.00019999911947919538, - "loss": 46.0, - "step": 17481 - }, - { - "epoch": 1.3366209836190914, - "grad_norm": 0.0007030356791801751, - "learning_rate": 0.0001999991193783947, - "loss": 46.0, - "step": 17482 - }, - { - "epoch": 1.336697440602481, - "grad_norm": 0.0011476923245936632, - "learning_rate": 0.00019999911927758815, - "loss": 46.0, - "step": 17483 - }, - { - "epoch": 1.3367738975858707, - "grad_norm": 0.0023628147318959236, - "learning_rate": 0.00019999911917677592, - "loss": 46.0, - "step": 17484 - }, - { - "epoch": 1.3368503545692605, - "grad_norm": 0.0007388531230390072, - "learning_rate": 0.0001999991190759579, - "loss": 46.0, - "step": 17485 - }, - { - "epoch": 1.3369268115526503, - "grad_norm": 0.001888773636892438, - "learning_rate": 0.00019999911897513407, - "loss": 46.0, - "step": 17486 - }, - { - "epoch": 1.3370032685360398, - "grad_norm": 0.0008290036930702627, - "learning_rate": 0.00019999911887430452, - "loss": 46.0, - "step": 17487 - }, - { - "epoch": 1.3370797255194296, - "grad_norm": 0.0006322208209894598, - "learning_rate": 0.00019999911877346916, - "loss": 46.0, - "step": 17488 - }, - { - "epoch": 1.3371561825028193, - "grad_norm": 0.0021140817552804947, - "learning_rate": 0.0001999991186726281, - "loss": 46.0, - "step": 17489 - }, - { - "epoch": 1.337232639486209, - "grad_norm": 0.004914324264973402, - "learning_rate": 0.00019999911857178119, - "loss": 46.0, - "step": 17490 - }, - { - "epoch": 1.3373090964695988, - "grad_norm": 0.0008868437143974006, - "learning_rate": 0.00019999911847092854, - "loss": 46.0, - "step": 17491 - }, - { - "epoch": 1.3373855534529886, - "grad_norm": 0.003679433139041066, - "learning_rate": 0.00019999911837007011, - "loss": 46.0, - "step": 17492 - }, - { - "epoch": 1.3374620104363784, - "grad_norm": 0.0010031504789367318, - "learning_rate": 0.00019999911826920595, - "loss": 46.0, - "step": 17493 - }, - { - "epoch": 1.337538467419768, - "grad_norm": 0.0012689121067523956, - "learning_rate": 0.00019999911816833598, - "loss": 46.0, - "step": 17494 - }, - { - "epoch": 1.3376149244031577, - "grad_norm": 0.0010104383109137416, - "learning_rate": 0.00019999911806746026, - "loss": 46.0, - "step": 17495 - }, - { - "epoch": 1.3376913813865474, - "grad_norm": 0.0006006374023854733, - "learning_rate": 0.00019999911796657877, - "loss": 46.0, - "step": 17496 - }, - { - "epoch": 1.3377678383699372, - "grad_norm": 0.0014534505316987634, - "learning_rate": 0.0001999991178656915, - "loss": 46.0, - "step": 17497 - }, - { - "epoch": 1.3378442953533267, - "grad_norm": 0.0008841780945658684, - "learning_rate": 0.00019999911776479848, - "loss": 46.0, - "step": 17498 - }, - { - "epoch": 1.3379207523367165, - "grad_norm": 0.0021158214658498764, - "learning_rate": 0.00019999911766389967, - "loss": 46.0, - "step": 17499 - }, - { - "epoch": 1.3379972093201062, - "grad_norm": 0.004220927599817514, - "learning_rate": 0.00019999911756299509, - "loss": 46.0, - "step": 17500 - }, - { - "epoch": 1.338073666303496, - "grad_norm": 0.0014245593920350075, - "learning_rate": 0.00019999911746208476, - "loss": 46.0, - "step": 17501 - }, - { - "epoch": 1.3381501232868858, - "grad_norm": 0.0011016873177140951, - "learning_rate": 0.00019999911736116866, - "loss": 46.0, - "step": 17502 - }, - { - "epoch": 1.3382265802702755, - "grad_norm": 0.00121684733312577, - "learning_rate": 0.00019999911726024675, - "loss": 46.0, - "step": 17503 - }, - { - "epoch": 1.3383030372536653, - "grad_norm": 0.002816010732203722, - "learning_rate": 0.0001999991171593191, - "loss": 46.0, - "step": 17504 - }, - { - "epoch": 1.3383794942370548, - "grad_norm": 0.0015124849742278457, - "learning_rate": 0.0001999991170583857, - "loss": 46.0, - "step": 17505 - }, - { - "epoch": 1.3384559512204446, - "grad_norm": 0.006849550176411867, - "learning_rate": 0.00019999911695744652, - "loss": 46.0, - "step": 17506 - }, - { - "epoch": 1.3385324082038343, - "grad_norm": 0.0012817404931411147, - "learning_rate": 0.00019999911685650155, - "loss": 46.0, - "step": 17507 - }, - { - "epoch": 1.338608865187224, - "grad_norm": 0.000825156515929848, - "learning_rate": 0.00019999911675555083, - "loss": 46.0, - "step": 17508 - }, - { - "epoch": 1.3386853221706136, - "grad_norm": 0.0016330397920683026, - "learning_rate": 0.00019999911665459432, - "loss": 46.0, - "step": 17509 - }, - { - "epoch": 1.3387617791540034, - "grad_norm": 0.0031566356774419546, - "learning_rate": 0.00019999911655363206, - "loss": 46.0, - "step": 17510 - }, - { - "epoch": 1.3388382361373932, - "grad_norm": 0.002160874893888831, - "learning_rate": 0.00019999911645266402, - "loss": 46.0, - "step": 17511 - }, - { - "epoch": 1.338914693120783, - "grad_norm": 0.003055114531889558, - "learning_rate": 0.0001999991163516902, - "loss": 46.0, - "step": 17512 - }, - { - "epoch": 1.3389911501041727, - "grad_norm": 0.0018241537036374211, - "learning_rate": 0.00019999911625071066, - "loss": 46.0, - "step": 17513 - }, - { - "epoch": 1.3390676070875625, - "grad_norm": 0.0007302055601030588, - "learning_rate": 0.0001999991161497253, - "loss": 46.0, - "step": 17514 - }, - { - "epoch": 1.3391440640709522, - "grad_norm": 0.0003395903331693262, - "learning_rate": 0.00019999911604873417, - "loss": 46.0, - "step": 17515 - }, - { - "epoch": 1.3392205210543418, - "grad_norm": 0.0009740596287883818, - "learning_rate": 0.0001999991159477373, - "loss": 46.0, - "step": 17516 - }, - { - "epoch": 1.3392969780377315, - "grad_norm": 0.0013622610131278634, - "learning_rate": 0.00019999911584673465, - "loss": 46.0, - "step": 17517 - }, - { - "epoch": 1.3393734350211213, - "grad_norm": 0.0014347624965012074, - "learning_rate": 0.00019999911574572626, - "loss": 46.0, - "step": 17518 - }, - { - "epoch": 1.339449892004511, - "grad_norm": 0.0010314049432054162, - "learning_rate": 0.00019999911564471203, - "loss": 46.0, - "step": 17519 - }, - { - "epoch": 1.3395263489879006, - "grad_norm": 0.00046051066601648927, - "learning_rate": 0.0001999991155436921, - "loss": 46.0, - "step": 17520 - }, - { - "epoch": 1.3396028059712903, - "grad_norm": 0.00027199723990634084, - "learning_rate": 0.00019999911544266635, - "loss": 46.0, - "step": 17521 - }, - { - "epoch": 1.33967926295468, - "grad_norm": 0.0012747214641422033, - "learning_rate": 0.00019999911534163486, - "loss": 46.0, - "step": 17522 - }, - { - "epoch": 1.3397557199380699, - "grad_norm": 0.0023008554708212614, - "learning_rate": 0.0001999991152405976, - "loss": 46.0, - "step": 17523 - }, - { - "epoch": 1.3398321769214596, - "grad_norm": 0.0017643632600083947, - "learning_rate": 0.00019999911513955457, - "loss": 46.0, - "step": 17524 - }, - { - "epoch": 1.3399086339048494, - "grad_norm": 0.0009348672465421259, - "learning_rate": 0.00019999911503850573, - "loss": 46.0, - "step": 17525 - }, - { - "epoch": 1.3399850908882391, - "grad_norm": 0.000584596476983279, - "learning_rate": 0.00019999911493745115, - "loss": 46.0, - "step": 17526 - }, - { - "epoch": 1.3400615478716287, - "grad_norm": 0.0036607247311621904, - "learning_rate": 0.00019999911483639082, - "loss": 46.0, - "step": 17527 - }, - { - "epoch": 1.3401380048550184, - "grad_norm": 0.0008729560649953783, - "learning_rate": 0.00019999911473532472, - "loss": 46.0, - "step": 17528 - }, - { - "epoch": 1.3402144618384082, - "grad_norm": 0.0009502782486379147, - "learning_rate": 0.00019999911463425282, - "loss": 46.0, - "step": 17529 - }, - { - "epoch": 1.340290918821798, - "grad_norm": 0.0015565053327009082, - "learning_rate": 0.00019999911453317517, - "loss": 46.0, - "step": 17530 - }, - { - "epoch": 1.3403673758051875, - "grad_norm": 0.0023390448186546564, - "learning_rate": 0.00019999911443209175, - "loss": 46.0, - "step": 17531 - }, - { - "epoch": 1.3404438327885773, - "grad_norm": 0.0009186035022139549, - "learning_rate": 0.00019999911433100256, - "loss": 46.0, - "step": 17532 - }, - { - "epoch": 1.340520289771967, - "grad_norm": 0.0011317494791001081, - "learning_rate": 0.0001999991142299076, - "loss": 46.0, - "step": 17533 - }, - { - "epoch": 1.3405967467553568, - "grad_norm": 0.005681768525391817, - "learning_rate": 0.00019999911412880685, - "loss": 46.0, - "step": 17534 - }, - { - "epoch": 1.3406732037387465, - "grad_norm": 0.0020753771532326937, - "learning_rate": 0.00019999911402770036, - "loss": 46.0, - "step": 17535 - }, - { - "epoch": 1.3407496607221363, - "grad_norm": 0.001430074917152524, - "learning_rate": 0.00019999911392658807, - "loss": 46.0, - "step": 17536 - }, - { - "epoch": 1.340826117705526, - "grad_norm": 0.002051582559943199, - "learning_rate": 0.00019999911382547004, - "loss": 46.0, - "step": 17537 - }, - { - "epoch": 1.3409025746889156, - "grad_norm": 0.0010138758225366473, - "learning_rate": 0.00019999911372434626, - "loss": 46.0, - "step": 17538 - }, - { - "epoch": 1.3409790316723054, - "grad_norm": 0.0005562843871302903, - "learning_rate": 0.00019999911362321665, - "loss": 46.0, - "step": 17539 - }, - { - "epoch": 1.3410554886556951, - "grad_norm": 0.01306749228388071, - "learning_rate": 0.00019999911352208132, - "loss": 46.0, - "step": 17540 - }, - { - "epoch": 1.3411319456390849, - "grad_norm": 0.005159082822501659, - "learning_rate": 0.0001999991134209402, - "loss": 46.0, - "step": 17541 - }, - { - "epoch": 1.3412084026224744, - "grad_norm": 0.0009739922825247049, - "learning_rate": 0.0001999991133197933, - "loss": 46.0, - "step": 17542 - }, - { - "epoch": 1.3412848596058642, - "grad_norm": 0.0014173141680657864, - "learning_rate": 0.00019999911321864065, - "loss": 46.0, - "step": 17543 - }, - { - "epoch": 1.341361316589254, - "grad_norm": 0.012223972007632256, - "learning_rate": 0.00019999911311748223, - "loss": 46.0, - "step": 17544 - }, - { - "epoch": 1.3414377735726437, - "grad_norm": 0.00590596254914999, - "learning_rate": 0.00019999911301631803, - "loss": 46.0, - "step": 17545 - }, - { - "epoch": 1.3415142305560335, - "grad_norm": 0.008157900534570217, - "learning_rate": 0.0001999991129151481, - "loss": 46.0, - "step": 17546 - }, - { - "epoch": 1.3415906875394232, - "grad_norm": 0.0003982759080827236, - "learning_rate": 0.00019999911281397235, - "loss": 46.0, - "step": 17547 - }, - { - "epoch": 1.3416671445228128, - "grad_norm": 0.0034173447638750076, - "learning_rate": 0.00019999911271279084, - "loss": 46.0, - "step": 17548 - }, - { - "epoch": 1.3417436015062025, - "grad_norm": 0.0007977579371072352, - "learning_rate": 0.00019999911261160355, - "loss": 46.0, - "step": 17549 - }, - { - "epoch": 1.3418200584895923, - "grad_norm": 0.0013559350045397878, - "learning_rate": 0.00019999911251041051, - "loss": 46.0, - "step": 17550 - }, - { - "epoch": 1.341896515472982, - "grad_norm": 0.0009482645546086133, - "learning_rate": 0.0001999991124092117, - "loss": 46.0, - "step": 17551 - }, - { - "epoch": 1.3419729724563718, - "grad_norm": 0.0034379465505480766, - "learning_rate": 0.00019999911230800713, - "loss": 46.0, - "step": 17552 - }, - { - "epoch": 1.3420494294397614, - "grad_norm": 0.002199098700657487, - "learning_rate": 0.0001999991122067968, - "loss": 46.0, - "step": 17553 - }, - { - "epoch": 1.3421258864231511, - "grad_norm": 0.0014237448340281844, - "learning_rate": 0.00019999911210558064, - "loss": 46.0, - "step": 17554 - }, - { - "epoch": 1.3422023434065409, - "grad_norm": 0.0008657604339532554, - "learning_rate": 0.00019999911200435877, - "loss": 46.0, - "step": 17555 - }, - { - "epoch": 1.3422788003899306, - "grad_norm": 0.0005057230591773987, - "learning_rate": 0.00019999911190313112, - "loss": 46.0, - "step": 17556 - }, - { - "epoch": 1.3423552573733204, - "grad_norm": 0.005004973150789738, - "learning_rate": 0.00019999911180189768, - "loss": 46.0, - "step": 17557 - }, - { - "epoch": 1.3424317143567102, - "grad_norm": 0.001214786316268146, - "learning_rate": 0.00019999911170065848, - "loss": 46.0, - "step": 17558 - }, - { - "epoch": 1.3425081713400997, - "grad_norm": 0.0015020164428278804, - "learning_rate": 0.00019999911159941351, - "loss": 46.0, - "step": 17559 - }, - { - "epoch": 1.3425846283234895, - "grad_norm": 0.0029497521463781595, - "learning_rate": 0.0001999991114981628, - "loss": 46.0, - "step": 17560 - }, - { - "epoch": 1.3426610853068792, - "grad_norm": 0.01663339138031006, - "learning_rate": 0.0001999991113969063, - "loss": 46.0, - "step": 17561 - }, - { - "epoch": 1.342737542290269, - "grad_norm": 0.0014462413964793086, - "learning_rate": 0.000199999111295644, - "loss": 46.0, - "step": 17562 - }, - { - "epoch": 1.3428139992736585, - "grad_norm": 0.003001823555678129, - "learning_rate": 0.00019999911119437597, - "loss": 46.0, - "step": 17563 - }, - { - "epoch": 1.3428904562570483, - "grad_norm": 0.0007143891416490078, - "learning_rate": 0.00019999911109310213, - "loss": 46.0, - "step": 17564 - }, - { - "epoch": 1.342966913240438, - "grad_norm": 0.0032529369927942753, - "learning_rate": 0.00019999911099182255, - "loss": 46.0, - "step": 17565 - }, - { - "epoch": 1.3430433702238278, - "grad_norm": 0.0010566123528406024, - "learning_rate": 0.0001999991108905372, - "loss": 46.0, - "step": 17566 - }, - { - "epoch": 1.3431198272072176, - "grad_norm": 0.0012466214830055833, - "learning_rate": 0.0001999991107892461, - "loss": 46.0, - "step": 17567 - }, - { - "epoch": 1.3431962841906073, - "grad_norm": 0.000706339196767658, - "learning_rate": 0.0001999991106879492, - "loss": 46.0, - "step": 17568 - }, - { - "epoch": 1.343272741173997, - "grad_norm": 0.0004942166269756854, - "learning_rate": 0.00019999911058664652, - "loss": 46.0, - "step": 17569 - }, - { - "epoch": 1.3433491981573866, - "grad_norm": 0.0004397607408463955, - "learning_rate": 0.0001999991104853381, - "loss": 46.0, - "step": 17570 - }, - { - "epoch": 1.3434256551407764, - "grad_norm": 0.0012350472388789058, - "learning_rate": 0.0001999991103840239, - "loss": 46.0, - "step": 17571 - }, - { - "epoch": 1.3435021121241661, - "grad_norm": 0.0006688169087283313, - "learning_rate": 0.00019999911028270394, - "loss": 46.0, - "step": 17572 - }, - { - "epoch": 1.343578569107556, - "grad_norm": 0.0017817559419199824, - "learning_rate": 0.0001999991101813782, - "loss": 46.0, - "step": 17573 - }, - { - "epoch": 1.3436550260909454, - "grad_norm": 0.001308659091591835, - "learning_rate": 0.0001999991100800467, - "loss": 46.0, - "step": 17574 - }, - { - "epoch": 1.3437314830743352, - "grad_norm": 0.0012899389257654548, - "learning_rate": 0.0001999991099787094, - "loss": 46.0, - "step": 17575 - }, - { - "epoch": 1.343807940057725, - "grad_norm": 0.0010276848915964365, - "learning_rate": 0.00019999910987736637, - "loss": 46.0, - "step": 17576 - }, - { - "epoch": 1.3438843970411147, - "grad_norm": 0.00026489104493521154, - "learning_rate": 0.00019999910977601753, - "loss": 46.0, - "step": 17577 - }, - { - "epoch": 1.3439608540245045, - "grad_norm": 0.0007294208044186234, - "learning_rate": 0.00019999910967466295, - "loss": 46.0, - "step": 17578 - }, - { - "epoch": 1.3440373110078943, - "grad_norm": 0.005262431222945452, - "learning_rate": 0.0001999991095733026, - "loss": 46.0, - "step": 17579 - }, - { - "epoch": 1.344113767991284, - "grad_norm": 0.0023152728099375963, - "learning_rate": 0.0001999991094719365, - "loss": 46.0, - "step": 17580 - }, - { - "epoch": 1.3441902249746736, - "grad_norm": 0.0003386722528375685, - "learning_rate": 0.0001999991093705646, - "loss": 46.0, - "step": 17581 - }, - { - "epoch": 1.3442666819580633, - "grad_norm": 0.002151747699826956, - "learning_rate": 0.00019999910926918693, - "loss": 46.0, - "step": 17582 - }, - { - "epoch": 1.344343138941453, - "grad_norm": 0.0023805673699826, - "learning_rate": 0.0001999991091678035, - "loss": 46.0, - "step": 17583 - }, - { - "epoch": 1.3444195959248428, - "grad_norm": 0.0023206949699670076, - "learning_rate": 0.00019999910906641431, - "loss": 46.0, - "step": 17584 - }, - { - "epoch": 1.3444960529082324, - "grad_norm": 0.0012594126164913177, - "learning_rate": 0.00019999910896501932, - "loss": 46.0, - "step": 17585 - }, - { - "epoch": 1.3445725098916221, - "grad_norm": 0.0030643094796687365, - "learning_rate": 0.00019999910886361858, - "loss": 46.0, - "step": 17586 - }, - { - "epoch": 1.344648966875012, - "grad_norm": 0.0012008552439510822, - "learning_rate": 0.0001999991087622121, - "loss": 46.0, - "step": 17587 - }, - { - "epoch": 1.3447254238584017, - "grad_norm": 0.0010384246706962585, - "learning_rate": 0.00019999910866079978, - "loss": 46.0, - "step": 17588 - }, - { - "epoch": 1.3448018808417914, - "grad_norm": 0.0003308470477350056, - "learning_rate": 0.00019999910855938175, - "loss": 46.0, - "step": 17589 - }, - { - "epoch": 1.3448783378251812, - "grad_norm": 0.0005131775978952646, - "learning_rate": 0.00019999910845795794, - "loss": 46.0, - "step": 17590 - }, - { - "epoch": 1.344954794808571, - "grad_norm": 0.0008817252237349749, - "learning_rate": 0.00019999910835652833, - "loss": 46.0, - "step": 17591 - }, - { - "epoch": 1.3450312517919605, - "grad_norm": 0.0006807656609453261, - "learning_rate": 0.00019999910825509298, - "loss": 46.0, - "step": 17592 - }, - { - "epoch": 1.3451077087753502, - "grad_norm": 0.00342154735699296, - "learning_rate": 0.00019999910815365185, - "loss": 46.0, - "step": 17593 - }, - { - "epoch": 1.34518416575874, - "grad_norm": 0.0003578035393729806, - "learning_rate": 0.00019999910805220495, - "loss": 46.0, - "step": 17594 - }, - { - "epoch": 1.3452606227421298, - "grad_norm": 0.00356718129478395, - "learning_rate": 0.0001999991079507523, - "loss": 46.0, - "step": 17595 - }, - { - "epoch": 1.3453370797255193, - "grad_norm": 0.004658654797822237, - "learning_rate": 0.00019999910784929386, - "loss": 46.0, - "step": 17596 - }, - { - "epoch": 1.345413536708909, - "grad_norm": 0.0007298219134099782, - "learning_rate": 0.00019999910774782967, - "loss": 46.0, - "step": 17597 - }, - { - "epoch": 1.3454899936922988, - "grad_norm": 0.0007636790396645665, - "learning_rate": 0.0001999991076463597, - "loss": 46.0, - "step": 17598 - }, - { - "epoch": 1.3455664506756886, - "grad_norm": 0.0003657451015897095, - "learning_rate": 0.00019999910754488394, - "loss": 46.0, - "step": 17599 - }, - { - "epoch": 1.3456429076590783, - "grad_norm": 0.001634453539736569, - "learning_rate": 0.00019999910744340245, - "loss": 46.0, - "step": 17600 - }, - { - "epoch": 1.345719364642468, - "grad_norm": 0.0015206843381747603, - "learning_rate": 0.00019999910734191514, - "loss": 46.0, - "step": 17601 - }, - { - "epoch": 1.3457958216258579, - "grad_norm": 0.0006551849655807018, - "learning_rate": 0.0001999991072404221, - "loss": 46.0, - "step": 17602 - }, - { - "epoch": 1.3458722786092474, - "grad_norm": 0.0009461393929086626, - "learning_rate": 0.0001999991071389233, - "loss": 46.0, - "step": 17603 - }, - { - "epoch": 1.3459487355926372, - "grad_norm": 0.007163412403315306, - "learning_rate": 0.0001999991070374187, - "loss": 46.0, - "step": 17604 - }, - { - "epoch": 1.346025192576027, - "grad_norm": 0.002033101161941886, - "learning_rate": 0.00019999910693590834, - "loss": 46.0, - "step": 17605 - }, - { - "epoch": 1.3461016495594167, - "grad_norm": 0.001575839938595891, - "learning_rate": 0.0001999991068343922, - "loss": 46.0, - "step": 17606 - }, - { - "epoch": 1.3461781065428062, - "grad_norm": 0.00047944457037374377, - "learning_rate": 0.0001999991067328703, - "loss": 46.0, - "step": 17607 - }, - { - "epoch": 1.346254563526196, - "grad_norm": 0.002485336037352681, - "learning_rate": 0.00019999910663134264, - "loss": 46.0, - "step": 17608 - }, - { - "epoch": 1.3463310205095858, - "grad_norm": 0.002915919991210103, - "learning_rate": 0.0001999991065298092, - "loss": 46.0, - "step": 17609 - }, - { - "epoch": 1.3464074774929755, - "grad_norm": 0.0003705063136294484, - "learning_rate": 0.00019999910642826997, - "loss": 46.0, - "step": 17610 - }, - { - "epoch": 1.3464839344763653, - "grad_norm": 0.0006521191098727286, - "learning_rate": 0.00019999910632672503, - "loss": 46.0, - "step": 17611 - }, - { - "epoch": 1.346560391459755, - "grad_norm": 0.0012793421046808362, - "learning_rate": 0.00019999910622517427, - "loss": 46.0, - "step": 17612 - }, - { - "epoch": 1.3466368484431448, - "grad_norm": 0.000768558937124908, - "learning_rate": 0.00019999910612361775, - "loss": 46.0, - "step": 17613 - }, - { - "epoch": 1.3467133054265343, - "grad_norm": 0.0012492822716012597, - "learning_rate": 0.00019999910602205547, - "loss": 46.0, - "step": 17614 - }, - { - "epoch": 1.346789762409924, - "grad_norm": 0.0017069990281015635, - "learning_rate": 0.0001999991059204874, - "loss": 46.0, - "step": 17615 - }, - { - "epoch": 1.3468662193933139, - "grad_norm": 0.001446478650905192, - "learning_rate": 0.0001999991058189136, - "loss": 46.0, - "step": 17616 - }, - { - "epoch": 1.3469426763767036, - "grad_norm": 0.0005476575461216271, - "learning_rate": 0.000199999105717334, - "loss": 46.0, - "step": 17617 - }, - { - "epoch": 1.3470191333600932, - "grad_norm": 0.027798477560281754, - "learning_rate": 0.00019999910561574864, - "loss": 46.0, - "step": 17618 - }, - { - "epoch": 1.347095590343483, - "grad_norm": 0.0017784852534532547, - "learning_rate": 0.0001999991055141575, - "loss": 46.0, - "step": 17619 - }, - { - "epoch": 1.3471720473268727, - "grad_norm": 0.005920881405472755, - "learning_rate": 0.00019999910541256062, - "loss": 46.0, - "step": 17620 - }, - { - "epoch": 1.3472485043102624, - "grad_norm": 0.005008005537092686, - "learning_rate": 0.00019999910531095795, - "loss": 46.0, - "step": 17621 - }, - { - "epoch": 1.3473249612936522, - "grad_norm": 0.003869800129905343, - "learning_rate": 0.0001999991052093495, - "loss": 46.0, - "step": 17622 - }, - { - "epoch": 1.347401418277042, - "grad_norm": 0.001331445761024952, - "learning_rate": 0.0001999991051077353, - "loss": 46.0, - "step": 17623 - }, - { - "epoch": 1.3474778752604317, - "grad_norm": 0.0034771051723510027, - "learning_rate": 0.00019999910500611531, - "loss": 46.0, - "step": 17624 - }, - { - "epoch": 1.3475543322438213, - "grad_norm": 0.0010703509906306863, - "learning_rate": 0.00019999910490448958, - "loss": 46.0, - "step": 17625 - }, - { - "epoch": 1.347630789227211, - "grad_norm": 0.0052464609034359455, - "learning_rate": 0.00019999910480285804, - "loss": 46.0, - "step": 17626 - }, - { - "epoch": 1.3477072462106008, - "grad_norm": 0.0014775327872484922, - "learning_rate": 0.00019999910470122075, - "loss": 46.0, - "step": 17627 - }, - { - "epoch": 1.3477837031939905, - "grad_norm": 0.004175929818302393, - "learning_rate": 0.00019999910459957772, - "loss": 46.0, - "step": 17628 - }, - { - "epoch": 1.34786016017738, - "grad_norm": 0.0022350838407874107, - "learning_rate": 0.00019999910449792889, - "loss": 46.0, - "step": 17629 - }, - { - "epoch": 1.3479366171607698, - "grad_norm": 0.000799885077867657, - "learning_rate": 0.0001999991043962743, - "loss": 46.0, - "step": 17630 - }, - { - "epoch": 1.3480130741441596, - "grad_norm": 0.0013833839911967516, - "learning_rate": 0.00019999910429461393, - "loss": 46.0, - "step": 17631 - }, - { - "epoch": 1.3480895311275494, - "grad_norm": 0.0018418688559904695, - "learning_rate": 0.00019999910419294777, - "loss": 46.0, - "step": 17632 - }, - { - "epoch": 1.3481659881109391, - "grad_norm": 0.0002597272105049342, - "learning_rate": 0.0001999991040912759, - "loss": 46.0, - "step": 17633 - }, - { - "epoch": 1.348242445094329, - "grad_norm": 0.0009844328742474318, - "learning_rate": 0.00019999910398959823, - "loss": 46.0, - "step": 17634 - }, - { - "epoch": 1.3483189020777187, - "grad_norm": 0.014189861714839935, - "learning_rate": 0.00019999910388791478, - "loss": 46.0, - "step": 17635 - }, - { - "epoch": 1.3483953590611082, - "grad_norm": 0.0007297651027329266, - "learning_rate": 0.00019999910378622557, - "loss": 46.0, - "step": 17636 - }, - { - "epoch": 1.348471816044498, - "grad_norm": 0.004691058304160833, - "learning_rate": 0.0001999991036845306, - "loss": 46.0, - "step": 17637 - }, - { - "epoch": 1.3485482730278877, - "grad_norm": 0.0008430343004874885, - "learning_rate": 0.00019999910358282983, - "loss": 46.0, - "step": 17638 - }, - { - "epoch": 1.3486247300112775, - "grad_norm": 0.0005567933549173176, - "learning_rate": 0.00019999910348112332, - "loss": 46.0, - "step": 17639 - }, - { - "epoch": 1.348701186994667, - "grad_norm": 0.0010431535774841905, - "learning_rate": 0.00019999910337941104, - "loss": 46.0, - "step": 17640 - }, - { - "epoch": 1.3487776439780568, - "grad_norm": 0.0009747219155542552, - "learning_rate": 0.00019999910327769298, - "loss": 46.0, - "step": 17641 - }, - { - "epoch": 1.3488541009614465, - "grad_norm": 0.004696002695709467, - "learning_rate": 0.00019999910317596915, - "loss": 46.0, - "step": 17642 - }, - { - "epoch": 1.3489305579448363, - "grad_norm": 0.0005317625473253429, - "learning_rate": 0.00019999910307423954, - "loss": 46.0, - "step": 17643 - }, - { - "epoch": 1.349007014928226, - "grad_norm": 0.0007092943997122347, - "learning_rate": 0.0001999991029725042, - "loss": 46.0, - "step": 17644 - }, - { - "epoch": 1.3490834719116158, - "grad_norm": 0.002394125098362565, - "learning_rate": 0.00019999910287076306, - "loss": 46.0, - "step": 17645 - }, - { - "epoch": 1.3491599288950056, - "grad_norm": 0.006621365435421467, - "learning_rate": 0.00019999910276901614, - "loss": 46.0, - "step": 17646 - }, - { - "epoch": 1.3492363858783951, - "grad_norm": 0.0010238155955448747, - "learning_rate": 0.00019999910266726346, - "loss": 46.0, - "step": 17647 - }, - { - "epoch": 1.3493128428617849, - "grad_norm": 0.0027714355383068323, - "learning_rate": 0.00019999910256550502, - "loss": 46.0, - "step": 17648 - }, - { - "epoch": 1.3493892998451746, - "grad_norm": 0.006141017656773329, - "learning_rate": 0.00019999910246374083, - "loss": 46.0, - "step": 17649 - }, - { - "epoch": 1.3494657568285644, - "grad_norm": 0.0004310406220611185, - "learning_rate": 0.00019999910236197084, - "loss": 46.0, - "step": 17650 - }, - { - "epoch": 1.349542213811954, - "grad_norm": 0.0005662787007167935, - "learning_rate": 0.00019999910226019507, - "loss": 46.0, - "step": 17651 - }, - { - "epoch": 1.3496186707953437, - "grad_norm": 0.003674947889521718, - "learning_rate": 0.00019999910215841356, - "loss": 46.0, - "step": 17652 - }, - { - "epoch": 1.3496951277787335, - "grad_norm": 0.0012099051382392645, - "learning_rate": 0.00019999910205662627, - "loss": 46.0, - "step": 17653 - }, - { - "epoch": 1.3497715847621232, - "grad_norm": 0.009085611440241337, - "learning_rate": 0.00019999910195483321, - "loss": 46.0, - "step": 17654 - }, - { - "epoch": 1.349848041745513, - "grad_norm": 0.0013135239714756608, - "learning_rate": 0.00019999910185303438, - "loss": 46.0, - "step": 17655 - }, - { - "epoch": 1.3499244987289027, - "grad_norm": 0.0012159928446635604, - "learning_rate": 0.0001999991017512298, - "loss": 46.0, - "step": 17656 - }, - { - "epoch": 1.3500009557122925, - "grad_norm": 0.0005517210811376572, - "learning_rate": 0.00019999910164941943, - "loss": 46.0, - "step": 17657 - }, - { - "epoch": 1.350077412695682, - "grad_norm": 0.002159824827685952, - "learning_rate": 0.00019999910154760327, - "loss": 46.0, - "step": 17658 - }, - { - "epoch": 1.3501538696790718, - "grad_norm": 0.001300903270021081, - "learning_rate": 0.00019999910144578138, - "loss": 46.0, - "step": 17659 - }, - { - "epoch": 1.3502303266624616, - "grad_norm": 0.005021155346184969, - "learning_rate": 0.0001999991013439537, - "loss": 46.0, - "step": 17660 - }, - { - "epoch": 1.3503067836458513, - "grad_norm": 0.002784095937386155, - "learning_rate": 0.00019999910124212026, - "loss": 46.0, - "step": 17661 - }, - { - "epoch": 1.3503832406292409, - "grad_norm": 0.0018474971875548363, - "learning_rate": 0.00019999910114028104, - "loss": 46.0, - "step": 17662 - }, - { - "epoch": 1.3504596976126306, - "grad_norm": 0.009056279435753822, - "learning_rate": 0.00019999910103843608, - "loss": 46.0, - "step": 17663 - }, - { - "epoch": 1.3505361545960204, - "grad_norm": 0.0019982350058853626, - "learning_rate": 0.0001999991009365853, - "loss": 46.0, - "step": 17664 - }, - { - "epoch": 1.3506126115794101, - "grad_norm": 0.0012308531440794468, - "learning_rate": 0.00019999910083472877, - "loss": 46.0, - "step": 17665 - }, - { - "epoch": 1.3506890685628, - "grad_norm": 0.012501278892159462, - "learning_rate": 0.00019999910073286649, - "loss": 46.0, - "step": 17666 - }, - { - "epoch": 1.3507655255461897, - "grad_norm": 0.0017271460965275764, - "learning_rate": 0.00019999910063099843, - "loss": 46.0, - "step": 17667 - }, - { - "epoch": 1.3508419825295794, - "grad_norm": 0.0011376762995496392, - "learning_rate": 0.0001999991005291246, - "loss": 46.0, - "step": 17668 - }, - { - "epoch": 1.350918439512969, - "grad_norm": 0.0017403539968654513, - "learning_rate": 0.000199999100427245, - "loss": 46.0, - "step": 17669 - }, - { - "epoch": 1.3509948964963587, - "grad_norm": 0.0010393804404884577, - "learning_rate": 0.0001999991003253596, - "loss": 46.0, - "step": 17670 - }, - { - "epoch": 1.3510713534797485, - "grad_norm": 0.0010857520392164588, - "learning_rate": 0.0001999991002234685, - "loss": 46.0, - "step": 17671 - }, - { - "epoch": 1.3511478104631383, - "grad_norm": 0.0012325705029070377, - "learning_rate": 0.00019999910012157156, - "loss": 46.0, - "step": 17672 - }, - { - "epoch": 1.3512242674465278, - "grad_norm": 0.0016186099965125322, - "learning_rate": 0.00019999910001966892, - "loss": 46.0, - "step": 17673 - }, - { - "epoch": 1.3513007244299176, - "grad_norm": 0.004259428009390831, - "learning_rate": 0.00019999909991776045, - "loss": 46.0, - "step": 17674 - }, - { - "epoch": 1.3513771814133073, - "grad_norm": 0.0007236300152726471, - "learning_rate": 0.0001999990998158462, - "loss": 46.0, - "step": 17675 - }, - { - "epoch": 1.351453638396697, - "grad_norm": 0.001005738042294979, - "learning_rate": 0.00019999909971392624, - "loss": 46.0, - "step": 17676 - }, - { - "epoch": 1.3515300953800868, - "grad_norm": 0.0008229472441598773, - "learning_rate": 0.00019999909961200047, - "loss": 46.0, - "step": 17677 - }, - { - "epoch": 1.3516065523634766, - "grad_norm": 0.0011688934173434973, - "learning_rate": 0.00019999909951006893, - "loss": 46.0, - "step": 17678 - }, - { - "epoch": 1.3516830093468661, - "grad_norm": 0.00225238804705441, - "learning_rate": 0.00019999909940813165, - "loss": 46.0, - "step": 17679 - }, - { - "epoch": 1.351759466330256, - "grad_norm": 0.0045624286867678165, - "learning_rate": 0.00019999909930618862, - "loss": 46.0, - "step": 17680 - }, - { - "epoch": 1.3518359233136457, - "grad_norm": 0.0020937968511134386, - "learning_rate": 0.00019999909920423976, - "loss": 46.0, - "step": 17681 - }, - { - "epoch": 1.3519123802970354, - "grad_norm": 0.0017957761883735657, - "learning_rate": 0.00019999909910228516, - "loss": 46.0, - "step": 17682 - }, - { - "epoch": 1.3519888372804252, - "grad_norm": 0.008097846060991287, - "learning_rate": 0.00019999909900032478, - "loss": 46.0, - "step": 17683 - }, - { - "epoch": 1.3520652942638147, - "grad_norm": 0.00241546961478889, - "learning_rate": 0.00019999909889835865, - "loss": 46.0, - "step": 17684 - }, - { - "epoch": 1.3521417512472045, - "grad_norm": 0.0011286619119346142, - "learning_rate": 0.00019999909879638673, - "loss": 46.0, - "step": 17685 - }, - { - "epoch": 1.3522182082305942, - "grad_norm": 0.0008477772353217006, - "learning_rate": 0.00019999909869440906, - "loss": 46.0, - "step": 17686 - }, - { - "epoch": 1.352294665213984, - "grad_norm": 0.0014911523321643472, - "learning_rate": 0.0001999990985924256, - "loss": 46.0, - "step": 17687 - }, - { - "epoch": 1.3523711221973738, - "grad_norm": 0.004975670017302036, - "learning_rate": 0.0001999990984904364, - "loss": 46.0, - "step": 17688 - }, - { - "epoch": 1.3524475791807635, - "grad_norm": 0.007092161104083061, - "learning_rate": 0.0001999990983884414, - "loss": 46.0, - "step": 17689 - }, - { - "epoch": 1.352524036164153, - "grad_norm": 0.0006369981565512717, - "learning_rate": 0.00019999909828644064, - "loss": 46.0, - "step": 17690 - }, - { - "epoch": 1.3526004931475428, - "grad_norm": 0.00046385114546865225, - "learning_rate": 0.0001999990981844341, - "loss": 46.0, - "step": 17691 - }, - { - "epoch": 1.3526769501309326, - "grad_norm": 0.004023105837404728, - "learning_rate": 0.00019999909808242182, - "loss": 46.0, - "step": 17692 - }, - { - "epoch": 1.3527534071143223, - "grad_norm": 0.0018335661152377725, - "learning_rate": 0.00019999909798040376, - "loss": 46.0, - "step": 17693 - }, - { - "epoch": 1.3528298640977119, - "grad_norm": 0.0005817157798446715, - "learning_rate": 0.00019999909787837993, - "loss": 46.0, - "step": 17694 - }, - { - "epoch": 1.3529063210811016, - "grad_norm": 0.000753791187889874, - "learning_rate": 0.0001999990977763503, - "loss": 46.0, - "step": 17695 - }, - { - "epoch": 1.3529827780644914, - "grad_norm": 0.0009575884905643761, - "learning_rate": 0.00019999909767431492, - "loss": 46.0, - "step": 17696 - }, - { - "epoch": 1.3530592350478812, - "grad_norm": 0.0030338154174387455, - "learning_rate": 0.00019999909757227377, - "loss": 46.0, - "step": 17697 - }, - { - "epoch": 1.353135692031271, - "grad_norm": 0.0007067607948556542, - "learning_rate": 0.00019999909747022687, - "loss": 46.0, - "step": 17698 - }, - { - "epoch": 1.3532121490146607, - "grad_norm": 0.0006542197079397738, - "learning_rate": 0.0001999990973681742, - "loss": 46.0, - "step": 17699 - }, - { - "epoch": 1.3532886059980505, - "grad_norm": 0.0011140276910737157, - "learning_rate": 0.00019999909726611576, - "loss": 46.0, - "step": 17700 - }, - { - "epoch": 1.35336506298144, - "grad_norm": 0.0007084396784193814, - "learning_rate": 0.0001999990971640515, - "loss": 46.0, - "step": 17701 - }, - { - "epoch": 1.3534415199648298, - "grad_norm": 0.001138114370405674, - "learning_rate": 0.00019999909706198152, - "loss": 46.0, - "step": 17702 - }, - { - "epoch": 1.3535179769482195, - "grad_norm": 0.001708924537524581, - "learning_rate": 0.00019999909695990576, - "loss": 46.0, - "step": 17703 - }, - { - "epoch": 1.3535944339316093, - "grad_norm": 0.0005078091635368764, - "learning_rate": 0.00019999909685782422, - "loss": 46.0, - "step": 17704 - }, - { - "epoch": 1.3536708909149988, - "grad_norm": 0.0020662799943238497, - "learning_rate": 0.00019999909675573693, - "loss": 46.0, - "step": 17705 - }, - { - "epoch": 1.3537473478983886, - "grad_norm": 0.00034990065614692867, - "learning_rate": 0.00019999909665364388, - "loss": 46.0, - "step": 17706 - }, - { - "epoch": 1.3538238048817783, - "grad_norm": 0.0024372527841478586, - "learning_rate": 0.00019999909655154502, - "loss": 46.0, - "step": 17707 - }, - { - "epoch": 1.353900261865168, - "grad_norm": 0.0006608974654227495, - "learning_rate": 0.00019999909644944044, - "loss": 46.0, - "step": 17708 - }, - { - "epoch": 1.3539767188485579, - "grad_norm": 0.0021569731179624796, - "learning_rate": 0.00019999909634733004, - "loss": 46.0, - "step": 17709 - }, - { - "epoch": 1.3540531758319476, - "grad_norm": 0.0010395536664873362, - "learning_rate": 0.0001999990962452139, - "loss": 46.0, - "step": 17710 - }, - { - "epoch": 1.3541296328153374, - "grad_norm": 0.0011058994568884373, - "learning_rate": 0.000199999096143092, - "loss": 46.0, - "step": 17711 - }, - { - "epoch": 1.354206089798727, - "grad_norm": 0.0020159909036010504, - "learning_rate": 0.0001999990960409643, - "loss": 46.0, - "step": 17712 - }, - { - "epoch": 1.3542825467821167, - "grad_norm": 0.005407491698861122, - "learning_rate": 0.00019999909593883085, - "loss": 46.0, - "step": 17713 - }, - { - "epoch": 1.3543590037655064, - "grad_norm": 0.00033945226459763944, - "learning_rate": 0.0001999990958366916, - "loss": 46.0, - "step": 17714 - }, - { - "epoch": 1.3544354607488962, - "grad_norm": 0.0035249083302915096, - "learning_rate": 0.0001999990957345466, - "loss": 46.0, - "step": 17715 - }, - { - "epoch": 1.3545119177322857, - "grad_norm": 0.0014321065973490477, - "learning_rate": 0.00019999909563239588, - "loss": 46.0, - "step": 17716 - }, - { - "epoch": 1.3545883747156755, - "grad_norm": 0.0012607151875272393, - "learning_rate": 0.0001999990955302393, - "loss": 46.0, - "step": 17717 - }, - { - "epoch": 1.3546648316990653, - "grad_norm": 0.0012724926928058267, - "learning_rate": 0.00019999909542807706, - "loss": 46.0, - "step": 17718 - }, - { - "epoch": 1.354741288682455, - "grad_norm": 0.0028961985372006893, - "learning_rate": 0.00019999909532590894, - "loss": 46.0, - "step": 17719 - }, - { - "epoch": 1.3548177456658448, - "grad_norm": 0.007116749417036772, - "learning_rate": 0.00019999909522373511, - "loss": 46.0, - "step": 17720 - }, - { - "epoch": 1.3548942026492345, - "grad_norm": 0.0005645353230647743, - "learning_rate": 0.0001999990951215555, - "loss": 46.0, - "step": 17721 - }, - { - "epoch": 1.3549706596326243, - "grad_norm": 0.0023879176005721092, - "learning_rate": 0.00019999909501937013, - "loss": 46.0, - "step": 17722 - }, - { - "epoch": 1.3550471166160138, - "grad_norm": 0.0007873786962591112, - "learning_rate": 0.00019999909491717896, - "loss": 46.0, - "step": 17723 - }, - { - "epoch": 1.3551235735994036, - "grad_norm": 0.0006270324229262769, - "learning_rate": 0.00019999909481498206, - "loss": 46.0, - "step": 17724 - }, - { - "epoch": 1.3552000305827934, - "grad_norm": 0.001199326361529529, - "learning_rate": 0.0001999990947127794, - "loss": 46.0, - "step": 17725 - }, - { - "epoch": 1.3552764875661831, - "grad_norm": 0.0004179398238193244, - "learning_rate": 0.00019999909461057092, - "loss": 46.0, - "step": 17726 - }, - { - "epoch": 1.3553529445495727, - "grad_norm": 0.0007768411887809634, - "learning_rate": 0.0001999990945083567, - "loss": 46.0, - "step": 17727 - }, - { - "epoch": 1.3554294015329624, - "grad_norm": 0.0009438990382477641, - "learning_rate": 0.00019999909440613668, - "loss": 46.0, - "step": 17728 - }, - { - "epoch": 1.3555058585163522, - "grad_norm": 0.0007272938382811844, - "learning_rate": 0.00019999909430391092, - "loss": 46.0, - "step": 17729 - }, - { - "epoch": 1.355582315499742, - "grad_norm": 0.0012425988679751754, - "learning_rate": 0.00019999909420167938, - "loss": 46.0, - "step": 17730 - }, - { - "epoch": 1.3556587724831317, - "grad_norm": 0.001140510430559516, - "learning_rate": 0.0001999990940994421, - "loss": 46.0, - "step": 17731 - }, - { - "epoch": 1.3557352294665215, - "grad_norm": 0.0007697140681557357, - "learning_rate": 0.00019999909399719902, - "loss": 46.0, - "step": 17732 - }, - { - "epoch": 1.3558116864499112, - "grad_norm": 0.0032814880833029747, - "learning_rate": 0.00019999909389495019, - "loss": 46.0, - "step": 17733 - }, - { - "epoch": 1.3558881434333008, - "grad_norm": 0.0018105126218870282, - "learning_rate": 0.00019999909379269558, - "loss": 46.0, - "step": 17734 - }, - { - "epoch": 1.3559646004166905, - "grad_norm": 0.0013273388613015413, - "learning_rate": 0.00019999909369043518, - "loss": 46.0, - "step": 17735 - }, - { - "epoch": 1.3560410574000803, - "grad_norm": 0.006241875700652599, - "learning_rate": 0.00019999909358816903, - "loss": 46.0, - "step": 17736 - }, - { - "epoch": 1.35611751438347, - "grad_norm": 0.00621512345969677, - "learning_rate": 0.0001999990934858971, - "loss": 46.0, - "step": 17737 - }, - { - "epoch": 1.3561939713668596, - "grad_norm": 0.0018622925272211432, - "learning_rate": 0.0001999990933836194, - "loss": 46.0, - "step": 17738 - }, - { - "epoch": 1.3562704283502494, - "grad_norm": 0.0009286011918447912, - "learning_rate": 0.00019999909328133594, - "loss": 46.0, - "step": 17739 - }, - { - "epoch": 1.3563468853336391, - "grad_norm": 0.0007228510221466422, - "learning_rate": 0.00019999909317904675, - "loss": 46.0, - "step": 17740 - }, - { - "epoch": 1.3564233423170289, - "grad_norm": 0.0010326404590159655, - "learning_rate": 0.00019999909307675173, - "loss": 46.0, - "step": 17741 - }, - { - "epoch": 1.3564997993004186, - "grad_norm": 0.0007617344381287694, - "learning_rate": 0.000199999092974451, - "loss": 46.0, - "step": 17742 - }, - { - "epoch": 1.3565762562838084, - "grad_norm": 0.0007650285842828453, - "learning_rate": 0.00019999909287214443, - "loss": 46.0, - "step": 17743 - }, - { - "epoch": 1.3566527132671982, - "grad_norm": 0.0009211569558829069, - "learning_rate": 0.00019999909276983212, - "loss": 46.0, - "step": 17744 - }, - { - "epoch": 1.3567291702505877, - "grad_norm": 0.005173215176910162, - "learning_rate": 0.00019999909266751406, - "loss": 46.0, - "step": 17745 - }, - { - "epoch": 1.3568056272339775, - "grad_norm": 0.00498534832149744, - "learning_rate": 0.0001999990925651902, - "loss": 46.0, - "step": 17746 - }, - { - "epoch": 1.3568820842173672, - "grad_norm": 0.0003701482492033392, - "learning_rate": 0.0001999990924628606, - "loss": 46.0, - "step": 17747 - }, - { - "epoch": 1.356958541200757, - "grad_norm": 0.000933669856749475, - "learning_rate": 0.0001999990923605252, - "loss": 46.0, - "step": 17748 - }, - { - "epoch": 1.3570349981841465, - "grad_norm": 0.0011430637678131461, - "learning_rate": 0.00019999909225818405, - "loss": 46.0, - "step": 17749 - }, - { - "epoch": 1.3571114551675363, - "grad_norm": 0.002336802426725626, - "learning_rate": 0.00019999909215583715, - "loss": 46.0, - "step": 17750 - }, - { - "epoch": 1.357187912150926, - "grad_norm": 0.0013836455764248967, - "learning_rate": 0.00019999909205348443, - "loss": 46.0, - "step": 17751 - }, - { - "epoch": 1.3572643691343158, - "grad_norm": 0.006308353040367365, - "learning_rate": 0.000199999091951126, - "loss": 46.0, - "step": 17752 - }, - { - "epoch": 1.3573408261177056, - "grad_norm": 0.003316121641546488, - "learning_rate": 0.00019999909184876174, - "loss": 46.0, - "step": 17753 - }, - { - "epoch": 1.3574172831010953, - "grad_norm": 0.0011328560067340732, - "learning_rate": 0.00019999909174639175, - "loss": 46.0, - "step": 17754 - }, - { - "epoch": 1.357493740084485, - "grad_norm": 0.00046925709466449916, - "learning_rate": 0.000199999091644016, - "loss": 46.0, - "step": 17755 - }, - { - "epoch": 1.3575701970678746, - "grad_norm": 0.003665717551484704, - "learning_rate": 0.00019999909154163446, - "loss": 46.0, - "step": 17756 - }, - { - "epoch": 1.3576466540512644, - "grad_norm": 0.0008024421986192465, - "learning_rate": 0.00019999909143924715, - "loss": 46.0, - "step": 17757 - }, - { - "epoch": 1.3577231110346542, - "grad_norm": 0.000460057461168617, - "learning_rate": 0.00019999909133685406, - "loss": 46.0, - "step": 17758 - }, - { - "epoch": 1.357799568018044, - "grad_norm": 0.0011262510670349002, - "learning_rate": 0.0001999990912344552, - "loss": 46.0, - "step": 17759 - }, - { - "epoch": 1.3578760250014335, - "grad_norm": 0.00027881463756784797, - "learning_rate": 0.0001999990911320506, - "loss": 46.0, - "step": 17760 - }, - { - "epoch": 1.3579524819848232, - "grad_norm": 0.0037160865031182766, - "learning_rate": 0.0001999990910296402, - "loss": 46.0, - "step": 17761 - }, - { - "epoch": 1.358028938968213, - "grad_norm": 0.002092040376737714, - "learning_rate": 0.00019999909092722408, - "loss": 46.0, - "step": 17762 - }, - { - "epoch": 1.3581053959516027, - "grad_norm": 0.00047612396883778274, - "learning_rate": 0.00019999909082480213, - "loss": 46.0, - "step": 17763 - }, - { - "epoch": 1.3581818529349925, - "grad_norm": 0.010135921649634838, - "learning_rate": 0.00019999909072237443, - "loss": 46.0, - "step": 17764 - }, - { - "epoch": 1.3582583099183823, - "grad_norm": 0.0006043854518793523, - "learning_rate": 0.000199999090619941, - "loss": 46.0, - "step": 17765 - }, - { - "epoch": 1.358334766901772, - "grad_norm": 0.0006798132671974599, - "learning_rate": 0.00019999909051750175, - "loss": 46.0, - "step": 17766 - }, - { - "epoch": 1.3584112238851616, - "grad_norm": 0.0009309955057688057, - "learning_rate": 0.00019999909041505673, - "loss": 46.0, - "step": 17767 - }, - { - "epoch": 1.3584876808685513, - "grad_norm": 0.001363671850413084, - "learning_rate": 0.00019999909031260597, - "loss": 46.0, - "step": 17768 - }, - { - "epoch": 1.358564137851941, - "grad_norm": 0.0024964939802885056, - "learning_rate": 0.00019999909021014943, - "loss": 46.0, - "step": 17769 - }, - { - "epoch": 1.3586405948353308, - "grad_norm": 0.002011209260672331, - "learning_rate": 0.00019999909010768712, - "loss": 46.0, - "step": 17770 - }, - { - "epoch": 1.3587170518187204, - "grad_norm": 0.012724400497972965, - "learning_rate": 0.00019999909000521904, - "loss": 46.0, - "step": 17771 - }, - { - "epoch": 1.3587935088021101, - "grad_norm": 0.002366705797612667, - "learning_rate": 0.00019999908990274518, - "loss": 46.0, - "step": 17772 - }, - { - "epoch": 1.3588699657855, - "grad_norm": 0.001034746179357171, - "learning_rate": 0.00019999908980026555, - "loss": 46.0, - "step": 17773 - }, - { - "epoch": 1.3589464227688897, - "grad_norm": 0.0011732979910448194, - "learning_rate": 0.00019999908969778018, - "loss": 46.0, - "step": 17774 - }, - { - "epoch": 1.3590228797522794, - "grad_norm": 0.0012751020258292556, - "learning_rate": 0.000199999089595289, - "loss": 46.0, - "step": 17775 - }, - { - "epoch": 1.3590993367356692, - "grad_norm": 0.0007026111707091331, - "learning_rate": 0.0001999990894927921, - "loss": 46.0, - "step": 17776 - }, - { - "epoch": 1.359175793719059, - "grad_norm": 0.0005206980276852846, - "learning_rate": 0.0001999990893902894, - "loss": 46.0, - "step": 17777 - }, - { - "epoch": 1.3592522507024485, - "grad_norm": 0.0006734158378094435, - "learning_rate": 0.00019999908928778094, - "loss": 46.0, - "step": 17778 - }, - { - "epoch": 1.3593287076858382, - "grad_norm": 0.0009815518278628588, - "learning_rate": 0.0001999990891852667, - "loss": 46.0, - "step": 17779 - }, - { - "epoch": 1.359405164669228, - "grad_norm": 0.0008330151904374361, - "learning_rate": 0.0001999990890827467, - "loss": 46.0, - "step": 17780 - }, - { - "epoch": 1.3594816216526178, - "grad_norm": 0.0009843712905421853, - "learning_rate": 0.00019999908898022092, - "loss": 46.0, - "step": 17781 - }, - { - "epoch": 1.3595580786360073, - "grad_norm": 0.0014770576963201165, - "learning_rate": 0.00019999908887768935, - "loss": 46.0, - "step": 17782 - }, - { - "epoch": 1.359634535619397, - "grad_norm": 0.000717276765499264, - "learning_rate": 0.00019999908877515205, - "loss": 46.0, - "step": 17783 - }, - { - "epoch": 1.3597109926027868, - "grad_norm": 0.001629303558729589, - "learning_rate": 0.000199999088672609, - "loss": 46.0, - "step": 17784 - }, - { - "epoch": 1.3597874495861766, - "grad_norm": 0.0011074998183175921, - "learning_rate": 0.00019999908857006013, - "loss": 46.0, - "step": 17785 - }, - { - "epoch": 1.3598639065695663, - "grad_norm": 0.0013326950138434768, - "learning_rate": 0.0001999990884675055, - "loss": 46.0, - "step": 17786 - }, - { - "epoch": 1.359940363552956, - "grad_norm": 0.0006041023880243301, - "learning_rate": 0.00019999908836494513, - "loss": 46.0, - "step": 17787 - }, - { - "epoch": 1.3600168205363459, - "grad_norm": 0.00262673432007432, - "learning_rate": 0.00019999908826237895, - "loss": 46.0, - "step": 17788 - }, - { - "epoch": 1.3600932775197354, - "grad_norm": 0.0012383033754304051, - "learning_rate": 0.000199999088159807, - "loss": 46.0, - "step": 17789 - }, - { - "epoch": 1.3601697345031252, - "grad_norm": 0.0019016830483451486, - "learning_rate": 0.00019999908805722934, - "loss": 46.0, - "step": 17790 - }, - { - "epoch": 1.360246191486515, - "grad_norm": 0.0008480129181407392, - "learning_rate": 0.00019999908795464587, - "loss": 46.0, - "step": 17791 - }, - { - "epoch": 1.3603226484699047, - "grad_norm": 0.0023631234653294086, - "learning_rate": 0.00019999908785205662, - "loss": 46.0, - "step": 17792 - }, - { - "epoch": 1.3603991054532942, - "grad_norm": 0.0026122103445231915, - "learning_rate": 0.0001999990877494616, - "loss": 46.0, - "step": 17793 - }, - { - "epoch": 1.360475562436684, - "grad_norm": 0.0030562952160835266, - "learning_rate": 0.00019999908764686085, - "loss": 46.0, - "step": 17794 - }, - { - "epoch": 1.3605520194200738, - "grad_norm": 0.0009380441042594612, - "learning_rate": 0.00019999908754425428, - "loss": 46.0, - "step": 17795 - }, - { - "epoch": 1.3606284764034635, - "grad_norm": 0.00028944300720468163, - "learning_rate": 0.00019999908744164197, - "loss": 46.0, - "step": 17796 - }, - { - "epoch": 1.3607049333868533, - "grad_norm": 0.00037508911918848753, - "learning_rate": 0.0001999990873390239, - "loss": 46.0, - "step": 17797 - }, - { - "epoch": 1.360781390370243, - "grad_norm": 0.002429352840408683, - "learning_rate": 0.00019999908723640004, - "loss": 46.0, - "step": 17798 - }, - { - "epoch": 1.3608578473536328, - "grad_norm": 0.001250554691068828, - "learning_rate": 0.00019999908713377043, - "loss": 46.0, - "step": 17799 - }, - { - "epoch": 1.3609343043370223, - "grad_norm": 0.0020091719925403595, - "learning_rate": 0.000199999087031135, - "loss": 46.0, - "step": 17800 - }, - { - "epoch": 1.361010761320412, - "grad_norm": 0.0009023642633110285, - "learning_rate": 0.00019999908692849388, - "loss": 46.0, - "step": 17801 - }, - { - "epoch": 1.3610872183038019, - "grad_norm": 0.0016051430720835924, - "learning_rate": 0.00019999908682584693, - "loss": 46.0, - "step": 17802 - }, - { - "epoch": 1.3611636752871916, - "grad_norm": 0.0009749067248776555, - "learning_rate": 0.00019999908672319424, - "loss": 46.0, - "step": 17803 - }, - { - "epoch": 1.3612401322705812, - "grad_norm": 0.0029462212696671486, - "learning_rate": 0.00019999908662053577, - "loss": 46.0, - "step": 17804 - }, - { - "epoch": 1.361316589253971, - "grad_norm": 0.0022877671290189028, - "learning_rate": 0.00019999908651787153, - "loss": 46.0, - "step": 17805 - }, - { - "epoch": 1.3613930462373607, - "grad_norm": 0.012493717484176159, - "learning_rate": 0.0001999990864152015, - "loss": 46.0, - "step": 17806 - }, - { - "epoch": 1.3614695032207504, - "grad_norm": 0.0013477298198267817, - "learning_rate": 0.00019999908631252572, - "loss": 46.0, - "step": 17807 - }, - { - "epoch": 1.3615459602041402, - "grad_norm": 0.0006354089127853513, - "learning_rate": 0.0001999990862098442, - "loss": 46.0, - "step": 17808 - }, - { - "epoch": 1.36162241718753, - "grad_norm": 0.0030554740224033594, - "learning_rate": 0.00019999908610715688, - "loss": 46.0, - "step": 17809 - }, - { - "epoch": 1.3616988741709195, - "grad_norm": 0.001531387330032885, - "learning_rate": 0.00019999908600446377, - "loss": 46.0, - "step": 17810 - }, - { - "epoch": 1.3617753311543093, - "grad_norm": 0.001600883319042623, - "learning_rate": 0.00019999908590176492, - "loss": 46.0, - "step": 17811 - }, - { - "epoch": 1.361851788137699, - "grad_norm": 0.0020242915488779545, - "learning_rate": 0.00019999908579906031, - "loss": 46.0, - "step": 17812 - }, - { - "epoch": 1.3619282451210888, - "grad_norm": 0.0034273462370038033, - "learning_rate": 0.0001999990856963499, - "loss": 46.0, - "step": 17813 - }, - { - "epoch": 1.3620047021044785, - "grad_norm": 0.0010653403587639332, - "learning_rate": 0.00019999908559363374, - "loss": 46.0, - "step": 17814 - }, - { - "epoch": 1.362081159087868, - "grad_norm": 0.0006787891616113484, - "learning_rate": 0.00019999908549091182, - "loss": 46.0, - "step": 17815 - }, - { - "epoch": 1.3621576160712578, - "grad_norm": 0.0017184378812089562, - "learning_rate": 0.0001999990853881841, - "loss": 46.0, - "step": 17816 - }, - { - "epoch": 1.3622340730546476, - "grad_norm": 0.002074718941003084, - "learning_rate": 0.00019999908528545062, - "loss": 46.0, - "step": 17817 - }, - { - "epoch": 1.3623105300380374, - "grad_norm": 0.004496516659855843, - "learning_rate": 0.00019999908518271138, - "loss": 46.0, - "step": 17818 - }, - { - "epoch": 1.3623869870214271, - "grad_norm": 0.0006063260952942073, - "learning_rate": 0.0001999990850799664, - "loss": 46.0, - "step": 17819 - }, - { - "epoch": 1.362463444004817, - "grad_norm": 0.0015051227528601885, - "learning_rate": 0.0001999990849772156, - "loss": 46.0, - "step": 17820 - }, - { - "epoch": 1.3625399009882064, - "grad_norm": 0.00028397294227033854, - "learning_rate": 0.00019999908487445907, - "loss": 46.0, - "step": 17821 - }, - { - "epoch": 1.3626163579715962, - "grad_norm": 0.000749338127207011, - "learning_rate": 0.00019999908477169674, - "loss": 46.0, - "step": 17822 - }, - { - "epoch": 1.362692814954986, - "grad_norm": 0.0006876124534755945, - "learning_rate": 0.00019999908466892863, - "loss": 46.0, - "step": 17823 - }, - { - "epoch": 1.3627692719383757, - "grad_norm": 0.0008136221440508962, - "learning_rate": 0.0001999990845661548, - "loss": 46.0, - "step": 17824 - }, - { - "epoch": 1.3628457289217653, - "grad_norm": 0.0007484632078558207, - "learning_rate": 0.00019999908446337514, - "loss": 46.0, - "step": 17825 - }, - { - "epoch": 1.362922185905155, - "grad_norm": 0.0013365275226533413, - "learning_rate": 0.00019999908436058977, - "loss": 46.0, - "step": 17826 - }, - { - "epoch": 1.3629986428885448, - "grad_norm": 0.006817058194428682, - "learning_rate": 0.0001999990842577986, - "loss": 46.0, - "step": 17827 - }, - { - "epoch": 1.3630750998719345, - "grad_norm": 0.000941616075579077, - "learning_rate": 0.00019999908415500165, - "loss": 46.0, - "step": 17828 - }, - { - "epoch": 1.3631515568553243, - "grad_norm": 0.001861086580902338, - "learning_rate": 0.00019999908405219892, - "loss": 46.0, - "step": 17829 - }, - { - "epoch": 1.363228013838714, - "grad_norm": 0.002122849691659212, - "learning_rate": 0.00019999908394939046, - "loss": 46.0, - "step": 17830 - }, - { - "epoch": 1.3633044708221038, - "grad_norm": 0.002680432517081499, - "learning_rate": 0.00019999908384657621, - "loss": 46.0, - "step": 17831 - }, - { - "epoch": 1.3633809278054934, - "grad_norm": 0.0007977135828696191, - "learning_rate": 0.00019999908374375623, - "loss": 46.0, - "step": 17832 - }, - { - "epoch": 1.3634573847888831, - "grad_norm": 0.0005986588657833636, - "learning_rate": 0.00019999908364093044, - "loss": 46.0, - "step": 17833 - }, - { - "epoch": 1.3635338417722729, - "grad_norm": 0.0006033839308656752, - "learning_rate": 0.00019999908353809888, - "loss": 46.0, - "step": 17834 - }, - { - "epoch": 1.3636102987556626, - "grad_norm": 0.0010023170616477728, - "learning_rate": 0.00019999908343526157, - "loss": 46.0, - "step": 17835 - }, - { - "epoch": 1.3636867557390522, - "grad_norm": 0.000915006035938859, - "learning_rate": 0.0001999990833324185, - "loss": 46.0, - "step": 17836 - }, - { - "epoch": 1.363763212722442, - "grad_norm": 0.0014478982193395495, - "learning_rate": 0.0001999990832295696, - "loss": 46.0, - "step": 17837 - }, - { - "epoch": 1.3638396697058317, - "grad_norm": 0.0011957855895161629, - "learning_rate": 0.00019999908312671498, - "loss": 46.0, - "step": 17838 - }, - { - "epoch": 1.3639161266892215, - "grad_norm": 0.0023915900383144617, - "learning_rate": 0.0001999990830238546, - "loss": 46.0, - "step": 17839 - }, - { - "epoch": 1.3639925836726112, - "grad_norm": 0.0036710863932967186, - "learning_rate": 0.0001999990829209884, - "loss": 46.0, - "step": 17840 - }, - { - "epoch": 1.364069040656001, - "grad_norm": 0.002569563454017043, - "learning_rate": 0.00019999908281811648, - "loss": 46.0, - "step": 17841 - }, - { - "epoch": 1.3641454976393907, - "grad_norm": 0.0015425558667629957, - "learning_rate": 0.00019999908271523876, - "loss": 46.0, - "step": 17842 - }, - { - "epoch": 1.3642219546227803, - "grad_norm": 0.0015317416982725263, - "learning_rate": 0.0001999990826123553, - "loss": 46.0, - "step": 17843 - }, - { - "epoch": 1.36429841160617, - "grad_norm": 0.0010371280368417501, - "learning_rate": 0.00019999908250946605, - "loss": 46.0, - "step": 17844 - }, - { - "epoch": 1.3643748685895598, - "grad_norm": 0.0010485831880941987, - "learning_rate": 0.00019999908240657104, - "loss": 46.0, - "step": 17845 - }, - { - "epoch": 1.3644513255729496, - "grad_norm": 0.006667489185929298, - "learning_rate": 0.00019999908230367025, - "loss": 46.0, - "step": 17846 - }, - { - "epoch": 1.364527782556339, - "grad_norm": 0.003453640267252922, - "learning_rate": 0.00019999908220076369, - "loss": 46.0, - "step": 17847 - }, - { - "epoch": 1.3646042395397289, - "grad_norm": 0.0016167510766535997, - "learning_rate": 0.00019999908209785135, - "loss": 46.0, - "step": 17848 - }, - { - "epoch": 1.3646806965231186, - "grad_norm": 0.0010021318448707461, - "learning_rate": 0.0001999990819949333, - "loss": 46.0, - "step": 17849 - }, - { - "epoch": 1.3647571535065084, - "grad_norm": 0.0006228694110177457, - "learning_rate": 0.0001999990818920094, - "loss": 46.0, - "step": 17850 - }, - { - "epoch": 1.3648336104898982, - "grad_norm": 0.0014177424600347877, - "learning_rate": 0.0001999990817890798, - "loss": 46.0, - "step": 17851 - }, - { - "epoch": 1.364910067473288, - "grad_norm": 0.00393153028562665, - "learning_rate": 0.0001999990816861444, - "loss": 46.0, - "step": 17852 - }, - { - "epoch": 1.3649865244566777, - "grad_norm": 0.0010082251392304897, - "learning_rate": 0.0001999990815832032, - "loss": 46.0, - "step": 17853 - }, - { - "epoch": 1.3650629814400672, - "grad_norm": 0.0018287351122125983, - "learning_rate": 0.00019999908148025627, - "loss": 46.0, - "step": 17854 - }, - { - "epoch": 1.365139438423457, - "grad_norm": 0.004699264653027058, - "learning_rate": 0.00019999908137730357, - "loss": 46.0, - "step": 17855 - }, - { - "epoch": 1.3652158954068467, - "grad_norm": 0.014791145920753479, - "learning_rate": 0.00019999908127434508, - "loss": 46.0, - "step": 17856 - }, - { - "epoch": 1.3652923523902365, - "grad_norm": 0.0007130418671295047, - "learning_rate": 0.0001999990811713808, - "loss": 46.0, - "step": 17857 - }, - { - "epoch": 1.365368809373626, - "grad_norm": 0.0011027216678485274, - "learning_rate": 0.00019999908106841082, - "loss": 46.0, - "step": 17858 - }, - { - "epoch": 1.3654452663570158, - "grad_norm": 0.0023836635518819094, - "learning_rate": 0.00019999908096543503, - "loss": 46.0, - "step": 17859 - }, - { - "epoch": 1.3655217233404056, - "grad_norm": 0.0022195016499608755, - "learning_rate": 0.00019999908086245347, - "loss": 46.0, - "step": 17860 - }, - { - "epoch": 1.3655981803237953, - "grad_norm": 0.0012998756719753146, - "learning_rate": 0.00019999908075946614, - "loss": 46.0, - "step": 17861 - }, - { - "epoch": 1.365674637307185, - "grad_norm": 0.0008149457862600684, - "learning_rate": 0.00019999908065647306, - "loss": 46.0, - "step": 17862 - }, - { - "epoch": 1.3657510942905748, - "grad_norm": 0.0020915286149829626, - "learning_rate": 0.00019999908055347418, - "loss": 46.0, - "step": 17863 - }, - { - "epoch": 1.3658275512739646, - "grad_norm": 0.0012571917613968253, - "learning_rate": 0.00019999908045046952, - "loss": 46.0, - "step": 17864 - }, - { - "epoch": 1.3659040082573541, - "grad_norm": 0.01209266483783722, - "learning_rate": 0.00019999908034745912, - "loss": 46.0, - "step": 17865 - }, - { - "epoch": 1.365980465240744, - "grad_norm": 0.0008113992516882718, - "learning_rate": 0.00019999908024444295, - "loss": 46.0, - "step": 17866 - }, - { - "epoch": 1.3660569222241337, - "grad_norm": 0.0009873210219666362, - "learning_rate": 0.00019999908014142103, - "loss": 46.0, - "step": 17867 - }, - { - "epoch": 1.3661333792075234, - "grad_norm": 0.0007087065605446696, - "learning_rate": 0.0001999990800383933, - "loss": 46.0, - "step": 17868 - }, - { - "epoch": 1.366209836190913, - "grad_norm": 0.0008767751860432327, - "learning_rate": 0.00019999907993535984, - "loss": 46.0, - "step": 17869 - }, - { - "epoch": 1.3662862931743027, - "grad_norm": 0.0004419413744471967, - "learning_rate": 0.00019999907983232057, - "loss": 46.0, - "step": 17870 - }, - { - "epoch": 1.3663627501576925, - "grad_norm": 0.005212228279560804, - "learning_rate": 0.00019999907972927556, - "loss": 46.0, - "step": 17871 - }, - { - "epoch": 1.3664392071410822, - "grad_norm": 0.006762003526091576, - "learning_rate": 0.00019999907962622477, - "loss": 46.0, - "step": 17872 - }, - { - "epoch": 1.366515664124472, - "grad_norm": 0.0020630399230867624, - "learning_rate": 0.00019999907952316824, - "loss": 46.0, - "step": 17873 - }, - { - "epoch": 1.3665921211078618, - "grad_norm": 0.0007178988307714462, - "learning_rate": 0.0001999990794201059, - "loss": 46.0, - "step": 17874 - }, - { - "epoch": 1.3666685780912515, - "grad_norm": 0.00037824458559043705, - "learning_rate": 0.0001999990793170378, - "loss": 46.0, - "step": 17875 - }, - { - "epoch": 1.366745035074641, - "grad_norm": 0.0027194602880626917, - "learning_rate": 0.00019999907921396391, - "loss": 46.0, - "step": 17876 - }, - { - "epoch": 1.3668214920580308, - "grad_norm": 0.0009563557105138898, - "learning_rate": 0.0001999990791108843, - "loss": 46.0, - "step": 17877 - }, - { - "epoch": 1.3668979490414206, - "grad_norm": 0.001081207301467657, - "learning_rate": 0.0001999990790077989, - "loss": 46.0, - "step": 17878 - }, - { - "epoch": 1.3669744060248104, - "grad_norm": 0.0022381143644452095, - "learning_rate": 0.0001999990789047077, - "loss": 46.0, - "step": 17879 - }, - { - "epoch": 1.3670508630082, - "grad_norm": 0.0019142135279253125, - "learning_rate": 0.00019999907880161074, - "loss": 46.0, - "step": 17880 - }, - { - "epoch": 1.3671273199915897, - "grad_norm": 0.0004608039162121713, - "learning_rate": 0.00019999907869850804, - "loss": 46.0, - "step": 17881 - }, - { - "epoch": 1.3672037769749794, - "grad_norm": 0.0015253640012815595, - "learning_rate": 0.00019999907859539958, - "loss": 46.0, - "step": 17882 - }, - { - "epoch": 1.3672802339583692, - "grad_norm": 0.0005132431979291141, - "learning_rate": 0.0001999990784922853, - "loss": 46.0, - "step": 17883 - }, - { - "epoch": 1.367356690941759, - "grad_norm": 0.0008245253120549023, - "learning_rate": 0.0001999990783891653, - "loss": 46.0, - "step": 17884 - }, - { - "epoch": 1.3674331479251487, - "grad_norm": 0.0005842418177053332, - "learning_rate": 0.0001999990782860395, - "loss": 46.0, - "step": 17885 - }, - { - "epoch": 1.3675096049085385, - "grad_norm": 0.0039068120531737804, - "learning_rate": 0.00019999907818290792, - "loss": 46.0, - "step": 17886 - }, - { - "epoch": 1.367586061891928, - "grad_norm": 0.002817972796037793, - "learning_rate": 0.00019999907807977062, - "loss": 46.0, - "step": 17887 - }, - { - "epoch": 1.3676625188753178, - "grad_norm": 0.0013007987290620804, - "learning_rate": 0.0001999990779766275, - "loss": 46.0, - "step": 17888 - }, - { - "epoch": 1.3677389758587075, - "grad_norm": 0.0021931403316557407, - "learning_rate": 0.00019999907787347866, - "loss": 46.0, - "step": 17889 - }, - { - "epoch": 1.3678154328420973, - "grad_norm": 0.001547219348140061, - "learning_rate": 0.000199999077770324, - "loss": 46.0, - "step": 17890 - }, - { - "epoch": 1.3678918898254868, - "grad_norm": 0.0011261365143582225, - "learning_rate": 0.0001999990776671636, - "loss": 46.0, - "step": 17891 - }, - { - "epoch": 1.3679683468088766, - "grad_norm": 0.0013355721021071076, - "learning_rate": 0.0001999990775639974, - "loss": 46.0, - "step": 17892 - }, - { - "epoch": 1.3680448037922663, - "grad_norm": 0.0009573460556566715, - "learning_rate": 0.00019999907746082548, - "loss": 46.0, - "step": 17893 - }, - { - "epoch": 1.368121260775656, - "grad_norm": 0.005690068006515503, - "learning_rate": 0.00019999907735764776, - "loss": 46.0, - "step": 17894 - }, - { - "epoch": 1.3681977177590459, - "grad_norm": 0.0011770413257181644, - "learning_rate": 0.00019999907725446424, - "loss": 46.0, - "step": 17895 - }, - { - "epoch": 1.3682741747424356, - "grad_norm": 0.0020774193108081818, - "learning_rate": 0.00019999907715127503, - "loss": 46.0, - "step": 17896 - }, - { - "epoch": 1.3683506317258254, - "grad_norm": 0.0014860432129353285, - "learning_rate": 0.00019999907704808, - "loss": 46.0, - "step": 17897 - }, - { - "epoch": 1.368427088709215, - "grad_norm": 0.0005194481927901506, - "learning_rate": 0.00019999907694487917, - "loss": 46.0, - "step": 17898 - }, - { - "epoch": 1.3685035456926047, - "grad_norm": 0.007208625786006451, - "learning_rate": 0.00019999907684167264, - "loss": 46.0, - "step": 17899 - }, - { - "epoch": 1.3685800026759944, - "grad_norm": 0.0011275600409135222, - "learning_rate": 0.0001999990767384603, - "loss": 46.0, - "step": 17900 - }, - { - "epoch": 1.3686564596593842, - "grad_norm": 0.0010615979554131627, - "learning_rate": 0.0001999990766352422, - "loss": 46.0, - "step": 17901 - }, - { - "epoch": 1.3687329166427737, - "grad_norm": 0.0016583160031586885, - "learning_rate": 0.00019999907653201832, - "loss": 46.0, - "step": 17902 - }, - { - "epoch": 1.3688093736261635, - "grad_norm": 0.0018497579731047153, - "learning_rate": 0.00019999907642878867, - "loss": 46.0, - "step": 17903 - }, - { - "epoch": 1.3688858306095533, - "grad_norm": 0.0030268337577581406, - "learning_rate": 0.00019999907632555327, - "loss": 46.0, - "step": 17904 - }, - { - "epoch": 1.368962287592943, - "grad_norm": 0.0031173464376479387, - "learning_rate": 0.00019999907622231212, - "loss": 46.0, - "step": 17905 - }, - { - "epoch": 1.3690387445763328, - "grad_norm": 0.0028509721159934998, - "learning_rate": 0.00019999907611906515, - "loss": 46.0, - "step": 17906 - }, - { - "epoch": 1.3691152015597225, - "grad_norm": 0.0009712688042782247, - "learning_rate": 0.00019999907601581243, - "loss": 46.0, - "step": 17907 - }, - { - "epoch": 1.3691916585431123, - "grad_norm": 0.0008491530315950513, - "learning_rate": 0.00019999907591255393, - "loss": 46.0, - "step": 17908 - }, - { - "epoch": 1.3692681155265018, - "grad_norm": 0.0006790999323129654, - "learning_rate": 0.0001999990758092897, - "loss": 46.0, - "step": 17909 - }, - { - "epoch": 1.3693445725098916, - "grad_norm": 0.0007914750021882355, - "learning_rate": 0.00019999907570601965, - "loss": 46.0, - "step": 17910 - }, - { - "epoch": 1.3694210294932814, - "grad_norm": 0.0009068988147191703, - "learning_rate": 0.00019999907560274384, - "loss": 46.0, - "step": 17911 - }, - { - "epoch": 1.3694974864766711, - "grad_norm": 0.002898185048252344, - "learning_rate": 0.00019999907549946228, - "loss": 46.0, - "step": 17912 - }, - { - "epoch": 1.3695739434600607, - "grad_norm": 0.00536075234413147, - "learning_rate": 0.00019999907539617495, - "loss": 46.0, - "step": 17913 - }, - { - "epoch": 1.3696504004434504, - "grad_norm": 0.0004926254041492939, - "learning_rate": 0.00019999907529288184, - "loss": 46.0, - "step": 17914 - }, - { - "epoch": 1.3697268574268402, - "grad_norm": 0.0009255745098926127, - "learning_rate": 0.000199999075189583, - "loss": 46.0, - "step": 17915 - }, - { - "epoch": 1.36980331441023, - "grad_norm": 0.001889142906293273, - "learning_rate": 0.00019999907508627836, - "loss": 46.0, - "step": 17916 - }, - { - "epoch": 1.3698797713936197, - "grad_norm": 0.00608845567330718, - "learning_rate": 0.00019999907498296794, - "loss": 46.0, - "step": 17917 - }, - { - "epoch": 1.3699562283770095, - "grad_norm": 0.0018852971261367202, - "learning_rate": 0.00019999907487965174, - "loss": 46.0, - "step": 17918 - }, - { - "epoch": 1.3700326853603992, - "grad_norm": 0.0007477473118342459, - "learning_rate": 0.0001999990747763298, - "loss": 46.0, - "step": 17919 - }, - { - "epoch": 1.3701091423437888, - "grad_norm": 0.0013447540113702416, - "learning_rate": 0.00019999907467300207, - "loss": 46.0, - "step": 17920 - }, - { - "epoch": 1.3701855993271785, - "grad_norm": 0.0011717418674379587, - "learning_rate": 0.00019999907456966858, - "loss": 46.0, - "step": 17921 - }, - { - "epoch": 1.3702620563105683, - "grad_norm": 0.0016690271440893412, - "learning_rate": 0.00019999907446632934, - "loss": 46.0, - "step": 17922 - }, - { - "epoch": 1.370338513293958, - "grad_norm": 0.0013515135506168008, - "learning_rate": 0.0001999990743629843, - "loss": 46.0, - "step": 17923 - }, - { - "epoch": 1.3704149702773476, - "grad_norm": 0.0006318299565464258, - "learning_rate": 0.00019999907425963348, - "loss": 46.0, - "step": 17924 - }, - { - "epoch": 1.3704914272607374, - "grad_norm": 0.0007922223303467035, - "learning_rate": 0.00019999907415627692, - "loss": 46.0, - "step": 17925 - }, - { - "epoch": 1.3705678842441271, - "grad_norm": 0.0012414443772286177, - "learning_rate": 0.0001999990740529146, - "loss": 46.0, - "step": 17926 - }, - { - "epoch": 1.3706443412275169, - "grad_norm": 0.002843524096533656, - "learning_rate": 0.00019999907394954649, - "loss": 46.0, - "step": 17927 - }, - { - "epoch": 1.3707207982109066, - "grad_norm": 0.0016238440293818712, - "learning_rate": 0.0001999990738461726, - "loss": 46.0, - "step": 17928 - }, - { - "epoch": 1.3707972551942964, - "grad_norm": 0.007920495234429836, - "learning_rate": 0.00019999907374279295, - "loss": 46.0, - "step": 17929 - }, - { - "epoch": 1.3708737121776862, - "grad_norm": 0.0012356085935607553, - "learning_rate": 0.00019999907363940755, - "loss": 46.0, - "step": 17930 - }, - { - "epoch": 1.3709501691610757, - "grad_norm": 0.003255319781601429, - "learning_rate": 0.00019999907353601638, - "loss": 46.0, - "step": 17931 - }, - { - "epoch": 1.3710266261444655, - "grad_norm": 0.0024844917934387922, - "learning_rate": 0.0001999990734326194, - "loss": 46.0, - "step": 17932 - }, - { - "epoch": 1.3711030831278552, - "grad_norm": 0.0020016005728393793, - "learning_rate": 0.0001999990733292167, - "loss": 46.0, - "step": 17933 - }, - { - "epoch": 1.371179540111245, - "grad_norm": 0.0007648197934031487, - "learning_rate": 0.0001999990732258082, - "loss": 46.0, - "step": 17934 - }, - { - "epoch": 1.3712559970946345, - "grad_norm": 0.000825755181722343, - "learning_rate": 0.00019999907312239393, - "loss": 46.0, - "step": 17935 - }, - { - "epoch": 1.3713324540780243, - "grad_norm": 0.004587918519973755, - "learning_rate": 0.0001999990730189739, - "loss": 46.0, - "step": 17936 - }, - { - "epoch": 1.371408911061414, - "grad_norm": 0.0035124497953802347, - "learning_rate": 0.00019999907291554808, - "loss": 46.0, - "step": 17937 - }, - { - "epoch": 1.3714853680448038, - "grad_norm": 0.0048817857168614864, - "learning_rate": 0.00019999907281211652, - "loss": 46.0, - "step": 17938 - }, - { - "epoch": 1.3715618250281936, - "grad_norm": 0.0010551700834184885, - "learning_rate": 0.00019999907270867919, - "loss": 46.0, - "step": 17939 - }, - { - "epoch": 1.3716382820115833, - "grad_norm": 0.0010012377751991153, - "learning_rate": 0.00019999907260523608, - "loss": 46.0, - "step": 17940 - }, - { - "epoch": 1.3717147389949729, - "grad_norm": 0.0027756786439567804, - "learning_rate": 0.0001999990725017872, - "loss": 46.0, - "step": 17941 - }, - { - "epoch": 1.3717911959783626, - "grad_norm": 0.00627170642837882, - "learning_rate": 0.00019999907239833255, - "loss": 46.0, - "step": 17942 - }, - { - "epoch": 1.3718676529617524, - "grad_norm": 0.0006332259508781135, - "learning_rate": 0.00019999907229487215, - "loss": 46.0, - "step": 17943 - }, - { - "epoch": 1.3719441099451422, - "grad_norm": 0.0006545702344737947, - "learning_rate": 0.00019999907219140598, - "loss": 46.0, - "step": 17944 - }, - { - "epoch": 1.372020566928532, - "grad_norm": 0.00031641146051697433, - "learning_rate": 0.000199999072087934, - "loss": 46.0, - "step": 17945 - }, - { - "epoch": 1.3720970239119215, - "grad_norm": 0.0009163955692201853, - "learning_rate": 0.00019999907198445626, - "loss": 46.0, - "step": 17946 - }, - { - "epoch": 1.3721734808953112, - "grad_norm": 0.0012735750060528517, - "learning_rate": 0.00019999907188097277, - "loss": 46.0, - "step": 17947 - }, - { - "epoch": 1.372249937878701, - "grad_norm": 0.0017701095202937722, - "learning_rate": 0.00019999907177748353, - "loss": 46.0, - "step": 17948 - }, - { - "epoch": 1.3723263948620907, - "grad_norm": 0.0012463940074667335, - "learning_rate": 0.0001999990716739885, - "loss": 46.0, - "step": 17949 - }, - { - "epoch": 1.3724028518454805, - "grad_norm": 0.0007806146750226617, - "learning_rate": 0.00019999907157048768, - "loss": 46.0, - "step": 17950 - }, - { - "epoch": 1.3724793088288703, - "grad_norm": 0.0014912097249180079, - "learning_rate": 0.0001999990714669811, - "loss": 46.0, - "step": 17951 - }, - { - "epoch": 1.3725557658122598, - "grad_norm": 0.000257108302321285, - "learning_rate": 0.00019999907136346878, - "loss": 46.0, - "step": 17952 - }, - { - "epoch": 1.3726322227956496, - "grad_norm": 0.003247505519539118, - "learning_rate": 0.00019999907125995065, - "loss": 46.0, - "step": 17953 - }, - { - "epoch": 1.3727086797790393, - "grad_norm": 0.0015863614389672875, - "learning_rate": 0.00019999907115642677, - "loss": 46.0, - "step": 17954 - }, - { - "epoch": 1.372785136762429, - "grad_norm": 0.000813423132058233, - "learning_rate": 0.00019999907105289712, - "loss": 46.0, - "step": 17955 - }, - { - "epoch": 1.3728615937458186, - "grad_norm": 0.0008148307679221034, - "learning_rate": 0.00019999907094936172, - "loss": 46.0, - "step": 17956 - }, - { - "epoch": 1.3729380507292084, - "grad_norm": 0.0012644513044506311, - "learning_rate": 0.0001999990708458205, - "loss": 46.0, - "step": 17957 - }, - { - "epoch": 1.3730145077125981, - "grad_norm": 0.0009246161789633334, - "learning_rate": 0.00019999907074227355, - "loss": 46.0, - "step": 17958 - }, - { - "epoch": 1.373090964695988, - "grad_norm": 0.00284808618016541, - "learning_rate": 0.00019999907063872083, - "loss": 46.0, - "step": 17959 - }, - { - "epoch": 1.3731674216793777, - "grad_norm": 0.008393158204853535, - "learning_rate": 0.00019999907053516234, - "loss": 46.0, - "step": 17960 - }, - { - "epoch": 1.3732438786627674, - "grad_norm": 0.0004486207035370171, - "learning_rate": 0.00019999907043159807, - "loss": 46.0, - "step": 17961 - }, - { - "epoch": 1.3733203356461572, - "grad_norm": 0.0007089965511113405, - "learning_rate": 0.00019999907032802804, - "loss": 46.0, - "step": 17962 - }, - { - "epoch": 1.3733967926295467, - "grad_norm": 0.0009626184473745525, - "learning_rate": 0.00019999907022445222, - "loss": 46.0, - "step": 17963 - }, - { - "epoch": 1.3734732496129365, - "grad_norm": 0.0025398533325642347, - "learning_rate": 0.00019999907012087067, - "loss": 46.0, - "step": 17964 - }, - { - "epoch": 1.3735497065963262, - "grad_norm": 0.010438891127705574, - "learning_rate": 0.0001999990700172833, - "loss": 46.0, - "step": 17965 - }, - { - "epoch": 1.373626163579716, - "grad_norm": 0.0007563194376416504, - "learning_rate": 0.0001999990699136902, - "loss": 46.0, - "step": 17966 - }, - { - "epoch": 1.3737026205631055, - "grad_norm": 0.0009807052556425333, - "learning_rate": 0.00019999906981009132, - "loss": 46.0, - "step": 17967 - }, - { - "epoch": 1.3737790775464953, - "grad_norm": 0.0006703404360450804, - "learning_rate": 0.00019999906970648667, - "loss": 46.0, - "step": 17968 - }, - { - "epoch": 1.373855534529885, - "grad_norm": 0.0011624944163486362, - "learning_rate": 0.00019999906960287625, - "loss": 46.0, - "step": 17969 - }, - { - "epoch": 1.3739319915132748, - "grad_norm": 0.0015916782431304455, - "learning_rate": 0.00019999906949926005, - "loss": 46.0, - "step": 17970 - }, - { - "epoch": 1.3740084484966646, - "grad_norm": 0.0005979355191811919, - "learning_rate": 0.00019999906939563807, - "loss": 46.0, - "step": 17971 - }, - { - "epoch": 1.3740849054800544, - "grad_norm": 0.000791784783359617, - "learning_rate": 0.00019999906929201036, - "loss": 46.0, - "step": 17972 - }, - { - "epoch": 1.3741613624634441, - "grad_norm": 0.0009857387049123645, - "learning_rate": 0.00019999906918837686, - "loss": 46.0, - "step": 17973 - }, - { - "epoch": 1.3742378194468337, - "grad_norm": 0.0004861295747105032, - "learning_rate": 0.0001999990690847376, - "loss": 46.0, - "step": 17974 - }, - { - "epoch": 1.3743142764302234, - "grad_norm": 0.0010758782736957073, - "learning_rate": 0.0001999990689810926, - "loss": 46.0, - "step": 17975 - }, - { - "epoch": 1.3743907334136132, - "grad_norm": 0.0011926807928830385, - "learning_rate": 0.00019999906887744175, - "loss": 46.0, - "step": 17976 - }, - { - "epoch": 1.374467190397003, - "grad_norm": 0.0020258103031665087, - "learning_rate": 0.00019999906877378516, - "loss": 46.0, - "step": 17977 - }, - { - "epoch": 1.3745436473803925, - "grad_norm": 0.002082027029246092, - "learning_rate": 0.00019999906867012283, - "loss": 46.0, - "step": 17978 - }, - { - "epoch": 1.3746201043637822, - "grad_norm": 0.0010071509750559926, - "learning_rate": 0.00019999906856645473, - "loss": 46.0, - "step": 17979 - }, - { - "epoch": 1.374696561347172, - "grad_norm": 0.0007822625921107829, - "learning_rate": 0.00019999906846278082, - "loss": 46.0, - "step": 17980 - }, - { - "epoch": 1.3747730183305618, - "grad_norm": 0.0021289994474500418, - "learning_rate": 0.00019999906835910117, - "loss": 46.0, - "step": 17981 - }, - { - "epoch": 1.3748494753139515, - "grad_norm": 0.0009119003661908209, - "learning_rate": 0.00019999906825541577, - "loss": 46.0, - "step": 17982 - }, - { - "epoch": 1.3749259322973413, - "grad_norm": 0.0012132251868024468, - "learning_rate": 0.00019999906815172455, - "loss": 46.0, - "step": 17983 - }, - { - "epoch": 1.375002389280731, - "grad_norm": 0.0029130529146641493, - "learning_rate": 0.0001999990680480276, - "loss": 46.0, - "step": 17984 - }, - { - "epoch": 1.3750788462641206, - "grad_norm": 0.0032822180073708296, - "learning_rate": 0.00019999906794432488, - "loss": 46.0, - "step": 17985 - }, - { - "epoch": 1.3751553032475103, - "grad_norm": 0.011254685930907726, - "learning_rate": 0.00019999906784061636, - "loss": 46.0, - "step": 17986 - }, - { - "epoch": 1.3752317602309, - "grad_norm": 0.000657734926789999, - "learning_rate": 0.0001999990677369021, - "loss": 46.0, - "step": 17987 - }, - { - "epoch": 1.3753082172142899, - "grad_norm": 0.001019347575493157, - "learning_rate": 0.00019999906763318206, - "loss": 46.0, - "step": 17988 - }, - { - "epoch": 1.3753846741976794, - "grad_norm": 0.0015836822567507625, - "learning_rate": 0.00019999906752945625, - "loss": 46.0, - "step": 17989 - }, - { - "epoch": 1.3754611311810692, - "grad_norm": 0.0008894450729712844, - "learning_rate": 0.00019999906742572464, - "loss": 46.0, - "step": 17990 - }, - { - "epoch": 1.375537588164459, - "grad_norm": 0.0017351461574435234, - "learning_rate": 0.00019999906732198733, - "loss": 46.0, - "step": 17991 - }, - { - "epoch": 1.3756140451478487, - "grad_norm": 0.0009439553832635283, - "learning_rate": 0.0001999990672182442, - "loss": 46.0, - "step": 17992 - }, - { - "epoch": 1.3756905021312384, - "grad_norm": 0.0010688863694667816, - "learning_rate": 0.0001999990671144953, - "loss": 46.0, - "step": 17993 - }, - { - "epoch": 1.3757669591146282, - "grad_norm": 0.004524154122918844, - "learning_rate": 0.00019999906701074065, - "loss": 46.0, - "step": 17994 - }, - { - "epoch": 1.375843416098018, - "grad_norm": 0.0012322546681389213, - "learning_rate": 0.00019999906690698022, - "loss": 46.0, - "step": 17995 - }, - { - "epoch": 1.3759198730814075, - "grad_norm": 0.0018652474973350763, - "learning_rate": 0.00019999906680321402, - "loss": 46.0, - "step": 17996 - }, - { - "epoch": 1.3759963300647973, - "grad_norm": 0.0015578032471239567, - "learning_rate": 0.00019999906669944205, - "loss": 46.0, - "step": 17997 - }, - { - "epoch": 1.376072787048187, - "grad_norm": 0.0012477594427764416, - "learning_rate": 0.00019999906659566433, - "loss": 46.0, - "step": 17998 - }, - { - "epoch": 1.3761492440315768, - "grad_norm": 0.005300607997924089, - "learning_rate": 0.00019999906649188084, - "loss": 46.0, - "step": 17999 - }, - { - "epoch": 1.3762257010149663, - "grad_norm": 0.0008351447177119553, - "learning_rate": 0.00019999906638809155, - "loss": 46.0, - "step": 18000 - }, - { - "epoch": 1.376302157998356, - "grad_norm": 0.0007692829240113497, - "learning_rate": 0.0001999990662842965, - "loss": 46.0, - "step": 18001 - }, - { - "epoch": 1.3763786149817459, - "grad_norm": 0.0037672400940209627, - "learning_rate": 0.0001999990661804957, - "loss": 46.0, - "step": 18002 - }, - { - "epoch": 1.3764550719651356, - "grad_norm": 0.0014659576117992401, - "learning_rate": 0.00019999906607668912, - "loss": 46.0, - "step": 18003 - }, - { - "epoch": 1.3765315289485254, - "grad_norm": 0.0017991299973800778, - "learning_rate": 0.00019999906597287676, - "loss": 46.0, - "step": 18004 - }, - { - "epoch": 1.3766079859319151, - "grad_norm": 0.0021526277996599674, - "learning_rate": 0.00019999906586905866, - "loss": 46.0, - "step": 18005 - }, - { - "epoch": 1.376684442915305, - "grad_norm": 0.0014948190655559301, - "learning_rate": 0.00019999906576523478, - "loss": 46.0, - "step": 18006 - }, - { - "epoch": 1.3767608998986944, - "grad_norm": 0.0003963281342294067, - "learning_rate": 0.00019999906566140513, - "loss": 46.0, - "step": 18007 - }, - { - "epoch": 1.3768373568820842, - "grad_norm": 0.008857641369104385, - "learning_rate": 0.00019999906555756967, - "loss": 46.0, - "step": 18008 - }, - { - "epoch": 1.376913813865474, - "grad_norm": 0.0010356189450249076, - "learning_rate": 0.00019999906545372848, - "loss": 46.0, - "step": 18009 - }, - { - "epoch": 1.3769902708488637, - "grad_norm": 0.0034679740201681852, - "learning_rate": 0.0001999990653498815, - "loss": 46.0, - "step": 18010 - }, - { - "epoch": 1.3770667278322533, - "grad_norm": 0.0016193451592698693, - "learning_rate": 0.0001999990652460288, - "loss": 46.0, - "step": 18011 - }, - { - "epoch": 1.377143184815643, - "grad_norm": 0.0035377254243940115, - "learning_rate": 0.00019999906514217027, - "loss": 46.0, - "step": 18012 - }, - { - "epoch": 1.3772196417990328, - "grad_norm": 0.0008031788165681064, - "learning_rate": 0.00019999906503830598, - "loss": 46.0, - "step": 18013 - }, - { - "epoch": 1.3772960987824225, - "grad_norm": 0.0006095413118600845, - "learning_rate": 0.00019999906493443597, - "loss": 46.0, - "step": 18014 - }, - { - "epoch": 1.3773725557658123, - "grad_norm": 0.006988126784563065, - "learning_rate": 0.00019999906483056016, - "loss": 46.0, - "step": 18015 - }, - { - "epoch": 1.377449012749202, - "grad_norm": 0.0007553836330771446, - "learning_rate": 0.00019999906472667855, - "loss": 46.0, - "step": 18016 - }, - { - "epoch": 1.3775254697325918, - "grad_norm": 0.0019366928609088063, - "learning_rate": 0.00019999906462279122, - "loss": 46.0, - "step": 18017 - }, - { - "epoch": 1.3776019267159814, - "grad_norm": 0.0012283315882086754, - "learning_rate": 0.00019999906451889808, - "loss": 46.0, - "step": 18018 - }, - { - "epoch": 1.3776783836993711, - "grad_norm": 0.0007402935298159719, - "learning_rate": 0.0001999990644149992, - "loss": 46.0, - "step": 18019 - }, - { - "epoch": 1.3777548406827609, - "grad_norm": 0.0009826201712712646, - "learning_rate": 0.00019999906431109453, - "loss": 46.0, - "step": 18020 - }, - { - "epoch": 1.3778312976661506, - "grad_norm": 0.000709379673935473, - "learning_rate": 0.0001999990642071841, - "loss": 46.0, - "step": 18021 - }, - { - "epoch": 1.3779077546495402, - "grad_norm": 0.0015795029466971755, - "learning_rate": 0.0001999990641032679, - "loss": 46.0, - "step": 18022 - }, - { - "epoch": 1.37798421163293, - "grad_norm": 0.0005581254372373223, - "learning_rate": 0.00019999906399934594, - "loss": 46.0, - "step": 18023 - }, - { - "epoch": 1.3780606686163197, - "grad_norm": 0.0004443573416210711, - "learning_rate": 0.0001999990638954182, - "loss": 46.0, - "step": 18024 - }, - { - "epoch": 1.3781371255997095, - "grad_norm": 0.0005796657060272992, - "learning_rate": 0.00019999906379148468, - "loss": 46.0, - "step": 18025 - }, - { - "epoch": 1.3782135825830992, - "grad_norm": 0.002661475446075201, - "learning_rate": 0.0001999990636875454, - "loss": 46.0, - "step": 18026 - }, - { - "epoch": 1.378290039566489, - "grad_norm": 0.0007935474277473986, - "learning_rate": 0.00019999906358360037, - "loss": 46.0, - "step": 18027 - }, - { - "epoch": 1.3783664965498788, - "grad_norm": 0.00486373994499445, - "learning_rate": 0.00019999906347964954, - "loss": 46.0, - "step": 18028 - }, - { - "epoch": 1.3784429535332683, - "grad_norm": 0.003100744681432843, - "learning_rate": 0.00019999906337569295, - "loss": 46.0, - "step": 18029 - }, - { - "epoch": 1.378519410516658, - "grad_norm": 0.0008301998022943735, - "learning_rate": 0.00019999906327173062, - "loss": 46.0, - "step": 18030 - }, - { - "epoch": 1.3785958675000478, - "grad_norm": 0.0011180470464751124, - "learning_rate": 0.0001999990631677625, - "loss": 46.0, - "step": 18031 - }, - { - "epoch": 1.3786723244834376, - "grad_norm": 0.00048506358871236444, - "learning_rate": 0.0001999990630637886, - "loss": 46.0, - "step": 18032 - }, - { - "epoch": 1.3787487814668271, - "grad_norm": 0.0005775447352789342, - "learning_rate": 0.00019999906295980894, - "loss": 46.0, - "step": 18033 - }, - { - "epoch": 1.3788252384502169, - "grad_norm": 0.0025686314329504967, - "learning_rate": 0.00019999906285582349, - "loss": 46.0, - "step": 18034 - }, - { - "epoch": 1.3789016954336066, - "grad_norm": 0.0007559625082649291, - "learning_rate": 0.00019999906275183232, - "loss": 46.0, - "step": 18035 - }, - { - "epoch": 1.3789781524169964, - "grad_norm": 0.0007519559003412724, - "learning_rate": 0.00019999906264783532, - "loss": 46.0, - "step": 18036 - }, - { - "epoch": 1.3790546094003862, - "grad_norm": 0.0005074836080893874, - "learning_rate": 0.00019999906254383257, - "loss": 46.0, - "step": 18037 - }, - { - "epoch": 1.379131066383776, - "grad_norm": 0.0018584885401651263, - "learning_rate": 0.00019999906243982406, - "loss": 46.0, - "step": 18038 - }, - { - "epoch": 1.3792075233671657, - "grad_norm": 0.0008913018391467631, - "learning_rate": 0.0001999990623358098, - "loss": 46.0, - "step": 18039 - }, - { - "epoch": 1.3792839803505552, - "grad_norm": 0.0002679518365766853, - "learning_rate": 0.00019999906223178976, - "loss": 46.0, - "step": 18040 - }, - { - "epoch": 1.379360437333945, - "grad_norm": 0.012079217471182346, - "learning_rate": 0.00019999906212776392, - "loss": 46.0, - "step": 18041 - }, - { - "epoch": 1.3794368943173347, - "grad_norm": 0.0012403877917677164, - "learning_rate": 0.00019999906202373234, - "loss": 46.0, - "step": 18042 - }, - { - "epoch": 1.3795133513007245, - "grad_norm": 0.001639240887016058, - "learning_rate": 0.00019999906191969498, - "loss": 46.0, - "step": 18043 - }, - { - "epoch": 1.379589808284114, - "grad_norm": 0.0009506415808573365, - "learning_rate": 0.00019999906181565185, - "loss": 46.0, - "step": 18044 - }, - { - "epoch": 1.3796662652675038, - "grad_norm": 0.005230034701526165, - "learning_rate": 0.00019999906171160297, - "loss": 46.0, - "step": 18045 - }, - { - "epoch": 1.3797427222508936, - "grad_norm": 0.0006512513500638306, - "learning_rate": 0.00019999906160754832, - "loss": 46.0, - "step": 18046 - }, - { - "epoch": 1.3798191792342833, - "grad_norm": 0.00037874715053476393, - "learning_rate": 0.00019999906150348787, - "loss": 46.0, - "step": 18047 - }, - { - "epoch": 1.379895636217673, - "grad_norm": 0.0018193217692896724, - "learning_rate": 0.00019999906139942167, - "loss": 46.0, - "step": 18048 - }, - { - "epoch": 1.3799720932010628, - "grad_norm": 0.0005614160909317434, - "learning_rate": 0.00019999906129534968, - "loss": 46.0, - "step": 18049 - }, - { - "epoch": 1.3800485501844526, - "grad_norm": 0.00171806407161057, - "learning_rate": 0.00019999906119127193, - "loss": 46.0, - "step": 18050 - }, - { - "epoch": 1.3801250071678421, - "grad_norm": 0.001787136192433536, - "learning_rate": 0.00019999906108718842, - "loss": 46.0, - "step": 18051 - }, - { - "epoch": 1.380201464151232, - "grad_norm": 0.001263280282728374, - "learning_rate": 0.00019999906098309915, - "loss": 46.0, - "step": 18052 - }, - { - "epoch": 1.3802779211346217, - "grad_norm": 0.0009184384834952652, - "learning_rate": 0.00019999906087900412, - "loss": 46.0, - "step": 18053 - }, - { - "epoch": 1.3803543781180114, - "grad_norm": 0.00200935872271657, - "learning_rate": 0.00019999906077490328, - "loss": 46.0, - "step": 18054 - }, - { - "epoch": 1.380430835101401, - "grad_norm": 0.0013755237450823188, - "learning_rate": 0.0001999990606707967, - "loss": 46.0, - "step": 18055 - }, - { - "epoch": 1.3805072920847907, - "grad_norm": 0.00067552225664258, - "learning_rate": 0.00019999906056668434, - "loss": 46.0, - "step": 18056 - }, - { - "epoch": 1.3805837490681805, - "grad_norm": 0.001759067177772522, - "learning_rate": 0.0001999990604625662, - "loss": 46.0, - "step": 18057 - }, - { - "epoch": 1.3806602060515702, - "grad_norm": 0.0011331499554216862, - "learning_rate": 0.0001999990603584423, - "loss": 46.0, - "step": 18058 - }, - { - "epoch": 1.38073666303496, - "grad_norm": 0.0015485257608816028, - "learning_rate": 0.00019999906025431263, - "loss": 46.0, - "step": 18059 - }, - { - "epoch": 1.3808131200183498, - "grad_norm": 0.0003694407641887665, - "learning_rate": 0.0001999990601501772, - "loss": 46.0, - "step": 18060 - }, - { - "epoch": 1.3808895770017395, - "grad_norm": 0.0037997965700924397, - "learning_rate": 0.00019999906004603598, - "loss": 46.0, - "step": 18061 - }, - { - "epoch": 1.380966033985129, - "grad_norm": 0.0008745102677494287, - "learning_rate": 0.000199999059941889, - "loss": 46.0, - "step": 18062 - }, - { - "epoch": 1.3810424909685188, - "grad_norm": 0.0009955859277397394, - "learning_rate": 0.00019999905983773627, - "loss": 46.0, - "step": 18063 - }, - { - "epoch": 1.3811189479519086, - "grad_norm": 0.0020191327203065157, - "learning_rate": 0.00019999905973357775, - "loss": 46.0, - "step": 18064 - }, - { - "epoch": 1.3811954049352984, - "grad_norm": 0.0013388830702751875, - "learning_rate": 0.0001999990596294135, - "loss": 46.0, - "step": 18065 - }, - { - "epoch": 1.381271861918688, - "grad_norm": 0.03064756467938423, - "learning_rate": 0.0001999990595252434, - "loss": 46.0, - "step": 18066 - }, - { - "epoch": 1.3813483189020777, - "grad_norm": 0.002888164948672056, - "learning_rate": 0.00019999905942106762, - "loss": 46.0, - "step": 18067 - }, - { - "epoch": 1.3814247758854674, - "grad_norm": 0.001091856975108385, - "learning_rate": 0.000199999059316886, - "loss": 46.0, - "step": 18068 - }, - { - "epoch": 1.3815012328688572, - "grad_norm": 0.002184961922466755, - "learning_rate": 0.00019999905921269865, - "loss": 46.0, - "step": 18069 - }, - { - "epoch": 1.381577689852247, - "grad_norm": 0.0020398579072207212, - "learning_rate": 0.00019999905910850552, - "loss": 46.0, - "step": 18070 - }, - { - "epoch": 1.3816541468356367, - "grad_norm": 0.0006634164601564407, - "learning_rate": 0.0001999990590043066, - "loss": 46.0, - "step": 18071 - }, - { - "epoch": 1.3817306038190262, - "grad_norm": 0.0009516172576695681, - "learning_rate": 0.00019999905890010197, - "loss": 46.0, - "step": 18072 - }, - { - "epoch": 1.381807060802416, - "grad_norm": 0.0010657728416845202, - "learning_rate": 0.0001999990587958915, - "loss": 46.0, - "step": 18073 - }, - { - "epoch": 1.3818835177858058, - "grad_norm": 0.0009695056360214949, - "learning_rate": 0.0001999990586916753, - "loss": 46.0, - "step": 18074 - }, - { - "epoch": 1.3819599747691955, - "grad_norm": 0.0032378961332142353, - "learning_rate": 0.00019999905858745332, - "loss": 46.0, - "step": 18075 - }, - { - "epoch": 1.3820364317525853, - "grad_norm": 0.011132714338600636, - "learning_rate": 0.00019999905848322558, - "loss": 46.0, - "step": 18076 - }, - { - "epoch": 1.3821128887359748, - "grad_norm": 0.0009438168490305543, - "learning_rate": 0.00019999905837899204, - "loss": 46.0, - "step": 18077 - }, - { - "epoch": 1.3821893457193646, - "grad_norm": 0.0013492620782926679, - "learning_rate": 0.00019999905827475275, - "loss": 46.0, - "step": 18078 - }, - { - "epoch": 1.3822658027027543, - "grad_norm": 0.0009250510483980179, - "learning_rate": 0.0001999990581705077, - "loss": 46.0, - "step": 18079 - }, - { - "epoch": 1.382342259686144, - "grad_norm": 0.01226492878049612, - "learning_rate": 0.0001999990580662569, - "loss": 46.0, - "step": 18080 - }, - { - "epoch": 1.3824187166695339, - "grad_norm": 0.002441051183268428, - "learning_rate": 0.0001999990579620003, - "loss": 46.0, - "step": 18081 - }, - { - "epoch": 1.3824951736529236, - "grad_norm": 0.005781848449259996, - "learning_rate": 0.00019999905785773794, - "loss": 46.0, - "step": 18082 - }, - { - "epoch": 1.3825716306363132, - "grad_norm": 0.0007825401844456792, - "learning_rate": 0.00019999905775346978, - "loss": 46.0, - "step": 18083 - }, - { - "epoch": 1.382648087619703, - "grad_norm": 0.0014283062191680074, - "learning_rate": 0.0001999990576491959, - "loss": 46.0, - "step": 18084 - }, - { - "epoch": 1.3827245446030927, - "grad_norm": 0.0014039864763617516, - "learning_rate": 0.0001999990575449162, - "loss": 46.0, - "step": 18085 - }, - { - "epoch": 1.3828010015864824, - "grad_norm": 0.01543811522424221, - "learning_rate": 0.00019999905744063078, - "loss": 46.0, - "step": 18086 - }, - { - "epoch": 1.3828774585698722, - "grad_norm": 0.0005554198869504035, - "learning_rate": 0.00019999905733633955, - "loss": 46.0, - "step": 18087 - }, - { - "epoch": 1.3829539155532617, - "grad_norm": 0.0008181114681065083, - "learning_rate": 0.00019999905723204259, - "loss": 46.0, - "step": 18088 - }, - { - "epoch": 1.3830303725366515, - "grad_norm": 0.0018580456962808967, - "learning_rate": 0.00019999905712773984, - "loss": 46.0, - "step": 18089 - }, - { - "epoch": 1.3831068295200413, - "grad_norm": 0.005458520259708166, - "learning_rate": 0.00019999905702343133, - "loss": 46.0, - "step": 18090 - }, - { - "epoch": 1.383183286503431, - "grad_norm": 0.0006446646875701845, - "learning_rate": 0.000199999056919117, - "loss": 46.0, - "step": 18091 - }, - { - "epoch": 1.3832597434868208, - "grad_norm": 0.01109933853149414, - "learning_rate": 0.00019999905681479697, - "loss": 46.0, - "step": 18092 - }, - { - "epoch": 1.3833362004702106, - "grad_norm": 0.0034960834309458733, - "learning_rate": 0.00019999905671047117, - "loss": 46.0, - "step": 18093 - }, - { - "epoch": 1.3834126574536, - "grad_norm": 0.001120292698033154, - "learning_rate": 0.00019999905660613956, - "loss": 46.0, - "step": 18094 - }, - { - "epoch": 1.3834891144369899, - "grad_norm": 0.0008797007612884045, - "learning_rate": 0.0001999990565018022, - "loss": 46.0, - "step": 18095 - }, - { - "epoch": 1.3835655714203796, - "grad_norm": 0.001252058893442154, - "learning_rate": 0.00019999905639745907, - "loss": 46.0, - "step": 18096 - }, - { - "epoch": 1.3836420284037694, - "grad_norm": 0.0007532025338150561, - "learning_rate": 0.00019999905629311014, - "loss": 46.0, - "step": 18097 - }, - { - "epoch": 1.383718485387159, - "grad_norm": 0.0006492463871836662, - "learning_rate": 0.00019999905618875547, - "loss": 46.0, - "step": 18098 - }, - { - "epoch": 1.3837949423705487, - "grad_norm": 0.0011558914557099342, - "learning_rate": 0.00019999905608439504, - "loss": 46.0, - "step": 18099 - }, - { - "epoch": 1.3838713993539384, - "grad_norm": 0.0006250928272493184, - "learning_rate": 0.00019999905598002885, - "loss": 46.0, - "step": 18100 - }, - { - "epoch": 1.3839478563373282, - "grad_norm": 0.0006683696992695332, - "learning_rate": 0.00019999905587565685, - "loss": 46.0, - "step": 18101 - }, - { - "epoch": 1.384024313320718, - "grad_norm": 0.0007870956906117499, - "learning_rate": 0.00019999905577127908, - "loss": 46.0, - "step": 18102 - }, - { - "epoch": 1.3841007703041077, - "grad_norm": 0.00279410881921649, - "learning_rate": 0.00019999905566689557, - "loss": 46.0, - "step": 18103 - }, - { - "epoch": 1.3841772272874975, - "grad_norm": 0.0008921432308852673, - "learning_rate": 0.00019999905556250628, - "loss": 46.0, - "step": 18104 - }, - { - "epoch": 1.384253684270887, - "grad_norm": 0.000745880592148751, - "learning_rate": 0.00019999905545811121, - "loss": 46.0, - "step": 18105 - }, - { - "epoch": 1.3843301412542768, - "grad_norm": 0.0004097307682968676, - "learning_rate": 0.00019999905535371038, - "loss": 46.0, - "step": 18106 - }, - { - "epoch": 1.3844065982376665, - "grad_norm": 0.00046161108184605837, - "learning_rate": 0.0001999990552493038, - "loss": 46.0, - "step": 18107 - }, - { - "epoch": 1.3844830552210563, - "grad_norm": 0.0007116220076568425, - "learning_rate": 0.00019999905514489141, - "loss": 46.0, - "step": 18108 - }, - { - "epoch": 1.3845595122044458, - "grad_norm": 0.0009194620070047677, - "learning_rate": 0.00019999905504047329, - "loss": 46.0, - "step": 18109 - }, - { - "epoch": 1.3846359691878356, - "grad_norm": 0.0036174501292407513, - "learning_rate": 0.00019999905493604938, - "loss": 46.0, - "step": 18110 - }, - { - "epoch": 1.3847124261712254, - "grad_norm": 0.0023814463056623936, - "learning_rate": 0.00019999905483161968, - "loss": 46.0, - "step": 18111 - }, - { - "epoch": 1.3847888831546151, - "grad_norm": 0.002753094071522355, - "learning_rate": 0.00019999905472718426, - "loss": 46.0, - "step": 18112 - }, - { - "epoch": 1.3848653401380049, - "grad_norm": 0.0006366602028720081, - "learning_rate": 0.00019999905462274304, - "loss": 46.0, - "step": 18113 - }, - { - "epoch": 1.3849417971213946, - "grad_norm": 0.00701924879103899, - "learning_rate": 0.00019999905451829607, - "loss": 46.0, - "step": 18114 - }, - { - "epoch": 1.3850182541047844, - "grad_norm": 0.0017806018004193902, - "learning_rate": 0.0001999990544138433, - "loss": 46.0, - "step": 18115 - }, - { - "epoch": 1.385094711088174, - "grad_norm": 0.0009679956128820777, - "learning_rate": 0.00019999905430938478, - "loss": 46.0, - "step": 18116 - }, - { - "epoch": 1.3851711680715637, - "grad_norm": 0.0004925800021737814, - "learning_rate": 0.0001999990542049205, - "loss": 46.0, - "step": 18117 - }, - { - "epoch": 1.3852476250549535, - "grad_norm": 0.0009191755088977516, - "learning_rate": 0.00019999905410045043, - "loss": 46.0, - "step": 18118 - }, - { - "epoch": 1.3853240820383432, - "grad_norm": 0.0038608727045357227, - "learning_rate": 0.0001999990539959746, - "loss": 46.0, - "step": 18119 - }, - { - "epoch": 1.3854005390217328, - "grad_norm": 0.0008928577299229801, - "learning_rate": 0.00019999905389149301, - "loss": 46.0, - "step": 18120 - }, - { - "epoch": 1.3854769960051225, - "grad_norm": 0.0006483289762400091, - "learning_rate": 0.0001999990537870056, - "loss": 46.0, - "step": 18121 - }, - { - "epoch": 1.3855534529885123, - "grad_norm": 0.0017912021139636636, - "learning_rate": 0.00019999905368251248, - "loss": 46.0, - "step": 18122 - }, - { - "epoch": 1.385629909971902, - "grad_norm": 0.005643123760819435, - "learning_rate": 0.00019999905357801357, - "loss": 46.0, - "step": 18123 - }, - { - "epoch": 1.3857063669552918, - "grad_norm": 0.0035028192214667797, - "learning_rate": 0.00019999905347350893, - "loss": 46.0, - "step": 18124 - }, - { - "epoch": 1.3857828239386816, - "grad_norm": 0.006375194061547518, - "learning_rate": 0.00019999905336899845, - "loss": 46.0, - "step": 18125 - }, - { - "epoch": 1.3858592809220713, - "grad_norm": 0.0007990702288225293, - "learning_rate": 0.00019999905326448223, - "loss": 46.0, - "step": 18126 - }, - { - "epoch": 1.3859357379054609, - "grad_norm": 0.001139629865065217, - "learning_rate": 0.00019999905315996026, - "loss": 46.0, - "step": 18127 - }, - { - "epoch": 1.3860121948888506, - "grad_norm": 0.002823038026690483, - "learning_rate": 0.0001999990530554325, - "loss": 46.0, - "step": 18128 - }, - { - "epoch": 1.3860886518722404, - "grad_norm": 0.0005093346117064357, - "learning_rate": 0.00019999905295089895, - "loss": 46.0, - "step": 18129 - }, - { - "epoch": 1.3861651088556302, - "grad_norm": 0.006516624707728624, - "learning_rate": 0.00019999905284635966, - "loss": 46.0, - "step": 18130 - }, - { - "epoch": 1.3862415658390197, - "grad_norm": 0.0016051542479544878, - "learning_rate": 0.0001999990527418146, - "loss": 46.0, - "step": 18131 - }, - { - "epoch": 1.3863180228224095, - "grad_norm": 0.0006347924936562777, - "learning_rate": 0.00019999905263726376, - "loss": 46.0, - "step": 18132 - }, - { - "epoch": 1.3863944798057992, - "grad_norm": 0.002941821003332734, - "learning_rate": 0.00019999905253270715, - "loss": 46.0, - "step": 18133 - }, - { - "epoch": 1.386470936789189, - "grad_norm": 0.0010585044510662556, - "learning_rate": 0.0001999990524281448, - "loss": 46.0, - "step": 18134 - }, - { - "epoch": 1.3865473937725787, - "grad_norm": 0.0011076467344537377, - "learning_rate": 0.00019999905232357664, - "loss": 46.0, - "step": 18135 - }, - { - "epoch": 1.3866238507559685, - "grad_norm": 0.0016473408322781324, - "learning_rate": 0.00019999905221900274, - "loss": 46.0, - "step": 18136 - }, - { - "epoch": 1.3867003077393583, - "grad_norm": 0.0028028725646436214, - "learning_rate": 0.00019999905211442307, - "loss": 46.0, - "step": 18137 - }, - { - "epoch": 1.3867767647227478, - "grad_norm": 0.0036406780127435923, - "learning_rate": 0.00019999905200983762, - "loss": 46.0, - "step": 18138 - }, - { - "epoch": 1.3868532217061376, - "grad_norm": 0.0006841887370683253, - "learning_rate": 0.0001999990519052464, - "loss": 46.0, - "step": 18139 - }, - { - "epoch": 1.3869296786895273, - "grad_norm": 0.0030098850838840008, - "learning_rate": 0.00019999905180064943, - "loss": 46.0, - "step": 18140 - }, - { - "epoch": 1.387006135672917, - "grad_norm": 0.0015919606667011976, - "learning_rate": 0.00019999905169604663, - "loss": 46.0, - "step": 18141 - }, - { - "epoch": 1.3870825926563066, - "grad_norm": 0.0008367933914996684, - "learning_rate": 0.00019999905159143812, - "loss": 46.0, - "step": 18142 - }, - { - "epoch": 1.3871590496396964, - "grad_norm": 0.0021825837902724743, - "learning_rate": 0.00019999905148682383, - "loss": 46.0, - "step": 18143 - }, - { - "epoch": 1.3872355066230861, - "grad_norm": 0.002459099283441901, - "learning_rate": 0.00019999905138220377, - "loss": 46.0, - "step": 18144 - }, - { - "epoch": 1.387311963606476, - "grad_norm": 0.0010476986644789577, - "learning_rate": 0.0001999990512775779, - "loss": 46.0, - "step": 18145 - }, - { - "epoch": 1.3873884205898657, - "grad_norm": 0.0003944080963265151, - "learning_rate": 0.00019999905117294633, - "loss": 46.0, - "step": 18146 - }, - { - "epoch": 1.3874648775732554, - "grad_norm": 0.000341695238603279, - "learning_rate": 0.00019999905106830895, - "loss": 46.0, - "step": 18147 - }, - { - "epoch": 1.3875413345566452, - "grad_norm": 0.002485455246642232, - "learning_rate": 0.0001999990509636658, - "loss": 46.0, - "step": 18148 - }, - { - "epoch": 1.3876177915400347, - "grad_norm": 0.0016624669078737497, - "learning_rate": 0.0001999990508590169, - "loss": 46.0, - "step": 18149 - }, - { - "epoch": 1.3876942485234245, - "grad_norm": 0.0015724037075415254, - "learning_rate": 0.00019999905075436221, - "loss": 46.0, - "step": 18150 - }, - { - "epoch": 1.3877707055068143, - "grad_norm": 0.0010537775233387947, - "learning_rate": 0.00019999905064970174, - "loss": 46.0, - "step": 18151 - }, - { - "epoch": 1.387847162490204, - "grad_norm": 0.003523198189213872, - "learning_rate": 0.00019999905054503554, - "loss": 46.0, - "step": 18152 - }, - { - "epoch": 1.3879236194735936, - "grad_norm": 0.002381999744102359, - "learning_rate": 0.00019999905044036352, - "loss": 46.0, - "step": 18153 - }, - { - "epoch": 1.3880000764569833, - "grad_norm": 0.001294403919018805, - "learning_rate": 0.00019999905033568578, - "loss": 46.0, - "step": 18154 - }, - { - "epoch": 1.388076533440373, - "grad_norm": 0.0008013628539629281, - "learning_rate": 0.00019999905023100224, - "loss": 46.0, - "step": 18155 - }, - { - "epoch": 1.3881529904237628, - "grad_norm": 0.0018153274431824684, - "learning_rate": 0.00019999905012631295, - "loss": 46.0, - "step": 18156 - }, - { - "epoch": 1.3882294474071526, - "grad_norm": 0.0008623525500297546, - "learning_rate": 0.00019999905002161786, - "loss": 46.0, - "step": 18157 - }, - { - "epoch": 1.3883059043905424, - "grad_norm": 0.0004039740015286952, - "learning_rate": 0.00019999904991691706, - "loss": 46.0, - "step": 18158 - }, - { - "epoch": 1.3883823613739321, - "grad_norm": 0.000994433299638331, - "learning_rate": 0.00019999904981221045, - "loss": 46.0, - "step": 18159 - }, - { - "epoch": 1.3884588183573217, - "grad_norm": 0.007976262830197811, - "learning_rate": 0.00019999904970749807, - "loss": 46.0, - "step": 18160 - }, - { - "epoch": 1.3885352753407114, - "grad_norm": 0.0012299882946535945, - "learning_rate": 0.0001999990496027799, - "loss": 46.0, - "step": 18161 - }, - { - "epoch": 1.3886117323241012, - "grad_norm": 0.0017116060480475426, - "learning_rate": 0.000199999049498056, - "loss": 46.0, - "step": 18162 - }, - { - "epoch": 1.388688189307491, - "grad_norm": 0.0010554678738117218, - "learning_rate": 0.0001999990493933263, - "loss": 46.0, - "step": 18163 - }, - { - "epoch": 1.3887646462908805, - "grad_norm": 0.000657076423522085, - "learning_rate": 0.00019999904928859086, - "loss": 46.0, - "step": 18164 - }, - { - "epoch": 1.3888411032742702, - "grad_norm": 0.0016456126468256116, - "learning_rate": 0.00019999904918384961, - "loss": 46.0, - "step": 18165 - }, - { - "epoch": 1.38891756025766, - "grad_norm": 0.000919406593311578, - "learning_rate": 0.00019999904907910265, - "loss": 46.0, - "step": 18166 - }, - { - "epoch": 1.3889940172410498, - "grad_norm": 0.0007722167647443712, - "learning_rate": 0.00019999904897434988, - "loss": 46.0, - "step": 18167 - }, - { - "epoch": 1.3890704742244395, - "grad_norm": 0.003925792407244444, - "learning_rate": 0.00019999904886959136, - "loss": 46.0, - "step": 18168 - }, - { - "epoch": 1.3891469312078293, - "grad_norm": 0.001052317675203085, - "learning_rate": 0.00019999904876482705, - "loss": 46.0, - "step": 18169 - }, - { - "epoch": 1.389223388191219, - "grad_norm": 0.0009575384319759905, - "learning_rate": 0.00019999904866005696, - "loss": 46.0, - "step": 18170 - }, - { - "epoch": 1.3892998451746086, - "grad_norm": 0.0005681662005372345, - "learning_rate": 0.00019999904855528113, - "loss": 46.0, - "step": 18171 - }, - { - "epoch": 1.3893763021579983, - "grad_norm": 0.0010825723875313997, - "learning_rate": 0.00019999904845049952, - "loss": 46.0, - "step": 18172 - }, - { - "epoch": 1.389452759141388, - "grad_norm": 0.0005776237812824547, - "learning_rate": 0.00019999904834571214, - "loss": 46.0, - "step": 18173 - }, - { - "epoch": 1.3895292161247779, - "grad_norm": 0.0010564846452325583, - "learning_rate": 0.00019999904824091898, - "loss": 46.0, - "step": 18174 - }, - { - "epoch": 1.3896056731081674, - "grad_norm": 0.002812911756336689, - "learning_rate": 0.00019999904813612008, - "loss": 46.0, - "step": 18175 - }, - { - "epoch": 1.3896821300915572, - "grad_norm": 0.0008109891205094755, - "learning_rate": 0.00019999904803131538, - "loss": 46.0, - "step": 18176 - }, - { - "epoch": 1.389758587074947, - "grad_norm": 0.002363978885114193, - "learning_rate": 0.00019999904792650494, - "loss": 46.0, - "step": 18177 - }, - { - "epoch": 1.3898350440583367, - "grad_norm": 0.0009998250752687454, - "learning_rate": 0.00019999904782168871, - "loss": 46.0, - "step": 18178 - }, - { - "epoch": 1.3899115010417264, - "grad_norm": 0.001122571644373238, - "learning_rate": 0.00019999904771686672, - "loss": 46.0, - "step": 18179 - }, - { - "epoch": 1.3899879580251162, - "grad_norm": 0.0007013622089289129, - "learning_rate": 0.00019999904761203893, - "loss": 46.0, - "step": 18180 - }, - { - "epoch": 1.390064415008506, - "grad_norm": 0.0003700793313328177, - "learning_rate": 0.0001999990475072054, - "loss": 46.0, - "step": 18181 - }, - { - "epoch": 1.3901408719918955, - "grad_norm": 0.0012531543616205454, - "learning_rate": 0.00019999904740236612, - "loss": 46.0, - "step": 18182 - }, - { - "epoch": 1.3902173289752853, - "grad_norm": 0.002273143734782934, - "learning_rate": 0.00019999904729752104, - "loss": 46.0, - "step": 18183 - }, - { - "epoch": 1.390293785958675, - "grad_norm": 0.001236573327332735, - "learning_rate": 0.0001999990471926702, - "loss": 46.0, - "step": 18184 - }, - { - "epoch": 1.3903702429420648, - "grad_norm": 0.0004166388534940779, - "learning_rate": 0.0001999990470878136, - "loss": 46.0, - "step": 18185 - }, - { - "epoch": 1.3904466999254543, - "grad_norm": 0.0013104334939271212, - "learning_rate": 0.00019999904698295121, - "loss": 46.0, - "step": 18186 - }, - { - "epoch": 1.390523156908844, - "grad_norm": 0.0015591164119541645, - "learning_rate": 0.00019999904687808306, - "loss": 46.0, - "step": 18187 - }, - { - "epoch": 1.3905996138922339, - "grad_norm": 0.006224802695214748, - "learning_rate": 0.00019999904677320913, - "loss": 46.0, - "step": 18188 - }, - { - "epoch": 1.3906760708756236, - "grad_norm": 0.001653607003390789, - "learning_rate": 0.00019999904666832946, - "loss": 46.0, - "step": 18189 - }, - { - "epoch": 1.3907525278590134, - "grad_norm": 0.0017317173769697547, - "learning_rate": 0.00019999904656344399, - "loss": 46.0, - "step": 18190 - }, - { - "epoch": 1.3908289848424031, - "grad_norm": 0.0013346589403226972, - "learning_rate": 0.00019999904645855276, - "loss": 46.0, - "step": 18191 - }, - { - "epoch": 1.390905441825793, - "grad_norm": 0.0019391510868445039, - "learning_rate": 0.00019999904635365577, - "loss": 46.0, - "step": 18192 - }, - { - "epoch": 1.3909818988091824, - "grad_norm": 0.005857347045093775, - "learning_rate": 0.000199999046248753, - "loss": 46.0, - "step": 18193 - }, - { - "epoch": 1.3910583557925722, - "grad_norm": 0.0011941107222810388, - "learning_rate": 0.00019999904614384446, - "loss": 46.0, - "step": 18194 - }, - { - "epoch": 1.391134812775962, - "grad_norm": 0.009722065180540085, - "learning_rate": 0.00019999904603893015, - "loss": 46.0, - "step": 18195 - }, - { - "epoch": 1.3912112697593517, - "grad_norm": 0.0006942698382772505, - "learning_rate": 0.00019999904593401006, - "loss": 46.0, - "step": 18196 - }, - { - "epoch": 1.3912877267427413, - "grad_norm": 0.0006114181596785784, - "learning_rate": 0.00019999904582908423, - "loss": 46.0, - "step": 18197 - }, - { - "epoch": 1.391364183726131, - "grad_norm": 0.0012399927945807576, - "learning_rate": 0.00019999904572415262, - "loss": 46.0, - "step": 18198 - }, - { - "epoch": 1.3914406407095208, - "grad_norm": 0.0009988698875531554, - "learning_rate": 0.00019999904561921524, - "loss": 46.0, - "step": 18199 - }, - { - "epoch": 1.3915170976929105, - "grad_norm": 0.000732092303223908, - "learning_rate": 0.0001999990455142721, - "loss": 46.0, - "step": 18200 - }, - { - "epoch": 1.3915935546763003, - "grad_norm": 0.0012976337457075715, - "learning_rate": 0.00019999904540932316, - "loss": 46.0, - "step": 18201 - }, - { - "epoch": 1.39167001165969, - "grad_norm": 0.0012231545988470316, - "learning_rate": 0.00019999904530436849, - "loss": 46.0, - "step": 18202 - }, - { - "epoch": 1.3917464686430798, - "grad_norm": 0.003263626014813781, - "learning_rate": 0.00019999904519940799, - "loss": 46.0, - "step": 18203 - }, - { - "epoch": 1.3918229256264694, - "grad_norm": 0.0005668877274729311, - "learning_rate": 0.0001999990450944418, - "loss": 46.0, - "step": 18204 - }, - { - "epoch": 1.3918993826098591, - "grad_norm": 0.0015439471462741494, - "learning_rate": 0.0001999990449894698, - "loss": 46.0, - "step": 18205 - }, - { - "epoch": 1.3919758395932489, - "grad_norm": 0.0019080775091424584, - "learning_rate": 0.000199999044884492, - "loss": 46.0, - "step": 18206 - }, - { - "epoch": 1.3920522965766386, - "grad_norm": 0.0003980291076004505, - "learning_rate": 0.00019999904477950846, - "loss": 46.0, - "step": 18207 - }, - { - "epoch": 1.3921287535600282, - "grad_norm": 0.0006080828607082367, - "learning_rate": 0.00019999904467451915, - "loss": 46.0, - "step": 18208 - }, - { - "epoch": 1.392205210543418, - "grad_norm": 0.002454567700624466, - "learning_rate": 0.00019999904456952406, - "loss": 46.0, - "step": 18209 - }, - { - "epoch": 1.3922816675268077, - "grad_norm": 0.0016060691559687257, - "learning_rate": 0.00019999904446452326, - "loss": 46.0, - "step": 18210 - }, - { - "epoch": 1.3923581245101975, - "grad_norm": 0.0007170222816057503, - "learning_rate": 0.00019999904435951665, - "loss": 46.0, - "step": 18211 - }, - { - "epoch": 1.3924345814935872, - "grad_norm": 0.0011072804918512702, - "learning_rate": 0.00019999904425450424, - "loss": 46.0, - "step": 18212 - }, - { - "epoch": 1.392511038476977, - "grad_norm": 0.004278427455574274, - "learning_rate": 0.00019999904414948606, - "loss": 46.0, - "step": 18213 - }, - { - "epoch": 1.3925874954603665, - "grad_norm": 0.0011637897696346045, - "learning_rate": 0.00019999904404446216, - "loss": 46.0, - "step": 18214 - }, - { - "epoch": 1.3926639524437563, - "grad_norm": 0.0004555878695100546, - "learning_rate": 0.00019999904393943246, - "loss": 46.0, - "step": 18215 - }, - { - "epoch": 1.392740409427146, - "grad_norm": 0.0012224672827869654, - "learning_rate": 0.00019999904383439702, - "loss": 46.0, - "step": 18216 - }, - { - "epoch": 1.3928168664105358, - "grad_norm": 0.0016161407111212611, - "learning_rate": 0.0001999990437293558, - "loss": 46.0, - "step": 18217 - }, - { - "epoch": 1.3928933233939256, - "grad_norm": 0.0005803713575005531, - "learning_rate": 0.00019999904362430878, - "loss": 46.0, - "step": 18218 - }, - { - "epoch": 1.3929697803773151, - "grad_norm": 0.0010446751257404685, - "learning_rate": 0.000199999043519256, - "loss": 46.0, - "step": 18219 - }, - { - "epoch": 1.3930462373607049, - "grad_norm": 0.0009772925404831767, - "learning_rate": 0.00019999904341419747, - "loss": 46.0, - "step": 18220 - }, - { - "epoch": 1.3931226943440946, - "grad_norm": 0.0005918702227063477, - "learning_rate": 0.00019999904330913316, - "loss": 46.0, - "step": 18221 - }, - { - "epoch": 1.3931991513274844, - "grad_norm": 0.001663452247157693, - "learning_rate": 0.0001999990432040631, - "loss": 46.0, - "step": 18222 - }, - { - "epoch": 1.3932756083108742, - "grad_norm": 0.004550378769636154, - "learning_rate": 0.00019999904309898724, - "loss": 46.0, - "step": 18223 - }, - { - "epoch": 1.393352065294264, - "grad_norm": 0.00024089851649478078, - "learning_rate": 0.0001999990429939056, - "loss": 46.0, - "step": 18224 - }, - { - "epoch": 1.3934285222776535, - "grad_norm": 0.0008896503131836653, - "learning_rate": 0.00019999904288881822, - "loss": 46.0, - "step": 18225 - }, - { - "epoch": 1.3935049792610432, - "grad_norm": 0.0010326530318707228, - "learning_rate": 0.00019999904278372504, - "loss": 46.0, - "step": 18226 - }, - { - "epoch": 1.393581436244433, - "grad_norm": 0.0008250580867752433, - "learning_rate": 0.00019999904267862614, - "loss": 46.0, - "step": 18227 - }, - { - "epoch": 1.3936578932278227, - "grad_norm": 0.0008015081402845681, - "learning_rate": 0.00019999904257352142, - "loss": 46.0, - "step": 18228 - }, - { - "epoch": 1.3937343502112123, - "grad_norm": 0.0025446105282753706, - "learning_rate": 0.00019999904246841097, - "loss": 46.0, - "step": 18229 - }, - { - "epoch": 1.393810807194602, - "grad_norm": 0.00039046400343067944, - "learning_rate": 0.00019999904236329475, - "loss": 46.0, - "step": 18230 - }, - { - "epoch": 1.3938872641779918, - "grad_norm": 0.0021942162420600653, - "learning_rate": 0.00019999904225817273, - "loss": 46.0, - "step": 18231 - }, - { - "epoch": 1.3939637211613816, - "grad_norm": 0.010255785658955574, - "learning_rate": 0.00019999904215304496, - "loss": 46.0, - "step": 18232 - }, - { - "epoch": 1.3940401781447713, - "grad_norm": 0.0015067668864503503, - "learning_rate": 0.00019999904204791142, - "loss": 46.0, - "step": 18233 - }, - { - "epoch": 1.394116635128161, - "grad_norm": 0.0006419286364689469, - "learning_rate": 0.0001999990419427721, - "loss": 46.0, - "step": 18234 - }, - { - "epoch": 1.3941930921115508, - "grad_norm": 0.001984857954084873, - "learning_rate": 0.000199999041837627, - "loss": 46.0, - "step": 18235 - }, - { - "epoch": 1.3942695490949404, - "grad_norm": 0.0015746374847367406, - "learning_rate": 0.00019999904173247617, - "loss": 46.0, - "step": 18236 - }, - { - "epoch": 1.3943460060783301, - "grad_norm": 0.0007287380867637694, - "learning_rate": 0.00019999904162731956, - "loss": 46.0, - "step": 18237 - }, - { - "epoch": 1.39442246306172, - "grad_norm": 0.0018226653337478638, - "learning_rate": 0.00019999904152215715, - "loss": 46.0, - "step": 18238 - }, - { - "epoch": 1.3944989200451097, - "grad_norm": 0.0035836955066770315, - "learning_rate": 0.000199999041416989, - "loss": 46.0, - "step": 18239 - }, - { - "epoch": 1.3945753770284992, - "grad_norm": 0.0006668817368336022, - "learning_rate": 0.00019999904131181507, - "loss": 46.0, - "step": 18240 - }, - { - "epoch": 1.394651834011889, - "grad_norm": 0.001640597707591951, - "learning_rate": 0.00019999904120663537, - "loss": 46.0, - "step": 18241 - }, - { - "epoch": 1.3947282909952787, - "grad_norm": 0.0020899372175335884, - "learning_rate": 0.0001999990411014499, - "loss": 46.0, - "step": 18242 - }, - { - "epoch": 1.3948047479786685, - "grad_norm": 0.0014405716210603714, - "learning_rate": 0.00019999904099625865, - "loss": 46.0, - "step": 18243 - }, - { - "epoch": 1.3948812049620583, - "grad_norm": 0.0013223255518823862, - "learning_rate": 0.00019999904089106166, - "loss": 46.0, - "step": 18244 - }, - { - "epoch": 1.394957661945448, - "grad_norm": 0.008614566177129745, - "learning_rate": 0.0001999990407858589, - "loss": 46.0, - "step": 18245 - }, - { - "epoch": 1.3950341189288378, - "grad_norm": 0.0011943380814045668, - "learning_rate": 0.00019999904068065033, - "loss": 46.0, - "step": 18246 - }, - { - "epoch": 1.3951105759122273, - "grad_norm": 0.010545573197305202, - "learning_rate": 0.00019999904057543604, - "loss": 46.0, - "step": 18247 - }, - { - "epoch": 1.395187032895617, - "grad_norm": 0.0006156945019029081, - "learning_rate": 0.00019999904047021593, - "loss": 46.0, - "step": 18248 - }, - { - "epoch": 1.3952634898790068, - "grad_norm": 0.0012297678040340543, - "learning_rate": 0.0001999990403649901, - "loss": 46.0, - "step": 18249 - }, - { - "epoch": 1.3953399468623966, - "grad_norm": 0.002259167144075036, - "learning_rate": 0.00019999904025975847, - "loss": 46.0, - "step": 18250 - }, - { - "epoch": 1.3954164038457861, - "grad_norm": 0.0022718163672834635, - "learning_rate": 0.0001999990401545211, - "loss": 46.0, - "step": 18251 - }, - { - "epoch": 1.395492860829176, - "grad_norm": 0.0020323318894952536, - "learning_rate": 0.0001999990400492779, - "loss": 46.0, - "step": 18252 - }, - { - "epoch": 1.3955693178125657, - "grad_norm": 0.0017350508132949471, - "learning_rate": 0.00019999903994402898, - "loss": 46.0, - "step": 18253 - }, - { - "epoch": 1.3956457747959554, - "grad_norm": 0.0012287198333069682, - "learning_rate": 0.00019999903983877428, - "loss": 46.0, - "step": 18254 - }, - { - "epoch": 1.3957222317793452, - "grad_norm": 0.0009139716858044267, - "learning_rate": 0.0001999990397335138, - "loss": 46.0, - "step": 18255 - }, - { - "epoch": 1.395798688762735, - "grad_norm": 0.001420615124516189, - "learning_rate": 0.00019999903962824756, - "loss": 46.0, - "step": 18256 - }, - { - "epoch": 1.3958751457461247, - "grad_norm": 0.0005012263427488506, - "learning_rate": 0.00019999903952297557, - "loss": 46.0, - "step": 18257 - }, - { - "epoch": 1.3959516027295142, - "grad_norm": 0.0016221682308241725, - "learning_rate": 0.00019999903941769778, - "loss": 46.0, - "step": 18258 - }, - { - "epoch": 1.396028059712904, - "grad_norm": 0.0007155595812946558, - "learning_rate": 0.00019999903931241427, - "loss": 46.0, - "step": 18259 - }, - { - "epoch": 1.3961045166962938, - "grad_norm": 0.007044223137199879, - "learning_rate": 0.00019999903920712493, - "loss": 46.0, - "step": 18260 - }, - { - "epoch": 1.3961809736796835, - "grad_norm": 0.0011055180802941322, - "learning_rate": 0.00019999903910182984, - "loss": 46.0, - "step": 18261 - }, - { - "epoch": 1.396257430663073, - "grad_norm": 0.0015097169671207666, - "learning_rate": 0.00019999903899652898, - "loss": 46.0, - "step": 18262 - }, - { - "epoch": 1.3963338876464628, - "grad_norm": 0.0043243844993412495, - "learning_rate": 0.00019999903889122235, - "loss": 46.0, - "step": 18263 - }, - { - "epoch": 1.3964103446298526, - "grad_norm": 0.0012168345274403691, - "learning_rate": 0.00019999903878590994, - "loss": 46.0, - "step": 18264 - }, - { - "epoch": 1.3964868016132423, - "grad_norm": 0.0007755389669910073, - "learning_rate": 0.0001999990386805918, - "loss": 46.0, - "step": 18265 - }, - { - "epoch": 1.396563258596632, - "grad_norm": 0.0021192762069404125, - "learning_rate": 0.00019999903857526787, - "loss": 46.0, - "step": 18266 - }, - { - "epoch": 1.3966397155800219, - "grad_norm": 0.002402530750259757, - "learning_rate": 0.00019999903846993817, - "loss": 46.0, - "step": 18267 - }, - { - "epoch": 1.3967161725634116, - "grad_norm": 0.0004343707987572998, - "learning_rate": 0.0001999990383646027, - "loss": 46.0, - "step": 18268 - }, - { - "epoch": 1.3967926295468012, - "grad_norm": 0.0013917690375819802, - "learning_rate": 0.00019999903825926145, - "loss": 46.0, - "step": 18269 - }, - { - "epoch": 1.396869086530191, - "grad_norm": 0.0006006306502968073, - "learning_rate": 0.00019999903815391443, - "loss": 46.0, - "step": 18270 - }, - { - "epoch": 1.3969455435135807, - "grad_norm": 0.0033171798568218946, - "learning_rate": 0.00019999903804856166, - "loss": 46.0, - "step": 18271 - }, - { - "epoch": 1.3970220004969705, - "grad_norm": 0.0005021097022108734, - "learning_rate": 0.00019999903794320313, - "loss": 46.0, - "step": 18272 - }, - { - "epoch": 1.39709845748036, - "grad_norm": 0.0017547993920743465, - "learning_rate": 0.00019999903783783879, - "loss": 46.0, - "step": 18273 - }, - { - "epoch": 1.3971749144637498, - "grad_norm": 0.0010602496331557631, - "learning_rate": 0.0001999990377324687, - "loss": 46.0, - "step": 18274 - }, - { - "epoch": 1.3972513714471395, - "grad_norm": 0.000917400699108839, - "learning_rate": 0.00019999903762709287, - "loss": 46.0, - "step": 18275 - }, - { - "epoch": 1.3973278284305293, - "grad_norm": 0.0012330766767263412, - "learning_rate": 0.0001999990375217112, - "loss": 46.0, - "step": 18276 - }, - { - "epoch": 1.397404285413919, - "grad_norm": 0.0012281337985768914, - "learning_rate": 0.00019999903741632383, - "loss": 46.0, - "step": 18277 - }, - { - "epoch": 1.3974807423973088, - "grad_norm": 0.0008549666381441057, - "learning_rate": 0.00019999903731093065, - "loss": 46.0, - "step": 18278 - }, - { - "epoch": 1.3975571993806986, - "grad_norm": 0.004524443298578262, - "learning_rate": 0.00019999903720553173, - "loss": 46.0, - "step": 18279 - }, - { - "epoch": 1.397633656364088, - "grad_norm": 0.0015528372023254633, - "learning_rate": 0.00019999903710012703, - "loss": 46.0, - "step": 18280 - }, - { - "epoch": 1.3977101133474779, - "grad_norm": 0.0013199535897001624, - "learning_rate": 0.00019999903699471653, - "loss": 46.0, - "step": 18281 - }, - { - "epoch": 1.3977865703308676, - "grad_norm": 0.006697415374219418, - "learning_rate": 0.0001999990368893003, - "loss": 46.0, - "step": 18282 - }, - { - "epoch": 1.3978630273142574, - "grad_norm": 0.006428098306059837, - "learning_rate": 0.0001999990367838783, - "loss": 46.0, - "step": 18283 - }, - { - "epoch": 1.397939484297647, - "grad_norm": 0.0013741186121478677, - "learning_rate": 0.0001999990366784505, - "loss": 46.0, - "step": 18284 - }, - { - "epoch": 1.3980159412810367, - "grad_norm": 0.014019043184816837, - "learning_rate": 0.00019999903657301696, - "loss": 46.0, - "step": 18285 - }, - { - "epoch": 1.3980923982644264, - "grad_norm": 0.0014207991771399975, - "learning_rate": 0.00019999903646757762, - "loss": 46.0, - "step": 18286 - }, - { - "epoch": 1.3981688552478162, - "grad_norm": 0.009901546873152256, - "learning_rate": 0.00019999903636213256, - "loss": 46.0, - "step": 18287 - }, - { - "epoch": 1.398245312231206, - "grad_norm": 0.0015407156897708774, - "learning_rate": 0.00019999903625668168, - "loss": 46.0, - "step": 18288 - }, - { - "epoch": 1.3983217692145957, - "grad_norm": 0.0014143314911052585, - "learning_rate": 0.00019999903615122507, - "loss": 46.0, - "step": 18289 - }, - { - "epoch": 1.3983982261979855, - "grad_norm": 0.0013123456155881286, - "learning_rate": 0.00019999903604576264, - "loss": 46.0, - "step": 18290 - }, - { - "epoch": 1.398474683181375, - "grad_norm": 0.0016088540432974696, - "learning_rate": 0.0001999990359402945, - "loss": 46.0, - "step": 18291 - }, - { - "epoch": 1.3985511401647648, - "grad_norm": 0.00041369968676008284, - "learning_rate": 0.00019999903583482056, - "loss": 46.0, - "step": 18292 - }, - { - "epoch": 1.3986275971481545, - "grad_norm": 0.004795182961970568, - "learning_rate": 0.00019999903572934084, - "loss": 46.0, - "step": 18293 - }, - { - "epoch": 1.3987040541315443, - "grad_norm": 0.0006917525315657258, - "learning_rate": 0.00019999903562385537, - "loss": 46.0, - "step": 18294 - }, - { - "epoch": 1.3987805111149338, - "grad_norm": 0.003914724104106426, - "learning_rate": 0.00019999903551836412, - "loss": 46.0, - "step": 18295 - }, - { - "epoch": 1.3988569680983236, - "grad_norm": 0.0014889624435454607, - "learning_rate": 0.0001999990354128671, - "loss": 46.0, - "step": 18296 - }, - { - "epoch": 1.3989334250817134, - "grad_norm": 0.0008348251576535404, - "learning_rate": 0.0001999990353073643, - "loss": 46.0, - "step": 18297 - }, - { - "epoch": 1.3990098820651031, - "grad_norm": 0.009671281091868877, - "learning_rate": 0.0001999990352018558, - "loss": 46.0, - "step": 18298 - }, - { - "epoch": 1.3990863390484929, - "grad_norm": 0.008715366944670677, - "learning_rate": 0.00019999903509634144, - "loss": 46.0, - "step": 18299 - }, - { - "epoch": 1.3991627960318826, - "grad_norm": 0.0030145812779664993, - "learning_rate": 0.00019999903499082135, - "loss": 46.0, - "step": 18300 - }, - { - "epoch": 1.3992392530152724, - "grad_norm": 0.0010817403672263026, - "learning_rate": 0.0001999990348852955, - "loss": 46.0, - "step": 18301 - }, - { - "epoch": 1.399315709998662, - "grad_norm": 0.0003958876186516136, - "learning_rate": 0.00019999903477976386, - "loss": 46.0, - "step": 18302 - }, - { - "epoch": 1.3993921669820517, - "grad_norm": 0.0022777714766561985, - "learning_rate": 0.00019999903467422648, - "loss": 46.0, - "step": 18303 - }, - { - "epoch": 1.3994686239654415, - "grad_norm": 0.0016170493327081203, - "learning_rate": 0.0001999990345686833, - "loss": 46.0, - "step": 18304 - }, - { - "epoch": 1.3995450809488312, - "grad_norm": 0.0036381310783326626, - "learning_rate": 0.00019999903446313435, - "loss": 46.0, - "step": 18305 - }, - { - "epoch": 1.3996215379322208, - "grad_norm": 0.002684554550796747, - "learning_rate": 0.00019999903435757963, - "loss": 46.0, - "step": 18306 - }, - { - "epoch": 1.3996979949156105, - "grad_norm": 0.0026757996529340744, - "learning_rate": 0.00019999903425201918, - "loss": 46.0, - "step": 18307 - }, - { - "epoch": 1.3997744518990003, - "grad_norm": 0.0011949578765779734, - "learning_rate": 0.0001999990341464529, - "loss": 46.0, - "step": 18308 - }, - { - "epoch": 1.39985090888239, - "grad_norm": 0.0006952470284886658, - "learning_rate": 0.00019999903404088092, - "loss": 46.0, - "step": 18309 - }, - { - "epoch": 1.3999273658657798, - "grad_norm": 0.002998712472617626, - "learning_rate": 0.0001999990339353031, - "loss": 46.0, - "step": 18310 - }, - { - "epoch": 1.4000038228491696, - "grad_norm": 0.0013851657276973128, - "learning_rate": 0.00019999903382971956, - "loss": 46.0, - "step": 18311 - }, - { - "epoch": 1.4000802798325593, - "grad_norm": 0.0017672590911388397, - "learning_rate": 0.00019999903372413025, - "loss": 46.0, - "step": 18312 - }, - { - "epoch": 1.4001567368159489, - "grad_norm": 0.001287979306653142, - "learning_rate": 0.00019999903361853514, - "loss": 46.0, - "step": 18313 - }, - { - "epoch": 1.4002331937993386, - "grad_norm": 0.0008470408502034843, - "learning_rate": 0.00019999903351293428, - "loss": 46.0, - "step": 18314 - }, - { - "epoch": 1.4003096507827284, - "grad_norm": 0.0009091580868698657, - "learning_rate": 0.00019999903340732765, - "loss": 46.0, - "step": 18315 - }, - { - "epoch": 1.4003861077661182, - "grad_norm": 0.0037146888207644224, - "learning_rate": 0.00019999903330171525, - "loss": 46.0, - "step": 18316 - }, - { - "epoch": 1.4004625647495077, - "grad_norm": 0.0018429117044433951, - "learning_rate": 0.00019999903319609707, - "loss": 46.0, - "step": 18317 - }, - { - "epoch": 1.4005390217328975, - "grad_norm": 0.0010084389941766858, - "learning_rate": 0.00019999903309047312, - "loss": 46.0, - "step": 18318 - }, - { - "epoch": 1.4006154787162872, - "grad_norm": 0.002414534566923976, - "learning_rate": 0.00019999903298484342, - "loss": 46.0, - "step": 18319 - }, - { - "epoch": 1.400691935699677, - "grad_norm": 0.0010621643159538507, - "learning_rate": 0.00019999903287920795, - "loss": 46.0, - "step": 18320 - }, - { - "epoch": 1.4007683926830667, - "grad_norm": 0.0004849625111091882, - "learning_rate": 0.00019999903277356668, - "loss": 46.0, - "step": 18321 - }, - { - "epoch": 1.4008448496664565, - "grad_norm": 0.0011082657147198915, - "learning_rate": 0.00019999903266791966, - "loss": 46.0, - "step": 18322 - }, - { - "epoch": 1.4009213066498463, - "grad_norm": 0.0006692179595120251, - "learning_rate": 0.0001999990325622669, - "loss": 46.0, - "step": 18323 - }, - { - "epoch": 1.4009977636332358, - "grad_norm": 0.0028096118476241827, - "learning_rate": 0.0001999990324566083, - "loss": 46.0, - "step": 18324 - }, - { - "epoch": 1.4010742206166256, - "grad_norm": 0.009814753197133541, - "learning_rate": 0.000199999032350944, - "loss": 46.0, - "step": 18325 - }, - { - "epoch": 1.4011506776000153, - "grad_norm": 0.00365639035589993, - "learning_rate": 0.0001999990322452739, - "loss": 46.0, - "step": 18326 - }, - { - "epoch": 1.401227134583405, - "grad_norm": 0.003469970775768161, - "learning_rate": 0.00019999903213959802, - "loss": 46.0, - "step": 18327 - }, - { - "epoch": 1.4013035915667946, - "grad_norm": 0.0012035678373649716, - "learning_rate": 0.0001999990320339164, - "loss": 46.0, - "step": 18328 - }, - { - "epoch": 1.4013800485501844, - "grad_norm": 0.0005106648895889521, - "learning_rate": 0.000199999031928229, - "loss": 46.0, - "step": 18329 - }, - { - "epoch": 1.4014565055335741, - "grad_norm": 0.002731070388108492, - "learning_rate": 0.0001999990318225358, - "loss": 46.0, - "step": 18330 - }, - { - "epoch": 1.401532962516964, - "grad_norm": 0.0008909603348001838, - "learning_rate": 0.00019999903171683686, - "loss": 46.0, - "step": 18331 - }, - { - "epoch": 1.4016094195003537, - "grad_norm": 0.0011393693275749683, - "learning_rate": 0.00019999903161113216, - "loss": 46.0, - "step": 18332 - }, - { - "epoch": 1.4016858764837434, - "grad_norm": 0.0022736121900379658, - "learning_rate": 0.00019999903150542166, - "loss": 46.0, - "step": 18333 - }, - { - "epoch": 1.4017623334671332, - "grad_norm": 0.0006398377008736134, - "learning_rate": 0.00019999903139970542, - "loss": 46.0, - "step": 18334 - }, - { - "epoch": 1.4018387904505227, - "grad_norm": 0.000655961106531322, - "learning_rate": 0.0001999990312939834, - "loss": 46.0, - "step": 18335 - }, - { - "epoch": 1.4019152474339125, - "grad_norm": 0.001291886786930263, - "learning_rate": 0.0001999990311882556, - "loss": 46.0, - "step": 18336 - }, - { - "epoch": 1.4019917044173023, - "grad_norm": 0.002854206133633852, - "learning_rate": 0.00019999903108252205, - "loss": 46.0, - "step": 18337 - }, - { - "epoch": 1.402068161400692, - "grad_norm": 0.001123277936130762, - "learning_rate": 0.0001999990309767827, - "loss": 46.0, - "step": 18338 - }, - { - "epoch": 1.4021446183840816, - "grad_norm": 0.0006095334538258612, - "learning_rate": 0.00019999903087103763, - "loss": 46.0, - "step": 18339 - }, - { - "epoch": 1.4022210753674713, - "grad_norm": 0.0033795267809182405, - "learning_rate": 0.00019999903076528677, - "loss": 46.0, - "step": 18340 - }, - { - "epoch": 1.402297532350861, - "grad_norm": 0.00105930189602077, - "learning_rate": 0.0001999990306595301, - "loss": 46.0, - "step": 18341 - }, - { - "epoch": 1.4023739893342508, - "grad_norm": 0.01124873198568821, - "learning_rate": 0.0001999990305537677, - "loss": 46.0, - "step": 18342 - }, - { - "epoch": 1.4024504463176406, - "grad_norm": 0.0012958410661667585, - "learning_rate": 0.00019999903044799953, - "loss": 46.0, - "step": 18343 - }, - { - "epoch": 1.4025269033010304, - "grad_norm": 0.002868752693757415, - "learning_rate": 0.00019999903034222558, - "loss": 46.0, - "step": 18344 - }, - { - "epoch": 1.40260336028442, - "grad_norm": 0.0014347676187753677, - "learning_rate": 0.00019999903023644588, - "loss": 46.0, - "step": 18345 - }, - { - "epoch": 1.4026798172678097, - "grad_norm": 0.00029974826611578465, - "learning_rate": 0.00019999903013066036, - "loss": 46.0, - "step": 18346 - }, - { - "epoch": 1.4027562742511994, - "grad_norm": 0.0026474210899323225, - "learning_rate": 0.00019999903002486914, - "loss": 46.0, - "step": 18347 - }, - { - "epoch": 1.4028327312345892, - "grad_norm": 0.0010696279350668192, - "learning_rate": 0.00019999902991907212, - "loss": 46.0, - "step": 18348 - }, - { - "epoch": 1.402909188217979, - "grad_norm": 0.0006355736986733973, - "learning_rate": 0.00019999902981326933, - "loss": 46.0, - "step": 18349 - }, - { - "epoch": 1.4029856452013685, - "grad_norm": 0.0008076782687567174, - "learning_rate": 0.00019999902970746077, - "loss": 46.0, - "step": 18350 - }, - { - "epoch": 1.4030621021847582, - "grad_norm": 0.0005658628651872277, - "learning_rate": 0.00019999902960164643, - "loss": 46.0, - "step": 18351 - }, - { - "epoch": 1.403138559168148, - "grad_norm": 0.0008830587030388415, - "learning_rate": 0.00019999902949582632, - "loss": 46.0, - "step": 18352 - }, - { - "epoch": 1.4032150161515378, - "grad_norm": 0.0007348970975726843, - "learning_rate": 0.00019999902939000044, - "loss": 46.0, - "step": 18353 - }, - { - "epoch": 1.4032914731349275, - "grad_norm": 0.0007917945040389895, - "learning_rate": 0.0001999990292841688, - "loss": 46.0, - "step": 18354 - }, - { - "epoch": 1.4033679301183173, - "grad_norm": 0.0025544478558003902, - "learning_rate": 0.0001999990291783314, - "loss": 46.0, - "step": 18355 - }, - { - "epoch": 1.4034443871017068, - "grad_norm": 0.0004933216259814799, - "learning_rate": 0.00019999902907248823, - "loss": 46.0, - "step": 18356 - }, - { - "epoch": 1.4035208440850966, - "grad_norm": 0.0007026851526461542, - "learning_rate": 0.00019999902896663928, - "loss": 46.0, - "step": 18357 - }, - { - "epoch": 1.4035973010684863, - "grad_norm": 0.0010201854165643454, - "learning_rate": 0.00019999902886078458, - "loss": 46.0, - "step": 18358 - }, - { - "epoch": 1.403673758051876, - "grad_norm": 0.0006736062350682914, - "learning_rate": 0.00019999902875492405, - "loss": 46.0, - "step": 18359 - }, - { - "epoch": 1.4037502150352656, - "grad_norm": 0.0006356733501888812, - "learning_rate": 0.00019999902864905778, - "loss": 46.0, - "step": 18360 - }, - { - "epoch": 1.4038266720186554, - "grad_norm": 0.0006490937666967511, - "learning_rate": 0.00019999902854318577, - "loss": 46.0, - "step": 18361 - }, - { - "epoch": 1.4039031290020452, - "grad_norm": 0.0063923317939043045, - "learning_rate": 0.00019999902843730798, - "loss": 46.0, - "step": 18362 - }, - { - "epoch": 1.403979585985435, - "grad_norm": 0.003219634760171175, - "learning_rate": 0.0001999990283314244, - "loss": 46.0, - "step": 18363 - }, - { - "epoch": 1.4040560429688247, - "grad_norm": 0.0013041739584878087, - "learning_rate": 0.0001999990282255351, - "loss": 46.0, - "step": 18364 - }, - { - "epoch": 1.4041324999522145, - "grad_norm": 0.0015903398161754012, - "learning_rate": 0.00019999902811964, - "loss": 46.0, - "step": 18365 - }, - { - "epoch": 1.4042089569356042, - "grad_norm": 0.004123296122997999, - "learning_rate": 0.0001999990280137391, - "loss": 46.0, - "step": 18366 - }, - { - "epoch": 1.4042854139189938, - "grad_norm": 0.0008914932841435075, - "learning_rate": 0.00019999902790783248, - "loss": 46.0, - "step": 18367 - }, - { - "epoch": 1.4043618709023835, - "grad_norm": 0.0006523121846839786, - "learning_rate": 0.00019999902780192005, - "loss": 46.0, - "step": 18368 - }, - { - "epoch": 1.4044383278857733, - "grad_norm": 0.007997400127351284, - "learning_rate": 0.00019999902769600187, - "loss": 46.0, - "step": 18369 - }, - { - "epoch": 1.404514784869163, - "grad_norm": 0.0005990521749481559, - "learning_rate": 0.00019999902759007792, - "loss": 46.0, - "step": 18370 - }, - { - "epoch": 1.4045912418525526, - "grad_norm": 0.0008489358006045222, - "learning_rate": 0.0001999990274841482, - "loss": 46.0, - "step": 18371 - }, - { - "epoch": 1.4046676988359423, - "grad_norm": 0.0013013811549171805, - "learning_rate": 0.00019999902737821273, - "loss": 46.0, - "step": 18372 - }, - { - "epoch": 1.404744155819332, - "grad_norm": 0.003888198174536228, - "learning_rate": 0.00019999902727227146, - "loss": 46.0, - "step": 18373 - }, - { - "epoch": 1.4048206128027219, - "grad_norm": 0.0003828374610748142, - "learning_rate": 0.00019999902716632444, - "loss": 46.0, - "step": 18374 - }, - { - "epoch": 1.4048970697861116, - "grad_norm": 0.0018283678218722343, - "learning_rate": 0.00019999902706037162, - "loss": 46.0, - "step": 18375 - }, - { - "epoch": 1.4049735267695014, - "grad_norm": 0.00234527001157403, - "learning_rate": 0.00019999902695441306, - "loss": 46.0, - "step": 18376 - }, - { - "epoch": 1.4050499837528911, - "grad_norm": 0.001431879703886807, - "learning_rate": 0.00019999902684844872, - "loss": 46.0, - "step": 18377 - }, - { - "epoch": 1.4051264407362807, - "grad_norm": 0.000716458831448108, - "learning_rate": 0.00019999902674247864, - "loss": 46.0, - "step": 18378 - }, - { - "epoch": 1.4052028977196704, - "grad_norm": 0.003407655283808708, - "learning_rate": 0.00019999902663650273, - "loss": 46.0, - "step": 18379 - }, - { - "epoch": 1.4052793547030602, - "grad_norm": 0.0023236097767949104, - "learning_rate": 0.00019999902653052113, - "loss": 46.0, - "step": 18380 - }, - { - "epoch": 1.40535581168645, - "grad_norm": 0.0033848697785288095, - "learning_rate": 0.0001999990264245337, - "loss": 46.0, - "step": 18381 - }, - { - "epoch": 1.4054322686698395, - "grad_norm": 0.0005153603269718587, - "learning_rate": 0.0001999990263185405, - "loss": 46.0, - "step": 18382 - }, - { - "epoch": 1.4055087256532293, - "grad_norm": 0.00100629439111799, - "learning_rate": 0.00019999902621254157, - "loss": 46.0, - "step": 18383 - }, - { - "epoch": 1.405585182636619, - "grad_norm": 0.005232179071754217, - "learning_rate": 0.00019999902610653685, - "loss": 46.0, - "step": 18384 - }, - { - "epoch": 1.4056616396200088, - "grad_norm": 0.00042024534195661545, - "learning_rate": 0.00019999902600052635, - "loss": 46.0, - "step": 18385 - }, - { - "epoch": 1.4057380966033985, - "grad_norm": 0.0026354603469371796, - "learning_rate": 0.00019999902589451008, - "loss": 46.0, - "step": 18386 - }, - { - "epoch": 1.4058145535867883, - "grad_norm": 0.005128634627908468, - "learning_rate": 0.00019999902578848807, - "loss": 46.0, - "step": 18387 - }, - { - "epoch": 1.405891010570178, - "grad_norm": 0.0005527346511371434, - "learning_rate": 0.00019999902568246025, - "loss": 46.0, - "step": 18388 - }, - { - "epoch": 1.4059674675535676, - "grad_norm": 0.0005377604975365102, - "learning_rate": 0.00019999902557642666, - "loss": 46.0, - "step": 18389 - }, - { - "epoch": 1.4060439245369574, - "grad_norm": 0.0015182136557996273, - "learning_rate": 0.00019999902547038735, - "loss": 46.0, - "step": 18390 - }, - { - "epoch": 1.4061203815203471, - "grad_norm": 0.0008125728345476091, - "learning_rate": 0.00019999902536434224, - "loss": 46.0, - "step": 18391 - }, - { - "epoch": 1.406196838503737, - "grad_norm": 0.0009747163858264685, - "learning_rate": 0.00019999902525829138, - "loss": 46.0, - "step": 18392 - }, - { - "epoch": 1.4062732954871264, - "grad_norm": 0.0013950306456536055, - "learning_rate": 0.0001999990251522347, - "loss": 46.0, - "step": 18393 - }, - { - "epoch": 1.4063497524705162, - "grad_norm": 0.0017438828945159912, - "learning_rate": 0.0001999990250461723, - "loss": 46.0, - "step": 18394 - }, - { - "epoch": 1.406426209453906, - "grad_norm": 0.0008534836815670133, - "learning_rate": 0.00019999902494010412, - "loss": 46.0, - "step": 18395 - }, - { - "epoch": 1.4065026664372957, - "grad_norm": 0.0009154222207143903, - "learning_rate": 0.00019999902483403017, - "loss": 46.0, - "step": 18396 - }, - { - "epoch": 1.4065791234206855, - "grad_norm": 0.0009027472697198391, - "learning_rate": 0.00019999902472795045, - "loss": 46.0, - "step": 18397 - }, - { - "epoch": 1.4066555804040752, - "grad_norm": 0.003171935910359025, - "learning_rate": 0.00019999902462186495, - "loss": 46.0, - "step": 18398 - }, - { - "epoch": 1.406732037387465, - "grad_norm": 0.004676215350627899, - "learning_rate": 0.00019999902451577368, - "loss": 46.0, - "step": 18399 - }, - { - "epoch": 1.4068084943708545, - "grad_norm": 0.0008767013205215335, - "learning_rate": 0.00019999902440967664, - "loss": 46.0, - "step": 18400 - }, - { - "epoch": 1.4068849513542443, - "grad_norm": 0.0007084507960826159, - "learning_rate": 0.00019999902430357385, - "loss": 46.0, - "step": 18401 - }, - { - "epoch": 1.406961408337634, - "grad_norm": 0.001167476293630898, - "learning_rate": 0.0001999990241974653, - "loss": 46.0, - "step": 18402 - }, - { - "epoch": 1.4070378653210238, - "grad_norm": 0.00039947484037838876, - "learning_rate": 0.00019999902409135092, - "loss": 46.0, - "step": 18403 - }, - { - "epoch": 1.4071143223044134, - "grad_norm": 0.0010710599599406123, - "learning_rate": 0.00019999902398523081, - "loss": 46.0, - "step": 18404 - }, - { - "epoch": 1.4071907792878031, - "grad_norm": 0.0005583700840361416, - "learning_rate": 0.00019999902387910493, - "loss": 46.0, - "step": 18405 - }, - { - "epoch": 1.4072672362711929, - "grad_norm": 0.0008522698190063238, - "learning_rate": 0.00019999902377297328, - "loss": 46.0, - "step": 18406 - }, - { - "epoch": 1.4073436932545826, - "grad_norm": 0.0007379850139841437, - "learning_rate": 0.00019999902366683587, - "loss": 46.0, - "step": 18407 - }, - { - "epoch": 1.4074201502379724, - "grad_norm": 0.00615641800686717, - "learning_rate": 0.0001999990235606927, - "loss": 46.0, - "step": 18408 - }, - { - "epoch": 1.4074966072213622, - "grad_norm": 0.0008752090507186949, - "learning_rate": 0.00019999902345454372, - "loss": 46.0, - "step": 18409 - }, - { - "epoch": 1.407573064204752, - "grad_norm": 0.0003531481488607824, - "learning_rate": 0.000199999023348389, - "loss": 46.0, - "step": 18410 - }, - { - "epoch": 1.4076495211881415, - "grad_norm": 0.0014633395476266742, - "learning_rate": 0.0001999990232422285, - "loss": 46.0, - "step": 18411 - }, - { - "epoch": 1.4077259781715312, - "grad_norm": 0.0010693443473428488, - "learning_rate": 0.00019999902313606226, - "loss": 46.0, - "step": 18412 - }, - { - "epoch": 1.407802435154921, - "grad_norm": 0.0009467890486121178, - "learning_rate": 0.00019999902302989022, - "loss": 46.0, - "step": 18413 - }, - { - "epoch": 1.4078788921383107, - "grad_norm": 0.0047689140774309635, - "learning_rate": 0.0001999990229237124, - "loss": 46.0, - "step": 18414 - }, - { - "epoch": 1.4079553491217003, - "grad_norm": 0.004703803453594446, - "learning_rate": 0.00019999902281752884, - "loss": 46.0, - "step": 18415 - }, - { - "epoch": 1.40803180610509, - "grad_norm": 0.0007608246523886919, - "learning_rate": 0.00019999902271133948, - "loss": 46.0, - "step": 18416 - }, - { - "epoch": 1.4081082630884798, - "grad_norm": 0.003276802832260728, - "learning_rate": 0.0001999990226051444, - "loss": 46.0, - "step": 18417 - }, - { - "epoch": 1.4081847200718696, - "grad_norm": 0.0008925948641262949, - "learning_rate": 0.0001999990224989435, - "loss": 46.0, - "step": 18418 - }, - { - "epoch": 1.4082611770552593, - "grad_norm": 0.00042892477358691394, - "learning_rate": 0.00019999902239273686, - "loss": 46.0, - "step": 18419 - }, - { - "epoch": 1.408337634038649, - "grad_norm": 0.001327955978922546, - "learning_rate": 0.00019999902228652443, - "loss": 46.0, - "step": 18420 - }, - { - "epoch": 1.4084140910220388, - "grad_norm": 0.0019752285443246365, - "learning_rate": 0.00019999902218030625, - "loss": 46.0, - "step": 18421 - }, - { - "epoch": 1.4084905480054284, - "grad_norm": 0.00046541838673874736, - "learning_rate": 0.00019999902207408227, - "loss": 46.0, - "step": 18422 - }, - { - "epoch": 1.4085670049888181, - "grad_norm": 0.0009978982852771878, - "learning_rate": 0.00019999902196785255, - "loss": 46.0, - "step": 18423 - }, - { - "epoch": 1.408643461972208, - "grad_norm": 0.0006274883635342121, - "learning_rate": 0.00019999902186161706, - "loss": 46.0, - "step": 18424 - }, - { - "epoch": 1.4087199189555977, - "grad_norm": 0.001348936348222196, - "learning_rate": 0.00019999902175537576, - "loss": 46.0, - "step": 18425 - }, - { - "epoch": 1.4087963759389872, - "grad_norm": 0.0012112503172829747, - "learning_rate": 0.00019999902164912877, - "loss": 46.0, - "step": 18426 - }, - { - "epoch": 1.408872832922377, - "grad_norm": 0.001995028229430318, - "learning_rate": 0.00019999902154287593, - "loss": 46.0, - "step": 18427 - }, - { - "epoch": 1.4089492899057667, - "grad_norm": 0.0014687899965792894, - "learning_rate": 0.00019999902143661737, - "loss": 46.0, - "step": 18428 - }, - { - "epoch": 1.4090257468891565, - "grad_norm": 0.0006656462792307138, - "learning_rate": 0.00019999902133035303, - "loss": 46.0, - "step": 18429 - }, - { - "epoch": 1.4091022038725463, - "grad_norm": 0.0026355283334851265, - "learning_rate": 0.0001999990212240829, - "loss": 46.0, - "step": 18430 - }, - { - "epoch": 1.409178660855936, - "grad_norm": 0.0028784542810171843, - "learning_rate": 0.00019999902111780701, - "loss": 46.0, - "step": 18431 - }, - { - "epoch": 1.4092551178393258, - "grad_norm": 0.0012396732345223427, - "learning_rate": 0.00019999902101152536, - "loss": 46.0, - "step": 18432 - }, - { - "epoch": 1.4093315748227153, - "grad_norm": 0.0003747464215848595, - "learning_rate": 0.00019999902090523796, - "loss": 46.0, - "step": 18433 - }, - { - "epoch": 1.409408031806105, - "grad_norm": 0.001326856086961925, - "learning_rate": 0.00019999902079894475, - "loss": 46.0, - "step": 18434 - }, - { - "epoch": 1.4094844887894948, - "grad_norm": 0.0017358335899189115, - "learning_rate": 0.00019999902069264578, - "loss": 46.0, - "step": 18435 - }, - { - "epoch": 1.4095609457728846, - "grad_norm": 0.0005002904217690229, - "learning_rate": 0.00019999902058634106, - "loss": 46.0, - "step": 18436 - }, - { - "epoch": 1.4096374027562741, - "grad_norm": 0.000988552812486887, - "learning_rate": 0.00019999902048003056, - "loss": 46.0, - "step": 18437 - }, - { - "epoch": 1.409713859739664, - "grad_norm": 0.000578157021664083, - "learning_rate": 0.0001999990203737143, - "loss": 46.0, - "step": 18438 - }, - { - "epoch": 1.4097903167230537, - "grad_norm": 0.0007497709011659026, - "learning_rate": 0.00019999902026739225, - "loss": 46.0, - "step": 18439 - }, - { - "epoch": 1.4098667737064434, - "grad_norm": 0.0009373456705361605, - "learning_rate": 0.00019999902016106444, - "loss": 46.0, - "step": 18440 - }, - { - "epoch": 1.4099432306898332, - "grad_norm": 0.0007314086542464793, - "learning_rate": 0.00019999902005473085, - "loss": 46.0, - "step": 18441 - }, - { - "epoch": 1.410019687673223, - "grad_norm": 0.0011600431753322482, - "learning_rate": 0.0001999990199483915, - "loss": 46.0, - "step": 18442 - }, - { - "epoch": 1.4100961446566127, - "grad_norm": 0.00025432309485040605, - "learning_rate": 0.0001999990198420464, - "loss": 46.0, - "step": 18443 - }, - { - "epoch": 1.4101726016400022, - "grad_norm": 0.023697514086961746, - "learning_rate": 0.00019999901973569552, - "loss": 46.0, - "step": 18444 - }, - { - "epoch": 1.410249058623392, - "grad_norm": 0.0023732506670057774, - "learning_rate": 0.00019999901962933887, - "loss": 46.0, - "step": 18445 - }, - { - "epoch": 1.4103255156067818, - "grad_norm": 0.0023167117033153772, - "learning_rate": 0.00019999901952297644, - "loss": 46.0, - "step": 18446 - }, - { - "epoch": 1.4104019725901715, - "grad_norm": 0.00045706008677370846, - "learning_rate": 0.00019999901941660823, - "loss": 46.0, - "step": 18447 - }, - { - "epoch": 1.410478429573561, - "grad_norm": 0.0009085984784178436, - "learning_rate": 0.0001999990193102343, - "loss": 46.0, - "step": 18448 - }, - { - "epoch": 1.4105548865569508, - "grad_norm": 0.010532423853874207, - "learning_rate": 0.00019999901920385456, - "loss": 46.0, - "step": 18449 - }, - { - "epoch": 1.4106313435403406, - "grad_norm": 0.001632206724025309, - "learning_rate": 0.00019999901909746904, - "loss": 46.0, - "step": 18450 - }, - { - "epoch": 1.4107078005237303, - "grad_norm": 0.0016392648685723543, - "learning_rate": 0.00019999901899107777, - "loss": 46.0, - "step": 18451 - }, - { - "epoch": 1.41078425750712, - "grad_norm": 0.002026929520070553, - "learning_rate": 0.00019999901888468073, - "loss": 46.0, - "step": 18452 - }, - { - "epoch": 1.4108607144905099, - "grad_norm": 0.0008087048772722483, - "learning_rate": 0.00019999901877827792, - "loss": 46.0, - "step": 18453 - }, - { - "epoch": 1.4109371714738996, - "grad_norm": 0.002954391296952963, - "learning_rate": 0.00019999901867186936, - "loss": 46.0, - "step": 18454 - }, - { - "epoch": 1.4110136284572892, - "grad_norm": 0.004516988061368465, - "learning_rate": 0.000199999018565455, - "loss": 46.0, - "step": 18455 - }, - { - "epoch": 1.411090085440679, - "grad_norm": 0.003901523072272539, - "learning_rate": 0.00019999901845903489, - "loss": 46.0, - "step": 18456 - }, - { - "epoch": 1.4111665424240687, - "grad_norm": 0.0011401647934690118, - "learning_rate": 0.000199999018352609, - "loss": 46.0, - "step": 18457 - }, - { - "epoch": 1.4112429994074585, - "grad_norm": 0.0014308291720226407, - "learning_rate": 0.00019999901824617732, - "loss": 46.0, - "step": 18458 - }, - { - "epoch": 1.411319456390848, - "grad_norm": 0.004948511719703674, - "learning_rate": 0.0001999990181397399, - "loss": 46.0, - "step": 18459 - }, - { - "epoch": 1.4113959133742378, - "grad_norm": 0.000722626515198499, - "learning_rate": 0.0001999990180332967, - "loss": 46.0, - "step": 18460 - }, - { - "epoch": 1.4114723703576275, - "grad_norm": 0.0021766405552625656, - "learning_rate": 0.00019999901792684774, - "loss": 46.0, - "step": 18461 - }, - { - "epoch": 1.4115488273410173, - "grad_norm": 0.0012948965886607766, - "learning_rate": 0.000199999017820393, - "loss": 46.0, - "step": 18462 - }, - { - "epoch": 1.411625284324407, - "grad_norm": 0.00854575913399458, - "learning_rate": 0.0001999990177139325, - "loss": 46.0, - "step": 18463 - }, - { - "epoch": 1.4117017413077968, - "grad_norm": 0.0036632472183555365, - "learning_rate": 0.00019999901760746623, - "loss": 46.0, - "step": 18464 - }, - { - "epoch": 1.4117781982911866, - "grad_norm": 0.0010747485794126987, - "learning_rate": 0.0001999990175009942, - "loss": 46.0, - "step": 18465 - }, - { - "epoch": 1.411854655274576, - "grad_norm": 0.004225791897624731, - "learning_rate": 0.00019999901739451638, - "loss": 46.0, - "step": 18466 - }, - { - "epoch": 1.4119311122579659, - "grad_norm": 0.001447275746613741, - "learning_rate": 0.0001999990172880328, - "loss": 46.0, - "step": 18467 - }, - { - "epoch": 1.4120075692413556, - "grad_norm": 0.001079175970517099, - "learning_rate": 0.00019999901718154343, - "loss": 46.0, - "step": 18468 - }, - { - "epoch": 1.4120840262247454, - "grad_norm": 0.0010275662643834949, - "learning_rate": 0.00019999901707504832, - "loss": 46.0, - "step": 18469 - }, - { - "epoch": 1.412160483208135, - "grad_norm": 0.0005107459146529436, - "learning_rate": 0.0001999990169685474, - "loss": 46.0, - "step": 18470 - }, - { - "epoch": 1.4122369401915247, - "grad_norm": 0.0029556278605014086, - "learning_rate": 0.00019999901686204076, - "loss": 46.0, - "step": 18471 - }, - { - "epoch": 1.4123133971749144, - "grad_norm": 0.0016287070466205478, - "learning_rate": 0.00019999901675552836, - "loss": 46.0, - "step": 18472 - }, - { - "epoch": 1.4123898541583042, - "grad_norm": 0.0010849777609109879, - "learning_rate": 0.00019999901664901013, - "loss": 46.0, - "step": 18473 - }, - { - "epoch": 1.412466311141694, - "grad_norm": 0.0010581731330603361, - "learning_rate": 0.00019999901654248618, - "loss": 46.0, - "step": 18474 - }, - { - "epoch": 1.4125427681250837, - "grad_norm": 0.005207499954849482, - "learning_rate": 0.00019999901643595643, - "loss": 46.0, - "step": 18475 - }, - { - "epoch": 1.4126192251084733, - "grad_norm": 0.01932896487414837, - "learning_rate": 0.00019999901632942094, - "loss": 46.0, - "step": 18476 - }, - { - "epoch": 1.412695682091863, - "grad_norm": 0.0028620220255106688, - "learning_rate": 0.00019999901622287967, - "loss": 46.0, - "step": 18477 - }, - { - "epoch": 1.4127721390752528, - "grad_norm": 0.0013349989894777536, - "learning_rate": 0.0001999990161163326, - "loss": 46.0, - "step": 18478 - }, - { - "epoch": 1.4128485960586425, - "grad_norm": 0.0021573402918875217, - "learning_rate": 0.00019999901600977979, - "loss": 46.0, - "step": 18479 - }, - { - "epoch": 1.4129250530420323, - "grad_norm": 0.0010723735904321074, - "learning_rate": 0.0001999990159032212, - "loss": 46.0, - "step": 18480 - }, - { - "epoch": 1.4130015100254218, - "grad_norm": 0.00990714505314827, - "learning_rate": 0.00019999901579665686, - "loss": 46.0, - "step": 18481 - }, - { - "epoch": 1.4130779670088116, - "grad_norm": 0.0011306923115625978, - "learning_rate": 0.00019999901569008676, - "loss": 46.0, - "step": 18482 - }, - { - "epoch": 1.4131544239922014, - "grad_norm": 0.0011009775334969163, - "learning_rate": 0.00019999901558351085, - "loss": 46.0, - "step": 18483 - }, - { - "epoch": 1.4132308809755911, - "grad_norm": 0.0013946243561804295, - "learning_rate": 0.00019999901547692917, - "loss": 46.0, - "step": 18484 - }, - { - "epoch": 1.413307337958981, - "grad_norm": 0.0011130532948300242, - "learning_rate": 0.00019999901537034177, - "loss": 46.0, - "step": 18485 - }, - { - "epoch": 1.4133837949423707, - "grad_norm": 0.0018272692104801536, - "learning_rate": 0.00019999901526374854, - "loss": 46.0, - "step": 18486 - }, - { - "epoch": 1.4134602519257602, - "grad_norm": 0.001515660434961319, - "learning_rate": 0.0001999990151571496, - "loss": 46.0, - "step": 18487 - }, - { - "epoch": 1.41353670890915, - "grad_norm": 0.0005353334126994014, - "learning_rate": 0.00019999901505054484, - "loss": 46.0, - "step": 18488 - }, - { - "epoch": 1.4136131658925397, - "grad_norm": 0.0019273837096989155, - "learning_rate": 0.00019999901494393432, - "loss": 46.0, - "step": 18489 - }, - { - "epoch": 1.4136896228759295, - "grad_norm": 0.003083708230406046, - "learning_rate": 0.00019999901483731805, - "loss": 46.0, - "step": 18490 - }, - { - "epoch": 1.413766079859319, - "grad_norm": 0.001123978290706873, - "learning_rate": 0.00019999901473069599, - "loss": 46.0, - "step": 18491 - }, - { - "epoch": 1.4138425368427088, - "grad_norm": 0.0005934460205025971, - "learning_rate": 0.00019999901462406817, - "loss": 46.0, - "step": 18492 - }, - { - "epoch": 1.4139189938260985, - "grad_norm": 0.0016057519242167473, - "learning_rate": 0.00019999901451743458, - "loss": 46.0, - "step": 18493 - }, - { - "epoch": 1.4139954508094883, - "grad_norm": 0.0028722987044602633, - "learning_rate": 0.00019999901441079525, - "loss": 46.0, - "step": 18494 - }, - { - "epoch": 1.414071907792878, - "grad_norm": 0.0031471208203583956, - "learning_rate": 0.00019999901430415014, - "loss": 46.0, - "step": 18495 - }, - { - "epoch": 1.4141483647762678, - "grad_norm": 0.0006638827035203576, - "learning_rate": 0.00019999901419749924, - "loss": 46.0, - "step": 18496 - }, - { - "epoch": 1.4142248217596576, - "grad_norm": 0.0016214716015383601, - "learning_rate": 0.00019999901409084255, - "loss": 46.0, - "step": 18497 - }, - { - "epoch": 1.4143012787430471, - "grad_norm": 0.001752912299707532, - "learning_rate": 0.00019999901398418013, - "loss": 46.0, - "step": 18498 - }, - { - "epoch": 1.4143777357264369, - "grad_norm": 0.0023880701046437025, - "learning_rate": 0.00019999901387751195, - "loss": 46.0, - "step": 18499 - }, - { - "epoch": 1.4144541927098266, - "grad_norm": 0.0004587325674947351, - "learning_rate": 0.00019999901377083795, - "loss": 46.0, - "step": 18500 - }, - { - "epoch": 1.4145306496932164, - "grad_norm": 0.003012463916093111, - "learning_rate": 0.0001999990136641582, - "loss": 46.0, - "step": 18501 - }, - { - "epoch": 1.414607106676606, - "grad_norm": 0.0010055694729089737, - "learning_rate": 0.0001999990135574727, - "loss": 46.0, - "step": 18502 - }, - { - "epoch": 1.4146835636599957, - "grad_norm": 0.0007959576905705035, - "learning_rate": 0.00019999901345078142, - "loss": 46.0, - "step": 18503 - }, - { - "epoch": 1.4147600206433855, - "grad_norm": 0.0008114899392239749, - "learning_rate": 0.00019999901334408438, - "loss": 46.0, - "step": 18504 - }, - { - "epoch": 1.4148364776267752, - "grad_norm": 0.003362583927810192, - "learning_rate": 0.00019999901323738156, - "loss": 46.0, - "step": 18505 - }, - { - "epoch": 1.414912934610165, - "grad_norm": 0.0010398655431345105, - "learning_rate": 0.00019999901313067295, - "loss": 46.0, - "step": 18506 - }, - { - "epoch": 1.4149893915935547, - "grad_norm": 0.0007882789359427989, - "learning_rate": 0.00019999901302395861, - "loss": 46.0, - "step": 18507 - }, - { - "epoch": 1.4150658485769445, - "grad_norm": 0.001123824156820774, - "learning_rate": 0.00019999901291723848, - "loss": 46.0, - "step": 18508 - }, - { - "epoch": 1.415142305560334, - "grad_norm": 0.0004243047733325511, - "learning_rate": 0.0001999990128105126, - "loss": 46.0, - "step": 18509 - }, - { - "epoch": 1.4152187625437238, - "grad_norm": 0.004101932048797607, - "learning_rate": 0.00019999901270378092, - "loss": 46.0, - "step": 18510 - }, - { - "epoch": 1.4152952195271136, - "grad_norm": 0.01726161688566208, - "learning_rate": 0.00019999901259704347, - "loss": 46.0, - "step": 18511 - }, - { - "epoch": 1.4153716765105033, - "grad_norm": 0.000661838857922703, - "learning_rate": 0.00019999901249030026, - "loss": 46.0, - "step": 18512 - }, - { - "epoch": 1.4154481334938929, - "grad_norm": 0.0009470888180658221, - "learning_rate": 0.0001999990123835513, - "loss": 46.0, - "step": 18513 - }, - { - "epoch": 1.4155245904772826, - "grad_norm": 0.0024587218649685383, - "learning_rate": 0.00019999901227679657, - "loss": 46.0, - "step": 18514 - }, - { - "epoch": 1.4156010474606724, - "grad_norm": 0.001808356842957437, - "learning_rate": 0.00019999901217003605, - "loss": 46.0, - "step": 18515 - }, - { - "epoch": 1.4156775044440622, - "grad_norm": 0.0018504871986806393, - "learning_rate": 0.00019999901206326976, - "loss": 46.0, - "step": 18516 - }, - { - "epoch": 1.415753961427452, - "grad_norm": 0.002093084156513214, - "learning_rate": 0.00019999901195649772, - "loss": 46.0, - "step": 18517 - }, - { - "epoch": 1.4158304184108417, - "grad_norm": 0.0016352457460016012, - "learning_rate": 0.00019999901184971988, - "loss": 46.0, - "step": 18518 - }, - { - "epoch": 1.4159068753942314, - "grad_norm": 0.0013365078484639525, - "learning_rate": 0.00019999901174293632, - "loss": 46.0, - "step": 18519 - }, - { - "epoch": 1.415983332377621, - "grad_norm": 0.0006704757106490433, - "learning_rate": 0.00019999901163614693, - "loss": 46.0, - "step": 18520 - }, - { - "epoch": 1.4160597893610107, - "grad_norm": 0.0017654619878157973, - "learning_rate": 0.00019999901152935182, - "loss": 46.0, - "step": 18521 - }, - { - "epoch": 1.4161362463444005, - "grad_norm": 0.0015793463680893183, - "learning_rate": 0.00019999901142255091, - "loss": 46.0, - "step": 18522 - }, - { - "epoch": 1.4162127033277903, - "grad_norm": 0.0007882977952249348, - "learning_rate": 0.00019999901131574426, - "loss": 46.0, - "step": 18523 - }, - { - "epoch": 1.4162891603111798, - "grad_norm": 0.0008499923278577626, - "learning_rate": 0.0001999990112089318, - "loss": 46.0, - "step": 18524 - }, - { - "epoch": 1.4163656172945696, - "grad_norm": 0.004672562703490257, - "learning_rate": 0.00019999901110211358, - "loss": 46.0, - "step": 18525 - }, - { - "epoch": 1.4164420742779593, - "grad_norm": 0.0005966693279333413, - "learning_rate": 0.0001999990109952896, - "loss": 46.0, - "step": 18526 - }, - { - "epoch": 1.416518531261349, - "grad_norm": 0.0005770426942035556, - "learning_rate": 0.00019999901088845986, - "loss": 46.0, - "step": 18527 - }, - { - "epoch": 1.4165949882447388, - "grad_norm": 0.0002567377232480794, - "learning_rate": 0.00019999901078162434, - "loss": 46.0, - "step": 18528 - }, - { - "epoch": 1.4166714452281286, - "grad_norm": 0.0005484905559569597, - "learning_rate": 0.00019999901067478307, - "loss": 46.0, - "step": 18529 - }, - { - "epoch": 1.4167479022115184, - "grad_norm": 0.00044046645052731037, - "learning_rate": 0.000199999010567936, - "loss": 46.0, - "step": 18530 - }, - { - "epoch": 1.416824359194908, - "grad_norm": 0.0013616152573376894, - "learning_rate": 0.0001999990104610832, - "loss": 46.0, - "step": 18531 - }, - { - "epoch": 1.4169008161782977, - "grad_norm": 0.0009252128074876964, - "learning_rate": 0.00019999901035422458, - "loss": 46.0, - "step": 18532 - }, - { - "epoch": 1.4169772731616874, - "grad_norm": 0.0017123956931754947, - "learning_rate": 0.00019999901024736025, - "loss": 46.0, - "step": 18533 - }, - { - "epoch": 1.4170537301450772, - "grad_norm": 0.0014507912565022707, - "learning_rate": 0.0001999990101404901, - "loss": 46.0, - "step": 18534 - }, - { - "epoch": 1.4171301871284667, - "grad_norm": 0.00126954959705472, - "learning_rate": 0.0001999990100336142, - "loss": 46.0, - "step": 18535 - }, - { - "epoch": 1.4172066441118565, - "grad_norm": 0.003204815788194537, - "learning_rate": 0.00019999900992673255, - "loss": 46.0, - "step": 18536 - }, - { - "epoch": 1.4172831010952462, - "grad_norm": 0.002077037002891302, - "learning_rate": 0.0001999990098198451, - "loss": 46.0, - "step": 18537 - }, - { - "epoch": 1.417359558078636, - "grad_norm": 0.000843019166495651, - "learning_rate": 0.0001999990097129519, - "loss": 46.0, - "step": 18538 - }, - { - "epoch": 1.4174360150620258, - "grad_norm": 0.0029420412611216307, - "learning_rate": 0.0001999990096060529, - "loss": 46.0, - "step": 18539 - }, - { - "epoch": 1.4175124720454155, - "grad_norm": 0.0011912168702110648, - "learning_rate": 0.00019999900949914815, - "loss": 46.0, - "step": 18540 - }, - { - "epoch": 1.4175889290288053, - "grad_norm": 0.0011484661372378469, - "learning_rate": 0.00019999900939223766, - "loss": 46.0, - "step": 18541 - }, - { - "epoch": 1.4176653860121948, - "grad_norm": 0.002181977266445756, - "learning_rate": 0.00019999900928532137, - "loss": 46.0, - "step": 18542 - }, - { - "epoch": 1.4177418429955846, - "grad_norm": 0.002635022159665823, - "learning_rate": 0.0001999990091783993, - "loss": 46.0, - "step": 18543 - }, - { - "epoch": 1.4178182999789744, - "grad_norm": 0.004045007284730673, - "learning_rate": 0.0001999990090714715, - "loss": 46.0, - "step": 18544 - }, - { - "epoch": 1.4178947569623641, - "grad_norm": 0.0023879518266767263, - "learning_rate": 0.00019999900896453787, - "loss": 46.0, - "step": 18545 - }, - { - "epoch": 1.4179712139457537, - "grad_norm": 0.011243748478591442, - "learning_rate": 0.0001999990088575985, - "loss": 46.0, - "step": 18546 - }, - { - "epoch": 1.4180476709291434, - "grad_norm": 0.0013129001017659903, - "learning_rate": 0.00019999900875065338, - "loss": 46.0, - "step": 18547 - }, - { - "epoch": 1.4181241279125332, - "grad_norm": 0.0005883431294932961, - "learning_rate": 0.00019999900864370247, - "loss": 46.0, - "step": 18548 - }, - { - "epoch": 1.418200584895923, - "grad_norm": 0.0014756311429664493, - "learning_rate": 0.0001999990085367458, - "loss": 46.0, - "step": 18549 - }, - { - "epoch": 1.4182770418793127, - "grad_norm": 0.0028583332896232605, - "learning_rate": 0.00019999900842978337, - "loss": 46.0, - "step": 18550 - }, - { - "epoch": 1.4183534988627025, - "grad_norm": 0.0009094447013922036, - "learning_rate": 0.00019999900832281514, - "loss": 46.0, - "step": 18551 - }, - { - "epoch": 1.4184299558460922, - "grad_norm": 0.0030003993306308985, - "learning_rate": 0.00019999900821584117, - "loss": 46.0, - "step": 18552 - }, - { - "epoch": 1.4185064128294818, - "grad_norm": 0.001980611588805914, - "learning_rate": 0.00019999900810886142, - "loss": 46.0, - "step": 18553 - }, - { - "epoch": 1.4185828698128715, - "grad_norm": 0.0010967858834192157, - "learning_rate": 0.00019999900800187593, - "loss": 46.0, - "step": 18554 - }, - { - "epoch": 1.4186593267962613, - "grad_norm": 0.014430834911763668, - "learning_rate": 0.0001999990078948846, - "loss": 46.0, - "step": 18555 - }, - { - "epoch": 1.418735783779651, - "grad_norm": 0.0009640708449296653, - "learning_rate": 0.00019999900778788757, - "loss": 46.0, - "step": 18556 - }, - { - "epoch": 1.4188122407630406, - "grad_norm": 0.0006808165344409645, - "learning_rate": 0.00019999900768088473, - "loss": 46.0, - "step": 18557 - }, - { - "epoch": 1.4188886977464303, - "grad_norm": 0.002339055296033621, - "learning_rate": 0.00019999900757387612, - "loss": 46.0, - "step": 18558 - }, - { - "epoch": 1.41896515472982, - "grad_norm": 0.008080870844423771, - "learning_rate": 0.00019999900746686176, - "loss": 46.0, - "step": 18559 - }, - { - "epoch": 1.4190416117132099, - "grad_norm": 0.0031779950950294733, - "learning_rate": 0.00019999900735984163, - "loss": 46.0, - "step": 18560 - }, - { - "epoch": 1.4191180686965996, - "grad_norm": 0.0008830921724438667, - "learning_rate": 0.00019999900725281572, - "loss": 46.0, - "step": 18561 - }, - { - "epoch": 1.4191945256799894, - "grad_norm": 0.00106416130438447, - "learning_rate": 0.00019999900714578407, - "loss": 46.0, - "step": 18562 - }, - { - "epoch": 1.4192709826633791, - "grad_norm": 0.0028730027843266726, - "learning_rate": 0.00019999900703874661, - "loss": 46.0, - "step": 18563 - }, - { - "epoch": 1.4193474396467687, - "grad_norm": 0.0025810750667005777, - "learning_rate": 0.0001999990069317034, - "loss": 46.0, - "step": 18564 - }, - { - "epoch": 1.4194238966301584, - "grad_norm": 0.004935820586979389, - "learning_rate": 0.0001999990068246544, - "loss": 46.0, - "step": 18565 - }, - { - "epoch": 1.4195003536135482, - "grad_norm": 0.001362173818051815, - "learning_rate": 0.00019999900671759967, - "loss": 46.0, - "step": 18566 - }, - { - "epoch": 1.419576810596938, - "grad_norm": 0.0067724683322012424, - "learning_rate": 0.00019999900661053915, - "loss": 46.0, - "step": 18567 - }, - { - "epoch": 1.4196532675803275, - "grad_norm": 0.003695362014696002, - "learning_rate": 0.00019999900650347286, - "loss": 46.0, - "step": 18568 - }, - { - "epoch": 1.4197297245637173, - "grad_norm": 0.0011306408559903502, - "learning_rate": 0.0001999990063964008, - "loss": 46.0, - "step": 18569 - }, - { - "epoch": 1.419806181547107, - "grad_norm": 0.0008153924718499184, - "learning_rate": 0.00019999900628932295, - "loss": 46.0, - "step": 18570 - }, - { - "epoch": 1.4198826385304968, - "grad_norm": 0.0007796114659868181, - "learning_rate": 0.00019999900618223934, - "loss": 46.0, - "step": 18571 - }, - { - "epoch": 1.4199590955138865, - "grad_norm": 0.0006157926400192082, - "learning_rate": 0.00019999900607514998, - "loss": 46.0, - "step": 18572 - }, - { - "epoch": 1.4200355524972763, - "grad_norm": 0.0008048530435189605, - "learning_rate": 0.00019999900596805485, - "loss": 46.0, - "step": 18573 - }, - { - "epoch": 1.420112009480666, - "grad_norm": 0.0011103039141744375, - "learning_rate": 0.00019999900586095394, - "loss": 46.0, - "step": 18574 - }, - { - "epoch": 1.4201884664640556, - "grad_norm": 0.0008789126295596361, - "learning_rate": 0.00019999900575384726, - "loss": 46.0, - "step": 18575 - }, - { - "epoch": 1.4202649234474454, - "grad_norm": 0.002710483269765973, - "learning_rate": 0.0001999990056467348, - "loss": 46.0, - "step": 18576 - }, - { - "epoch": 1.4203413804308351, - "grad_norm": 0.0008579171262681484, - "learning_rate": 0.00019999900553961658, - "loss": 46.0, - "step": 18577 - }, - { - "epoch": 1.420417837414225, - "grad_norm": 0.0021600141189992428, - "learning_rate": 0.0001999990054324926, - "loss": 46.0, - "step": 18578 - }, - { - "epoch": 1.4204942943976144, - "grad_norm": 0.001998534658923745, - "learning_rate": 0.00019999900532536287, - "loss": 46.0, - "step": 18579 - }, - { - "epoch": 1.4205707513810042, - "grad_norm": 0.007283096667379141, - "learning_rate": 0.00019999900521822735, - "loss": 46.0, - "step": 18580 - }, - { - "epoch": 1.420647208364394, - "grad_norm": 0.0022521934006363153, - "learning_rate": 0.00019999900511108605, - "loss": 46.0, - "step": 18581 - }, - { - "epoch": 1.4207236653477837, - "grad_norm": 0.0009552362025715411, - "learning_rate": 0.000199999005003939, - "loss": 46.0, - "step": 18582 - }, - { - "epoch": 1.4208001223311735, - "grad_norm": 0.004177308641374111, - "learning_rate": 0.00019999900489678615, - "loss": 46.0, - "step": 18583 - }, - { - "epoch": 1.4208765793145632, - "grad_norm": 0.0016907198587432504, - "learning_rate": 0.00019999900478962754, - "loss": 46.0, - "step": 18584 - }, - { - "epoch": 1.420953036297953, - "grad_norm": 0.004697458818554878, - "learning_rate": 0.00019999900468246318, - "loss": 46.0, - "step": 18585 - }, - { - "epoch": 1.4210294932813425, - "grad_norm": 0.0005175459664314985, - "learning_rate": 0.00019999900457529305, - "loss": 46.0, - "step": 18586 - }, - { - "epoch": 1.4211059502647323, - "grad_norm": 0.00032005272805690765, - "learning_rate": 0.00019999900446811711, - "loss": 46.0, - "step": 18587 - }, - { - "epoch": 1.421182407248122, - "grad_norm": 0.0009023714810609818, - "learning_rate": 0.00019999900436093543, - "loss": 46.0, - "step": 18588 - }, - { - "epoch": 1.4212588642315118, - "grad_norm": 0.00041148753371089697, - "learning_rate": 0.000199999004253748, - "loss": 46.0, - "step": 18589 - }, - { - "epoch": 1.4213353212149014, - "grad_norm": 0.0015865247696638107, - "learning_rate": 0.00019999900414655478, - "loss": 46.0, - "step": 18590 - }, - { - "epoch": 1.4214117781982911, - "grad_norm": 0.0012406307505443692, - "learning_rate": 0.00019999900403935578, - "loss": 46.0, - "step": 18591 - }, - { - "epoch": 1.4214882351816809, - "grad_norm": 0.001070024911314249, - "learning_rate": 0.00019999900393215104, - "loss": 46.0, - "step": 18592 - }, - { - "epoch": 1.4215646921650706, - "grad_norm": 0.0011345661478117108, - "learning_rate": 0.0001999990038249405, - "loss": 46.0, - "step": 18593 - }, - { - "epoch": 1.4216411491484604, - "grad_norm": 0.0009538857848383486, - "learning_rate": 0.00019999900371772423, - "loss": 46.0, - "step": 18594 - }, - { - "epoch": 1.4217176061318502, - "grad_norm": 0.0006144797662273049, - "learning_rate": 0.00019999900361050216, - "loss": 46.0, - "step": 18595 - }, - { - "epoch": 1.42179406311524, - "grad_norm": 0.0021742782555520535, - "learning_rate": 0.00019999900350327432, - "loss": 46.0, - "step": 18596 - }, - { - "epoch": 1.4218705200986295, - "grad_norm": 0.0012468538479879498, - "learning_rate": 0.0001999990033960407, - "loss": 46.0, - "step": 18597 - }, - { - "epoch": 1.4219469770820192, - "grad_norm": 0.0013210023753345013, - "learning_rate": 0.00019999900328880135, - "loss": 46.0, - "step": 18598 - }, - { - "epoch": 1.422023434065409, - "grad_norm": 0.001301935059018433, - "learning_rate": 0.0001999990031815562, - "loss": 46.0, - "step": 18599 - }, - { - "epoch": 1.4220998910487987, - "grad_norm": 0.0007719448185525835, - "learning_rate": 0.0001999990030743053, - "loss": 46.0, - "step": 18600 - }, - { - "epoch": 1.4221763480321883, - "grad_norm": 0.0016647737938910723, - "learning_rate": 0.0001999990029670486, - "loss": 46.0, - "step": 18601 - }, - { - "epoch": 1.422252805015578, - "grad_norm": 0.0007330256048589945, - "learning_rate": 0.00019999900285978613, - "loss": 46.0, - "step": 18602 - }, - { - "epoch": 1.4223292619989678, - "grad_norm": 0.0008366921101696789, - "learning_rate": 0.00019999900275251796, - "loss": 46.0, - "step": 18603 - }, - { - "epoch": 1.4224057189823576, - "grad_norm": 0.0009837065590545535, - "learning_rate": 0.00019999900264524394, - "loss": 46.0, - "step": 18604 - }, - { - "epoch": 1.4224821759657473, - "grad_norm": 0.0010996019700542092, - "learning_rate": 0.0001999990025379642, - "loss": 46.0, - "step": 18605 - }, - { - "epoch": 1.422558632949137, - "grad_norm": 0.0003169424890074879, - "learning_rate": 0.00019999900243067867, - "loss": 46.0, - "step": 18606 - }, - { - "epoch": 1.4226350899325266, - "grad_norm": 0.0011030015302821994, - "learning_rate": 0.00019999900232338735, - "loss": 46.0, - "step": 18607 - }, - { - "epoch": 1.4227115469159164, - "grad_norm": 0.0015582737978547812, - "learning_rate": 0.0001999990022160903, - "loss": 46.0, - "step": 18608 - }, - { - "epoch": 1.4227880038993062, - "grad_norm": 0.0009905400220304728, - "learning_rate": 0.00019999900210878745, - "loss": 46.0, - "step": 18609 - }, - { - "epoch": 1.422864460882696, - "grad_norm": 0.0008396326447837055, - "learning_rate": 0.00019999900200147886, - "loss": 46.0, - "step": 18610 - }, - { - "epoch": 1.4229409178660857, - "grad_norm": 0.0005049839965067804, - "learning_rate": 0.00019999900189416448, - "loss": 46.0, - "step": 18611 - }, - { - "epoch": 1.4230173748494752, - "grad_norm": 0.003065577009692788, - "learning_rate": 0.00019999900178684432, - "loss": 46.0, - "step": 18612 - }, - { - "epoch": 1.423093831832865, - "grad_norm": 0.0027007723692804575, - "learning_rate": 0.00019999900167951844, - "loss": 46.0, - "step": 18613 - }, - { - "epoch": 1.4231702888162547, - "grad_norm": 0.003133367747068405, - "learning_rate": 0.00019999900157218674, - "loss": 46.0, - "step": 18614 - }, - { - "epoch": 1.4232467457996445, - "grad_norm": 0.0007613941561430693, - "learning_rate": 0.00019999900146484929, - "loss": 46.0, - "step": 18615 - }, - { - "epoch": 1.4233232027830343, - "grad_norm": 0.001023577991873026, - "learning_rate": 0.00019999900135750606, - "loss": 46.0, - "step": 18616 - }, - { - "epoch": 1.423399659766424, - "grad_norm": 0.0007779033039696515, - "learning_rate": 0.00019999900125015706, - "loss": 46.0, - "step": 18617 - }, - { - "epoch": 1.4234761167498136, - "grad_norm": 0.0006752707413397729, - "learning_rate": 0.0001999990011428023, - "loss": 46.0, - "step": 18618 - }, - { - "epoch": 1.4235525737332033, - "grad_norm": 0.0015332060866057873, - "learning_rate": 0.00019999900103544177, - "loss": 46.0, - "step": 18619 - }, - { - "epoch": 1.423629030716593, - "grad_norm": 0.018377667292952538, - "learning_rate": 0.00019999900092807548, - "loss": 46.0, - "step": 18620 - }, - { - "epoch": 1.4237054876999828, - "grad_norm": 0.0014321247581392527, - "learning_rate": 0.00019999900082070342, - "loss": 46.0, - "step": 18621 - }, - { - "epoch": 1.4237819446833724, - "grad_norm": 0.006086652632802725, - "learning_rate": 0.00019999900071332558, - "loss": 46.0, - "step": 18622 - }, - { - "epoch": 1.4238584016667621, - "grad_norm": 0.0026824716478586197, - "learning_rate": 0.00019999900060594197, - "loss": 46.0, - "step": 18623 - }, - { - "epoch": 1.423934858650152, - "grad_norm": 0.0016962835798040032, - "learning_rate": 0.00019999900049855258, - "loss": 46.0, - "step": 18624 - }, - { - "epoch": 1.4240113156335417, - "grad_norm": 0.002232568571344018, - "learning_rate": 0.00019999900039115745, - "loss": 46.0, - "step": 18625 - }, - { - "epoch": 1.4240877726169314, - "grad_norm": 0.0014266332145780325, - "learning_rate": 0.00019999900028375652, - "loss": 46.0, - "step": 18626 - }, - { - "epoch": 1.4241642296003212, - "grad_norm": 0.004583638161420822, - "learning_rate": 0.00019999900017634981, - "loss": 46.0, - "step": 18627 - }, - { - "epoch": 1.424240686583711, - "grad_norm": 0.0007547771674580872, - "learning_rate": 0.0001999990000689374, - "loss": 46.0, - "step": 18628 - }, - { - "epoch": 1.4243171435671005, - "grad_norm": 0.0008986523025669158, - "learning_rate": 0.00019999899996151917, - "loss": 46.0, - "step": 18629 - }, - { - "epoch": 1.4243936005504902, - "grad_norm": 0.0010006674565374851, - "learning_rate": 0.00019999899985409517, - "loss": 46.0, - "step": 18630 - }, - { - "epoch": 1.42447005753388, - "grad_norm": 0.0004649788897950202, - "learning_rate": 0.0001999989997466654, - "loss": 46.0, - "step": 18631 - }, - { - "epoch": 1.4245465145172698, - "grad_norm": 0.0016545712715014815, - "learning_rate": 0.00019999899963922988, - "loss": 46.0, - "step": 18632 - }, - { - "epoch": 1.4246229715006593, - "grad_norm": 0.0027371577452868223, - "learning_rate": 0.0001999989995317886, - "loss": 46.0, - "step": 18633 - }, - { - "epoch": 1.424699428484049, - "grad_norm": 0.0026675439439713955, - "learning_rate": 0.0001999989994243415, - "loss": 46.0, - "step": 18634 - }, - { - "epoch": 1.4247758854674388, - "grad_norm": 0.0018106952775269747, - "learning_rate": 0.00019999899931688866, - "loss": 46.0, - "step": 18635 - }, - { - "epoch": 1.4248523424508286, - "grad_norm": 0.0012644518865272403, - "learning_rate": 0.00019999899920943005, - "loss": 46.0, - "step": 18636 - }, - { - "epoch": 1.4249287994342184, - "grad_norm": 0.0004560202651191503, - "learning_rate": 0.00019999899910196566, - "loss": 46.0, - "step": 18637 - }, - { - "epoch": 1.4250052564176081, - "grad_norm": 0.0034558644983917475, - "learning_rate": 0.00019999899899449553, - "loss": 46.0, - "step": 18638 - }, - { - "epoch": 1.4250817134009979, - "grad_norm": 0.0016475735465064645, - "learning_rate": 0.0001999989988870196, - "loss": 46.0, - "step": 18639 - }, - { - "epoch": 1.4251581703843874, - "grad_norm": 0.0036439159885048866, - "learning_rate": 0.00019999899877953792, - "loss": 46.0, - "step": 18640 - }, - { - "epoch": 1.4252346273677772, - "grad_norm": 0.0033887161407619715, - "learning_rate": 0.00019999899867205044, - "loss": 46.0, - "step": 18641 - }, - { - "epoch": 1.425311084351167, - "grad_norm": 0.0007457028259523213, - "learning_rate": 0.00019999899856455722, - "loss": 46.0, - "step": 18642 - }, - { - "epoch": 1.4253875413345567, - "grad_norm": 0.006611408665776253, - "learning_rate": 0.00019999899845705822, - "loss": 46.0, - "step": 18643 - }, - { - "epoch": 1.4254639983179462, - "grad_norm": 0.0007065095123834908, - "learning_rate": 0.00019999899834955348, - "loss": 46.0, - "step": 18644 - }, - { - "epoch": 1.425540455301336, - "grad_norm": 0.0006840868736617267, - "learning_rate": 0.00019999899824204293, - "loss": 46.0, - "step": 18645 - }, - { - "epoch": 1.4256169122847258, - "grad_norm": 0.0018743122927844524, - "learning_rate": 0.00019999899813452661, - "loss": 46.0, - "step": 18646 - }, - { - "epoch": 1.4256933692681155, - "grad_norm": 0.0008903193520382047, - "learning_rate": 0.00019999899802700455, - "loss": 46.0, - "step": 18647 - }, - { - "epoch": 1.4257698262515053, - "grad_norm": 0.0016152093885466456, - "learning_rate": 0.0001999989979194767, - "loss": 46.0, - "step": 18648 - }, - { - "epoch": 1.425846283234895, - "grad_norm": 0.004433429334312677, - "learning_rate": 0.0001999989978119431, - "loss": 46.0, - "step": 18649 - }, - { - "epoch": 1.4259227402182848, - "grad_norm": 0.0011305814841762185, - "learning_rate": 0.00019999899770440372, - "loss": 46.0, - "step": 18650 - }, - { - "epoch": 1.4259991972016743, - "grad_norm": 0.007987190037965775, - "learning_rate": 0.0001999989975968586, - "loss": 46.0, - "step": 18651 - }, - { - "epoch": 1.426075654185064, - "grad_norm": 0.00039009845932014287, - "learning_rate": 0.00019999899748930763, - "loss": 46.0, - "step": 18652 - }, - { - "epoch": 1.4261521111684539, - "grad_norm": 0.007430423516780138, - "learning_rate": 0.00019999899738175095, - "loss": 46.0, - "step": 18653 - }, - { - "epoch": 1.4262285681518436, - "grad_norm": 0.003537822747603059, - "learning_rate": 0.00019999899727418853, - "loss": 46.0, - "step": 18654 - }, - { - "epoch": 1.4263050251352332, - "grad_norm": 0.0020287923980504274, - "learning_rate": 0.00019999899716662028, - "loss": 46.0, - "step": 18655 - }, - { - "epoch": 1.426381482118623, - "grad_norm": 0.013133603148162365, - "learning_rate": 0.00019999899705904628, - "loss": 46.0, - "step": 18656 - }, - { - "epoch": 1.4264579391020127, - "grad_norm": 0.001169879105873406, - "learning_rate": 0.0001999989969514665, - "loss": 46.0, - "step": 18657 - }, - { - "epoch": 1.4265343960854024, - "grad_norm": 0.0032761956099420786, - "learning_rate": 0.00019999899684388096, - "loss": 46.0, - "step": 18658 - }, - { - "epoch": 1.4266108530687922, - "grad_norm": 0.0006516337743960321, - "learning_rate": 0.00019999899673628967, - "loss": 46.0, - "step": 18659 - }, - { - "epoch": 1.426687310052182, - "grad_norm": 0.0021252119913697243, - "learning_rate": 0.00019999899662869258, - "loss": 46.0, - "step": 18660 - }, - { - "epoch": 1.4267637670355717, - "grad_norm": 0.0009915714617818594, - "learning_rate": 0.00019999899652108972, - "loss": 46.0, - "step": 18661 - }, - { - "epoch": 1.4268402240189613, - "grad_norm": 0.0036959603894501925, - "learning_rate": 0.00019999899641348114, - "loss": 46.0, - "step": 18662 - }, - { - "epoch": 1.426916681002351, - "grad_norm": 0.0005464826244860888, - "learning_rate": 0.00019999899630586672, - "loss": 46.0, - "step": 18663 - }, - { - "epoch": 1.4269931379857408, - "grad_norm": 0.0007365245837718248, - "learning_rate": 0.0001999989961982466, - "loss": 46.0, - "step": 18664 - }, - { - "epoch": 1.4270695949691306, - "grad_norm": 0.0010078215273097157, - "learning_rate": 0.00019999899609062066, - "loss": 46.0, - "step": 18665 - }, - { - "epoch": 1.42714605195252, - "grad_norm": 0.005040076561272144, - "learning_rate": 0.00019999899598298896, - "loss": 46.0, - "step": 18666 - }, - { - "epoch": 1.4272225089359099, - "grad_norm": 0.004677411634474993, - "learning_rate": 0.0001999989958753515, - "loss": 46.0, - "step": 18667 - }, - { - "epoch": 1.4272989659192996, - "grad_norm": 0.0009450827492401004, - "learning_rate": 0.00019999899576770828, - "loss": 46.0, - "step": 18668 - }, - { - "epoch": 1.4273754229026894, - "grad_norm": 0.0006946150679141283, - "learning_rate": 0.0001999989956600593, - "loss": 46.0, - "step": 18669 - }, - { - "epoch": 1.4274518798860791, - "grad_norm": 0.0008818952483125031, - "learning_rate": 0.00019999899555240452, - "loss": 46.0, - "step": 18670 - }, - { - "epoch": 1.427528336869469, - "grad_norm": 0.0015344680286943913, - "learning_rate": 0.00019999899544474397, - "loss": 46.0, - "step": 18671 - }, - { - "epoch": 1.4276047938528587, - "grad_norm": 0.0009433648083359003, - "learning_rate": 0.00019999899533707766, - "loss": 46.0, - "step": 18672 - }, - { - "epoch": 1.4276812508362482, - "grad_norm": 0.004973521456122398, - "learning_rate": 0.0001999989952294056, - "loss": 46.0, - "step": 18673 - }, - { - "epoch": 1.427757707819638, - "grad_norm": 0.001084906398318708, - "learning_rate": 0.00019999899512172773, - "loss": 46.0, - "step": 18674 - }, - { - "epoch": 1.4278341648030277, - "grad_norm": 0.001618018141016364, - "learning_rate": 0.00019999899501404415, - "loss": 46.0, - "step": 18675 - }, - { - "epoch": 1.4279106217864175, - "grad_norm": 0.0013044291408732533, - "learning_rate": 0.00019999899490635473, - "loss": 46.0, - "step": 18676 - }, - { - "epoch": 1.427987078769807, - "grad_norm": 0.0006601489149034023, - "learning_rate": 0.00019999899479865958, - "loss": 46.0, - "step": 18677 - }, - { - "epoch": 1.4280635357531968, - "grad_norm": 0.0010181114776059985, - "learning_rate": 0.00019999899469095867, - "loss": 46.0, - "step": 18678 - }, - { - "epoch": 1.4281399927365865, - "grad_norm": 0.0048792739398777485, - "learning_rate": 0.00019999899458325197, - "loss": 46.0, - "step": 18679 - }, - { - "epoch": 1.4282164497199763, - "grad_norm": 0.0006001926958560944, - "learning_rate": 0.00019999899447553952, - "loss": 46.0, - "step": 18680 - }, - { - "epoch": 1.428292906703366, - "grad_norm": 0.003065509255975485, - "learning_rate": 0.00019999899436782127, - "loss": 46.0, - "step": 18681 - }, - { - "epoch": 1.4283693636867558, - "grad_norm": 0.012520805932581425, - "learning_rate": 0.00019999899426009727, - "loss": 46.0, - "step": 18682 - }, - { - "epoch": 1.4284458206701456, - "grad_norm": 0.0010263193398714066, - "learning_rate": 0.0001999989941523675, - "loss": 46.0, - "step": 18683 - }, - { - "epoch": 1.4285222776535351, - "grad_norm": 0.0033806944265961647, - "learning_rate": 0.00019999899404463196, - "loss": 46.0, - "step": 18684 - }, - { - "epoch": 1.4285987346369249, - "grad_norm": 0.000461573654320091, - "learning_rate": 0.00019999899393689064, - "loss": 46.0, - "step": 18685 - }, - { - "epoch": 1.4286751916203146, - "grad_norm": 0.0005505571025423706, - "learning_rate": 0.00019999899382914355, - "loss": 46.0, - "step": 18686 - }, - { - "epoch": 1.4287516486037044, - "grad_norm": 0.0016493485309183598, - "learning_rate": 0.00019999899372139072, - "loss": 46.0, - "step": 18687 - }, - { - "epoch": 1.428828105587094, - "grad_norm": 0.0012330812169238925, - "learning_rate": 0.0001999989936136321, - "loss": 46.0, - "step": 18688 - }, - { - "epoch": 1.4289045625704837, - "grad_norm": 0.0018370734760537744, - "learning_rate": 0.0001999989935058677, - "loss": 46.0, - "step": 18689 - }, - { - "epoch": 1.4289810195538735, - "grad_norm": 0.000623841944616288, - "learning_rate": 0.00019999899339809757, - "loss": 46.0, - "step": 18690 - }, - { - "epoch": 1.4290574765372632, - "grad_norm": 0.001196195837110281, - "learning_rate": 0.00019999899329032164, - "loss": 46.0, - "step": 18691 - }, - { - "epoch": 1.429133933520653, - "grad_norm": 0.0033842779230326414, - "learning_rate": 0.00019999899318253993, - "loss": 46.0, - "step": 18692 - }, - { - "epoch": 1.4292103905040427, - "grad_norm": 0.0008980220300145447, - "learning_rate": 0.00019999899307475248, - "loss": 46.0, - "step": 18693 - }, - { - "epoch": 1.4292868474874325, - "grad_norm": 0.0004935725592076778, - "learning_rate": 0.00019999899296695923, - "loss": 46.0, - "step": 18694 - }, - { - "epoch": 1.429363304470822, - "grad_norm": 0.008362212218344212, - "learning_rate": 0.0001999989928591602, - "loss": 46.0, - "step": 18695 - }, - { - "epoch": 1.4294397614542118, - "grad_norm": 0.004225720185786486, - "learning_rate": 0.00019999899275135547, - "loss": 46.0, - "step": 18696 - }, - { - "epoch": 1.4295162184376016, - "grad_norm": 0.0020503634586930275, - "learning_rate": 0.00019999899264354492, - "loss": 46.0, - "step": 18697 - }, - { - "epoch": 1.4295926754209913, - "grad_norm": 0.0024455529637634754, - "learning_rate": 0.00019999899253572858, - "loss": 46.0, - "step": 18698 - }, - { - "epoch": 1.4296691324043809, - "grad_norm": 0.001271538552828133, - "learning_rate": 0.00019999899242790652, - "loss": 46.0, - "step": 18699 - }, - { - "epoch": 1.4297455893877706, - "grad_norm": 0.0020179194398224354, - "learning_rate": 0.00019999899232007865, - "loss": 46.0, - "step": 18700 - }, - { - "epoch": 1.4298220463711604, - "grad_norm": 0.0009006079635582864, - "learning_rate": 0.00019999899221224504, - "loss": 46.0, - "step": 18701 - }, - { - "epoch": 1.4298985033545502, - "grad_norm": 0.0010742960730567575, - "learning_rate": 0.00019999899210440566, - "loss": 46.0, - "step": 18702 - }, - { - "epoch": 1.42997496033794, - "grad_norm": 0.002057841047644615, - "learning_rate": 0.0001999989919965605, - "loss": 46.0, - "step": 18703 - }, - { - "epoch": 1.4300514173213297, - "grad_norm": 0.001576918177306652, - "learning_rate": 0.00019999899188870957, - "loss": 46.0, - "step": 18704 - }, - { - "epoch": 1.4301278743047194, - "grad_norm": 0.0008519926341250539, - "learning_rate": 0.00019999899178085287, - "loss": 46.0, - "step": 18705 - }, - { - "epoch": 1.430204331288109, - "grad_norm": 0.0007095919572748244, - "learning_rate": 0.0001999989916729904, - "loss": 46.0, - "step": 18706 - }, - { - "epoch": 1.4302807882714987, - "grad_norm": 0.0015185867669060826, - "learning_rate": 0.00019999899156512217, - "loss": 46.0, - "step": 18707 - }, - { - "epoch": 1.4303572452548885, - "grad_norm": 0.0009592008427716792, - "learning_rate": 0.00019999899145724817, - "loss": 46.0, - "step": 18708 - }, - { - "epoch": 1.4304337022382783, - "grad_norm": 0.0009776568040251732, - "learning_rate": 0.00019999899134936838, - "loss": 46.0, - "step": 18709 - }, - { - "epoch": 1.4305101592216678, - "grad_norm": 0.001985912909731269, - "learning_rate": 0.00019999899124148283, - "loss": 46.0, - "step": 18710 - }, - { - "epoch": 1.4305866162050576, - "grad_norm": 0.0006811233470216393, - "learning_rate": 0.00019999899113359152, - "loss": 46.0, - "step": 18711 - }, - { - "epoch": 1.4306630731884473, - "grad_norm": 0.0062431227415800095, - "learning_rate": 0.00019999899102569443, - "loss": 46.0, - "step": 18712 - }, - { - "epoch": 1.430739530171837, - "grad_norm": 0.0007096232729963958, - "learning_rate": 0.00019999899091779157, - "loss": 46.0, - "step": 18713 - }, - { - "epoch": 1.4308159871552268, - "grad_norm": 0.0015456915134564042, - "learning_rate": 0.00019999899080988296, - "loss": 46.0, - "step": 18714 - }, - { - "epoch": 1.4308924441386166, - "grad_norm": 0.018905820325016975, - "learning_rate": 0.00019999899070196855, - "loss": 46.0, - "step": 18715 - }, - { - "epoch": 1.4309689011220064, - "grad_norm": 0.003829828230664134, - "learning_rate": 0.0001999989905940484, - "loss": 46.0, - "step": 18716 - }, - { - "epoch": 1.431045358105396, - "grad_norm": 0.0004912882577627897, - "learning_rate": 0.00019999899048612246, - "loss": 46.0, - "step": 18717 - }, - { - "epoch": 1.4311218150887857, - "grad_norm": 0.001517701894044876, - "learning_rate": 0.00019999899037819076, - "loss": 46.0, - "step": 18718 - }, - { - "epoch": 1.4311982720721754, - "grad_norm": 0.0017941041151061654, - "learning_rate": 0.0001999989902702533, - "loss": 46.0, - "step": 18719 - }, - { - "epoch": 1.4312747290555652, - "grad_norm": 0.000769377569667995, - "learning_rate": 0.00019999899016231006, - "loss": 46.0, - "step": 18720 - }, - { - "epoch": 1.4313511860389547, - "grad_norm": 0.0012285992270335555, - "learning_rate": 0.00019999899005436104, - "loss": 46.0, - "step": 18721 - }, - { - "epoch": 1.4314276430223445, - "grad_norm": 0.007775334641337395, - "learning_rate": 0.00019999898994640627, - "loss": 46.0, - "step": 18722 - }, - { - "epoch": 1.4315041000057342, - "grad_norm": 0.0014218116411939263, - "learning_rate": 0.00019999898983844575, - "loss": 46.0, - "step": 18723 - }, - { - "epoch": 1.431580556989124, - "grad_norm": 0.0006094217533245683, - "learning_rate": 0.00019999898973047938, - "loss": 46.0, - "step": 18724 - }, - { - "epoch": 1.4316570139725138, - "grad_norm": 0.00151549291331321, - "learning_rate": 0.00019999898962250732, - "loss": 46.0, - "step": 18725 - }, - { - "epoch": 1.4317334709559035, - "grad_norm": 0.0015793833881616592, - "learning_rate": 0.00019999898951452946, - "loss": 46.0, - "step": 18726 - }, - { - "epoch": 1.4318099279392933, - "grad_norm": 0.0007776551647111773, - "learning_rate": 0.00019999898940654585, - "loss": 46.0, - "step": 18727 - }, - { - "epoch": 1.4318863849226828, - "grad_norm": 0.0027849250473082066, - "learning_rate": 0.00019999898929855644, - "loss": 46.0, - "step": 18728 - }, - { - "epoch": 1.4319628419060726, - "grad_norm": 0.0006977255106903613, - "learning_rate": 0.00019999898919056128, - "loss": 46.0, - "step": 18729 - }, - { - "epoch": 1.4320392988894624, - "grad_norm": 0.00579830864444375, - "learning_rate": 0.00019999898908256035, - "loss": 46.0, - "step": 18730 - }, - { - "epoch": 1.4321157558728521, - "grad_norm": 0.0015832193894311786, - "learning_rate": 0.00019999898897455365, - "loss": 46.0, - "step": 18731 - }, - { - "epoch": 1.4321922128562417, - "grad_norm": 0.001268958323635161, - "learning_rate": 0.00019999898886654117, - "loss": 46.0, - "step": 18732 - }, - { - "epoch": 1.4322686698396314, - "grad_norm": 0.0023746297229081392, - "learning_rate": 0.00019999898875852292, - "loss": 46.0, - "step": 18733 - }, - { - "epoch": 1.4323451268230212, - "grad_norm": 0.003861360251903534, - "learning_rate": 0.00019999898865049893, - "loss": 46.0, - "step": 18734 - }, - { - "epoch": 1.432421583806411, - "grad_norm": 0.0011787962866947055, - "learning_rate": 0.00019999898854246913, - "loss": 46.0, - "step": 18735 - }, - { - "epoch": 1.4324980407898007, - "grad_norm": 0.012241599150002003, - "learning_rate": 0.0001999989884344336, - "loss": 46.0, - "step": 18736 - }, - { - "epoch": 1.4325744977731905, - "grad_norm": 0.0024612946435809135, - "learning_rate": 0.00019999898832639228, - "loss": 46.0, - "step": 18737 - }, - { - "epoch": 1.43265095475658, - "grad_norm": 0.0006849869969300926, - "learning_rate": 0.00019999898821834519, - "loss": 46.0, - "step": 18738 - }, - { - "epoch": 1.4327274117399698, - "grad_norm": 0.001151581876911223, - "learning_rate": 0.00019999898811029235, - "loss": 46.0, - "step": 18739 - }, - { - "epoch": 1.4328038687233595, - "grad_norm": 0.0014457560610026121, - "learning_rate": 0.00019999898800223372, - "loss": 46.0, - "step": 18740 - }, - { - "epoch": 1.4328803257067493, - "grad_norm": 0.0006206260877661407, - "learning_rate": 0.0001999989878941693, - "loss": 46.0, - "step": 18741 - }, - { - "epoch": 1.432956782690139, - "grad_norm": 0.0013611504109576344, - "learning_rate": 0.00019999898778609912, - "loss": 46.0, - "step": 18742 - }, - { - "epoch": 1.4330332396735286, - "grad_norm": 0.0021817791275680065, - "learning_rate": 0.0001999989876780232, - "loss": 46.0, - "step": 18743 - }, - { - "epoch": 1.4331096966569183, - "grad_norm": 0.002085766987875104, - "learning_rate": 0.00019999898756994152, - "loss": 46.0, - "step": 18744 - }, - { - "epoch": 1.433186153640308, - "grad_norm": 0.0009521907195448875, - "learning_rate": 0.00019999898746185402, - "loss": 46.0, - "step": 18745 - }, - { - "epoch": 1.4332626106236979, - "grad_norm": 0.0008955611265264452, - "learning_rate": 0.0001999989873537608, - "loss": 46.0, - "step": 18746 - }, - { - "epoch": 1.4333390676070876, - "grad_norm": 0.0026764387730509043, - "learning_rate": 0.00019999898724566175, - "loss": 46.0, - "step": 18747 - }, - { - "epoch": 1.4334155245904774, - "grad_norm": 0.0013781870948150754, - "learning_rate": 0.00019999898713755698, - "loss": 46.0, - "step": 18748 - }, - { - "epoch": 1.433491981573867, - "grad_norm": 0.0023273660335689783, - "learning_rate": 0.00019999898702944643, - "loss": 46.0, - "step": 18749 - }, - { - "epoch": 1.4335684385572567, - "grad_norm": 0.005769746843725443, - "learning_rate": 0.00019999898692133012, - "loss": 46.0, - "step": 18750 - }, - { - "epoch": 1.4336448955406464, - "grad_norm": 0.007147619500756264, - "learning_rate": 0.00019999898681320803, - "loss": 46.0, - "step": 18751 - }, - { - "epoch": 1.4337213525240362, - "grad_norm": 0.000937281409278512, - "learning_rate": 0.00019999898670508017, - "loss": 46.0, - "step": 18752 - }, - { - "epoch": 1.4337978095074257, - "grad_norm": 0.0015842615393921733, - "learning_rate": 0.00019999898659694653, - "loss": 46.0, - "step": 18753 - }, - { - "epoch": 1.4338742664908155, - "grad_norm": 0.004984136670827866, - "learning_rate": 0.00019999898648880712, - "loss": 46.0, - "step": 18754 - }, - { - "epoch": 1.4339507234742053, - "grad_norm": 0.0006235511391423643, - "learning_rate": 0.00019999898638066197, - "loss": 46.0, - "step": 18755 - }, - { - "epoch": 1.434027180457595, - "grad_norm": 0.0013173272600397468, - "learning_rate": 0.00019999898627251104, - "loss": 46.0, - "step": 18756 - }, - { - "epoch": 1.4341036374409848, - "grad_norm": 0.004447820130735636, - "learning_rate": 0.0001999989861643543, - "loss": 46.0, - "step": 18757 - }, - { - "epoch": 1.4341800944243746, - "grad_norm": 0.005511179566383362, - "learning_rate": 0.00019999898605619186, - "loss": 46.0, - "step": 18758 - }, - { - "epoch": 1.4342565514077643, - "grad_norm": 0.0011743898503482342, - "learning_rate": 0.0001999989859480236, - "loss": 46.0, - "step": 18759 - }, - { - "epoch": 1.4343330083911539, - "grad_norm": 0.006367901805788279, - "learning_rate": 0.00019999898583984957, - "loss": 46.0, - "step": 18760 - }, - { - "epoch": 1.4344094653745436, - "grad_norm": 0.0007368992082774639, - "learning_rate": 0.0001999989857316698, - "loss": 46.0, - "step": 18761 - }, - { - "epoch": 1.4344859223579334, - "grad_norm": 0.001369076082482934, - "learning_rate": 0.00019999898562348426, - "loss": 46.0, - "step": 18762 - }, - { - "epoch": 1.4345623793413231, - "grad_norm": 0.0005207036156207323, - "learning_rate": 0.0001999989855152929, - "loss": 46.0, - "step": 18763 - }, - { - "epoch": 1.4346388363247127, - "grad_norm": 0.001141222775913775, - "learning_rate": 0.00019999898540709582, - "loss": 46.0, - "step": 18764 - }, - { - "epoch": 1.4347152933081024, - "grad_norm": 0.002879117848351598, - "learning_rate": 0.00019999898529889296, - "loss": 46.0, - "step": 18765 - }, - { - "epoch": 1.4347917502914922, - "grad_norm": 0.0013714451342821121, - "learning_rate": 0.00019999898519068433, - "loss": 46.0, - "step": 18766 - }, - { - "epoch": 1.434868207274882, - "grad_norm": 0.0007758744759485126, - "learning_rate": 0.00019999898508246995, - "loss": 46.0, - "step": 18767 - }, - { - "epoch": 1.4349446642582717, - "grad_norm": 0.0003821845748461783, - "learning_rate": 0.00019999898497424976, - "loss": 46.0, - "step": 18768 - }, - { - "epoch": 1.4350211212416615, - "grad_norm": 0.005240944214165211, - "learning_rate": 0.0001999989848660238, - "loss": 46.0, - "step": 18769 - }, - { - "epoch": 1.4350975782250512, - "grad_norm": 0.003266120096668601, - "learning_rate": 0.0001999989847577921, - "loss": 46.0, - "step": 18770 - }, - { - "epoch": 1.4351740352084408, - "grad_norm": 0.0032120102550834417, - "learning_rate": 0.00019999898464955463, - "loss": 46.0, - "step": 18771 - }, - { - "epoch": 1.4352504921918305, - "grad_norm": 0.000991167384199798, - "learning_rate": 0.00019999898454131138, - "loss": 46.0, - "step": 18772 - }, - { - "epoch": 1.4353269491752203, - "grad_norm": 0.004238905850797892, - "learning_rate": 0.00019999898443306236, - "loss": 46.0, - "step": 18773 - }, - { - "epoch": 1.43540340615861, - "grad_norm": 0.0014413670869544148, - "learning_rate": 0.00019999898432480757, - "loss": 46.0, - "step": 18774 - }, - { - "epoch": 1.4354798631419996, - "grad_norm": 0.0007632928900420666, - "learning_rate": 0.00019999898421654703, - "loss": 46.0, - "step": 18775 - }, - { - "epoch": 1.4355563201253894, - "grad_norm": 0.0008038313826546073, - "learning_rate": 0.00019999898410828068, - "loss": 46.0, - "step": 18776 - }, - { - "epoch": 1.4356327771087791, - "grad_norm": 0.001329036895185709, - "learning_rate": 0.0001999989840000086, - "loss": 46.0, - "step": 18777 - }, - { - "epoch": 1.4357092340921689, - "grad_norm": 0.00276779243722558, - "learning_rate": 0.00019999898389173076, - "loss": 46.0, - "step": 18778 - }, - { - "epoch": 1.4357856910755586, - "grad_norm": 0.002485651755705476, - "learning_rate": 0.0001999989837834471, - "loss": 46.0, - "step": 18779 - }, - { - "epoch": 1.4358621480589484, - "grad_norm": 0.0012535244459286332, - "learning_rate": 0.0001999989836751577, - "loss": 46.0, - "step": 18780 - }, - { - "epoch": 1.4359386050423382, - "grad_norm": 0.0006342001724988222, - "learning_rate": 0.00019999898356686254, - "loss": 46.0, - "step": 18781 - }, - { - "epoch": 1.4360150620257277, - "grad_norm": 0.0014903387054800987, - "learning_rate": 0.0001999989834585616, - "loss": 46.0, - "step": 18782 - }, - { - "epoch": 1.4360915190091175, - "grad_norm": 0.0011879093945026398, - "learning_rate": 0.00019999898335025485, - "loss": 46.0, - "step": 18783 - }, - { - "epoch": 1.4361679759925072, - "grad_norm": 0.0007911497377790511, - "learning_rate": 0.0001999989832419424, - "loss": 46.0, - "step": 18784 - }, - { - "epoch": 1.436244432975897, - "grad_norm": 0.0009299229714088142, - "learning_rate": 0.00019999898313362416, - "loss": 46.0, - "step": 18785 - }, - { - "epoch": 1.4363208899592865, - "grad_norm": 0.0009314122726209462, - "learning_rate": 0.00019999898302530014, - "loss": 46.0, - "step": 18786 - }, - { - "epoch": 1.4363973469426763, - "grad_norm": 0.004738044925034046, - "learning_rate": 0.00019999898291697034, - "loss": 46.0, - "step": 18787 - }, - { - "epoch": 1.436473803926066, - "grad_norm": 0.0011719597969204187, - "learning_rate": 0.00019999898280863477, - "loss": 46.0, - "step": 18788 - }, - { - "epoch": 1.4365502609094558, - "grad_norm": 0.0007641953998245299, - "learning_rate": 0.00019999898270029346, - "loss": 46.0, - "step": 18789 - }, - { - "epoch": 1.4366267178928456, - "grad_norm": 0.0008409270667470992, - "learning_rate": 0.00019999898259194637, - "loss": 46.0, - "step": 18790 - }, - { - "epoch": 1.4367031748762353, - "grad_norm": 0.0011571451323106885, - "learning_rate": 0.0001999989824835935, - "loss": 46.0, - "step": 18791 - }, - { - "epoch": 1.436779631859625, - "grad_norm": 0.0006027471390552819, - "learning_rate": 0.00019999898237523485, - "loss": 46.0, - "step": 18792 - }, - { - "epoch": 1.4368560888430146, - "grad_norm": 0.0018605849472805858, - "learning_rate": 0.00019999898226687047, - "loss": 46.0, - "step": 18793 - }, - { - "epoch": 1.4369325458264044, - "grad_norm": 0.001404240378178656, - "learning_rate": 0.00019999898215850026, - "loss": 46.0, - "step": 18794 - }, - { - "epoch": 1.4370090028097942, - "grad_norm": 0.008444486185908318, - "learning_rate": 0.00019999898205012433, - "loss": 46.0, - "step": 18795 - }, - { - "epoch": 1.437085459793184, - "grad_norm": 0.0052825771272182465, - "learning_rate": 0.00019999898194174263, - "loss": 46.0, - "step": 18796 - }, - { - "epoch": 1.4371619167765735, - "grad_norm": 0.0006845666794106364, - "learning_rate": 0.00019999898183335516, - "loss": 46.0, - "step": 18797 - }, - { - "epoch": 1.4372383737599632, - "grad_norm": 0.004222303628921509, - "learning_rate": 0.0001999989817249619, - "loss": 46.0, - "step": 18798 - }, - { - "epoch": 1.437314830743353, - "grad_norm": 0.0011709652608260512, - "learning_rate": 0.0001999989816165629, - "loss": 46.0, - "step": 18799 - }, - { - "epoch": 1.4373912877267427, - "grad_norm": 0.000967221858445555, - "learning_rate": 0.00019999898150815807, - "loss": 46.0, - "step": 18800 - }, - { - "epoch": 1.4374677447101325, - "grad_norm": 0.0009400346898473799, - "learning_rate": 0.0001999989813997475, - "loss": 46.0, - "step": 18801 - }, - { - "epoch": 1.4375442016935223, - "grad_norm": 0.0038634752854704857, - "learning_rate": 0.00019999898129133116, - "loss": 46.0, - "step": 18802 - }, - { - "epoch": 1.437620658676912, - "grad_norm": 0.00157473748549819, - "learning_rate": 0.0001999989811829091, - "loss": 46.0, - "step": 18803 - }, - { - "epoch": 1.4376971156603016, - "grad_norm": 0.0012032538652420044, - "learning_rate": 0.0001999989810744812, - "loss": 46.0, - "step": 18804 - }, - { - "epoch": 1.4377735726436913, - "grad_norm": 0.004058492835611105, - "learning_rate": 0.0001999989809660476, - "loss": 46.0, - "step": 18805 - }, - { - "epoch": 1.437850029627081, - "grad_norm": 0.0007875863229855895, - "learning_rate": 0.00019999898085760817, - "loss": 46.0, - "step": 18806 - }, - { - "epoch": 1.4379264866104708, - "grad_norm": 0.0020004352554678917, - "learning_rate": 0.00019999898074916299, - "loss": 46.0, - "step": 18807 - }, - { - "epoch": 1.4380029435938604, - "grad_norm": 0.0006731065222993493, - "learning_rate": 0.00019999898064071206, - "loss": 46.0, - "step": 18808 - }, - { - "epoch": 1.4380794005772501, - "grad_norm": 0.0009487764327786863, - "learning_rate": 0.00019999898053225533, - "loss": 46.0, - "step": 18809 - }, - { - "epoch": 1.43815585756064, - "grad_norm": 0.008388428017497063, - "learning_rate": 0.00019999898042379286, - "loss": 46.0, - "step": 18810 - }, - { - "epoch": 1.4382323145440297, - "grad_norm": 0.001875359215773642, - "learning_rate": 0.0001999989803153246, - "loss": 46.0, - "step": 18811 - }, - { - "epoch": 1.4383087715274194, - "grad_norm": 0.0007180998218245804, - "learning_rate": 0.00019999898020685056, - "loss": 46.0, - "step": 18812 - }, - { - "epoch": 1.4383852285108092, - "grad_norm": 0.0015995518770068884, - "learning_rate": 0.00019999898009837077, - "loss": 46.0, - "step": 18813 - }, - { - "epoch": 1.438461685494199, - "grad_norm": 0.002107905223965645, - "learning_rate": 0.0001999989799898852, - "loss": 46.0, - "step": 18814 - }, - { - "epoch": 1.4385381424775885, - "grad_norm": 0.001844717189669609, - "learning_rate": 0.00019999897988139389, - "loss": 46.0, - "step": 18815 - }, - { - "epoch": 1.4386145994609782, - "grad_norm": 0.0005767298280261457, - "learning_rate": 0.00019999897977289677, - "loss": 46.0, - "step": 18816 - }, - { - "epoch": 1.438691056444368, - "grad_norm": 0.0004050273564644158, - "learning_rate": 0.0001999989796643939, - "loss": 46.0, - "step": 18817 - }, - { - "epoch": 1.4387675134277578, - "grad_norm": 0.0009034311515279114, - "learning_rate": 0.00019999897955588528, - "loss": 46.0, - "step": 18818 - }, - { - "epoch": 1.4388439704111473, - "grad_norm": 0.0034355544485151768, - "learning_rate": 0.00019999897944737084, - "loss": 46.0, - "step": 18819 - }, - { - "epoch": 1.438920427394537, - "grad_norm": 0.0009344511199742556, - "learning_rate": 0.0001999989793388507, - "loss": 46.0, - "step": 18820 - }, - { - "epoch": 1.4389968843779268, - "grad_norm": 0.0022184192202985287, - "learning_rate": 0.00019999897923032473, - "loss": 46.0, - "step": 18821 - }, - { - "epoch": 1.4390733413613166, - "grad_norm": 0.00031321513233706355, - "learning_rate": 0.000199998979121793, - "loss": 46.0, - "step": 18822 - }, - { - "epoch": 1.4391497983447064, - "grad_norm": 0.004710352048277855, - "learning_rate": 0.00019999897901325553, - "loss": 46.0, - "step": 18823 - }, - { - "epoch": 1.4392262553280961, - "grad_norm": 0.006851649843156338, - "learning_rate": 0.00019999897890471226, - "loss": 46.0, - "step": 18824 - }, - { - "epoch": 1.4393027123114859, - "grad_norm": 0.001001182128675282, - "learning_rate": 0.00019999897879616324, - "loss": 46.0, - "step": 18825 - }, - { - "epoch": 1.4393791692948754, - "grad_norm": 0.001211144495755434, - "learning_rate": 0.00019999897868760847, - "loss": 46.0, - "step": 18826 - }, - { - "epoch": 1.4394556262782652, - "grad_norm": 0.0019999388605356216, - "learning_rate": 0.00019999897857904788, - "loss": 46.0, - "step": 18827 - }, - { - "epoch": 1.439532083261655, - "grad_norm": 0.005038689821958542, - "learning_rate": 0.00019999897847048154, - "loss": 46.0, - "step": 18828 - }, - { - "epoch": 1.4396085402450447, - "grad_norm": 0.0009320495883002877, - "learning_rate": 0.00019999897836190945, - "loss": 46.0, - "step": 18829 - }, - { - "epoch": 1.4396849972284342, - "grad_norm": 0.0018318586517125368, - "learning_rate": 0.0001999989782533316, - "loss": 46.0, - "step": 18830 - }, - { - "epoch": 1.439761454211824, - "grad_norm": 0.0012748222798109055, - "learning_rate": 0.00019999897814474793, - "loss": 46.0, - "step": 18831 - }, - { - "epoch": 1.4398379111952138, - "grad_norm": 0.0051794955506920815, - "learning_rate": 0.0001999989780361585, - "loss": 46.0, - "step": 18832 - }, - { - "epoch": 1.4399143681786035, - "grad_norm": 0.0009334392962045968, - "learning_rate": 0.00019999897792756331, - "loss": 46.0, - "step": 18833 - }, - { - "epoch": 1.4399908251619933, - "grad_norm": 0.0013264057924970984, - "learning_rate": 0.00019999897781896236, - "loss": 46.0, - "step": 18834 - }, - { - "epoch": 1.440067282145383, - "grad_norm": 0.0003454741963651031, - "learning_rate": 0.00019999897771035566, - "loss": 46.0, - "step": 18835 - }, - { - "epoch": 1.4401437391287728, - "grad_norm": 0.0011132069630548358, - "learning_rate": 0.0001999989776017432, - "loss": 46.0, - "step": 18836 - }, - { - "epoch": 1.4402201961121623, - "grad_norm": 0.0006953852134756744, - "learning_rate": 0.00019999897749312494, - "loss": 46.0, - "step": 18837 - }, - { - "epoch": 1.440296653095552, - "grad_norm": 0.0011303108185529709, - "learning_rate": 0.0001999989773845009, - "loss": 46.0, - "step": 18838 - }, - { - "epoch": 1.4403731100789419, - "grad_norm": 0.0009725252748467028, - "learning_rate": 0.0001999989772758711, - "loss": 46.0, - "step": 18839 - }, - { - "epoch": 1.4404495670623316, - "grad_norm": 0.001734154182486236, - "learning_rate": 0.00019999897716723553, - "loss": 46.0, - "step": 18840 - }, - { - "epoch": 1.4405260240457212, - "grad_norm": 0.0009559121681377292, - "learning_rate": 0.0001999989770585942, - "loss": 46.0, - "step": 18841 - }, - { - "epoch": 1.440602481029111, - "grad_norm": 0.00429040752351284, - "learning_rate": 0.00019999897694994708, - "loss": 46.0, - "step": 18842 - }, - { - "epoch": 1.4406789380125007, - "grad_norm": 0.0003997149469796568, - "learning_rate": 0.0001999989768412942, - "loss": 46.0, - "step": 18843 - }, - { - "epoch": 1.4407553949958904, - "grad_norm": 0.007469318341463804, - "learning_rate": 0.00019999897673263556, - "loss": 46.0, - "step": 18844 - }, - { - "epoch": 1.4408318519792802, - "grad_norm": 0.003976054023951292, - "learning_rate": 0.00019999897662397115, - "loss": 46.0, - "step": 18845 - }, - { - "epoch": 1.44090830896267, - "grad_norm": 0.0007124468102119863, - "learning_rate": 0.00019999897651530097, - "loss": 46.0, - "step": 18846 - }, - { - "epoch": 1.4409847659460597, - "grad_norm": 0.0011592571390792727, - "learning_rate": 0.00019999897640662502, - "loss": 46.0, - "step": 18847 - }, - { - "epoch": 1.4410612229294493, - "grad_norm": 0.0010226726299151778, - "learning_rate": 0.0001999989762979433, - "loss": 46.0, - "step": 18848 - }, - { - "epoch": 1.441137679912839, - "grad_norm": 0.0007074595778249204, - "learning_rate": 0.0001999989761892558, - "loss": 46.0, - "step": 18849 - }, - { - "epoch": 1.4412141368962288, - "grad_norm": 0.001997965620830655, - "learning_rate": 0.00019999897608056255, - "loss": 46.0, - "step": 18850 - }, - { - "epoch": 1.4412905938796186, - "grad_norm": 0.0005131909274496138, - "learning_rate": 0.0001999989759718635, - "loss": 46.0, - "step": 18851 - }, - { - "epoch": 1.441367050863008, - "grad_norm": 0.0017190531361848116, - "learning_rate": 0.00019999897586315873, - "loss": 46.0, - "step": 18852 - }, - { - "epoch": 1.4414435078463979, - "grad_norm": 0.001054232008755207, - "learning_rate": 0.00019999897575444814, - "loss": 46.0, - "step": 18853 - }, - { - "epoch": 1.4415199648297876, - "grad_norm": 0.001867811195552349, - "learning_rate": 0.0001999989756457318, - "loss": 46.0, - "step": 18854 - }, - { - "epoch": 1.4415964218131774, - "grad_norm": 0.0007233214564621449, - "learning_rate": 0.0001999989755370097, - "loss": 46.0, - "step": 18855 - }, - { - "epoch": 1.4416728787965671, - "grad_norm": 0.004756170324981213, - "learning_rate": 0.00019999897542828183, - "loss": 46.0, - "step": 18856 - }, - { - "epoch": 1.441749335779957, - "grad_norm": 0.0009767322335392237, - "learning_rate": 0.0001999989753195482, - "loss": 46.0, - "step": 18857 - }, - { - "epoch": 1.4418257927633467, - "grad_norm": 0.0009141616756096482, - "learning_rate": 0.00019999897521080876, - "loss": 46.0, - "step": 18858 - }, - { - "epoch": 1.4419022497467362, - "grad_norm": 0.0006841244176030159, - "learning_rate": 0.00019999897510206358, - "loss": 46.0, - "step": 18859 - }, - { - "epoch": 1.441978706730126, - "grad_norm": 0.000625805405434221, - "learning_rate": 0.00019999897499331263, - "loss": 46.0, - "step": 18860 - }, - { - "epoch": 1.4420551637135157, - "grad_norm": 0.005052138119935989, - "learning_rate": 0.00019999897488455587, - "loss": 46.0, - "step": 18861 - }, - { - "epoch": 1.4421316206969055, - "grad_norm": 0.0003998340980615467, - "learning_rate": 0.0001999989747757934, - "loss": 46.0, - "step": 18862 - }, - { - "epoch": 1.442208077680295, - "grad_norm": 0.0015206149546429515, - "learning_rate": 0.00019999897466702513, - "loss": 46.0, - "step": 18863 - }, - { - "epoch": 1.4422845346636848, - "grad_norm": 0.000785012380219996, - "learning_rate": 0.0001999989745582511, - "loss": 46.0, - "step": 18864 - }, - { - "epoch": 1.4423609916470745, - "grad_norm": 0.0013361851451918483, - "learning_rate": 0.0001999989744494713, - "loss": 46.0, - "step": 18865 - }, - { - "epoch": 1.4424374486304643, - "grad_norm": 0.00223688711412251, - "learning_rate": 0.00019999897434068572, - "loss": 46.0, - "step": 18866 - }, - { - "epoch": 1.442513905613854, - "grad_norm": 0.0008344555390067399, - "learning_rate": 0.0001999989742318944, - "loss": 46.0, - "step": 18867 - }, - { - "epoch": 1.4425903625972438, - "grad_norm": 0.0009108957601711154, - "learning_rate": 0.0001999989741230973, - "loss": 46.0, - "step": 18868 - }, - { - "epoch": 1.4426668195806334, - "grad_norm": 0.001850196160376072, - "learning_rate": 0.0001999989740142944, - "loss": 46.0, - "step": 18869 - }, - { - "epoch": 1.4427432765640231, - "grad_norm": 0.0010056825121864676, - "learning_rate": 0.00019999897390548578, - "loss": 46.0, - "step": 18870 - }, - { - "epoch": 1.4428197335474129, - "grad_norm": 0.0005432313773781061, - "learning_rate": 0.00019999897379667134, - "loss": 46.0, - "step": 18871 - }, - { - "epoch": 1.4428961905308026, - "grad_norm": 0.0024772018659859896, - "learning_rate": 0.00019999897368785114, - "loss": 46.0, - "step": 18872 - }, - { - "epoch": 1.4429726475141924, - "grad_norm": 0.003192031057551503, - "learning_rate": 0.00019999897357902518, - "loss": 46.0, - "step": 18873 - }, - { - "epoch": 1.443049104497582, - "grad_norm": 0.0018787558656185865, - "learning_rate": 0.00019999897347019348, - "loss": 46.0, - "step": 18874 - }, - { - "epoch": 1.4431255614809717, - "grad_norm": 0.003805401735007763, - "learning_rate": 0.00019999897336135596, - "loss": 46.0, - "step": 18875 - }, - { - "epoch": 1.4432020184643615, - "grad_norm": 0.0006748136365786195, - "learning_rate": 0.0001999989732525127, - "loss": 46.0, - "step": 18876 - }, - { - "epoch": 1.4432784754477512, - "grad_norm": 0.0007874740986153483, - "learning_rate": 0.00019999897314366367, - "loss": 46.0, - "step": 18877 - }, - { - "epoch": 1.443354932431141, - "grad_norm": 0.005053788423538208, - "learning_rate": 0.00019999897303480885, - "loss": 46.0, - "step": 18878 - }, - { - "epoch": 1.4434313894145308, - "grad_norm": 0.0004960333462804556, - "learning_rate": 0.00019999897292594828, - "loss": 46.0, - "step": 18879 - }, - { - "epoch": 1.4435078463979203, - "grad_norm": 0.005428021308034658, - "learning_rate": 0.00019999897281708194, - "loss": 46.0, - "step": 18880 - }, - { - "epoch": 1.44358430338131, - "grad_norm": 0.001701239263638854, - "learning_rate": 0.00019999897270820986, - "loss": 46.0, - "step": 18881 - }, - { - "epoch": 1.4436607603646998, - "grad_norm": 0.0010987732093781233, - "learning_rate": 0.00019999897259933197, - "loss": 46.0, - "step": 18882 - }, - { - "epoch": 1.4437372173480896, - "grad_norm": 0.0034763775765895844, - "learning_rate": 0.0001999989724904483, - "loss": 46.0, - "step": 18883 - }, - { - "epoch": 1.4438136743314791, - "grad_norm": 0.0007811313262209296, - "learning_rate": 0.0001999989723815589, - "loss": 46.0, - "step": 18884 - }, - { - "epoch": 1.4438901313148689, - "grad_norm": 0.0023446024861186743, - "learning_rate": 0.0001999989722726637, - "loss": 46.0, - "step": 18885 - }, - { - "epoch": 1.4439665882982586, - "grad_norm": 0.0031363684684038162, - "learning_rate": 0.00019999897216376275, - "loss": 46.0, - "step": 18886 - }, - { - "epoch": 1.4440430452816484, - "grad_norm": 0.002317397389560938, - "learning_rate": 0.000199998972054856, - "loss": 46.0, - "step": 18887 - }, - { - "epoch": 1.4441195022650382, - "grad_norm": 0.000758698966819793, - "learning_rate": 0.0001999989719459435, - "loss": 46.0, - "step": 18888 - }, - { - "epoch": 1.444195959248428, - "grad_norm": 0.0005982097354717553, - "learning_rate": 0.00019999897183702525, - "loss": 46.0, - "step": 18889 - }, - { - "epoch": 1.4442724162318177, - "grad_norm": 0.0007652511703781784, - "learning_rate": 0.0001999989717281012, - "loss": 46.0, - "step": 18890 - }, - { - "epoch": 1.4443488732152072, - "grad_norm": 0.0014397187624126673, - "learning_rate": 0.0001999989716191714, - "loss": 46.0, - "step": 18891 - }, - { - "epoch": 1.444425330198597, - "grad_norm": 0.0007819320890121162, - "learning_rate": 0.00019999897151023582, - "loss": 46.0, - "step": 18892 - }, - { - "epoch": 1.4445017871819867, - "grad_norm": 0.0026882081292569637, - "learning_rate": 0.00019999897140129448, - "loss": 46.0, - "step": 18893 - }, - { - "epoch": 1.4445782441653765, - "grad_norm": 0.0012461873702704906, - "learning_rate": 0.00019999897129234737, - "loss": 46.0, - "step": 18894 - }, - { - "epoch": 1.444654701148766, - "grad_norm": 0.0004840005422011018, - "learning_rate": 0.00019999897118339448, - "loss": 46.0, - "step": 18895 - }, - { - "epoch": 1.4447311581321558, - "grad_norm": 0.000709084328263998, - "learning_rate": 0.00019999897107443585, - "loss": 46.0, - "step": 18896 - }, - { - "epoch": 1.4448076151155456, - "grad_norm": 0.002557893982157111, - "learning_rate": 0.0001999989709654714, - "loss": 46.0, - "step": 18897 - }, - { - "epoch": 1.4448840720989353, - "grad_norm": 0.0005654958658851683, - "learning_rate": 0.00019999897085650124, - "loss": 46.0, - "step": 18898 - }, - { - "epoch": 1.444960529082325, - "grad_norm": 0.001185168162919581, - "learning_rate": 0.00019999897074752526, - "loss": 46.0, - "step": 18899 - }, - { - "epoch": 1.4450369860657148, - "grad_norm": 0.0006020786240696907, - "learning_rate": 0.00019999897063854353, - "loss": 46.0, - "step": 18900 - }, - { - "epoch": 1.4451134430491046, - "grad_norm": 0.029584430158138275, - "learning_rate": 0.00019999897052955603, - "loss": 46.0, - "step": 18901 - }, - { - "epoch": 1.4451899000324941, - "grad_norm": 0.0012202764628455043, - "learning_rate": 0.00019999897042056276, - "loss": 46.0, - "step": 18902 - }, - { - "epoch": 1.445266357015884, - "grad_norm": 0.0004320077132433653, - "learning_rate": 0.00019999897031156371, - "loss": 46.0, - "step": 18903 - }, - { - "epoch": 1.4453428139992737, - "grad_norm": 0.0008896985091269016, - "learning_rate": 0.00019999897020255892, - "loss": 46.0, - "step": 18904 - }, - { - "epoch": 1.4454192709826634, - "grad_norm": 0.0007843612693250179, - "learning_rate": 0.00019999897009354833, - "loss": 46.0, - "step": 18905 - }, - { - "epoch": 1.445495727966053, - "grad_norm": 0.0011643936159089208, - "learning_rate": 0.000199998969984532, - "loss": 46.0, - "step": 18906 - }, - { - "epoch": 1.4455721849494427, - "grad_norm": 0.0007157287909649312, - "learning_rate": 0.00019999896987550985, - "loss": 46.0, - "step": 18907 - }, - { - "epoch": 1.4456486419328325, - "grad_norm": 0.0010913871228694916, - "learning_rate": 0.000199998969766482, - "loss": 46.0, - "step": 18908 - }, - { - "epoch": 1.4457250989162223, - "grad_norm": 0.0015709202270954847, - "learning_rate": 0.00019999896965744833, - "loss": 46.0, - "step": 18909 - }, - { - "epoch": 1.445801555899612, - "grad_norm": 0.003016118425875902, - "learning_rate": 0.00019999896954840888, - "loss": 46.0, - "step": 18910 - }, - { - "epoch": 1.4458780128830018, - "grad_norm": 0.0004957798519171774, - "learning_rate": 0.00019999896943936372, - "loss": 46.0, - "step": 18911 - }, - { - "epoch": 1.4459544698663915, - "grad_norm": 0.0101634431630373, - "learning_rate": 0.00019999896933031275, - "loss": 46.0, - "step": 18912 - }, - { - "epoch": 1.446030926849781, - "grad_norm": 0.001034548506140709, - "learning_rate": 0.000199998969221256, - "loss": 46.0, - "step": 18913 - }, - { - "epoch": 1.4461073838331708, - "grad_norm": 0.0015693142777308822, - "learning_rate": 0.00019999896911219352, - "loss": 46.0, - "step": 18914 - }, - { - "epoch": 1.4461838408165606, - "grad_norm": 0.0011236666468903422, - "learning_rate": 0.00019999896900312525, - "loss": 46.0, - "step": 18915 - }, - { - "epoch": 1.4462602977999504, - "grad_norm": 0.0019882076885551214, - "learning_rate": 0.0001999989688940512, - "loss": 46.0, - "step": 18916 - }, - { - "epoch": 1.44633675478334, - "grad_norm": 0.0009653542074374855, - "learning_rate": 0.00019999896878497139, - "loss": 46.0, - "step": 18917 - }, - { - "epoch": 1.4464132117667297, - "grad_norm": 0.0003363323921803385, - "learning_rate": 0.00019999896867588582, - "loss": 46.0, - "step": 18918 - }, - { - "epoch": 1.4464896687501194, - "grad_norm": 0.008416110649704933, - "learning_rate": 0.00019999896856679445, - "loss": 46.0, - "step": 18919 - }, - { - "epoch": 1.4465661257335092, - "grad_norm": 0.0011363705853000283, - "learning_rate": 0.00019999896845769734, - "loss": 46.0, - "step": 18920 - }, - { - "epoch": 1.446642582716899, - "grad_norm": 0.0014391785953193903, - "learning_rate": 0.00019999896834859446, - "loss": 46.0, - "step": 18921 - }, - { - "epoch": 1.4467190397002887, - "grad_norm": 0.002617859048768878, - "learning_rate": 0.00019999896823948577, - "loss": 46.0, - "step": 18922 - }, - { - "epoch": 1.4467954966836785, - "grad_norm": 0.0021110600791871548, - "learning_rate": 0.00019999896813037137, - "loss": 46.0, - "step": 18923 - }, - { - "epoch": 1.446871953667068, - "grad_norm": 0.0017390345456078649, - "learning_rate": 0.0001999989680212512, - "loss": 46.0, - "step": 18924 - }, - { - "epoch": 1.4469484106504578, - "grad_norm": 0.0017242267495021224, - "learning_rate": 0.0001999989679121252, - "loss": 46.0, - "step": 18925 - }, - { - "epoch": 1.4470248676338475, - "grad_norm": 0.0009134160936810076, - "learning_rate": 0.00019999896780299349, - "loss": 46.0, - "step": 18926 - }, - { - "epoch": 1.4471013246172373, - "grad_norm": 0.0040166620165109634, - "learning_rate": 0.000199998967693856, - "loss": 46.0, - "step": 18927 - }, - { - "epoch": 1.4471777816006268, - "grad_norm": 0.0008931373013183475, - "learning_rate": 0.00019999896758471272, - "loss": 46.0, - "step": 18928 - }, - { - "epoch": 1.4472542385840166, - "grad_norm": 0.0004858648171648383, - "learning_rate": 0.00019999896747556367, - "loss": 46.0, - "step": 18929 - }, - { - "epoch": 1.4473306955674063, - "grad_norm": 0.004265401978045702, - "learning_rate": 0.00019999896736640885, - "loss": 46.0, - "step": 18930 - }, - { - "epoch": 1.447407152550796, - "grad_norm": 0.0032247009221464396, - "learning_rate": 0.00019999896725724826, - "loss": 46.0, - "step": 18931 - }, - { - "epoch": 1.4474836095341859, - "grad_norm": 0.0016347304917871952, - "learning_rate": 0.0001999989671480819, - "loss": 46.0, - "step": 18932 - }, - { - "epoch": 1.4475600665175756, - "grad_norm": 0.001962438225746155, - "learning_rate": 0.00019999896703890978, - "loss": 46.0, - "step": 18933 - }, - { - "epoch": 1.4476365235009654, - "grad_norm": 0.0010024956427514553, - "learning_rate": 0.0001999989669297319, - "loss": 46.0, - "step": 18934 - }, - { - "epoch": 1.447712980484355, - "grad_norm": 0.001242920639924705, - "learning_rate": 0.00019999896682054824, - "loss": 46.0, - "step": 18935 - }, - { - "epoch": 1.4477894374677447, - "grad_norm": 0.004915470723062754, - "learning_rate": 0.00019999896671135884, - "loss": 46.0, - "step": 18936 - }, - { - "epoch": 1.4478658944511344, - "grad_norm": 0.0003239080833736807, - "learning_rate": 0.00019999896660216363, - "loss": 46.0, - "step": 18937 - }, - { - "epoch": 1.4479423514345242, - "grad_norm": 0.000989253749139607, - "learning_rate": 0.00019999896649296263, - "loss": 46.0, - "step": 18938 - }, - { - "epoch": 1.4480188084179137, - "grad_norm": 0.0006176682072691619, - "learning_rate": 0.0001999989663837559, - "loss": 46.0, - "step": 18939 - }, - { - "epoch": 1.4480952654013035, - "grad_norm": 0.0016268908511847258, - "learning_rate": 0.0001999989662745434, - "loss": 46.0, - "step": 18940 - }, - { - "epoch": 1.4481717223846933, - "grad_norm": 0.0005626184865832329, - "learning_rate": 0.00019999896616532513, - "loss": 46.0, - "step": 18941 - }, - { - "epoch": 1.448248179368083, - "grad_norm": 0.004932803101837635, - "learning_rate": 0.00019999896605610106, - "loss": 46.0, - "step": 18942 - }, - { - "epoch": 1.4483246363514728, - "grad_norm": 0.0011716191656887531, - "learning_rate": 0.00019999896594687127, - "loss": 46.0, - "step": 18943 - }, - { - "epoch": 1.4484010933348626, - "grad_norm": 0.0014760260237380862, - "learning_rate": 0.00019999896583763568, - "loss": 46.0, - "step": 18944 - }, - { - "epoch": 1.4484775503182523, - "grad_norm": 0.0014312447747215629, - "learning_rate": 0.0001999989657283943, - "loss": 46.0, - "step": 18945 - }, - { - "epoch": 1.4485540073016419, - "grad_norm": 0.0019218989182263613, - "learning_rate": 0.00019999896561914723, - "loss": 46.0, - "step": 18946 - }, - { - "epoch": 1.4486304642850316, - "grad_norm": 0.0011618298012763262, - "learning_rate": 0.0001999989655098943, - "loss": 46.0, - "step": 18947 - }, - { - "epoch": 1.4487069212684214, - "grad_norm": 0.0011045156279578805, - "learning_rate": 0.00019999896540063566, - "loss": 46.0, - "step": 18948 - }, - { - "epoch": 1.4487833782518111, - "grad_norm": 0.0010301691945642233, - "learning_rate": 0.00019999896529137123, - "loss": 46.0, - "step": 18949 - }, - { - "epoch": 1.4488598352352007, - "grad_norm": 0.0005085246521048248, - "learning_rate": 0.00019999896518210102, - "loss": 46.0, - "step": 18950 - }, - { - "epoch": 1.4489362922185904, - "grad_norm": 0.0012716705678030849, - "learning_rate": 0.00019999896507282504, - "loss": 46.0, - "step": 18951 - }, - { - "epoch": 1.4490127492019802, - "grad_norm": 0.0007319659925997257, - "learning_rate": 0.00019999896496354332, - "loss": 46.0, - "step": 18952 - }, - { - "epoch": 1.44908920618537, - "grad_norm": 0.0005724075599573553, - "learning_rate": 0.00019999896485425582, - "loss": 46.0, - "step": 18953 - }, - { - "epoch": 1.4491656631687597, - "grad_norm": 0.0008692061528563499, - "learning_rate": 0.00019999896474496252, - "loss": 46.0, - "step": 18954 - }, - { - "epoch": 1.4492421201521495, - "grad_norm": 0.0025127530097961426, - "learning_rate": 0.00019999896463566348, - "loss": 46.0, - "step": 18955 - }, - { - "epoch": 1.4493185771355392, - "grad_norm": 0.0008072750060819089, - "learning_rate": 0.00019999896452635866, - "loss": 46.0, - "step": 18956 - }, - { - "epoch": 1.4493950341189288, - "grad_norm": 0.001020344439893961, - "learning_rate": 0.00019999896441704807, - "loss": 46.0, - "step": 18957 - }, - { - "epoch": 1.4494714911023185, - "grad_norm": 0.003414277918636799, - "learning_rate": 0.0001999989643077317, - "loss": 46.0, - "step": 18958 - }, - { - "epoch": 1.4495479480857083, - "grad_norm": 0.0019378105644136667, - "learning_rate": 0.0001999989641984096, - "loss": 46.0, - "step": 18959 - }, - { - "epoch": 1.449624405069098, - "grad_norm": 0.000865251524373889, - "learning_rate": 0.0001999989640890817, - "loss": 46.0, - "step": 18960 - }, - { - "epoch": 1.4497008620524876, - "grad_norm": 0.0004280477878637612, - "learning_rate": 0.00019999896397974805, - "loss": 46.0, - "step": 18961 - }, - { - "epoch": 1.4497773190358774, - "grad_norm": 0.0020386280957609415, - "learning_rate": 0.0001999989638704086, - "loss": 46.0, - "step": 18962 - }, - { - "epoch": 1.4498537760192671, - "grad_norm": 0.000983902020379901, - "learning_rate": 0.0001999989637610634, - "loss": 46.0, - "step": 18963 - }, - { - "epoch": 1.4499302330026569, - "grad_norm": 0.0009496128186583519, - "learning_rate": 0.00019999896365171241, - "loss": 46.0, - "step": 18964 - }, - { - "epoch": 1.4500066899860466, - "grad_norm": 0.0017147546168416739, - "learning_rate": 0.0001999989635423557, - "loss": 46.0, - "step": 18965 - }, - { - "epoch": 1.4500831469694364, - "grad_norm": 0.0011938734678551555, - "learning_rate": 0.0001999989634329932, - "loss": 46.0, - "step": 18966 - }, - { - "epoch": 1.4501596039528262, - "grad_norm": 0.002902195556089282, - "learning_rate": 0.00019999896332362492, - "loss": 46.0, - "step": 18967 - }, - { - "epoch": 1.4502360609362157, - "grad_norm": 0.001075799111276865, - "learning_rate": 0.00019999896321425085, - "loss": 46.0, - "step": 18968 - }, - { - "epoch": 1.4503125179196055, - "grad_norm": 0.0005118598346598446, - "learning_rate": 0.00019999896310487103, - "loss": 46.0, - "step": 18969 - }, - { - "epoch": 1.4503889749029952, - "grad_norm": 0.0009796771919354796, - "learning_rate": 0.00019999896299548544, - "loss": 46.0, - "step": 18970 - }, - { - "epoch": 1.450465431886385, - "grad_norm": 0.0016069040866568685, - "learning_rate": 0.0001999989628860941, - "loss": 46.0, - "step": 18971 - }, - { - "epoch": 1.4505418888697745, - "grad_norm": 0.0007837492157705128, - "learning_rate": 0.00019999896277669697, - "loss": 46.0, - "step": 18972 - }, - { - "epoch": 1.4506183458531643, - "grad_norm": 0.0008090370683930814, - "learning_rate": 0.00019999896266729406, - "loss": 46.0, - "step": 18973 - }, - { - "epoch": 1.450694802836554, - "grad_norm": 0.0007583677652291954, - "learning_rate": 0.0001999989625578854, - "loss": 46.0, - "step": 18974 - }, - { - "epoch": 1.4507712598199438, - "grad_norm": 0.009012933820486069, - "learning_rate": 0.00019999896244847097, - "loss": 46.0, - "step": 18975 - }, - { - "epoch": 1.4508477168033336, - "grad_norm": 0.0018240658100694418, - "learning_rate": 0.00019999896233905076, - "loss": 46.0, - "step": 18976 - }, - { - "epoch": 1.4509241737867233, - "grad_norm": 0.0019005857175216079, - "learning_rate": 0.0001999989622296248, - "loss": 46.0, - "step": 18977 - }, - { - "epoch": 1.451000630770113, - "grad_norm": 0.00193652359303087, - "learning_rate": 0.00019999896212019304, - "loss": 46.0, - "step": 18978 - }, - { - "epoch": 1.4510770877535026, - "grad_norm": 0.001566311577335, - "learning_rate": 0.00019999896201075554, - "loss": 46.0, - "step": 18979 - }, - { - "epoch": 1.4511535447368924, - "grad_norm": 0.0009208136470988393, - "learning_rate": 0.00019999896190131224, - "loss": 46.0, - "step": 18980 - }, - { - "epoch": 1.4512300017202822, - "grad_norm": 0.0008543008589185774, - "learning_rate": 0.0001999989617918632, - "loss": 46.0, - "step": 18981 - }, - { - "epoch": 1.451306458703672, - "grad_norm": 0.0014147782931104302, - "learning_rate": 0.00019999896168240838, - "loss": 46.0, - "step": 18982 - }, - { - "epoch": 1.4513829156870615, - "grad_norm": 0.0014980046544224024, - "learning_rate": 0.0001999989615729478, - "loss": 46.0, - "step": 18983 - }, - { - "epoch": 1.4514593726704512, - "grad_norm": 0.0008350092102773488, - "learning_rate": 0.00019999896146348143, - "loss": 46.0, - "step": 18984 - }, - { - "epoch": 1.451535829653841, - "grad_norm": 0.0006741269025951624, - "learning_rate": 0.0001999989613540093, - "loss": 46.0, - "step": 18985 - }, - { - "epoch": 1.4516122866372307, - "grad_norm": 0.0005927701713517308, - "learning_rate": 0.00019999896124453138, - "loss": 46.0, - "step": 18986 - }, - { - "epoch": 1.4516887436206205, - "grad_norm": 0.0008082900894805789, - "learning_rate": 0.00019999896113504772, - "loss": 46.0, - "step": 18987 - }, - { - "epoch": 1.4517652006040103, - "grad_norm": 0.005529089365154505, - "learning_rate": 0.0001999989610255583, - "loss": 46.0, - "step": 18988 - }, - { - "epoch": 1.4518416575874, - "grad_norm": 0.0012249990832060575, - "learning_rate": 0.00019999896091606306, - "loss": 46.0, - "step": 18989 - }, - { - "epoch": 1.4519181145707896, - "grad_norm": 0.000879644590895623, - "learning_rate": 0.0001999989608065621, - "loss": 46.0, - "step": 18990 - }, - { - "epoch": 1.4519945715541793, - "grad_norm": 0.0020269686356186867, - "learning_rate": 0.00019999896069705536, - "loss": 46.0, - "step": 18991 - }, - { - "epoch": 1.452071028537569, - "grad_norm": 0.0027563904877752066, - "learning_rate": 0.00019999896058754284, - "loss": 46.0, - "step": 18992 - }, - { - "epoch": 1.4521474855209588, - "grad_norm": 0.0037514863070100546, - "learning_rate": 0.00019999896047802457, - "loss": 46.0, - "step": 18993 - }, - { - "epoch": 1.4522239425043484, - "grad_norm": 0.0015333560295403004, - "learning_rate": 0.0001999989603685005, - "loss": 46.0, - "step": 18994 - }, - { - "epoch": 1.4523003994877381, - "grad_norm": 0.0031711405608803034, - "learning_rate": 0.00019999896025897068, - "loss": 46.0, - "step": 18995 - }, - { - "epoch": 1.452376856471128, - "grad_norm": 0.0009732060716487467, - "learning_rate": 0.00019999896014943512, - "loss": 46.0, - "step": 18996 - }, - { - "epoch": 1.4524533134545177, - "grad_norm": 0.002221033675596118, - "learning_rate": 0.00019999896003989372, - "loss": 46.0, - "step": 18997 - }, - { - "epoch": 1.4525297704379074, - "grad_norm": 0.0010713287629187107, - "learning_rate": 0.00019999895993034662, - "loss": 46.0, - "step": 18998 - }, - { - "epoch": 1.4526062274212972, - "grad_norm": 0.003200495382770896, - "learning_rate": 0.0001999989598207937, - "loss": 46.0, - "step": 18999 - }, - { - "epoch": 1.4526826844046867, - "grad_norm": 0.000680546450894326, - "learning_rate": 0.00019999895971123502, - "loss": 46.0, - "step": 19000 - }, - { - "epoch": 1.4527591413880765, - "grad_norm": 0.0010694727534428239, - "learning_rate": 0.00019999895960167062, - "loss": 46.0, - "step": 19001 - }, - { - "epoch": 1.4528355983714663, - "grad_norm": 0.0013278444530442357, - "learning_rate": 0.0001999989594921004, - "loss": 46.0, - "step": 19002 - }, - { - "epoch": 1.452912055354856, - "grad_norm": 0.0005983138689771295, - "learning_rate": 0.0001999989593825244, - "loss": 46.0, - "step": 19003 - }, - { - "epoch": 1.4529885123382458, - "grad_norm": 0.0009986660443246365, - "learning_rate": 0.00019999895927294266, - "loss": 46.0, - "step": 19004 - }, - { - "epoch": 1.4530649693216353, - "grad_norm": 0.0006804827717132866, - "learning_rate": 0.00019999895916335514, - "loss": 46.0, - "step": 19005 - }, - { - "epoch": 1.453141426305025, - "grad_norm": 0.007900598458945751, - "learning_rate": 0.00019999895905376187, - "loss": 46.0, - "step": 19006 - }, - { - "epoch": 1.4532178832884148, - "grad_norm": 0.0008294080616906285, - "learning_rate": 0.0001999989589441628, - "loss": 46.0, - "step": 19007 - }, - { - "epoch": 1.4532943402718046, - "grad_norm": 0.017306895926594734, - "learning_rate": 0.00019999895883455798, - "loss": 46.0, - "step": 19008 - }, - { - "epoch": 1.4533707972551944, - "grad_norm": 0.004924267530441284, - "learning_rate": 0.0001999989587249474, - "loss": 46.0, - "step": 19009 - }, - { - "epoch": 1.4534472542385841, - "grad_norm": 0.0024102837778627872, - "learning_rate": 0.00019999895861533103, - "loss": 46.0, - "step": 19010 - }, - { - "epoch": 1.4535237112219737, - "grad_norm": 0.0006523790070787072, - "learning_rate": 0.0001999989585057089, - "loss": 46.0, - "step": 19011 - }, - { - "epoch": 1.4536001682053634, - "grad_norm": 0.01680338755249977, - "learning_rate": 0.00019999895839608098, - "loss": 46.0, - "step": 19012 - }, - { - "epoch": 1.4536766251887532, - "grad_norm": 0.0029436489567160606, - "learning_rate": 0.0001999989582864473, - "loss": 46.0, - "step": 19013 - }, - { - "epoch": 1.453753082172143, - "grad_norm": 0.0008789649582467973, - "learning_rate": 0.00019999895817680787, - "loss": 46.0, - "step": 19014 - }, - { - "epoch": 1.4538295391555325, - "grad_norm": 0.00046684793778695166, - "learning_rate": 0.00019999895806716266, - "loss": 46.0, - "step": 19015 - }, - { - "epoch": 1.4539059961389222, - "grad_norm": 0.0024449429474771023, - "learning_rate": 0.00019999895795751166, - "loss": 46.0, - "step": 19016 - }, - { - "epoch": 1.453982453122312, - "grad_norm": 0.0016013651620596647, - "learning_rate": 0.00019999895784785494, - "loss": 46.0, - "step": 19017 - }, - { - "epoch": 1.4540589101057018, - "grad_norm": 0.0036540080327540636, - "learning_rate": 0.0001999989577381924, - "loss": 46.0, - "step": 19018 - }, - { - "epoch": 1.4541353670890915, - "grad_norm": 0.003510485403239727, - "learning_rate": 0.00019999895762852412, - "loss": 46.0, - "step": 19019 - }, - { - "epoch": 1.4542118240724813, - "grad_norm": 0.013017495162785053, - "learning_rate": 0.00019999895751885008, - "loss": 46.0, - "step": 19020 - }, - { - "epoch": 1.454288281055871, - "grad_norm": 0.0017165071330964565, - "learning_rate": 0.00019999895740917023, - "loss": 46.0, - "step": 19021 - }, - { - "epoch": 1.4543647380392606, - "grad_norm": 0.0026019043289124966, - "learning_rate": 0.00019999895729948464, - "loss": 46.0, - "step": 19022 - }, - { - "epoch": 1.4544411950226503, - "grad_norm": 0.0004465322708711028, - "learning_rate": 0.00019999895718979328, - "loss": 46.0, - "step": 19023 - }, - { - "epoch": 1.45451765200604, - "grad_norm": 0.014807252213358879, - "learning_rate": 0.00019999895708009614, - "loss": 46.0, - "step": 19024 - }, - { - "epoch": 1.4545941089894299, - "grad_norm": 0.001151958596892655, - "learning_rate": 0.00019999895697039323, - "loss": 46.0, - "step": 19025 - }, - { - "epoch": 1.4546705659728194, - "grad_norm": 0.00103965203743428, - "learning_rate": 0.00019999895686068455, - "loss": 46.0, - "step": 19026 - }, - { - "epoch": 1.4547470229562092, - "grad_norm": 0.003957048524171114, - "learning_rate": 0.00019999895675097012, - "loss": 46.0, - "step": 19027 - }, - { - "epoch": 1.454823479939599, - "grad_norm": 0.0008143301820382476, - "learning_rate": 0.0001999989566412499, - "loss": 46.0, - "step": 19028 - }, - { - "epoch": 1.4548999369229887, - "grad_norm": 0.002168800914660096, - "learning_rate": 0.00019999895653152392, - "loss": 46.0, - "step": 19029 - }, - { - "epoch": 1.4549763939063785, - "grad_norm": 0.0007615119102410972, - "learning_rate": 0.00019999895642179217, - "loss": 46.0, - "step": 19030 - }, - { - "epoch": 1.4550528508897682, - "grad_norm": 0.0005023975973017514, - "learning_rate": 0.00019999895631205464, - "loss": 46.0, - "step": 19031 - }, - { - "epoch": 1.455129307873158, - "grad_norm": 0.004793549422174692, - "learning_rate": 0.00019999895620231135, - "loss": 46.0, - "step": 19032 - }, - { - "epoch": 1.4552057648565475, - "grad_norm": 0.0013992143794894218, - "learning_rate": 0.0001999989560925623, - "loss": 46.0, - "step": 19033 - }, - { - "epoch": 1.4552822218399373, - "grad_norm": 0.004013485740870237, - "learning_rate": 0.00019999895598280746, - "loss": 46.0, - "step": 19034 - }, - { - "epoch": 1.455358678823327, - "grad_norm": 0.0008225878700613976, - "learning_rate": 0.00019999895587304687, - "loss": 46.0, - "step": 19035 - }, - { - "epoch": 1.4554351358067168, - "grad_norm": 0.0013154208427295089, - "learning_rate": 0.00019999895576328048, - "loss": 46.0, - "step": 19036 - }, - { - "epoch": 1.4555115927901063, - "grad_norm": 0.0010862386552616954, - "learning_rate": 0.00019999895565350837, - "loss": 46.0, - "step": 19037 - }, - { - "epoch": 1.455588049773496, - "grad_norm": 0.0018405833980068564, - "learning_rate": 0.00019999895554373044, - "loss": 46.0, - "step": 19038 - }, - { - "epoch": 1.4556645067568859, - "grad_norm": 0.005517409183084965, - "learning_rate": 0.00019999895543394678, - "loss": 46.0, - "step": 19039 - }, - { - "epoch": 1.4557409637402756, - "grad_norm": 0.0025462207850068808, - "learning_rate": 0.00019999895532415732, - "loss": 46.0, - "step": 19040 - }, - { - "epoch": 1.4558174207236654, - "grad_norm": 0.0007382428157143295, - "learning_rate": 0.00019999895521436212, - "loss": 46.0, - "step": 19041 - }, - { - "epoch": 1.4558938777070551, - "grad_norm": 0.0011158391134813428, - "learning_rate": 0.00019999895510456112, - "loss": 46.0, - "step": 19042 - }, - { - "epoch": 1.455970334690445, - "grad_norm": 0.0013417394366115332, - "learning_rate": 0.0001999989549947544, - "loss": 46.0, - "step": 19043 - }, - { - "epoch": 1.4560467916738344, - "grad_norm": 0.0006569102988578379, - "learning_rate": 0.00019999895488494187, - "loss": 46.0, - "step": 19044 - }, - { - "epoch": 1.4561232486572242, - "grad_norm": 0.0022195298224687576, - "learning_rate": 0.00019999895477512355, - "loss": 46.0, - "step": 19045 - }, - { - "epoch": 1.456199705640614, - "grad_norm": 0.010601436719298363, - "learning_rate": 0.0001999989546652995, - "loss": 46.0, - "step": 19046 - }, - { - "epoch": 1.4562761626240037, - "grad_norm": 0.0007732558296993375, - "learning_rate": 0.00019999895455546967, - "loss": 46.0, - "step": 19047 - }, - { - "epoch": 1.4563526196073933, - "grad_norm": 0.000681392615661025, - "learning_rate": 0.00019999895444563408, - "loss": 46.0, - "step": 19048 - }, - { - "epoch": 1.456429076590783, - "grad_norm": 0.003260162193328142, - "learning_rate": 0.0001999989543357927, - "loss": 46.0, - "step": 19049 - }, - { - "epoch": 1.4565055335741728, - "grad_norm": 0.0014880695380270481, - "learning_rate": 0.00019999895422594555, - "loss": 46.0, - "step": 19050 - }, - { - "epoch": 1.4565819905575625, - "grad_norm": 0.0012432746589183807, - "learning_rate": 0.00019999895411609267, - "loss": 46.0, - "step": 19051 - }, - { - "epoch": 1.4566584475409523, - "grad_norm": 0.03235870227217674, - "learning_rate": 0.00019999895400623396, - "loss": 46.0, - "step": 19052 - }, - { - "epoch": 1.456734904524342, - "grad_norm": 0.001498910249210894, - "learning_rate": 0.00019999895389636953, - "loss": 46.0, - "step": 19053 - }, - { - "epoch": 1.4568113615077318, - "grad_norm": 0.0007728941855020821, - "learning_rate": 0.0001999989537864993, - "loss": 46.0, - "step": 19054 - }, - { - "epoch": 1.4568878184911214, - "grad_norm": 0.0005912454798817635, - "learning_rate": 0.0001999989536766233, - "loss": 46.0, - "step": 19055 - }, - { - "epoch": 1.4569642754745111, - "grad_norm": 0.0008412210736423731, - "learning_rate": 0.00019999895356674158, - "loss": 46.0, - "step": 19056 - }, - { - "epoch": 1.4570407324579009, - "grad_norm": 0.013740879483520985, - "learning_rate": 0.00019999895345685406, - "loss": 46.0, - "step": 19057 - }, - { - "epoch": 1.4571171894412907, - "grad_norm": 0.001597710303030908, - "learning_rate": 0.00019999895334696076, - "loss": 46.0, - "step": 19058 - }, - { - "epoch": 1.4571936464246802, - "grad_norm": 0.001536812400445342, - "learning_rate": 0.0001999989532370617, - "loss": 46.0, - "step": 19059 - }, - { - "epoch": 1.45727010340807, - "grad_norm": 0.0016400080639868975, - "learning_rate": 0.00019999895312715685, - "loss": 46.0, - "step": 19060 - }, - { - "epoch": 1.4573465603914597, - "grad_norm": 0.0024575062561780214, - "learning_rate": 0.00019999895301724623, - "loss": 46.0, - "step": 19061 - }, - { - "epoch": 1.4574230173748495, - "grad_norm": 0.013304968364536762, - "learning_rate": 0.00019999895290732987, - "loss": 46.0, - "step": 19062 - }, - { - "epoch": 1.4574994743582392, - "grad_norm": 0.0007338508148677647, - "learning_rate": 0.00019999895279740774, - "loss": 46.0, - "step": 19063 - }, - { - "epoch": 1.457575931341629, - "grad_norm": 0.0008978633559308946, - "learning_rate": 0.0001999989526874798, - "loss": 46.0, - "step": 19064 - }, - { - "epoch": 1.4576523883250188, - "grad_norm": 0.0007933261804282665, - "learning_rate": 0.00019999895257754612, - "loss": 46.0, - "step": 19065 - }, - { - "epoch": 1.4577288453084083, - "grad_norm": 0.0020673710387200117, - "learning_rate": 0.0001999989524676067, - "loss": 46.0, - "step": 19066 - }, - { - "epoch": 1.457805302291798, - "grad_norm": 0.0012937564169988036, - "learning_rate": 0.00019999895235766146, - "loss": 46.0, - "step": 19067 - }, - { - "epoch": 1.4578817592751878, - "grad_norm": 0.0028017491567879915, - "learning_rate": 0.00019999895224771049, - "loss": 46.0, - "step": 19068 - }, - { - "epoch": 1.4579582162585776, - "grad_norm": 0.006538200657814741, - "learning_rate": 0.00019999895213775374, - "loss": 46.0, - "step": 19069 - }, - { - "epoch": 1.4580346732419671, - "grad_norm": 0.0007572290487587452, - "learning_rate": 0.0001999989520277912, - "loss": 46.0, - "step": 19070 - }, - { - "epoch": 1.4581111302253569, - "grad_norm": 0.0012289370642974973, - "learning_rate": 0.0001999989519178229, - "loss": 46.0, - "step": 19071 - }, - { - "epoch": 1.4581875872087466, - "grad_norm": 0.0025619827210903168, - "learning_rate": 0.00019999895180784885, - "loss": 46.0, - "step": 19072 - }, - { - "epoch": 1.4582640441921364, - "grad_norm": 0.0011546517489477992, - "learning_rate": 0.000199998951697869, - "loss": 46.0, - "step": 19073 - }, - { - "epoch": 1.4583405011755262, - "grad_norm": 0.001893850276246667, - "learning_rate": 0.00019999895158788337, - "loss": 46.0, - "step": 19074 - }, - { - "epoch": 1.458416958158916, - "grad_norm": 0.00036115458351559937, - "learning_rate": 0.00019999895147789203, - "loss": 46.0, - "step": 19075 - }, - { - "epoch": 1.4584934151423057, - "grad_norm": 0.0012535308487713337, - "learning_rate": 0.00019999895136789487, - "loss": 46.0, - "step": 19076 - }, - { - "epoch": 1.4585698721256952, - "grad_norm": 0.009620990604162216, - "learning_rate": 0.00019999895125789196, - "loss": 46.0, - "step": 19077 - }, - { - "epoch": 1.458646329109085, - "grad_norm": 0.00458752503618598, - "learning_rate": 0.00019999895114788328, - "loss": 46.0, - "step": 19078 - }, - { - "epoch": 1.4587227860924747, - "grad_norm": 0.002012411365285516, - "learning_rate": 0.0001999989510378688, - "loss": 46.0, - "step": 19079 - }, - { - "epoch": 1.4587992430758645, - "grad_norm": 0.0032266732305288315, - "learning_rate": 0.0001999989509278486, - "loss": 46.0, - "step": 19080 - }, - { - "epoch": 1.458875700059254, - "grad_norm": 0.0014690555399283767, - "learning_rate": 0.0001999989508178226, - "loss": 46.0, - "step": 19081 - }, - { - "epoch": 1.4589521570426438, - "grad_norm": 0.0009337826631963253, - "learning_rate": 0.00019999895070779084, - "loss": 46.0, - "step": 19082 - }, - { - "epoch": 1.4590286140260336, - "grad_norm": 0.001110545010305941, - "learning_rate": 0.0001999989505977533, - "loss": 46.0, - "step": 19083 - }, - { - "epoch": 1.4591050710094233, - "grad_norm": 0.0039435699582099915, - "learning_rate": 0.00019999895048771003, - "loss": 46.0, - "step": 19084 - }, - { - "epoch": 1.459181527992813, - "grad_norm": 0.003430826822295785, - "learning_rate": 0.00019999895037766096, - "loss": 46.0, - "step": 19085 - }, - { - "epoch": 1.4592579849762028, - "grad_norm": 0.0004946888657286763, - "learning_rate": 0.00019999895026760612, - "loss": 46.0, - "step": 19086 - }, - { - "epoch": 1.4593344419595926, - "grad_norm": 0.0012747155269607902, - "learning_rate": 0.0001999989501575455, - "loss": 46.0, - "step": 19087 - }, - { - "epoch": 1.4594108989429821, - "grad_norm": 0.0010246805613860488, - "learning_rate": 0.00019999895004747911, - "loss": 46.0, - "step": 19088 - }, - { - "epoch": 1.459487355926372, - "grad_norm": 0.0024050110951066017, - "learning_rate": 0.000199998949937407, - "loss": 46.0, - "step": 19089 - }, - { - "epoch": 1.4595638129097617, - "grad_norm": 0.0014022196410223842, - "learning_rate": 0.00019999894982732907, - "loss": 46.0, - "step": 19090 - }, - { - "epoch": 1.4596402698931514, - "grad_norm": 0.004510878119617701, - "learning_rate": 0.00019999894971724536, - "loss": 46.0, - "step": 19091 - }, - { - "epoch": 1.459716726876541, - "grad_norm": 0.0013228988973423839, - "learning_rate": 0.00019999894960715593, - "loss": 46.0, - "step": 19092 - }, - { - "epoch": 1.4597931838599307, - "grad_norm": 0.003901219228282571, - "learning_rate": 0.0001999989494970607, - "loss": 46.0, - "step": 19093 - }, - { - "epoch": 1.4598696408433205, - "grad_norm": 0.0011969677871093154, - "learning_rate": 0.0001999989493869597, - "loss": 46.0, - "step": 19094 - }, - { - "epoch": 1.4599460978267103, - "grad_norm": 0.0005538155674003065, - "learning_rate": 0.00019999894927685293, - "loss": 46.0, - "step": 19095 - }, - { - "epoch": 1.4600225548101, - "grad_norm": 0.0014049196615815163, - "learning_rate": 0.00019999894916674044, - "loss": 46.0, - "step": 19096 - }, - { - "epoch": 1.4600990117934898, - "grad_norm": 0.00385579327121377, - "learning_rate": 0.00019999894905662214, - "loss": 46.0, - "step": 19097 - }, - { - "epoch": 1.4601754687768795, - "grad_norm": 0.0010067732073366642, - "learning_rate": 0.00019999894894649805, - "loss": 46.0, - "step": 19098 - }, - { - "epoch": 1.460251925760269, - "grad_norm": 0.0005056728259660304, - "learning_rate": 0.0001999989488363682, - "loss": 46.0, - "step": 19099 - }, - { - "epoch": 1.4603283827436588, - "grad_norm": 0.0012051729718223214, - "learning_rate": 0.0001999989487262326, - "loss": 46.0, - "step": 19100 - }, - { - "epoch": 1.4604048397270486, - "grad_norm": 0.0005369476857595146, - "learning_rate": 0.00019999894861609123, - "loss": 46.0, - "step": 19101 - }, - { - "epoch": 1.4604812967104384, - "grad_norm": 0.0015683204401284456, - "learning_rate": 0.00019999894850594407, - "loss": 46.0, - "step": 19102 - }, - { - "epoch": 1.460557753693828, - "grad_norm": 0.004127866122871637, - "learning_rate": 0.00019999894839579113, - "loss": 46.0, - "step": 19103 - }, - { - "epoch": 1.4606342106772177, - "grad_norm": 0.0008356585749424994, - "learning_rate": 0.00019999894828563245, - "loss": 46.0, - "step": 19104 - }, - { - "epoch": 1.4607106676606074, - "grad_norm": 0.0010605361312627792, - "learning_rate": 0.000199998948175468, - "loss": 46.0, - "step": 19105 - }, - { - "epoch": 1.4607871246439972, - "grad_norm": 0.0037868469953536987, - "learning_rate": 0.00019999894806529777, - "loss": 46.0, - "step": 19106 - }, - { - "epoch": 1.460863581627387, - "grad_norm": 0.0011186314513906837, - "learning_rate": 0.0001999989479551218, - "loss": 46.0, - "step": 19107 - }, - { - "epoch": 1.4609400386107767, - "grad_norm": 0.0052525680512189865, - "learning_rate": 0.00019999894784494005, - "loss": 46.0, - "step": 19108 - }, - { - "epoch": 1.4610164955941665, - "grad_norm": 0.003108471166342497, - "learning_rate": 0.0001999989477347525, - "loss": 46.0, - "step": 19109 - }, - { - "epoch": 1.461092952577556, - "grad_norm": 0.0016261577839031816, - "learning_rate": 0.00019999894762455918, - "loss": 46.0, - "step": 19110 - }, - { - "epoch": 1.4611694095609458, - "grad_norm": 0.0011216031853109598, - "learning_rate": 0.00019999894751436014, - "loss": 46.0, - "step": 19111 - }, - { - "epoch": 1.4612458665443355, - "grad_norm": 0.004440872464329004, - "learning_rate": 0.00019999894740415527, - "loss": 46.0, - "step": 19112 - }, - { - "epoch": 1.4613223235277253, - "grad_norm": 0.002481652656570077, - "learning_rate": 0.00019999894729394465, - "loss": 46.0, - "step": 19113 - }, - { - "epoch": 1.4613987805111148, - "grad_norm": 0.0017673727124929428, - "learning_rate": 0.0001999989471837283, - "loss": 46.0, - "step": 19114 - }, - { - "epoch": 1.4614752374945046, - "grad_norm": 0.0007568387663923204, - "learning_rate": 0.00019999894707350616, - "loss": 46.0, - "step": 19115 - }, - { - "epoch": 1.4615516944778943, - "grad_norm": 0.00285971793346107, - "learning_rate": 0.00019999894696327822, - "loss": 46.0, - "step": 19116 - }, - { - "epoch": 1.461628151461284, - "grad_norm": 0.002128881635144353, - "learning_rate": 0.00019999894685304452, - "loss": 46.0, - "step": 19117 - }, - { - "epoch": 1.4617046084446739, - "grad_norm": 0.0010401071049273014, - "learning_rate": 0.00019999894674280506, - "loss": 46.0, - "step": 19118 - }, - { - "epoch": 1.4617810654280636, - "grad_norm": 0.0010843260679394007, - "learning_rate": 0.00019999894663255983, - "loss": 46.0, - "step": 19119 - }, - { - "epoch": 1.4618575224114534, - "grad_norm": 0.00039553752867504954, - "learning_rate": 0.00019999894652230883, - "loss": 46.0, - "step": 19120 - }, - { - "epoch": 1.461933979394843, - "grad_norm": 0.0015113131375983357, - "learning_rate": 0.00019999894641205206, - "loss": 46.0, - "step": 19121 - }, - { - "epoch": 1.4620104363782327, - "grad_norm": 0.0009209364070557058, - "learning_rate": 0.00019999894630178954, - "loss": 46.0, - "step": 19122 - }, - { - "epoch": 1.4620868933616225, - "grad_norm": 0.0020361284259706736, - "learning_rate": 0.00019999894619152122, - "loss": 46.0, - "step": 19123 - }, - { - "epoch": 1.4621633503450122, - "grad_norm": 0.003316224319860339, - "learning_rate": 0.00019999894608124718, - "loss": 46.0, - "step": 19124 - }, - { - "epoch": 1.4622398073284018, - "grad_norm": 0.004525564145296812, - "learning_rate": 0.0001999989459709673, - "loss": 46.0, - "step": 19125 - }, - { - "epoch": 1.4623162643117915, - "grad_norm": 0.0013780089793726802, - "learning_rate": 0.0001999989458606817, - "loss": 46.0, - "step": 19126 - }, - { - "epoch": 1.4623927212951813, - "grad_norm": 0.0015095982234925032, - "learning_rate": 0.0001999989457503903, - "loss": 46.0, - "step": 19127 - }, - { - "epoch": 1.462469178278571, - "grad_norm": 0.0011300451587885618, - "learning_rate": 0.00019999894564009317, - "loss": 46.0, - "step": 19128 - }, - { - "epoch": 1.4625456352619608, - "grad_norm": 0.0009763225098140538, - "learning_rate": 0.00019999894552979024, - "loss": 46.0, - "step": 19129 - }, - { - "epoch": 1.4626220922453506, - "grad_norm": 0.0012847681064158678, - "learning_rate": 0.00019999894541948153, - "loss": 46.0, - "step": 19130 - }, - { - "epoch": 1.46269854922874, - "grad_norm": 0.0009895693510770798, - "learning_rate": 0.0001999989453091671, - "loss": 46.0, - "step": 19131 - }, - { - "epoch": 1.4627750062121299, - "grad_norm": 0.0012944055488333106, - "learning_rate": 0.00019999894519884688, - "loss": 46.0, - "step": 19132 - }, - { - "epoch": 1.4628514631955196, - "grad_norm": 0.0019849692471325397, - "learning_rate": 0.00019999894508852085, - "loss": 46.0, - "step": 19133 - }, - { - "epoch": 1.4629279201789094, - "grad_norm": 0.0009527428192086518, - "learning_rate": 0.0001999989449781891, - "loss": 46.0, - "step": 19134 - }, - { - "epoch": 1.4630043771622991, - "grad_norm": 0.0008851123275235295, - "learning_rate": 0.00019999894486785155, - "loss": 46.0, - "step": 19135 - }, - { - "epoch": 1.4630808341456887, - "grad_norm": 0.0009851803770288825, - "learning_rate": 0.00019999894475750823, - "loss": 46.0, - "step": 19136 - }, - { - "epoch": 1.4631572911290784, - "grad_norm": 0.0020150537602603436, - "learning_rate": 0.00019999894464715917, - "loss": 46.0, - "step": 19137 - }, - { - "epoch": 1.4632337481124682, - "grad_norm": 0.0011949003674089909, - "learning_rate": 0.00019999894453680433, - "loss": 46.0, - "step": 19138 - }, - { - "epoch": 1.463310205095858, - "grad_norm": 0.000945518200751394, - "learning_rate": 0.0001999989444264437, - "loss": 46.0, - "step": 19139 - }, - { - "epoch": 1.4633866620792477, - "grad_norm": 0.003644590498879552, - "learning_rate": 0.00019999894431607732, - "loss": 46.0, - "step": 19140 - }, - { - "epoch": 1.4634631190626375, - "grad_norm": 0.0043143476359546185, - "learning_rate": 0.00019999894420570516, - "loss": 46.0, - "step": 19141 - }, - { - "epoch": 1.463539576046027, - "grad_norm": 0.0015756627544760704, - "learning_rate": 0.00019999894409532726, - "loss": 46.0, - "step": 19142 - }, - { - "epoch": 1.4636160330294168, - "grad_norm": 0.0007920751231722534, - "learning_rate": 0.00019999894398494355, - "loss": 46.0, - "step": 19143 - }, - { - "epoch": 1.4636924900128065, - "grad_norm": 0.0004181976546533406, - "learning_rate": 0.0001999989438745541, - "loss": 46.0, - "step": 19144 - }, - { - "epoch": 1.4637689469961963, - "grad_norm": 0.0029525617137551308, - "learning_rate": 0.00019999894376415887, - "loss": 46.0, - "step": 19145 - }, - { - "epoch": 1.463845403979586, - "grad_norm": 0.003163623157888651, - "learning_rate": 0.00019999894365375784, - "loss": 46.0, - "step": 19146 - }, - { - "epoch": 1.4639218609629756, - "grad_norm": 0.0004567409341689199, - "learning_rate": 0.0001999989435433511, - "loss": 46.0, - "step": 19147 - }, - { - "epoch": 1.4639983179463654, - "grad_norm": 0.0003693280159495771, - "learning_rate": 0.00019999894343293855, - "loss": 46.0, - "step": 19148 - }, - { - "epoch": 1.4640747749297551, - "grad_norm": 0.00269862893037498, - "learning_rate": 0.00019999894332252023, - "loss": 46.0, - "step": 19149 - }, - { - "epoch": 1.464151231913145, - "grad_norm": 0.0007455289014615119, - "learning_rate": 0.00019999894321209616, - "loss": 46.0, - "step": 19150 - }, - { - "epoch": 1.4642276888965347, - "grad_norm": 0.003705678042024374, - "learning_rate": 0.00019999894310166632, - "loss": 46.0, - "step": 19151 - }, - { - "epoch": 1.4643041458799244, - "grad_norm": 0.0006435929099097848, - "learning_rate": 0.0001999989429912307, - "loss": 46.0, - "step": 19152 - }, - { - "epoch": 1.464380602863314, - "grad_norm": 0.0011181682348251343, - "learning_rate": 0.00019999894288078932, - "loss": 46.0, - "step": 19153 - }, - { - "epoch": 1.4644570598467037, - "grad_norm": 0.0007335380068980157, - "learning_rate": 0.00019999894277034216, - "loss": 46.0, - "step": 19154 - }, - { - "epoch": 1.4645335168300935, - "grad_norm": 0.000914985139388591, - "learning_rate": 0.0001999989426598892, - "loss": 46.0, - "step": 19155 - }, - { - "epoch": 1.4646099738134832, - "grad_norm": 0.0005888041923753917, - "learning_rate": 0.00019999894254943052, - "loss": 46.0, - "step": 19156 - }, - { - "epoch": 1.4646864307968728, - "grad_norm": 0.0014561886200681329, - "learning_rate": 0.00019999894243896607, - "loss": 46.0, - "step": 19157 - }, - { - "epoch": 1.4647628877802625, - "grad_norm": 0.017310000956058502, - "learning_rate": 0.00019999894232849584, - "loss": 46.0, - "step": 19158 - }, - { - "epoch": 1.4648393447636523, - "grad_norm": 0.00172267179004848, - "learning_rate": 0.0001999989422180198, - "loss": 46.0, - "step": 19159 - }, - { - "epoch": 1.464915801747042, - "grad_norm": 0.001299018389545381, - "learning_rate": 0.00019999894210753804, - "loss": 46.0, - "step": 19160 - }, - { - "epoch": 1.4649922587304318, - "grad_norm": 0.003932367078959942, - "learning_rate": 0.0001999989419970505, - "loss": 46.0, - "step": 19161 - }, - { - "epoch": 1.4650687157138216, - "grad_norm": 0.0007361496100202203, - "learning_rate": 0.0001999989418865572, - "loss": 46.0, - "step": 19162 - }, - { - "epoch": 1.4651451726972113, - "grad_norm": 0.003676505759358406, - "learning_rate": 0.0001999989417760581, - "loss": 46.0, - "step": 19163 - }, - { - "epoch": 1.4652216296806009, - "grad_norm": 0.0025701585691422224, - "learning_rate": 0.00019999894166555326, - "loss": 46.0, - "step": 19164 - }, - { - "epoch": 1.4652980866639906, - "grad_norm": 0.0011493987403810024, - "learning_rate": 0.00019999894155504265, - "loss": 46.0, - "step": 19165 - }, - { - "epoch": 1.4653745436473804, - "grad_norm": 0.001910841092467308, - "learning_rate": 0.00019999894144452624, - "loss": 46.0, - "step": 19166 - }, - { - "epoch": 1.4654510006307702, - "grad_norm": 0.0006643445231020451, - "learning_rate": 0.0001999989413340041, - "loss": 46.0, - "step": 19167 - }, - { - "epoch": 1.4655274576141597, - "grad_norm": 0.0007738910499028862, - "learning_rate": 0.00019999894122347617, - "loss": 46.0, - "step": 19168 - }, - { - "epoch": 1.4656039145975495, - "grad_norm": 0.0019675877410918474, - "learning_rate": 0.00019999894111294246, - "loss": 46.0, - "step": 19169 - }, - { - "epoch": 1.4656803715809392, - "grad_norm": 0.0012034904211759567, - "learning_rate": 0.000199998941002403, - "loss": 46.0, - "step": 19170 - }, - { - "epoch": 1.465756828564329, - "grad_norm": 0.00350357498973608, - "learning_rate": 0.00019999894089185776, - "loss": 46.0, - "step": 19171 - }, - { - "epoch": 1.4658332855477187, - "grad_norm": 0.0006386736058630049, - "learning_rate": 0.00019999894078130676, - "loss": 46.0, - "step": 19172 - }, - { - "epoch": 1.4659097425311085, - "grad_norm": 0.0006145481602288783, - "learning_rate": 0.00019999894067075, - "loss": 46.0, - "step": 19173 - }, - { - "epoch": 1.4659861995144983, - "grad_norm": 0.006934434175491333, - "learning_rate": 0.00019999894056018744, - "loss": 46.0, - "step": 19174 - }, - { - "epoch": 1.4660626564978878, - "grad_norm": 0.001964472234249115, - "learning_rate": 0.00019999894044961915, - "loss": 46.0, - "step": 19175 - }, - { - "epoch": 1.4661391134812776, - "grad_norm": 0.0022962279617786407, - "learning_rate": 0.00019999894033904505, - "loss": 46.0, - "step": 19176 - }, - { - "epoch": 1.4662155704646673, - "grad_norm": 0.0008476219372823834, - "learning_rate": 0.00019999894022846519, - "loss": 46.0, - "step": 19177 - }, - { - "epoch": 1.466292027448057, - "grad_norm": 0.005569208413362503, - "learning_rate": 0.00019999894011787957, - "loss": 46.0, - "step": 19178 - }, - { - "epoch": 1.4663684844314466, - "grad_norm": 0.004880499094724655, - "learning_rate": 0.00019999894000728819, - "loss": 46.0, - "step": 19179 - }, - { - "epoch": 1.4664449414148364, - "grad_norm": 0.00301688932813704, - "learning_rate": 0.000199998939896691, - "loss": 46.0, - "step": 19180 - }, - { - "epoch": 1.4665213983982262, - "grad_norm": 0.0043104891665279865, - "learning_rate": 0.0001999989397860881, - "loss": 46.0, - "step": 19181 - }, - { - "epoch": 1.466597855381616, - "grad_norm": 0.0006606430979445577, - "learning_rate": 0.0001999989396754794, - "loss": 46.0, - "step": 19182 - }, - { - "epoch": 1.4666743123650057, - "grad_norm": 0.0013644114369526505, - "learning_rate": 0.00019999893956486493, - "loss": 46.0, - "step": 19183 - }, - { - "epoch": 1.4667507693483954, - "grad_norm": 0.0006841550348326564, - "learning_rate": 0.00019999893945424468, - "loss": 46.0, - "step": 19184 - }, - { - "epoch": 1.4668272263317852, - "grad_norm": 0.0017201006412506104, - "learning_rate": 0.00019999893934361865, - "loss": 46.0, - "step": 19185 - }, - { - "epoch": 1.4669036833151747, - "grad_norm": 0.0004923130036331713, - "learning_rate": 0.00019999893923298688, - "loss": 46.0, - "step": 19186 - }, - { - "epoch": 1.4669801402985645, - "grad_norm": 0.0010550328297540545, - "learning_rate": 0.00019999893912234934, - "loss": 46.0, - "step": 19187 - }, - { - "epoch": 1.4670565972819543, - "grad_norm": 0.0011936826631426811, - "learning_rate": 0.00019999893901170604, - "loss": 46.0, - "step": 19188 - }, - { - "epoch": 1.467133054265344, - "grad_norm": 0.0018220498459413648, - "learning_rate": 0.00019999893890105695, - "loss": 46.0, - "step": 19189 - }, - { - "epoch": 1.4672095112487336, - "grad_norm": 0.0015751337632536888, - "learning_rate": 0.00019999893879040208, - "loss": 46.0, - "step": 19190 - }, - { - "epoch": 1.4672859682321233, - "grad_norm": 0.00822540931403637, - "learning_rate": 0.00019999893867974147, - "loss": 46.0, - "step": 19191 - }, - { - "epoch": 1.467362425215513, - "grad_norm": 0.0007038480835035443, - "learning_rate": 0.00019999893856907509, - "loss": 46.0, - "step": 19192 - }, - { - "epoch": 1.4674388821989028, - "grad_norm": 0.0032394074369221926, - "learning_rate": 0.00019999893845840293, - "loss": 46.0, - "step": 19193 - }, - { - "epoch": 1.4675153391822926, - "grad_norm": 0.00037621636874973774, - "learning_rate": 0.000199998938347725, - "loss": 46.0, - "step": 19194 - }, - { - "epoch": 1.4675917961656824, - "grad_norm": 0.0005880384123884141, - "learning_rate": 0.0001999989382370413, - "loss": 46.0, - "step": 19195 - }, - { - "epoch": 1.4676682531490721, - "grad_norm": 0.000750627601519227, - "learning_rate": 0.0001999989381263518, - "loss": 46.0, - "step": 19196 - }, - { - "epoch": 1.4677447101324617, - "grad_norm": 0.0022673553321510553, - "learning_rate": 0.00019999893801565655, - "loss": 46.0, - "step": 19197 - }, - { - "epoch": 1.4678211671158514, - "grad_norm": 0.002134941751137376, - "learning_rate": 0.00019999893790495556, - "loss": 46.0, - "step": 19198 - }, - { - "epoch": 1.4678976240992412, - "grad_norm": 0.0019374073017388582, - "learning_rate": 0.00019999893779424878, - "loss": 46.0, - "step": 19199 - }, - { - "epoch": 1.467974081082631, - "grad_norm": 0.002374977571889758, - "learning_rate": 0.00019999893768353624, - "loss": 46.0, - "step": 19200 - }, - { - "epoch": 1.4680505380660205, - "grad_norm": 0.002467510523274541, - "learning_rate": 0.00019999893757281792, - "loss": 46.0, - "step": 19201 - }, - { - "epoch": 1.4681269950494102, - "grad_norm": 0.0027188737876713276, - "learning_rate": 0.00019999893746209383, - "loss": 46.0, - "step": 19202 - }, - { - "epoch": 1.4682034520328, - "grad_norm": 0.0017604512395337224, - "learning_rate": 0.00019999893735136396, - "loss": 46.0, - "step": 19203 - }, - { - "epoch": 1.4682799090161898, - "grad_norm": 0.001243614824488759, - "learning_rate": 0.00019999893724062835, - "loss": 46.0, - "step": 19204 - }, - { - "epoch": 1.4683563659995795, - "grad_norm": 0.0015066171763464808, - "learning_rate": 0.00019999893712988693, - "loss": 46.0, - "step": 19205 - }, - { - "epoch": 1.4684328229829693, - "grad_norm": 0.0005808811401948333, - "learning_rate": 0.00019999893701913977, - "loss": 46.0, - "step": 19206 - }, - { - "epoch": 1.468509279966359, - "grad_norm": 0.006335408892482519, - "learning_rate": 0.00019999893690838684, - "loss": 46.0, - "step": 19207 - }, - { - "epoch": 1.4685857369497486, - "grad_norm": 0.0008780763600952923, - "learning_rate": 0.00019999893679762814, - "loss": 46.0, - "step": 19208 - }, - { - "epoch": 1.4686621939331383, - "grad_norm": 0.0014805751852691174, - "learning_rate": 0.00019999893668686366, - "loss": 46.0, - "step": 19209 - }, - { - "epoch": 1.468738650916528, - "grad_norm": 0.0006930132512934506, - "learning_rate": 0.00019999893657609343, - "loss": 46.0, - "step": 19210 - }, - { - "epoch": 1.4688151078999179, - "grad_norm": 0.0015620873309671879, - "learning_rate": 0.00019999893646531743, - "loss": 46.0, - "step": 19211 - }, - { - "epoch": 1.4688915648833074, - "grad_norm": 0.0011235555866733193, - "learning_rate": 0.00019999893635453563, - "loss": 46.0, - "step": 19212 - }, - { - "epoch": 1.4689680218666972, - "grad_norm": 0.0007692488725297153, - "learning_rate": 0.0001999989362437481, - "loss": 46.0, - "step": 19213 - }, - { - "epoch": 1.469044478850087, - "grad_norm": 0.006577357184141874, - "learning_rate": 0.00019999893613295474, - "loss": 46.0, - "step": 19214 - }, - { - "epoch": 1.4691209358334767, - "grad_norm": 0.002416061470285058, - "learning_rate": 0.00019999893602215568, - "loss": 46.0, - "step": 19215 - }, - { - "epoch": 1.4691973928168665, - "grad_norm": 0.0009787181625142694, - "learning_rate": 0.0001999989359113508, - "loss": 46.0, - "step": 19216 - }, - { - "epoch": 1.4692738498002562, - "grad_norm": 0.0017808849224820733, - "learning_rate": 0.0001999989358005402, - "loss": 46.0, - "step": 19217 - }, - { - "epoch": 1.469350306783646, - "grad_norm": 0.00116780458483845, - "learning_rate": 0.0001999989356897238, - "loss": 46.0, - "step": 19218 - }, - { - "epoch": 1.4694267637670355, - "grad_norm": 0.0007117206114344299, - "learning_rate": 0.00019999893557890163, - "loss": 46.0, - "step": 19219 - }, - { - "epoch": 1.4695032207504253, - "grad_norm": 0.0008770351414568722, - "learning_rate": 0.0001999989354680737, - "loss": 46.0, - "step": 19220 - }, - { - "epoch": 1.469579677733815, - "grad_norm": 0.001342743868008256, - "learning_rate": 0.00019999893535723996, - "loss": 46.0, - "step": 19221 - }, - { - "epoch": 1.4696561347172048, - "grad_norm": 0.0008404088439419866, - "learning_rate": 0.0001999989352464005, - "loss": 46.0, - "step": 19222 - }, - { - "epoch": 1.4697325917005943, - "grad_norm": 0.000537903280928731, - "learning_rate": 0.00019999893513555526, - "loss": 46.0, - "step": 19223 - }, - { - "epoch": 1.469809048683984, - "grad_norm": 0.0006145606748759747, - "learning_rate": 0.00019999893502470423, - "loss": 46.0, - "step": 19224 - }, - { - "epoch": 1.4698855056673739, - "grad_norm": 0.0010471732821315527, - "learning_rate": 0.00019999893491384743, - "loss": 46.0, - "step": 19225 - }, - { - "epoch": 1.4699619626507636, - "grad_norm": 0.0010731476359069347, - "learning_rate": 0.00019999893480298492, - "loss": 46.0, - "step": 19226 - }, - { - "epoch": 1.4700384196341534, - "grad_norm": 0.003976867068558931, - "learning_rate": 0.0001999989346921166, - "loss": 46.0, - "step": 19227 - }, - { - "epoch": 1.4701148766175431, - "grad_norm": 0.0024218300823122263, - "learning_rate": 0.00019999893458124248, - "loss": 46.0, - "step": 19228 - }, - { - "epoch": 1.470191333600933, - "grad_norm": 0.0007920422940514982, - "learning_rate": 0.00019999893447036264, - "loss": 46.0, - "step": 19229 - }, - { - "epoch": 1.4702677905843224, - "grad_norm": 0.002143591409549117, - "learning_rate": 0.000199998934359477, - "loss": 46.0, - "step": 19230 - }, - { - "epoch": 1.4703442475677122, - "grad_norm": 0.000491861894261092, - "learning_rate": 0.0001999989342485856, - "loss": 46.0, - "step": 19231 - }, - { - "epoch": 1.470420704551102, - "grad_norm": 0.0012849452905356884, - "learning_rate": 0.00019999893413768843, - "loss": 46.0, - "step": 19232 - }, - { - "epoch": 1.4704971615344917, - "grad_norm": 0.0015341893304139376, - "learning_rate": 0.0001999989340267855, - "loss": 46.0, - "step": 19233 - }, - { - "epoch": 1.4705736185178813, - "grad_norm": 0.00040873538819141686, - "learning_rate": 0.0001999989339158768, - "loss": 46.0, - "step": 19234 - }, - { - "epoch": 1.470650075501271, - "grad_norm": 0.000625201384536922, - "learning_rate": 0.00019999893380496232, - "loss": 46.0, - "step": 19235 - }, - { - "epoch": 1.4707265324846608, - "grad_norm": 0.0022729034535586834, - "learning_rate": 0.0001999989336940421, - "loss": 46.0, - "step": 19236 - }, - { - "epoch": 1.4708029894680505, - "grad_norm": 0.0008619332802481949, - "learning_rate": 0.00019999893358311607, - "loss": 46.0, - "step": 19237 - }, - { - "epoch": 1.4708794464514403, - "grad_norm": 0.0010886061936616898, - "learning_rate": 0.00019999893347218427, - "loss": 46.0, - "step": 19238 - }, - { - "epoch": 1.47095590343483, - "grad_norm": 0.0009552035480737686, - "learning_rate": 0.00019999893336124672, - "loss": 46.0, - "step": 19239 - }, - { - "epoch": 1.4710323604182198, - "grad_norm": 0.0009725200361572206, - "learning_rate": 0.00019999893325030338, - "loss": 46.0, - "step": 19240 - }, - { - "epoch": 1.4711088174016094, - "grad_norm": 0.001992037519812584, - "learning_rate": 0.00019999893313935428, - "loss": 46.0, - "step": 19241 - }, - { - "epoch": 1.4711852743849991, - "grad_norm": 0.00086099689360708, - "learning_rate": 0.00019999893302839942, - "loss": 46.0, - "step": 19242 - }, - { - "epoch": 1.471261731368389, - "grad_norm": 0.00737357884645462, - "learning_rate": 0.0001999989329174388, - "loss": 46.0, - "step": 19243 - }, - { - "epoch": 1.4713381883517787, - "grad_norm": 0.0017700770404189825, - "learning_rate": 0.0001999989328064724, - "loss": 46.0, - "step": 19244 - }, - { - "epoch": 1.4714146453351682, - "grad_norm": 0.0010289129568263888, - "learning_rate": 0.0001999989326955002, - "loss": 46.0, - "step": 19245 - }, - { - "epoch": 1.471491102318558, - "grad_norm": 0.0013079507043585181, - "learning_rate": 0.00019999893258452228, - "loss": 46.0, - "step": 19246 - }, - { - "epoch": 1.4715675593019477, - "grad_norm": 0.0015224862145259976, - "learning_rate": 0.00019999893247353855, - "loss": 46.0, - "step": 19247 - }, - { - "epoch": 1.4716440162853375, - "grad_norm": 0.0011894813505932689, - "learning_rate": 0.00019999893236254907, - "loss": 46.0, - "step": 19248 - }, - { - "epoch": 1.4717204732687272, - "grad_norm": 0.0008942015119828284, - "learning_rate": 0.00019999893225155384, - "loss": 46.0, - "step": 19249 - }, - { - "epoch": 1.471796930252117, - "grad_norm": 0.0007221175474114716, - "learning_rate": 0.00019999893214055282, - "loss": 46.0, - "step": 19250 - }, - { - "epoch": 1.4718733872355068, - "grad_norm": 0.0005632580723613501, - "learning_rate": 0.00019999893202954602, - "loss": 46.0, - "step": 19251 - }, - { - "epoch": 1.4719498442188963, - "grad_norm": 0.002287228126078844, - "learning_rate": 0.0001999989319185335, - "loss": 46.0, - "step": 19252 - }, - { - "epoch": 1.472026301202286, - "grad_norm": 0.004281922243535519, - "learning_rate": 0.00019999893180751513, - "loss": 46.0, - "step": 19253 - }, - { - "epoch": 1.4721027581856758, - "grad_norm": 0.0004845394869334996, - "learning_rate": 0.00019999893169649107, - "loss": 46.0, - "step": 19254 - }, - { - "epoch": 1.4721792151690656, - "grad_norm": 0.0028777297120541334, - "learning_rate": 0.0001999989315854612, - "loss": 46.0, - "step": 19255 - }, - { - "epoch": 1.4722556721524551, - "grad_norm": 0.000936887867283076, - "learning_rate": 0.00019999893147442554, - "loss": 46.0, - "step": 19256 - }, - { - "epoch": 1.4723321291358449, - "grad_norm": 0.0031726856250315905, - "learning_rate": 0.00019999893136338415, - "loss": 46.0, - "step": 19257 - }, - { - "epoch": 1.4724085861192346, - "grad_norm": 0.0007614083588123322, - "learning_rate": 0.00019999893125233697, - "loss": 46.0, - "step": 19258 - }, - { - "epoch": 1.4724850431026244, - "grad_norm": 0.004533240105956793, - "learning_rate": 0.00019999893114128404, - "loss": 46.0, - "step": 19259 - }, - { - "epoch": 1.4725615000860142, - "grad_norm": 0.0026064226403832436, - "learning_rate": 0.0001999989310302253, - "loss": 46.0, - "step": 19260 - }, - { - "epoch": 1.472637957069404, - "grad_norm": 0.0017717110458761454, - "learning_rate": 0.00019999893091916085, - "loss": 46.0, - "step": 19261 - }, - { - "epoch": 1.4727144140527937, - "grad_norm": 0.005900966934859753, - "learning_rate": 0.0001999989308080906, - "loss": 46.0, - "step": 19262 - }, - { - "epoch": 1.4727908710361832, - "grad_norm": 0.0006830496131442487, - "learning_rate": 0.00019999893069701455, - "loss": 46.0, - "step": 19263 - }, - { - "epoch": 1.472867328019573, - "grad_norm": 0.0004614562203641981, - "learning_rate": 0.00019999893058593278, - "loss": 46.0, - "step": 19264 - }, - { - "epoch": 1.4729437850029627, - "grad_norm": 0.0009629227570258081, - "learning_rate": 0.0001999989304748452, - "loss": 46.0, - "step": 19265 - }, - { - "epoch": 1.4730202419863525, - "grad_norm": 0.0014378144405782223, - "learning_rate": 0.0001999989303637519, - "loss": 46.0, - "step": 19266 - }, - { - "epoch": 1.473096698969742, - "grad_norm": 0.0012453735107555985, - "learning_rate": 0.0001999989302526528, - "loss": 46.0, - "step": 19267 - }, - { - "epoch": 1.4731731559531318, - "grad_norm": 0.0014825640246272087, - "learning_rate": 0.0001999989301415479, - "loss": 46.0, - "step": 19268 - }, - { - "epoch": 1.4732496129365216, - "grad_norm": 0.0011866736458614469, - "learning_rate": 0.00019999893003043727, - "loss": 46.0, - "step": 19269 - }, - { - "epoch": 1.4733260699199113, - "grad_norm": 0.0011308465618640184, - "learning_rate": 0.0001999989299193209, - "loss": 46.0, - "step": 19270 - }, - { - "epoch": 1.473402526903301, - "grad_norm": 0.0007920544594526291, - "learning_rate": 0.0001999989298081987, - "loss": 46.0, - "step": 19271 - }, - { - "epoch": 1.4734789838866909, - "grad_norm": 0.004185117781162262, - "learning_rate": 0.00019999892969707077, - "loss": 46.0, - "step": 19272 - }, - { - "epoch": 1.4735554408700804, - "grad_norm": 0.0012875179527327418, - "learning_rate": 0.00019999892958593704, - "loss": 46.0, - "step": 19273 - }, - { - "epoch": 1.4736318978534702, - "grad_norm": 0.001267644576728344, - "learning_rate": 0.00019999892947479756, - "loss": 46.0, - "step": 19274 - }, - { - "epoch": 1.47370835483686, - "grad_norm": 0.0005683074123226106, - "learning_rate": 0.0001999989293636523, - "loss": 46.0, - "step": 19275 - }, - { - "epoch": 1.4737848118202497, - "grad_norm": 0.002309659728780389, - "learning_rate": 0.0001999989292525013, - "loss": 46.0, - "step": 19276 - }, - { - "epoch": 1.4738612688036394, - "grad_norm": 0.0006356228259392083, - "learning_rate": 0.0001999989291413445, - "loss": 46.0, - "step": 19277 - }, - { - "epoch": 1.473937725787029, - "grad_norm": 0.0013314933748915792, - "learning_rate": 0.00019999892903018195, - "loss": 46.0, - "step": 19278 - }, - { - "epoch": 1.4740141827704187, - "grad_norm": 0.0023864246904850006, - "learning_rate": 0.00019999892891901363, - "loss": 46.0, - "step": 19279 - }, - { - "epoch": 1.4740906397538085, - "grad_norm": 0.0019366954220458865, - "learning_rate": 0.0001999989288078395, - "loss": 46.0, - "step": 19280 - }, - { - "epoch": 1.4741670967371983, - "grad_norm": 0.0012591093545779586, - "learning_rate": 0.00019999892869665965, - "loss": 46.0, - "step": 19281 - }, - { - "epoch": 1.474243553720588, - "grad_norm": 0.0012208328116685152, - "learning_rate": 0.000199998928585474, - "loss": 46.0, - "step": 19282 - }, - { - "epoch": 1.4743200107039778, - "grad_norm": 0.0017227797070518136, - "learning_rate": 0.0001999989284742826, - "loss": 46.0, - "step": 19283 - }, - { - "epoch": 1.4743964676873673, - "grad_norm": 0.002297386759892106, - "learning_rate": 0.0001999989283630854, - "loss": 46.0, - "step": 19284 - }, - { - "epoch": 1.474472924670757, - "grad_norm": 0.0006985300569795072, - "learning_rate": 0.00019999892825188246, - "loss": 46.0, - "step": 19285 - }, - { - "epoch": 1.4745493816541468, - "grad_norm": 0.006447529420256615, - "learning_rate": 0.00019999892814067372, - "loss": 46.0, - "step": 19286 - }, - { - "epoch": 1.4746258386375366, - "grad_norm": 0.0004198014212306589, - "learning_rate": 0.00019999892802945927, - "loss": 46.0, - "step": 19287 - }, - { - "epoch": 1.4747022956209261, - "grad_norm": 0.0024661975912749767, - "learning_rate": 0.000199998927918239, - "loss": 46.0, - "step": 19288 - }, - { - "epoch": 1.474778752604316, - "grad_norm": 0.0011110918130725622, - "learning_rate": 0.000199998927807013, - "loss": 46.0, - "step": 19289 - }, - { - "epoch": 1.4748552095877057, - "grad_norm": 0.002359352307394147, - "learning_rate": 0.0001999989276957812, - "loss": 46.0, - "step": 19290 - }, - { - "epoch": 1.4749316665710954, - "grad_norm": 0.00275266170501709, - "learning_rate": 0.0001999989275845436, - "loss": 46.0, - "step": 19291 - }, - { - "epoch": 1.4750081235544852, - "grad_norm": 0.007068646140396595, - "learning_rate": 0.0001999989274733003, - "loss": 46.0, - "step": 19292 - }, - { - "epoch": 1.475084580537875, - "grad_norm": 0.0005153819220140576, - "learning_rate": 0.0001999989273620512, - "loss": 46.0, - "step": 19293 - }, - { - "epoch": 1.4751610375212647, - "grad_norm": 0.0012687172275036573, - "learning_rate": 0.00019999892725079633, - "loss": 46.0, - "step": 19294 - }, - { - "epoch": 1.4752374945046542, - "grad_norm": 0.00040213772444985807, - "learning_rate": 0.00019999892713953567, - "loss": 46.0, - "step": 19295 - }, - { - "epoch": 1.475313951488044, - "grad_norm": 0.00597192021086812, - "learning_rate": 0.00019999892702826926, - "loss": 46.0, - "step": 19296 - }, - { - "epoch": 1.4753904084714338, - "grad_norm": 0.0008220398449338973, - "learning_rate": 0.00019999892691699707, - "loss": 46.0, - "step": 19297 - }, - { - "epoch": 1.4754668654548235, - "grad_norm": 0.002113528084009886, - "learning_rate": 0.00019999892680571914, - "loss": 46.0, - "step": 19298 - }, - { - "epoch": 1.475543322438213, - "grad_norm": 0.002578044543042779, - "learning_rate": 0.0001999989266944354, - "loss": 46.0, - "step": 19299 - }, - { - "epoch": 1.4756197794216028, - "grad_norm": 0.0014027304714545608, - "learning_rate": 0.00019999892658314594, - "loss": 46.0, - "step": 19300 - }, - { - "epoch": 1.4756962364049926, - "grad_norm": 0.0008738245232962072, - "learning_rate": 0.00019999892647185069, - "loss": 46.0, - "step": 19301 - }, - { - "epoch": 1.4757726933883824, - "grad_norm": 0.001580814947374165, - "learning_rate": 0.00019999892636054963, - "loss": 46.0, - "step": 19302 - }, - { - "epoch": 1.4758491503717721, - "grad_norm": 0.0008109455229714513, - "learning_rate": 0.00019999892624924286, - "loss": 46.0, - "step": 19303 - }, - { - "epoch": 1.4759256073551619, - "grad_norm": 0.008592908270657063, - "learning_rate": 0.0001999989261379303, - "loss": 46.0, - "step": 19304 - }, - { - "epoch": 1.4760020643385516, - "grad_norm": 0.001697972766123712, - "learning_rate": 0.00019999892602661198, - "loss": 46.0, - "step": 19305 - }, - { - "epoch": 1.4760785213219412, - "grad_norm": 0.0009819112019613385, - "learning_rate": 0.00019999892591528786, - "loss": 46.0, - "step": 19306 - }, - { - "epoch": 1.476154978305331, - "grad_norm": 0.006307743024080992, - "learning_rate": 0.00019999892580395797, - "loss": 46.0, - "step": 19307 - }, - { - "epoch": 1.4762314352887207, - "grad_norm": 0.0012942017056047916, - "learning_rate": 0.00019999892569262233, - "loss": 46.0, - "step": 19308 - }, - { - "epoch": 1.4763078922721105, - "grad_norm": 0.0005027378792874515, - "learning_rate": 0.0001999989255812809, - "loss": 46.0, - "step": 19309 - }, - { - "epoch": 1.4763843492555, - "grad_norm": 0.0006243587704375386, - "learning_rate": 0.00019999892546993374, - "loss": 46.0, - "step": 19310 - }, - { - "epoch": 1.4764608062388898, - "grad_norm": 0.0005012330366298556, - "learning_rate": 0.00019999892535858078, - "loss": 46.0, - "step": 19311 - }, - { - "epoch": 1.4765372632222795, - "grad_norm": 0.004242443013936281, - "learning_rate": 0.00019999892524722208, - "loss": 46.0, - "step": 19312 - }, - { - "epoch": 1.4766137202056693, - "grad_norm": 0.002433671150356531, - "learning_rate": 0.00019999892513585757, - "loss": 46.0, - "step": 19313 - }, - { - "epoch": 1.476690177189059, - "grad_norm": 0.0004711611836683005, - "learning_rate": 0.00019999892502448732, - "loss": 46.0, - "step": 19314 - }, - { - "epoch": 1.4767666341724488, - "grad_norm": 0.000834117759950459, - "learning_rate": 0.0001999989249131113, - "loss": 46.0, - "step": 19315 - }, - { - "epoch": 1.4768430911558386, - "grad_norm": 0.00785927101969719, - "learning_rate": 0.0001999989248017295, - "loss": 46.0, - "step": 19316 - }, - { - "epoch": 1.476919548139228, - "grad_norm": 0.004790124483406544, - "learning_rate": 0.00019999892469034193, - "loss": 46.0, - "step": 19317 - }, - { - "epoch": 1.4769960051226179, - "grad_norm": 0.0037100291810929775, - "learning_rate": 0.0001999989245789486, - "loss": 46.0, - "step": 19318 - }, - { - "epoch": 1.4770724621060076, - "grad_norm": 0.007419370114803314, - "learning_rate": 0.00019999892446754947, - "loss": 46.0, - "step": 19319 - }, - { - "epoch": 1.4771489190893974, - "grad_norm": 0.004334686789661646, - "learning_rate": 0.0001999989243561446, - "loss": 46.0, - "step": 19320 - }, - { - "epoch": 1.477225376072787, - "grad_norm": 0.0007274855161085725, - "learning_rate": 0.00019999892424473395, - "loss": 46.0, - "step": 19321 - }, - { - "epoch": 1.4773018330561767, - "grad_norm": 0.0012469470966607332, - "learning_rate": 0.00019999892413331756, - "loss": 46.0, - "step": 19322 - }, - { - "epoch": 1.4773782900395664, - "grad_norm": 0.00805369671434164, - "learning_rate": 0.00019999892402189535, - "loss": 46.0, - "step": 19323 - }, - { - "epoch": 1.4774547470229562, - "grad_norm": 0.0037581881042569876, - "learning_rate": 0.00019999892391046742, - "loss": 46.0, - "step": 19324 - }, - { - "epoch": 1.477531204006346, - "grad_norm": 0.0024633165448904037, - "learning_rate": 0.0001999989237990337, - "loss": 46.0, - "step": 19325 - }, - { - "epoch": 1.4776076609897357, - "grad_norm": 0.002172968117520213, - "learning_rate": 0.0001999989236875942, - "loss": 46.0, - "step": 19326 - }, - { - "epoch": 1.4776841179731255, - "grad_norm": 0.0007751628290861845, - "learning_rate": 0.00019999892357614894, - "loss": 46.0, - "step": 19327 - }, - { - "epoch": 1.477760574956515, - "grad_norm": 0.0016616367502138019, - "learning_rate": 0.00019999892346469792, - "loss": 46.0, - "step": 19328 - }, - { - "epoch": 1.4778370319399048, - "grad_norm": 0.0017188034253194928, - "learning_rate": 0.0001999989233532411, - "loss": 46.0, - "step": 19329 - }, - { - "epoch": 1.4779134889232945, - "grad_norm": 0.0013235813239589334, - "learning_rate": 0.00019999892324177852, - "loss": 46.0, - "step": 19330 - }, - { - "epoch": 1.4779899459066843, - "grad_norm": 0.0013228385942056775, - "learning_rate": 0.00019999892313031018, - "loss": 46.0, - "step": 19331 - }, - { - "epoch": 1.4780664028900738, - "grad_norm": 0.0006751374457962811, - "learning_rate": 0.00019999892301883606, - "loss": 46.0, - "step": 19332 - }, - { - "epoch": 1.4781428598734636, - "grad_norm": 0.006896035745739937, - "learning_rate": 0.0001999989229073562, - "loss": 46.0, - "step": 19333 - }, - { - "epoch": 1.4782193168568534, - "grad_norm": 0.002090553054586053, - "learning_rate": 0.00019999892279587057, - "loss": 46.0, - "step": 19334 - }, - { - "epoch": 1.4782957738402431, - "grad_norm": 0.003226600820198655, - "learning_rate": 0.00019999892268437913, - "loss": 46.0, - "step": 19335 - }, - { - "epoch": 1.478372230823633, - "grad_norm": 0.001297247363254428, - "learning_rate": 0.00019999892257288195, - "loss": 46.0, - "step": 19336 - }, - { - "epoch": 1.4784486878070227, - "grad_norm": 0.0007756868144497275, - "learning_rate": 0.00019999892246137902, - "loss": 46.0, - "step": 19337 - }, - { - "epoch": 1.4785251447904124, - "grad_norm": 0.0009608445107005537, - "learning_rate": 0.0001999989223498703, - "loss": 46.0, - "step": 19338 - }, - { - "epoch": 1.478601601773802, - "grad_norm": 0.0019543610978871584, - "learning_rate": 0.00019999892223835576, - "loss": 46.0, - "step": 19339 - }, - { - "epoch": 1.4786780587571917, - "grad_norm": 0.0020117731764912605, - "learning_rate": 0.0001999989221268355, - "loss": 46.0, - "step": 19340 - }, - { - "epoch": 1.4787545157405815, - "grad_norm": 0.0012466547777876258, - "learning_rate": 0.00019999892201530948, - "loss": 46.0, - "step": 19341 - }, - { - "epoch": 1.4788309727239712, - "grad_norm": 0.000765244709327817, - "learning_rate": 0.00019999892190377766, - "loss": 46.0, - "step": 19342 - }, - { - "epoch": 1.4789074297073608, - "grad_norm": 0.0014841114170849323, - "learning_rate": 0.00019999892179224012, - "loss": 46.0, - "step": 19343 - }, - { - "epoch": 1.4789838866907505, - "grad_norm": 0.0016742039006203413, - "learning_rate": 0.00019999892168069678, - "loss": 46.0, - "step": 19344 - }, - { - "epoch": 1.4790603436741403, - "grad_norm": 0.006475646514445543, - "learning_rate": 0.00019999892156914766, - "loss": 46.0, - "step": 19345 - }, - { - "epoch": 1.47913680065753, - "grad_norm": 0.001939265406690538, - "learning_rate": 0.00019999892145759277, - "loss": 46.0, - "step": 19346 - }, - { - "epoch": 1.4792132576409198, - "grad_norm": 0.0008946038433350623, - "learning_rate": 0.00019999892134603213, - "loss": 46.0, - "step": 19347 - }, - { - "epoch": 1.4792897146243096, - "grad_norm": 0.0015546358190476894, - "learning_rate": 0.0001999989212344657, - "loss": 46.0, - "step": 19348 - }, - { - "epoch": 1.4793661716076993, - "grad_norm": 0.002928095869719982, - "learning_rate": 0.00019999892112289351, - "loss": 46.0, - "step": 19349 - }, - { - "epoch": 1.4794426285910889, - "grad_norm": 0.0011154068633913994, - "learning_rate": 0.00019999892101131556, - "loss": 46.0, - "step": 19350 - }, - { - "epoch": 1.4795190855744786, - "grad_norm": 0.0015208248514682055, - "learning_rate": 0.00019999892089973186, - "loss": 46.0, - "step": 19351 - }, - { - "epoch": 1.4795955425578684, - "grad_norm": 0.0008519775001332164, - "learning_rate": 0.00019999892078814235, - "loss": 46.0, - "step": 19352 - }, - { - "epoch": 1.4796719995412582, - "grad_norm": 0.0022986182011663914, - "learning_rate": 0.00019999892067654708, - "loss": 46.0, - "step": 19353 - }, - { - "epoch": 1.4797484565246477, - "grad_norm": 0.003029866376891732, - "learning_rate": 0.00019999892056494605, - "loss": 46.0, - "step": 19354 - }, - { - "epoch": 1.4798249135080375, - "grad_norm": 0.0005599893629550934, - "learning_rate": 0.00019999892045333923, - "loss": 46.0, - "step": 19355 - }, - { - "epoch": 1.4799013704914272, - "grad_norm": 0.006208906881511211, - "learning_rate": 0.00019999892034172664, - "loss": 46.0, - "step": 19356 - }, - { - "epoch": 1.479977827474817, - "grad_norm": 0.0008485438884235919, - "learning_rate": 0.00019999892023010832, - "loss": 46.0, - "step": 19357 - }, - { - "epoch": 1.4800542844582067, - "grad_norm": 0.0005953279323875904, - "learning_rate": 0.0001999989201184842, - "loss": 46.0, - "step": 19358 - }, - { - "epoch": 1.4801307414415965, - "grad_norm": 0.001970952143892646, - "learning_rate": 0.00019999892000685431, - "loss": 46.0, - "step": 19359 - }, - { - "epoch": 1.4802071984249863, - "grad_norm": 0.0007726777112111449, - "learning_rate": 0.00019999891989521865, - "loss": 46.0, - "step": 19360 - }, - { - "epoch": 1.4802836554083758, - "grad_norm": 0.0034486232325434685, - "learning_rate": 0.00019999891978357724, - "loss": 46.0, - "step": 19361 - }, - { - "epoch": 1.4803601123917656, - "grad_norm": 0.0002802393864840269, - "learning_rate": 0.0001999989196719301, - "loss": 46.0, - "step": 19362 - }, - { - "epoch": 1.4804365693751553, - "grad_norm": 0.0007574927294626832, - "learning_rate": 0.0001999989195602771, - "loss": 46.0, - "step": 19363 - }, - { - "epoch": 1.480513026358545, - "grad_norm": 0.0012183692306280136, - "learning_rate": 0.00019999891944861838, - "loss": 46.0, - "step": 19364 - }, - { - "epoch": 1.4805894833419346, - "grad_norm": 0.002184170065447688, - "learning_rate": 0.00019999891933695387, - "loss": 46.0, - "step": 19365 - }, - { - "epoch": 1.4806659403253244, - "grad_norm": 0.0009818640537559986, - "learning_rate": 0.00019999891922528362, - "loss": 46.0, - "step": 19366 - }, - { - "epoch": 1.4807423973087142, - "grad_norm": 0.0053679752163589, - "learning_rate": 0.00019999891911360757, - "loss": 46.0, - "step": 19367 - }, - { - "epoch": 1.480818854292104, - "grad_norm": 0.0030079404823482037, - "learning_rate": 0.00019999891900192575, - "loss": 46.0, - "step": 19368 - }, - { - "epoch": 1.4808953112754937, - "grad_norm": 0.0009387653553858399, - "learning_rate": 0.00019999891889023818, - "loss": 46.0, - "step": 19369 - }, - { - "epoch": 1.4809717682588834, - "grad_norm": 0.0024747024290263653, - "learning_rate": 0.00019999891877854484, - "loss": 46.0, - "step": 19370 - }, - { - "epoch": 1.4810482252422732, - "grad_norm": 0.0010155820054933429, - "learning_rate": 0.00019999891866684573, - "loss": 46.0, - "step": 19371 - }, - { - "epoch": 1.4811246822256627, - "grad_norm": 0.002400304889306426, - "learning_rate": 0.00019999891855514084, - "loss": 46.0, - "step": 19372 - }, - { - "epoch": 1.4812011392090525, - "grad_norm": 0.012223457917571068, - "learning_rate": 0.0001999989184434302, - "loss": 46.0, - "step": 19373 - }, - { - "epoch": 1.4812775961924423, - "grad_norm": 0.001629875274375081, - "learning_rate": 0.0001999989183317138, - "loss": 46.0, - "step": 19374 - }, - { - "epoch": 1.481354053175832, - "grad_norm": 0.0008832475286908448, - "learning_rate": 0.00019999891821999158, - "loss": 46.0, - "step": 19375 - }, - { - "epoch": 1.4814305101592216, - "grad_norm": 0.009094862267374992, - "learning_rate": 0.0001999989181082636, - "loss": 46.0, - "step": 19376 - }, - { - "epoch": 1.4815069671426113, - "grad_norm": 0.0006835999083705246, - "learning_rate": 0.00019999891799652987, - "loss": 46.0, - "step": 19377 - }, - { - "epoch": 1.481583424126001, - "grad_norm": 0.0019940081983804703, - "learning_rate": 0.0001999989178847904, - "loss": 46.0, - "step": 19378 - }, - { - "epoch": 1.4816598811093908, - "grad_norm": 0.0066498215310275555, - "learning_rate": 0.00019999891777304512, - "loss": 46.0, - "step": 19379 - }, - { - "epoch": 1.4817363380927806, - "grad_norm": 0.0008446233114227653, - "learning_rate": 0.00019999891766129407, - "loss": 46.0, - "step": 19380 - }, - { - "epoch": 1.4818127950761704, - "grad_norm": 0.0005438279476948082, - "learning_rate": 0.00019999891754953725, - "loss": 46.0, - "step": 19381 - }, - { - "epoch": 1.4818892520595601, - "grad_norm": 0.0013539448846131563, - "learning_rate": 0.00019999891743777468, - "loss": 46.0, - "step": 19382 - }, - { - "epoch": 1.4819657090429497, - "grad_norm": 0.0013285381719470024, - "learning_rate": 0.00019999891732600634, - "loss": 46.0, - "step": 19383 - }, - { - "epoch": 1.4820421660263394, - "grad_norm": 0.0017920794198289514, - "learning_rate": 0.0001999989172142322, - "loss": 46.0, - "step": 19384 - }, - { - "epoch": 1.4821186230097292, - "grad_norm": 0.0007969422149471939, - "learning_rate": 0.00019999891710245234, - "loss": 46.0, - "step": 19385 - }, - { - "epoch": 1.482195079993119, - "grad_norm": 0.0008265781216323376, - "learning_rate": 0.00019999891699066667, - "loss": 46.0, - "step": 19386 - }, - { - "epoch": 1.4822715369765085, - "grad_norm": 0.0007214270299300551, - "learning_rate": 0.00019999891687887526, - "loss": 46.0, - "step": 19387 - }, - { - "epoch": 1.4823479939598982, - "grad_norm": 0.0065918490290641785, - "learning_rate": 0.00019999891676707808, - "loss": 46.0, - "step": 19388 - }, - { - "epoch": 1.482424450943288, - "grad_norm": 0.0022306328173726797, - "learning_rate": 0.0001999989166552751, - "loss": 46.0, - "step": 19389 - }, - { - "epoch": 1.4825009079266778, - "grad_norm": 0.0005873983609490097, - "learning_rate": 0.00019999891654346637, - "loss": 46.0, - "step": 19390 - }, - { - "epoch": 1.4825773649100675, - "grad_norm": 0.0021284841932356358, - "learning_rate": 0.00019999891643165184, - "loss": 46.0, - "step": 19391 - }, - { - "epoch": 1.4826538218934573, - "grad_norm": 0.004474198445677757, - "learning_rate": 0.0001999989163198316, - "loss": 46.0, - "step": 19392 - }, - { - "epoch": 1.482730278876847, - "grad_norm": 0.0007538386271335185, - "learning_rate": 0.00019999891620800555, - "loss": 46.0, - "step": 19393 - }, - { - "epoch": 1.4828067358602366, - "grad_norm": 0.0006229786667972803, - "learning_rate": 0.00019999891609617375, - "loss": 46.0, - "step": 19394 - }, - { - "epoch": 1.4828831928436264, - "grad_norm": 0.0014410275034606457, - "learning_rate": 0.00019999891598433616, - "loss": 46.0, - "step": 19395 - }, - { - "epoch": 1.4829596498270161, - "grad_norm": 0.002674880437552929, - "learning_rate": 0.00019999891587249281, - "loss": 46.0, - "step": 19396 - }, - { - "epoch": 1.4830361068104059, - "grad_norm": 0.0015902069862931967, - "learning_rate": 0.0001999989157606437, - "loss": 46.0, - "step": 19397 - }, - { - "epoch": 1.4831125637937954, - "grad_norm": 0.0031280554831027985, - "learning_rate": 0.00019999891564878878, - "loss": 46.0, - "step": 19398 - }, - { - "epoch": 1.4831890207771852, - "grad_norm": 0.0032418814953416586, - "learning_rate": 0.00019999891553692812, - "loss": 46.0, - "step": 19399 - }, - { - "epoch": 1.483265477760575, - "grad_norm": 0.0013350851368159056, - "learning_rate": 0.00019999891542506171, - "loss": 46.0, - "step": 19400 - }, - { - "epoch": 1.4833419347439647, - "grad_norm": 0.0021014041267335415, - "learning_rate": 0.0001999989153131895, - "loss": 46.0, - "step": 19401 - }, - { - "epoch": 1.4834183917273545, - "grad_norm": 0.0011488395975902677, - "learning_rate": 0.00019999891520131155, - "loss": 46.0, - "step": 19402 - }, - { - "epoch": 1.4834948487107442, - "grad_norm": 0.0008808790007606149, - "learning_rate": 0.00019999891508942782, - "loss": 46.0, - "step": 19403 - }, - { - "epoch": 1.4835713056941338, - "grad_norm": 0.0009377410751767457, - "learning_rate": 0.00019999891497753832, - "loss": 46.0, - "step": 19404 - }, - { - "epoch": 1.4836477626775235, - "grad_norm": 0.0009179608314298093, - "learning_rate": 0.00019999891486564305, - "loss": 46.0, - "step": 19405 - }, - { - "epoch": 1.4837242196609133, - "grad_norm": 0.002284203888848424, - "learning_rate": 0.000199998914753742, - "loss": 46.0, - "step": 19406 - }, - { - "epoch": 1.483800676644303, - "grad_norm": 0.0010577746434137225, - "learning_rate": 0.00019999891464183518, - "loss": 46.0, - "step": 19407 - }, - { - "epoch": 1.4838771336276928, - "grad_norm": 0.0014719637110829353, - "learning_rate": 0.00019999891452992258, - "loss": 46.0, - "step": 19408 - }, - { - "epoch": 1.4839535906110823, - "grad_norm": 0.0009698682115413249, - "learning_rate": 0.00019999891441800424, - "loss": 46.0, - "step": 19409 - }, - { - "epoch": 1.484030047594472, - "grad_norm": 0.0007479399209842086, - "learning_rate": 0.00019999891430608012, - "loss": 46.0, - "step": 19410 - }, - { - "epoch": 1.4841065045778619, - "grad_norm": 0.0005773433367721736, - "learning_rate": 0.00019999891419415024, - "loss": 46.0, - "step": 19411 - }, - { - "epoch": 1.4841829615612516, - "grad_norm": 0.0007472485885955393, - "learning_rate": 0.00019999891408221458, - "loss": 46.0, - "step": 19412 - }, - { - "epoch": 1.4842594185446414, - "grad_norm": 0.004900270141661167, - "learning_rate": 0.00019999891397027314, - "loss": 46.0, - "step": 19413 - }, - { - "epoch": 1.4843358755280311, - "grad_norm": 0.004642863292247057, - "learning_rate": 0.00019999891385832593, - "loss": 46.0, - "step": 19414 - }, - { - "epoch": 1.4844123325114207, - "grad_norm": 0.005004368722438812, - "learning_rate": 0.00019999891374637298, - "loss": 46.0, - "step": 19415 - }, - { - "epoch": 1.4844887894948104, - "grad_norm": 0.003215061966329813, - "learning_rate": 0.00019999891363441425, - "loss": 46.0, - "step": 19416 - }, - { - "epoch": 1.4845652464782002, - "grad_norm": 0.011967454105615616, - "learning_rate": 0.00019999891352244972, - "loss": 46.0, - "step": 19417 - }, - { - "epoch": 1.48464170346159, - "grad_norm": 0.0008361469954252243, - "learning_rate": 0.00019999891341047947, - "loss": 46.0, - "step": 19418 - }, - { - "epoch": 1.4847181604449795, - "grad_norm": 0.001079672365449369, - "learning_rate": 0.0001999989132985034, - "loss": 46.0, - "step": 19419 - }, - { - "epoch": 1.4847946174283693, - "grad_norm": 0.005496449302881956, - "learning_rate": 0.0001999989131865216, - "loss": 46.0, - "step": 19420 - }, - { - "epoch": 1.484871074411759, - "grad_norm": 0.005404963623732328, - "learning_rate": 0.000199998913074534, - "loss": 46.0, - "step": 19421 - }, - { - "epoch": 1.4849475313951488, - "grad_norm": 0.0014856626512482762, - "learning_rate": 0.00019999891296254067, - "loss": 46.0, - "step": 19422 - }, - { - "epoch": 1.4850239883785386, - "grad_norm": 0.004496196284890175, - "learning_rate": 0.00019999891285054155, - "loss": 46.0, - "step": 19423 - }, - { - "epoch": 1.4851004453619283, - "grad_norm": 0.001068907673470676, - "learning_rate": 0.00019999891273853667, - "loss": 46.0, - "step": 19424 - }, - { - "epoch": 1.485176902345318, - "grad_norm": 0.006409113295376301, - "learning_rate": 0.00019999891262652598, - "loss": 46.0, - "step": 19425 - }, - { - "epoch": 1.4852533593287076, - "grad_norm": 0.0012095402926206589, - "learning_rate": 0.00019999891251450954, - "loss": 46.0, - "step": 19426 - }, - { - "epoch": 1.4853298163120974, - "grad_norm": 0.001035945606417954, - "learning_rate": 0.00019999891240248734, - "loss": 46.0, - "step": 19427 - }, - { - "epoch": 1.4854062732954871, - "grad_norm": 0.0006595029844902456, - "learning_rate": 0.00019999891229045938, - "loss": 46.0, - "step": 19428 - }, - { - "epoch": 1.485482730278877, - "grad_norm": 0.0012759588425979018, - "learning_rate": 0.00019999891217842563, - "loss": 46.0, - "step": 19429 - }, - { - "epoch": 1.4855591872622664, - "grad_norm": 0.001290992135182023, - "learning_rate": 0.00019999891206638615, - "loss": 46.0, - "step": 19430 - }, - { - "epoch": 1.4856356442456562, - "grad_norm": 0.0007580990786664188, - "learning_rate": 0.00019999891195434085, - "loss": 46.0, - "step": 19431 - }, - { - "epoch": 1.485712101229046, - "grad_norm": 0.0008399473736062646, - "learning_rate": 0.0001999989118422898, - "loss": 46.0, - "step": 19432 - }, - { - "epoch": 1.4857885582124357, - "grad_norm": 0.00062708550831303, - "learning_rate": 0.00019999891173023298, - "loss": 46.0, - "step": 19433 - }, - { - "epoch": 1.4858650151958255, - "grad_norm": 0.0054834159091115, - "learning_rate": 0.00019999891161817042, - "loss": 46.0, - "step": 19434 - }, - { - "epoch": 1.4859414721792152, - "grad_norm": 0.002074608113616705, - "learning_rate": 0.00019999891150610205, - "loss": 46.0, - "step": 19435 - }, - { - "epoch": 1.486017929162605, - "grad_norm": 0.0014773256843909621, - "learning_rate": 0.00019999891139402793, - "loss": 46.0, - "step": 19436 - }, - { - "epoch": 1.4860943861459945, - "grad_norm": 0.0006069326191209257, - "learning_rate": 0.00019999891128194802, - "loss": 46.0, - "step": 19437 - }, - { - "epoch": 1.4861708431293843, - "grad_norm": 0.001013072906062007, - "learning_rate": 0.00019999891116986236, - "loss": 46.0, - "step": 19438 - }, - { - "epoch": 1.486247300112774, - "grad_norm": 0.0008229255327023566, - "learning_rate": 0.00019999891105777092, - "loss": 46.0, - "step": 19439 - }, - { - "epoch": 1.4863237570961638, - "grad_norm": 0.0009191535646095872, - "learning_rate": 0.00019999891094567374, - "loss": 46.0, - "step": 19440 - }, - { - "epoch": 1.4864002140795534, - "grad_norm": 0.0007151351310312748, - "learning_rate": 0.00019999891083357074, - "loss": 46.0, - "step": 19441 - }, - { - "epoch": 1.4864766710629431, - "grad_norm": 0.0014817918417975307, - "learning_rate": 0.000199998910721462, - "loss": 46.0, - "step": 19442 - }, - { - "epoch": 1.4865531280463329, - "grad_norm": 0.0009195443708449602, - "learning_rate": 0.0001999989106093475, - "loss": 46.0, - "step": 19443 - }, - { - "epoch": 1.4866295850297226, - "grad_norm": 0.0005606548511423171, - "learning_rate": 0.0001999989104972272, - "loss": 46.0, - "step": 19444 - }, - { - "epoch": 1.4867060420131124, - "grad_norm": 0.0019231239566579461, - "learning_rate": 0.00019999891038510116, - "loss": 46.0, - "step": 19445 - }, - { - "epoch": 1.4867824989965022, - "grad_norm": 0.003348397556692362, - "learning_rate": 0.00019999891027296937, - "loss": 46.0, - "step": 19446 - }, - { - "epoch": 1.486858955979892, - "grad_norm": 0.0009724876726977527, - "learning_rate": 0.00019999891016083177, - "loss": 46.0, - "step": 19447 - }, - { - "epoch": 1.4869354129632815, - "grad_norm": 0.0013627978041768074, - "learning_rate": 0.0001999989100486884, - "loss": 46.0, - "step": 19448 - }, - { - "epoch": 1.4870118699466712, - "grad_norm": 0.002209307625889778, - "learning_rate": 0.00019999890993653926, - "loss": 46.0, - "step": 19449 - }, - { - "epoch": 1.487088326930061, - "grad_norm": 0.0014282738557085395, - "learning_rate": 0.00019999890982438438, - "loss": 46.0, - "step": 19450 - }, - { - "epoch": 1.4871647839134507, - "grad_norm": 0.010506403632462025, - "learning_rate": 0.00019999890971222372, - "loss": 46.0, - "step": 19451 - }, - { - "epoch": 1.4872412408968403, - "grad_norm": 0.0008174251415766776, - "learning_rate": 0.00019999890960005728, - "loss": 46.0, - "step": 19452 - }, - { - "epoch": 1.48731769788023, - "grad_norm": 0.0007639850373379886, - "learning_rate": 0.0001999989094878851, - "loss": 46.0, - "step": 19453 - }, - { - "epoch": 1.4873941548636198, - "grad_norm": 0.0011500392574816942, - "learning_rate": 0.0001999989093757071, - "loss": 46.0, - "step": 19454 - }, - { - "epoch": 1.4874706118470096, - "grad_norm": 0.0007135348860174417, - "learning_rate": 0.00019999890926352337, - "loss": 46.0, - "step": 19455 - }, - { - "epoch": 1.4875470688303993, - "grad_norm": 0.0017271365504711866, - "learning_rate": 0.00019999890915133387, - "loss": 46.0, - "step": 19456 - }, - { - "epoch": 1.487623525813789, - "grad_norm": 0.0016721192514523864, - "learning_rate": 0.00019999890903913857, - "loss": 46.0, - "step": 19457 - }, - { - "epoch": 1.4876999827971789, - "grad_norm": 0.0010404117638245225, - "learning_rate": 0.00019999890892693752, - "loss": 46.0, - "step": 19458 - }, - { - "epoch": 1.4877764397805684, - "grad_norm": 0.003896439913660288, - "learning_rate": 0.0001999989088147307, - "loss": 46.0, - "step": 19459 - }, - { - "epoch": 1.4878528967639582, - "grad_norm": 0.006754432339221239, - "learning_rate": 0.00019999890870251813, - "loss": 46.0, - "step": 19460 - }, - { - "epoch": 1.487929353747348, - "grad_norm": 0.0007660723058506846, - "learning_rate": 0.00019999890859029974, - "loss": 46.0, - "step": 19461 - }, - { - "epoch": 1.4880058107307377, - "grad_norm": 0.0006175947492010891, - "learning_rate": 0.0001999989084780756, - "loss": 46.0, - "step": 19462 - }, - { - "epoch": 1.4880822677141272, - "grad_norm": 0.001723306137137115, - "learning_rate": 0.0001999989083658457, - "loss": 46.0, - "step": 19463 - }, - { - "epoch": 1.488158724697517, - "grad_norm": 0.0030678361654281616, - "learning_rate": 0.00019999890825361005, - "loss": 46.0, - "step": 19464 - }, - { - "epoch": 1.4882351816809067, - "grad_norm": 0.0008425671840086579, - "learning_rate": 0.00019999890814136862, - "loss": 46.0, - "step": 19465 - }, - { - "epoch": 1.4883116386642965, - "grad_norm": 0.00228483765386045, - "learning_rate": 0.0001999989080291214, - "loss": 46.0, - "step": 19466 - }, - { - "epoch": 1.4883880956476863, - "grad_norm": 0.0017840571235865355, - "learning_rate": 0.00019999890791686843, - "loss": 46.0, - "step": 19467 - }, - { - "epoch": 1.488464552631076, - "grad_norm": 0.0010684585431590676, - "learning_rate": 0.00019999890780460967, - "loss": 46.0, - "step": 19468 - }, - { - "epoch": 1.4885410096144658, - "grad_norm": 0.005592750385403633, - "learning_rate": 0.00019999890769234515, - "loss": 46.0, - "step": 19469 - }, - { - "epoch": 1.4886174665978553, - "grad_norm": 0.0050817388109862804, - "learning_rate": 0.00019999890758007487, - "loss": 46.0, - "step": 19470 - }, - { - "epoch": 1.488693923581245, - "grad_norm": 0.001060408540070057, - "learning_rate": 0.0001999989074677988, - "loss": 46.0, - "step": 19471 - }, - { - "epoch": 1.4887703805646348, - "grad_norm": 0.004463254474103451, - "learning_rate": 0.000199998907355517, - "loss": 46.0, - "step": 19472 - }, - { - "epoch": 1.4888468375480246, - "grad_norm": 0.0006602173089049757, - "learning_rate": 0.0001999989072432294, - "loss": 46.0, - "step": 19473 - }, - { - "epoch": 1.4889232945314141, - "grad_norm": 0.000831605342682451, - "learning_rate": 0.00019999890713093605, - "loss": 46.0, - "step": 19474 - }, - { - "epoch": 1.488999751514804, - "grad_norm": 0.0028210084419697523, - "learning_rate": 0.0001999989070186369, - "loss": 46.0, - "step": 19475 - }, - { - "epoch": 1.4890762084981937, - "grad_norm": 0.002045969944447279, - "learning_rate": 0.00019999890690633202, - "loss": 46.0, - "step": 19476 - }, - { - "epoch": 1.4891526654815834, - "grad_norm": 0.000600756611675024, - "learning_rate": 0.00019999890679402133, - "loss": 46.0, - "step": 19477 - }, - { - "epoch": 1.4892291224649732, - "grad_norm": 0.0020963200367987156, - "learning_rate": 0.0001999989066817049, - "loss": 46.0, - "step": 19478 - }, - { - "epoch": 1.489305579448363, - "grad_norm": 0.0006956481374800205, - "learning_rate": 0.0001999989065693827, - "loss": 46.0, - "step": 19479 - }, - { - "epoch": 1.4893820364317527, - "grad_norm": 0.002297037048265338, - "learning_rate": 0.0001999989064570547, - "loss": 46.0, - "step": 19480 - }, - { - "epoch": 1.4894584934151422, - "grad_norm": 0.001409986987709999, - "learning_rate": 0.00019999890634472096, - "loss": 46.0, - "step": 19481 - }, - { - "epoch": 1.489534950398532, - "grad_norm": 0.003171502845361829, - "learning_rate": 0.00019999890623238143, - "loss": 46.0, - "step": 19482 - }, - { - "epoch": 1.4896114073819218, - "grad_norm": 0.00182953046169132, - "learning_rate": 0.00019999890612003616, - "loss": 46.0, - "step": 19483 - }, - { - "epoch": 1.4896878643653115, - "grad_norm": 0.002752818865701556, - "learning_rate": 0.00019999890600768509, - "loss": 46.0, - "step": 19484 - }, - { - "epoch": 1.489764321348701, - "grad_norm": 0.0014071421464905143, - "learning_rate": 0.0001999989058953283, - "loss": 46.0, - "step": 19485 - }, - { - "epoch": 1.4898407783320908, - "grad_norm": 0.004534035921096802, - "learning_rate": 0.0001999989057829657, - "loss": 46.0, - "step": 19486 - }, - { - "epoch": 1.4899172353154806, - "grad_norm": 0.0033098235726356506, - "learning_rate": 0.0001999989056705973, - "loss": 46.0, - "step": 19487 - }, - { - "epoch": 1.4899936922988704, - "grad_norm": 0.0017915000207722187, - "learning_rate": 0.0001999989055582232, - "loss": 46.0, - "step": 19488 - }, - { - "epoch": 1.4900701492822601, - "grad_norm": 0.008325501345098019, - "learning_rate": 0.00019999890544584328, - "loss": 46.0, - "step": 19489 - }, - { - "epoch": 1.4901466062656499, - "grad_norm": 0.0020200293511152267, - "learning_rate": 0.00019999890533345762, - "loss": 46.0, - "step": 19490 - }, - { - "epoch": 1.4902230632490396, - "grad_norm": 0.0010951198637485504, - "learning_rate": 0.0001999989052210662, - "loss": 46.0, - "step": 19491 - }, - { - "epoch": 1.4902995202324292, - "grad_norm": 0.0022767281625419855, - "learning_rate": 0.00019999890510866895, - "loss": 46.0, - "step": 19492 - }, - { - "epoch": 1.490375977215819, - "grad_norm": 0.0007755453698337078, - "learning_rate": 0.00019999890499626597, - "loss": 46.0, - "step": 19493 - }, - { - "epoch": 1.4904524341992087, - "grad_norm": 0.0006250563892535865, - "learning_rate": 0.00019999890488385722, - "loss": 46.0, - "step": 19494 - }, - { - "epoch": 1.4905288911825985, - "grad_norm": 0.0030151105020195246, - "learning_rate": 0.00019999890477144272, - "loss": 46.0, - "step": 19495 - }, - { - "epoch": 1.490605348165988, - "grad_norm": 0.0007749577052891254, - "learning_rate": 0.00019999890465902242, - "loss": 46.0, - "step": 19496 - }, - { - "epoch": 1.4906818051493778, - "grad_norm": 0.0008271206170320511, - "learning_rate": 0.00019999890454659638, - "loss": 46.0, - "step": 19497 - }, - { - "epoch": 1.4907582621327675, - "grad_norm": 0.004292960744351149, - "learning_rate": 0.00019999890443416453, - "loss": 46.0, - "step": 19498 - }, - { - "epoch": 1.4908347191161573, - "grad_norm": 0.0010203635320067406, - "learning_rate": 0.00019999890432172694, - "loss": 46.0, - "step": 19499 - }, - { - "epoch": 1.490911176099547, - "grad_norm": 0.0018442235887050629, - "learning_rate": 0.00019999890420928357, - "loss": 46.0, - "step": 19500 - }, - { - "epoch": 1.4909876330829368, - "grad_norm": 0.0005117595428600907, - "learning_rate": 0.00019999890409683443, - "loss": 46.0, - "step": 19501 - }, - { - "epoch": 1.4910640900663266, - "grad_norm": 0.0009238338097929955, - "learning_rate": 0.00019999890398437952, - "loss": 46.0, - "step": 19502 - }, - { - "epoch": 1.491140547049716, - "grad_norm": 0.0006527479854412377, - "learning_rate": 0.00019999890387191886, - "loss": 46.0, - "step": 19503 - }, - { - "epoch": 1.4912170040331059, - "grad_norm": 0.001469060662202537, - "learning_rate": 0.00019999890375945243, - "loss": 46.0, - "step": 19504 - }, - { - "epoch": 1.4912934610164956, - "grad_norm": 0.000661347818095237, - "learning_rate": 0.00019999890364698022, - "loss": 46.0, - "step": 19505 - }, - { - "epoch": 1.4913699179998854, - "grad_norm": 0.0010039459448307753, - "learning_rate": 0.00019999890353450224, - "loss": 46.0, - "step": 19506 - }, - { - "epoch": 1.491446374983275, - "grad_norm": 0.0006407740875147283, - "learning_rate": 0.0001999989034220185, - "loss": 46.0, - "step": 19507 - }, - { - "epoch": 1.4915228319666647, - "grad_norm": 0.00034887384390458465, - "learning_rate": 0.00019999890330952896, - "loss": 46.0, - "step": 19508 - }, - { - "epoch": 1.4915992889500544, - "grad_norm": 0.0010198383824899793, - "learning_rate": 0.00019999890319703366, - "loss": 46.0, - "step": 19509 - }, - { - "epoch": 1.4916757459334442, - "grad_norm": 0.002623027889057994, - "learning_rate": 0.0001999989030845326, - "loss": 46.0, - "step": 19510 - }, - { - "epoch": 1.491752202916834, - "grad_norm": 0.0005774474120698869, - "learning_rate": 0.00019999890297202577, - "loss": 46.0, - "step": 19511 - }, - { - "epoch": 1.4918286599002237, - "grad_norm": 0.002018352970480919, - "learning_rate": 0.00019999890285951318, - "loss": 46.0, - "step": 19512 - }, - { - "epoch": 1.4919051168836135, - "grad_norm": 0.0003837211988866329, - "learning_rate": 0.0001999989027469948, - "loss": 46.0, - "step": 19513 - }, - { - "epoch": 1.491981573867003, - "grad_norm": 0.0021206915844231844, - "learning_rate": 0.00019999890263447067, - "loss": 46.0, - "step": 19514 - }, - { - "epoch": 1.4920580308503928, - "grad_norm": 0.0008061110856942832, - "learning_rate": 0.00019999890252194076, - "loss": 46.0, - "step": 19515 - }, - { - "epoch": 1.4921344878337826, - "grad_norm": 0.0017214424442499876, - "learning_rate": 0.00019999890240940507, - "loss": 46.0, - "step": 19516 - }, - { - "epoch": 1.4922109448171723, - "grad_norm": 0.0011911719338968396, - "learning_rate": 0.00019999890229686361, - "loss": 46.0, - "step": 19517 - }, - { - "epoch": 1.4922874018005619, - "grad_norm": 0.00245038908906281, - "learning_rate": 0.0001999989021843164, - "loss": 46.0, - "step": 19518 - }, - { - "epoch": 1.4923638587839516, - "grad_norm": 0.002539834240451455, - "learning_rate": 0.00019999890207176343, - "loss": 46.0, - "step": 19519 - }, - { - "epoch": 1.4924403157673414, - "grad_norm": 0.0006074015982449055, - "learning_rate": 0.00019999890195920468, - "loss": 46.0, - "step": 19520 - }, - { - "epoch": 1.4925167727507311, - "grad_norm": 0.0009795281803235412, - "learning_rate": 0.00019999890184664015, - "loss": 46.0, - "step": 19521 - }, - { - "epoch": 1.492593229734121, - "grad_norm": 0.0007095045875757933, - "learning_rate": 0.00019999890173406985, - "loss": 46.0, - "step": 19522 - }, - { - "epoch": 1.4926696867175107, - "grad_norm": 0.002733758883550763, - "learning_rate": 0.0001999989016214938, - "loss": 46.0, - "step": 19523 - }, - { - "epoch": 1.4927461437009004, - "grad_norm": 0.0013536748010665178, - "learning_rate": 0.00019999890150891196, - "loss": 46.0, - "step": 19524 - }, - { - "epoch": 1.49282260068429, - "grad_norm": 0.001242319936864078, - "learning_rate": 0.00019999890139632437, - "loss": 46.0, - "step": 19525 - }, - { - "epoch": 1.4928990576676797, - "grad_norm": 0.0012543570483103395, - "learning_rate": 0.000199998901283731, - "loss": 46.0, - "step": 19526 - }, - { - "epoch": 1.4929755146510695, - "grad_norm": 0.002490056911483407, - "learning_rate": 0.00019999890117113184, - "loss": 46.0, - "step": 19527 - }, - { - "epoch": 1.4930519716344592, - "grad_norm": 0.0014810101129114628, - "learning_rate": 0.00019999890105852695, - "loss": 46.0, - "step": 19528 - }, - { - "epoch": 1.4931284286178488, - "grad_norm": 0.0009366166195832193, - "learning_rate": 0.00019999890094591627, - "loss": 46.0, - "step": 19529 - }, - { - "epoch": 1.4932048856012385, - "grad_norm": 0.011300221085548401, - "learning_rate": 0.0001999989008332998, - "loss": 46.0, - "step": 19530 - }, - { - "epoch": 1.4932813425846283, - "grad_norm": 0.0009357829112559557, - "learning_rate": 0.0001999989007206776, - "loss": 46.0, - "step": 19531 - }, - { - "epoch": 1.493357799568018, - "grad_norm": 0.0012869485653936863, - "learning_rate": 0.00019999890060804962, - "loss": 46.0, - "step": 19532 - }, - { - "epoch": 1.4934342565514078, - "grad_norm": 0.00031263078562915325, - "learning_rate": 0.00019999890049541587, - "loss": 46.0, - "step": 19533 - }, - { - "epoch": 1.4935107135347976, - "grad_norm": 0.0006317394436337054, - "learning_rate": 0.00019999890038277634, - "loss": 46.0, - "step": 19534 - }, - { - "epoch": 1.4935871705181871, - "grad_norm": 0.0019006740767508745, - "learning_rate": 0.00019999890027013105, - "loss": 46.0, - "step": 19535 - }, - { - "epoch": 1.4936636275015769, - "grad_norm": 0.0007488125120289624, - "learning_rate": 0.00019999890015747997, - "loss": 46.0, - "step": 19536 - }, - { - "epoch": 1.4937400844849666, - "grad_norm": 0.003511018818244338, - "learning_rate": 0.00019999890004482315, - "loss": 46.0, - "step": 19537 - }, - { - "epoch": 1.4938165414683564, - "grad_norm": 0.0006438705022446811, - "learning_rate": 0.00019999889993216054, - "loss": 46.0, - "step": 19538 - }, - { - "epoch": 1.4938929984517462, - "grad_norm": 0.0010862246854230762, - "learning_rate": 0.00019999889981949217, - "loss": 46.0, - "step": 19539 - }, - { - "epoch": 1.4939694554351357, - "grad_norm": 0.0017551570199429989, - "learning_rate": 0.00019999889970681803, - "loss": 46.0, - "step": 19540 - }, - { - "epoch": 1.4940459124185255, - "grad_norm": 0.0004184254794381559, - "learning_rate": 0.00019999889959413812, - "loss": 46.0, - "step": 19541 - }, - { - "epoch": 1.4941223694019152, - "grad_norm": 0.0005411518504843116, - "learning_rate": 0.00019999889948145243, - "loss": 46.0, - "step": 19542 - }, - { - "epoch": 1.494198826385305, - "grad_norm": 0.0023433968890458345, - "learning_rate": 0.00019999889936876098, - "loss": 46.0, - "step": 19543 - }, - { - "epoch": 1.4942752833686948, - "grad_norm": 0.0049154614098370075, - "learning_rate": 0.00019999889925606377, - "loss": 46.0, - "step": 19544 - }, - { - "epoch": 1.4943517403520845, - "grad_norm": 0.00227510672993958, - "learning_rate": 0.0001999988991433608, - "loss": 46.0, - "step": 19545 - }, - { - "epoch": 1.494428197335474, - "grad_norm": 0.0005902040284126997, - "learning_rate": 0.000199998899030652, - "loss": 46.0, - "step": 19546 - }, - { - "epoch": 1.4945046543188638, - "grad_norm": 0.0036403939593583345, - "learning_rate": 0.00019999889891793751, - "loss": 46.0, - "step": 19547 - }, - { - "epoch": 1.4945811113022536, - "grad_norm": 0.0021516934502869844, - "learning_rate": 0.0001999988988052172, - "loss": 46.0, - "step": 19548 - }, - { - "epoch": 1.4946575682856433, - "grad_norm": 0.004841632209718227, - "learning_rate": 0.00019999889869249114, - "loss": 46.0, - "step": 19549 - }, - { - "epoch": 1.4947340252690329, - "grad_norm": 0.0008717633900232613, - "learning_rate": 0.0001999988985797593, - "loss": 46.0, - "step": 19550 - }, - { - "epoch": 1.4948104822524226, - "grad_norm": 0.0004530810983851552, - "learning_rate": 0.00019999889846702168, - "loss": 46.0, - "step": 19551 - }, - { - "epoch": 1.4948869392358124, - "grad_norm": 0.0013703095028176904, - "learning_rate": 0.00019999889835427831, - "loss": 46.0, - "step": 19552 - }, - { - "epoch": 1.4949633962192022, - "grad_norm": 0.0008979664999060333, - "learning_rate": 0.00019999889824152915, - "loss": 46.0, - "step": 19553 - }, - { - "epoch": 1.495039853202592, - "grad_norm": 0.007226672489196062, - "learning_rate": 0.00019999889812877426, - "loss": 46.0, - "step": 19554 - }, - { - "epoch": 1.4951163101859817, - "grad_norm": 0.00529554532840848, - "learning_rate": 0.00019999889801601358, - "loss": 46.0, - "step": 19555 - }, - { - "epoch": 1.4951927671693714, - "grad_norm": 0.0008369798306375742, - "learning_rate": 0.00019999889790324712, - "loss": 46.0, - "step": 19556 - }, - { - "epoch": 1.495269224152761, - "grad_norm": 0.0005549624329432845, - "learning_rate": 0.0001999988977904749, - "loss": 46.0, - "step": 19557 - }, - { - "epoch": 1.4953456811361507, - "grad_norm": 0.0020292503759264946, - "learning_rate": 0.0001999988976776969, - "loss": 46.0, - "step": 19558 - }, - { - "epoch": 1.4954221381195405, - "grad_norm": 0.002541651949286461, - "learning_rate": 0.00019999889756491313, - "loss": 46.0, - "step": 19559 - }, - { - "epoch": 1.4954985951029303, - "grad_norm": 0.002015515463426709, - "learning_rate": 0.00019999889745212363, - "loss": 46.0, - "step": 19560 - }, - { - "epoch": 1.4955750520863198, - "grad_norm": 0.005844128783792257, - "learning_rate": 0.0001999988973393283, - "loss": 46.0, - "step": 19561 - }, - { - "epoch": 1.4956515090697096, - "grad_norm": 0.0005108119221404195, - "learning_rate": 0.00019999889722652724, - "loss": 46.0, - "step": 19562 - }, - { - "epoch": 1.4957279660530993, - "grad_norm": 0.0018171415431424975, - "learning_rate": 0.00019999889711372042, - "loss": 46.0, - "step": 19563 - }, - { - "epoch": 1.495804423036489, - "grad_norm": 0.0030105323530733585, - "learning_rate": 0.0001999988970009078, - "loss": 46.0, - "step": 19564 - }, - { - "epoch": 1.4958808800198788, - "grad_norm": 0.0022451234981417656, - "learning_rate": 0.00019999889688808944, - "loss": 46.0, - "step": 19565 - }, - { - "epoch": 1.4959573370032686, - "grad_norm": 0.0031978993210941553, - "learning_rate": 0.0001999988967752653, - "loss": 46.0, - "step": 19566 - }, - { - "epoch": 1.4960337939866584, - "grad_norm": 0.005774857476353645, - "learning_rate": 0.00019999889666243536, - "loss": 46.0, - "step": 19567 - }, - { - "epoch": 1.496110250970048, - "grad_norm": 0.0005562927690334618, - "learning_rate": 0.00019999889654959968, - "loss": 46.0, - "step": 19568 - }, - { - "epoch": 1.4961867079534377, - "grad_norm": 0.021002499386668205, - "learning_rate": 0.00019999889643675824, - "loss": 46.0, - "step": 19569 - }, - { - "epoch": 1.4962631649368274, - "grad_norm": 0.00046855604159645736, - "learning_rate": 0.000199998896323911, - "loss": 46.0, - "step": 19570 - }, - { - "epoch": 1.4963396219202172, - "grad_norm": 0.00031454942654818296, - "learning_rate": 0.00019999889621105803, - "loss": 46.0, - "step": 19571 - }, - { - "epoch": 1.4964160789036067, - "grad_norm": 0.00027260222123004496, - "learning_rate": 0.00019999889609819923, - "loss": 46.0, - "step": 19572 - }, - { - "epoch": 1.4964925358869965, - "grad_norm": 0.0005289419204927981, - "learning_rate": 0.00019999889598533473, - "loss": 46.0, - "step": 19573 - }, - { - "epoch": 1.4965689928703862, - "grad_norm": 0.0016695429803803563, - "learning_rate": 0.00019999889587246443, - "loss": 46.0, - "step": 19574 - }, - { - "epoch": 1.496645449853776, - "grad_norm": 0.0008459216332994401, - "learning_rate": 0.00019999889575958834, - "loss": 46.0, - "step": 19575 - }, - { - "epoch": 1.4967219068371658, - "grad_norm": 0.0031055586878210306, - "learning_rate": 0.00019999889564670652, - "loss": 46.0, - "step": 19576 - }, - { - "epoch": 1.4967983638205555, - "grad_norm": 0.0011080635013058782, - "learning_rate": 0.0001999988955338189, - "loss": 46.0, - "step": 19577 - }, - { - "epoch": 1.4968748208039453, - "grad_norm": 0.001750386436469853, - "learning_rate": 0.00019999889542092554, - "loss": 46.0, - "step": 19578 - }, - { - "epoch": 1.4969512777873348, - "grad_norm": 0.0021380395628511906, - "learning_rate": 0.00019999889530802637, - "loss": 46.0, - "step": 19579 - }, - { - "epoch": 1.4970277347707246, - "grad_norm": 0.001577091170474887, - "learning_rate": 0.00019999889519512146, - "loss": 46.0, - "step": 19580 - }, - { - "epoch": 1.4971041917541144, - "grad_norm": 0.005649860482662916, - "learning_rate": 0.00019999889508221077, - "loss": 46.0, - "step": 19581 - }, - { - "epoch": 1.4971806487375041, - "grad_norm": 0.0021517756395041943, - "learning_rate": 0.00019999889496929432, - "loss": 46.0, - "step": 19582 - }, - { - "epoch": 1.4972571057208937, - "grad_norm": 0.0026646198239177465, - "learning_rate": 0.00019999889485637209, - "loss": 46.0, - "step": 19583 - }, - { - "epoch": 1.4973335627042834, - "grad_norm": 0.0013939602067694068, - "learning_rate": 0.00019999889474344408, - "loss": 46.0, - "step": 19584 - }, - { - "epoch": 1.4974100196876732, - "grad_norm": 0.0006275244522839785, - "learning_rate": 0.00019999889463051033, - "loss": 46.0, - "step": 19585 - }, - { - "epoch": 1.497486476671063, - "grad_norm": 0.0008980878628790379, - "learning_rate": 0.00019999889451757078, - "loss": 46.0, - "step": 19586 - }, - { - "epoch": 1.4975629336544527, - "grad_norm": 0.0012236405164003372, - "learning_rate": 0.00019999889440462548, - "loss": 46.0, - "step": 19587 - }, - { - "epoch": 1.4976393906378425, - "grad_norm": 0.001069837948307395, - "learning_rate": 0.0001999988942916744, - "loss": 46.0, - "step": 19588 - }, - { - "epoch": 1.4977158476212322, - "grad_norm": 0.0023093358613550663, - "learning_rate": 0.00019999889417871757, - "loss": 46.0, - "step": 19589 - }, - { - "epoch": 1.4977923046046218, - "grad_norm": 0.0009686610428616405, - "learning_rate": 0.00019999889406575495, - "loss": 46.0, - "step": 19590 - }, - { - "epoch": 1.4978687615880115, - "grad_norm": 0.002314019249752164, - "learning_rate": 0.00019999889395278658, - "loss": 46.0, - "step": 19591 - }, - { - "epoch": 1.4979452185714013, - "grad_norm": 0.0045213657431304455, - "learning_rate": 0.00019999889383981242, - "loss": 46.0, - "step": 19592 - }, - { - "epoch": 1.498021675554791, - "grad_norm": 0.0038004473317414522, - "learning_rate": 0.0001999988937268325, - "loss": 46.0, - "step": 19593 - }, - { - "epoch": 1.4980981325381806, - "grad_norm": 0.002153785200789571, - "learning_rate": 0.00019999889361384682, - "loss": 46.0, - "step": 19594 - }, - { - "epoch": 1.4981745895215703, - "grad_norm": 0.001252977759577334, - "learning_rate": 0.00019999889350085537, - "loss": 46.0, - "step": 19595 - }, - { - "epoch": 1.49825104650496, - "grad_norm": 0.006883320864289999, - "learning_rate": 0.00019999889338785814, - "loss": 46.0, - "step": 19596 - }, - { - "epoch": 1.4983275034883499, - "grad_norm": 0.0012721295934170485, - "learning_rate": 0.00019999889327485516, - "loss": 46.0, - "step": 19597 - }, - { - "epoch": 1.4984039604717396, - "grad_norm": 0.0016243761638179421, - "learning_rate": 0.00019999889316184638, - "loss": 46.0, - "step": 19598 - }, - { - "epoch": 1.4984804174551294, - "grad_norm": 0.0016149788862094283, - "learning_rate": 0.00019999889304883183, - "loss": 46.0, - "step": 19599 - }, - { - "epoch": 1.4985568744385191, - "grad_norm": 0.03335101529955864, - "learning_rate": 0.00019999889293581153, - "loss": 46.0, - "step": 19600 - }, - { - "epoch": 1.4986333314219087, - "grad_norm": 0.0009267142158932984, - "learning_rate": 0.00019999889282278546, - "loss": 46.0, - "step": 19601 - }, - { - "epoch": 1.4987097884052984, - "grad_norm": 0.0025871070101857185, - "learning_rate": 0.00019999889270975362, - "loss": 46.0, - "step": 19602 - }, - { - "epoch": 1.4987862453886882, - "grad_norm": 0.0006616840837523341, - "learning_rate": 0.000199998892596716, - "loss": 46.0, - "step": 19603 - }, - { - "epoch": 1.498862702372078, - "grad_norm": 0.0013267133617773652, - "learning_rate": 0.0001999988924836726, - "loss": 46.0, - "step": 19604 - }, - { - "epoch": 1.4989391593554675, - "grad_norm": 0.006896014790982008, - "learning_rate": 0.00019999889237062347, - "loss": 46.0, - "step": 19605 - }, - { - "epoch": 1.4990156163388573, - "grad_norm": 0.002806306118145585, - "learning_rate": 0.00019999889225756854, - "loss": 46.0, - "step": 19606 - }, - { - "epoch": 1.499092073322247, - "grad_norm": 0.006940712220966816, - "learning_rate": 0.00019999889214450788, - "loss": 46.0, - "step": 19607 - }, - { - "epoch": 1.4991685303056368, - "grad_norm": 0.0008994821691885591, - "learning_rate": 0.00019999889203144137, - "loss": 46.0, - "step": 19608 - }, - { - "epoch": 1.4992449872890266, - "grad_norm": 0.0034189713187515736, - "learning_rate": 0.00019999889191836916, - "loss": 46.0, - "step": 19609 - }, - { - "epoch": 1.4993214442724163, - "grad_norm": 0.0022839833982288837, - "learning_rate": 0.00019999889180529116, - "loss": 46.0, - "step": 19610 - }, - { - "epoch": 1.499397901255806, - "grad_norm": 0.003786930348724127, - "learning_rate": 0.0001999988916922074, - "loss": 46.0, - "step": 19611 - }, - { - "epoch": 1.4994743582391956, - "grad_norm": 0.0026628703344613314, - "learning_rate": 0.00019999889157911786, - "loss": 46.0, - "step": 19612 - }, - { - "epoch": 1.4995508152225854, - "grad_norm": 0.0037180662620812654, - "learning_rate": 0.00019999889146602256, - "loss": 46.0, - "step": 19613 - }, - { - "epoch": 1.4996272722059751, - "grad_norm": 0.003466070396825671, - "learning_rate": 0.00019999889135292146, - "loss": 46.0, - "step": 19614 - }, - { - "epoch": 1.499703729189365, - "grad_norm": 0.0005706932279281318, - "learning_rate": 0.00019999889123981462, - "loss": 46.0, - "step": 19615 - }, - { - "epoch": 1.4997801861727544, - "grad_norm": 0.0008191209635697305, - "learning_rate": 0.00019999889112670203, - "loss": 46.0, - "step": 19616 - }, - { - "epoch": 1.4998566431561442, - "grad_norm": 0.002530982717871666, - "learning_rate": 0.0001999988910135836, - "loss": 46.0, - "step": 19617 - }, - { - "epoch": 1.499933100139534, - "grad_norm": 0.0015611827839165926, - "learning_rate": 0.00019999889090045948, - "loss": 46.0, - "step": 19618 - }, - { - "epoch": 1.5000095571229237, - "grad_norm": 0.0015027644112706184, - "learning_rate": 0.00019999889078732954, - "loss": 46.0, - "step": 19619 - }, - { - "epoch": 1.5000860141063135, - "grad_norm": 0.0010443015489727259, - "learning_rate": 0.00019999889067419385, - "loss": 46.0, - "step": 19620 - }, - { - "epoch": 1.5001624710897032, - "grad_norm": 0.0006005424074828625, - "learning_rate": 0.0001999988905610524, - "loss": 46.0, - "step": 19621 - }, - { - "epoch": 1.500238928073093, - "grad_norm": 0.0046441699378192425, - "learning_rate": 0.00019999889044790517, - "loss": 46.0, - "step": 19622 - }, - { - "epoch": 1.5003153850564825, - "grad_norm": 0.001452158554457128, - "learning_rate": 0.00019999889033475216, - "loss": 46.0, - "step": 19623 - }, - { - "epoch": 1.5003918420398723, - "grad_norm": 0.009183228015899658, - "learning_rate": 0.0001999988902215934, - "loss": 46.0, - "step": 19624 - }, - { - "epoch": 1.500468299023262, - "grad_norm": 0.001078345812857151, - "learning_rate": 0.00019999889010842884, - "loss": 46.0, - "step": 19625 - }, - { - "epoch": 1.5005447560066516, - "grad_norm": 0.0058320192620158195, - "learning_rate": 0.00019999888999525857, - "loss": 46.0, - "step": 19626 - }, - { - "epoch": 1.5006212129900414, - "grad_norm": 0.0004143244295846671, - "learning_rate": 0.00019999888988208247, - "loss": 46.0, - "step": 19627 - }, - { - "epoch": 1.5006976699734311, - "grad_norm": 0.0020292585249990225, - "learning_rate": 0.0001999988897689006, - "loss": 46.0, - "step": 19628 - }, - { - "epoch": 1.5007741269568209, - "grad_norm": 0.0009465735056437552, - "learning_rate": 0.000199998889655713, - "loss": 46.0, - "step": 19629 - }, - { - "epoch": 1.5008505839402106, - "grad_norm": 0.0013611565809696913, - "learning_rate": 0.0001999988895425196, - "loss": 46.0, - "step": 19630 - }, - { - "epoch": 1.5009270409236004, - "grad_norm": 0.0036437571980059147, - "learning_rate": 0.00019999888942932046, - "loss": 46.0, - "step": 19631 - }, - { - "epoch": 1.5010034979069902, - "grad_norm": 0.0006971657858230174, - "learning_rate": 0.00019999888931611552, - "loss": 46.0, - "step": 19632 - }, - { - "epoch": 1.50107995489038, - "grad_norm": 0.002104934537783265, - "learning_rate": 0.0001999988892029048, - "loss": 46.0, - "step": 19633 - }, - { - "epoch": 1.5011564118737695, - "grad_norm": 0.0009927240898832679, - "learning_rate": 0.00019999888908968835, - "loss": 46.0, - "step": 19634 - }, - { - "epoch": 1.5012328688571592, - "grad_norm": 0.0010329802753403783, - "learning_rate": 0.00019999888897646612, - "loss": 46.0, - "step": 19635 - }, - { - "epoch": 1.501309325840549, - "grad_norm": 0.0012246761471033096, - "learning_rate": 0.00019999888886323812, - "loss": 46.0, - "step": 19636 - }, - { - "epoch": 1.5013857828239385, - "grad_norm": 0.0004657434765249491, - "learning_rate": 0.00019999888875000434, - "loss": 46.0, - "step": 19637 - }, - { - "epoch": 1.5014622398073283, - "grad_norm": 0.002692838665097952, - "learning_rate": 0.00019999888863676482, - "loss": 46.0, - "step": 19638 - }, - { - "epoch": 1.501538696790718, - "grad_norm": 0.0014246393693611026, - "learning_rate": 0.00019999888852351952, - "loss": 46.0, - "step": 19639 - }, - { - "epoch": 1.5016151537741078, - "grad_norm": 0.0006924595218151808, - "learning_rate": 0.00019999888841026843, - "loss": 46.0, - "step": 19640 - }, - { - "epoch": 1.5016916107574976, - "grad_norm": 0.0008101038401946425, - "learning_rate": 0.00019999888829701155, - "loss": 46.0, - "step": 19641 - }, - { - "epoch": 1.5017680677408873, - "grad_norm": 0.0018298059003427625, - "learning_rate": 0.00019999888818374894, - "loss": 46.0, - "step": 19642 - }, - { - "epoch": 1.501844524724277, - "grad_norm": 0.0005969632184132934, - "learning_rate": 0.00019999888807048058, - "loss": 46.0, - "step": 19643 - }, - { - "epoch": 1.5019209817076669, - "grad_norm": 0.0038788749370723963, - "learning_rate": 0.0001999988879572064, - "loss": 46.0, - "step": 19644 - }, - { - "epoch": 1.5019974386910564, - "grad_norm": 0.002375392708927393, - "learning_rate": 0.00019999888784392648, - "loss": 46.0, - "step": 19645 - }, - { - "epoch": 1.5020738956744462, - "grad_norm": 0.0006361521664075553, - "learning_rate": 0.00019999888773064077, - "loss": 46.0, - "step": 19646 - }, - { - "epoch": 1.502150352657836, - "grad_norm": 0.0008676521829329431, - "learning_rate": 0.0001999988876173493, - "loss": 46.0, - "step": 19647 - }, - { - "epoch": 1.5022268096412255, - "grad_norm": 0.0005255150608718395, - "learning_rate": 0.00019999888750405208, - "loss": 46.0, - "step": 19648 - }, - { - "epoch": 1.5023032666246152, - "grad_norm": 0.00031688372837379575, - "learning_rate": 0.00019999888739074905, - "loss": 46.0, - "step": 19649 - }, - { - "epoch": 1.502379723608005, - "grad_norm": 0.00045653030974790454, - "learning_rate": 0.0001999988872774403, - "loss": 46.0, - "step": 19650 - }, - { - "epoch": 1.5024561805913947, - "grad_norm": 0.005554562900215387, - "learning_rate": 0.00019999888716412575, - "loss": 46.0, - "step": 19651 - }, - { - "epoch": 1.5025326375747845, - "grad_norm": 0.0020736439619213343, - "learning_rate": 0.00019999888705080543, - "loss": 46.0, - "step": 19652 - }, - { - "epoch": 1.5026090945581743, - "grad_norm": 0.004107438027858734, - "learning_rate": 0.00019999888693747936, - "loss": 46.0, - "step": 19653 - }, - { - "epoch": 1.502685551541564, - "grad_norm": 0.0016275091329589486, - "learning_rate": 0.0001999988868241475, - "loss": 46.0, - "step": 19654 - }, - { - "epoch": 1.5027620085249538, - "grad_norm": 0.001553536974824965, - "learning_rate": 0.00019999888671080987, - "loss": 46.0, - "step": 19655 - }, - { - "epoch": 1.5028384655083433, - "grad_norm": 0.0013049402041360736, - "learning_rate": 0.0001999988865974665, - "loss": 46.0, - "step": 19656 - }, - { - "epoch": 1.502914922491733, - "grad_norm": 0.0013029269175603986, - "learning_rate": 0.00019999888648411732, - "loss": 46.0, - "step": 19657 - }, - { - "epoch": 1.5029913794751228, - "grad_norm": 0.0009008460910990834, - "learning_rate": 0.00019999888637076238, - "loss": 46.0, - "step": 19658 - }, - { - "epoch": 1.5030678364585124, - "grad_norm": 0.0007676975801587105, - "learning_rate": 0.00019999888625740167, - "loss": 46.0, - "step": 19659 - }, - { - "epoch": 1.5031442934419021, - "grad_norm": 0.003085235133767128, - "learning_rate": 0.00019999888614403524, - "loss": 46.0, - "step": 19660 - }, - { - "epoch": 1.503220750425292, - "grad_norm": 0.0006421257858164608, - "learning_rate": 0.000199998886030663, - "loss": 46.0, - "step": 19661 - }, - { - "epoch": 1.5032972074086817, - "grad_norm": 0.002629264257848263, - "learning_rate": 0.00019999888591728499, - "loss": 46.0, - "step": 19662 - }, - { - "epoch": 1.5033736643920714, - "grad_norm": 0.000902424450032413, - "learning_rate": 0.0001999988858039012, - "loss": 46.0, - "step": 19663 - }, - { - "epoch": 1.5034501213754612, - "grad_norm": 0.002749947365373373, - "learning_rate": 0.00019999888569051166, - "loss": 46.0, - "step": 19664 - }, - { - "epoch": 1.503526578358851, - "grad_norm": 0.0008771510329097509, - "learning_rate": 0.00019999888557711634, - "loss": 46.0, - "step": 19665 - }, - { - "epoch": 1.5036030353422407, - "grad_norm": 0.0018273703753948212, - "learning_rate": 0.00019999888546371527, - "loss": 46.0, - "step": 19666 - }, - { - "epoch": 1.5036794923256303, - "grad_norm": 0.0004970564623363316, - "learning_rate": 0.0001999988853503084, - "loss": 46.0, - "step": 19667 - }, - { - "epoch": 1.50375594930902, - "grad_norm": 0.0012448756024241447, - "learning_rate": 0.00019999888523689578, - "loss": 46.0, - "step": 19668 - }, - { - "epoch": 1.5038324062924098, - "grad_norm": 0.0006642649532295763, - "learning_rate": 0.0001999988851234774, - "loss": 46.0, - "step": 19669 - }, - { - "epoch": 1.5039088632757993, - "grad_norm": 0.000801940041128546, - "learning_rate": 0.00019999888501005323, - "loss": 46.0, - "step": 19670 - }, - { - "epoch": 1.503985320259189, - "grad_norm": 0.0022543715313076973, - "learning_rate": 0.0001999988848966233, - "loss": 46.0, - "step": 19671 - }, - { - "epoch": 1.5040617772425788, - "grad_norm": 0.0014665884664282203, - "learning_rate": 0.0001999988847831876, - "loss": 46.0, - "step": 19672 - }, - { - "epoch": 1.5041382342259686, - "grad_norm": 0.0003214908065274358, - "learning_rate": 0.00019999888466974613, - "loss": 46.0, - "step": 19673 - }, - { - "epoch": 1.5042146912093584, - "grad_norm": 0.001157070160843432, - "learning_rate": 0.00019999888455629887, - "loss": 46.0, - "step": 19674 - }, - { - "epoch": 1.5042911481927481, - "grad_norm": 0.014479960314929485, - "learning_rate": 0.0001999988844428459, - "loss": 46.0, - "step": 19675 - }, - { - "epoch": 1.5043676051761379, - "grad_norm": 0.000796623935457319, - "learning_rate": 0.0001999988843293871, - "loss": 46.0, - "step": 19676 - }, - { - "epoch": 1.5044440621595276, - "grad_norm": 0.0005131034413352609, - "learning_rate": 0.00019999888421592255, - "loss": 46.0, - "step": 19677 - }, - { - "epoch": 1.5045205191429172, - "grad_norm": 0.0030730010475963354, - "learning_rate": 0.00019999888410245222, - "loss": 46.0, - "step": 19678 - }, - { - "epoch": 1.504596976126307, - "grad_norm": 0.0005181065644137561, - "learning_rate": 0.00019999888398897615, - "loss": 46.0, - "step": 19679 - }, - { - "epoch": 1.5046734331096967, - "grad_norm": 0.0009171762503683567, - "learning_rate": 0.00019999888387549428, - "loss": 46.0, - "step": 19680 - }, - { - "epoch": 1.5047498900930862, - "grad_norm": 0.0012362569104880095, - "learning_rate": 0.00019999888376200667, - "loss": 46.0, - "step": 19681 - }, - { - "epoch": 1.504826347076476, - "grad_norm": 0.001237946911714971, - "learning_rate": 0.00019999888364851328, - "loss": 46.0, - "step": 19682 - }, - { - "epoch": 1.5049028040598658, - "grad_norm": 0.0007379649905487895, - "learning_rate": 0.0001999988835350141, - "loss": 46.0, - "step": 19683 - }, - { - "epoch": 1.5049792610432555, - "grad_norm": 0.001304334495216608, - "learning_rate": 0.00019999888342150916, - "loss": 46.0, - "step": 19684 - }, - { - "epoch": 1.5050557180266453, - "grad_norm": 0.0013156364439055324, - "learning_rate": 0.00019999888330799847, - "loss": 46.0, - "step": 19685 - }, - { - "epoch": 1.505132175010035, - "grad_norm": 0.0022053250577300787, - "learning_rate": 0.000199998883194482, - "loss": 46.0, - "step": 19686 - }, - { - "epoch": 1.5052086319934248, - "grad_norm": 0.0009780323598533869, - "learning_rate": 0.00019999888308095973, - "loss": 46.0, - "step": 19687 - }, - { - "epoch": 1.5052850889768146, - "grad_norm": 0.001505318097770214, - "learning_rate": 0.00019999888296743173, - "loss": 46.0, - "step": 19688 - }, - { - "epoch": 1.505361545960204, - "grad_norm": 0.0036668111570179462, - "learning_rate": 0.00019999888285389793, - "loss": 46.0, - "step": 19689 - }, - { - "epoch": 1.5054380029435939, - "grad_norm": 0.0027202495839446783, - "learning_rate": 0.0001999988827403584, - "loss": 46.0, - "step": 19690 - }, - { - "epoch": 1.5055144599269836, - "grad_norm": 0.008233198896050453, - "learning_rate": 0.0001999988826268131, - "loss": 46.0, - "step": 19691 - }, - { - "epoch": 1.5055909169103732, - "grad_norm": 0.0010036530438810587, - "learning_rate": 0.000199998882513262, - "loss": 46.0, - "step": 19692 - }, - { - "epoch": 1.505667373893763, - "grad_norm": 0.0010734088718891144, - "learning_rate": 0.00019999888239970512, - "loss": 46.0, - "step": 19693 - }, - { - "epoch": 1.5057438308771527, - "grad_norm": 0.001148715615272522, - "learning_rate": 0.0001999988822861425, - "loss": 46.0, - "step": 19694 - }, - { - "epoch": 1.5058202878605425, - "grad_norm": 0.0015884805470705032, - "learning_rate": 0.0001999988821725741, - "loss": 46.0, - "step": 19695 - }, - { - "epoch": 1.5058967448439322, - "grad_norm": 0.0012611215934157372, - "learning_rate": 0.00019999888205899996, - "loss": 46.0, - "step": 19696 - }, - { - "epoch": 1.505973201827322, - "grad_norm": 0.008940557949244976, - "learning_rate": 0.00019999888194542002, - "loss": 46.0, - "step": 19697 - }, - { - "epoch": 1.5060496588107117, - "grad_norm": 0.0007845713407732546, - "learning_rate": 0.0001999988818318343, - "loss": 46.0, - "step": 19698 - }, - { - "epoch": 1.5061261157941015, - "grad_norm": 0.0018031923100352287, - "learning_rate": 0.0001999988817182428, - "loss": 46.0, - "step": 19699 - }, - { - "epoch": 1.506202572777491, - "grad_norm": 0.001237313961610198, - "learning_rate": 0.0001999988816046456, - "loss": 46.0, - "step": 19700 - }, - { - "epoch": 1.5062790297608808, - "grad_norm": 0.01438084151595831, - "learning_rate": 0.00019999888149104255, - "loss": 46.0, - "step": 19701 - }, - { - "epoch": 1.5063554867442706, - "grad_norm": 0.0011346966493874788, - "learning_rate": 0.0001999988813774338, - "loss": 46.0, - "step": 19702 - }, - { - "epoch": 1.50643194372766, - "grad_norm": 0.0008850598824210465, - "learning_rate": 0.00019999888126381925, - "loss": 46.0, - "step": 19703 - }, - { - "epoch": 1.5065084007110499, - "grad_norm": 0.0011766334064304829, - "learning_rate": 0.00019999888115019893, - "loss": 46.0, - "step": 19704 - }, - { - "epoch": 1.5065848576944396, - "grad_norm": 0.0009010198409669101, - "learning_rate": 0.00019999888103657283, - "loss": 46.0, - "step": 19705 - }, - { - "epoch": 1.5066613146778294, - "grad_norm": 0.0010107802227139473, - "learning_rate": 0.00019999888092294094, - "loss": 46.0, - "step": 19706 - }, - { - "epoch": 1.5067377716612191, - "grad_norm": 0.0011119669070467353, - "learning_rate": 0.00019999888080930335, - "loss": 46.0, - "step": 19707 - }, - { - "epoch": 1.506814228644609, - "grad_norm": 0.0011931966291740537, - "learning_rate": 0.00019999888069565993, - "loss": 46.0, - "step": 19708 - }, - { - "epoch": 1.5068906856279987, - "grad_norm": 0.0009842299623414874, - "learning_rate": 0.00019999888058201077, - "loss": 46.0, - "step": 19709 - }, - { - "epoch": 1.5069671426113884, - "grad_norm": 0.0014397201593965292, - "learning_rate": 0.0001999988804683558, - "loss": 46.0, - "step": 19710 - }, - { - "epoch": 1.507043599594778, - "grad_norm": 0.0009246676345355809, - "learning_rate": 0.00019999888035469513, - "loss": 46.0, - "step": 19711 - }, - { - "epoch": 1.5071200565781677, - "grad_norm": 0.010779195465147495, - "learning_rate": 0.00019999888024102865, - "loss": 46.0, - "step": 19712 - }, - { - "epoch": 1.5071965135615575, - "grad_norm": 0.000838337407913059, - "learning_rate": 0.0001999988801273564, - "loss": 46.0, - "step": 19713 - }, - { - "epoch": 1.507272970544947, - "grad_norm": 0.0021979818120598793, - "learning_rate": 0.00019999888001367836, - "loss": 46.0, - "step": 19714 - }, - { - "epoch": 1.5073494275283368, - "grad_norm": 0.0008863132097758353, - "learning_rate": 0.0001999988798999946, - "loss": 46.0, - "step": 19715 - }, - { - "epoch": 1.5074258845117265, - "grad_norm": 0.0014464984415099025, - "learning_rate": 0.00019999887978630504, - "loss": 46.0, - "step": 19716 - }, - { - "epoch": 1.5075023414951163, - "grad_norm": 0.001759712235070765, - "learning_rate": 0.00019999887967260972, - "loss": 46.0, - "step": 19717 - }, - { - "epoch": 1.507578798478506, - "grad_norm": 0.005756087601184845, - "learning_rate": 0.00019999887955890865, - "loss": 46.0, - "step": 19718 - }, - { - "epoch": 1.5076552554618958, - "grad_norm": 0.0007727245683781803, - "learning_rate": 0.00019999887944520178, - "loss": 46.0, - "step": 19719 - }, - { - "epoch": 1.5077317124452856, - "grad_norm": 0.0018911795923486352, - "learning_rate": 0.00019999887933148914, - "loss": 46.0, - "step": 19720 - }, - { - "epoch": 1.5078081694286753, - "grad_norm": 0.0010581546230241656, - "learning_rate": 0.00019999887921777072, - "loss": 46.0, - "step": 19721 - }, - { - "epoch": 1.5078846264120649, - "grad_norm": 0.004947170149534941, - "learning_rate": 0.00019999887910404656, - "loss": 46.0, - "step": 19722 - }, - { - "epoch": 1.5079610833954546, - "grad_norm": 0.005442941095679998, - "learning_rate": 0.00019999887899031663, - "loss": 46.0, - "step": 19723 - }, - { - "epoch": 1.5080375403788442, - "grad_norm": 0.0009650898864492774, - "learning_rate": 0.00019999887887658092, - "loss": 46.0, - "step": 19724 - }, - { - "epoch": 1.508113997362234, - "grad_norm": 0.0005430366145446897, - "learning_rate": 0.00019999887876283944, - "loss": 46.0, - "step": 19725 - }, - { - "epoch": 1.5081904543456237, - "grad_norm": 0.0025717385578900576, - "learning_rate": 0.00019999887864909218, - "loss": 46.0, - "step": 19726 - }, - { - "epoch": 1.5082669113290135, - "grad_norm": 0.005509734619408846, - "learning_rate": 0.00019999887853533915, - "loss": 46.0, - "step": 19727 - }, - { - "epoch": 1.5083433683124032, - "grad_norm": 0.0022998247295618057, - "learning_rate": 0.00019999887842158035, - "loss": 46.0, - "step": 19728 - }, - { - "epoch": 1.508419825295793, - "grad_norm": 0.0012572853593155742, - "learning_rate": 0.0001999988783078158, - "loss": 46.0, - "step": 19729 - }, - { - "epoch": 1.5084962822791828, - "grad_norm": 0.0007268390618264675, - "learning_rate": 0.00019999887819404548, - "loss": 46.0, - "step": 19730 - }, - { - "epoch": 1.5085727392625725, - "grad_norm": 0.0019113688031211495, - "learning_rate": 0.0001999988780802694, - "loss": 46.0, - "step": 19731 - }, - { - "epoch": 1.5086491962459623, - "grad_norm": 0.0024600664619356394, - "learning_rate": 0.00019999887796648752, - "loss": 46.0, - "step": 19732 - }, - { - "epoch": 1.5087256532293518, - "grad_norm": 0.0015150255057960749, - "learning_rate": 0.0001999988778526999, - "loss": 46.0, - "step": 19733 - }, - { - "epoch": 1.5088021102127416, - "grad_norm": 0.0005535635282285511, - "learning_rate": 0.0001999988777389065, - "loss": 46.0, - "step": 19734 - }, - { - "epoch": 1.5088785671961311, - "grad_norm": 0.0010429403046146035, - "learning_rate": 0.0001999988776251073, - "loss": 46.0, - "step": 19735 - }, - { - "epoch": 1.5089550241795209, - "grad_norm": 0.001272533554583788, - "learning_rate": 0.00019999887751130237, - "loss": 46.0, - "step": 19736 - }, - { - "epoch": 1.5090314811629106, - "grad_norm": 0.0007790165254846215, - "learning_rate": 0.0001999988773974917, - "loss": 46.0, - "step": 19737 - }, - { - "epoch": 1.5091079381463004, - "grad_norm": 0.0024776593782007694, - "learning_rate": 0.00019999887728367518, - "loss": 46.0, - "step": 19738 - }, - { - "epoch": 1.5091843951296902, - "grad_norm": 0.0009201911743730307, - "learning_rate": 0.00019999887716985293, - "loss": 46.0, - "step": 19739 - }, - { - "epoch": 1.50926085211308, - "grad_norm": 0.0014332858845591545, - "learning_rate": 0.00019999887705602492, - "loss": 46.0, - "step": 19740 - }, - { - "epoch": 1.5093373090964697, - "grad_norm": 0.0010543070966377854, - "learning_rate": 0.00019999887694219112, - "loss": 46.0, - "step": 19741 - }, - { - "epoch": 1.5094137660798594, - "grad_norm": 0.002088436158373952, - "learning_rate": 0.00019999887682835158, - "loss": 46.0, - "step": 19742 - }, - { - "epoch": 1.5094902230632492, - "grad_norm": 0.0014168419875204563, - "learning_rate": 0.00019999887671450623, - "loss": 46.0, - "step": 19743 - }, - { - "epoch": 1.5095666800466387, - "grad_norm": 0.005782569758594036, - "learning_rate": 0.00019999887660065513, - "loss": 46.0, - "step": 19744 - }, - { - "epoch": 1.5096431370300285, - "grad_norm": 0.0011829511495307088, - "learning_rate": 0.00019999887648679827, - "loss": 46.0, - "step": 19745 - }, - { - "epoch": 1.509719594013418, - "grad_norm": 0.0007292631780728698, - "learning_rate": 0.00019999887637293565, - "loss": 46.0, - "step": 19746 - }, - { - "epoch": 1.5097960509968078, - "grad_norm": 0.0018247476546093822, - "learning_rate": 0.00019999887625906724, - "loss": 46.0, - "step": 19747 - }, - { - "epoch": 1.5098725079801976, - "grad_norm": 0.0010375638958066702, - "learning_rate": 0.00019999887614519305, - "loss": 46.0, - "step": 19748 - }, - { - "epoch": 1.5099489649635873, - "grad_norm": 0.0006080397288314998, - "learning_rate": 0.00019999887603131312, - "loss": 46.0, - "step": 19749 - }, - { - "epoch": 1.510025421946977, - "grad_norm": 0.0009214449673891068, - "learning_rate": 0.00019999887591742738, - "loss": 46.0, - "step": 19750 - }, - { - "epoch": 1.5101018789303668, - "grad_norm": 0.00048188489745371044, - "learning_rate": 0.0001999988758035359, - "loss": 46.0, - "step": 19751 - }, - { - "epoch": 1.5101783359137566, - "grad_norm": 0.0010743956081569195, - "learning_rate": 0.00019999887568963867, - "loss": 46.0, - "step": 19752 - }, - { - "epoch": 1.5102547928971464, - "grad_norm": 0.0010220925323665142, - "learning_rate": 0.00019999887557573565, - "loss": 46.0, - "step": 19753 - }, - { - "epoch": 1.510331249880536, - "grad_norm": 0.0004688806366175413, - "learning_rate": 0.00019999887546182684, - "loss": 46.0, - "step": 19754 - }, - { - "epoch": 1.5104077068639257, - "grad_norm": 0.0008299436885863543, - "learning_rate": 0.0001999988753479123, - "loss": 46.0, - "step": 19755 - }, - { - "epoch": 1.5104841638473154, - "grad_norm": 0.006093153730034828, - "learning_rate": 0.00019999887523399195, - "loss": 46.0, - "step": 19756 - }, - { - "epoch": 1.510560620830705, - "grad_norm": 0.0011010019807145, - "learning_rate": 0.00019999887512006588, - "loss": 46.0, - "step": 19757 - }, - { - "epoch": 1.5106370778140947, - "grad_norm": 0.0006050923257134855, - "learning_rate": 0.000199998875006134, - "loss": 46.0, - "step": 19758 - }, - { - "epoch": 1.5107135347974845, - "grad_norm": 0.0036128507927060127, - "learning_rate": 0.00019999887489219635, - "loss": 46.0, - "step": 19759 - }, - { - "epoch": 1.5107899917808743, - "grad_norm": 0.0005929625476710498, - "learning_rate": 0.00019999887477825293, - "loss": 46.0, - "step": 19760 - }, - { - "epoch": 1.510866448764264, - "grad_norm": 0.002497768495231867, - "learning_rate": 0.00019999887466430377, - "loss": 46.0, - "step": 19761 - }, - { - "epoch": 1.5109429057476538, - "grad_norm": 0.0007081806543283165, - "learning_rate": 0.0001999988745503488, - "loss": 46.0, - "step": 19762 - }, - { - "epoch": 1.5110193627310435, - "grad_norm": 0.0013643552083522081, - "learning_rate": 0.0001999988744363881, - "loss": 46.0, - "step": 19763 - }, - { - "epoch": 1.5110958197144333, - "grad_norm": 0.000939934398047626, - "learning_rate": 0.00019999887432242162, - "loss": 46.0, - "step": 19764 - }, - { - "epoch": 1.5111722766978228, - "grad_norm": 0.0010947646806016564, - "learning_rate": 0.00019999887420844937, - "loss": 46.0, - "step": 19765 - }, - { - "epoch": 1.5112487336812126, - "grad_norm": 0.0008887078147381544, - "learning_rate": 0.00019999887409447134, - "loss": 46.0, - "step": 19766 - }, - { - "epoch": 1.5113251906646024, - "grad_norm": 0.00033823965350165963, - "learning_rate": 0.00019999887398048754, - "loss": 46.0, - "step": 19767 - }, - { - "epoch": 1.511401647647992, - "grad_norm": 0.00070320995291695, - "learning_rate": 0.000199998873866498, - "loss": 46.0, - "step": 19768 - }, - { - "epoch": 1.5114781046313817, - "grad_norm": 0.00603750953450799, - "learning_rate": 0.00019999887375250265, - "loss": 46.0, - "step": 19769 - }, - { - "epoch": 1.5115545616147714, - "grad_norm": 0.0007358907023444772, - "learning_rate": 0.00019999887363850153, - "loss": 46.0, - "step": 19770 - }, - { - "epoch": 1.5116310185981612, - "grad_norm": 0.0017141265561804175, - "learning_rate": 0.00019999887352449469, - "loss": 46.0, - "step": 19771 - }, - { - "epoch": 1.511707475581551, - "grad_norm": 0.0012082947650924325, - "learning_rate": 0.00019999887341048205, - "loss": 46.0, - "step": 19772 - }, - { - "epoch": 1.5117839325649407, - "grad_norm": 0.002776392502710223, - "learning_rate": 0.00019999887329646363, - "loss": 46.0, - "step": 19773 - }, - { - "epoch": 1.5118603895483305, - "grad_norm": 0.0014532109489664435, - "learning_rate": 0.00019999887318243947, - "loss": 46.0, - "step": 19774 - }, - { - "epoch": 1.5119368465317202, - "grad_norm": 0.008840211667120457, - "learning_rate": 0.0001999988730684095, - "loss": 46.0, - "step": 19775 - }, - { - "epoch": 1.5120133035151098, - "grad_norm": 0.002735511865466833, - "learning_rate": 0.00019999887295437378, - "loss": 46.0, - "step": 19776 - }, - { - "epoch": 1.5120897604984995, - "grad_norm": 0.0020389084238559008, - "learning_rate": 0.0001999988728403323, - "loss": 46.0, - "step": 19777 - }, - { - "epoch": 1.5121662174818893, - "grad_norm": 0.004298189654946327, - "learning_rate": 0.00019999887272628507, - "loss": 46.0, - "step": 19778 - }, - { - "epoch": 1.5122426744652788, - "grad_norm": 0.0005396513734012842, - "learning_rate": 0.00019999887261223202, - "loss": 46.0, - "step": 19779 - }, - { - "epoch": 1.5123191314486686, - "grad_norm": 0.0008666576468385756, - "learning_rate": 0.00019999887249817322, - "loss": 46.0, - "step": 19780 - }, - { - "epoch": 1.5123955884320583, - "grad_norm": 0.005232043098658323, - "learning_rate": 0.00019999887238410867, - "loss": 46.0, - "step": 19781 - }, - { - "epoch": 1.512472045415448, - "grad_norm": 0.001679981709457934, - "learning_rate": 0.00019999887227003832, - "loss": 46.0, - "step": 19782 - }, - { - "epoch": 1.5125485023988379, - "grad_norm": 0.0016673285281285644, - "learning_rate": 0.0001999988721559622, - "loss": 46.0, - "step": 19783 - }, - { - "epoch": 1.5126249593822276, - "grad_norm": 0.005448025651276112, - "learning_rate": 0.00019999887204188033, - "loss": 46.0, - "step": 19784 - }, - { - "epoch": 1.5127014163656174, - "grad_norm": 0.0016198459779843688, - "learning_rate": 0.0001999988719277927, - "loss": 46.0, - "step": 19785 - }, - { - "epoch": 1.5127778733490072, - "grad_norm": 0.0003494654083624482, - "learning_rate": 0.0001999988718136993, - "loss": 46.0, - "step": 19786 - }, - { - "epoch": 1.5128543303323967, - "grad_norm": 0.0015478498535230756, - "learning_rate": 0.0001999988716996001, - "loss": 46.0, - "step": 19787 - }, - { - "epoch": 1.5129307873157865, - "grad_norm": 0.0007455118466168642, - "learning_rate": 0.00019999887158549516, - "loss": 46.0, - "step": 19788 - }, - { - "epoch": 1.5130072442991762, - "grad_norm": 0.0004042270011268556, - "learning_rate": 0.00019999887147138443, - "loss": 46.0, - "step": 19789 - }, - { - "epoch": 1.5130837012825658, - "grad_norm": 0.005060931202024221, - "learning_rate": 0.00019999887135726795, - "loss": 46.0, - "step": 19790 - }, - { - "epoch": 1.5131601582659555, - "grad_norm": 0.03275544196367264, - "learning_rate": 0.0001999988712431457, - "loss": 46.0, - "step": 19791 - }, - { - "epoch": 1.5132366152493453, - "grad_norm": 0.0015940743032842875, - "learning_rate": 0.00019999887112901767, - "loss": 46.0, - "step": 19792 - }, - { - "epoch": 1.513313072232735, - "grad_norm": 0.001091941841877997, - "learning_rate": 0.00019999887101488387, - "loss": 46.0, - "step": 19793 - }, - { - "epoch": 1.5133895292161248, - "grad_norm": 0.003730242606252432, - "learning_rate": 0.0001999988709007443, - "loss": 46.0, - "step": 19794 - }, - { - "epoch": 1.5134659861995146, - "grad_norm": 0.0009056238923221827, - "learning_rate": 0.00019999887078659895, - "loss": 46.0, - "step": 19795 - }, - { - "epoch": 1.5135424431829043, - "grad_norm": 0.000796723528765142, - "learning_rate": 0.00019999887067244788, - "loss": 46.0, - "step": 19796 - }, - { - "epoch": 1.513618900166294, - "grad_norm": 0.0008455587667413056, - "learning_rate": 0.000199998870558291, - "loss": 46.0, - "step": 19797 - }, - { - "epoch": 1.5136953571496836, - "grad_norm": 0.0009108296362683177, - "learning_rate": 0.00019999887044412832, - "loss": 46.0, - "step": 19798 - }, - { - "epoch": 1.5137718141330734, - "grad_norm": 0.0012390697374939919, - "learning_rate": 0.0001999988703299599, - "loss": 46.0, - "step": 19799 - }, - { - "epoch": 1.5138482711164631, - "grad_norm": 0.0021715802140533924, - "learning_rate": 0.00019999887021578575, - "loss": 46.0, - "step": 19800 - }, - { - "epoch": 1.5139247280998527, - "grad_norm": 0.0022135537583380938, - "learning_rate": 0.0001999988701016058, - "loss": 46.0, - "step": 19801 - }, - { - "epoch": 1.5140011850832424, - "grad_norm": 0.0004373548144940287, - "learning_rate": 0.00019999886998742005, - "loss": 46.0, - "step": 19802 - }, - { - "epoch": 1.5140776420666322, - "grad_norm": 0.0006472120294347405, - "learning_rate": 0.00019999886987322858, - "loss": 46.0, - "step": 19803 - }, - { - "epoch": 1.514154099050022, - "grad_norm": 0.0003856404800899327, - "learning_rate": 0.00019999886975903132, - "loss": 46.0, - "step": 19804 - }, - { - "epoch": 1.5142305560334117, - "grad_norm": 0.0009939444717019796, - "learning_rate": 0.00019999886964482827, - "loss": 46.0, - "step": 19805 - }, - { - "epoch": 1.5143070130168015, - "grad_norm": 0.0025241789408028126, - "learning_rate": 0.00019999886953061947, - "loss": 46.0, - "step": 19806 - }, - { - "epoch": 1.5143834700001912, - "grad_norm": 0.002797146327793598, - "learning_rate": 0.00019999886941640492, - "loss": 46.0, - "step": 19807 - }, - { - "epoch": 1.514459926983581, - "grad_norm": 0.002307654358446598, - "learning_rate": 0.00019999886930218458, - "loss": 46.0, - "step": 19808 - }, - { - "epoch": 1.5145363839669705, - "grad_norm": 0.016860200092196465, - "learning_rate": 0.00019999886918795848, - "loss": 46.0, - "step": 19809 - }, - { - "epoch": 1.5146128409503603, - "grad_norm": 0.004295938648283482, - "learning_rate": 0.0001999988690737266, - "loss": 46.0, - "step": 19810 - }, - { - "epoch": 1.51468929793375, - "grad_norm": 0.0009333654306828976, - "learning_rate": 0.00019999886895948895, - "loss": 46.0, - "step": 19811 - }, - { - "epoch": 1.5147657549171396, - "grad_norm": 0.0008975553791970015, - "learning_rate": 0.0001999988688452455, - "loss": 46.0, - "step": 19812 - }, - { - "epoch": 1.5148422119005294, - "grad_norm": 0.001588331419043243, - "learning_rate": 0.00019999886873099632, - "loss": 46.0, - "step": 19813 - }, - { - "epoch": 1.5149186688839191, - "grad_norm": 0.0015657334588468075, - "learning_rate": 0.00019999886861674136, - "loss": 46.0, - "step": 19814 - }, - { - "epoch": 1.514995125867309, - "grad_norm": 0.0013467337703332305, - "learning_rate": 0.00019999886850248066, - "loss": 46.0, - "step": 19815 - }, - { - "epoch": 1.5150715828506987, - "grad_norm": 0.0026081660762429237, - "learning_rate": 0.00019999886838821418, - "loss": 46.0, - "step": 19816 - }, - { - "epoch": 1.5151480398340884, - "grad_norm": 0.0019258181564509869, - "learning_rate": 0.0001999988682739419, - "loss": 46.0, - "step": 19817 - }, - { - "epoch": 1.5152244968174782, - "grad_norm": 0.005441577173769474, - "learning_rate": 0.0001999988681596639, - "loss": 46.0, - "step": 19818 - }, - { - "epoch": 1.515300953800868, - "grad_norm": 0.0010827280348166823, - "learning_rate": 0.00019999886804538007, - "loss": 46.0, - "step": 19819 - }, - { - "epoch": 1.5153774107842575, - "grad_norm": 0.0018788437591865659, - "learning_rate": 0.0001999988679310905, - "loss": 46.0, - "step": 19820 - }, - { - "epoch": 1.5154538677676472, - "grad_norm": 0.0027129354421049356, - "learning_rate": 0.00019999886781679518, - "loss": 46.0, - "step": 19821 - }, - { - "epoch": 1.515530324751037, - "grad_norm": 0.0003765798464883119, - "learning_rate": 0.00019999886770249406, - "loss": 46.0, - "step": 19822 - }, - { - "epoch": 1.5156067817344265, - "grad_norm": 0.004867858719080687, - "learning_rate": 0.00019999886758818717, - "loss": 46.0, - "step": 19823 - }, - { - "epoch": 1.5156832387178163, - "grad_norm": 0.00627858517691493, - "learning_rate": 0.00019999886747387453, - "loss": 46.0, - "step": 19824 - }, - { - "epoch": 1.515759695701206, - "grad_norm": 0.0004949085414409637, - "learning_rate": 0.00019999886735955612, - "loss": 46.0, - "step": 19825 - }, - { - "epoch": 1.5158361526845958, - "grad_norm": 0.00047921508667059243, - "learning_rate": 0.00019999886724523193, - "loss": 46.0, - "step": 19826 - }, - { - "epoch": 1.5159126096679856, - "grad_norm": 0.003148201620206237, - "learning_rate": 0.00019999886713090197, - "loss": 46.0, - "step": 19827 - }, - { - "epoch": 1.5159890666513753, - "grad_norm": 0.0010536847403272986, - "learning_rate": 0.00019999886701656624, - "loss": 46.0, - "step": 19828 - }, - { - "epoch": 1.516065523634765, - "grad_norm": 0.0011822348460555077, - "learning_rate": 0.00019999886690222476, - "loss": 46.0, - "step": 19829 - }, - { - "epoch": 1.5161419806181549, - "grad_norm": 0.005962466821074486, - "learning_rate": 0.00019999886678787748, - "loss": 46.0, - "step": 19830 - }, - { - "epoch": 1.5162184376015444, - "grad_norm": 0.0014695709105581045, - "learning_rate": 0.00019999886667352446, - "loss": 46.0, - "step": 19831 - }, - { - "epoch": 1.5162948945849342, - "grad_norm": 0.0014733609277755022, - "learning_rate": 0.00019999886655916563, - "loss": 46.0, - "step": 19832 - }, - { - "epoch": 1.516371351568324, - "grad_norm": 0.0011888614390045404, - "learning_rate": 0.00019999886644480106, - "loss": 46.0, - "step": 19833 - }, - { - "epoch": 1.5164478085517135, - "grad_norm": 0.0009545217035338283, - "learning_rate": 0.0001999988663304307, - "loss": 46.0, - "step": 19834 - }, - { - "epoch": 1.5165242655351032, - "grad_norm": 0.007244802080094814, - "learning_rate": 0.00019999886621605462, - "loss": 46.0, - "step": 19835 - }, - { - "epoch": 1.516600722518493, - "grad_norm": 0.0058004772290587425, - "learning_rate": 0.00019999886610167273, - "loss": 46.0, - "step": 19836 - }, - { - "epoch": 1.5166771795018827, - "grad_norm": 0.0005330651765689254, - "learning_rate": 0.0001999988659872851, - "loss": 46.0, - "step": 19837 - }, - { - "epoch": 1.5167536364852725, - "grad_norm": 0.0052987937815487385, - "learning_rate": 0.00019999886587289165, - "loss": 46.0, - "step": 19838 - }, - { - "epoch": 1.5168300934686623, - "grad_norm": 0.001190151902846992, - "learning_rate": 0.00019999886575849246, - "loss": 46.0, - "step": 19839 - }, - { - "epoch": 1.516906550452052, - "grad_norm": 0.0006867880583740771, - "learning_rate": 0.00019999886564408753, - "loss": 46.0, - "step": 19840 - }, - { - "epoch": 1.5169830074354418, - "grad_norm": 0.0002723844372667372, - "learning_rate": 0.00019999886552967677, - "loss": 46.0, - "step": 19841 - }, - { - "epoch": 1.5170594644188313, - "grad_norm": 0.005987521726638079, - "learning_rate": 0.0001999988654152603, - "loss": 46.0, - "step": 19842 - }, - { - "epoch": 1.517135921402221, - "grad_norm": 0.002486526034772396, - "learning_rate": 0.000199998865300838, - "loss": 46.0, - "step": 19843 - }, - { - "epoch": 1.5172123783856108, - "grad_norm": 0.0009461008012294769, - "learning_rate": 0.00019999886518641, - "loss": 46.0, - "step": 19844 - }, - { - "epoch": 1.5172888353690004, - "grad_norm": 0.003981179557740688, - "learning_rate": 0.0001999988650719762, - "loss": 46.0, - "step": 19845 - }, - { - "epoch": 1.5173652923523901, - "grad_norm": 0.0012268285499885678, - "learning_rate": 0.00019999886495753662, - "loss": 46.0, - "step": 19846 - }, - { - "epoch": 1.51744174933578, - "grad_norm": 0.0007840031757950783, - "learning_rate": 0.00019999886484309127, - "loss": 46.0, - "step": 19847 - }, - { - "epoch": 1.5175182063191697, - "grad_norm": 0.0009198356419801712, - "learning_rate": 0.00019999886472864015, - "loss": 46.0, - "step": 19848 - }, - { - "epoch": 1.5175946633025594, - "grad_norm": 0.0009282948449254036, - "learning_rate": 0.00019999886461418326, - "loss": 46.0, - "step": 19849 - }, - { - "epoch": 1.5176711202859492, - "grad_norm": 0.0022731777280569077, - "learning_rate": 0.0001999988644997206, - "loss": 46.0, - "step": 19850 - }, - { - "epoch": 1.517747577269339, - "grad_norm": 0.0013992219464853406, - "learning_rate": 0.00019999886438525218, - "loss": 46.0, - "step": 19851 - }, - { - "epoch": 1.5178240342527287, - "grad_norm": 0.0009773056954145432, - "learning_rate": 0.000199998864270778, - "loss": 46.0, - "step": 19852 - }, - { - "epoch": 1.5179004912361183, - "grad_norm": 0.0011099019320681691, - "learning_rate": 0.00019999886415629804, - "loss": 46.0, - "step": 19853 - }, - { - "epoch": 1.517976948219508, - "grad_norm": 0.0029436720069497824, - "learning_rate": 0.0001999988640418123, - "loss": 46.0, - "step": 19854 - }, - { - "epoch": 1.5180534052028976, - "grad_norm": 0.0017158595146611333, - "learning_rate": 0.0001999988639273208, - "loss": 46.0, - "step": 19855 - }, - { - "epoch": 1.5181298621862873, - "grad_norm": 0.007042300421744585, - "learning_rate": 0.00019999886381282352, - "loss": 46.0, - "step": 19856 - }, - { - "epoch": 1.518206319169677, - "grad_norm": 0.0006916631828062236, - "learning_rate": 0.0001999988636983205, - "loss": 46.0, - "step": 19857 - }, - { - "epoch": 1.5182827761530668, - "grad_norm": 0.0009145598160102963, - "learning_rate": 0.0001999988635838117, - "loss": 46.0, - "step": 19858 - }, - { - "epoch": 1.5183592331364566, - "grad_norm": 0.0010668050963431597, - "learning_rate": 0.00019999886346929713, - "loss": 46.0, - "step": 19859 - }, - { - "epoch": 1.5184356901198464, - "grad_norm": 0.002123479265719652, - "learning_rate": 0.00019999886335477678, - "loss": 46.0, - "step": 19860 - }, - { - "epoch": 1.5185121471032361, - "grad_norm": 0.0005922801210545003, - "learning_rate": 0.00019999886324025066, - "loss": 46.0, - "step": 19861 - }, - { - "epoch": 1.5185886040866259, - "grad_norm": 0.0012449015630409122, - "learning_rate": 0.00019999886312571877, - "loss": 46.0, - "step": 19862 - }, - { - "epoch": 1.5186650610700156, - "grad_norm": 0.0007242189603857696, - "learning_rate": 0.0001999988630111811, - "loss": 46.0, - "step": 19863 - }, - { - "epoch": 1.5187415180534052, - "grad_norm": 0.00037359242560341954, - "learning_rate": 0.0001999988628966377, - "loss": 46.0, - "step": 19864 - }, - { - "epoch": 1.518817975036795, - "grad_norm": 0.0015802932903170586, - "learning_rate": 0.00019999886278208848, - "loss": 46.0, - "step": 19865 - }, - { - "epoch": 1.5188944320201845, - "grad_norm": 0.0010674564400687814, - "learning_rate": 0.00019999886266753355, - "loss": 46.0, - "step": 19866 - }, - { - "epoch": 1.5189708890035742, - "grad_norm": 0.0022782045416533947, - "learning_rate": 0.0001999988625529728, - "loss": 46.0, - "step": 19867 - }, - { - "epoch": 1.519047345986964, - "grad_norm": 0.001007290557026863, - "learning_rate": 0.00019999886243840629, - "loss": 46.0, - "step": 19868 - }, - { - "epoch": 1.5191238029703538, - "grad_norm": 0.0009211021242663264, - "learning_rate": 0.00019999886232383403, - "loss": 46.0, - "step": 19869 - }, - { - "epoch": 1.5192002599537435, - "grad_norm": 0.0013268961338326335, - "learning_rate": 0.00019999886220925598, - "loss": 46.0, - "step": 19870 - }, - { - "epoch": 1.5192767169371333, - "grad_norm": 0.0005880377138964832, - "learning_rate": 0.00019999886209467218, - "loss": 46.0, - "step": 19871 - }, - { - "epoch": 1.519353173920523, - "grad_norm": 0.005156193859875202, - "learning_rate": 0.00019999886198008259, - "loss": 46.0, - "step": 19872 - }, - { - "epoch": 1.5194296309039128, - "grad_norm": 0.0006965100765228271, - "learning_rate": 0.00019999886186548724, - "loss": 46.0, - "step": 19873 - }, - { - "epoch": 1.5195060878873026, - "grad_norm": 0.000582943030167371, - "learning_rate": 0.00019999886175088612, - "loss": 46.0, - "step": 19874 - }, - { - "epoch": 1.519582544870692, - "grad_norm": 0.000550413562450558, - "learning_rate": 0.00019999886163627926, - "loss": 46.0, - "step": 19875 - }, - { - "epoch": 1.5196590018540819, - "grad_norm": 0.003940922673791647, - "learning_rate": 0.0001999988615216666, - "loss": 46.0, - "step": 19876 - }, - { - "epoch": 1.5197354588374714, - "grad_norm": 0.0006562622729688883, - "learning_rate": 0.00019999886140704815, - "loss": 46.0, - "step": 19877 - }, - { - "epoch": 1.5198119158208612, - "grad_norm": 0.0006262643728405237, - "learning_rate": 0.00019999886129242394, - "loss": 46.0, - "step": 19878 - }, - { - "epoch": 1.519888372804251, - "grad_norm": 0.0006632976001128554, - "learning_rate": 0.000199998861177794, - "loss": 46.0, - "step": 19879 - }, - { - "epoch": 1.5199648297876407, - "grad_norm": 0.0005016404902562499, - "learning_rate": 0.00019999886106315825, - "loss": 46.0, - "step": 19880 - }, - { - "epoch": 1.5200412867710305, - "grad_norm": 0.0022389309015125036, - "learning_rate": 0.00019999886094851678, - "loss": 46.0, - "step": 19881 - }, - { - "epoch": 1.5201177437544202, - "grad_norm": 0.0009701202507130802, - "learning_rate": 0.00019999886083386947, - "loss": 46.0, - "step": 19882 - }, - { - "epoch": 1.52019420073781, - "grad_norm": 0.0020565709564834833, - "learning_rate": 0.00019999886071921642, - "loss": 46.0, - "step": 19883 - }, - { - "epoch": 1.5202706577211997, - "grad_norm": 0.0010162091348320246, - "learning_rate": 0.00019999886060455765, - "loss": 46.0, - "step": 19884 - }, - { - "epoch": 1.5203471147045893, - "grad_norm": 0.0009761560359038413, - "learning_rate": 0.00019999886048989305, - "loss": 46.0, - "step": 19885 - }, - { - "epoch": 1.520423571687979, - "grad_norm": 0.000699535827152431, - "learning_rate": 0.0001999988603752227, - "loss": 46.0, - "step": 19886 - }, - { - "epoch": 1.5205000286713688, - "grad_norm": 0.0007347283535636961, - "learning_rate": 0.00019999886026054656, - "loss": 46.0, - "step": 19887 - }, - { - "epoch": 1.5205764856547583, - "grad_norm": 0.004425846505910158, - "learning_rate": 0.0001999988601458647, - "loss": 46.0, - "step": 19888 - }, - { - "epoch": 1.520652942638148, - "grad_norm": 0.004591597244143486, - "learning_rate": 0.00019999886003117703, - "loss": 46.0, - "step": 19889 - }, - { - "epoch": 1.5207293996215379, - "grad_norm": 0.002363857114687562, - "learning_rate": 0.00019999885991648362, - "loss": 46.0, - "step": 19890 - }, - { - "epoch": 1.5208058566049276, - "grad_norm": 0.002615045290440321, - "learning_rate": 0.0001999988598017844, - "loss": 46.0, - "step": 19891 - }, - { - "epoch": 1.5208823135883174, - "grad_norm": 0.0018921426963061094, - "learning_rate": 0.00019999885968707942, - "loss": 46.0, - "step": 19892 - }, - { - "epoch": 1.5209587705717071, - "grad_norm": 0.0009470746736042202, - "learning_rate": 0.0001999988595723687, - "loss": 46.0, - "step": 19893 - }, - { - "epoch": 1.521035227555097, - "grad_norm": 0.0013329192297533154, - "learning_rate": 0.0001999988594576522, - "loss": 46.0, - "step": 19894 - }, - { - "epoch": 1.5211116845384867, - "grad_norm": 0.0018824591534212232, - "learning_rate": 0.0001999988593429299, - "loss": 46.0, - "step": 19895 - }, - { - "epoch": 1.5211881415218762, - "grad_norm": 0.0019770858343690634, - "learning_rate": 0.00019999885922820189, - "loss": 46.0, - "step": 19896 - }, - { - "epoch": 1.521264598505266, - "grad_norm": 0.0010360676096752286, - "learning_rate": 0.00019999885911346806, - "loss": 46.0, - "step": 19897 - }, - { - "epoch": 1.5213410554886557, - "grad_norm": 0.0022221815306693316, - "learning_rate": 0.0001999988589987285, - "loss": 46.0, - "step": 19898 - }, - { - "epoch": 1.5214175124720453, - "grad_norm": 0.002833024365827441, - "learning_rate": 0.00019999885888398312, - "loss": 46.0, - "step": 19899 - }, - { - "epoch": 1.521493969455435, - "grad_norm": 0.0014749213587492704, - "learning_rate": 0.000199998858769232, - "loss": 46.0, - "step": 19900 - }, - { - "epoch": 1.5215704264388248, - "grad_norm": 0.0015110863605514169, - "learning_rate": 0.0001999988586544751, - "loss": 46.0, - "step": 19901 - }, - { - "epoch": 1.5216468834222145, - "grad_norm": 0.0012851685751229525, - "learning_rate": 0.00019999885853971245, - "loss": 46.0, - "step": 19902 - }, - { - "epoch": 1.5217233404056043, - "grad_norm": 0.0008480880642309785, - "learning_rate": 0.000199998858424944, - "loss": 46.0, - "step": 19903 - }, - { - "epoch": 1.521799797388994, - "grad_norm": 0.0013265982270240784, - "learning_rate": 0.00019999885831016982, - "loss": 46.0, - "step": 19904 - }, - { - "epoch": 1.5218762543723838, - "grad_norm": 0.0019406657665967941, - "learning_rate": 0.00019999885819538987, - "loss": 46.0, - "step": 19905 - }, - { - "epoch": 1.5219527113557736, - "grad_norm": 0.003281353274360299, - "learning_rate": 0.00019999885808060414, - "loss": 46.0, - "step": 19906 - }, - { - "epoch": 1.5220291683391631, - "grad_norm": 0.0010717188706621528, - "learning_rate": 0.0001999988579658126, - "loss": 46.0, - "step": 19907 - }, - { - "epoch": 1.522105625322553, - "grad_norm": 0.0017535180086269975, - "learning_rate": 0.00019999885785101533, - "loss": 46.0, - "step": 19908 - }, - { - "epoch": 1.5221820823059427, - "grad_norm": 0.0034215175546705723, - "learning_rate": 0.00019999885773621228, - "loss": 46.0, - "step": 19909 - }, - { - "epoch": 1.5222585392893322, - "grad_norm": 0.0012956680729985237, - "learning_rate": 0.00019999885762140348, - "loss": 46.0, - "step": 19910 - }, - { - "epoch": 1.522334996272722, - "grad_norm": 0.0026397292967885733, - "learning_rate": 0.00019999885750658888, - "loss": 46.0, - "step": 19911 - }, - { - "epoch": 1.5224114532561117, - "grad_norm": 0.0011051550973206758, - "learning_rate": 0.0001999988573917685, - "loss": 46.0, - "step": 19912 - }, - { - "epoch": 1.5224879102395015, - "grad_norm": 0.00885475892573595, - "learning_rate": 0.00019999885727694242, - "loss": 46.0, - "step": 19913 - }, - { - "epoch": 1.5225643672228912, - "grad_norm": 0.0034929709509015083, - "learning_rate": 0.0001999988571621105, - "loss": 46.0, - "step": 19914 - }, - { - "epoch": 1.522640824206281, - "grad_norm": 0.0005535755190066993, - "learning_rate": 0.00019999885704727284, - "loss": 46.0, - "step": 19915 - }, - { - "epoch": 1.5227172811896708, - "grad_norm": 0.0007332603563554585, - "learning_rate": 0.0001999988569324294, - "loss": 46.0, - "step": 19916 - }, - { - "epoch": 1.5227937381730605, - "grad_norm": 0.0023711537942290306, - "learning_rate": 0.00019999885681758022, - "loss": 46.0, - "step": 19917 - }, - { - "epoch": 1.52287019515645, - "grad_norm": 0.0012392589123919606, - "learning_rate": 0.00019999885670272526, - "loss": 46.0, - "step": 19918 - }, - { - "epoch": 1.5229466521398398, - "grad_norm": 0.0011114421067759395, - "learning_rate": 0.0001999988565878645, - "loss": 46.0, - "step": 19919 - }, - { - "epoch": 1.5230231091232296, - "grad_norm": 0.0013310209615156054, - "learning_rate": 0.000199998856472998, - "loss": 46.0, - "step": 19920 - }, - { - "epoch": 1.5230995661066191, - "grad_norm": 0.005071427207440138, - "learning_rate": 0.00019999885635812572, - "loss": 46.0, - "step": 19921 - }, - { - "epoch": 1.5231760230900089, - "grad_norm": 0.0017212532693520188, - "learning_rate": 0.00019999885624324767, - "loss": 46.0, - "step": 19922 - }, - { - "epoch": 1.5232524800733986, - "grad_norm": 0.0013851674739271402, - "learning_rate": 0.00019999885612836385, - "loss": 46.0, - "step": 19923 - }, - { - "epoch": 1.5233289370567884, - "grad_norm": 0.001247505540959537, - "learning_rate": 0.00019999885601347425, - "loss": 46.0, - "step": 19924 - }, - { - "epoch": 1.5234053940401782, - "grad_norm": 0.0015065122861415148, - "learning_rate": 0.0001999988558985789, - "loss": 46.0, - "step": 19925 - }, - { - "epoch": 1.523481851023568, - "grad_norm": 0.000630170397926122, - "learning_rate": 0.0001999988557836778, - "loss": 46.0, - "step": 19926 - }, - { - "epoch": 1.5235583080069577, - "grad_norm": 0.0005325032398104668, - "learning_rate": 0.00019999885566877087, - "loss": 46.0, - "step": 19927 - }, - { - "epoch": 1.5236347649903474, - "grad_norm": 0.0009795564692467451, - "learning_rate": 0.0001999988555538582, - "loss": 46.0, - "step": 19928 - }, - { - "epoch": 1.523711221973737, - "grad_norm": 0.0048400405794382095, - "learning_rate": 0.00019999885543893977, - "loss": 46.0, - "step": 19929 - }, - { - "epoch": 1.5237876789571267, - "grad_norm": 0.0008271417464129627, - "learning_rate": 0.00019999885532401556, - "loss": 46.0, - "step": 19930 - }, - { - "epoch": 1.5238641359405165, - "grad_norm": 0.002531171077862382, - "learning_rate": 0.0001999988552090856, - "loss": 46.0, - "step": 19931 - }, - { - "epoch": 1.523940592923906, - "grad_norm": 0.002875386504456401, - "learning_rate": 0.00019999885509414987, - "loss": 46.0, - "step": 19932 - }, - { - "epoch": 1.5240170499072958, - "grad_norm": 0.001793494215235114, - "learning_rate": 0.00019999885497920837, - "loss": 46.0, - "step": 19933 - }, - { - "epoch": 1.5240935068906856, - "grad_norm": 0.0012160597834736109, - "learning_rate": 0.00019999885486426106, - "loss": 46.0, - "step": 19934 - }, - { - "epoch": 1.5241699638740753, - "grad_norm": 0.0020554724615067244, - "learning_rate": 0.000199998854749308, - "loss": 46.0, - "step": 19935 - }, - { - "epoch": 1.524246420857465, - "grad_norm": 0.0023764052893966436, - "learning_rate": 0.0001999988546343492, - "loss": 46.0, - "step": 19936 - }, - { - "epoch": 1.5243228778408549, - "grad_norm": 0.0009446701151318848, - "learning_rate": 0.0001999988545193846, - "loss": 46.0, - "step": 19937 - }, - { - "epoch": 1.5243993348242446, - "grad_norm": 0.000792208593338728, - "learning_rate": 0.00019999885440441425, - "loss": 46.0, - "step": 19938 - }, - { - "epoch": 1.5244757918076344, - "grad_norm": 0.0006929371156729758, - "learning_rate": 0.00019999885428943813, - "loss": 46.0, - "step": 19939 - }, - { - "epoch": 1.524552248791024, - "grad_norm": 0.0008702266495674849, - "learning_rate": 0.00019999885417445624, - "loss": 46.0, - "step": 19940 - }, - { - "epoch": 1.5246287057744137, - "grad_norm": 0.001397086656652391, - "learning_rate": 0.00019999885405946858, - "loss": 46.0, - "step": 19941 - }, - { - "epoch": 1.5247051627578034, - "grad_norm": 0.013896516524255276, - "learning_rate": 0.00019999885394447514, - "loss": 46.0, - "step": 19942 - }, - { - "epoch": 1.524781619741193, - "grad_norm": 0.0003093729610554874, - "learning_rate": 0.00019999885382947593, - "loss": 46.0, - "step": 19943 - }, - { - "epoch": 1.5248580767245827, - "grad_norm": 0.0013066452229395509, - "learning_rate": 0.00019999885371447095, - "loss": 46.0, - "step": 19944 - }, - { - "epoch": 1.5249345337079725, - "grad_norm": 0.0005022439872846007, - "learning_rate": 0.0001999988535994602, - "loss": 46.0, - "step": 19945 - }, - { - "epoch": 1.5250109906913623, - "grad_norm": 0.0031297921668738127, - "learning_rate": 0.0001999988534844437, - "loss": 46.0, - "step": 19946 - }, - { - "epoch": 1.525087447674752, - "grad_norm": 0.0038235660176724195, - "learning_rate": 0.00019999885336942138, - "loss": 46.0, - "step": 19947 - }, - { - "epoch": 1.5251639046581418, - "grad_norm": 0.0017294284189119935, - "learning_rate": 0.00019999885325439336, - "loss": 46.0, - "step": 19948 - }, - { - "epoch": 1.5252403616415315, - "grad_norm": 0.0014719052705913782, - "learning_rate": 0.0001999988531393595, - "loss": 46.0, - "step": 19949 - }, - { - "epoch": 1.5253168186249213, - "grad_norm": 0.0021882113069295883, - "learning_rate": 0.00019999885302431994, - "loss": 46.0, - "step": 19950 - }, - { - "epoch": 1.5253932756083108, - "grad_norm": 0.001612881664186716, - "learning_rate": 0.00019999885290927457, - "loss": 46.0, - "step": 19951 - }, - { - "epoch": 1.5254697325917006, - "grad_norm": 0.0007277579279616475, - "learning_rate": 0.00019999885279422345, - "loss": 46.0, - "step": 19952 - }, - { - "epoch": 1.5255461895750904, - "grad_norm": 0.0017669732915237546, - "learning_rate": 0.00019999885267916654, - "loss": 46.0, - "step": 19953 - }, - { - "epoch": 1.52562264655848, - "grad_norm": 0.0012967296643182635, - "learning_rate": 0.0001999988525641039, - "loss": 46.0, - "step": 19954 - }, - { - "epoch": 1.5256991035418697, - "grad_norm": 0.010033457539975643, - "learning_rate": 0.00019999885244903544, - "loss": 46.0, - "step": 19955 - }, - { - "epoch": 1.5257755605252594, - "grad_norm": 0.0009488180512562394, - "learning_rate": 0.00019999885233396123, - "loss": 46.0, - "step": 19956 - }, - { - "epoch": 1.5258520175086492, - "grad_norm": 0.0005836423952132463, - "learning_rate": 0.00019999885221888124, - "loss": 46.0, - "step": 19957 - }, - { - "epoch": 1.525928474492039, - "grad_norm": 0.0040775868110358715, - "learning_rate": 0.0001999988521037955, - "loss": 46.0, - "step": 19958 - }, - { - "epoch": 1.5260049314754287, - "grad_norm": 0.012530580163002014, - "learning_rate": 0.00019999885198870398, - "loss": 46.0, - "step": 19959 - }, - { - "epoch": 1.5260813884588185, - "grad_norm": 0.0007836756412871182, - "learning_rate": 0.0001999988518736067, - "loss": 46.0, - "step": 19960 - }, - { - "epoch": 1.5261578454422082, - "grad_norm": 0.0009908212814480066, - "learning_rate": 0.00019999885175850363, - "loss": 46.0, - "step": 19961 - }, - { - "epoch": 1.5262343024255978, - "grad_norm": 0.002022718545049429, - "learning_rate": 0.00019999885164339484, - "loss": 46.0, - "step": 19962 - }, - { - "epoch": 1.5263107594089875, - "grad_norm": 0.0010559409856796265, - "learning_rate": 0.00019999885152828024, - "loss": 46.0, - "step": 19963 - }, - { - "epoch": 1.5263872163923773, - "grad_norm": 0.0021514487452805042, - "learning_rate": 0.00019999885141315987, - "loss": 46.0, - "step": 19964 - }, - { - "epoch": 1.5264636733757668, - "grad_norm": 0.0011832613963633776, - "learning_rate": 0.00019999885129803373, - "loss": 46.0, - "step": 19965 - }, - { - "epoch": 1.5265401303591566, - "grad_norm": 0.0013538129860535264, - "learning_rate": 0.00019999885118290184, - "loss": 46.0, - "step": 19966 - }, - { - "epoch": 1.5266165873425463, - "grad_norm": 0.002653195522725582, - "learning_rate": 0.00019999885106776417, - "loss": 46.0, - "step": 19967 - }, - { - "epoch": 1.526693044325936, - "grad_norm": 0.000602905114647001, - "learning_rate": 0.0001999988509526207, - "loss": 46.0, - "step": 19968 - }, - { - "epoch": 1.5267695013093259, - "grad_norm": 0.0027083309832960367, - "learning_rate": 0.0001999988508374715, - "loss": 46.0, - "step": 19969 - }, - { - "epoch": 1.5268459582927156, - "grad_norm": 0.0029829933773726225, - "learning_rate": 0.00019999885072231654, - "loss": 46.0, - "step": 19970 - }, - { - "epoch": 1.5269224152761054, - "grad_norm": 0.0008750649867579341, - "learning_rate": 0.00019999885060715576, - "loss": 46.0, - "step": 19971 - }, - { - "epoch": 1.5269988722594952, - "grad_norm": 0.0007617862429469824, - "learning_rate": 0.00019999885049198929, - "loss": 46.0, - "step": 19972 - }, - { - "epoch": 1.5270753292428847, - "grad_norm": 0.0010432065464556217, - "learning_rate": 0.00019999885037681698, - "loss": 46.0, - "step": 19973 - }, - { - "epoch": 1.5271517862262745, - "grad_norm": 0.0012251805746927857, - "learning_rate": 0.0001999988502616389, - "loss": 46.0, - "step": 19974 - }, - { - "epoch": 1.5272282432096642, - "grad_norm": 0.0009974499698728323, - "learning_rate": 0.00019999885014645508, - "loss": 46.0, - "step": 19975 - }, - { - "epoch": 1.5273047001930538, - "grad_norm": 0.00042729786946438253, - "learning_rate": 0.00019999885003126551, - "loss": 46.0, - "step": 19976 - }, - { - "epoch": 1.5273811571764435, - "grad_norm": 0.0006383644649758935, - "learning_rate": 0.00019999884991607014, - "loss": 46.0, - "step": 19977 - }, - { - "epoch": 1.5274576141598333, - "grad_norm": 0.0004573434707708657, - "learning_rate": 0.000199998849800869, - "loss": 46.0, - "step": 19978 - }, - { - "epoch": 1.527534071143223, - "grad_norm": 0.0008911238401196897, - "learning_rate": 0.00019999884968566209, - "loss": 46.0, - "step": 19979 - }, - { - "epoch": 1.5276105281266128, - "grad_norm": 0.001186067471280694, - "learning_rate": 0.0001999988495704494, - "loss": 46.0, - "step": 19980 - }, - { - "epoch": 1.5276869851100026, - "grad_norm": 0.001374711631797254, - "learning_rate": 0.000199998849455231, - "loss": 46.0, - "step": 19981 - }, - { - "epoch": 1.5277634420933923, - "grad_norm": 0.0020042192190885544, - "learning_rate": 0.00019999884934000678, - "loss": 46.0, - "step": 19982 - }, - { - "epoch": 1.527839899076782, - "grad_norm": 0.0006101718172430992, - "learning_rate": 0.0001999988492247768, - "loss": 46.0, - "step": 19983 - }, - { - "epoch": 1.5279163560601716, - "grad_norm": 0.0026644417084753513, - "learning_rate": 0.00019999884910954104, - "loss": 46.0, - "step": 19984 - }, - { - "epoch": 1.5279928130435614, - "grad_norm": 0.0017771368147805333, - "learning_rate": 0.00019999884899429954, - "loss": 46.0, - "step": 19985 - }, - { - "epoch": 1.528069270026951, - "grad_norm": 0.0013287786860018969, - "learning_rate": 0.00019999884887905223, - "loss": 46.0, - "step": 19986 - }, - { - "epoch": 1.5281457270103407, - "grad_norm": 0.002531816717237234, - "learning_rate": 0.00019999884876379916, - "loss": 46.0, - "step": 19987 - }, - { - "epoch": 1.5282221839937304, - "grad_norm": 0.00046768636093474925, - "learning_rate": 0.00019999884864854034, - "loss": 46.0, - "step": 19988 - }, - { - "epoch": 1.5282986409771202, - "grad_norm": 0.0010499063646420836, - "learning_rate": 0.00019999884853327574, - "loss": 46.0, - "step": 19989 - }, - { - "epoch": 1.52837509796051, - "grad_norm": 0.0012143907370045781, - "learning_rate": 0.00019999884841800537, - "loss": 46.0, - "step": 19990 - }, - { - "epoch": 1.5284515549438997, - "grad_norm": 0.001626562443561852, - "learning_rate": 0.00019999884830272923, - "loss": 46.0, - "step": 19991 - }, - { - "epoch": 1.5285280119272895, - "grad_norm": 0.004600904881954193, - "learning_rate": 0.00019999884818744734, - "loss": 46.0, - "step": 19992 - }, - { - "epoch": 1.5286044689106792, - "grad_norm": 0.012294813990592957, - "learning_rate": 0.00019999884807215965, - "loss": 46.0, - "step": 19993 - }, - { - "epoch": 1.528680925894069, - "grad_norm": 0.0006995838484726846, - "learning_rate": 0.00019999884795686621, - "loss": 46.0, - "step": 19994 - }, - { - "epoch": 1.5287573828774585, - "grad_norm": 0.007329861633479595, - "learning_rate": 0.000199998847841567, - "loss": 46.0, - "step": 19995 - }, - { - "epoch": 1.5288338398608483, - "grad_norm": 0.000449772720457986, - "learning_rate": 0.00019999884772626202, - "loss": 46.0, - "step": 19996 - }, - { - "epoch": 1.5289102968442378, - "grad_norm": 0.0013943086378276348, - "learning_rate": 0.00019999884761095127, - "loss": 46.0, - "step": 19997 - }, - { - "epoch": 1.5289867538276276, - "grad_norm": 0.0011242623440921307, - "learning_rate": 0.00019999884749563474, - "loss": 46.0, - "step": 19998 - }, - { - "epoch": 1.5290632108110174, - "grad_norm": 0.0015805922448635101, - "learning_rate": 0.00019999884738031244, - "loss": 46.0, - "step": 19999 - }, - { - "epoch": 1.5291396677944071, - "grad_norm": 0.0013938507763668895, - "learning_rate": 0.0001999988472649844, - "loss": 46.0, - "step": 20000 - }, - { - "epoch": 1.529216124777797, - "grad_norm": 0.0010079655330628157, - "learning_rate": 0.00019999884714965056, - "loss": 46.0, - "step": 20001 - }, - { - "epoch": 1.5292925817611867, - "grad_norm": 0.0005260030156932771, - "learning_rate": 0.00019999884703431094, - "loss": 46.0, - "step": 20002 - }, - { - "epoch": 1.5293690387445764, - "grad_norm": 0.002525837393477559, - "learning_rate": 0.00019999884691896557, - "loss": 46.0, - "step": 20003 - }, - { - "epoch": 1.5294454957279662, - "grad_norm": 0.0007301702862605453, - "learning_rate": 0.00019999884680361446, - "loss": 46.0, - "step": 20004 - }, - { - "epoch": 1.529521952711356, - "grad_norm": 0.003996156621724367, - "learning_rate": 0.00019999884668825754, - "loss": 46.0, - "step": 20005 - }, - { - "epoch": 1.5295984096947455, - "grad_norm": 0.0011266169603914022, - "learning_rate": 0.00019999884657289488, - "loss": 46.0, - "step": 20006 - }, - { - "epoch": 1.5296748666781352, - "grad_norm": 0.0011759857879951596, - "learning_rate": 0.00019999884645752642, - "loss": 46.0, - "step": 20007 - }, - { - "epoch": 1.5297513236615248, - "grad_norm": 0.0010739343706518412, - "learning_rate": 0.0001999988463421522, - "loss": 46.0, - "step": 20008 - }, - { - "epoch": 1.5298277806449145, - "grad_norm": 0.0019064394291490316, - "learning_rate": 0.0001999988462267722, - "loss": 46.0, - "step": 20009 - }, - { - "epoch": 1.5299042376283043, - "grad_norm": 0.0009081195457838476, - "learning_rate": 0.00019999884611138647, - "loss": 46.0, - "step": 20010 - }, - { - "epoch": 1.529980694611694, - "grad_norm": 0.0017312541604042053, - "learning_rate": 0.00019999884599599494, - "loss": 46.0, - "step": 20011 - }, - { - "epoch": 1.5300571515950838, - "grad_norm": 0.000858985586091876, - "learning_rate": 0.00019999884588059764, - "loss": 46.0, - "step": 20012 - }, - { - "epoch": 1.5301336085784736, - "grad_norm": 0.0005749059491790831, - "learning_rate": 0.0001999988457651946, - "loss": 46.0, - "step": 20013 - }, - { - "epoch": 1.5302100655618633, - "grad_norm": 0.0017046816647052765, - "learning_rate": 0.00019999884564978574, - "loss": 46.0, - "step": 20014 - }, - { - "epoch": 1.530286522545253, - "grad_norm": 0.0017081631813198328, - "learning_rate": 0.00019999884553437115, - "loss": 46.0, - "step": 20015 - }, - { - "epoch": 1.5303629795286429, - "grad_norm": 0.000795937841758132, - "learning_rate": 0.00019999884541895078, - "loss": 46.0, - "step": 20016 - }, - { - "epoch": 1.5304394365120324, - "grad_norm": 0.0007827605004422367, - "learning_rate": 0.00019999884530352464, - "loss": 46.0, - "step": 20017 - }, - { - "epoch": 1.5305158934954222, - "grad_norm": 0.0008895203936845064, - "learning_rate": 0.00019999884518809272, - "loss": 46.0, - "step": 20018 - }, - { - "epoch": 1.5305923504788117, - "grad_norm": 0.0006428833003155887, - "learning_rate": 0.00019999884507265506, - "loss": 46.0, - "step": 20019 - }, - { - "epoch": 1.5306688074622015, - "grad_norm": 0.0007569598965346813, - "learning_rate": 0.0001999988449572116, - "loss": 46.0, - "step": 20020 - }, - { - "epoch": 1.5307452644455912, - "grad_norm": 0.0015019163256511092, - "learning_rate": 0.00019999884484176236, - "loss": 46.0, - "step": 20021 - }, - { - "epoch": 1.530821721428981, - "grad_norm": 0.0016402594046667218, - "learning_rate": 0.00019999884472630738, - "loss": 46.0, - "step": 20022 - }, - { - "epoch": 1.5308981784123707, - "grad_norm": 0.0005509136826731265, - "learning_rate": 0.00019999884461084663, - "loss": 46.0, - "step": 20023 - }, - { - "epoch": 1.5309746353957605, - "grad_norm": 0.005408469121903181, - "learning_rate": 0.0001999988444953801, - "loss": 46.0, - "step": 20024 - }, - { - "epoch": 1.5310510923791503, - "grad_norm": 0.0016071624122560024, - "learning_rate": 0.00019999884437990782, - "loss": 46.0, - "step": 20025 - }, - { - "epoch": 1.53112754936254, - "grad_norm": 0.002094559371471405, - "learning_rate": 0.00019999884426442975, - "loss": 46.0, - "step": 20026 - }, - { - "epoch": 1.5312040063459296, - "grad_norm": 0.008855417370796204, - "learning_rate": 0.00019999884414894593, - "loss": 46.0, - "step": 20027 - }, - { - "epoch": 1.5312804633293193, - "grad_norm": 0.0005544427549466491, - "learning_rate": 0.00019999884403345633, - "loss": 46.0, - "step": 20028 - }, - { - "epoch": 1.531356920312709, - "grad_norm": 0.0009228939888998866, - "learning_rate": 0.00019999884391796094, - "loss": 46.0, - "step": 20029 - }, - { - "epoch": 1.5314333772960986, - "grad_norm": 0.0007227399037219584, - "learning_rate": 0.00019999884380245982, - "loss": 46.0, - "step": 20030 - }, - { - "epoch": 1.5315098342794884, - "grad_norm": 0.001131095690652728, - "learning_rate": 0.00019999884368695288, - "loss": 46.0, - "step": 20031 - }, - { - "epoch": 1.5315862912628782, - "grad_norm": 0.0007045971578918397, - "learning_rate": 0.00019999884357144022, - "loss": 46.0, - "step": 20032 - }, - { - "epoch": 1.531662748246268, - "grad_norm": 0.0007540889200754464, - "learning_rate": 0.00019999884345592176, - "loss": 46.0, - "step": 20033 - }, - { - "epoch": 1.5317392052296577, - "grad_norm": 0.005252319388091564, - "learning_rate": 0.00019999884334039752, - "loss": 46.0, - "step": 20034 - }, - { - "epoch": 1.5318156622130474, - "grad_norm": 0.0013515064492821693, - "learning_rate": 0.00019999884322486754, - "loss": 46.0, - "step": 20035 - }, - { - "epoch": 1.5318921191964372, - "grad_norm": 0.0013417586451396346, - "learning_rate": 0.00019999884310933179, - "loss": 46.0, - "step": 20036 - }, - { - "epoch": 1.531968576179827, - "grad_norm": 0.0006675160257145762, - "learning_rate": 0.00019999884299379026, - "loss": 46.0, - "step": 20037 - }, - { - "epoch": 1.5320450331632165, - "grad_norm": 0.0012373344507068396, - "learning_rate": 0.00019999884287824298, - "loss": 46.0, - "step": 20038 - }, - { - "epoch": 1.5321214901466063, - "grad_norm": 0.002380277495831251, - "learning_rate": 0.0001999988427626899, - "loss": 46.0, - "step": 20039 - }, - { - "epoch": 1.532197947129996, - "grad_norm": 0.005655379965901375, - "learning_rate": 0.00019999884264713106, - "loss": 46.0, - "step": 20040 - }, - { - "epoch": 1.5322744041133856, - "grad_norm": 0.0016430789837613702, - "learning_rate": 0.00019999884253156646, - "loss": 46.0, - "step": 20041 - }, - { - "epoch": 1.5323508610967753, - "grad_norm": 0.0007284291787073016, - "learning_rate": 0.00019999884241599607, - "loss": 46.0, - "step": 20042 - }, - { - "epoch": 1.532427318080165, - "grad_norm": 0.0016675929073244333, - "learning_rate": 0.00019999884230041993, - "loss": 46.0, - "step": 20043 - }, - { - "epoch": 1.5325037750635548, - "grad_norm": 0.0006671563605777919, - "learning_rate": 0.00019999884218483801, - "loss": 46.0, - "step": 20044 - }, - { - "epoch": 1.5325802320469446, - "grad_norm": 0.0015132996486499906, - "learning_rate": 0.00019999884206925033, - "loss": 46.0, - "step": 20045 - }, - { - "epoch": 1.5326566890303344, - "grad_norm": 0.0006749456515535712, - "learning_rate": 0.0001999988419536569, - "loss": 46.0, - "step": 20046 - }, - { - "epoch": 1.5327331460137241, - "grad_norm": 0.00534209655597806, - "learning_rate": 0.00019999884183805766, - "loss": 46.0, - "step": 20047 - }, - { - "epoch": 1.5328096029971139, - "grad_norm": 0.0009820647537708282, - "learning_rate": 0.00019999884172245268, - "loss": 46.0, - "step": 20048 - }, - { - "epoch": 1.5328860599805034, - "grad_norm": 0.0027369658928364515, - "learning_rate": 0.0001999988416068419, - "loss": 46.0, - "step": 20049 - }, - { - "epoch": 1.5329625169638932, - "grad_norm": 0.0008373742457479239, - "learning_rate": 0.00019999884149122537, - "loss": 46.0, - "step": 20050 - }, - { - "epoch": 1.533038973947283, - "grad_norm": 0.004651294555515051, - "learning_rate": 0.00019999884137560306, - "loss": 46.0, - "step": 20051 - }, - { - "epoch": 1.5331154309306725, - "grad_norm": 0.004516802728176117, - "learning_rate": 0.00019999884125997502, - "loss": 46.0, - "step": 20052 - }, - { - "epoch": 1.5331918879140622, - "grad_norm": 0.0005958874826319516, - "learning_rate": 0.00019999884114434114, - "loss": 46.0, - "step": 20053 - }, - { - "epoch": 1.533268344897452, - "grad_norm": 0.0006082784384489059, - "learning_rate": 0.00019999884102870157, - "loss": 46.0, - "step": 20054 - }, - { - "epoch": 1.5333448018808418, - "grad_norm": 0.0018846383318305016, - "learning_rate": 0.00019999884091305618, - "loss": 46.0, - "step": 20055 - }, - { - "epoch": 1.5334212588642315, - "grad_norm": 0.0006319937529042363, - "learning_rate": 0.00019999884079740504, - "loss": 46.0, - "step": 20056 - }, - { - "epoch": 1.5334977158476213, - "grad_norm": 0.0004855721490457654, - "learning_rate": 0.00019999884068174812, - "loss": 46.0, - "step": 20057 - }, - { - "epoch": 1.533574172831011, - "grad_norm": 0.0020239066798239946, - "learning_rate": 0.00019999884056608544, - "loss": 46.0, - "step": 20058 - }, - { - "epoch": 1.5336506298144008, - "grad_norm": 0.003815114265307784, - "learning_rate": 0.000199998840450417, - "loss": 46.0, - "step": 20059 - }, - { - "epoch": 1.5337270867977904, - "grad_norm": 0.004444007761776447, - "learning_rate": 0.00019999884033474277, - "loss": 46.0, - "step": 20060 - }, - { - "epoch": 1.5338035437811801, - "grad_norm": 0.0005429207230918109, - "learning_rate": 0.00019999884021906276, - "loss": 46.0, - "step": 20061 - }, - { - "epoch": 1.5338800007645699, - "grad_norm": 0.000800707726739347, - "learning_rate": 0.000199998840103377, - "loss": 46.0, - "step": 20062 - }, - { - "epoch": 1.5339564577479594, - "grad_norm": 0.0013812314718961716, - "learning_rate": 0.00019999883998768548, - "loss": 46.0, - "step": 20063 - }, - { - "epoch": 1.5340329147313492, - "grad_norm": 0.0008911380427889526, - "learning_rate": 0.00019999883987198818, - "loss": 46.0, - "step": 20064 - }, - { - "epoch": 1.534109371714739, - "grad_norm": 0.0007954948232509196, - "learning_rate": 0.00019999883975628508, - "loss": 46.0, - "step": 20065 - }, - { - "epoch": 1.5341858286981287, - "grad_norm": 0.0008387573761865497, - "learning_rate": 0.00019999883964057626, - "loss": 46.0, - "step": 20066 - }, - { - "epoch": 1.5342622856815185, - "grad_norm": 0.0012238919734954834, - "learning_rate": 0.00019999883952486163, - "loss": 46.0, - "step": 20067 - }, - { - "epoch": 1.5343387426649082, - "grad_norm": 0.0007302372832782567, - "learning_rate": 0.00019999883940914127, - "loss": 46.0, - "step": 20068 - }, - { - "epoch": 1.534415199648298, - "grad_norm": 0.0011135886888951063, - "learning_rate": 0.00019999883929341513, - "loss": 46.0, - "step": 20069 - }, - { - "epoch": 1.5344916566316877, - "grad_norm": 0.0008723319042474031, - "learning_rate": 0.0001999988391776832, - "loss": 46.0, - "step": 20070 - }, - { - "epoch": 1.5345681136150773, - "grad_norm": 0.0005100786220282316, - "learning_rate": 0.00019999883906194553, - "loss": 46.0, - "step": 20071 - }, - { - "epoch": 1.534644570598467, - "grad_norm": 0.0008733382564969361, - "learning_rate": 0.00019999883894620206, - "loss": 46.0, - "step": 20072 - }, - { - "epoch": 1.5347210275818568, - "grad_norm": 0.0013452529674395919, - "learning_rate": 0.00019999883883045283, - "loss": 46.0, - "step": 20073 - }, - { - "epoch": 1.5347974845652463, - "grad_norm": 0.003339141607284546, - "learning_rate": 0.00019999883871469782, - "loss": 46.0, - "step": 20074 - }, - { - "epoch": 1.534873941548636, - "grad_norm": 0.0014340804191306233, - "learning_rate": 0.00019999883859893707, - "loss": 46.0, - "step": 20075 - }, - { - "epoch": 1.5349503985320259, - "grad_norm": 0.011916508898139, - "learning_rate": 0.00019999883848317054, - "loss": 46.0, - "step": 20076 - }, - { - "epoch": 1.5350268555154156, - "grad_norm": 0.0015547092771157622, - "learning_rate": 0.00019999883836739824, - "loss": 46.0, - "step": 20077 - }, - { - "epoch": 1.5351033124988054, - "grad_norm": 0.00432837288826704, - "learning_rate": 0.00019999883825162017, - "loss": 46.0, - "step": 20078 - }, - { - "epoch": 1.5351797694821951, - "grad_norm": 0.0004542127426248044, - "learning_rate": 0.00019999883813583632, - "loss": 46.0, - "step": 20079 - }, - { - "epoch": 1.535256226465585, - "grad_norm": 0.002094621304422617, - "learning_rate": 0.0001999988380200467, - "loss": 46.0, - "step": 20080 - }, - { - "epoch": 1.5353326834489747, - "grad_norm": 0.0033219624310731888, - "learning_rate": 0.0001999988379042513, - "loss": 46.0, - "step": 20081 - }, - { - "epoch": 1.5354091404323642, - "grad_norm": 0.0010463689686730504, - "learning_rate": 0.00019999883778845016, - "loss": 46.0, - "step": 20082 - }, - { - "epoch": 1.535485597415754, - "grad_norm": 0.0016901319613680243, - "learning_rate": 0.00019999883767264325, - "loss": 46.0, - "step": 20083 - }, - { - "epoch": 1.5355620543991437, - "grad_norm": 0.001070959959179163, - "learning_rate": 0.00019999883755683056, - "loss": 46.0, - "step": 20084 - }, - { - "epoch": 1.5356385113825333, - "grad_norm": 0.002557846950367093, - "learning_rate": 0.00019999883744101208, - "loss": 46.0, - "step": 20085 - }, - { - "epoch": 1.535714968365923, - "grad_norm": 0.0010747192427515984, - "learning_rate": 0.00019999883732518787, - "loss": 46.0, - "step": 20086 - }, - { - "epoch": 1.5357914253493128, - "grad_norm": 0.0009346804581582546, - "learning_rate": 0.0001999988372093579, - "loss": 46.0, - "step": 20087 - }, - { - "epoch": 1.5358678823327025, - "grad_norm": 0.0012615770101547241, - "learning_rate": 0.0001999988370935221, - "loss": 46.0, - "step": 20088 - }, - { - "epoch": 1.5359443393160923, - "grad_norm": 0.0010398214217275381, - "learning_rate": 0.00019999883697768058, - "loss": 46.0, - "step": 20089 - }, - { - "epoch": 1.536020796299482, - "grad_norm": 0.00044385544606484473, - "learning_rate": 0.00019999883686183328, - "loss": 46.0, - "step": 20090 - }, - { - "epoch": 1.5360972532828718, - "grad_norm": 0.002626515459269285, - "learning_rate": 0.00019999883674598018, - "loss": 46.0, - "step": 20091 - }, - { - "epoch": 1.5361737102662616, - "grad_norm": 0.0006823960575275123, - "learning_rate": 0.00019999883663012136, - "loss": 46.0, - "step": 20092 - }, - { - "epoch": 1.5362501672496511, - "grad_norm": 0.0021544608753174543, - "learning_rate": 0.00019999883651425674, - "loss": 46.0, - "step": 20093 - }, - { - "epoch": 1.536326624233041, - "grad_norm": 0.0011576790129765868, - "learning_rate": 0.00019999883639838635, - "loss": 46.0, - "step": 20094 - }, - { - "epoch": 1.5364030812164307, - "grad_norm": 0.0009752301266416907, - "learning_rate": 0.0001999988362825102, - "loss": 46.0, - "step": 20095 - }, - { - "epoch": 1.5364795381998202, - "grad_norm": 0.01034811232239008, - "learning_rate": 0.0001999988361666283, - "loss": 46.0, - "step": 20096 - }, - { - "epoch": 1.53655599518321, - "grad_norm": 0.0010354340774938464, - "learning_rate": 0.00019999883605074058, - "loss": 46.0, - "step": 20097 - }, - { - "epoch": 1.5366324521665997, - "grad_norm": 0.0014967684401199222, - "learning_rate": 0.00019999883593484712, - "loss": 46.0, - "step": 20098 - }, - { - "epoch": 1.5367089091499895, - "grad_norm": 0.005033082794398069, - "learning_rate": 0.00019999883581894789, - "loss": 46.0, - "step": 20099 - }, - { - "epoch": 1.5367853661333792, - "grad_norm": 0.0038119973614811897, - "learning_rate": 0.0001999988357030429, - "loss": 46.0, - "step": 20100 - }, - { - "epoch": 1.536861823116769, - "grad_norm": 0.0019197850488126278, - "learning_rate": 0.00019999883558713213, - "loss": 46.0, - "step": 20101 - }, - { - "epoch": 1.5369382801001588, - "grad_norm": 0.0009453543461859226, - "learning_rate": 0.00019999883547121557, - "loss": 46.0, - "step": 20102 - }, - { - "epoch": 1.5370147370835485, - "grad_norm": 0.0007630455656908453, - "learning_rate": 0.0001999988353552933, - "loss": 46.0, - "step": 20103 - }, - { - "epoch": 1.537091194066938, - "grad_norm": 0.002025666879490018, - "learning_rate": 0.0001999988352393652, - "loss": 46.0, - "step": 20104 - }, - { - "epoch": 1.5371676510503278, - "grad_norm": 0.004234236665070057, - "learning_rate": 0.00019999883512343135, - "loss": 46.0, - "step": 20105 - }, - { - "epoch": 1.5372441080337176, - "grad_norm": 0.005363223142921925, - "learning_rate": 0.00019999883500749173, - "loss": 46.0, - "step": 20106 - }, - { - "epoch": 1.5373205650171071, - "grad_norm": 0.004469661973416805, - "learning_rate": 0.00019999883489154634, - "loss": 46.0, - "step": 20107 - }, - { - "epoch": 1.5373970220004969, - "grad_norm": 0.000897183024790138, - "learning_rate": 0.0001999988347755952, - "loss": 46.0, - "step": 20108 - }, - { - "epoch": 1.5374734789838866, - "grad_norm": 0.0028845383785665035, - "learning_rate": 0.00019999883465963828, - "loss": 46.0, - "step": 20109 - }, - { - "epoch": 1.5375499359672764, - "grad_norm": 0.00042767953709699214, - "learning_rate": 0.00019999883454367557, - "loss": 46.0, - "step": 20110 - }, - { - "epoch": 1.5376263929506662, - "grad_norm": 0.0009216518956236541, - "learning_rate": 0.0001999988344277071, - "loss": 46.0, - "step": 20111 - }, - { - "epoch": 1.537702849934056, - "grad_norm": 0.0008002610993571579, - "learning_rate": 0.0001999988343117329, - "loss": 46.0, - "step": 20112 - }, - { - "epoch": 1.5377793069174457, - "grad_norm": 0.001917055924423039, - "learning_rate": 0.00019999883419575287, - "loss": 46.0, - "step": 20113 - }, - { - "epoch": 1.5378557639008354, - "grad_norm": 0.001889210776425898, - "learning_rate": 0.00019999883407976712, - "loss": 46.0, - "step": 20114 - }, - { - "epoch": 1.537932220884225, - "grad_norm": 0.001500417129136622, - "learning_rate": 0.0001999988339637756, - "loss": 46.0, - "step": 20115 - }, - { - "epoch": 1.5380086778676147, - "grad_norm": 0.0012968312948942184, - "learning_rate": 0.00019999883384777827, - "loss": 46.0, - "step": 20116 - }, - { - "epoch": 1.5380851348510043, - "grad_norm": 0.003965589217841625, - "learning_rate": 0.00019999883373177517, - "loss": 46.0, - "step": 20117 - }, - { - "epoch": 1.538161591834394, - "grad_norm": 0.0021684097591787577, - "learning_rate": 0.00019999883361576632, - "loss": 46.0, - "step": 20118 - }, - { - "epoch": 1.5382380488177838, - "grad_norm": 0.001090253354050219, - "learning_rate": 0.0001999988334997517, - "loss": 46.0, - "step": 20119 - }, - { - "epoch": 1.5383145058011736, - "grad_norm": 0.0009661591029725969, - "learning_rate": 0.00019999883338373133, - "loss": 46.0, - "step": 20120 - }, - { - "epoch": 1.5383909627845633, - "grad_norm": 0.010315361432731152, - "learning_rate": 0.0001999988332677052, - "loss": 46.0, - "step": 20121 - }, - { - "epoch": 1.538467419767953, - "grad_norm": 0.0015649535926058888, - "learning_rate": 0.00019999883315167325, - "loss": 46.0, - "step": 20122 - }, - { - "epoch": 1.5385438767513429, - "grad_norm": 0.0015382713172584772, - "learning_rate": 0.00019999883303563557, - "loss": 46.0, - "step": 20123 - }, - { - "epoch": 1.5386203337347326, - "grad_norm": 0.0022082326468080282, - "learning_rate": 0.00019999883291959208, - "loss": 46.0, - "step": 20124 - }, - { - "epoch": 1.5386967907181224, - "grad_norm": 0.000669982167892158, - "learning_rate": 0.00019999883280354285, - "loss": 46.0, - "step": 20125 - }, - { - "epoch": 1.538773247701512, - "grad_norm": 0.0016096193576231599, - "learning_rate": 0.00019999883268748784, - "loss": 46.0, - "step": 20126 - }, - { - "epoch": 1.5388497046849017, - "grad_norm": 0.0007320628501474857, - "learning_rate": 0.0001999988325714271, - "loss": 46.0, - "step": 20127 - }, - { - "epoch": 1.5389261616682912, - "grad_norm": 0.001097666914574802, - "learning_rate": 0.00019999883245536054, - "loss": 46.0, - "step": 20128 - }, - { - "epoch": 1.539002618651681, - "grad_norm": 0.0013889233814552426, - "learning_rate": 0.0001999988323392882, - "loss": 46.0, - "step": 20129 - }, - { - "epoch": 1.5390790756350707, - "grad_norm": 0.0020983247086405754, - "learning_rate": 0.00019999883222321014, - "loss": 46.0, - "step": 20130 - }, - { - "epoch": 1.5391555326184605, - "grad_norm": 0.0011359930504113436, - "learning_rate": 0.0001999988321071263, - "loss": 46.0, - "step": 20131 - }, - { - "epoch": 1.5392319896018503, - "grad_norm": 0.010866709053516388, - "learning_rate": 0.00019999883199103667, - "loss": 46.0, - "step": 20132 - }, - { - "epoch": 1.53930844658524, - "grad_norm": 0.0008480479009449482, - "learning_rate": 0.00019999883187494128, - "loss": 46.0, - "step": 20133 - }, - { - "epoch": 1.5393849035686298, - "grad_norm": 0.0010555190965533257, - "learning_rate": 0.0001999988317588401, - "loss": 46.0, - "step": 20134 - }, - { - "epoch": 1.5394613605520195, - "grad_norm": 0.0010322027374058962, - "learning_rate": 0.0001999988316427332, - "loss": 46.0, - "step": 20135 - }, - { - "epoch": 1.5395378175354093, - "grad_norm": 0.0011218597646802664, - "learning_rate": 0.0001999988315266205, - "loss": 46.0, - "step": 20136 - }, - { - "epoch": 1.5396142745187988, - "grad_norm": 0.001945384545251727, - "learning_rate": 0.00019999883141050205, - "loss": 46.0, - "step": 20137 - }, - { - "epoch": 1.5396907315021886, - "grad_norm": 0.0014750653645023704, - "learning_rate": 0.0001999988312943778, - "loss": 46.0, - "step": 20138 - }, - { - "epoch": 1.5397671884855781, - "grad_norm": 0.0015266623813658953, - "learning_rate": 0.0001999988311782478, - "loss": 46.0, - "step": 20139 - }, - { - "epoch": 1.539843645468968, - "grad_norm": 0.0012810645857825875, - "learning_rate": 0.000199998831062112, - "loss": 46.0, - "step": 20140 - }, - { - "epoch": 1.5399201024523577, - "grad_norm": 0.003351570339873433, - "learning_rate": 0.00019999883094597048, - "loss": 46.0, - "step": 20141 - }, - { - "epoch": 1.5399965594357474, - "grad_norm": 0.0015526647912338376, - "learning_rate": 0.00019999883082982316, - "loss": 46.0, - "step": 20142 - }, - { - "epoch": 1.5400730164191372, - "grad_norm": 0.0012627204414457083, - "learning_rate": 0.00019999883071367006, - "loss": 46.0, - "step": 20143 - }, - { - "epoch": 1.540149473402527, - "grad_norm": 0.002908539492636919, - "learning_rate": 0.0001999988305975112, - "loss": 46.0, - "step": 20144 - }, - { - "epoch": 1.5402259303859167, - "grad_norm": 0.0026809454429894686, - "learning_rate": 0.0001999988304813466, - "loss": 46.0, - "step": 20145 - }, - { - "epoch": 1.5403023873693065, - "grad_norm": 0.006418232340365648, - "learning_rate": 0.00019999883036517623, - "loss": 46.0, - "step": 20146 - }, - { - "epoch": 1.5403788443526962, - "grad_norm": 0.001138602034188807, - "learning_rate": 0.00019999883024900003, - "loss": 46.0, - "step": 20147 - }, - { - "epoch": 1.5404553013360858, - "grad_norm": 0.0009076328715309501, - "learning_rate": 0.00019999883013281812, - "loss": 46.0, - "step": 20148 - }, - { - "epoch": 1.5405317583194755, - "grad_norm": 0.0023125596344470978, - "learning_rate": 0.0001999988300166304, - "loss": 46.0, - "step": 20149 - }, - { - "epoch": 1.540608215302865, - "grad_norm": 0.007807811722159386, - "learning_rate": 0.00019999882990043695, - "loss": 46.0, - "step": 20150 - }, - { - "epoch": 1.5406846722862548, - "grad_norm": 0.0021241719368845224, - "learning_rate": 0.00019999882978423772, - "loss": 46.0, - "step": 20151 - }, - { - "epoch": 1.5407611292696446, - "grad_norm": 0.0007885193917900324, - "learning_rate": 0.00019999882966803269, - "loss": 46.0, - "step": 20152 - }, - { - "epoch": 1.5408375862530344, - "grad_norm": 0.0008994427626021206, - "learning_rate": 0.0001999988295518219, - "loss": 46.0, - "step": 20153 - }, - { - "epoch": 1.5409140432364241, - "grad_norm": 0.004090954549610615, - "learning_rate": 0.00019999882943560538, - "loss": 46.0, - "step": 20154 - }, - { - "epoch": 1.5409905002198139, - "grad_norm": 0.001574847148731351, - "learning_rate": 0.00019999882931938308, - "loss": 46.0, - "step": 20155 - }, - { - "epoch": 1.5410669572032036, - "grad_norm": 0.000622278661467135, - "learning_rate": 0.00019999882920315498, - "loss": 46.0, - "step": 20156 - }, - { - "epoch": 1.5411434141865934, - "grad_norm": 0.0005950985359959304, - "learning_rate": 0.0001999988290869211, - "loss": 46.0, - "step": 20157 - }, - { - "epoch": 1.541219871169983, - "grad_norm": 0.0012609973782673478, - "learning_rate": 0.0001999988289706815, - "loss": 46.0, - "step": 20158 - }, - { - "epoch": 1.5412963281533727, - "grad_norm": 0.0012368971947580576, - "learning_rate": 0.0001999988288544361, - "loss": 46.0, - "step": 20159 - }, - { - "epoch": 1.5413727851367625, - "grad_norm": 0.002296286867931485, - "learning_rate": 0.00019999882873818493, - "loss": 46.0, - "step": 20160 - }, - { - "epoch": 1.541449242120152, - "grad_norm": 0.0006178400362841785, - "learning_rate": 0.000199998828621928, - "loss": 46.0, - "step": 20161 - }, - { - "epoch": 1.5415256991035418, - "grad_norm": 0.000837925064843148, - "learning_rate": 0.0001999988285056653, - "loss": 46.0, - "step": 20162 - }, - { - "epoch": 1.5416021560869315, - "grad_norm": 0.0009972784901037812, - "learning_rate": 0.00019999882838939682, - "loss": 46.0, - "step": 20163 - }, - { - "epoch": 1.5416786130703213, - "grad_norm": 0.0009486893541179597, - "learning_rate": 0.0001999988282731226, - "loss": 46.0, - "step": 20164 - }, - { - "epoch": 1.541755070053711, - "grad_norm": 0.005483708810061216, - "learning_rate": 0.0001999988281568426, - "loss": 46.0, - "step": 20165 - }, - { - "epoch": 1.5418315270371008, - "grad_norm": 0.0035777557641267776, - "learning_rate": 0.0001999988280405568, - "loss": 46.0, - "step": 20166 - }, - { - "epoch": 1.5419079840204906, - "grad_norm": 0.0019115410977974534, - "learning_rate": 0.00019999882792426526, - "loss": 46.0, - "step": 20167 - }, - { - "epoch": 1.5419844410038803, - "grad_norm": 0.003945767879486084, - "learning_rate": 0.00019999882780796796, - "loss": 46.0, - "step": 20168 - }, - { - "epoch": 1.5420608979872699, - "grad_norm": 0.0011247425572946668, - "learning_rate": 0.00019999882769166486, - "loss": 46.0, - "step": 20169 - }, - { - "epoch": 1.5421373549706596, - "grad_norm": 0.0007622959674336016, - "learning_rate": 0.000199998827575356, - "loss": 46.0, - "step": 20170 - }, - { - "epoch": 1.5422138119540494, - "grad_norm": 0.0007714423118159175, - "learning_rate": 0.00019999882745904137, - "loss": 46.0, - "step": 20171 - }, - { - "epoch": 1.542290268937439, - "grad_norm": 0.0012912163510918617, - "learning_rate": 0.00019999882734272098, - "loss": 46.0, - "step": 20172 - }, - { - "epoch": 1.5423667259208287, - "grad_norm": 0.002913554199039936, - "learning_rate": 0.0001999988272263948, - "loss": 46.0, - "step": 20173 - }, - { - "epoch": 1.5424431829042184, - "grad_norm": 0.0004806739743798971, - "learning_rate": 0.00019999882711006287, - "loss": 46.0, - "step": 20174 - }, - { - "epoch": 1.5425196398876082, - "grad_norm": 0.008131139911711216, - "learning_rate": 0.0001999988269937252, - "loss": 46.0, - "step": 20175 - }, - { - "epoch": 1.542596096870998, - "grad_norm": 0.0006606580573134124, - "learning_rate": 0.0001999988268773817, - "loss": 46.0, - "step": 20176 - }, - { - "epoch": 1.5426725538543877, - "grad_norm": 0.0037447737995535135, - "learning_rate": 0.00019999882676103244, - "loss": 46.0, - "step": 20177 - }, - { - "epoch": 1.5427490108377775, - "grad_norm": 0.0024279500357806683, - "learning_rate": 0.00019999882664467744, - "loss": 46.0, - "step": 20178 - }, - { - "epoch": 1.5428254678211673, - "grad_norm": 0.0014981512213125825, - "learning_rate": 0.0001999988265283167, - "loss": 46.0, - "step": 20179 - }, - { - "epoch": 1.5429019248045568, - "grad_norm": 0.00046638367348350585, - "learning_rate": 0.0001999988264119501, - "loss": 46.0, - "step": 20180 - }, - { - "epoch": 1.5429783817879466, - "grad_norm": 0.0008069496252574027, - "learning_rate": 0.00019999882629557778, - "loss": 46.0, - "step": 20181 - }, - { - "epoch": 1.5430548387713363, - "grad_norm": 0.0025949811097234488, - "learning_rate": 0.0001999988261791997, - "loss": 46.0, - "step": 20182 - }, - { - "epoch": 1.5431312957547259, - "grad_norm": 0.002164746169000864, - "learning_rate": 0.00019999882606281584, - "loss": 46.0, - "step": 20183 - }, - { - "epoch": 1.5432077527381156, - "grad_norm": 0.010595188476145267, - "learning_rate": 0.00019999882594642622, - "loss": 46.0, - "step": 20184 - }, - { - "epoch": 1.5432842097215054, - "grad_norm": 0.003314277622848749, - "learning_rate": 0.00019999882583003083, - "loss": 46.0, - "step": 20185 - }, - { - "epoch": 1.5433606667048951, - "grad_norm": 0.00048742940998636186, - "learning_rate": 0.00019999882571362964, - "loss": 46.0, - "step": 20186 - }, - { - "epoch": 1.543437123688285, - "grad_norm": 0.0005593310343101621, - "learning_rate": 0.00019999882559722273, - "loss": 46.0, - "step": 20187 - }, - { - "epoch": 1.5435135806716747, - "grad_norm": 0.0014772203285247087, - "learning_rate": 0.00019999882548081001, - "loss": 46.0, - "step": 20188 - }, - { - "epoch": 1.5435900376550644, - "grad_norm": 0.004009888041764498, - "learning_rate": 0.00019999882536439153, - "loss": 46.0, - "step": 20189 - }, - { - "epoch": 1.5436664946384542, - "grad_norm": 0.0011133728548884392, - "learning_rate": 0.0001999988252479673, - "loss": 46.0, - "step": 20190 - }, - { - "epoch": 1.5437429516218437, - "grad_norm": 0.0008045951835811138, - "learning_rate": 0.00019999882513153727, - "loss": 46.0, - "step": 20191 - }, - { - "epoch": 1.5438194086052335, - "grad_norm": 0.0016953253652900457, - "learning_rate": 0.0001999988250151015, - "loss": 46.0, - "step": 20192 - }, - { - "epoch": 1.5438958655886232, - "grad_norm": 0.004482306074351072, - "learning_rate": 0.00019999882489865996, - "loss": 46.0, - "step": 20193 - }, - { - "epoch": 1.5439723225720128, - "grad_norm": 0.010717046447098255, - "learning_rate": 0.0001999988247822126, - "loss": 46.0, - "step": 20194 - }, - { - "epoch": 1.5440487795554025, - "grad_norm": 0.0038280882872641087, - "learning_rate": 0.00019999882466575954, - "loss": 46.0, - "step": 20195 - }, - { - "epoch": 1.5441252365387923, - "grad_norm": 0.0015682651428505778, - "learning_rate": 0.00019999882454930067, - "loss": 46.0, - "step": 20196 - }, - { - "epoch": 1.544201693522182, - "grad_norm": 0.0009037533891387284, - "learning_rate": 0.00019999882443283605, - "loss": 46.0, - "step": 20197 - }, - { - "epoch": 1.5442781505055718, - "grad_norm": 0.0016367345815524459, - "learning_rate": 0.00019999882431636566, - "loss": 46.0, - "step": 20198 - }, - { - "epoch": 1.5443546074889616, - "grad_norm": 0.0007118707871995866, - "learning_rate": 0.00019999882419988947, - "loss": 46.0, - "step": 20199 - }, - { - "epoch": 1.5444310644723513, - "grad_norm": 0.006440271623432636, - "learning_rate": 0.00019999882408340753, - "loss": 46.0, - "step": 20200 - }, - { - "epoch": 1.544507521455741, - "grad_norm": 0.0005165501497685909, - "learning_rate": 0.00019999882396691982, - "loss": 46.0, - "step": 20201 - }, - { - "epoch": 1.5445839784391306, - "grad_norm": 0.0011839722283184528, - "learning_rate": 0.00019999882385042633, - "loss": 46.0, - "step": 20202 - }, - { - "epoch": 1.5446604354225204, - "grad_norm": 0.0017505462747067213, - "learning_rate": 0.0001999988237339271, - "loss": 46.0, - "step": 20203 - }, - { - "epoch": 1.5447368924059102, - "grad_norm": 0.001137514947913587, - "learning_rate": 0.00019999882361742207, - "loss": 46.0, - "step": 20204 - }, - { - "epoch": 1.5448133493892997, - "grad_norm": 0.0036035377997905016, - "learning_rate": 0.0001999988235009113, - "loss": 46.0, - "step": 20205 - }, - { - "epoch": 1.5448898063726895, - "grad_norm": 0.002373239491134882, - "learning_rate": 0.00019999882338439474, - "loss": 46.0, - "step": 20206 - }, - { - "epoch": 1.5449662633560792, - "grad_norm": 0.011920620687305927, - "learning_rate": 0.0001999988232678724, - "loss": 46.0, - "step": 20207 - }, - { - "epoch": 1.545042720339469, - "grad_norm": 0.0007025506929494441, - "learning_rate": 0.00019999882315134432, - "loss": 46.0, - "step": 20208 - }, - { - "epoch": 1.5451191773228588, - "grad_norm": 0.0006856209365651011, - "learning_rate": 0.00019999882303481048, - "loss": 46.0, - "step": 20209 - }, - { - "epoch": 1.5451956343062485, - "grad_norm": 0.0031736339442431927, - "learning_rate": 0.00019999882291827083, - "loss": 46.0, - "step": 20210 - }, - { - "epoch": 1.5452720912896383, - "grad_norm": 0.0026848490815609694, - "learning_rate": 0.0001999988228017254, - "loss": 46.0, - "step": 20211 - }, - { - "epoch": 1.545348548273028, - "grad_norm": 0.0007010195986367762, - "learning_rate": 0.00019999882268517425, - "loss": 46.0, - "step": 20212 - }, - { - "epoch": 1.5454250052564176, - "grad_norm": 0.0015660380013287067, - "learning_rate": 0.0001999988225686173, - "loss": 46.0, - "step": 20213 - }, - { - "epoch": 1.5455014622398073, - "grad_norm": 0.0009423309820704162, - "learning_rate": 0.0001999988224520546, - "loss": 46.0, - "step": 20214 - }, - { - "epoch": 1.545577919223197, - "grad_norm": 0.002163567114621401, - "learning_rate": 0.00019999882233548614, - "loss": 46.0, - "step": 20215 - }, - { - "epoch": 1.5456543762065866, - "grad_norm": 0.0007394609274342656, - "learning_rate": 0.00019999882221891188, - "loss": 46.0, - "step": 20216 - }, - { - "epoch": 1.5457308331899764, - "grad_norm": 0.0004886023234575987, - "learning_rate": 0.00019999882210233185, - "loss": 46.0, - "step": 20217 - }, - { - "epoch": 1.5458072901733662, - "grad_norm": 0.002775841159746051, - "learning_rate": 0.00019999882198574608, - "loss": 46.0, - "step": 20218 - }, - { - "epoch": 1.545883747156756, - "grad_norm": 0.0008558969129808247, - "learning_rate": 0.0001999988218691545, - "loss": 46.0, - "step": 20219 - }, - { - "epoch": 1.5459602041401457, - "grad_norm": 0.002447514096274972, - "learning_rate": 0.0001999988217525572, - "loss": 46.0, - "step": 20220 - }, - { - "epoch": 1.5460366611235354, - "grad_norm": 0.001322864554822445, - "learning_rate": 0.0001999988216359541, - "loss": 46.0, - "step": 20221 - }, - { - "epoch": 1.5461131181069252, - "grad_norm": 0.0018312036991119385, - "learning_rate": 0.00019999882151934523, - "loss": 46.0, - "step": 20222 - }, - { - "epoch": 1.546189575090315, - "grad_norm": 0.0009744654526002705, - "learning_rate": 0.0001999988214027306, - "loss": 46.0, - "step": 20223 - }, - { - "epoch": 1.5462660320737045, - "grad_norm": 0.001992936013266444, - "learning_rate": 0.00019999882128611017, - "loss": 46.0, - "step": 20224 - }, - { - "epoch": 1.5463424890570943, - "grad_norm": 0.0021744153928011656, - "learning_rate": 0.000199998821169484, - "loss": 46.0, - "step": 20225 - }, - { - "epoch": 1.546418946040484, - "grad_norm": 0.000638212775811553, - "learning_rate": 0.00019999882105285207, - "loss": 46.0, - "step": 20226 - }, - { - "epoch": 1.5464954030238736, - "grad_norm": 0.0009381708805449307, - "learning_rate": 0.00019999882093621438, - "loss": 46.0, - "step": 20227 - }, - { - "epoch": 1.5465718600072633, - "grad_norm": 0.0008068294264376163, - "learning_rate": 0.00019999882081957087, - "loss": 46.0, - "step": 20228 - }, - { - "epoch": 1.546648316990653, - "grad_norm": 0.0024747317656874657, - "learning_rate": 0.00019999882070292164, - "loss": 46.0, - "step": 20229 - }, - { - "epoch": 1.5467247739740428, - "grad_norm": 0.002904994646087289, - "learning_rate": 0.0001999988205862666, - "loss": 46.0, - "step": 20230 - }, - { - "epoch": 1.5468012309574326, - "grad_norm": 0.0014590652426704764, - "learning_rate": 0.00019999882046960584, - "loss": 46.0, - "step": 20231 - }, - { - "epoch": 1.5468776879408224, - "grad_norm": 0.0013190620811656117, - "learning_rate": 0.00019999882035293926, - "loss": 46.0, - "step": 20232 - }, - { - "epoch": 1.5469541449242121, - "grad_norm": 0.0006903313333168626, - "learning_rate": 0.00019999882023626696, - "loss": 46.0, - "step": 20233 - }, - { - "epoch": 1.5470306019076019, - "grad_norm": 0.0008527478785254061, - "learning_rate": 0.00019999882011958886, - "loss": 46.0, - "step": 20234 - }, - { - "epoch": 1.5471070588909914, - "grad_norm": 0.0011800432112067938, - "learning_rate": 0.00019999882000290497, - "loss": 46.0, - "step": 20235 - }, - { - "epoch": 1.5471835158743812, - "grad_norm": 0.0005883224657736719, - "learning_rate": 0.00019999881988621535, - "loss": 46.0, - "step": 20236 - }, - { - "epoch": 1.547259972857771, - "grad_norm": 0.002735986141487956, - "learning_rate": 0.00019999881976951996, - "loss": 46.0, - "step": 20237 - }, - { - "epoch": 1.5473364298411605, - "grad_norm": 0.0007256532553583384, - "learning_rate": 0.00019999881965281877, - "loss": 46.0, - "step": 20238 - }, - { - "epoch": 1.5474128868245502, - "grad_norm": 0.0011123957810923457, - "learning_rate": 0.0001999988195361118, - "loss": 46.0, - "step": 20239 - }, - { - "epoch": 1.54748934380794, - "grad_norm": 0.0010934810852631927, - "learning_rate": 0.00019999881941939912, - "loss": 46.0, - "step": 20240 - }, - { - "epoch": 1.5475658007913298, - "grad_norm": 0.001546977087855339, - "learning_rate": 0.00019999881930268064, - "loss": 46.0, - "step": 20241 - }, - { - "epoch": 1.5476422577747195, - "grad_norm": 0.0026557340752333403, - "learning_rate": 0.00019999881918595635, - "loss": 46.0, - "step": 20242 - }, - { - "epoch": 1.5477187147581093, - "grad_norm": 0.0004453608999028802, - "learning_rate": 0.00019999881906922635, - "loss": 46.0, - "step": 20243 - }, - { - "epoch": 1.547795171741499, - "grad_norm": 0.0009327111765742302, - "learning_rate": 0.00019999881895249054, - "loss": 46.0, - "step": 20244 - }, - { - "epoch": 1.5478716287248888, - "grad_norm": 0.003226051339879632, - "learning_rate": 0.00019999881883574897, - "loss": 46.0, - "step": 20245 - }, - { - "epoch": 1.5479480857082784, - "grad_norm": 0.0032770477700978518, - "learning_rate": 0.00019999881871900167, - "loss": 46.0, - "step": 20246 - }, - { - "epoch": 1.5480245426916681, - "grad_norm": 0.0011336986208334565, - "learning_rate": 0.00019999881860224857, - "loss": 46.0, - "step": 20247 - }, - { - "epoch": 1.5481009996750579, - "grad_norm": 0.0022451074328273535, - "learning_rate": 0.0001999988184854897, - "loss": 46.0, - "step": 20248 - }, - { - "epoch": 1.5481774566584474, - "grad_norm": 0.001434828620404005, - "learning_rate": 0.00019999881836872506, - "loss": 46.0, - "step": 20249 - }, - { - "epoch": 1.5482539136418372, - "grad_norm": 0.0011933945352211595, - "learning_rate": 0.00019999881825195464, - "loss": 46.0, - "step": 20250 - }, - { - "epoch": 1.548330370625227, - "grad_norm": 0.0009847356704995036, - "learning_rate": 0.00019999881813517848, - "loss": 46.0, - "step": 20251 - }, - { - "epoch": 1.5484068276086167, - "grad_norm": 0.0013555812183767557, - "learning_rate": 0.00019999881801839651, - "loss": 46.0, - "step": 20252 - }, - { - "epoch": 1.5484832845920065, - "grad_norm": 0.0015788115561008453, - "learning_rate": 0.0001999988179016088, - "loss": 46.0, - "step": 20253 - }, - { - "epoch": 1.5485597415753962, - "grad_norm": 0.0005741255008615553, - "learning_rate": 0.00019999881778481532, - "loss": 46.0, - "step": 20254 - }, - { - "epoch": 1.548636198558786, - "grad_norm": 0.0008115789969451725, - "learning_rate": 0.00019999881766801606, - "loss": 46.0, - "step": 20255 - }, - { - "epoch": 1.5487126555421757, - "grad_norm": 0.002358938567340374, - "learning_rate": 0.00019999881755121103, - "loss": 46.0, - "step": 20256 - }, - { - "epoch": 1.5487891125255653, - "grad_norm": 0.00048008712474256754, - "learning_rate": 0.00019999881743440026, - "loss": 46.0, - "step": 20257 - }, - { - "epoch": 1.548865569508955, - "grad_norm": 0.010214579291641712, - "learning_rate": 0.0001999988173175837, - "loss": 46.0, - "step": 20258 - }, - { - "epoch": 1.5489420264923446, - "grad_norm": 0.0048224530182778835, - "learning_rate": 0.00019999881720076136, - "loss": 46.0, - "step": 20259 - }, - { - "epoch": 1.5490184834757343, - "grad_norm": 0.002420478267595172, - "learning_rate": 0.00019999881708393326, - "loss": 46.0, - "step": 20260 - }, - { - "epoch": 1.549094940459124, - "grad_norm": 0.002419776748865843, - "learning_rate": 0.0001999988169670994, - "loss": 46.0, - "step": 20261 - }, - { - "epoch": 1.5491713974425139, - "grad_norm": 0.0010610036551952362, - "learning_rate": 0.00019999881685025975, - "loss": 46.0, - "step": 20262 - }, - { - "epoch": 1.5492478544259036, - "grad_norm": 0.0023971563205122948, - "learning_rate": 0.00019999881673341433, - "loss": 46.0, - "step": 20263 - }, - { - "epoch": 1.5493243114092934, - "grad_norm": 0.0010830481769517064, - "learning_rate": 0.00019999881661656317, - "loss": 46.0, - "step": 20264 - }, - { - "epoch": 1.5494007683926831, - "grad_norm": 0.0005202654283493757, - "learning_rate": 0.0001999988164997062, - "loss": 46.0, - "step": 20265 - }, - { - "epoch": 1.549477225376073, - "grad_norm": 0.0006369129405356944, - "learning_rate": 0.0001999988163828435, - "loss": 46.0, - "step": 20266 - }, - { - "epoch": 1.5495536823594627, - "grad_norm": 0.0012664797250181437, - "learning_rate": 0.000199998816265975, - "loss": 46.0, - "step": 20267 - }, - { - "epoch": 1.5496301393428522, - "grad_norm": 0.0009975587017834187, - "learning_rate": 0.00019999881614910075, - "loss": 46.0, - "step": 20268 - }, - { - "epoch": 1.549706596326242, - "grad_norm": 0.001327601377852261, - "learning_rate": 0.00019999881603222072, - "loss": 46.0, - "step": 20269 - }, - { - "epoch": 1.5497830533096315, - "grad_norm": 0.0007339943549595773, - "learning_rate": 0.00019999881591533495, - "loss": 46.0, - "step": 20270 - }, - { - "epoch": 1.5498595102930213, - "grad_norm": 0.0029646975453943014, - "learning_rate": 0.00019999881579844337, - "loss": 46.0, - "step": 20271 - }, - { - "epoch": 1.549935967276411, - "grad_norm": 0.0009326311992481351, - "learning_rate": 0.00019999881568154605, - "loss": 46.0, - "step": 20272 - }, - { - "epoch": 1.5500124242598008, - "grad_norm": 0.0031023994088172913, - "learning_rate": 0.00019999881556464292, - "loss": 46.0, - "step": 20273 - }, - { - "epoch": 1.5500888812431906, - "grad_norm": 0.0019929411355406046, - "learning_rate": 0.00019999881544773405, - "loss": 46.0, - "step": 20274 - }, - { - "epoch": 1.5501653382265803, - "grad_norm": 0.0015137135051190853, - "learning_rate": 0.0001999988153308194, - "loss": 46.0, - "step": 20275 - }, - { - "epoch": 1.55024179520997, - "grad_norm": 0.0008138444391079247, - "learning_rate": 0.000199998815213899, - "loss": 46.0, - "step": 20276 - }, - { - "epoch": 1.5503182521933598, - "grad_norm": 0.004656719043850899, - "learning_rate": 0.0001999988150969728, - "loss": 46.0, - "step": 20277 - }, - { - "epoch": 1.5503947091767496, - "grad_norm": 0.0005335908499546349, - "learning_rate": 0.00019999881498004087, - "loss": 46.0, - "step": 20278 - }, - { - "epoch": 1.5504711661601391, - "grad_norm": 0.0022118152119219303, - "learning_rate": 0.00019999881486310313, - "loss": 46.0, - "step": 20279 - }, - { - "epoch": 1.550547623143529, - "grad_norm": 0.0011276851873844862, - "learning_rate": 0.00019999881474615965, - "loss": 46.0, - "step": 20280 - }, - { - "epoch": 1.5506240801269184, - "grad_norm": 0.0010222881101071835, - "learning_rate": 0.0001999988146292104, - "loss": 46.0, - "step": 20281 - }, - { - "epoch": 1.5507005371103082, - "grad_norm": 0.001826761057600379, - "learning_rate": 0.00019999881451225536, - "loss": 46.0, - "step": 20282 - }, - { - "epoch": 1.550776994093698, - "grad_norm": 0.0014074405189603567, - "learning_rate": 0.0001999988143952946, - "loss": 46.0, - "step": 20283 - }, - { - "epoch": 1.5508534510770877, - "grad_norm": 0.0007083318778313696, - "learning_rate": 0.000199998814278328, - "loss": 46.0, - "step": 20284 - }, - { - "epoch": 1.5509299080604775, - "grad_norm": 0.0011526060989126563, - "learning_rate": 0.00019999881416135566, - "loss": 46.0, - "step": 20285 - }, - { - "epoch": 1.5510063650438672, - "grad_norm": 0.0005087851313874125, - "learning_rate": 0.00019999881404437757, - "loss": 46.0, - "step": 20286 - }, - { - "epoch": 1.551082822027257, - "grad_norm": 0.001099860412068665, - "learning_rate": 0.0001999988139273937, - "loss": 46.0, - "step": 20287 - }, - { - "epoch": 1.5511592790106468, - "grad_norm": 0.0009887852938845754, - "learning_rate": 0.00019999881381040403, - "loss": 46.0, - "step": 20288 - }, - { - "epoch": 1.5512357359940363, - "grad_norm": 0.0009057808783836663, - "learning_rate": 0.00019999881369340864, - "loss": 46.0, - "step": 20289 - }, - { - "epoch": 1.551312192977426, - "grad_norm": 0.004380343947559595, - "learning_rate": 0.00019999881357640745, - "loss": 46.0, - "step": 20290 - }, - { - "epoch": 1.5513886499608158, - "grad_norm": 0.0009076056885533035, - "learning_rate": 0.0001999988134594005, - "loss": 46.0, - "step": 20291 - }, - { - "epoch": 1.5514651069442054, - "grad_norm": 0.000971232249867171, - "learning_rate": 0.0001999988133423878, - "loss": 46.0, - "step": 20292 - }, - { - "epoch": 1.5515415639275951, - "grad_norm": 0.011418294161558151, - "learning_rate": 0.0001999988132253693, - "loss": 46.0, - "step": 20293 - }, - { - "epoch": 1.5516180209109849, - "grad_norm": 0.00544810201972723, - "learning_rate": 0.00019999881310834504, - "loss": 46.0, - "step": 20294 - }, - { - "epoch": 1.5516944778943746, - "grad_norm": 0.0012689060531556606, - "learning_rate": 0.000199998812991315, - "loss": 46.0, - "step": 20295 - }, - { - "epoch": 1.5517709348777644, - "grad_norm": 0.0004988238215446472, - "learning_rate": 0.0001999988128742792, - "loss": 46.0, - "step": 20296 - }, - { - "epoch": 1.5518473918611542, - "grad_norm": 0.0017380461795255542, - "learning_rate": 0.00019999881275723763, - "loss": 46.0, - "step": 20297 - }, - { - "epoch": 1.551923848844544, - "grad_norm": 0.001257389667443931, - "learning_rate": 0.00019999881264019028, - "loss": 46.0, - "step": 20298 - }, - { - "epoch": 1.5520003058279337, - "grad_norm": 0.001535171759314835, - "learning_rate": 0.00019999881252313716, - "loss": 46.0, - "step": 20299 - }, - { - "epoch": 1.5520767628113232, - "grad_norm": 0.011539758183062077, - "learning_rate": 0.00019999881240607831, - "loss": 46.0, - "step": 20300 - }, - { - "epoch": 1.552153219794713, - "grad_norm": 0.007358429953455925, - "learning_rate": 0.00019999881228901365, - "loss": 46.0, - "step": 20301 - }, - { - "epoch": 1.5522296767781028, - "grad_norm": 0.0010508454870432615, - "learning_rate": 0.00019999881217194326, - "loss": 46.0, - "step": 20302 - }, - { - "epoch": 1.5523061337614923, - "grad_norm": 0.004711613059043884, - "learning_rate": 0.00019999881205486707, - "loss": 46.0, - "step": 20303 - }, - { - "epoch": 1.552382590744882, - "grad_norm": 0.0010608049342408776, - "learning_rate": 0.0001999988119377851, - "loss": 46.0, - "step": 20304 - }, - { - "epoch": 1.5524590477282718, - "grad_norm": 0.00034053970011882484, - "learning_rate": 0.00019999881182069737, - "loss": 46.0, - "step": 20305 - }, - { - "epoch": 1.5525355047116616, - "grad_norm": 0.007171119097620249, - "learning_rate": 0.00019999881170360389, - "loss": 46.0, - "step": 20306 - }, - { - "epoch": 1.5526119616950513, - "grad_norm": 0.0009845270542427897, - "learning_rate": 0.00019999881158650463, - "loss": 46.0, - "step": 20307 - }, - { - "epoch": 1.552688418678441, - "grad_norm": 0.0009325945866294205, - "learning_rate": 0.00019999881146939957, - "loss": 46.0, - "step": 20308 - }, - { - "epoch": 1.5527648756618309, - "grad_norm": 0.0014653062680736184, - "learning_rate": 0.00019999881135228877, - "loss": 46.0, - "step": 20309 - }, - { - "epoch": 1.5528413326452206, - "grad_norm": 0.0006368522881530225, - "learning_rate": 0.00019999881123517222, - "loss": 46.0, - "step": 20310 - }, - { - "epoch": 1.5529177896286102, - "grad_norm": 0.004681960679590702, - "learning_rate": 0.00019999881111804987, - "loss": 46.0, - "step": 20311 - }, - { - "epoch": 1.552994246612, - "grad_norm": 0.0007058492046780884, - "learning_rate": 0.00019999881100092178, - "loss": 46.0, - "step": 20312 - }, - { - "epoch": 1.5530707035953897, - "grad_norm": 0.0009261932573281229, - "learning_rate": 0.00019999881088378788, - "loss": 46.0, - "step": 20313 - }, - { - "epoch": 1.5531471605787792, - "grad_norm": 0.006575034931302071, - "learning_rate": 0.00019999881076664824, - "loss": 46.0, - "step": 20314 - }, - { - "epoch": 1.553223617562169, - "grad_norm": 0.00036820978857576847, - "learning_rate": 0.0001999988106495028, - "loss": 46.0, - "step": 20315 - }, - { - "epoch": 1.5533000745455587, - "grad_norm": 0.0007867935346439481, - "learning_rate": 0.00019999881053235164, - "loss": 46.0, - "step": 20316 - }, - { - "epoch": 1.5533765315289485, - "grad_norm": 0.0007679632399231195, - "learning_rate": 0.00019999881041519467, - "loss": 46.0, - "step": 20317 - }, - { - "epoch": 1.5534529885123383, - "grad_norm": 0.0014141318388283253, - "learning_rate": 0.00019999881029803197, - "loss": 46.0, - "step": 20318 - }, - { - "epoch": 1.553529445495728, - "grad_norm": 0.001984365750104189, - "learning_rate": 0.00019999881018086346, - "loss": 46.0, - "step": 20319 - }, - { - "epoch": 1.5536059024791178, - "grad_norm": 0.0010668091708794236, - "learning_rate": 0.0001999988100636892, - "loss": 46.0, - "step": 20320 - }, - { - "epoch": 1.5536823594625075, - "grad_norm": 0.0019426877843216062, - "learning_rate": 0.00019999880994650915, - "loss": 46.0, - "step": 20321 - }, - { - "epoch": 1.553758816445897, - "grad_norm": 0.0005831917515024543, - "learning_rate": 0.00019999880982932334, - "loss": 46.0, - "step": 20322 - }, - { - "epoch": 1.5538352734292868, - "grad_norm": 0.0013772249221801758, - "learning_rate": 0.0001999988097121318, - "loss": 46.0, - "step": 20323 - }, - { - "epoch": 1.5539117304126766, - "grad_norm": 0.0024889749474823475, - "learning_rate": 0.00019999880959493445, - "loss": 46.0, - "step": 20324 - }, - { - "epoch": 1.5539881873960661, - "grad_norm": 0.0010758873540908098, - "learning_rate": 0.00019999880947773132, - "loss": 46.0, - "step": 20325 - }, - { - "epoch": 1.554064644379456, - "grad_norm": 0.005038782954216003, - "learning_rate": 0.00019999880936052246, - "loss": 46.0, - "step": 20326 - }, - { - "epoch": 1.5541411013628457, - "grad_norm": 0.0004562149115372449, - "learning_rate": 0.0001999988092433078, - "loss": 46.0, - "step": 20327 - }, - { - "epoch": 1.5542175583462354, - "grad_norm": 0.0022854830604046583, - "learning_rate": 0.00019999880912608737, - "loss": 46.0, - "step": 20328 - }, - { - "epoch": 1.5542940153296252, - "grad_norm": 0.0008763526566326618, - "learning_rate": 0.00019999880900886118, - "loss": 46.0, - "step": 20329 - }, - { - "epoch": 1.554370472313015, - "grad_norm": 0.001301970100030303, - "learning_rate": 0.00019999880889162922, - "loss": 46.0, - "step": 20330 - }, - { - "epoch": 1.5544469292964047, - "grad_norm": 0.0016204144340008497, - "learning_rate": 0.0001999988087743915, - "loss": 46.0, - "step": 20331 - }, - { - "epoch": 1.5545233862797945, - "grad_norm": 0.0008843140094541013, - "learning_rate": 0.000199998808657148, - "loss": 46.0, - "step": 20332 - }, - { - "epoch": 1.554599843263184, - "grad_norm": 0.0006387456669472158, - "learning_rate": 0.00019999880853989875, - "loss": 46.0, - "step": 20333 - }, - { - "epoch": 1.5546763002465738, - "grad_norm": 0.0008754501468501985, - "learning_rate": 0.00019999880842264372, - "loss": 46.0, - "step": 20334 - }, - { - "epoch": 1.5547527572299635, - "grad_norm": 0.0004958927165716887, - "learning_rate": 0.0001999988083053829, - "loss": 46.0, - "step": 20335 - }, - { - "epoch": 1.554829214213353, - "grad_norm": 0.0007865107618272305, - "learning_rate": 0.00019999880818811634, - "loss": 46.0, - "step": 20336 - }, - { - "epoch": 1.5549056711967428, - "grad_norm": 0.0013830723473802209, - "learning_rate": 0.00019999880807084397, - "loss": 46.0, - "step": 20337 - }, - { - "epoch": 1.5549821281801326, - "grad_norm": 0.0020435727201402187, - "learning_rate": 0.00019999880795356587, - "loss": 46.0, - "step": 20338 - }, - { - "epoch": 1.5550585851635224, - "grad_norm": 0.0009164935327135026, - "learning_rate": 0.000199998807836282, - "loss": 46.0, - "step": 20339 - }, - { - "epoch": 1.5551350421469121, - "grad_norm": 0.001196700381115079, - "learning_rate": 0.00019999880771899234, - "loss": 46.0, - "step": 20340 - }, - { - "epoch": 1.5552114991303019, - "grad_norm": 0.0022680407855659723, - "learning_rate": 0.00019999880760169692, - "loss": 46.0, - "step": 20341 - }, - { - "epoch": 1.5552879561136916, - "grad_norm": 0.0005042555276304483, - "learning_rate": 0.00019999880748439573, - "loss": 46.0, - "step": 20342 - }, - { - "epoch": 1.5553644130970814, - "grad_norm": 0.0016276228707283735, - "learning_rate": 0.00019999880736708877, - "loss": 46.0, - "step": 20343 - }, - { - "epoch": 1.555440870080471, - "grad_norm": 0.000963117927312851, - "learning_rate": 0.00019999880724977604, - "loss": 46.0, - "step": 20344 - }, - { - "epoch": 1.5555173270638607, - "grad_norm": 0.0010502267396077514, - "learning_rate": 0.00019999880713245753, - "loss": 46.0, - "step": 20345 - }, - { - "epoch": 1.5555937840472505, - "grad_norm": 0.0013835520949214697, - "learning_rate": 0.00019999880701513325, - "loss": 46.0, - "step": 20346 - }, - { - "epoch": 1.55567024103064, - "grad_norm": 0.0008860574453137815, - "learning_rate": 0.00019999880689780324, - "loss": 46.0, - "step": 20347 - }, - { - "epoch": 1.5557466980140298, - "grad_norm": 0.0024157913867384195, - "learning_rate": 0.00019999880678046742, - "loss": 46.0, - "step": 20348 - }, - { - "epoch": 1.5558231549974195, - "grad_norm": 0.003710251534357667, - "learning_rate": 0.00019999880666312584, - "loss": 46.0, - "step": 20349 - }, - { - "epoch": 1.5558996119808093, - "grad_norm": 0.0009747496224008501, - "learning_rate": 0.0001999988065457785, - "loss": 46.0, - "step": 20350 - }, - { - "epoch": 1.555976068964199, - "grad_norm": 0.0016732370713725686, - "learning_rate": 0.00019999880642842537, - "loss": 46.0, - "step": 20351 - }, - { - "epoch": 1.5560525259475888, - "grad_norm": 0.00039919669507071376, - "learning_rate": 0.00019999880631106648, - "loss": 46.0, - "step": 20352 - }, - { - "epoch": 1.5561289829309786, - "grad_norm": 0.0008389498107135296, - "learning_rate": 0.00019999880619370184, - "loss": 46.0, - "step": 20353 - }, - { - "epoch": 1.5562054399143683, - "grad_norm": 0.0010824992787092924, - "learning_rate": 0.00019999880607633142, - "loss": 46.0, - "step": 20354 - }, - { - "epoch": 1.5562818968977579, - "grad_norm": 0.0010296195978298783, - "learning_rate": 0.0001999988059589552, - "loss": 46.0, - "step": 20355 - }, - { - "epoch": 1.5563583538811476, - "grad_norm": 0.004622041247785091, - "learning_rate": 0.00019999880584157324, - "loss": 46.0, - "step": 20356 - }, - { - "epoch": 1.5564348108645374, - "grad_norm": 0.0009110590908676386, - "learning_rate": 0.0001999988057241855, - "loss": 46.0, - "step": 20357 - }, - { - "epoch": 1.556511267847927, - "grad_norm": 0.0015861720312386751, - "learning_rate": 0.00019999880560679203, - "loss": 46.0, - "step": 20358 - }, - { - "epoch": 1.5565877248313167, - "grad_norm": 0.0007488686824217439, - "learning_rate": 0.00019999880548939275, - "loss": 46.0, - "step": 20359 - }, - { - "epoch": 1.5566641818147064, - "grad_norm": 0.001435458310879767, - "learning_rate": 0.00019999880537198772, - "loss": 46.0, - "step": 20360 - }, - { - "epoch": 1.5567406387980962, - "grad_norm": 0.0018419409170746803, - "learning_rate": 0.0001999988052545769, - "loss": 46.0, - "step": 20361 - }, - { - "epoch": 1.556817095781486, - "grad_norm": 0.0011355690658092499, - "learning_rate": 0.00019999880513716032, - "loss": 46.0, - "step": 20362 - }, - { - "epoch": 1.5568935527648757, - "grad_norm": 0.0019013368291780353, - "learning_rate": 0.00019999880501973797, - "loss": 46.0, - "step": 20363 - }, - { - "epoch": 1.5569700097482655, - "grad_norm": 0.012250037863850594, - "learning_rate": 0.00019999880490230987, - "loss": 46.0, - "step": 20364 - }, - { - "epoch": 1.5570464667316553, - "grad_norm": 0.0035572205670177937, - "learning_rate": 0.00019999880478487598, - "loss": 46.0, - "step": 20365 - }, - { - "epoch": 1.5571229237150448, - "grad_norm": 0.0017253459664061666, - "learning_rate": 0.0001999988046674363, - "loss": 46.0, - "step": 20366 - }, - { - "epoch": 1.5571993806984346, - "grad_norm": 0.0010187855223193765, - "learning_rate": 0.0001999988045499909, - "loss": 46.0, - "step": 20367 - }, - { - "epoch": 1.5572758376818243, - "grad_norm": 0.0011968839680776, - "learning_rate": 0.00019999880443253968, - "loss": 46.0, - "step": 20368 - }, - { - "epoch": 1.5573522946652139, - "grad_norm": 0.0007393784471787512, - "learning_rate": 0.00019999880431508272, - "loss": 46.0, - "step": 20369 - }, - { - "epoch": 1.5574287516486036, - "grad_norm": 0.0054636672139167786, - "learning_rate": 0.00019999880419762001, - "loss": 46.0, - "step": 20370 - }, - { - "epoch": 1.5575052086319934, - "grad_norm": 0.0013797142310068011, - "learning_rate": 0.00019999880408015148, - "loss": 46.0, - "step": 20371 - }, - { - "epoch": 1.5575816656153831, - "grad_norm": 0.0006277625216171145, - "learning_rate": 0.00019999880396267723, - "loss": 46.0, - "step": 20372 - }, - { - "epoch": 1.557658122598773, - "grad_norm": 0.0021176051814109087, - "learning_rate": 0.00019999880384519717, - "loss": 46.0, - "step": 20373 - }, - { - "epoch": 1.5577345795821627, - "grad_norm": 0.0010102363303303719, - "learning_rate": 0.00019999880372771137, - "loss": 46.0, - "step": 20374 - }, - { - "epoch": 1.5578110365655524, - "grad_norm": 0.0019918824546039104, - "learning_rate": 0.0001999988036102198, - "loss": 46.0, - "step": 20375 - }, - { - "epoch": 1.5578874935489422, - "grad_norm": 0.0008416366181336343, - "learning_rate": 0.00019999880349272245, - "loss": 46.0, - "step": 20376 - }, - { - "epoch": 1.5579639505323317, - "grad_norm": 0.0039924620650708675, - "learning_rate": 0.00019999880337521933, - "loss": 46.0, - "step": 20377 - }, - { - "epoch": 1.5580404075157215, - "grad_norm": 0.001947305747307837, - "learning_rate": 0.00019999880325771043, - "loss": 46.0, - "step": 20378 - }, - { - "epoch": 1.5581168644991112, - "grad_norm": 0.0015651760622859001, - "learning_rate": 0.00019999880314019577, - "loss": 46.0, - "step": 20379 - }, - { - "epoch": 1.5581933214825008, - "grad_norm": 0.0019158568466082215, - "learning_rate": 0.00019999880302267532, - "loss": 46.0, - "step": 20380 - }, - { - "epoch": 1.5582697784658905, - "grad_norm": 0.005281593184918165, - "learning_rate": 0.00019999880290514916, - "loss": 46.0, - "step": 20381 - }, - { - "epoch": 1.5583462354492803, - "grad_norm": 0.0023288815282285213, - "learning_rate": 0.00019999880278761718, - "loss": 46.0, - "step": 20382 - }, - { - "epoch": 1.55842269243267, - "grad_norm": 0.0035321833565831184, - "learning_rate": 0.00019999880267007944, - "loss": 46.0, - "step": 20383 - }, - { - "epoch": 1.5584991494160598, - "grad_norm": 0.0005555053357966244, - "learning_rate": 0.00019999880255253593, - "loss": 46.0, - "step": 20384 - }, - { - "epoch": 1.5585756063994496, - "grad_norm": 0.0017697765724733472, - "learning_rate": 0.00019999880243498668, - "loss": 46.0, - "step": 20385 - }, - { - "epoch": 1.5586520633828393, - "grad_norm": 0.0005751054850406945, - "learning_rate": 0.00019999880231743163, - "loss": 46.0, - "step": 20386 - }, - { - "epoch": 1.558728520366229, - "grad_norm": 0.0010335806291550398, - "learning_rate": 0.0001999988021998708, - "loss": 46.0, - "step": 20387 - }, - { - "epoch": 1.5588049773496186, - "grad_norm": 0.0007465775706805289, - "learning_rate": 0.00019999880208230423, - "loss": 46.0, - "step": 20388 - }, - { - "epoch": 1.5588814343330084, - "grad_norm": 0.0005362386582419276, - "learning_rate": 0.00019999880196473185, - "loss": 46.0, - "step": 20389 - }, - { - "epoch": 1.558957891316398, - "grad_norm": 0.000868162838742137, - "learning_rate": 0.00019999880184715373, - "loss": 46.0, - "step": 20390 - }, - { - "epoch": 1.5590343482997877, - "grad_norm": 0.0016189463203772902, - "learning_rate": 0.00019999880172956986, - "loss": 46.0, - "step": 20391 - }, - { - "epoch": 1.5591108052831775, - "grad_norm": 0.0016034849686548114, - "learning_rate": 0.00019999880161198022, - "loss": 46.0, - "step": 20392 - }, - { - "epoch": 1.5591872622665672, - "grad_norm": 0.005704442970454693, - "learning_rate": 0.00019999880149438476, - "loss": 46.0, - "step": 20393 - }, - { - "epoch": 1.559263719249957, - "grad_norm": 0.0015032096998766065, - "learning_rate": 0.00019999880137678357, - "loss": 46.0, - "step": 20394 - }, - { - "epoch": 1.5593401762333468, - "grad_norm": 0.0014174515381455421, - "learning_rate": 0.0001999988012591766, - "loss": 46.0, - "step": 20395 - }, - { - "epoch": 1.5594166332167365, - "grad_norm": 0.0010963774984702468, - "learning_rate": 0.00019999880114156387, - "loss": 46.0, - "step": 20396 - }, - { - "epoch": 1.5594930902001263, - "grad_norm": 0.0008113204967230558, - "learning_rate": 0.00019999880102394534, - "loss": 46.0, - "step": 20397 - }, - { - "epoch": 1.559569547183516, - "grad_norm": 0.006441914476454258, - "learning_rate": 0.00019999880090632109, - "loss": 46.0, - "step": 20398 - }, - { - "epoch": 1.5596460041669056, - "grad_norm": 0.0019139398355036974, - "learning_rate": 0.00019999880078869103, - "loss": 46.0, - "step": 20399 - }, - { - "epoch": 1.5597224611502953, - "grad_norm": 0.0010233854409307241, - "learning_rate": 0.00019999880067105523, - "loss": 46.0, - "step": 20400 - }, - { - "epoch": 1.5597989181336849, - "grad_norm": 0.0006365884328261018, - "learning_rate": 0.00019999880055341366, - "loss": 46.0, - "step": 20401 - }, - { - "epoch": 1.5598753751170746, - "grad_norm": 0.0016257824609056115, - "learning_rate": 0.00019999880043576628, - "loss": 46.0, - "step": 20402 - }, - { - "epoch": 1.5599518321004644, - "grad_norm": 0.0005208643851801753, - "learning_rate": 0.00019999880031811316, - "loss": 46.0, - "step": 20403 - }, - { - "epoch": 1.5600282890838542, - "grad_norm": 0.0019847780931741, - "learning_rate": 0.00019999880020045424, - "loss": 46.0, - "step": 20404 - }, - { - "epoch": 1.560104746067244, - "grad_norm": 0.0008785325335338712, - "learning_rate": 0.0001999988000827896, - "loss": 46.0, - "step": 20405 - }, - { - "epoch": 1.5601812030506337, - "grad_norm": 0.013772883452475071, - "learning_rate": 0.00019999879996511916, - "loss": 46.0, - "step": 20406 - }, - { - "epoch": 1.5602576600340234, - "grad_norm": 0.001103578950278461, - "learning_rate": 0.00019999879984744298, - "loss": 46.0, - "step": 20407 - }, - { - "epoch": 1.5603341170174132, - "grad_norm": 0.009055916219949722, - "learning_rate": 0.000199998799729761, - "loss": 46.0, - "step": 20408 - }, - { - "epoch": 1.560410574000803, - "grad_norm": 0.0012917376589030027, - "learning_rate": 0.00019999879961207326, - "loss": 46.0, - "step": 20409 - }, - { - "epoch": 1.5604870309841925, - "grad_norm": 0.0007482227520085871, - "learning_rate": 0.00019999879949437978, - "loss": 46.0, - "step": 20410 - }, - { - "epoch": 1.5605634879675823, - "grad_norm": 0.0004723152087535709, - "learning_rate": 0.00019999879937668047, - "loss": 46.0, - "step": 20411 - }, - { - "epoch": 1.5606399449509718, - "grad_norm": 0.004322065506130457, - "learning_rate": 0.00019999879925897542, - "loss": 46.0, - "step": 20412 - }, - { - "epoch": 1.5607164019343616, - "grad_norm": 0.0008148546330630779, - "learning_rate": 0.00019999879914126462, - "loss": 46.0, - "step": 20413 - }, - { - "epoch": 1.5607928589177513, - "grad_norm": 0.0003856477269437164, - "learning_rate": 0.00019999879902354804, - "loss": 46.0, - "step": 20414 - }, - { - "epoch": 1.560869315901141, - "grad_norm": 0.0010075336322188377, - "learning_rate": 0.00019999879890582567, - "loss": 46.0, - "step": 20415 - }, - { - "epoch": 1.5609457728845308, - "grad_norm": 0.005647373851388693, - "learning_rate": 0.00019999879878809752, - "loss": 46.0, - "step": 20416 - }, - { - "epoch": 1.5610222298679206, - "grad_norm": 0.0021779201924800873, - "learning_rate": 0.00019999879867036363, - "loss": 46.0, - "step": 20417 - }, - { - "epoch": 1.5610986868513104, - "grad_norm": 0.001866922597400844, - "learning_rate": 0.000199998798552624, - "loss": 46.0, - "step": 20418 - }, - { - "epoch": 1.5611751438347001, - "grad_norm": 0.001715793740004301, - "learning_rate": 0.00019999879843487855, - "loss": 46.0, - "step": 20419 - }, - { - "epoch": 1.5612516008180897, - "grad_norm": 0.0006460725562646985, - "learning_rate": 0.00019999879831712733, - "loss": 46.0, - "step": 20420 - }, - { - "epoch": 1.5613280578014794, - "grad_norm": 0.0024972131941467524, - "learning_rate": 0.00019999879819937037, - "loss": 46.0, - "step": 20421 - }, - { - "epoch": 1.5614045147848692, - "grad_norm": 0.0012341371038928628, - "learning_rate": 0.00019999879808160764, - "loss": 46.0, - "step": 20422 - }, - { - "epoch": 1.5614809717682587, - "grad_norm": 0.0026142382994294167, - "learning_rate": 0.0001999987979638391, - "loss": 46.0, - "step": 20423 - }, - { - "epoch": 1.5615574287516485, - "grad_norm": 0.0022262735292315483, - "learning_rate": 0.00019999879784606483, - "loss": 46.0, - "step": 20424 - }, - { - "epoch": 1.5616338857350383, - "grad_norm": 0.0008153364178724587, - "learning_rate": 0.00019999879772828477, - "loss": 46.0, - "step": 20425 - }, - { - "epoch": 1.561710342718428, - "grad_norm": 0.0008112417417578399, - "learning_rate": 0.00019999879761049898, - "loss": 46.0, - "step": 20426 - }, - { - "epoch": 1.5617867997018178, - "grad_norm": 0.0006853495142422616, - "learning_rate": 0.00019999879749270737, - "loss": 46.0, - "step": 20427 - }, - { - "epoch": 1.5618632566852075, - "grad_norm": 0.0013695686357095838, - "learning_rate": 0.00019999879737491, - "loss": 46.0, - "step": 20428 - }, - { - "epoch": 1.5619397136685973, - "grad_norm": 0.001403912785463035, - "learning_rate": 0.0001999987972571069, - "loss": 46.0, - "step": 20429 - }, - { - "epoch": 1.562016170651987, - "grad_norm": 0.0006473068497143686, - "learning_rate": 0.000199998797139298, - "loss": 46.0, - "step": 20430 - }, - { - "epoch": 1.5620926276353766, - "grad_norm": 0.001170830219052732, - "learning_rate": 0.00019999879702148335, - "loss": 46.0, - "step": 20431 - }, - { - "epoch": 1.5621690846187664, - "grad_norm": 0.001028629019856453, - "learning_rate": 0.00019999879690366288, - "loss": 46.0, - "step": 20432 - }, - { - "epoch": 1.5622455416021561, - "grad_norm": 0.0033223647624254227, - "learning_rate": 0.0001999987967858367, - "loss": 46.0, - "step": 20433 - }, - { - "epoch": 1.5623219985855457, - "grad_norm": 0.0022415989078581333, - "learning_rate": 0.0001999987966680047, - "loss": 46.0, - "step": 20434 - }, - { - "epoch": 1.5623984555689354, - "grad_norm": 0.0015029300702735782, - "learning_rate": 0.00019999879655016698, - "loss": 46.0, - "step": 20435 - }, - { - "epoch": 1.5624749125523252, - "grad_norm": 0.004798507783561945, - "learning_rate": 0.00019999879643232347, - "loss": 46.0, - "step": 20436 - }, - { - "epoch": 1.562551369535715, - "grad_norm": 0.0008729594992473722, - "learning_rate": 0.0001999987963144742, - "loss": 46.0, - "step": 20437 - }, - { - "epoch": 1.5626278265191047, - "grad_norm": 0.002530557569116354, - "learning_rate": 0.0001999987961966191, - "loss": 46.0, - "step": 20438 - }, - { - "epoch": 1.5627042835024945, - "grad_norm": 0.0009950305102393031, - "learning_rate": 0.00019999879607875828, - "loss": 46.0, - "step": 20439 - }, - { - "epoch": 1.5627807404858842, - "grad_norm": 0.0016097306506708264, - "learning_rate": 0.0001999987959608917, - "loss": 46.0, - "step": 20440 - }, - { - "epoch": 1.562857197469274, - "grad_norm": 0.0011646109633147717, - "learning_rate": 0.00019999879584301934, - "loss": 46.0, - "step": 20441 - }, - { - "epoch": 1.5629336544526635, - "grad_norm": 0.00990972202271223, - "learning_rate": 0.00019999879572514122, - "loss": 46.0, - "step": 20442 - }, - { - "epoch": 1.5630101114360533, - "grad_norm": 0.0029427348636090755, - "learning_rate": 0.00019999879560725733, - "loss": 46.0, - "step": 20443 - }, - { - "epoch": 1.563086568419443, - "grad_norm": 0.001082059694454074, - "learning_rate": 0.00019999879548936766, - "loss": 46.0, - "step": 20444 - }, - { - "epoch": 1.5631630254028326, - "grad_norm": 0.0010490723652765155, - "learning_rate": 0.00019999879537147222, - "loss": 46.0, - "step": 20445 - }, - { - "epoch": 1.5632394823862223, - "grad_norm": 0.002688538981601596, - "learning_rate": 0.00019999879525357098, - "loss": 46.0, - "step": 20446 - }, - { - "epoch": 1.563315939369612, - "grad_norm": 0.001219554920680821, - "learning_rate": 0.00019999879513566402, - "loss": 46.0, - "step": 20447 - }, - { - "epoch": 1.5633923963530019, - "grad_norm": 0.0008803309174254537, - "learning_rate": 0.00019999879501775126, - "loss": 46.0, - "step": 20448 - }, - { - "epoch": 1.5634688533363916, - "grad_norm": 0.0011195058468729258, - "learning_rate": 0.00019999879489983276, - "loss": 46.0, - "step": 20449 - }, - { - "epoch": 1.5635453103197814, - "grad_norm": 0.0022292984649538994, - "learning_rate": 0.00019999879478190848, - "loss": 46.0, - "step": 20450 - }, - { - "epoch": 1.5636217673031712, - "grad_norm": 0.002224868396297097, - "learning_rate": 0.00019999879466397845, - "loss": 46.0, - "step": 20451 - }, - { - "epoch": 1.563698224286561, - "grad_norm": 0.0018027651822194457, - "learning_rate": 0.0001999987945460426, - "loss": 46.0, - "step": 20452 - }, - { - "epoch": 1.5637746812699505, - "grad_norm": 0.0011839448707178235, - "learning_rate": 0.000199998794428101, - "loss": 46.0, - "step": 20453 - }, - { - "epoch": 1.5638511382533402, - "grad_norm": 0.0015217720065265894, - "learning_rate": 0.00019999879431015365, - "loss": 46.0, - "step": 20454 - }, - { - "epoch": 1.56392759523673, - "grad_norm": 0.0007957973284646869, - "learning_rate": 0.00019999879419220053, - "loss": 46.0, - "step": 20455 - }, - { - "epoch": 1.5640040522201195, - "grad_norm": 0.0010524482931941748, - "learning_rate": 0.00019999879407424161, - "loss": 46.0, - "step": 20456 - }, - { - "epoch": 1.5640805092035093, - "grad_norm": 0.0006354114739224315, - "learning_rate": 0.00019999879395627695, - "loss": 46.0, - "step": 20457 - }, - { - "epoch": 1.564156966186899, - "grad_norm": 0.0016668433090671897, - "learning_rate": 0.0001999987938383065, - "loss": 46.0, - "step": 20458 - }, - { - "epoch": 1.5642334231702888, - "grad_norm": 0.0011136078974232078, - "learning_rate": 0.0001999987937203303, - "loss": 46.0, - "step": 20459 - }, - { - "epoch": 1.5643098801536786, - "grad_norm": 0.0038942249957472086, - "learning_rate": 0.0001999987936023483, - "loss": 46.0, - "step": 20460 - }, - { - "epoch": 1.5643863371370683, - "grad_norm": 0.004990273155272007, - "learning_rate": 0.00019999879348436058, - "loss": 46.0, - "step": 20461 - }, - { - "epoch": 1.564462794120458, - "grad_norm": 0.0032325657084584236, - "learning_rate": 0.00019999879336636705, - "loss": 46.0, - "step": 20462 - }, - { - "epoch": 1.5645392511038478, - "grad_norm": 0.0012750310124829412, - "learning_rate": 0.00019999879324836777, - "loss": 46.0, - "step": 20463 - }, - { - "epoch": 1.5646157080872374, - "grad_norm": 0.0010060176718980074, - "learning_rate": 0.00019999879313036272, - "loss": 46.0, - "step": 20464 - }, - { - "epoch": 1.5646921650706271, - "grad_norm": 0.0006512170075438917, - "learning_rate": 0.0001999987930123519, - "loss": 46.0, - "step": 20465 - }, - { - "epoch": 1.564768622054017, - "grad_norm": 0.0012688649585470557, - "learning_rate": 0.0001999987928943353, - "loss": 46.0, - "step": 20466 - }, - { - "epoch": 1.5648450790374064, - "grad_norm": 0.00587441073730588, - "learning_rate": 0.00019999879277631292, - "loss": 46.0, - "step": 20467 - }, - { - "epoch": 1.5649215360207962, - "grad_norm": 0.0014448395231738687, - "learning_rate": 0.00019999879265828477, - "loss": 46.0, - "step": 20468 - }, - { - "epoch": 1.564997993004186, - "grad_norm": 0.0010669083567336202, - "learning_rate": 0.00019999879254025088, - "loss": 46.0, - "step": 20469 - }, - { - "epoch": 1.5650744499875757, - "grad_norm": 0.0026821040082722902, - "learning_rate": 0.00019999879242221121, - "loss": 46.0, - "step": 20470 - }, - { - "epoch": 1.5651509069709655, - "grad_norm": 0.000854135665576905, - "learning_rate": 0.00019999879230416578, - "loss": 46.0, - "step": 20471 - }, - { - "epoch": 1.5652273639543552, - "grad_norm": 0.0017521621193736792, - "learning_rate": 0.00019999879218611456, - "loss": 46.0, - "step": 20472 - }, - { - "epoch": 1.565303820937745, - "grad_norm": 0.0003465542977210134, - "learning_rate": 0.00019999879206805758, - "loss": 46.0, - "step": 20473 - }, - { - "epoch": 1.5653802779211348, - "grad_norm": 0.00042865925934165716, - "learning_rate": 0.00019999879194999482, - "loss": 46.0, - "step": 20474 - }, - { - "epoch": 1.5654567349045243, - "grad_norm": 0.0010470196139067411, - "learning_rate": 0.0001999987918319263, - "loss": 46.0, - "step": 20475 - }, - { - "epoch": 1.565533191887914, - "grad_norm": 0.0008275810978375375, - "learning_rate": 0.00019999879171385203, - "loss": 46.0, - "step": 20476 - }, - { - "epoch": 1.5656096488713038, - "grad_norm": 0.0010817410657182336, - "learning_rate": 0.00019999879159577198, - "loss": 46.0, - "step": 20477 - }, - { - "epoch": 1.5656861058546934, - "grad_norm": 0.0007110472652129829, - "learning_rate": 0.00019999879147768613, - "loss": 46.0, - "step": 20478 - }, - { - "epoch": 1.5657625628380831, - "grad_norm": 0.0036815183702856302, - "learning_rate": 0.00019999879135959456, - "loss": 46.0, - "step": 20479 - }, - { - "epoch": 1.5658390198214729, - "grad_norm": 0.001409601652994752, - "learning_rate": 0.00019999879124149719, - "loss": 46.0, - "step": 20480 - }, - { - "epoch": 1.5659154768048626, - "grad_norm": 0.0006302836118265986, - "learning_rate": 0.00019999879112339404, - "loss": 46.0, - "step": 20481 - }, - { - "epoch": 1.5659919337882524, - "grad_norm": 0.0012520537711679935, - "learning_rate": 0.00019999879100528512, - "loss": 46.0, - "step": 20482 - }, - { - "epoch": 1.5660683907716422, - "grad_norm": 0.0032005272805690765, - "learning_rate": 0.00019999879088717046, - "loss": 46.0, - "step": 20483 - }, - { - "epoch": 1.566144847755032, - "grad_norm": 0.0020078090019524097, - "learning_rate": 0.00019999879076905002, - "loss": 46.0, - "step": 20484 - }, - { - "epoch": 1.5662213047384217, - "grad_norm": 0.0019918247126042843, - "learning_rate": 0.0001999987906509238, - "loss": 46.0, - "step": 20485 - }, - { - "epoch": 1.5662977617218112, - "grad_norm": 0.002960031619295478, - "learning_rate": 0.00019999879053279182, - "loss": 46.0, - "step": 20486 - }, - { - "epoch": 1.566374218705201, - "grad_norm": 0.0006002835580147803, - "learning_rate": 0.0001999987904146541, - "loss": 46.0, - "step": 20487 - }, - { - "epoch": 1.5664506756885908, - "grad_norm": 0.0034434457775205374, - "learning_rate": 0.00019999879029651053, - "loss": 46.0, - "step": 20488 - }, - { - "epoch": 1.5665271326719803, - "grad_norm": 0.002080704551190138, - "learning_rate": 0.00019999879017836128, - "loss": 46.0, - "step": 20489 - }, - { - "epoch": 1.56660358965537, - "grad_norm": 0.001218809629790485, - "learning_rate": 0.0001999987900602062, - "loss": 46.0, - "step": 20490 - }, - { - "epoch": 1.5666800466387598, - "grad_norm": 0.00241008005104959, - "learning_rate": 0.00019999878994204538, - "loss": 46.0, - "step": 20491 - }, - { - "epoch": 1.5667565036221496, - "grad_norm": 0.0023835906758904457, - "learning_rate": 0.00019999878982387878, - "loss": 46.0, - "step": 20492 - }, - { - "epoch": 1.5668329606055393, - "grad_norm": 0.0010999118676409125, - "learning_rate": 0.0001999987897057064, - "loss": 46.0, - "step": 20493 - }, - { - "epoch": 1.566909417588929, - "grad_norm": 0.0008885576971806586, - "learning_rate": 0.00019999878958752826, - "loss": 46.0, - "step": 20494 - }, - { - "epoch": 1.5669858745723189, - "grad_norm": 0.006827152334153652, - "learning_rate": 0.00019999878946934437, - "loss": 46.0, - "step": 20495 - }, - { - "epoch": 1.5670623315557086, - "grad_norm": 0.0013334945542737842, - "learning_rate": 0.00019999878935115468, - "loss": 46.0, - "step": 20496 - }, - { - "epoch": 1.5671387885390982, - "grad_norm": 0.0016205388819798827, - "learning_rate": 0.00019999878923295924, - "loss": 46.0, - "step": 20497 - }, - { - "epoch": 1.567215245522488, - "grad_norm": 0.0006641803774982691, - "learning_rate": 0.00019999878911475803, - "loss": 46.0, - "step": 20498 - }, - { - "epoch": 1.5672917025058777, - "grad_norm": 0.008667604066431522, - "learning_rate": 0.00019999878899655104, - "loss": 46.0, - "step": 20499 - }, - { - "epoch": 1.5673681594892672, - "grad_norm": 0.0015205841045826674, - "learning_rate": 0.00019999878887833828, - "loss": 46.0, - "step": 20500 - }, - { - "epoch": 1.567444616472657, - "grad_norm": 0.0013605996500700712, - "learning_rate": 0.00019999878876011975, - "loss": 46.0, - "step": 20501 - }, - { - "epoch": 1.5675210734560467, - "grad_norm": 0.005175224505364895, - "learning_rate": 0.00019999878864189547, - "loss": 46.0, - "step": 20502 - }, - { - "epoch": 1.5675975304394365, - "grad_norm": 0.001720156753435731, - "learning_rate": 0.0001999987885236654, - "loss": 46.0, - "step": 20503 - }, - { - "epoch": 1.5676739874228263, - "grad_norm": 0.006137765012681484, - "learning_rate": 0.00019999878840542957, - "loss": 46.0, - "step": 20504 - }, - { - "epoch": 1.567750444406216, - "grad_norm": 0.004961061757057905, - "learning_rate": 0.00019999878828718797, - "loss": 46.0, - "step": 20505 - }, - { - "epoch": 1.5678269013896058, - "grad_norm": 0.001239399309270084, - "learning_rate": 0.0001999987881689406, - "loss": 46.0, - "step": 20506 - }, - { - "epoch": 1.5679033583729955, - "grad_norm": 0.0023358126636594534, - "learning_rate": 0.00019999878805068746, - "loss": 46.0, - "step": 20507 - }, - { - "epoch": 1.567979815356385, - "grad_norm": 0.0011175277177244425, - "learning_rate": 0.00019999878793242854, - "loss": 46.0, - "step": 20508 - }, - { - "epoch": 1.5680562723397748, - "grad_norm": 0.0007174051133915782, - "learning_rate": 0.00019999878781416387, - "loss": 46.0, - "step": 20509 - }, - { - "epoch": 1.5681327293231646, - "grad_norm": 0.00573556125164032, - "learning_rate": 0.00019999878769589343, - "loss": 46.0, - "step": 20510 - }, - { - "epoch": 1.5682091863065541, - "grad_norm": 0.0022795796394348145, - "learning_rate": 0.0001999987875776172, - "loss": 46.0, - "step": 20511 - }, - { - "epoch": 1.568285643289944, - "grad_norm": 0.0012685037218034267, - "learning_rate": 0.0001999987874593352, - "loss": 46.0, - "step": 20512 - }, - { - "epoch": 1.5683621002733337, - "grad_norm": 0.001142531749792397, - "learning_rate": 0.00019999878734104745, - "loss": 46.0, - "step": 20513 - }, - { - "epoch": 1.5684385572567234, - "grad_norm": 0.001449443749152124, - "learning_rate": 0.00019999878722275395, - "loss": 46.0, - "step": 20514 - }, - { - "epoch": 1.5685150142401132, - "grad_norm": 0.0059852758422493935, - "learning_rate": 0.00019999878710445464, - "loss": 46.0, - "step": 20515 - }, - { - "epoch": 1.568591471223503, - "grad_norm": 0.0008569424971938133, - "learning_rate": 0.00019999878698614957, - "loss": 46.0, - "step": 20516 - }, - { - "epoch": 1.5686679282068927, - "grad_norm": 0.0003648698329925537, - "learning_rate": 0.00019999878686783874, - "loss": 46.0, - "step": 20517 - }, - { - "epoch": 1.5687443851902825, - "grad_norm": 0.0007804068154655397, - "learning_rate": 0.00019999878674952214, - "loss": 46.0, - "step": 20518 - }, - { - "epoch": 1.568820842173672, - "grad_norm": 0.0008888784795999527, - "learning_rate": 0.00019999878663119977, - "loss": 46.0, - "step": 20519 - }, - { - "epoch": 1.5688972991570618, - "grad_norm": 0.001737418700940907, - "learning_rate": 0.00019999878651287163, - "loss": 46.0, - "step": 20520 - }, - { - "epoch": 1.5689737561404513, - "grad_norm": 0.0015959254233166575, - "learning_rate": 0.00019999878639453774, - "loss": 46.0, - "step": 20521 - }, - { - "epoch": 1.569050213123841, - "grad_norm": 0.0033062328584492207, - "learning_rate": 0.00019999878627619805, - "loss": 46.0, - "step": 20522 - }, - { - "epoch": 1.5691266701072308, - "grad_norm": 0.0009737692889757454, - "learning_rate": 0.00019999878615785258, - "loss": 46.0, - "step": 20523 - }, - { - "epoch": 1.5692031270906206, - "grad_norm": 0.0012240028008818626, - "learning_rate": 0.00019999878603950137, - "loss": 46.0, - "step": 20524 - }, - { - "epoch": 1.5692795840740104, - "grad_norm": 0.0020970734767615795, - "learning_rate": 0.00019999878592114439, - "loss": 46.0, - "step": 20525 - }, - { - "epoch": 1.5693560410574001, - "grad_norm": 0.002257255371659994, - "learning_rate": 0.00019999878580278163, - "loss": 46.0, - "step": 20526 - }, - { - "epoch": 1.5694324980407899, - "grad_norm": 0.006322106812149286, - "learning_rate": 0.0001999987856844131, - "loss": 46.0, - "step": 20527 - }, - { - "epoch": 1.5695089550241796, - "grad_norm": 0.0007832669652998447, - "learning_rate": 0.0001999987855660388, - "loss": 46.0, - "step": 20528 - }, - { - "epoch": 1.5695854120075694, - "grad_norm": 0.0039178491570055485, - "learning_rate": 0.00019999878544765874, - "loss": 46.0, - "step": 20529 - }, - { - "epoch": 1.569661868990959, - "grad_norm": 0.0010992736788466573, - "learning_rate": 0.0001999987853292729, - "loss": 46.0, - "step": 20530 - }, - { - "epoch": 1.5697383259743487, - "grad_norm": 0.0040085711516439915, - "learning_rate": 0.0001999987852108813, - "loss": 46.0, - "step": 20531 - }, - { - "epoch": 1.5698147829577382, - "grad_norm": 0.0010158132063224912, - "learning_rate": 0.00019999878509248392, - "loss": 46.0, - "step": 20532 - }, - { - "epoch": 1.569891239941128, - "grad_norm": 0.0010326748015359044, - "learning_rate": 0.0001999987849740808, - "loss": 46.0, - "step": 20533 - }, - { - "epoch": 1.5699676969245178, - "grad_norm": 0.0009951175889000297, - "learning_rate": 0.00019999878485567186, - "loss": 46.0, - "step": 20534 - }, - { - "epoch": 1.5700441539079075, - "grad_norm": 0.0010285226162523031, - "learning_rate": 0.0001999987847372572, - "loss": 46.0, - "step": 20535 - }, - { - "epoch": 1.5701206108912973, - "grad_norm": 0.0018726319540292025, - "learning_rate": 0.00019999878461883673, - "loss": 46.0, - "step": 20536 - }, - { - "epoch": 1.570197067874687, - "grad_norm": 0.0010387040674686432, - "learning_rate": 0.00019999878450041052, - "loss": 46.0, - "step": 20537 - }, - { - "epoch": 1.5702735248580768, - "grad_norm": 0.003290874185040593, - "learning_rate": 0.0001999987843819785, - "loss": 46.0, - "step": 20538 - }, - { - "epoch": 1.5703499818414666, - "grad_norm": 0.0007973746978677809, - "learning_rate": 0.00019999878426354078, - "loss": 46.0, - "step": 20539 - }, - { - "epoch": 1.5704264388248563, - "grad_norm": 0.0008850062149576843, - "learning_rate": 0.00019999878414509725, - "loss": 46.0, - "step": 20540 - }, - { - "epoch": 1.5705028958082459, - "grad_norm": 0.0010700584389269352, - "learning_rate": 0.00019999878402664794, - "loss": 46.0, - "step": 20541 - }, - { - "epoch": 1.5705793527916356, - "grad_norm": 0.00105974730104208, - "learning_rate": 0.00019999878390819287, - "loss": 46.0, - "step": 20542 - }, - { - "epoch": 1.5706558097750252, - "grad_norm": 0.002016504528000951, - "learning_rate": 0.00019999878378973204, - "loss": 46.0, - "step": 20543 - }, - { - "epoch": 1.570732266758415, - "grad_norm": 0.0009395172819495201, - "learning_rate": 0.00019999878367126545, - "loss": 46.0, - "step": 20544 - }, - { - "epoch": 1.5708087237418047, - "grad_norm": 0.0008172324160113931, - "learning_rate": 0.00019999878355279305, - "loss": 46.0, - "step": 20545 - }, - { - "epoch": 1.5708851807251945, - "grad_norm": 0.0006112594273872674, - "learning_rate": 0.0001999987834343149, - "loss": 46.0, - "step": 20546 - }, - { - "epoch": 1.5709616377085842, - "grad_norm": 0.0021788757294416428, - "learning_rate": 0.000199998783315831, - "loss": 46.0, - "step": 20547 - }, - { - "epoch": 1.571038094691974, - "grad_norm": 0.00084165227599442, - "learning_rate": 0.00019999878319734132, - "loss": 46.0, - "step": 20548 - }, - { - "epoch": 1.5711145516753637, - "grad_norm": 0.0029515374917536974, - "learning_rate": 0.00019999878307884586, - "loss": 46.0, - "step": 20549 - }, - { - "epoch": 1.5711910086587535, - "grad_norm": 0.0006206241669133306, - "learning_rate": 0.00019999878296034465, - "loss": 46.0, - "step": 20550 - }, - { - "epoch": 1.571267465642143, - "grad_norm": 0.0008314374717883766, - "learning_rate": 0.00019999878284183764, - "loss": 46.0, - "step": 20551 - }, - { - "epoch": 1.5713439226255328, - "grad_norm": 0.0006867795600555837, - "learning_rate": 0.00019999878272332485, - "loss": 46.0, - "step": 20552 - }, - { - "epoch": 1.5714203796089226, - "grad_norm": 0.0009752354235388339, - "learning_rate": 0.00019999878260480632, - "loss": 46.0, - "step": 20553 - }, - { - "epoch": 1.571496836592312, - "grad_norm": 0.0022557354532182217, - "learning_rate": 0.00019999878248628202, - "loss": 46.0, - "step": 20554 - }, - { - "epoch": 1.5715732935757019, - "grad_norm": 0.003024192526936531, - "learning_rate": 0.00019999878236775197, - "loss": 46.0, - "step": 20555 - }, - { - "epoch": 1.5716497505590916, - "grad_norm": 0.04782325029373169, - "learning_rate": 0.00019999878224921615, - "loss": 46.0, - "step": 20556 - }, - { - "epoch": 1.5717262075424814, - "grad_norm": 0.002820422640070319, - "learning_rate": 0.00019999878213067452, - "loss": 46.0, - "step": 20557 - }, - { - "epoch": 1.5718026645258711, - "grad_norm": 0.001744549022987485, - "learning_rate": 0.00019999878201212715, - "loss": 46.0, - "step": 20558 - }, - { - "epoch": 1.571879121509261, - "grad_norm": 0.002101277466863394, - "learning_rate": 0.000199998781893574, - "loss": 46.0, - "step": 20559 - }, - { - "epoch": 1.5719555784926507, - "grad_norm": 0.0019647928420454264, - "learning_rate": 0.00019999878177501506, - "loss": 46.0, - "step": 20560 - }, - { - "epoch": 1.5720320354760404, - "grad_norm": 0.0008673628326505423, - "learning_rate": 0.0001999987816564504, - "loss": 46.0, - "step": 20561 - }, - { - "epoch": 1.57210849245943, - "grad_norm": 0.002075694967061281, - "learning_rate": 0.00019999878153787994, - "loss": 46.0, - "step": 20562 - }, - { - "epoch": 1.5721849494428197, - "grad_norm": 0.002163885161280632, - "learning_rate": 0.0001999987814193037, - "loss": 46.0, - "step": 20563 - }, - { - "epoch": 1.5722614064262095, - "grad_norm": 0.0017930259928107262, - "learning_rate": 0.00019999878130072172, - "loss": 46.0, - "step": 20564 - }, - { - "epoch": 1.572337863409599, - "grad_norm": 0.004732145927846432, - "learning_rate": 0.00019999878118213396, - "loss": 46.0, - "step": 20565 - }, - { - "epoch": 1.5724143203929888, - "grad_norm": 0.0011254495475441217, - "learning_rate": 0.00019999878106354043, - "loss": 46.0, - "step": 20566 - }, - { - "epoch": 1.5724907773763785, - "grad_norm": 0.004453013651072979, - "learning_rate": 0.0001999987809449411, - "loss": 46.0, - "step": 20567 - }, - { - "epoch": 1.5725672343597683, - "grad_norm": 0.003936637192964554, - "learning_rate": 0.00019999878082633605, - "loss": 46.0, - "step": 20568 - }, - { - "epoch": 1.572643691343158, - "grad_norm": 0.00412857998162508, - "learning_rate": 0.00019999878070772523, - "loss": 46.0, - "step": 20569 - }, - { - "epoch": 1.5727201483265478, - "grad_norm": 0.0025975273456424475, - "learning_rate": 0.0001999987805891086, - "loss": 46.0, - "step": 20570 - }, - { - "epoch": 1.5727966053099376, - "grad_norm": 0.004237269517034292, - "learning_rate": 0.0001999987804704862, - "loss": 46.0, - "step": 20571 - }, - { - "epoch": 1.5728730622933274, - "grad_norm": 0.0011645270278677344, - "learning_rate": 0.00019999878035185806, - "loss": 46.0, - "step": 20572 - }, - { - "epoch": 1.572949519276717, - "grad_norm": 0.0016765110194683075, - "learning_rate": 0.00019999878023322417, - "loss": 46.0, - "step": 20573 - }, - { - "epoch": 1.5730259762601067, - "grad_norm": 0.0006472570239566267, - "learning_rate": 0.00019999878011458445, - "loss": 46.0, - "step": 20574 - }, - { - "epoch": 1.5731024332434964, - "grad_norm": 0.000870082585606724, - "learning_rate": 0.000199998779995939, - "loss": 46.0, - "step": 20575 - }, - { - "epoch": 1.573178890226886, - "grad_norm": 0.0007107230485416949, - "learning_rate": 0.00019999877987728775, - "loss": 46.0, - "step": 20576 - }, - { - "epoch": 1.5732553472102757, - "grad_norm": 0.0020948899909853935, - "learning_rate": 0.00019999877975863077, - "loss": 46.0, - "step": 20577 - }, - { - "epoch": 1.5733318041936655, - "grad_norm": 0.0005416327039711177, - "learning_rate": 0.00019999877963996801, - "loss": 46.0, - "step": 20578 - }, - { - "epoch": 1.5734082611770552, - "grad_norm": 0.001961984671652317, - "learning_rate": 0.00019999877952129946, - "loss": 46.0, - "step": 20579 - }, - { - "epoch": 1.573484718160445, - "grad_norm": 0.0012567578814923763, - "learning_rate": 0.00019999877940262518, - "loss": 46.0, - "step": 20580 - }, - { - "epoch": 1.5735611751438348, - "grad_norm": 0.009453502483665943, - "learning_rate": 0.0001999987792839451, - "loss": 46.0, - "step": 20581 - }, - { - "epoch": 1.5736376321272245, - "grad_norm": 0.00041271818918175995, - "learning_rate": 0.00019999877916525925, - "loss": 46.0, - "step": 20582 - }, - { - "epoch": 1.5737140891106143, - "grad_norm": 0.0016757945995777845, - "learning_rate": 0.00019999877904656766, - "loss": 46.0, - "step": 20583 - }, - { - "epoch": 1.5737905460940038, - "grad_norm": 0.00207105977460742, - "learning_rate": 0.00019999877892787023, - "loss": 46.0, - "step": 20584 - }, - { - "epoch": 1.5738670030773936, - "grad_norm": 0.0008442188263870776, - "learning_rate": 0.00019999877880916712, - "loss": 46.0, - "step": 20585 - }, - { - "epoch": 1.5739434600607833, - "grad_norm": 0.0008008594159036875, - "learning_rate": 0.0001999987786904582, - "loss": 46.0, - "step": 20586 - }, - { - "epoch": 1.5740199170441729, - "grad_norm": 0.0011094157816842198, - "learning_rate": 0.00019999877857174349, - "loss": 46.0, - "step": 20587 - }, - { - "epoch": 1.5740963740275626, - "grad_norm": 0.0004447736428119242, - "learning_rate": 0.00019999877845302305, - "loss": 46.0, - "step": 20588 - }, - { - "epoch": 1.5741728310109524, - "grad_norm": 0.0006118929595686495, - "learning_rate": 0.0001999987783342968, - "loss": 46.0, - "step": 20589 - }, - { - "epoch": 1.5742492879943422, - "grad_norm": 0.0006569484248757362, - "learning_rate": 0.0001999987782155648, - "loss": 46.0, - "step": 20590 - }, - { - "epoch": 1.574325744977732, - "grad_norm": 0.004677473101764917, - "learning_rate": 0.00019999877809682705, - "loss": 46.0, - "step": 20591 - }, - { - "epoch": 1.5744022019611217, - "grad_norm": 0.0007658423273824155, - "learning_rate": 0.00019999877797808352, - "loss": 46.0, - "step": 20592 - }, - { - "epoch": 1.5744786589445114, - "grad_norm": 0.002214341424405575, - "learning_rate": 0.0001999987778593342, - "loss": 46.0, - "step": 20593 - }, - { - "epoch": 1.5745551159279012, - "grad_norm": 0.0021820280235260725, - "learning_rate": 0.00019999877774057914, - "loss": 46.0, - "step": 20594 - }, - { - "epoch": 1.5746315729112907, - "grad_norm": 0.0028867812361568213, - "learning_rate": 0.00019999877762181826, - "loss": 46.0, - "step": 20595 - }, - { - "epoch": 1.5747080298946805, - "grad_norm": 0.0019501956412568688, - "learning_rate": 0.00019999877750305167, - "loss": 46.0, - "step": 20596 - }, - { - "epoch": 1.5747844868780703, - "grad_norm": 0.010670505464076996, - "learning_rate": 0.00019999877738427927, - "loss": 46.0, - "step": 20597 - }, - { - "epoch": 1.5748609438614598, - "grad_norm": 0.0008644146146252751, - "learning_rate": 0.00019999877726550113, - "loss": 46.0, - "step": 20598 - }, - { - "epoch": 1.5749374008448496, - "grad_norm": 0.0017332402057945728, - "learning_rate": 0.0001999987771467172, - "loss": 46.0, - "step": 20599 - }, - { - "epoch": 1.5750138578282393, - "grad_norm": 0.005928058177232742, - "learning_rate": 0.00019999877702792752, - "loss": 46.0, - "step": 20600 - }, - { - "epoch": 1.575090314811629, - "grad_norm": 0.0026727814693003893, - "learning_rate": 0.00019999877690913206, - "loss": 46.0, - "step": 20601 - }, - { - "epoch": 1.5751667717950188, - "grad_norm": 0.0022742270957678556, - "learning_rate": 0.00019999877679033082, - "loss": 46.0, - "step": 20602 - }, - { - "epoch": 1.5752432287784086, - "grad_norm": 0.0019309110939502716, - "learning_rate": 0.0001999987766715238, - "loss": 46.0, - "step": 20603 - }, - { - "epoch": 1.5753196857617984, - "grad_norm": 0.0012425679014995694, - "learning_rate": 0.00019999877655271106, - "loss": 46.0, - "step": 20604 - }, - { - "epoch": 1.5753961427451881, - "grad_norm": 0.0011150309583172202, - "learning_rate": 0.0001999987764338925, - "loss": 46.0, - "step": 20605 - }, - { - "epoch": 1.5754725997285777, - "grad_norm": 0.007273803930729628, - "learning_rate": 0.0001999987763150682, - "loss": 46.0, - "step": 20606 - }, - { - "epoch": 1.5755490567119674, - "grad_norm": 0.002322570187970996, - "learning_rate": 0.00019999877619623812, - "loss": 46.0, - "step": 20607 - }, - { - "epoch": 1.5756255136953572, - "grad_norm": 0.001206431072205305, - "learning_rate": 0.00019999877607740227, - "loss": 46.0, - "step": 20608 - }, - { - "epoch": 1.5757019706787467, - "grad_norm": 0.0004714473325293511, - "learning_rate": 0.00019999877595856065, - "loss": 46.0, - "step": 20609 - }, - { - "epoch": 1.5757784276621365, - "grad_norm": 0.0010275548556819558, - "learning_rate": 0.00019999877583971328, - "loss": 46.0, - "step": 20610 - }, - { - "epoch": 1.5758548846455263, - "grad_norm": 0.0010984393302351236, - "learning_rate": 0.0001999987757208601, - "loss": 46.0, - "step": 20611 - }, - { - "epoch": 1.575931341628916, - "grad_norm": 0.0033590730745345354, - "learning_rate": 0.0001999987756020012, - "loss": 46.0, - "step": 20612 - }, - { - "epoch": 1.5760077986123058, - "grad_norm": 0.0015435887034982443, - "learning_rate": 0.0001999987754831365, - "loss": 46.0, - "step": 20613 - }, - { - "epoch": 1.5760842555956955, - "grad_norm": 0.0014658686704933643, - "learning_rate": 0.00019999877536426604, - "loss": 46.0, - "step": 20614 - }, - { - "epoch": 1.5761607125790853, - "grad_norm": 0.0015555268619209528, - "learning_rate": 0.00019999877524538978, - "loss": 46.0, - "step": 20615 - }, - { - "epoch": 1.576237169562475, - "grad_norm": 0.006283890455961227, - "learning_rate": 0.0001999987751265078, - "loss": 46.0, - "step": 20616 - }, - { - "epoch": 1.5763136265458646, - "grad_norm": 0.0006554757128469646, - "learning_rate": 0.00019999877500762004, - "loss": 46.0, - "step": 20617 - }, - { - "epoch": 1.5763900835292544, - "grad_norm": 0.0010277057299390435, - "learning_rate": 0.00019999877488872646, - "loss": 46.0, - "step": 20618 - }, - { - "epoch": 1.5764665405126441, - "grad_norm": 0.01042028609663248, - "learning_rate": 0.00019999877476982718, - "loss": 46.0, - "step": 20619 - }, - { - "epoch": 1.5765429974960337, - "grad_norm": 0.0014580290298908949, - "learning_rate": 0.00019999877465092208, - "loss": 46.0, - "step": 20620 - }, - { - "epoch": 1.5766194544794234, - "grad_norm": 0.000923613493796438, - "learning_rate": 0.00019999877453201123, - "loss": 46.0, - "step": 20621 - }, - { - "epoch": 1.5766959114628132, - "grad_norm": 0.0004414485301822424, - "learning_rate": 0.00019999877441309464, - "loss": 46.0, - "step": 20622 - }, - { - "epoch": 1.576772368446203, - "grad_norm": 0.0008916439837776124, - "learning_rate": 0.00019999877429417224, - "loss": 46.0, - "step": 20623 - }, - { - "epoch": 1.5768488254295927, - "grad_norm": 0.0014385569375008345, - "learning_rate": 0.0001999987741752441, - "loss": 46.0, - "step": 20624 - }, - { - "epoch": 1.5769252824129825, - "grad_norm": 0.0008587023476138711, - "learning_rate": 0.00019999877405631016, - "loss": 46.0, - "step": 20625 - }, - { - "epoch": 1.5770017393963722, - "grad_norm": 0.0012934551341459155, - "learning_rate": 0.00019999877393737044, - "loss": 46.0, - "step": 20626 - }, - { - "epoch": 1.577078196379762, - "grad_norm": 0.004118271172046661, - "learning_rate": 0.00019999877381842498, - "loss": 46.0, - "step": 20627 - }, - { - "epoch": 1.5771546533631515, - "grad_norm": 0.003280634991824627, - "learning_rate": 0.00019999877369947377, - "loss": 46.0, - "step": 20628 - }, - { - "epoch": 1.5772311103465413, - "grad_norm": 0.0030874786898493767, - "learning_rate": 0.00019999877358051676, - "loss": 46.0, - "step": 20629 - }, - { - "epoch": 1.577307567329931, - "grad_norm": 0.0005862012039870024, - "learning_rate": 0.00019999877346155398, - "loss": 46.0, - "step": 20630 - }, - { - "epoch": 1.5773840243133206, - "grad_norm": 0.0010861713672056794, - "learning_rate": 0.00019999877334258542, - "loss": 46.0, - "step": 20631 - }, - { - "epoch": 1.5774604812967103, - "grad_norm": 0.0014244478661566973, - "learning_rate": 0.00019999877322361115, - "loss": 46.0, - "step": 20632 - }, - { - "epoch": 1.5775369382801, - "grad_norm": 0.0007127447752282023, - "learning_rate": 0.00019999877310463105, - "loss": 46.0, - "step": 20633 - }, - { - "epoch": 1.5776133952634899, - "grad_norm": 0.0009441408910788596, - "learning_rate": 0.00019999877298564517, - "loss": 46.0, - "step": 20634 - }, - { - "epoch": 1.5776898522468796, - "grad_norm": 0.0007822144543752074, - "learning_rate": 0.00019999877286665355, - "loss": 46.0, - "step": 20635 - }, - { - "epoch": 1.5777663092302694, - "grad_norm": 0.0006972999544814229, - "learning_rate": 0.00019999877274765618, - "loss": 46.0, - "step": 20636 - }, - { - "epoch": 1.5778427662136592, - "grad_norm": 0.0010089692659676075, - "learning_rate": 0.000199998772628653, - "loss": 46.0, - "step": 20637 - }, - { - "epoch": 1.577919223197049, - "grad_norm": 0.000784090138040483, - "learning_rate": 0.0001999987725096441, - "loss": 46.0, - "step": 20638 - }, - { - "epoch": 1.5779956801804385, - "grad_norm": 0.0022438494488596916, - "learning_rate": 0.0001999987723906294, - "loss": 46.0, - "step": 20639 - }, - { - "epoch": 1.5780721371638282, - "grad_norm": 0.0030220877379179, - "learning_rate": 0.00019999877227160892, - "loss": 46.0, - "step": 20640 - }, - { - "epoch": 1.578148594147218, - "grad_norm": 0.011319195851683617, - "learning_rate": 0.00019999877215258268, - "loss": 46.0, - "step": 20641 - }, - { - "epoch": 1.5782250511306075, - "grad_norm": 0.010835738852620125, - "learning_rate": 0.00019999877203355067, - "loss": 46.0, - "step": 20642 - }, - { - "epoch": 1.5783015081139973, - "grad_norm": 0.0028837614227086306, - "learning_rate": 0.00019999877191451292, - "loss": 46.0, - "step": 20643 - }, - { - "epoch": 1.578377965097387, - "grad_norm": 0.00233117095194757, - "learning_rate": 0.00019999877179546936, - "loss": 46.0, - "step": 20644 - }, - { - "epoch": 1.5784544220807768, - "grad_norm": 0.000977079151198268, - "learning_rate": 0.00019999877167642006, - "loss": 46.0, - "step": 20645 - }, - { - "epoch": 1.5785308790641666, - "grad_norm": 0.0034887176007032394, - "learning_rate": 0.00019999877155736496, - "loss": 46.0, - "step": 20646 - }, - { - "epoch": 1.5786073360475563, - "grad_norm": 0.0014209384098649025, - "learning_rate": 0.0001999987714383041, - "loss": 46.0, - "step": 20647 - }, - { - "epoch": 1.578683793030946, - "grad_norm": 0.00106794573366642, - "learning_rate": 0.0001999987713192375, - "loss": 46.0, - "step": 20648 - }, - { - "epoch": 1.5787602500143358, - "grad_norm": 0.0006753666675649583, - "learning_rate": 0.0001999987712001651, - "loss": 46.0, - "step": 20649 - }, - { - "epoch": 1.5788367069977254, - "grad_norm": 0.003812754061073065, - "learning_rate": 0.00019999877108108693, - "loss": 46.0, - "step": 20650 - }, - { - "epoch": 1.5789131639811151, - "grad_norm": 0.0013617555378004909, - "learning_rate": 0.00019999877096200298, - "loss": 46.0, - "step": 20651 - }, - { - "epoch": 1.5789896209645047, - "grad_norm": 0.005027902778238058, - "learning_rate": 0.00019999877084291332, - "loss": 46.0, - "step": 20652 - }, - { - "epoch": 1.5790660779478944, - "grad_norm": 0.0022772192023694515, - "learning_rate": 0.00019999877072381783, - "loss": 46.0, - "step": 20653 - }, - { - "epoch": 1.5791425349312842, - "grad_norm": 0.0010735277319326997, - "learning_rate": 0.0001999987706047166, - "loss": 46.0, - "step": 20654 - }, - { - "epoch": 1.579218991914674, - "grad_norm": 0.002673623850569129, - "learning_rate": 0.00019999877048560956, - "loss": 46.0, - "step": 20655 - }, - { - "epoch": 1.5792954488980637, - "grad_norm": 0.0008719094912521541, - "learning_rate": 0.0001999987703664968, - "loss": 46.0, - "step": 20656 - }, - { - "epoch": 1.5793719058814535, - "grad_norm": 0.0009952655527740717, - "learning_rate": 0.00019999877024737828, - "loss": 46.0, - "step": 20657 - }, - { - "epoch": 1.5794483628648432, - "grad_norm": 0.002484864555299282, - "learning_rate": 0.00019999877012825395, - "loss": 46.0, - "step": 20658 - }, - { - "epoch": 1.579524819848233, - "grad_norm": 0.00041910645086318254, - "learning_rate": 0.00019999877000912385, - "loss": 46.0, - "step": 20659 - }, - { - "epoch": 1.5796012768316228, - "grad_norm": 0.0013854647986590862, - "learning_rate": 0.000199998769889988, - "loss": 46.0, - "step": 20660 - }, - { - "epoch": 1.5796777338150123, - "grad_norm": 0.0016841775504872203, - "learning_rate": 0.0001999987697708464, - "loss": 46.0, - "step": 20661 - }, - { - "epoch": 1.579754190798402, - "grad_norm": 0.003285430371761322, - "learning_rate": 0.00019999876965169899, - "loss": 46.0, - "step": 20662 - }, - { - "epoch": 1.5798306477817916, - "grad_norm": 0.0012333591002970934, - "learning_rate": 0.00019999876953254582, - "loss": 46.0, - "step": 20663 - }, - { - "epoch": 1.5799071047651814, - "grad_norm": 0.0013427617959678173, - "learning_rate": 0.00019999876941338688, - "loss": 46.0, - "step": 20664 - }, - { - "epoch": 1.5799835617485711, - "grad_norm": 0.000605247390922159, - "learning_rate": 0.0001999987692942222, - "loss": 46.0, - "step": 20665 - }, - { - "epoch": 1.580060018731961, - "grad_norm": 0.0016831998946145177, - "learning_rate": 0.00019999876917505173, - "loss": 46.0, - "step": 20666 - }, - { - "epoch": 1.5801364757153507, - "grad_norm": 0.002568417228758335, - "learning_rate": 0.00019999876905587547, - "loss": 46.0, - "step": 20667 - }, - { - "epoch": 1.5802129326987404, - "grad_norm": 0.000863865832798183, - "learning_rate": 0.00019999876893669346, - "loss": 46.0, - "step": 20668 - }, - { - "epoch": 1.5802893896821302, - "grad_norm": 0.001574463676661253, - "learning_rate": 0.0001999987688175057, - "loss": 46.0, - "step": 20669 - }, - { - "epoch": 1.58036584666552, - "grad_norm": 0.0005493666394613683, - "learning_rate": 0.00019999876869831212, - "loss": 46.0, - "step": 20670 - }, - { - "epoch": 1.5804423036489097, - "grad_norm": 0.0020681836176663637, - "learning_rate": 0.00019999876857911282, - "loss": 46.0, - "step": 20671 - }, - { - "epoch": 1.5805187606322992, - "grad_norm": 0.00034496409352868795, - "learning_rate": 0.00019999876845990772, - "loss": 46.0, - "step": 20672 - }, - { - "epoch": 1.580595217615689, - "grad_norm": 0.0012792296474799514, - "learning_rate": 0.0001999987683406969, - "loss": 46.0, - "step": 20673 - }, - { - "epoch": 1.5806716745990785, - "grad_norm": 0.0008240740280598402, - "learning_rate": 0.00019999876822148025, - "loss": 46.0, - "step": 20674 - }, - { - "epoch": 1.5807481315824683, - "grad_norm": 0.0009947834769263864, - "learning_rate": 0.00019999876810225783, - "loss": 46.0, - "step": 20675 - }, - { - "epoch": 1.580824588565858, - "grad_norm": 0.000829110445920378, - "learning_rate": 0.0001999987679830297, - "loss": 46.0, - "step": 20676 - }, - { - "epoch": 1.5809010455492478, - "grad_norm": 0.0026388815604150295, - "learning_rate": 0.00019999876786379575, - "loss": 46.0, - "step": 20677 - }, - { - "epoch": 1.5809775025326376, - "grad_norm": 0.001018973533064127, - "learning_rate": 0.00019999876774455606, - "loss": 46.0, - "step": 20678 - }, - { - "epoch": 1.5810539595160273, - "grad_norm": 0.000981897464953363, - "learning_rate": 0.00019999876762531054, - "loss": 46.0, - "step": 20679 - }, - { - "epoch": 1.581130416499417, - "grad_norm": 0.001099984860047698, - "learning_rate": 0.00019999876750605934, - "loss": 46.0, - "step": 20680 - }, - { - "epoch": 1.5812068734828069, - "grad_norm": 0.001517529017291963, - "learning_rate": 0.00019999876738680233, - "loss": 46.0, - "step": 20681 - }, - { - "epoch": 1.5812833304661964, - "grad_norm": 0.0002112446236424148, - "learning_rate": 0.00019999876726753955, - "loss": 46.0, - "step": 20682 - }, - { - "epoch": 1.5813597874495862, - "grad_norm": 0.0005338817136362195, - "learning_rate": 0.000199998767148271, - "loss": 46.0, - "step": 20683 - }, - { - "epoch": 1.581436244432976, - "grad_norm": 0.00727510591968894, - "learning_rate": 0.00019999876702899667, - "loss": 46.0, - "step": 20684 - }, - { - "epoch": 1.5815127014163655, - "grad_norm": 0.0014092057244852185, - "learning_rate": 0.00019999876690971656, - "loss": 46.0, - "step": 20685 - }, - { - "epoch": 1.5815891583997552, - "grad_norm": 0.0008078165119513869, - "learning_rate": 0.0001999987667904307, - "loss": 46.0, - "step": 20686 - }, - { - "epoch": 1.581665615383145, - "grad_norm": 0.00041847664397209883, - "learning_rate": 0.00019999876667113907, - "loss": 46.0, - "step": 20687 - }, - { - "epoch": 1.5817420723665347, - "grad_norm": 0.0012829189654439688, - "learning_rate": 0.00019999876655184167, - "loss": 46.0, - "step": 20688 - }, - { - "epoch": 1.5818185293499245, - "grad_norm": 0.0007831389084458351, - "learning_rate": 0.0001999987664325385, - "loss": 46.0, - "step": 20689 - }, - { - "epoch": 1.5818949863333143, - "grad_norm": 0.0027099738363176584, - "learning_rate": 0.0001999987663132296, - "loss": 46.0, - "step": 20690 - }, - { - "epoch": 1.581971443316704, - "grad_norm": 0.002286385279148817, - "learning_rate": 0.00019999876619391488, - "loss": 46.0, - "step": 20691 - }, - { - "epoch": 1.5820479003000938, - "grad_norm": 0.0015743793919682503, - "learning_rate": 0.0001999987660745944, - "loss": 46.0, - "step": 20692 - }, - { - "epoch": 1.5821243572834833, - "grad_norm": 0.0008356235921382904, - "learning_rate": 0.00019999876595526813, - "loss": 46.0, - "step": 20693 - }, - { - "epoch": 1.582200814266873, - "grad_norm": 0.0007721131551079452, - "learning_rate": 0.00019999876583593612, - "loss": 46.0, - "step": 20694 - }, - { - "epoch": 1.5822772712502629, - "grad_norm": 0.0007326969061978161, - "learning_rate": 0.00019999876571659834, - "loss": 46.0, - "step": 20695 - }, - { - "epoch": 1.5823537282336524, - "grad_norm": 0.0013834059936925769, - "learning_rate": 0.0001999987655972548, - "loss": 46.0, - "step": 20696 - }, - { - "epoch": 1.5824301852170422, - "grad_norm": 0.0014844231773167849, - "learning_rate": 0.0001999987654779055, - "loss": 46.0, - "step": 20697 - }, - { - "epoch": 1.582506642200432, - "grad_norm": 0.0014713979326188564, - "learning_rate": 0.00019999876535855039, - "loss": 46.0, - "step": 20698 - }, - { - "epoch": 1.5825830991838217, - "grad_norm": 0.003678596578538418, - "learning_rate": 0.0001999987652391895, - "loss": 46.0, - "step": 20699 - }, - { - "epoch": 1.5826595561672114, - "grad_norm": 0.0004370861279312521, - "learning_rate": 0.0001999987651198229, - "loss": 46.0, - "step": 20700 - }, - { - "epoch": 1.5827360131506012, - "grad_norm": 0.0005486364243552089, - "learning_rate": 0.0001999987650004505, - "loss": 46.0, - "step": 20701 - }, - { - "epoch": 1.582812470133991, - "grad_norm": 0.00366935133934021, - "learning_rate": 0.00019999876488107233, - "loss": 46.0, - "step": 20702 - }, - { - "epoch": 1.5828889271173807, - "grad_norm": 0.0012095060665160418, - "learning_rate": 0.0001999987647616884, - "loss": 46.0, - "step": 20703 - }, - { - "epoch": 1.5829653841007703, - "grad_norm": 0.001610441948287189, - "learning_rate": 0.00019999876464229868, - "loss": 46.0, - "step": 20704 - }, - { - "epoch": 1.58304184108416, - "grad_norm": 0.0005798356723971665, - "learning_rate": 0.00019999876452290321, - "loss": 46.0, - "step": 20705 - }, - { - "epoch": 1.5831182980675498, - "grad_norm": 0.005967284552752972, - "learning_rate": 0.00019999876440350195, - "loss": 46.0, - "step": 20706 - }, - { - "epoch": 1.5831947550509393, - "grad_norm": 0.004606550559401512, - "learning_rate": 0.00019999876428409495, - "loss": 46.0, - "step": 20707 - }, - { - "epoch": 1.583271212034329, - "grad_norm": 0.00360392895527184, - "learning_rate": 0.00019999876416468214, - "loss": 46.0, - "step": 20708 - }, - { - "epoch": 1.5833476690177188, - "grad_norm": 0.0006789357867091894, - "learning_rate": 0.00019999876404526359, - "loss": 46.0, - "step": 20709 - }, - { - "epoch": 1.5834241260011086, - "grad_norm": 0.0015642885118722916, - "learning_rate": 0.00019999876392583926, - "loss": 46.0, - "step": 20710 - }, - { - "epoch": 1.5835005829844984, - "grad_norm": 0.00783013366162777, - "learning_rate": 0.00019999876380640919, - "loss": 46.0, - "step": 20711 - }, - { - "epoch": 1.5835770399678881, - "grad_norm": 0.001398614956997335, - "learning_rate": 0.0001999987636869733, - "loss": 46.0, - "step": 20712 - }, - { - "epoch": 1.5836534969512779, - "grad_norm": 0.0005407409043982625, - "learning_rate": 0.0001999987635675317, - "loss": 46.0, - "step": 20713 - }, - { - "epoch": 1.5837299539346676, - "grad_norm": 0.005178102757781744, - "learning_rate": 0.00019999876344808427, - "loss": 46.0, - "step": 20714 - }, - { - "epoch": 1.5838064109180572, - "grad_norm": 0.0008475991198793054, - "learning_rate": 0.0001999987633286311, - "loss": 46.0, - "step": 20715 - }, - { - "epoch": 1.583882867901447, - "grad_norm": 0.0022023834753781557, - "learning_rate": 0.00019999876320917216, - "loss": 46.0, - "step": 20716 - }, - { - "epoch": 1.5839593248848367, - "grad_norm": 0.0016136984340846539, - "learning_rate": 0.00019999876308970748, - "loss": 46.0, - "step": 20717 - }, - { - "epoch": 1.5840357818682262, - "grad_norm": 0.00853719748556614, - "learning_rate": 0.000199998762970237, - "loss": 46.0, - "step": 20718 - }, - { - "epoch": 1.584112238851616, - "grad_norm": 0.0005609886138699949, - "learning_rate": 0.00019999876285076073, - "loss": 46.0, - "step": 20719 - }, - { - "epoch": 1.5841886958350058, - "grad_norm": 0.0009174311999231577, - "learning_rate": 0.00019999876273127872, - "loss": 46.0, - "step": 20720 - }, - { - "epoch": 1.5842651528183955, - "grad_norm": 0.001000142190605402, - "learning_rate": 0.00019999876261179091, - "loss": 46.0, - "step": 20721 - }, - { - "epoch": 1.5843416098017853, - "grad_norm": 0.0019016934093087912, - "learning_rate": 0.0001999987624922974, - "loss": 46.0, - "step": 20722 - }, - { - "epoch": 1.584418066785175, - "grad_norm": 0.000705777492839843, - "learning_rate": 0.00019999876237279803, - "loss": 46.0, - "step": 20723 - }, - { - "epoch": 1.5844945237685648, - "grad_norm": 0.0008949976763688028, - "learning_rate": 0.00019999876225329296, - "loss": 46.0, - "step": 20724 - }, - { - "epoch": 1.5845709807519546, - "grad_norm": 0.0016376987332478166, - "learning_rate": 0.0001999987621337821, - "loss": 46.0, - "step": 20725 - }, - { - "epoch": 1.584647437735344, - "grad_norm": 0.0020857248455286026, - "learning_rate": 0.00019999876201426547, - "loss": 46.0, - "step": 20726 - }, - { - "epoch": 1.5847238947187339, - "grad_norm": 0.0008070914191193879, - "learning_rate": 0.00019999876189474305, - "loss": 46.0, - "step": 20727 - }, - { - "epoch": 1.5848003517021236, - "grad_norm": 0.00403270497918129, - "learning_rate": 0.00019999876177521485, - "loss": 46.0, - "step": 20728 - }, - { - "epoch": 1.5848768086855132, - "grad_norm": 0.004228347912430763, - "learning_rate": 0.0001999987616556809, - "loss": 46.0, - "step": 20729 - }, - { - "epoch": 1.584953265668903, - "grad_norm": 0.0013364336919039488, - "learning_rate": 0.0001999987615361412, - "loss": 46.0, - "step": 20730 - }, - { - "epoch": 1.5850297226522927, - "grad_norm": 0.0017118168761953712, - "learning_rate": 0.0001999987614165957, - "loss": 46.0, - "step": 20731 - }, - { - "epoch": 1.5851061796356825, - "grad_norm": 0.004465586040169001, - "learning_rate": 0.00019999876129704448, - "loss": 46.0, - "step": 20732 - }, - { - "epoch": 1.5851826366190722, - "grad_norm": 0.0030279208440333605, - "learning_rate": 0.00019999876117748747, - "loss": 46.0, - "step": 20733 - }, - { - "epoch": 1.585259093602462, - "grad_norm": 0.0004620306135620922, - "learning_rate": 0.00019999876105792467, - "loss": 46.0, - "step": 20734 - }, - { - "epoch": 1.5853355505858517, - "grad_norm": 0.0067252651788294315, - "learning_rate": 0.0001999987609383561, - "loss": 46.0, - "step": 20735 - }, - { - "epoch": 1.5854120075692415, - "grad_norm": 0.001134400488808751, - "learning_rate": 0.0001999987608187818, - "loss": 46.0, - "step": 20736 - }, - { - "epoch": 1.585488464552631, - "grad_norm": 0.00219959719106555, - "learning_rate": 0.0001999987606992017, - "loss": 46.0, - "step": 20737 - }, - { - "epoch": 1.5855649215360208, - "grad_norm": 0.0007221494452096522, - "learning_rate": 0.00019999876057961581, - "loss": 46.0, - "step": 20738 - }, - { - "epoch": 1.5856413785194106, - "grad_norm": 0.0009620852069929242, - "learning_rate": 0.0001999987604600242, - "loss": 46.0, - "step": 20739 - }, - { - "epoch": 1.5857178355028, - "grad_norm": 0.0038043714594095945, - "learning_rate": 0.0001999987603404268, - "loss": 46.0, - "step": 20740 - }, - { - "epoch": 1.5857942924861899, - "grad_norm": 0.00460520526394248, - "learning_rate": 0.00019999876022082363, - "loss": 46.0, - "step": 20741 - }, - { - "epoch": 1.5858707494695796, - "grad_norm": 0.0009562601917423308, - "learning_rate": 0.00019999876010121467, - "loss": 46.0, - "step": 20742 - }, - { - "epoch": 1.5859472064529694, - "grad_norm": 0.0016995855839923024, - "learning_rate": 0.00019999875998159995, - "loss": 46.0, - "step": 20743 - }, - { - "epoch": 1.5860236634363591, - "grad_norm": 0.0008635168196633458, - "learning_rate": 0.00019999875986197947, - "loss": 46.0, - "step": 20744 - }, - { - "epoch": 1.586100120419749, - "grad_norm": 0.0006289152661338449, - "learning_rate": 0.0001999987597423532, - "loss": 46.0, - "step": 20745 - }, - { - "epoch": 1.5861765774031387, - "grad_norm": 0.04407087340950966, - "learning_rate": 0.00019999875962272118, - "loss": 46.0, - "step": 20746 - }, - { - "epoch": 1.5862530343865284, - "grad_norm": 0.0005366257973946631, - "learning_rate": 0.0001999987595030834, - "loss": 46.0, - "step": 20747 - }, - { - "epoch": 1.586329491369918, - "grad_norm": 0.0011152320075780153, - "learning_rate": 0.00019999875938343984, - "loss": 46.0, - "step": 20748 - }, - { - "epoch": 1.5864059483533077, - "grad_norm": 0.0016431775875389576, - "learning_rate": 0.0001999987592637905, - "loss": 46.0, - "step": 20749 - }, - { - "epoch": 1.5864824053366975, - "grad_norm": 0.008029848337173462, - "learning_rate": 0.00019999875914413542, - "loss": 46.0, - "step": 20750 - }, - { - "epoch": 1.586558862320087, - "grad_norm": 0.0025705730076879263, - "learning_rate": 0.00019999875902447454, - "loss": 46.0, - "step": 20751 - }, - { - "epoch": 1.5866353193034768, - "grad_norm": 0.004085468128323555, - "learning_rate": 0.0001999987589048079, - "loss": 46.0, - "step": 20752 - }, - { - "epoch": 1.5867117762868665, - "grad_norm": 0.0039822980761528015, - "learning_rate": 0.00019999875878513548, - "loss": 46.0, - "step": 20753 - }, - { - "epoch": 1.5867882332702563, - "grad_norm": 0.0006404825253412127, - "learning_rate": 0.00019999875866545728, - "loss": 46.0, - "step": 20754 - }, - { - "epoch": 1.586864690253646, - "grad_norm": 0.0008114344673231244, - "learning_rate": 0.00019999875854577337, - "loss": 46.0, - "step": 20755 - }, - { - "epoch": 1.5869411472370358, - "grad_norm": 0.0010592964245006442, - "learning_rate": 0.00019999875842608366, - "loss": 46.0, - "step": 20756 - }, - { - "epoch": 1.5870176042204256, - "grad_norm": 0.002433561021462083, - "learning_rate": 0.00019999875830638817, - "loss": 46.0, - "step": 20757 - }, - { - "epoch": 1.5870940612038154, - "grad_norm": 0.0029834541492164135, - "learning_rate": 0.0001999987581866869, - "loss": 46.0, - "step": 20758 - }, - { - "epoch": 1.587170518187205, - "grad_norm": 0.002690690103918314, - "learning_rate": 0.0001999987580669799, - "loss": 46.0, - "step": 20759 - }, - { - "epoch": 1.5872469751705947, - "grad_norm": 0.0016464430373162031, - "learning_rate": 0.00019999875794726707, - "loss": 46.0, - "step": 20760 - }, - { - "epoch": 1.5873234321539844, - "grad_norm": 0.0013718388509005308, - "learning_rate": 0.00019999875782754852, - "loss": 46.0, - "step": 20761 - }, - { - "epoch": 1.587399889137374, - "grad_norm": 0.0010634150821715593, - "learning_rate": 0.00019999875770782422, - "loss": 46.0, - "step": 20762 - }, - { - "epoch": 1.5874763461207637, - "grad_norm": 0.0021020257845520973, - "learning_rate": 0.0001999987575880941, - "loss": 46.0, - "step": 20763 - }, - { - "epoch": 1.5875528031041535, - "grad_norm": 0.0013116709887981415, - "learning_rate": 0.00019999875746835822, - "loss": 46.0, - "step": 20764 - }, - { - "epoch": 1.5876292600875432, - "grad_norm": 0.0010398468002676964, - "learning_rate": 0.00019999875734861658, - "loss": 46.0, - "step": 20765 - }, - { - "epoch": 1.587705717070933, - "grad_norm": 0.002630737377330661, - "learning_rate": 0.00019999875722886916, - "loss": 46.0, - "step": 20766 - }, - { - "epoch": 1.5877821740543228, - "grad_norm": 0.0012553774286061525, - "learning_rate": 0.000199998757109116, - "loss": 46.0, - "step": 20767 - }, - { - "epoch": 1.5878586310377125, - "grad_norm": 0.0010421781335026026, - "learning_rate": 0.00019999875698935705, - "loss": 46.0, - "step": 20768 - }, - { - "epoch": 1.5879350880211023, - "grad_norm": 0.0014124239096418023, - "learning_rate": 0.00019999875686959234, - "loss": 46.0, - "step": 20769 - }, - { - "epoch": 1.5880115450044918, - "grad_norm": 0.002958934521302581, - "learning_rate": 0.00019999875674982183, - "loss": 46.0, - "step": 20770 - }, - { - "epoch": 1.5880880019878816, - "grad_norm": 0.0010132124880328774, - "learning_rate": 0.0001999987566300456, - "loss": 46.0, - "step": 20771 - }, - { - "epoch": 1.5881644589712713, - "grad_norm": 0.006387178786098957, - "learning_rate": 0.00019999875651026356, - "loss": 46.0, - "step": 20772 - }, - { - "epoch": 1.5882409159546609, - "grad_norm": 0.0022145016118884087, - "learning_rate": 0.00019999875639047576, - "loss": 46.0, - "step": 20773 - }, - { - "epoch": 1.5883173729380506, - "grad_norm": 0.0007645189180038869, - "learning_rate": 0.0001999987562706822, - "loss": 46.0, - "step": 20774 - }, - { - "epoch": 1.5883938299214404, - "grad_norm": 0.0008706299122422934, - "learning_rate": 0.00019999875615088285, - "loss": 46.0, - "step": 20775 - }, - { - "epoch": 1.5884702869048302, - "grad_norm": 0.0010577547363936901, - "learning_rate": 0.00019999875603107775, - "loss": 46.0, - "step": 20776 - }, - { - "epoch": 1.58854674388822, - "grad_norm": 0.0009517275029793382, - "learning_rate": 0.00019999875591126688, - "loss": 46.0, - "step": 20777 - }, - { - "epoch": 1.5886232008716097, - "grad_norm": 0.01015094667673111, - "learning_rate": 0.00019999875579145024, - "loss": 46.0, - "step": 20778 - }, - { - "epoch": 1.5886996578549994, - "grad_norm": 0.0011491603218019009, - "learning_rate": 0.00019999875567162784, - "loss": 46.0, - "step": 20779 - }, - { - "epoch": 1.5887761148383892, - "grad_norm": 0.0012401501880958676, - "learning_rate": 0.00019999875555179965, - "loss": 46.0, - "step": 20780 - }, - { - "epoch": 1.5888525718217787, - "grad_norm": 0.0016608699224889278, - "learning_rate": 0.00019999875543196569, - "loss": 46.0, - "step": 20781 - }, - { - "epoch": 1.5889290288051685, - "grad_norm": 0.022490549832582474, - "learning_rate": 0.000199998755312126, - "loss": 46.0, - "step": 20782 - }, - { - "epoch": 1.589005485788558, - "grad_norm": 0.002403588267043233, - "learning_rate": 0.0001999987551922805, - "loss": 46.0, - "step": 20783 - }, - { - "epoch": 1.5890819427719478, - "grad_norm": 0.0007447859970852733, - "learning_rate": 0.00019999875507242926, - "loss": 46.0, - "step": 20784 - }, - { - "epoch": 1.5891583997553376, - "grad_norm": 0.0008410013979300857, - "learning_rate": 0.0001999987549525722, - "loss": 46.0, - "step": 20785 - }, - { - "epoch": 1.5892348567387273, - "grad_norm": 0.005045631900429726, - "learning_rate": 0.0001999987548327094, - "loss": 46.0, - "step": 20786 - }, - { - "epoch": 1.589311313722117, - "grad_norm": 0.00039256393210962415, - "learning_rate": 0.00019999875471284084, - "loss": 46.0, - "step": 20787 - }, - { - "epoch": 1.5893877707055069, - "grad_norm": 0.0008464088896289468, - "learning_rate": 0.0001999987545929665, - "loss": 46.0, - "step": 20788 - }, - { - "epoch": 1.5894642276888966, - "grad_norm": 0.0005065621226094663, - "learning_rate": 0.00019999875447308641, - "loss": 46.0, - "step": 20789 - }, - { - "epoch": 1.5895406846722864, - "grad_norm": 0.0009137816377915442, - "learning_rate": 0.00019999875435320052, - "loss": 46.0, - "step": 20790 - }, - { - "epoch": 1.5896171416556761, - "grad_norm": 0.0018472615629434586, - "learning_rate": 0.00019999875423330887, - "loss": 46.0, - "step": 20791 - }, - { - "epoch": 1.5896935986390657, - "grad_norm": 0.0076946658082306385, - "learning_rate": 0.00019999875411341148, - "loss": 46.0, - "step": 20792 - }, - { - "epoch": 1.5897700556224554, - "grad_norm": 0.001870285370387137, - "learning_rate": 0.00019999875399350829, - "loss": 46.0, - "step": 20793 - }, - { - "epoch": 1.589846512605845, - "grad_norm": 0.0007157549844123423, - "learning_rate": 0.00019999875387359932, - "loss": 46.0, - "step": 20794 - }, - { - "epoch": 1.5899229695892347, - "grad_norm": 0.0024230354465544224, - "learning_rate": 0.0001999987537536846, - "loss": 46.0, - "step": 20795 - }, - { - "epoch": 1.5899994265726245, - "grad_norm": 0.0018404213478788733, - "learning_rate": 0.0001999987536337641, - "loss": 46.0, - "step": 20796 - }, - { - "epoch": 1.5900758835560143, - "grad_norm": 0.0014037626096978784, - "learning_rate": 0.00019999875351383787, - "loss": 46.0, - "step": 20797 - }, - { - "epoch": 1.590152340539404, - "grad_norm": 0.0028544871602207422, - "learning_rate": 0.00019999875339390583, - "loss": 46.0, - "step": 20798 - }, - { - "epoch": 1.5902287975227938, - "grad_norm": 0.003408226650208235, - "learning_rate": 0.00019999875327396805, - "loss": 46.0, - "step": 20799 - }, - { - "epoch": 1.5903052545061835, - "grad_norm": 0.0007468569674529135, - "learning_rate": 0.00019999875315402445, - "loss": 46.0, - "step": 20800 - }, - { - "epoch": 1.5903817114895733, - "grad_norm": 0.002749278210103512, - "learning_rate": 0.00019999875303407512, - "loss": 46.0, - "step": 20801 - }, - { - "epoch": 1.590458168472963, - "grad_norm": 0.0016064891824498773, - "learning_rate": 0.00019999875291412003, - "loss": 46.0, - "step": 20802 - }, - { - "epoch": 1.5905346254563526, - "grad_norm": 0.0005981055437587202, - "learning_rate": 0.00019999875279415913, - "loss": 46.0, - "step": 20803 - }, - { - "epoch": 1.5906110824397424, - "grad_norm": 0.005029712803661823, - "learning_rate": 0.00019999875267419248, - "loss": 46.0, - "step": 20804 - }, - { - "epoch": 1.590687539423132, - "grad_norm": 0.00731795746833086, - "learning_rate": 0.00019999875255422006, - "loss": 46.0, - "step": 20805 - }, - { - "epoch": 1.5907639964065217, - "grad_norm": 0.0022763372398912907, - "learning_rate": 0.0001999987524342419, - "loss": 46.0, - "step": 20806 - }, - { - "epoch": 1.5908404533899114, - "grad_norm": 0.000999784329906106, - "learning_rate": 0.00019999875231425793, - "loss": 46.0, - "step": 20807 - }, - { - "epoch": 1.5909169103733012, - "grad_norm": 0.0031457243021577597, - "learning_rate": 0.0001999987521942682, - "loss": 46.0, - "step": 20808 - }, - { - "epoch": 1.590993367356691, - "grad_norm": 0.002259228378534317, - "learning_rate": 0.0001999987520742727, - "loss": 46.0, - "step": 20809 - }, - { - "epoch": 1.5910698243400807, - "grad_norm": 0.0005722450441680849, - "learning_rate": 0.00019999875195427148, - "loss": 46.0, - "step": 20810 - }, - { - "epoch": 1.5911462813234705, - "grad_norm": 0.0035994506906718016, - "learning_rate": 0.00019999875183426442, - "loss": 46.0, - "step": 20811 - }, - { - "epoch": 1.5912227383068602, - "grad_norm": 0.0035778535529971123, - "learning_rate": 0.00019999875171425161, - "loss": 46.0, - "step": 20812 - }, - { - "epoch": 1.5912991952902498, - "grad_norm": 0.0017123211873695254, - "learning_rate": 0.00019999875159423304, - "loss": 46.0, - "step": 20813 - }, - { - "epoch": 1.5913756522736395, - "grad_norm": 0.004178970120847225, - "learning_rate": 0.0001999987514742087, - "loss": 46.0, - "step": 20814 - }, - { - "epoch": 1.5914521092570293, - "grad_norm": 0.0010184430284425616, - "learning_rate": 0.0001999987513541786, - "loss": 46.0, - "step": 20815 - }, - { - "epoch": 1.5915285662404188, - "grad_norm": 0.0008125404710881412, - "learning_rate": 0.00019999875123414271, - "loss": 46.0, - "step": 20816 - }, - { - "epoch": 1.5916050232238086, - "grad_norm": 0.0008357223705388606, - "learning_rate": 0.00019999875111410107, - "loss": 46.0, - "step": 20817 - }, - { - "epoch": 1.5916814802071984, - "grad_norm": 0.003372203791514039, - "learning_rate": 0.00019999875099405365, - "loss": 46.0, - "step": 20818 - }, - { - "epoch": 1.5917579371905881, - "grad_norm": 0.007136847358196974, - "learning_rate": 0.00019999875087400046, - "loss": 46.0, - "step": 20819 - }, - { - "epoch": 1.5918343941739779, - "grad_norm": 0.0007031827699393034, - "learning_rate": 0.0001999987507539415, - "loss": 46.0, - "step": 20820 - }, - { - "epoch": 1.5919108511573676, - "grad_norm": 0.0008745537488721311, - "learning_rate": 0.00019999875063387678, - "loss": 46.0, - "step": 20821 - }, - { - "epoch": 1.5919873081407574, - "grad_norm": 0.0006434021051973104, - "learning_rate": 0.0001999987505138063, - "loss": 46.0, - "step": 20822 - }, - { - "epoch": 1.5920637651241472, - "grad_norm": 0.0007673674845136702, - "learning_rate": 0.00019999875039373004, - "loss": 46.0, - "step": 20823 - }, - { - "epoch": 1.5921402221075367, - "grad_norm": 0.0003213771269656718, - "learning_rate": 0.000199998750273648, - "loss": 46.0, - "step": 20824 - }, - { - "epoch": 1.5922166790909265, - "grad_norm": 0.0010954872705042362, - "learning_rate": 0.0001999987501535602, - "loss": 46.0, - "step": 20825 - }, - { - "epoch": 1.5922931360743162, - "grad_norm": 0.0029426219407469034, - "learning_rate": 0.00019999875003346663, - "loss": 46.0, - "step": 20826 - }, - { - "epoch": 1.5923695930577058, - "grad_norm": 0.0006779970717616379, - "learning_rate": 0.00019999874991336728, - "loss": 46.0, - "step": 20827 - }, - { - "epoch": 1.5924460500410955, - "grad_norm": 0.001310917316004634, - "learning_rate": 0.00019999874979326215, - "loss": 46.0, - "step": 20828 - }, - { - "epoch": 1.5925225070244853, - "grad_norm": 0.0008367177797481418, - "learning_rate": 0.00019999874967315128, - "loss": 46.0, - "step": 20829 - }, - { - "epoch": 1.592598964007875, - "grad_norm": 0.0020137790124863386, - "learning_rate": 0.0001999987495530346, - "loss": 46.0, - "step": 20830 - }, - { - "epoch": 1.5926754209912648, - "grad_norm": 0.002945945831015706, - "learning_rate": 0.00019999874943291222, - "loss": 46.0, - "step": 20831 - }, - { - "epoch": 1.5927518779746546, - "grad_norm": 0.0027137494180351496, - "learning_rate": 0.00019999874931278403, - "loss": 46.0, - "step": 20832 - }, - { - "epoch": 1.5928283349580443, - "grad_norm": 0.0009312950423918664, - "learning_rate": 0.00019999874919265003, - "loss": 46.0, - "step": 20833 - }, - { - "epoch": 1.592904791941434, - "grad_norm": 0.0016654873033985496, - "learning_rate": 0.00019999874907251032, - "loss": 46.0, - "step": 20834 - }, - { - "epoch": 1.5929812489248236, - "grad_norm": 0.0023353395517915487, - "learning_rate": 0.00019999874895236484, - "loss": 46.0, - "step": 20835 - }, - { - "epoch": 1.5930577059082134, - "grad_norm": 0.0016996051417663693, - "learning_rate": 0.00019999874883221355, - "loss": 46.0, - "step": 20836 - }, - { - "epoch": 1.5931341628916031, - "grad_norm": 0.0015450542559847236, - "learning_rate": 0.00019999874871205652, - "loss": 46.0, - "step": 20837 - }, - { - "epoch": 1.5932106198749927, - "grad_norm": 0.005874384194612503, - "learning_rate": 0.00019999874859189372, - "loss": 46.0, - "step": 20838 - }, - { - "epoch": 1.5932870768583824, - "grad_norm": 0.001090421574190259, - "learning_rate": 0.00019999874847172514, - "loss": 46.0, - "step": 20839 - }, - { - "epoch": 1.5933635338417722, - "grad_norm": 0.0009212449076585472, - "learning_rate": 0.00019999874835155082, - "loss": 46.0, - "step": 20840 - }, - { - "epoch": 1.593439990825162, - "grad_norm": 0.0010577606735751033, - "learning_rate": 0.0001999987482313707, - "loss": 46.0, - "step": 20841 - }, - { - "epoch": 1.5935164478085517, - "grad_norm": 0.0018657715991139412, - "learning_rate": 0.00019999874811118482, - "loss": 46.0, - "step": 20842 - }, - { - "epoch": 1.5935929047919415, - "grad_norm": 0.0012882158625870943, - "learning_rate": 0.00019999874799099315, - "loss": 46.0, - "step": 20843 - }, - { - "epoch": 1.5936693617753313, - "grad_norm": 0.002236689440906048, - "learning_rate": 0.00019999874787079573, - "loss": 46.0, - "step": 20844 - }, - { - "epoch": 1.593745818758721, - "grad_norm": 0.0011569999624043703, - "learning_rate": 0.00019999874775059254, - "loss": 46.0, - "step": 20845 - }, - { - "epoch": 1.5938222757421106, - "grad_norm": 0.0011731341946870089, - "learning_rate": 0.0001999987476303836, - "loss": 46.0, - "step": 20846 - }, - { - "epoch": 1.5938987327255003, - "grad_norm": 0.0016871416009962559, - "learning_rate": 0.00019999874751016887, - "loss": 46.0, - "step": 20847 - }, - { - "epoch": 1.59397518970889, - "grad_norm": 0.0012930063530802727, - "learning_rate": 0.00019999874738994838, - "loss": 46.0, - "step": 20848 - }, - { - "epoch": 1.5940516466922796, - "grad_norm": 0.0011039967648684978, - "learning_rate": 0.0001999987472697221, - "loss": 46.0, - "step": 20849 - }, - { - "epoch": 1.5941281036756694, - "grad_norm": 0.0024709177669137716, - "learning_rate": 0.00019999874714949004, - "loss": 46.0, - "step": 20850 - }, - { - "epoch": 1.5942045606590591, - "grad_norm": 0.00043582136277109385, - "learning_rate": 0.00019999874702925224, - "loss": 46.0, - "step": 20851 - }, - { - "epoch": 1.594281017642449, - "grad_norm": 0.0008094763616099954, - "learning_rate": 0.0001999987469090087, - "loss": 46.0, - "step": 20852 - }, - { - "epoch": 1.5943574746258387, - "grad_norm": 0.0012028083438053727, - "learning_rate": 0.0001999987467887593, - "loss": 46.0, - "step": 20853 - }, - { - "epoch": 1.5944339316092284, - "grad_norm": 0.0019079288467764854, - "learning_rate": 0.0001999987466685042, - "loss": 46.0, - "step": 20854 - }, - { - "epoch": 1.5945103885926182, - "grad_norm": 0.0005420685047283769, - "learning_rate": 0.00019999874654824331, - "loss": 46.0, - "step": 20855 - }, - { - "epoch": 1.594586845576008, - "grad_norm": 0.0006356465164572, - "learning_rate": 0.00019999874642797667, - "loss": 46.0, - "step": 20856 - }, - { - "epoch": 1.5946633025593975, - "grad_norm": 0.002818293171003461, - "learning_rate": 0.00019999874630770425, - "loss": 46.0, - "step": 20857 - }, - { - "epoch": 1.5947397595427872, - "grad_norm": 0.010402724146842957, - "learning_rate": 0.00019999874618742606, - "loss": 46.0, - "step": 20858 - }, - { - "epoch": 1.594816216526177, - "grad_norm": 0.0012574056163430214, - "learning_rate": 0.00019999874606714207, - "loss": 46.0, - "step": 20859 - }, - { - "epoch": 1.5948926735095665, - "grad_norm": 0.0017806269461289048, - "learning_rate": 0.00019999874594685236, - "loss": 46.0, - "step": 20860 - }, - { - "epoch": 1.5949691304929563, - "grad_norm": 0.0008180948789231479, - "learning_rate": 0.00019999874582655688, - "loss": 46.0, - "step": 20861 - }, - { - "epoch": 1.595045587476346, - "grad_norm": 0.0005747161339968443, - "learning_rate": 0.0001999987457062556, - "loss": 46.0, - "step": 20862 - }, - { - "epoch": 1.5951220444597358, - "grad_norm": 0.0013234395300969481, - "learning_rate": 0.00019999874558594854, - "loss": 46.0, - "step": 20863 - }, - { - "epoch": 1.5951985014431256, - "grad_norm": 0.0012940064771100879, - "learning_rate": 0.00019999874546563576, - "loss": 46.0, - "step": 20864 - }, - { - "epoch": 1.5952749584265153, - "grad_norm": 0.00130782974883914, - "learning_rate": 0.00019999874534531715, - "loss": 46.0, - "step": 20865 - }, - { - "epoch": 1.595351415409905, - "grad_norm": 0.0016017970629036427, - "learning_rate": 0.00019999874522499283, - "loss": 46.0, - "step": 20866 - }, - { - "epoch": 1.5954278723932949, - "grad_norm": 0.0006534263375215232, - "learning_rate": 0.00019999874510466268, - "loss": 46.0, - "step": 20867 - }, - { - "epoch": 1.5955043293766844, - "grad_norm": 0.0019020965555682778, - "learning_rate": 0.0001999987449843268, - "loss": 46.0, - "step": 20868 - }, - { - "epoch": 1.5955807863600742, - "grad_norm": 0.0007338155410252512, - "learning_rate": 0.00019999874486398517, - "loss": 46.0, - "step": 20869 - }, - { - "epoch": 1.595657243343464, - "grad_norm": 0.0007633164641447365, - "learning_rate": 0.00019999874474363772, - "loss": 46.0, - "step": 20870 - }, - { - "epoch": 1.5957337003268535, - "grad_norm": 0.0007111151935532689, - "learning_rate": 0.00019999874462328453, - "loss": 46.0, - "step": 20871 - }, - { - "epoch": 1.5958101573102432, - "grad_norm": 0.0005665522185154259, - "learning_rate": 0.00019999874450292557, - "loss": 46.0, - "step": 20872 - }, - { - "epoch": 1.595886614293633, - "grad_norm": 0.00092048489023, - "learning_rate": 0.00019999874438256083, - "loss": 46.0, - "step": 20873 - }, - { - "epoch": 1.5959630712770227, - "grad_norm": 0.0006376648088917136, - "learning_rate": 0.00019999874426219032, - "loss": 46.0, - "step": 20874 - }, - { - "epoch": 1.5960395282604125, - "grad_norm": 0.0003015449910890311, - "learning_rate": 0.00019999874414181406, - "loss": 46.0, - "step": 20875 - }, - { - "epoch": 1.5961159852438023, - "grad_norm": 0.002682760590687394, - "learning_rate": 0.000199998744021432, - "loss": 46.0, - "step": 20876 - }, - { - "epoch": 1.596192442227192, - "grad_norm": 0.0023874102625995874, - "learning_rate": 0.0001999987439010442, - "loss": 46.0, - "step": 20877 - }, - { - "epoch": 1.5962688992105818, - "grad_norm": 0.0016277419636026025, - "learning_rate": 0.00019999874378065063, - "loss": 46.0, - "step": 20878 - }, - { - "epoch": 1.5963453561939713, - "grad_norm": 0.0009841523133218288, - "learning_rate": 0.00019999874366025128, - "loss": 46.0, - "step": 20879 - }, - { - "epoch": 1.596421813177361, - "grad_norm": 0.000902264378964901, - "learning_rate": 0.00019999874353984615, - "loss": 46.0, - "step": 20880 - }, - { - "epoch": 1.5964982701607509, - "grad_norm": 0.006537141744047403, - "learning_rate": 0.00019999874341943526, - "loss": 46.0, - "step": 20881 - }, - { - "epoch": 1.5965747271441404, - "grad_norm": 0.0009693100000731647, - "learning_rate": 0.00019999874329901859, - "loss": 46.0, - "step": 20882 - }, - { - "epoch": 1.5966511841275302, - "grad_norm": 0.0008807822014205158, - "learning_rate": 0.00019999874317859617, - "loss": 46.0, - "step": 20883 - }, - { - "epoch": 1.59672764111092, - "grad_norm": 0.002462441800162196, - "learning_rate": 0.00019999874305816798, - "loss": 46.0, - "step": 20884 - }, - { - "epoch": 1.5968040980943097, - "grad_norm": 0.0009589497931301594, - "learning_rate": 0.00019999874293773402, - "loss": 46.0, - "step": 20885 - }, - { - "epoch": 1.5968805550776994, - "grad_norm": 0.0006952695548534393, - "learning_rate": 0.0001999987428172943, - "loss": 46.0, - "step": 20886 - }, - { - "epoch": 1.5969570120610892, - "grad_norm": 0.002531966660171747, - "learning_rate": 0.00019999874269684877, - "loss": 46.0, - "step": 20887 - }, - { - "epoch": 1.597033469044479, - "grad_norm": 0.0014390654396265745, - "learning_rate": 0.0001999987425763975, - "loss": 46.0, - "step": 20888 - }, - { - "epoch": 1.5971099260278687, - "grad_norm": 0.000946647603996098, - "learning_rate": 0.00019999874245594046, - "loss": 46.0, - "step": 20889 - }, - { - "epoch": 1.5971863830112583, - "grad_norm": 0.0008359703933820128, - "learning_rate": 0.00019999874233547765, - "loss": 46.0, - "step": 20890 - }, - { - "epoch": 1.597262839994648, - "grad_norm": 0.001937451888807118, - "learning_rate": 0.00019999874221500905, - "loss": 46.0, - "step": 20891 - }, - { - "epoch": 1.5973392969780378, - "grad_norm": 0.0006890198565088212, - "learning_rate": 0.0001999987420945347, - "loss": 46.0, - "step": 20892 - }, - { - "epoch": 1.5974157539614273, - "grad_norm": 0.000873405544552952, - "learning_rate": 0.0001999987419740546, - "loss": 46.0, - "step": 20893 - }, - { - "epoch": 1.597492210944817, - "grad_norm": 0.0019404280465096235, - "learning_rate": 0.0001999987418535687, - "loss": 46.0, - "step": 20894 - }, - { - "epoch": 1.5975686679282068, - "grad_norm": 0.0011197124840691686, - "learning_rate": 0.00019999874173307704, - "loss": 46.0, - "step": 20895 - }, - { - "epoch": 1.5976451249115966, - "grad_norm": 0.002604343928396702, - "learning_rate": 0.00019999874161257962, - "loss": 46.0, - "step": 20896 - }, - { - "epoch": 1.5977215818949864, - "grad_norm": 0.0009899816941469908, - "learning_rate": 0.0001999987414920764, - "loss": 46.0, - "step": 20897 - }, - { - "epoch": 1.5977980388783761, - "grad_norm": 0.004315212368965149, - "learning_rate": 0.00019999874137156744, - "loss": 46.0, - "step": 20898 - }, - { - "epoch": 1.5978744958617659, - "grad_norm": 0.004477735608816147, - "learning_rate": 0.0001999987412510527, - "loss": 46.0, - "step": 20899 - }, - { - "epoch": 1.5979509528451556, - "grad_norm": 0.0009104431373998523, - "learning_rate": 0.0001999987411305322, - "loss": 46.0, - "step": 20900 - }, - { - "epoch": 1.5980274098285452, - "grad_norm": 0.006579105276614428, - "learning_rate": 0.00019999874101000594, - "loss": 46.0, - "step": 20901 - }, - { - "epoch": 1.598103866811935, - "grad_norm": 0.00044072544551454484, - "learning_rate": 0.00019999874088947385, - "loss": 46.0, - "step": 20902 - }, - { - "epoch": 1.5981803237953247, - "grad_norm": 0.0009522698237560689, - "learning_rate": 0.00019999874076893605, - "loss": 46.0, - "step": 20903 - }, - { - "epoch": 1.5982567807787142, - "grad_norm": 0.0013952743029221892, - "learning_rate": 0.00019999874064839248, - "loss": 46.0, - "step": 20904 - }, - { - "epoch": 1.598333237762104, - "grad_norm": 0.006217521149665117, - "learning_rate": 0.00019999874052784313, - "loss": 46.0, - "step": 20905 - }, - { - "epoch": 1.5984096947454938, - "grad_norm": 0.0009118167217820883, - "learning_rate": 0.000199998740407288, - "loss": 46.0, - "step": 20906 - }, - { - "epoch": 1.5984861517288835, - "grad_norm": 0.0006965245120227337, - "learning_rate": 0.0001999987402867271, - "loss": 46.0, - "step": 20907 - }, - { - "epoch": 1.5985626087122733, - "grad_norm": 0.004919230006635189, - "learning_rate": 0.00019999874016616044, - "loss": 46.0, - "step": 20908 - }, - { - "epoch": 1.598639065695663, - "grad_norm": 0.0011556344106793404, - "learning_rate": 0.000199998740045588, - "loss": 46.0, - "step": 20909 - }, - { - "epoch": 1.5987155226790528, - "grad_norm": 0.0003047606151085347, - "learning_rate": 0.0001999987399250098, - "loss": 46.0, - "step": 20910 - }, - { - "epoch": 1.5987919796624426, - "grad_norm": 0.0007800646126270294, - "learning_rate": 0.00019999873980442584, - "loss": 46.0, - "step": 20911 - }, - { - "epoch": 1.5988684366458321, - "grad_norm": 0.0009797647362574935, - "learning_rate": 0.00019999873968383608, - "loss": 46.0, - "step": 20912 - }, - { - "epoch": 1.5989448936292219, - "grad_norm": 0.002188715385273099, - "learning_rate": 0.0001999987395632406, - "loss": 46.0, - "step": 20913 - }, - { - "epoch": 1.5990213506126114, - "grad_norm": 0.0010862976778298616, - "learning_rate": 0.0001999987394426393, - "loss": 46.0, - "step": 20914 - }, - { - "epoch": 1.5990978075960012, - "grad_norm": 0.002701367950066924, - "learning_rate": 0.00019999873932203226, - "loss": 46.0, - "step": 20915 - }, - { - "epoch": 1.599174264579391, - "grad_norm": 0.0008128510671667755, - "learning_rate": 0.00019999873920141945, - "loss": 46.0, - "step": 20916 - }, - { - "epoch": 1.5992507215627807, - "grad_norm": 0.0008816164336167276, - "learning_rate": 0.00019999873908080088, - "loss": 46.0, - "step": 20917 - }, - { - "epoch": 1.5993271785461705, - "grad_norm": 0.0026887725107371807, - "learning_rate": 0.0001999987389601765, - "loss": 46.0, - "step": 20918 - }, - { - "epoch": 1.5994036355295602, - "grad_norm": 0.003197918413206935, - "learning_rate": 0.00019999873883954638, - "loss": 46.0, - "step": 20919 - }, - { - "epoch": 1.59948009251295, - "grad_norm": 0.0008864808478392661, - "learning_rate": 0.00019999873871891048, - "loss": 46.0, - "step": 20920 - }, - { - "epoch": 1.5995565494963397, - "grad_norm": 0.0007778560393489897, - "learning_rate": 0.00019999873859826881, - "loss": 46.0, - "step": 20921 - }, - { - "epoch": 1.5996330064797295, - "grad_norm": 0.0011391224106773734, - "learning_rate": 0.00019999873847762137, - "loss": 46.0, - "step": 20922 - }, - { - "epoch": 1.599709463463119, - "grad_norm": 0.0007007248350419104, - "learning_rate": 0.00019999873835696818, - "loss": 46.0, - "step": 20923 - }, - { - "epoch": 1.5997859204465088, - "grad_norm": 0.0010847756639122963, - "learning_rate": 0.00019999873823630922, - "loss": 46.0, - "step": 20924 - }, - { - "epoch": 1.5998623774298983, - "grad_norm": 0.0015344604616984725, - "learning_rate": 0.00019999873811564446, - "loss": 46.0, - "step": 20925 - }, - { - "epoch": 1.599938834413288, - "grad_norm": 0.0028038558084517717, - "learning_rate": 0.00019999873799497397, - "loss": 46.0, - "step": 20926 - }, - { - "epoch": 1.6000152913966779, - "grad_norm": 0.001368249300867319, - "learning_rate": 0.0001999987378742977, - "loss": 46.0, - "step": 20927 - }, - { - "epoch": 1.6000917483800676, - "grad_norm": 0.0017033469630405307, - "learning_rate": 0.0001999987377536156, - "loss": 46.0, - "step": 20928 - }, - { - "epoch": 1.6001682053634574, - "grad_norm": 0.0012224729871377349, - "learning_rate": 0.0001999987376329278, - "loss": 46.0, - "step": 20929 - }, - { - "epoch": 1.6002446623468471, - "grad_norm": 0.0011936567025259137, - "learning_rate": 0.0001999987375122342, - "loss": 46.0, - "step": 20930 - }, - { - "epoch": 1.600321119330237, - "grad_norm": 0.0052709211595356464, - "learning_rate": 0.00019999873739153486, - "loss": 46.0, - "step": 20931 - }, - { - "epoch": 1.6003975763136267, - "grad_norm": 0.0028194868937134743, - "learning_rate": 0.00019999873727082973, - "loss": 46.0, - "step": 20932 - }, - { - "epoch": 1.6004740332970164, - "grad_norm": 0.0010508736595511436, - "learning_rate": 0.00019999873715011886, - "loss": 46.0, - "step": 20933 - }, - { - "epoch": 1.600550490280406, - "grad_norm": 0.0015400223201140761, - "learning_rate": 0.00019999873702940217, - "loss": 46.0, - "step": 20934 - }, - { - "epoch": 1.6006269472637957, - "grad_norm": 0.0016773439710959792, - "learning_rate": 0.00019999873690867975, - "loss": 46.0, - "step": 20935 - }, - { - "epoch": 1.6007034042471853, - "grad_norm": 0.0011647220235317945, - "learning_rate": 0.00019999873678795154, - "loss": 46.0, - "step": 20936 - }, - { - "epoch": 1.600779861230575, - "grad_norm": 0.0006939746672287583, - "learning_rate": 0.00019999873666721755, - "loss": 46.0, - "step": 20937 - }, - { - "epoch": 1.6008563182139648, - "grad_norm": 0.0012253641616553068, - "learning_rate": 0.0001999987365464778, - "loss": 46.0, - "step": 20938 - }, - { - "epoch": 1.6009327751973546, - "grad_norm": 0.013783898204565048, - "learning_rate": 0.0001999987364257323, - "loss": 46.0, - "step": 20939 - }, - { - "epoch": 1.6010092321807443, - "grad_norm": 0.0006799410912208259, - "learning_rate": 0.00019999873630498102, - "loss": 46.0, - "step": 20940 - }, - { - "epoch": 1.601085689164134, - "grad_norm": 0.0005718697793781757, - "learning_rate": 0.00019999873618422397, - "loss": 46.0, - "step": 20941 - }, - { - "epoch": 1.6011621461475238, - "grad_norm": 0.0028528645634651184, - "learning_rate": 0.00019999873606346116, - "loss": 46.0, - "step": 20942 - }, - { - "epoch": 1.6012386031309136, - "grad_norm": 0.0030135787092149258, - "learning_rate": 0.00019999873594269256, - "loss": 46.0, - "step": 20943 - }, - { - "epoch": 1.6013150601143031, - "grad_norm": 0.0033525920007377863, - "learning_rate": 0.00019999873582191819, - "loss": 46.0, - "step": 20944 - }, - { - "epoch": 1.601391517097693, - "grad_norm": 0.0006587335374206305, - "learning_rate": 0.00019999873570113806, - "loss": 46.0, - "step": 20945 - }, - { - "epoch": 1.6014679740810827, - "grad_norm": 0.001272045192308724, - "learning_rate": 0.00019999873558035217, - "loss": 46.0, - "step": 20946 - }, - { - "epoch": 1.6015444310644722, - "grad_norm": 0.0009788313182070851, - "learning_rate": 0.0001999987354595605, - "loss": 46.0, - "step": 20947 - }, - { - "epoch": 1.601620888047862, - "grad_norm": 0.0021034665405750275, - "learning_rate": 0.00019999873533876308, - "loss": 46.0, - "step": 20948 - }, - { - "epoch": 1.6016973450312517, - "grad_norm": 0.0012123460182920098, - "learning_rate": 0.00019999873521795984, - "loss": 46.0, - "step": 20949 - }, - { - "epoch": 1.6017738020146415, - "grad_norm": 0.004423484206199646, - "learning_rate": 0.00019999873509715088, - "loss": 46.0, - "step": 20950 - }, - { - "epoch": 1.6018502589980312, - "grad_norm": 0.0004724509490188211, - "learning_rate": 0.00019999873497633614, - "loss": 46.0, - "step": 20951 - }, - { - "epoch": 1.601926715981421, - "grad_norm": 0.0017719129100441933, - "learning_rate": 0.00019999873485551564, - "loss": 46.0, - "step": 20952 - }, - { - "epoch": 1.6020031729648108, - "grad_norm": 0.0010541756637394428, - "learning_rate": 0.00019999873473468935, - "loss": 46.0, - "step": 20953 - }, - { - "epoch": 1.6020796299482005, - "grad_norm": 0.0007369638769887388, - "learning_rate": 0.0001999987346138573, - "loss": 46.0, - "step": 20954 - }, - { - "epoch": 1.60215608693159, - "grad_norm": 0.0012938340660184622, - "learning_rate": 0.00019999873449301947, - "loss": 46.0, - "step": 20955 - }, - { - "epoch": 1.6022325439149798, - "grad_norm": 0.0014175311662256718, - "learning_rate": 0.00019999873437217587, - "loss": 46.0, - "step": 20956 - }, - { - "epoch": 1.6023090008983696, - "grad_norm": 0.014053162187337875, - "learning_rate": 0.0001999987342513265, - "loss": 46.0, - "step": 20957 - }, - { - "epoch": 1.6023854578817591, - "grad_norm": 0.0017105075530707836, - "learning_rate": 0.0001999987341304714, - "loss": 46.0, - "step": 20958 - }, - { - "epoch": 1.6024619148651489, - "grad_norm": 0.0030711700674146414, - "learning_rate": 0.00019999873400961048, - "loss": 46.0, - "step": 20959 - }, - { - "epoch": 1.6025383718485386, - "grad_norm": 0.0005176718113943934, - "learning_rate": 0.0001999987338887438, - "loss": 46.0, - "step": 20960 - }, - { - "epoch": 1.6026148288319284, - "grad_norm": 0.0023311355616897345, - "learning_rate": 0.00019999873376787134, - "loss": 46.0, - "step": 20961 - }, - { - "epoch": 1.6026912858153182, - "grad_norm": 0.0007144382107071579, - "learning_rate": 0.00019999873364699315, - "loss": 46.0, - "step": 20962 - }, - { - "epoch": 1.602767742798708, - "grad_norm": 0.00022900894691701978, - "learning_rate": 0.0001999987335261092, - "loss": 46.0, - "step": 20963 - }, - { - "epoch": 1.6028441997820977, - "grad_norm": 0.0007813775446265936, - "learning_rate": 0.00019999873340521943, - "loss": 46.0, - "step": 20964 - }, - { - "epoch": 1.6029206567654875, - "grad_norm": 0.0035930254962295294, - "learning_rate": 0.00019999873328432392, - "loss": 46.0, - "step": 20965 - }, - { - "epoch": 1.602997113748877, - "grad_norm": 0.0005185171612538397, - "learning_rate": 0.00019999873316342264, - "loss": 46.0, - "step": 20966 - }, - { - "epoch": 1.6030735707322668, - "grad_norm": 0.0007214799406938255, - "learning_rate": 0.00019999873304251558, - "loss": 46.0, - "step": 20967 - }, - { - "epoch": 1.6031500277156565, - "grad_norm": 0.0005261466722004116, - "learning_rate": 0.00019999873292160273, - "loss": 46.0, - "step": 20968 - }, - { - "epoch": 1.603226484699046, - "grad_norm": 0.0008981827413663268, - "learning_rate": 0.00019999873280068415, - "loss": 46.0, - "step": 20969 - }, - { - "epoch": 1.6033029416824358, - "grad_norm": 0.002676150994375348, - "learning_rate": 0.0001999987326797598, - "loss": 46.0, - "step": 20970 - }, - { - "epoch": 1.6033793986658256, - "grad_norm": 0.0009556770091876388, - "learning_rate": 0.00019999873255882966, - "loss": 46.0, - "step": 20971 - }, - { - "epoch": 1.6034558556492153, - "grad_norm": 0.002996955532580614, - "learning_rate": 0.00019999873243789376, - "loss": 46.0, - "step": 20972 - }, - { - "epoch": 1.603532312632605, - "grad_norm": 0.0009080239688046277, - "learning_rate": 0.0001999987323169521, - "loss": 46.0, - "step": 20973 - }, - { - "epoch": 1.6036087696159949, - "grad_norm": 0.002595087280496955, - "learning_rate": 0.00019999873219600465, - "loss": 46.0, - "step": 20974 - }, - { - "epoch": 1.6036852265993846, - "grad_norm": 0.003918865229934454, - "learning_rate": 0.0001999987320750514, - "loss": 46.0, - "step": 20975 - }, - { - "epoch": 1.6037616835827744, - "grad_norm": 0.0011954326182603836, - "learning_rate": 0.00019999873195409245, - "loss": 46.0, - "step": 20976 - }, - { - "epoch": 1.603838140566164, - "grad_norm": 0.0012244610115885735, - "learning_rate": 0.0001999987318331277, - "loss": 46.0, - "step": 20977 - }, - { - "epoch": 1.6039145975495537, - "grad_norm": 0.00048343127127736807, - "learning_rate": 0.00019999873171215718, - "loss": 46.0, - "step": 20978 - }, - { - "epoch": 1.6039910545329434, - "grad_norm": 0.007794278673827648, - "learning_rate": 0.00019999873159118087, - "loss": 46.0, - "step": 20979 - }, - { - "epoch": 1.604067511516333, - "grad_norm": 0.0030588796362280846, - "learning_rate": 0.00019999873147019884, - "loss": 46.0, - "step": 20980 - }, - { - "epoch": 1.6041439684997227, - "grad_norm": 0.0007419758476316929, - "learning_rate": 0.00019999873134921101, - "loss": 46.0, - "step": 20981 - }, - { - "epoch": 1.6042204254831125, - "grad_norm": 0.0017668596701696515, - "learning_rate": 0.0001999987312282174, - "loss": 46.0, - "step": 20982 - }, - { - "epoch": 1.6042968824665023, - "grad_norm": 0.0019268132746219635, - "learning_rate": 0.00019999873110721804, - "loss": 46.0, - "step": 20983 - }, - { - "epoch": 1.604373339449892, - "grad_norm": 0.000637273711618036, - "learning_rate": 0.00019999873098621292, - "loss": 46.0, - "step": 20984 - }, - { - "epoch": 1.6044497964332818, - "grad_norm": 0.0017091365298256278, - "learning_rate": 0.000199998730865202, - "loss": 46.0, - "step": 20985 - }, - { - "epoch": 1.6045262534166715, - "grad_norm": 0.0008401759550906718, - "learning_rate": 0.00019999873074418533, - "loss": 46.0, - "step": 20986 - }, - { - "epoch": 1.6046027104000613, - "grad_norm": 0.0016504711238667369, - "learning_rate": 0.0001999987306231629, - "loss": 46.0, - "step": 20987 - }, - { - "epoch": 1.6046791673834508, - "grad_norm": 0.0012186329113319516, - "learning_rate": 0.0001999987305021347, - "loss": 46.0, - "step": 20988 - }, - { - "epoch": 1.6047556243668406, - "grad_norm": 0.0006863982416689396, - "learning_rate": 0.0001999987303811007, - "loss": 46.0, - "step": 20989 - }, - { - "epoch": 1.6048320813502304, - "grad_norm": 0.0009601805941201746, - "learning_rate": 0.00019999873026006095, - "loss": 46.0, - "step": 20990 - }, - { - "epoch": 1.60490853833362, - "grad_norm": 0.0014521460980176926, - "learning_rate": 0.00019999873013901542, - "loss": 46.0, - "step": 20991 - }, - { - "epoch": 1.6049849953170097, - "grad_norm": 0.002574555343016982, - "learning_rate": 0.0001999987300179641, - "loss": 46.0, - "step": 20992 - }, - { - "epoch": 1.6050614523003994, - "grad_norm": 0.000844193622469902, - "learning_rate": 0.00019999872989690708, - "loss": 46.0, - "step": 20993 - }, - { - "epoch": 1.6051379092837892, - "grad_norm": 0.004621168132871389, - "learning_rate": 0.00019999872977584425, - "loss": 46.0, - "step": 20994 - }, - { - "epoch": 1.605214366267179, - "grad_norm": 0.0035156519152224064, - "learning_rate": 0.00019999872965477565, - "loss": 46.0, - "step": 20995 - }, - { - "epoch": 1.6052908232505687, - "grad_norm": 0.0033848511520773172, - "learning_rate": 0.00019999872953370128, - "loss": 46.0, - "step": 20996 - }, - { - "epoch": 1.6053672802339585, - "grad_norm": 0.0004745798360090703, - "learning_rate": 0.00019999872941262116, - "loss": 46.0, - "step": 20997 - }, - { - "epoch": 1.6054437372173482, - "grad_norm": 0.0028230210300534964, - "learning_rate": 0.00019999872929153523, - "loss": 46.0, - "step": 20998 - }, - { - "epoch": 1.6055201942007378, - "grad_norm": 0.0004764936165884137, - "learning_rate": 0.00019999872917044357, - "loss": 46.0, - "step": 20999 - }, - { - "epoch": 1.6055966511841275, - "grad_norm": 0.00035234479582868516, - "learning_rate": 0.0001999987290493461, - "loss": 46.0, - "step": 21000 - }, - { - "epoch": 1.6056731081675173, - "grad_norm": 0.00030260623316280544, - "learning_rate": 0.00019999872892824288, - "loss": 46.0, - "step": 21001 - }, - { - "epoch": 1.6057495651509068, - "grad_norm": 0.0012927382485941052, - "learning_rate": 0.00019999872880713392, - "loss": 46.0, - "step": 21002 - }, - { - "epoch": 1.6058260221342966, - "grad_norm": 0.002184497192502022, - "learning_rate": 0.00019999872868601916, - "loss": 46.0, - "step": 21003 - }, - { - "epoch": 1.6059024791176864, - "grad_norm": 0.0009127986850216985, - "learning_rate": 0.00019999872856489863, - "loss": 46.0, - "step": 21004 - }, - { - "epoch": 1.6059789361010761, - "grad_norm": 0.0005975909298285842, - "learning_rate": 0.00019999872844377238, - "loss": 46.0, - "step": 21005 - }, - { - "epoch": 1.6060553930844659, - "grad_norm": 0.0009857243858277798, - "learning_rate": 0.0001999987283226403, - "loss": 46.0, - "step": 21006 - }, - { - "epoch": 1.6061318500678556, - "grad_norm": 0.0007776133716106415, - "learning_rate": 0.00019999872820150247, - "loss": 46.0, - "step": 21007 - }, - { - "epoch": 1.6062083070512454, - "grad_norm": 0.0022794189862906933, - "learning_rate": 0.00019999872808035884, - "loss": 46.0, - "step": 21008 - }, - { - "epoch": 1.6062847640346352, - "grad_norm": 0.001021401141770184, - "learning_rate": 0.0001999987279592095, - "loss": 46.0, - "step": 21009 - }, - { - "epoch": 1.6063612210180247, - "grad_norm": 0.00104917970020324, - "learning_rate": 0.00019999872783805434, - "loss": 46.0, - "step": 21010 - }, - { - "epoch": 1.6064376780014145, - "grad_norm": 0.0013767699711024761, - "learning_rate": 0.00019999872771689345, - "loss": 46.0, - "step": 21011 - }, - { - "epoch": 1.6065141349848042, - "grad_norm": 0.000705127080436796, - "learning_rate": 0.00019999872759572678, - "loss": 46.0, - "step": 21012 - }, - { - "epoch": 1.6065905919681938, - "grad_norm": 0.0005498520331457257, - "learning_rate": 0.00019999872747455432, - "loss": 46.0, - "step": 21013 - }, - { - "epoch": 1.6066670489515835, - "grad_norm": 0.00036141107557341456, - "learning_rate": 0.0001999987273533761, - "loss": 46.0, - "step": 21014 - }, - { - "epoch": 1.6067435059349733, - "grad_norm": 0.0013553696917369962, - "learning_rate": 0.00019999872723219211, - "loss": 46.0, - "step": 21015 - }, - { - "epoch": 1.606819962918363, - "grad_norm": 0.002571397228166461, - "learning_rate": 0.00019999872711100235, - "loss": 46.0, - "step": 21016 - }, - { - "epoch": 1.6068964199017528, - "grad_norm": 0.0021386868320405483, - "learning_rate": 0.00019999872698980685, - "loss": 46.0, - "step": 21017 - }, - { - "epoch": 1.6069728768851426, - "grad_norm": 0.0035288440994918346, - "learning_rate": 0.0001999987268686055, - "loss": 46.0, - "step": 21018 - }, - { - "epoch": 1.6070493338685323, - "grad_norm": 0.004163360223174095, - "learning_rate": 0.00019999872674739849, - "loss": 46.0, - "step": 21019 - }, - { - "epoch": 1.607125790851922, - "grad_norm": 0.002563976449891925, - "learning_rate": 0.00019999872662618563, - "loss": 46.0, - "step": 21020 - }, - { - "epoch": 1.6072022478353116, - "grad_norm": 0.00160535320173949, - "learning_rate": 0.00019999872650496703, - "loss": 46.0, - "step": 21021 - }, - { - "epoch": 1.6072787048187014, - "grad_norm": 0.0015865828609094024, - "learning_rate": 0.00019999872638374266, - "loss": 46.0, - "step": 21022 - }, - { - "epoch": 1.6073551618020911, - "grad_norm": 0.0014409509021788836, - "learning_rate": 0.00019999872626251254, - "loss": 46.0, - "step": 21023 - }, - { - "epoch": 1.6074316187854807, - "grad_norm": 0.0006730647874064744, - "learning_rate": 0.00019999872614127662, - "loss": 46.0, - "step": 21024 - }, - { - "epoch": 1.6075080757688704, - "grad_norm": 0.0020042809192091227, - "learning_rate": 0.00019999872602003492, - "loss": 46.0, - "step": 21025 - }, - { - "epoch": 1.6075845327522602, - "grad_norm": 0.0018369007157161832, - "learning_rate": 0.00019999872589878748, - "loss": 46.0, - "step": 21026 - }, - { - "epoch": 1.60766098973565, - "grad_norm": 0.0008264737552963197, - "learning_rate": 0.00019999872577753427, - "loss": 46.0, - "step": 21027 - }, - { - "epoch": 1.6077374467190397, - "grad_norm": 0.010046363808214664, - "learning_rate": 0.00019999872565627528, - "loss": 46.0, - "step": 21028 - }, - { - "epoch": 1.6078139037024295, - "grad_norm": 0.0007724351598881185, - "learning_rate": 0.00019999872553501052, - "loss": 46.0, - "step": 21029 - }, - { - "epoch": 1.6078903606858193, - "grad_norm": 0.0007708275225013494, - "learning_rate": 0.00019999872541374001, - "loss": 46.0, - "step": 21030 - }, - { - "epoch": 1.607966817669209, - "grad_norm": 0.003397758584469557, - "learning_rate": 0.0001999987252924637, - "loss": 46.0, - "step": 21031 - }, - { - "epoch": 1.6080432746525986, - "grad_norm": 0.0015222474467009306, - "learning_rate": 0.00019999872517118163, - "loss": 46.0, - "step": 21032 - }, - { - "epoch": 1.6081197316359883, - "grad_norm": 0.0029681578744202852, - "learning_rate": 0.0001999987250498938, - "loss": 46.0, - "step": 21033 - }, - { - "epoch": 1.608196188619378, - "grad_norm": 0.0019044431392103434, - "learning_rate": 0.0001999987249286002, - "loss": 46.0, - "step": 21034 - }, - { - "epoch": 1.6082726456027676, - "grad_norm": 0.002594572026282549, - "learning_rate": 0.00019999872480730085, - "loss": 46.0, - "step": 21035 - }, - { - "epoch": 1.6083491025861574, - "grad_norm": 0.003152968129143119, - "learning_rate": 0.0001999987246859957, - "loss": 46.0, - "step": 21036 - }, - { - "epoch": 1.6084255595695471, - "grad_norm": 0.001157531514763832, - "learning_rate": 0.00019999872456468479, - "loss": 46.0, - "step": 21037 - }, - { - "epoch": 1.608502016552937, - "grad_norm": 0.0016753802774474025, - "learning_rate": 0.0001999987244433681, - "loss": 46.0, - "step": 21038 - }, - { - "epoch": 1.6085784735363267, - "grad_norm": 0.0009453432867303491, - "learning_rate": 0.00019999872432204563, - "loss": 46.0, - "step": 21039 - }, - { - "epoch": 1.6086549305197164, - "grad_norm": 0.002466905862092972, - "learning_rate": 0.0001999987242007174, - "loss": 46.0, - "step": 21040 - }, - { - "epoch": 1.6087313875031062, - "grad_norm": 0.000778735731728375, - "learning_rate": 0.00019999872407938343, - "loss": 46.0, - "step": 21041 - }, - { - "epoch": 1.608807844486496, - "grad_norm": 0.0022954270243644714, - "learning_rate": 0.00019999872395804367, - "loss": 46.0, - "step": 21042 - }, - { - "epoch": 1.6088843014698855, - "grad_norm": 0.0006725925486534834, - "learning_rate": 0.00019999872383669813, - "loss": 46.0, - "step": 21043 - }, - { - "epoch": 1.6089607584532752, - "grad_norm": 0.004365310538560152, - "learning_rate": 0.00019999872371534685, - "loss": 46.0, - "step": 21044 - }, - { - "epoch": 1.6090372154366648, - "grad_norm": 0.0019086155807599425, - "learning_rate": 0.0001999987235939898, - "loss": 46.0, - "step": 21045 - }, - { - "epoch": 1.6091136724200545, - "grad_norm": 0.0006107357912696898, - "learning_rate": 0.00019999872347262695, - "loss": 46.0, - "step": 21046 - }, - { - "epoch": 1.6091901294034443, - "grad_norm": 0.00041029637213796377, - "learning_rate": 0.00019999872335125835, - "loss": 46.0, - "step": 21047 - }, - { - "epoch": 1.609266586386834, - "grad_norm": 0.0011750655248761177, - "learning_rate": 0.00019999872322988395, - "loss": 46.0, - "step": 21048 - }, - { - "epoch": 1.6093430433702238, - "grad_norm": 0.002391480142250657, - "learning_rate": 0.00019999872310850383, - "loss": 46.0, - "step": 21049 - }, - { - "epoch": 1.6094195003536136, - "grad_norm": 0.00286471308209002, - "learning_rate": 0.0001999987229871179, - "loss": 46.0, - "step": 21050 - }, - { - "epoch": 1.6094959573370033, - "grad_norm": 0.008997954428195953, - "learning_rate": 0.0001999987228657262, - "loss": 46.0, - "step": 21051 - }, - { - "epoch": 1.609572414320393, - "grad_norm": 0.0017753532156348228, - "learning_rate": 0.00019999872274432877, - "loss": 46.0, - "step": 21052 - }, - { - "epoch": 1.6096488713037829, - "grad_norm": 0.003001385135576129, - "learning_rate": 0.00019999872262292553, - "loss": 46.0, - "step": 21053 - }, - { - "epoch": 1.6097253282871724, - "grad_norm": 0.0018249507993459702, - "learning_rate": 0.00019999872250151655, - "loss": 46.0, - "step": 21054 - }, - { - "epoch": 1.6098017852705622, - "grad_norm": 0.0012306715361773968, - "learning_rate": 0.0001999987223801018, - "loss": 46.0, - "step": 21055 - }, - { - "epoch": 1.6098782422539517, - "grad_norm": 0.0009357483359053731, - "learning_rate": 0.00019999872225868125, - "loss": 46.0, - "step": 21056 - }, - { - "epoch": 1.6099546992373415, - "grad_norm": 0.002308095572516322, - "learning_rate": 0.00019999872213725497, - "loss": 46.0, - "step": 21057 - }, - { - "epoch": 1.6100311562207312, - "grad_norm": 0.0007877701427787542, - "learning_rate": 0.0001999987220158229, - "loss": 46.0, - "step": 21058 - }, - { - "epoch": 1.610107613204121, - "grad_norm": 0.0009794340003281832, - "learning_rate": 0.00019999872189438504, - "loss": 46.0, - "step": 21059 - }, - { - "epoch": 1.6101840701875108, - "grad_norm": 0.0013161645038053393, - "learning_rate": 0.00019999872177294147, - "loss": 46.0, - "step": 21060 - }, - { - "epoch": 1.6102605271709005, - "grad_norm": 0.0018214213196188211, - "learning_rate": 0.00019999872165149207, - "loss": 46.0, - "step": 21061 - }, - { - "epoch": 1.6103369841542903, - "grad_norm": 0.0011455663479864597, - "learning_rate": 0.00019999872153003692, - "loss": 46.0, - "step": 21062 - }, - { - "epoch": 1.61041344113768, - "grad_norm": 0.0019520290661603212, - "learning_rate": 0.00019999872140857598, - "loss": 46.0, - "step": 21063 - }, - { - "epoch": 1.6104898981210698, - "grad_norm": 0.0005044558201916516, - "learning_rate": 0.00019999872128710934, - "loss": 46.0, - "step": 21064 - }, - { - "epoch": 1.6105663551044593, - "grad_norm": 0.0005181654705666006, - "learning_rate": 0.00019999872116563684, - "loss": 46.0, - "step": 21065 - }, - { - "epoch": 1.610642812087849, - "grad_norm": 0.0013047849060967565, - "learning_rate": 0.00019999872104415863, - "loss": 46.0, - "step": 21066 - }, - { - "epoch": 1.6107192690712386, - "grad_norm": 0.0010704946471378207, - "learning_rate": 0.00019999872092267464, - "loss": 46.0, - "step": 21067 - }, - { - "epoch": 1.6107957260546284, - "grad_norm": 0.0009067317587323487, - "learning_rate": 0.00019999872080118486, - "loss": 46.0, - "step": 21068 - }, - { - "epoch": 1.6108721830380182, - "grad_norm": 0.001266265520825982, - "learning_rate": 0.00019999872067968935, - "loss": 46.0, - "step": 21069 - }, - { - "epoch": 1.610948640021408, - "grad_norm": 0.00047098510549403727, - "learning_rate": 0.00019999872055818805, - "loss": 46.0, - "step": 21070 - }, - { - "epoch": 1.6110250970047977, - "grad_norm": 0.000976894167251885, - "learning_rate": 0.00019999872043668097, - "loss": 46.0, - "step": 21071 - }, - { - "epoch": 1.6111015539881874, - "grad_norm": 0.0010359307052567601, - "learning_rate": 0.00019999872031516814, - "loss": 46.0, - "step": 21072 - }, - { - "epoch": 1.6111780109715772, - "grad_norm": 0.0003913237014785409, - "learning_rate": 0.00019999872019364951, - "loss": 46.0, - "step": 21073 - }, - { - "epoch": 1.611254467954967, - "grad_norm": 0.0019767077174037695, - "learning_rate": 0.00019999872007212514, - "loss": 46.0, - "step": 21074 - }, - { - "epoch": 1.6113309249383567, - "grad_norm": 0.0036628753878176212, - "learning_rate": 0.000199998719950595, - "loss": 46.0, - "step": 21075 - }, - { - "epoch": 1.6114073819217463, - "grad_norm": 0.0015950137749314308, - "learning_rate": 0.00019999871982905908, - "loss": 46.0, - "step": 21076 - }, - { - "epoch": 1.611483838905136, - "grad_norm": 0.0016126794507727027, - "learning_rate": 0.0001999987197075174, - "loss": 46.0, - "step": 21077 - }, - { - "epoch": 1.6115602958885256, - "grad_norm": 0.0012470823712646961, - "learning_rate": 0.00019999871958596992, - "loss": 46.0, - "step": 21078 - }, - { - "epoch": 1.6116367528719153, - "grad_norm": 0.0007921934593468904, - "learning_rate": 0.0001999987194644167, - "loss": 46.0, - "step": 21079 - }, - { - "epoch": 1.611713209855305, - "grad_norm": 0.0032635461539030075, - "learning_rate": 0.00019999871934285772, - "loss": 46.0, - "step": 21080 - }, - { - "epoch": 1.6117896668386948, - "grad_norm": 0.0022898688912391663, - "learning_rate": 0.00019999871922129293, - "loss": 46.0, - "step": 21081 - }, - { - "epoch": 1.6118661238220846, - "grad_norm": 0.000564819376450032, - "learning_rate": 0.0001999987190997224, - "loss": 46.0, - "step": 21082 - }, - { - "epoch": 1.6119425808054744, - "grad_norm": 0.004197360016405582, - "learning_rate": 0.0001999987189781461, - "loss": 46.0, - "step": 21083 - }, - { - "epoch": 1.6120190377888641, - "grad_norm": 0.0005882505211047828, - "learning_rate": 0.00019999871885656404, - "loss": 46.0, - "step": 21084 - }, - { - "epoch": 1.612095494772254, - "grad_norm": 0.0019654470961540937, - "learning_rate": 0.0001999987187349762, - "loss": 46.0, - "step": 21085 - }, - { - "epoch": 1.6121719517556434, - "grad_norm": 0.002143718535080552, - "learning_rate": 0.00019999871861338256, - "loss": 46.0, - "step": 21086 - }, - { - "epoch": 1.6122484087390332, - "grad_norm": 0.0017097517848014832, - "learning_rate": 0.0001999987184917832, - "loss": 46.0, - "step": 21087 - }, - { - "epoch": 1.612324865722423, - "grad_norm": 0.007222683634608984, - "learning_rate": 0.00019999871837017805, - "loss": 46.0, - "step": 21088 - }, - { - "epoch": 1.6124013227058125, - "grad_norm": 0.0007527847192250192, - "learning_rate": 0.00019999871824856713, - "loss": 46.0, - "step": 21089 - }, - { - "epoch": 1.6124777796892023, - "grad_norm": 0.0006051480304449797, - "learning_rate": 0.00019999871812695043, - "loss": 46.0, - "step": 21090 - }, - { - "epoch": 1.612554236672592, - "grad_norm": 0.0019459350733086467, - "learning_rate": 0.000199998718005328, - "loss": 46.0, - "step": 21091 - }, - { - "epoch": 1.6126306936559818, - "grad_norm": 0.006585201248526573, - "learning_rate": 0.00019999871788369976, - "loss": 46.0, - "step": 21092 - }, - { - "epoch": 1.6127071506393715, - "grad_norm": 0.0038099808152765036, - "learning_rate": 0.00019999871776206574, - "loss": 46.0, - "step": 21093 - }, - { - "epoch": 1.6127836076227613, - "grad_norm": 0.003795116674154997, - "learning_rate": 0.00019999871764042596, - "loss": 46.0, - "step": 21094 - }, - { - "epoch": 1.612860064606151, - "grad_norm": 0.0031860575545579195, - "learning_rate": 0.00019999871751878045, - "loss": 46.0, - "step": 21095 - }, - { - "epoch": 1.6129365215895408, - "grad_norm": 0.0007133377366699278, - "learning_rate": 0.00019999871739712915, - "loss": 46.0, - "step": 21096 - }, - { - "epoch": 1.6130129785729304, - "grad_norm": 0.0025748417247086763, - "learning_rate": 0.00019999871727547207, - "loss": 46.0, - "step": 21097 - }, - { - "epoch": 1.6130894355563201, - "grad_norm": 0.0005095878150314093, - "learning_rate": 0.00019999871715380922, - "loss": 46.0, - "step": 21098 - }, - { - "epoch": 1.6131658925397099, - "grad_norm": 0.00039984722388908267, - "learning_rate": 0.00019999871703214062, - "loss": 46.0, - "step": 21099 - }, - { - "epoch": 1.6132423495230994, - "grad_norm": 0.0013893496943637729, - "learning_rate": 0.00019999871691046625, - "loss": 46.0, - "step": 21100 - }, - { - "epoch": 1.6133188065064892, - "grad_norm": 0.0025907151866704226, - "learning_rate": 0.00019999871678878607, - "loss": 46.0, - "step": 21101 - }, - { - "epoch": 1.613395263489879, - "grad_norm": 0.0005438943626359105, - "learning_rate": 0.00019999871666710015, - "loss": 46.0, - "step": 21102 - }, - { - "epoch": 1.6134717204732687, - "grad_norm": 0.0031791573856025934, - "learning_rate": 0.00019999871654540846, - "loss": 46.0, - "step": 21103 - }, - { - "epoch": 1.6135481774566585, - "grad_norm": 0.0009783757850527763, - "learning_rate": 0.000199998716423711, - "loss": 46.0, - "step": 21104 - }, - { - "epoch": 1.6136246344400482, - "grad_norm": 0.0034628184512257576, - "learning_rate": 0.00019999871630200776, - "loss": 46.0, - "step": 21105 - }, - { - "epoch": 1.613701091423438, - "grad_norm": 0.0014042137190699577, - "learning_rate": 0.00019999871618029877, - "loss": 46.0, - "step": 21106 - }, - { - "epoch": 1.6137775484068277, - "grad_norm": 0.0008561168215237558, - "learning_rate": 0.00019999871605858401, - "loss": 46.0, - "step": 21107 - }, - { - "epoch": 1.6138540053902173, - "grad_norm": 0.0017930029425770044, - "learning_rate": 0.00019999871593686348, - "loss": 46.0, - "step": 21108 - }, - { - "epoch": 1.613930462373607, - "grad_norm": 0.011705402284860611, - "learning_rate": 0.00019999871581513715, - "loss": 46.0, - "step": 21109 - }, - { - "epoch": 1.6140069193569968, - "grad_norm": 0.0010347990319132805, - "learning_rate": 0.00019999871569340507, - "loss": 46.0, - "step": 21110 - }, - { - "epoch": 1.6140833763403863, - "grad_norm": 0.001082845265045762, - "learning_rate": 0.00019999871557166725, - "loss": 46.0, - "step": 21111 - }, - { - "epoch": 1.614159833323776, - "grad_norm": 0.0012829214101657271, - "learning_rate": 0.00019999871544992362, - "loss": 46.0, - "step": 21112 - }, - { - "epoch": 1.6142362903071659, - "grad_norm": 0.0012234133901074529, - "learning_rate": 0.00019999871532817425, - "loss": 46.0, - "step": 21113 - }, - { - "epoch": 1.6143127472905556, - "grad_norm": 0.000985841965302825, - "learning_rate": 0.00019999871520641905, - "loss": 46.0, - "step": 21114 - }, - { - "epoch": 1.6143892042739454, - "grad_norm": 0.0015574110439047217, - "learning_rate": 0.00019999871508465816, - "loss": 46.0, - "step": 21115 - }, - { - "epoch": 1.6144656612573351, - "grad_norm": 0.0017301106126978993, - "learning_rate": 0.00019999871496289147, - "loss": 46.0, - "step": 21116 - }, - { - "epoch": 1.614542118240725, - "grad_norm": 0.0008492065244354308, - "learning_rate": 0.000199998714841119, - "loss": 46.0, - "step": 21117 - }, - { - "epoch": 1.6146185752241147, - "grad_norm": 0.0010417180601507425, - "learning_rate": 0.00019999871471934076, - "loss": 46.0, - "step": 21118 - }, - { - "epoch": 1.6146950322075042, - "grad_norm": 0.0028450845275074244, - "learning_rate": 0.00019999871459755678, - "loss": 46.0, - "step": 21119 - }, - { - "epoch": 1.614771489190894, - "grad_norm": 0.0013455734588205814, - "learning_rate": 0.000199998714475767, - "loss": 46.0, - "step": 21120 - }, - { - "epoch": 1.6148479461742837, - "grad_norm": 0.0004979423829354346, - "learning_rate": 0.00019999871435397143, - "loss": 46.0, - "step": 21121 - }, - { - "epoch": 1.6149244031576733, - "grad_norm": 0.0006722980760969222, - "learning_rate": 0.00019999871423217016, - "loss": 46.0, - "step": 21122 - }, - { - "epoch": 1.615000860141063, - "grad_norm": 0.0014136051759123802, - "learning_rate": 0.00019999871411036305, - "loss": 46.0, - "step": 21123 - }, - { - "epoch": 1.6150773171244528, - "grad_norm": 0.0016931983409449458, - "learning_rate": 0.0001999987139885502, - "loss": 46.0, - "step": 21124 - }, - { - "epoch": 1.6151537741078426, - "grad_norm": 0.0006579659529961646, - "learning_rate": 0.0001999987138667316, - "loss": 46.0, - "step": 21125 - }, - { - "epoch": 1.6152302310912323, - "grad_norm": 0.0007514195749536157, - "learning_rate": 0.00019999871374490723, - "loss": 46.0, - "step": 21126 - }, - { - "epoch": 1.615306688074622, - "grad_norm": 0.0030831689946353436, - "learning_rate": 0.00019999871362307706, - "loss": 46.0, - "step": 21127 - }, - { - "epoch": 1.6153831450580118, - "grad_norm": 0.0005621747695840895, - "learning_rate": 0.00019999871350124114, - "loss": 46.0, - "step": 21128 - }, - { - "epoch": 1.6154596020414016, - "grad_norm": 0.0003873935202136636, - "learning_rate": 0.00019999871337939945, - "loss": 46.0, - "step": 21129 - }, - { - "epoch": 1.6155360590247911, - "grad_norm": 0.0018026749603450298, - "learning_rate": 0.00019999871325755198, - "loss": 46.0, - "step": 21130 - }, - { - "epoch": 1.615612516008181, - "grad_norm": 0.0023601707071065903, - "learning_rate": 0.00019999871313569875, - "loss": 46.0, - "step": 21131 - }, - { - "epoch": 1.6156889729915707, - "grad_norm": 0.0008154114475473762, - "learning_rate": 0.00019999871301383973, - "loss": 46.0, - "step": 21132 - }, - { - "epoch": 1.6157654299749602, - "grad_norm": 0.0023265972267836332, - "learning_rate": 0.00019999871289197495, - "loss": 46.0, - "step": 21133 - }, - { - "epoch": 1.61584188695835, - "grad_norm": 0.0010747710475698113, - "learning_rate": 0.0001999987127701044, - "loss": 46.0, - "step": 21134 - }, - { - "epoch": 1.6159183439417397, - "grad_norm": 0.018500735983252525, - "learning_rate": 0.00019999871264822811, - "loss": 46.0, - "step": 21135 - }, - { - "epoch": 1.6159948009251295, - "grad_norm": 0.0019123135134577751, - "learning_rate": 0.000199998712526346, - "loss": 46.0, - "step": 21136 - }, - { - "epoch": 1.6160712579085192, - "grad_norm": 0.0006049704388715327, - "learning_rate": 0.00019999871240445818, - "loss": 46.0, - "step": 21137 - }, - { - "epoch": 1.616147714891909, - "grad_norm": 0.002371120033785701, - "learning_rate": 0.00019999871228256456, - "loss": 46.0, - "step": 21138 - }, - { - "epoch": 1.6162241718752988, - "grad_norm": 0.001425352762453258, - "learning_rate": 0.00019999871216066516, - "loss": 46.0, - "step": 21139 - }, - { - "epoch": 1.6163006288586885, - "grad_norm": 0.0008812497835606337, - "learning_rate": 0.00019999871203876, - "loss": 46.0, - "step": 21140 - }, - { - "epoch": 1.616377085842078, - "grad_norm": 0.0015051300870254636, - "learning_rate": 0.00019999871191684907, - "loss": 46.0, - "step": 21141 - }, - { - "epoch": 1.6164535428254678, - "grad_norm": 0.009064078330993652, - "learning_rate": 0.00019999871179493238, - "loss": 46.0, - "step": 21142 - }, - { - "epoch": 1.6165299998088576, - "grad_norm": 0.0006886118790134788, - "learning_rate": 0.00019999871167300992, - "loss": 46.0, - "step": 21143 - }, - { - "epoch": 1.6166064567922471, - "grad_norm": 0.0003229297581128776, - "learning_rate": 0.0001999987115510817, - "loss": 46.0, - "step": 21144 - }, - { - "epoch": 1.6166829137756369, - "grad_norm": 0.003070235252380371, - "learning_rate": 0.00019999871142914767, - "loss": 46.0, - "step": 21145 - }, - { - "epoch": 1.6167593707590266, - "grad_norm": 0.000722051365301013, - "learning_rate": 0.00019999871130720788, - "loss": 46.0, - "step": 21146 - }, - { - "epoch": 1.6168358277424164, - "grad_norm": 0.0008982019498944283, - "learning_rate": 0.00019999871118526238, - "loss": 46.0, - "step": 21147 - }, - { - "epoch": 1.6169122847258062, - "grad_norm": 0.0038732513785362244, - "learning_rate": 0.00019999871106331102, - "loss": 46.0, - "step": 21148 - }, - { - "epoch": 1.616988741709196, - "grad_norm": 0.001466869842261076, - "learning_rate": 0.00019999871094135397, - "loss": 46.0, - "step": 21149 - }, - { - "epoch": 1.6170651986925857, - "grad_norm": 0.0037480571772903204, - "learning_rate": 0.00019999871081939112, - "loss": 46.0, - "step": 21150 - }, - { - "epoch": 1.6171416556759755, - "grad_norm": 0.002543717622756958, - "learning_rate": 0.0001999987106974225, - "loss": 46.0, - "step": 21151 - }, - { - "epoch": 1.617218112659365, - "grad_norm": 0.0022300435230135918, - "learning_rate": 0.0001999987105754481, - "loss": 46.0, - "step": 21152 - }, - { - "epoch": 1.6172945696427548, - "grad_norm": 0.0007123122923076153, - "learning_rate": 0.00019999871045346793, - "loss": 46.0, - "step": 21153 - }, - { - "epoch": 1.6173710266261445, - "grad_norm": 0.0015811588382348418, - "learning_rate": 0.000199998710331482, - "loss": 46.0, - "step": 21154 - }, - { - "epoch": 1.617447483609534, - "grad_norm": 0.001919200294651091, - "learning_rate": 0.0001999987102094903, - "loss": 46.0, - "step": 21155 - }, - { - "epoch": 1.6175239405929238, - "grad_norm": 0.0018038144335150719, - "learning_rate": 0.00019999871008749285, - "loss": 46.0, - "step": 21156 - }, - { - "epoch": 1.6176003975763136, - "grad_norm": 0.0008698327583260834, - "learning_rate": 0.0001999987099654896, - "loss": 46.0, - "step": 21157 - }, - { - "epoch": 1.6176768545597033, - "grad_norm": 0.0010338870342820883, - "learning_rate": 0.0001999987098434806, - "loss": 46.0, - "step": 21158 - }, - { - "epoch": 1.617753311543093, - "grad_norm": 0.00043855590047314763, - "learning_rate": 0.00019999870972146582, - "loss": 46.0, - "step": 21159 - }, - { - "epoch": 1.6178297685264829, - "grad_norm": 0.0024216121528297663, - "learning_rate": 0.00019999870959944526, - "loss": 46.0, - "step": 21160 - }, - { - "epoch": 1.6179062255098726, - "grad_norm": 0.0011566090397536755, - "learning_rate": 0.00019999870947741893, - "loss": 46.0, - "step": 21161 - }, - { - "epoch": 1.6179826824932624, - "grad_norm": 0.0005988717311993241, - "learning_rate": 0.00019999870935538688, - "loss": 46.0, - "step": 21162 - }, - { - "epoch": 1.618059139476652, - "grad_norm": 0.001366671989671886, - "learning_rate": 0.00019999870923334903, - "loss": 46.0, - "step": 21163 - }, - { - "epoch": 1.6181355964600417, - "grad_norm": 0.0007778751896694303, - "learning_rate": 0.0001999987091113054, - "loss": 46.0, - "step": 21164 - }, - { - "epoch": 1.6182120534434314, - "grad_norm": 0.004723059944808483, - "learning_rate": 0.000199998708989256, - "loss": 46.0, - "step": 21165 - }, - { - "epoch": 1.618288510426821, - "grad_norm": 0.001515114912763238, - "learning_rate": 0.00019999870886720084, - "loss": 46.0, - "step": 21166 - }, - { - "epoch": 1.6183649674102107, - "grad_norm": 0.0005236520082689822, - "learning_rate": 0.0001999987087451399, - "loss": 46.0, - "step": 21167 - }, - { - "epoch": 1.6184414243936005, - "grad_norm": 0.0003598094917833805, - "learning_rate": 0.0001999987086230732, - "loss": 46.0, - "step": 21168 - }, - { - "epoch": 1.6185178813769903, - "grad_norm": 0.0014772940194234252, - "learning_rate": 0.00019999870850100074, - "loss": 46.0, - "step": 21169 - }, - { - "epoch": 1.61859433836038, - "grad_norm": 0.0005886157159693539, - "learning_rate": 0.0001999987083789225, - "loss": 46.0, - "step": 21170 - }, - { - "epoch": 1.6186707953437698, - "grad_norm": 0.0009562487248331308, - "learning_rate": 0.0001999987082568385, - "loss": 46.0, - "step": 21171 - }, - { - "epoch": 1.6187472523271595, - "grad_norm": 0.0011610715882852674, - "learning_rate": 0.0001999987081347487, - "loss": 46.0, - "step": 21172 - }, - { - "epoch": 1.6188237093105493, - "grad_norm": 0.0016038455069065094, - "learning_rate": 0.00019999870801265318, - "loss": 46.0, - "step": 21173 - }, - { - "epoch": 1.6189001662939388, - "grad_norm": 0.001127162715420127, - "learning_rate": 0.00019999870789055185, - "loss": 46.0, - "step": 21174 - }, - { - "epoch": 1.6189766232773286, - "grad_norm": 0.0011866713175550103, - "learning_rate": 0.00019999870776844477, - "loss": 46.0, - "step": 21175 - }, - { - "epoch": 1.6190530802607181, - "grad_norm": 0.0033893210347741842, - "learning_rate": 0.0001999987076463319, - "loss": 46.0, - "step": 21176 - }, - { - "epoch": 1.619129537244108, - "grad_norm": 0.0013858703896403313, - "learning_rate": 0.00019999870752421327, - "loss": 46.0, - "step": 21177 - }, - { - "epoch": 1.6192059942274977, - "grad_norm": 0.0009029399370774627, - "learning_rate": 0.00019999870740208887, - "loss": 46.0, - "step": 21178 - }, - { - "epoch": 1.6192824512108874, - "grad_norm": 0.0006544990465044975, - "learning_rate": 0.00019999870727995873, - "loss": 46.0, - "step": 21179 - }, - { - "epoch": 1.6193589081942772, - "grad_norm": 0.0007578975055366755, - "learning_rate": 0.00019999870715782278, - "loss": 46.0, - "step": 21180 - }, - { - "epoch": 1.619435365177667, - "grad_norm": 0.0005427899304777384, - "learning_rate": 0.00019999870703568107, - "loss": 46.0, - "step": 21181 - }, - { - "epoch": 1.6195118221610567, - "grad_norm": 0.0015042303130030632, - "learning_rate": 0.00019999870691353363, - "loss": 46.0, - "step": 21182 - }, - { - "epoch": 1.6195882791444465, - "grad_norm": 0.0019409341039136052, - "learning_rate": 0.00019999870679138036, - "loss": 46.0, - "step": 21183 - }, - { - "epoch": 1.6196647361278362, - "grad_norm": 0.001799962599761784, - "learning_rate": 0.00019999870666922135, - "loss": 46.0, - "step": 21184 - }, - { - "epoch": 1.6197411931112258, - "grad_norm": 0.000986735220067203, - "learning_rate": 0.00019999870654705657, - "loss": 46.0, - "step": 21185 - }, - { - "epoch": 1.6198176500946155, - "grad_norm": 0.007555858232080936, - "learning_rate": 0.00019999870642488601, - "loss": 46.0, - "step": 21186 - }, - { - "epoch": 1.619894107078005, - "grad_norm": 0.004765776917338371, - "learning_rate": 0.00019999870630270968, - "loss": 46.0, - "step": 21187 - }, - { - "epoch": 1.6199705640613948, - "grad_norm": 0.0004920654464513063, - "learning_rate": 0.0001999987061805276, - "loss": 46.0, - "step": 21188 - }, - { - "epoch": 1.6200470210447846, - "grad_norm": 0.0005713792634196579, - "learning_rate": 0.00019999870605833976, - "loss": 46.0, - "step": 21189 - }, - { - "epoch": 1.6201234780281744, - "grad_norm": 0.0009968169033527374, - "learning_rate": 0.0001999987059361461, - "loss": 46.0, - "step": 21190 - }, - { - "epoch": 1.6201999350115641, - "grad_norm": 0.0007498997147195041, - "learning_rate": 0.00019999870581394674, - "loss": 46.0, - "step": 21191 - }, - { - "epoch": 1.6202763919949539, - "grad_norm": 0.000861411914229393, - "learning_rate": 0.00019999870569174157, - "loss": 46.0, - "step": 21192 - }, - { - "epoch": 1.6203528489783436, - "grad_norm": 0.0004635646764654666, - "learning_rate": 0.00019999870556953062, - "loss": 46.0, - "step": 21193 - }, - { - "epoch": 1.6204293059617334, - "grad_norm": 0.0007969801663421094, - "learning_rate": 0.00019999870544731393, - "loss": 46.0, - "step": 21194 - }, - { - "epoch": 1.6205057629451232, - "grad_norm": 0.001320715295150876, - "learning_rate": 0.00019999870532509144, - "loss": 46.0, - "step": 21195 - }, - { - "epoch": 1.6205822199285127, - "grad_norm": 0.001197740202769637, - "learning_rate": 0.0001999987052028632, - "loss": 46.0, - "step": 21196 - }, - { - "epoch": 1.6206586769119025, - "grad_norm": 0.001280443393625319, - "learning_rate": 0.0001999987050806292, - "loss": 46.0, - "step": 21197 - }, - { - "epoch": 1.620735133895292, - "grad_norm": 0.0006781668635085225, - "learning_rate": 0.0001999987049583894, - "loss": 46.0, - "step": 21198 - }, - { - "epoch": 1.6208115908786818, - "grad_norm": 0.006907659117132425, - "learning_rate": 0.00019999870483614386, - "loss": 46.0, - "step": 21199 - }, - { - "epoch": 1.6208880478620715, - "grad_norm": 0.001500763464719057, - "learning_rate": 0.00019999870471389252, - "loss": 46.0, - "step": 21200 - }, - { - "epoch": 1.6209645048454613, - "grad_norm": 0.003449493320658803, - "learning_rate": 0.00019999870459163542, - "loss": 46.0, - "step": 21201 - }, - { - "epoch": 1.621040961828851, - "grad_norm": 0.0005858216900378466, - "learning_rate": 0.0001999987044693726, - "loss": 46.0, - "step": 21202 - }, - { - "epoch": 1.6211174188122408, - "grad_norm": 0.003766548354178667, - "learning_rate": 0.00019999870434710398, - "loss": 46.0, - "step": 21203 - }, - { - "epoch": 1.6211938757956306, - "grad_norm": 0.0012942487373948097, - "learning_rate": 0.00019999870422482955, - "loss": 46.0, - "step": 21204 - }, - { - "epoch": 1.6212703327790203, - "grad_norm": 0.0017242156900465488, - "learning_rate": 0.00019999870410254938, - "loss": 46.0, - "step": 21205 - }, - { - "epoch": 1.62134678976241, - "grad_norm": 0.0007253267103806138, - "learning_rate": 0.00019999870398026347, - "loss": 46.0, - "step": 21206 - }, - { - "epoch": 1.6214232467457996, - "grad_norm": 0.0013813120312988758, - "learning_rate": 0.00019999870385797175, - "loss": 46.0, - "step": 21207 - }, - { - "epoch": 1.6214997037291894, - "grad_norm": 0.0028068660758435726, - "learning_rate": 0.00019999870373567426, - "loss": 46.0, - "step": 21208 - }, - { - "epoch": 1.621576160712579, - "grad_norm": 0.001091246958822012, - "learning_rate": 0.00019999870361337102, - "loss": 46.0, - "step": 21209 - }, - { - "epoch": 1.6216526176959687, - "grad_norm": 0.0030145449563860893, - "learning_rate": 0.000199998703491062, - "loss": 46.0, - "step": 21210 - }, - { - "epoch": 1.6217290746793585, - "grad_norm": 0.004196884576231241, - "learning_rate": 0.00019999870336874723, - "loss": 46.0, - "step": 21211 - }, - { - "epoch": 1.6218055316627482, - "grad_norm": 0.0019495775923132896, - "learning_rate": 0.0001999987032464267, - "loss": 46.0, - "step": 21212 - }, - { - "epoch": 1.621881988646138, - "grad_norm": 0.0009065186022780836, - "learning_rate": 0.00019999870312410037, - "loss": 46.0, - "step": 21213 - }, - { - "epoch": 1.6219584456295277, - "grad_norm": 0.006810011342167854, - "learning_rate": 0.00019999870300176827, - "loss": 46.0, - "step": 21214 - }, - { - "epoch": 1.6220349026129175, - "grad_norm": 0.0029011250007897615, - "learning_rate": 0.0001999987028794304, - "loss": 46.0, - "step": 21215 - }, - { - "epoch": 1.6221113595963073, - "grad_norm": 0.0010498181218281388, - "learning_rate": 0.00019999870275708677, - "loss": 46.0, - "step": 21216 - }, - { - "epoch": 1.6221878165796968, - "grad_norm": 0.0014272972475737333, - "learning_rate": 0.00019999870263473737, - "loss": 46.0, - "step": 21217 - }, - { - "epoch": 1.6222642735630866, - "grad_norm": 0.0006101102917455137, - "learning_rate": 0.0001999987025123822, - "loss": 46.0, - "step": 21218 - }, - { - "epoch": 1.6223407305464763, - "grad_norm": 0.007509036920964718, - "learning_rate": 0.00019999870239002126, - "loss": 46.0, - "step": 21219 - }, - { - "epoch": 1.6224171875298659, - "grad_norm": 0.001004537334665656, - "learning_rate": 0.00019999870226765457, - "loss": 46.0, - "step": 21220 - }, - { - "epoch": 1.6224936445132556, - "grad_norm": 0.005441855173557997, - "learning_rate": 0.00019999870214528208, - "loss": 46.0, - "step": 21221 - }, - { - "epoch": 1.6225701014966454, - "grad_norm": 0.0012853537918999791, - "learning_rate": 0.00019999870202290382, - "loss": 46.0, - "step": 21222 - }, - { - "epoch": 1.6226465584800351, - "grad_norm": 0.004511438775807619, - "learning_rate": 0.00019999870190051978, - "loss": 46.0, - "step": 21223 - }, - { - "epoch": 1.622723015463425, - "grad_norm": 0.0020690823439508677, - "learning_rate": 0.00019999870177813, - "loss": 46.0, - "step": 21224 - }, - { - "epoch": 1.6227994724468147, - "grad_norm": 0.0010296042310073972, - "learning_rate": 0.00019999870165573447, - "loss": 46.0, - "step": 21225 - }, - { - "epoch": 1.6228759294302044, - "grad_norm": 0.001650592079386115, - "learning_rate": 0.00019999870153333314, - "loss": 46.0, - "step": 21226 - }, - { - "epoch": 1.6229523864135942, - "grad_norm": 0.001578155905008316, - "learning_rate": 0.00019999870141092604, - "loss": 46.0, - "step": 21227 - }, - { - "epoch": 1.6230288433969837, - "grad_norm": 0.0035745149943977594, - "learning_rate": 0.00019999870128851316, - "loss": 46.0, - "step": 21228 - }, - { - "epoch": 1.6231053003803735, - "grad_norm": 0.0019117856863886118, - "learning_rate": 0.00019999870116609456, - "loss": 46.0, - "step": 21229 - }, - { - "epoch": 1.6231817573637632, - "grad_norm": 0.011858200654387474, - "learning_rate": 0.00019999870104367014, - "loss": 46.0, - "step": 21230 - }, - { - "epoch": 1.6232582143471528, - "grad_norm": 0.0009259468642994761, - "learning_rate": 0.00019999870092123997, - "loss": 46.0, - "step": 21231 - }, - { - "epoch": 1.6233346713305425, - "grad_norm": 0.002775243017822504, - "learning_rate": 0.00019999870079880403, - "loss": 46.0, - "step": 21232 - }, - { - "epoch": 1.6234111283139323, - "grad_norm": 0.004227730445563793, - "learning_rate": 0.00019999870067636231, - "loss": 46.0, - "step": 21233 - }, - { - "epoch": 1.623487585297322, - "grad_norm": 0.002377475146204233, - "learning_rate": 0.00019999870055391482, - "loss": 46.0, - "step": 21234 - }, - { - "epoch": 1.6235640422807118, - "grad_norm": 0.0006435289396904409, - "learning_rate": 0.0001999987004314616, - "loss": 46.0, - "step": 21235 - }, - { - "epoch": 1.6236404992641016, - "grad_norm": 0.008548357523977757, - "learning_rate": 0.00019999870030900255, - "loss": 46.0, - "step": 21236 - }, - { - "epoch": 1.6237169562474913, - "grad_norm": 0.0010516246547922492, - "learning_rate": 0.00019999870018653777, - "loss": 46.0, - "step": 21237 - }, - { - "epoch": 1.623793413230881, - "grad_norm": 0.0008543775766156614, - "learning_rate": 0.00019999870006406721, - "loss": 46.0, - "step": 21238 - }, - { - "epoch": 1.6238698702142706, - "grad_norm": 0.01090431772172451, - "learning_rate": 0.0001999986999415909, - "loss": 46.0, - "step": 21239 - }, - { - "epoch": 1.6239463271976604, - "grad_norm": 0.0021908299531787634, - "learning_rate": 0.00019999869981910878, - "loss": 46.0, - "step": 21240 - }, - { - "epoch": 1.6240227841810502, - "grad_norm": 0.0008095631492324173, - "learning_rate": 0.00019999869969662096, - "loss": 46.0, - "step": 21241 - }, - { - "epoch": 1.6240992411644397, - "grad_norm": 0.0007362104952335358, - "learning_rate": 0.00019999869957412728, - "loss": 46.0, - "step": 21242 - }, - { - "epoch": 1.6241756981478295, - "grad_norm": 0.010303488932549953, - "learning_rate": 0.00019999869945162792, - "loss": 46.0, - "step": 21243 - }, - { - "epoch": 1.6242521551312192, - "grad_norm": 0.0012417277321219444, - "learning_rate": 0.00019999869932912272, - "loss": 46.0, - "step": 21244 - }, - { - "epoch": 1.624328612114609, - "grad_norm": 0.0008282453054562211, - "learning_rate": 0.00019999869920661178, - "loss": 46.0, - "step": 21245 - }, - { - "epoch": 1.6244050690979988, - "grad_norm": 0.004407276399433613, - "learning_rate": 0.00019999869908409506, - "loss": 46.0, - "step": 21246 - }, - { - "epoch": 1.6244815260813885, - "grad_norm": 0.0006785824662074447, - "learning_rate": 0.00019999869896157257, - "loss": 46.0, - "step": 21247 - }, - { - "epoch": 1.6245579830647783, - "grad_norm": 0.0015478740679100156, - "learning_rate": 0.0001999986988390443, - "loss": 46.0, - "step": 21248 - }, - { - "epoch": 1.624634440048168, - "grad_norm": 0.0010299028363078833, - "learning_rate": 0.0001999986987165103, - "loss": 46.0, - "step": 21249 - }, - { - "epoch": 1.6247108970315576, - "grad_norm": 0.000814037979580462, - "learning_rate": 0.00019999869859397055, - "loss": 46.0, - "step": 21250 - }, - { - "epoch": 1.6247873540149473, - "grad_norm": 0.0003908071666955948, - "learning_rate": 0.00019999869847142496, - "loss": 46.0, - "step": 21251 - }, - { - "epoch": 1.624863810998337, - "grad_norm": 0.0013096685288473964, - "learning_rate": 0.00019999869834887363, - "loss": 46.0, - "step": 21252 - }, - { - "epoch": 1.6249402679817266, - "grad_norm": 0.0027929276693612337, - "learning_rate": 0.00019999869822631653, - "loss": 46.0, - "step": 21253 - }, - { - "epoch": 1.6250167249651164, - "grad_norm": 0.0011807382106781006, - "learning_rate": 0.00019999869810375366, - "loss": 46.0, - "step": 21254 - }, - { - "epoch": 1.6250931819485062, - "grad_norm": 0.000671778223477304, - "learning_rate": 0.00019999869798118503, - "loss": 46.0, - "step": 21255 - }, - { - "epoch": 1.625169638931896, - "grad_norm": 0.0007618090603500605, - "learning_rate": 0.00019999869785861064, - "loss": 46.0, - "step": 21256 - }, - { - "epoch": 1.6252460959152857, - "grad_norm": 0.0003669520956464112, - "learning_rate": 0.00019999869773603047, - "loss": 46.0, - "step": 21257 - }, - { - "epoch": 1.6253225528986754, - "grad_norm": 0.0021568224765360355, - "learning_rate": 0.00019999869761344453, - "loss": 46.0, - "step": 21258 - }, - { - "epoch": 1.6253990098820652, - "grad_norm": 0.0021585659123957157, - "learning_rate": 0.0001999986974908528, - "loss": 46.0, - "step": 21259 - }, - { - "epoch": 1.625475466865455, - "grad_norm": 0.0013506005052477121, - "learning_rate": 0.00019999869736825532, - "loss": 46.0, - "step": 21260 - }, - { - "epoch": 1.6255519238488445, - "grad_norm": 0.0021476324182003736, - "learning_rate": 0.00019999869724565206, - "loss": 46.0, - "step": 21261 - }, - { - "epoch": 1.6256283808322343, - "grad_norm": 0.0017908531008288264, - "learning_rate": 0.00019999869712304303, - "loss": 46.0, - "step": 21262 - }, - { - "epoch": 1.625704837815624, - "grad_norm": 0.0031097340397536755, - "learning_rate": 0.00019999869700042824, - "loss": 46.0, - "step": 21263 - }, - { - "epoch": 1.6257812947990136, - "grad_norm": 0.0018535548588261008, - "learning_rate": 0.0001999986968778077, - "loss": 46.0, - "step": 21264 - }, - { - "epoch": 1.6258577517824033, - "grad_norm": 0.002143446123227477, - "learning_rate": 0.00019999869675518133, - "loss": 46.0, - "step": 21265 - }, - { - "epoch": 1.625934208765793, - "grad_norm": 0.006431363523006439, - "learning_rate": 0.00019999869663254923, - "loss": 46.0, - "step": 21266 - }, - { - "epoch": 1.6260106657491828, - "grad_norm": 0.0025239151436835527, - "learning_rate": 0.00019999869650991138, - "loss": 46.0, - "step": 21267 - }, - { - "epoch": 1.6260871227325726, - "grad_norm": 0.002566578332334757, - "learning_rate": 0.00019999869638726773, - "loss": 46.0, - "step": 21268 - }, - { - "epoch": 1.6261635797159624, - "grad_norm": 0.0012660878710448742, - "learning_rate": 0.0001999986962646183, - "loss": 46.0, - "step": 21269 - }, - { - "epoch": 1.6262400366993521, - "grad_norm": 0.0008707016822881997, - "learning_rate": 0.00019999869614196314, - "loss": 46.0, - "step": 21270 - }, - { - "epoch": 1.626316493682742, - "grad_norm": 0.009448220022022724, - "learning_rate": 0.0001999986960193022, - "loss": 46.0, - "step": 21271 - }, - { - "epoch": 1.6263929506661314, - "grad_norm": 0.0014334573643282056, - "learning_rate": 0.0001999986958966355, - "loss": 46.0, - "step": 21272 - }, - { - "epoch": 1.6264694076495212, - "grad_norm": 0.002474881475791335, - "learning_rate": 0.000199998695773963, - "loss": 46.0, - "step": 21273 - }, - { - "epoch": 1.626545864632911, - "grad_norm": 0.0009059604490175843, - "learning_rate": 0.00019999869565128476, - "loss": 46.0, - "step": 21274 - }, - { - "epoch": 1.6266223216163005, - "grad_norm": 0.033154770731925964, - "learning_rate": 0.0001999986955286007, - "loss": 46.0, - "step": 21275 - }, - { - "epoch": 1.6266987785996903, - "grad_norm": 0.004225148353725672, - "learning_rate": 0.00019999869540591095, - "loss": 46.0, - "step": 21276 - }, - { - "epoch": 1.62677523558308, - "grad_norm": 0.00220974232070148, - "learning_rate": 0.00019999869528321537, - "loss": 46.0, - "step": 21277 - }, - { - "epoch": 1.6268516925664698, - "grad_norm": 0.0009935535490512848, - "learning_rate": 0.00019999869516051404, - "loss": 46.0, - "step": 21278 - }, - { - "epoch": 1.6269281495498595, - "grad_norm": 0.001757741323672235, - "learning_rate": 0.00019999869503780694, - "loss": 46.0, - "step": 21279 - }, - { - "epoch": 1.6270046065332493, - "grad_norm": 0.0009551630355417728, - "learning_rate": 0.00019999869491509403, - "loss": 46.0, - "step": 21280 - }, - { - "epoch": 1.627081063516639, - "grad_norm": 0.0005284722428768873, - "learning_rate": 0.0001999986947923754, - "loss": 46.0, - "step": 21281 - }, - { - "epoch": 1.6271575205000288, - "grad_norm": 0.0009793429635465145, - "learning_rate": 0.000199998694669651, - "loss": 46.0, - "step": 21282 - }, - { - "epoch": 1.6272339774834184, - "grad_norm": 0.004317453131079674, - "learning_rate": 0.00019999869454692082, - "loss": 46.0, - "step": 21283 - }, - { - "epoch": 1.6273104344668081, - "grad_norm": 0.0006739678210578859, - "learning_rate": 0.00019999869442418485, - "loss": 46.0, - "step": 21284 - }, - { - "epoch": 1.6273868914501979, - "grad_norm": 0.0038984795100986958, - "learning_rate": 0.00019999869430144317, - "loss": 46.0, - "step": 21285 - }, - { - "epoch": 1.6274633484335874, - "grad_norm": 0.0009606571402400732, - "learning_rate": 0.00019999869417869568, - "loss": 46.0, - "step": 21286 - }, - { - "epoch": 1.6275398054169772, - "grad_norm": 0.0009748979937285185, - "learning_rate": 0.00019999869405594242, - "loss": 46.0, - "step": 21287 - }, - { - "epoch": 1.627616262400367, - "grad_norm": 0.001311673317104578, - "learning_rate": 0.0001999986939331834, - "loss": 46.0, - "step": 21288 - }, - { - "epoch": 1.6276927193837567, - "grad_norm": 0.005168382544070482, - "learning_rate": 0.00019999869381041857, - "loss": 46.0, - "step": 21289 - }, - { - "epoch": 1.6277691763671465, - "grad_norm": 0.0009775989456102252, - "learning_rate": 0.00019999869368764802, - "loss": 46.0, - "step": 21290 - }, - { - "epoch": 1.6278456333505362, - "grad_norm": 0.0008270779508166015, - "learning_rate": 0.0001999986935648717, - "loss": 46.0, - "step": 21291 - }, - { - "epoch": 1.627922090333926, - "grad_norm": 0.0009698302601464093, - "learning_rate": 0.0001999986934420896, - "loss": 46.0, - "step": 21292 - }, - { - "epoch": 1.6279985473173157, - "grad_norm": 0.0003710102755576372, - "learning_rate": 0.0001999986933193017, - "loss": 46.0, - "step": 21293 - }, - { - "epoch": 1.6280750043007053, - "grad_norm": 0.0009183627553284168, - "learning_rate": 0.00019999869319650807, - "loss": 46.0, - "step": 21294 - }, - { - "epoch": 1.628151461284095, - "grad_norm": 0.001310950261540711, - "learning_rate": 0.00019999869307370865, - "loss": 46.0, - "step": 21295 - }, - { - "epoch": 1.6282279182674848, - "grad_norm": 0.0015147174708545208, - "learning_rate": 0.00019999869295090348, - "loss": 46.0, - "step": 21296 - }, - { - "epoch": 1.6283043752508743, - "grad_norm": 0.0018670111894607544, - "learning_rate": 0.00019999869282809254, - "loss": 46.0, - "step": 21297 - }, - { - "epoch": 1.628380832234264, - "grad_norm": 0.007048591505736113, - "learning_rate": 0.0001999986927052758, - "loss": 46.0, - "step": 21298 - }, - { - "epoch": 1.6284572892176539, - "grad_norm": 0.006009828764945269, - "learning_rate": 0.0001999986925824533, - "loss": 46.0, - "step": 21299 - }, - { - "epoch": 1.6285337462010436, - "grad_norm": 0.0004770963860210031, - "learning_rate": 0.00019999869245962505, - "loss": 46.0, - "step": 21300 - }, - { - "epoch": 1.6286102031844334, - "grad_norm": 0.0013777134008705616, - "learning_rate": 0.00019999869233679104, - "loss": 46.0, - "step": 21301 - }, - { - "epoch": 1.6286866601678232, - "grad_norm": 0.0007388416561298072, - "learning_rate": 0.00019999869221395123, - "loss": 46.0, - "step": 21302 - }, - { - "epoch": 1.628763117151213, - "grad_norm": 0.005754141602665186, - "learning_rate": 0.00019999869209110568, - "loss": 46.0, - "step": 21303 - }, - { - "epoch": 1.6288395741346027, - "grad_norm": 0.006719463504850864, - "learning_rate": 0.00019999869196825435, - "loss": 46.0, - "step": 21304 - }, - { - "epoch": 1.6289160311179922, - "grad_norm": 0.0008256753790192306, - "learning_rate": 0.00019999869184539725, - "loss": 46.0, - "step": 21305 - }, - { - "epoch": 1.628992488101382, - "grad_norm": 0.0019987996201962233, - "learning_rate": 0.00019999869172253435, - "loss": 46.0, - "step": 21306 - }, - { - "epoch": 1.6290689450847717, - "grad_norm": 0.0007657418609596789, - "learning_rate": 0.0001999986915996657, - "loss": 46.0, - "step": 21307 - }, - { - "epoch": 1.6291454020681613, - "grad_norm": 0.0013484855880960822, - "learning_rate": 0.00019999869147679128, - "loss": 46.0, - "step": 21308 - }, - { - "epoch": 1.629221859051551, - "grad_norm": 0.0009328161831945181, - "learning_rate": 0.00019999869135391108, - "loss": 46.0, - "step": 21309 - }, - { - "epoch": 1.6292983160349408, - "grad_norm": 0.002216699765995145, - "learning_rate": 0.00019999869123102514, - "loss": 46.0, - "step": 21310 - }, - { - "epoch": 1.6293747730183306, - "grad_norm": 0.0004925195826217532, - "learning_rate": 0.00019999869110813343, - "loss": 46.0, - "step": 21311 - }, - { - "epoch": 1.6294512300017203, - "grad_norm": 0.0007043244550004601, - "learning_rate": 0.00019999869098523594, - "loss": 46.0, - "step": 21312 - }, - { - "epoch": 1.62952768698511, - "grad_norm": 0.000749769969843328, - "learning_rate": 0.00019999869086233268, - "loss": 46.0, - "step": 21313 - }, - { - "epoch": 1.6296041439684998, - "grad_norm": 0.0010399612365290523, - "learning_rate": 0.00019999869073942364, - "loss": 46.0, - "step": 21314 - }, - { - "epoch": 1.6296806009518896, - "grad_norm": 0.00022684960276819766, - "learning_rate": 0.00019999869061650884, - "loss": 46.0, - "step": 21315 - }, - { - "epoch": 1.6297570579352791, - "grad_norm": 0.0020600163843482733, - "learning_rate": 0.00019999869049358826, - "loss": 46.0, - "step": 21316 - }, - { - "epoch": 1.629833514918669, - "grad_norm": 0.0009904297767207026, - "learning_rate": 0.00019999869037066193, - "loss": 46.0, - "step": 21317 - }, - { - "epoch": 1.6299099719020584, - "grad_norm": 0.0018723539542406797, - "learning_rate": 0.0001999986902477298, - "loss": 46.0, - "step": 21318 - }, - { - "epoch": 1.6299864288854482, - "grad_norm": 0.0008943119901232421, - "learning_rate": 0.00019999869012479195, - "loss": 46.0, - "step": 21319 - }, - { - "epoch": 1.630062885868838, - "grad_norm": 0.006255750544369221, - "learning_rate": 0.00019999869000184833, - "loss": 46.0, - "step": 21320 - }, - { - "epoch": 1.6301393428522277, - "grad_norm": 0.0009223469533026218, - "learning_rate": 0.00019999868987889889, - "loss": 46.0, - "step": 21321 - }, - { - "epoch": 1.6302157998356175, - "grad_norm": 0.000810697500128299, - "learning_rate": 0.0001999986897559437, - "loss": 46.0, - "step": 21322 - }, - { - "epoch": 1.6302922568190072, - "grad_norm": 0.0009278429206460714, - "learning_rate": 0.00019999868963298275, - "loss": 46.0, - "step": 21323 - }, - { - "epoch": 1.630368713802397, - "grad_norm": 0.0007479425403289497, - "learning_rate": 0.000199998689510016, - "loss": 46.0, - "step": 21324 - }, - { - "epoch": 1.6304451707857868, - "grad_norm": 0.0029310144018381834, - "learning_rate": 0.00019999868938704352, - "loss": 46.0, - "step": 21325 - }, - { - "epoch": 1.6305216277691765, - "grad_norm": 0.00308302347548306, - "learning_rate": 0.00019999868926406526, - "loss": 46.0, - "step": 21326 - }, - { - "epoch": 1.630598084752566, - "grad_norm": 0.0009149739053100348, - "learning_rate": 0.00019999868914108123, - "loss": 46.0, - "step": 21327 - }, - { - "epoch": 1.6306745417359558, - "grad_norm": 0.0006685683038085699, - "learning_rate": 0.00019999868901809142, - "loss": 46.0, - "step": 21328 - }, - { - "epoch": 1.6307509987193454, - "grad_norm": 0.0008936495869420469, - "learning_rate": 0.00019999868889509587, - "loss": 46.0, - "step": 21329 - }, - { - "epoch": 1.6308274557027351, - "grad_norm": 0.0007385073695331812, - "learning_rate": 0.0001999986887720945, - "loss": 46.0, - "step": 21330 - }, - { - "epoch": 1.630903912686125, - "grad_norm": 0.004384136293083429, - "learning_rate": 0.0001999986886490874, - "loss": 46.0, - "step": 21331 - }, - { - "epoch": 1.6309803696695147, - "grad_norm": 0.0014456650242209435, - "learning_rate": 0.00019999868852607454, - "loss": 46.0, - "step": 21332 - }, - { - "epoch": 1.6310568266529044, - "grad_norm": 0.0010554405162110925, - "learning_rate": 0.0001999986884030559, - "loss": 46.0, - "step": 21333 - }, - { - "epoch": 1.6311332836362942, - "grad_norm": 0.0010284145828336477, - "learning_rate": 0.00019999868828003144, - "loss": 46.0, - "step": 21334 - }, - { - "epoch": 1.631209740619684, - "grad_norm": 0.0005547644104808569, - "learning_rate": 0.00019999868815700128, - "loss": 46.0, - "step": 21335 - }, - { - "epoch": 1.6312861976030737, - "grad_norm": 0.0006440222496166825, - "learning_rate": 0.0001999986880339653, - "loss": 46.0, - "step": 21336 - }, - { - "epoch": 1.6313626545864635, - "grad_norm": 0.001036867150105536, - "learning_rate": 0.0001999986879109236, - "loss": 46.0, - "step": 21337 - }, - { - "epoch": 1.631439111569853, - "grad_norm": 0.000969358894508332, - "learning_rate": 0.0001999986877878761, - "loss": 46.0, - "step": 21338 - }, - { - "epoch": 1.6315155685532428, - "grad_norm": 0.00198317295871675, - "learning_rate": 0.00019999868766482285, - "loss": 46.0, - "step": 21339 - }, - { - "epoch": 1.6315920255366323, - "grad_norm": 0.0007343141478486359, - "learning_rate": 0.00019999868754176382, - "loss": 46.0, - "step": 21340 - }, - { - "epoch": 1.631668482520022, - "grad_norm": 0.0020955230575054884, - "learning_rate": 0.00019999868741869898, - "loss": 46.0, - "step": 21341 - }, - { - "epoch": 1.6317449395034118, - "grad_norm": 0.012542137876152992, - "learning_rate": 0.00019999868729562843, - "loss": 46.0, - "step": 21342 - }, - { - "epoch": 1.6318213964868016, - "grad_norm": 0.0013200690736994147, - "learning_rate": 0.00019999868717255207, - "loss": 46.0, - "step": 21343 - }, - { - "epoch": 1.6318978534701913, - "grad_norm": 0.0005991157959215343, - "learning_rate": 0.00019999868704946997, - "loss": 46.0, - "step": 21344 - }, - { - "epoch": 1.631974310453581, - "grad_norm": 0.0012595861917361617, - "learning_rate": 0.00019999868692638207, - "loss": 46.0, - "step": 21345 - }, - { - "epoch": 1.6320507674369709, - "grad_norm": 0.001718417857773602, - "learning_rate": 0.00019999868680328843, - "loss": 46.0, - "step": 21346 - }, - { - "epoch": 1.6321272244203606, - "grad_norm": 0.0015305429697036743, - "learning_rate": 0.000199998686680189, - "loss": 46.0, - "step": 21347 - }, - { - "epoch": 1.6322036814037502, - "grad_norm": 0.0014196389820426702, - "learning_rate": 0.0001999986865570838, - "loss": 46.0, - "step": 21348 - }, - { - "epoch": 1.63228013838714, - "grad_norm": 0.00282644503749907, - "learning_rate": 0.00019999868643397285, - "loss": 46.0, - "step": 21349 - }, - { - "epoch": 1.6323565953705297, - "grad_norm": 0.0004768500220961869, - "learning_rate": 0.00019999868631085613, - "loss": 46.0, - "step": 21350 - }, - { - "epoch": 1.6324330523539192, - "grad_norm": 0.001017568982206285, - "learning_rate": 0.00019999868618773365, - "loss": 46.0, - "step": 21351 - }, - { - "epoch": 1.632509509337309, - "grad_norm": 0.0008996358956210315, - "learning_rate": 0.0001999986860646054, - "loss": 46.0, - "step": 21352 - }, - { - "epoch": 1.6325859663206987, - "grad_norm": 0.001769957598298788, - "learning_rate": 0.00019999868594147135, - "loss": 46.0, - "step": 21353 - }, - { - "epoch": 1.6326624233040885, - "grad_norm": 0.0006543158669956028, - "learning_rate": 0.00019999868581833155, - "loss": 46.0, - "step": 21354 - }, - { - "epoch": 1.6327388802874783, - "grad_norm": 0.0026587098836898804, - "learning_rate": 0.00019999868569518597, - "loss": 46.0, - "step": 21355 - }, - { - "epoch": 1.632815337270868, - "grad_norm": 0.003121469169855118, - "learning_rate": 0.0001999986855720346, - "loss": 46.0, - "step": 21356 - }, - { - "epoch": 1.6328917942542578, - "grad_norm": 0.0007932318840175867, - "learning_rate": 0.0001999986854488775, - "loss": 46.0, - "step": 21357 - }, - { - "epoch": 1.6329682512376476, - "grad_norm": 0.0013700078707188368, - "learning_rate": 0.0001999986853257146, - "loss": 46.0, - "step": 21358 - }, - { - "epoch": 1.633044708221037, - "grad_norm": 0.0024322369135916233, - "learning_rate": 0.00019999868520254597, - "loss": 46.0, - "step": 21359 - }, - { - "epoch": 1.6331211652044269, - "grad_norm": 0.0010843066265806556, - "learning_rate": 0.00019999868507937155, - "loss": 46.0, - "step": 21360 - }, - { - "epoch": 1.6331976221878166, - "grad_norm": 0.0011280658654868603, - "learning_rate": 0.00019999868495619135, - "loss": 46.0, - "step": 21361 - }, - { - "epoch": 1.6332740791712062, - "grad_norm": 0.0014752084389328957, - "learning_rate": 0.00019999868483300541, - "loss": 46.0, - "step": 21362 - }, - { - "epoch": 1.633350536154596, - "grad_norm": 0.002738103736191988, - "learning_rate": 0.00019999868470981367, - "loss": 46.0, - "step": 21363 - }, - { - "epoch": 1.6334269931379857, - "grad_norm": 0.004372116178274155, - "learning_rate": 0.00019999868458661616, - "loss": 46.0, - "step": 21364 - }, - { - "epoch": 1.6335034501213754, - "grad_norm": 0.0017591556534171104, - "learning_rate": 0.0001999986844634129, - "loss": 46.0, - "step": 21365 - }, - { - "epoch": 1.6335799071047652, - "grad_norm": 0.0006350170588120818, - "learning_rate": 0.00019999868434020387, - "loss": 46.0, - "step": 21366 - }, - { - "epoch": 1.633656364088155, - "grad_norm": 0.0009469349170103669, - "learning_rate": 0.00019999868421698906, - "loss": 46.0, - "step": 21367 - }, - { - "epoch": 1.6337328210715447, - "grad_norm": 0.0034909015521407127, - "learning_rate": 0.00019999868409376848, - "loss": 46.0, - "step": 21368 - }, - { - "epoch": 1.6338092780549345, - "grad_norm": 0.0030245762318372726, - "learning_rate": 0.00019999868397054213, - "loss": 46.0, - "step": 21369 - }, - { - "epoch": 1.633885735038324, - "grad_norm": 0.0003537854354362935, - "learning_rate": 0.00019999868384731003, - "loss": 46.0, - "step": 21370 - }, - { - "epoch": 1.6339621920217138, - "grad_norm": 0.004920335486531258, - "learning_rate": 0.00019999868372407213, - "loss": 46.0, - "step": 21371 - }, - { - "epoch": 1.6340386490051035, - "grad_norm": 0.0012120850151404738, - "learning_rate": 0.00019999868360082848, - "loss": 46.0, - "step": 21372 - }, - { - "epoch": 1.634115105988493, - "grad_norm": 0.005476017948240042, - "learning_rate": 0.00019999868347757906, - "loss": 46.0, - "step": 21373 - }, - { - "epoch": 1.6341915629718828, - "grad_norm": 0.0003926352073904127, - "learning_rate": 0.00019999868335432387, - "loss": 46.0, - "step": 21374 - }, - { - "epoch": 1.6342680199552726, - "grad_norm": 0.001729327137582004, - "learning_rate": 0.0001999986832310629, - "loss": 46.0, - "step": 21375 - }, - { - "epoch": 1.6343444769386624, - "grad_norm": 0.0011682361364364624, - "learning_rate": 0.00019999868310779614, - "loss": 46.0, - "step": 21376 - }, - { - "epoch": 1.6344209339220521, - "grad_norm": 0.0027087063062936068, - "learning_rate": 0.00019999868298452368, - "loss": 46.0, - "step": 21377 - }, - { - "epoch": 1.6344973909054419, - "grad_norm": 0.002049011643975973, - "learning_rate": 0.0001999986828612454, - "loss": 46.0, - "step": 21378 - }, - { - "epoch": 1.6345738478888316, - "grad_norm": 0.0008791850996203721, - "learning_rate": 0.00019999868273796136, - "loss": 46.0, - "step": 21379 - }, - { - "epoch": 1.6346503048722214, - "grad_norm": 0.00478854775428772, - "learning_rate": 0.00019999868261467155, - "loss": 46.0, - "step": 21380 - }, - { - "epoch": 1.634726761855611, - "grad_norm": 0.0020329647231847048, - "learning_rate": 0.00019999868249137597, - "loss": 46.0, - "step": 21381 - }, - { - "epoch": 1.6348032188390007, - "grad_norm": 0.005377105437219143, - "learning_rate": 0.00019999868236807462, - "loss": 46.0, - "step": 21382 - }, - { - "epoch": 1.6348796758223905, - "grad_norm": 0.0012790110195055604, - "learning_rate": 0.0001999986822447675, - "loss": 46.0, - "step": 21383 - }, - { - "epoch": 1.63495613280578, - "grad_norm": 0.0016248412430286407, - "learning_rate": 0.0001999986821214546, - "loss": 46.0, - "step": 21384 - }, - { - "epoch": 1.6350325897891698, - "grad_norm": 0.001474945922382176, - "learning_rate": 0.00019999868199813595, - "loss": 46.0, - "step": 21385 - }, - { - "epoch": 1.6351090467725595, - "grad_norm": 0.0008074503275565803, - "learning_rate": 0.00019999868187481156, - "loss": 46.0, - "step": 21386 - }, - { - "epoch": 1.6351855037559493, - "grad_norm": 0.003150628414005041, - "learning_rate": 0.00019999868175148134, - "loss": 46.0, - "step": 21387 - }, - { - "epoch": 1.635261960739339, - "grad_norm": 0.0004442027129698545, - "learning_rate": 0.00019999868162814537, - "loss": 46.0, - "step": 21388 - }, - { - "epoch": 1.6353384177227288, - "grad_norm": 0.0025579649955034256, - "learning_rate": 0.00019999868150480363, - "loss": 46.0, - "step": 21389 - }, - { - "epoch": 1.6354148747061186, - "grad_norm": 0.0005267733940854669, - "learning_rate": 0.00019999868138145615, - "loss": 46.0, - "step": 21390 - }, - { - "epoch": 1.6354913316895083, - "grad_norm": 0.0007746985065750778, - "learning_rate": 0.00019999868125810286, - "loss": 46.0, - "step": 21391 - }, - { - "epoch": 1.6355677886728979, - "grad_norm": 0.0010149333393201232, - "learning_rate": 0.0001999986811347438, - "loss": 46.0, - "step": 21392 - }, - { - "epoch": 1.6356442456562876, - "grad_norm": 0.0008023123955354095, - "learning_rate": 0.00019999868101137902, - "loss": 46.0, - "step": 21393 - }, - { - "epoch": 1.6357207026396774, - "grad_norm": 0.0007437619497068226, - "learning_rate": 0.00019999868088800842, - "loss": 46.0, - "step": 21394 - }, - { - "epoch": 1.635797159623067, - "grad_norm": 0.0012289745500311255, - "learning_rate": 0.0001999986807646321, - "loss": 46.0, - "step": 21395 - }, - { - "epoch": 1.6358736166064567, - "grad_norm": 0.001823923666961491, - "learning_rate": 0.00019999868064124996, - "loss": 46.0, - "step": 21396 - }, - { - "epoch": 1.6359500735898465, - "grad_norm": 0.0006801133276894689, - "learning_rate": 0.00019999868051786206, - "loss": 46.0, - "step": 21397 - }, - { - "epoch": 1.6360265305732362, - "grad_norm": 0.002444860991090536, - "learning_rate": 0.00019999868039446842, - "loss": 46.0, - "step": 21398 - }, - { - "epoch": 1.636102987556626, - "grad_norm": 0.0004933615564368665, - "learning_rate": 0.00019999868027106897, - "loss": 46.0, - "step": 21399 - }, - { - "epoch": 1.6361794445400157, - "grad_norm": 0.000761988281738013, - "learning_rate": 0.00019999868014766378, - "loss": 46.0, - "step": 21400 - }, - { - "epoch": 1.6362559015234055, - "grad_norm": 0.0011164663592353463, - "learning_rate": 0.00019999868002425282, - "loss": 46.0, - "step": 21401 - }, - { - "epoch": 1.6363323585067953, - "grad_norm": 0.003488842397928238, - "learning_rate": 0.00019999867990083608, - "loss": 46.0, - "step": 21402 - }, - { - "epoch": 1.6364088154901848, - "grad_norm": 0.0008522930438630283, - "learning_rate": 0.0001999986797774136, - "loss": 46.0, - "step": 21403 - }, - { - "epoch": 1.6364852724735746, - "grad_norm": 0.0003183636872563511, - "learning_rate": 0.0001999986796539853, - "loss": 46.0, - "step": 21404 - }, - { - "epoch": 1.6365617294569643, - "grad_norm": 0.0007623909041285515, - "learning_rate": 0.00019999867953055125, - "loss": 46.0, - "step": 21405 - }, - { - "epoch": 1.6366381864403539, - "grad_norm": 0.0006663136300630867, - "learning_rate": 0.00019999867940711147, - "loss": 46.0, - "step": 21406 - }, - { - "epoch": 1.6367146434237436, - "grad_norm": 0.0012197361793369055, - "learning_rate": 0.00019999867928366586, - "loss": 46.0, - "step": 21407 - }, - { - "epoch": 1.6367911004071334, - "grad_norm": 0.001278865383937955, - "learning_rate": 0.0001999986791602145, - "loss": 46.0, - "step": 21408 - }, - { - "epoch": 1.6368675573905231, - "grad_norm": 0.0020506444852799177, - "learning_rate": 0.00019999867903675738, - "loss": 46.0, - "step": 21409 - }, - { - "epoch": 1.636944014373913, - "grad_norm": 0.0021305012051016092, - "learning_rate": 0.0001999986789132945, - "loss": 46.0, - "step": 21410 - }, - { - "epoch": 1.6370204713573027, - "grad_norm": 0.002022538334131241, - "learning_rate": 0.00019999867878982587, - "loss": 46.0, - "step": 21411 - }, - { - "epoch": 1.6370969283406924, - "grad_norm": 0.0009814834920689464, - "learning_rate": 0.00019999867866635142, - "loss": 46.0, - "step": 21412 - }, - { - "epoch": 1.6371733853240822, - "grad_norm": 0.0008672121912240982, - "learning_rate": 0.00019999867854287123, - "loss": 46.0, - "step": 21413 - }, - { - "epoch": 1.6372498423074717, - "grad_norm": 0.00041513825999572873, - "learning_rate": 0.00019999867841938524, - "loss": 46.0, - "step": 21414 - }, - { - "epoch": 1.6373262992908615, - "grad_norm": 0.0009369790786877275, - "learning_rate": 0.0001999986782958935, - "loss": 46.0, - "step": 21415 - }, - { - "epoch": 1.6374027562742512, - "grad_norm": 0.001369163510389626, - "learning_rate": 0.000199998678172396, - "loss": 46.0, - "step": 21416 - }, - { - "epoch": 1.6374792132576408, - "grad_norm": 0.0008466416038572788, - "learning_rate": 0.00019999867804889273, - "loss": 46.0, - "step": 21417 - }, - { - "epoch": 1.6375556702410305, - "grad_norm": 0.0012737085344269872, - "learning_rate": 0.0001999986779253837, - "loss": 46.0, - "step": 21418 - }, - { - "epoch": 1.6376321272244203, - "grad_norm": 0.0007173936464823782, - "learning_rate": 0.00019999867780186886, - "loss": 46.0, - "step": 21419 - }, - { - "epoch": 1.63770858420781, - "grad_norm": 0.001023541553877294, - "learning_rate": 0.00019999867767834828, - "loss": 46.0, - "step": 21420 - }, - { - "epoch": 1.6377850411911998, - "grad_norm": 0.0007021892233751714, - "learning_rate": 0.00019999867755482193, - "loss": 46.0, - "step": 21421 - }, - { - "epoch": 1.6378614981745896, - "grad_norm": 0.0006749724852852523, - "learning_rate": 0.0001999986774312898, - "loss": 46.0, - "step": 21422 - }, - { - "epoch": 1.6379379551579794, - "grad_norm": 0.0006043238681741059, - "learning_rate": 0.00019999867730775194, - "loss": 46.0, - "step": 21423 - }, - { - "epoch": 1.6380144121413691, - "grad_norm": 0.0008882411057129502, - "learning_rate": 0.00019999867718420826, - "loss": 46.0, - "step": 21424 - }, - { - "epoch": 1.6380908691247587, - "grad_norm": 0.0065286909230053425, - "learning_rate": 0.00019999867706065882, - "loss": 46.0, - "step": 21425 - }, - { - "epoch": 1.6381673261081484, - "grad_norm": 0.007178504019975662, - "learning_rate": 0.0001999986769371036, - "loss": 46.0, - "step": 21426 - }, - { - "epoch": 1.6382437830915382, - "grad_norm": 0.0009327356819994748, - "learning_rate": 0.00019999867681354263, - "loss": 46.0, - "step": 21427 - }, - { - "epoch": 1.6383202400749277, - "grad_norm": 0.000739293813239783, - "learning_rate": 0.00019999867668997592, - "loss": 46.0, - "step": 21428 - }, - { - "epoch": 1.6383966970583175, - "grad_norm": 0.0010062269866466522, - "learning_rate": 0.00019999867656640338, - "loss": 46.0, - "step": 21429 - }, - { - "epoch": 1.6384731540417072, - "grad_norm": 0.0012885708129033446, - "learning_rate": 0.00019999867644282513, - "loss": 46.0, - "step": 21430 - }, - { - "epoch": 1.638549611025097, - "grad_norm": 0.0013641210971400142, - "learning_rate": 0.00019999867631924107, - "loss": 46.0, - "step": 21431 - }, - { - "epoch": 1.6386260680084868, - "grad_norm": 0.0011096522212028503, - "learning_rate": 0.00019999867619565124, - "loss": 46.0, - "step": 21432 - }, - { - "epoch": 1.6387025249918765, - "grad_norm": 0.0016244127182289958, - "learning_rate": 0.00019999867607205566, - "loss": 46.0, - "step": 21433 - }, - { - "epoch": 1.6387789819752663, - "grad_norm": 0.0024805685970932245, - "learning_rate": 0.0001999986759484543, - "loss": 46.0, - "step": 21434 - }, - { - "epoch": 1.638855438958656, - "grad_norm": 0.0026701297610998154, - "learning_rate": 0.00019999867582484718, - "loss": 46.0, - "step": 21435 - }, - { - "epoch": 1.6389318959420456, - "grad_norm": 0.0009438165579922497, - "learning_rate": 0.00019999867570123428, - "loss": 46.0, - "step": 21436 - }, - { - "epoch": 1.6390083529254353, - "grad_norm": 0.027668753638863564, - "learning_rate": 0.0001999986755776156, - "loss": 46.0, - "step": 21437 - }, - { - "epoch": 1.639084809908825, - "grad_norm": 0.0008696553413756192, - "learning_rate": 0.0001999986754539912, - "loss": 46.0, - "step": 21438 - }, - { - "epoch": 1.6391612668922146, - "grad_norm": 0.007601591292768717, - "learning_rate": 0.000199998675330361, - "loss": 46.0, - "step": 21439 - }, - { - "epoch": 1.6392377238756044, - "grad_norm": 0.004165851976722479, - "learning_rate": 0.000199998675206725, - "loss": 46.0, - "step": 21440 - }, - { - "epoch": 1.6393141808589942, - "grad_norm": 0.0006563612259924412, - "learning_rate": 0.00019999867508308327, - "loss": 46.0, - "step": 21441 - }, - { - "epoch": 1.639390637842384, - "grad_norm": 0.005062736105173826, - "learning_rate": 0.00019999867495943576, - "loss": 46.0, - "step": 21442 - }, - { - "epoch": 1.6394670948257737, - "grad_norm": 0.0008686263463459909, - "learning_rate": 0.0001999986748357825, - "loss": 46.0, - "step": 21443 - }, - { - "epoch": 1.6395435518091634, - "grad_norm": 0.0016146207926794887, - "learning_rate": 0.00019999867471212344, - "loss": 46.0, - "step": 21444 - }, - { - "epoch": 1.6396200087925532, - "grad_norm": 0.0006938331061974168, - "learning_rate": 0.0001999986745884586, - "loss": 46.0, - "step": 21445 - }, - { - "epoch": 1.639696465775943, - "grad_norm": 0.0037709628231823444, - "learning_rate": 0.000199998674464788, - "loss": 46.0, - "step": 21446 - }, - { - "epoch": 1.6397729227593325, - "grad_norm": 0.003228316782042384, - "learning_rate": 0.00019999867434111166, - "loss": 46.0, - "step": 21447 - }, - { - "epoch": 1.6398493797427223, - "grad_norm": 0.0020065733697265387, - "learning_rate": 0.00019999867421742956, - "loss": 46.0, - "step": 21448 - }, - { - "epoch": 1.6399258367261118, - "grad_norm": 0.000771641731262207, - "learning_rate": 0.00019999867409374163, - "loss": 46.0, - "step": 21449 - }, - { - "epoch": 1.6400022937095016, - "grad_norm": 0.0006261492962948978, - "learning_rate": 0.000199998673970048, - "loss": 46.0, - "step": 21450 - }, - { - "epoch": 1.6400787506928913, - "grad_norm": 0.0011512351920828223, - "learning_rate": 0.00019999867384634854, - "loss": 46.0, - "step": 21451 - }, - { - "epoch": 1.640155207676281, - "grad_norm": 0.0023112036287784576, - "learning_rate": 0.00019999867372264335, - "loss": 46.0, - "step": 21452 - }, - { - "epoch": 1.6402316646596709, - "grad_norm": 0.0009989881655201316, - "learning_rate": 0.00019999867359893236, - "loss": 46.0, - "step": 21453 - }, - { - "epoch": 1.6403081216430606, - "grad_norm": 0.002040344290435314, - "learning_rate": 0.00019999867347521562, - "loss": 46.0, - "step": 21454 - }, - { - "epoch": 1.6403845786264504, - "grad_norm": 0.0007470930577255785, - "learning_rate": 0.0001999986733514931, - "loss": 46.0, - "step": 21455 - }, - { - "epoch": 1.6404610356098401, - "grad_norm": 0.0016769059002399445, - "learning_rate": 0.00019999867322776483, - "loss": 46.0, - "step": 21456 - }, - { - "epoch": 1.64053749259323, - "grad_norm": 0.003037337912246585, - "learning_rate": 0.00019999867310403077, - "loss": 46.0, - "step": 21457 - }, - { - "epoch": 1.6406139495766194, - "grad_norm": 0.004207718186080456, - "learning_rate": 0.00019999867298029097, - "loss": 46.0, - "step": 21458 - }, - { - "epoch": 1.6406904065600092, - "grad_norm": 0.000677183736115694, - "learning_rate": 0.00019999867285654536, - "loss": 46.0, - "step": 21459 - }, - { - "epoch": 1.6407668635433987, - "grad_norm": 0.0018042910378426313, - "learning_rate": 0.000199998672732794, - "loss": 46.0, - "step": 21460 - }, - { - "epoch": 1.6408433205267885, - "grad_norm": 0.0009706224082037807, - "learning_rate": 0.00019999867260903689, - "loss": 46.0, - "step": 21461 - }, - { - "epoch": 1.6409197775101783, - "grad_norm": 0.0009095891728065908, - "learning_rate": 0.000199998672485274, - "loss": 46.0, - "step": 21462 - }, - { - "epoch": 1.640996234493568, - "grad_norm": 0.003760226536542177, - "learning_rate": 0.00019999867236150532, - "loss": 46.0, - "step": 21463 - }, - { - "epoch": 1.6410726914769578, - "grad_norm": 0.001143462723121047, - "learning_rate": 0.00019999867223773087, - "loss": 46.0, - "step": 21464 - }, - { - "epoch": 1.6411491484603475, - "grad_norm": 0.0015298080397769809, - "learning_rate": 0.00019999867211395068, - "loss": 46.0, - "step": 21465 - }, - { - "epoch": 1.6412256054437373, - "grad_norm": 0.001853389199823141, - "learning_rate": 0.00019999867199016472, - "loss": 46.0, - "step": 21466 - }, - { - "epoch": 1.641302062427127, - "grad_norm": 0.0014462117105722427, - "learning_rate": 0.00019999867186637293, - "loss": 46.0, - "step": 21467 - }, - { - "epoch": 1.6413785194105168, - "grad_norm": 0.0007944151293486357, - "learning_rate": 0.00019999867174257542, - "loss": 46.0, - "step": 21468 - }, - { - "epoch": 1.6414549763939064, - "grad_norm": 0.003324038116261363, - "learning_rate": 0.00019999867161877216, - "loss": 46.0, - "step": 21469 - }, - { - "epoch": 1.6415314333772961, - "grad_norm": 0.006871282588690519, - "learning_rate": 0.0001999986714949631, - "loss": 46.0, - "step": 21470 - }, - { - "epoch": 1.6416078903606857, - "grad_norm": 0.001411988283507526, - "learning_rate": 0.00019999867137114827, - "loss": 46.0, - "step": 21471 - }, - { - "epoch": 1.6416843473440754, - "grad_norm": 0.003897459479048848, - "learning_rate": 0.00019999867124732766, - "loss": 46.0, - "step": 21472 - }, - { - "epoch": 1.6417608043274652, - "grad_norm": 0.0010658982209861279, - "learning_rate": 0.00019999867112350131, - "loss": 46.0, - "step": 21473 - }, - { - "epoch": 1.641837261310855, - "grad_norm": 0.0009889076463878155, - "learning_rate": 0.0001999986709996692, - "loss": 46.0, - "step": 21474 - }, - { - "epoch": 1.6419137182942447, - "grad_norm": 0.0019584798719733953, - "learning_rate": 0.0001999986708758313, - "loss": 46.0, - "step": 21475 - }, - { - "epoch": 1.6419901752776345, - "grad_norm": 0.0006531606777571142, - "learning_rate": 0.00019999867075198762, - "loss": 46.0, - "step": 21476 - }, - { - "epoch": 1.6420666322610242, - "grad_norm": 0.002930920571088791, - "learning_rate": 0.00019999867062813818, - "loss": 46.0, - "step": 21477 - }, - { - "epoch": 1.642143089244414, - "grad_norm": 0.0014221802121028304, - "learning_rate": 0.000199998670504283, - "loss": 46.0, - "step": 21478 - }, - { - "epoch": 1.6422195462278035, - "grad_norm": 0.00303996866568923, - "learning_rate": 0.000199998670380422, - "loss": 46.0, - "step": 21479 - }, - { - "epoch": 1.6422960032111933, - "grad_norm": 0.009048588573932648, - "learning_rate": 0.00019999867025655523, - "loss": 46.0, - "step": 21480 - }, - { - "epoch": 1.642372460194583, - "grad_norm": 0.001439518528059125, - "learning_rate": 0.00019999867013268272, - "loss": 46.0, - "step": 21481 - }, - { - "epoch": 1.6424489171779726, - "grad_norm": 0.0004770209197886288, - "learning_rate": 0.00019999867000880444, - "loss": 46.0, - "step": 21482 - }, - { - "epoch": 1.6425253741613624, - "grad_norm": 0.0009524385677650571, - "learning_rate": 0.00019999866988492038, - "loss": 46.0, - "step": 21483 - }, - { - "epoch": 1.6426018311447521, - "grad_norm": 0.0008762957295402884, - "learning_rate": 0.00019999866976103058, - "loss": 46.0, - "step": 21484 - }, - { - "epoch": 1.6426782881281419, - "grad_norm": 0.001765785156749189, - "learning_rate": 0.00019999866963713497, - "loss": 46.0, - "step": 21485 - }, - { - "epoch": 1.6427547451115316, - "grad_norm": 0.0008420029189437628, - "learning_rate": 0.0001999986695132336, - "loss": 46.0, - "step": 21486 - }, - { - "epoch": 1.6428312020949214, - "grad_norm": 0.00047600921243429184, - "learning_rate": 0.00019999866938932647, - "loss": 46.0, - "step": 21487 - }, - { - "epoch": 1.6429076590783112, - "grad_norm": 0.0012290027225390077, - "learning_rate": 0.00019999866926541357, - "loss": 46.0, - "step": 21488 - }, - { - "epoch": 1.642984116061701, - "grad_norm": 0.000704701931681484, - "learning_rate": 0.0001999986691414949, - "loss": 46.0, - "step": 21489 - }, - { - "epoch": 1.6430605730450905, - "grad_norm": 0.0005110579659231007, - "learning_rate": 0.00019999866901757046, - "loss": 46.0, - "step": 21490 - }, - { - "epoch": 1.6431370300284802, - "grad_norm": 0.0004316684789955616, - "learning_rate": 0.00019999866889364024, - "loss": 46.0, - "step": 21491 - }, - { - "epoch": 1.64321348701187, - "grad_norm": 0.002363208681344986, - "learning_rate": 0.00019999866876970428, - "loss": 46.0, - "step": 21492 - }, - { - "epoch": 1.6432899439952595, - "grad_norm": 0.0027257229667156935, - "learning_rate": 0.00019999866864576254, - "loss": 46.0, - "step": 21493 - }, - { - "epoch": 1.6433664009786493, - "grad_norm": 0.004278544336557388, - "learning_rate": 0.000199998668521815, - "loss": 46.0, - "step": 21494 - }, - { - "epoch": 1.643442857962039, - "grad_norm": 0.0007887862739153206, - "learning_rate": 0.00019999866839786172, - "loss": 46.0, - "step": 21495 - }, - { - "epoch": 1.6435193149454288, - "grad_norm": 0.0006672461167909205, - "learning_rate": 0.00019999866827390266, - "loss": 46.0, - "step": 21496 - }, - { - "epoch": 1.6435957719288186, - "grad_norm": 0.005941648036241531, - "learning_rate": 0.0001999986681499378, - "loss": 46.0, - "step": 21497 - }, - { - "epoch": 1.6436722289122083, - "grad_norm": 0.0008345708483830094, - "learning_rate": 0.00019999866802596726, - "loss": 46.0, - "step": 21498 - }, - { - "epoch": 1.643748685895598, - "grad_norm": 0.0032663140445947647, - "learning_rate": 0.00019999866790199085, - "loss": 46.0, - "step": 21499 - }, - { - "epoch": 1.6438251428789878, - "grad_norm": 0.001309213344939053, - "learning_rate": 0.00019999866777800873, - "loss": 46.0, - "step": 21500 - }, - { - "epoch": 1.6439015998623774, - "grad_norm": 0.00046075653517618775, - "learning_rate": 0.00019999866765402083, - "loss": 46.0, - "step": 21501 - }, - { - "epoch": 1.6439780568457671, - "grad_norm": 0.011375710368156433, - "learning_rate": 0.00019999866753002716, - "loss": 46.0, - "step": 21502 - }, - { - "epoch": 1.644054513829157, - "grad_norm": 0.002224414609372616, - "learning_rate": 0.00019999866740602772, - "loss": 46.0, - "step": 21503 - }, - { - "epoch": 1.6441309708125464, - "grad_norm": 0.001404323847964406, - "learning_rate": 0.0001999986672820225, - "loss": 46.0, - "step": 21504 - }, - { - "epoch": 1.6442074277959362, - "grad_norm": 0.00245850533246994, - "learning_rate": 0.0001999986671580115, - "loss": 46.0, - "step": 21505 - }, - { - "epoch": 1.644283884779326, - "grad_norm": 0.011049183085560799, - "learning_rate": 0.00019999866703399478, - "loss": 46.0, - "step": 21506 - }, - { - "epoch": 1.6443603417627157, - "grad_norm": 0.002516641980037093, - "learning_rate": 0.00019999866690997227, - "loss": 46.0, - "step": 21507 - }, - { - "epoch": 1.6444367987461055, - "grad_norm": 0.0012511474778875709, - "learning_rate": 0.00019999866678594396, - "loss": 46.0, - "step": 21508 - }, - { - "epoch": 1.6445132557294952, - "grad_norm": 0.0019315023673698306, - "learning_rate": 0.0001999986666619099, - "loss": 46.0, - "step": 21509 - }, - { - "epoch": 1.644589712712885, - "grad_norm": 0.0017806924879550934, - "learning_rate": 0.00019999866653787007, - "loss": 46.0, - "step": 21510 - }, - { - "epoch": 1.6446661696962748, - "grad_norm": 0.003197327023372054, - "learning_rate": 0.00019999866641382447, - "loss": 46.0, - "step": 21511 - }, - { - "epoch": 1.6447426266796643, - "grad_norm": 0.001833573216572404, - "learning_rate": 0.00019999866628977312, - "loss": 46.0, - "step": 21512 - }, - { - "epoch": 1.644819083663054, - "grad_norm": 0.003872358938679099, - "learning_rate": 0.00019999866616571597, - "loss": 46.0, - "step": 21513 - }, - { - "epoch": 1.6448955406464438, - "grad_norm": 0.0017271749675273895, - "learning_rate": 0.00019999866604165307, - "loss": 46.0, - "step": 21514 - }, - { - "epoch": 1.6449719976298334, - "grad_norm": 0.001248883199878037, - "learning_rate": 0.0001999986659175844, - "loss": 46.0, - "step": 21515 - }, - { - "epoch": 1.6450484546132231, - "grad_norm": 0.0010162307880818844, - "learning_rate": 0.00019999866579350996, - "loss": 46.0, - "step": 21516 - }, - { - "epoch": 1.645124911596613, - "grad_norm": 0.0021877228282392025, - "learning_rate": 0.00019999866566942974, - "loss": 46.0, - "step": 21517 - }, - { - "epoch": 1.6452013685800027, - "grad_norm": 0.0012758949305862188, - "learning_rate": 0.00019999866554534375, - "loss": 46.0, - "step": 21518 - }, - { - "epoch": 1.6452778255633924, - "grad_norm": 0.00048259375034831464, - "learning_rate": 0.000199998665421252, - "loss": 46.0, - "step": 21519 - }, - { - "epoch": 1.6453542825467822, - "grad_norm": 0.005213888827711344, - "learning_rate": 0.00019999866529715448, - "loss": 46.0, - "step": 21520 - }, - { - "epoch": 1.645430739530172, - "grad_norm": 0.0007712020887993276, - "learning_rate": 0.00019999866517305117, - "loss": 46.0, - "step": 21521 - }, - { - "epoch": 1.6455071965135617, - "grad_norm": 0.0008918406092561781, - "learning_rate": 0.00019999866504894214, - "loss": 46.0, - "step": 21522 - }, - { - "epoch": 1.6455836534969512, - "grad_norm": 0.0010949833085760474, - "learning_rate": 0.00019999866492482728, - "loss": 46.0, - "step": 21523 - }, - { - "epoch": 1.645660110480341, - "grad_norm": 0.004753062501549721, - "learning_rate": 0.0001999986648007067, - "loss": 46.0, - "step": 21524 - }, - { - "epoch": 1.6457365674637308, - "grad_norm": 0.0017014964250847697, - "learning_rate": 0.0001999986646765803, - "loss": 46.0, - "step": 21525 - }, - { - "epoch": 1.6458130244471203, - "grad_norm": 0.0008219338487833738, - "learning_rate": 0.00019999866455244818, - "loss": 46.0, - "step": 21526 - }, - { - "epoch": 1.64588948143051, - "grad_norm": 0.0010213457280769944, - "learning_rate": 0.00019999866442831028, - "loss": 46.0, - "step": 21527 - }, - { - "epoch": 1.6459659384138998, - "grad_norm": 0.0012580167967826128, - "learning_rate": 0.0001999986643041666, - "loss": 46.0, - "step": 21528 - }, - { - "epoch": 1.6460423953972896, - "grad_norm": 0.0007823933265171945, - "learning_rate": 0.00019999866418001717, - "loss": 46.0, - "step": 21529 - }, - { - "epoch": 1.6461188523806793, - "grad_norm": 0.000719325733371079, - "learning_rate": 0.00019999866405586193, - "loss": 46.0, - "step": 21530 - }, - { - "epoch": 1.646195309364069, - "grad_norm": 0.0010906400857493281, - "learning_rate": 0.00019999866393170094, - "loss": 46.0, - "step": 21531 - }, - { - "epoch": 1.6462717663474589, - "grad_norm": 0.0010455127339810133, - "learning_rate": 0.00019999866380753417, - "loss": 46.0, - "step": 21532 - }, - { - "epoch": 1.6463482233308486, - "grad_norm": 0.0010679762344807386, - "learning_rate": 0.0001999986636833617, - "loss": 46.0, - "step": 21533 - }, - { - "epoch": 1.6464246803142382, - "grad_norm": 0.003093340899795294, - "learning_rate": 0.00019999866355918338, - "loss": 46.0, - "step": 21534 - }, - { - "epoch": 1.646501137297628, - "grad_norm": 0.0002861547109205276, - "learning_rate": 0.0001999986634349993, - "loss": 46.0, - "step": 21535 - }, - { - "epoch": 1.6465775942810177, - "grad_norm": 0.0014112535864114761, - "learning_rate": 0.00019999866331080947, - "loss": 46.0, - "step": 21536 - }, - { - "epoch": 1.6466540512644072, - "grad_norm": 0.0005738164763897657, - "learning_rate": 0.0001999986631866139, - "loss": 46.0, - "step": 21537 - }, - { - "epoch": 1.646730508247797, - "grad_norm": 0.0006993873394094408, - "learning_rate": 0.00019999866306241252, - "loss": 46.0, - "step": 21538 - }, - { - "epoch": 1.6468069652311867, - "grad_norm": 0.0016875143628567457, - "learning_rate": 0.00019999866293820537, - "loss": 46.0, - "step": 21539 - }, - { - "epoch": 1.6468834222145765, - "grad_norm": 0.000774713815189898, - "learning_rate": 0.00019999866281399244, - "loss": 46.0, - "step": 21540 - }, - { - "epoch": 1.6469598791979663, - "grad_norm": 0.0006773581262677908, - "learning_rate": 0.00019999866268977378, - "loss": 46.0, - "step": 21541 - }, - { - "epoch": 1.647036336181356, - "grad_norm": 0.0005350045394152403, - "learning_rate": 0.00019999866256554933, - "loss": 46.0, - "step": 21542 - }, - { - "epoch": 1.6471127931647458, - "grad_norm": 0.0012855910463258624, - "learning_rate": 0.00019999866244131912, - "loss": 46.0, - "step": 21543 - }, - { - "epoch": 1.6471892501481356, - "grad_norm": 0.0009674620814621449, - "learning_rate": 0.00019999866231708313, - "loss": 46.0, - "step": 21544 - }, - { - "epoch": 1.647265707131525, - "grad_norm": 0.0010357644641771913, - "learning_rate": 0.00019999866219284136, - "loss": 46.0, - "step": 21545 - }, - { - "epoch": 1.6473421641149149, - "grad_norm": 0.009009824134409428, - "learning_rate": 0.00019999866206859386, - "loss": 46.0, - "step": 21546 - }, - { - "epoch": 1.6474186210983046, - "grad_norm": 0.0007428620592691004, - "learning_rate": 0.00019999866194434055, - "loss": 46.0, - "step": 21547 - }, - { - "epoch": 1.6474950780816942, - "grad_norm": 0.0008998431731015444, - "learning_rate": 0.0001999986618200815, - "loss": 46.0, - "step": 21548 - }, - { - "epoch": 1.647571535065084, - "grad_norm": 0.0007695354288443923, - "learning_rate": 0.00019999866169581663, - "loss": 46.0, - "step": 21549 - }, - { - "epoch": 1.6476479920484737, - "grad_norm": 0.0033494688104838133, - "learning_rate": 0.00019999866157154603, - "loss": 46.0, - "step": 21550 - }, - { - "epoch": 1.6477244490318634, - "grad_norm": 0.0008527569589205086, - "learning_rate": 0.00019999866144726966, - "loss": 46.0, - "step": 21551 - }, - { - "epoch": 1.6478009060152532, - "grad_norm": 0.010698262602090836, - "learning_rate": 0.00019999866132298753, - "loss": 46.0, - "step": 21552 - }, - { - "epoch": 1.647877362998643, - "grad_norm": 0.00197147810831666, - "learning_rate": 0.0001999986611986996, - "loss": 46.0, - "step": 21553 - }, - { - "epoch": 1.6479538199820327, - "grad_norm": 0.0014603988965973258, - "learning_rate": 0.00019999866107440592, - "loss": 46.0, - "step": 21554 - }, - { - "epoch": 1.6480302769654225, - "grad_norm": 0.001671516802161932, - "learning_rate": 0.0001999986609501065, - "loss": 46.0, - "step": 21555 - }, - { - "epoch": 1.648106733948812, - "grad_norm": 0.0004867609532084316, - "learning_rate": 0.00019999866082580128, - "loss": 46.0, - "step": 21556 - }, - { - "epoch": 1.6481831909322018, - "grad_norm": 0.001656443695537746, - "learning_rate": 0.00019999866070149027, - "loss": 46.0, - "step": 21557 - }, - { - "epoch": 1.6482596479155915, - "grad_norm": 0.005509183742105961, - "learning_rate": 0.00019999866057717353, - "loss": 46.0, - "step": 21558 - }, - { - "epoch": 1.648336104898981, - "grad_norm": 0.0006206129328347743, - "learning_rate": 0.00019999866045285097, - "loss": 46.0, - "step": 21559 - }, - { - "epoch": 1.6484125618823708, - "grad_norm": 0.0006834531086497009, - "learning_rate": 0.00019999866032852272, - "loss": 46.0, - "step": 21560 - }, - { - "epoch": 1.6484890188657606, - "grad_norm": 0.0007297497941181064, - "learning_rate": 0.00019999866020418863, - "loss": 46.0, - "step": 21561 - }, - { - "epoch": 1.6485654758491504, - "grad_norm": 0.0005961584392935038, - "learning_rate": 0.00019999866007984878, - "loss": 46.0, - "step": 21562 - }, - { - "epoch": 1.6486419328325401, - "grad_norm": 0.0014104496221989393, - "learning_rate": 0.00019999865995550317, - "loss": 46.0, - "step": 21563 - }, - { - "epoch": 1.6487183898159299, - "grad_norm": 0.0009982379851862788, - "learning_rate": 0.0001999986598311518, - "loss": 46.0, - "step": 21564 - }, - { - "epoch": 1.6487948467993196, - "grad_norm": 0.008005477488040924, - "learning_rate": 0.00019999865970679465, - "loss": 46.0, - "step": 21565 - }, - { - "epoch": 1.6488713037827094, - "grad_norm": 0.0005726784584112465, - "learning_rate": 0.00019999865958243175, - "loss": 46.0, - "step": 21566 - }, - { - "epoch": 1.648947760766099, - "grad_norm": 0.002822288079187274, - "learning_rate": 0.00019999865945806309, - "loss": 46.0, - "step": 21567 - }, - { - "epoch": 1.6490242177494887, - "grad_norm": 0.0009812784846872091, - "learning_rate": 0.00019999865933368862, - "loss": 46.0, - "step": 21568 - }, - { - "epoch": 1.6491006747328785, - "grad_norm": 0.0009202967048622668, - "learning_rate": 0.00019999865920930837, - "loss": 46.0, - "step": 21569 - }, - { - "epoch": 1.649177131716268, - "grad_norm": 0.0016039526090025902, - "learning_rate": 0.0001999986590849224, - "loss": 46.0, - "step": 21570 - }, - { - "epoch": 1.6492535886996578, - "grad_norm": 0.002659932244569063, - "learning_rate": 0.00019999865896053065, - "loss": 46.0, - "step": 21571 - }, - { - "epoch": 1.6493300456830475, - "grad_norm": 0.0006635301397182047, - "learning_rate": 0.00019999865883613312, - "loss": 46.0, - "step": 21572 - }, - { - "epoch": 1.6494065026664373, - "grad_norm": 0.0020059943199157715, - "learning_rate": 0.00019999865871172983, - "loss": 46.0, - "step": 21573 - }, - { - "epoch": 1.649482959649827, - "grad_norm": 0.0014936156803742051, - "learning_rate": 0.00019999865858732075, - "loss": 46.0, - "step": 21574 - }, - { - "epoch": 1.6495594166332168, - "grad_norm": 0.015905240550637245, - "learning_rate": 0.0001999986584629059, - "loss": 46.0, - "step": 21575 - }, - { - "epoch": 1.6496358736166066, - "grad_norm": 0.0026930151507258415, - "learning_rate": 0.0001999986583384853, - "loss": 46.0, - "step": 21576 - }, - { - "epoch": 1.6497123305999963, - "grad_norm": 0.0018401573179289699, - "learning_rate": 0.00019999865821405892, - "loss": 46.0, - "step": 21577 - }, - { - "epoch": 1.6497887875833859, - "grad_norm": 0.00048216519644483924, - "learning_rate": 0.0001999986580896268, - "loss": 46.0, - "step": 21578 - }, - { - "epoch": 1.6498652445667756, - "grad_norm": 0.0005768570699729025, - "learning_rate": 0.00019999865796518888, - "loss": 46.0, - "step": 21579 - }, - { - "epoch": 1.6499417015501652, - "grad_norm": 0.0007254622178152204, - "learning_rate": 0.00019999865784074515, - "loss": 46.0, - "step": 21580 - }, - { - "epoch": 1.650018158533555, - "grad_norm": 0.0016321848379448056, - "learning_rate": 0.00019999865771629574, - "loss": 46.0, - "step": 21581 - }, - { - "epoch": 1.6500946155169447, - "grad_norm": 0.0006128469249233603, - "learning_rate": 0.0001999986575918405, - "loss": 46.0, - "step": 21582 - }, - { - "epoch": 1.6501710725003345, - "grad_norm": 0.0010214586509391665, - "learning_rate": 0.0001999986574673795, - "loss": 46.0, - "step": 21583 - }, - { - "epoch": 1.6502475294837242, - "grad_norm": 0.000726212514564395, - "learning_rate": 0.00019999865734291275, - "loss": 46.0, - "step": 21584 - }, - { - "epoch": 1.650323986467114, - "grad_norm": 0.0023472574539482594, - "learning_rate": 0.0001999986572184402, - "loss": 46.0, - "step": 21585 - }, - { - "epoch": 1.6504004434505037, - "grad_norm": 0.008027667179703712, - "learning_rate": 0.0001999986570939619, - "loss": 46.0, - "step": 21586 - }, - { - "epoch": 1.6504769004338935, - "grad_norm": 0.002492302330210805, - "learning_rate": 0.00019999865696947785, - "loss": 46.0, - "step": 21587 - }, - { - "epoch": 1.6505533574172833, - "grad_norm": 0.0010119705693796277, - "learning_rate": 0.000199998656844988, - "loss": 46.0, - "step": 21588 - }, - { - "epoch": 1.6506298144006728, - "grad_norm": 0.0008986667962744832, - "learning_rate": 0.0001999986567204924, - "loss": 46.0, - "step": 21589 - }, - { - "epoch": 1.6507062713840626, - "grad_norm": 0.0012087725335732102, - "learning_rate": 0.00019999865659599102, - "loss": 46.0, - "step": 21590 - }, - { - "epoch": 1.650782728367452, - "grad_norm": 0.001363583141937852, - "learning_rate": 0.00019999865647148387, - "loss": 46.0, - "step": 21591 - }, - { - "epoch": 1.6508591853508419, - "grad_norm": 0.001039830851368606, - "learning_rate": 0.00019999865634697095, - "loss": 46.0, - "step": 21592 - }, - { - "epoch": 1.6509356423342316, - "grad_norm": 0.00040533390711061656, - "learning_rate": 0.00019999865622245225, - "loss": 46.0, - "step": 21593 - }, - { - "epoch": 1.6510120993176214, - "grad_norm": 0.0009514043922536075, - "learning_rate": 0.0001999986560979278, - "loss": 46.0, - "step": 21594 - }, - { - "epoch": 1.6510885563010111, - "grad_norm": 0.003054453991353512, - "learning_rate": 0.00019999865597339757, - "loss": 46.0, - "step": 21595 - }, - { - "epoch": 1.651165013284401, - "grad_norm": 0.0006716137286275625, - "learning_rate": 0.00019999865584886158, - "loss": 46.0, - "step": 21596 - }, - { - "epoch": 1.6512414702677907, - "grad_norm": 0.0003735281643457711, - "learning_rate": 0.00019999865572431982, - "loss": 46.0, - "step": 21597 - }, - { - "epoch": 1.6513179272511804, - "grad_norm": 0.002730264561250806, - "learning_rate": 0.00019999865559977228, - "loss": 46.0, - "step": 21598 - }, - { - "epoch": 1.6513943842345702, - "grad_norm": 0.0010241935960948467, - "learning_rate": 0.00019999865547521898, - "loss": 46.0, - "step": 21599 - }, - { - "epoch": 1.6514708412179597, - "grad_norm": 0.0010458551114425063, - "learning_rate": 0.00019999865535065992, - "loss": 46.0, - "step": 21600 - }, - { - "epoch": 1.6515472982013495, - "grad_norm": 0.0010004040086641908, - "learning_rate": 0.00019999865522609507, - "loss": 46.0, - "step": 21601 - }, - { - "epoch": 1.651623755184739, - "grad_norm": 0.0009689674479886889, - "learning_rate": 0.00019999865510152444, - "loss": 46.0, - "step": 21602 - }, - { - "epoch": 1.6517002121681288, - "grad_norm": 0.0027522125747054815, - "learning_rate": 0.00019999865497694806, - "loss": 46.0, - "step": 21603 - }, - { - "epoch": 1.6517766691515186, - "grad_norm": 0.003077751724049449, - "learning_rate": 0.00019999865485236594, - "loss": 46.0, - "step": 21604 - }, - { - "epoch": 1.6518531261349083, - "grad_norm": 0.0006341529078781605, - "learning_rate": 0.00019999865472777802, - "loss": 46.0, - "step": 21605 - }, - { - "epoch": 1.651929583118298, - "grad_norm": 0.001420557964593172, - "learning_rate": 0.00019999865460318433, - "loss": 46.0, - "step": 21606 - }, - { - "epoch": 1.6520060401016878, - "grad_norm": 0.0007528112037107348, - "learning_rate": 0.00019999865447858488, - "loss": 46.0, - "step": 21607 - }, - { - "epoch": 1.6520824970850776, - "grad_norm": 0.0011315782321617007, - "learning_rate": 0.00019999865435397962, - "loss": 46.0, - "step": 21608 - }, - { - "epoch": 1.6521589540684674, - "grad_norm": 0.004409399814903736, - "learning_rate": 0.00019999865422936863, - "loss": 46.0, - "step": 21609 - }, - { - "epoch": 1.652235411051857, - "grad_norm": 0.0015307499561458826, - "learning_rate": 0.00019999865410475187, - "loss": 46.0, - "step": 21610 - }, - { - "epoch": 1.6523118680352467, - "grad_norm": 0.0009448578348383307, - "learning_rate": 0.00019999865398012933, - "loss": 46.0, - "step": 21611 - }, - { - "epoch": 1.6523883250186364, - "grad_norm": 0.006631122436374426, - "learning_rate": 0.00019999865385550103, - "loss": 46.0, - "step": 21612 - }, - { - "epoch": 1.652464782002026, - "grad_norm": 0.0006825471064075828, - "learning_rate": 0.00019999865373086694, - "loss": 46.0, - "step": 21613 - }, - { - "epoch": 1.6525412389854157, - "grad_norm": 0.0008091607014648616, - "learning_rate": 0.00019999865360622712, - "loss": 46.0, - "step": 21614 - }, - { - "epoch": 1.6526176959688055, - "grad_norm": 0.0038639530539512634, - "learning_rate": 0.00019999865348158152, - "loss": 46.0, - "step": 21615 - }, - { - "epoch": 1.6526941529521952, - "grad_norm": 0.0011462783440947533, - "learning_rate": 0.00019999865335693011, - "loss": 46.0, - "step": 21616 - }, - { - "epoch": 1.652770609935585, - "grad_norm": 0.0011677076108753681, - "learning_rate": 0.000199998653232273, - "loss": 46.0, - "step": 21617 - }, - { - "epoch": 1.6528470669189748, - "grad_norm": 0.002352225361391902, - "learning_rate": 0.00019999865310761005, - "loss": 46.0, - "step": 21618 - }, - { - "epoch": 1.6529235239023645, - "grad_norm": 0.001397239277139306, - "learning_rate": 0.00019999865298294135, - "loss": 46.0, - "step": 21619 - }, - { - "epoch": 1.6529999808857543, - "grad_norm": 0.0011672712862491608, - "learning_rate": 0.0001999986528582669, - "loss": 46.0, - "step": 21620 - }, - { - "epoch": 1.6530764378691438, - "grad_norm": 0.0015168029349297285, - "learning_rate": 0.00019999865273358667, - "loss": 46.0, - "step": 21621 - }, - { - "epoch": 1.6531528948525336, - "grad_norm": 0.0020129806362092495, - "learning_rate": 0.00019999865260890068, - "loss": 46.0, - "step": 21622 - }, - { - "epoch": 1.6532293518359233, - "grad_norm": 0.0007061193464323878, - "learning_rate": 0.0001999986524842089, - "loss": 46.0, - "step": 21623 - }, - { - "epoch": 1.6533058088193129, - "grad_norm": 0.0007871821289882064, - "learning_rate": 0.00019999865235951136, - "loss": 46.0, - "step": 21624 - }, - { - "epoch": 1.6533822658027026, - "grad_norm": 0.0015681508230045438, - "learning_rate": 0.00019999865223480808, - "loss": 46.0, - "step": 21625 - }, - { - "epoch": 1.6534587227860924, - "grad_norm": 0.0007553251925855875, - "learning_rate": 0.00019999865211009897, - "loss": 46.0, - "step": 21626 - }, - { - "epoch": 1.6535351797694822, - "grad_norm": 0.0009992470731958747, - "learning_rate": 0.00019999865198538412, - "loss": 46.0, - "step": 21627 - }, - { - "epoch": 1.653611636752872, - "grad_norm": 0.0017561583081260324, - "learning_rate": 0.00019999865186066354, - "loss": 46.0, - "step": 21628 - }, - { - "epoch": 1.6536880937362617, - "grad_norm": 0.0018788323504850268, - "learning_rate": 0.00019999865173593711, - "loss": 46.0, - "step": 21629 - }, - { - "epoch": 1.6537645507196514, - "grad_norm": 0.0023352557327598333, - "learning_rate": 0.000199998651611205, - "loss": 46.0, - "step": 21630 - }, - { - "epoch": 1.6538410077030412, - "grad_norm": 0.0010183880804106593, - "learning_rate": 0.00019999865148646705, - "loss": 46.0, - "step": 21631 - }, - { - "epoch": 1.6539174646864307, - "grad_norm": 0.00117160112131387, - "learning_rate": 0.00019999865136172335, - "loss": 46.0, - "step": 21632 - }, - { - "epoch": 1.6539939216698205, - "grad_norm": 0.005221403203904629, - "learning_rate": 0.0001999986512369739, - "loss": 46.0, - "step": 21633 - }, - { - "epoch": 1.6540703786532103, - "grad_norm": 0.0033547745551913977, - "learning_rate": 0.00019999865111221867, - "loss": 46.0, - "step": 21634 - }, - { - "epoch": 1.6541468356365998, - "grad_norm": 0.002293757162988186, - "learning_rate": 0.00019999865098745766, - "loss": 46.0, - "step": 21635 - }, - { - "epoch": 1.6542232926199896, - "grad_norm": 0.0030303706880658865, - "learning_rate": 0.0001999986508626909, - "loss": 46.0, - "step": 21636 - }, - { - "epoch": 1.6542997496033793, - "grad_norm": 0.0033912723883986473, - "learning_rate": 0.00019999865073791834, - "loss": 46.0, - "step": 21637 - }, - { - "epoch": 1.654376206586769, - "grad_norm": 0.002571378368884325, - "learning_rate": 0.00019999865061314006, - "loss": 46.0, - "step": 21638 - }, - { - "epoch": 1.6544526635701589, - "grad_norm": 0.0011502320412546396, - "learning_rate": 0.00019999865048835597, - "loss": 46.0, - "step": 21639 - }, - { - "epoch": 1.6545291205535486, - "grad_norm": 0.00027156888972967863, - "learning_rate": 0.00019999865036356612, - "loss": 46.0, - "step": 21640 - }, - { - "epoch": 1.6546055775369384, - "grad_norm": 0.0009653330780565739, - "learning_rate": 0.00019999865023877052, - "loss": 46.0, - "step": 21641 - }, - { - "epoch": 1.6546820345203281, - "grad_norm": 0.0011527067981660366, - "learning_rate": 0.00019999865011396912, - "loss": 46.0, - "step": 21642 - }, - { - "epoch": 1.6547584915037177, - "grad_norm": 0.0010315327672287822, - "learning_rate": 0.00019999864998916195, - "loss": 46.0, - "step": 21643 - }, - { - "epoch": 1.6548349484871074, - "grad_norm": 0.0012466766638681293, - "learning_rate": 0.00019999864986434905, - "loss": 46.0, - "step": 21644 - }, - { - "epoch": 1.6549114054704972, - "grad_norm": 0.01666574738919735, - "learning_rate": 0.00019999864973953036, - "loss": 46.0, - "step": 21645 - }, - { - "epoch": 1.6549878624538867, - "grad_norm": 0.001451404532417655, - "learning_rate": 0.0001999986496147059, - "loss": 46.0, - "step": 21646 - }, - { - "epoch": 1.6550643194372765, - "grad_norm": 0.001533798291347921, - "learning_rate": 0.00019999864948987565, - "loss": 46.0, - "step": 21647 - }, - { - "epoch": 1.6551407764206663, - "grad_norm": 0.0017557170940563083, - "learning_rate": 0.00019999864936503964, - "loss": 46.0, - "step": 21648 - }, - { - "epoch": 1.655217233404056, - "grad_norm": 0.0028071885462850332, - "learning_rate": 0.00019999864924019788, - "loss": 46.0, - "step": 21649 - }, - { - "epoch": 1.6552936903874458, - "grad_norm": 0.001328642014414072, - "learning_rate": 0.00019999864911535034, - "loss": 46.0, - "step": 21650 - }, - { - "epoch": 1.6553701473708355, - "grad_norm": 0.0012538135051727295, - "learning_rate": 0.00019999864899049704, - "loss": 46.0, - "step": 21651 - }, - { - "epoch": 1.6554466043542253, - "grad_norm": 0.0010261310962960124, - "learning_rate": 0.00019999864886563796, - "loss": 46.0, - "step": 21652 - }, - { - "epoch": 1.655523061337615, - "grad_norm": 0.002302152570337057, - "learning_rate": 0.0001999986487407731, - "loss": 46.0, - "step": 21653 - }, - { - "epoch": 1.6555995183210046, - "grad_norm": 0.001607475453056395, - "learning_rate": 0.0001999986486159025, - "loss": 46.0, - "step": 21654 - }, - { - "epoch": 1.6556759753043944, - "grad_norm": 0.0013634710339829326, - "learning_rate": 0.00019999864849102607, - "loss": 46.0, - "step": 21655 - }, - { - "epoch": 1.6557524322877841, - "grad_norm": 0.0014689142117276788, - "learning_rate": 0.00019999864836614393, - "loss": 46.0, - "step": 21656 - }, - { - "epoch": 1.6558288892711737, - "grad_norm": 0.006453211419284344, - "learning_rate": 0.000199998648241256, - "loss": 46.0, - "step": 21657 - }, - { - "epoch": 1.6559053462545634, - "grad_norm": 0.0011455732164904475, - "learning_rate": 0.00019999864811636231, - "loss": 46.0, - "step": 21658 - }, - { - "epoch": 1.6559818032379532, - "grad_norm": 0.003112090053036809, - "learning_rate": 0.00019999864799146285, - "loss": 46.0, - "step": 21659 - }, - { - "epoch": 1.656058260221343, - "grad_norm": 0.004679406061768532, - "learning_rate": 0.0001999986478665576, - "loss": 46.0, - "step": 21660 - }, - { - "epoch": 1.6561347172047327, - "grad_norm": 0.0004650199261959642, - "learning_rate": 0.0001999986477416466, - "loss": 46.0, - "step": 21661 - }, - { - "epoch": 1.6562111741881225, - "grad_norm": 0.0006998194148764014, - "learning_rate": 0.00019999864761672983, - "loss": 46.0, - "step": 21662 - }, - { - "epoch": 1.6562876311715122, - "grad_norm": 0.0003703190595842898, - "learning_rate": 0.00019999864749180727, - "loss": 46.0, - "step": 21663 - }, - { - "epoch": 1.656364088154902, - "grad_norm": 0.011942723765969276, - "learning_rate": 0.000199998647366879, - "loss": 46.0, - "step": 21664 - }, - { - "epoch": 1.6564405451382915, - "grad_norm": 0.0004818958695977926, - "learning_rate": 0.0001999986472419449, - "loss": 46.0, - "step": 21665 - }, - { - "epoch": 1.6565170021216813, - "grad_norm": 0.0007157226791605353, - "learning_rate": 0.00019999864711700503, - "loss": 46.0, - "step": 21666 - }, - { - "epoch": 1.656593459105071, - "grad_norm": 0.002656287979334593, - "learning_rate": 0.00019999864699205943, - "loss": 46.0, - "step": 21667 - }, - { - "epoch": 1.6566699160884606, - "grad_norm": 0.0006410392234101892, - "learning_rate": 0.00019999864686710803, - "loss": 46.0, - "step": 21668 - }, - { - "epoch": 1.6567463730718504, - "grad_norm": 0.011426913551986217, - "learning_rate": 0.0001999986467421509, - "loss": 46.0, - "step": 21669 - }, - { - "epoch": 1.6568228300552401, - "grad_norm": 0.0008393619209527969, - "learning_rate": 0.00019999864661718794, - "loss": 46.0, - "step": 21670 - }, - { - "epoch": 1.6568992870386299, - "grad_norm": 0.0008457890362478793, - "learning_rate": 0.00019999864649221925, - "loss": 46.0, - "step": 21671 - }, - { - "epoch": 1.6569757440220196, - "grad_norm": 0.0005599437281489372, - "learning_rate": 0.00019999864636724478, - "loss": 46.0, - "step": 21672 - }, - { - "epoch": 1.6570522010054094, - "grad_norm": 0.0021990975365042686, - "learning_rate": 0.00019999864624226454, - "loss": 46.0, - "step": 21673 - }, - { - "epoch": 1.6571286579887992, - "grad_norm": 0.0010131970047950745, - "learning_rate": 0.00019999864611727853, - "loss": 46.0, - "step": 21674 - }, - { - "epoch": 1.657205114972189, - "grad_norm": 0.0009348964085802436, - "learning_rate": 0.00019999864599228677, - "loss": 46.0, - "step": 21675 - }, - { - "epoch": 1.6572815719555785, - "grad_norm": 0.0022120086941868067, - "learning_rate": 0.0001999986458672892, - "loss": 46.0, - "step": 21676 - }, - { - "epoch": 1.6573580289389682, - "grad_norm": 0.0009733859915286303, - "learning_rate": 0.0001999986457422859, - "loss": 46.0, - "step": 21677 - }, - { - "epoch": 1.657434485922358, - "grad_norm": 0.0010268380865454674, - "learning_rate": 0.00019999864561727682, - "loss": 46.0, - "step": 21678 - }, - { - "epoch": 1.6575109429057475, - "grad_norm": 0.0009617885807529092, - "learning_rate": 0.00019999864549226194, - "loss": 46.0, - "step": 21679 - }, - { - "epoch": 1.6575873998891373, - "grad_norm": 0.0027607481461018324, - "learning_rate": 0.00019999864536724134, - "loss": 46.0, - "step": 21680 - }, - { - "epoch": 1.657663856872527, - "grad_norm": 0.005382539238780737, - "learning_rate": 0.00019999864524221494, - "loss": 46.0, - "step": 21681 - }, - { - "epoch": 1.6577403138559168, - "grad_norm": 0.0008170593064278364, - "learning_rate": 0.00019999864511718277, - "loss": 46.0, - "step": 21682 - }, - { - "epoch": 1.6578167708393066, - "grad_norm": 0.0009224091772921383, - "learning_rate": 0.00019999864499214485, - "loss": 46.0, - "step": 21683 - }, - { - "epoch": 1.6578932278226963, - "grad_norm": 0.0004626004374586046, - "learning_rate": 0.00019999864486710116, - "loss": 46.0, - "step": 21684 - }, - { - "epoch": 1.657969684806086, - "grad_norm": 0.0006666372064501047, - "learning_rate": 0.0001999986447420517, - "loss": 46.0, - "step": 21685 - }, - { - "epoch": 1.6580461417894758, - "grad_norm": 0.0020343840587884188, - "learning_rate": 0.00019999864461699645, - "loss": 46.0, - "step": 21686 - }, - { - "epoch": 1.6581225987728654, - "grad_norm": 0.0007447120733559132, - "learning_rate": 0.00019999864449193544, - "loss": 46.0, - "step": 21687 - }, - { - "epoch": 1.6581990557562551, - "grad_norm": 0.00390347046777606, - "learning_rate": 0.00019999864436686865, - "loss": 46.0, - "step": 21688 - }, - { - "epoch": 1.658275512739645, - "grad_norm": 0.0008550811326131225, - "learning_rate": 0.00019999864424179612, - "loss": 46.0, - "step": 21689 - }, - { - "epoch": 1.6583519697230344, - "grad_norm": 0.003112517297267914, - "learning_rate": 0.0001999986441167178, - "loss": 46.0, - "step": 21690 - }, - { - "epoch": 1.6584284267064242, - "grad_norm": 0.001175080076791346, - "learning_rate": 0.0001999986439916337, - "loss": 46.0, - "step": 21691 - }, - { - "epoch": 1.658504883689814, - "grad_norm": 0.0012133476557210088, - "learning_rate": 0.00019999864386654385, - "loss": 46.0, - "step": 21692 - }, - { - "epoch": 1.6585813406732037, - "grad_norm": 0.0014472611946985126, - "learning_rate": 0.00019999864374144823, - "loss": 46.0, - "step": 21693 - }, - { - "epoch": 1.6586577976565935, - "grad_norm": 0.00025356828700751066, - "learning_rate": 0.00019999864361634683, - "loss": 46.0, - "step": 21694 - }, - { - "epoch": 1.6587342546399833, - "grad_norm": 0.003534167306497693, - "learning_rate": 0.00019999864349123966, - "loss": 46.0, - "step": 21695 - }, - { - "epoch": 1.658810711623373, - "grad_norm": 0.0037848169449716806, - "learning_rate": 0.00019999864336612674, - "loss": 46.0, - "step": 21696 - }, - { - "epoch": 1.6588871686067628, - "grad_norm": 0.007858806289732456, - "learning_rate": 0.00019999864324100804, - "loss": 46.0, - "step": 21697 - }, - { - "epoch": 1.6589636255901523, - "grad_norm": 0.0003288577136117965, - "learning_rate": 0.00019999864311588358, - "loss": 46.0, - "step": 21698 - }, - { - "epoch": 1.659040082573542, - "grad_norm": 0.0005986799951642752, - "learning_rate": 0.00019999864299075334, - "loss": 46.0, - "step": 21699 - }, - { - "epoch": 1.6591165395569318, - "grad_norm": 0.000887929811142385, - "learning_rate": 0.00019999864286561733, - "loss": 46.0, - "step": 21700 - }, - { - "epoch": 1.6591929965403214, - "grad_norm": 0.001999678323045373, - "learning_rate": 0.00019999864274047554, - "loss": 46.0, - "step": 21701 - }, - { - "epoch": 1.6592694535237111, - "grad_norm": 0.0022027036175131798, - "learning_rate": 0.00019999864261532798, - "loss": 46.0, - "step": 21702 - }, - { - "epoch": 1.659345910507101, - "grad_norm": 0.0006224642274901271, - "learning_rate": 0.00019999864249017467, - "loss": 46.0, - "step": 21703 - }, - { - "epoch": 1.6594223674904907, - "grad_norm": 0.0006737412768416107, - "learning_rate": 0.00019999864236501557, - "loss": 46.0, - "step": 21704 - }, - { - "epoch": 1.6594988244738804, - "grad_norm": 0.0006587451207451522, - "learning_rate": 0.00019999864223985072, - "loss": 46.0, - "step": 21705 - }, - { - "epoch": 1.6595752814572702, - "grad_norm": 0.0009604567312635481, - "learning_rate": 0.0001999986421146801, - "loss": 46.0, - "step": 21706 - }, - { - "epoch": 1.65965173844066, - "grad_norm": 0.0011488718446344137, - "learning_rate": 0.00019999864198950372, - "loss": 46.0, - "step": 21707 - }, - { - "epoch": 1.6597281954240497, - "grad_norm": 0.0037051609251648188, - "learning_rate": 0.00019999864186432155, - "loss": 46.0, - "step": 21708 - }, - { - "epoch": 1.6598046524074392, - "grad_norm": 0.0055769700556993484, - "learning_rate": 0.0001999986417391336, - "loss": 46.0, - "step": 21709 - }, - { - "epoch": 1.659881109390829, - "grad_norm": 0.0006354156066663563, - "learning_rate": 0.0001999986416139399, - "loss": 46.0, - "step": 21710 - }, - { - "epoch": 1.6599575663742185, - "grad_norm": 0.0010919993510469794, - "learning_rate": 0.00019999864148874044, - "loss": 46.0, - "step": 21711 - }, - { - "epoch": 1.6600340233576083, - "grad_norm": 0.0010993661126121879, - "learning_rate": 0.00019999864136353518, - "loss": 46.0, - "step": 21712 - }, - { - "epoch": 1.660110480340998, - "grad_norm": 0.005171710625290871, - "learning_rate": 0.00019999864123832416, - "loss": 46.0, - "step": 21713 - }, - { - "epoch": 1.6601869373243878, - "grad_norm": 0.0040898979641497135, - "learning_rate": 0.0001999986411131074, - "loss": 46.0, - "step": 21714 - }, - { - "epoch": 1.6602633943077776, - "grad_norm": 0.0017966422019526362, - "learning_rate": 0.00019999864098788485, - "loss": 46.0, - "step": 21715 - }, - { - "epoch": 1.6603398512911673, - "grad_norm": 0.0013107439735904336, - "learning_rate": 0.00019999864086265654, - "loss": 46.0, - "step": 21716 - }, - { - "epoch": 1.660416308274557, - "grad_norm": 0.0007092754240147769, - "learning_rate": 0.00019999864073742246, - "loss": 46.0, - "step": 21717 - }, - { - "epoch": 1.6604927652579469, - "grad_norm": 0.0018959550652652979, - "learning_rate": 0.00019999864061218258, - "loss": 46.0, - "step": 21718 - }, - { - "epoch": 1.6605692222413366, - "grad_norm": 0.004546552896499634, - "learning_rate": 0.00019999864048693696, - "loss": 46.0, - "step": 21719 - }, - { - "epoch": 1.6606456792247262, - "grad_norm": 0.005949110724031925, - "learning_rate": 0.00019999864036168553, - "loss": 46.0, - "step": 21720 - }, - { - "epoch": 1.660722136208116, - "grad_norm": 0.0012279408983886242, - "learning_rate": 0.0001999986402364284, - "loss": 46.0, - "step": 21721 - }, - { - "epoch": 1.6607985931915055, - "grad_norm": 0.0006957650766707957, - "learning_rate": 0.00019999864011116544, - "loss": 46.0, - "step": 21722 - }, - { - "epoch": 1.6608750501748952, - "grad_norm": 0.004193790722638369, - "learning_rate": 0.00019999863998589675, - "loss": 46.0, - "step": 21723 - }, - { - "epoch": 1.660951507158285, - "grad_norm": 0.0019929578993469477, - "learning_rate": 0.00019999863986062226, - "loss": 46.0, - "step": 21724 - }, - { - "epoch": 1.6610279641416748, - "grad_norm": 0.0020179208368062973, - "learning_rate": 0.00019999863973534202, - "loss": 46.0, - "step": 21725 - }, - { - "epoch": 1.6611044211250645, - "grad_norm": 0.0011595117393881083, - "learning_rate": 0.00019999863961005603, - "loss": 46.0, - "step": 21726 - }, - { - "epoch": 1.6611808781084543, - "grad_norm": 0.004813872277736664, - "learning_rate": 0.00019999863948476422, - "loss": 46.0, - "step": 21727 - }, - { - "epoch": 1.661257335091844, - "grad_norm": 0.0007569942972622812, - "learning_rate": 0.0001999986393594667, - "loss": 46.0, - "step": 21728 - }, - { - "epoch": 1.6613337920752338, - "grad_norm": 0.0005347812548279762, - "learning_rate": 0.00019999863923416336, - "loss": 46.0, - "step": 21729 - }, - { - "epoch": 1.6614102490586236, - "grad_norm": 0.0021815341897308826, - "learning_rate": 0.00019999863910885428, - "loss": 46.0, - "step": 21730 - }, - { - "epoch": 1.661486706042013, - "grad_norm": 0.001619148999452591, - "learning_rate": 0.0001999986389835394, - "loss": 46.0, - "step": 21731 - }, - { - "epoch": 1.6615631630254029, - "grad_norm": 0.0012437907280400395, - "learning_rate": 0.00019999863885821877, - "loss": 46.0, - "step": 21732 - }, - { - "epoch": 1.6616396200087924, - "grad_norm": 0.0004048449045512825, - "learning_rate": 0.00019999863873289237, - "loss": 46.0, - "step": 21733 - }, - { - "epoch": 1.6617160769921822, - "grad_norm": 0.0020641072187572718, - "learning_rate": 0.0001999986386075602, - "loss": 46.0, - "step": 21734 - }, - { - "epoch": 1.661792533975572, - "grad_norm": 0.0007592297624796629, - "learning_rate": 0.00019999863848222226, - "loss": 46.0, - "step": 21735 - }, - { - "epoch": 1.6618689909589617, - "grad_norm": 0.00298447348177433, - "learning_rate": 0.00019999863835687857, - "loss": 46.0, - "step": 21736 - }, - { - "epoch": 1.6619454479423514, - "grad_norm": 0.0010736165568232536, - "learning_rate": 0.0001999986382315291, - "loss": 46.0, - "step": 21737 - }, - { - "epoch": 1.6620219049257412, - "grad_norm": 0.0012079750886186957, - "learning_rate": 0.00019999863810617384, - "loss": 46.0, - "step": 21738 - }, - { - "epoch": 1.662098361909131, - "grad_norm": 0.0012040602741762996, - "learning_rate": 0.00019999863798081282, - "loss": 46.0, - "step": 21739 - }, - { - "epoch": 1.6621748188925207, - "grad_norm": 0.004665082786232233, - "learning_rate": 0.00019999863785544604, - "loss": 46.0, - "step": 21740 - }, - { - "epoch": 1.6622512758759103, - "grad_norm": 0.0017301623011007905, - "learning_rate": 0.00019999863773007348, - "loss": 46.0, - "step": 21741 - }, - { - "epoch": 1.6623277328593, - "grad_norm": 0.0015062422025948763, - "learning_rate": 0.00019999863760469518, - "loss": 46.0, - "step": 21742 - }, - { - "epoch": 1.6624041898426898, - "grad_norm": 0.0022936842869967222, - "learning_rate": 0.00019999863747931107, - "loss": 46.0, - "step": 21743 - }, - { - "epoch": 1.6624806468260793, - "grad_norm": 0.0012678923085331917, - "learning_rate": 0.0001999986373539212, - "loss": 46.0, - "step": 21744 - }, - { - "epoch": 1.662557103809469, - "grad_norm": 0.0014153685187920928, - "learning_rate": 0.00019999863722852557, - "loss": 46.0, - "step": 21745 - }, - { - "epoch": 1.6626335607928588, - "grad_norm": 0.0016152793541550636, - "learning_rate": 0.00019999863710312417, - "loss": 46.0, - "step": 21746 - }, - { - "epoch": 1.6627100177762486, - "grad_norm": 0.001285429927520454, - "learning_rate": 0.00019999863697771702, - "loss": 46.0, - "step": 21747 - }, - { - "epoch": 1.6627864747596384, - "grad_norm": 0.0008521474082954228, - "learning_rate": 0.00019999863685230405, - "loss": 46.0, - "step": 21748 - }, - { - "epoch": 1.6628629317430281, - "grad_norm": 0.0005075836088508368, - "learning_rate": 0.00019999863672688533, - "loss": 46.0, - "step": 21749 - }, - { - "epoch": 1.6629393887264179, - "grad_norm": 0.0015887904446572065, - "learning_rate": 0.00019999863660146087, - "loss": 46.0, - "step": 21750 - }, - { - "epoch": 1.6630158457098076, - "grad_norm": 0.0013447246747091413, - "learning_rate": 0.00019999863647603063, - "loss": 46.0, - "step": 21751 - }, - { - "epoch": 1.6630923026931972, - "grad_norm": 0.0022178690414875746, - "learning_rate": 0.00019999863635059462, - "loss": 46.0, - "step": 21752 - }, - { - "epoch": 1.663168759676587, - "grad_norm": 0.001922041061334312, - "learning_rate": 0.00019999863622515283, - "loss": 46.0, - "step": 21753 - }, - { - "epoch": 1.6632452166599767, - "grad_norm": 0.0012107868678867817, - "learning_rate": 0.00019999863609970525, - "loss": 46.0, - "step": 21754 - }, - { - "epoch": 1.6633216736433662, - "grad_norm": 0.0007862246129661798, - "learning_rate": 0.00019999863597425192, - "loss": 46.0, - "step": 21755 - }, - { - "epoch": 1.663398130626756, - "grad_norm": 0.0012303428957238793, - "learning_rate": 0.00019999863584879284, - "loss": 46.0, - "step": 21756 - }, - { - "epoch": 1.6634745876101458, - "grad_norm": 0.0025524809025228024, - "learning_rate": 0.000199998635723328, - "loss": 46.0, - "step": 21757 - }, - { - "epoch": 1.6635510445935355, - "grad_norm": 0.0014661196619272232, - "learning_rate": 0.00019999863559785734, - "loss": 46.0, - "step": 21758 - }, - { - "epoch": 1.6636275015769253, - "grad_norm": 0.0012701055966317654, - "learning_rate": 0.00019999863547238094, - "loss": 46.0, - "step": 21759 - }, - { - "epoch": 1.663703958560315, - "grad_norm": 0.002395395655184984, - "learning_rate": 0.00019999863534689877, - "loss": 46.0, - "step": 21760 - }, - { - "epoch": 1.6637804155437048, - "grad_norm": 0.0024629172403365374, - "learning_rate": 0.00019999863522141082, - "loss": 46.0, - "step": 21761 - }, - { - "epoch": 1.6638568725270946, - "grad_norm": 0.0006907899514771998, - "learning_rate": 0.0001999986350959171, - "loss": 46.0, - "step": 21762 - }, - { - "epoch": 1.6639333295104841, - "grad_norm": 0.000729088787920773, - "learning_rate": 0.00019999863497041761, - "loss": 46.0, - "step": 21763 - }, - { - "epoch": 1.6640097864938739, - "grad_norm": 0.0012969098752364516, - "learning_rate": 0.00019999863484491238, - "loss": 46.0, - "step": 21764 - }, - { - "epoch": 1.6640862434772636, - "grad_norm": 0.0008993445662781596, - "learning_rate": 0.00019999863471940134, - "loss": 46.0, - "step": 21765 - }, - { - "epoch": 1.6641627004606532, - "grad_norm": 0.0010818386217579246, - "learning_rate": 0.00019999863459388458, - "loss": 46.0, - "step": 21766 - }, - { - "epoch": 1.664239157444043, - "grad_norm": 0.0005669799284078181, - "learning_rate": 0.00019999863446836202, - "loss": 46.0, - "step": 21767 - }, - { - "epoch": 1.6643156144274327, - "grad_norm": 0.001311693456955254, - "learning_rate": 0.0001999986343428337, - "loss": 46.0, - "step": 21768 - }, - { - "epoch": 1.6643920714108225, - "grad_norm": 0.0006725798011757433, - "learning_rate": 0.0001999986342172996, - "loss": 46.0, - "step": 21769 - }, - { - "epoch": 1.6644685283942122, - "grad_norm": 0.0023361369967460632, - "learning_rate": 0.0001999986340917597, - "loss": 46.0, - "step": 21770 - }, - { - "epoch": 1.664544985377602, - "grad_norm": 0.001291585504077375, - "learning_rate": 0.0001999986339662141, - "loss": 46.0, - "step": 21771 - }, - { - "epoch": 1.6646214423609917, - "grad_norm": 0.001266077859327197, - "learning_rate": 0.0001999986338406627, - "loss": 46.0, - "step": 21772 - }, - { - "epoch": 1.6646978993443815, - "grad_norm": 0.0005462805274873972, - "learning_rate": 0.0001999986337151055, - "loss": 46.0, - "step": 21773 - }, - { - "epoch": 1.664774356327771, - "grad_norm": 0.0016762202139943838, - "learning_rate": 0.00019999863358954257, - "loss": 46.0, - "step": 21774 - }, - { - "epoch": 1.6648508133111608, - "grad_norm": 0.0026368971448391676, - "learning_rate": 0.00019999863346397385, - "loss": 46.0, - "step": 21775 - }, - { - "epoch": 1.6649272702945506, - "grad_norm": 0.000918002100661397, - "learning_rate": 0.00019999863333839936, - "loss": 46.0, - "step": 21776 - }, - { - "epoch": 1.66500372727794, - "grad_norm": 0.0015913642710074782, - "learning_rate": 0.0001999986332128191, - "loss": 46.0, - "step": 21777 - }, - { - "epoch": 1.6650801842613299, - "grad_norm": 0.001786625711247325, - "learning_rate": 0.0001999986330872331, - "loss": 46.0, - "step": 21778 - }, - { - "epoch": 1.6651566412447196, - "grad_norm": 0.0013834276469424367, - "learning_rate": 0.00019999863296164133, - "loss": 46.0, - "step": 21779 - }, - { - "epoch": 1.6652330982281094, - "grad_norm": 0.003367658006027341, - "learning_rate": 0.00019999863283604375, - "loss": 46.0, - "step": 21780 - }, - { - "epoch": 1.6653095552114991, - "grad_norm": 0.0006114642601460218, - "learning_rate": 0.00019999863271044041, - "loss": 46.0, - "step": 21781 - }, - { - "epoch": 1.665386012194889, - "grad_norm": 0.0014529459876939654, - "learning_rate": 0.0001999986325848313, - "loss": 46.0, - "step": 21782 - }, - { - "epoch": 1.6654624691782787, - "grad_norm": 0.028930600732564926, - "learning_rate": 0.00019999863245921646, - "loss": 46.0, - "step": 21783 - }, - { - "epoch": 1.6655389261616684, - "grad_norm": 0.0012406535679474473, - "learning_rate": 0.0001999986323335958, - "loss": 46.0, - "step": 21784 - }, - { - "epoch": 1.665615383145058, - "grad_norm": 0.0006059122388251126, - "learning_rate": 0.00019999863220796938, - "loss": 46.0, - "step": 21785 - }, - { - "epoch": 1.6656918401284477, - "grad_norm": 0.000924452324397862, - "learning_rate": 0.00019999863208233721, - "loss": 46.0, - "step": 21786 - }, - { - "epoch": 1.6657682971118375, - "grad_norm": 0.000638169061858207, - "learning_rate": 0.00019999863195669927, - "loss": 46.0, - "step": 21787 - }, - { - "epoch": 1.665844754095227, - "grad_norm": 0.0006390910129994154, - "learning_rate": 0.00019999863183105555, - "loss": 46.0, - "step": 21788 - }, - { - "epoch": 1.6659212110786168, - "grad_norm": 0.0007224271539598703, - "learning_rate": 0.00019999863170540606, - "loss": 46.0, - "step": 21789 - }, - { - "epoch": 1.6659976680620066, - "grad_norm": 0.016078229993581772, - "learning_rate": 0.00019999863157975082, - "loss": 46.0, - "step": 21790 - }, - { - "epoch": 1.6660741250453963, - "grad_norm": 0.001869305968284607, - "learning_rate": 0.00019999863145408981, - "loss": 46.0, - "step": 21791 - }, - { - "epoch": 1.666150582028786, - "grad_norm": 0.001883415738120675, - "learning_rate": 0.000199998631328423, - "loss": 46.0, - "step": 21792 - }, - { - "epoch": 1.6662270390121758, - "grad_norm": 0.0008455296629108489, - "learning_rate": 0.00019999863120275047, - "loss": 46.0, - "step": 21793 - }, - { - "epoch": 1.6663034959955656, - "grad_norm": 0.001925777760334313, - "learning_rate": 0.00019999863107707212, - "loss": 46.0, - "step": 21794 - }, - { - "epoch": 1.6663799529789554, - "grad_norm": 0.0011103162541985512, - "learning_rate": 0.000199998630951388, - "loss": 46.0, - "step": 21795 - }, - { - "epoch": 1.666456409962345, - "grad_norm": 0.001968169817700982, - "learning_rate": 0.00019999863082569816, - "loss": 46.0, - "step": 21796 - }, - { - "epoch": 1.6665328669457347, - "grad_norm": 0.003113889368250966, - "learning_rate": 0.0001999986307000025, - "loss": 46.0, - "step": 21797 - }, - { - "epoch": 1.6666093239291244, - "grad_norm": 0.001178809441626072, - "learning_rate": 0.0001999986305743011, - "loss": 46.0, - "step": 21798 - }, - { - "epoch": 1.666685780912514, - "grad_norm": 0.0037485749926418066, - "learning_rate": 0.00019999863044859392, - "loss": 46.0, - "step": 21799 - }, - { - "epoch": 1.6667622378959037, - "grad_norm": 0.00220527988858521, - "learning_rate": 0.00019999863032288097, - "loss": 46.0, - "step": 21800 - }, - { - "epoch": 1.6668386948792935, - "grad_norm": 0.0013013853458687663, - "learning_rate": 0.00019999863019716228, - "loss": 46.0, - "step": 21801 - }, - { - "epoch": 1.6669151518626832, - "grad_norm": 0.0015391850611194968, - "learning_rate": 0.00019999863007143776, - "loss": 46.0, - "step": 21802 - }, - { - "epoch": 1.666991608846073, - "grad_norm": 0.008504524827003479, - "learning_rate": 0.0001999986299457075, - "loss": 46.0, - "step": 21803 - }, - { - "epoch": 1.6670680658294628, - "grad_norm": 0.0033453081268817186, - "learning_rate": 0.00019999862981997152, - "loss": 46.0, - "step": 21804 - }, - { - "epoch": 1.6671445228128525, - "grad_norm": 0.0007444752845913172, - "learning_rate": 0.00019999862969422968, - "loss": 46.0, - "step": 21805 - }, - { - "epoch": 1.6672209797962423, - "grad_norm": 0.004083487205207348, - "learning_rate": 0.00019999862956848215, - "loss": 46.0, - "step": 21806 - }, - { - "epoch": 1.6672974367796318, - "grad_norm": 0.00040972669376060367, - "learning_rate": 0.0001999986294427288, - "loss": 46.0, - "step": 21807 - }, - { - "epoch": 1.6673738937630216, - "grad_norm": 0.0005900819669477642, - "learning_rate": 0.0001999986293169697, - "loss": 46.0, - "step": 21808 - }, - { - "epoch": 1.6674503507464113, - "grad_norm": 0.0023416285403072834, - "learning_rate": 0.0001999986291912048, - "loss": 46.0, - "step": 21809 - }, - { - "epoch": 1.6675268077298009, - "grad_norm": 0.0019489447586238384, - "learning_rate": 0.0001999986290654342, - "loss": 46.0, - "step": 21810 - }, - { - "epoch": 1.6676032647131906, - "grad_norm": 0.002405709819868207, - "learning_rate": 0.00019999862893965776, - "loss": 46.0, - "step": 21811 - }, - { - "epoch": 1.6676797216965804, - "grad_norm": 0.0006889355136081576, - "learning_rate": 0.0001999986288138756, - "loss": 46.0, - "step": 21812 - }, - { - "epoch": 1.6677561786799702, - "grad_norm": 0.0017988828476518393, - "learning_rate": 0.00019999862868808762, - "loss": 46.0, - "step": 21813 - }, - { - "epoch": 1.66783263566336, - "grad_norm": 0.0013023015344515443, - "learning_rate": 0.00019999862856229393, - "loss": 46.0, - "step": 21814 - }, - { - "epoch": 1.6679090926467497, - "grad_norm": 0.001345370663329959, - "learning_rate": 0.00019999862843649444, - "loss": 46.0, - "step": 21815 - }, - { - "epoch": 1.6679855496301395, - "grad_norm": 0.0011189555516466498, - "learning_rate": 0.00019999862831068918, - "loss": 46.0, - "step": 21816 - }, - { - "epoch": 1.6680620066135292, - "grad_norm": 0.001372084952890873, - "learning_rate": 0.00019999862818487814, - "loss": 46.0, - "step": 21817 - }, - { - "epoch": 1.6681384635969188, - "grad_norm": 0.003140259301289916, - "learning_rate": 0.00019999862805906136, - "loss": 46.0, - "step": 21818 - }, - { - "epoch": 1.6682149205803085, - "grad_norm": 0.00125603296328336, - "learning_rate": 0.00019999862793323878, - "loss": 46.0, - "step": 21819 - }, - { - "epoch": 1.6682913775636983, - "grad_norm": 0.006496195215731859, - "learning_rate": 0.00019999862780741045, - "loss": 46.0, - "step": 21820 - }, - { - "epoch": 1.6683678345470878, - "grad_norm": 0.004713891074061394, - "learning_rate": 0.00019999862768157634, - "loss": 46.0, - "step": 21821 - }, - { - "epoch": 1.6684442915304776, - "grad_norm": 0.00048765799147076905, - "learning_rate": 0.0001999986275557365, - "loss": 46.0, - "step": 21822 - }, - { - "epoch": 1.6685207485138673, - "grad_norm": 0.0018604713259264827, - "learning_rate": 0.00019999862742989084, - "loss": 46.0, - "step": 21823 - }, - { - "epoch": 1.668597205497257, - "grad_norm": 0.00674502644687891, - "learning_rate": 0.00019999862730403942, - "loss": 46.0, - "step": 21824 - }, - { - "epoch": 1.6686736624806469, - "grad_norm": 0.0006744790589436889, - "learning_rate": 0.00019999862717818222, - "loss": 46.0, - "step": 21825 - }, - { - "epoch": 1.6687501194640366, - "grad_norm": 0.002573287347331643, - "learning_rate": 0.00019999862705231928, - "loss": 46.0, - "step": 21826 - }, - { - "epoch": 1.6688265764474264, - "grad_norm": 0.006537720561027527, - "learning_rate": 0.00019999862692645056, - "loss": 46.0, - "step": 21827 - }, - { - "epoch": 1.6689030334308161, - "grad_norm": 0.002559009939432144, - "learning_rate": 0.00019999862680057607, - "loss": 46.0, - "step": 21828 - }, - { - "epoch": 1.6689794904142057, - "grad_norm": 0.003728767391294241, - "learning_rate": 0.0001999986266746958, - "loss": 46.0, - "step": 21829 - }, - { - "epoch": 1.6690559473975954, - "grad_norm": 0.006121995393186808, - "learning_rate": 0.00019999862654880977, - "loss": 46.0, - "step": 21830 - }, - { - "epoch": 1.6691324043809852, - "grad_norm": 0.0006884766626171768, - "learning_rate": 0.000199998626422918, - "loss": 46.0, - "step": 21831 - }, - { - "epoch": 1.6692088613643747, - "grad_norm": 0.0010282910661771894, - "learning_rate": 0.0001999986262970204, - "loss": 46.0, - "step": 21832 - }, - { - "epoch": 1.6692853183477645, - "grad_norm": 0.002935074269771576, - "learning_rate": 0.00019999862617111708, - "loss": 46.0, - "step": 21833 - }, - { - "epoch": 1.6693617753311543, - "grad_norm": 0.0022740764543414116, - "learning_rate": 0.00019999862604520798, - "loss": 46.0, - "step": 21834 - }, - { - "epoch": 1.669438232314544, - "grad_norm": 0.0006617013132199645, - "learning_rate": 0.0001999986259192931, - "loss": 46.0, - "step": 21835 - }, - { - "epoch": 1.6695146892979338, - "grad_norm": 0.0005657104193232954, - "learning_rate": 0.00019999862579337245, - "loss": 46.0, - "step": 21836 - }, - { - "epoch": 1.6695911462813235, - "grad_norm": 0.0014485052088275552, - "learning_rate": 0.00019999862566744605, - "loss": 46.0, - "step": 21837 - }, - { - "epoch": 1.6696676032647133, - "grad_norm": 0.0004112808674108237, - "learning_rate": 0.00019999862554151388, - "loss": 46.0, - "step": 21838 - }, - { - "epoch": 1.669744060248103, - "grad_norm": 0.0008636778802610934, - "learning_rate": 0.00019999862541557591, - "loss": 46.0, - "step": 21839 - }, - { - "epoch": 1.6698205172314926, - "grad_norm": 0.0013015868607908487, - "learning_rate": 0.0001999986252896322, - "loss": 46.0, - "step": 21840 - }, - { - "epoch": 1.6698969742148824, - "grad_norm": 0.0005731317214667797, - "learning_rate": 0.00019999862516368268, - "loss": 46.0, - "step": 21841 - }, - { - "epoch": 1.669973431198272, - "grad_norm": 0.000936092168558389, - "learning_rate": 0.00019999862503772745, - "loss": 46.0, - "step": 21842 - }, - { - "epoch": 1.6700498881816617, - "grad_norm": 0.001647431286983192, - "learning_rate": 0.0001999986249117664, - "loss": 46.0, - "step": 21843 - }, - { - "epoch": 1.6701263451650514, - "grad_norm": 0.0013484007213264704, - "learning_rate": 0.0001999986247857996, - "loss": 46.0, - "step": 21844 - }, - { - "epoch": 1.6702028021484412, - "grad_norm": 0.003102868562564254, - "learning_rate": 0.00019999862465982702, - "loss": 46.0, - "step": 21845 - }, - { - "epoch": 1.670279259131831, - "grad_norm": 0.0012187802931293845, - "learning_rate": 0.0001999986245338487, - "loss": 46.0, - "step": 21846 - }, - { - "epoch": 1.6703557161152207, - "grad_norm": 0.0012246815022081137, - "learning_rate": 0.00019999862440786458, - "loss": 46.0, - "step": 21847 - }, - { - "epoch": 1.6704321730986105, - "grad_norm": 0.0004368004738353193, - "learning_rate": 0.0001999986242818747, - "loss": 46.0, - "step": 21848 - }, - { - "epoch": 1.6705086300820002, - "grad_norm": 0.0021131793037056923, - "learning_rate": 0.00019999862415587906, - "loss": 46.0, - "step": 21849 - }, - { - "epoch": 1.67058508706539, - "grad_norm": 0.0055949329398572445, - "learning_rate": 0.00019999862402987764, - "loss": 46.0, - "step": 21850 - }, - { - "epoch": 1.6706615440487795, - "grad_norm": 0.002044996013864875, - "learning_rate": 0.00019999862390387047, - "loss": 46.0, - "step": 21851 - }, - { - "epoch": 1.6707380010321693, - "grad_norm": 0.0003055713605135679, - "learning_rate": 0.0001999986237778575, - "loss": 46.0, - "step": 21852 - }, - { - "epoch": 1.6708144580155588, - "grad_norm": 0.0019037730526179075, - "learning_rate": 0.00019999862365183878, - "loss": 46.0, - "step": 21853 - }, - { - "epoch": 1.6708909149989486, - "grad_norm": 0.0004695192910730839, - "learning_rate": 0.0001999986235258143, - "loss": 46.0, - "step": 21854 - }, - { - "epoch": 1.6709673719823384, - "grad_norm": 0.0014583783922716975, - "learning_rate": 0.000199998623399784, - "loss": 46.0, - "step": 21855 - }, - { - "epoch": 1.6710438289657281, - "grad_norm": 0.0012723759282380342, - "learning_rate": 0.000199998623273748, - "loss": 46.0, - "step": 21856 - }, - { - "epoch": 1.6711202859491179, - "grad_norm": 0.00046787571045570076, - "learning_rate": 0.00019999862314770618, - "loss": 46.0, - "step": 21857 - }, - { - "epoch": 1.6711967429325076, - "grad_norm": 0.0005144478054717183, - "learning_rate": 0.00019999862302165863, - "loss": 46.0, - "step": 21858 - }, - { - "epoch": 1.6712731999158974, - "grad_norm": 0.0008232161053456366, - "learning_rate": 0.00019999862289560527, - "loss": 46.0, - "step": 21859 - }, - { - "epoch": 1.6713496568992872, - "grad_norm": 0.0023117554374039173, - "learning_rate": 0.00019999862276954614, - "loss": 46.0, - "step": 21860 - }, - { - "epoch": 1.671426113882677, - "grad_norm": 0.0007232401403598487, - "learning_rate": 0.0001999986226434813, - "loss": 46.0, - "step": 21861 - }, - { - "epoch": 1.6715025708660665, - "grad_norm": 0.0013719139387831092, - "learning_rate": 0.00019999862251741064, - "loss": 46.0, - "step": 21862 - }, - { - "epoch": 1.6715790278494562, - "grad_norm": 0.0010404265485703945, - "learning_rate": 0.00019999862239133422, - "loss": 46.0, - "step": 21863 - }, - { - "epoch": 1.6716554848328458, - "grad_norm": 0.0009005528991110623, - "learning_rate": 0.00019999862226525203, - "loss": 46.0, - "step": 21864 - }, - { - "epoch": 1.6717319418162355, - "grad_norm": 0.0008152939262799919, - "learning_rate": 0.00019999862213916408, - "loss": 46.0, - "step": 21865 - }, - { - "epoch": 1.6718083987996253, - "grad_norm": 0.0020366471726447344, - "learning_rate": 0.00019999862201307034, - "loss": 46.0, - "step": 21866 - }, - { - "epoch": 1.671884855783015, - "grad_norm": 0.0038110660389065742, - "learning_rate": 0.00019999862188697085, - "loss": 46.0, - "step": 21867 - }, - { - "epoch": 1.6719613127664048, - "grad_norm": 0.0011139587732031941, - "learning_rate": 0.0001999986217608656, - "loss": 46.0, - "step": 21868 - }, - { - "epoch": 1.6720377697497946, - "grad_norm": 0.001885499688796699, - "learning_rate": 0.00019999862163475455, - "loss": 46.0, - "step": 21869 - }, - { - "epoch": 1.6721142267331843, - "grad_norm": 0.0012409884948283434, - "learning_rate": 0.00019999862150863775, - "loss": 46.0, - "step": 21870 - }, - { - "epoch": 1.672190683716574, - "grad_norm": 0.0007964686374180019, - "learning_rate": 0.00019999862138251516, - "loss": 46.0, - "step": 21871 - }, - { - "epoch": 1.6722671406999636, - "grad_norm": 0.000891521864105016, - "learning_rate": 0.0001999986212563868, - "loss": 46.0, - "step": 21872 - }, - { - "epoch": 1.6723435976833534, - "grad_norm": 0.0012375818332657218, - "learning_rate": 0.0001999986211302527, - "loss": 46.0, - "step": 21873 - }, - { - "epoch": 1.6724200546667431, - "grad_norm": 0.000987158971838653, - "learning_rate": 0.00019999862100411283, - "loss": 46.0, - "step": 21874 - }, - { - "epoch": 1.6724965116501327, - "grad_norm": 0.0033816248178482056, - "learning_rate": 0.00019999862087796718, - "loss": 46.0, - "step": 21875 - }, - { - "epoch": 1.6725729686335224, - "grad_norm": 0.001214248826727271, - "learning_rate": 0.00019999862075181576, - "loss": 46.0, - "step": 21876 - }, - { - "epoch": 1.6726494256169122, - "grad_norm": 0.001549255452118814, - "learning_rate": 0.0001999986206256586, - "loss": 46.0, - "step": 21877 - }, - { - "epoch": 1.672725882600302, - "grad_norm": 0.0010053038131445646, - "learning_rate": 0.00019999862049949562, - "loss": 46.0, - "step": 21878 - }, - { - "epoch": 1.6728023395836917, - "grad_norm": 0.0024029631167650223, - "learning_rate": 0.00019999862037332688, - "loss": 46.0, - "step": 21879 - }, - { - "epoch": 1.6728787965670815, - "grad_norm": 0.0015450253849849105, - "learning_rate": 0.0001999986202471524, - "loss": 46.0, - "step": 21880 - }, - { - "epoch": 1.6729552535504713, - "grad_norm": 0.001079551293514669, - "learning_rate": 0.00019999862012097213, - "loss": 46.0, - "step": 21881 - }, - { - "epoch": 1.673031710533861, - "grad_norm": 0.002845742041245103, - "learning_rate": 0.0001999986199947861, - "loss": 46.0, - "step": 21882 - }, - { - "epoch": 1.6731081675172506, - "grad_norm": 0.0027554119005799294, - "learning_rate": 0.00019999861986859428, - "loss": 46.0, - "step": 21883 - }, - { - "epoch": 1.6731846245006403, - "grad_norm": 0.0008502207929268479, - "learning_rate": 0.0001999986197423967, - "loss": 46.0, - "step": 21884 - }, - { - "epoch": 1.67326108148403, - "grad_norm": 0.0010531614534556866, - "learning_rate": 0.00019999861961619337, - "loss": 46.0, - "step": 21885 - }, - { - "epoch": 1.6733375384674196, - "grad_norm": 0.0039682453498244286, - "learning_rate": 0.00019999861948998425, - "loss": 46.0, - "step": 21886 - }, - { - "epoch": 1.6734139954508094, - "grad_norm": 0.0023424874525517225, - "learning_rate": 0.00019999861936376937, - "loss": 46.0, - "step": 21887 - }, - { - "epoch": 1.6734904524341991, - "grad_norm": 0.0013596605276688933, - "learning_rate": 0.00019999861923754872, - "loss": 46.0, - "step": 21888 - }, - { - "epoch": 1.673566909417589, - "grad_norm": 0.019456440582871437, - "learning_rate": 0.0001999986191113223, - "loss": 46.0, - "step": 21889 - }, - { - "epoch": 1.6736433664009787, - "grad_norm": 0.0010776534909382463, - "learning_rate": 0.0001999986189850901, - "loss": 46.0, - "step": 21890 - }, - { - "epoch": 1.6737198233843684, - "grad_norm": 0.00044219361734576523, - "learning_rate": 0.00019999861885885216, - "loss": 46.0, - "step": 21891 - }, - { - "epoch": 1.6737962803677582, - "grad_norm": 0.0012572972336784005, - "learning_rate": 0.00019999861873260842, - "loss": 46.0, - "step": 21892 - }, - { - "epoch": 1.673872737351148, - "grad_norm": 0.001081867958419025, - "learning_rate": 0.0001999986186063589, - "loss": 46.0, - "step": 21893 - }, - { - "epoch": 1.6739491943345375, - "grad_norm": 0.0007973661413416266, - "learning_rate": 0.00019999861848010367, - "loss": 46.0, - "step": 21894 - }, - { - "epoch": 1.6740256513179272, - "grad_norm": 0.001230223453603685, - "learning_rate": 0.0001999986183538426, - "loss": 46.0, - "step": 21895 - }, - { - "epoch": 1.674102108301317, - "grad_norm": 0.0044013215228915215, - "learning_rate": 0.0001999986182275758, - "loss": 46.0, - "step": 21896 - }, - { - "epoch": 1.6741785652847065, - "grad_norm": 0.0005416969652287662, - "learning_rate": 0.00019999861810130322, - "loss": 46.0, - "step": 21897 - }, - { - "epoch": 1.6742550222680963, - "grad_norm": 0.0005605234182439744, - "learning_rate": 0.0001999986179750249, - "loss": 46.0, - "step": 21898 - }, - { - "epoch": 1.674331479251486, - "grad_norm": 0.006108017172664404, - "learning_rate": 0.00019999861784874076, - "loss": 46.0, - "step": 21899 - }, - { - "epoch": 1.6744079362348758, - "grad_norm": 0.0013146072160452604, - "learning_rate": 0.0001999986177224509, - "loss": 46.0, - "step": 21900 - }, - { - "epoch": 1.6744843932182656, - "grad_norm": 0.0007573894108645618, - "learning_rate": 0.00019999861759615524, - "loss": 46.0, - "step": 21901 - }, - { - "epoch": 1.6745608502016553, - "grad_norm": 0.0016217163065448403, - "learning_rate": 0.0001999986174698538, - "loss": 46.0, - "step": 21902 - }, - { - "epoch": 1.674637307185045, - "grad_norm": 0.0018594225402921438, - "learning_rate": 0.0001999986173435466, - "loss": 46.0, - "step": 21903 - }, - { - "epoch": 1.6747137641684349, - "grad_norm": 0.0007377619622275233, - "learning_rate": 0.00019999861721723363, - "loss": 46.0, - "step": 21904 - }, - { - "epoch": 1.6747902211518244, - "grad_norm": 0.000803124625235796, - "learning_rate": 0.00019999861709091489, - "loss": 46.0, - "step": 21905 - }, - { - "epoch": 1.6748666781352142, - "grad_norm": 0.0007471116841770709, - "learning_rate": 0.00019999861696459042, - "loss": 46.0, - "step": 21906 - }, - { - "epoch": 1.674943135118604, - "grad_norm": 0.001843982026912272, - "learning_rate": 0.00019999861683826014, - "loss": 46.0, - "step": 21907 - }, - { - "epoch": 1.6750195921019935, - "grad_norm": 0.0005107785691507161, - "learning_rate": 0.0001999986167119241, - "loss": 46.0, - "step": 21908 - }, - { - "epoch": 1.6750960490853832, - "grad_norm": 0.00047672796063125134, - "learning_rate": 0.00019999861658558227, - "loss": 46.0, - "step": 21909 - }, - { - "epoch": 1.675172506068773, - "grad_norm": 0.0011195311089977622, - "learning_rate": 0.0001999986164592347, - "loss": 46.0, - "step": 21910 - }, - { - "epoch": 1.6752489630521628, - "grad_norm": 0.0035118686500936747, - "learning_rate": 0.00019999861633288136, - "loss": 46.0, - "step": 21911 - }, - { - "epoch": 1.6753254200355525, - "grad_norm": 0.0008547678589820862, - "learning_rate": 0.00019999861620652226, - "loss": 46.0, - "step": 21912 - }, - { - "epoch": 1.6754018770189423, - "grad_norm": 0.001727780792862177, - "learning_rate": 0.00019999861608015735, - "loss": 46.0, - "step": 21913 - }, - { - "epoch": 1.675478334002332, - "grad_norm": 0.0006705062696710229, - "learning_rate": 0.0001999986159537867, - "loss": 46.0, - "step": 21914 - }, - { - "epoch": 1.6755547909857218, - "grad_norm": 0.0005745262023992836, - "learning_rate": 0.00019999861582741026, - "loss": 46.0, - "step": 21915 - }, - { - "epoch": 1.6756312479691113, - "grad_norm": 0.0036399506498128176, - "learning_rate": 0.00019999861570102806, - "loss": 46.0, - "step": 21916 - }, - { - "epoch": 1.675707704952501, - "grad_norm": 0.0003371548082213849, - "learning_rate": 0.0001999986155746401, - "loss": 46.0, - "step": 21917 - }, - { - "epoch": 1.6757841619358909, - "grad_norm": 0.001034217537380755, - "learning_rate": 0.00019999861544824638, - "loss": 46.0, - "step": 21918 - }, - { - "epoch": 1.6758606189192804, - "grad_norm": 0.0004881095082964748, - "learning_rate": 0.0001999986153218469, - "loss": 46.0, - "step": 21919 - }, - { - "epoch": 1.6759370759026702, - "grad_norm": 0.000510350102558732, - "learning_rate": 0.0001999986151954416, - "loss": 46.0, - "step": 21920 - }, - { - "epoch": 1.67601353288606, - "grad_norm": 0.0012808021856471896, - "learning_rate": 0.00019999861506903057, - "loss": 46.0, - "step": 21921 - }, - { - "epoch": 1.6760899898694497, - "grad_norm": 0.017940206453204155, - "learning_rate": 0.00019999861494261376, - "loss": 46.0, - "step": 21922 - }, - { - "epoch": 1.6761664468528394, - "grad_norm": 0.007467540446668863, - "learning_rate": 0.00019999861481619118, - "loss": 46.0, - "step": 21923 - }, - { - "epoch": 1.6762429038362292, - "grad_norm": 0.000876472913660109, - "learning_rate": 0.00019999861468976283, - "loss": 46.0, - "step": 21924 - }, - { - "epoch": 1.676319360819619, - "grad_norm": 0.000634226540569216, - "learning_rate": 0.0001999986145633287, - "loss": 46.0, - "step": 21925 - }, - { - "epoch": 1.6763958178030087, - "grad_norm": 0.0019975327886641026, - "learning_rate": 0.00019999861443688882, - "loss": 46.0, - "step": 21926 - }, - { - "epoch": 1.6764722747863983, - "grad_norm": 0.0007047424442134798, - "learning_rate": 0.00019999861431044315, - "loss": 46.0, - "step": 21927 - }, - { - "epoch": 1.676548731769788, - "grad_norm": 0.0020711985416710377, - "learning_rate": 0.00019999861418399173, - "loss": 46.0, - "step": 21928 - }, - { - "epoch": 1.6766251887531778, - "grad_norm": 0.00022046097728889436, - "learning_rate": 0.00019999861405753453, - "loss": 46.0, - "step": 21929 - }, - { - "epoch": 1.6767016457365673, - "grad_norm": 0.0004052590811625123, - "learning_rate": 0.00019999861393107157, - "loss": 46.0, - "step": 21930 - }, - { - "epoch": 1.676778102719957, - "grad_norm": 0.0007094019092619419, - "learning_rate": 0.00019999861380460283, - "loss": 46.0, - "step": 21931 - }, - { - "epoch": 1.6768545597033468, - "grad_norm": 0.0010485637467354536, - "learning_rate": 0.0001999986136781283, - "loss": 46.0, - "step": 21932 - }, - { - "epoch": 1.6769310166867366, - "grad_norm": 0.0050408439710736275, - "learning_rate": 0.00019999861355164805, - "loss": 46.0, - "step": 21933 - }, - { - "epoch": 1.6770074736701264, - "grad_norm": 0.0013136015040799975, - "learning_rate": 0.000199998613425162, - "loss": 46.0, - "step": 21934 - }, - { - "epoch": 1.6770839306535161, - "grad_norm": 0.0008111601346172392, - "learning_rate": 0.0001999986132986702, - "loss": 46.0, - "step": 21935 - }, - { - "epoch": 1.677160387636906, - "grad_norm": 0.002402213402092457, - "learning_rate": 0.0001999986131721726, - "loss": 46.0, - "step": 21936 - }, - { - "epoch": 1.6772368446202957, - "grad_norm": 0.0009191042045131326, - "learning_rate": 0.00019999861304566925, - "loss": 46.0, - "step": 21937 - }, - { - "epoch": 1.6773133016036852, - "grad_norm": 0.0007864030194468796, - "learning_rate": 0.00019999861291916015, - "loss": 46.0, - "step": 21938 - }, - { - "epoch": 1.677389758587075, - "grad_norm": 0.001963469898328185, - "learning_rate": 0.00019999861279264525, - "loss": 46.0, - "step": 21939 - }, - { - "epoch": 1.6774662155704647, - "grad_norm": 0.001459066872484982, - "learning_rate": 0.0001999986126661246, - "loss": 46.0, - "step": 21940 - }, - { - "epoch": 1.6775426725538543, - "grad_norm": 0.003184190485626459, - "learning_rate": 0.00019999861253959815, - "loss": 46.0, - "step": 21941 - }, - { - "epoch": 1.677619129537244, - "grad_norm": 0.0009452649974264205, - "learning_rate": 0.00019999861241306596, - "loss": 46.0, - "step": 21942 - }, - { - "epoch": 1.6776955865206338, - "grad_norm": 0.0012913054088130593, - "learning_rate": 0.000199998612286528, - "loss": 46.0, - "step": 21943 - }, - { - "epoch": 1.6777720435040235, - "grad_norm": 0.008002624846994877, - "learning_rate": 0.00019999861215998425, - "loss": 46.0, - "step": 21944 - }, - { - "epoch": 1.6778485004874133, - "grad_norm": 0.0013766029151156545, - "learning_rate": 0.00019999861203343476, - "loss": 46.0, - "step": 21945 - }, - { - "epoch": 1.677924957470803, - "grad_norm": 0.0022248669993132353, - "learning_rate": 0.00019999861190687947, - "loss": 46.0, - "step": 21946 - }, - { - "epoch": 1.6780014144541928, - "grad_norm": 0.000477284484077245, - "learning_rate": 0.0001999986117803184, - "loss": 46.0, - "step": 21947 - }, - { - "epoch": 1.6780778714375826, - "grad_norm": 0.00070261798100546, - "learning_rate": 0.0001999986116537516, - "loss": 46.0, - "step": 21948 - }, - { - "epoch": 1.6781543284209721, - "grad_norm": 0.0015849824994802475, - "learning_rate": 0.00019999861152717903, - "loss": 46.0, - "step": 21949 - }, - { - "epoch": 1.6782307854043619, - "grad_norm": 0.0003068811201956123, - "learning_rate": 0.00019999861140060067, - "loss": 46.0, - "step": 21950 - }, - { - "epoch": 1.6783072423877516, - "grad_norm": 0.001097612432204187, - "learning_rate": 0.00019999861127401654, - "loss": 46.0, - "step": 21951 - }, - { - "epoch": 1.6783836993711412, - "grad_norm": 0.00123211566824466, - "learning_rate": 0.00019999861114742667, - "loss": 46.0, - "step": 21952 - }, - { - "epoch": 1.678460156354531, - "grad_norm": 0.0005299783661030233, - "learning_rate": 0.000199998611020831, - "loss": 46.0, - "step": 21953 - }, - { - "epoch": 1.6785366133379207, - "grad_norm": 0.0003358338144607842, - "learning_rate": 0.00019999861089422955, - "loss": 46.0, - "step": 21954 - }, - { - "epoch": 1.6786130703213105, - "grad_norm": 0.0005786007968708873, - "learning_rate": 0.00019999861076762236, - "loss": 46.0, - "step": 21955 - }, - { - "epoch": 1.6786895273047002, - "grad_norm": 0.002668971661478281, - "learning_rate": 0.0001999986106410094, - "loss": 46.0, - "step": 21956 - }, - { - "epoch": 1.67876598428809, - "grad_norm": 0.0004163929261267185, - "learning_rate": 0.00019999861051439065, - "loss": 46.0, - "step": 21957 - }, - { - "epoch": 1.6788424412714797, - "grad_norm": 0.00147600460331887, - "learning_rate": 0.00019999861038776616, - "loss": 46.0, - "step": 21958 - }, - { - "epoch": 1.6789188982548695, - "grad_norm": 0.009283826686441898, - "learning_rate": 0.00019999861026113587, - "loss": 46.0, - "step": 21959 - }, - { - "epoch": 1.678995355238259, - "grad_norm": 0.0038927216082811356, - "learning_rate": 0.0001999986101344998, - "loss": 46.0, - "step": 21960 - }, - { - "epoch": 1.6790718122216488, - "grad_norm": 0.0035042238887399435, - "learning_rate": 0.000199998610007858, - "loss": 46.0, - "step": 21961 - }, - { - "epoch": 1.6791482692050386, - "grad_norm": 0.0008100098930299282, - "learning_rate": 0.00019999860988121043, - "loss": 46.0, - "step": 21962 - }, - { - "epoch": 1.679224726188428, - "grad_norm": 0.0034163829404860735, - "learning_rate": 0.00019999860975455707, - "loss": 46.0, - "step": 21963 - }, - { - "epoch": 1.6793011831718179, - "grad_norm": 0.0012321972753852606, - "learning_rate": 0.00019999860962789795, - "loss": 46.0, - "step": 21964 - }, - { - "epoch": 1.6793776401552076, - "grad_norm": 0.0032783031929284334, - "learning_rate": 0.00019999860950123305, - "loss": 46.0, - "step": 21965 - }, - { - "epoch": 1.6794540971385974, - "grad_norm": 0.005596384406089783, - "learning_rate": 0.00019999860937456237, - "loss": 46.0, - "step": 21966 - }, - { - "epoch": 1.6795305541219872, - "grad_norm": 0.0012995415600016713, - "learning_rate": 0.00019999860924788595, - "loss": 46.0, - "step": 21967 - }, - { - "epoch": 1.679607011105377, - "grad_norm": 0.001801166101358831, - "learning_rate": 0.00019999860912120373, - "loss": 46.0, - "step": 21968 - }, - { - "epoch": 1.6796834680887667, - "grad_norm": 0.007909854874014854, - "learning_rate": 0.00019999860899451576, - "loss": 46.0, - "step": 21969 - }, - { - "epoch": 1.6797599250721564, - "grad_norm": 0.0007382303010672331, - "learning_rate": 0.00019999860886782202, - "loss": 46.0, - "step": 21970 - }, - { - "epoch": 1.679836382055546, - "grad_norm": 0.003825595136731863, - "learning_rate": 0.00019999860874112254, - "loss": 46.0, - "step": 21971 - }, - { - "epoch": 1.6799128390389357, - "grad_norm": 0.0005994772654958069, - "learning_rate": 0.00019999860861441725, - "loss": 46.0, - "step": 21972 - }, - { - "epoch": 1.6799892960223253, - "grad_norm": 0.0005374700995162129, - "learning_rate": 0.00019999860848770616, - "loss": 46.0, - "step": 21973 - }, - { - "epoch": 1.680065753005715, - "grad_norm": 0.0009538496960885823, - "learning_rate": 0.00019999860836098936, - "loss": 46.0, - "step": 21974 - }, - { - "epoch": 1.6801422099891048, - "grad_norm": 0.002697878982871771, - "learning_rate": 0.00019999860823426678, - "loss": 46.0, - "step": 21975 - }, - { - "epoch": 1.6802186669724946, - "grad_norm": 0.002903758082538843, - "learning_rate": 0.0001999986081075384, - "loss": 46.0, - "step": 21976 - }, - { - "epoch": 1.6802951239558843, - "grad_norm": 0.00045755933388136327, - "learning_rate": 0.0001999986079808043, - "loss": 46.0, - "step": 21977 - }, - { - "epoch": 1.680371580939274, - "grad_norm": 0.0014379234053194523, - "learning_rate": 0.00019999860785406442, - "loss": 46.0, - "step": 21978 - }, - { - "epoch": 1.6804480379226638, - "grad_norm": 0.0012884469470009208, - "learning_rate": 0.00019999860772731872, - "loss": 46.0, - "step": 21979 - }, - { - "epoch": 1.6805244949060536, - "grad_norm": 0.0007561820675618947, - "learning_rate": 0.0001999986076005673, - "loss": 46.0, - "step": 21980 - }, - { - "epoch": 1.6806009518894434, - "grad_norm": 0.012626275420188904, - "learning_rate": 0.00019999860747381008, - "loss": 46.0, - "step": 21981 - }, - { - "epoch": 1.680677408872833, - "grad_norm": 0.0007733111851848662, - "learning_rate": 0.00019999860734704712, - "loss": 46.0, - "step": 21982 - }, - { - "epoch": 1.6807538658562227, - "grad_norm": 0.001948415650986135, - "learning_rate": 0.00019999860722027838, - "loss": 46.0, - "step": 21983 - }, - { - "epoch": 1.6808303228396122, - "grad_norm": 0.002721319906413555, - "learning_rate": 0.00019999860709350386, - "loss": 46.0, - "step": 21984 - }, - { - "epoch": 1.680906779823002, - "grad_norm": 0.0008817558991722763, - "learning_rate": 0.00019999860696672358, - "loss": 46.0, - "step": 21985 - }, - { - "epoch": 1.6809832368063917, - "grad_norm": 0.0022498629987239838, - "learning_rate": 0.00019999860683993754, - "loss": 46.0, - "step": 21986 - }, - { - "epoch": 1.6810596937897815, - "grad_norm": 0.0007718690321780741, - "learning_rate": 0.0001999986067131457, - "loss": 46.0, - "step": 21987 - }, - { - "epoch": 1.6811361507731712, - "grad_norm": 0.0011238427832722664, - "learning_rate": 0.00019999860658634813, - "loss": 46.0, - "step": 21988 - }, - { - "epoch": 1.681212607756561, - "grad_norm": 0.0008793903980404139, - "learning_rate": 0.00019999860645954475, - "loss": 46.0, - "step": 21989 - }, - { - "epoch": 1.6812890647399508, - "grad_norm": 0.0007311938679777086, - "learning_rate": 0.00019999860633273563, - "loss": 46.0, - "step": 21990 - }, - { - "epoch": 1.6813655217233405, - "grad_norm": 0.0015717670321464539, - "learning_rate": 0.00019999860620592075, - "loss": 46.0, - "step": 21991 - }, - { - "epoch": 1.6814419787067303, - "grad_norm": 0.0011497556697577238, - "learning_rate": 0.00019999860607910005, - "loss": 46.0, - "step": 21992 - }, - { - "epoch": 1.6815184356901198, - "grad_norm": 0.001142602995969355, - "learning_rate": 0.00019999860595227363, - "loss": 46.0, - "step": 21993 - }, - { - "epoch": 1.6815948926735096, - "grad_norm": 0.0018448163755238056, - "learning_rate": 0.00019999860582544144, - "loss": 46.0, - "step": 21994 - }, - { - "epoch": 1.6816713496568991, - "grad_norm": 0.0006297669024206698, - "learning_rate": 0.00019999860569860345, - "loss": 46.0, - "step": 21995 - }, - { - "epoch": 1.681747806640289, - "grad_norm": 0.0019050653791055083, - "learning_rate": 0.0001999986055717597, - "loss": 46.0, - "step": 21996 - }, - { - "epoch": 1.6818242636236787, - "grad_norm": 0.0074720801785588264, - "learning_rate": 0.0001999986054449102, - "loss": 46.0, - "step": 21997 - }, - { - "epoch": 1.6819007206070684, - "grad_norm": 0.0007136424537748098, - "learning_rate": 0.0001999986053180549, - "loss": 46.0, - "step": 21998 - }, - { - "epoch": 1.6819771775904582, - "grad_norm": 0.0007414708379656076, - "learning_rate": 0.00019999860519119388, - "loss": 46.0, - "step": 21999 - }, - { - "epoch": 1.682053634573848, - "grad_norm": 0.0008026451105251908, - "learning_rate": 0.00019999860506432704, - "loss": 46.0, - "step": 22000 - }, - { - "epoch": 1.6821300915572377, - "grad_norm": 0.003655873006209731, - "learning_rate": 0.00019999860493745444, - "loss": 46.0, - "step": 22001 - }, - { - "epoch": 1.6822065485406275, - "grad_norm": 0.0025371690280735493, - "learning_rate": 0.00019999860481057608, - "loss": 46.0, - "step": 22002 - }, - { - "epoch": 1.682283005524017, - "grad_norm": 0.0003799954429268837, - "learning_rate": 0.00019999860468369196, - "loss": 46.0, - "step": 22003 - }, - { - "epoch": 1.6823594625074068, - "grad_norm": 0.0007769875228404999, - "learning_rate": 0.00019999860455680203, - "loss": 46.0, - "step": 22004 - }, - { - "epoch": 1.6824359194907965, - "grad_norm": 0.0006201844080351293, - "learning_rate": 0.00019999860442990639, - "loss": 46.0, - "step": 22005 - }, - { - "epoch": 1.682512376474186, - "grad_norm": 0.0010021042544394732, - "learning_rate": 0.00019999860430300494, - "loss": 46.0, - "step": 22006 - }, - { - "epoch": 1.6825888334575758, - "grad_norm": 0.0004711336805485189, - "learning_rate": 0.00019999860417609772, - "loss": 46.0, - "step": 22007 - }, - { - "epoch": 1.6826652904409656, - "grad_norm": 0.0008106256718747318, - "learning_rate": 0.00019999860404918475, - "loss": 46.0, - "step": 22008 - }, - { - "epoch": 1.6827417474243553, - "grad_norm": 0.001056780805811286, - "learning_rate": 0.00019999860392226602, - "loss": 46.0, - "step": 22009 - }, - { - "epoch": 1.682818204407745, - "grad_norm": 0.0006996591109782457, - "learning_rate": 0.0001999986037953415, - "loss": 46.0, - "step": 22010 - }, - { - "epoch": 1.6828946613911349, - "grad_norm": 0.0012410633498802781, - "learning_rate": 0.00019999860366841122, - "loss": 46.0, - "step": 22011 - }, - { - "epoch": 1.6829711183745246, - "grad_norm": 0.0012923083268105984, - "learning_rate": 0.00019999860354147516, - "loss": 46.0, - "step": 22012 - }, - { - "epoch": 1.6830475753579144, - "grad_norm": 0.005224398337304592, - "learning_rate": 0.00019999860341453332, - "loss": 46.0, - "step": 22013 - }, - { - "epoch": 1.683124032341304, - "grad_norm": 0.002212190069258213, - "learning_rate": 0.00019999860328758575, - "loss": 46.0, - "step": 22014 - }, - { - "epoch": 1.6832004893246937, - "grad_norm": 0.0008398907375521958, - "learning_rate": 0.00019999860316063237, - "loss": 46.0, - "step": 22015 - }, - { - "epoch": 1.6832769463080834, - "grad_norm": 0.0017157564871013165, - "learning_rate": 0.00019999860303367324, - "loss": 46.0, - "step": 22016 - }, - { - "epoch": 1.683353403291473, - "grad_norm": 0.003142619039863348, - "learning_rate": 0.00019999860290670834, - "loss": 46.0, - "step": 22017 - }, - { - "epoch": 1.6834298602748627, - "grad_norm": 0.002326208632439375, - "learning_rate": 0.00019999860277973767, - "loss": 46.0, - "step": 22018 - }, - { - "epoch": 1.6835063172582525, - "grad_norm": 0.0015836707316339016, - "learning_rate": 0.00019999860265276122, - "loss": 46.0, - "step": 22019 - }, - { - "epoch": 1.6835827742416423, - "grad_norm": 0.003529695561155677, - "learning_rate": 0.000199998602525779, - "loss": 46.0, - "step": 22020 - }, - { - "epoch": 1.683659231225032, - "grad_norm": 0.00117284688167274, - "learning_rate": 0.000199998602398791, - "loss": 46.0, - "step": 22021 - }, - { - "epoch": 1.6837356882084218, - "grad_norm": 0.0024197855964303017, - "learning_rate": 0.00019999860227179727, - "loss": 46.0, - "step": 22022 - }, - { - "epoch": 1.6838121451918115, - "grad_norm": 0.0017137534450739622, - "learning_rate": 0.00019999860214479776, - "loss": 46.0, - "step": 22023 - }, - { - "epoch": 1.6838886021752013, - "grad_norm": 0.004155252128839493, - "learning_rate": 0.00019999860201779247, - "loss": 46.0, - "step": 22024 - }, - { - "epoch": 1.6839650591585908, - "grad_norm": 0.0011570446658879519, - "learning_rate": 0.00019999860189078144, - "loss": 46.0, - "step": 22025 - }, - { - "epoch": 1.6840415161419806, - "grad_norm": 0.0005555233801715076, - "learning_rate": 0.00019999860176376458, - "loss": 46.0, - "step": 22026 - }, - { - "epoch": 1.6841179731253704, - "grad_norm": 0.00028855595155619085, - "learning_rate": 0.000199998601636742, - "loss": 46.0, - "step": 22027 - }, - { - "epoch": 1.68419443010876, - "grad_norm": 0.0018521032761782408, - "learning_rate": 0.00019999860150971363, - "loss": 46.0, - "step": 22028 - }, - { - "epoch": 1.6842708870921497, - "grad_norm": 0.00781367439776659, - "learning_rate": 0.00019999860138267953, - "loss": 46.0, - "step": 22029 - }, - { - "epoch": 1.6843473440755394, - "grad_norm": 0.001226624008268118, - "learning_rate": 0.0001999986012556396, - "loss": 46.0, - "step": 22030 - }, - { - "epoch": 1.6844238010589292, - "grad_norm": 0.0008058040984906256, - "learning_rate": 0.00019999860112859393, - "loss": 46.0, - "step": 22031 - }, - { - "epoch": 1.684500258042319, - "grad_norm": 0.0002205067139584571, - "learning_rate": 0.00019999860100154248, - "loss": 46.0, - "step": 22032 - }, - { - "epoch": 1.6845767150257087, - "grad_norm": 0.0011257849400863051, - "learning_rate": 0.00019999860087448526, - "loss": 46.0, - "step": 22033 - }, - { - "epoch": 1.6846531720090985, - "grad_norm": 0.023186737671494484, - "learning_rate": 0.0001999986007474223, - "loss": 46.0, - "step": 22034 - }, - { - "epoch": 1.6847296289924882, - "grad_norm": 0.0032532988116145134, - "learning_rate": 0.00019999860062035353, - "loss": 46.0, - "step": 22035 - }, - { - "epoch": 1.6848060859758778, - "grad_norm": 0.0022807603236287832, - "learning_rate": 0.00019999860049327902, - "loss": 46.0, - "step": 22036 - }, - { - "epoch": 1.6848825429592675, - "grad_norm": 0.001121692592278123, - "learning_rate": 0.00019999860036619874, - "loss": 46.0, - "step": 22037 - }, - { - "epoch": 1.6849589999426573, - "grad_norm": 0.0007382062030956149, - "learning_rate": 0.00019999860023911268, - "loss": 46.0, - "step": 22038 - }, - { - "epoch": 1.6850354569260468, - "grad_norm": 0.0029226462356746197, - "learning_rate": 0.00019999860011202085, - "loss": 46.0, - "step": 22039 - }, - { - "epoch": 1.6851119139094366, - "grad_norm": 0.004737196955829859, - "learning_rate": 0.00019999859998492324, - "loss": 46.0, - "step": 22040 - }, - { - "epoch": 1.6851883708928264, - "grad_norm": 0.000391368375858292, - "learning_rate": 0.0001999985998578199, - "loss": 46.0, - "step": 22041 - }, - { - "epoch": 1.6852648278762161, - "grad_norm": 0.0011650958331301808, - "learning_rate": 0.00019999859973071074, - "loss": 46.0, - "step": 22042 - }, - { - "epoch": 1.6853412848596059, - "grad_norm": 0.00641118036583066, - "learning_rate": 0.00019999859960359584, - "loss": 46.0, - "step": 22043 - }, - { - "epoch": 1.6854177418429956, - "grad_norm": 0.00103643792681396, - "learning_rate": 0.00019999859947647517, - "loss": 46.0, - "step": 22044 - }, - { - "epoch": 1.6854941988263854, - "grad_norm": 0.00042371300514787436, - "learning_rate": 0.00019999859934934872, - "loss": 46.0, - "step": 22045 - }, - { - "epoch": 1.6855706558097752, - "grad_norm": 0.006192030850797892, - "learning_rate": 0.0001999985992222165, - "loss": 46.0, - "step": 22046 - }, - { - "epoch": 1.6856471127931647, - "grad_norm": 0.004003859590739012, - "learning_rate": 0.0001999985990950785, - "loss": 46.0, - "step": 22047 - }, - { - "epoch": 1.6857235697765545, - "grad_norm": 0.0009548881207592785, - "learning_rate": 0.00019999859896793474, - "loss": 46.0, - "step": 22048 - }, - { - "epoch": 1.6858000267599442, - "grad_norm": 0.0014837206108495593, - "learning_rate": 0.00019999859884078523, - "loss": 46.0, - "step": 22049 - }, - { - "epoch": 1.6858764837433338, - "grad_norm": 0.00445706257596612, - "learning_rate": 0.00019999859871362995, - "loss": 46.0, - "step": 22050 - }, - { - "epoch": 1.6859529407267235, - "grad_norm": 0.0015673998277634382, - "learning_rate": 0.0001999985985864689, - "loss": 46.0, - "step": 22051 - }, - { - "epoch": 1.6860293977101133, - "grad_norm": 0.0016404592897742987, - "learning_rate": 0.00019999859845930206, - "loss": 46.0, - "step": 22052 - }, - { - "epoch": 1.686105854693503, - "grad_norm": 0.00034314801450818777, - "learning_rate": 0.00019999859833212948, - "loss": 46.0, - "step": 22053 - }, - { - "epoch": 1.6861823116768928, - "grad_norm": 0.0005453019984997809, - "learning_rate": 0.0001999985982049511, - "loss": 46.0, - "step": 22054 - }, - { - "epoch": 1.6862587686602826, - "grad_norm": 0.001099586603231728, - "learning_rate": 0.00019999859807776695, - "loss": 46.0, - "step": 22055 - }, - { - "epoch": 1.6863352256436723, - "grad_norm": 0.0011352287838235497, - "learning_rate": 0.00019999859795057705, - "loss": 46.0, - "step": 22056 - }, - { - "epoch": 1.686411682627062, - "grad_norm": 0.0012024266179651022, - "learning_rate": 0.00019999859782338138, - "loss": 46.0, - "step": 22057 - }, - { - "epoch": 1.6864881396104516, - "grad_norm": 0.0006635950412601233, - "learning_rate": 0.00019999859769617994, - "loss": 46.0, - "step": 22058 - }, - { - "epoch": 1.6865645965938414, - "grad_norm": 0.0006319055682979524, - "learning_rate": 0.00019999859756897272, - "loss": 46.0, - "step": 22059 - }, - { - "epoch": 1.6866410535772312, - "grad_norm": 0.0017972607165575027, - "learning_rate": 0.00019999859744175973, - "loss": 46.0, - "step": 22060 - }, - { - "epoch": 1.6867175105606207, - "grad_norm": 0.005334995221346617, - "learning_rate": 0.000199998597314541, - "loss": 46.0, - "step": 22061 - }, - { - "epoch": 1.6867939675440105, - "grad_norm": 0.0014196924166753888, - "learning_rate": 0.00019999859718731645, - "loss": 46.0, - "step": 22062 - }, - { - "epoch": 1.6868704245274002, - "grad_norm": 0.0009492482058703899, - "learning_rate": 0.00019999859706008616, - "loss": 46.0, - "step": 22063 - }, - { - "epoch": 1.68694688151079, - "grad_norm": 0.0010091520380228758, - "learning_rate": 0.0001999985969328501, - "loss": 46.0, - "step": 22064 - }, - { - "epoch": 1.6870233384941797, - "grad_norm": 0.0013195306528359652, - "learning_rate": 0.00019999859680560827, - "loss": 46.0, - "step": 22065 - }, - { - "epoch": 1.6870997954775695, - "grad_norm": 0.0011403732933104038, - "learning_rate": 0.00019999859667836067, - "loss": 46.0, - "step": 22066 - }, - { - "epoch": 1.6871762524609593, - "grad_norm": 0.006182524375617504, - "learning_rate": 0.00019999859655110732, - "loss": 46.0, - "step": 22067 - }, - { - "epoch": 1.687252709444349, - "grad_norm": 0.0009352777851745486, - "learning_rate": 0.00019999859642384817, - "loss": 46.0, - "step": 22068 - }, - { - "epoch": 1.6873291664277386, - "grad_norm": 0.0008126057800836861, - "learning_rate": 0.00019999859629658327, - "loss": 46.0, - "step": 22069 - }, - { - "epoch": 1.6874056234111283, - "grad_norm": 0.002511648228392005, - "learning_rate": 0.00019999859616931257, - "loss": 46.0, - "step": 22070 - }, - { - "epoch": 1.687482080394518, - "grad_norm": 0.002141204196959734, - "learning_rate": 0.00019999859604203613, - "loss": 46.0, - "step": 22071 - }, - { - "epoch": 1.6875585373779076, - "grad_norm": 0.0004095903132110834, - "learning_rate": 0.00019999859591475388, - "loss": 46.0, - "step": 22072 - }, - { - "epoch": 1.6876349943612974, - "grad_norm": 0.0013014693977311254, - "learning_rate": 0.00019999859578746592, - "loss": 46.0, - "step": 22073 - }, - { - "epoch": 1.6877114513446871, - "grad_norm": 0.001256956602446735, - "learning_rate": 0.00019999859566017215, - "loss": 46.0, - "step": 22074 - }, - { - "epoch": 1.687787908328077, - "grad_norm": 0.0008801307994872332, - "learning_rate": 0.00019999859553287264, - "loss": 46.0, - "step": 22075 - }, - { - "epoch": 1.6878643653114667, - "grad_norm": 0.006607298273593187, - "learning_rate": 0.00019999859540556733, - "loss": 46.0, - "step": 22076 - }, - { - "epoch": 1.6879408222948564, - "grad_norm": 0.0016905938973650336, - "learning_rate": 0.00019999859527825627, - "loss": 46.0, - "step": 22077 - }, - { - "epoch": 1.6880172792782462, - "grad_norm": 0.0010199955431744456, - "learning_rate": 0.00019999859515093944, - "loss": 46.0, - "step": 22078 - }, - { - "epoch": 1.688093736261636, - "grad_norm": 0.0016149061266332865, - "learning_rate": 0.00019999859502361684, - "loss": 46.0, - "step": 22079 - }, - { - "epoch": 1.6881701932450255, - "grad_norm": 0.0013768686912953854, - "learning_rate": 0.00019999859489628846, - "loss": 46.0, - "step": 22080 - }, - { - "epoch": 1.6882466502284152, - "grad_norm": 0.002957918681204319, - "learning_rate": 0.00019999859476895433, - "loss": 46.0, - "step": 22081 - }, - { - "epoch": 1.688323107211805, - "grad_norm": 0.004262669011950493, - "learning_rate": 0.0001999985946416144, - "loss": 46.0, - "step": 22082 - }, - { - "epoch": 1.6883995641951945, - "grad_norm": 0.0011622327147051692, - "learning_rate": 0.00019999859451426874, - "loss": 46.0, - "step": 22083 - }, - { - "epoch": 1.6884760211785843, - "grad_norm": 0.0012830978957936168, - "learning_rate": 0.0001999985943869173, - "loss": 46.0, - "step": 22084 - }, - { - "epoch": 1.688552478161974, - "grad_norm": 0.0007717488333582878, - "learning_rate": 0.00019999859425956008, - "loss": 46.0, - "step": 22085 - }, - { - "epoch": 1.6886289351453638, - "grad_norm": 0.0010372751858085394, - "learning_rate": 0.00019999859413219709, - "loss": 46.0, - "step": 22086 - }, - { - "epoch": 1.6887053921287536, - "grad_norm": 0.0010127536952495575, - "learning_rate": 0.00019999859400482832, - "loss": 46.0, - "step": 22087 - }, - { - "epoch": 1.6887818491121434, - "grad_norm": 0.014839183539152145, - "learning_rate": 0.00019999859387745378, - "loss": 46.0, - "step": 22088 - }, - { - "epoch": 1.6888583060955331, - "grad_norm": 0.002051344607025385, - "learning_rate": 0.0001999985937500735, - "loss": 46.0, - "step": 22089 - }, - { - "epoch": 1.6889347630789229, - "grad_norm": 0.0018079476431012154, - "learning_rate": 0.00019999859362268742, - "loss": 46.0, - "step": 22090 - }, - { - "epoch": 1.6890112200623124, - "grad_norm": 0.0011286528315395117, - "learning_rate": 0.00019999859349529558, - "loss": 46.0, - "step": 22091 - }, - { - "epoch": 1.6890876770457022, - "grad_norm": 0.0006381383864209056, - "learning_rate": 0.00019999859336789798, - "loss": 46.0, - "step": 22092 - }, - { - "epoch": 1.689164134029092, - "grad_norm": 0.0007902693469077349, - "learning_rate": 0.0001999985932404946, - "loss": 46.0, - "step": 22093 - }, - { - "epoch": 1.6892405910124815, - "grad_norm": 0.0005956924869678915, - "learning_rate": 0.00019999859311308548, - "loss": 46.0, - "step": 22094 - }, - { - "epoch": 1.6893170479958712, - "grad_norm": 0.00042516615940257907, - "learning_rate": 0.00019999859298567055, - "loss": 46.0, - "step": 22095 - }, - { - "epoch": 1.689393504979261, - "grad_norm": 0.004786976613104343, - "learning_rate": 0.00019999859285824986, - "loss": 46.0, - "step": 22096 - }, - { - "epoch": 1.6894699619626508, - "grad_norm": 0.0013289989437907934, - "learning_rate": 0.0001999985927308234, - "loss": 46.0, - "step": 22097 - }, - { - "epoch": 1.6895464189460405, - "grad_norm": 0.0011059901444241405, - "learning_rate": 0.0001999985926033912, - "loss": 46.0, - "step": 22098 - }, - { - "epoch": 1.6896228759294303, - "grad_norm": 0.0006552828708663583, - "learning_rate": 0.00019999859247595323, - "loss": 46.0, - "step": 22099 - }, - { - "epoch": 1.68969933291282, - "grad_norm": 0.0008212397224269807, - "learning_rate": 0.00019999859234850944, - "loss": 46.0, - "step": 22100 - }, - { - "epoch": 1.6897757898962098, - "grad_norm": 0.003853896399959922, - "learning_rate": 0.00019999859222105993, - "loss": 46.0, - "step": 22101 - }, - { - "epoch": 1.6898522468795993, - "grad_norm": 0.0007604114944115281, - "learning_rate": 0.00019999859209360462, - "loss": 46.0, - "step": 22102 - }, - { - "epoch": 1.689928703862989, - "grad_norm": 0.0011506242444738746, - "learning_rate": 0.00019999859196614356, - "loss": 46.0, - "step": 22103 - }, - { - "epoch": 1.6900051608463786, - "grad_norm": 0.000643672130536288, - "learning_rate": 0.00019999859183867673, - "loss": 46.0, - "step": 22104 - }, - { - "epoch": 1.6900816178297684, - "grad_norm": 0.001015696907415986, - "learning_rate": 0.00019999859171120413, - "loss": 46.0, - "step": 22105 - }, - { - "epoch": 1.6901580748131582, - "grad_norm": 0.0010475862072780728, - "learning_rate": 0.00019999859158372572, - "loss": 46.0, - "step": 22106 - }, - { - "epoch": 1.690234531796548, - "grad_norm": 0.003309564897790551, - "learning_rate": 0.00019999859145624157, - "loss": 46.0, - "step": 22107 - }, - { - "epoch": 1.6903109887799377, - "grad_norm": 0.0010428582318127155, - "learning_rate": 0.00019999859132875168, - "loss": 46.0, - "step": 22108 - }, - { - "epoch": 1.6903874457633274, - "grad_norm": 0.0013270392082631588, - "learning_rate": 0.00019999859120125598, - "loss": 46.0, - "step": 22109 - }, - { - "epoch": 1.6904639027467172, - "grad_norm": 0.0012736058561131358, - "learning_rate": 0.00019999859107375454, - "loss": 46.0, - "step": 22110 - }, - { - "epoch": 1.690540359730107, - "grad_norm": 0.0010910237906500697, - "learning_rate": 0.00019999859094624732, - "loss": 46.0, - "step": 22111 - }, - { - "epoch": 1.6906168167134967, - "grad_norm": 0.00038344794302247465, - "learning_rate": 0.00019999859081873433, - "loss": 46.0, - "step": 22112 - }, - { - "epoch": 1.6906932736968863, - "grad_norm": 0.0010292945662513375, - "learning_rate": 0.00019999859069121556, - "loss": 46.0, - "step": 22113 - }, - { - "epoch": 1.690769730680276, - "grad_norm": 0.0012062484165653586, - "learning_rate": 0.00019999859056369103, - "loss": 46.0, - "step": 22114 - }, - { - "epoch": 1.6908461876636656, - "grad_norm": 0.0010345166083425283, - "learning_rate": 0.00019999859043616072, - "loss": 46.0, - "step": 22115 - }, - { - "epoch": 1.6909226446470553, - "grad_norm": 0.0007773747202008963, - "learning_rate": 0.00019999859030862466, - "loss": 46.0, - "step": 22116 - }, - { - "epoch": 1.690999101630445, - "grad_norm": 0.0009605976520106196, - "learning_rate": 0.00019999859018108283, - "loss": 46.0, - "step": 22117 - }, - { - "epoch": 1.6910755586138349, - "grad_norm": 0.002996504306793213, - "learning_rate": 0.00019999859005353523, - "loss": 46.0, - "step": 22118 - }, - { - "epoch": 1.6911520155972246, - "grad_norm": 0.002304126275703311, - "learning_rate": 0.00019999858992598185, - "loss": 46.0, - "step": 22119 - }, - { - "epoch": 1.6912284725806144, - "grad_norm": 0.0008009443408809602, - "learning_rate": 0.00019999858979842267, - "loss": 46.0, - "step": 22120 - }, - { - "epoch": 1.6913049295640041, - "grad_norm": 0.006163548678159714, - "learning_rate": 0.00019999858967085777, - "loss": 46.0, - "step": 22121 - }, - { - "epoch": 1.691381386547394, - "grad_norm": 0.0012128724483773112, - "learning_rate": 0.00019999858954328708, - "loss": 46.0, - "step": 22122 - }, - { - "epoch": 1.6914578435307837, - "grad_norm": 0.0010021511698141694, - "learning_rate": 0.00019999858941571063, - "loss": 46.0, - "step": 22123 - }, - { - "epoch": 1.6915343005141732, - "grad_norm": 0.0012031501391902566, - "learning_rate": 0.0001999985892881284, - "loss": 46.0, - "step": 22124 - }, - { - "epoch": 1.691610757497563, - "grad_norm": 0.003160058753564954, - "learning_rate": 0.00019999858916054043, - "loss": 46.0, - "step": 22125 - }, - { - "epoch": 1.6916872144809525, - "grad_norm": 0.0007245881133712828, - "learning_rate": 0.00019999858903294664, - "loss": 46.0, - "step": 22126 - }, - { - "epoch": 1.6917636714643423, - "grad_norm": 0.0022353974636644125, - "learning_rate": 0.0001999985889053471, - "loss": 46.0, - "step": 22127 - }, - { - "epoch": 1.691840128447732, - "grad_norm": 0.001276383874937892, - "learning_rate": 0.0001999985887777418, - "loss": 46.0, - "step": 22128 - }, - { - "epoch": 1.6919165854311218, - "grad_norm": 0.0005574924871325493, - "learning_rate": 0.00019999858865013076, - "loss": 46.0, - "step": 22129 - }, - { - "epoch": 1.6919930424145115, - "grad_norm": 0.0007169734453782439, - "learning_rate": 0.0001999985885225139, - "loss": 46.0, - "step": 22130 - }, - { - "epoch": 1.6920694993979013, - "grad_norm": 0.003125410992652178, - "learning_rate": 0.0001999985883948913, - "loss": 46.0, - "step": 22131 - }, - { - "epoch": 1.692145956381291, - "grad_norm": 0.001720004016533494, - "learning_rate": 0.0001999985882672629, - "loss": 46.0, - "step": 22132 - }, - { - "epoch": 1.6922224133646808, - "grad_norm": 0.002073258627206087, - "learning_rate": 0.00019999858813962877, - "loss": 46.0, - "step": 22133 - }, - { - "epoch": 1.6922988703480706, - "grad_norm": 0.0008305757655762136, - "learning_rate": 0.00019999858801198882, - "loss": 46.0, - "step": 22134 - }, - { - "epoch": 1.6923753273314601, - "grad_norm": 0.000664488528855145, - "learning_rate": 0.00019999858788434315, - "loss": 46.0, - "step": 22135 - }, - { - "epoch": 1.6924517843148499, - "grad_norm": 0.0013135633198544383, - "learning_rate": 0.0001999985877566917, - "loss": 46.0, - "step": 22136 - }, - { - "epoch": 1.6925282412982394, - "grad_norm": 0.0017636786215007305, - "learning_rate": 0.00019999858762903447, - "loss": 46.0, - "step": 22137 - }, - { - "epoch": 1.6926046982816292, - "grad_norm": 0.0005673085106536746, - "learning_rate": 0.00019999858750137145, - "loss": 46.0, - "step": 22138 - }, - { - "epoch": 1.692681155265019, - "grad_norm": 0.0012310820166021585, - "learning_rate": 0.0001999985873737027, - "loss": 46.0, - "step": 22139 - }, - { - "epoch": 1.6927576122484087, - "grad_norm": 0.0011759101180359721, - "learning_rate": 0.00019999858724602818, - "loss": 46.0, - "step": 22140 - }, - { - "epoch": 1.6928340692317985, - "grad_norm": 0.000560302403755486, - "learning_rate": 0.00019999858711834787, - "loss": 46.0, - "step": 22141 - }, - { - "epoch": 1.6929105262151882, - "grad_norm": 0.0007434495491907, - "learning_rate": 0.0001999985869906618, - "loss": 46.0, - "step": 22142 - }, - { - "epoch": 1.692986983198578, - "grad_norm": 0.0011841882951557636, - "learning_rate": 0.00019999858686296995, - "loss": 46.0, - "step": 22143 - }, - { - "epoch": 1.6930634401819677, - "grad_norm": 0.00042658360325731337, - "learning_rate": 0.00019999858673527235, - "loss": 46.0, - "step": 22144 - }, - { - "epoch": 1.6931398971653573, - "grad_norm": 0.0007580728852190077, - "learning_rate": 0.00019999858660756895, - "loss": 46.0, - "step": 22145 - }, - { - "epoch": 1.693216354148747, - "grad_norm": 0.0013402904151007533, - "learning_rate": 0.0001999985864798598, - "loss": 46.0, - "step": 22146 - }, - { - "epoch": 1.6932928111321368, - "grad_norm": 0.000532217905856669, - "learning_rate": 0.00019999858635214488, - "loss": 46.0, - "step": 22147 - }, - { - "epoch": 1.6933692681155263, - "grad_norm": 0.0008148018969222903, - "learning_rate": 0.0001999985862244242, - "loss": 46.0, - "step": 22148 - }, - { - "epoch": 1.693445725098916, - "grad_norm": 0.0010160935344174504, - "learning_rate": 0.00019999858609669774, - "loss": 46.0, - "step": 22149 - }, - { - "epoch": 1.6935221820823059, - "grad_norm": 0.0063598970882594585, - "learning_rate": 0.00019999858596896552, - "loss": 46.0, - "step": 22150 - }, - { - "epoch": 1.6935986390656956, - "grad_norm": 0.000679582473821938, - "learning_rate": 0.00019999858584122753, - "loss": 46.0, - "step": 22151 - }, - { - "epoch": 1.6936750960490854, - "grad_norm": 0.003162949113175273, - "learning_rate": 0.00019999858571348377, - "loss": 46.0, - "step": 22152 - }, - { - "epoch": 1.6937515530324752, - "grad_norm": 0.001294243847951293, - "learning_rate": 0.0001999985855857342, - "loss": 46.0, - "step": 22153 - }, - { - "epoch": 1.693828010015865, - "grad_norm": 0.0006573147838935256, - "learning_rate": 0.0001999985854579789, - "loss": 46.0, - "step": 22154 - }, - { - "epoch": 1.6939044669992547, - "grad_norm": 0.0013010881375521421, - "learning_rate": 0.00019999858533021784, - "loss": 46.0, - "step": 22155 - }, - { - "epoch": 1.6939809239826442, - "grad_norm": 0.0011456274660304189, - "learning_rate": 0.00019999858520245098, - "loss": 46.0, - "step": 22156 - }, - { - "epoch": 1.694057380966034, - "grad_norm": 0.002017498714849353, - "learning_rate": 0.00019999858507467838, - "loss": 46.0, - "step": 22157 - }, - { - "epoch": 1.6941338379494237, - "grad_norm": 0.001456520170904696, - "learning_rate": 0.00019999858494689998, - "loss": 46.0, - "step": 22158 - }, - { - "epoch": 1.6942102949328133, - "grad_norm": 0.0016963450470939279, - "learning_rate": 0.00019999858481911586, - "loss": 46.0, - "step": 22159 - }, - { - "epoch": 1.694286751916203, - "grad_norm": 0.0007291499641723931, - "learning_rate": 0.00019999858469132593, - "loss": 46.0, - "step": 22160 - }, - { - "epoch": 1.6943632088995928, - "grad_norm": 0.003190933493897319, - "learning_rate": 0.0001999985845635302, - "loss": 46.0, - "step": 22161 - }, - { - "epoch": 1.6944396658829826, - "grad_norm": 0.002851868513971567, - "learning_rate": 0.00019999858443572877, - "loss": 46.0, - "step": 22162 - }, - { - "epoch": 1.6945161228663723, - "grad_norm": 0.0009559214813634753, - "learning_rate": 0.00019999858430792152, - "loss": 46.0, - "step": 22163 - }, - { - "epoch": 1.694592579849762, - "grad_norm": 0.0032951198518276215, - "learning_rate": 0.00019999858418010853, - "loss": 46.0, - "step": 22164 - }, - { - "epoch": 1.6946690368331518, - "grad_norm": 0.0005475416546687484, - "learning_rate": 0.00019999858405228977, - "loss": 46.0, - "step": 22165 - }, - { - "epoch": 1.6947454938165416, - "grad_norm": 0.00342411152087152, - "learning_rate": 0.0001999985839244652, - "loss": 46.0, - "step": 22166 - }, - { - "epoch": 1.6948219507999311, - "grad_norm": 0.0008787123369984329, - "learning_rate": 0.00019999858379663493, - "loss": 46.0, - "step": 22167 - }, - { - "epoch": 1.694898407783321, - "grad_norm": 0.0025145593099296093, - "learning_rate": 0.00019999858366879884, - "loss": 46.0, - "step": 22168 - }, - { - "epoch": 1.6949748647667107, - "grad_norm": 0.0024322818499058485, - "learning_rate": 0.000199998583540957, - "loss": 46.0, - "step": 22169 - }, - { - "epoch": 1.6950513217501002, - "grad_norm": 0.0006192693253979087, - "learning_rate": 0.00019999858341310939, - "loss": 46.0, - "step": 22170 - }, - { - "epoch": 1.69512777873349, - "grad_norm": 0.0007469177944585681, - "learning_rate": 0.000199998583285256, - "loss": 46.0, - "step": 22171 - }, - { - "epoch": 1.6952042357168797, - "grad_norm": 0.0005055687506683171, - "learning_rate": 0.00019999858315739683, - "loss": 46.0, - "step": 22172 - }, - { - "epoch": 1.6952806927002695, - "grad_norm": 0.0008832396124489605, - "learning_rate": 0.0001999985830295319, - "loss": 46.0, - "step": 22173 - }, - { - "epoch": 1.6953571496836592, - "grad_norm": 0.0012023644521832466, - "learning_rate": 0.00019999858290166122, - "loss": 46.0, - "step": 22174 - }, - { - "epoch": 1.695433606667049, - "grad_norm": 0.0012463612947613, - "learning_rate": 0.00019999858277378475, - "loss": 46.0, - "step": 22175 - }, - { - "epoch": 1.6955100636504388, - "grad_norm": 0.0054426779970526695, - "learning_rate": 0.00019999858264590253, - "loss": 46.0, - "step": 22176 - }, - { - "epoch": 1.6955865206338285, - "grad_norm": 0.000678927346598357, - "learning_rate": 0.00019999858251801454, - "loss": 46.0, - "step": 22177 - }, - { - "epoch": 1.695662977617218, - "grad_norm": 0.0011926769511774182, - "learning_rate": 0.00019999858239012075, - "loss": 46.0, - "step": 22178 - }, - { - "epoch": 1.6957394346006078, - "grad_norm": 0.0005148768541403115, - "learning_rate": 0.00019999858226222122, - "loss": 46.0, - "step": 22179 - }, - { - "epoch": 1.6958158915839976, - "grad_norm": 0.0008881281246431172, - "learning_rate": 0.0001999985821343159, - "loss": 46.0, - "step": 22180 - }, - { - "epoch": 1.6958923485673871, - "grad_norm": 0.0043052202090620995, - "learning_rate": 0.00019999858200640483, - "loss": 46.0, - "step": 22181 - }, - { - "epoch": 1.695968805550777, - "grad_norm": 0.0009644617093726993, - "learning_rate": 0.00019999858187848797, - "loss": 46.0, - "step": 22182 - }, - { - "epoch": 1.6960452625341667, - "grad_norm": 0.00042988162022084, - "learning_rate": 0.00019999858175056537, - "loss": 46.0, - "step": 22183 - }, - { - "epoch": 1.6961217195175564, - "grad_norm": 0.0006817795219831169, - "learning_rate": 0.000199998581622637, - "loss": 46.0, - "step": 22184 - }, - { - "epoch": 1.6961981765009462, - "grad_norm": 0.0010689935879781842, - "learning_rate": 0.00019999858149470282, - "loss": 46.0, - "step": 22185 - }, - { - "epoch": 1.696274633484336, - "grad_norm": 0.0011991215869784355, - "learning_rate": 0.0001999985813667629, - "loss": 46.0, - "step": 22186 - }, - { - "epoch": 1.6963510904677257, - "grad_norm": 0.0006758029339835048, - "learning_rate": 0.0001999985812388172, - "loss": 46.0, - "step": 22187 - }, - { - "epoch": 1.6964275474511155, - "grad_norm": 0.0007255064556375146, - "learning_rate": 0.00019999858111086573, - "loss": 46.0, - "step": 22188 - }, - { - "epoch": 1.696504004434505, - "grad_norm": 0.004217163659632206, - "learning_rate": 0.0001999985809829085, - "loss": 46.0, - "step": 22189 - }, - { - "epoch": 1.6965804614178948, - "grad_norm": 0.0005545195308513939, - "learning_rate": 0.0001999985808549455, - "loss": 46.0, - "step": 22190 - }, - { - "epoch": 1.6966569184012845, - "grad_norm": 0.0020643165335059166, - "learning_rate": 0.00019999858072697674, - "loss": 46.0, - "step": 22191 - }, - { - "epoch": 1.696733375384674, - "grad_norm": 0.0017001588130369782, - "learning_rate": 0.00019999858059900215, - "loss": 46.0, - "step": 22192 - }, - { - "epoch": 1.6968098323680638, - "grad_norm": 0.0016963101224973798, - "learning_rate": 0.00019999858047102187, - "loss": 46.0, - "step": 22193 - }, - { - "epoch": 1.6968862893514536, - "grad_norm": 0.0013595379423350096, - "learning_rate": 0.00019999858034303579, - "loss": 46.0, - "step": 22194 - }, - { - "epoch": 1.6969627463348433, - "grad_norm": 0.004765457473695278, - "learning_rate": 0.00019999858021504393, - "loss": 46.0, - "step": 22195 - }, - { - "epoch": 1.697039203318233, - "grad_norm": 0.004340450279414654, - "learning_rate": 0.0001999985800870463, - "loss": 46.0, - "step": 22196 - }, - { - "epoch": 1.6971156603016229, - "grad_norm": 0.0005983118899166584, - "learning_rate": 0.00019999857995904293, - "loss": 46.0, - "step": 22197 - }, - { - "epoch": 1.6971921172850126, - "grad_norm": 0.0011501519475132227, - "learning_rate": 0.00019999857983103375, - "loss": 46.0, - "step": 22198 - }, - { - "epoch": 1.6972685742684024, - "grad_norm": 0.004356314428150654, - "learning_rate": 0.00019999857970301883, - "loss": 46.0, - "step": 22199 - }, - { - "epoch": 1.697345031251792, - "grad_norm": 0.001399780623614788, - "learning_rate": 0.00019999857957499813, - "loss": 46.0, - "step": 22200 - }, - { - "epoch": 1.6974214882351817, - "grad_norm": 0.0131297642365098, - "learning_rate": 0.0001999985794469717, - "loss": 46.0, - "step": 22201 - }, - { - "epoch": 1.6974979452185714, - "grad_norm": 0.0019582435488700867, - "learning_rate": 0.00019999857931893945, - "loss": 46.0, - "step": 22202 - }, - { - "epoch": 1.697574402201961, - "grad_norm": 0.0003096163272857666, - "learning_rate": 0.00019999857919090144, - "loss": 46.0, - "step": 22203 - }, - { - "epoch": 1.6976508591853507, - "grad_norm": 0.0008582757436670363, - "learning_rate": 0.00019999857906285767, - "loss": 46.0, - "step": 22204 - }, - { - "epoch": 1.6977273161687405, - "grad_norm": 0.0016848635859787464, - "learning_rate": 0.0001999985789348081, - "loss": 46.0, - "step": 22205 - }, - { - "epoch": 1.6978037731521303, - "grad_norm": 0.0004925362882204354, - "learning_rate": 0.0001999985788067528, - "loss": 46.0, - "step": 22206 - }, - { - "epoch": 1.69788023013552, - "grad_norm": 0.0012169420951977372, - "learning_rate": 0.00019999857867869172, - "loss": 46.0, - "step": 22207 - }, - { - "epoch": 1.6979566871189098, - "grad_norm": 0.0005178020219318569, - "learning_rate": 0.00019999857855062487, - "loss": 46.0, - "step": 22208 - }, - { - "epoch": 1.6980331441022996, - "grad_norm": 0.0008111104252748191, - "learning_rate": 0.00019999857842255227, - "loss": 46.0, - "step": 22209 - }, - { - "epoch": 1.6981096010856893, - "grad_norm": 0.002278172643855214, - "learning_rate": 0.00019999857829447386, - "loss": 46.0, - "step": 22210 - }, - { - "epoch": 1.6981860580690789, - "grad_norm": 0.004765786230564117, - "learning_rate": 0.0001999985781663897, - "loss": 46.0, - "step": 22211 - }, - { - "epoch": 1.6982625150524686, - "grad_norm": 0.0017840036889538169, - "learning_rate": 0.00019999857803829977, - "loss": 46.0, - "step": 22212 - }, - { - "epoch": 1.6983389720358584, - "grad_norm": 0.0006655256147496402, - "learning_rate": 0.00019999857791020407, - "loss": 46.0, - "step": 22213 - }, - { - "epoch": 1.698415429019248, - "grad_norm": 0.000860373314935714, - "learning_rate": 0.0001999985777821026, - "loss": 46.0, - "step": 22214 - }, - { - "epoch": 1.6984918860026377, - "grad_norm": 0.0009574061841703951, - "learning_rate": 0.00019999857765399536, - "loss": 46.0, - "step": 22215 - }, - { - "epoch": 1.6985683429860274, - "grad_norm": 0.0011977136600762606, - "learning_rate": 0.00019999857752588235, - "loss": 46.0, - "step": 22216 - }, - { - "epoch": 1.6986447999694172, - "grad_norm": 0.0012669811258092523, - "learning_rate": 0.00019999857739776359, - "loss": 46.0, - "step": 22217 - }, - { - "epoch": 1.698721256952807, - "grad_norm": 0.00323835713788867, - "learning_rate": 0.00019999857726963902, - "loss": 46.0, - "step": 22218 - }, - { - "epoch": 1.6987977139361967, - "grad_norm": 0.0010458544129505754, - "learning_rate": 0.00019999857714150872, - "loss": 46.0, - "step": 22219 - }, - { - "epoch": 1.6988741709195865, - "grad_norm": 0.0008137304685078561, - "learning_rate": 0.00019999857701337263, - "loss": 46.0, - "step": 22220 - }, - { - "epoch": 1.6989506279029762, - "grad_norm": 0.0007417897577397525, - "learning_rate": 0.00019999857688523078, - "loss": 46.0, - "step": 22221 - }, - { - "epoch": 1.6990270848863658, - "grad_norm": 0.0031227373983711004, - "learning_rate": 0.00019999857675708315, - "loss": 46.0, - "step": 22222 - }, - { - "epoch": 1.6991035418697555, - "grad_norm": 0.0012534618144854903, - "learning_rate": 0.00019999857662892975, - "loss": 46.0, - "step": 22223 - }, - { - "epoch": 1.6991799988531453, - "grad_norm": 0.00120448914822191, - "learning_rate": 0.0001999985765007706, - "loss": 46.0, - "step": 22224 - }, - { - "epoch": 1.6992564558365348, - "grad_norm": 0.0006296585779637098, - "learning_rate": 0.00019999857637260568, - "loss": 46.0, - "step": 22225 - }, - { - "epoch": 1.6993329128199246, - "grad_norm": 0.0029547917656600475, - "learning_rate": 0.00019999857624443496, - "loss": 46.0, - "step": 22226 - }, - { - "epoch": 1.6994093698033144, - "grad_norm": 0.0024203951470553875, - "learning_rate": 0.0001999985761162585, - "loss": 46.0, - "step": 22227 - }, - { - "epoch": 1.6994858267867041, - "grad_norm": 0.0005265744403004646, - "learning_rate": 0.00019999857598807625, - "loss": 46.0, - "step": 22228 - }, - { - "epoch": 1.6995622837700939, - "grad_norm": 0.0023372878786176443, - "learning_rate": 0.00019999857585988826, - "loss": 46.0, - "step": 22229 - }, - { - "epoch": 1.6996387407534836, - "grad_norm": 0.0041009425185620785, - "learning_rate": 0.00019999857573169447, - "loss": 46.0, - "step": 22230 - }, - { - "epoch": 1.6997151977368734, - "grad_norm": 0.00406024930998683, - "learning_rate": 0.00019999857560349494, - "loss": 46.0, - "step": 22231 - }, - { - "epoch": 1.6997916547202632, - "grad_norm": 0.0027244891971349716, - "learning_rate": 0.0001999985754752896, - "loss": 46.0, - "step": 22232 - }, - { - "epoch": 1.6998681117036527, - "grad_norm": 0.0015552338445559144, - "learning_rate": 0.0001999985753470785, - "loss": 46.0, - "step": 22233 - }, - { - "epoch": 1.6999445686870425, - "grad_norm": 0.0034095821902155876, - "learning_rate": 0.00019999857521886167, - "loss": 46.0, - "step": 22234 - }, - { - "epoch": 1.700021025670432, - "grad_norm": 0.0010631275363266468, - "learning_rate": 0.00019999857509063904, - "loss": 46.0, - "step": 22235 - }, - { - "epoch": 1.7000974826538218, - "grad_norm": 0.00038365149521268904, - "learning_rate": 0.00019999857496241067, - "loss": 46.0, - "step": 22236 - }, - { - "epoch": 1.7001739396372115, - "grad_norm": 0.0010809325613081455, - "learning_rate": 0.00019999857483417647, - "loss": 46.0, - "step": 22237 - }, - { - "epoch": 1.7002503966206013, - "grad_norm": 0.006074589677155018, - "learning_rate": 0.00019999857470593655, - "loss": 46.0, - "step": 22238 - }, - { - "epoch": 1.700326853603991, - "grad_norm": 0.0013358741998672485, - "learning_rate": 0.00019999857457769085, - "loss": 46.0, - "step": 22239 - }, - { - "epoch": 1.7004033105873808, - "grad_norm": 0.002121866913512349, - "learning_rate": 0.00019999857444943938, - "loss": 46.0, - "step": 22240 - }, - { - "epoch": 1.7004797675707706, - "grad_norm": 0.0013715768000110984, - "learning_rate": 0.00019999857432118212, - "loss": 46.0, - "step": 22241 - }, - { - "epoch": 1.7005562245541603, - "grad_norm": 0.0008254814893007278, - "learning_rate": 0.00019999857419291913, - "loss": 46.0, - "step": 22242 - }, - { - "epoch": 1.70063268153755, - "grad_norm": 0.0009830788476392627, - "learning_rate": 0.00019999857406465034, - "loss": 46.0, - "step": 22243 - }, - { - "epoch": 1.7007091385209396, - "grad_norm": 0.000562444212846458, - "learning_rate": 0.0001999985739363758, - "loss": 46.0, - "step": 22244 - }, - { - "epoch": 1.7007855955043294, - "grad_norm": 0.009231217205524445, - "learning_rate": 0.00019999857380809547, - "loss": 46.0, - "step": 22245 - }, - { - "epoch": 1.700862052487719, - "grad_norm": 0.0022698829416185617, - "learning_rate": 0.0001999985736798094, - "loss": 46.0, - "step": 22246 - }, - { - "epoch": 1.7009385094711087, - "grad_norm": 0.00686991261318326, - "learning_rate": 0.0001999985735515175, - "loss": 46.0, - "step": 22247 - }, - { - "epoch": 1.7010149664544985, - "grad_norm": 0.0010533536551520228, - "learning_rate": 0.0001999985734232199, - "loss": 46.0, - "step": 22248 - }, - { - "epoch": 1.7010914234378882, - "grad_norm": 0.0006744272541254759, - "learning_rate": 0.0001999985732949165, - "loss": 46.0, - "step": 22249 - }, - { - "epoch": 1.701167880421278, - "grad_norm": 0.0031580820214003325, - "learning_rate": 0.00019999857316660733, - "loss": 46.0, - "step": 22250 - }, - { - "epoch": 1.7012443374046677, - "grad_norm": 0.0035408653784543276, - "learning_rate": 0.00019999857303829241, - "loss": 46.0, - "step": 22251 - }, - { - "epoch": 1.7013207943880575, - "grad_norm": 0.0020211224909871817, - "learning_rate": 0.0001999985729099717, - "loss": 46.0, - "step": 22252 - }, - { - "epoch": 1.7013972513714473, - "grad_norm": 0.0011255443096160889, - "learning_rate": 0.00019999857278164523, - "loss": 46.0, - "step": 22253 - }, - { - "epoch": 1.701473708354837, - "grad_norm": 0.001478796824812889, - "learning_rate": 0.00019999857265331298, - "loss": 46.0, - "step": 22254 - }, - { - "epoch": 1.7015501653382266, - "grad_norm": 0.0010935028549283743, - "learning_rate": 0.00019999857252497497, - "loss": 46.0, - "step": 22255 - }, - { - "epoch": 1.7016266223216163, - "grad_norm": 0.000634104828350246, - "learning_rate": 0.0001999985723966312, - "loss": 46.0, - "step": 22256 - }, - { - "epoch": 1.7017030793050059, - "grad_norm": 0.0015172429848462343, - "learning_rate": 0.00019999857226828165, - "loss": 46.0, - "step": 22257 - }, - { - "epoch": 1.7017795362883956, - "grad_norm": 0.002083627739921212, - "learning_rate": 0.00019999857213992632, - "loss": 46.0, - "step": 22258 - }, - { - "epoch": 1.7018559932717854, - "grad_norm": 0.00191004597581923, - "learning_rate": 0.00019999857201156524, - "loss": 46.0, - "step": 22259 - }, - { - "epoch": 1.7019324502551751, - "grad_norm": 0.0006558081950061023, - "learning_rate": 0.00019999857188319838, - "loss": 46.0, - "step": 22260 - }, - { - "epoch": 1.702008907238565, - "grad_norm": 0.0027500188443809748, - "learning_rate": 0.00019999857175482575, - "loss": 46.0, - "step": 22261 - }, - { - "epoch": 1.7020853642219547, - "grad_norm": 0.0010153194889426231, - "learning_rate": 0.00019999857162644733, - "loss": 46.0, - "step": 22262 - }, - { - "epoch": 1.7021618212053444, - "grad_norm": 0.002050751820206642, - "learning_rate": 0.00019999857149806318, - "loss": 46.0, - "step": 22263 - }, - { - "epoch": 1.7022382781887342, - "grad_norm": 0.002942193066701293, - "learning_rate": 0.00019999857136967323, - "loss": 46.0, - "step": 22264 - }, - { - "epoch": 1.702314735172124, - "grad_norm": 0.0015713314060121775, - "learning_rate": 0.0001999985712412775, - "loss": 46.0, - "step": 22265 - }, - { - "epoch": 1.7023911921555135, - "grad_norm": 0.001802604179829359, - "learning_rate": 0.00019999857111287604, - "loss": 46.0, - "step": 22266 - }, - { - "epoch": 1.7024676491389032, - "grad_norm": 0.002200249582529068, - "learning_rate": 0.0001999985709844688, - "loss": 46.0, - "step": 22267 - }, - { - "epoch": 1.7025441061222928, - "grad_norm": 0.0011776078026741743, - "learning_rate": 0.0001999985708560558, - "loss": 46.0, - "step": 22268 - }, - { - "epoch": 1.7026205631056825, - "grad_norm": 0.000524084665812552, - "learning_rate": 0.000199998570727637, - "loss": 46.0, - "step": 22269 - }, - { - "epoch": 1.7026970200890723, - "grad_norm": 0.004391106776893139, - "learning_rate": 0.00019999857059921244, - "loss": 46.0, - "step": 22270 - }, - { - "epoch": 1.702773477072462, - "grad_norm": 0.000983295845799148, - "learning_rate": 0.00019999857047078214, - "loss": 46.0, - "step": 22271 - }, - { - "epoch": 1.7028499340558518, - "grad_norm": 0.0051885670982301235, - "learning_rate": 0.00019999857034234606, - "loss": 46.0, - "step": 22272 - }, - { - "epoch": 1.7029263910392416, - "grad_norm": 0.0005018426454626024, - "learning_rate": 0.00019999857021390418, - "loss": 46.0, - "step": 22273 - }, - { - "epoch": 1.7030028480226314, - "grad_norm": 0.004570841323584318, - "learning_rate": 0.00019999857008545658, - "loss": 46.0, - "step": 22274 - }, - { - "epoch": 1.7030793050060211, - "grad_norm": 0.0004142904363106936, - "learning_rate": 0.00019999856995700315, - "loss": 46.0, - "step": 22275 - }, - { - "epoch": 1.7031557619894107, - "grad_norm": 0.000696543138474226, - "learning_rate": 0.000199998569828544, - "loss": 46.0, - "step": 22276 - }, - { - "epoch": 1.7032322189728004, - "grad_norm": 0.007419273257255554, - "learning_rate": 0.00019999856970007905, - "loss": 46.0, - "step": 22277 - }, - { - "epoch": 1.7033086759561902, - "grad_norm": 0.00047700945287942886, - "learning_rate": 0.00019999856957160833, - "loss": 46.0, - "step": 22278 - }, - { - "epoch": 1.7033851329395797, - "grad_norm": 0.002744599711149931, - "learning_rate": 0.00019999856944313187, - "loss": 46.0, - "step": 22279 - }, - { - "epoch": 1.7034615899229695, - "grad_norm": 0.0004348990914877504, - "learning_rate": 0.00019999856931464963, - "loss": 46.0, - "step": 22280 - }, - { - "epoch": 1.7035380469063592, - "grad_norm": 0.0020320408511906862, - "learning_rate": 0.0001999985691861616, - "loss": 46.0, - "step": 22281 - }, - { - "epoch": 1.703614503889749, - "grad_norm": 0.00048128562048077583, - "learning_rate": 0.00019999856905766783, - "loss": 46.0, - "step": 22282 - }, - { - "epoch": 1.7036909608731388, - "grad_norm": 0.00269178650341928, - "learning_rate": 0.00019999856892916827, - "loss": 46.0, - "step": 22283 - }, - { - "epoch": 1.7037674178565285, - "grad_norm": 0.0007060584030114114, - "learning_rate": 0.00019999856880066293, - "loss": 46.0, - "step": 22284 - }, - { - "epoch": 1.7038438748399183, - "grad_norm": 0.0010880966437980533, - "learning_rate": 0.00019999856867215185, - "loss": 46.0, - "step": 22285 - }, - { - "epoch": 1.703920331823308, - "grad_norm": 0.0014449474401772022, - "learning_rate": 0.00019999856854363497, - "loss": 46.0, - "step": 22286 - }, - { - "epoch": 1.7039967888066976, - "grad_norm": 0.0018279115902259946, - "learning_rate": 0.00019999856841511235, - "loss": 46.0, - "step": 22287 - }, - { - "epoch": 1.7040732457900873, - "grad_norm": 0.0013416545698419213, - "learning_rate": 0.00019999856828658395, - "loss": 46.0, - "step": 22288 - }, - { - "epoch": 1.704149702773477, - "grad_norm": 0.0017435051267966628, - "learning_rate": 0.00019999856815804975, - "loss": 46.0, - "step": 22289 - }, - { - "epoch": 1.7042261597568666, - "grad_norm": 0.0012131447438150644, - "learning_rate": 0.00019999856802950983, - "loss": 46.0, - "step": 22290 - }, - { - "epoch": 1.7043026167402564, - "grad_norm": 0.002309046685695648, - "learning_rate": 0.0001999985679009641, - "loss": 46.0, - "step": 22291 - }, - { - "epoch": 1.7043790737236462, - "grad_norm": 0.002016992075368762, - "learning_rate": 0.00019999856777241264, - "loss": 46.0, - "step": 22292 - }, - { - "epoch": 1.704455530707036, - "grad_norm": 0.000857964507304132, - "learning_rate": 0.00019999856764385537, - "loss": 46.0, - "step": 22293 - }, - { - "epoch": 1.7045319876904257, - "grad_norm": 0.001404703943990171, - "learning_rate": 0.0001999985675152924, - "loss": 46.0, - "step": 22294 - }, - { - "epoch": 1.7046084446738154, - "grad_norm": 0.002127214567735791, - "learning_rate": 0.0001999985673867236, - "loss": 46.0, - "step": 22295 - }, - { - "epoch": 1.7046849016572052, - "grad_norm": 0.0007519699865952134, - "learning_rate": 0.00019999856725814901, - "loss": 46.0, - "step": 22296 - }, - { - "epoch": 1.704761358640595, - "grad_norm": 0.0020818302873522043, - "learning_rate": 0.0001999985671295687, - "loss": 46.0, - "step": 22297 - }, - { - "epoch": 1.7048378156239845, - "grad_norm": 0.0007663040305487812, - "learning_rate": 0.0001999985670009826, - "loss": 46.0, - "step": 22298 - }, - { - "epoch": 1.7049142726073743, - "grad_norm": 0.0031487327069044113, - "learning_rate": 0.00019999856687239075, - "loss": 46.0, - "step": 22299 - }, - { - "epoch": 1.704990729590764, - "grad_norm": 0.0017989104380831122, - "learning_rate": 0.00019999856674379312, - "loss": 46.0, - "step": 22300 - }, - { - "epoch": 1.7050671865741536, - "grad_norm": 0.0006492551183328032, - "learning_rate": 0.0001999985666151897, - "loss": 46.0, - "step": 22301 - }, - { - "epoch": 1.7051436435575433, - "grad_norm": 0.0030262882355600595, - "learning_rate": 0.00019999856648658052, - "loss": 46.0, - "step": 22302 - }, - { - "epoch": 1.705220100540933, - "grad_norm": 0.0006862126756459475, - "learning_rate": 0.00019999856635796558, - "loss": 46.0, - "step": 22303 - }, - { - "epoch": 1.7052965575243229, - "grad_norm": 0.00170803046785295, - "learning_rate": 0.00019999856622934488, - "loss": 46.0, - "step": 22304 - }, - { - "epoch": 1.7053730145077126, - "grad_norm": 0.0024671442806720734, - "learning_rate": 0.0001999985661007184, - "loss": 46.0, - "step": 22305 - }, - { - "epoch": 1.7054494714911024, - "grad_norm": 0.0012373417848721147, - "learning_rate": 0.00019999856597208615, - "loss": 46.0, - "step": 22306 - }, - { - "epoch": 1.7055259284744921, - "grad_norm": 0.000414506095694378, - "learning_rate": 0.00019999856584344814, - "loss": 46.0, - "step": 22307 - }, - { - "epoch": 1.705602385457882, - "grad_norm": 0.0029115143697708845, - "learning_rate": 0.00019999856571480433, - "loss": 46.0, - "step": 22308 - }, - { - "epoch": 1.7056788424412714, - "grad_norm": 0.0013761200243607163, - "learning_rate": 0.0001999985655861548, - "loss": 46.0, - "step": 22309 - }, - { - "epoch": 1.7057552994246612, - "grad_norm": 0.0019353954121470451, - "learning_rate": 0.00019999856545749943, - "loss": 46.0, - "step": 22310 - }, - { - "epoch": 1.705831756408051, - "grad_norm": 0.0014452057657763362, - "learning_rate": 0.00019999856532883835, - "loss": 46.0, - "step": 22311 - }, - { - "epoch": 1.7059082133914405, - "grad_norm": 0.0011837605852633715, - "learning_rate": 0.0001999985652001715, - "loss": 46.0, - "step": 22312 - }, - { - "epoch": 1.7059846703748303, - "grad_norm": 0.00034283779677934945, - "learning_rate": 0.00019999856507149885, - "loss": 46.0, - "step": 22313 - }, - { - "epoch": 1.70606112735822, - "grad_norm": 0.002047367161139846, - "learning_rate": 0.00019999856494282042, - "loss": 46.0, - "step": 22314 - }, - { - "epoch": 1.7061375843416098, - "grad_norm": 0.005829261615872383, - "learning_rate": 0.00019999856481413625, - "loss": 46.0, - "step": 22315 - }, - { - "epoch": 1.7062140413249995, - "grad_norm": 0.006298776715993881, - "learning_rate": 0.00019999856468544633, - "loss": 46.0, - "step": 22316 - }, - { - "epoch": 1.7062904983083893, - "grad_norm": 0.0007811666582711041, - "learning_rate": 0.0001999985645567506, - "loss": 46.0, - "step": 22317 - }, - { - "epoch": 1.706366955291779, - "grad_norm": 0.0005377213819883764, - "learning_rate": 0.00019999856442804914, - "loss": 46.0, - "step": 22318 - }, - { - "epoch": 1.7064434122751688, - "grad_norm": 0.0057342019863426685, - "learning_rate": 0.00019999856429934188, - "loss": 46.0, - "step": 22319 - }, - { - "epoch": 1.7065198692585584, - "grad_norm": 0.0014235972194001079, - "learning_rate": 0.00019999856417062887, - "loss": 46.0, - "step": 22320 - }, - { - "epoch": 1.7065963262419481, - "grad_norm": 0.00045009213499724865, - "learning_rate": 0.00019999856404191005, - "loss": 46.0, - "step": 22321 - }, - { - "epoch": 1.7066727832253379, - "grad_norm": 0.003260469762608409, - "learning_rate": 0.00019999856391318552, - "loss": 46.0, - "step": 22322 - }, - { - "epoch": 1.7067492402087274, - "grad_norm": 0.0010507296537980437, - "learning_rate": 0.00019999856378445516, - "loss": 46.0, - "step": 22323 - }, - { - "epoch": 1.7068256971921172, - "grad_norm": 0.00036351699964143336, - "learning_rate": 0.00019999856365571908, - "loss": 46.0, - "step": 22324 - }, - { - "epoch": 1.706902154175507, - "grad_norm": 0.005515695083886385, - "learning_rate": 0.0001999985635269772, - "loss": 46.0, - "step": 22325 - }, - { - "epoch": 1.7069786111588967, - "grad_norm": 0.0007527195266447961, - "learning_rate": 0.00019999856339822958, - "loss": 46.0, - "step": 22326 - }, - { - "epoch": 1.7070550681422865, - "grad_norm": 0.0006638483610004187, - "learning_rate": 0.00019999856326947618, - "loss": 46.0, - "step": 22327 - }, - { - "epoch": 1.7071315251256762, - "grad_norm": 0.003606505459174514, - "learning_rate": 0.00019999856314071698, - "loss": 46.0, - "step": 22328 - }, - { - "epoch": 1.707207982109066, - "grad_norm": 0.0006367659079842269, - "learning_rate": 0.00019999856301195203, - "loss": 46.0, - "step": 22329 - }, - { - "epoch": 1.7072844390924558, - "grad_norm": 0.001677622552961111, - "learning_rate": 0.00019999856288318134, - "loss": 46.0, - "step": 22330 - }, - { - "epoch": 1.7073608960758453, - "grad_norm": 0.0004196741501800716, - "learning_rate": 0.00019999856275440485, - "loss": 46.0, - "step": 22331 - }, - { - "epoch": 1.707437353059235, - "grad_norm": 0.0005129675264470279, - "learning_rate": 0.00019999856262562258, - "loss": 46.0, - "step": 22332 - }, - { - "epoch": 1.7075138100426248, - "grad_norm": 0.0010110505390912294, - "learning_rate": 0.00019999856249683457, - "loss": 46.0, - "step": 22333 - }, - { - "epoch": 1.7075902670260144, - "grad_norm": 0.004729064647108316, - "learning_rate": 0.00019999856236804079, - "loss": 46.0, - "step": 22334 - }, - { - "epoch": 1.7076667240094041, - "grad_norm": 0.00293494644574821, - "learning_rate": 0.0001999985622392412, - "loss": 46.0, - "step": 22335 - }, - { - "epoch": 1.7077431809927939, - "grad_norm": 0.012141870334744453, - "learning_rate": 0.0001999985621104359, - "loss": 46.0, - "step": 22336 - }, - { - "epoch": 1.7078196379761836, - "grad_norm": 0.0005728612304665148, - "learning_rate": 0.0001999985619816248, - "loss": 46.0, - "step": 22337 - }, - { - "epoch": 1.7078960949595734, - "grad_norm": 0.002871981356292963, - "learning_rate": 0.0001999985618528079, - "loss": 46.0, - "step": 22338 - }, - { - "epoch": 1.7079725519429632, - "grad_norm": 0.0005553775699809194, - "learning_rate": 0.00019999856172398528, - "loss": 46.0, - "step": 22339 - }, - { - "epoch": 1.708049008926353, - "grad_norm": 0.0020619346760213375, - "learning_rate": 0.00019999856159515686, - "loss": 46.0, - "step": 22340 - }, - { - "epoch": 1.7081254659097427, - "grad_norm": 0.0006550454418174922, - "learning_rate": 0.00019999856146632269, - "loss": 46.0, - "step": 22341 - }, - { - "epoch": 1.7082019228931322, - "grad_norm": 0.0030603879131376743, - "learning_rate": 0.00019999856133748274, - "loss": 46.0, - "step": 22342 - }, - { - "epoch": 1.708278379876522, - "grad_norm": 0.000612535048276186, - "learning_rate": 0.00019999856120863702, - "loss": 46.0, - "step": 22343 - }, - { - "epoch": 1.7083548368599117, - "grad_norm": 0.0009035649127326906, - "learning_rate": 0.00019999856107978553, - "loss": 46.0, - "step": 22344 - }, - { - "epoch": 1.7084312938433013, - "grad_norm": 0.0008447629516012967, - "learning_rate": 0.0001999985609509283, - "loss": 46.0, - "step": 22345 - }, - { - "epoch": 1.708507750826691, - "grad_norm": 0.0009394664084538817, - "learning_rate": 0.00019999856082206525, - "loss": 46.0, - "step": 22346 - }, - { - "epoch": 1.7085842078100808, - "grad_norm": 0.0056218672543764114, - "learning_rate": 0.00019999856069319644, - "loss": 46.0, - "step": 22347 - }, - { - "epoch": 1.7086606647934706, - "grad_norm": 0.0030335215851664543, - "learning_rate": 0.00019999856056432188, - "loss": 46.0, - "step": 22348 - }, - { - "epoch": 1.7087371217768603, - "grad_norm": 0.0012526758946478367, - "learning_rate": 0.00019999856043544155, - "loss": 46.0, - "step": 22349 - }, - { - "epoch": 1.70881357876025, - "grad_norm": 0.002431680681183934, - "learning_rate": 0.00019999856030655547, - "loss": 46.0, - "step": 22350 - }, - { - "epoch": 1.7088900357436398, - "grad_norm": 0.0017076322110369802, - "learning_rate": 0.0001999985601776636, - "loss": 46.0, - "step": 22351 - }, - { - "epoch": 1.7089664927270296, - "grad_norm": 0.0008788536651991308, - "learning_rate": 0.00019999856004876594, - "loss": 46.0, - "step": 22352 - }, - { - "epoch": 1.7090429497104191, - "grad_norm": 0.001363835297524929, - "learning_rate": 0.00019999855991986251, - "loss": 46.0, - "step": 22353 - }, - { - "epoch": 1.709119406693809, - "grad_norm": 0.003085857490077615, - "learning_rate": 0.00019999855979095334, - "loss": 46.0, - "step": 22354 - }, - { - "epoch": 1.7091958636771987, - "grad_norm": 0.0008214113768190145, - "learning_rate": 0.0001999985596620384, - "loss": 46.0, - "step": 22355 - }, - { - "epoch": 1.7092723206605882, - "grad_norm": 0.0009509060764685273, - "learning_rate": 0.00019999855953311768, - "loss": 46.0, - "step": 22356 - }, - { - "epoch": 1.709348777643978, - "grad_norm": 0.003197141457349062, - "learning_rate": 0.0001999985594041912, - "loss": 46.0, - "step": 22357 - }, - { - "epoch": 1.7094252346273677, - "grad_norm": 0.002035139361396432, - "learning_rate": 0.00019999855927525895, - "loss": 46.0, - "step": 22358 - }, - { - "epoch": 1.7095016916107575, - "grad_norm": 0.0012600065674632788, - "learning_rate": 0.0001999985591463209, - "loss": 46.0, - "step": 22359 - }, - { - "epoch": 1.7095781485941473, - "grad_norm": 0.0006923387991264462, - "learning_rate": 0.0001999985590173771, - "loss": 46.0, - "step": 22360 - }, - { - "epoch": 1.709654605577537, - "grad_norm": 0.0006955589633435011, - "learning_rate": 0.00019999855888842754, - "loss": 46.0, - "step": 22361 - }, - { - "epoch": 1.7097310625609268, - "grad_norm": 0.0016970790456980467, - "learning_rate": 0.0001999985587594722, - "loss": 46.0, - "step": 22362 - }, - { - "epoch": 1.7098075195443165, - "grad_norm": 0.018865732476115227, - "learning_rate": 0.0001999985586305111, - "loss": 46.0, - "step": 22363 - }, - { - "epoch": 1.709883976527706, - "grad_norm": 0.0024612094275653362, - "learning_rate": 0.00019999855850154423, - "loss": 46.0, - "step": 22364 - }, - { - "epoch": 1.7099604335110958, - "grad_norm": 0.0006160700577311218, - "learning_rate": 0.0001999985583725716, - "loss": 46.0, - "step": 22365 - }, - { - "epoch": 1.7100368904944856, - "grad_norm": 0.02170710824429989, - "learning_rate": 0.00019999855824359318, - "loss": 46.0, - "step": 22366 - }, - { - "epoch": 1.7101133474778751, - "grad_norm": 0.0005958536057732999, - "learning_rate": 0.000199998558114609, - "loss": 46.0, - "step": 22367 - }, - { - "epoch": 1.710189804461265, - "grad_norm": 0.0005198173457756639, - "learning_rate": 0.00019999855798561903, - "loss": 46.0, - "step": 22368 - }, - { - "epoch": 1.7102662614446547, - "grad_norm": 0.0020933523774147034, - "learning_rate": 0.00019999855785662331, - "loss": 46.0, - "step": 22369 - }, - { - "epoch": 1.7103427184280444, - "grad_norm": 0.0025250727776437998, - "learning_rate": 0.00019999855772762182, - "loss": 46.0, - "step": 22370 - }, - { - "epoch": 1.7104191754114342, - "grad_norm": 0.0009470495278947055, - "learning_rate": 0.00019999855759861456, - "loss": 46.0, - "step": 22371 - }, - { - "epoch": 1.710495632394824, - "grad_norm": 0.0021975236013531685, - "learning_rate": 0.00019999855746960155, - "loss": 46.0, - "step": 22372 - }, - { - "epoch": 1.7105720893782137, - "grad_norm": 0.00020594771194737405, - "learning_rate": 0.00019999855734058276, - "loss": 46.0, - "step": 22373 - }, - { - "epoch": 1.7106485463616035, - "grad_norm": 0.0026221165899187326, - "learning_rate": 0.00019999855721155818, - "loss": 46.0, - "step": 22374 - }, - { - "epoch": 1.710725003344993, - "grad_norm": 0.000708280480466783, - "learning_rate": 0.00019999855708252785, - "loss": 46.0, - "step": 22375 - }, - { - "epoch": 1.7108014603283828, - "grad_norm": 0.0017831235891208053, - "learning_rate": 0.0001999985569534917, - "loss": 46.0, - "step": 22376 - }, - { - "epoch": 1.7108779173117723, - "grad_norm": 0.002299598418176174, - "learning_rate": 0.00019999855682444986, - "loss": 46.0, - "step": 22377 - }, - { - "epoch": 1.710954374295162, - "grad_norm": 0.008643848821520805, - "learning_rate": 0.0001999985566954022, - "loss": 46.0, - "step": 22378 - }, - { - "epoch": 1.7110308312785518, - "grad_norm": 0.0027712073642760515, - "learning_rate": 0.00019999855656634879, - "loss": 46.0, - "step": 22379 - }, - { - "epoch": 1.7111072882619416, - "grad_norm": 0.0006109913811087608, - "learning_rate": 0.00019999855643728961, - "loss": 46.0, - "step": 22380 - }, - { - "epoch": 1.7111837452453313, - "grad_norm": 0.0009963305201381445, - "learning_rate": 0.00019999855630822467, - "loss": 46.0, - "step": 22381 - }, - { - "epoch": 1.711260202228721, - "grad_norm": 0.0012679974315688014, - "learning_rate": 0.00019999855617915393, - "loss": 46.0, - "step": 22382 - }, - { - "epoch": 1.7113366592121109, - "grad_norm": 0.001112680067308247, - "learning_rate": 0.00019999855605007743, - "loss": 46.0, - "step": 22383 - }, - { - "epoch": 1.7114131161955006, - "grad_norm": 0.0006381681305356324, - "learning_rate": 0.0001999985559209952, - "loss": 46.0, - "step": 22384 - }, - { - "epoch": 1.7114895731788904, - "grad_norm": 0.0017438214272260666, - "learning_rate": 0.00019999855579190716, - "loss": 46.0, - "step": 22385 - }, - { - "epoch": 1.71156603016228, - "grad_norm": 0.0004668562032748014, - "learning_rate": 0.00019999855566281335, - "loss": 46.0, - "step": 22386 - }, - { - "epoch": 1.7116424871456697, - "grad_norm": 0.0012550413375720382, - "learning_rate": 0.0001999985555337138, - "loss": 46.0, - "step": 22387 - }, - { - "epoch": 1.7117189441290592, - "grad_norm": 0.001102479174733162, - "learning_rate": 0.00019999855540460843, - "loss": 46.0, - "step": 22388 - }, - { - "epoch": 1.711795401112449, - "grad_norm": 0.0008673723787069321, - "learning_rate": 0.00019999855527549733, - "loss": 46.0, - "step": 22389 - }, - { - "epoch": 1.7118718580958387, - "grad_norm": 0.001250889734365046, - "learning_rate": 0.00019999855514638045, - "loss": 46.0, - "step": 22390 - }, - { - "epoch": 1.7119483150792285, - "grad_norm": 0.003054277505725622, - "learning_rate": 0.0001999985550172578, - "loss": 46.0, - "step": 22391 - }, - { - "epoch": 1.7120247720626183, - "grad_norm": 0.0006690003210678697, - "learning_rate": 0.0001999985548881294, - "loss": 46.0, - "step": 22392 - }, - { - "epoch": 1.712101229046008, - "grad_norm": 0.0006188425468280911, - "learning_rate": 0.0001999985547589952, - "loss": 46.0, - "step": 22393 - }, - { - "epoch": 1.7121776860293978, - "grad_norm": 0.00827113725244999, - "learning_rate": 0.00019999855462985523, - "loss": 46.0, - "step": 22394 - }, - { - "epoch": 1.7122541430127876, - "grad_norm": 0.008701412007212639, - "learning_rate": 0.00019999855450070951, - "loss": 46.0, - "step": 22395 - }, - { - "epoch": 1.7123305999961773, - "grad_norm": 0.0007379450835287571, - "learning_rate": 0.00019999855437155802, - "loss": 46.0, - "step": 22396 - }, - { - "epoch": 1.7124070569795669, - "grad_norm": 0.0005624516634270549, - "learning_rate": 0.00019999855424240076, - "loss": 46.0, - "step": 22397 - }, - { - "epoch": 1.7124835139629566, - "grad_norm": 0.0019218729576095939, - "learning_rate": 0.00019999855411323775, - "loss": 46.0, - "step": 22398 - }, - { - "epoch": 1.7125599709463462, - "grad_norm": 0.001076512853614986, - "learning_rate": 0.00019999855398406894, - "loss": 46.0, - "step": 22399 - }, - { - "epoch": 1.712636427929736, - "grad_norm": 0.0014896945795044303, - "learning_rate": 0.00019999855385489435, - "loss": 46.0, - "step": 22400 - }, - { - "epoch": 1.7127128849131257, - "grad_norm": 0.004581732675433159, - "learning_rate": 0.00019999855372571402, - "loss": 46.0, - "step": 22401 - }, - { - "epoch": 1.7127893418965154, - "grad_norm": 0.0032729562371969223, - "learning_rate": 0.00019999855359652792, - "loss": 46.0, - "step": 22402 - }, - { - "epoch": 1.7128657988799052, - "grad_norm": 0.003369504353031516, - "learning_rate": 0.00019999855346733604, - "loss": 46.0, - "step": 22403 - }, - { - "epoch": 1.712942255863295, - "grad_norm": 0.0024576676078140736, - "learning_rate": 0.00019999855333813836, - "loss": 46.0, - "step": 22404 - }, - { - "epoch": 1.7130187128466847, - "grad_norm": 0.001060210750438273, - "learning_rate": 0.00019999855320893497, - "loss": 46.0, - "step": 22405 - }, - { - "epoch": 1.7130951698300745, - "grad_norm": 0.0010517387418076396, - "learning_rate": 0.0001999985530797258, - "loss": 46.0, - "step": 22406 - }, - { - "epoch": 1.713171626813464, - "grad_norm": 0.0012878982815891504, - "learning_rate": 0.00019999855295051082, - "loss": 46.0, - "step": 22407 - }, - { - "epoch": 1.7132480837968538, - "grad_norm": 0.00787353701889515, - "learning_rate": 0.0001999985528212901, - "loss": 46.0, - "step": 22408 - }, - { - "epoch": 1.7133245407802435, - "grad_norm": 0.0009401249117217958, - "learning_rate": 0.0001999985526920636, - "loss": 46.0, - "step": 22409 - }, - { - "epoch": 1.713400997763633, - "grad_norm": 0.0017263785703107715, - "learning_rate": 0.00019999855256283132, - "loss": 46.0, - "step": 22410 - }, - { - "epoch": 1.7134774547470228, - "grad_norm": 0.0007351974491029978, - "learning_rate": 0.00019999855243359331, - "loss": 46.0, - "step": 22411 - }, - { - "epoch": 1.7135539117304126, - "grad_norm": 0.0007150332676246762, - "learning_rate": 0.0001999985523043495, - "loss": 46.0, - "step": 22412 - }, - { - "epoch": 1.7136303687138024, - "grad_norm": 0.007817660458385944, - "learning_rate": 0.00019999855217509992, - "loss": 46.0, - "step": 22413 - }, - { - "epoch": 1.7137068256971921, - "grad_norm": 0.0008801393560133874, - "learning_rate": 0.0001999985520458446, - "loss": 46.0, - "step": 22414 - }, - { - "epoch": 1.7137832826805819, - "grad_norm": 0.003122924827039242, - "learning_rate": 0.00019999855191658346, - "loss": 46.0, - "step": 22415 - }, - { - "epoch": 1.7138597396639716, - "grad_norm": 0.0023737852461636066, - "learning_rate": 0.00019999855178731658, - "loss": 46.0, - "step": 22416 - }, - { - "epoch": 1.7139361966473614, - "grad_norm": 0.001244217506609857, - "learning_rate": 0.00019999855165804396, - "loss": 46.0, - "step": 22417 - }, - { - "epoch": 1.714012653630751, - "grad_norm": 0.0006176520837470889, - "learning_rate": 0.00019999855152876553, - "loss": 46.0, - "step": 22418 - }, - { - "epoch": 1.7140891106141407, - "grad_norm": 0.00201747240498662, - "learning_rate": 0.0001999985513994813, - "loss": 46.0, - "step": 22419 - }, - { - "epoch": 1.7141655675975305, - "grad_norm": 0.001043471391312778, - "learning_rate": 0.00019999855127019137, - "loss": 46.0, - "step": 22420 - }, - { - "epoch": 1.71424202458092, - "grad_norm": 0.0007902960060164332, - "learning_rate": 0.00019999855114089562, - "loss": 46.0, - "step": 22421 - }, - { - "epoch": 1.7143184815643098, - "grad_norm": 0.0017413819441571832, - "learning_rate": 0.00019999855101159416, - "loss": 46.0, - "step": 22422 - }, - { - "epoch": 1.7143949385476995, - "grad_norm": 0.0011369165731593966, - "learning_rate": 0.00019999855088228687, - "loss": 46.0, - "step": 22423 - }, - { - "epoch": 1.7144713955310893, - "grad_norm": 0.003757861442863941, - "learning_rate": 0.00019999855075297386, - "loss": 46.0, - "step": 22424 - }, - { - "epoch": 1.714547852514479, - "grad_norm": 0.0028186722192913294, - "learning_rate": 0.00019999855062365502, - "loss": 46.0, - "step": 22425 - }, - { - "epoch": 1.7146243094978688, - "grad_norm": 0.0009562631603330374, - "learning_rate": 0.00019999855049433046, - "loss": 46.0, - "step": 22426 - }, - { - "epoch": 1.7147007664812586, - "grad_norm": 0.0009940763702616096, - "learning_rate": 0.0001999985503650001, - "loss": 46.0, - "step": 22427 - }, - { - "epoch": 1.7147772234646483, - "grad_norm": 0.0030138599686324596, - "learning_rate": 0.000199998550235664, - "loss": 46.0, - "step": 22428 - }, - { - "epoch": 1.7148536804480379, - "grad_norm": 0.0005366287077777088, - "learning_rate": 0.00019999855010632213, - "loss": 46.0, - "step": 22429 - }, - { - "epoch": 1.7149301374314276, - "grad_norm": 0.001073703751899302, - "learning_rate": 0.00019999854997697448, - "loss": 46.0, - "step": 22430 - }, - { - "epoch": 1.7150065944148174, - "grad_norm": 0.0009289165609516203, - "learning_rate": 0.00019999854984762103, - "loss": 46.0, - "step": 22431 - }, - { - "epoch": 1.715083051398207, - "grad_norm": 0.004284227266907692, - "learning_rate": 0.00019999854971826183, - "loss": 46.0, - "step": 22432 - }, - { - "epoch": 1.7151595083815967, - "grad_norm": 0.0010894276201725006, - "learning_rate": 0.00019999854958889689, - "loss": 46.0, - "step": 22433 - }, - { - "epoch": 1.7152359653649865, - "grad_norm": 0.004678845871239901, - "learning_rate": 0.00019999854945952617, - "loss": 46.0, - "step": 22434 - }, - { - "epoch": 1.7153124223483762, - "grad_norm": 0.0004139160446356982, - "learning_rate": 0.00019999854933014968, - "loss": 46.0, - "step": 22435 - }, - { - "epoch": 1.715388879331766, - "grad_norm": 0.000822081696242094, - "learning_rate": 0.0001999985492007674, - "loss": 46.0, - "step": 22436 - }, - { - "epoch": 1.7154653363151557, - "grad_norm": 0.0018326276913285255, - "learning_rate": 0.00019999854907137935, - "loss": 46.0, - "step": 22437 - }, - { - "epoch": 1.7155417932985455, - "grad_norm": 0.001037940033711493, - "learning_rate": 0.00019999854894198554, - "loss": 46.0, - "step": 22438 - }, - { - "epoch": 1.7156182502819353, - "grad_norm": 0.00408597057685256, - "learning_rate": 0.00019999854881258598, - "loss": 46.0, - "step": 22439 - }, - { - "epoch": 1.7156947072653248, - "grad_norm": 0.0005092289648018777, - "learning_rate": 0.00019999854868318063, - "loss": 46.0, - "step": 22440 - }, - { - "epoch": 1.7157711642487146, - "grad_norm": 0.0005906568840146065, - "learning_rate": 0.00019999854855376952, - "loss": 46.0, - "step": 22441 - }, - { - "epoch": 1.7158476212321043, - "grad_norm": 0.0013110678410157561, - "learning_rate": 0.00019999854842435265, - "loss": 46.0, - "step": 22442 - }, - { - "epoch": 1.7159240782154939, - "grad_norm": 0.0006217741756699979, - "learning_rate": 0.00019999854829493, - "loss": 46.0, - "step": 22443 - }, - { - "epoch": 1.7160005351988836, - "grad_norm": 0.0018422106513753533, - "learning_rate": 0.00019999854816550158, - "loss": 46.0, - "step": 22444 - }, - { - "epoch": 1.7160769921822734, - "grad_norm": 0.0005019055097363889, - "learning_rate": 0.00019999854803606738, - "loss": 46.0, - "step": 22445 - }, - { - "epoch": 1.7161534491656631, - "grad_norm": 0.0008745864033699036, - "learning_rate": 0.0001999985479066274, - "loss": 46.0, - "step": 22446 - }, - { - "epoch": 1.716229906149053, - "grad_norm": 0.0008612436940893531, - "learning_rate": 0.00019999854777718167, - "loss": 46.0, - "step": 22447 - }, - { - "epoch": 1.7163063631324427, - "grad_norm": 0.0018096229759976268, - "learning_rate": 0.00019999854764773015, - "loss": 46.0, - "step": 22448 - }, - { - "epoch": 1.7163828201158324, - "grad_norm": 0.005119806155562401, - "learning_rate": 0.00019999854751827289, - "loss": 46.0, - "step": 22449 - }, - { - "epoch": 1.7164592770992222, - "grad_norm": 0.0006359869730658829, - "learning_rate": 0.00019999854738880985, - "loss": 46.0, - "step": 22450 - }, - { - "epoch": 1.7165357340826117, - "grad_norm": 0.0008692768751643598, - "learning_rate": 0.00019999854725934104, - "loss": 46.0, - "step": 22451 - }, - { - "epoch": 1.7166121910660015, - "grad_norm": 0.0011077569797635078, - "learning_rate": 0.00019999854712986648, - "loss": 46.0, - "step": 22452 - }, - { - "epoch": 1.7166886480493913, - "grad_norm": 0.0009412099607288837, - "learning_rate": 0.00019999854700038613, - "loss": 46.0, - "step": 22453 - }, - { - "epoch": 1.7167651050327808, - "grad_norm": 0.003998979926109314, - "learning_rate": 0.0001999985468709, - "loss": 46.0, - "step": 22454 - }, - { - "epoch": 1.7168415620161706, - "grad_norm": 0.0013675636146217585, - "learning_rate": 0.0001999985467414081, - "loss": 46.0, - "step": 22455 - }, - { - "epoch": 1.7169180189995603, - "grad_norm": 0.0029458513017743826, - "learning_rate": 0.00019999854661191044, - "loss": 46.0, - "step": 22456 - }, - { - "epoch": 1.71699447598295, - "grad_norm": 0.0009958730079233646, - "learning_rate": 0.00019999854648240705, - "loss": 46.0, - "step": 22457 - }, - { - "epoch": 1.7170709329663398, - "grad_norm": 0.0008039625827223063, - "learning_rate": 0.00019999854635289782, - "loss": 46.0, - "step": 22458 - }, - { - "epoch": 1.7171473899497296, - "grad_norm": 0.0002651166287250817, - "learning_rate": 0.00019999854622338288, - "loss": 46.0, - "step": 22459 - }, - { - "epoch": 1.7172238469331194, - "grad_norm": 0.0006535813445225358, - "learning_rate": 0.00019999854609386214, - "loss": 46.0, - "step": 22460 - }, - { - "epoch": 1.7173003039165091, - "grad_norm": 0.002061316976323724, - "learning_rate": 0.00019999854596433565, - "loss": 46.0, - "step": 22461 - }, - { - "epoch": 1.7173767608998987, - "grad_norm": 0.001987624214962125, - "learning_rate": 0.00019999854583480336, - "loss": 46.0, - "step": 22462 - }, - { - "epoch": 1.7174532178832884, - "grad_norm": 0.0033599711023271084, - "learning_rate": 0.00019999854570526532, - "loss": 46.0, - "step": 22463 - }, - { - "epoch": 1.7175296748666782, - "grad_norm": 0.0007719573331996799, - "learning_rate": 0.00019999854557572151, - "loss": 46.0, - "step": 22464 - }, - { - "epoch": 1.7176061318500677, - "grad_norm": 0.004135304596275091, - "learning_rate": 0.0001999985454461719, - "loss": 46.0, - "step": 22465 - }, - { - "epoch": 1.7176825888334575, - "grad_norm": 0.004250828176736832, - "learning_rate": 0.00019999854531661657, - "loss": 46.0, - "step": 22466 - }, - { - "epoch": 1.7177590458168472, - "grad_norm": 0.0006882243906147778, - "learning_rate": 0.00019999854518705545, - "loss": 46.0, - "step": 22467 - }, - { - "epoch": 1.717835502800237, - "grad_norm": 0.004639922175556421, - "learning_rate": 0.00019999854505748857, - "loss": 46.0, - "step": 22468 - }, - { - "epoch": 1.7179119597836268, - "grad_norm": 0.0007389179663732648, - "learning_rate": 0.0001999985449279159, - "loss": 46.0, - "step": 22469 - }, - { - "epoch": 1.7179884167670165, - "grad_norm": 0.0032404428347945213, - "learning_rate": 0.0001999985447983375, - "loss": 46.0, - "step": 22470 - }, - { - "epoch": 1.7180648737504063, - "grad_norm": 0.0012873783707618713, - "learning_rate": 0.0001999985446687533, - "loss": 46.0, - "step": 22471 - }, - { - "epoch": 1.718141330733796, - "grad_norm": 0.003340303199365735, - "learning_rate": 0.0001999985445391633, - "loss": 46.0, - "step": 22472 - }, - { - "epoch": 1.7182177877171856, - "grad_norm": 0.0004160654207225889, - "learning_rate": 0.0001999985444095676, - "loss": 46.0, - "step": 22473 - }, - { - "epoch": 1.7182942447005753, - "grad_norm": 0.0007767430506646633, - "learning_rate": 0.00019999854427996607, - "loss": 46.0, - "step": 22474 - }, - { - "epoch": 1.718370701683965, - "grad_norm": 0.0007537707570008934, - "learning_rate": 0.0001999985441503588, - "loss": 46.0, - "step": 22475 - }, - { - "epoch": 1.7184471586673546, - "grad_norm": 0.003122064284980297, - "learning_rate": 0.00019999854402074577, - "loss": 46.0, - "step": 22476 - }, - { - "epoch": 1.7185236156507444, - "grad_norm": 0.003668226068839431, - "learning_rate": 0.00019999854389112697, - "loss": 46.0, - "step": 22477 - }, - { - "epoch": 1.7186000726341342, - "grad_norm": 0.0017019016668200493, - "learning_rate": 0.00019999854376150236, - "loss": 46.0, - "step": 22478 - }, - { - "epoch": 1.718676529617524, - "grad_norm": 0.0016353281680494547, - "learning_rate": 0.000199998543631872, - "loss": 46.0, - "step": 22479 - }, - { - "epoch": 1.7187529866009137, - "grad_norm": 0.00042351995944045484, - "learning_rate": 0.0001999985435022359, - "loss": 46.0, - "step": 22480 - }, - { - "epoch": 1.7188294435843035, - "grad_norm": 0.0013452437706291676, - "learning_rate": 0.000199998543372594, - "loss": 46.0, - "step": 22481 - }, - { - "epoch": 1.7189059005676932, - "grad_norm": 0.005218386184424162, - "learning_rate": 0.00019999854324294635, - "loss": 46.0, - "step": 22482 - }, - { - "epoch": 1.718982357551083, - "grad_norm": 0.001016703899949789, - "learning_rate": 0.00019999854311329292, - "loss": 46.0, - "step": 22483 - }, - { - "epoch": 1.7190588145344725, - "grad_norm": 0.0014405595138669014, - "learning_rate": 0.00019999854298363373, - "loss": 46.0, - "step": 22484 - }, - { - "epoch": 1.7191352715178623, - "grad_norm": 0.0009401018614880741, - "learning_rate": 0.00019999854285396876, - "loss": 46.0, - "step": 22485 - }, - { - "epoch": 1.719211728501252, - "grad_norm": 0.0015400574775412679, - "learning_rate": 0.00019999854272429802, - "loss": 46.0, - "step": 22486 - }, - { - "epoch": 1.7192881854846416, - "grad_norm": 0.0007076390320435166, - "learning_rate": 0.0001999985425946215, - "loss": 46.0, - "step": 22487 - }, - { - "epoch": 1.7193646424680313, - "grad_norm": 0.0006557370652444661, - "learning_rate": 0.00019999854246493924, - "loss": 46.0, - "step": 22488 - }, - { - "epoch": 1.719441099451421, - "grad_norm": 0.0005964445881545544, - "learning_rate": 0.0001999985423352512, - "loss": 46.0, - "step": 22489 - }, - { - "epoch": 1.7195175564348109, - "grad_norm": 0.0004475999448914081, - "learning_rate": 0.0001999985422055574, - "loss": 46.0, - "step": 22490 - }, - { - "epoch": 1.7195940134182006, - "grad_norm": 0.0014614162500947714, - "learning_rate": 0.00019999854207585778, - "loss": 46.0, - "step": 22491 - }, - { - "epoch": 1.7196704704015904, - "grad_norm": 0.0034353667870163918, - "learning_rate": 0.00019999854194615243, - "loss": 46.0, - "step": 22492 - }, - { - "epoch": 1.7197469273849801, - "grad_norm": 0.0009692535386420786, - "learning_rate": 0.0001999985418164413, - "loss": 46.0, - "step": 22493 - }, - { - "epoch": 1.71982338436837, - "grad_norm": 0.0014354020822793245, - "learning_rate": 0.0001999985416867244, - "loss": 46.0, - "step": 22494 - }, - { - "epoch": 1.7198998413517594, - "grad_norm": 0.0011246701469644904, - "learning_rate": 0.00019999854155700178, - "loss": 46.0, - "step": 22495 - }, - { - "epoch": 1.7199762983351492, - "grad_norm": 0.0016662514535710216, - "learning_rate": 0.00019999854142727333, - "loss": 46.0, - "step": 22496 - }, - { - "epoch": 1.720052755318539, - "grad_norm": 0.0009482745081186295, - "learning_rate": 0.00019999854129753913, - "loss": 46.0, - "step": 22497 - }, - { - "epoch": 1.7201292123019285, - "grad_norm": 0.0021958444267511368, - "learning_rate": 0.00019999854116779914, - "loss": 46.0, - "step": 22498 - }, - { - "epoch": 1.7202056692853183, - "grad_norm": 0.001114825252443552, - "learning_rate": 0.00019999854103805342, - "loss": 46.0, - "step": 22499 - }, - { - "epoch": 1.720282126268708, - "grad_norm": 0.0007832496194168925, - "learning_rate": 0.00019999854090830193, - "loss": 46.0, - "step": 22500 - }, - { - "epoch": 1.7203585832520978, - "grad_norm": 0.0022682598792016506, - "learning_rate": 0.00019999854077854462, - "loss": 46.0, - "step": 22501 - }, - { - "epoch": 1.7204350402354875, - "grad_norm": 0.0006361659034155309, - "learning_rate": 0.00019999854064878158, - "loss": 46.0, - "step": 22502 - }, - { - "epoch": 1.7205114972188773, - "grad_norm": 0.0004284860042389482, - "learning_rate": 0.00019999854051901277, - "loss": 46.0, - "step": 22503 - }, - { - "epoch": 1.720587954202267, - "grad_norm": 0.0005408914876170456, - "learning_rate": 0.0001999985403892382, - "loss": 46.0, - "step": 22504 - }, - { - "epoch": 1.7206644111856568, - "grad_norm": 0.0012192067224532366, - "learning_rate": 0.00019999854025945784, - "loss": 46.0, - "step": 22505 - }, - { - "epoch": 1.7207408681690464, - "grad_norm": 0.002179897390305996, - "learning_rate": 0.0001999985401296717, - "loss": 46.0, - "step": 22506 - }, - { - "epoch": 1.7208173251524361, - "grad_norm": 0.0016331454971805215, - "learning_rate": 0.0001999985399998798, - "loss": 46.0, - "step": 22507 - }, - { - "epoch": 1.7208937821358257, - "grad_norm": 0.0018505053594708443, - "learning_rate": 0.00019999853987008216, - "loss": 46.0, - "step": 22508 - }, - { - "epoch": 1.7209702391192154, - "grad_norm": 0.0017334234435111284, - "learning_rate": 0.0001999985397402787, - "loss": 46.0, - "step": 22509 - }, - { - "epoch": 1.7210466961026052, - "grad_norm": 0.004260999150574207, - "learning_rate": 0.0001999985396104695, - "loss": 46.0, - "step": 22510 - }, - { - "epoch": 1.721123153085995, - "grad_norm": 0.0005032224580645561, - "learning_rate": 0.00019999853948065454, - "loss": 46.0, - "step": 22511 - }, - { - "epoch": 1.7211996100693847, - "grad_norm": 0.0007490611169487238, - "learning_rate": 0.0001999985393508338, - "loss": 46.0, - "step": 22512 - }, - { - "epoch": 1.7212760670527745, - "grad_norm": 0.0002332647272851318, - "learning_rate": 0.0001999985392210073, - "loss": 46.0, - "step": 22513 - }, - { - "epoch": 1.7213525240361642, - "grad_norm": 0.008897039107978344, - "learning_rate": 0.000199998539091175, - "loss": 46.0, - "step": 22514 - }, - { - "epoch": 1.721428981019554, - "grad_norm": 0.002618935890495777, - "learning_rate": 0.00019999853896133696, - "loss": 46.0, - "step": 22515 - }, - { - "epoch": 1.7215054380029438, - "grad_norm": 0.000531247875187546, - "learning_rate": 0.00019999853883149316, - "loss": 46.0, - "step": 22516 - }, - { - "epoch": 1.7215818949863333, - "grad_norm": 0.001366589916869998, - "learning_rate": 0.00019999853870164355, - "loss": 46.0, - "step": 22517 - }, - { - "epoch": 1.721658351969723, - "grad_norm": 0.001164983375929296, - "learning_rate": 0.0001999985385717882, - "loss": 46.0, - "step": 22518 - }, - { - "epoch": 1.7217348089531126, - "grad_norm": 0.002358474303036928, - "learning_rate": 0.0001999985384419271, - "loss": 46.0, - "step": 22519 - }, - { - "epoch": 1.7218112659365024, - "grad_norm": 0.0009585806983523071, - "learning_rate": 0.00019999853831206016, - "loss": 46.0, - "step": 22520 - }, - { - "epoch": 1.7218877229198921, - "grad_norm": 0.00181785156019032, - "learning_rate": 0.00019999853818218751, - "loss": 46.0, - "step": 22521 - }, - { - "epoch": 1.7219641799032819, - "grad_norm": 0.0008936924859881401, - "learning_rate": 0.0001999985380523091, - "loss": 46.0, - "step": 22522 - }, - { - "epoch": 1.7220406368866716, - "grad_norm": 0.004332865122705698, - "learning_rate": 0.00019999853792242487, - "loss": 46.0, - "step": 22523 - }, - { - "epoch": 1.7221170938700614, - "grad_norm": 0.0008371897274628282, - "learning_rate": 0.0001999985377925349, - "loss": 46.0, - "step": 22524 - }, - { - "epoch": 1.7221935508534512, - "grad_norm": 0.0009357577073387802, - "learning_rate": 0.00019999853766263916, - "loss": 46.0, - "step": 22525 - }, - { - "epoch": 1.722270007836841, - "grad_norm": 0.001828839653171599, - "learning_rate": 0.00019999853753273765, - "loss": 46.0, - "step": 22526 - }, - { - "epoch": 1.7223464648202307, - "grad_norm": 0.0008495564106851816, - "learning_rate": 0.00019999853740283036, - "loss": 46.0, - "step": 22527 - }, - { - "epoch": 1.7224229218036202, - "grad_norm": 0.0005137830739840865, - "learning_rate": 0.0001999985372729173, - "loss": 46.0, - "step": 22528 - }, - { - "epoch": 1.72249937878701, - "grad_norm": 0.0014874048065394163, - "learning_rate": 0.0001999985371429985, - "loss": 46.0, - "step": 22529 - }, - { - "epoch": 1.7225758357703995, - "grad_norm": 0.0021972861140966415, - "learning_rate": 0.0001999985370130739, - "loss": 46.0, - "step": 22530 - }, - { - "epoch": 1.7226522927537893, - "grad_norm": 0.0007032732246443629, - "learning_rate": 0.00019999853688314355, - "loss": 46.0, - "step": 22531 - }, - { - "epoch": 1.722728749737179, - "grad_norm": 0.0021237845066934824, - "learning_rate": 0.0001999985367532074, - "loss": 46.0, - "step": 22532 - }, - { - "epoch": 1.7228052067205688, - "grad_norm": 0.000899318780284375, - "learning_rate": 0.0001999985366232655, - "loss": 46.0, - "step": 22533 - }, - { - "epoch": 1.7228816637039586, - "grad_norm": 0.0013699724804610014, - "learning_rate": 0.00019999853649331785, - "loss": 46.0, - "step": 22534 - }, - { - "epoch": 1.7229581206873483, - "grad_norm": 0.000825609196908772, - "learning_rate": 0.00019999853636336443, - "loss": 46.0, - "step": 22535 - }, - { - "epoch": 1.723034577670738, - "grad_norm": 0.0011562325526028872, - "learning_rate": 0.0001999985362334052, - "loss": 46.0, - "step": 22536 - }, - { - "epoch": 1.7231110346541278, - "grad_norm": 0.0005326572572812438, - "learning_rate": 0.00019999853610344024, - "loss": 46.0, - "step": 22537 - }, - { - "epoch": 1.7231874916375174, - "grad_norm": 0.0018527604406699538, - "learning_rate": 0.0001999985359734695, - "loss": 46.0, - "step": 22538 - }, - { - "epoch": 1.7232639486209071, - "grad_norm": 0.0006820294074714184, - "learning_rate": 0.00019999853584349298, - "loss": 46.0, - "step": 22539 - }, - { - "epoch": 1.723340405604297, - "grad_norm": 0.0011219180887565017, - "learning_rate": 0.0001999985357135107, - "loss": 46.0, - "step": 22540 - }, - { - "epoch": 1.7234168625876864, - "grad_norm": 0.001367273973301053, - "learning_rate": 0.00019999853558352263, - "loss": 46.0, - "step": 22541 - }, - { - "epoch": 1.7234933195710762, - "grad_norm": 0.0005039800889790058, - "learning_rate": 0.00019999853545352882, - "loss": 46.0, - "step": 22542 - }, - { - "epoch": 1.723569776554466, - "grad_norm": 0.0016077890759333968, - "learning_rate": 0.00019999853532352924, - "loss": 46.0, - "step": 22543 - }, - { - "epoch": 1.7236462335378557, - "grad_norm": 0.0010009424295276403, - "learning_rate": 0.00019999853519352386, - "loss": 46.0, - "step": 22544 - }, - { - "epoch": 1.7237226905212455, - "grad_norm": 0.0028042467311024666, - "learning_rate": 0.00019999853506351273, - "loss": 46.0, - "step": 22545 - }, - { - "epoch": 1.7237991475046353, - "grad_norm": 0.0027355896309018135, - "learning_rate": 0.00019999853493349586, - "loss": 46.0, - "step": 22546 - }, - { - "epoch": 1.723875604488025, - "grad_norm": 0.0010892339050769806, - "learning_rate": 0.00019999853480347316, - "loss": 46.0, - "step": 22547 - }, - { - "epoch": 1.7239520614714148, - "grad_norm": 0.007179956883192062, - "learning_rate": 0.0001999985346734447, - "loss": 46.0, - "step": 22548 - }, - { - "epoch": 1.7240285184548043, - "grad_norm": 0.0008726438391022384, - "learning_rate": 0.00019999853454341052, - "loss": 46.0, - "step": 22549 - }, - { - "epoch": 1.724104975438194, - "grad_norm": 0.001996300183236599, - "learning_rate": 0.00019999853441337052, - "loss": 46.0, - "step": 22550 - }, - { - "epoch": 1.7241814324215838, - "grad_norm": 0.00043070968240499496, - "learning_rate": 0.0001999985342833248, - "loss": 46.0, - "step": 22551 - }, - { - "epoch": 1.7242578894049734, - "grad_norm": 0.0023758583702147007, - "learning_rate": 0.0001999985341532733, - "loss": 46.0, - "step": 22552 - }, - { - "epoch": 1.7243343463883631, - "grad_norm": 0.0009823755826801062, - "learning_rate": 0.000199998534023216, - "loss": 46.0, - "step": 22553 - }, - { - "epoch": 1.724410803371753, - "grad_norm": 0.0009820658015087247, - "learning_rate": 0.00019999853389315295, - "loss": 46.0, - "step": 22554 - }, - { - "epoch": 1.7244872603551427, - "grad_norm": 0.0009053281391970813, - "learning_rate": 0.00019999853376308414, - "loss": 46.0, - "step": 22555 - }, - { - "epoch": 1.7245637173385324, - "grad_norm": 0.0004495630564633757, - "learning_rate": 0.00019999853363300953, - "loss": 46.0, - "step": 22556 - }, - { - "epoch": 1.7246401743219222, - "grad_norm": 0.00098418106790632, - "learning_rate": 0.00019999853350292918, - "loss": 46.0, - "step": 22557 - }, - { - "epoch": 1.724716631305312, - "grad_norm": 0.0007086388650350273, - "learning_rate": 0.00019999853337284305, - "loss": 46.0, - "step": 22558 - }, - { - "epoch": 1.7247930882887017, - "grad_norm": 0.0004983508842997253, - "learning_rate": 0.00019999853324275112, - "loss": 46.0, - "step": 22559 - }, - { - "epoch": 1.7248695452720912, - "grad_norm": 0.005471676122397184, - "learning_rate": 0.00019999853311265347, - "loss": 46.0, - "step": 22560 - }, - { - "epoch": 1.724946002255481, - "grad_norm": 0.005148309748619795, - "learning_rate": 0.00019999853298255003, - "loss": 46.0, - "step": 22561 - }, - { - "epoch": 1.7250224592388708, - "grad_norm": 0.002501945709809661, - "learning_rate": 0.0001999985328524408, - "loss": 46.0, - "step": 22562 - }, - { - "epoch": 1.7250989162222603, - "grad_norm": 0.0008293217397294939, - "learning_rate": 0.00019999853272232584, - "loss": 46.0, - "step": 22563 - }, - { - "epoch": 1.72517537320565, - "grad_norm": 0.0006360613624565303, - "learning_rate": 0.0001999985325922051, - "loss": 46.0, - "step": 22564 - }, - { - "epoch": 1.7252518301890398, - "grad_norm": 0.0017641987651586533, - "learning_rate": 0.00019999853246207856, - "loss": 46.0, - "step": 22565 - }, - { - "epoch": 1.7253282871724296, - "grad_norm": 0.0011632274836301804, - "learning_rate": 0.0001999985323319463, - "loss": 46.0, - "step": 22566 - }, - { - "epoch": 1.7254047441558193, - "grad_norm": 0.0006391158094629645, - "learning_rate": 0.0001999985322018082, - "loss": 46.0, - "step": 22567 - }, - { - "epoch": 1.725481201139209, - "grad_norm": 0.0006679279613308609, - "learning_rate": 0.0001999985320716644, - "loss": 46.0, - "step": 22568 - }, - { - "epoch": 1.7255576581225989, - "grad_norm": 0.0006939740851521492, - "learning_rate": 0.00019999853194151482, - "loss": 46.0, - "step": 22569 - }, - { - "epoch": 1.7256341151059886, - "grad_norm": 0.0010613654740154743, - "learning_rate": 0.00019999853181135944, - "loss": 46.0, - "step": 22570 - }, - { - "epoch": 1.7257105720893782, - "grad_norm": 0.0018004346638917923, - "learning_rate": 0.00019999853168119831, - "loss": 46.0, - "step": 22571 - }, - { - "epoch": 1.725787029072768, - "grad_norm": 0.0008225837373174727, - "learning_rate": 0.0001999985315510314, - "loss": 46.0, - "step": 22572 - }, - { - "epoch": 1.7258634860561577, - "grad_norm": 0.0013309657806530595, - "learning_rate": 0.00019999853142085874, - "loss": 46.0, - "step": 22573 - }, - { - "epoch": 1.7259399430395472, - "grad_norm": 0.0017102316487580538, - "learning_rate": 0.0001999985312906803, - "loss": 46.0, - "step": 22574 - }, - { - "epoch": 1.726016400022937, - "grad_norm": 0.0008002474787645042, - "learning_rate": 0.00019999853116049607, - "loss": 46.0, - "step": 22575 - }, - { - "epoch": 1.7260928570063268, - "grad_norm": 0.0006907505448907614, - "learning_rate": 0.0001999985310303061, - "loss": 46.0, - "step": 22576 - }, - { - "epoch": 1.7261693139897165, - "grad_norm": 0.0005228658556006849, - "learning_rate": 0.00019999853090011036, - "loss": 46.0, - "step": 22577 - }, - { - "epoch": 1.7262457709731063, - "grad_norm": 0.0005460300599224865, - "learning_rate": 0.00019999853076990882, - "loss": 46.0, - "step": 22578 - }, - { - "epoch": 1.726322227956496, - "grad_norm": 0.0007252333452925086, - "learning_rate": 0.00019999853063970156, - "loss": 46.0, - "step": 22579 - }, - { - "epoch": 1.7263986849398858, - "grad_norm": 0.000526864139828831, - "learning_rate": 0.00019999853050948848, - "loss": 46.0, - "step": 22580 - }, - { - "epoch": 1.7264751419232756, - "grad_norm": 0.0010521203512325883, - "learning_rate": 0.00019999853037926964, - "loss": 46.0, - "step": 22581 - }, - { - "epoch": 1.726551598906665, - "grad_norm": 0.0006113207200542092, - "learning_rate": 0.00019999853024904506, - "loss": 46.0, - "step": 22582 - }, - { - "epoch": 1.7266280558900549, - "grad_norm": 0.000968958018347621, - "learning_rate": 0.0001999985301188147, - "loss": 46.0, - "step": 22583 - }, - { - "epoch": 1.7267045128734446, - "grad_norm": 0.0004944971296936274, - "learning_rate": 0.00019999852998857856, - "loss": 46.0, - "step": 22584 - }, - { - "epoch": 1.7267809698568342, - "grad_norm": 0.0005346629186533391, - "learning_rate": 0.00019999852985833668, - "loss": 46.0, - "step": 22585 - }, - { - "epoch": 1.726857426840224, - "grad_norm": 0.0019707956817001104, - "learning_rate": 0.00019999852972808898, - "loss": 46.0, - "step": 22586 - }, - { - "epoch": 1.7269338838236137, - "grad_norm": 0.0025061715859919786, - "learning_rate": 0.00019999852959783553, - "loss": 46.0, - "step": 22587 - }, - { - "epoch": 1.7270103408070034, - "grad_norm": 0.000909914611838758, - "learning_rate": 0.00019999852946757631, - "loss": 46.0, - "step": 22588 - }, - { - "epoch": 1.7270867977903932, - "grad_norm": 0.0013463539071381092, - "learning_rate": 0.00019999852933731135, - "loss": 46.0, - "step": 22589 - }, - { - "epoch": 1.727163254773783, - "grad_norm": 0.0014716185396537185, - "learning_rate": 0.00019999852920704058, - "loss": 46.0, - "step": 22590 - }, - { - "epoch": 1.7272397117571727, - "grad_norm": 0.0021005552262067795, - "learning_rate": 0.00019999852907676407, - "loss": 46.0, - "step": 22591 - }, - { - "epoch": 1.7273161687405625, - "grad_norm": 0.000704847218003124, - "learning_rate": 0.00019999852894648175, - "loss": 46.0, - "step": 22592 - }, - { - "epoch": 1.727392625723952, - "grad_norm": 0.0011378333438187838, - "learning_rate": 0.00019999852881619372, - "loss": 46.0, - "step": 22593 - }, - { - "epoch": 1.7274690827073418, - "grad_norm": 0.0004219490510877222, - "learning_rate": 0.0001999985286858999, - "loss": 46.0, - "step": 22594 - }, - { - "epoch": 1.7275455396907315, - "grad_norm": 0.0025741581339389086, - "learning_rate": 0.0001999985285556003, - "loss": 46.0, - "step": 22595 - }, - { - "epoch": 1.727621996674121, - "grad_norm": 0.0034743815194815397, - "learning_rate": 0.00019999852842529493, - "loss": 46.0, - "step": 22596 - }, - { - "epoch": 1.7276984536575108, - "grad_norm": 0.0005063615390099585, - "learning_rate": 0.0001999985282949838, - "loss": 46.0, - "step": 22597 - }, - { - "epoch": 1.7277749106409006, - "grad_norm": 0.0009142651106230915, - "learning_rate": 0.0001999985281646669, - "loss": 46.0, - "step": 22598 - }, - { - "epoch": 1.7278513676242904, - "grad_norm": 0.0011641262099146843, - "learning_rate": 0.0001999985280343442, - "loss": 46.0, - "step": 22599 - }, - { - "epoch": 1.7279278246076801, - "grad_norm": 0.0010043210349977016, - "learning_rate": 0.00019999852790401575, - "loss": 46.0, - "step": 22600 - }, - { - "epoch": 1.72800428159107, - "grad_norm": 0.002094058785587549, - "learning_rate": 0.00019999852777368156, - "loss": 46.0, - "step": 22601 - }, - { - "epoch": 1.7280807385744597, - "grad_norm": 0.0008834440959617496, - "learning_rate": 0.00019999852764334154, - "loss": 46.0, - "step": 22602 - }, - { - "epoch": 1.7281571955578494, - "grad_norm": 0.011193652637302876, - "learning_rate": 0.00019999852751299583, - "loss": 46.0, - "step": 22603 - }, - { - "epoch": 1.728233652541239, - "grad_norm": 0.0003516439173836261, - "learning_rate": 0.0001999985273826443, - "loss": 46.0, - "step": 22604 - }, - { - "epoch": 1.7283101095246287, - "grad_norm": 0.0008961883140727878, - "learning_rate": 0.00019999852725228697, - "loss": 46.0, - "step": 22605 - }, - { - "epoch": 1.7283865665080185, - "grad_norm": 0.0012827515602111816, - "learning_rate": 0.00019999852712192394, - "loss": 46.0, - "step": 22606 - }, - { - "epoch": 1.728463023491408, - "grad_norm": 0.0034592922311276197, - "learning_rate": 0.0001999985269915551, - "loss": 46.0, - "step": 22607 - }, - { - "epoch": 1.7285394804747978, - "grad_norm": 0.000991699518635869, - "learning_rate": 0.0001999985268611805, - "loss": 46.0, - "step": 22608 - }, - { - "epoch": 1.7286159374581875, - "grad_norm": 0.0010638709645718336, - "learning_rate": 0.00019999852673080012, - "loss": 46.0, - "step": 22609 - }, - { - "epoch": 1.7286923944415773, - "grad_norm": 0.0017935744253918529, - "learning_rate": 0.00019999852660041397, - "loss": 46.0, - "step": 22610 - }, - { - "epoch": 1.728768851424967, - "grad_norm": 0.0016181034734472632, - "learning_rate": 0.0001999985264700221, - "loss": 46.0, - "step": 22611 - }, - { - "epoch": 1.7288453084083568, - "grad_norm": 0.00338179268874228, - "learning_rate": 0.00019999852633962442, - "loss": 46.0, - "step": 22612 - }, - { - "epoch": 1.7289217653917466, - "grad_norm": 0.002531736623495817, - "learning_rate": 0.00019999852620922095, - "loss": 46.0, - "step": 22613 - }, - { - "epoch": 1.7289982223751363, - "grad_norm": 0.00115268025547266, - "learning_rate": 0.00019999852607881176, - "loss": 46.0, - "step": 22614 - }, - { - "epoch": 1.7290746793585259, - "grad_norm": 0.0017992340726777911, - "learning_rate": 0.00019999852594839674, - "loss": 46.0, - "step": 22615 - }, - { - "epoch": 1.7291511363419156, - "grad_norm": 0.002800759393721819, - "learning_rate": 0.000199998525817976, - "loss": 46.0, - "step": 22616 - }, - { - "epoch": 1.7292275933253054, - "grad_norm": 0.0012787943705916405, - "learning_rate": 0.00019999852568754949, - "loss": 46.0, - "step": 22617 - }, - { - "epoch": 1.729304050308695, - "grad_norm": 0.0023138525430113077, - "learning_rate": 0.00019999852555711717, - "loss": 46.0, - "step": 22618 - }, - { - "epoch": 1.7293805072920847, - "grad_norm": 0.0033475991804152727, - "learning_rate": 0.0001999985254266791, - "loss": 46.0, - "step": 22619 - }, - { - "epoch": 1.7294569642754745, - "grad_norm": 0.001402844674885273, - "learning_rate": 0.00019999852529623528, - "loss": 46.0, - "step": 22620 - }, - { - "epoch": 1.7295334212588642, - "grad_norm": 0.0006659413920715451, - "learning_rate": 0.00019999852516578567, - "loss": 46.0, - "step": 22621 - }, - { - "epoch": 1.729609878242254, - "grad_norm": 0.00293273595161736, - "learning_rate": 0.00019999852503533032, - "loss": 46.0, - "step": 22622 - }, - { - "epoch": 1.7296863352256437, - "grad_norm": 0.0008309009135700762, - "learning_rate": 0.00019999852490486917, - "loss": 46.0, - "step": 22623 - }, - { - "epoch": 1.7297627922090335, - "grad_norm": 0.0005622395547106862, - "learning_rate": 0.00019999852477440227, - "loss": 46.0, - "step": 22624 - }, - { - "epoch": 1.7298392491924233, - "grad_norm": 0.0011021544924005866, - "learning_rate": 0.0001999985246439296, - "loss": 46.0, - "step": 22625 - }, - { - "epoch": 1.7299157061758128, - "grad_norm": 0.000948019849602133, - "learning_rate": 0.00019999852451345112, - "loss": 46.0, - "step": 22626 - }, - { - "epoch": 1.7299921631592026, - "grad_norm": 0.0019641867838799953, - "learning_rate": 0.0001999985243829669, - "loss": 46.0, - "step": 22627 - }, - { - "epoch": 1.7300686201425923, - "grad_norm": 0.0007797997677698731, - "learning_rate": 0.00019999852425247694, - "loss": 46.0, - "step": 22628 - }, - { - "epoch": 1.7301450771259819, - "grad_norm": 0.002256902866065502, - "learning_rate": 0.00019999852412198117, - "loss": 46.0, - "step": 22629 - }, - { - "epoch": 1.7302215341093716, - "grad_norm": 0.0006343925488181412, - "learning_rate": 0.00019999852399147963, - "loss": 46.0, - "step": 22630 - }, - { - "epoch": 1.7302979910927614, - "grad_norm": 0.0008882620604708791, - "learning_rate": 0.00019999852386097232, - "loss": 46.0, - "step": 22631 - }, - { - "epoch": 1.7303744480761512, - "grad_norm": 0.0006998606841079891, - "learning_rate": 0.00019999852373045926, - "loss": 46.0, - "step": 22632 - }, - { - "epoch": 1.730450905059541, - "grad_norm": 0.0008176497067324817, - "learning_rate": 0.00019999852359994046, - "loss": 46.0, - "step": 22633 - }, - { - "epoch": 1.7305273620429307, - "grad_norm": 0.000559588719625026, - "learning_rate": 0.00019999852346941585, - "loss": 46.0, - "step": 22634 - }, - { - "epoch": 1.7306038190263204, - "grad_norm": 0.0007625481812283397, - "learning_rate": 0.00019999852333888547, - "loss": 46.0, - "step": 22635 - }, - { - "epoch": 1.7306802760097102, - "grad_norm": 0.001884974422864616, - "learning_rate": 0.00019999852320834932, - "loss": 46.0, - "step": 22636 - }, - { - "epoch": 1.7307567329930997, - "grad_norm": 0.0005362986121326685, - "learning_rate": 0.00019999852307780742, - "loss": 46.0, - "step": 22637 - }, - { - "epoch": 1.7308331899764895, - "grad_norm": 0.0008835560292936862, - "learning_rate": 0.00019999852294725975, - "loss": 46.0, - "step": 22638 - }, - { - "epoch": 1.730909646959879, - "grad_norm": 0.0021581652108579874, - "learning_rate": 0.00019999852281670627, - "loss": 46.0, - "step": 22639 - }, - { - "epoch": 1.7309861039432688, - "grad_norm": 0.0010900796623900533, - "learning_rate": 0.00019999852268614708, - "loss": 46.0, - "step": 22640 - }, - { - "epoch": 1.7310625609266586, - "grad_norm": 0.0010500267380848527, - "learning_rate": 0.00019999852255558206, - "loss": 46.0, - "step": 22641 - }, - { - "epoch": 1.7311390179100483, - "grad_norm": 0.0011888942681252956, - "learning_rate": 0.00019999852242501132, - "loss": 46.0, - "step": 22642 - }, - { - "epoch": 1.731215474893438, - "grad_norm": 0.0010251005878672004, - "learning_rate": 0.00019999852229443478, - "loss": 46.0, - "step": 22643 - }, - { - "epoch": 1.7312919318768278, - "grad_norm": 0.0006767407176084816, - "learning_rate": 0.0001999985221638525, - "loss": 46.0, - "step": 22644 - }, - { - "epoch": 1.7313683888602176, - "grad_norm": 0.0017930514877662063, - "learning_rate": 0.00019999852203326444, - "loss": 46.0, - "step": 22645 - }, - { - "epoch": 1.7314448458436074, - "grad_norm": 0.0014192037051543593, - "learning_rate": 0.00019999852190267058, - "loss": 46.0, - "step": 22646 - }, - { - "epoch": 1.7315213028269971, - "grad_norm": 0.0012247315607964993, - "learning_rate": 0.00019999852177207098, - "loss": 46.0, - "step": 22647 - }, - { - "epoch": 1.7315977598103867, - "grad_norm": 0.002818105276674032, - "learning_rate": 0.00019999852164146562, - "loss": 46.0, - "step": 22648 - }, - { - "epoch": 1.7316742167937764, - "grad_norm": 0.0035968590527772903, - "learning_rate": 0.00019999852151085444, - "loss": 46.0, - "step": 22649 - }, - { - "epoch": 1.731750673777166, - "grad_norm": 0.0011192914098501205, - "learning_rate": 0.00019999852138023755, - "loss": 46.0, - "step": 22650 - }, - { - "epoch": 1.7318271307605557, - "grad_norm": 0.0007100970833562315, - "learning_rate": 0.00019999852124961487, - "loss": 46.0, - "step": 22651 - }, - { - "epoch": 1.7319035877439455, - "grad_norm": 0.0006707278080284595, - "learning_rate": 0.0001999985211189864, - "loss": 46.0, - "step": 22652 - }, - { - "epoch": 1.7319800447273352, - "grad_norm": 0.0016358091961592436, - "learning_rate": 0.00019999852098835218, - "loss": 46.0, - "step": 22653 - }, - { - "epoch": 1.732056501710725, - "grad_norm": 0.0017581508727744222, - "learning_rate": 0.0001999985208577122, - "loss": 46.0, - "step": 22654 - }, - { - "epoch": 1.7321329586941148, - "grad_norm": 0.002192989457398653, - "learning_rate": 0.00019999852072706645, - "loss": 46.0, - "step": 22655 - }, - { - "epoch": 1.7322094156775045, - "grad_norm": 0.005653319880366325, - "learning_rate": 0.0001999985205964149, - "loss": 46.0, - "step": 22656 - }, - { - "epoch": 1.7322858726608943, - "grad_norm": 0.0020329018589109182, - "learning_rate": 0.0001999985204657576, - "loss": 46.0, - "step": 22657 - }, - { - "epoch": 1.732362329644284, - "grad_norm": 0.01344548724591732, - "learning_rate": 0.00019999852033509454, - "loss": 46.0, - "step": 22658 - }, - { - "epoch": 1.7324387866276736, - "grad_norm": 0.002527801785618067, - "learning_rate": 0.0001999985202044257, - "loss": 46.0, - "step": 22659 - }, - { - "epoch": 1.7325152436110633, - "grad_norm": 0.000667993794195354, - "learning_rate": 0.00019999852007375108, - "loss": 46.0, - "step": 22660 - }, - { - "epoch": 1.7325917005944529, - "grad_norm": 0.0063514430075883865, - "learning_rate": 0.00019999851994307073, - "loss": 46.0, - "step": 22661 - }, - { - "epoch": 1.7326681575778426, - "grad_norm": 0.0037415847182273865, - "learning_rate": 0.00019999851981238455, - "loss": 46.0, - "step": 22662 - }, - { - "epoch": 1.7327446145612324, - "grad_norm": 0.0006423986051231623, - "learning_rate": 0.00019999851968169265, - "loss": 46.0, - "step": 22663 - }, - { - "epoch": 1.7328210715446222, - "grad_norm": 0.009163415059447289, - "learning_rate": 0.00019999851955099498, - "loss": 46.0, - "step": 22664 - }, - { - "epoch": 1.732897528528012, - "grad_norm": 0.000832437421195209, - "learning_rate": 0.00019999851942029153, - "loss": 46.0, - "step": 22665 - }, - { - "epoch": 1.7329739855114017, - "grad_norm": 0.004653178621083498, - "learning_rate": 0.0001999985192895823, - "loss": 46.0, - "step": 22666 - }, - { - "epoch": 1.7330504424947915, - "grad_norm": 0.002586974762380123, - "learning_rate": 0.0001999985191588673, - "loss": 46.0, - "step": 22667 - }, - { - "epoch": 1.7331268994781812, - "grad_norm": 0.0019834262784570456, - "learning_rate": 0.00019999851902814656, - "loss": 46.0, - "step": 22668 - }, - { - "epoch": 1.7332033564615708, - "grad_norm": 0.0011409041471779346, - "learning_rate": 0.00019999851889742002, - "loss": 46.0, - "step": 22669 - }, - { - "epoch": 1.7332798134449605, - "grad_norm": 0.0015969412634149194, - "learning_rate": 0.00019999851876668768, - "loss": 46.0, - "step": 22670 - }, - { - "epoch": 1.7333562704283503, - "grad_norm": 0.002990870736539364, - "learning_rate": 0.00019999851863594962, - "loss": 46.0, - "step": 22671 - }, - { - "epoch": 1.7334327274117398, - "grad_norm": 0.0015055029653012753, - "learning_rate": 0.0001999985185052058, - "loss": 46.0, - "step": 22672 - }, - { - "epoch": 1.7335091843951296, - "grad_norm": 0.0006832543876953423, - "learning_rate": 0.00019999851837445619, - "loss": 46.0, - "step": 22673 - }, - { - "epoch": 1.7335856413785193, - "grad_norm": 0.0020437815692275763, - "learning_rate": 0.0001999985182437008, - "loss": 46.0, - "step": 22674 - }, - { - "epoch": 1.733662098361909, - "grad_norm": 0.009566658176481724, - "learning_rate": 0.00019999851811293966, - "loss": 46.0, - "step": 22675 - }, - { - "epoch": 1.7337385553452989, - "grad_norm": 0.0012620704947039485, - "learning_rate": 0.00019999851798217276, - "loss": 46.0, - "step": 22676 - }, - { - "epoch": 1.7338150123286886, - "grad_norm": 0.0014410660369321704, - "learning_rate": 0.00019999851785140006, - "loss": 46.0, - "step": 22677 - }, - { - "epoch": 1.7338914693120784, - "grad_norm": 0.0013877763412892818, - "learning_rate": 0.0001999985177206216, - "loss": 46.0, - "step": 22678 - }, - { - "epoch": 1.7339679262954681, - "grad_norm": 0.0009364584111608565, - "learning_rate": 0.0001999985175898374, - "loss": 46.0, - "step": 22679 - }, - { - "epoch": 1.7340443832788577, - "grad_norm": 0.0032117620576173067, - "learning_rate": 0.00019999851745904738, - "loss": 46.0, - "step": 22680 - }, - { - "epoch": 1.7341208402622474, - "grad_norm": 0.0013126885751262307, - "learning_rate": 0.0001999985173282516, - "loss": 46.0, - "step": 22681 - }, - { - "epoch": 1.7341972972456372, - "grad_norm": 0.0009948459919542074, - "learning_rate": 0.00019999851719745007, - "loss": 46.0, - "step": 22682 - }, - { - "epoch": 1.7342737542290267, - "grad_norm": 0.001783974003046751, - "learning_rate": 0.0001999985170666428, - "loss": 46.0, - "step": 22683 - }, - { - "epoch": 1.7343502112124165, - "grad_norm": 0.0024737364146858454, - "learning_rate": 0.0001999985169358297, - "loss": 46.0, - "step": 22684 - }, - { - "epoch": 1.7344266681958063, - "grad_norm": 0.0004796576395165175, - "learning_rate": 0.00019999851680501087, - "loss": 46.0, - "step": 22685 - }, - { - "epoch": 1.734503125179196, - "grad_norm": 0.0012784480350092053, - "learning_rate": 0.00019999851667418624, - "loss": 46.0, - "step": 22686 - }, - { - "epoch": 1.7345795821625858, - "grad_norm": 0.000598890008404851, - "learning_rate": 0.00019999851654335586, - "loss": 46.0, - "step": 22687 - }, - { - "epoch": 1.7346560391459755, - "grad_norm": 0.0007786693749949336, - "learning_rate": 0.0001999985164125197, - "loss": 46.0, - "step": 22688 - }, - { - "epoch": 1.7347324961293653, - "grad_norm": 0.0007252361974678934, - "learning_rate": 0.00019999851628167781, - "loss": 46.0, - "step": 22689 - }, - { - "epoch": 1.734808953112755, - "grad_norm": 0.004248403944075108, - "learning_rate": 0.00019999851615083012, - "loss": 46.0, - "step": 22690 - }, - { - "epoch": 1.7348854100961446, - "grad_norm": 0.010970363393425941, - "learning_rate": 0.00019999851601997665, - "loss": 46.0, - "step": 22691 - }, - { - "epoch": 1.7349618670795344, - "grad_norm": 0.001021255855448544, - "learning_rate": 0.00019999851588911743, - "loss": 46.0, - "step": 22692 - }, - { - "epoch": 1.7350383240629241, - "grad_norm": 0.0009164549992419779, - "learning_rate": 0.0001999985157582524, - "loss": 46.0, - "step": 22693 - }, - { - "epoch": 1.7351147810463137, - "grad_norm": 0.0006815766100771725, - "learning_rate": 0.00019999851562738167, - "loss": 46.0, - "step": 22694 - }, - { - "epoch": 1.7351912380297034, - "grad_norm": 0.007553113624453545, - "learning_rate": 0.00019999851549650513, - "loss": 46.0, - "step": 22695 - }, - { - "epoch": 1.7352676950130932, - "grad_norm": 0.0015128928935155272, - "learning_rate": 0.00019999851536562285, - "loss": 46.0, - "step": 22696 - }, - { - "epoch": 1.735344151996483, - "grad_norm": 0.0016350175719708204, - "learning_rate": 0.00019999851523473474, - "loss": 46.0, - "step": 22697 - }, - { - "epoch": 1.7354206089798727, - "grad_norm": 0.006948954425752163, - "learning_rate": 0.0001999985151038409, - "loss": 46.0, - "step": 22698 - }, - { - "epoch": 1.7354970659632625, - "grad_norm": 0.0008353106677532196, - "learning_rate": 0.0001999985149729413, - "loss": 46.0, - "step": 22699 - }, - { - "epoch": 1.7355735229466522, - "grad_norm": 0.00039935094537213445, - "learning_rate": 0.0001999985148420359, - "loss": 46.0, - "step": 22700 - }, - { - "epoch": 1.735649979930042, - "grad_norm": 0.0009470641962252557, - "learning_rate": 0.00019999851471112477, - "loss": 46.0, - "step": 22701 - }, - { - "epoch": 1.7357264369134315, - "grad_norm": 0.003697985550388694, - "learning_rate": 0.00019999851458020785, - "loss": 46.0, - "step": 22702 - }, - { - "epoch": 1.7358028938968213, - "grad_norm": 0.0004675001255236566, - "learning_rate": 0.00019999851444928515, - "loss": 46.0, - "step": 22703 - }, - { - "epoch": 1.735879350880211, - "grad_norm": 0.0009979365859180689, - "learning_rate": 0.0001999985143183567, - "loss": 46.0, - "step": 22704 - }, - { - "epoch": 1.7359558078636006, - "grad_norm": 0.0005944758886471391, - "learning_rate": 0.00019999851418742246, - "loss": 46.0, - "step": 22705 - }, - { - "epoch": 1.7360322648469904, - "grad_norm": 0.0012176495511084795, - "learning_rate": 0.00019999851405648247, - "loss": 46.0, - "step": 22706 - }, - { - "epoch": 1.7361087218303801, - "grad_norm": 0.0009234109311364591, - "learning_rate": 0.0001999985139255367, - "loss": 46.0, - "step": 22707 - }, - { - "epoch": 1.7361851788137699, - "grad_norm": 0.0022533521987497807, - "learning_rate": 0.00019999851379458514, - "loss": 46.0, - "step": 22708 - }, - { - "epoch": 1.7362616357971596, - "grad_norm": 0.003385854884982109, - "learning_rate": 0.00019999851366362786, - "loss": 46.0, - "step": 22709 - }, - { - "epoch": 1.7363380927805494, - "grad_norm": 0.0017720299074426293, - "learning_rate": 0.00019999851353266478, - "loss": 46.0, - "step": 22710 - }, - { - "epoch": 1.7364145497639392, - "grad_norm": 0.001349471160210669, - "learning_rate": 0.00019999851340169592, - "loss": 46.0, - "step": 22711 - }, - { - "epoch": 1.736491006747329, - "grad_norm": 0.0009567879023961723, - "learning_rate": 0.00019999851327072129, - "loss": 46.0, - "step": 22712 - }, - { - "epoch": 1.7365674637307185, - "grad_norm": 0.002546332310885191, - "learning_rate": 0.0001999985131397409, - "loss": 46.0, - "step": 22713 - }, - { - "epoch": 1.7366439207141082, - "grad_norm": 0.0012086735805496573, - "learning_rate": 0.00019999851300875476, - "loss": 46.0, - "step": 22714 - }, - { - "epoch": 1.736720377697498, - "grad_norm": 0.0005902419798076153, - "learning_rate": 0.00019999851287776286, - "loss": 46.0, - "step": 22715 - }, - { - "epoch": 1.7367968346808875, - "grad_norm": 0.0007699631969444454, - "learning_rate": 0.00019999851274676516, - "loss": 46.0, - "step": 22716 - }, - { - "epoch": 1.7368732916642773, - "grad_norm": 0.004581313580274582, - "learning_rate": 0.0001999985126157617, - "loss": 46.0, - "step": 22717 - }, - { - "epoch": 1.736949748647667, - "grad_norm": 0.0004327432543504983, - "learning_rate": 0.00019999851248475245, - "loss": 46.0, - "step": 22718 - }, - { - "epoch": 1.7370262056310568, - "grad_norm": 0.0006114005809649825, - "learning_rate": 0.00019999851235373749, - "loss": 46.0, - "step": 22719 - }, - { - "epoch": 1.7371026626144466, - "grad_norm": 0.0005895322537980974, - "learning_rate": 0.0001999985122227167, - "loss": 46.0, - "step": 22720 - }, - { - "epoch": 1.7371791195978363, - "grad_norm": 0.0008027162402868271, - "learning_rate": 0.00019999851209169016, - "loss": 46.0, - "step": 22721 - }, - { - "epoch": 1.737255576581226, - "grad_norm": 0.0018236652249470353, - "learning_rate": 0.00019999851196065787, - "loss": 46.0, - "step": 22722 - }, - { - "epoch": 1.7373320335646159, - "grad_norm": 0.0018301403615623713, - "learning_rate": 0.00019999851182961976, - "loss": 46.0, - "step": 22723 - }, - { - "epoch": 1.7374084905480054, - "grad_norm": 0.0003246336127631366, - "learning_rate": 0.00019999851169857593, - "loss": 46.0, - "step": 22724 - }, - { - "epoch": 1.7374849475313952, - "grad_norm": 0.000664866587612778, - "learning_rate": 0.0001999985115675263, - "loss": 46.0, - "step": 22725 - }, - { - "epoch": 1.737561404514785, - "grad_norm": 0.0006422539008781314, - "learning_rate": 0.00019999851143647092, - "loss": 46.0, - "step": 22726 - }, - { - "epoch": 1.7376378614981745, - "grad_norm": 0.0055303629487752914, - "learning_rate": 0.00019999851130540977, - "loss": 46.0, - "step": 22727 - }, - { - "epoch": 1.7377143184815642, - "grad_norm": 0.00352671486325562, - "learning_rate": 0.00019999851117434285, - "loss": 46.0, - "step": 22728 - }, - { - "epoch": 1.737790775464954, - "grad_norm": 0.0010309522040188313, - "learning_rate": 0.00019999851104327015, - "loss": 46.0, - "step": 22729 - }, - { - "epoch": 1.7378672324483437, - "grad_norm": 0.0019963423255831003, - "learning_rate": 0.00019999851091219168, - "loss": 46.0, - "step": 22730 - }, - { - "epoch": 1.7379436894317335, - "grad_norm": 0.0005471197655424476, - "learning_rate": 0.00019999851078110744, - "loss": 46.0, - "step": 22731 - }, - { - "epoch": 1.7380201464151233, - "grad_norm": 0.0015014695236459374, - "learning_rate": 0.00019999851065001747, - "loss": 46.0, - "step": 22732 - }, - { - "epoch": 1.738096603398513, - "grad_norm": 0.0008950633346103132, - "learning_rate": 0.00019999851051892166, - "loss": 46.0, - "step": 22733 - }, - { - "epoch": 1.7381730603819028, - "grad_norm": 0.001532626454718411, - "learning_rate": 0.00019999851038782015, - "loss": 46.0, - "step": 22734 - }, - { - "epoch": 1.7382495173652923, - "grad_norm": 0.0011636392446234822, - "learning_rate": 0.00019999851025671284, - "loss": 46.0, - "step": 22735 - }, - { - "epoch": 1.738325974348682, - "grad_norm": 0.0015476715052500367, - "learning_rate": 0.00019999851012559975, - "loss": 46.0, - "step": 22736 - }, - { - "epoch": 1.7384024313320718, - "grad_norm": 0.0028576315380632877, - "learning_rate": 0.0001999985099944809, - "loss": 46.0, - "step": 22737 - }, - { - "epoch": 1.7384788883154614, - "grad_norm": 0.001266071922145784, - "learning_rate": 0.0001999985098633563, - "loss": 46.0, - "step": 22738 - }, - { - "epoch": 1.7385553452988511, - "grad_norm": 0.0009416180546395481, - "learning_rate": 0.0001999985097322259, - "loss": 46.0, - "step": 22739 - }, - { - "epoch": 1.738631802282241, - "grad_norm": 0.0014856200432404876, - "learning_rate": 0.00019999850960108974, - "loss": 46.0, - "step": 22740 - }, - { - "epoch": 1.7387082592656307, - "grad_norm": 0.0008043196867220104, - "learning_rate": 0.00019999850946994782, - "loss": 46.0, - "step": 22741 - }, - { - "epoch": 1.7387847162490204, - "grad_norm": 0.000951383204665035, - "learning_rate": 0.00019999850933880012, - "loss": 46.0, - "step": 22742 - }, - { - "epoch": 1.7388611732324102, - "grad_norm": 0.0018324218690395355, - "learning_rate": 0.00019999850920764668, - "loss": 46.0, - "step": 22743 - }, - { - "epoch": 1.7389376302158, - "grad_norm": 0.002452875254675746, - "learning_rate": 0.0001999985090764874, - "loss": 46.0, - "step": 22744 - }, - { - "epoch": 1.7390140871991897, - "grad_norm": 0.004986785817891359, - "learning_rate": 0.00019999850894532241, - "loss": 46.0, - "step": 22745 - }, - { - "epoch": 1.7390905441825792, - "grad_norm": 0.0012780589750036597, - "learning_rate": 0.00019999850881415165, - "loss": 46.0, - "step": 22746 - }, - { - "epoch": 1.739167001165969, - "grad_norm": 0.0015120753087103367, - "learning_rate": 0.00019999850868297511, - "loss": 46.0, - "step": 22747 - }, - { - "epoch": 1.7392434581493588, - "grad_norm": 0.0019318959675729275, - "learning_rate": 0.00019999850855179278, - "loss": 46.0, - "step": 22748 - }, - { - "epoch": 1.7393199151327483, - "grad_norm": 0.0015932531096041203, - "learning_rate": 0.0001999985084206047, - "loss": 46.0, - "step": 22749 - }, - { - "epoch": 1.739396372116138, - "grad_norm": 0.00028493956779129803, - "learning_rate": 0.00019999850828941086, - "loss": 46.0, - "step": 22750 - }, - { - "epoch": 1.7394728290995278, - "grad_norm": 0.0009779352694749832, - "learning_rate": 0.00019999850815821123, - "loss": 46.0, - "step": 22751 - }, - { - "epoch": 1.7395492860829176, - "grad_norm": 0.0015636475291103125, - "learning_rate": 0.00019999850802700583, - "loss": 46.0, - "step": 22752 - }, - { - "epoch": 1.7396257430663074, - "grad_norm": 0.002212472492828965, - "learning_rate": 0.00019999850789579468, - "loss": 46.0, - "step": 22753 - }, - { - "epoch": 1.7397022000496971, - "grad_norm": 0.001162045169621706, - "learning_rate": 0.00019999850776457776, - "loss": 46.0, - "step": 22754 - }, - { - "epoch": 1.7397786570330869, - "grad_norm": 0.013619589619338512, - "learning_rate": 0.00019999850763335506, - "loss": 46.0, - "step": 22755 - }, - { - "epoch": 1.7398551140164766, - "grad_norm": 0.003062523202970624, - "learning_rate": 0.0001999985075021266, - "loss": 46.0, - "step": 22756 - }, - { - "epoch": 1.7399315709998662, - "grad_norm": 0.0007701321155764163, - "learning_rate": 0.00019999850737089238, - "loss": 46.0, - "step": 22757 - }, - { - "epoch": 1.740008027983256, - "grad_norm": 0.009220766834914684, - "learning_rate": 0.00019999850723965236, - "loss": 46.0, - "step": 22758 - }, - { - "epoch": 1.7400844849666457, - "grad_norm": 0.0037550898268818855, - "learning_rate": 0.00019999850710840657, - "loss": 46.0, - "step": 22759 - }, - { - "epoch": 1.7401609419500352, - "grad_norm": 0.0007332914974540472, - "learning_rate": 0.00019999850697715503, - "loss": 46.0, - "step": 22760 - }, - { - "epoch": 1.740237398933425, - "grad_norm": 0.0005844080587849021, - "learning_rate": 0.00019999850684589775, - "loss": 46.0, - "step": 22761 - }, - { - "epoch": 1.7403138559168148, - "grad_norm": 0.0007936040055938065, - "learning_rate": 0.00019999850671463464, - "loss": 46.0, - "step": 22762 - }, - { - "epoch": 1.7403903129002045, - "grad_norm": 0.00065904128132388, - "learning_rate": 0.00019999850658336578, - "loss": 46.0, - "step": 22763 - }, - { - "epoch": 1.7404667698835943, - "grad_norm": 0.0003065250057261437, - "learning_rate": 0.00019999850645209115, - "loss": 46.0, - "step": 22764 - }, - { - "epoch": 1.740543226866984, - "grad_norm": 0.0008610678487457335, - "learning_rate": 0.00019999850632081078, - "loss": 46.0, - "step": 22765 - }, - { - "epoch": 1.7406196838503738, - "grad_norm": 0.0008119586855173111, - "learning_rate": 0.0001999985061895246, - "loss": 46.0, - "step": 22766 - }, - { - "epoch": 1.7406961408337636, - "grad_norm": 0.0013643893180415034, - "learning_rate": 0.0001999985060582327, - "loss": 46.0, - "step": 22767 - }, - { - "epoch": 1.740772597817153, - "grad_norm": 0.0010365574853494763, - "learning_rate": 0.00019999850592693498, - "loss": 46.0, - "step": 22768 - }, - { - "epoch": 1.7408490548005429, - "grad_norm": 0.0007099217618815601, - "learning_rate": 0.0001999985057956315, - "loss": 46.0, - "step": 22769 - }, - { - "epoch": 1.7409255117839324, - "grad_norm": 0.0008160346769727767, - "learning_rate": 0.00019999850566432227, - "loss": 46.0, - "step": 22770 - }, - { - "epoch": 1.7410019687673222, - "grad_norm": 0.002936852630227804, - "learning_rate": 0.00019999850553300728, - "loss": 46.0, - "step": 22771 - }, - { - "epoch": 1.741078425750712, - "grad_norm": 0.000620995881035924, - "learning_rate": 0.0001999985054016865, - "loss": 46.0, - "step": 22772 - }, - { - "epoch": 1.7411548827341017, - "grad_norm": 0.001320533687248826, - "learning_rate": 0.00019999850527035995, - "loss": 46.0, - "step": 22773 - }, - { - "epoch": 1.7412313397174914, - "grad_norm": 0.000999670708552003, - "learning_rate": 0.00019999850513902762, - "loss": 46.0, - "step": 22774 - }, - { - "epoch": 1.7413077967008812, - "grad_norm": 0.0026237950660288334, - "learning_rate": 0.00019999850500768953, - "loss": 46.0, - "step": 22775 - }, - { - "epoch": 1.741384253684271, - "grad_norm": 0.0025458999443799257, - "learning_rate": 0.0001999985048763457, - "loss": 46.0, - "step": 22776 - }, - { - "epoch": 1.7414607106676607, - "grad_norm": 0.0011643661418929696, - "learning_rate": 0.00019999850474499608, - "loss": 46.0, - "step": 22777 - }, - { - "epoch": 1.7415371676510505, - "grad_norm": 0.00045163059257902205, - "learning_rate": 0.0001999985046136407, - "loss": 46.0, - "step": 22778 - }, - { - "epoch": 1.74161362463444, - "grad_norm": 0.0009960890747606754, - "learning_rate": 0.0001999985044822795, - "loss": 46.0, - "step": 22779 - }, - { - "epoch": 1.7416900816178298, - "grad_norm": 0.001297186827287078, - "learning_rate": 0.00019999850435091257, - "loss": 46.0, - "step": 22780 - }, - { - "epoch": 1.7417665386012193, - "grad_norm": 0.0013079263735562563, - "learning_rate": 0.0001999985042195399, - "loss": 46.0, - "step": 22781 - }, - { - "epoch": 1.741842995584609, - "grad_norm": 0.0013282245490700006, - "learning_rate": 0.0001999985040881614, - "loss": 46.0, - "step": 22782 - }, - { - "epoch": 1.7419194525679988, - "grad_norm": 0.0006855812389403582, - "learning_rate": 0.00019999850395677717, - "loss": 46.0, - "step": 22783 - }, - { - "epoch": 1.7419959095513886, - "grad_norm": 0.013218983076512814, - "learning_rate": 0.00019999850382538715, - "loss": 46.0, - "step": 22784 - }, - { - "epoch": 1.7420723665347784, - "grad_norm": 0.0005991904763504863, - "learning_rate": 0.0001999985036939914, - "loss": 46.0, - "step": 22785 - }, - { - "epoch": 1.7421488235181681, - "grad_norm": 0.0018436353420838714, - "learning_rate": 0.00019999850356258985, - "loss": 46.0, - "step": 22786 - }, - { - "epoch": 1.742225280501558, - "grad_norm": 0.0019221490947529674, - "learning_rate": 0.00019999850343118252, - "loss": 46.0, - "step": 22787 - }, - { - "epoch": 1.7423017374849477, - "grad_norm": 0.001245797029696405, - "learning_rate": 0.00019999850329976943, - "loss": 46.0, - "step": 22788 - }, - { - "epoch": 1.7423781944683374, - "grad_norm": 0.002214134903624654, - "learning_rate": 0.00019999850316835058, - "loss": 46.0, - "step": 22789 - }, - { - "epoch": 1.742454651451727, - "grad_norm": 0.0006869768840260804, - "learning_rate": 0.00019999850303692595, - "loss": 46.0, - "step": 22790 - }, - { - "epoch": 1.7425311084351167, - "grad_norm": 0.0008450874011032283, - "learning_rate": 0.00019999850290549555, - "loss": 46.0, - "step": 22791 - }, - { - "epoch": 1.7426075654185063, - "grad_norm": 0.0008311583078466356, - "learning_rate": 0.0001999985027740594, - "loss": 46.0, - "step": 22792 - }, - { - "epoch": 1.742684022401896, - "grad_norm": 0.0007313786773011088, - "learning_rate": 0.00019999850264261745, - "loss": 46.0, - "step": 22793 - }, - { - "epoch": 1.7427604793852858, - "grad_norm": 0.0015601444756612182, - "learning_rate": 0.00019999850251116975, - "loss": 46.0, - "step": 22794 - }, - { - "epoch": 1.7428369363686755, - "grad_norm": 0.0024768984876573086, - "learning_rate": 0.00019999850237971626, - "loss": 46.0, - "step": 22795 - }, - { - "epoch": 1.7429133933520653, - "grad_norm": 0.000522041053045541, - "learning_rate": 0.00019999850224825701, - "loss": 46.0, - "step": 22796 - }, - { - "epoch": 1.742989850335455, - "grad_norm": 0.0008263241034001112, - "learning_rate": 0.00019999850211679202, - "loss": 46.0, - "step": 22797 - }, - { - "epoch": 1.7430663073188448, - "grad_norm": 0.001271361019462347, - "learning_rate": 0.00019999850198532126, - "loss": 46.0, - "step": 22798 - }, - { - "epoch": 1.7431427643022346, - "grad_norm": 0.001528938882984221, - "learning_rate": 0.0001999985018538447, - "loss": 46.0, - "step": 22799 - }, - { - "epoch": 1.7432192212856241, - "grad_norm": 0.000709356798324734, - "learning_rate": 0.0001999985017223624, - "loss": 46.0, - "step": 22800 - }, - { - "epoch": 1.7432956782690139, - "grad_norm": 0.002937842858955264, - "learning_rate": 0.0001999985015908743, - "loss": 46.0, - "step": 22801 - }, - { - "epoch": 1.7433721352524036, - "grad_norm": 0.0006658356287516654, - "learning_rate": 0.00019999850145938043, - "loss": 46.0, - "step": 22802 - }, - { - "epoch": 1.7434485922357932, - "grad_norm": 0.0014967272290959954, - "learning_rate": 0.0001999985013278808, - "loss": 46.0, - "step": 22803 - }, - { - "epoch": 1.743525049219183, - "grad_norm": 0.0020940289832651615, - "learning_rate": 0.00019999850119637542, - "loss": 46.0, - "step": 22804 - }, - { - "epoch": 1.7436015062025727, - "grad_norm": 0.0007779986481182277, - "learning_rate": 0.00019999850106486425, - "loss": 46.0, - "step": 22805 - }, - { - "epoch": 1.7436779631859625, - "grad_norm": 0.0008359465282410383, - "learning_rate": 0.0001999985009333473, - "loss": 46.0, - "step": 22806 - }, - { - "epoch": 1.7437544201693522, - "grad_norm": 0.0016202996484935284, - "learning_rate": 0.0001999985008018246, - "loss": 46.0, - "step": 22807 - }, - { - "epoch": 1.743830877152742, - "grad_norm": 0.000525411160197109, - "learning_rate": 0.00019999850067029613, - "loss": 46.0, - "step": 22808 - }, - { - "epoch": 1.7439073341361317, - "grad_norm": 0.0006692411843687296, - "learning_rate": 0.0001999985005387619, - "loss": 46.0, - "step": 22809 - }, - { - "epoch": 1.7439837911195215, - "grad_norm": 0.0014965205918997526, - "learning_rate": 0.00019999850040722188, - "loss": 46.0, - "step": 22810 - }, - { - "epoch": 1.744060248102911, - "grad_norm": 0.0017924440326169133, - "learning_rate": 0.0001999985002756761, - "loss": 46.0, - "step": 22811 - }, - { - "epoch": 1.7441367050863008, - "grad_norm": 0.0030297625344246626, - "learning_rate": 0.00019999850014412455, - "loss": 46.0, - "step": 22812 - }, - { - "epoch": 1.7442131620696906, - "grad_norm": 0.0009215819300152361, - "learning_rate": 0.00019999850001256722, - "loss": 46.0, - "step": 22813 - }, - { - "epoch": 1.74428961905308, - "grad_norm": 0.00822428334504366, - "learning_rate": 0.00019999849988100414, - "loss": 46.0, - "step": 22814 - }, - { - "epoch": 1.7443660760364699, - "grad_norm": 0.0009410441271029413, - "learning_rate": 0.00019999849974943528, - "loss": 46.0, - "step": 22815 - }, - { - "epoch": 1.7444425330198596, - "grad_norm": 0.0010528520215302706, - "learning_rate": 0.00019999849961786062, - "loss": 46.0, - "step": 22816 - }, - { - "epoch": 1.7445189900032494, - "grad_norm": 0.001200356287881732, - "learning_rate": 0.00019999849948628025, - "loss": 46.0, - "step": 22817 - }, - { - "epoch": 1.7445954469866392, - "grad_norm": 0.00629137409850955, - "learning_rate": 0.00019999849935469407, - "loss": 46.0, - "step": 22818 - }, - { - "epoch": 1.744671903970029, - "grad_norm": 0.0025854301638901234, - "learning_rate": 0.00019999849922310215, - "loss": 46.0, - "step": 22819 - }, - { - "epoch": 1.7447483609534187, - "grad_norm": 0.001349236466921866, - "learning_rate": 0.00019999849909150443, - "loss": 46.0, - "step": 22820 - }, - { - "epoch": 1.7448248179368084, - "grad_norm": 0.0007376992725767195, - "learning_rate": 0.00019999849895990094, - "loss": 46.0, - "step": 22821 - }, - { - "epoch": 1.744901274920198, - "grad_norm": 0.0024189534597098827, - "learning_rate": 0.00019999849882829172, - "loss": 46.0, - "step": 22822 - }, - { - "epoch": 1.7449777319035877, - "grad_norm": 0.001447937567718327, - "learning_rate": 0.0001999984986966767, - "loss": 46.0, - "step": 22823 - }, - { - "epoch": 1.7450541888869775, - "grad_norm": 0.001649067155085504, - "learning_rate": 0.0001999984985650559, - "loss": 46.0, - "step": 22824 - }, - { - "epoch": 1.745130645870367, - "grad_norm": 0.0008314444567076862, - "learning_rate": 0.00019999849843342938, - "loss": 46.0, - "step": 22825 - }, - { - "epoch": 1.7452071028537568, - "grad_norm": 0.0012076111743226647, - "learning_rate": 0.00019999849830179705, - "loss": 46.0, - "step": 22826 - }, - { - "epoch": 1.7452835598371466, - "grad_norm": 0.003345315344631672, - "learning_rate": 0.00019999849817015894, - "loss": 46.0, - "step": 22827 - }, - { - "epoch": 1.7453600168205363, - "grad_norm": 0.000997675582766533, - "learning_rate": 0.00019999849803851508, - "loss": 46.0, - "step": 22828 - }, - { - "epoch": 1.745436473803926, - "grad_norm": 0.0014490509638562799, - "learning_rate": 0.00019999849790686546, - "loss": 46.0, - "step": 22829 - }, - { - "epoch": 1.7455129307873158, - "grad_norm": 0.0012346961302682757, - "learning_rate": 0.00019999849777521008, - "loss": 46.0, - "step": 22830 - }, - { - "epoch": 1.7455893877707056, - "grad_norm": 0.0004516993649303913, - "learning_rate": 0.0001999984976435489, - "loss": 46.0, - "step": 22831 - }, - { - "epoch": 1.7456658447540954, - "grad_norm": 0.0026130510959774256, - "learning_rate": 0.00019999849751188196, - "loss": 46.0, - "step": 22832 - }, - { - "epoch": 1.745742301737485, - "grad_norm": 0.0012400804553180933, - "learning_rate": 0.00019999849738020924, - "loss": 46.0, - "step": 22833 - }, - { - "epoch": 1.7458187587208747, - "grad_norm": 0.0015285499393939972, - "learning_rate": 0.00019999849724853074, - "loss": 46.0, - "step": 22834 - }, - { - "epoch": 1.7458952157042644, - "grad_norm": 0.0011443976545706391, - "learning_rate": 0.00019999849711684653, - "loss": 46.0, - "step": 22835 - }, - { - "epoch": 1.745971672687654, - "grad_norm": 0.0007074170862324536, - "learning_rate": 0.00019999849698515648, - "loss": 46.0, - "step": 22836 - }, - { - "epoch": 1.7460481296710437, - "grad_norm": 0.0034424017649143934, - "learning_rate": 0.00019999849685346072, - "loss": 46.0, - "step": 22837 - }, - { - "epoch": 1.7461245866544335, - "grad_norm": 0.0018888873746618629, - "learning_rate": 0.00019999849672175913, - "loss": 46.0, - "step": 22838 - }, - { - "epoch": 1.7462010436378232, - "grad_norm": 0.0008307318203151226, - "learning_rate": 0.00019999849659005183, - "loss": 46.0, - "step": 22839 - }, - { - "epoch": 1.746277500621213, - "grad_norm": 0.0007107091369107366, - "learning_rate": 0.00019999849645833875, - "loss": 46.0, - "step": 22840 - }, - { - "epoch": 1.7463539576046028, - "grad_norm": 0.0010909700067713857, - "learning_rate": 0.0001999984963266199, - "loss": 46.0, - "step": 22841 - }, - { - "epoch": 1.7464304145879925, - "grad_norm": 0.001032295753248036, - "learning_rate": 0.00019999849619489526, - "loss": 46.0, - "step": 22842 - }, - { - "epoch": 1.7465068715713823, - "grad_norm": 0.0051668556407094, - "learning_rate": 0.00019999849606316486, - "loss": 46.0, - "step": 22843 - }, - { - "epoch": 1.7465833285547718, - "grad_norm": 0.0015473958337679505, - "learning_rate": 0.0001999984959314287, - "loss": 46.0, - "step": 22844 - }, - { - "epoch": 1.7466597855381616, - "grad_norm": 0.0013182054972276092, - "learning_rate": 0.0001999984957996867, - "loss": 46.0, - "step": 22845 - }, - { - "epoch": 1.7467362425215514, - "grad_norm": 0.0014130270574241877, - "learning_rate": 0.00019999849566793902, - "loss": 46.0, - "step": 22846 - }, - { - "epoch": 1.746812699504941, - "grad_norm": 0.0007180745014920831, - "learning_rate": 0.00019999849553618555, - "loss": 46.0, - "step": 22847 - }, - { - "epoch": 1.7468891564883307, - "grad_norm": 0.0026254537515342236, - "learning_rate": 0.00019999849540442628, - "loss": 46.0, - "step": 22848 - }, - { - "epoch": 1.7469656134717204, - "grad_norm": 0.0009421589784324169, - "learning_rate": 0.00019999849527266127, - "loss": 46.0, - "step": 22849 - }, - { - "epoch": 1.7470420704551102, - "grad_norm": 0.0006869077915325761, - "learning_rate": 0.00019999849514089048, - "loss": 46.0, - "step": 22850 - }, - { - "epoch": 1.7471185274385, - "grad_norm": 0.0007505064131692052, - "learning_rate": 0.00019999849500911392, - "loss": 46.0, - "step": 22851 - }, - { - "epoch": 1.7471949844218897, - "grad_norm": 0.0004789851373061538, - "learning_rate": 0.0001999984948773316, - "loss": 46.0, - "step": 22852 - }, - { - "epoch": 1.7472714414052795, - "grad_norm": 0.0008028162992559373, - "learning_rate": 0.0001999984947455435, - "loss": 46.0, - "step": 22853 - }, - { - "epoch": 1.7473478983886692, - "grad_norm": 0.0013775058323517442, - "learning_rate": 0.00019999849461374965, - "loss": 46.0, - "step": 22854 - }, - { - "epoch": 1.7474243553720588, - "grad_norm": 0.0004190747858956456, - "learning_rate": 0.00019999849448195, - "loss": 46.0, - "step": 22855 - }, - { - "epoch": 1.7475008123554485, - "grad_norm": 0.003468609880656004, - "learning_rate": 0.0001999984943501446, - "loss": 46.0, - "step": 22856 - }, - { - "epoch": 1.7475772693388383, - "grad_norm": 0.0013469042023643851, - "learning_rate": 0.00019999849421833345, - "loss": 46.0, - "step": 22857 - }, - { - "epoch": 1.7476537263222278, - "grad_norm": 0.0012016008840873837, - "learning_rate": 0.0001999984940865165, - "loss": 46.0, - "step": 22858 - }, - { - "epoch": 1.7477301833056176, - "grad_norm": 0.0007089224527589977, - "learning_rate": 0.00019999849395469378, - "loss": 46.0, - "step": 22859 - }, - { - "epoch": 1.7478066402890073, - "grad_norm": 0.007833764888346195, - "learning_rate": 0.0001999984938228653, - "loss": 46.0, - "step": 22860 - }, - { - "epoch": 1.747883097272397, - "grad_norm": 0.001163505483418703, - "learning_rate": 0.00019999849369103104, - "loss": 46.0, - "step": 22861 - }, - { - "epoch": 1.7479595542557869, - "grad_norm": 0.00446652015671134, - "learning_rate": 0.00019999849355919103, - "loss": 46.0, - "step": 22862 - }, - { - "epoch": 1.7480360112391766, - "grad_norm": 0.0031159506179392338, - "learning_rate": 0.00019999849342734524, - "loss": 46.0, - "step": 22863 - }, - { - "epoch": 1.7481124682225664, - "grad_norm": 0.0004764788900502026, - "learning_rate": 0.0001999984932954937, - "loss": 46.0, - "step": 22864 - }, - { - "epoch": 1.7481889252059561, - "grad_norm": 0.0005654712440446019, - "learning_rate": 0.00019999849316363634, - "loss": 46.0, - "step": 22865 - }, - { - "epoch": 1.7482653821893457, - "grad_norm": 0.0013280424755066633, - "learning_rate": 0.00019999849303177324, - "loss": 46.0, - "step": 22866 - }, - { - "epoch": 1.7483418391727354, - "grad_norm": 0.0020434551406651735, - "learning_rate": 0.0001999984928999044, - "loss": 46.0, - "step": 22867 - }, - { - "epoch": 1.7484182961561252, - "grad_norm": 0.0008360739448107779, - "learning_rate": 0.00019999849276802975, - "loss": 46.0, - "step": 22868 - }, - { - "epoch": 1.7484947531395147, - "grad_norm": 0.0031081035267561674, - "learning_rate": 0.00019999849263614933, - "loss": 46.0, - "step": 22869 - }, - { - "epoch": 1.7485712101229045, - "grad_norm": 0.0017827115952968597, - "learning_rate": 0.00019999849250426318, - "loss": 46.0, - "step": 22870 - }, - { - "epoch": 1.7486476671062943, - "grad_norm": 0.004078402183949947, - "learning_rate": 0.00019999849237237123, - "loss": 46.0, - "step": 22871 - }, - { - "epoch": 1.748724124089684, - "grad_norm": 0.0007813745178282261, - "learning_rate": 0.0001999984922404735, - "loss": 46.0, - "step": 22872 - }, - { - "epoch": 1.7488005810730738, - "grad_norm": 0.0005173644749447703, - "learning_rate": 0.00019999849210857002, - "loss": 46.0, - "step": 22873 - }, - { - "epoch": 1.7488770380564636, - "grad_norm": 0.0005492236814461648, - "learning_rate": 0.00019999849197666078, - "loss": 46.0, - "step": 22874 - }, - { - "epoch": 1.7489534950398533, - "grad_norm": 0.0014844798715785146, - "learning_rate": 0.00019999849184474576, - "loss": 46.0, - "step": 22875 - }, - { - "epoch": 1.749029952023243, - "grad_norm": 0.0006212327280081809, - "learning_rate": 0.00019999849171282498, - "loss": 46.0, - "step": 22876 - }, - { - "epoch": 1.7491064090066326, - "grad_norm": 0.00043857062701135874, - "learning_rate": 0.00019999849158089842, - "loss": 46.0, - "step": 22877 - }, - { - "epoch": 1.7491828659900224, - "grad_norm": 0.0007692112121731043, - "learning_rate": 0.00019999849144896608, - "loss": 46.0, - "step": 22878 - }, - { - "epoch": 1.7492593229734121, - "grad_norm": 0.0006290115416049957, - "learning_rate": 0.000199998491317028, - "loss": 46.0, - "step": 22879 - }, - { - "epoch": 1.7493357799568017, - "grad_norm": 0.0035379943437874317, - "learning_rate": 0.00019999849118508412, - "loss": 46.0, - "step": 22880 - }, - { - "epoch": 1.7494122369401914, - "grad_norm": 0.001420535147190094, - "learning_rate": 0.00019999849105313447, - "loss": 46.0, - "step": 22881 - }, - { - "epoch": 1.7494886939235812, - "grad_norm": 0.0011333078145980835, - "learning_rate": 0.00019999849092117907, - "loss": 46.0, - "step": 22882 - }, - { - "epoch": 1.749565150906971, - "grad_norm": 0.018663544207811356, - "learning_rate": 0.0001999984907892179, - "loss": 46.0, - "step": 22883 - }, - { - "epoch": 1.7496416078903607, - "grad_norm": 0.0029617047403007746, - "learning_rate": 0.00019999849065725095, - "loss": 46.0, - "step": 22884 - }, - { - "epoch": 1.7497180648737505, - "grad_norm": 0.0011189765064045787, - "learning_rate": 0.00019999849052527822, - "loss": 46.0, - "step": 22885 - }, - { - "epoch": 1.7497945218571402, - "grad_norm": 0.0007039718911983073, - "learning_rate": 0.00019999849039329973, - "loss": 46.0, - "step": 22886 - }, - { - "epoch": 1.74987097884053, - "grad_norm": 0.0008867841097526252, - "learning_rate": 0.0001999984902613155, - "loss": 46.0, - "step": 22887 - }, - { - "epoch": 1.7499474358239195, - "grad_norm": 0.0005496154772117734, - "learning_rate": 0.00019999849012932548, - "loss": 46.0, - "step": 22888 - }, - { - "epoch": 1.7500238928073093, - "grad_norm": 0.0009427402401342988, - "learning_rate": 0.0001999984899973297, - "loss": 46.0, - "step": 22889 - }, - { - "epoch": 1.750100349790699, - "grad_norm": 0.003354016225785017, - "learning_rate": 0.00019999848986532813, - "loss": 46.0, - "step": 22890 - }, - { - "epoch": 1.7501768067740886, - "grad_norm": 0.00044121965765953064, - "learning_rate": 0.00019999848973332077, - "loss": 46.0, - "step": 22891 - }, - { - "epoch": 1.7502532637574784, - "grad_norm": 0.0006626663380302489, - "learning_rate": 0.00019999848960130766, - "loss": 46.0, - "step": 22892 - }, - { - "epoch": 1.7503297207408681, - "grad_norm": 0.0046019949950277805, - "learning_rate": 0.0001999984894692888, - "loss": 46.0, - "step": 22893 - }, - { - "epoch": 1.7504061777242579, - "grad_norm": 0.005321100354194641, - "learning_rate": 0.00019999848933726418, - "loss": 46.0, - "step": 22894 - }, - { - "epoch": 1.7504826347076476, - "grad_norm": 0.0009239411447197199, - "learning_rate": 0.00019999848920523375, - "loss": 46.0, - "step": 22895 - }, - { - "epoch": 1.7505590916910374, - "grad_norm": 0.0020021754316985607, - "learning_rate": 0.00019999848907319758, - "loss": 46.0, - "step": 22896 - }, - { - "epoch": 1.7506355486744272, - "grad_norm": 0.005139068700373173, - "learning_rate": 0.00019999848894115566, - "loss": 46.0, - "step": 22897 - }, - { - "epoch": 1.750712005657817, - "grad_norm": 0.00038632916403003037, - "learning_rate": 0.00019999848880910791, - "loss": 46.0, - "step": 22898 - }, - { - "epoch": 1.7507884626412065, - "grad_norm": 0.000553794379811734, - "learning_rate": 0.00019999848867705442, - "loss": 46.0, - "step": 22899 - }, - { - "epoch": 1.7508649196245962, - "grad_norm": 0.00201168074272573, - "learning_rate": 0.00019999848854499515, - "loss": 46.0, - "step": 22900 - }, - { - "epoch": 1.7509413766079858, - "grad_norm": 0.0006868286873213947, - "learning_rate": 0.00019999848841293014, - "loss": 46.0, - "step": 22901 - }, - { - "epoch": 1.7510178335913755, - "grad_norm": 0.004044482950121164, - "learning_rate": 0.00019999848828085935, - "loss": 46.0, - "step": 22902 - }, - { - "epoch": 1.7510942905747653, - "grad_norm": 0.0012502634199336171, - "learning_rate": 0.0001999984881487828, - "loss": 46.0, - "step": 22903 - }, - { - "epoch": 1.751170747558155, - "grad_norm": 0.0013042453210800886, - "learning_rate": 0.00019999848801670046, - "loss": 46.0, - "step": 22904 - }, - { - "epoch": 1.7512472045415448, - "grad_norm": 0.0057823313400149345, - "learning_rate": 0.00019999848788461235, - "loss": 46.0, - "step": 22905 - }, - { - "epoch": 1.7513236615249346, - "grad_norm": 0.0007642376003786922, - "learning_rate": 0.00019999848775251847, - "loss": 46.0, - "step": 22906 - }, - { - "epoch": 1.7514001185083243, - "grad_norm": 0.0020913316402584314, - "learning_rate": 0.00019999848762041885, - "loss": 46.0, - "step": 22907 - }, - { - "epoch": 1.751476575491714, - "grad_norm": 0.001820555655285716, - "learning_rate": 0.00019999848748831345, - "loss": 46.0, - "step": 22908 - }, - { - "epoch": 1.7515530324751039, - "grad_norm": 0.004824264906346798, - "learning_rate": 0.00019999848735620225, - "loss": 46.0, - "step": 22909 - }, - { - "epoch": 1.7516294894584934, - "grad_norm": 0.0040700905956327915, - "learning_rate": 0.0001999984872240853, - "loss": 46.0, - "step": 22910 - }, - { - "epoch": 1.7517059464418832, - "grad_norm": 0.0009885140461847186, - "learning_rate": 0.00019999848709196258, - "loss": 46.0, - "step": 22911 - }, - { - "epoch": 1.7517824034252727, - "grad_norm": 0.0007716494728811085, - "learning_rate": 0.0001999984869598341, - "loss": 46.0, - "step": 22912 - }, - { - "epoch": 1.7518588604086625, - "grad_norm": 0.0009181308560073376, - "learning_rate": 0.00019999848682769985, - "loss": 46.0, - "step": 22913 - }, - { - "epoch": 1.7519353173920522, - "grad_norm": 0.0005727520911023021, - "learning_rate": 0.0001999984866955598, - "loss": 46.0, - "step": 22914 - }, - { - "epoch": 1.752011774375442, - "grad_norm": 0.000605431676376611, - "learning_rate": 0.000199998486563414, - "loss": 46.0, - "step": 22915 - }, - { - "epoch": 1.7520882313588317, - "grad_norm": 0.0036140515003353357, - "learning_rate": 0.00019999848643126244, - "loss": 46.0, - "step": 22916 - }, - { - "epoch": 1.7521646883422215, - "grad_norm": 0.0013606420252472162, - "learning_rate": 0.0001999984862991051, - "loss": 46.0, - "step": 22917 - }, - { - "epoch": 1.7522411453256113, - "grad_norm": 0.0018541462486609817, - "learning_rate": 0.00019999848616694202, - "loss": 46.0, - "step": 22918 - }, - { - "epoch": 1.752317602309001, - "grad_norm": 0.0006072652759030461, - "learning_rate": 0.00019999848603477314, - "loss": 46.0, - "step": 22919 - }, - { - "epoch": 1.7523940592923908, - "grad_norm": 0.0016855557914823294, - "learning_rate": 0.0001999984859025985, - "loss": 46.0, - "step": 22920 - }, - { - "epoch": 1.7524705162757803, - "grad_norm": 0.0008594723185524344, - "learning_rate": 0.00019999848577041806, - "loss": 46.0, - "step": 22921 - }, - { - "epoch": 1.75254697325917, - "grad_norm": 0.0010709441266953945, - "learning_rate": 0.0001999984856382319, - "loss": 46.0, - "step": 22922 - }, - { - "epoch": 1.7526234302425596, - "grad_norm": 0.00046520476462319493, - "learning_rate": 0.00019999848550603994, - "loss": 46.0, - "step": 22923 - }, - { - "epoch": 1.7526998872259494, - "grad_norm": 0.003397207008674741, - "learning_rate": 0.00019999848537384222, - "loss": 46.0, - "step": 22924 - }, - { - "epoch": 1.7527763442093391, - "grad_norm": 0.0015184527728706598, - "learning_rate": 0.00019999848524163873, - "loss": 46.0, - "step": 22925 - }, - { - "epoch": 1.752852801192729, - "grad_norm": 0.0005837680073454976, - "learning_rate": 0.00019999848510942947, - "loss": 46.0, - "step": 22926 - }, - { - "epoch": 1.7529292581761187, - "grad_norm": 0.0010285830358043313, - "learning_rate": 0.00019999848497721445, - "loss": 46.0, - "step": 22927 - }, - { - "epoch": 1.7530057151595084, - "grad_norm": 0.0011810037540271878, - "learning_rate": 0.00019999848484499367, - "loss": 46.0, - "step": 22928 - }, - { - "epoch": 1.7530821721428982, - "grad_norm": 0.0004259173874743283, - "learning_rate": 0.00019999848471276708, - "loss": 46.0, - "step": 22929 - }, - { - "epoch": 1.753158629126288, - "grad_norm": 0.0010500161442905664, - "learning_rate": 0.00019999848458053475, - "loss": 46.0, - "step": 22930 - }, - { - "epoch": 1.7532350861096775, - "grad_norm": 0.0011475651990622282, - "learning_rate": 0.00019999848444829664, - "loss": 46.0, - "step": 22931 - }, - { - "epoch": 1.7533115430930672, - "grad_norm": 0.00923268124461174, - "learning_rate": 0.00019999848431605276, - "loss": 46.0, - "step": 22932 - }, - { - "epoch": 1.753388000076457, - "grad_norm": 0.0013598434161394835, - "learning_rate": 0.0001999984841838031, - "loss": 46.0, - "step": 22933 - }, - { - "epoch": 1.7534644570598465, - "grad_norm": 0.0029720028396695852, - "learning_rate": 0.0001999984840515477, - "loss": 46.0, - "step": 22934 - }, - { - "epoch": 1.7535409140432363, - "grad_norm": 0.0008924399735406041, - "learning_rate": 0.0001999984839192865, - "loss": 46.0, - "step": 22935 - }, - { - "epoch": 1.753617371026626, - "grad_norm": 0.0014193335082381964, - "learning_rate": 0.00019999848378701956, - "loss": 46.0, - "step": 22936 - }, - { - "epoch": 1.7536938280100158, - "grad_norm": 0.00140817416831851, - "learning_rate": 0.00019999848365474685, - "loss": 46.0, - "step": 22937 - }, - { - "epoch": 1.7537702849934056, - "grad_norm": 0.0007353240507654846, - "learning_rate": 0.00019999848352246835, - "loss": 46.0, - "step": 22938 - }, - { - "epoch": 1.7538467419767954, - "grad_norm": 0.0014878481160849333, - "learning_rate": 0.0001999984833901841, - "loss": 46.0, - "step": 22939 - }, - { - "epoch": 1.7539231989601851, - "grad_norm": 0.0021783888805657625, - "learning_rate": 0.00019999848325789405, - "loss": 46.0, - "step": 22940 - }, - { - "epoch": 1.7539996559435749, - "grad_norm": 0.0012715599732473493, - "learning_rate": 0.00019999848312559826, - "loss": 46.0, - "step": 22941 - }, - { - "epoch": 1.7540761129269644, - "grad_norm": 0.0012889860663563013, - "learning_rate": 0.00019999848299329668, - "loss": 46.0, - "step": 22942 - }, - { - "epoch": 1.7541525699103542, - "grad_norm": 0.000898173835594207, - "learning_rate": 0.00019999848286098937, - "loss": 46.0, - "step": 22943 - }, - { - "epoch": 1.754229026893744, - "grad_norm": 0.0027461578138172626, - "learning_rate": 0.00019999848272867624, - "loss": 46.0, - "step": 22944 - }, - { - "epoch": 1.7543054838771335, - "grad_norm": 0.0007422103080898523, - "learning_rate": 0.00019999848259635736, - "loss": 46.0, - "step": 22945 - }, - { - "epoch": 1.7543819408605232, - "grad_norm": 0.00827016681432724, - "learning_rate": 0.00019999848246403273, - "loss": 46.0, - "step": 22946 - }, - { - "epoch": 1.754458397843913, - "grad_norm": 0.00832771323621273, - "learning_rate": 0.0001999984823317023, - "loss": 46.0, - "step": 22947 - }, - { - "epoch": 1.7545348548273028, - "grad_norm": 0.0016653466736897826, - "learning_rate": 0.00019999848219936613, - "loss": 46.0, - "step": 22948 - }, - { - "epoch": 1.7546113118106925, - "grad_norm": 0.0019959560595452785, - "learning_rate": 0.00019999848206702416, - "loss": 46.0, - "step": 22949 - }, - { - "epoch": 1.7546877687940823, - "grad_norm": 0.001784514868631959, - "learning_rate": 0.00019999848193467642, - "loss": 46.0, - "step": 22950 - }, - { - "epoch": 1.754764225777472, - "grad_norm": 0.00265725189819932, - "learning_rate": 0.00019999848180232295, - "loss": 46.0, - "step": 22951 - }, - { - "epoch": 1.7548406827608618, - "grad_norm": 0.0005549251800402999, - "learning_rate": 0.00019999848166996369, - "loss": 46.0, - "step": 22952 - }, - { - "epoch": 1.7549171397442513, - "grad_norm": 0.003805492538958788, - "learning_rate": 0.00019999848153759865, - "loss": 46.0, - "step": 22953 - }, - { - "epoch": 1.754993596727641, - "grad_norm": 0.001588365063071251, - "learning_rate": 0.00019999848140522786, - "loss": 46.0, - "step": 22954 - }, - { - "epoch": 1.7550700537110309, - "grad_norm": 0.0008523932774551213, - "learning_rate": 0.00019999848127285128, - "loss": 46.0, - "step": 22955 - }, - { - "epoch": 1.7551465106944204, - "grad_norm": 0.002328865695744753, - "learning_rate": 0.00019999848114046894, - "loss": 46.0, - "step": 22956 - }, - { - "epoch": 1.7552229676778102, - "grad_norm": 0.0005983063601888716, - "learning_rate": 0.00019999848100808084, - "loss": 46.0, - "step": 22957 - }, - { - "epoch": 1.7552994246612, - "grad_norm": 0.00479264697059989, - "learning_rate": 0.00019999848087568696, - "loss": 46.0, - "step": 22958 - }, - { - "epoch": 1.7553758816445897, - "grad_norm": 0.0005162210436537862, - "learning_rate": 0.0001999984807432873, - "loss": 46.0, - "step": 22959 - }, - { - "epoch": 1.7554523386279794, - "grad_norm": 0.0008861337555572391, - "learning_rate": 0.00019999848061088188, - "loss": 46.0, - "step": 22960 - }, - { - "epoch": 1.7555287956113692, - "grad_norm": 0.0009791561169549823, - "learning_rate": 0.0001999984804784707, - "loss": 46.0, - "step": 22961 - }, - { - "epoch": 1.755605252594759, - "grad_norm": 0.0018878286937251687, - "learning_rate": 0.00019999848034605374, - "loss": 46.0, - "step": 22962 - }, - { - "epoch": 1.7556817095781487, - "grad_norm": 0.0012427836190909147, - "learning_rate": 0.00019999848021363102, - "loss": 46.0, - "step": 22963 - }, - { - "epoch": 1.7557581665615383, - "grad_norm": 0.0010017188033089042, - "learning_rate": 0.00019999848008120253, - "loss": 46.0, - "step": 22964 - }, - { - "epoch": 1.755834623544928, - "grad_norm": 0.001413905993103981, - "learning_rate": 0.00019999847994876826, - "loss": 46.0, - "step": 22965 - }, - { - "epoch": 1.7559110805283178, - "grad_norm": 0.001142196706496179, - "learning_rate": 0.00019999847981632822, - "loss": 46.0, - "step": 22966 - }, - { - "epoch": 1.7559875375117073, - "grad_norm": 0.0004801061295438558, - "learning_rate": 0.00019999847968388244, - "loss": 46.0, - "step": 22967 - }, - { - "epoch": 1.756063994495097, - "grad_norm": 0.0002558489504735917, - "learning_rate": 0.00019999847955143085, - "loss": 46.0, - "step": 22968 - }, - { - "epoch": 1.7561404514784869, - "grad_norm": 0.0006692885654047132, - "learning_rate": 0.0001999984794189735, - "loss": 46.0, - "step": 22969 - }, - { - "epoch": 1.7562169084618766, - "grad_norm": 0.000777937937527895, - "learning_rate": 0.0001999984792865104, - "loss": 46.0, - "step": 22970 - }, - { - "epoch": 1.7562933654452664, - "grad_norm": 0.002101212739944458, - "learning_rate": 0.0001999984791540415, - "loss": 46.0, - "step": 22971 - }, - { - "epoch": 1.7563698224286561, - "grad_norm": 0.004076733253896236, - "learning_rate": 0.00019999847902156686, - "loss": 46.0, - "step": 22972 - }, - { - "epoch": 1.756446279412046, - "grad_norm": 0.00183341302908957, - "learning_rate": 0.00019999847888908643, - "loss": 46.0, - "step": 22973 - }, - { - "epoch": 1.7565227363954357, - "grad_norm": 0.0008881328976713121, - "learning_rate": 0.00019999847875660026, - "loss": 46.0, - "step": 22974 - }, - { - "epoch": 1.7565991933788252, - "grad_norm": 0.0024700553622096777, - "learning_rate": 0.0001999984786241083, - "loss": 46.0, - "step": 22975 - }, - { - "epoch": 1.756675650362215, - "grad_norm": 0.004311004187911749, - "learning_rate": 0.00019999847849161054, - "loss": 46.0, - "step": 22976 - }, - { - "epoch": 1.7567521073456047, - "grad_norm": 0.000527957221493125, - "learning_rate": 0.00019999847835910708, - "loss": 46.0, - "step": 22977 - }, - { - "epoch": 1.7568285643289943, - "grad_norm": 0.002179940929636359, - "learning_rate": 0.0001999984782265978, - "loss": 46.0, - "step": 22978 - }, - { - "epoch": 1.756905021312384, - "grad_norm": 0.001060448819771409, - "learning_rate": 0.00019999847809408277, - "loss": 46.0, - "step": 22979 - }, - { - "epoch": 1.7569814782957738, - "grad_norm": 0.0009896565461531281, - "learning_rate": 0.00019999847796156196, - "loss": 46.0, - "step": 22980 - }, - { - "epoch": 1.7570579352791635, - "grad_norm": 0.0007081157527863979, - "learning_rate": 0.0001999984778290354, - "loss": 46.0, - "step": 22981 - }, - { - "epoch": 1.7571343922625533, - "grad_norm": 0.0014334216248244047, - "learning_rate": 0.00019999847769650307, - "loss": 46.0, - "step": 22982 - }, - { - "epoch": 1.757210849245943, - "grad_norm": 0.0014801254728809, - "learning_rate": 0.00019999847756396494, - "loss": 46.0, - "step": 22983 - }, - { - "epoch": 1.7572873062293328, - "grad_norm": 0.001222072052769363, - "learning_rate": 0.00019999847743142106, - "loss": 46.0, - "step": 22984 - }, - { - "epoch": 1.7573637632127226, - "grad_norm": 0.0007329648360610008, - "learning_rate": 0.0001999984772988714, - "loss": 46.0, - "step": 22985 - }, - { - "epoch": 1.7574402201961121, - "grad_norm": 0.0017048392910510302, - "learning_rate": 0.00019999847716631596, - "loss": 46.0, - "step": 22986 - }, - { - "epoch": 1.7575166771795019, - "grad_norm": 0.0008884343551471829, - "learning_rate": 0.0001999984770337548, - "loss": 46.0, - "step": 22987 - }, - { - "epoch": 1.7575931341628916, - "grad_norm": 0.0016720156418159604, - "learning_rate": 0.00019999847690118782, - "loss": 46.0, - "step": 22988 - }, - { - "epoch": 1.7576695911462812, - "grad_norm": 0.004272850695997477, - "learning_rate": 0.0001999984767686151, - "loss": 46.0, - "step": 22989 - }, - { - "epoch": 1.757746048129671, - "grad_norm": 0.0010351334931328893, - "learning_rate": 0.00019999847663603658, - "loss": 46.0, - "step": 22990 - }, - { - "epoch": 1.7578225051130607, - "grad_norm": 0.001561406534165144, - "learning_rate": 0.00019999847650345234, - "loss": 46.0, - "step": 22991 - }, - { - "epoch": 1.7578989620964505, - "grad_norm": 0.0007286272011697292, - "learning_rate": 0.0001999984763708623, - "loss": 46.0, - "step": 22992 - }, - { - "epoch": 1.7579754190798402, - "grad_norm": 0.0009288830915465951, - "learning_rate": 0.0001999984762382665, - "loss": 46.0, - "step": 22993 - }, - { - "epoch": 1.75805187606323, - "grad_norm": 0.0020690688397735357, - "learning_rate": 0.0001999984761056649, - "loss": 46.0, - "step": 22994 - }, - { - "epoch": 1.7581283330466198, - "grad_norm": 0.0005922580603510141, - "learning_rate": 0.00019999847597305758, - "loss": 46.0, - "step": 22995 - }, - { - "epoch": 1.7582047900300095, - "grad_norm": 0.0017920979298651218, - "learning_rate": 0.00019999847584044444, - "loss": 46.0, - "step": 22996 - }, - { - "epoch": 1.758281247013399, - "grad_norm": 0.00025034809368662536, - "learning_rate": 0.00019999847570782557, - "loss": 46.0, - "step": 22997 - }, - { - "epoch": 1.7583577039967888, - "grad_norm": 0.0008215445559471846, - "learning_rate": 0.00019999847557520092, - "loss": 46.0, - "step": 22998 - }, - { - "epoch": 1.7584341609801786, - "grad_norm": 0.0008346502436324954, - "learning_rate": 0.0001999984754425705, - "loss": 46.0, - "step": 22999 - }, - { - "epoch": 1.7585106179635681, - "grad_norm": 0.0009983754716813564, - "learning_rate": 0.0001999984753099343, - "loss": 46.0, - "step": 23000 - }, - { - "epoch": 1.7585870749469579, - "grad_norm": 0.0016917455941438675, - "learning_rate": 0.00019999847517729232, - "loss": 46.0, - "step": 23001 - }, - { - "epoch": 1.7586635319303476, - "grad_norm": 0.0022480038460344076, - "learning_rate": 0.0001999984750446446, - "loss": 46.0, - "step": 23002 - }, - { - "epoch": 1.7587399889137374, - "grad_norm": 0.0006862612208351493, - "learning_rate": 0.0001999984749119911, - "loss": 46.0, - "step": 23003 - }, - { - "epoch": 1.7588164458971272, - "grad_norm": 0.0008875308558344841, - "learning_rate": 0.00019999847477933185, - "loss": 46.0, - "step": 23004 - }, - { - "epoch": 1.758892902880517, - "grad_norm": 0.003141864435747266, - "learning_rate": 0.00019999847464666678, - "loss": 46.0, - "step": 23005 - }, - { - "epoch": 1.7589693598639067, - "grad_norm": 0.0007503660162910819, - "learning_rate": 0.00019999847451399597, - "loss": 46.0, - "step": 23006 - }, - { - "epoch": 1.7590458168472964, - "grad_norm": 0.0006347611779347062, - "learning_rate": 0.00019999847438131941, - "loss": 46.0, - "step": 23007 - }, - { - "epoch": 1.759122273830686, - "grad_norm": 0.0020879756193608046, - "learning_rate": 0.00019999847424863706, - "loss": 46.0, - "step": 23008 - }, - { - "epoch": 1.7591987308140757, - "grad_norm": 0.0007142355316318572, - "learning_rate": 0.00019999847411594895, - "loss": 46.0, - "step": 23009 - }, - { - "epoch": 1.7592751877974655, - "grad_norm": 0.00042321221553720534, - "learning_rate": 0.00019999847398325505, - "loss": 46.0, - "step": 23010 - }, - { - "epoch": 1.759351644780855, - "grad_norm": 0.0007877916796132922, - "learning_rate": 0.00019999847385055542, - "loss": 46.0, - "step": 23011 - }, - { - "epoch": 1.7594281017642448, - "grad_norm": 0.0008601034642197192, - "learning_rate": 0.00019999847371784997, - "loss": 46.0, - "step": 23012 - }, - { - "epoch": 1.7595045587476346, - "grad_norm": 0.00041944821714423597, - "learning_rate": 0.0001999984735851388, - "loss": 46.0, - "step": 23013 - }, - { - "epoch": 1.7595810157310243, - "grad_norm": 0.0008222644100897014, - "learning_rate": 0.00019999847345242183, - "loss": 46.0, - "step": 23014 - }, - { - "epoch": 1.759657472714414, - "grad_norm": 0.0012930564116686583, - "learning_rate": 0.0001999984733196991, - "loss": 46.0, - "step": 23015 - }, - { - "epoch": 1.7597339296978038, - "grad_norm": 0.001170386909507215, - "learning_rate": 0.00019999847318697057, - "loss": 46.0, - "step": 23016 - }, - { - "epoch": 1.7598103866811936, - "grad_norm": 0.0008373709279112518, - "learning_rate": 0.0001999984730542363, - "loss": 46.0, - "step": 23017 - }, - { - "epoch": 1.7598868436645834, - "grad_norm": 0.0023464609403163195, - "learning_rate": 0.00019999847292149627, - "loss": 46.0, - "step": 23018 - }, - { - "epoch": 1.759963300647973, - "grad_norm": 0.000365028390660882, - "learning_rate": 0.00019999847278875046, - "loss": 46.0, - "step": 23019 - }, - { - "epoch": 1.7600397576313627, - "grad_norm": 0.004052398726344109, - "learning_rate": 0.00019999847265599887, - "loss": 46.0, - "step": 23020 - }, - { - "epoch": 1.7601162146147524, - "grad_norm": 0.0009370911866426468, - "learning_rate": 0.00019999847252324154, - "loss": 46.0, - "step": 23021 - }, - { - "epoch": 1.760192671598142, - "grad_norm": 0.001851333538070321, - "learning_rate": 0.0001999984723904784, - "loss": 46.0, - "step": 23022 - }, - { - "epoch": 1.7602691285815317, - "grad_norm": 0.0013390742242336273, - "learning_rate": 0.0001999984722577095, - "loss": 46.0, - "step": 23023 - }, - { - "epoch": 1.7603455855649215, - "grad_norm": 0.0013048751279711723, - "learning_rate": 0.00019999847212493488, - "loss": 46.0, - "step": 23024 - }, - { - "epoch": 1.7604220425483112, - "grad_norm": 0.001635401975363493, - "learning_rate": 0.00019999847199215446, - "loss": 46.0, - "step": 23025 - }, - { - "epoch": 1.760498499531701, - "grad_norm": 0.0007840105681680143, - "learning_rate": 0.00019999847185936823, - "loss": 46.0, - "step": 23026 - }, - { - "epoch": 1.7605749565150908, - "grad_norm": 0.00026450862060301006, - "learning_rate": 0.0001999984717265763, - "loss": 46.0, - "step": 23027 - }, - { - "epoch": 1.7606514134984805, - "grad_norm": 0.002210391918197274, - "learning_rate": 0.00019999847159377855, - "loss": 46.0, - "step": 23028 - }, - { - "epoch": 1.7607278704818703, - "grad_norm": 0.0007283276063390076, - "learning_rate": 0.00019999847146097506, - "loss": 46.0, - "step": 23029 - }, - { - "epoch": 1.7608043274652598, - "grad_norm": 0.003568722167983651, - "learning_rate": 0.0001999984713281658, - "loss": 46.0, - "step": 23030 - }, - { - "epoch": 1.7608807844486496, - "grad_norm": 0.0021393224596977234, - "learning_rate": 0.00019999847119535073, - "loss": 46.0, - "step": 23031 - }, - { - "epoch": 1.7609572414320391, - "grad_norm": 0.0008001381647773087, - "learning_rate": 0.00019999847106252992, - "loss": 46.0, - "step": 23032 - }, - { - "epoch": 1.761033698415429, - "grad_norm": 0.0010784866753965616, - "learning_rate": 0.0001999984709297033, - "loss": 46.0, - "step": 23033 - }, - { - "epoch": 1.7611101553988187, - "grad_norm": 0.0005454495549201965, - "learning_rate": 0.000199998470796871, - "loss": 46.0, - "step": 23034 - }, - { - "epoch": 1.7611866123822084, - "grad_norm": 0.00164863932877779, - "learning_rate": 0.00019999847066403285, - "loss": 46.0, - "step": 23035 - }, - { - "epoch": 1.7612630693655982, - "grad_norm": 0.0012681143125519156, - "learning_rate": 0.000199998470531189, - "loss": 46.0, - "step": 23036 - }, - { - "epoch": 1.761339526348988, - "grad_norm": 0.002939911326393485, - "learning_rate": 0.0001999984703983393, - "loss": 46.0, - "step": 23037 - }, - { - "epoch": 1.7614159833323777, - "grad_norm": 0.003414539387449622, - "learning_rate": 0.0001999984702654839, - "loss": 46.0, - "step": 23038 - }, - { - "epoch": 1.7614924403157675, - "grad_norm": 0.0007823705673217773, - "learning_rate": 0.0001999984701326227, - "loss": 46.0, - "step": 23039 - }, - { - "epoch": 1.7615688972991572, - "grad_norm": 0.0015217153122648597, - "learning_rate": 0.00019999846999975573, - "loss": 46.0, - "step": 23040 - }, - { - "epoch": 1.7616453542825468, - "grad_norm": 0.0008177928393706679, - "learning_rate": 0.000199998469866883, - "loss": 46.0, - "step": 23041 - }, - { - "epoch": 1.7617218112659365, - "grad_norm": 0.004731368273496628, - "learning_rate": 0.0001999984697340045, - "loss": 46.0, - "step": 23042 - }, - { - "epoch": 1.761798268249326, - "grad_norm": 0.00045001530088484287, - "learning_rate": 0.0001999984696011202, - "loss": 46.0, - "step": 23043 - }, - { - "epoch": 1.7618747252327158, - "grad_norm": 0.0010435177246108651, - "learning_rate": 0.00019999846946823017, - "loss": 46.0, - "step": 23044 - }, - { - "epoch": 1.7619511822161056, - "grad_norm": 0.0013058970216661692, - "learning_rate": 0.00019999846933533433, - "loss": 46.0, - "step": 23045 - }, - { - "epoch": 1.7620276391994953, - "grad_norm": 0.0031492572743445635, - "learning_rate": 0.00019999846920243277, - "loss": 46.0, - "step": 23046 - }, - { - "epoch": 1.762104096182885, - "grad_norm": 0.0010075252503156662, - "learning_rate": 0.0001999984690695254, - "loss": 46.0, - "step": 23047 - }, - { - "epoch": 1.7621805531662749, - "grad_norm": 0.0024334799963980913, - "learning_rate": 0.00019999846893661228, - "loss": 46.0, - "step": 23048 - }, - { - "epoch": 1.7622570101496646, - "grad_norm": 0.001103270798921585, - "learning_rate": 0.0001999984688036934, - "loss": 46.0, - "step": 23049 - }, - { - "epoch": 1.7623334671330544, - "grad_norm": 0.0024429045151919127, - "learning_rate": 0.00019999846867076873, - "loss": 46.0, - "step": 23050 - }, - { - "epoch": 1.7624099241164441, - "grad_norm": 0.0036698102485388517, - "learning_rate": 0.0001999984685378383, - "loss": 46.0, - "step": 23051 - }, - { - "epoch": 1.7624863810998337, - "grad_norm": 0.0009404487791471183, - "learning_rate": 0.0001999984684049021, - "loss": 46.0, - "step": 23052 - }, - { - "epoch": 1.7625628380832234, - "grad_norm": 0.0021824913565069437, - "learning_rate": 0.00019999846827196014, - "loss": 46.0, - "step": 23053 - }, - { - "epoch": 1.762639295066613, - "grad_norm": 0.0007494486053474247, - "learning_rate": 0.0001999984681390124, - "loss": 46.0, - "step": 23054 - }, - { - "epoch": 1.7627157520500027, - "grad_norm": 0.0014965999871492386, - "learning_rate": 0.00019999846800605888, - "loss": 46.0, - "step": 23055 - }, - { - "epoch": 1.7627922090333925, - "grad_norm": 0.0009514080011285841, - "learning_rate": 0.00019999846787309962, - "loss": 46.0, - "step": 23056 - }, - { - "epoch": 1.7628686660167823, - "grad_norm": 0.0007919525378383696, - "learning_rate": 0.00019999846774013458, - "loss": 46.0, - "step": 23057 - }, - { - "epoch": 1.762945123000172, - "grad_norm": 0.000772044702898711, - "learning_rate": 0.00019999846760716374, - "loss": 46.0, - "step": 23058 - }, - { - "epoch": 1.7630215799835618, - "grad_norm": 0.0012643402442336082, - "learning_rate": 0.0001999984674741872, - "loss": 46.0, - "step": 23059 - }, - { - "epoch": 1.7630980369669516, - "grad_norm": 0.0006897153798490763, - "learning_rate": 0.0001999984673412048, - "loss": 46.0, - "step": 23060 - }, - { - "epoch": 1.7631744939503413, - "grad_norm": 0.0012045793700963259, - "learning_rate": 0.0001999984672082167, - "loss": 46.0, - "step": 23061 - }, - { - "epoch": 1.763250950933731, - "grad_norm": 0.0007357140420936048, - "learning_rate": 0.0001999984670752228, - "loss": 46.0, - "step": 23062 - }, - { - "epoch": 1.7633274079171206, - "grad_norm": 0.003234150819480419, - "learning_rate": 0.00019999846694222312, - "loss": 46.0, - "step": 23063 - }, - { - "epoch": 1.7634038649005104, - "grad_norm": 0.0009918593568727374, - "learning_rate": 0.0001999984668092177, - "loss": 46.0, - "step": 23064 - }, - { - "epoch": 1.7634803218839, - "grad_norm": 0.0011889488669112325, - "learning_rate": 0.0001999984666762065, - "loss": 46.0, - "step": 23065 - }, - { - "epoch": 1.7635567788672897, - "grad_norm": 0.0027338166255503893, - "learning_rate": 0.00019999846654318956, - "loss": 46.0, - "step": 23066 - }, - { - "epoch": 1.7636332358506794, - "grad_norm": 0.0017383953090757132, - "learning_rate": 0.0001999984664101668, - "loss": 46.0, - "step": 23067 - }, - { - "epoch": 1.7637096928340692, - "grad_norm": 0.0018819868564605713, - "learning_rate": 0.00019999846627713827, - "loss": 46.0, - "step": 23068 - }, - { - "epoch": 1.763786149817459, - "grad_norm": 0.001236236421391368, - "learning_rate": 0.000199998466144104, - "loss": 46.0, - "step": 23069 - }, - { - "epoch": 1.7638626068008487, - "grad_norm": 0.00044397584861144423, - "learning_rate": 0.00019999846601106394, - "loss": 46.0, - "step": 23070 - }, - { - "epoch": 1.7639390637842385, - "grad_norm": 0.001205915934406221, - "learning_rate": 0.00019999846587801813, - "loss": 46.0, - "step": 23071 - }, - { - "epoch": 1.7640155207676282, - "grad_norm": 0.0011821012012660503, - "learning_rate": 0.00019999846574496655, - "loss": 46.0, - "step": 23072 - }, - { - "epoch": 1.7640919777510178, - "grad_norm": 0.002327796770259738, - "learning_rate": 0.0001999984656119092, - "loss": 46.0, - "step": 23073 - }, - { - "epoch": 1.7641684347344075, - "grad_norm": 0.0009161459747701883, - "learning_rate": 0.00019999846547884607, - "loss": 46.0, - "step": 23074 - }, - { - "epoch": 1.7642448917177973, - "grad_norm": 0.0005388643476180732, - "learning_rate": 0.00019999846534577716, - "loss": 46.0, - "step": 23075 - }, - { - "epoch": 1.7643213487011868, - "grad_norm": 0.0017050675814971328, - "learning_rate": 0.00019999846521270249, - "loss": 46.0, - "step": 23076 - }, - { - "epoch": 1.7643978056845766, - "grad_norm": 0.0002865987189579755, - "learning_rate": 0.0001999984650796221, - "loss": 46.0, - "step": 23077 - }, - { - "epoch": 1.7644742626679664, - "grad_norm": 0.000978047726675868, - "learning_rate": 0.00019999846494653587, - "loss": 46.0, - "step": 23078 - }, - { - "epoch": 1.7645507196513561, - "grad_norm": 0.0008743317448534071, - "learning_rate": 0.0001999984648134439, - "loss": 46.0, - "step": 23079 - }, - { - "epoch": 1.7646271766347459, - "grad_norm": 0.004247661214321852, - "learning_rate": 0.00019999846468034615, - "loss": 46.0, - "step": 23080 - }, - { - "epoch": 1.7647036336181356, - "grad_norm": 0.001804320840165019, - "learning_rate": 0.00019999846454724264, - "loss": 46.0, - "step": 23081 - }, - { - "epoch": 1.7647800906015254, - "grad_norm": 0.00039279242628253996, - "learning_rate": 0.00019999846441413338, - "loss": 46.0, - "step": 23082 - }, - { - "epoch": 1.7648565475849152, - "grad_norm": 0.0008443098631687462, - "learning_rate": 0.0001999984642810183, - "loss": 46.0, - "step": 23083 - }, - { - "epoch": 1.7649330045683047, - "grad_norm": 0.0042954012751579285, - "learning_rate": 0.0001999984641478975, - "loss": 46.0, - "step": 23084 - }, - { - "epoch": 1.7650094615516945, - "grad_norm": 0.0030831163749098778, - "learning_rate": 0.00019999846401477092, - "loss": 46.0, - "step": 23085 - }, - { - "epoch": 1.7650859185350842, - "grad_norm": 0.0035576969385147095, - "learning_rate": 0.00019999846388163856, - "loss": 46.0, - "step": 23086 - }, - { - "epoch": 1.7651623755184738, - "grad_norm": 0.0008713395218364894, - "learning_rate": 0.0001999984637485004, - "loss": 46.0, - "step": 23087 - }, - { - "epoch": 1.7652388325018635, - "grad_norm": 0.000810770783573389, - "learning_rate": 0.00019999846361535653, - "loss": 46.0, - "step": 23088 - }, - { - "epoch": 1.7653152894852533, - "grad_norm": 0.0007830265094526112, - "learning_rate": 0.00019999846348220686, - "loss": 46.0, - "step": 23089 - }, - { - "epoch": 1.765391746468643, - "grad_norm": 0.0017373106675222516, - "learning_rate": 0.00019999846334905143, - "loss": 46.0, - "step": 23090 - }, - { - "epoch": 1.7654682034520328, - "grad_norm": 0.0032556059304624796, - "learning_rate": 0.0001999984632158902, - "loss": 46.0, - "step": 23091 - }, - { - "epoch": 1.7655446604354226, - "grad_norm": 0.0028761804569512606, - "learning_rate": 0.00019999846308272324, - "loss": 46.0, - "step": 23092 - }, - { - "epoch": 1.7656211174188123, - "grad_norm": 0.001863745623268187, - "learning_rate": 0.00019999846294955053, - "loss": 46.0, - "step": 23093 - }, - { - "epoch": 1.765697574402202, - "grad_norm": 0.0008716261363588274, - "learning_rate": 0.00019999846281637198, - "loss": 46.0, - "step": 23094 - }, - { - "epoch": 1.7657740313855916, - "grad_norm": 0.0031519723124802113, - "learning_rate": 0.00019999846268318772, - "loss": 46.0, - "step": 23095 - }, - { - "epoch": 1.7658504883689814, - "grad_norm": 0.0037860311567783356, - "learning_rate": 0.00019999846254999766, - "loss": 46.0, - "step": 23096 - }, - { - "epoch": 1.7659269453523712, - "grad_norm": 0.002153954701498151, - "learning_rate": 0.00019999846241680182, - "loss": 46.0, - "step": 23097 - }, - { - "epoch": 1.7660034023357607, - "grad_norm": 0.0009732242324389517, - "learning_rate": 0.00019999846228360024, - "loss": 46.0, - "step": 23098 - }, - { - "epoch": 1.7660798593191505, - "grad_norm": 0.004467865917831659, - "learning_rate": 0.00019999846215039288, - "loss": 46.0, - "step": 23099 - }, - { - "epoch": 1.7661563163025402, - "grad_norm": 0.0008208779036067426, - "learning_rate": 0.00019999846201717975, - "loss": 46.0, - "step": 23100 - }, - { - "epoch": 1.76623277328593, - "grad_norm": 0.0012709691654890776, - "learning_rate": 0.00019999846188396085, - "loss": 46.0, - "step": 23101 - }, - { - "epoch": 1.7663092302693197, - "grad_norm": 0.0005803306121379137, - "learning_rate": 0.00019999846175073618, - "loss": 46.0, - "step": 23102 - }, - { - "epoch": 1.7663856872527095, - "grad_norm": 0.0005613188841380179, - "learning_rate": 0.00019999846161750575, - "loss": 46.0, - "step": 23103 - }, - { - "epoch": 1.7664621442360993, - "grad_norm": 0.0010739011922851205, - "learning_rate": 0.00019999846148426956, - "loss": 46.0, - "step": 23104 - }, - { - "epoch": 1.766538601219489, - "grad_norm": 0.003066170262172818, - "learning_rate": 0.00019999846135102756, - "loss": 46.0, - "step": 23105 - }, - { - "epoch": 1.7666150582028786, - "grad_norm": 0.0007236566161736846, - "learning_rate": 0.00019999846121777982, - "loss": 46.0, - "step": 23106 - }, - { - "epoch": 1.7666915151862683, - "grad_norm": 0.0012443193700164557, - "learning_rate": 0.0001999984610845263, - "loss": 46.0, - "step": 23107 - }, - { - "epoch": 1.766767972169658, - "grad_norm": 0.0005706559168174863, - "learning_rate": 0.00019999846095126702, - "loss": 46.0, - "step": 23108 - }, - { - "epoch": 1.7668444291530476, - "grad_norm": 0.0028388206847012043, - "learning_rate": 0.00019999846081800198, - "loss": 46.0, - "step": 23109 - }, - { - "epoch": 1.7669208861364374, - "grad_norm": 0.00118687329813838, - "learning_rate": 0.00019999846068473117, - "loss": 46.0, - "step": 23110 - }, - { - "epoch": 1.7669973431198271, - "grad_norm": 0.0007301591103896499, - "learning_rate": 0.00019999846055145456, - "loss": 46.0, - "step": 23111 - }, - { - "epoch": 1.767073800103217, - "grad_norm": 0.0006311266915872693, - "learning_rate": 0.0001999984604181722, - "loss": 46.0, - "step": 23112 - }, - { - "epoch": 1.7671502570866067, - "grad_norm": 0.0012225827667862177, - "learning_rate": 0.00019999846028488408, - "loss": 46.0, - "step": 23113 - }, - { - "epoch": 1.7672267140699964, - "grad_norm": 0.0006423905142582953, - "learning_rate": 0.00019999846015159015, - "loss": 46.0, - "step": 23114 - }, - { - "epoch": 1.7673031710533862, - "grad_norm": 0.0015107393264770508, - "learning_rate": 0.0001999984600182905, - "loss": 46.0, - "step": 23115 - }, - { - "epoch": 1.767379628036776, - "grad_norm": 0.0006345415604300797, - "learning_rate": 0.00019999845988498508, - "loss": 46.0, - "step": 23116 - }, - { - "epoch": 1.7674560850201655, - "grad_norm": 0.0009737061918713152, - "learning_rate": 0.00019999845975167383, - "loss": 46.0, - "step": 23117 - }, - { - "epoch": 1.7675325420035553, - "grad_norm": 0.0007959159556776285, - "learning_rate": 0.0001999984596183569, - "loss": 46.0, - "step": 23118 - }, - { - "epoch": 1.767608998986945, - "grad_norm": 0.009825431741774082, - "learning_rate": 0.00019999845948503412, - "loss": 46.0, - "step": 23119 - }, - { - "epoch": 1.7676854559703346, - "grad_norm": 0.0072423676028847694, - "learning_rate": 0.0001999984593517056, - "loss": 46.0, - "step": 23120 - }, - { - "epoch": 1.7677619129537243, - "grad_norm": 0.0006642839289270341, - "learning_rate": 0.00019999845921837132, - "loss": 46.0, - "step": 23121 - }, - { - "epoch": 1.767838369937114, - "grad_norm": 0.0011451351456344128, - "learning_rate": 0.00019999845908503128, - "loss": 46.0, - "step": 23122 - }, - { - "epoch": 1.7679148269205038, - "grad_norm": 0.001224048319272697, - "learning_rate": 0.00019999845895168544, - "loss": 46.0, - "step": 23123 - }, - { - "epoch": 1.7679912839038936, - "grad_norm": 0.0007788949878886342, - "learning_rate": 0.00019999845881833386, - "loss": 46.0, - "step": 23124 - }, - { - "epoch": 1.7680677408872834, - "grad_norm": 0.0035099743399769068, - "learning_rate": 0.00019999845868497648, - "loss": 46.0, - "step": 23125 - }, - { - "epoch": 1.7681441978706731, - "grad_norm": 0.0017941266996785998, - "learning_rate": 0.00019999845855161335, - "loss": 46.0, - "step": 23126 - }, - { - "epoch": 1.7682206548540629, - "grad_norm": 0.0011262808693572879, - "learning_rate": 0.00019999845841824445, - "loss": 46.0, - "step": 23127 - }, - { - "epoch": 1.7682971118374524, - "grad_norm": 0.0008886543801054358, - "learning_rate": 0.00019999845828486978, - "loss": 46.0, - "step": 23128 - }, - { - "epoch": 1.7683735688208422, - "grad_norm": 0.008047091774642467, - "learning_rate": 0.00019999845815148935, - "loss": 46.0, - "step": 23129 - }, - { - "epoch": 1.768450025804232, - "grad_norm": 0.0008445560815744102, - "learning_rate": 0.00019999845801810313, - "loss": 46.0, - "step": 23130 - }, - { - "epoch": 1.7685264827876215, - "grad_norm": 0.002351143630221486, - "learning_rate": 0.00019999845788471116, - "loss": 46.0, - "step": 23131 - }, - { - "epoch": 1.7686029397710112, - "grad_norm": 0.0011543554719537497, - "learning_rate": 0.0001999984577513134, - "loss": 46.0, - "step": 23132 - }, - { - "epoch": 1.768679396754401, - "grad_norm": 0.0013604448176920414, - "learning_rate": 0.00019999845761790988, - "loss": 46.0, - "step": 23133 - }, - { - "epoch": 1.7687558537377908, - "grad_norm": 0.0008139307610690594, - "learning_rate": 0.0001999984574845006, - "loss": 46.0, - "step": 23134 - }, - { - "epoch": 1.7688323107211805, - "grad_norm": 0.006762339733541012, - "learning_rate": 0.00019999845735108553, - "loss": 46.0, - "step": 23135 - }, - { - "epoch": 1.7689087677045703, - "grad_norm": 0.0014616911066696048, - "learning_rate": 0.0001999984572176647, - "loss": 46.0, - "step": 23136 - }, - { - "epoch": 1.76898522468796, - "grad_norm": 0.004571232013404369, - "learning_rate": 0.0001999984570842381, - "loss": 46.0, - "step": 23137 - }, - { - "epoch": 1.7690616816713498, - "grad_norm": 0.009523669257760048, - "learning_rate": 0.00019999845695080576, - "loss": 46.0, - "step": 23138 - }, - { - "epoch": 1.7691381386547393, - "grad_norm": 0.0015674973838031292, - "learning_rate": 0.00019999845681736763, - "loss": 46.0, - "step": 23139 - }, - { - "epoch": 1.769214595638129, - "grad_norm": 0.003483065403997898, - "learning_rate": 0.00019999845668392373, - "loss": 46.0, - "step": 23140 - }, - { - "epoch": 1.7692910526215189, - "grad_norm": 0.0011361864162608981, - "learning_rate": 0.00019999845655047405, - "loss": 46.0, - "step": 23141 - }, - { - "epoch": 1.7693675096049084, - "grad_norm": 0.0006945352652110159, - "learning_rate": 0.0001999984564170186, - "loss": 46.0, - "step": 23142 - }, - { - "epoch": 1.7694439665882982, - "grad_norm": 0.000498998211696744, - "learning_rate": 0.00019999845628355738, - "loss": 46.0, - "step": 23143 - }, - { - "epoch": 1.769520423571688, - "grad_norm": 0.0003876656701322645, - "learning_rate": 0.00019999845615009042, - "loss": 46.0, - "step": 23144 - }, - { - "epoch": 1.7695968805550777, - "grad_norm": 0.005981977563351393, - "learning_rate": 0.00019999845601661767, - "loss": 46.0, - "step": 23145 - }, - { - "epoch": 1.7696733375384675, - "grad_norm": 0.0029290628153830767, - "learning_rate": 0.00019999845588313916, - "loss": 46.0, - "step": 23146 - }, - { - "epoch": 1.7697497945218572, - "grad_norm": 0.0007858932367525995, - "learning_rate": 0.00019999845574965484, - "loss": 46.0, - "step": 23147 - }, - { - "epoch": 1.769826251505247, - "grad_norm": 0.002118223113939166, - "learning_rate": 0.00019999845561616478, - "loss": 46.0, - "step": 23148 - }, - { - "epoch": 1.7699027084886367, - "grad_norm": 0.000950882094912231, - "learning_rate": 0.00019999845548266895, - "loss": 46.0, - "step": 23149 - }, - { - "epoch": 1.7699791654720263, - "grad_norm": 0.0018941829912364483, - "learning_rate": 0.00019999845534916737, - "loss": 46.0, - "step": 23150 - }, - { - "epoch": 1.770055622455416, - "grad_norm": 0.0008296070736832917, - "learning_rate": 0.00019999845521566, - "loss": 46.0, - "step": 23151 - }, - { - "epoch": 1.7701320794388058, - "grad_norm": 0.0005480193067342043, - "learning_rate": 0.00019999845508214686, - "loss": 46.0, - "step": 23152 - }, - { - "epoch": 1.7702085364221953, - "grad_norm": 0.0007223273278214037, - "learning_rate": 0.00019999845494862796, - "loss": 46.0, - "step": 23153 - }, - { - "epoch": 1.770284993405585, - "grad_norm": 0.0009624581434763968, - "learning_rate": 0.00019999845481510328, - "loss": 46.0, - "step": 23154 - }, - { - "epoch": 1.7703614503889749, - "grad_norm": 0.0011638266732916236, - "learning_rate": 0.00019999845468157283, - "loss": 46.0, - "step": 23155 - }, - { - "epoch": 1.7704379073723646, - "grad_norm": 0.0037739588879048824, - "learning_rate": 0.00019999845454803664, - "loss": 46.0, - "step": 23156 - }, - { - "epoch": 1.7705143643557544, - "grad_norm": 0.001201388891786337, - "learning_rate": 0.00019999845441449464, - "loss": 46.0, - "step": 23157 - }, - { - "epoch": 1.7705908213391441, - "grad_norm": 0.0016899992479011416, - "learning_rate": 0.0001999984542809469, - "loss": 46.0, - "step": 23158 - }, - { - "epoch": 1.770667278322534, - "grad_norm": 0.012375622056424618, - "learning_rate": 0.0001999984541473934, - "loss": 46.0, - "step": 23159 - }, - { - "epoch": 1.7707437353059237, - "grad_norm": 0.0004891376011073589, - "learning_rate": 0.00019999845401383407, - "loss": 46.0, - "step": 23160 - }, - { - "epoch": 1.7708201922893132, - "grad_norm": 0.0014209039509296417, - "learning_rate": 0.000199998453880269, - "loss": 46.0, - "step": 23161 - }, - { - "epoch": 1.770896649272703, - "grad_norm": 0.0006470477674156427, - "learning_rate": 0.00019999845374669818, - "loss": 46.0, - "step": 23162 - }, - { - "epoch": 1.7709731062560925, - "grad_norm": 0.0029922465328127146, - "learning_rate": 0.0001999984536131216, - "loss": 46.0, - "step": 23163 - }, - { - "epoch": 1.7710495632394823, - "grad_norm": 0.012191470712423325, - "learning_rate": 0.00019999845347953922, - "loss": 46.0, - "step": 23164 - }, - { - "epoch": 1.771126020222872, - "grad_norm": 0.0024859150871634483, - "learning_rate": 0.0001999984533459511, - "loss": 46.0, - "step": 23165 - }, - { - "epoch": 1.7712024772062618, - "grad_norm": 0.0019891022238880396, - "learning_rate": 0.00019999845321235716, - "loss": 46.0, - "step": 23166 - }, - { - "epoch": 1.7712789341896515, - "grad_norm": 0.0013281208230182528, - "learning_rate": 0.0001999984530787575, - "loss": 46.0, - "step": 23167 - }, - { - "epoch": 1.7713553911730413, - "grad_norm": 0.00130205019377172, - "learning_rate": 0.00019999845294515204, - "loss": 46.0, - "step": 23168 - }, - { - "epoch": 1.771431848156431, - "grad_norm": 0.0005985440802760422, - "learning_rate": 0.00019999845281154085, - "loss": 46.0, - "step": 23169 - }, - { - "epoch": 1.7715083051398208, - "grad_norm": 0.002968721790239215, - "learning_rate": 0.00019999845267792385, - "loss": 46.0, - "step": 23170 - }, - { - "epoch": 1.7715847621232106, - "grad_norm": 0.0006349895847961307, - "learning_rate": 0.00019999845254430108, - "loss": 46.0, - "step": 23171 - }, - { - "epoch": 1.7716612191066001, - "grad_norm": 0.0006741798715665936, - "learning_rate": 0.00019999845241067257, - "loss": 46.0, - "step": 23172 - }, - { - "epoch": 1.7717376760899899, - "grad_norm": 0.0008166732732206583, - "learning_rate": 0.00019999845227703828, - "loss": 46.0, - "step": 23173 - }, - { - "epoch": 1.7718141330733794, - "grad_norm": 0.0023718588054180145, - "learning_rate": 0.00019999845214339822, - "loss": 46.0, - "step": 23174 - }, - { - "epoch": 1.7718905900567692, - "grad_norm": 0.001330870552919805, - "learning_rate": 0.0001999984520097524, - "loss": 46.0, - "step": 23175 - }, - { - "epoch": 1.771967047040159, - "grad_norm": 0.0010530907893553376, - "learning_rate": 0.0001999984518761008, - "loss": 46.0, - "step": 23176 - }, - { - "epoch": 1.7720435040235487, - "grad_norm": 0.005613513290882111, - "learning_rate": 0.0001999984517424434, - "loss": 46.0, - "step": 23177 - }, - { - "epoch": 1.7721199610069385, - "grad_norm": 0.0031455366406589746, - "learning_rate": 0.00019999845160878027, - "loss": 46.0, - "step": 23178 - }, - { - "epoch": 1.7721964179903282, - "grad_norm": 0.0010642630513757467, - "learning_rate": 0.00019999845147511137, - "loss": 46.0, - "step": 23179 - }, - { - "epoch": 1.772272874973718, - "grad_norm": 0.0014018104411661625, - "learning_rate": 0.0001999984513414367, - "loss": 46.0, - "step": 23180 - }, - { - "epoch": 1.7723493319571078, - "grad_norm": 0.0006225567194633186, - "learning_rate": 0.00019999845120775625, - "loss": 46.0, - "step": 23181 - }, - { - "epoch": 1.7724257889404975, - "grad_norm": 0.0005686302902176976, - "learning_rate": 0.00019999845107407003, - "loss": 46.0, - "step": 23182 - }, - { - "epoch": 1.772502245923887, - "grad_norm": 0.0025199546944350004, - "learning_rate": 0.00019999845094037806, - "loss": 46.0, - "step": 23183 - }, - { - "epoch": 1.7725787029072768, - "grad_norm": 0.0006324366549961269, - "learning_rate": 0.0001999984508066803, - "loss": 46.0, - "step": 23184 - }, - { - "epoch": 1.7726551598906664, - "grad_norm": 0.0022804951295256615, - "learning_rate": 0.00019999845067297675, - "loss": 46.0, - "step": 23185 - }, - { - "epoch": 1.7727316168740561, - "grad_norm": 0.0010371665703132749, - "learning_rate": 0.00019999845053926747, - "loss": 46.0, - "step": 23186 - }, - { - "epoch": 1.7728080738574459, - "grad_norm": 0.002356019103899598, - "learning_rate": 0.0001999984504055524, - "loss": 46.0, - "step": 23187 - }, - { - "epoch": 1.7728845308408356, - "grad_norm": 0.0010994799667969346, - "learning_rate": 0.00019999845027183157, - "loss": 46.0, - "step": 23188 - }, - { - "epoch": 1.7729609878242254, - "grad_norm": 0.0008478878880850971, - "learning_rate": 0.00019999845013810496, - "loss": 46.0, - "step": 23189 - }, - { - "epoch": 1.7730374448076152, - "grad_norm": 0.001982656307518482, - "learning_rate": 0.0001999984500043726, - "loss": 46.0, - "step": 23190 - }, - { - "epoch": 1.773113901791005, - "grad_norm": 0.0008776086033321917, - "learning_rate": 0.00019999844987063446, - "loss": 46.0, - "step": 23191 - }, - { - "epoch": 1.7731903587743947, - "grad_norm": 0.0026984878350049257, - "learning_rate": 0.00019999844973689056, - "loss": 46.0, - "step": 23192 - }, - { - "epoch": 1.7732668157577844, - "grad_norm": 0.0019534658640623093, - "learning_rate": 0.00019999844960314088, - "loss": 46.0, - "step": 23193 - }, - { - "epoch": 1.773343272741174, - "grad_norm": 0.0013197226217016578, - "learning_rate": 0.0001999984494693854, - "loss": 46.0, - "step": 23194 - }, - { - "epoch": 1.7734197297245637, - "grad_norm": 0.0026509107556194067, - "learning_rate": 0.0001999984493356242, - "loss": 46.0, - "step": 23195 - }, - { - "epoch": 1.7734961867079533, - "grad_norm": 0.0007069522398523986, - "learning_rate": 0.00019999844920185722, - "loss": 46.0, - "step": 23196 - }, - { - "epoch": 1.773572643691343, - "grad_norm": 0.002517035696655512, - "learning_rate": 0.00019999844906808445, - "loss": 46.0, - "step": 23197 - }, - { - "epoch": 1.7736491006747328, - "grad_norm": 0.0005949987680651248, - "learning_rate": 0.00019999844893430591, - "loss": 46.0, - "step": 23198 - }, - { - "epoch": 1.7737255576581226, - "grad_norm": 0.005007871892303228, - "learning_rate": 0.00019999844880052163, - "loss": 46.0, - "step": 23199 - }, - { - "epoch": 1.7738020146415123, - "grad_norm": 0.0006895462283864617, - "learning_rate": 0.00019999844866673157, - "loss": 46.0, - "step": 23200 - }, - { - "epoch": 1.773878471624902, - "grad_norm": 0.0015854892553761601, - "learning_rate": 0.00019999844853293573, - "loss": 46.0, - "step": 23201 - }, - { - "epoch": 1.7739549286082918, - "grad_norm": 0.002647966146469116, - "learning_rate": 0.00019999844839913413, - "loss": 46.0, - "step": 23202 - }, - { - "epoch": 1.7740313855916816, - "grad_norm": 0.001964349066838622, - "learning_rate": 0.00019999844826532677, - "loss": 46.0, - "step": 23203 - }, - { - "epoch": 1.7741078425750711, - "grad_norm": 0.0009689134894870222, - "learning_rate": 0.0001999984481315136, - "loss": 46.0, - "step": 23204 - }, - { - "epoch": 1.774184299558461, - "grad_norm": 0.0006538014858961105, - "learning_rate": 0.0001999984479976947, - "loss": 46.0, - "step": 23205 - }, - { - "epoch": 1.7742607565418507, - "grad_norm": 0.0004765463818330318, - "learning_rate": 0.00019999844786387002, - "loss": 46.0, - "step": 23206 - }, - { - "epoch": 1.7743372135252402, - "grad_norm": 0.0018598216120153666, - "learning_rate": 0.00019999844773003957, - "loss": 46.0, - "step": 23207 - }, - { - "epoch": 1.77441367050863, - "grad_norm": 0.0017507366137579083, - "learning_rate": 0.00019999844759620335, - "loss": 46.0, - "step": 23208 - }, - { - "epoch": 1.7744901274920197, - "grad_norm": 0.0015931391390040517, - "learning_rate": 0.00019999844746236133, - "loss": 46.0, - "step": 23209 - }, - { - "epoch": 1.7745665844754095, - "grad_norm": 0.0006228883867152035, - "learning_rate": 0.00019999844732851362, - "loss": 46.0, - "step": 23210 - }, - { - "epoch": 1.7746430414587993, - "grad_norm": 0.0010079315397888422, - "learning_rate": 0.00019999844719466005, - "loss": 46.0, - "step": 23211 - }, - { - "epoch": 1.774719498442189, - "grad_norm": 0.0026071411557495594, - "learning_rate": 0.00019999844706080076, - "loss": 46.0, - "step": 23212 - }, - { - "epoch": 1.7747959554255788, - "grad_norm": 0.0015334176132455468, - "learning_rate": 0.0001999984469269357, - "loss": 46.0, - "step": 23213 - }, - { - "epoch": 1.7748724124089685, - "grad_norm": 0.0020003607496619225, - "learning_rate": 0.00019999844679306484, - "loss": 46.0, - "step": 23214 - }, - { - "epoch": 1.774948869392358, - "grad_norm": 0.0009174122824333608, - "learning_rate": 0.00019999844665918827, - "loss": 46.0, - "step": 23215 - }, - { - "epoch": 1.7750253263757478, - "grad_norm": 0.0042031812481582165, - "learning_rate": 0.00019999844652530589, - "loss": 46.0, - "step": 23216 - }, - { - "epoch": 1.7751017833591376, - "grad_norm": 0.001706851995550096, - "learning_rate": 0.00019999844639141773, - "loss": 46.0, - "step": 23217 - }, - { - "epoch": 1.7751782403425271, - "grad_norm": 0.002087873872369528, - "learning_rate": 0.00019999844625752383, - "loss": 46.0, - "step": 23218 - }, - { - "epoch": 1.775254697325917, - "grad_norm": 0.0006772560300305486, - "learning_rate": 0.00019999844612362413, - "loss": 46.0, - "step": 23219 - }, - { - "epoch": 1.7753311543093067, - "grad_norm": 0.0020502344705164433, - "learning_rate": 0.00019999844598971869, - "loss": 46.0, - "step": 23220 - }, - { - "epoch": 1.7754076112926964, - "grad_norm": 0.0011054871138185263, - "learning_rate": 0.00019999844585580747, - "loss": 46.0, - "step": 23221 - }, - { - "epoch": 1.7754840682760862, - "grad_norm": 0.000559154199436307, - "learning_rate": 0.00019999844572189047, - "loss": 46.0, - "step": 23222 - }, - { - "epoch": 1.775560525259476, - "grad_norm": 0.003327616024762392, - "learning_rate": 0.00019999844558796773, - "loss": 46.0, - "step": 23223 - }, - { - "epoch": 1.7756369822428657, - "grad_norm": 0.002628596732392907, - "learning_rate": 0.0001999984454540392, - "loss": 46.0, - "step": 23224 - }, - { - "epoch": 1.7757134392262555, - "grad_norm": 0.0008326114621013403, - "learning_rate": 0.00019999844532010488, - "loss": 46.0, - "step": 23225 - }, - { - "epoch": 1.775789896209645, - "grad_norm": 0.0012390506453812122, - "learning_rate": 0.00019999844518616482, - "loss": 46.0, - "step": 23226 - }, - { - "epoch": 1.7758663531930348, - "grad_norm": 0.01298129465430975, - "learning_rate": 0.000199998445052219, - "loss": 46.0, - "step": 23227 - }, - { - "epoch": 1.7759428101764245, - "grad_norm": 0.00398150272667408, - "learning_rate": 0.00019999844491826738, - "loss": 46.0, - "step": 23228 - }, - { - "epoch": 1.776019267159814, - "grad_norm": 0.0006963791674934328, - "learning_rate": 0.00019999844478431, - "loss": 46.0, - "step": 23229 - }, - { - "epoch": 1.7760957241432038, - "grad_norm": 0.0018393455538898706, - "learning_rate": 0.00019999844465034685, - "loss": 46.0, - "step": 23230 - }, - { - "epoch": 1.7761721811265936, - "grad_norm": 0.0008311583660542965, - "learning_rate": 0.00019999844451637792, - "loss": 46.0, - "step": 23231 - }, - { - "epoch": 1.7762486381099833, - "grad_norm": 0.0007329022628255188, - "learning_rate": 0.00019999844438240322, - "loss": 46.0, - "step": 23232 - }, - { - "epoch": 1.776325095093373, - "grad_norm": 0.004611688200384378, - "learning_rate": 0.0001999984442484228, - "loss": 46.0, - "step": 23233 - }, - { - "epoch": 1.7764015520767629, - "grad_norm": 0.0015256782062351704, - "learning_rate": 0.00019999844411443656, - "loss": 46.0, - "step": 23234 - }, - { - "epoch": 1.7764780090601526, - "grad_norm": 0.0004698209522757679, - "learning_rate": 0.0001999984439804446, - "loss": 46.0, - "step": 23235 - }, - { - "epoch": 1.7765544660435424, - "grad_norm": 0.0007031442364677787, - "learning_rate": 0.0001999984438464468, - "loss": 46.0, - "step": 23236 - }, - { - "epoch": 1.776630923026932, - "grad_norm": 0.0022335692774504423, - "learning_rate": 0.00019999844371244328, - "loss": 46.0, - "step": 23237 - }, - { - "epoch": 1.7767073800103217, - "grad_norm": 0.012205605395138264, - "learning_rate": 0.00019999844357843397, - "loss": 46.0, - "step": 23238 - }, - { - "epoch": 1.7767838369937115, - "grad_norm": 0.0007610509055666625, - "learning_rate": 0.0001999984434444189, - "loss": 46.0, - "step": 23239 - }, - { - "epoch": 1.776860293977101, - "grad_norm": 0.0011236566351726651, - "learning_rate": 0.00019999844331039808, - "loss": 46.0, - "step": 23240 - }, - { - "epoch": 1.7769367509604908, - "grad_norm": 0.00048113096272572875, - "learning_rate": 0.00019999844317637147, - "loss": 46.0, - "step": 23241 - }, - { - "epoch": 1.7770132079438805, - "grad_norm": 0.0009932528482750058, - "learning_rate": 0.00019999844304233907, - "loss": 46.0, - "step": 23242 - }, - { - "epoch": 1.7770896649272703, - "grad_norm": 0.0040565053932368755, - "learning_rate": 0.00019999844290830091, - "loss": 46.0, - "step": 23243 - }, - { - "epoch": 1.77716612191066, - "grad_norm": 0.0005631509120576084, - "learning_rate": 0.00019999844277425702, - "loss": 46.0, - "step": 23244 - }, - { - "epoch": 1.7772425788940498, - "grad_norm": 0.0024882592260837555, - "learning_rate": 0.00019999844264020732, - "loss": 46.0, - "step": 23245 - }, - { - "epoch": 1.7773190358774396, - "grad_norm": 0.0007060154457576573, - "learning_rate": 0.00019999844250615184, - "loss": 46.0, - "step": 23246 - }, - { - "epoch": 1.7773954928608293, - "grad_norm": 0.002434036461636424, - "learning_rate": 0.00019999844237209065, - "loss": 46.0, - "step": 23247 - }, - { - "epoch": 1.7774719498442189, - "grad_norm": 0.00185084727127105, - "learning_rate": 0.00019999844223802363, - "loss": 46.0, - "step": 23248 - }, - { - "epoch": 1.7775484068276086, - "grad_norm": 0.000825387891381979, - "learning_rate": 0.00019999844210395087, - "loss": 46.0, - "step": 23249 - }, - { - "epoch": 1.7776248638109984, - "grad_norm": 0.0013457470340654254, - "learning_rate": 0.00019999844196987235, - "loss": 46.0, - "step": 23250 - }, - { - "epoch": 1.777701320794388, - "grad_norm": 0.0007637071539647877, - "learning_rate": 0.00019999844183578804, - "loss": 46.0, - "step": 23251 - }, - { - "epoch": 1.7777777777777777, - "grad_norm": 0.0006038442370481789, - "learning_rate": 0.00019999844170169798, - "loss": 46.0, - "step": 23252 - }, - { - "epoch": 1.7778542347611674, - "grad_norm": 0.001025978708639741, - "learning_rate": 0.00019999844156760212, - "loss": 46.0, - "step": 23253 - }, - { - "epoch": 1.7779306917445572, - "grad_norm": 0.0003737601509783417, - "learning_rate": 0.00019999844143350052, - "loss": 46.0, - "step": 23254 - }, - { - "epoch": 1.778007148727947, - "grad_norm": 0.0010456457966938615, - "learning_rate": 0.00019999844129939314, - "loss": 46.0, - "step": 23255 - }, - { - "epoch": 1.7780836057113367, - "grad_norm": 0.0019081287318840623, - "learning_rate": 0.00019999844116527999, - "loss": 46.0, - "step": 23256 - }, - { - "epoch": 1.7781600626947265, - "grad_norm": 0.0009576175943948328, - "learning_rate": 0.00019999844103116106, - "loss": 46.0, - "step": 23257 - }, - { - "epoch": 1.7782365196781162, - "grad_norm": 0.0008039544918574393, - "learning_rate": 0.0001999984408970364, - "loss": 46.0, - "step": 23258 - }, - { - "epoch": 1.7783129766615058, - "grad_norm": 0.002151490654796362, - "learning_rate": 0.00019999844076290591, - "loss": 46.0, - "step": 23259 - }, - { - "epoch": 1.7783894336448955, - "grad_norm": 0.0007029905100353062, - "learning_rate": 0.0001999984406287697, - "loss": 46.0, - "step": 23260 - }, - { - "epoch": 1.7784658906282853, - "grad_norm": 0.0008604549220763147, - "learning_rate": 0.0001999984404946277, - "loss": 46.0, - "step": 23261 - }, - { - "epoch": 1.7785423476116748, - "grad_norm": 0.0013133713509887457, - "learning_rate": 0.00019999844036047994, - "loss": 46.0, - "step": 23262 - }, - { - "epoch": 1.7786188045950646, - "grad_norm": 0.014470922760665417, - "learning_rate": 0.0001999984402263264, - "loss": 46.0, - "step": 23263 - }, - { - "epoch": 1.7786952615784544, - "grad_norm": 0.0008368071285076439, - "learning_rate": 0.00019999844009216711, - "loss": 46.0, - "step": 23264 - }, - { - "epoch": 1.7787717185618441, - "grad_norm": 0.0014800543431192636, - "learning_rate": 0.00019999843995800203, - "loss": 46.0, - "step": 23265 - }, - { - "epoch": 1.778848175545234, - "grad_norm": 0.002377198077738285, - "learning_rate": 0.00019999843982383117, - "loss": 46.0, - "step": 23266 - }, - { - "epoch": 1.7789246325286237, - "grad_norm": 0.0010161095997318625, - "learning_rate": 0.00019999843968965454, - "loss": 46.0, - "step": 23267 - }, - { - "epoch": 1.7790010895120134, - "grad_norm": 0.0011116121895611286, - "learning_rate": 0.00019999843955547219, - "loss": 46.0, - "step": 23268 - }, - { - "epoch": 1.7790775464954032, - "grad_norm": 0.0014691423857584596, - "learning_rate": 0.00019999843942128403, - "loss": 46.0, - "step": 23269 - }, - { - "epoch": 1.7791540034787927, - "grad_norm": 0.0012214378220960498, - "learning_rate": 0.00019999843928709013, - "loss": 46.0, - "step": 23270 - }, - { - "epoch": 1.7792304604621825, - "grad_norm": 0.0025345112662762403, - "learning_rate": 0.0001999984391528904, - "loss": 46.0, - "step": 23271 - }, - { - "epoch": 1.7793069174455722, - "grad_norm": 0.0007620805990882218, - "learning_rate": 0.00019999843901868496, - "loss": 46.0, - "step": 23272 - }, - { - "epoch": 1.7793833744289618, - "grad_norm": 0.0011552328942343593, - "learning_rate": 0.00019999843888447374, - "loss": 46.0, - "step": 23273 - }, - { - "epoch": 1.7794598314123515, - "grad_norm": 0.0011205559130758047, - "learning_rate": 0.00019999843875025675, - "loss": 46.0, - "step": 23274 - }, - { - "epoch": 1.7795362883957413, - "grad_norm": 0.0006173912552185357, - "learning_rate": 0.00019999843861603396, - "loss": 46.0, - "step": 23275 - }, - { - "epoch": 1.779612745379131, - "grad_norm": 0.0009429361089132726, - "learning_rate": 0.00019999843848180545, - "loss": 46.0, - "step": 23276 - }, - { - "epoch": 1.7796892023625208, - "grad_norm": 0.00042408870649524033, - "learning_rate": 0.00019999843834757114, - "loss": 46.0, - "step": 23277 - }, - { - "epoch": 1.7797656593459106, - "grad_norm": 0.0017061886610463262, - "learning_rate": 0.00019999843821333105, - "loss": 46.0, - "step": 23278 - }, - { - "epoch": 1.7798421163293003, - "grad_norm": 0.0010921771172434092, - "learning_rate": 0.0001999984380790852, - "loss": 46.0, - "step": 23279 - }, - { - "epoch": 1.77991857331269, - "grad_norm": 0.0016911420971155167, - "learning_rate": 0.0001999984379448336, - "loss": 46.0, - "step": 23280 - }, - { - "epoch": 1.7799950302960796, - "grad_norm": 0.0009914758848026395, - "learning_rate": 0.00019999843781057618, - "loss": 46.0, - "step": 23281 - }, - { - "epoch": 1.7800714872794694, - "grad_norm": 0.001459247781895101, - "learning_rate": 0.00019999843767631306, - "loss": 46.0, - "step": 23282 - }, - { - "epoch": 1.7801479442628592, - "grad_norm": 0.0006058759172447026, - "learning_rate": 0.00019999843754204413, - "loss": 46.0, - "step": 23283 - }, - { - "epoch": 1.7802244012462487, - "grad_norm": 0.0019957320764660835, - "learning_rate": 0.00019999843740776943, - "loss": 46.0, - "step": 23284 - }, - { - "epoch": 1.7803008582296385, - "grad_norm": 0.000458067370345816, - "learning_rate": 0.00019999843727348896, - "loss": 46.0, - "step": 23285 - }, - { - "epoch": 1.7803773152130282, - "grad_norm": 0.0007111165323294699, - "learning_rate": 0.00019999843713920274, - "loss": 46.0, - "step": 23286 - }, - { - "epoch": 1.780453772196418, - "grad_norm": 0.0012318477965891361, - "learning_rate": 0.00019999843700491075, - "loss": 46.0, - "step": 23287 - }, - { - "epoch": 1.7805302291798077, - "grad_norm": 0.005296248011291027, - "learning_rate": 0.00019999843687061296, - "loss": 46.0, - "step": 23288 - }, - { - "epoch": 1.7806066861631975, - "grad_norm": 0.005017813760787249, - "learning_rate": 0.00019999843673630942, - "loss": 46.0, - "step": 23289 - }, - { - "epoch": 1.7806831431465873, - "grad_norm": 0.0015848270850256085, - "learning_rate": 0.0001999984366020001, - "loss": 46.0, - "step": 23290 - }, - { - "epoch": 1.780759600129977, - "grad_norm": 0.0003489513765089214, - "learning_rate": 0.00019999843646768505, - "loss": 46.0, - "step": 23291 - }, - { - "epoch": 1.7808360571133666, - "grad_norm": 0.0017636175034567714, - "learning_rate": 0.0001999984363333642, - "loss": 46.0, - "step": 23292 - }, - { - "epoch": 1.7809125140967563, - "grad_norm": 0.0009710597805678844, - "learning_rate": 0.0001999984361990376, - "loss": 46.0, - "step": 23293 - }, - { - "epoch": 1.780988971080146, - "grad_norm": 0.0011906478321179748, - "learning_rate": 0.00019999843606470518, - "loss": 46.0, - "step": 23294 - }, - { - "epoch": 1.7810654280635356, - "grad_norm": 0.00041255855467170477, - "learning_rate": 0.00019999843593036703, - "loss": 46.0, - "step": 23295 - }, - { - "epoch": 1.7811418850469254, - "grad_norm": 0.0012901457957923412, - "learning_rate": 0.0001999984357960231, - "loss": 46.0, - "step": 23296 - }, - { - "epoch": 1.7812183420303151, - "grad_norm": 0.0015257798368111253, - "learning_rate": 0.0001999984356616734, - "loss": 46.0, - "step": 23297 - }, - { - "epoch": 1.781294799013705, - "grad_norm": 0.0012141375336796045, - "learning_rate": 0.00019999843552731794, - "loss": 46.0, - "step": 23298 - }, - { - "epoch": 1.7813712559970947, - "grad_norm": 0.001517360913567245, - "learning_rate": 0.00019999843539295672, - "loss": 46.0, - "step": 23299 - }, - { - "epoch": 1.7814477129804844, - "grad_norm": 0.0008944621658883989, - "learning_rate": 0.00019999843525858973, - "loss": 46.0, - "step": 23300 - }, - { - "epoch": 1.7815241699638742, - "grad_norm": 0.006637830287218094, - "learning_rate": 0.00019999843512421696, - "loss": 46.0, - "step": 23301 - }, - { - "epoch": 1.781600626947264, - "grad_norm": 0.0010529521387070417, - "learning_rate": 0.00019999843498983843, - "loss": 46.0, - "step": 23302 - }, - { - "epoch": 1.7816770839306535, - "grad_norm": 0.0012665257090702653, - "learning_rate": 0.0001999984348554541, - "loss": 46.0, - "step": 23303 - }, - { - "epoch": 1.7817535409140433, - "grad_norm": 0.0023495457135140896, - "learning_rate": 0.00019999843472106403, - "loss": 46.0, - "step": 23304 - }, - { - "epoch": 1.7818299978974328, - "grad_norm": 0.0011543849250301719, - "learning_rate": 0.00019999843458666817, - "loss": 46.0, - "step": 23305 - }, - { - "epoch": 1.7819064548808226, - "grad_norm": 0.005076871253550053, - "learning_rate": 0.00019999843445226657, - "loss": 46.0, - "step": 23306 - }, - { - "epoch": 1.7819829118642123, - "grad_norm": 0.0008754977607168257, - "learning_rate": 0.00019999843431785916, - "loss": 46.0, - "step": 23307 - }, - { - "epoch": 1.782059368847602, - "grad_norm": 0.00356099265627563, - "learning_rate": 0.00019999843418344598, - "loss": 46.0, - "step": 23308 - }, - { - "epoch": 1.7821358258309918, - "grad_norm": 0.0007451626588590443, - "learning_rate": 0.00019999843404902709, - "loss": 46.0, - "step": 23309 - }, - { - "epoch": 1.7822122828143816, - "grad_norm": 0.0005000513046979904, - "learning_rate": 0.0001999984339146024, - "loss": 46.0, - "step": 23310 - }, - { - "epoch": 1.7822887397977714, - "grad_norm": 0.000997535651549697, - "learning_rate": 0.00019999843378017192, - "loss": 46.0, - "step": 23311 - }, - { - "epoch": 1.7823651967811611, - "grad_norm": 0.0013332013040781021, - "learning_rate": 0.00019999843364573567, - "loss": 46.0, - "step": 23312 - }, - { - "epoch": 1.7824416537645509, - "grad_norm": 0.0013277566758915782, - "learning_rate": 0.00019999843351129368, - "loss": 46.0, - "step": 23313 - }, - { - "epoch": 1.7825181107479404, - "grad_norm": 0.003483612323179841, - "learning_rate": 0.00019999843337684592, - "loss": 46.0, - "step": 23314 - }, - { - "epoch": 1.7825945677313302, - "grad_norm": 0.001168434857390821, - "learning_rate": 0.00019999843324239235, - "loss": 46.0, - "step": 23315 - }, - { - "epoch": 1.7826710247147197, - "grad_norm": 0.004102638456970453, - "learning_rate": 0.00019999843310793307, - "loss": 46.0, - "step": 23316 - }, - { - "epoch": 1.7827474816981095, - "grad_norm": 0.0017078651580959558, - "learning_rate": 0.00019999843297346796, - "loss": 46.0, - "step": 23317 - }, - { - "epoch": 1.7828239386814992, - "grad_norm": 0.0015993935521692038, - "learning_rate": 0.00019999843283899713, - "loss": 46.0, - "step": 23318 - }, - { - "epoch": 1.782900395664889, - "grad_norm": 0.0006118700257502496, - "learning_rate": 0.00019999843270452052, - "loss": 46.0, - "step": 23319 - }, - { - "epoch": 1.7829768526482788, - "grad_norm": 0.0008508054888807237, - "learning_rate": 0.00019999843257003812, - "loss": 46.0, - "step": 23320 - }, - { - "epoch": 1.7830533096316685, - "grad_norm": 0.0012558602029457688, - "learning_rate": 0.00019999843243554994, - "loss": 46.0, - "step": 23321 - }, - { - "epoch": 1.7831297666150583, - "grad_norm": 0.0010096827754750848, - "learning_rate": 0.00019999843230105601, - "loss": 46.0, - "step": 23322 - }, - { - "epoch": 1.783206223598448, - "grad_norm": 0.0009758556261658669, - "learning_rate": 0.00019999843216655634, - "loss": 46.0, - "step": 23323 - }, - { - "epoch": 1.7832826805818378, - "grad_norm": 0.0032229137141257524, - "learning_rate": 0.00019999843203205087, - "loss": 46.0, - "step": 23324 - }, - { - "epoch": 1.7833591375652273, - "grad_norm": 0.0020327342208474874, - "learning_rate": 0.00019999843189753963, - "loss": 46.0, - "step": 23325 - }, - { - "epoch": 1.783435594548617, - "grad_norm": 0.0008022714173421264, - "learning_rate": 0.0001999984317630226, - "loss": 46.0, - "step": 23326 - }, - { - "epoch": 1.7835120515320066, - "grad_norm": 0.001887103426270187, - "learning_rate": 0.00019999843162849985, - "loss": 46.0, - "step": 23327 - }, - { - "epoch": 1.7835885085153964, - "grad_norm": 0.0009429011843167245, - "learning_rate": 0.0001999984314939713, - "loss": 46.0, - "step": 23328 - }, - { - "epoch": 1.7836649654987862, - "grad_norm": 0.0007383808260783553, - "learning_rate": 0.000199998431359437, - "loss": 46.0, - "step": 23329 - }, - { - "epoch": 1.783741422482176, - "grad_norm": 0.000742451346013695, - "learning_rate": 0.0001999984312248969, - "loss": 46.0, - "step": 23330 - }, - { - "epoch": 1.7838178794655657, - "grad_norm": 0.0013161482056602836, - "learning_rate": 0.00019999843109035106, - "loss": 46.0, - "step": 23331 - }, - { - "epoch": 1.7838943364489555, - "grad_norm": 0.0027356704231351614, - "learning_rate": 0.00019999843095579945, - "loss": 46.0, - "step": 23332 - }, - { - "epoch": 1.7839707934323452, - "grad_norm": 0.0010390293318778276, - "learning_rate": 0.00019999843082124205, - "loss": 46.0, - "step": 23333 - }, - { - "epoch": 1.784047250415735, - "grad_norm": 0.0035049952566623688, - "learning_rate": 0.0001999984306866789, - "loss": 46.0, - "step": 23334 - }, - { - "epoch": 1.7841237073991245, - "grad_norm": 0.0010374942794442177, - "learning_rate": 0.00019999843055210997, - "loss": 46.0, - "step": 23335 - }, - { - "epoch": 1.7842001643825143, - "grad_norm": 0.0009820755803957582, - "learning_rate": 0.00019999843041753525, - "loss": 46.0, - "step": 23336 - }, - { - "epoch": 1.784276621365904, - "grad_norm": 0.0035666734911501408, - "learning_rate": 0.00019999843028295478, - "loss": 46.0, - "step": 23337 - }, - { - "epoch": 1.7843530783492936, - "grad_norm": 0.0005721225752495229, - "learning_rate": 0.00019999843014836856, - "loss": 46.0, - "step": 23338 - }, - { - "epoch": 1.7844295353326833, - "grad_norm": 0.0029102619737386703, - "learning_rate": 0.00019999843001377655, - "loss": 46.0, - "step": 23339 - }, - { - "epoch": 1.784505992316073, - "grad_norm": 0.0008085412555374205, - "learning_rate": 0.00019999842987917875, - "loss": 46.0, - "step": 23340 - }, - { - "epoch": 1.7845824492994629, - "grad_norm": 0.0013353623216971755, - "learning_rate": 0.00019999842974457522, - "loss": 46.0, - "step": 23341 - }, - { - "epoch": 1.7846589062828526, - "grad_norm": 0.002384156221523881, - "learning_rate": 0.0001999984296099659, - "loss": 46.0, - "step": 23342 - }, - { - "epoch": 1.7847353632662424, - "grad_norm": 0.00034197737113572657, - "learning_rate": 0.00019999842947535082, - "loss": 46.0, - "step": 23343 - }, - { - "epoch": 1.7848118202496321, - "grad_norm": 0.005555443931370974, - "learning_rate": 0.00019999842934072997, - "loss": 46.0, - "step": 23344 - }, - { - "epoch": 1.784888277233022, - "grad_norm": 0.0022445947397500277, - "learning_rate": 0.00019999842920610334, - "loss": 46.0, - "step": 23345 - }, - { - "epoch": 1.7849647342164114, - "grad_norm": 0.0013989756116643548, - "learning_rate": 0.00019999842907147096, - "loss": 46.0, - "step": 23346 - }, - { - "epoch": 1.7850411911998012, - "grad_norm": 0.0013043450890108943, - "learning_rate": 0.0001999984289368328, - "loss": 46.0, - "step": 23347 - }, - { - "epoch": 1.785117648183191, - "grad_norm": 0.0011561455903574824, - "learning_rate": 0.00019999842880218886, - "loss": 46.0, - "step": 23348 - }, - { - "epoch": 1.7851941051665805, - "grad_norm": 0.0012594155268743634, - "learning_rate": 0.00019999842866753916, - "loss": 46.0, - "step": 23349 - }, - { - "epoch": 1.7852705621499703, - "grad_norm": 0.0005937193054705858, - "learning_rate": 0.0001999984285328837, - "loss": 46.0, - "step": 23350 - }, - { - "epoch": 1.78534701913336, - "grad_norm": 0.012542163021862507, - "learning_rate": 0.00019999842839822245, - "loss": 46.0, - "step": 23351 - }, - { - "epoch": 1.7854234761167498, - "grad_norm": 0.0011094792280346155, - "learning_rate": 0.00019999842826355543, - "loss": 46.0, - "step": 23352 - }, - { - "epoch": 1.7854999331001395, - "grad_norm": 0.0015730494633316994, - "learning_rate": 0.00019999842812888267, - "loss": 46.0, - "step": 23353 - }, - { - "epoch": 1.7855763900835293, - "grad_norm": 0.002759851748123765, - "learning_rate": 0.00019999842799420413, - "loss": 46.0, - "step": 23354 - }, - { - "epoch": 1.785652847066919, - "grad_norm": 0.0011959831463173032, - "learning_rate": 0.00019999842785951982, - "loss": 46.0, - "step": 23355 - }, - { - "epoch": 1.7857293040503088, - "grad_norm": 0.001111734309233725, - "learning_rate": 0.0001999984277248297, - "loss": 46.0, - "step": 23356 - }, - { - "epoch": 1.7858057610336984, - "grad_norm": 0.0008648911025375128, - "learning_rate": 0.00019999842759013385, - "loss": 46.0, - "step": 23357 - }, - { - "epoch": 1.7858822180170881, - "grad_norm": 0.0005462827393785119, - "learning_rate": 0.00019999842745543222, - "loss": 46.0, - "step": 23358 - }, - { - "epoch": 1.785958675000478, - "grad_norm": 0.0009702163515612483, - "learning_rate": 0.00019999842732072484, - "loss": 46.0, - "step": 23359 - }, - { - "epoch": 1.7860351319838674, - "grad_norm": 0.0010261706775054336, - "learning_rate": 0.00019999842718601167, - "loss": 46.0, - "step": 23360 - }, - { - "epoch": 1.7861115889672572, - "grad_norm": 0.0008456979412585497, - "learning_rate": 0.00019999842705129272, - "loss": 46.0, - "step": 23361 - }, - { - "epoch": 1.786188045950647, - "grad_norm": 0.0005397512577474117, - "learning_rate": 0.00019999842691656805, - "loss": 46.0, - "step": 23362 - }, - { - "epoch": 1.7862645029340367, - "grad_norm": 0.0019083087099716067, - "learning_rate": 0.00019999842678183755, - "loss": 46.0, - "step": 23363 - }, - { - "epoch": 1.7863409599174265, - "grad_norm": 0.0022420925088226795, - "learning_rate": 0.00019999842664710133, - "loss": 46.0, - "step": 23364 - }, - { - "epoch": 1.7864174169008162, - "grad_norm": 0.0007163463742472231, - "learning_rate": 0.00019999842651235932, - "loss": 46.0, - "step": 23365 - }, - { - "epoch": 1.786493873884206, - "grad_norm": 0.0005728755495510995, - "learning_rate": 0.00019999842637761153, - "loss": 46.0, - "step": 23366 - }, - { - "epoch": 1.7865703308675958, - "grad_norm": 0.0006644154782406986, - "learning_rate": 0.000199998426242858, - "loss": 46.0, - "step": 23367 - }, - { - "epoch": 1.7866467878509853, - "grad_norm": 0.000963221478741616, - "learning_rate": 0.00019999842610809868, - "loss": 46.0, - "step": 23368 - }, - { - "epoch": 1.786723244834375, - "grad_norm": 0.00035833221045322716, - "learning_rate": 0.00019999842597333357, - "loss": 46.0, - "step": 23369 - }, - { - "epoch": 1.7867997018177648, - "grad_norm": 0.0007129181176424026, - "learning_rate": 0.00019999842583856271, - "loss": 46.0, - "step": 23370 - }, - { - "epoch": 1.7868761588011544, - "grad_norm": 0.001610266393981874, - "learning_rate": 0.00019999842570378608, - "loss": 46.0, - "step": 23371 - }, - { - "epoch": 1.7869526157845441, - "grad_norm": 0.0009280890808440745, - "learning_rate": 0.0001999984255690037, - "loss": 46.0, - "step": 23372 - }, - { - "epoch": 1.7870290727679339, - "grad_norm": 0.004658679943531752, - "learning_rate": 0.00019999842543421553, - "loss": 46.0, - "step": 23373 - }, - { - "epoch": 1.7871055297513236, - "grad_norm": 0.007147517986595631, - "learning_rate": 0.00019999842529942158, - "loss": 46.0, - "step": 23374 - }, - { - "epoch": 1.7871819867347134, - "grad_norm": 0.001862075412645936, - "learning_rate": 0.0001999984251646219, - "loss": 46.0, - "step": 23375 - }, - { - "epoch": 1.7872584437181032, - "grad_norm": 0.0012501366436481476, - "learning_rate": 0.00019999842502981641, - "loss": 46.0, - "step": 23376 - }, - { - "epoch": 1.787334900701493, - "grad_norm": 0.0007985960692167282, - "learning_rate": 0.00019999842489500517, - "loss": 46.0, - "step": 23377 - }, - { - "epoch": 1.7874113576848827, - "grad_norm": 0.0022612749598920345, - "learning_rate": 0.00019999842476018818, - "loss": 46.0, - "step": 23378 - }, - { - "epoch": 1.7874878146682722, - "grad_norm": 0.001469761598855257, - "learning_rate": 0.00019999842462536537, - "loss": 46.0, - "step": 23379 - }, - { - "epoch": 1.787564271651662, - "grad_norm": 0.0005615563131868839, - "learning_rate": 0.00019999842449053683, - "loss": 46.0, - "step": 23380 - }, - { - "epoch": 1.7876407286350517, - "grad_norm": 0.004391221329569817, - "learning_rate": 0.00019999842435570252, - "loss": 46.0, - "step": 23381 - }, - { - "epoch": 1.7877171856184413, - "grad_norm": 0.0011570961214601994, - "learning_rate": 0.00019999842422086244, - "loss": 46.0, - "step": 23382 - }, - { - "epoch": 1.787793642601831, - "grad_norm": 0.0008647372596897185, - "learning_rate": 0.00019999842408601658, - "loss": 46.0, - "step": 23383 - }, - { - "epoch": 1.7878700995852208, - "grad_norm": 0.00046265454147942364, - "learning_rate": 0.00019999842395116492, - "loss": 46.0, - "step": 23384 - }, - { - "epoch": 1.7879465565686106, - "grad_norm": 0.0007150240708142519, - "learning_rate": 0.00019999842381630755, - "loss": 46.0, - "step": 23385 - }, - { - "epoch": 1.7880230135520003, - "grad_norm": 0.0008370680152438581, - "learning_rate": 0.00019999842368144437, - "loss": 46.0, - "step": 23386 - }, - { - "epoch": 1.78809947053539, - "grad_norm": 0.0006866789772175252, - "learning_rate": 0.00019999842354657542, - "loss": 46.0, - "step": 23387 - }, - { - "epoch": 1.7881759275187799, - "grad_norm": 0.0010378382867202163, - "learning_rate": 0.00019999842341170073, - "loss": 46.0, - "step": 23388 - }, - { - "epoch": 1.7882523845021696, - "grad_norm": 0.0016210711328312755, - "learning_rate": 0.00019999842327682026, - "loss": 46.0, - "step": 23389 - }, - { - "epoch": 1.7883288414855592, - "grad_norm": 0.0009405912132933736, - "learning_rate": 0.00019999842314193399, - "loss": 46.0, - "step": 23390 - }, - { - "epoch": 1.788405298468949, - "grad_norm": 0.0015011117793619633, - "learning_rate": 0.000199998423007042, - "loss": 46.0, - "step": 23391 - }, - { - "epoch": 1.7884817554523387, - "grad_norm": 0.0016328402562066913, - "learning_rate": 0.0001999984228721442, - "loss": 46.0, - "step": 23392 - }, - { - "epoch": 1.7885582124357282, - "grad_norm": 0.0017886487767100334, - "learning_rate": 0.00019999842273724064, - "loss": 46.0, - "step": 23393 - }, - { - "epoch": 1.788634669419118, - "grad_norm": 0.0006534748245030642, - "learning_rate": 0.00019999842260233134, - "loss": 46.0, - "step": 23394 - }, - { - "epoch": 1.7887111264025077, - "grad_norm": 0.0036050642374902964, - "learning_rate": 0.00019999842246741625, - "loss": 46.0, - "step": 23395 - }, - { - "epoch": 1.7887875833858975, - "grad_norm": 0.002600611187517643, - "learning_rate": 0.00019999842233249537, - "loss": 46.0, - "step": 23396 - }, - { - "epoch": 1.7888640403692873, - "grad_norm": 0.001363143790513277, - "learning_rate": 0.00019999842219756874, - "loss": 46.0, - "step": 23397 - }, - { - "epoch": 1.788940497352677, - "grad_norm": 0.0023228079080581665, - "learning_rate": 0.00019999842206263634, - "loss": 46.0, - "step": 23398 - }, - { - "epoch": 1.7890169543360668, - "grad_norm": 0.0024469674099236727, - "learning_rate": 0.0001999984219276982, - "loss": 46.0, - "step": 23399 - }, - { - "epoch": 1.7890934113194565, - "grad_norm": 0.0010389079106971622, - "learning_rate": 0.00019999842179275424, - "loss": 46.0, - "step": 23400 - }, - { - "epoch": 1.789169868302846, - "grad_norm": 0.0008803008240647614, - "learning_rate": 0.00019999842165780454, - "loss": 46.0, - "step": 23401 - }, - { - "epoch": 1.7892463252862358, - "grad_norm": 0.0020320320036262274, - "learning_rate": 0.00019999842152284907, - "loss": 46.0, - "step": 23402 - }, - { - "epoch": 1.7893227822696256, - "grad_norm": 0.0008957046666182578, - "learning_rate": 0.00019999842138788783, - "loss": 46.0, - "step": 23403 - }, - { - "epoch": 1.7893992392530151, - "grad_norm": 0.0007960941875353456, - "learning_rate": 0.00019999842125292082, - "loss": 46.0, - "step": 23404 - }, - { - "epoch": 1.789475696236405, - "grad_norm": 0.00265374593436718, - "learning_rate": 0.00019999842111794803, - "loss": 46.0, - "step": 23405 - }, - { - "epoch": 1.7895521532197947, - "grad_norm": 0.006574220489710569, - "learning_rate": 0.00019999842098296946, - "loss": 46.0, - "step": 23406 - }, - { - "epoch": 1.7896286102031844, - "grad_norm": 0.001125707058236003, - "learning_rate": 0.00019999842084798513, - "loss": 46.0, - "step": 23407 - }, - { - "epoch": 1.7897050671865742, - "grad_norm": 0.0005455115460790694, - "learning_rate": 0.00019999842071299505, - "loss": 46.0, - "step": 23408 - }, - { - "epoch": 1.789781524169964, - "grad_norm": 0.0018281489610671997, - "learning_rate": 0.0001999984205779992, - "loss": 46.0, - "step": 23409 - }, - { - "epoch": 1.7898579811533537, - "grad_norm": 0.0008425092091783881, - "learning_rate": 0.00019999842044299753, - "loss": 46.0, - "step": 23410 - }, - { - "epoch": 1.7899344381367435, - "grad_norm": 0.001513608731329441, - "learning_rate": 0.00019999842030799013, - "loss": 46.0, - "step": 23411 - }, - { - "epoch": 1.790010895120133, - "grad_norm": 0.0036630851682275534, - "learning_rate": 0.00019999842017297698, - "loss": 46.0, - "step": 23412 - }, - { - "epoch": 1.7900873521035228, - "grad_norm": 0.0008791261352598667, - "learning_rate": 0.00019999842003795803, - "loss": 46.0, - "step": 23413 - }, - { - "epoch": 1.7901638090869125, - "grad_norm": 0.0009894762188196182, - "learning_rate": 0.0001999984199029333, - "loss": 46.0, - "step": 23414 - }, - { - "epoch": 1.790240266070302, - "grad_norm": 0.0010719248093664646, - "learning_rate": 0.00019999841976790284, - "loss": 46.0, - "step": 23415 - }, - { - "epoch": 1.7903167230536918, - "grad_norm": 0.003220104146748781, - "learning_rate": 0.00019999841963286657, - "loss": 46.0, - "step": 23416 - }, - { - "epoch": 1.7903931800370816, - "grad_norm": 0.0007135960622690618, - "learning_rate": 0.00019999841949782456, - "loss": 46.0, - "step": 23417 - }, - { - "epoch": 1.7904696370204713, - "grad_norm": 0.0009460801375098526, - "learning_rate": 0.0001999984193627768, - "loss": 46.0, - "step": 23418 - }, - { - "epoch": 1.790546094003861, - "grad_norm": 0.004054906778037548, - "learning_rate": 0.00019999841922772323, - "loss": 46.0, - "step": 23419 - }, - { - "epoch": 1.7906225509872509, - "grad_norm": 0.00043670253944583237, - "learning_rate": 0.0001999984190926639, - "loss": 46.0, - "step": 23420 - }, - { - "epoch": 1.7906990079706406, - "grad_norm": 0.0010652946075424552, - "learning_rate": 0.00019999841895759884, - "loss": 46.0, - "step": 23421 - }, - { - "epoch": 1.7907754649540304, - "grad_norm": 0.0009400029084645212, - "learning_rate": 0.00019999841882252793, - "loss": 46.0, - "step": 23422 - }, - { - "epoch": 1.79085192193742, - "grad_norm": 0.0036266951356083155, - "learning_rate": 0.00019999841868745133, - "loss": 46.0, - "step": 23423 - }, - { - "epoch": 1.7909283789208097, - "grad_norm": 0.0013853814452886581, - "learning_rate": 0.0001999984185523689, - "loss": 46.0, - "step": 23424 - }, - { - "epoch": 1.7910048359041995, - "grad_norm": 0.002084429142996669, - "learning_rate": 0.00019999841841728073, - "loss": 46.0, - "step": 23425 - }, - { - "epoch": 1.791081292887589, - "grad_norm": 0.000407147133955732, - "learning_rate": 0.0001999984182821868, - "loss": 46.0, - "step": 23426 - }, - { - "epoch": 1.7911577498709788, - "grad_norm": 0.0008450127788819373, - "learning_rate": 0.00019999841814708708, - "loss": 46.0, - "step": 23427 - }, - { - "epoch": 1.7912342068543685, - "grad_norm": 0.0021378525998443365, - "learning_rate": 0.00019999841801198161, - "loss": 46.0, - "step": 23428 - }, - { - "epoch": 1.7913106638377583, - "grad_norm": 0.0005242145271040499, - "learning_rate": 0.00019999841787687035, - "loss": 46.0, - "step": 23429 - }, - { - "epoch": 1.791387120821148, - "grad_norm": 0.00328258378431201, - "learning_rate": 0.00019999841774175333, - "loss": 46.0, - "step": 23430 - }, - { - "epoch": 1.7914635778045378, - "grad_norm": 0.0004819945024792105, - "learning_rate": 0.00019999841760663057, - "loss": 46.0, - "step": 23431 - }, - { - "epoch": 1.7915400347879276, - "grad_norm": 0.001724945497699082, - "learning_rate": 0.00019999841747150198, - "loss": 46.0, - "step": 23432 - }, - { - "epoch": 1.7916164917713173, - "grad_norm": 0.0007490586722269654, - "learning_rate": 0.00019999841733636767, - "loss": 46.0, - "step": 23433 - }, - { - "epoch": 1.7916929487547069, - "grad_norm": 0.0010977743659168482, - "learning_rate": 0.00019999841720122756, - "loss": 46.0, - "step": 23434 - }, - { - "epoch": 1.7917694057380966, - "grad_norm": 0.0012437679106369615, - "learning_rate": 0.0001999984170660817, - "loss": 46.0, - "step": 23435 - }, - { - "epoch": 1.7918458627214862, - "grad_norm": 0.002973268274217844, - "learning_rate": 0.00019999841693093005, - "loss": 46.0, - "step": 23436 - }, - { - "epoch": 1.791922319704876, - "grad_norm": 0.003718478139489889, - "learning_rate": 0.00019999841679577268, - "loss": 46.0, - "step": 23437 - }, - { - "epoch": 1.7919987766882657, - "grad_norm": 0.0019366148626431823, - "learning_rate": 0.00019999841666060948, - "loss": 46.0, - "step": 23438 - }, - { - "epoch": 1.7920752336716554, - "grad_norm": 0.007893499918282032, - "learning_rate": 0.00019999841652544053, - "loss": 46.0, - "step": 23439 - }, - { - "epoch": 1.7921516906550452, - "grad_norm": 0.0017065148567780852, - "learning_rate": 0.00019999841639026583, - "loss": 46.0, - "step": 23440 - }, - { - "epoch": 1.792228147638435, - "grad_norm": 0.0012645057868212461, - "learning_rate": 0.00019999841625508536, - "loss": 46.0, - "step": 23441 - }, - { - "epoch": 1.7923046046218247, - "grad_norm": 0.001027383143082261, - "learning_rate": 0.00019999841611989912, - "loss": 46.0, - "step": 23442 - }, - { - "epoch": 1.7923810616052145, - "grad_norm": 0.0007045645616017282, - "learning_rate": 0.00019999841598470708, - "loss": 46.0, - "step": 23443 - }, - { - "epoch": 1.7924575185886042, - "grad_norm": 0.0009280061349272728, - "learning_rate": 0.0001999984158495093, - "loss": 46.0, - "step": 23444 - }, - { - "epoch": 1.7925339755719938, - "grad_norm": 0.0012330528115853667, - "learning_rate": 0.00019999841571430573, - "loss": 46.0, - "step": 23445 - }, - { - "epoch": 1.7926104325553835, - "grad_norm": 0.0005882453988306224, - "learning_rate": 0.0001999984155790964, - "loss": 46.0, - "step": 23446 - }, - { - "epoch": 1.792686889538773, - "grad_norm": 0.0041147139854729176, - "learning_rate": 0.0001999984154438813, - "loss": 46.0, - "step": 23447 - }, - { - "epoch": 1.7927633465221628, - "grad_norm": 0.0011442916002124548, - "learning_rate": 0.00019999841530866043, - "loss": 46.0, - "step": 23448 - }, - { - "epoch": 1.7928398035055526, - "grad_norm": 0.0006084247725084424, - "learning_rate": 0.0001999984151734338, - "loss": 46.0, - "step": 23449 - }, - { - "epoch": 1.7929162604889424, - "grad_norm": 0.004325206391513348, - "learning_rate": 0.0001999984150382014, - "loss": 46.0, - "step": 23450 - }, - { - "epoch": 1.7929927174723321, - "grad_norm": 0.0011547714238986373, - "learning_rate": 0.00019999841490296323, - "loss": 46.0, - "step": 23451 - }, - { - "epoch": 1.793069174455722, - "grad_norm": 0.0007342167082242668, - "learning_rate": 0.00019999841476771928, - "loss": 46.0, - "step": 23452 - }, - { - "epoch": 1.7931456314391117, - "grad_norm": 0.0003476165875326842, - "learning_rate": 0.00019999841463246956, - "loss": 46.0, - "step": 23453 - }, - { - "epoch": 1.7932220884225014, - "grad_norm": 0.002937949961051345, - "learning_rate": 0.00019999841449721406, - "loss": 46.0, - "step": 23454 - }, - { - "epoch": 1.7932985454058912, - "grad_norm": 0.0038411268033087254, - "learning_rate": 0.00019999841436195282, - "loss": 46.0, - "step": 23455 - }, - { - "epoch": 1.7933750023892807, - "grad_norm": 0.000800565758254379, - "learning_rate": 0.00019999841422668578, - "loss": 46.0, - "step": 23456 - }, - { - "epoch": 1.7934514593726705, - "grad_norm": 0.00106425816193223, - "learning_rate": 0.000199998414091413, - "loss": 46.0, - "step": 23457 - }, - { - "epoch": 1.79352791635606, - "grad_norm": 0.0009492255048826337, - "learning_rate": 0.00019999841395613443, - "loss": 46.0, - "step": 23458 - }, - { - "epoch": 1.7936043733394498, - "grad_norm": 0.00187025242485106, - "learning_rate": 0.00019999841382085012, - "loss": 46.0, - "step": 23459 - }, - { - "epoch": 1.7936808303228395, - "grad_norm": 0.0013016454176977277, - "learning_rate": 0.00019999841368556002, - "loss": 46.0, - "step": 23460 - }, - { - "epoch": 1.7937572873062293, - "grad_norm": 0.00206198962405324, - "learning_rate": 0.00019999841355026413, - "loss": 46.0, - "step": 23461 - }, - { - "epoch": 1.793833744289619, - "grad_norm": 0.004766110796481371, - "learning_rate": 0.0001999984134149625, - "loss": 46.0, - "step": 23462 - }, - { - "epoch": 1.7939102012730088, - "grad_norm": 0.0013337443815544248, - "learning_rate": 0.0001999984132796551, - "loss": 46.0, - "step": 23463 - }, - { - "epoch": 1.7939866582563986, - "grad_norm": 0.007464942522346973, - "learning_rate": 0.00019999841314434193, - "loss": 46.0, - "step": 23464 - }, - { - "epoch": 1.7940631152397883, - "grad_norm": 0.0017536778468638659, - "learning_rate": 0.00019999841300902298, - "loss": 46.0, - "step": 23465 - }, - { - "epoch": 1.7941395722231779, - "grad_norm": 0.0012567124795168638, - "learning_rate": 0.00019999841287369826, - "loss": 46.0, - "step": 23466 - }, - { - "epoch": 1.7942160292065676, - "grad_norm": 0.0011617616983130574, - "learning_rate": 0.00019999841273836777, - "loss": 46.0, - "step": 23467 - }, - { - "epoch": 1.7942924861899574, - "grad_norm": 0.0019567653071135283, - "learning_rate": 0.00019999841260303153, - "loss": 46.0, - "step": 23468 - }, - { - "epoch": 1.794368943173347, - "grad_norm": 0.0007796279969625175, - "learning_rate": 0.00019999841246768948, - "loss": 46.0, - "step": 23469 - }, - { - "epoch": 1.7944454001567367, - "grad_norm": 0.0009487696806900203, - "learning_rate": 0.0001999984123323417, - "loss": 46.0, - "step": 23470 - }, - { - "epoch": 1.7945218571401265, - "grad_norm": 0.0008429872686974704, - "learning_rate": 0.00019999841219698814, - "loss": 46.0, - "step": 23471 - }, - { - "epoch": 1.7945983141235162, - "grad_norm": 0.0013688217150047421, - "learning_rate": 0.0001999984120616288, - "loss": 46.0, - "step": 23472 - }, - { - "epoch": 1.794674771106906, - "grad_norm": 0.0008438929216936231, - "learning_rate": 0.0001999984119262637, - "loss": 46.0, - "step": 23473 - }, - { - "epoch": 1.7947512280902957, - "grad_norm": 0.002165168523788452, - "learning_rate": 0.0001999984117908928, - "loss": 46.0, - "step": 23474 - }, - { - "epoch": 1.7948276850736855, - "grad_norm": 0.0009734886698424816, - "learning_rate": 0.00019999841165551619, - "loss": 46.0, - "step": 23475 - }, - { - "epoch": 1.7949041420570753, - "grad_norm": 0.001065479009412229, - "learning_rate": 0.00019999841152013378, - "loss": 46.0, - "step": 23476 - }, - { - "epoch": 1.7949805990404648, - "grad_norm": 0.0009766994044184685, - "learning_rate": 0.0001999984113847456, - "loss": 46.0, - "step": 23477 - }, - { - "epoch": 1.7950570560238546, - "grad_norm": 0.0027736565098166466, - "learning_rate": 0.00019999841124935164, - "loss": 46.0, - "step": 23478 - }, - { - "epoch": 1.7951335130072443, - "grad_norm": 0.0007543741376139224, - "learning_rate": 0.00019999841111395194, - "loss": 46.0, - "step": 23479 - }, - { - "epoch": 1.7952099699906339, - "grad_norm": 0.0010395781137049198, - "learning_rate": 0.00019999841097854645, - "loss": 46.0, - "step": 23480 - }, - { - "epoch": 1.7952864269740236, - "grad_norm": 0.0012976148864254355, - "learning_rate": 0.0001999984108431352, - "loss": 46.0, - "step": 23481 - }, - { - "epoch": 1.7953628839574134, - "grad_norm": 0.0012276128400117159, - "learning_rate": 0.00019999841070771817, - "loss": 46.0, - "step": 23482 - }, - { - "epoch": 1.7954393409408032, - "grad_norm": 0.0030859080143272877, - "learning_rate": 0.00019999841057229535, - "loss": 46.0, - "step": 23483 - }, - { - "epoch": 1.795515797924193, - "grad_norm": 0.002846098504960537, - "learning_rate": 0.0001999984104368668, - "loss": 46.0, - "step": 23484 - }, - { - "epoch": 1.7955922549075827, - "grad_norm": 0.0029819656629115343, - "learning_rate": 0.00019999841030143248, - "loss": 46.0, - "step": 23485 - }, - { - "epoch": 1.7956687118909724, - "grad_norm": 0.0012364336289465427, - "learning_rate": 0.00019999841016599238, - "loss": 46.0, - "step": 23486 - }, - { - "epoch": 1.7957451688743622, - "grad_norm": 0.0013423376949504018, - "learning_rate": 0.0001999984100305465, - "loss": 46.0, - "step": 23487 - }, - { - "epoch": 1.7958216258577517, - "grad_norm": 0.0014464695705100894, - "learning_rate": 0.00019999840989509484, - "loss": 46.0, - "step": 23488 - }, - { - "epoch": 1.7958980828411415, - "grad_norm": 0.0015231097349897027, - "learning_rate": 0.00019999840975963744, - "loss": 46.0, - "step": 23489 - }, - { - "epoch": 1.7959745398245313, - "grad_norm": 0.0007667168392799795, - "learning_rate": 0.00019999840962417427, - "loss": 46.0, - "step": 23490 - }, - { - "epoch": 1.7960509968079208, - "grad_norm": 0.004429669119417667, - "learning_rate": 0.0001999984094887053, - "loss": 46.0, - "step": 23491 - }, - { - "epoch": 1.7961274537913106, - "grad_norm": 0.0008974652155302465, - "learning_rate": 0.0001999984093532306, - "loss": 46.0, - "step": 23492 - }, - { - "epoch": 1.7962039107747003, - "grad_norm": 0.0006273578619584441, - "learning_rate": 0.00019999840921775008, - "loss": 46.0, - "step": 23493 - }, - { - "epoch": 1.79628036775809, - "grad_norm": 0.000733337365090847, - "learning_rate": 0.00019999840908226384, - "loss": 46.0, - "step": 23494 - }, - { - "epoch": 1.7963568247414798, - "grad_norm": 0.0007398317102342844, - "learning_rate": 0.00019999840894677182, - "loss": 46.0, - "step": 23495 - }, - { - "epoch": 1.7964332817248696, - "grad_norm": 0.001640309114009142, - "learning_rate": 0.00019999840881127404, - "loss": 46.0, - "step": 23496 - }, - { - "epoch": 1.7965097387082594, - "grad_norm": 0.0008160446886904538, - "learning_rate": 0.00019999840867577045, - "loss": 46.0, - "step": 23497 - }, - { - "epoch": 1.7965861956916491, - "grad_norm": 0.0003719780361279845, - "learning_rate": 0.00019999840854026112, - "loss": 46.0, - "step": 23498 - }, - { - "epoch": 1.7966626526750387, - "grad_norm": 0.0009321917314082384, - "learning_rate": 0.000199998408404746, - "loss": 46.0, - "step": 23499 - }, - { - "epoch": 1.7967391096584284, - "grad_norm": 0.0004980378434993327, - "learning_rate": 0.00019999840826922513, - "loss": 46.0, - "step": 23500 - }, - { - "epoch": 1.7968155666418182, - "grad_norm": 0.006817714311182499, - "learning_rate": 0.00019999840813369847, - "loss": 46.0, - "step": 23501 - }, - { - "epoch": 1.7968920236252077, - "grad_norm": 0.001043003867380321, - "learning_rate": 0.00019999840799816607, - "loss": 46.0, - "step": 23502 - }, - { - "epoch": 1.7969684806085975, - "grad_norm": 0.0025603363756090403, - "learning_rate": 0.0001999984078626279, - "loss": 46.0, - "step": 23503 - }, - { - "epoch": 1.7970449375919872, - "grad_norm": 0.0005779876373708248, - "learning_rate": 0.00019999840772708393, - "loss": 46.0, - "step": 23504 - }, - { - "epoch": 1.797121394575377, - "grad_norm": 0.0028805567417293787, - "learning_rate": 0.0001999984075915342, - "loss": 46.0, - "step": 23505 - }, - { - "epoch": 1.7971978515587668, - "grad_norm": 0.001627816236577928, - "learning_rate": 0.00019999840745597874, - "loss": 46.0, - "step": 23506 - }, - { - "epoch": 1.7972743085421565, - "grad_norm": 0.015421341173350811, - "learning_rate": 0.00019999840732041747, - "loss": 46.0, - "step": 23507 - }, - { - "epoch": 1.7973507655255463, - "grad_norm": 0.00489919213578105, - "learning_rate": 0.00019999840718485043, - "loss": 46.0, - "step": 23508 - }, - { - "epoch": 1.797427222508936, - "grad_norm": 0.00043394602835178375, - "learning_rate": 0.00019999840704927764, - "loss": 46.0, - "step": 23509 - }, - { - "epoch": 1.7975036794923256, - "grad_norm": 0.010248390026390553, - "learning_rate": 0.00019999840691369908, - "loss": 46.0, - "step": 23510 - }, - { - "epoch": 1.7975801364757154, - "grad_norm": 0.00131643854547292, - "learning_rate": 0.00019999840677811475, - "loss": 46.0, - "step": 23511 - }, - { - "epoch": 1.7976565934591051, - "grad_norm": 0.002210026141256094, - "learning_rate": 0.00019999840664252462, - "loss": 46.0, - "step": 23512 - }, - { - "epoch": 1.7977330504424947, - "grad_norm": 0.005083800759166479, - "learning_rate": 0.00019999840650692874, - "loss": 46.0, - "step": 23513 - }, - { - "epoch": 1.7978095074258844, - "grad_norm": 0.0006176887545734644, - "learning_rate": 0.0001999984063713271, - "loss": 46.0, - "step": 23514 - }, - { - "epoch": 1.7978859644092742, - "grad_norm": 0.0029231118969619274, - "learning_rate": 0.00019999840623571968, - "loss": 46.0, - "step": 23515 - }, - { - "epoch": 1.797962421392664, - "grad_norm": 0.0007739661377854645, - "learning_rate": 0.0001999984061001065, - "loss": 46.0, - "step": 23516 - }, - { - "epoch": 1.7980388783760537, - "grad_norm": 0.0006957594305276871, - "learning_rate": 0.00019999840596448756, - "loss": 46.0, - "step": 23517 - }, - { - "epoch": 1.7981153353594435, - "grad_norm": 0.0016285991296172142, - "learning_rate": 0.00019999840582886282, - "loss": 46.0, - "step": 23518 - }, - { - "epoch": 1.7981917923428332, - "grad_norm": 0.0016668487805873156, - "learning_rate": 0.00019999840569323235, - "loss": 46.0, - "step": 23519 - }, - { - "epoch": 1.798268249326223, - "grad_norm": 0.0010149258887395263, - "learning_rate": 0.00019999840555759608, - "loss": 46.0, - "step": 23520 - }, - { - "epoch": 1.7983447063096125, - "grad_norm": 0.0011890220921486616, - "learning_rate": 0.00019999840542195404, - "loss": 46.0, - "step": 23521 - }, - { - "epoch": 1.7984211632930023, - "grad_norm": 0.0018648263067007065, - "learning_rate": 0.00019999840528630626, - "loss": 46.0, - "step": 23522 - }, - { - "epoch": 1.798497620276392, - "grad_norm": 0.0007314970134757459, - "learning_rate": 0.00019999840515065267, - "loss": 46.0, - "step": 23523 - }, - { - "epoch": 1.7985740772597816, - "grad_norm": 0.0007192611810751259, - "learning_rate": 0.00019999840501499334, - "loss": 46.0, - "step": 23524 - }, - { - "epoch": 1.7986505342431713, - "grad_norm": 0.0004841941990889609, - "learning_rate": 0.00019999840487932823, - "loss": 46.0, - "step": 23525 - }, - { - "epoch": 1.798726991226561, - "grad_norm": 0.001030182815156877, - "learning_rate": 0.00019999840474365735, - "loss": 46.0, - "step": 23526 - }, - { - "epoch": 1.7988034482099509, - "grad_norm": 0.000695998955052346, - "learning_rate": 0.00019999840460798072, - "loss": 46.0, - "step": 23527 - }, - { - "epoch": 1.7988799051933406, - "grad_norm": 0.0018407469615340233, - "learning_rate": 0.00019999840447229827, - "loss": 46.0, - "step": 23528 - }, - { - "epoch": 1.7989563621767304, - "grad_norm": 0.0009694445761851966, - "learning_rate": 0.0001999984043366101, - "loss": 46.0, - "step": 23529 - }, - { - "epoch": 1.7990328191601201, - "grad_norm": 0.0011146041797474027, - "learning_rate": 0.00019999840420091615, - "loss": 46.0, - "step": 23530 - }, - { - "epoch": 1.79910927614351, - "grad_norm": 0.0015319178346544504, - "learning_rate": 0.0001999984040652164, - "loss": 46.0, - "step": 23531 - }, - { - "epoch": 1.7991857331268994, - "grad_norm": 0.0006881053559482098, - "learning_rate": 0.0001999984039295109, - "loss": 46.0, - "step": 23532 - }, - { - "epoch": 1.7992621901102892, - "grad_norm": 0.010389677248895168, - "learning_rate": 0.00019999840379379967, - "loss": 46.0, - "step": 23533 - }, - { - "epoch": 1.799338647093679, - "grad_norm": 0.0007288535707630217, - "learning_rate": 0.00019999840365808263, - "loss": 46.0, - "step": 23534 - }, - { - "epoch": 1.7994151040770685, - "grad_norm": 0.0009432890801690519, - "learning_rate": 0.00019999840352235985, - "loss": 46.0, - "step": 23535 - }, - { - "epoch": 1.7994915610604583, - "grad_norm": 0.0015950363595038652, - "learning_rate": 0.00019999840338663126, - "loss": 46.0, - "step": 23536 - }, - { - "epoch": 1.799568018043848, - "grad_norm": 0.0016075170133262873, - "learning_rate": 0.00019999840325089693, - "loss": 46.0, - "step": 23537 - }, - { - "epoch": 1.7996444750272378, - "grad_norm": 0.0010066393297165632, - "learning_rate": 0.00019999840311515682, - "loss": 46.0, - "step": 23538 - }, - { - "epoch": 1.7997209320106275, - "grad_norm": 0.000552264042198658, - "learning_rate": 0.0001999984029794109, - "loss": 46.0, - "step": 23539 - }, - { - "epoch": 1.7997973889940173, - "grad_norm": 0.0019441158510744572, - "learning_rate": 0.00019999840284365926, - "loss": 46.0, - "step": 23540 - }, - { - "epoch": 1.799873845977407, - "grad_norm": 0.0015652806032449007, - "learning_rate": 0.00019999840270790186, - "loss": 46.0, - "step": 23541 - }, - { - "epoch": 1.7999503029607968, - "grad_norm": 0.002753129694610834, - "learning_rate": 0.00019999840257213866, - "loss": 46.0, - "step": 23542 - }, - { - "epoch": 1.8000267599441864, - "grad_norm": 0.0006505911005660892, - "learning_rate": 0.00019999840243636972, - "loss": 46.0, - "step": 23543 - }, - { - "epoch": 1.8001032169275761, - "grad_norm": 0.0009638468036428094, - "learning_rate": 0.00019999840230059497, - "loss": 46.0, - "step": 23544 - }, - { - "epoch": 1.800179673910966, - "grad_norm": 0.0031288028694689274, - "learning_rate": 0.0001999984021648145, - "loss": 46.0, - "step": 23545 - }, - { - "epoch": 1.8002561308943554, - "grad_norm": 0.0016086224932223558, - "learning_rate": 0.0001999984020290282, - "loss": 46.0, - "step": 23546 - }, - { - "epoch": 1.8003325878777452, - "grad_norm": 0.00047096493653953075, - "learning_rate": 0.0001999984018932362, - "loss": 46.0, - "step": 23547 - }, - { - "epoch": 1.800409044861135, - "grad_norm": 0.0031271015759557486, - "learning_rate": 0.00019999840175743838, - "loss": 46.0, - "step": 23548 - }, - { - "epoch": 1.8004855018445247, - "grad_norm": 0.0006806131568737328, - "learning_rate": 0.0001999984016216348, - "loss": 46.0, - "step": 23549 - }, - { - "epoch": 1.8005619588279145, - "grad_norm": 0.0012186251115053892, - "learning_rate": 0.00019999840148582547, - "loss": 46.0, - "step": 23550 - }, - { - "epoch": 1.8006384158113042, - "grad_norm": 0.0007826102082617581, - "learning_rate": 0.00019999840135001036, - "loss": 46.0, - "step": 23551 - }, - { - "epoch": 1.800714872794694, - "grad_norm": 0.0013373569818213582, - "learning_rate": 0.00019999840121418948, - "loss": 46.0, - "step": 23552 - }, - { - "epoch": 1.8007913297780838, - "grad_norm": 0.0004930038121528924, - "learning_rate": 0.00019999840107836283, - "loss": 46.0, - "step": 23553 - }, - { - "epoch": 1.8008677867614733, - "grad_norm": 0.0032909975852817297, - "learning_rate": 0.00019999840094253043, - "loss": 46.0, - "step": 23554 - }, - { - "epoch": 1.800944243744863, - "grad_norm": 0.00043303988059051335, - "learning_rate": 0.0001999984008066922, - "loss": 46.0, - "step": 23555 - }, - { - "epoch": 1.8010207007282528, - "grad_norm": 0.0006659660721197724, - "learning_rate": 0.00019999840067084826, - "loss": 46.0, - "step": 23556 - }, - { - "epoch": 1.8010971577116424, - "grad_norm": 0.0016180934617295861, - "learning_rate": 0.00019999840053499854, - "loss": 46.0, - "step": 23557 - }, - { - "epoch": 1.8011736146950321, - "grad_norm": 0.0027012594509869814, - "learning_rate": 0.00019999840039914305, - "loss": 46.0, - "step": 23558 - }, - { - "epoch": 1.8012500716784219, - "grad_norm": 0.003291347064077854, - "learning_rate": 0.00019999840026328175, - "loss": 46.0, - "step": 23559 - }, - { - "epoch": 1.8013265286618116, - "grad_norm": 0.0026221624575555325, - "learning_rate": 0.00019999840012741474, - "loss": 46.0, - "step": 23560 - }, - { - "epoch": 1.8014029856452014, - "grad_norm": 0.000646628555841744, - "learning_rate": 0.00019999839999154193, - "loss": 46.0, - "step": 23561 - }, - { - "epoch": 1.8014794426285912, - "grad_norm": 0.0007230503833852708, - "learning_rate": 0.00019999839985566334, - "loss": 46.0, - "step": 23562 - }, - { - "epoch": 1.801555899611981, - "grad_norm": 0.001434436533600092, - "learning_rate": 0.000199998399719779, - "loss": 46.0, - "step": 23563 - }, - { - "epoch": 1.8016323565953707, - "grad_norm": 0.0010613644262775779, - "learning_rate": 0.00019999839958388888, - "loss": 46.0, - "step": 23564 - }, - { - "epoch": 1.8017088135787602, - "grad_norm": 0.0015314940828830004, - "learning_rate": 0.000199998399447993, - "loss": 46.0, - "step": 23565 - }, - { - "epoch": 1.80178527056215, - "grad_norm": 0.0013053305447101593, - "learning_rate": 0.00019999839931209135, - "loss": 46.0, - "step": 23566 - }, - { - "epoch": 1.8018617275455395, - "grad_norm": 0.0005981647409498692, - "learning_rate": 0.00019999839917618392, - "loss": 46.0, - "step": 23567 - }, - { - "epoch": 1.8019381845289293, - "grad_norm": 0.0009032060042954981, - "learning_rate": 0.00019999839904027072, - "loss": 46.0, - "step": 23568 - }, - { - "epoch": 1.802014641512319, - "grad_norm": 0.0013273843796923757, - "learning_rate": 0.00019999839890435178, - "loss": 46.0, - "step": 23569 - }, - { - "epoch": 1.8020910984957088, - "grad_norm": 0.001075581181794405, - "learning_rate": 0.00019999839876842706, - "loss": 46.0, - "step": 23570 - }, - { - "epoch": 1.8021675554790986, - "grad_norm": 0.0008504153811372817, - "learning_rate": 0.00019999839863249656, - "loss": 46.0, - "step": 23571 - }, - { - "epoch": 1.8022440124624883, - "grad_norm": 0.0007186035509221256, - "learning_rate": 0.00019999839849656027, - "loss": 46.0, - "step": 23572 - }, - { - "epoch": 1.802320469445878, - "grad_norm": 0.0009708944126032293, - "learning_rate": 0.00019999839836061823, - "loss": 46.0, - "step": 23573 - }, - { - "epoch": 1.8023969264292679, - "grad_norm": 0.0009529021917842329, - "learning_rate": 0.00019999839822467045, - "loss": 46.0, - "step": 23574 - }, - { - "epoch": 1.8024733834126576, - "grad_norm": 0.001032594358548522, - "learning_rate": 0.00019999839808871686, - "loss": 46.0, - "step": 23575 - }, - { - "epoch": 1.8025498403960472, - "grad_norm": 0.0015712508466094732, - "learning_rate": 0.0001999983979527575, - "loss": 46.0, - "step": 23576 - }, - { - "epoch": 1.802626297379437, - "grad_norm": 0.004000730346888304, - "learning_rate": 0.0001999983978167924, - "loss": 46.0, - "step": 23577 - }, - { - "epoch": 1.8027027543628265, - "grad_norm": 0.0016209562309086323, - "learning_rate": 0.00019999839768082152, - "loss": 46.0, - "step": 23578 - }, - { - "epoch": 1.8027792113462162, - "grad_norm": 0.0018562778132036328, - "learning_rate": 0.00019999839754484487, - "loss": 46.0, - "step": 23579 - }, - { - "epoch": 1.802855668329606, - "grad_norm": 0.0016734784003347158, - "learning_rate": 0.00019999839740886244, - "loss": 46.0, - "step": 23580 - }, - { - "epoch": 1.8029321253129957, - "grad_norm": 0.0007380274473689497, - "learning_rate": 0.00019999839727287424, - "loss": 46.0, - "step": 23581 - }, - { - "epoch": 1.8030085822963855, - "grad_norm": 0.0011833934113383293, - "learning_rate": 0.00019999839713688027, - "loss": 46.0, - "step": 23582 - }, - { - "epoch": 1.8030850392797753, - "grad_norm": 0.0006618915940634906, - "learning_rate": 0.00019999839700088055, - "loss": 46.0, - "step": 23583 - }, - { - "epoch": 1.803161496263165, - "grad_norm": 0.0018449613125994802, - "learning_rate": 0.00019999839686487506, - "loss": 46.0, - "step": 23584 - }, - { - "epoch": 1.8032379532465548, - "grad_norm": 0.0027561953756958246, - "learning_rate": 0.00019999839672886377, - "loss": 46.0, - "step": 23585 - }, - { - "epoch": 1.8033144102299445, - "grad_norm": 0.006247633136808872, - "learning_rate": 0.00019999839659284673, - "loss": 46.0, - "step": 23586 - }, - { - "epoch": 1.803390867213334, - "grad_norm": 0.000633818272035569, - "learning_rate": 0.00019999839645682395, - "loss": 46.0, - "step": 23587 - }, - { - "epoch": 1.8034673241967238, - "grad_norm": 0.0013962165685370564, - "learning_rate": 0.00019999839632079533, - "loss": 46.0, - "step": 23588 - }, - { - "epoch": 1.8035437811801134, - "grad_norm": 0.0009096218855120242, - "learning_rate": 0.000199998396184761, - "loss": 46.0, - "step": 23589 - }, - { - "epoch": 1.8036202381635031, - "grad_norm": 0.0009115052525885403, - "learning_rate": 0.00019999839604872087, - "loss": 46.0, - "step": 23590 - }, - { - "epoch": 1.803696695146893, - "grad_norm": 0.003917634021490812, - "learning_rate": 0.000199998395912675, - "loss": 46.0, - "step": 23591 - }, - { - "epoch": 1.8037731521302827, - "grad_norm": 0.0016916001914069057, - "learning_rate": 0.00019999839577662334, - "loss": 46.0, - "step": 23592 - }, - { - "epoch": 1.8038496091136724, - "grad_norm": 0.0010094393510371447, - "learning_rate": 0.00019999839564056591, - "loss": 46.0, - "step": 23593 - }, - { - "epoch": 1.8039260660970622, - "grad_norm": 0.0007031704299151897, - "learning_rate": 0.00019999839550450272, - "loss": 46.0, - "step": 23594 - }, - { - "epoch": 1.804002523080452, - "grad_norm": 0.000712833134457469, - "learning_rate": 0.00019999839536843377, - "loss": 46.0, - "step": 23595 - }, - { - "epoch": 1.8040789800638417, - "grad_norm": 0.003031100146472454, - "learning_rate": 0.00019999839523235903, - "loss": 46.0, - "step": 23596 - }, - { - "epoch": 1.8041554370472312, - "grad_norm": 0.000821817375253886, - "learning_rate": 0.00019999839509627853, - "loss": 46.0, - "step": 23597 - }, - { - "epoch": 1.804231894030621, - "grad_norm": 0.0025261512491852045, - "learning_rate": 0.00019999839496019224, - "loss": 46.0, - "step": 23598 - }, - { - "epoch": 1.8043083510140108, - "grad_norm": 0.0014901439426466823, - "learning_rate": 0.0001999983948241002, - "loss": 46.0, - "step": 23599 - }, - { - "epoch": 1.8043848079974003, - "grad_norm": 0.0006671230075880885, - "learning_rate": 0.00019999839468800242, - "loss": 46.0, - "step": 23600 - }, - { - "epoch": 1.80446126498079, - "grad_norm": 0.0011447911383584142, - "learning_rate": 0.0001999983945518988, - "loss": 46.0, - "step": 23601 - }, - { - "epoch": 1.8045377219641798, - "grad_norm": 0.0018249230924993753, - "learning_rate": 0.00019999839441578948, - "loss": 46.0, - "step": 23602 - }, - { - "epoch": 1.8046141789475696, - "grad_norm": 0.006038696505129337, - "learning_rate": 0.00019999839427967434, - "loss": 46.0, - "step": 23603 - }, - { - "epoch": 1.8046906359309594, - "grad_norm": 0.0023688075598329306, - "learning_rate": 0.00019999839414355347, - "loss": 46.0, - "step": 23604 - }, - { - "epoch": 1.8047670929143491, - "grad_norm": 0.00037246185820549726, - "learning_rate": 0.00019999839400742681, - "loss": 46.0, - "step": 23605 - }, - { - "epoch": 1.8048435498977389, - "grad_norm": 0.0009016448748297989, - "learning_rate": 0.00019999839387129436, - "loss": 46.0, - "step": 23606 - }, - { - "epoch": 1.8049200068811286, - "grad_norm": 0.0030539450235664845, - "learning_rate": 0.0001999983937351562, - "loss": 46.0, - "step": 23607 - }, - { - "epoch": 1.8049964638645182, - "grad_norm": 0.0016823712503537536, - "learning_rate": 0.00019999839359901222, - "loss": 46.0, - "step": 23608 - }, - { - "epoch": 1.805072920847908, - "grad_norm": 0.007601631339639425, - "learning_rate": 0.00019999839346286248, - "loss": 46.0, - "step": 23609 - }, - { - "epoch": 1.8051493778312977, - "grad_norm": 0.0005124068120494485, - "learning_rate": 0.00019999839332670698, - "loss": 46.0, - "step": 23610 - }, - { - "epoch": 1.8052258348146872, - "grad_norm": 0.0030892896465957165, - "learning_rate": 0.00019999839319054572, - "loss": 46.0, - "step": 23611 - }, - { - "epoch": 1.805302291798077, - "grad_norm": 0.0022689029574394226, - "learning_rate": 0.00019999839305437865, - "loss": 46.0, - "step": 23612 - }, - { - "epoch": 1.8053787487814668, - "grad_norm": 0.0024960017763078213, - "learning_rate": 0.00019999839291820584, - "loss": 46.0, - "step": 23613 - }, - { - "epoch": 1.8054552057648565, - "grad_norm": 0.0002980822464451194, - "learning_rate": 0.00019999839278202726, - "loss": 46.0, - "step": 23614 - }, - { - "epoch": 1.8055316627482463, - "grad_norm": 0.000677394971717149, - "learning_rate": 0.0001999983926458429, - "loss": 46.0, - "step": 23615 - }, - { - "epoch": 1.805608119731636, - "grad_norm": 0.0016386767383664846, - "learning_rate": 0.0001999983925096528, - "loss": 46.0, - "step": 23616 - }, - { - "epoch": 1.8056845767150258, - "grad_norm": 0.0006867151241749525, - "learning_rate": 0.0001999983923734569, - "loss": 46.0, - "step": 23617 - }, - { - "epoch": 1.8057610336984156, - "grad_norm": 0.004006482660770416, - "learning_rate": 0.00019999839223725524, - "loss": 46.0, - "step": 23618 - }, - { - "epoch": 1.805837490681805, - "grad_norm": 0.0009820415871217847, - "learning_rate": 0.00019999839210104782, - "loss": 46.0, - "step": 23619 - }, - { - "epoch": 1.8059139476651949, - "grad_norm": 0.000442410382675007, - "learning_rate": 0.00019999839196483462, - "loss": 46.0, - "step": 23620 - }, - { - "epoch": 1.8059904046485846, - "grad_norm": 0.0007473917794413865, - "learning_rate": 0.00019999839182861565, - "loss": 46.0, - "step": 23621 - }, - { - "epoch": 1.8060668616319742, - "grad_norm": 0.003018592484295368, - "learning_rate": 0.00019999839169239093, - "loss": 46.0, - "step": 23622 - }, - { - "epoch": 1.806143318615364, - "grad_norm": 0.0009581079357303679, - "learning_rate": 0.0001999983915561604, - "loss": 46.0, - "step": 23623 - }, - { - "epoch": 1.8062197755987537, - "grad_norm": 0.0025521027855575085, - "learning_rate": 0.00019999839141992415, - "loss": 46.0, - "step": 23624 - }, - { - "epoch": 1.8062962325821434, - "grad_norm": 0.0013378465082496405, - "learning_rate": 0.00019999839128368208, - "loss": 46.0, - "step": 23625 - }, - { - "epoch": 1.8063726895655332, - "grad_norm": 0.011976220645010471, - "learning_rate": 0.00019999839114743427, - "loss": 46.0, - "step": 23626 - }, - { - "epoch": 1.806449146548923, - "grad_norm": 0.0006340249092318118, - "learning_rate": 0.0001999983910111807, - "loss": 46.0, - "step": 23627 - }, - { - "epoch": 1.8065256035323127, - "grad_norm": 0.000979077536612749, - "learning_rate": 0.00019999839087492133, - "loss": 46.0, - "step": 23628 - }, - { - "epoch": 1.8066020605157025, - "grad_norm": 0.0005895447684451938, - "learning_rate": 0.00019999839073865623, - "loss": 46.0, - "step": 23629 - }, - { - "epoch": 1.806678517499092, - "grad_norm": 0.00047078015631996095, - "learning_rate": 0.00019999839060238532, - "loss": 46.0, - "step": 23630 - }, - { - "epoch": 1.8067549744824818, - "grad_norm": 0.006959778722375631, - "learning_rate": 0.00019999839046610867, - "loss": 46.0, - "step": 23631 - }, - { - "epoch": 1.8068314314658716, - "grad_norm": 0.0007475718157365918, - "learning_rate": 0.00019999839032982625, - "loss": 46.0, - "step": 23632 - }, - { - "epoch": 1.806907888449261, - "grad_norm": 0.0008762756478972733, - "learning_rate": 0.00019999839019353802, - "loss": 46.0, - "step": 23633 - }, - { - "epoch": 1.8069843454326509, - "grad_norm": 0.0019413064001128078, - "learning_rate": 0.00019999839005724405, - "loss": 46.0, - "step": 23634 - }, - { - "epoch": 1.8070608024160406, - "grad_norm": 0.0014487196458503604, - "learning_rate": 0.00019999838992094433, - "loss": 46.0, - "step": 23635 - }, - { - "epoch": 1.8071372593994304, - "grad_norm": 0.0013770927907899022, - "learning_rate": 0.00019999838978463882, - "loss": 46.0, - "step": 23636 - }, - { - "epoch": 1.8072137163828201, - "grad_norm": 0.0015888100024312735, - "learning_rate": 0.00019999838964832755, - "loss": 46.0, - "step": 23637 - }, - { - "epoch": 1.80729017336621, - "grad_norm": 0.0005667250952683389, - "learning_rate": 0.00019999838951201051, - "loss": 46.0, - "step": 23638 - }, - { - "epoch": 1.8073666303495997, - "grad_norm": 0.0010357708670198917, - "learning_rate": 0.00019999838937568768, - "loss": 46.0, - "step": 23639 - }, - { - "epoch": 1.8074430873329894, - "grad_norm": 0.0009489455260336399, - "learning_rate": 0.0001999983892393591, - "loss": 46.0, - "step": 23640 - }, - { - "epoch": 1.807519544316379, - "grad_norm": 0.000291152362478897, - "learning_rate": 0.00019999838910302474, - "loss": 46.0, - "step": 23641 - }, - { - "epoch": 1.8075960012997687, - "grad_norm": 0.0016290185740217566, - "learning_rate": 0.0001999983889666846, - "loss": 46.0, - "step": 23642 - }, - { - "epoch": 1.8076724582831585, - "grad_norm": 0.0036848685704171658, - "learning_rate": 0.00019999838883033873, - "loss": 46.0, - "step": 23643 - }, - { - "epoch": 1.807748915266548, - "grad_norm": 0.0005374210304580629, - "learning_rate": 0.00019999838869398705, - "loss": 46.0, - "step": 23644 - }, - { - "epoch": 1.8078253722499378, - "grad_norm": 0.008087070658802986, - "learning_rate": 0.00019999838855762963, - "loss": 46.0, - "step": 23645 - }, - { - "epoch": 1.8079018292333275, - "grad_norm": 0.0008488206076435745, - "learning_rate": 0.00019999838842126643, - "loss": 46.0, - "step": 23646 - }, - { - "epoch": 1.8079782862167173, - "grad_norm": 0.0009354506619274616, - "learning_rate": 0.00019999838828489746, - "loss": 46.0, - "step": 23647 - }, - { - "epoch": 1.808054743200107, - "grad_norm": 0.002079383935779333, - "learning_rate": 0.0001999983881485227, - "loss": 46.0, - "step": 23648 - }, - { - "epoch": 1.8081312001834968, - "grad_norm": 0.0024932650849223137, - "learning_rate": 0.0001999983880121422, - "loss": 46.0, - "step": 23649 - }, - { - "epoch": 1.8082076571668866, - "grad_norm": 0.0020582699216902256, - "learning_rate": 0.0001999983878757559, - "loss": 46.0, - "step": 23650 - }, - { - "epoch": 1.8082841141502763, - "grad_norm": 0.0011367028346285224, - "learning_rate": 0.00019999838773936387, - "loss": 46.0, - "step": 23651 - }, - { - "epoch": 1.8083605711336659, - "grad_norm": 0.0027940308209508657, - "learning_rate": 0.00019999838760296609, - "loss": 46.0, - "step": 23652 - }, - { - "epoch": 1.8084370281170556, - "grad_norm": 0.002253140090033412, - "learning_rate": 0.00019999838746656247, - "loss": 46.0, - "step": 23653 - }, - { - "epoch": 1.8085134851004454, - "grad_norm": 0.0011912768241018057, - "learning_rate": 0.0001999983873301531, - "loss": 46.0, - "step": 23654 - }, - { - "epoch": 1.808589942083835, - "grad_norm": 0.003485002089291811, - "learning_rate": 0.00019999838719373801, - "loss": 46.0, - "step": 23655 - }, - { - "epoch": 1.8086663990672247, - "grad_norm": 0.001128022326156497, - "learning_rate": 0.00019999838705731708, - "loss": 46.0, - "step": 23656 - }, - { - "epoch": 1.8087428560506145, - "grad_norm": 0.0007546091801486909, - "learning_rate": 0.00019999838692089043, - "loss": 46.0, - "step": 23657 - }, - { - "epoch": 1.8088193130340042, - "grad_norm": 0.0011466775322332978, - "learning_rate": 0.000199998386784458, - "loss": 46.0, - "step": 23658 - }, - { - "epoch": 1.808895770017394, - "grad_norm": 0.001339720212854445, - "learning_rate": 0.00019999838664801979, - "loss": 46.0, - "step": 23659 - }, - { - "epoch": 1.8089722270007838, - "grad_norm": 0.0019825256895273924, - "learning_rate": 0.00019999838651157582, - "loss": 46.0, - "step": 23660 - }, - { - "epoch": 1.8090486839841735, - "grad_norm": 0.0009286838467232883, - "learning_rate": 0.00019999838637512607, - "loss": 46.0, - "step": 23661 - }, - { - "epoch": 1.8091251409675633, - "grad_norm": 0.0008940558764152229, - "learning_rate": 0.00019999838623867055, - "loss": 46.0, - "step": 23662 - }, - { - "epoch": 1.8092015979509528, - "grad_norm": 0.00195328495465219, - "learning_rate": 0.0001999983861022093, - "loss": 46.0, - "step": 23663 - }, - { - "epoch": 1.8092780549343426, - "grad_norm": 0.0006442503654398024, - "learning_rate": 0.00019999838596574225, - "loss": 46.0, - "step": 23664 - }, - { - "epoch": 1.8093545119177323, - "grad_norm": 0.0006528267404064536, - "learning_rate": 0.00019999838582926942, - "loss": 46.0, - "step": 23665 - }, - { - "epoch": 1.8094309689011219, - "grad_norm": 0.0006126930238679051, - "learning_rate": 0.00019999838569279083, - "loss": 46.0, - "step": 23666 - }, - { - "epoch": 1.8095074258845116, - "grad_norm": 0.00026850542053580284, - "learning_rate": 0.00019999838555630648, - "loss": 46.0, - "step": 23667 - }, - { - "epoch": 1.8095838828679014, - "grad_norm": 0.0020031880121678114, - "learning_rate": 0.00019999838541981635, - "loss": 46.0, - "step": 23668 - }, - { - "epoch": 1.8096603398512912, - "grad_norm": 0.001533976523205638, - "learning_rate": 0.00019999838528332044, - "loss": 46.0, - "step": 23669 - }, - { - "epoch": 1.809736796834681, - "grad_norm": 0.0004952155286446214, - "learning_rate": 0.0001999983851468188, - "loss": 46.0, - "step": 23670 - }, - { - "epoch": 1.8098132538180707, - "grad_norm": 0.0010664340807124972, - "learning_rate": 0.00019999838501031137, - "loss": 46.0, - "step": 23671 - }, - { - "epoch": 1.8098897108014604, - "grad_norm": 0.006680730730295181, - "learning_rate": 0.00019999838487379815, - "loss": 46.0, - "step": 23672 - }, - { - "epoch": 1.8099661677848502, - "grad_norm": 0.002702244557440281, - "learning_rate": 0.00019999838473727918, - "loss": 46.0, - "step": 23673 - }, - { - "epoch": 1.8100426247682397, - "grad_norm": 0.0005602518795058131, - "learning_rate": 0.00019999838460075443, - "loss": 46.0, - "step": 23674 - }, - { - "epoch": 1.8101190817516295, - "grad_norm": 0.0016347015043720603, - "learning_rate": 0.00019999838446422392, - "loss": 46.0, - "step": 23675 - }, - { - "epoch": 1.8101955387350193, - "grad_norm": 0.0038165065925568342, - "learning_rate": 0.00019999838432768763, - "loss": 46.0, - "step": 23676 - }, - { - "epoch": 1.8102719957184088, - "grad_norm": 0.0006928627844899893, - "learning_rate": 0.0001999983841911456, - "loss": 46.0, - "step": 23677 - }, - { - "epoch": 1.8103484527017986, - "grad_norm": 0.0009190915152430534, - "learning_rate": 0.00019999838405459775, - "loss": 46.0, - "step": 23678 - }, - { - "epoch": 1.8104249096851883, - "grad_norm": 0.0008778053452260792, - "learning_rate": 0.00019999838391804414, - "loss": 46.0, - "step": 23679 - }, - { - "epoch": 1.810501366668578, - "grad_norm": 0.0013824475463479757, - "learning_rate": 0.00019999838378148481, - "loss": 46.0, - "step": 23680 - }, - { - "epoch": 1.8105778236519678, - "grad_norm": 0.0032797250896692276, - "learning_rate": 0.00019999838364491968, - "loss": 46.0, - "step": 23681 - }, - { - "epoch": 1.8106542806353576, - "grad_norm": 0.0006235101027414203, - "learning_rate": 0.00019999838350834878, - "loss": 46.0, - "step": 23682 - }, - { - "epoch": 1.8107307376187474, - "grad_norm": 0.001360720139928162, - "learning_rate": 0.0001999983833717721, - "loss": 46.0, - "step": 23683 - }, - { - "epoch": 1.8108071946021371, - "grad_norm": 0.003522517392411828, - "learning_rate": 0.00019999838323518965, - "loss": 46.0, - "step": 23684 - }, - { - "epoch": 1.8108836515855267, - "grad_norm": 0.0017405670369043946, - "learning_rate": 0.00019999838309860146, - "loss": 46.0, - "step": 23685 - }, - { - "epoch": 1.8109601085689164, - "grad_norm": 0.0013798840809613466, - "learning_rate": 0.0001999983829620075, - "loss": 46.0, - "step": 23686 - }, - { - "epoch": 1.8110365655523062, - "grad_norm": 0.002594240475445986, - "learning_rate": 0.00019999838282540775, - "loss": 46.0, - "step": 23687 - }, - { - "epoch": 1.8111130225356957, - "grad_norm": 0.0012301730457693338, - "learning_rate": 0.00019999838268880226, - "loss": 46.0, - "step": 23688 - }, - { - "epoch": 1.8111894795190855, - "grad_norm": 0.0006875758990645409, - "learning_rate": 0.00019999838255219094, - "loss": 46.0, - "step": 23689 - }, - { - "epoch": 1.8112659365024752, - "grad_norm": 0.0044905696995556355, - "learning_rate": 0.0001999983824155739, - "loss": 46.0, - "step": 23690 - }, - { - "epoch": 1.811342393485865, - "grad_norm": 0.001636273693293333, - "learning_rate": 0.00019999838227895107, - "loss": 46.0, - "step": 23691 - }, - { - "epoch": 1.8114188504692548, - "grad_norm": 0.0005134460516273975, - "learning_rate": 0.00019999838214232248, - "loss": 46.0, - "step": 23692 - }, - { - "epoch": 1.8114953074526445, - "grad_norm": 0.0010170018067583442, - "learning_rate": 0.00019999838200568813, - "loss": 46.0, - "step": 23693 - }, - { - "epoch": 1.8115717644360343, - "grad_norm": 0.0015770571772009134, - "learning_rate": 0.00019999838186904797, - "loss": 46.0, - "step": 23694 - }, - { - "epoch": 1.811648221419424, - "grad_norm": 0.0012213855516165495, - "learning_rate": 0.0001999983817324021, - "loss": 46.0, - "step": 23695 - }, - { - "epoch": 1.8117246784028136, - "grad_norm": 0.0008021800895221531, - "learning_rate": 0.00019999838159575042, - "loss": 46.0, - "step": 23696 - }, - { - "epoch": 1.8118011353862034, - "grad_norm": 0.001608015038073063, - "learning_rate": 0.00019999838145909297, - "loss": 46.0, - "step": 23697 - }, - { - "epoch": 1.811877592369593, - "grad_norm": 0.0005111222271807492, - "learning_rate": 0.00019999838132242977, - "loss": 46.0, - "step": 23698 - }, - { - "epoch": 1.8119540493529827, - "grad_norm": 0.0006768002058379352, - "learning_rate": 0.0001999983811857608, - "loss": 46.0, - "step": 23699 - }, - { - "epoch": 1.8120305063363724, - "grad_norm": 0.002105721039697528, - "learning_rate": 0.00019999838104908606, - "loss": 46.0, - "step": 23700 - }, - { - "epoch": 1.8121069633197622, - "grad_norm": 0.0019101414363831282, - "learning_rate": 0.00019999838091240552, - "loss": 46.0, - "step": 23701 - }, - { - "epoch": 1.812183420303152, - "grad_norm": 0.001791138551197946, - "learning_rate": 0.00019999838077571926, - "loss": 46.0, - "step": 23702 - }, - { - "epoch": 1.8122598772865417, - "grad_norm": 0.0013671392807736993, - "learning_rate": 0.0001999983806390272, - "loss": 46.0, - "step": 23703 - }, - { - "epoch": 1.8123363342699315, - "grad_norm": 0.0011461729882284999, - "learning_rate": 0.00019999838050232938, - "loss": 46.0, - "step": 23704 - }, - { - "epoch": 1.8124127912533212, - "grad_norm": 0.000511440506670624, - "learning_rate": 0.00019999838036562577, - "loss": 46.0, - "step": 23705 - }, - { - "epoch": 1.812489248236711, - "grad_norm": 0.0007339512812905014, - "learning_rate": 0.00019999838022891642, - "loss": 46.0, - "step": 23706 - }, - { - "epoch": 1.8125657052201005, - "grad_norm": 0.001765978173352778, - "learning_rate": 0.00019999838009220126, - "loss": 46.0, - "step": 23707 - }, - { - "epoch": 1.8126421622034903, - "grad_norm": 0.00021233840379863977, - "learning_rate": 0.00019999837995548036, - "loss": 46.0, - "step": 23708 - }, - { - "epoch": 1.8127186191868798, - "grad_norm": 0.0006821598508395255, - "learning_rate": 0.0001999983798187537, - "loss": 46.0, - "step": 23709 - }, - { - "epoch": 1.8127950761702696, - "grad_norm": 0.0004592933692038059, - "learning_rate": 0.00019999837968202126, - "loss": 46.0, - "step": 23710 - }, - { - "epoch": 1.8128715331536593, - "grad_norm": 0.0021277684718370438, - "learning_rate": 0.00019999837954528307, - "loss": 46.0, - "step": 23711 - }, - { - "epoch": 1.812947990137049, - "grad_norm": 0.0005421715322881937, - "learning_rate": 0.00019999837940853907, - "loss": 46.0, - "step": 23712 - }, - { - "epoch": 1.8130244471204389, - "grad_norm": 0.0009878689888864756, - "learning_rate": 0.00019999837927178933, - "loss": 46.0, - "step": 23713 - }, - { - "epoch": 1.8131009041038286, - "grad_norm": 0.0016195743810385466, - "learning_rate": 0.0001999983791350338, - "loss": 46.0, - "step": 23714 - }, - { - "epoch": 1.8131773610872184, - "grad_norm": 0.0005191999371163547, - "learning_rate": 0.00019999837899827255, - "loss": 46.0, - "step": 23715 - }, - { - "epoch": 1.8132538180706081, - "grad_norm": 0.0020176684483885765, - "learning_rate": 0.00019999837886150546, - "loss": 46.0, - "step": 23716 - }, - { - "epoch": 1.813330275053998, - "grad_norm": 0.0008017811924219131, - "learning_rate": 0.00019999837872473265, - "loss": 46.0, - "step": 23717 - }, - { - "epoch": 1.8134067320373874, - "grad_norm": 0.0006969118840061128, - "learning_rate": 0.00019999837858795404, - "loss": 46.0, - "step": 23718 - }, - { - "epoch": 1.8134831890207772, - "grad_norm": 0.000578109291382134, - "learning_rate": 0.00019999837845116969, - "loss": 46.0, - "step": 23719 - }, - { - "epoch": 1.8135596460041667, - "grad_norm": 0.000796251988504082, - "learning_rate": 0.00019999837831437956, - "loss": 46.0, - "step": 23720 - }, - { - "epoch": 1.8136361029875565, - "grad_norm": 0.00046822612057439983, - "learning_rate": 0.00019999837817758365, - "loss": 46.0, - "step": 23721 - }, - { - "epoch": 1.8137125599709463, - "grad_norm": 0.001055878004990518, - "learning_rate": 0.00019999837804078195, - "loss": 46.0, - "step": 23722 - }, - { - "epoch": 1.813789016954336, - "grad_norm": 0.0005453979247249663, - "learning_rate": 0.00019999837790397453, - "loss": 46.0, - "step": 23723 - }, - { - "epoch": 1.8138654739377258, - "grad_norm": 0.0005892764893360436, - "learning_rate": 0.00019999837776716133, - "loss": 46.0, - "step": 23724 - }, - { - "epoch": 1.8139419309211156, - "grad_norm": 0.0007359874434769154, - "learning_rate": 0.00019999837763034234, - "loss": 46.0, - "step": 23725 - }, - { - "epoch": 1.8140183879045053, - "grad_norm": 0.003332388587296009, - "learning_rate": 0.0001999983774935176, - "loss": 46.0, - "step": 23726 - }, - { - "epoch": 1.814094844887895, - "grad_norm": 0.0012405895395204425, - "learning_rate": 0.00019999837735668705, - "loss": 46.0, - "step": 23727 - }, - { - "epoch": 1.8141713018712846, - "grad_norm": 0.0016399107407778502, - "learning_rate": 0.00019999837721985077, - "loss": 46.0, - "step": 23728 - }, - { - "epoch": 1.8142477588546744, - "grad_norm": 0.0019913967698812485, - "learning_rate": 0.00019999837708300873, - "loss": 46.0, - "step": 23729 - }, - { - "epoch": 1.8143242158380641, - "grad_norm": 0.0007014106377027929, - "learning_rate": 0.0001999983769461609, - "loss": 46.0, - "step": 23730 - }, - { - "epoch": 1.8144006728214537, - "grad_norm": 0.0024441899731755257, - "learning_rate": 0.0001999983768093073, - "loss": 46.0, - "step": 23731 - }, - { - "epoch": 1.8144771298048434, - "grad_norm": 0.001906158053316176, - "learning_rate": 0.00019999837667244793, - "loss": 46.0, - "step": 23732 - }, - { - "epoch": 1.8145535867882332, - "grad_norm": 0.0023151030763983727, - "learning_rate": 0.0001999983765355828, - "loss": 46.0, - "step": 23733 - }, - { - "epoch": 1.814630043771623, - "grad_norm": 0.0024479578714817762, - "learning_rate": 0.00019999837639871187, - "loss": 46.0, - "step": 23734 - }, - { - "epoch": 1.8147065007550127, - "grad_norm": 0.002640495542436838, - "learning_rate": 0.0001999983762618352, - "loss": 46.0, - "step": 23735 - }, - { - "epoch": 1.8147829577384025, - "grad_norm": 0.0013096173061057925, - "learning_rate": 0.00019999837612495278, - "loss": 46.0, - "step": 23736 - }, - { - "epoch": 1.8148594147217922, - "grad_norm": 0.0009974492713809013, - "learning_rate": 0.00019999837598806455, - "loss": 46.0, - "step": 23737 - }, - { - "epoch": 1.814935871705182, - "grad_norm": 0.0005888328305445611, - "learning_rate": 0.00019999837585117059, - "loss": 46.0, - "step": 23738 - }, - { - "epoch": 1.8150123286885715, - "grad_norm": 0.0010382991749793291, - "learning_rate": 0.00019999837571427082, - "loss": 46.0, - "step": 23739 - }, - { - "epoch": 1.8150887856719613, - "grad_norm": 0.0019064415246248245, - "learning_rate": 0.0001999983755773653, - "loss": 46.0, - "step": 23740 - }, - { - "epoch": 1.815165242655351, - "grad_norm": 0.0023607865441590548, - "learning_rate": 0.000199998375440454, - "loss": 46.0, - "step": 23741 - }, - { - "epoch": 1.8152416996387406, - "grad_norm": 0.001305295154452324, - "learning_rate": 0.00019999837530353695, - "loss": 46.0, - "step": 23742 - }, - { - "epoch": 1.8153181566221304, - "grad_norm": 0.002001626882702112, - "learning_rate": 0.00019999837516661412, - "loss": 46.0, - "step": 23743 - }, - { - "epoch": 1.8153946136055201, - "grad_norm": 0.00202653044834733, - "learning_rate": 0.00019999837502968553, - "loss": 46.0, - "step": 23744 - }, - { - "epoch": 1.8154710705889099, - "grad_norm": 0.0006119931931607425, - "learning_rate": 0.00019999837489275115, - "loss": 46.0, - "step": 23745 - }, - { - "epoch": 1.8155475275722996, - "grad_norm": 0.000539101311005652, - "learning_rate": 0.000199998374755811, - "loss": 46.0, - "step": 23746 - }, - { - "epoch": 1.8156239845556894, - "grad_norm": 0.00047082730452530086, - "learning_rate": 0.0001999983746188651, - "loss": 46.0, - "step": 23747 - }, - { - "epoch": 1.8157004415390792, - "grad_norm": 0.0008020388195291162, - "learning_rate": 0.00019999837448191342, - "loss": 46.0, - "step": 23748 - }, - { - "epoch": 1.815776898522469, - "grad_norm": 0.0007263301522471011, - "learning_rate": 0.000199998374344956, - "loss": 46.0, - "step": 23749 - }, - { - "epoch": 1.8158533555058585, - "grad_norm": 0.002049210947006941, - "learning_rate": 0.00019999837420799278, - "loss": 46.0, - "step": 23750 - }, - { - "epoch": 1.8159298124892482, - "grad_norm": 0.0015929215587675571, - "learning_rate": 0.0001999983740710238, - "loss": 46.0, - "step": 23751 - }, - { - "epoch": 1.816006269472638, - "grad_norm": 0.001869912026450038, - "learning_rate": 0.00019999837393404904, - "loss": 46.0, - "step": 23752 - }, - { - "epoch": 1.8160827264560275, - "grad_norm": 0.003016539616510272, - "learning_rate": 0.0001999983737970685, - "loss": 46.0, - "step": 23753 - }, - { - "epoch": 1.8161591834394173, - "grad_norm": 0.0009548344532959163, - "learning_rate": 0.0001999983736600822, - "loss": 46.0, - "step": 23754 - }, - { - "epoch": 1.816235640422807, - "grad_norm": 0.0027448381297290325, - "learning_rate": 0.00019999837352309015, - "loss": 46.0, - "step": 23755 - }, - { - "epoch": 1.8163120974061968, - "grad_norm": 0.0005462928093038499, - "learning_rate": 0.00019999837338609234, - "loss": 46.0, - "step": 23756 - }, - { - "epoch": 1.8163885543895866, - "grad_norm": 0.0006976991426199675, - "learning_rate": 0.00019999837324908873, - "loss": 46.0, - "step": 23757 - }, - { - "epoch": 1.8164650113729763, - "grad_norm": 0.0005639574374072254, - "learning_rate": 0.00019999837311207935, - "loss": 46.0, - "step": 23758 - }, - { - "epoch": 1.816541468356366, - "grad_norm": 0.0006333451601676643, - "learning_rate": 0.0001999983729750642, - "loss": 46.0, - "step": 23759 - }, - { - "epoch": 1.8166179253397559, - "grad_norm": 0.0009449411300010979, - "learning_rate": 0.00019999837283804332, - "loss": 46.0, - "step": 23760 - }, - { - "epoch": 1.8166943823231454, - "grad_norm": 0.0005404514377005398, - "learning_rate": 0.00019999837270101662, - "loss": 46.0, - "step": 23761 - }, - { - "epoch": 1.8167708393065352, - "grad_norm": 0.001246896805241704, - "learning_rate": 0.0001999983725639842, - "loss": 46.0, - "step": 23762 - }, - { - "epoch": 1.816847296289925, - "grad_norm": 0.0012557493755593896, - "learning_rate": 0.00019999837242694598, - "loss": 46.0, - "step": 23763 - }, - { - "epoch": 1.8169237532733145, - "grad_norm": 0.0015247507253661752, - "learning_rate": 0.00019999837228990198, - "loss": 46.0, - "step": 23764 - }, - { - "epoch": 1.8170002102567042, - "grad_norm": 0.0003656579356174916, - "learning_rate": 0.00019999837215285221, - "loss": 46.0, - "step": 23765 - }, - { - "epoch": 1.817076667240094, - "grad_norm": 0.0004148459993302822, - "learning_rate": 0.0001999983720157967, - "loss": 46.0, - "step": 23766 - }, - { - "epoch": 1.8171531242234837, - "grad_norm": 0.003125576302409172, - "learning_rate": 0.0001999983718787354, - "loss": 46.0, - "step": 23767 - }, - { - "epoch": 1.8172295812068735, - "grad_norm": 0.0005042135016992688, - "learning_rate": 0.00019999837174166832, - "loss": 46.0, - "step": 23768 - }, - { - "epoch": 1.8173060381902633, - "grad_norm": 0.0031226954888552427, - "learning_rate": 0.00019999837160459552, - "loss": 46.0, - "step": 23769 - }, - { - "epoch": 1.817382495173653, - "grad_norm": 0.0008610028307884932, - "learning_rate": 0.0001999983714675169, - "loss": 46.0, - "step": 23770 - }, - { - "epoch": 1.8174589521570428, - "grad_norm": 0.004178045317530632, - "learning_rate": 0.00019999837133043253, - "loss": 46.0, - "step": 23771 - }, - { - "epoch": 1.8175354091404323, - "grad_norm": 0.004781307652592659, - "learning_rate": 0.0001999983711933424, - "loss": 46.0, - "step": 23772 - }, - { - "epoch": 1.817611866123822, - "grad_norm": 0.0005874600610695779, - "learning_rate": 0.00019999837105624647, - "loss": 46.0, - "step": 23773 - }, - { - "epoch": 1.8176883231072118, - "grad_norm": 0.0006655587349087, - "learning_rate": 0.00019999837091914482, - "loss": 46.0, - "step": 23774 - }, - { - "epoch": 1.8177647800906014, - "grad_norm": 0.0015199481276795268, - "learning_rate": 0.00019999837078203735, - "loss": 46.0, - "step": 23775 - }, - { - "epoch": 1.8178412370739911, - "grad_norm": 0.0008756067254580557, - "learning_rate": 0.00019999837064492413, - "loss": 46.0, - "step": 23776 - }, - { - "epoch": 1.817917694057381, - "grad_norm": 0.0008462730911560357, - "learning_rate": 0.00019999837050780513, - "loss": 46.0, - "step": 23777 - }, - { - "epoch": 1.8179941510407707, - "grad_norm": 0.000778833928052336, - "learning_rate": 0.0001999983703706804, - "loss": 46.0, - "step": 23778 - }, - { - "epoch": 1.8180706080241604, - "grad_norm": 0.00037754737422801554, - "learning_rate": 0.00019999837023354985, - "loss": 46.0, - "step": 23779 - }, - { - "epoch": 1.8181470650075502, - "grad_norm": 0.0007783666951581836, - "learning_rate": 0.00019999837009641356, - "loss": 46.0, - "step": 23780 - }, - { - "epoch": 1.81822352199094, - "grad_norm": 0.004791202023625374, - "learning_rate": 0.0001999983699592715, - "loss": 46.0, - "step": 23781 - }, - { - "epoch": 1.8182999789743297, - "grad_norm": 0.0006258657667785883, - "learning_rate": 0.00019999836982212367, - "loss": 46.0, - "step": 23782 - }, - { - "epoch": 1.8183764359577193, - "grad_norm": 0.001412646030075848, - "learning_rate": 0.00019999836968497006, - "loss": 46.0, - "step": 23783 - }, - { - "epoch": 1.818452892941109, - "grad_norm": 0.0013155157212167978, - "learning_rate": 0.0001999983695478107, - "loss": 46.0, - "step": 23784 - }, - { - "epoch": 1.8185293499244988, - "grad_norm": 0.0030480362474918365, - "learning_rate": 0.00019999836941064555, - "loss": 46.0, - "step": 23785 - }, - { - "epoch": 1.8186058069078883, - "grad_norm": 0.001286276150494814, - "learning_rate": 0.00019999836927347465, - "loss": 46.0, - "step": 23786 - }, - { - "epoch": 1.818682263891278, - "grad_norm": 0.0009965029312297702, - "learning_rate": 0.00019999836913629798, - "loss": 46.0, - "step": 23787 - }, - { - "epoch": 1.8187587208746678, - "grad_norm": 0.00161589658819139, - "learning_rate": 0.00019999836899911553, - "loss": 46.0, - "step": 23788 - }, - { - "epoch": 1.8188351778580576, - "grad_norm": 0.000690197222866118, - "learning_rate": 0.00019999836886192728, - "loss": 46.0, - "step": 23789 - }, - { - "epoch": 1.8189116348414474, - "grad_norm": 0.0029922593384981155, - "learning_rate": 0.0001999983687247333, - "loss": 46.0, - "step": 23790 - }, - { - "epoch": 1.8189880918248371, - "grad_norm": 0.0005037240334786475, - "learning_rate": 0.00019999836858753355, - "loss": 46.0, - "step": 23791 - }, - { - "epoch": 1.8190645488082269, - "grad_norm": 0.0010681712301447988, - "learning_rate": 0.00019999836845032803, - "loss": 46.0, - "step": 23792 - }, - { - "epoch": 1.8191410057916166, - "grad_norm": 0.002176741138100624, - "learning_rate": 0.00019999836831311672, - "loss": 46.0, - "step": 23793 - }, - { - "epoch": 1.8192174627750062, - "grad_norm": 0.0037860942538827658, - "learning_rate": 0.00019999836817589966, - "loss": 46.0, - "step": 23794 - }, - { - "epoch": 1.819293919758396, - "grad_norm": 0.0011033108457922935, - "learning_rate": 0.00019999836803867682, - "loss": 46.0, - "step": 23795 - }, - { - "epoch": 1.8193703767417857, - "grad_norm": 0.0020454039331525564, - "learning_rate": 0.00019999836790144822, - "loss": 46.0, - "step": 23796 - }, - { - "epoch": 1.8194468337251752, - "grad_norm": 0.0002975747629534453, - "learning_rate": 0.00019999836776421384, - "loss": 46.0, - "step": 23797 - }, - { - "epoch": 1.819523290708565, - "grad_norm": 0.0005273958086036146, - "learning_rate": 0.0001999983676269737, - "loss": 46.0, - "step": 23798 - }, - { - "epoch": 1.8195997476919548, - "grad_norm": 0.0015240439679473639, - "learning_rate": 0.00019999836748972778, - "loss": 46.0, - "step": 23799 - }, - { - "epoch": 1.8196762046753445, - "grad_norm": 0.0007435697480104864, - "learning_rate": 0.00019999836735247608, - "loss": 46.0, - "step": 23800 - }, - { - "epoch": 1.8197526616587343, - "grad_norm": 0.0017040574457496405, - "learning_rate": 0.00019999836721521863, - "loss": 46.0, - "step": 23801 - }, - { - "epoch": 1.819829118642124, - "grad_norm": 0.0033538327552378178, - "learning_rate": 0.0001999983670779554, - "loss": 46.0, - "step": 23802 - }, - { - "epoch": 1.8199055756255138, - "grad_norm": 0.0022707749158143997, - "learning_rate": 0.00019999836694068645, - "loss": 46.0, - "step": 23803 - }, - { - "epoch": 1.8199820326089036, - "grad_norm": 0.001140329404734075, - "learning_rate": 0.00019999836680341165, - "loss": 46.0, - "step": 23804 - }, - { - "epoch": 1.820058489592293, - "grad_norm": 0.0039946772158145905, - "learning_rate": 0.00019999836666613114, - "loss": 46.0, - "step": 23805 - }, - { - "epoch": 1.8201349465756829, - "grad_norm": 0.0009491947712376714, - "learning_rate": 0.00019999836652884482, - "loss": 46.0, - "step": 23806 - }, - { - "epoch": 1.8202114035590726, - "grad_norm": 0.0009418984409421682, - "learning_rate": 0.0001999983663915528, - "loss": 46.0, - "step": 23807 - }, - { - "epoch": 1.8202878605424622, - "grad_norm": 0.0007848025416024029, - "learning_rate": 0.00019999836625425493, - "loss": 46.0, - "step": 23808 - }, - { - "epoch": 1.820364317525852, - "grad_norm": 0.0018619188340380788, - "learning_rate": 0.00019999836611695132, - "loss": 46.0, - "step": 23809 - }, - { - "epoch": 1.8204407745092417, - "grad_norm": 0.0036131732631474733, - "learning_rate": 0.00019999836597964194, - "loss": 46.0, - "step": 23810 - }, - { - "epoch": 1.8205172314926314, - "grad_norm": 0.0039264364168047905, - "learning_rate": 0.00019999836584232682, - "loss": 46.0, - "step": 23811 - }, - { - "epoch": 1.8205936884760212, - "grad_norm": 0.002276131883263588, - "learning_rate": 0.00019999836570500586, - "loss": 46.0, - "step": 23812 - }, - { - "epoch": 1.820670145459411, - "grad_norm": 0.004367159213870764, - "learning_rate": 0.0001999983655676792, - "loss": 46.0, - "step": 23813 - }, - { - "epoch": 1.8207466024428007, - "grad_norm": 0.0037661665119230747, - "learning_rate": 0.00019999836543034674, - "loss": 46.0, - "step": 23814 - }, - { - "epoch": 1.8208230594261905, - "grad_norm": 0.0037466511130332947, - "learning_rate": 0.00019999836529300852, - "loss": 46.0, - "step": 23815 - }, - { - "epoch": 1.82089951640958, - "grad_norm": 0.0008819775539450347, - "learning_rate": 0.00019999836515566455, - "loss": 46.0, - "step": 23816 - }, - { - "epoch": 1.8209759733929698, - "grad_norm": 0.0028808724600821733, - "learning_rate": 0.00019999836501831476, - "loss": 46.0, - "step": 23817 - }, - { - "epoch": 1.8210524303763596, - "grad_norm": 0.0007140140514820814, - "learning_rate": 0.00019999836488095925, - "loss": 46.0, - "step": 23818 - }, - { - "epoch": 1.821128887359749, - "grad_norm": 0.0004463373334147036, - "learning_rate": 0.00019999836474359793, - "loss": 46.0, - "step": 23819 - }, - { - "epoch": 1.8212053443431389, - "grad_norm": 0.0013685438316315413, - "learning_rate": 0.00019999836460623087, - "loss": 46.0, - "step": 23820 - }, - { - "epoch": 1.8212818013265286, - "grad_norm": 0.002619948238134384, - "learning_rate": 0.000199998364468858, - "loss": 46.0, - "step": 23821 - }, - { - "epoch": 1.8213582583099184, - "grad_norm": 0.001976925414055586, - "learning_rate": 0.00019999836433147943, - "loss": 46.0, - "step": 23822 - }, - { - "epoch": 1.8214347152933081, - "grad_norm": 0.0030305383261293173, - "learning_rate": 0.00019999836419409505, - "loss": 46.0, - "step": 23823 - }, - { - "epoch": 1.821511172276698, - "grad_norm": 0.0010502489749342203, - "learning_rate": 0.0001999983640567049, - "loss": 46.0, - "step": 23824 - }, - { - "epoch": 1.8215876292600877, - "grad_norm": 0.00035608242615126073, - "learning_rate": 0.000199998363919309, - "loss": 46.0, - "step": 23825 - }, - { - "epoch": 1.8216640862434774, - "grad_norm": 0.0006635666941292584, - "learning_rate": 0.0001999983637819073, - "loss": 46.0, - "step": 23826 - }, - { - "epoch": 1.821740543226867, - "grad_norm": 0.001828717882744968, - "learning_rate": 0.00019999836364449985, - "loss": 46.0, - "step": 23827 - }, - { - "epoch": 1.8218170002102567, - "grad_norm": 0.0007944001699797809, - "learning_rate": 0.0001999983635070866, - "loss": 46.0, - "step": 23828 - }, - { - "epoch": 1.8218934571936463, - "grad_norm": 0.003315124660730362, - "learning_rate": 0.0001999983633696676, - "loss": 46.0, - "step": 23829 - }, - { - "epoch": 1.821969914177036, - "grad_norm": 0.0069752843119204044, - "learning_rate": 0.00019999836323224285, - "loss": 46.0, - "step": 23830 - }, - { - "epoch": 1.8220463711604258, - "grad_norm": 0.0005215610144659877, - "learning_rate": 0.00019999836309481233, - "loss": 46.0, - "step": 23831 - }, - { - "epoch": 1.8221228281438155, - "grad_norm": 0.003613660577684641, - "learning_rate": 0.00019999836295737602, - "loss": 46.0, - "step": 23832 - }, - { - "epoch": 1.8221992851272053, - "grad_norm": 0.003564306069165468, - "learning_rate": 0.00019999836281993396, - "loss": 46.0, - "step": 23833 - }, - { - "epoch": 1.822275742110595, - "grad_norm": 0.0009329724125564098, - "learning_rate": 0.0001999983626824861, - "loss": 46.0, - "step": 23834 - }, - { - "epoch": 1.8223521990939848, - "grad_norm": 0.00513521209359169, - "learning_rate": 0.0001999983625450325, - "loss": 46.0, - "step": 23835 - }, - { - "epoch": 1.8224286560773746, - "grad_norm": 0.0009125597425736487, - "learning_rate": 0.0001999983624075731, - "loss": 46.0, - "step": 23836 - }, - { - "epoch": 1.8225051130607643, - "grad_norm": 0.0010099117644131184, - "learning_rate": 0.00019999836227010796, - "loss": 46.0, - "step": 23837 - }, - { - "epoch": 1.8225815700441539, - "grad_norm": 0.009658338502049446, - "learning_rate": 0.00019999836213263706, - "loss": 46.0, - "step": 23838 - }, - { - "epoch": 1.8226580270275436, - "grad_norm": 0.0008519923430867493, - "learning_rate": 0.00019999836199516036, - "loss": 46.0, - "step": 23839 - }, - { - "epoch": 1.8227344840109332, - "grad_norm": 0.0015861664433032274, - "learning_rate": 0.00019999836185767791, - "loss": 46.0, - "step": 23840 - }, - { - "epoch": 1.822810940994323, - "grad_norm": 0.0015659792115911841, - "learning_rate": 0.00019999836172018967, - "loss": 46.0, - "step": 23841 - }, - { - "epoch": 1.8228873979777127, - "grad_norm": 0.001510980655439198, - "learning_rate": 0.0001999983615826957, - "loss": 46.0, - "step": 23842 - }, - { - "epoch": 1.8229638549611025, - "grad_norm": 0.0022470734547823668, - "learning_rate": 0.0001999983614451959, - "loss": 46.0, - "step": 23843 - }, - { - "epoch": 1.8230403119444922, - "grad_norm": 0.0012548583326861262, - "learning_rate": 0.0001999983613076904, - "loss": 46.0, - "step": 23844 - }, - { - "epoch": 1.823116768927882, - "grad_norm": 0.001574006862938404, - "learning_rate": 0.00019999836117017908, - "loss": 46.0, - "step": 23845 - }, - { - "epoch": 1.8231932259112718, - "grad_norm": 0.0008820904768072069, - "learning_rate": 0.000199998361032662, - "loss": 46.0, - "step": 23846 - }, - { - "epoch": 1.8232696828946615, - "grad_norm": 0.0004603576671797782, - "learning_rate": 0.00019999836089513914, - "loss": 46.0, - "step": 23847 - }, - { - "epoch": 1.8233461398780513, - "grad_norm": 0.00055723573314026, - "learning_rate": 0.00019999836075761053, - "loss": 46.0, - "step": 23848 - }, - { - "epoch": 1.8234225968614408, - "grad_norm": 0.0015917620621621609, - "learning_rate": 0.00019999836062007615, - "loss": 46.0, - "step": 23849 - }, - { - "epoch": 1.8234990538448306, - "grad_norm": 0.000962583813816309, - "learning_rate": 0.00019999836048253603, - "loss": 46.0, - "step": 23850 - }, - { - "epoch": 1.8235755108282201, - "grad_norm": 0.0011149718193337321, - "learning_rate": 0.0001999983603449901, - "loss": 46.0, - "step": 23851 - }, - { - "epoch": 1.8236519678116099, - "grad_norm": 0.0024677978362888098, - "learning_rate": 0.0001999983602074384, - "loss": 46.0, - "step": 23852 - }, - { - "epoch": 1.8237284247949996, - "grad_norm": 0.0012178650358691812, - "learning_rate": 0.00019999836006988093, - "loss": 46.0, - "step": 23853 - }, - { - "epoch": 1.8238048817783894, - "grad_norm": 0.0010864818468689919, - "learning_rate": 0.00019999835993231773, - "loss": 46.0, - "step": 23854 - }, - { - "epoch": 1.8238813387617792, - "grad_norm": 0.005176450591534376, - "learning_rate": 0.00019999835979474871, - "loss": 46.0, - "step": 23855 - }, - { - "epoch": 1.823957795745169, - "grad_norm": 0.0017009952571243048, - "learning_rate": 0.00019999835965717395, - "loss": 46.0, - "step": 23856 - }, - { - "epoch": 1.8240342527285587, - "grad_norm": 0.0008226045756600797, - "learning_rate": 0.00019999835951959344, - "loss": 46.0, - "step": 23857 - }, - { - "epoch": 1.8241107097119484, - "grad_norm": 0.005510048009455204, - "learning_rate": 0.0001999983593820071, - "loss": 46.0, - "step": 23858 - }, - { - "epoch": 1.824187166695338, - "grad_norm": 0.0025144314859062433, - "learning_rate": 0.00019999835924441504, - "loss": 46.0, - "step": 23859 - }, - { - "epoch": 1.8242636236787277, - "grad_norm": 0.001036861096508801, - "learning_rate": 0.00019999835910681718, - "loss": 46.0, - "step": 23860 - }, - { - "epoch": 1.8243400806621175, - "grad_norm": 0.001475979806855321, - "learning_rate": 0.00019999835896921357, - "loss": 46.0, - "step": 23861 - }, - { - "epoch": 1.824416537645507, - "grad_norm": 0.0008923432324081659, - "learning_rate": 0.0001999983588316042, - "loss": 46.0, - "step": 23862 - }, - { - "epoch": 1.8244929946288968, - "grad_norm": 0.004577656276524067, - "learning_rate": 0.00019999835869398904, - "loss": 46.0, - "step": 23863 - }, - { - "epoch": 1.8245694516122866, - "grad_norm": 0.0021204662043601274, - "learning_rate": 0.00019999835855636811, - "loss": 46.0, - "step": 23864 - }, - { - "epoch": 1.8246459085956763, - "grad_norm": 0.000634261523373425, - "learning_rate": 0.00019999835841874144, - "loss": 46.0, - "step": 23865 - }, - { - "epoch": 1.824722365579066, - "grad_norm": 0.0014365861425176263, - "learning_rate": 0.00019999835828110894, - "loss": 46.0, - "step": 23866 - }, - { - "epoch": 1.8247988225624558, - "grad_norm": 0.0033843209967017174, - "learning_rate": 0.00019999835814347075, - "loss": 46.0, - "step": 23867 - }, - { - "epoch": 1.8248752795458456, - "grad_norm": 0.0025061562191694975, - "learning_rate": 0.00019999835800582673, - "loss": 46.0, - "step": 23868 - }, - { - "epoch": 1.8249517365292354, - "grad_norm": 0.0013637689407914877, - "learning_rate": 0.00019999835786817696, - "loss": 46.0, - "step": 23869 - }, - { - "epoch": 1.825028193512625, - "grad_norm": 0.0008603520691394806, - "learning_rate": 0.00019999835773052143, - "loss": 46.0, - "step": 23870 - }, - { - "epoch": 1.8251046504960147, - "grad_norm": 0.0006613720906898379, - "learning_rate": 0.0001999983575928601, - "loss": 46.0, - "step": 23871 - }, - { - "epoch": 1.8251811074794044, - "grad_norm": 0.0019336212426424026, - "learning_rate": 0.00019999835745519305, - "loss": 46.0, - "step": 23872 - }, - { - "epoch": 1.825257564462794, - "grad_norm": 0.014227689243853092, - "learning_rate": 0.0001999983573175202, - "loss": 46.0, - "step": 23873 - }, - { - "epoch": 1.8253340214461837, - "grad_norm": 0.0009199431515298784, - "learning_rate": 0.0001999983571798416, - "loss": 46.0, - "step": 23874 - }, - { - "epoch": 1.8254104784295735, - "grad_norm": 0.0006757903029210865, - "learning_rate": 0.0001999983570421572, - "loss": 46.0, - "step": 23875 - }, - { - "epoch": 1.8254869354129633, - "grad_norm": 0.0006481630261987448, - "learning_rate": 0.00019999835690446703, - "loss": 46.0, - "step": 23876 - }, - { - "epoch": 1.825563392396353, - "grad_norm": 0.0017809225246310234, - "learning_rate": 0.00019999835676677113, - "loss": 46.0, - "step": 23877 - }, - { - "epoch": 1.8256398493797428, - "grad_norm": 0.0004950346192345023, - "learning_rate": 0.00019999835662906943, - "loss": 46.0, - "step": 23878 - }, - { - "epoch": 1.8257163063631325, - "grad_norm": 0.0009703824762254953, - "learning_rate": 0.000199998356491362, - "loss": 46.0, - "step": 23879 - }, - { - "epoch": 1.8257927633465223, - "grad_norm": 0.0007006517262198031, - "learning_rate": 0.00019999835635364872, - "loss": 46.0, - "step": 23880 - }, - { - "epoch": 1.8258692203299118, - "grad_norm": 0.00054171122610569, - "learning_rate": 0.00019999835621592972, - "loss": 46.0, - "step": 23881 - }, - { - "epoch": 1.8259456773133016, - "grad_norm": 0.0021724977996200323, - "learning_rate": 0.00019999835607820496, - "loss": 46.0, - "step": 23882 - }, - { - "epoch": 1.8260221342966914, - "grad_norm": 0.002891576150432229, - "learning_rate": 0.00019999835594047442, - "loss": 46.0, - "step": 23883 - }, - { - "epoch": 1.826098591280081, - "grad_norm": 0.0015874544624239206, - "learning_rate": 0.0001999983558027381, - "loss": 46.0, - "step": 23884 - }, - { - "epoch": 1.8261750482634707, - "grad_norm": 0.0010487012332305312, - "learning_rate": 0.00019999835566499602, - "loss": 46.0, - "step": 23885 - }, - { - "epoch": 1.8262515052468604, - "grad_norm": 0.0005651236278936267, - "learning_rate": 0.0001999983555272482, - "loss": 46.0, - "step": 23886 - }, - { - "epoch": 1.8263279622302502, - "grad_norm": 0.0015597763704136014, - "learning_rate": 0.00019999835538949459, - "loss": 46.0, - "step": 23887 - }, - { - "epoch": 1.82640441921364, - "grad_norm": 0.000770699349232018, - "learning_rate": 0.00019999835525173518, - "loss": 46.0, - "step": 23888 - }, - { - "epoch": 1.8264808761970297, - "grad_norm": 0.0007349138031713665, - "learning_rate": 0.00019999835511397003, - "loss": 46.0, - "step": 23889 - }, - { - "epoch": 1.8265573331804195, - "grad_norm": 0.002255955943837762, - "learning_rate": 0.0001999983549761991, - "loss": 46.0, - "step": 23890 - }, - { - "epoch": 1.8266337901638092, - "grad_norm": 0.0017204832984134555, - "learning_rate": 0.00019999835483842243, - "loss": 46.0, - "step": 23891 - }, - { - "epoch": 1.8267102471471988, - "grad_norm": 0.0015940828016027808, - "learning_rate": 0.00019999835470063993, - "loss": 46.0, - "step": 23892 - }, - { - "epoch": 1.8267867041305885, - "grad_norm": 0.0005565414903685451, - "learning_rate": 0.00019999835456285169, - "loss": 46.0, - "step": 23893 - }, - { - "epoch": 1.8268631611139783, - "grad_norm": 0.000997836352325976, - "learning_rate": 0.0001999983544250577, - "loss": 46.0, - "step": 23894 - }, - { - "epoch": 1.8269396180973678, - "grad_norm": 0.0031602459494024515, - "learning_rate": 0.00019999835428725793, - "loss": 46.0, - "step": 23895 - }, - { - "epoch": 1.8270160750807576, - "grad_norm": 0.0021500152070075274, - "learning_rate": 0.0001999983541494524, - "loss": 46.0, - "step": 23896 - }, - { - "epoch": 1.8270925320641473, - "grad_norm": 0.0011405717814341187, - "learning_rate": 0.00019999835401164108, - "loss": 46.0, - "step": 23897 - }, - { - "epoch": 1.827168989047537, - "grad_norm": 0.0024421107955276966, - "learning_rate": 0.00019999835387382402, - "loss": 46.0, - "step": 23898 - }, - { - "epoch": 1.8272454460309269, - "grad_norm": 0.0010856501758098602, - "learning_rate": 0.00019999835373600116, - "loss": 46.0, - "step": 23899 - }, - { - "epoch": 1.8273219030143166, - "grad_norm": 0.0008228891529142857, - "learning_rate": 0.00019999835359817253, - "loss": 46.0, - "step": 23900 - }, - { - "epoch": 1.8273983599977064, - "grad_norm": 0.000771163497120142, - "learning_rate": 0.00019999835346033815, - "loss": 46.0, - "step": 23901 - }, - { - "epoch": 1.8274748169810962, - "grad_norm": 0.0006650617578998208, - "learning_rate": 0.000199998353322498, - "loss": 46.0, - "step": 23902 - }, - { - "epoch": 1.8275512739644857, - "grad_norm": 0.0006762298289686441, - "learning_rate": 0.00019999835318465205, - "loss": 46.0, - "step": 23903 - }, - { - "epoch": 1.8276277309478755, - "grad_norm": 0.001431665034033358, - "learning_rate": 0.00019999835304680038, - "loss": 46.0, - "step": 23904 - }, - { - "epoch": 1.8277041879312652, - "grad_norm": 0.003254946554079652, - "learning_rate": 0.0001999983529089429, - "loss": 46.0, - "step": 23905 - }, - { - "epoch": 1.8277806449146548, - "grad_norm": 0.0009096749126911163, - "learning_rate": 0.0001999983527710797, - "loss": 46.0, - "step": 23906 - }, - { - "epoch": 1.8278571018980445, - "grad_norm": 0.0009419335401616991, - "learning_rate": 0.00019999835263321067, - "loss": 46.0, - "step": 23907 - }, - { - "epoch": 1.8279335588814343, - "grad_norm": 0.004340171813964844, - "learning_rate": 0.0001999983524953359, - "loss": 46.0, - "step": 23908 - }, - { - "epoch": 1.828010015864824, - "grad_norm": 0.0004847559903282672, - "learning_rate": 0.00019999835235745534, - "loss": 46.0, - "step": 23909 - }, - { - "epoch": 1.8280864728482138, - "grad_norm": 0.0017887247959151864, - "learning_rate": 0.00019999835221956903, - "loss": 46.0, - "step": 23910 - }, - { - "epoch": 1.8281629298316036, - "grad_norm": 0.014102067798376083, - "learning_rate": 0.00019999835208167697, - "loss": 46.0, - "step": 23911 - }, - { - "epoch": 1.8282393868149933, - "grad_norm": 0.0011384808458387852, - "learning_rate": 0.00019999835194377908, - "loss": 46.0, - "step": 23912 - }, - { - "epoch": 1.828315843798383, - "grad_norm": 0.0021035217214375734, - "learning_rate": 0.0001999983518058755, - "loss": 46.0, - "step": 23913 - }, - { - "epoch": 1.8283923007817726, - "grad_norm": 0.0010395547142252326, - "learning_rate": 0.00019999835166796607, - "loss": 46.0, - "step": 23914 - }, - { - "epoch": 1.8284687577651624, - "grad_norm": 0.0013432534178718925, - "learning_rate": 0.00019999835153005092, - "loss": 46.0, - "step": 23915 - }, - { - "epoch": 1.8285452147485521, - "grad_norm": 0.005125477910041809, - "learning_rate": 0.00019999835139213003, - "loss": 46.0, - "step": 23916 - }, - { - "epoch": 1.8286216717319417, - "grad_norm": 0.0007548199500888586, - "learning_rate": 0.00019999835125420333, - "loss": 46.0, - "step": 23917 - }, - { - "epoch": 1.8286981287153314, - "grad_norm": 0.0029630232602357864, - "learning_rate": 0.00019999835111627083, - "loss": 46.0, - "step": 23918 - }, - { - "epoch": 1.8287745856987212, - "grad_norm": 0.0015760298119857907, - "learning_rate": 0.0001999983509783326, - "loss": 46.0, - "step": 23919 - }, - { - "epoch": 1.828851042682111, - "grad_norm": 0.001539901946671307, - "learning_rate": 0.0001999983508403886, - "loss": 46.0, - "step": 23920 - }, - { - "epoch": 1.8289274996655007, - "grad_norm": 0.007302130106836557, - "learning_rate": 0.00019999835070243883, - "loss": 46.0, - "step": 23921 - }, - { - "epoch": 1.8290039566488905, - "grad_norm": 0.0008780031930655241, - "learning_rate": 0.00019999835056448326, - "loss": 46.0, - "step": 23922 - }, - { - "epoch": 1.8290804136322802, - "grad_norm": 0.0008248342201113701, - "learning_rate": 0.00019999835042652198, - "loss": 46.0, - "step": 23923 - }, - { - "epoch": 1.82915687061567, - "grad_norm": 0.001435629790648818, - "learning_rate": 0.00019999835028855487, - "loss": 46.0, - "step": 23924 - }, - { - "epoch": 1.8292333275990595, - "grad_norm": 0.0005808002315461636, - "learning_rate": 0.00019999835015058204, - "loss": 46.0, - "step": 23925 - }, - { - "epoch": 1.8293097845824493, - "grad_norm": 0.000657212920486927, - "learning_rate": 0.00019999835001260338, - "loss": 46.0, - "step": 23926 - }, - { - "epoch": 1.829386241565839, - "grad_norm": 0.019714007154107094, - "learning_rate": 0.000199998349874619, - "loss": 46.0, - "step": 23927 - }, - { - "epoch": 1.8294626985492286, - "grad_norm": 0.00415431335568428, - "learning_rate": 0.00019999834973662885, - "loss": 46.0, - "step": 23928 - }, - { - "epoch": 1.8295391555326184, - "grad_norm": 0.004770858678966761, - "learning_rate": 0.00019999834959863293, - "loss": 46.0, - "step": 23929 - }, - { - "epoch": 1.8296156125160081, - "grad_norm": 0.002844860777258873, - "learning_rate": 0.00019999834946063123, - "loss": 46.0, - "step": 23930 - }, - { - "epoch": 1.8296920694993979, - "grad_norm": 0.0032237747218459845, - "learning_rate": 0.00019999834932262376, - "loss": 46.0, - "step": 23931 - }, - { - "epoch": 1.8297685264827876, - "grad_norm": 0.0010265747550874949, - "learning_rate": 0.0001999983491846105, - "loss": 46.0, - "step": 23932 - }, - { - "epoch": 1.8298449834661774, - "grad_norm": 0.001270824926905334, - "learning_rate": 0.0001999983490465915, - "loss": 46.0, - "step": 23933 - }, - { - "epoch": 1.8299214404495672, - "grad_norm": 0.0005657592555508018, - "learning_rate": 0.00019999834890856673, - "loss": 46.0, - "step": 23934 - }, - { - "epoch": 1.829997897432957, - "grad_norm": 0.0021599598694592714, - "learning_rate": 0.0001999983487705362, - "loss": 46.0, - "step": 23935 - }, - { - "epoch": 1.8300743544163465, - "grad_norm": 0.0010334642138332129, - "learning_rate": 0.00019999834863249985, - "loss": 46.0, - "step": 23936 - }, - { - "epoch": 1.8301508113997362, - "grad_norm": 0.0005314050358720124, - "learning_rate": 0.00019999834849445777, - "loss": 46.0, - "step": 23937 - }, - { - "epoch": 1.830227268383126, - "grad_norm": 0.0015683239325881004, - "learning_rate": 0.00019999834835640994, - "loss": 46.0, - "step": 23938 - }, - { - "epoch": 1.8303037253665155, - "grad_norm": 0.005588720552623272, - "learning_rate": 0.0001999983482183563, - "loss": 46.0, - "step": 23939 - }, - { - "epoch": 1.8303801823499053, - "grad_norm": 0.0012042330345138907, - "learning_rate": 0.0001999983480802969, - "loss": 46.0, - "step": 23940 - }, - { - "epoch": 1.830456639333295, - "grad_norm": 0.0019481373019516468, - "learning_rate": 0.00019999834794223173, - "loss": 46.0, - "step": 23941 - }, - { - "epoch": 1.8305330963166848, - "grad_norm": 0.0005353696760721505, - "learning_rate": 0.0001999983478041608, - "loss": 46.0, - "step": 23942 - }, - { - "epoch": 1.8306095533000746, - "grad_norm": 0.0012337693478912115, - "learning_rate": 0.0001999983476660841, - "loss": 46.0, - "step": 23943 - }, - { - "epoch": 1.8306860102834643, - "grad_norm": 0.0014952932251617312, - "learning_rate": 0.00019999834752800163, - "loss": 46.0, - "step": 23944 - }, - { - "epoch": 1.830762467266854, - "grad_norm": 0.0009090615203604102, - "learning_rate": 0.00019999834738991342, - "loss": 46.0, - "step": 23945 - }, - { - "epoch": 1.8308389242502439, - "grad_norm": 0.001103985938243568, - "learning_rate": 0.00019999834725181937, - "loss": 46.0, - "step": 23946 - }, - { - "epoch": 1.8309153812336334, - "grad_norm": 0.0006535687716677785, - "learning_rate": 0.0001999983471137196, - "loss": 46.0, - "step": 23947 - }, - { - "epoch": 1.8309918382170232, - "grad_norm": 0.0013266380410641432, - "learning_rate": 0.00019999834697561407, - "loss": 46.0, - "step": 23948 - }, - { - "epoch": 1.831068295200413, - "grad_norm": 0.00342384516261518, - "learning_rate": 0.00019999834683750276, - "loss": 46.0, - "step": 23949 - }, - { - "epoch": 1.8311447521838025, - "grad_norm": 0.0007869650144129992, - "learning_rate": 0.00019999834669938568, - "loss": 46.0, - "step": 23950 - }, - { - "epoch": 1.8312212091671922, - "grad_norm": 0.000919678364880383, - "learning_rate": 0.0001999983465612628, - "loss": 46.0, - "step": 23951 - }, - { - "epoch": 1.831297666150582, - "grad_norm": 0.0009309927700087428, - "learning_rate": 0.00019999834642313416, - "loss": 46.0, - "step": 23952 - }, - { - "epoch": 1.8313741231339717, - "grad_norm": 0.0029769206885248423, - "learning_rate": 0.00019999834628499978, - "loss": 46.0, - "step": 23953 - }, - { - "epoch": 1.8314505801173615, - "grad_norm": 0.003182024462148547, - "learning_rate": 0.0001999983461468596, - "loss": 46.0, - "step": 23954 - }, - { - "epoch": 1.8315270371007513, - "grad_norm": 0.005156370811164379, - "learning_rate": 0.00019999834600871368, - "loss": 46.0, - "step": 23955 - }, - { - "epoch": 1.831603494084141, - "grad_norm": 0.0028223861008882523, - "learning_rate": 0.000199998345870562, - "loss": 46.0, - "step": 23956 - }, - { - "epoch": 1.8316799510675308, - "grad_norm": 0.0010221661068499088, - "learning_rate": 0.00019999834573240452, - "loss": 46.0, - "step": 23957 - }, - { - "epoch": 1.8317564080509203, - "grad_norm": 0.000572660006582737, - "learning_rate": 0.00019999834559424127, - "loss": 46.0, - "step": 23958 - }, - { - "epoch": 1.83183286503431, - "grad_norm": 0.0006711501046083868, - "learning_rate": 0.00019999834545607225, - "loss": 46.0, - "step": 23959 - }, - { - "epoch": 1.8319093220176996, - "grad_norm": 0.0007115493062883615, - "learning_rate": 0.00019999834531789746, - "loss": 46.0, - "step": 23960 - }, - { - "epoch": 1.8319857790010894, - "grad_norm": 0.0006306515424512327, - "learning_rate": 0.00019999834517971693, - "loss": 46.0, - "step": 23961 - }, - { - "epoch": 1.8320622359844791, - "grad_norm": 0.004018108360469341, - "learning_rate": 0.00019999834504153062, - "loss": 46.0, - "step": 23962 - }, - { - "epoch": 1.832138692967869, - "grad_norm": 0.0013060973724350333, - "learning_rate": 0.0001999983449033385, - "loss": 46.0, - "step": 23963 - }, - { - "epoch": 1.8322151499512587, - "grad_norm": 0.001037942012771964, - "learning_rate": 0.00019999834476514065, - "loss": 46.0, - "step": 23964 - }, - { - "epoch": 1.8322916069346484, - "grad_norm": 0.0018497114069759846, - "learning_rate": 0.00019999834462693702, - "loss": 46.0, - "step": 23965 - }, - { - "epoch": 1.8323680639180382, - "grad_norm": 0.0012205595849081874, - "learning_rate": 0.00019999834448872764, - "loss": 46.0, - "step": 23966 - }, - { - "epoch": 1.832444520901428, - "grad_norm": 0.0009895114926621318, - "learning_rate": 0.0001999983443505125, - "loss": 46.0, - "step": 23967 - }, - { - "epoch": 1.8325209778848177, - "grad_norm": 0.0004822291084565222, - "learning_rate": 0.00019999834421229154, - "loss": 46.0, - "step": 23968 - }, - { - "epoch": 1.8325974348682073, - "grad_norm": 0.0015185363590717316, - "learning_rate": 0.00019999834407406484, - "loss": 46.0, - "step": 23969 - }, - { - "epoch": 1.832673891851597, - "grad_norm": 0.004448238294571638, - "learning_rate": 0.00019999834393583235, - "loss": 46.0, - "step": 23970 - }, - { - "epoch": 1.8327503488349866, - "grad_norm": 0.0028663533739745617, - "learning_rate": 0.00019999834379759413, - "loss": 46.0, - "step": 23971 - }, - { - "epoch": 1.8328268058183763, - "grad_norm": 0.0009537548758089542, - "learning_rate": 0.00019999834365935011, - "loss": 46.0, - "step": 23972 - }, - { - "epoch": 1.832903262801766, - "grad_norm": 0.0005530266207642853, - "learning_rate": 0.00019999834352110032, - "loss": 46.0, - "step": 23973 - }, - { - "epoch": 1.8329797197851558, - "grad_norm": 0.002375100739300251, - "learning_rate": 0.0001999983433828448, - "loss": 46.0, - "step": 23974 - }, - { - "epoch": 1.8330561767685456, - "grad_norm": 0.005531785078346729, - "learning_rate": 0.00019999834324458345, - "loss": 46.0, - "step": 23975 - }, - { - "epoch": 1.8331326337519354, - "grad_norm": 0.0005423836992122233, - "learning_rate": 0.00019999834310631637, - "loss": 46.0, - "step": 23976 - }, - { - "epoch": 1.8332090907353251, - "grad_norm": 0.001727782771922648, - "learning_rate": 0.00019999834296804348, - "loss": 46.0, - "step": 23977 - }, - { - "epoch": 1.8332855477187149, - "grad_norm": 0.000533060694579035, - "learning_rate": 0.00019999834282976488, - "loss": 46.0, - "step": 23978 - }, - { - "epoch": 1.8333620047021046, - "grad_norm": 0.0024386525619775057, - "learning_rate": 0.00019999834269148048, - "loss": 46.0, - "step": 23979 - }, - { - "epoch": 1.8334384616854942, - "grad_norm": 0.005179790314286947, - "learning_rate": 0.0001999983425531903, - "loss": 46.0, - "step": 23980 - }, - { - "epoch": 1.833514918668884, - "grad_norm": 0.0018118720036000013, - "learning_rate": 0.00019999834241489438, - "loss": 46.0, - "step": 23981 - }, - { - "epoch": 1.8335913756522735, - "grad_norm": 0.0005988035118207335, - "learning_rate": 0.00019999834227659265, - "loss": 46.0, - "step": 23982 - }, - { - "epoch": 1.8336678326356632, - "grad_norm": 0.0008260435424745083, - "learning_rate": 0.00019999834213828518, - "loss": 46.0, - "step": 23983 - }, - { - "epoch": 1.833744289619053, - "grad_norm": 0.0015572276897728443, - "learning_rate": 0.00019999834199997194, - "loss": 46.0, - "step": 23984 - }, - { - "epoch": 1.8338207466024428, - "grad_norm": 0.0012696572812274098, - "learning_rate": 0.00019999834186165295, - "loss": 46.0, - "step": 23985 - }, - { - "epoch": 1.8338972035858325, - "grad_norm": 0.0006861437577754259, - "learning_rate": 0.00019999834172332813, - "loss": 46.0, - "step": 23986 - }, - { - "epoch": 1.8339736605692223, - "grad_norm": 0.0014596692053601146, - "learning_rate": 0.0001999983415849976, - "loss": 46.0, - "step": 23987 - }, - { - "epoch": 1.834050117552612, - "grad_norm": 0.0033149810042232275, - "learning_rate": 0.00019999834144666126, - "loss": 46.0, - "step": 23988 - }, - { - "epoch": 1.8341265745360018, - "grad_norm": 0.00040873352554626763, - "learning_rate": 0.00019999834130831918, - "loss": 46.0, - "step": 23989 - }, - { - "epoch": 1.8342030315193913, - "grad_norm": 0.001727715483866632, - "learning_rate": 0.00019999834116997132, - "loss": 46.0, - "step": 23990 - }, - { - "epoch": 1.834279488502781, - "grad_norm": 0.0005510460468940437, - "learning_rate": 0.0001999983410316177, - "loss": 46.0, - "step": 23991 - }, - { - "epoch": 1.8343559454861709, - "grad_norm": 0.0008622353198006749, - "learning_rate": 0.0001999983408932583, - "loss": 46.0, - "step": 23992 - }, - { - "epoch": 1.8344324024695604, - "grad_norm": 0.0007293229573406279, - "learning_rate": 0.0001999983407548931, - "loss": 46.0, - "step": 23993 - }, - { - "epoch": 1.8345088594529502, - "grad_norm": 0.0008433896582573652, - "learning_rate": 0.0001999983406165222, - "loss": 46.0, - "step": 23994 - }, - { - "epoch": 1.83458531643634, - "grad_norm": 0.001933095045387745, - "learning_rate": 0.00019999834047814547, - "loss": 46.0, - "step": 23995 - }, - { - "epoch": 1.8346617734197297, - "grad_norm": 0.000515175866894424, - "learning_rate": 0.000199998340339763, - "loss": 46.0, - "step": 23996 - }, - { - "epoch": 1.8347382304031195, - "grad_norm": 0.002671354217454791, - "learning_rate": 0.00019999834020137475, - "loss": 46.0, - "step": 23997 - }, - { - "epoch": 1.8348146873865092, - "grad_norm": 0.0009272352326661348, - "learning_rate": 0.00019999834006298074, - "loss": 46.0, - "step": 23998 - }, - { - "epoch": 1.834891144369899, - "grad_norm": 0.001544341561384499, - "learning_rate": 0.00019999833992458095, - "loss": 46.0, - "step": 23999 - }, - { - "epoch": 1.8349676013532887, - "grad_norm": 0.0009590385016053915, - "learning_rate": 0.00019999833978617538, - "loss": 46.0, - "step": 24000 - }, - { - "epoch": 1.8350440583366783, - "grad_norm": 0.001049233484081924, - "learning_rate": 0.00019999833964776407, - "loss": 46.0, - "step": 24001 - }, - { - "epoch": 1.835120515320068, - "grad_norm": 0.0007759078871458769, - "learning_rate": 0.00019999833950934697, - "loss": 46.0, - "step": 24002 - }, - { - "epoch": 1.8351969723034578, - "grad_norm": 0.005377600435167551, - "learning_rate": 0.0001999983393709241, - "loss": 46.0, - "step": 24003 - }, - { - "epoch": 1.8352734292868473, - "grad_norm": 0.0015093449037522078, - "learning_rate": 0.00019999833923249548, - "loss": 46.0, - "step": 24004 - }, - { - "epoch": 1.835349886270237, - "grad_norm": 0.00214086240157485, - "learning_rate": 0.00019999833909406108, - "loss": 46.0, - "step": 24005 - }, - { - "epoch": 1.8354263432536269, - "grad_norm": 0.0038216100074350834, - "learning_rate": 0.0001999983389556209, - "loss": 46.0, - "step": 24006 - }, - { - "epoch": 1.8355028002370166, - "grad_norm": 0.0028912988491356373, - "learning_rate": 0.00019999833881717498, - "loss": 46.0, - "step": 24007 - }, - { - "epoch": 1.8355792572204064, - "grad_norm": 0.0007045595557428896, - "learning_rate": 0.00019999833867872323, - "loss": 46.0, - "step": 24008 - }, - { - "epoch": 1.8356557142037961, - "grad_norm": 0.0024463674053549767, - "learning_rate": 0.00019999833854026579, - "loss": 46.0, - "step": 24009 - }, - { - "epoch": 1.835732171187186, - "grad_norm": 0.00050332274986431, - "learning_rate": 0.00019999833840180254, - "loss": 46.0, - "step": 24010 - }, - { - "epoch": 1.8358086281705757, - "grad_norm": 0.0026274381671100855, - "learning_rate": 0.00019999833826333353, - "loss": 46.0, - "step": 24011 - }, - { - "epoch": 1.8358850851539652, - "grad_norm": 0.00470306072384119, - "learning_rate": 0.00019999833812485874, - "loss": 46.0, - "step": 24012 - }, - { - "epoch": 1.835961542137355, - "grad_norm": 0.001142130233347416, - "learning_rate": 0.00019999833798637818, - "loss": 46.0, - "step": 24013 - }, - { - "epoch": 1.8360379991207447, - "grad_norm": 0.0006553728599101305, - "learning_rate": 0.00019999833784789184, - "loss": 46.0, - "step": 24014 - }, - { - "epoch": 1.8361144561041343, - "grad_norm": 0.0010038039181381464, - "learning_rate": 0.00019999833770939973, - "loss": 46.0, - "step": 24015 - }, - { - "epoch": 1.836190913087524, - "grad_norm": 0.006854768842458725, - "learning_rate": 0.0001999983375709019, - "loss": 46.0, - "step": 24016 - }, - { - "epoch": 1.8362673700709138, - "grad_norm": 0.002887592650949955, - "learning_rate": 0.00019999833743239824, - "loss": 46.0, - "step": 24017 - }, - { - "epoch": 1.8363438270543035, - "grad_norm": 0.004634858574718237, - "learning_rate": 0.00019999833729388884, - "loss": 46.0, - "step": 24018 - }, - { - "epoch": 1.8364202840376933, - "grad_norm": 0.0022212162148207426, - "learning_rate": 0.00019999833715537367, - "loss": 46.0, - "step": 24019 - }, - { - "epoch": 1.836496741021083, - "grad_norm": 0.0008391932933591306, - "learning_rate": 0.00019999833701685274, - "loss": 46.0, - "step": 24020 - }, - { - "epoch": 1.8365731980044728, - "grad_norm": 0.001955055631697178, - "learning_rate": 0.00019999833687832602, - "loss": 46.0, - "step": 24021 - }, - { - "epoch": 1.8366496549878626, - "grad_norm": 0.001016722060739994, - "learning_rate": 0.00019999833673979353, - "loss": 46.0, - "step": 24022 - }, - { - "epoch": 1.8367261119712521, - "grad_norm": 0.0017843263922259212, - "learning_rate": 0.00019999833660125528, - "loss": 46.0, - "step": 24023 - }, - { - "epoch": 1.836802568954642, - "grad_norm": 0.0011481106048449874, - "learning_rate": 0.00019999833646271127, - "loss": 46.0, - "step": 24024 - }, - { - "epoch": 1.8368790259380317, - "grad_norm": 0.0020079100504517555, - "learning_rate": 0.0001999983363241615, - "loss": 46.0, - "step": 24025 - }, - { - "epoch": 1.8369554829214212, - "grad_norm": 0.001076492597348988, - "learning_rate": 0.00019999833618560592, - "loss": 46.0, - "step": 24026 - }, - { - "epoch": 1.837031939904811, - "grad_norm": 0.0018108839867636561, - "learning_rate": 0.00019999833604704458, - "loss": 46.0, - "step": 24027 - }, - { - "epoch": 1.8371083968882007, - "grad_norm": 0.003120560199022293, - "learning_rate": 0.0001999983359084775, - "loss": 46.0, - "step": 24028 - }, - { - "epoch": 1.8371848538715905, - "grad_norm": 0.0025472554843872786, - "learning_rate": 0.00019999833576990464, - "loss": 46.0, - "step": 24029 - }, - { - "epoch": 1.8372613108549802, - "grad_norm": 0.0004606877628248185, - "learning_rate": 0.000199998335631326, - "loss": 46.0, - "step": 24030 - }, - { - "epoch": 1.83733776783837, - "grad_norm": 0.0023512982297688723, - "learning_rate": 0.00019999833549274158, - "loss": 46.0, - "step": 24031 - }, - { - "epoch": 1.8374142248217598, - "grad_norm": 0.0003429347416386008, - "learning_rate": 0.00019999833535415144, - "loss": 46.0, - "step": 24032 - }, - { - "epoch": 1.8374906818051495, - "grad_norm": 0.0013568336144089699, - "learning_rate": 0.0001999983352155555, - "loss": 46.0, - "step": 24033 - }, - { - "epoch": 1.837567138788539, - "grad_norm": 0.00510236294940114, - "learning_rate": 0.00019999833507695376, - "loss": 46.0, - "step": 24034 - }, - { - "epoch": 1.8376435957719288, - "grad_norm": 0.0009480480221100152, - "learning_rate": 0.00019999833493834627, - "loss": 46.0, - "step": 24035 - }, - { - "epoch": 1.8377200527553186, - "grad_norm": 0.0010470944689586759, - "learning_rate": 0.00019999833479973303, - "loss": 46.0, - "step": 24036 - }, - { - "epoch": 1.8377965097387081, - "grad_norm": 0.0017882988322526217, - "learning_rate": 0.000199998334661114, - "loss": 46.0, - "step": 24037 - }, - { - "epoch": 1.8378729667220979, - "grad_norm": 0.003667086362838745, - "learning_rate": 0.00019999833452248922, - "loss": 46.0, - "step": 24038 - }, - { - "epoch": 1.8379494237054876, - "grad_norm": 0.0012558590387925506, - "learning_rate": 0.00019999833438385863, - "loss": 46.0, - "step": 24039 - }, - { - "epoch": 1.8380258806888774, - "grad_norm": 0.0007093364838510752, - "learning_rate": 0.00019999833424522235, - "loss": 46.0, - "step": 24040 - }, - { - "epoch": 1.8381023376722672, - "grad_norm": 0.001240081270225346, - "learning_rate": 0.00019999833410658022, - "loss": 46.0, - "step": 24041 - }, - { - "epoch": 1.838178794655657, - "grad_norm": 0.0033272383734583855, - "learning_rate": 0.00019999833396793236, - "loss": 46.0, - "step": 24042 - }, - { - "epoch": 1.8382552516390467, - "grad_norm": 0.0016868329839780927, - "learning_rate": 0.0001999983338292787, - "loss": 46.0, - "step": 24043 - }, - { - "epoch": 1.8383317086224364, - "grad_norm": 0.0010811745887622237, - "learning_rate": 0.0001999983336906193, - "loss": 46.0, - "step": 24044 - }, - { - "epoch": 1.838408165605826, - "grad_norm": 0.0007133649778552353, - "learning_rate": 0.00019999833355195416, - "loss": 46.0, - "step": 24045 - }, - { - "epoch": 1.8384846225892157, - "grad_norm": 0.0010707960464060307, - "learning_rate": 0.0001999983334132832, - "loss": 46.0, - "step": 24046 - }, - { - "epoch": 1.8385610795726055, - "grad_norm": 0.0031527355313301086, - "learning_rate": 0.00019999833327460649, - "loss": 46.0, - "step": 24047 - }, - { - "epoch": 1.838637536555995, - "grad_norm": 0.0007617617957293987, - "learning_rate": 0.000199998333135924, - "loss": 46.0, - "step": 24048 - }, - { - "epoch": 1.8387139935393848, - "grad_norm": 0.0016422446351498365, - "learning_rate": 0.00019999833299723575, - "loss": 46.0, - "step": 24049 - }, - { - "epoch": 1.8387904505227746, - "grad_norm": 0.004349039401859045, - "learning_rate": 0.0001999983328585417, - "loss": 46.0, - "step": 24050 - }, - { - "epoch": 1.8388669075061643, - "grad_norm": 0.002776196924969554, - "learning_rate": 0.00019999833271984194, - "loss": 46.0, - "step": 24051 - }, - { - "epoch": 1.838943364489554, - "grad_norm": 0.001398204592987895, - "learning_rate": 0.00019999833258113636, - "loss": 46.0, - "step": 24052 - }, - { - "epoch": 1.8390198214729438, - "grad_norm": 0.0006950525566935539, - "learning_rate": 0.00019999833244242505, - "loss": 46.0, - "step": 24053 - }, - { - "epoch": 1.8390962784563336, - "grad_norm": 0.004921953193843365, - "learning_rate": 0.0001999983323037079, - "loss": 46.0, - "step": 24054 - }, - { - "epoch": 1.8391727354397234, - "grad_norm": 0.00047293800162151456, - "learning_rate": 0.00019999833216498506, - "loss": 46.0, - "step": 24055 - }, - { - "epoch": 1.839249192423113, - "grad_norm": 0.0016401024768128991, - "learning_rate": 0.00019999833202625643, - "loss": 46.0, - "step": 24056 - }, - { - "epoch": 1.8393256494065027, - "grad_norm": 0.0003546375664882362, - "learning_rate": 0.00019999833188752203, - "loss": 46.0, - "step": 24057 - }, - { - "epoch": 1.8394021063898924, - "grad_norm": 0.009104827418923378, - "learning_rate": 0.00019999833174878185, - "loss": 46.0, - "step": 24058 - }, - { - "epoch": 1.839478563373282, - "grad_norm": 0.0014980998821556568, - "learning_rate": 0.0001999983316100359, - "loss": 46.0, - "step": 24059 - }, - { - "epoch": 1.8395550203566717, - "grad_norm": 0.0008139011915773153, - "learning_rate": 0.00019999833147128418, - "loss": 46.0, - "step": 24060 - }, - { - "epoch": 1.8396314773400615, - "grad_norm": 0.0004737124836537987, - "learning_rate": 0.0001999983313325267, - "loss": 46.0, - "step": 24061 - }, - { - "epoch": 1.8397079343234513, - "grad_norm": 0.002801937749609351, - "learning_rate": 0.00019999833119376345, - "loss": 46.0, - "step": 24062 - }, - { - "epoch": 1.839784391306841, - "grad_norm": 0.0006293219048529863, - "learning_rate": 0.0001999983310549944, - "loss": 46.0, - "step": 24063 - }, - { - "epoch": 1.8398608482902308, - "grad_norm": 0.0016475480515509844, - "learning_rate": 0.00019999833091621962, - "loss": 46.0, - "step": 24064 - }, - { - "epoch": 1.8399373052736205, - "grad_norm": 0.0005701237241737545, - "learning_rate": 0.00019999833077743905, - "loss": 46.0, - "step": 24065 - }, - { - "epoch": 1.8400137622570103, - "grad_norm": 0.0005094964290037751, - "learning_rate": 0.00019999833063865272, - "loss": 46.0, - "step": 24066 - }, - { - "epoch": 1.8400902192403998, - "grad_norm": 0.00043167563853785396, - "learning_rate": 0.00019999833049986064, - "loss": 46.0, - "step": 24067 - }, - { - "epoch": 1.8401666762237896, - "grad_norm": 0.001459711231291294, - "learning_rate": 0.00019999833036106276, - "loss": 46.0, - "step": 24068 - }, - { - "epoch": 1.8402431332071794, - "grad_norm": 0.0012314132181927562, - "learning_rate": 0.0001999983302222591, - "loss": 46.0, - "step": 24069 - }, - { - "epoch": 1.840319590190569, - "grad_norm": 0.00071003008633852, - "learning_rate": 0.0001999983300834497, - "loss": 46.0, - "step": 24070 - }, - { - "epoch": 1.8403960471739587, - "grad_norm": 0.0010922811925411224, - "learning_rate": 0.00019999832994463453, - "loss": 46.0, - "step": 24071 - }, - { - "epoch": 1.8404725041573484, - "grad_norm": 0.0020431228913366795, - "learning_rate": 0.00019999832980581358, - "loss": 46.0, - "step": 24072 - }, - { - "epoch": 1.8405489611407382, - "grad_norm": 0.00042537436820566654, - "learning_rate": 0.00019999832966698686, - "loss": 46.0, - "step": 24073 - }, - { - "epoch": 1.840625418124128, - "grad_norm": 0.002330636139959097, - "learning_rate": 0.00019999832952815436, - "loss": 46.0, - "step": 24074 - }, - { - "epoch": 1.8407018751075177, - "grad_norm": 0.002542466390877962, - "learning_rate": 0.00019999832938931612, - "loss": 46.0, - "step": 24075 - }, - { - "epoch": 1.8407783320909075, - "grad_norm": 0.0005889342282898724, - "learning_rate": 0.00019999832925047208, - "loss": 46.0, - "step": 24076 - }, - { - "epoch": 1.8408547890742972, - "grad_norm": 0.001610838226042688, - "learning_rate": 0.00019999832911162232, - "loss": 46.0, - "step": 24077 - }, - { - "epoch": 1.8409312460576868, - "grad_norm": 0.0012444778112694621, - "learning_rate": 0.0001999983289727667, - "loss": 46.0, - "step": 24078 - }, - { - "epoch": 1.8410077030410765, - "grad_norm": 0.0010654201032593846, - "learning_rate": 0.0001999983288339054, - "loss": 46.0, - "step": 24079 - }, - { - "epoch": 1.8410841600244663, - "grad_norm": 0.005882246419787407, - "learning_rate": 0.0001999983286950383, - "loss": 46.0, - "step": 24080 - }, - { - "epoch": 1.8411606170078558, - "grad_norm": 0.0012291306629776955, - "learning_rate": 0.00019999832855616543, - "loss": 46.0, - "step": 24081 - }, - { - "epoch": 1.8412370739912456, - "grad_norm": 0.0009694420150481164, - "learning_rate": 0.00019999832841728678, - "loss": 46.0, - "step": 24082 - }, - { - "epoch": 1.8413135309746353, - "grad_norm": 0.005421114154160023, - "learning_rate": 0.00019999832827840238, - "loss": 46.0, - "step": 24083 - }, - { - "epoch": 1.841389987958025, - "grad_norm": 0.005127171520143747, - "learning_rate": 0.00019999832813951218, - "loss": 46.0, - "step": 24084 - }, - { - "epoch": 1.8414664449414149, - "grad_norm": 0.0010110702132806182, - "learning_rate": 0.00019999832800061623, - "loss": 46.0, - "step": 24085 - }, - { - "epoch": 1.8415429019248046, - "grad_norm": 0.0014125773450359702, - "learning_rate": 0.0001999983278617145, - "loss": 46.0, - "step": 24086 - }, - { - "epoch": 1.8416193589081944, - "grad_norm": 0.0005949250189587474, - "learning_rate": 0.000199998327722807, - "loss": 46.0, - "step": 24087 - }, - { - "epoch": 1.8416958158915842, - "grad_norm": 0.0014252071268856525, - "learning_rate": 0.00019999832758389377, - "loss": 46.0, - "step": 24088 - }, - { - "epoch": 1.8417722728749737, - "grad_norm": 0.0009450973011553288, - "learning_rate": 0.00019999832744497473, - "loss": 46.0, - "step": 24089 - }, - { - "epoch": 1.8418487298583635, - "grad_norm": 0.0010257799876853824, - "learning_rate": 0.00019999832730604994, - "loss": 46.0, - "step": 24090 - }, - { - "epoch": 1.841925186841753, - "grad_norm": 0.0009541959734633565, - "learning_rate": 0.00019999832716711938, - "loss": 46.0, - "step": 24091 - }, - { - "epoch": 1.8420016438251428, - "grad_norm": 0.001316535985097289, - "learning_rate": 0.00019999832702818305, - "loss": 46.0, - "step": 24092 - }, - { - "epoch": 1.8420781008085325, - "grad_norm": 0.001131929107941687, - "learning_rate": 0.00019999832688924094, - "loss": 46.0, - "step": 24093 - }, - { - "epoch": 1.8421545577919223, - "grad_norm": 0.0019271766068413854, - "learning_rate": 0.00019999832675029306, - "loss": 46.0, - "step": 24094 - }, - { - "epoch": 1.842231014775312, - "grad_norm": 0.0007769920630380511, - "learning_rate": 0.0001999983266113394, - "loss": 46.0, - "step": 24095 - }, - { - "epoch": 1.8423074717587018, - "grad_norm": 0.0008586702169850469, - "learning_rate": 0.00019999832647238, - "loss": 46.0, - "step": 24096 - }, - { - "epoch": 1.8423839287420916, - "grad_norm": 0.0008281109621748328, - "learning_rate": 0.00019999832633341483, - "loss": 46.0, - "step": 24097 - }, - { - "epoch": 1.8424603857254813, - "grad_norm": 0.0011058085365220904, - "learning_rate": 0.00019999832619444386, - "loss": 46.0, - "step": 24098 - }, - { - "epoch": 1.842536842708871, - "grad_norm": 0.0008683353080414236, - "learning_rate": 0.00019999832605546716, - "loss": 46.0, - "step": 24099 - }, - { - "epoch": 1.8426132996922606, - "grad_norm": 0.000852260272949934, - "learning_rate": 0.00019999832591648464, - "loss": 46.0, - "step": 24100 - }, - { - "epoch": 1.8426897566756504, - "grad_norm": 0.0015756089705973864, - "learning_rate": 0.0001999983257774964, - "loss": 46.0, - "step": 24101 - }, - { - "epoch": 1.84276621365904, - "grad_norm": 0.0009335257345810533, - "learning_rate": 0.0001999983256385024, - "loss": 46.0, - "step": 24102 - }, - { - "epoch": 1.8428426706424297, - "grad_norm": 0.0011852050665766, - "learning_rate": 0.00019999832549950257, - "loss": 46.0, - "step": 24103 - }, - { - "epoch": 1.8429191276258194, - "grad_norm": 0.0047043063677847385, - "learning_rate": 0.00019999832536049699, - "loss": 46.0, - "step": 24104 - }, - { - "epoch": 1.8429955846092092, - "grad_norm": 0.005377822555601597, - "learning_rate": 0.00019999832522148565, - "loss": 46.0, - "step": 24105 - }, - { - "epoch": 1.843072041592599, - "grad_norm": 0.0017967717722058296, - "learning_rate": 0.00019999832508246854, - "loss": 46.0, - "step": 24106 - }, - { - "epoch": 1.8431484985759887, - "grad_norm": 0.0011472634505480528, - "learning_rate": 0.0001999983249434457, - "loss": 46.0, - "step": 24107 - }, - { - "epoch": 1.8432249555593785, - "grad_norm": 0.0007326845661737025, - "learning_rate": 0.00019999832480441704, - "loss": 46.0, - "step": 24108 - }, - { - "epoch": 1.8433014125427682, - "grad_norm": 0.0022598314099013805, - "learning_rate": 0.0001999983246653826, - "loss": 46.0, - "step": 24109 - }, - { - "epoch": 1.843377869526158, - "grad_norm": 0.0011644657934084535, - "learning_rate": 0.00019999832452634244, - "loss": 46.0, - "step": 24110 - }, - { - "epoch": 1.8434543265095475, - "grad_norm": 0.0004907556576654315, - "learning_rate": 0.0001999983243872965, - "loss": 46.0, - "step": 24111 - }, - { - "epoch": 1.8435307834929373, - "grad_norm": 0.0007266573375090957, - "learning_rate": 0.00019999832424824477, - "loss": 46.0, - "step": 24112 - }, - { - "epoch": 1.8436072404763268, - "grad_norm": 0.0006670369766652584, - "learning_rate": 0.00019999832410918727, - "loss": 46.0, - "step": 24113 - }, - { - "epoch": 1.8436836974597166, - "grad_norm": 0.0007401605253107846, - "learning_rate": 0.000199998323970124, - "loss": 46.0, - "step": 24114 - }, - { - "epoch": 1.8437601544431064, - "grad_norm": 0.0013634668430313468, - "learning_rate": 0.000199998323831055, - "loss": 46.0, - "step": 24115 - }, - { - "epoch": 1.8438366114264961, - "grad_norm": 0.002419000491499901, - "learning_rate": 0.00019999832369198018, - "loss": 46.0, - "step": 24116 - }, - { - "epoch": 1.843913068409886, - "grad_norm": 0.0018210975686088204, - "learning_rate": 0.0001999983235528996, - "loss": 46.0, - "step": 24117 - }, - { - "epoch": 1.8439895253932757, - "grad_norm": 0.0007208941970020533, - "learning_rate": 0.00019999832341381329, - "loss": 46.0, - "step": 24118 - }, - { - "epoch": 1.8440659823766654, - "grad_norm": 0.0020097719971090555, - "learning_rate": 0.00019999832327472118, - "loss": 46.0, - "step": 24119 - }, - { - "epoch": 1.8441424393600552, - "grad_norm": 0.0013149511069059372, - "learning_rate": 0.00019999832313562327, - "loss": 46.0, - "step": 24120 - }, - { - "epoch": 1.844218896343445, - "grad_norm": 0.0010369336232542992, - "learning_rate": 0.00019999832299651965, - "loss": 46.0, - "step": 24121 - }, - { - "epoch": 1.8442953533268345, - "grad_norm": 0.002007534261792898, - "learning_rate": 0.00019999832285741022, - "loss": 46.0, - "step": 24122 - }, - { - "epoch": 1.8443718103102242, - "grad_norm": 0.00027686450630426407, - "learning_rate": 0.00019999832271829502, - "loss": 46.0, - "step": 24123 - }, - { - "epoch": 1.8444482672936138, - "grad_norm": 0.0005134936654940248, - "learning_rate": 0.00019999832257917407, - "loss": 46.0, - "step": 24124 - }, - { - "epoch": 1.8445247242770035, - "grad_norm": 0.000567734008654952, - "learning_rate": 0.00019999832244004735, - "loss": 46.0, - "step": 24125 - }, - { - "epoch": 1.8446011812603933, - "grad_norm": 0.004502412863075733, - "learning_rate": 0.00019999832230091483, - "loss": 46.0, - "step": 24126 - }, - { - "epoch": 1.844677638243783, - "grad_norm": 0.0007243787404149771, - "learning_rate": 0.0001999983221617766, - "loss": 46.0, - "step": 24127 - }, - { - "epoch": 1.8447540952271728, - "grad_norm": 0.0008244475466199219, - "learning_rate": 0.00019999832202263255, - "loss": 46.0, - "step": 24128 - }, - { - "epoch": 1.8448305522105626, - "grad_norm": 0.002063054358586669, - "learning_rate": 0.00019999832188348276, - "loss": 46.0, - "step": 24129 - }, - { - "epoch": 1.8449070091939523, - "grad_norm": 0.0007605401915498078, - "learning_rate": 0.00019999832174432718, - "loss": 46.0, - "step": 24130 - }, - { - "epoch": 1.844983466177342, - "grad_norm": 0.0018615626031532884, - "learning_rate": 0.00019999832160516584, - "loss": 46.0, - "step": 24131 - }, - { - "epoch": 1.8450599231607316, - "grad_norm": 0.0009881944861263037, - "learning_rate": 0.00019999832146599876, - "loss": 46.0, - "step": 24132 - }, - { - "epoch": 1.8451363801441214, - "grad_norm": 0.000806662195827812, - "learning_rate": 0.00019999832132682586, - "loss": 46.0, - "step": 24133 - }, - { - "epoch": 1.8452128371275112, - "grad_norm": 0.004836688749492168, - "learning_rate": 0.0001999983211876472, - "loss": 46.0, - "step": 24134 - }, - { - "epoch": 1.8452892941109007, - "grad_norm": 0.0004650605551432818, - "learning_rate": 0.00019999832104846278, - "loss": 46.0, - "step": 24135 - }, - { - "epoch": 1.8453657510942905, - "grad_norm": 0.001476981327868998, - "learning_rate": 0.00019999832090927263, - "loss": 46.0, - "step": 24136 - }, - { - "epoch": 1.8454422080776802, - "grad_norm": 0.0005782695370726287, - "learning_rate": 0.00019999832077007666, - "loss": 46.0, - "step": 24137 - }, - { - "epoch": 1.84551866506107, - "grad_norm": 0.0011770921992138028, - "learning_rate": 0.0001999983206308749, - "loss": 46.0, - "step": 24138 - }, - { - "epoch": 1.8455951220444597, - "grad_norm": 0.003743873443454504, - "learning_rate": 0.00019999832049166742, - "loss": 46.0, - "step": 24139 - }, - { - "epoch": 1.8456715790278495, - "grad_norm": 0.0036658523604273796, - "learning_rate": 0.00019999832035245415, - "loss": 46.0, - "step": 24140 - }, - { - "epoch": 1.8457480360112393, - "grad_norm": 0.004497750662267208, - "learning_rate": 0.00019999832021323514, - "loss": 46.0, - "step": 24141 - }, - { - "epoch": 1.845824492994629, - "grad_norm": 0.000493558996822685, - "learning_rate": 0.00019999832007401032, - "loss": 46.0, - "step": 24142 - }, - { - "epoch": 1.8459009499780186, - "grad_norm": 0.0009167492389678955, - "learning_rate": 0.00019999831993477976, - "loss": 46.0, - "step": 24143 - }, - { - "epoch": 1.8459774069614083, - "grad_norm": 0.00026153927319683135, - "learning_rate": 0.00019999831979554343, - "loss": 46.0, - "step": 24144 - }, - { - "epoch": 1.846053863944798, - "grad_norm": 0.0036145898047834635, - "learning_rate": 0.0001999983196563013, - "loss": 46.0, - "step": 24145 - }, - { - "epoch": 1.8461303209281876, - "grad_norm": 0.0012017266126349568, - "learning_rate": 0.00019999831951705342, - "loss": 46.0, - "step": 24146 - }, - { - "epoch": 1.8462067779115774, - "grad_norm": 0.0007951234001666307, - "learning_rate": 0.00019999831937779977, - "loss": 46.0, - "step": 24147 - }, - { - "epoch": 1.8462832348949672, - "grad_norm": 0.0009669052669778466, - "learning_rate": 0.00019999831923854037, - "loss": 46.0, - "step": 24148 - }, - { - "epoch": 1.846359691878357, - "grad_norm": 0.002213160041719675, - "learning_rate": 0.00019999831909927517, - "loss": 46.0, - "step": 24149 - }, - { - "epoch": 1.8464361488617467, - "grad_norm": 0.0007168302545323968, - "learning_rate": 0.0001999983189600042, - "loss": 46.0, - "step": 24150 - }, - { - "epoch": 1.8465126058451364, - "grad_norm": 0.0014908612938597798, - "learning_rate": 0.00019999831882072745, - "loss": 46.0, - "step": 24151 - }, - { - "epoch": 1.8465890628285262, - "grad_norm": 0.0010554497130215168, - "learning_rate": 0.00019999831868144495, - "loss": 46.0, - "step": 24152 - }, - { - "epoch": 1.846665519811916, - "grad_norm": 0.00048272041021846235, - "learning_rate": 0.0001999983185421567, - "loss": 46.0, - "step": 24153 - }, - { - "epoch": 1.8467419767953055, - "grad_norm": 0.0024181148037314415, - "learning_rate": 0.00019999831840286267, - "loss": 46.0, - "step": 24154 - }, - { - "epoch": 1.8468184337786953, - "grad_norm": 0.0017503125127404928, - "learning_rate": 0.0001999983182635629, - "loss": 46.0, - "step": 24155 - }, - { - "epoch": 1.846894890762085, - "grad_norm": 0.0011440166272222996, - "learning_rate": 0.0001999983181242573, - "loss": 46.0, - "step": 24156 - }, - { - "epoch": 1.8469713477454746, - "grad_norm": 0.002768615260720253, - "learning_rate": 0.00019999831798494594, - "loss": 46.0, - "step": 24157 - }, - { - "epoch": 1.8470478047288643, - "grad_norm": 0.0012028460623696446, - "learning_rate": 0.00019999831784562884, - "loss": 46.0, - "step": 24158 - }, - { - "epoch": 1.847124261712254, - "grad_norm": 0.0015924079343676567, - "learning_rate": 0.00019999831770630596, - "loss": 46.0, - "step": 24159 - }, - { - "epoch": 1.8472007186956438, - "grad_norm": 0.0010614608181640506, - "learning_rate": 0.00019999831756697728, - "loss": 46.0, - "step": 24160 - }, - { - "epoch": 1.8472771756790336, - "grad_norm": 0.001744345761835575, - "learning_rate": 0.00019999831742764288, - "loss": 46.0, - "step": 24161 - }, - { - "epoch": 1.8473536326624234, - "grad_norm": 0.0016607313882559538, - "learning_rate": 0.0001999983172883027, - "loss": 46.0, - "step": 24162 - }, - { - "epoch": 1.8474300896458131, - "grad_norm": 0.0005190973752178252, - "learning_rate": 0.00019999831714895673, - "loss": 46.0, - "step": 24163 - }, - { - "epoch": 1.8475065466292029, - "grad_norm": 0.0010341916931793094, - "learning_rate": 0.00019999831700960501, - "loss": 46.0, - "step": 24164 - }, - { - "epoch": 1.8475830036125924, - "grad_norm": 0.0008348671835847199, - "learning_rate": 0.0001999983168702475, - "loss": 46.0, - "step": 24165 - }, - { - "epoch": 1.8476594605959822, - "grad_norm": 0.0004457558679860085, - "learning_rate": 0.00019999831673088423, - "loss": 46.0, - "step": 24166 - }, - { - "epoch": 1.847735917579372, - "grad_norm": 0.0006765160360373557, - "learning_rate": 0.0001999983165915152, - "loss": 46.0, - "step": 24167 - }, - { - "epoch": 1.8478123745627615, - "grad_norm": 0.0006265683914534748, - "learning_rate": 0.0001999983164521404, - "loss": 46.0, - "step": 24168 - }, - { - "epoch": 1.8478888315461512, - "grad_norm": 0.0017007633578032255, - "learning_rate": 0.00019999831631275982, - "loss": 46.0, - "step": 24169 - }, - { - "epoch": 1.847965288529541, - "grad_norm": 0.0015448606573045254, - "learning_rate": 0.00019999831617337348, - "loss": 46.0, - "step": 24170 - }, - { - "epoch": 1.8480417455129308, - "grad_norm": 0.003544918028637767, - "learning_rate": 0.00019999831603398132, - "loss": 46.0, - "step": 24171 - }, - { - "epoch": 1.8481182024963205, - "grad_norm": 0.0008507675374858081, - "learning_rate": 0.00019999831589458347, - "loss": 46.0, - "step": 24172 - }, - { - "epoch": 1.8481946594797103, - "grad_norm": 0.0016558998031541705, - "learning_rate": 0.00019999831575517982, - "loss": 46.0, - "step": 24173 - }, - { - "epoch": 1.8482711164631, - "grad_norm": 0.0016026265220716596, - "learning_rate": 0.0001999983156157704, - "loss": 46.0, - "step": 24174 - }, - { - "epoch": 1.8483475734464898, - "grad_norm": 0.0007121895905584097, - "learning_rate": 0.00019999831547635522, - "loss": 46.0, - "step": 24175 - }, - { - "epoch": 1.8484240304298794, - "grad_norm": 0.0003881090960931033, - "learning_rate": 0.00019999831533693425, - "loss": 46.0, - "step": 24176 - }, - { - "epoch": 1.8485004874132691, - "grad_norm": 0.0013854090357199311, - "learning_rate": 0.0001999983151975075, - "loss": 46.0, - "step": 24177 - }, - { - "epoch": 1.8485769443966589, - "grad_norm": 0.0011741119669750333, - "learning_rate": 0.000199998315058075, - "loss": 46.0, - "step": 24178 - }, - { - "epoch": 1.8486534013800484, - "grad_norm": 0.000811099773272872, - "learning_rate": 0.00019999831491863672, - "loss": 46.0, - "step": 24179 - }, - { - "epoch": 1.8487298583634382, - "grad_norm": 0.00379693484865129, - "learning_rate": 0.0001999983147791927, - "loss": 46.0, - "step": 24180 - }, - { - "epoch": 1.848806315346828, - "grad_norm": 0.0003310217580292374, - "learning_rate": 0.00019999831463974287, - "loss": 46.0, - "step": 24181 - }, - { - "epoch": 1.8488827723302177, - "grad_norm": 0.0012897496344521642, - "learning_rate": 0.0001999983145002873, - "loss": 46.0, - "step": 24182 - }, - { - "epoch": 1.8489592293136075, - "grad_norm": 0.0004151734756305814, - "learning_rate": 0.00019999831436082598, - "loss": 46.0, - "step": 24183 - }, - { - "epoch": 1.8490356862969972, - "grad_norm": 0.002177771646529436, - "learning_rate": 0.00019999831422135884, - "loss": 46.0, - "step": 24184 - }, - { - "epoch": 1.849112143280387, - "grad_norm": 0.0026943094562739134, - "learning_rate": 0.00019999831408188597, - "loss": 46.0, - "step": 24185 - }, - { - "epoch": 1.8491886002637767, - "grad_norm": 0.001724479952827096, - "learning_rate": 0.0001999983139424073, - "loss": 46.0, - "step": 24186 - }, - { - "epoch": 1.8492650572471663, - "grad_norm": 0.0006051899981684983, - "learning_rate": 0.0001999983138029229, - "loss": 46.0, - "step": 24187 - }, - { - "epoch": 1.849341514230556, - "grad_norm": 0.0022523547522723675, - "learning_rate": 0.0001999983136634327, - "loss": 46.0, - "step": 24188 - }, - { - "epoch": 1.8494179712139458, - "grad_norm": 0.0007815954741090536, - "learning_rate": 0.00019999831352393672, - "loss": 46.0, - "step": 24189 - }, - { - "epoch": 1.8494944281973353, - "grad_norm": 0.0035633379593491554, - "learning_rate": 0.00019999831338443497, - "loss": 46.0, - "step": 24190 - }, - { - "epoch": 1.849570885180725, - "grad_norm": 0.0012178485048934817, - "learning_rate": 0.0001999983132449275, - "loss": 46.0, - "step": 24191 - }, - { - "epoch": 1.8496473421641149, - "grad_norm": 0.002476731315255165, - "learning_rate": 0.00019999831310541421, - "loss": 46.0, - "step": 24192 - }, - { - "epoch": 1.8497237991475046, - "grad_norm": 0.0010207825107499957, - "learning_rate": 0.00019999831296589518, - "loss": 46.0, - "step": 24193 - }, - { - "epoch": 1.8498002561308944, - "grad_norm": 0.00046677826321683824, - "learning_rate": 0.00019999831282637036, - "loss": 46.0, - "step": 24194 - }, - { - "epoch": 1.8498767131142841, - "grad_norm": 0.004186040721833706, - "learning_rate": 0.0001999983126868398, - "loss": 46.0, - "step": 24195 - }, - { - "epoch": 1.849953170097674, - "grad_norm": 0.0006600570632144809, - "learning_rate": 0.00019999831254730345, - "loss": 46.0, - "step": 24196 - }, - { - "epoch": 1.8500296270810637, - "grad_norm": 0.0011380210053175688, - "learning_rate": 0.00019999831240776131, - "loss": 46.0, - "step": 24197 - }, - { - "epoch": 1.8501060840644532, - "grad_norm": 0.0014121267013251781, - "learning_rate": 0.0001999983122682134, - "loss": 46.0, - "step": 24198 - }, - { - "epoch": 1.850182541047843, - "grad_norm": 0.002492221537977457, - "learning_rate": 0.00019999831212865976, - "loss": 46.0, - "step": 24199 - }, - { - "epoch": 1.8502589980312327, - "grad_norm": 0.002731338609009981, - "learning_rate": 0.00019999831198910036, - "loss": 46.0, - "step": 24200 - }, - { - "epoch": 1.8503354550146223, - "grad_norm": 0.0006185830570757389, - "learning_rate": 0.00019999831184953516, - "loss": 46.0, - "step": 24201 - }, - { - "epoch": 1.850411911998012, - "grad_norm": 0.0008674424607306719, - "learning_rate": 0.0001999983117099642, - "loss": 46.0, - "step": 24202 - }, - { - "epoch": 1.8504883689814018, - "grad_norm": 0.0005722398054786026, - "learning_rate": 0.00019999831157038744, - "loss": 46.0, - "step": 24203 - }, - { - "epoch": 1.8505648259647915, - "grad_norm": 0.0009735375642776489, - "learning_rate": 0.00019999831143080495, - "loss": 46.0, - "step": 24204 - }, - { - "epoch": 1.8506412829481813, - "grad_norm": 0.002922134706750512, - "learning_rate": 0.00019999831129121666, - "loss": 46.0, - "step": 24205 - }, - { - "epoch": 1.850717739931571, - "grad_norm": 0.0005558782140724361, - "learning_rate": 0.00019999831115162265, - "loss": 46.0, - "step": 24206 - }, - { - "epoch": 1.8507941969149608, - "grad_norm": 0.0017395829781889915, - "learning_rate": 0.0001999983110120228, - "loss": 46.0, - "step": 24207 - }, - { - "epoch": 1.8508706538983506, - "grad_norm": 0.0012845478486269712, - "learning_rate": 0.00019999831087241722, - "loss": 46.0, - "step": 24208 - }, - { - "epoch": 1.8509471108817401, - "grad_norm": 0.0009314960334450006, - "learning_rate": 0.0001999983107328059, - "loss": 46.0, - "step": 24209 - }, - { - "epoch": 1.85102356786513, - "grad_norm": 0.0028973405715078115, - "learning_rate": 0.00019999831059318876, - "loss": 46.0, - "step": 24210 - }, - { - "epoch": 1.8511000248485197, - "grad_norm": 0.01054801419377327, - "learning_rate": 0.00019999831045356588, - "loss": 46.0, - "step": 24211 - }, - { - "epoch": 1.8511764818319092, - "grad_norm": 0.0014662856701761484, - "learning_rate": 0.0001999983103139372, - "loss": 46.0, - "step": 24212 - }, - { - "epoch": 1.851252938815299, - "grad_norm": 0.000963919737841934, - "learning_rate": 0.0001999983101743028, - "loss": 46.0, - "step": 24213 - }, - { - "epoch": 1.8513293957986887, - "grad_norm": 0.0008107098983600736, - "learning_rate": 0.0001999983100346626, - "loss": 46.0, - "step": 24214 - }, - { - "epoch": 1.8514058527820785, - "grad_norm": 0.008049333468079567, - "learning_rate": 0.00019999830989501664, - "loss": 46.0, - "step": 24215 - }, - { - "epoch": 1.8514823097654682, - "grad_norm": 0.0014261500909924507, - "learning_rate": 0.00019999830975536492, - "loss": 46.0, - "step": 24216 - }, - { - "epoch": 1.851558766748858, - "grad_norm": 0.0007155823986977339, - "learning_rate": 0.00019999830961570737, - "loss": 46.0, - "step": 24217 - }, - { - "epoch": 1.8516352237322478, - "grad_norm": 0.0010858505265787244, - "learning_rate": 0.00019999830947604413, - "loss": 46.0, - "step": 24218 - }, - { - "epoch": 1.8517116807156375, - "grad_norm": 0.0005700408946722746, - "learning_rate": 0.0001999983093363751, - "loss": 46.0, - "step": 24219 - }, - { - "epoch": 1.851788137699027, - "grad_norm": 0.0005103947478346527, - "learning_rate": 0.00019999830919670026, - "loss": 46.0, - "step": 24220 - }, - { - "epoch": 1.8518645946824168, - "grad_norm": 0.005461809225380421, - "learning_rate": 0.00019999830905701967, - "loss": 46.0, - "step": 24221 - }, - { - "epoch": 1.8519410516658064, - "grad_norm": 0.0006621723296120763, - "learning_rate": 0.00019999830891733332, - "loss": 46.0, - "step": 24222 - }, - { - "epoch": 1.8520175086491961, - "grad_norm": 0.0006473535322584212, - "learning_rate": 0.0001999983087776412, - "loss": 46.0, - "step": 24223 - }, - { - "epoch": 1.8520939656325859, - "grad_norm": 0.0009456057450734079, - "learning_rate": 0.0001999983086379433, - "loss": 46.0, - "step": 24224 - }, - { - "epoch": 1.8521704226159756, - "grad_norm": 0.0033528481144458055, - "learning_rate": 0.00019999830849823965, - "loss": 46.0, - "step": 24225 - }, - { - "epoch": 1.8522468795993654, - "grad_norm": 0.001613075379282236, - "learning_rate": 0.00019999830835853023, - "loss": 46.0, - "step": 24226 - }, - { - "epoch": 1.8523233365827552, - "grad_norm": 0.0012833367800340056, - "learning_rate": 0.00019999830821881503, - "loss": 46.0, - "step": 24227 - }, - { - "epoch": 1.852399793566145, - "grad_norm": 0.00898823980242014, - "learning_rate": 0.00019999830807909406, - "loss": 46.0, - "step": 24228 - }, - { - "epoch": 1.8524762505495347, - "grad_norm": 0.002006369410082698, - "learning_rate": 0.00019999830793936732, - "loss": 46.0, - "step": 24229 - }, - { - "epoch": 1.8525527075329244, - "grad_norm": 0.0016314416425302625, - "learning_rate": 0.0001999983077996348, - "loss": 46.0, - "step": 24230 - }, - { - "epoch": 1.852629164516314, - "grad_norm": 0.0009637699695304036, - "learning_rate": 0.00019999830765989653, - "loss": 46.0, - "step": 24231 - }, - { - "epoch": 1.8527056214997037, - "grad_norm": 0.0012076377170160413, - "learning_rate": 0.0001999983075201525, - "loss": 46.0, - "step": 24232 - }, - { - "epoch": 1.8527820784830933, - "grad_norm": 0.002940573263913393, - "learning_rate": 0.00019999830738040268, - "loss": 46.0, - "step": 24233 - }, - { - "epoch": 1.852858535466483, - "grad_norm": 0.0006985941436141729, - "learning_rate": 0.0001999983072406471, - "loss": 46.0, - "step": 24234 - }, - { - "epoch": 1.8529349924498728, - "grad_norm": 0.0002745035744737834, - "learning_rate": 0.00019999830710088574, - "loss": 46.0, - "step": 24235 - }, - { - "epoch": 1.8530114494332626, - "grad_norm": 0.004302772227674723, - "learning_rate": 0.0001999983069611186, - "loss": 46.0, - "step": 24236 - }, - { - "epoch": 1.8530879064166523, - "grad_norm": 0.0004975069314241409, - "learning_rate": 0.0001999983068213457, - "loss": 46.0, - "step": 24237 - }, - { - "epoch": 1.853164363400042, - "grad_norm": 0.004065149463713169, - "learning_rate": 0.00019999830668156708, - "loss": 46.0, - "step": 24238 - }, - { - "epoch": 1.8532408203834319, - "grad_norm": 0.0019213083432987332, - "learning_rate": 0.00019999830654178263, - "loss": 46.0, - "step": 24239 - }, - { - "epoch": 1.8533172773668216, - "grad_norm": 0.0006144684157334268, - "learning_rate": 0.00019999830640199243, - "loss": 46.0, - "step": 24240 - }, - { - "epoch": 1.8533937343502114, - "grad_norm": 0.001071637263521552, - "learning_rate": 0.00019999830626219644, - "loss": 46.0, - "step": 24241 - }, - { - "epoch": 1.853470191333601, - "grad_norm": 0.0008036333601921797, - "learning_rate": 0.00019999830612239472, - "loss": 46.0, - "step": 24242 - }, - { - "epoch": 1.8535466483169907, - "grad_norm": 0.0008139242418110371, - "learning_rate": 0.00019999830598258723, - "loss": 46.0, - "step": 24243 - }, - { - "epoch": 1.8536231053003802, - "grad_norm": 0.002035158919170499, - "learning_rate": 0.00019999830584277394, - "loss": 46.0, - "step": 24244 - }, - { - "epoch": 1.85369956228377, - "grad_norm": 0.0006918600993230939, - "learning_rate": 0.00019999830570295487, - "loss": 46.0, - "step": 24245 - }, - { - "epoch": 1.8537760192671597, - "grad_norm": 0.0010036255698651075, - "learning_rate": 0.00019999830556313006, - "loss": 46.0, - "step": 24246 - }, - { - "epoch": 1.8538524762505495, - "grad_norm": 0.005046769976615906, - "learning_rate": 0.0001999983054232995, - "loss": 46.0, - "step": 24247 - }, - { - "epoch": 1.8539289332339393, - "grad_norm": 0.0015095500275492668, - "learning_rate": 0.00019999830528346312, - "loss": 46.0, - "step": 24248 - }, - { - "epoch": 1.854005390217329, - "grad_norm": 0.00347004272043705, - "learning_rate": 0.000199998305143621, - "loss": 46.0, - "step": 24249 - }, - { - "epoch": 1.8540818472007188, - "grad_norm": 0.0014457611832767725, - "learning_rate": 0.0001999983050037731, - "loss": 46.0, - "step": 24250 - }, - { - "epoch": 1.8541583041841085, - "grad_norm": 0.0007619456155225635, - "learning_rate": 0.00019999830486391943, - "loss": 46.0, - "step": 24251 - }, - { - "epoch": 1.8542347611674983, - "grad_norm": 0.008653547614812851, - "learning_rate": 0.00019999830472406004, - "loss": 46.0, - "step": 24252 - }, - { - "epoch": 1.8543112181508878, - "grad_norm": 0.0010393777629360557, - "learning_rate": 0.00019999830458419479, - "loss": 46.0, - "step": 24253 - }, - { - "epoch": 1.8543876751342776, - "grad_norm": 0.0008056523511186242, - "learning_rate": 0.00019999830444432384, - "loss": 46.0, - "step": 24254 - }, - { - "epoch": 1.8544641321176671, - "grad_norm": 0.0012520093005150557, - "learning_rate": 0.0001999983043044471, - "loss": 46.0, - "step": 24255 - }, - { - "epoch": 1.854540589101057, - "grad_norm": 0.0006997457821853459, - "learning_rate": 0.0001999983041645646, - "loss": 46.0, - "step": 24256 - }, - { - "epoch": 1.8546170460844467, - "grad_norm": 0.004774757660925388, - "learning_rate": 0.00019999830402467632, - "loss": 46.0, - "step": 24257 - }, - { - "epoch": 1.8546935030678364, - "grad_norm": 0.0007005062070675194, - "learning_rate": 0.00019999830388478228, - "loss": 46.0, - "step": 24258 - }, - { - "epoch": 1.8547699600512262, - "grad_norm": 0.001984376460313797, - "learning_rate": 0.00019999830374488244, - "loss": 46.0, - "step": 24259 - }, - { - "epoch": 1.854846417034616, - "grad_norm": 0.0004810243262909353, - "learning_rate": 0.00019999830360497686, - "loss": 46.0, - "step": 24260 - }, - { - "epoch": 1.8549228740180057, - "grad_norm": 0.001636349712498486, - "learning_rate": 0.0001999983034650655, - "loss": 46.0, - "step": 24261 - }, - { - "epoch": 1.8549993310013955, - "grad_norm": 0.0006077042198739946, - "learning_rate": 0.00019999830332514837, - "loss": 46.0, - "step": 24262 - }, - { - "epoch": 1.855075787984785, - "grad_norm": 0.002515554428100586, - "learning_rate": 0.00019999830318522547, - "loss": 46.0, - "step": 24263 - }, - { - "epoch": 1.8551522449681748, - "grad_norm": 0.0008373632445000112, - "learning_rate": 0.00019999830304529682, - "loss": 46.0, - "step": 24264 - }, - { - "epoch": 1.8552287019515645, - "grad_norm": 0.0011075373040512204, - "learning_rate": 0.0001999983029053624, - "loss": 46.0, - "step": 24265 - }, - { - "epoch": 1.855305158934954, - "grad_norm": 0.026234447956085205, - "learning_rate": 0.00019999830276542217, - "loss": 46.0, - "step": 24266 - }, - { - "epoch": 1.8553816159183438, - "grad_norm": 0.005558467470109463, - "learning_rate": 0.0001999983026254762, - "loss": 46.0, - "step": 24267 - }, - { - "epoch": 1.8554580729017336, - "grad_norm": 0.0014849754516035318, - "learning_rate": 0.00019999830248552445, - "loss": 46.0, - "step": 24268 - }, - { - "epoch": 1.8555345298851234, - "grad_norm": 0.001033682725392282, - "learning_rate": 0.00019999830234556694, - "loss": 46.0, - "step": 24269 - }, - { - "epoch": 1.8556109868685131, - "grad_norm": 0.0011668866500258446, - "learning_rate": 0.00019999830220560365, - "loss": 46.0, - "step": 24270 - }, - { - "epoch": 1.8556874438519029, - "grad_norm": 0.002063876949250698, - "learning_rate": 0.0001999983020656346, - "loss": 46.0, - "step": 24271 - }, - { - "epoch": 1.8557639008352926, - "grad_norm": 0.0005701023619621992, - "learning_rate": 0.0001999983019256598, - "loss": 46.0, - "step": 24272 - }, - { - "epoch": 1.8558403578186824, - "grad_norm": 0.0007977632340043783, - "learning_rate": 0.0001999983017856792, - "loss": 46.0, - "step": 24273 - }, - { - "epoch": 1.855916814802072, - "grad_norm": 0.0007731782970950007, - "learning_rate": 0.00019999830164569283, - "loss": 46.0, - "step": 24274 - }, - { - "epoch": 1.8559932717854617, - "grad_norm": 0.0006289740558713675, - "learning_rate": 0.0001999983015057007, - "loss": 46.0, - "step": 24275 - }, - { - "epoch": 1.8560697287688515, - "grad_norm": 0.0010380460880696774, - "learning_rate": 0.00019999830136570282, - "loss": 46.0, - "step": 24276 - }, - { - "epoch": 1.856146185752241, - "grad_norm": 0.005266075022518635, - "learning_rate": 0.00019999830122569915, - "loss": 46.0, - "step": 24277 - }, - { - "epoch": 1.8562226427356308, - "grad_norm": 0.0026753107085824013, - "learning_rate": 0.0001999983010856897, - "loss": 46.0, - "step": 24278 - }, - { - "epoch": 1.8562990997190205, - "grad_norm": 0.0013278336264193058, - "learning_rate": 0.0001999983009456745, - "loss": 46.0, - "step": 24279 - }, - { - "epoch": 1.8563755567024103, - "grad_norm": 0.0006706651183776557, - "learning_rate": 0.00019999830080565353, - "loss": 46.0, - "step": 24280 - }, - { - "epoch": 1.8564520136858, - "grad_norm": 0.0008870938327163458, - "learning_rate": 0.0001999983006656268, - "loss": 46.0, - "step": 24281 - }, - { - "epoch": 1.8565284706691898, - "grad_norm": 0.0008552668150514364, - "learning_rate": 0.00019999830052559427, - "loss": 46.0, - "step": 24282 - }, - { - "epoch": 1.8566049276525796, - "grad_norm": 0.0005379029898904264, - "learning_rate": 0.00019999830038555598, - "loss": 46.0, - "step": 24283 - }, - { - "epoch": 1.8566813846359693, - "grad_norm": 0.0010993879986926913, - "learning_rate": 0.00019999830024551194, - "loss": 46.0, - "step": 24284 - }, - { - "epoch": 1.8567578416193589, - "grad_norm": 0.002867314266040921, - "learning_rate": 0.00019999830010546213, - "loss": 46.0, - "step": 24285 - }, - { - "epoch": 1.8568342986027486, - "grad_norm": 0.001601767842657864, - "learning_rate": 0.00019999829996540652, - "loss": 46.0, - "step": 24286 - }, - { - "epoch": 1.8569107555861384, - "grad_norm": 0.005336458329111338, - "learning_rate": 0.00019999829982534517, - "loss": 46.0, - "step": 24287 - }, - { - "epoch": 1.856987212569528, - "grad_norm": 0.0009401165298186243, - "learning_rate": 0.00019999829968527804, - "loss": 46.0, - "step": 24288 - }, - { - "epoch": 1.8570636695529177, - "grad_norm": 0.0017366111278533936, - "learning_rate": 0.00019999829954520513, - "loss": 46.0, - "step": 24289 - }, - { - "epoch": 1.8571401265363074, - "grad_norm": 0.000529967132024467, - "learning_rate": 0.00019999829940512646, - "loss": 46.0, - "step": 24290 - }, - { - "epoch": 1.8572165835196972, - "grad_norm": 0.0022200760431587696, - "learning_rate": 0.00019999829926504203, - "loss": 46.0, - "step": 24291 - }, - { - "epoch": 1.857293040503087, - "grad_norm": 0.0008585151517763734, - "learning_rate": 0.00019999829912495184, - "loss": 46.0, - "step": 24292 - }, - { - "epoch": 1.8573694974864767, - "grad_norm": 0.0011055023642256856, - "learning_rate": 0.00019999829898485584, - "loss": 46.0, - "step": 24293 - }, - { - "epoch": 1.8574459544698665, - "grad_norm": 0.001331742969341576, - "learning_rate": 0.0001999982988447541, - "loss": 46.0, - "step": 24294 - }, - { - "epoch": 1.8575224114532563, - "grad_norm": 0.001066754455678165, - "learning_rate": 0.00019999829870464658, - "loss": 46.0, - "step": 24295 - }, - { - "epoch": 1.8575988684366458, - "grad_norm": 0.0007064155070111156, - "learning_rate": 0.00019999829856453332, - "loss": 46.0, - "step": 24296 - }, - { - "epoch": 1.8576753254200356, - "grad_norm": 0.0017156120156869292, - "learning_rate": 0.00019999829842441425, - "loss": 46.0, - "step": 24297 - }, - { - "epoch": 1.8577517824034253, - "grad_norm": 0.0012110660318285227, - "learning_rate": 0.00019999829828428944, - "loss": 46.0, - "step": 24298 - }, - { - "epoch": 1.8578282393868149, - "grad_norm": 0.0020337607711553574, - "learning_rate": 0.00019999829814415883, - "loss": 46.0, - "step": 24299 - }, - { - "epoch": 1.8579046963702046, - "grad_norm": 0.0006884110043756664, - "learning_rate": 0.00019999829800402248, - "loss": 46.0, - "step": 24300 - }, - { - "epoch": 1.8579811533535944, - "grad_norm": 0.0013276750687509775, - "learning_rate": 0.00019999829786388035, - "loss": 46.0, - "step": 24301 - }, - { - "epoch": 1.8580576103369841, - "grad_norm": 0.0034233753103762865, - "learning_rate": 0.00019999829772373245, - "loss": 46.0, - "step": 24302 - }, - { - "epoch": 1.858134067320374, - "grad_norm": 0.0007153255864977837, - "learning_rate": 0.00019999829758357877, - "loss": 46.0, - "step": 24303 - }, - { - "epoch": 1.8582105243037637, - "grad_norm": 0.002965073101222515, - "learning_rate": 0.00019999829744341935, - "loss": 46.0, - "step": 24304 - }, - { - "epoch": 1.8582869812871534, - "grad_norm": 0.0028012837283313274, - "learning_rate": 0.00019999829730325412, - "loss": 46.0, - "step": 24305 - }, - { - "epoch": 1.8583634382705432, - "grad_norm": 0.00914770271629095, - "learning_rate": 0.00019999829716308315, - "loss": 46.0, - "step": 24306 - }, - { - "epoch": 1.8584398952539327, - "grad_norm": 0.0009376664529554546, - "learning_rate": 0.0001999982970229064, - "loss": 46.0, - "step": 24307 - }, - { - "epoch": 1.8585163522373225, - "grad_norm": 0.001472039963118732, - "learning_rate": 0.00019999829688272387, - "loss": 46.0, - "step": 24308 - }, - { - "epoch": 1.8585928092207122, - "grad_norm": 0.0006524782511405647, - "learning_rate": 0.0001999982967425356, - "loss": 46.0, - "step": 24309 - }, - { - "epoch": 1.8586692662041018, - "grad_norm": 0.000704131496604532, - "learning_rate": 0.00019999829660234152, - "loss": 46.0, - "step": 24310 - }, - { - "epoch": 1.8587457231874915, - "grad_norm": 0.000869977637194097, - "learning_rate": 0.00019999829646214173, - "loss": 46.0, - "step": 24311 - }, - { - "epoch": 1.8588221801708813, - "grad_norm": 0.002646066015586257, - "learning_rate": 0.00019999829632193612, - "loss": 46.0, - "step": 24312 - }, - { - "epoch": 1.858898637154271, - "grad_norm": 0.005296837538480759, - "learning_rate": 0.00019999829618172474, - "loss": 46.0, - "step": 24313 - }, - { - "epoch": 1.8589750941376608, - "grad_norm": 0.0011698000598698854, - "learning_rate": 0.0001999982960415076, - "loss": 46.0, - "step": 24314 - }, - { - "epoch": 1.8590515511210506, - "grad_norm": 0.0007332656532526016, - "learning_rate": 0.0001999982959012847, - "loss": 46.0, - "step": 24315 - }, - { - "epoch": 1.8591280081044403, - "grad_norm": 0.0012183219660073519, - "learning_rate": 0.00019999829576105603, - "loss": 46.0, - "step": 24316 - }, - { - "epoch": 1.85920446508783, - "grad_norm": 0.0010472637368366122, - "learning_rate": 0.0001999982956208216, - "loss": 46.0, - "step": 24317 - }, - { - "epoch": 1.8592809220712196, - "grad_norm": 0.0008507959428243339, - "learning_rate": 0.0001999982954805814, - "loss": 46.0, - "step": 24318 - }, - { - "epoch": 1.8593573790546094, - "grad_norm": 0.0007265129825100303, - "learning_rate": 0.00019999829534033542, - "loss": 46.0, - "step": 24319 - }, - { - "epoch": 1.8594338360379992, - "grad_norm": 0.0006136031006462872, - "learning_rate": 0.00019999829520008368, - "loss": 46.0, - "step": 24320 - }, - { - "epoch": 1.8595102930213887, - "grad_norm": 0.0007494620513170958, - "learning_rate": 0.00019999829505982616, - "loss": 46.0, - "step": 24321 - }, - { - "epoch": 1.8595867500047785, - "grad_norm": 0.0020406614057719707, - "learning_rate": 0.00019999829491956284, - "loss": 46.0, - "step": 24322 - }, - { - "epoch": 1.8596632069881682, - "grad_norm": 0.0006749049643985927, - "learning_rate": 0.0001999982947792938, - "loss": 46.0, - "step": 24323 - }, - { - "epoch": 1.859739663971558, - "grad_norm": 0.0007904137601144612, - "learning_rate": 0.00019999829463901897, - "loss": 46.0, - "step": 24324 - }, - { - "epoch": 1.8598161209549477, - "grad_norm": 0.00148884451482445, - "learning_rate": 0.00019999829449873836, - "loss": 46.0, - "step": 24325 - }, - { - "epoch": 1.8598925779383375, - "grad_norm": 0.0005614770343527198, - "learning_rate": 0.000199998294358452, - "loss": 46.0, - "step": 24326 - }, - { - "epoch": 1.8599690349217273, - "grad_norm": 0.000606716435868293, - "learning_rate": 0.00019999829421815988, - "loss": 46.0, - "step": 24327 - }, - { - "epoch": 1.860045491905117, - "grad_norm": 0.004627090413123369, - "learning_rate": 0.00019999829407786197, - "loss": 46.0, - "step": 24328 - }, - { - "epoch": 1.8601219488885066, - "grad_norm": 0.010523718781769276, - "learning_rate": 0.0001999982939375583, - "loss": 46.0, - "step": 24329 - }, - { - "epoch": 1.8601984058718963, - "grad_norm": 0.0008912984048947692, - "learning_rate": 0.00019999829379724888, - "loss": 46.0, - "step": 24330 - }, - { - "epoch": 1.860274862855286, - "grad_norm": 0.00428125960752368, - "learning_rate": 0.00019999829365693365, - "loss": 46.0, - "step": 24331 - }, - { - "epoch": 1.8603513198386756, - "grad_norm": 0.0007216015364974737, - "learning_rate": 0.00019999829351661269, - "loss": 46.0, - "step": 24332 - }, - { - "epoch": 1.8604277768220654, - "grad_norm": 0.0003870258806273341, - "learning_rate": 0.00019999829337628592, - "loss": 46.0, - "step": 24333 - }, - { - "epoch": 1.8605042338054552, - "grad_norm": 0.0009758063824847341, - "learning_rate": 0.0001999982932359534, - "loss": 46.0, - "step": 24334 - }, - { - "epoch": 1.860580690788845, - "grad_norm": 0.0008671103860251606, - "learning_rate": 0.0001999982930956151, - "loss": 46.0, - "step": 24335 - }, - { - "epoch": 1.8606571477722347, - "grad_norm": 0.0025011859834194183, - "learning_rate": 0.00019999829295527105, - "loss": 46.0, - "step": 24336 - }, - { - "epoch": 1.8607336047556244, - "grad_norm": 0.0011880442034453154, - "learning_rate": 0.00019999829281492124, - "loss": 46.0, - "step": 24337 - }, - { - "epoch": 1.8608100617390142, - "grad_norm": 0.0004758826980832964, - "learning_rate": 0.00019999829267456566, - "loss": 46.0, - "step": 24338 - }, - { - "epoch": 1.860886518722404, - "grad_norm": 0.003106341464444995, - "learning_rate": 0.00019999829253420428, - "loss": 46.0, - "step": 24339 - }, - { - "epoch": 1.8609629757057935, - "grad_norm": 0.0009115770226344466, - "learning_rate": 0.00019999829239383715, - "loss": 46.0, - "step": 24340 - }, - { - "epoch": 1.8610394326891833, - "grad_norm": 0.0018286345293745399, - "learning_rate": 0.00019999829225346422, - "loss": 46.0, - "step": 24341 - }, - { - "epoch": 1.861115889672573, - "grad_norm": 0.0003300034150015563, - "learning_rate": 0.00019999829211308554, - "loss": 46.0, - "step": 24342 - }, - { - "epoch": 1.8611923466559626, - "grad_norm": 0.0007188476738519967, - "learning_rate": 0.0001999982919727011, - "loss": 46.0, - "step": 24343 - }, - { - "epoch": 1.8612688036393523, - "grad_norm": 0.00057169608771801, - "learning_rate": 0.0001999982918323109, - "loss": 46.0, - "step": 24344 - }, - { - "epoch": 1.861345260622742, - "grad_norm": 0.0012310597812756896, - "learning_rate": 0.0001999982916919149, - "loss": 46.0, - "step": 24345 - }, - { - "epoch": 1.8614217176061318, - "grad_norm": 0.0005620619049295783, - "learning_rate": 0.00019999829155151316, - "loss": 46.0, - "step": 24346 - }, - { - "epoch": 1.8614981745895216, - "grad_norm": 0.0016944174421951175, - "learning_rate": 0.00019999829141110565, - "loss": 46.0, - "step": 24347 - }, - { - "epoch": 1.8615746315729114, - "grad_norm": 0.0011592715745791793, - "learning_rate": 0.00019999829127069233, - "loss": 46.0, - "step": 24348 - }, - { - "epoch": 1.8616510885563011, - "grad_norm": 0.000989419175311923, - "learning_rate": 0.0001999982911302733, - "loss": 46.0, - "step": 24349 - }, - { - "epoch": 1.8617275455396909, - "grad_norm": 0.0010174572234973311, - "learning_rate": 0.00019999829098984843, - "loss": 46.0, - "step": 24350 - }, - { - "epoch": 1.8618040025230804, - "grad_norm": 0.001392244128510356, - "learning_rate": 0.00019999829084941782, - "loss": 46.0, - "step": 24351 - }, - { - "epoch": 1.8618804595064702, - "grad_norm": 0.0012236990733072162, - "learning_rate": 0.0001999982907089815, - "loss": 46.0, - "step": 24352 - }, - { - "epoch": 1.86195691648986, - "grad_norm": 0.0009973953710868955, - "learning_rate": 0.00019999829056853937, - "loss": 46.0, - "step": 24353 - }, - { - "epoch": 1.8620333734732495, - "grad_norm": 0.005353773478418589, - "learning_rate": 0.00019999829042809144, - "loss": 46.0, - "step": 24354 - }, - { - "epoch": 1.8621098304566392, - "grad_norm": 0.0007683031726628542, - "learning_rate": 0.00019999829028763774, - "loss": 46.0, - "step": 24355 - }, - { - "epoch": 1.862186287440029, - "grad_norm": 0.0009289855370298028, - "learning_rate": 0.00019999829014717832, - "loss": 46.0, - "step": 24356 - }, - { - "epoch": 1.8622627444234188, - "grad_norm": 0.002057724166661501, - "learning_rate": 0.0001999982900067131, - "loss": 46.0, - "step": 24357 - }, - { - "epoch": 1.8623392014068085, - "grad_norm": 0.0004398580058477819, - "learning_rate": 0.00019999828986624213, - "loss": 46.0, - "step": 24358 - }, - { - "epoch": 1.8624156583901983, - "grad_norm": 0.0012263068929314613, - "learning_rate": 0.00019999828972576538, - "loss": 46.0, - "step": 24359 - }, - { - "epoch": 1.862492115373588, - "grad_norm": 0.0006371165509335697, - "learning_rate": 0.00019999828958528284, - "loss": 46.0, - "step": 24360 - }, - { - "epoch": 1.8625685723569778, - "grad_norm": 0.0007121982634998858, - "learning_rate": 0.00019999828944479455, - "loss": 46.0, - "step": 24361 - }, - { - "epoch": 1.8626450293403674, - "grad_norm": 0.0009636137983761728, - "learning_rate": 0.0001999982893043005, - "loss": 46.0, - "step": 24362 - }, - { - "epoch": 1.8627214863237571, - "grad_norm": 0.009043118916451931, - "learning_rate": 0.00019999828916380066, - "loss": 46.0, - "step": 24363 - }, - { - "epoch": 1.8627979433071467, - "grad_norm": 0.0006459135911427438, - "learning_rate": 0.00019999828902329505, - "loss": 46.0, - "step": 24364 - }, - { - "epoch": 1.8628744002905364, - "grad_norm": 0.0008643659530207515, - "learning_rate": 0.0001999982888827837, - "loss": 46.0, - "step": 24365 - }, - { - "epoch": 1.8629508572739262, - "grad_norm": 0.0010988058056682348, - "learning_rate": 0.00019999828874226654, - "loss": 46.0, - "step": 24366 - }, - { - "epoch": 1.863027314257316, - "grad_norm": 0.0008805750403553247, - "learning_rate": 0.00019999828860174364, - "loss": 46.0, - "step": 24367 - }, - { - "epoch": 1.8631037712407057, - "grad_norm": 0.0027690345887094736, - "learning_rate": 0.00019999828846121496, - "loss": 46.0, - "step": 24368 - }, - { - "epoch": 1.8631802282240955, - "grad_norm": 0.0021754212211817503, - "learning_rate": 0.00019999828832068051, - "loss": 46.0, - "step": 24369 - }, - { - "epoch": 1.8632566852074852, - "grad_norm": 0.0009094441775232553, - "learning_rate": 0.0001999982881801403, - "loss": 46.0, - "step": 24370 - }, - { - "epoch": 1.863333142190875, - "grad_norm": 0.0017631532391533256, - "learning_rate": 0.0001999982880395943, - "loss": 46.0, - "step": 24371 - }, - { - "epoch": 1.8634095991742647, - "grad_norm": 0.0023792963474988937, - "learning_rate": 0.00019999828789904256, - "loss": 46.0, - "step": 24372 - }, - { - "epoch": 1.8634860561576543, - "grad_norm": 0.0008597279665991664, - "learning_rate": 0.00019999828775848504, - "loss": 46.0, - "step": 24373 - }, - { - "epoch": 1.863562513141044, - "grad_norm": 0.0013290847418829799, - "learning_rate": 0.00019999828761792175, - "loss": 46.0, - "step": 24374 - }, - { - "epoch": 1.8636389701244336, - "grad_norm": 0.0005624671466648579, - "learning_rate": 0.00019999828747735266, - "loss": 46.0, - "step": 24375 - }, - { - "epoch": 1.8637154271078233, - "grad_norm": 0.014460711739957333, - "learning_rate": 0.00019999828733677783, - "loss": 46.0, - "step": 24376 - }, - { - "epoch": 1.863791884091213, - "grad_norm": 0.0007604233687743545, - "learning_rate": 0.00019999828719619722, - "loss": 46.0, - "step": 24377 - }, - { - "epoch": 1.8638683410746029, - "grad_norm": 0.0005617126007564366, - "learning_rate": 0.00019999828705561087, - "loss": 46.0, - "step": 24378 - }, - { - "epoch": 1.8639447980579926, - "grad_norm": 0.004724816884845495, - "learning_rate": 0.0001999982869150187, - "loss": 46.0, - "step": 24379 - }, - { - "epoch": 1.8640212550413824, - "grad_norm": 0.002206844510510564, - "learning_rate": 0.0001999982867744208, - "loss": 46.0, - "step": 24380 - }, - { - "epoch": 1.8640977120247721, - "grad_norm": 0.0007577717187814415, - "learning_rate": 0.0001999982866338171, - "loss": 46.0, - "step": 24381 - }, - { - "epoch": 1.864174169008162, - "grad_norm": 0.0020656753331422806, - "learning_rate": 0.0001999982864932077, - "loss": 46.0, - "step": 24382 - }, - { - "epoch": 1.8642506259915517, - "grad_norm": 0.002052735071629286, - "learning_rate": 0.00019999828635259247, - "loss": 46.0, - "step": 24383 - }, - { - "epoch": 1.8643270829749412, - "grad_norm": 0.0010528728598728776, - "learning_rate": 0.00019999828621197147, - "loss": 46.0, - "step": 24384 - }, - { - "epoch": 1.864403539958331, - "grad_norm": 0.0011211491655558348, - "learning_rate": 0.00019999828607134473, - "loss": 46.0, - "step": 24385 - }, - { - "epoch": 1.8644799969417205, - "grad_norm": 0.0009848263580352068, - "learning_rate": 0.0001999982859307122, - "loss": 46.0, - "step": 24386 - }, - { - "epoch": 1.8645564539251103, - "grad_norm": 0.0033407120499759912, - "learning_rate": 0.0001999982857900739, - "loss": 46.0, - "step": 24387 - }, - { - "epoch": 1.8646329109085, - "grad_norm": 0.0009989996906369925, - "learning_rate": 0.00019999828564942984, - "loss": 46.0, - "step": 24388 - }, - { - "epoch": 1.8647093678918898, - "grad_norm": 0.0010883756913244724, - "learning_rate": 0.00019999828550877998, - "loss": 46.0, - "step": 24389 - }, - { - "epoch": 1.8647858248752796, - "grad_norm": 0.004948021844029427, - "learning_rate": 0.0001999982853681244, - "loss": 46.0, - "step": 24390 - }, - { - "epoch": 1.8648622818586693, - "grad_norm": 0.0044440315105021, - "learning_rate": 0.00019999828522746302, - "loss": 46.0, - "step": 24391 - }, - { - "epoch": 1.864938738842059, - "grad_norm": 0.0008561125723645091, - "learning_rate": 0.0001999982850867959, - "loss": 46.0, - "step": 24392 - }, - { - "epoch": 1.8650151958254488, - "grad_norm": 0.0005297197494655848, - "learning_rate": 0.00019999828494612296, - "loss": 46.0, - "step": 24393 - }, - { - "epoch": 1.8650916528088384, - "grad_norm": 0.0011938869720324874, - "learning_rate": 0.0001999982848054443, - "loss": 46.0, - "step": 24394 - }, - { - "epoch": 1.8651681097922281, - "grad_norm": 0.0031873309053480625, - "learning_rate": 0.00019999828466475981, - "loss": 46.0, - "step": 24395 - }, - { - "epoch": 1.865244566775618, - "grad_norm": 0.006199810653924942, - "learning_rate": 0.00019999828452406962, - "loss": 46.0, - "step": 24396 - }, - { - "epoch": 1.8653210237590074, - "grad_norm": 0.0030445787124335766, - "learning_rate": 0.00019999828438337363, - "loss": 46.0, - "step": 24397 - }, - { - "epoch": 1.8653974807423972, - "grad_norm": 0.0008772070286795497, - "learning_rate": 0.00019999828424267186, - "loss": 46.0, - "step": 24398 - }, - { - "epoch": 1.865473937725787, - "grad_norm": 0.0004670654016081244, - "learning_rate": 0.00019999828410196434, - "loss": 46.0, - "step": 24399 - }, - { - "epoch": 1.8655503947091767, - "grad_norm": 0.0005748254479840398, - "learning_rate": 0.00019999828396125103, - "loss": 46.0, - "step": 24400 - }, - { - "epoch": 1.8656268516925665, - "grad_norm": 0.0012402923312038183, - "learning_rate": 0.00019999828382053197, - "loss": 46.0, - "step": 24401 - }, - { - "epoch": 1.8657033086759562, - "grad_norm": 0.002196715911850333, - "learning_rate": 0.00019999828367980716, - "loss": 46.0, - "step": 24402 - }, - { - "epoch": 1.865779765659346, - "grad_norm": 0.0008591005462221801, - "learning_rate": 0.00019999828353907653, - "loss": 46.0, - "step": 24403 - }, - { - "epoch": 1.8658562226427358, - "grad_norm": 0.00043613393791019917, - "learning_rate": 0.00019999828339834017, - "loss": 46.0, - "step": 24404 - }, - { - "epoch": 1.8659326796261253, - "grad_norm": 0.0006122419727034867, - "learning_rate": 0.000199998283257598, - "loss": 46.0, - "step": 24405 - }, - { - "epoch": 1.866009136609515, - "grad_norm": 0.002290830947458744, - "learning_rate": 0.0001999982831168501, - "loss": 46.0, - "step": 24406 - }, - { - "epoch": 1.8660855935929048, - "grad_norm": 0.0012005933094769716, - "learning_rate": 0.00019999828297609642, - "loss": 46.0, - "step": 24407 - }, - { - "epoch": 1.8661620505762944, - "grad_norm": 0.0013345555635169148, - "learning_rate": 0.00019999828283533697, - "loss": 46.0, - "step": 24408 - }, - { - "epoch": 1.8662385075596841, - "grad_norm": 0.0005205682828091085, - "learning_rate": 0.00019999828269457175, - "loss": 46.0, - "step": 24409 - }, - { - "epoch": 1.8663149645430739, - "grad_norm": 0.0007695188978686929, - "learning_rate": 0.00019999828255380076, - "loss": 46.0, - "step": 24410 - }, - { - "epoch": 1.8663914215264636, - "grad_norm": 0.0007895085145719349, - "learning_rate": 0.000199998282413024, - "loss": 46.0, - "step": 24411 - }, - { - "epoch": 1.8664678785098534, - "grad_norm": 0.0009795501828193665, - "learning_rate": 0.00019999828227224145, - "loss": 46.0, - "step": 24412 - }, - { - "epoch": 1.8665443354932432, - "grad_norm": 0.0012096166610717773, - "learning_rate": 0.00019999828213145316, - "loss": 46.0, - "step": 24413 - }, - { - "epoch": 1.866620792476633, - "grad_norm": 0.001128451549448073, - "learning_rate": 0.0001999982819906591, - "loss": 46.0, - "step": 24414 - }, - { - "epoch": 1.8666972494600227, - "grad_norm": 0.00040535206790082157, - "learning_rate": 0.00019999828184985927, - "loss": 46.0, - "step": 24415 - }, - { - "epoch": 1.8667737064434122, - "grad_norm": 0.002225355478003621, - "learning_rate": 0.00019999828170905363, - "loss": 46.0, - "step": 24416 - }, - { - "epoch": 1.866850163426802, - "grad_norm": 0.0013886241940781474, - "learning_rate": 0.00019999828156824228, - "loss": 46.0, - "step": 24417 - }, - { - "epoch": 1.8669266204101918, - "grad_norm": 0.0010280228452757, - "learning_rate": 0.00019999828142742515, - "loss": 46.0, - "step": 24418 - }, - { - "epoch": 1.8670030773935813, - "grad_norm": 0.0005310793640092015, - "learning_rate": 0.0001999982812866022, - "loss": 46.0, - "step": 24419 - }, - { - "epoch": 1.867079534376971, - "grad_norm": 0.0015119437593966722, - "learning_rate": 0.00019999828114577352, - "loss": 46.0, - "step": 24420 - }, - { - "epoch": 1.8671559913603608, - "grad_norm": 0.004198964685201645, - "learning_rate": 0.00019999828100493908, - "loss": 46.0, - "step": 24421 - }, - { - "epoch": 1.8672324483437506, - "grad_norm": 0.0007355962297879159, - "learning_rate": 0.00019999828086409886, - "loss": 46.0, - "step": 24422 - }, - { - "epoch": 1.8673089053271403, - "grad_norm": 0.006976368371397257, - "learning_rate": 0.00019999828072325286, - "loss": 46.0, - "step": 24423 - }, - { - "epoch": 1.86738536231053, - "grad_norm": 0.0012016210239380598, - "learning_rate": 0.0001999982805824011, - "loss": 46.0, - "step": 24424 - }, - { - "epoch": 1.8674618192939199, - "grad_norm": 0.001741177518852055, - "learning_rate": 0.00019999828044154356, - "loss": 46.0, - "step": 24425 - }, - { - "epoch": 1.8675382762773096, - "grad_norm": 0.002907464513555169, - "learning_rate": 0.00019999828030068027, - "loss": 46.0, - "step": 24426 - }, - { - "epoch": 1.8676147332606992, - "grad_norm": 0.0009455534745939076, - "learning_rate": 0.00019999828015981118, - "loss": 46.0, - "step": 24427 - }, - { - "epoch": 1.867691190244089, - "grad_norm": 0.0006432398222386837, - "learning_rate": 0.00019999828001893635, - "loss": 46.0, - "step": 24428 - }, - { - "epoch": 1.8677676472274787, - "grad_norm": 0.006211003288626671, - "learning_rate": 0.00019999827987805574, - "loss": 46.0, - "step": 24429 - }, - { - "epoch": 1.8678441042108682, - "grad_norm": 0.0007333803805522621, - "learning_rate": 0.0001999982797371694, - "loss": 46.0, - "step": 24430 - }, - { - "epoch": 1.867920561194258, - "grad_norm": 0.002615738194435835, - "learning_rate": 0.0001999982795962772, - "loss": 46.0, - "step": 24431 - }, - { - "epoch": 1.8679970181776477, - "grad_norm": 0.0016500019701197743, - "learning_rate": 0.0001999982794553793, - "loss": 46.0, - "step": 24432 - }, - { - "epoch": 1.8680734751610375, - "grad_norm": 0.00065139977959916, - "learning_rate": 0.0001999982793144756, - "loss": 46.0, - "step": 24433 - }, - { - "epoch": 1.8681499321444273, - "grad_norm": 0.0018682519439607859, - "learning_rate": 0.00019999827917356616, - "loss": 46.0, - "step": 24434 - }, - { - "epoch": 1.868226389127817, - "grad_norm": 0.0017565478337928653, - "learning_rate": 0.00019999827903265094, - "loss": 46.0, - "step": 24435 - }, - { - "epoch": 1.8683028461112068, - "grad_norm": 0.00086885632481426, - "learning_rate": 0.00019999827889172992, - "loss": 46.0, - "step": 24436 - }, - { - "epoch": 1.8683793030945965, - "grad_norm": 0.0026381935458630323, - "learning_rate": 0.00019999827875080318, - "loss": 46.0, - "step": 24437 - }, - { - "epoch": 1.868455760077986, - "grad_norm": 0.001720384112559259, - "learning_rate": 0.00019999827860987064, - "loss": 46.0, - "step": 24438 - }, - { - "epoch": 1.8685322170613758, - "grad_norm": 0.0009916112758219242, - "learning_rate": 0.00019999827846893235, - "loss": 46.0, - "step": 24439 - }, - { - "epoch": 1.8686086740447656, - "grad_norm": 0.001400334993377328, - "learning_rate": 0.00019999827832798827, - "loss": 46.0, - "step": 24440 - }, - { - "epoch": 1.8686851310281551, - "grad_norm": 0.00034276899532414973, - "learning_rate": 0.00019999827818703843, - "loss": 46.0, - "step": 24441 - }, - { - "epoch": 1.868761588011545, - "grad_norm": 0.0011658057337626815, - "learning_rate": 0.00019999827804608283, - "loss": 46.0, - "step": 24442 - }, - { - "epoch": 1.8688380449949347, - "grad_norm": 0.010067655704915524, - "learning_rate": 0.00019999827790512145, - "loss": 46.0, - "step": 24443 - }, - { - "epoch": 1.8689145019783244, - "grad_norm": 0.0008100769482553005, - "learning_rate": 0.0001999982777641543, - "loss": 46.0, - "step": 24444 - }, - { - "epoch": 1.8689909589617142, - "grad_norm": 0.0008821483352221549, - "learning_rate": 0.00019999827762318137, - "loss": 46.0, - "step": 24445 - }, - { - "epoch": 1.869067415945104, - "grad_norm": 0.0009331233450211585, - "learning_rate": 0.00019999827748220267, - "loss": 46.0, - "step": 24446 - }, - { - "epoch": 1.8691438729284937, - "grad_norm": 0.0002576448896434158, - "learning_rate": 0.00019999827734121822, - "loss": 46.0, - "step": 24447 - }, - { - "epoch": 1.8692203299118835, - "grad_norm": 0.00041902950033545494, - "learning_rate": 0.000199998277200228, - "loss": 46.0, - "step": 24448 - }, - { - "epoch": 1.869296786895273, - "grad_norm": 0.0009253700845874846, - "learning_rate": 0.000199998277059232, - "loss": 46.0, - "step": 24449 - }, - { - "epoch": 1.8693732438786628, - "grad_norm": 0.0013820132007822394, - "learning_rate": 0.00019999827691823024, - "loss": 46.0, - "step": 24450 - }, - { - "epoch": 1.8694497008620525, - "grad_norm": 0.006285613402724266, - "learning_rate": 0.0001999982767772227, - "loss": 46.0, - "step": 24451 - }, - { - "epoch": 1.869526157845442, - "grad_norm": 0.0009755613282322884, - "learning_rate": 0.00019999827663620941, - "loss": 46.0, - "step": 24452 - }, - { - "epoch": 1.8696026148288318, - "grad_norm": 0.0009945965139195323, - "learning_rate": 0.00019999827649519033, - "loss": 46.0, - "step": 24453 - }, - { - "epoch": 1.8696790718122216, - "grad_norm": 0.005110160913318396, - "learning_rate": 0.00019999827635416547, - "loss": 46.0, - "step": 24454 - }, - { - "epoch": 1.8697555287956114, - "grad_norm": 0.0016918452456593513, - "learning_rate": 0.00019999827621313486, - "loss": 46.0, - "step": 24455 - }, - { - "epoch": 1.8698319857790011, - "grad_norm": 0.0007825566572137177, - "learning_rate": 0.00019999827607209848, - "loss": 46.0, - "step": 24456 - }, - { - "epoch": 1.8699084427623909, - "grad_norm": 0.0004155226342845708, - "learning_rate": 0.00019999827593105636, - "loss": 46.0, - "step": 24457 - }, - { - "epoch": 1.8699848997457806, - "grad_norm": 0.0011150981299579144, - "learning_rate": 0.00019999827579000843, - "loss": 46.0, - "step": 24458 - }, - { - "epoch": 1.8700613567291704, - "grad_norm": 0.004238279070705175, - "learning_rate": 0.00019999827564895473, - "loss": 46.0, - "step": 24459 - }, - { - "epoch": 1.87013781371256, - "grad_norm": 0.0009951271349564195, - "learning_rate": 0.00019999827550789528, - "loss": 46.0, - "step": 24460 - }, - { - "epoch": 1.8702142706959497, - "grad_norm": 0.0006854430539533496, - "learning_rate": 0.00019999827536683004, - "loss": 46.0, - "step": 24461 - }, - { - "epoch": 1.8702907276793395, - "grad_norm": 0.009353235363960266, - "learning_rate": 0.00019999827522575905, - "loss": 46.0, - "step": 24462 - }, - { - "epoch": 1.870367184662729, - "grad_norm": 0.002244858071208, - "learning_rate": 0.00019999827508468228, - "loss": 46.0, - "step": 24463 - }, - { - "epoch": 1.8704436416461188, - "grad_norm": 0.0008229893282987177, - "learning_rate": 0.00019999827494359974, - "loss": 46.0, - "step": 24464 - }, - { - "epoch": 1.8705200986295085, - "grad_norm": 0.002377427415922284, - "learning_rate": 0.00019999827480251145, - "loss": 46.0, - "step": 24465 - }, - { - "epoch": 1.8705965556128983, - "grad_norm": 0.005021687597036362, - "learning_rate": 0.00019999827466141737, - "loss": 46.0, - "step": 24466 - }, - { - "epoch": 1.870673012596288, - "grad_norm": 0.0028901048935949802, - "learning_rate": 0.0001999982745203175, - "loss": 46.0, - "step": 24467 - }, - { - "epoch": 1.8707494695796778, - "grad_norm": 0.0014143723528832197, - "learning_rate": 0.0001999982743792119, - "loss": 46.0, - "step": 24468 - }, - { - "epoch": 1.8708259265630676, - "grad_norm": 0.005924125202000141, - "learning_rate": 0.00019999827423810052, - "loss": 46.0, - "step": 24469 - }, - { - "epoch": 1.8709023835464573, - "grad_norm": 0.0008604479371570051, - "learning_rate": 0.00019999827409698337, - "loss": 46.0, - "step": 24470 - }, - { - "epoch": 1.8709788405298469, - "grad_norm": 0.002774209249764681, - "learning_rate": 0.00019999827395586044, - "loss": 46.0, - "step": 24471 - }, - { - "epoch": 1.8710552975132366, - "grad_norm": 0.0010927956318482757, - "learning_rate": 0.00019999827381473177, - "loss": 46.0, - "step": 24472 - }, - { - "epoch": 1.8711317544966264, - "grad_norm": 0.006643348373472691, - "learning_rate": 0.0001999982736735973, - "loss": 46.0, - "step": 24473 - }, - { - "epoch": 1.871208211480016, - "grad_norm": 0.0009108408121392131, - "learning_rate": 0.00019999827353245708, - "loss": 46.0, - "step": 24474 - }, - { - "epoch": 1.8712846684634057, - "grad_norm": 0.0005059065879322588, - "learning_rate": 0.00019999827339131106, - "loss": 46.0, - "step": 24475 - }, - { - "epoch": 1.8713611254467954, - "grad_norm": 0.0016773603856563568, - "learning_rate": 0.0001999982732501593, - "loss": 46.0, - "step": 24476 - }, - { - "epoch": 1.8714375824301852, - "grad_norm": 0.0010655325604602695, - "learning_rate": 0.00019999827310900175, - "loss": 46.0, - "step": 24477 - }, - { - "epoch": 1.871514039413575, - "grad_norm": 0.002016855403780937, - "learning_rate": 0.00019999827296783847, - "loss": 46.0, - "step": 24478 - }, - { - "epoch": 1.8715904963969647, - "grad_norm": 0.001541712204925716, - "learning_rate": 0.00019999827282666938, - "loss": 46.0, - "step": 24479 - }, - { - "epoch": 1.8716669533803545, - "grad_norm": 0.0042297979816794395, - "learning_rate": 0.00019999827268549452, - "loss": 46.0, - "step": 24480 - }, - { - "epoch": 1.8717434103637443, - "grad_norm": 0.0007102172821760178, - "learning_rate": 0.00019999827254431392, - "loss": 46.0, - "step": 24481 - }, - { - "epoch": 1.8718198673471338, - "grad_norm": 0.0009684275719337165, - "learning_rate": 0.0001999982724031275, - "loss": 46.0, - "step": 24482 - }, - { - "epoch": 1.8718963243305236, - "grad_norm": 0.0009876557160168886, - "learning_rate": 0.0001999982722619354, - "loss": 46.0, - "step": 24483 - }, - { - "epoch": 1.8719727813139133, - "grad_norm": 0.0003435726393945515, - "learning_rate": 0.00019999827212073746, - "loss": 46.0, - "step": 24484 - }, - { - "epoch": 1.8720492382973029, - "grad_norm": 0.0016244461294263601, - "learning_rate": 0.0001999982719795338, - "loss": 46.0, - "step": 24485 - }, - { - "epoch": 1.8721256952806926, - "grad_norm": 0.0007226461893878877, - "learning_rate": 0.0001999982718383243, - "loss": 46.0, - "step": 24486 - }, - { - "epoch": 1.8722021522640824, - "grad_norm": 0.0019329085480421782, - "learning_rate": 0.00019999827169710907, - "loss": 46.0, - "step": 24487 - }, - { - "epoch": 1.8722786092474721, - "grad_norm": 0.0007299327407963574, - "learning_rate": 0.00019999827155588808, - "loss": 46.0, - "step": 24488 - }, - { - "epoch": 1.872355066230862, - "grad_norm": 0.00095442368183285, - "learning_rate": 0.00019999827141466129, - "loss": 46.0, - "step": 24489 - }, - { - "epoch": 1.8724315232142517, - "grad_norm": 0.001932155922986567, - "learning_rate": 0.00019999827127342877, - "loss": 46.0, - "step": 24490 - }, - { - "epoch": 1.8725079801976414, - "grad_norm": 0.0011166568147018552, - "learning_rate": 0.00019999827113219046, - "loss": 46.0, - "step": 24491 - }, - { - "epoch": 1.8725844371810312, - "grad_norm": 0.0005555366515181959, - "learning_rate": 0.0001999982709909464, - "loss": 46.0, - "step": 24492 - }, - { - "epoch": 1.8726608941644207, - "grad_norm": 0.0012485034530982375, - "learning_rate": 0.00019999827084969654, - "loss": 46.0, - "step": 24493 - }, - { - "epoch": 1.8727373511478105, - "grad_norm": 0.0007916319882497191, - "learning_rate": 0.0001999982707084409, - "loss": 46.0, - "step": 24494 - }, - { - "epoch": 1.8728138081312, - "grad_norm": 0.000595214543864131, - "learning_rate": 0.00019999827056717953, - "loss": 46.0, - "step": 24495 - }, - { - "epoch": 1.8728902651145898, - "grad_norm": 0.0010901688365265727, - "learning_rate": 0.00019999827042591238, - "loss": 46.0, - "step": 24496 - }, - { - "epoch": 1.8729667220979795, - "grad_norm": 0.0009604181395843625, - "learning_rate": 0.00019999827028463946, - "loss": 46.0, - "step": 24497 - }, - { - "epoch": 1.8730431790813693, - "grad_norm": 0.004549084696918726, - "learning_rate": 0.00019999827014336076, - "loss": 46.0, - "step": 24498 - }, - { - "epoch": 1.873119636064759, - "grad_norm": 0.0008366979891434312, - "learning_rate": 0.00019999827000207628, - "loss": 46.0, - "step": 24499 - }, - { - "epoch": 1.8731960930481488, - "grad_norm": 0.00029412517324090004, - "learning_rate": 0.00019999826986078604, - "loss": 46.0, - "step": 24500 - }, - { - "epoch": 1.8732725500315386, - "grad_norm": 0.000268264819169417, - "learning_rate": 0.00019999826971949005, - "loss": 46.0, - "step": 24501 - }, - { - "epoch": 1.8733490070149283, - "grad_norm": 0.0014895758358761668, - "learning_rate": 0.00019999826957818828, - "loss": 46.0, - "step": 24502 - }, - { - "epoch": 1.873425463998318, - "grad_norm": 0.00035663298331201077, - "learning_rate": 0.00019999826943688074, - "loss": 46.0, - "step": 24503 - }, - { - "epoch": 1.8735019209817076, - "grad_norm": 0.0011001632083207369, - "learning_rate": 0.00019999826929556743, - "loss": 46.0, - "step": 24504 - }, - { - "epoch": 1.8735783779650974, - "grad_norm": 0.0015001454157754779, - "learning_rate": 0.00019999826915424835, - "loss": 46.0, - "step": 24505 - }, - { - "epoch": 1.873654834948487, - "grad_norm": 0.0006530695827677846, - "learning_rate": 0.0001999982690129235, - "loss": 46.0, - "step": 24506 - }, - { - "epoch": 1.8737312919318767, - "grad_norm": 0.00045370415318757296, - "learning_rate": 0.00019999826887159288, - "loss": 46.0, - "step": 24507 - }, - { - "epoch": 1.8738077489152665, - "grad_norm": 0.00073546904604882, - "learning_rate": 0.00019999826873025648, - "loss": 46.0, - "step": 24508 - }, - { - "epoch": 1.8738842058986562, - "grad_norm": 0.00421450100839138, - "learning_rate": 0.00019999826858891435, - "loss": 46.0, - "step": 24509 - }, - { - "epoch": 1.873960662882046, - "grad_norm": 0.0016301005380228162, - "learning_rate": 0.0001999982684475664, - "loss": 46.0, - "step": 24510 - }, - { - "epoch": 1.8740371198654358, - "grad_norm": 0.0058384365402162075, - "learning_rate": 0.0001999982683062127, - "loss": 46.0, - "step": 24511 - }, - { - "epoch": 1.8741135768488255, - "grad_norm": 0.0005423123948276043, - "learning_rate": 0.00019999826816485326, - "loss": 46.0, - "step": 24512 - }, - { - "epoch": 1.8741900338322153, - "grad_norm": 0.0012862710282206535, - "learning_rate": 0.000199998268023488, - "loss": 46.0, - "step": 24513 - }, - { - "epoch": 1.874266490815605, - "grad_norm": 0.000843945425003767, - "learning_rate": 0.00019999826788211702, - "loss": 46.0, - "step": 24514 - }, - { - "epoch": 1.8743429477989946, - "grad_norm": 0.005360694136470556, - "learning_rate": 0.00019999826774074023, - "loss": 46.0, - "step": 24515 - }, - { - "epoch": 1.8744194047823843, - "grad_norm": 0.0008958461112342775, - "learning_rate": 0.0001999982675993577, - "loss": 46.0, - "step": 24516 - }, - { - "epoch": 1.8744958617657739, - "grad_norm": 0.0011793512385338545, - "learning_rate": 0.00019999826745796938, - "loss": 46.0, - "step": 24517 - }, - { - "epoch": 1.8745723187491636, - "grad_norm": 0.0011013824259862304, - "learning_rate": 0.00019999826731657532, - "loss": 46.0, - "step": 24518 - }, - { - "epoch": 1.8746487757325534, - "grad_norm": 0.0009038368589244783, - "learning_rate": 0.00019999826717517543, - "loss": 46.0, - "step": 24519 - }, - { - "epoch": 1.8747252327159432, - "grad_norm": 0.012815146706998348, - "learning_rate": 0.00019999826703376983, - "loss": 46.0, - "step": 24520 - }, - { - "epoch": 1.874801689699333, - "grad_norm": 0.0026535573415458202, - "learning_rate": 0.00019999826689235843, - "loss": 46.0, - "step": 24521 - }, - { - "epoch": 1.8748781466827227, - "grad_norm": 0.0013110985746607184, - "learning_rate": 0.0001999982667509413, - "loss": 46.0, - "step": 24522 - }, - { - "epoch": 1.8749546036661124, - "grad_norm": 0.0009825148154050112, - "learning_rate": 0.00019999826660951838, - "loss": 46.0, - "step": 24523 - }, - { - "epoch": 1.8750310606495022, - "grad_norm": 0.002428684150800109, - "learning_rate": 0.00019999826646808965, - "loss": 46.0, - "step": 24524 - }, - { - "epoch": 1.8751075176328917, - "grad_norm": 0.0005493476637639105, - "learning_rate": 0.00019999826632665518, - "loss": 46.0, - "step": 24525 - }, - { - "epoch": 1.8751839746162815, - "grad_norm": 0.0004378874145913869, - "learning_rate": 0.00019999826618521496, - "loss": 46.0, - "step": 24526 - }, - { - "epoch": 1.8752604315996713, - "grad_norm": 0.0024309037253260612, - "learning_rate": 0.00019999826604376894, - "loss": 46.0, - "step": 24527 - }, - { - "epoch": 1.8753368885830608, - "grad_norm": 0.009604115039110184, - "learning_rate": 0.00019999826590231718, - "loss": 46.0, - "step": 24528 - }, - { - "epoch": 1.8754133455664506, - "grad_norm": 0.0010836664587259293, - "learning_rate": 0.0001999982657608596, - "loss": 46.0, - "step": 24529 - }, - { - "epoch": 1.8754898025498403, - "grad_norm": 0.0012843436561524868, - "learning_rate": 0.00019999826561939633, - "loss": 46.0, - "step": 24530 - }, - { - "epoch": 1.87556625953323, - "grad_norm": 0.0002768023405224085, - "learning_rate": 0.00019999826547792724, - "loss": 46.0, - "step": 24531 - }, - { - "epoch": 1.8756427165166198, - "grad_norm": 0.0005219127633608878, - "learning_rate": 0.00019999826533645239, - "loss": 46.0, - "step": 24532 - }, - { - "epoch": 1.8757191735000096, - "grad_norm": 0.00045046856394037604, - "learning_rate": 0.00019999826519497175, - "loss": 46.0, - "step": 24533 - }, - { - "epoch": 1.8757956304833994, - "grad_norm": 0.002908071968704462, - "learning_rate": 0.00019999826505348538, - "loss": 46.0, - "step": 24534 - }, - { - "epoch": 1.8758720874667891, - "grad_norm": 0.0007360133458860219, - "learning_rate": 0.0001999982649119932, - "loss": 46.0, - "step": 24535 - }, - { - "epoch": 1.8759485444501787, - "grad_norm": 0.0009785944130271673, - "learning_rate": 0.00019999826477049527, - "loss": 46.0, - "step": 24536 - }, - { - "epoch": 1.8760250014335684, - "grad_norm": 0.0008189226500689983, - "learning_rate": 0.00019999826462899158, - "loss": 46.0, - "step": 24537 - }, - { - "epoch": 1.8761014584169582, - "grad_norm": 0.0006891925004310906, - "learning_rate": 0.0001999982644874821, - "loss": 46.0, - "step": 24538 - }, - { - "epoch": 1.8761779154003477, - "grad_norm": 0.0006073514232411981, - "learning_rate": 0.00019999826434596686, - "loss": 46.0, - "step": 24539 - }, - { - "epoch": 1.8762543723837375, - "grad_norm": 0.0030272388830780983, - "learning_rate": 0.00019999826420444587, - "loss": 46.0, - "step": 24540 - }, - { - "epoch": 1.8763308293671273, - "grad_norm": 0.002854966791346669, - "learning_rate": 0.00019999826406291908, - "loss": 46.0, - "step": 24541 - }, - { - "epoch": 1.876407286350517, - "grad_norm": 0.0007178594823926687, - "learning_rate": 0.00019999826392138654, - "loss": 46.0, - "step": 24542 - }, - { - "epoch": 1.8764837433339068, - "grad_norm": 0.0007962700328789651, - "learning_rate": 0.00019999826377984823, - "loss": 46.0, - "step": 24543 - }, - { - "epoch": 1.8765602003172965, - "grad_norm": 0.0013936894247308373, - "learning_rate": 0.00019999826363830415, - "loss": 46.0, - "step": 24544 - }, - { - "epoch": 1.8766366573006863, - "grad_norm": 0.0007003230857662857, - "learning_rate": 0.0001999982634967543, - "loss": 46.0, - "step": 24545 - }, - { - "epoch": 1.876713114284076, - "grad_norm": 0.000480629998492077, - "learning_rate": 0.00019999826335519866, - "loss": 46.0, - "step": 24546 - }, - { - "epoch": 1.8767895712674656, - "grad_norm": 0.0006380486884154379, - "learning_rate": 0.00019999826321363728, - "loss": 46.0, - "step": 24547 - }, - { - "epoch": 1.8768660282508554, - "grad_norm": 0.000577355211135, - "learning_rate": 0.00019999826307207013, - "loss": 46.0, - "step": 24548 - }, - { - "epoch": 1.8769424852342451, - "grad_norm": 0.005160320084542036, - "learning_rate": 0.0001999982629304972, - "loss": 46.0, - "step": 24549 - }, - { - "epoch": 1.8770189422176347, - "grad_norm": 0.0011860494269058108, - "learning_rate": 0.00019999826278891848, - "loss": 46.0, - "step": 24550 - }, - { - "epoch": 1.8770953992010244, - "grad_norm": 0.0023892447352409363, - "learning_rate": 0.000199998262647334, - "loss": 46.0, - "step": 24551 - }, - { - "epoch": 1.8771718561844142, - "grad_norm": 0.0014236947754397988, - "learning_rate": 0.00019999826250574376, - "loss": 46.0, - "step": 24552 - }, - { - "epoch": 1.877248313167804, - "grad_norm": 0.0006510316743515432, - "learning_rate": 0.00019999826236414775, - "loss": 46.0, - "step": 24553 - }, - { - "epoch": 1.8773247701511937, - "grad_norm": 0.0007313325186260045, - "learning_rate": 0.00019999826222254598, - "loss": 46.0, - "step": 24554 - }, - { - "epoch": 1.8774012271345835, - "grad_norm": 0.0008234083070419729, - "learning_rate": 0.00019999826208093845, - "loss": 46.0, - "step": 24555 - }, - { - "epoch": 1.8774776841179732, - "grad_norm": 0.001224440522491932, - "learning_rate": 0.0001999982619393251, - "loss": 46.0, - "step": 24556 - }, - { - "epoch": 1.877554141101363, - "grad_norm": 0.0005092094652354717, - "learning_rate": 0.00019999826179770605, - "loss": 46.0, - "step": 24557 - }, - { - "epoch": 1.8776305980847525, - "grad_norm": 0.001224069157615304, - "learning_rate": 0.00019999826165608117, - "loss": 46.0, - "step": 24558 - }, - { - "epoch": 1.8777070550681423, - "grad_norm": 0.0006747928564436734, - "learning_rate": 0.00019999826151445056, - "loss": 46.0, - "step": 24559 - }, - { - "epoch": 1.877783512051532, - "grad_norm": 0.0015635519521310925, - "learning_rate": 0.00019999826137281413, - "loss": 46.0, - "step": 24560 - }, - { - "epoch": 1.8778599690349216, - "grad_norm": 0.001057027606293559, - "learning_rate": 0.00019999826123117198, - "loss": 46.0, - "step": 24561 - }, - { - "epoch": 1.8779364260183113, - "grad_norm": 0.0012541123433038592, - "learning_rate": 0.00019999826108952406, - "loss": 46.0, - "step": 24562 - }, - { - "epoch": 1.878012883001701, - "grad_norm": 0.0019493142608553171, - "learning_rate": 0.00019999826094787039, - "loss": 46.0, - "step": 24563 - }, - { - "epoch": 1.8780893399850909, - "grad_norm": 0.010323435999453068, - "learning_rate": 0.0001999982608062109, - "loss": 46.0, - "step": 24564 - }, - { - "epoch": 1.8781657969684806, - "grad_norm": 0.005729845724999905, - "learning_rate": 0.00019999826066454564, - "loss": 46.0, - "step": 24565 - }, - { - "epoch": 1.8782422539518704, - "grad_norm": 0.0028460088651627302, - "learning_rate": 0.00019999826052287465, - "loss": 46.0, - "step": 24566 - }, - { - "epoch": 1.8783187109352601, - "grad_norm": 0.0009143223869614303, - "learning_rate": 0.00019999826038119784, - "loss": 46.0, - "step": 24567 - }, - { - "epoch": 1.87839516791865, - "grad_norm": 0.0013302953448146582, - "learning_rate": 0.0001999982602395153, - "loss": 46.0, - "step": 24568 - }, - { - "epoch": 1.8784716249020394, - "grad_norm": 0.00485475966706872, - "learning_rate": 0.00019999826009782702, - "loss": 46.0, - "step": 24569 - }, - { - "epoch": 1.8785480818854292, - "grad_norm": 0.001227396191097796, - "learning_rate": 0.0001999982599561329, - "loss": 46.0, - "step": 24570 - }, - { - "epoch": 1.878624538868819, - "grad_norm": 0.0036998779978603125, - "learning_rate": 0.00019999825981443302, - "loss": 46.0, - "step": 24571 - }, - { - "epoch": 1.8787009958522085, - "grad_norm": 0.0016569297295063734, - "learning_rate": 0.00019999825967272742, - "loss": 46.0, - "step": 24572 - }, - { - "epoch": 1.8787774528355983, - "grad_norm": 0.0006455841939896345, - "learning_rate": 0.00019999825953101601, - "loss": 46.0, - "step": 24573 - }, - { - "epoch": 1.878853909818988, - "grad_norm": 0.0013256885576993227, - "learning_rate": 0.00019999825938929886, - "loss": 46.0, - "step": 24574 - }, - { - "epoch": 1.8789303668023778, - "grad_norm": 0.0014563980512320995, - "learning_rate": 0.00019999825924757594, - "loss": 46.0, - "step": 24575 - }, - { - "epoch": 1.8790068237857676, - "grad_norm": 0.0011467279400676489, - "learning_rate": 0.00019999825910584722, - "loss": 46.0, - "step": 24576 - }, - { - "epoch": 1.8790832807691573, - "grad_norm": 0.0006850773352198303, - "learning_rate": 0.00019999825896411275, - "loss": 46.0, - "step": 24577 - }, - { - "epoch": 1.879159737752547, - "grad_norm": 0.0009535907302051783, - "learning_rate": 0.0001999982588223725, - "loss": 46.0, - "step": 24578 - }, - { - "epoch": 1.8792361947359368, - "grad_norm": 0.0009148971876129508, - "learning_rate": 0.00019999825868062649, - "loss": 46.0, - "step": 24579 - }, - { - "epoch": 1.8793126517193264, - "grad_norm": 0.001068438752554357, - "learning_rate": 0.0001999982585388747, - "loss": 46.0, - "step": 24580 - }, - { - "epoch": 1.8793891087027161, - "grad_norm": 0.0009263596730306745, - "learning_rate": 0.00019999825839711716, - "loss": 46.0, - "step": 24581 - }, - { - "epoch": 1.879465565686106, - "grad_norm": 0.0009450057987123728, - "learning_rate": 0.00019999825825535385, - "loss": 46.0, - "step": 24582 - }, - { - "epoch": 1.8795420226694954, - "grad_norm": 0.001825155457481742, - "learning_rate": 0.00019999825811358474, - "loss": 46.0, - "step": 24583 - }, - { - "epoch": 1.8796184796528852, - "grad_norm": 0.0005288769607432187, - "learning_rate": 0.00019999825797180988, - "loss": 46.0, - "step": 24584 - }, - { - "epoch": 1.879694936636275, - "grad_norm": 0.0012233048910275102, - "learning_rate": 0.00019999825783002928, - "loss": 46.0, - "step": 24585 - }, - { - "epoch": 1.8797713936196647, - "grad_norm": 0.0010230995249003172, - "learning_rate": 0.00019999825768824287, - "loss": 46.0, - "step": 24586 - }, - { - "epoch": 1.8798478506030545, - "grad_norm": 0.0020094281062483788, - "learning_rate": 0.0001999982575464507, - "loss": 46.0, - "step": 24587 - }, - { - "epoch": 1.8799243075864442, - "grad_norm": 0.0038012228906154633, - "learning_rate": 0.00019999825740465277, - "loss": 46.0, - "step": 24588 - }, - { - "epoch": 1.880000764569834, - "grad_norm": 0.0011561153223738074, - "learning_rate": 0.00019999825726284908, - "loss": 46.0, - "step": 24589 - }, - { - "epoch": 1.8800772215532238, - "grad_norm": 0.0010910233249887824, - "learning_rate": 0.00019999825712103958, - "loss": 46.0, - "step": 24590 - }, - { - "epoch": 1.8801536785366133, - "grad_norm": 0.0016625941498205066, - "learning_rate": 0.00019999825697922434, - "loss": 46.0, - "step": 24591 - }, - { - "epoch": 1.880230135520003, - "grad_norm": 0.0028500158805400133, - "learning_rate": 0.00019999825683740335, - "loss": 46.0, - "step": 24592 - }, - { - "epoch": 1.8803065925033928, - "grad_norm": 0.0029136845842003822, - "learning_rate": 0.00019999825669557656, - "loss": 46.0, - "step": 24593 - }, - { - "epoch": 1.8803830494867824, - "grad_norm": 0.0015776555519551039, - "learning_rate": 0.00019999825655374397, - "loss": 46.0, - "step": 24594 - }, - { - "epoch": 1.8804595064701721, - "grad_norm": 0.0006938427104614675, - "learning_rate": 0.00019999825641190566, - "loss": 46.0, - "step": 24595 - }, - { - "epoch": 1.8805359634535619, - "grad_norm": 0.0027691680006682873, - "learning_rate": 0.00019999825627006157, - "loss": 46.0, - "step": 24596 - }, - { - "epoch": 1.8806124204369516, - "grad_norm": 0.0011147845070809126, - "learning_rate": 0.0001999982561282117, - "loss": 46.0, - "step": 24597 - }, - { - "epoch": 1.8806888774203414, - "grad_norm": 0.001035679248161614, - "learning_rate": 0.0001999982559863561, - "loss": 46.0, - "step": 24598 - }, - { - "epoch": 1.8807653344037312, - "grad_norm": 0.0020652797538787127, - "learning_rate": 0.0001999982558444947, - "loss": 46.0, - "step": 24599 - }, - { - "epoch": 1.880841791387121, - "grad_norm": 0.0007283765589818358, - "learning_rate": 0.0001999982557026275, - "loss": 46.0, - "step": 24600 - }, - { - "epoch": 1.8809182483705107, - "grad_norm": 0.0011391901643946767, - "learning_rate": 0.0001999982555607546, - "loss": 46.0, - "step": 24601 - }, - { - "epoch": 1.8809947053539002, - "grad_norm": 0.0018763246480375528, - "learning_rate": 0.00019999825541887586, - "loss": 46.0, - "step": 24602 - }, - { - "epoch": 1.88107116233729, - "grad_norm": 0.0012015707325190306, - "learning_rate": 0.00019999825527699142, - "loss": 46.0, - "step": 24603 - }, - { - "epoch": 1.8811476193206798, - "grad_norm": 0.001744012231938541, - "learning_rate": 0.00019999825513510115, - "loss": 46.0, - "step": 24604 - }, - { - "epoch": 1.8812240763040693, - "grad_norm": 0.0005731271230615675, - "learning_rate": 0.00019999825499320513, - "loss": 46.0, - "step": 24605 - }, - { - "epoch": 1.881300533287459, - "grad_norm": 0.0017357015749439597, - "learning_rate": 0.00019999825485130334, - "loss": 46.0, - "step": 24606 - }, - { - "epoch": 1.8813769902708488, - "grad_norm": 0.0012829415500164032, - "learning_rate": 0.0001999982547093958, - "loss": 46.0, - "step": 24607 - }, - { - "epoch": 1.8814534472542386, - "grad_norm": 0.00039806694258004427, - "learning_rate": 0.00019999825456748247, - "loss": 46.0, - "step": 24608 - }, - { - "epoch": 1.8815299042376283, - "grad_norm": 0.0012564085191115737, - "learning_rate": 0.00019999825442556339, - "loss": 46.0, - "step": 24609 - }, - { - "epoch": 1.881606361221018, - "grad_norm": 0.0004147278086747974, - "learning_rate": 0.00019999825428363853, - "loss": 46.0, - "step": 24610 - }, - { - "epoch": 1.8816828182044079, - "grad_norm": 0.0023488050792366266, - "learning_rate": 0.0001999982541417079, - "loss": 46.0, - "step": 24611 - }, - { - "epoch": 1.8817592751877976, - "grad_norm": 0.0011952234199270606, - "learning_rate": 0.00019999825399977147, - "loss": 46.0, - "step": 24612 - }, - { - "epoch": 1.8818357321711872, - "grad_norm": 0.0028413129039108753, - "learning_rate": 0.00019999825385782932, - "loss": 46.0, - "step": 24613 - }, - { - "epoch": 1.881912189154577, - "grad_norm": 0.0017588648479431868, - "learning_rate": 0.00019999825371588137, - "loss": 46.0, - "step": 24614 - }, - { - "epoch": 1.8819886461379667, - "grad_norm": 0.0005073270876891911, - "learning_rate": 0.00019999825357392768, - "loss": 46.0, - "step": 24615 - }, - { - "epoch": 1.8820651031213562, - "grad_norm": 0.0005572976660914719, - "learning_rate": 0.0001999982534319682, - "loss": 46.0, - "step": 24616 - }, - { - "epoch": 1.882141560104746, - "grad_norm": 0.0005067386664450169, - "learning_rate": 0.00019999825329000294, - "loss": 46.0, - "step": 24617 - }, - { - "epoch": 1.8822180170881357, - "grad_norm": 0.0006053035031072795, - "learning_rate": 0.00019999825314803192, - "loss": 46.0, - "step": 24618 - }, - { - "epoch": 1.8822944740715255, - "grad_norm": 0.0004883640795014799, - "learning_rate": 0.00019999825300605516, - "loss": 46.0, - "step": 24619 - }, - { - "epoch": 1.8823709310549153, - "grad_norm": 0.0014096918748691678, - "learning_rate": 0.0001999982528640726, - "loss": 46.0, - "step": 24620 - }, - { - "epoch": 1.882447388038305, - "grad_norm": 0.0008274352876469493, - "learning_rate": 0.00019999825272208429, - "loss": 46.0, - "step": 24621 - }, - { - "epoch": 1.8825238450216948, - "grad_norm": 0.001104346476495266, - "learning_rate": 0.00019999825258009018, - "loss": 46.0, - "step": 24622 - }, - { - "epoch": 1.8826003020050845, - "grad_norm": 0.001666387775912881, - "learning_rate": 0.0001999982524380903, - "loss": 46.0, - "step": 24623 - }, - { - "epoch": 1.882676758988474, - "grad_norm": 0.009655337780714035, - "learning_rate": 0.0001999982522960847, - "loss": 46.0, - "step": 24624 - }, - { - "epoch": 1.8827532159718638, - "grad_norm": 0.0042439959943294525, - "learning_rate": 0.00019999825215407326, - "loss": 46.0, - "step": 24625 - }, - { - "epoch": 1.8828296729552534, - "grad_norm": 0.0009557383018545806, - "learning_rate": 0.0001999982520120561, - "loss": 46.0, - "step": 24626 - }, - { - "epoch": 1.8829061299386431, - "grad_norm": 0.0017679401207715273, - "learning_rate": 0.00019999825187003316, - "loss": 46.0, - "step": 24627 - }, - { - "epoch": 1.882982586922033, - "grad_norm": 0.0005287333624437451, - "learning_rate": 0.00019999825172800444, - "loss": 46.0, - "step": 24628 - }, - { - "epoch": 1.8830590439054227, - "grad_norm": 0.001273384434171021, - "learning_rate": 0.00019999825158596997, - "loss": 46.0, - "step": 24629 - }, - { - "epoch": 1.8831355008888124, - "grad_norm": 0.0006854168022982776, - "learning_rate": 0.00019999825144392973, - "loss": 46.0, - "step": 24630 - }, - { - "epoch": 1.8832119578722022, - "grad_norm": 0.0008531357161700726, - "learning_rate": 0.00019999825130188369, - "loss": 46.0, - "step": 24631 - }, - { - "epoch": 1.883288414855592, - "grad_norm": 0.0031230042222887278, - "learning_rate": 0.0001999982511598319, - "loss": 46.0, - "step": 24632 - }, - { - "epoch": 1.8833648718389817, - "grad_norm": 0.0006191385327838361, - "learning_rate": 0.00019999825101777436, - "loss": 46.0, - "step": 24633 - }, - { - "epoch": 1.8834413288223715, - "grad_norm": 0.000485429132822901, - "learning_rate": 0.00019999825087571105, - "loss": 46.0, - "step": 24634 - }, - { - "epoch": 1.883517785805761, - "grad_norm": 0.0030190711840987206, - "learning_rate": 0.00019999825073364194, - "loss": 46.0, - "step": 24635 - }, - { - "epoch": 1.8835942427891508, - "grad_norm": 0.002198781119659543, - "learning_rate": 0.00019999825059156706, - "loss": 46.0, - "step": 24636 - }, - { - "epoch": 1.8836706997725403, - "grad_norm": 0.0005672792322002351, - "learning_rate": 0.00019999825044948643, - "loss": 46.0, - "step": 24637 - }, - { - "epoch": 1.88374715675593, - "grad_norm": 0.001003442332148552, - "learning_rate": 0.00019999825030740006, - "loss": 46.0, - "step": 24638 - }, - { - "epoch": 1.8838236137393198, - "grad_norm": 0.0010664293076843023, - "learning_rate": 0.00019999825016530785, - "loss": 46.0, - "step": 24639 - }, - { - "epoch": 1.8839000707227096, - "grad_norm": 0.002378760138526559, - "learning_rate": 0.0001999982500232099, - "loss": 46.0, - "step": 24640 - }, - { - "epoch": 1.8839765277060994, - "grad_norm": 0.0005571945803239942, - "learning_rate": 0.0001999982498811062, - "loss": 46.0, - "step": 24641 - }, - { - "epoch": 1.8840529846894891, - "grad_norm": 0.0011518622050061822, - "learning_rate": 0.0001999982497389967, - "loss": 46.0, - "step": 24642 - }, - { - "epoch": 1.8841294416728789, - "grad_norm": 0.002100169425830245, - "learning_rate": 0.00019999824959688147, - "loss": 46.0, - "step": 24643 - }, - { - "epoch": 1.8842058986562686, - "grad_norm": 0.0033432957716286182, - "learning_rate": 0.00019999824945476046, - "loss": 46.0, - "step": 24644 - }, - { - "epoch": 1.8842823556396584, - "grad_norm": 0.014557997696101665, - "learning_rate": 0.00019999824931263367, - "loss": 46.0, - "step": 24645 - }, - { - "epoch": 1.884358812623048, - "grad_norm": 0.0005380836082622409, - "learning_rate": 0.0001999982491705011, - "loss": 46.0, - "step": 24646 - }, - { - "epoch": 1.8844352696064377, - "grad_norm": 0.0005122815491631627, - "learning_rate": 0.00019999824902836277, - "loss": 46.0, - "step": 24647 - }, - { - "epoch": 1.8845117265898272, - "grad_norm": 0.0013482756912708282, - "learning_rate": 0.0001999982488862187, - "loss": 46.0, - "step": 24648 - }, - { - "epoch": 1.884588183573217, - "grad_norm": 0.002124207792803645, - "learning_rate": 0.0001999982487440688, - "loss": 46.0, - "step": 24649 - }, - { - "epoch": 1.8846646405566068, - "grad_norm": 0.0017691556131467223, - "learning_rate": 0.00019999824860191318, - "loss": 46.0, - "step": 24650 - }, - { - "epoch": 1.8847410975399965, - "grad_norm": 0.0010154476622119546, - "learning_rate": 0.0001999982484597518, - "loss": 46.0, - "step": 24651 - }, - { - "epoch": 1.8848175545233863, - "grad_norm": 0.0008109635673463345, - "learning_rate": 0.0001999982483175846, - "loss": 46.0, - "step": 24652 - }, - { - "epoch": 1.884894011506776, - "grad_norm": 0.0010708397021517158, - "learning_rate": 0.00019999824817541165, - "loss": 46.0, - "step": 24653 - }, - { - "epoch": 1.8849704684901658, - "grad_norm": 0.007725434843450785, - "learning_rate": 0.00019999824803323296, - "loss": 46.0, - "step": 24654 - }, - { - "epoch": 1.8850469254735556, - "grad_norm": 0.0008287687669508159, - "learning_rate": 0.00019999824789104846, - "loss": 46.0, - "step": 24655 - }, - { - "epoch": 1.885123382456945, - "grad_norm": 0.0011678702430799603, - "learning_rate": 0.00019999824774885822, - "loss": 46.0, - "step": 24656 - }, - { - "epoch": 1.8851998394403349, - "grad_norm": 0.0008198132854886353, - "learning_rate": 0.0001999982476066622, - "loss": 46.0, - "step": 24657 - }, - { - "epoch": 1.8852762964237246, - "grad_norm": 0.004641393199563026, - "learning_rate": 0.0001999982474644604, - "loss": 46.0, - "step": 24658 - }, - { - "epoch": 1.8853527534071142, - "grad_norm": 0.0053079151548445225, - "learning_rate": 0.00019999824732225285, - "loss": 46.0, - "step": 24659 - }, - { - "epoch": 1.885429210390504, - "grad_norm": 0.000767974357586354, - "learning_rate": 0.0001999982471800395, - "loss": 46.0, - "step": 24660 - }, - { - "epoch": 1.8855056673738937, - "grad_norm": 0.0012130244867876172, - "learning_rate": 0.0001999982470378204, - "loss": 46.0, - "step": 24661 - }, - { - "epoch": 1.8855821243572835, - "grad_norm": 0.004133188631385565, - "learning_rate": 0.00019999824689559555, - "loss": 46.0, - "step": 24662 - }, - { - "epoch": 1.8856585813406732, - "grad_norm": 0.002551071112975478, - "learning_rate": 0.0001999982467533649, - "loss": 46.0, - "step": 24663 - }, - { - "epoch": 1.885735038324063, - "grad_norm": 0.001413128338754177, - "learning_rate": 0.0001999982466111285, - "loss": 46.0, - "step": 24664 - }, - { - "epoch": 1.8858114953074527, - "grad_norm": 0.003531447611749172, - "learning_rate": 0.00019999824646888632, - "loss": 46.0, - "step": 24665 - }, - { - "epoch": 1.8858879522908425, - "grad_norm": 0.0005414483603090048, - "learning_rate": 0.00019999824632663837, - "loss": 46.0, - "step": 24666 - }, - { - "epoch": 1.885964409274232, - "grad_norm": 0.0007398679736070335, - "learning_rate": 0.00019999824618438465, - "loss": 46.0, - "step": 24667 - }, - { - "epoch": 1.8860408662576218, - "grad_norm": 0.0009434225503355265, - "learning_rate": 0.00019999824604212516, - "loss": 46.0, - "step": 24668 - }, - { - "epoch": 1.8861173232410116, - "grad_norm": 0.0013627024600282311, - "learning_rate": 0.00019999824589985991, - "loss": 46.0, - "step": 24669 - }, - { - "epoch": 1.886193780224401, - "grad_norm": 0.0005271456320770085, - "learning_rate": 0.0001999982457575889, - "loss": 46.0, - "step": 24670 - }, - { - "epoch": 1.8862702372077909, - "grad_norm": 0.0014919457025825977, - "learning_rate": 0.0001999982456153121, - "loss": 46.0, - "step": 24671 - }, - { - "epoch": 1.8863466941911806, - "grad_norm": 0.0034128958359360695, - "learning_rate": 0.00019999824547302955, - "loss": 46.0, - "step": 24672 - }, - { - "epoch": 1.8864231511745704, - "grad_norm": 0.0010589085286483169, - "learning_rate": 0.00019999824533074121, - "loss": 46.0, - "step": 24673 - }, - { - "epoch": 1.8864996081579601, - "grad_norm": 0.0007168719894252717, - "learning_rate": 0.0001999982451884471, - "loss": 46.0, - "step": 24674 - }, - { - "epoch": 1.88657606514135, - "grad_norm": 0.0012177197495475411, - "learning_rate": 0.00019999824504614722, - "loss": 46.0, - "step": 24675 - }, - { - "epoch": 1.8866525221247397, - "grad_norm": 0.00045947369653731585, - "learning_rate": 0.0001999982449038416, - "loss": 46.0, - "step": 24676 - }, - { - "epoch": 1.8867289791081294, - "grad_norm": 0.0011018654331564903, - "learning_rate": 0.0001999982447615302, - "loss": 46.0, - "step": 24677 - }, - { - "epoch": 1.886805436091519, - "grad_norm": 0.003252594731748104, - "learning_rate": 0.00019999824461921302, - "loss": 46.0, - "step": 24678 - }, - { - "epoch": 1.8868818930749087, - "grad_norm": 0.0006382344872690737, - "learning_rate": 0.00019999824447689007, - "loss": 46.0, - "step": 24679 - }, - { - "epoch": 1.8869583500582985, - "grad_norm": 0.003114924533292651, - "learning_rate": 0.00019999824433456135, - "loss": 46.0, - "step": 24680 - }, - { - "epoch": 1.887034807041688, - "grad_norm": 0.005569116212427616, - "learning_rate": 0.00019999824419222688, - "loss": 46.0, - "step": 24681 - }, - { - "epoch": 1.8871112640250778, - "grad_norm": 0.0013368381187319756, - "learning_rate": 0.00019999824404988661, - "loss": 46.0, - "step": 24682 - }, - { - "epoch": 1.8871877210084675, - "grad_norm": 0.0011965315788984299, - "learning_rate": 0.00019999824390754057, - "loss": 46.0, - "step": 24683 - }, - { - "epoch": 1.8872641779918573, - "grad_norm": 0.0025736431125551462, - "learning_rate": 0.00019999824376518878, - "loss": 46.0, - "step": 24684 - }, - { - "epoch": 1.887340634975247, - "grad_norm": 0.0012781030964106321, - "learning_rate": 0.00019999824362283125, - "loss": 46.0, - "step": 24685 - }, - { - "epoch": 1.8874170919586368, - "grad_norm": 0.0010048523545265198, - "learning_rate": 0.0001999982434804679, - "loss": 46.0, - "step": 24686 - }, - { - "epoch": 1.8874935489420266, - "grad_norm": 0.0029230224899947643, - "learning_rate": 0.00019999824333809878, - "loss": 46.0, - "step": 24687 - }, - { - "epoch": 1.8875700059254163, - "grad_norm": 0.000682733254507184, - "learning_rate": 0.0001999982431957239, - "loss": 46.0, - "step": 24688 - }, - { - "epoch": 1.8876464629088059, - "grad_norm": 0.0018335724016651511, - "learning_rate": 0.00019999824305334327, - "loss": 46.0, - "step": 24689 - }, - { - "epoch": 1.8877229198921956, - "grad_norm": 0.0008607509662397206, - "learning_rate": 0.00019999824291095684, - "loss": 46.0, - "step": 24690 - }, - { - "epoch": 1.8877993768755854, - "grad_norm": 0.0016742546577006578, - "learning_rate": 0.0001999982427685647, - "loss": 46.0, - "step": 24691 - }, - { - "epoch": 1.887875833858975, - "grad_norm": 0.0028891898691654205, - "learning_rate": 0.00019999824262616675, - "loss": 46.0, - "step": 24692 - }, - { - "epoch": 1.8879522908423647, - "grad_norm": 0.002334264572709799, - "learning_rate": 0.00019999824248376303, - "loss": 46.0, - "step": 24693 - }, - { - "epoch": 1.8880287478257545, - "grad_norm": 0.0005821464001201093, - "learning_rate": 0.00019999824234135353, - "loss": 46.0, - "step": 24694 - }, - { - "epoch": 1.8881052048091442, - "grad_norm": 0.0017732614651322365, - "learning_rate": 0.0001999982421989383, - "loss": 46.0, - "step": 24695 - }, - { - "epoch": 1.888181661792534, - "grad_norm": 0.0034232784528285265, - "learning_rate": 0.00019999824205651728, - "loss": 46.0, - "step": 24696 - }, - { - "epoch": 1.8882581187759238, - "grad_norm": 0.0005851819296367466, - "learning_rate": 0.00019999824191409044, - "loss": 46.0, - "step": 24697 - }, - { - "epoch": 1.8883345757593135, - "grad_norm": 0.0005628893850371242, - "learning_rate": 0.0001999982417716579, - "loss": 46.0, - "step": 24698 - }, - { - "epoch": 1.8884110327427033, - "grad_norm": 0.0009058432769961655, - "learning_rate": 0.00019999824162921957, - "loss": 46.0, - "step": 24699 - }, - { - "epoch": 1.8884874897260928, - "grad_norm": 0.0008194043184630573, - "learning_rate": 0.00019999824148677546, - "loss": 46.0, - "step": 24700 - }, - { - "epoch": 1.8885639467094826, - "grad_norm": 0.0009048864594660699, - "learning_rate": 0.00019999824134432558, - "loss": 46.0, - "step": 24701 - }, - { - "epoch": 1.8886404036928723, - "grad_norm": 0.0005796966725029051, - "learning_rate": 0.00019999824120186995, - "loss": 46.0, - "step": 24702 - }, - { - "epoch": 1.8887168606762619, - "grad_norm": 0.002342530293390155, - "learning_rate": 0.00019999824105940852, - "loss": 46.0, - "step": 24703 - }, - { - "epoch": 1.8887933176596516, - "grad_norm": 0.0012555879075080156, - "learning_rate": 0.00019999824091694135, - "loss": 46.0, - "step": 24704 - }, - { - "epoch": 1.8888697746430414, - "grad_norm": 0.0007464020745828748, - "learning_rate": 0.0001999982407744684, - "loss": 46.0, - "step": 24705 - }, - { - "epoch": 1.8889462316264312, - "grad_norm": 0.00080097810132429, - "learning_rate": 0.00019999824063198965, - "loss": 46.0, - "step": 24706 - }, - { - "epoch": 1.889022688609821, - "grad_norm": 0.005033240187913179, - "learning_rate": 0.0001999982404895052, - "loss": 46.0, - "step": 24707 - }, - { - "epoch": 1.8890991455932107, - "grad_norm": 0.0006725105340592563, - "learning_rate": 0.00019999824034701492, - "loss": 46.0, - "step": 24708 - }, - { - "epoch": 1.8891756025766004, - "grad_norm": 0.0007440222543664277, - "learning_rate": 0.0001999982402045189, - "loss": 46.0, - "step": 24709 - }, - { - "epoch": 1.8892520595599902, - "grad_norm": 0.0007157246000133455, - "learning_rate": 0.00019999824006201712, - "loss": 46.0, - "step": 24710 - }, - { - "epoch": 1.8893285165433797, - "grad_norm": 0.0027019151020795107, - "learning_rate": 0.00019999823991950956, - "loss": 46.0, - "step": 24711 - }, - { - "epoch": 1.8894049735267695, - "grad_norm": 0.002709275344386697, - "learning_rate": 0.00019999823977699622, - "loss": 46.0, - "step": 24712 - }, - { - "epoch": 1.8894814305101593, - "grad_norm": 0.0031730926129966974, - "learning_rate": 0.00019999823963447712, - "loss": 46.0, - "step": 24713 - }, - { - "epoch": 1.8895578874935488, - "grad_norm": 0.0017869881121441722, - "learning_rate": 0.00019999823949195224, - "loss": 46.0, - "step": 24714 - }, - { - "epoch": 1.8896343444769386, - "grad_norm": 0.0026095665525645018, - "learning_rate": 0.00019999823934942158, - "loss": 46.0, - "step": 24715 - }, - { - "epoch": 1.8897108014603283, - "grad_norm": 0.0005542807630263269, - "learning_rate": 0.00019999823920688515, - "loss": 46.0, - "step": 24716 - }, - { - "epoch": 1.889787258443718, - "grad_norm": 0.0023922398686408997, - "learning_rate": 0.00019999823906434298, - "loss": 46.0, - "step": 24717 - }, - { - "epoch": 1.8898637154271078, - "grad_norm": 0.0002743404184002429, - "learning_rate": 0.00019999823892179503, - "loss": 46.0, - "step": 24718 - }, - { - "epoch": 1.8899401724104976, - "grad_norm": 0.0017191022634506226, - "learning_rate": 0.0001999982387792413, - "loss": 46.0, - "step": 24719 - }, - { - "epoch": 1.8900166293938874, - "grad_norm": 0.003249636385589838, - "learning_rate": 0.00019999823863668185, - "loss": 46.0, - "step": 24720 - }, - { - "epoch": 1.8900930863772771, - "grad_norm": 0.0009787646122276783, - "learning_rate": 0.00019999823849411655, - "loss": 46.0, - "step": 24721 - }, - { - "epoch": 1.8901695433606667, - "grad_norm": 0.0024778626393526793, - "learning_rate": 0.0001999982383515455, - "loss": 46.0, - "step": 24722 - }, - { - "epoch": 1.8902460003440564, - "grad_norm": 0.001309894840233028, - "learning_rate": 0.00019999823820896872, - "loss": 46.0, - "step": 24723 - }, - { - "epoch": 1.8903224573274462, - "grad_norm": 0.0016520543722435832, - "learning_rate": 0.00019999823806638616, - "loss": 46.0, - "step": 24724 - }, - { - "epoch": 1.8903989143108357, - "grad_norm": 0.001087581622414291, - "learning_rate": 0.00019999823792379783, - "loss": 46.0, - "step": 24725 - }, - { - "epoch": 1.8904753712942255, - "grad_norm": 0.0013746342156082392, - "learning_rate": 0.00019999823778120372, - "loss": 46.0, - "step": 24726 - }, - { - "epoch": 1.8905518282776153, - "grad_norm": 0.0037622456438839436, - "learning_rate": 0.00019999823763860387, - "loss": 46.0, - "step": 24727 - }, - { - "epoch": 1.890628285261005, - "grad_norm": 0.003025029320269823, - "learning_rate": 0.0001999982374959982, - "loss": 46.0, - "step": 24728 - }, - { - "epoch": 1.8907047422443948, - "grad_norm": 0.0031665421556681395, - "learning_rate": 0.0001999982373533868, - "loss": 46.0, - "step": 24729 - }, - { - "epoch": 1.8907811992277845, - "grad_norm": 0.009643260389566422, - "learning_rate": 0.00019999823721076961, - "loss": 46.0, - "step": 24730 - }, - { - "epoch": 1.8908576562111743, - "grad_norm": 0.0002975004317704588, - "learning_rate": 0.00019999823706814664, - "loss": 46.0, - "step": 24731 - }, - { - "epoch": 1.890934113194564, - "grad_norm": 0.026694513857364655, - "learning_rate": 0.00019999823692551792, - "loss": 46.0, - "step": 24732 - }, - { - "epoch": 1.8910105701779536, - "grad_norm": 0.0008916464284993708, - "learning_rate": 0.00019999823678288343, - "loss": 46.0, - "step": 24733 - }, - { - "epoch": 1.8910870271613434, - "grad_norm": 0.0030395861249417067, - "learning_rate": 0.00019999823664024319, - "loss": 46.0, - "step": 24734 - }, - { - "epoch": 1.8911634841447331, - "grad_norm": 0.0012122743064537644, - "learning_rate": 0.00019999823649759715, - "loss": 46.0, - "step": 24735 - }, - { - "epoch": 1.8912399411281227, - "grad_norm": 0.001330947270616889, - "learning_rate": 0.00019999823635494533, - "loss": 46.0, - "step": 24736 - }, - { - "epoch": 1.8913163981115124, - "grad_norm": 0.0003822512226179242, - "learning_rate": 0.00019999823621228777, - "loss": 46.0, - "step": 24737 - }, - { - "epoch": 1.8913928550949022, - "grad_norm": 0.0010991458548232913, - "learning_rate": 0.0001999982360696244, - "loss": 46.0, - "step": 24738 - }, - { - "epoch": 1.891469312078292, - "grad_norm": 0.0005206392961554229, - "learning_rate": 0.00019999823592695533, - "loss": 46.0, - "step": 24739 - }, - { - "epoch": 1.8915457690616817, - "grad_norm": 0.00035012204898521304, - "learning_rate": 0.00019999823578428045, - "loss": 46.0, - "step": 24740 - }, - { - "epoch": 1.8916222260450715, - "grad_norm": 0.001025093370117247, - "learning_rate": 0.0001999982356415998, - "loss": 46.0, - "step": 24741 - }, - { - "epoch": 1.8916986830284612, - "grad_norm": 0.00045775799662806094, - "learning_rate": 0.0001999982354989134, - "loss": 46.0, - "step": 24742 - }, - { - "epoch": 1.891775140011851, - "grad_norm": 0.0010018307948485017, - "learning_rate": 0.0001999982353562212, - "loss": 46.0, - "step": 24743 - }, - { - "epoch": 1.8918515969952405, - "grad_norm": 0.0011945945443585515, - "learning_rate": 0.00019999823521352322, - "loss": 46.0, - "step": 24744 - }, - { - "epoch": 1.8919280539786303, - "grad_norm": 0.0020748113747686148, - "learning_rate": 0.0001999982350708195, - "loss": 46.0, - "step": 24745 - }, - { - "epoch": 1.89200451096202, - "grad_norm": 0.0012052421225234866, - "learning_rate": 0.00019999823492811004, - "loss": 46.0, - "step": 24746 - }, - { - "epoch": 1.8920809679454096, - "grad_norm": 0.0009332199115306139, - "learning_rate": 0.00019999823478539477, - "loss": 46.0, - "step": 24747 - }, - { - "epoch": 1.8921574249287993, - "grad_norm": 0.0007355533307418227, - "learning_rate": 0.00019999823464267373, - "loss": 46.0, - "step": 24748 - }, - { - "epoch": 1.892233881912189, - "grad_norm": 0.0011991269420832396, - "learning_rate": 0.00019999823449994692, - "loss": 46.0, - "step": 24749 - }, - { - "epoch": 1.8923103388955789, - "grad_norm": 0.0010245504090562463, - "learning_rate": 0.00019999823435721433, - "loss": 46.0, - "step": 24750 - }, - { - "epoch": 1.8923867958789686, - "grad_norm": 0.0007938212365843356, - "learning_rate": 0.000199998234214476, - "loss": 46.0, - "step": 24751 - }, - { - "epoch": 1.8924632528623584, - "grad_norm": 0.0010722477454692125, - "learning_rate": 0.0001999982340717319, - "loss": 46.0, - "step": 24752 - }, - { - "epoch": 1.8925397098457482, - "grad_norm": 0.005164948757737875, - "learning_rate": 0.00019999823392898204, - "loss": 46.0, - "step": 24753 - }, - { - "epoch": 1.892616166829138, - "grad_norm": 0.0007275641546584666, - "learning_rate": 0.00019999823378622639, - "loss": 46.0, - "step": 24754 - }, - { - "epoch": 1.8926926238125275, - "grad_norm": 0.0010443564970046282, - "learning_rate": 0.00019999823364346496, - "loss": 46.0, - "step": 24755 - }, - { - "epoch": 1.8927690807959172, - "grad_norm": 0.003957248758524656, - "learning_rate": 0.00019999823350069776, - "loss": 46.0, - "step": 24756 - }, - { - "epoch": 1.8928455377793068, - "grad_norm": 0.0015251325676217675, - "learning_rate": 0.0001999982333579248, - "loss": 46.0, - "step": 24757 - }, - { - "epoch": 1.8929219947626965, - "grad_norm": 0.0004211643827147782, - "learning_rate": 0.0001999982332151461, - "loss": 46.0, - "step": 24758 - }, - { - "epoch": 1.8929984517460863, - "grad_norm": 0.0007466599345207214, - "learning_rate": 0.00019999823307236157, - "loss": 46.0, - "step": 24759 - }, - { - "epoch": 1.893074908729476, - "grad_norm": 0.004767555743455887, - "learning_rate": 0.0001999982329295713, - "loss": 46.0, - "step": 24760 - }, - { - "epoch": 1.8931513657128658, - "grad_norm": 0.010698717087507248, - "learning_rate": 0.00019999823278677527, - "loss": 46.0, - "step": 24761 - }, - { - "epoch": 1.8932278226962556, - "grad_norm": 0.006788797676563263, - "learning_rate": 0.00019999823264397348, - "loss": 46.0, - "step": 24762 - }, - { - "epoch": 1.8933042796796453, - "grad_norm": 0.0018999719759449363, - "learning_rate": 0.00019999823250116592, - "loss": 46.0, - "step": 24763 - }, - { - "epoch": 1.893380736663035, - "grad_norm": 0.0007776694837957621, - "learning_rate": 0.00019999823235835256, - "loss": 46.0, - "step": 24764 - }, - { - "epoch": 1.8934571936464248, - "grad_norm": 0.000996719696559012, - "learning_rate": 0.00019999823221553346, - "loss": 46.0, - "step": 24765 - }, - { - "epoch": 1.8935336506298144, - "grad_norm": 0.0007827046792954206, - "learning_rate": 0.00019999823207270858, - "loss": 46.0, - "step": 24766 - }, - { - "epoch": 1.8936101076132041, - "grad_norm": 0.0022264192812144756, - "learning_rate": 0.00019999823192987792, - "loss": 46.0, - "step": 24767 - }, - { - "epoch": 1.8936865645965937, - "grad_norm": 0.0022582896053791046, - "learning_rate": 0.00019999823178704152, - "loss": 46.0, - "step": 24768 - }, - { - "epoch": 1.8937630215799834, - "grad_norm": 0.002451102714985609, - "learning_rate": 0.00019999823164419932, - "loss": 46.0, - "step": 24769 - }, - { - "epoch": 1.8938394785633732, - "grad_norm": 0.0006675709155388176, - "learning_rate": 0.00019999823150135138, - "loss": 46.0, - "step": 24770 - }, - { - "epoch": 1.893915935546763, - "grad_norm": 0.0007069043931551278, - "learning_rate": 0.00019999823135849763, - "loss": 46.0, - "step": 24771 - }, - { - "epoch": 1.8939923925301527, - "grad_norm": 0.009171134792268276, - "learning_rate": 0.00019999823121563814, - "loss": 46.0, - "step": 24772 - }, - { - "epoch": 1.8940688495135425, - "grad_norm": 0.00439388258382678, - "learning_rate": 0.00019999823107277287, - "loss": 46.0, - "step": 24773 - }, - { - "epoch": 1.8941453064969322, - "grad_norm": 0.0012644353555515409, - "learning_rate": 0.0001999982309299018, - "loss": 46.0, - "step": 24774 - }, - { - "epoch": 1.894221763480322, - "grad_norm": 0.0014597425470128655, - "learning_rate": 0.00019999823078702505, - "loss": 46.0, - "step": 24775 - }, - { - "epoch": 1.8942982204637118, - "grad_norm": 0.0011703589698299766, - "learning_rate": 0.00019999823064414243, - "loss": 46.0, - "step": 24776 - }, - { - "epoch": 1.8943746774471013, - "grad_norm": 0.0012173581635579467, - "learning_rate": 0.00019999823050125413, - "loss": 46.0, - "step": 24777 - }, - { - "epoch": 1.894451134430491, - "grad_norm": 0.0006201710202731192, - "learning_rate": 0.00019999823035836, - "loss": 46.0, - "step": 24778 - }, - { - "epoch": 1.8945275914138806, - "grad_norm": 0.002437945455312729, - "learning_rate": 0.00019999823021546012, - "loss": 46.0, - "step": 24779 - }, - { - "epoch": 1.8946040483972704, - "grad_norm": 0.008216519840061665, - "learning_rate": 0.00019999823007255446, - "loss": 46.0, - "step": 24780 - }, - { - "epoch": 1.8946805053806601, - "grad_norm": 0.0022775973193347454, - "learning_rate": 0.00019999822992964307, - "loss": 46.0, - "step": 24781 - }, - { - "epoch": 1.89475696236405, - "grad_norm": 0.000508844677824527, - "learning_rate": 0.00019999822978672587, - "loss": 46.0, - "step": 24782 - }, - { - "epoch": 1.8948334193474397, - "grad_norm": 0.0006538983434438705, - "learning_rate": 0.0001999982296438029, - "loss": 46.0, - "step": 24783 - }, - { - "epoch": 1.8949098763308294, - "grad_norm": 0.0006354584475047886, - "learning_rate": 0.00019999822950087417, - "loss": 46.0, - "step": 24784 - }, - { - "epoch": 1.8949863333142192, - "grad_norm": 0.0010672869393602014, - "learning_rate": 0.00019999822935793966, - "loss": 46.0, - "step": 24785 - }, - { - "epoch": 1.895062790297609, - "grad_norm": 0.00047534782788716257, - "learning_rate": 0.0001999982292149994, - "loss": 46.0, - "step": 24786 - }, - { - "epoch": 1.8951392472809985, - "grad_norm": 0.002917158417403698, - "learning_rate": 0.00019999822907205338, - "loss": 46.0, - "step": 24787 - }, - { - "epoch": 1.8952157042643882, - "grad_norm": 0.002033337950706482, - "learning_rate": 0.00019999822892910154, - "loss": 46.0, - "step": 24788 - }, - { - "epoch": 1.895292161247778, - "grad_norm": 0.002093485789373517, - "learning_rate": 0.00019999822878614398, - "loss": 46.0, - "step": 24789 - }, - { - "epoch": 1.8953686182311675, - "grad_norm": 0.0015971862012520432, - "learning_rate": 0.00019999822864318062, - "loss": 46.0, - "step": 24790 - }, - { - "epoch": 1.8954450752145573, - "grad_norm": 0.0027375868521630764, - "learning_rate": 0.00019999822850021151, - "loss": 46.0, - "step": 24791 - }, - { - "epoch": 1.895521532197947, - "grad_norm": 0.0007920638890936971, - "learning_rate": 0.00019999822835723664, - "loss": 46.0, - "step": 24792 - }, - { - "epoch": 1.8955979891813368, - "grad_norm": 0.0016711171483621001, - "learning_rate": 0.00019999822821425598, - "loss": 46.0, - "step": 24793 - }, - { - "epoch": 1.8956744461647266, - "grad_norm": 0.0028647526632994413, - "learning_rate": 0.00019999822807126956, - "loss": 46.0, - "step": 24794 - }, - { - "epoch": 1.8957509031481163, - "grad_norm": 0.0002937439421657473, - "learning_rate": 0.00019999822792827736, - "loss": 46.0, - "step": 24795 - }, - { - "epoch": 1.895827360131506, - "grad_norm": 0.0005791856092400849, - "learning_rate": 0.00019999822778527939, - "loss": 46.0, - "step": 24796 - }, - { - "epoch": 1.8959038171148959, - "grad_norm": 0.0011031260946765542, - "learning_rate": 0.00019999822764227567, - "loss": 46.0, - "step": 24797 - }, - { - "epoch": 1.8959802740982854, - "grad_norm": 0.0009347690502181649, - "learning_rate": 0.00019999822749926618, - "loss": 46.0, - "step": 24798 - }, - { - "epoch": 1.8960567310816752, - "grad_norm": 0.00110166787635535, - "learning_rate": 0.00019999822735625088, - "loss": 46.0, - "step": 24799 - }, - { - "epoch": 1.896133188065065, - "grad_norm": 0.0014626196352764964, - "learning_rate": 0.00019999822721322987, - "loss": 46.0, - "step": 24800 - }, - { - "epoch": 1.8962096450484545, - "grad_norm": 0.00030768782016821206, - "learning_rate": 0.00019999822707020306, - "loss": 46.0, - "step": 24801 - }, - { - "epoch": 1.8962861020318442, - "grad_norm": 0.001377628417685628, - "learning_rate": 0.00019999822692717047, - "loss": 46.0, - "step": 24802 - }, - { - "epoch": 1.896362559015234, - "grad_norm": 0.0005198445869609714, - "learning_rate": 0.00019999822678413214, - "loss": 46.0, - "step": 24803 - }, - { - "epoch": 1.8964390159986237, - "grad_norm": 0.0016490245470777154, - "learning_rate": 0.00019999822664108804, - "loss": 46.0, - "step": 24804 - }, - { - "epoch": 1.8965154729820135, - "grad_norm": 0.0014037376968190074, - "learning_rate": 0.00019999822649803813, - "loss": 46.0, - "step": 24805 - }, - { - "epoch": 1.8965919299654033, - "grad_norm": 0.0014297586167231202, - "learning_rate": 0.00019999822635498248, - "loss": 46.0, - "step": 24806 - }, - { - "epoch": 1.896668386948793, - "grad_norm": 0.0008142938604578376, - "learning_rate": 0.00019999822621192105, - "loss": 46.0, - "step": 24807 - }, - { - "epoch": 1.8967448439321828, - "grad_norm": 0.005264353007078171, - "learning_rate": 0.00019999822606885385, - "loss": 46.0, - "step": 24808 - }, - { - "epoch": 1.8968213009155723, - "grad_norm": 0.0009005211759358644, - "learning_rate": 0.00019999822592578088, - "loss": 46.0, - "step": 24809 - }, - { - "epoch": 1.896897757898962, - "grad_norm": 0.0019074570154771209, - "learning_rate": 0.00019999822578270216, - "loss": 46.0, - "step": 24810 - }, - { - "epoch": 1.8969742148823519, - "grad_norm": 0.0023443959653377533, - "learning_rate": 0.00019999822563961765, - "loss": 46.0, - "step": 24811 - }, - { - "epoch": 1.8970506718657414, - "grad_norm": 0.0054117850959300995, - "learning_rate": 0.0001999982254965274, - "loss": 46.0, - "step": 24812 - }, - { - "epoch": 1.8971271288491312, - "grad_norm": 0.0011071175104007125, - "learning_rate": 0.00019999822535343134, - "loss": 46.0, - "step": 24813 - }, - { - "epoch": 1.897203585832521, - "grad_norm": 0.0024850473273545504, - "learning_rate": 0.00019999822521032953, - "loss": 46.0, - "step": 24814 - }, - { - "epoch": 1.8972800428159107, - "grad_norm": 0.0013730566715821624, - "learning_rate": 0.00019999822506722197, - "loss": 46.0, - "step": 24815 - }, - { - "epoch": 1.8973564997993004, - "grad_norm": 0.0004893143195658922, - "learning_rate": 0.0001999982249241086, - "loss": 46.0, - "step": 24816 - }, - { - "epoch": 1.8974329567826902, - "grad_norm": 0.00259625562466681, - "learning_rate": 0.0001999982247809895, - "loss": 46.0, - "step": 24817 - }, - { - "epoch": 1.89750941376608, - "grad_norm": 0.001510382047854364, - "learning_rate": 0.00019999822463786463, - "loss": 46.0, - "step": 24818 - }, - { - "epoch": 1.8975858707494697, - "grad_norm": 0.0018947870703414083, - "learning_rate": 0.00019999822449473395, - "loss": 46.0, - "step": 24819 - }, - { - "epoch": 1.8976623277328593, - "grad_norm": 0.013857456855475903, - "learning_rate": 0.00019999822435159753, - "loss": 46.0, - "step": 24820 - }, - { - "epoch": 1.897738784716249, - "grad_norm": 0.001011537853628397, - "learning_rate": 0.00019999822420845533, - "loss": 46.0, - "step": 24821 - }, - { - "epoch": 1.8978152416996388, - "grad_norm": 0.0015456390101462603, - "learning_rate": 0.00019999822406530738, - "loss": 46.0, - "step": 24822 - }, - { - "epoch": 1.8978916986830283, - "grad_norm": 0.0004182908160146326, - "learning_rate": 0.0001999982239221536, - "loss": 46.0, - "step": 24823 - }, - { - "epoch": 1.897968155666418, - "grad_norm": 0.0005073556094430387, - "learning_rate": 0.00019999822377899412, - "loss": 46.0, - "step": 24824 - }, - { - "epoch": 1.8980446126498078, - "grad_norm": 0.0011597982374951243, - "learning_rate": 0.00019999822363582885, - "loss": 46.0, - "step": 24825 - }, - { - "epoch": 1.8981210696331976, - "grad_norm": 0.0008622303139418364, - "learning_rate": 0.0001999982234926578, - "loss": 46.0, - "step": 24826 - }, - { - "epoch": 1.8981975266165874, - "grad_norm": 0.0008540943963453174, - "learning_rate": 0.000199998223349481, - "loss": 46.0, - "step": 24827 - }, - { - "epoch": 1.8982739835999771, - "grad_norm": 0.000912796298507601, - "learning_rate": 0.00019999822320629842, - "loss": 46.0, - "step": 24828 - }, - { - "epoch": 1.8983504405833669, - "grad_norm": 0.004008203744888306, - "learning_rate": 0.00019999822306311006, - "loss": 46.0, - "step": 24829 - }, - { - "epoch": 1.8984268975667566, - "grad_norm": 0.0013046471867710352, - "learning_rate": 0.00019999822291991596, - "loss": 46.0, - "step": 24830 - }, - { - "epoch": 1.8985033545501462, - "grad_norm": 0.0005455836653709412, - "learning_rate": 0.00019999822277671605, - "loss": 46.0, - "step": 24831 - }, - { - "epoch": 1.898579811533536, - "grad_norm": 0.0008767587132751942, - "learning_rate": 0.0001999982226335104, - "loss": 46.0, - "step": 24832 - }, - { - "epoch": 1.8986562685169257, - "grad_norm": 0.000601877982262522, - "learning_rate": 0.00019999822249029898, - "loss": 46.0, - "step": 24833 - }, - { - "epoch": 1.8987327255003152, - "grad_norm": 0.0016630530590191483, - "learning_rate": 0.00019999822234708178, - "loss": 46.0, - "step": 24834 - }, - { - "epoch": 1.898809182483705, - "grad_norm": 0.003101180773228407, - "learning_rate": 0.00019999822220385883, - "loss": 46.0, - "step": 24835 - }, - { - "epoch": 1.8988856394670948, - "grad_norm": 0.0003369378100614995, - "learning_rate": 0.0001999982220606301, - "loss": 46.0, - "step": 24836 - }, - { - "epoch": 1.8989620964504845, - "grad_norm": 0.0007574019255116582, - "learning_rate": 0.00019999822191739557, - "loss": 46.0, - "step": 24837 - }, - { - "epoch": 1.8990385534338743, - "grad_norm": 0.0008945493609644473, - "learning_rate": 0.0001999982217741553, - "loss": 46.0, - "step": 24838 - }, - { - "epoch": 1.899115010417264, - "grad_norm": 0.0010623873677104712, - "learning_rate": 0.00019999822163090927, - "loss": 46.0, - "step": 24839 - }, - { - "epoch": 1.8991914674006538, - "grad_norm": 0.0005498065147548914, - "learning_rate": 0.00019999822148765746, - "loss": 46.0, - "step": 24840 - }, - { - "epoch": 1.8992679243840436, - "grad_norm": 0.0017968276515603065, - "learning_rate": 0.00019999822134439987, - "loss": 46.0, - "step": 24841 - }, - { - "epoch": 1.899344381367433, - "grad_norm": 0.000995611073449254, - "learning_rate": 0.00019999822120113651, - "loss": 46.0, - "step": 24842 - }, - { - "epoch": 1.8994208383508229, - "grad_norm": 0.001208151807077229, - "learning_rate": 0.00019999822105786738, - "loss": 46.0, - "step": 24843 - }, - { - "epoch": 1.8994972953342126, - "grad_norm": 0.0004636195953935385, - "learning_rate": 0.00019999822091459248, - "loss": 46.0, - "step": 24844 - }, - { - "epoch": 1.8995737523176022, - "grad_norm": 0.0022059523034840822, - "learning_rate": 0.00019999822077131185, - "loss": 46.0, - "step": 24845 - }, - { - "epoch": 1.899650209300992, - "grad_norm": 0.0005472211632877588, - "learning_rate": 0.0001999982206280254, - "loss": 46.0, - "step": 24846 - }, - { - "epoch": 1.8997266662843817, - "grad_norm": 0.0030993276741355658, - "learning_rate": 0.00019999822048473323, - "loss": 46.0, - "step": 24847 - }, - { - "epoch": 1.8998031232677715, - "grad_norm": 0.0005370528087951243, - "learning_rate": 0.00019999822034143523, - "loss": 46.0, - "step": 24848 - }, - { - "epoch": 1.8998795802511612, - "grad_norm": 0.0009516928112134337, - "learning_rate": 0.0001999982201981315, - "loss": 46.0, - "step": 24849 - }, - { - "epoch": 1.899956037234551, - "grad_norm": 0.0011598489945754409, - "learning_rate": 0.000199998220054822, - "loss": 46.0, - "step": 24850 - }, - { - "epoch": 1.9000324942179407, - "grad_norm": 0.0034063542261719704, - "learning_rate": 0.00019999821991150673, - "loss": 46.0, - "step": 24851 - }, - { - "epoch": 1.9001089512013305, - "grad_norm": 0.0018962441245093942, - "learning_rate": 0.0001999982197681857, - "loss": 46.0, - "step": 24852 - }, - { - "epoch": 1.90018540818472, - "grad_norm": 0.0010442601051181555, - "learning_rate": 0.00019999821962485889, - "loss": 46.0, - "step": 24853 - }, - { - "epoch": 1.9002618651681098, - "grad_norm": 0.0017621165607124567, - "learning_rate": 0.00019999821948152627, - "loss": 46.0, - "step": 24854 - }, - { - "epoch": 1.9003383221514996, - "grad_norm": 0.0021643179934471846, - "learning_rate": 0.00019999821933818792, - "loss": 46.0, - "step": 24855 - }, - { - "epoch": 1.900414779134889, - "grad_norm": 0.0007741773733869195, - "learning_rate": 0.0001999982191948438, - "loss": 46.0, - "step": 24856 - }, - { - "epoch": 1.9004912361182789, - "grad_norm": 0.0004004188522230834, - "learning_rate": 0.0001999982190514939, - "loss": 46.0, - "step": 24857 - }, - { - "epoch": 1.9005676931016686, - "grad_norm": 0.00094946613535285, - "learning_rate": 0.00019999821890813823, - "loss": 46.0, - "step": 24858 - }, - { - "epoch": 1.9006441500850584, - "grad_norm": 0.0006203845259733498, - "learning_rate": 0.00019999821876477683, - "loss": 46.0, - "step": 24859 - }, - { - "epoch": 1.9007206070684481, - "grad_norm": 0.004983421880751848, - "learning_rate": 0.0001999982186214096, - "loss": 46.0, - "step": 24860 - }, - { - "epoch": 1.900797064051838, - "grad_norm": 0.0010092385346069932, - "learning_rate": 0.00019999821847803664, - "loss": 46.0, - "step": 24861 - }, - { - "epoch": 1.9008735210352277, - "grad_norm": 0.0009766437578946352, - "learning_rate": 0.0001999982183346579, - "loss": 46.0, - "step": 24862 - }, - { - "epoch": 1.9009499780186174, - "grad_norm": 0.0025472999550402164, - "learning_rate": 0.0001999982181912734, - "loss": 46.0, - "step": 24863 - }, - { - "epoch": 1.901026435002007, - "grad_norm": 0.0005673071136698127, - "learning_rate": 0.0001999982180478831, - "loss": 46.0, - "step": 24864 - }, - { - "epoch": 1.9011028919853967, - "grad_norm": 0.0073205516673624516, - "learning_rate": 0.00019999821790448708, - "loss": 46.0, - "step": 24865 - }, - { - "epoch": 1.9011793489687865, - "grad_norm": 0.0008029459859244525, - "learning_rate": 0.00019999821776108524, - "loss": 46.0, - "step": 24866 - }, - { - "epoch": 1.901255805952176, - "grad_norm": 0.0008055150974541903, - "learning_rate": 0.00019999821761767768, - "loss": 46.0, - "step": 24867 - }, - { - "epoch": 1.9013322629355658, - "grad_norm": 0.00026805861853063107, - "learning_rate": 0.00019999821747426432, - "loss": 46.0, - "step": 24868 - }, - { - "epoch": 1.9014087199189555, - "grad_norm": 0.0007115594926290214, - "learning_rate": 0.0001999982173308452, - "loss": 46.0, - "step": 24869 - }, - { - "epoch": 1.9014851769023453, - "grad_norm": 0.0013798220315948129, - "learning_rate": 0.00019999821718742032, - "loss": 46.0, - "step": 24870 - }, - { - "epoch": 1.901561633885735, - "grad_norm": 0.0007382621406577528, - "learning_rate": 0.00019999821704398964, - "loss": 46.0, - "step": 24871 - }, - { - "epoch": 1.9016380908691248, - "grad_norm": 0.0003800530394073576, - "learning_rate": 0.00019999821690055321, - "loss": 46.0, - "step": 24872 - }, - { - "epoch": 1.9017145478525146, - "grad_norm": 0.0007542367675341666, - "learning_rate": 0.00019999821675711102, - "loss": 46.0, - "step": 24873 - }, - { - "epoch": 1.9017910048359044, - "grad_norm": 0.0026554998476058245, - "learning_rate": 0.00019999821661366305, - "loss": 46.0, - "step": 24874 - }, - { - "epoch": 1.901867461819294, - "grad_norm": 0.000755745975766331, - "learning_rate": 0.0001999982164702093, - "loss": 46.0, - "step": 24875 - }, - { - "epoch": 1.9019439188026837, - "grad_norm": 0.0009823732543736696, - "learning_rate": 0.00019999821632674979, - "loss": 46.0, - "step": 24876 - }, - { - "epoch": 1.9020203757860734, - "grad_norm": 0.005809125956147909, - "learning_rate": 0.00019999821618328452, - "loss": 46.0, - "step": 24877 - }, - { - "epoch": 1.902096832769463, - "grad_norm": 0.0006884276517666876, - "learning_rate": 0.00019999821603981346, - "loss": 46.0, - "step": 24878 - }, - { - "epoch": 1.9021732897528527, - "grad_norm": 0.0004453766450751573, - "learning_rate": 0.00019999821589633665, - "loss": 46.0, - "step": 24879 - }, - { - "epoch": 1.9022497467362425, - "grad_norm": 0.0009323851554654539, - "learning_rate": 0.00019999821575285406, - "loss": 46.0, - "step": 24880 - }, - { - "epoch": 1.9023262037196322, - "grad_norm": 0.0009870842332020402, - "learning_rate": 0.00019999821560936568, - "loss": 46.0, - "step": 24881 - }, - { - "epoch": 1.902402660703022, - "grad_norm": 0.002390071516856551, - "learning_rate": 0.00019999821546587158, - "loss": 46.0, - "step": 24882 - }, - { - "epoch": 1.9024791176864118, - "grad_norm": 0.010729173198342323, - "learning_rate": 0.00019999821532237167, - "loss": 46.0, - "step": 24883 - }, - { - "epoch": 1.9025555746698015, - "grad_norm": 0.0007198489038273692, - "learning_rate": 0.00019999821517886602, - "loss": 46.0, - "step": 24884 - }, - { - "epoch": 1.9026320316531913, - "grad_norm": 0.0005699815810658038, - "learning_rate": 0.00019999821503535457, - "loss": 46.0, - "step": 24885 - }, - { - "epoch": 1.9027084886365808, - "grad_norm": 0.003231907030567527, - "learning_rate": 0.00019999821489183738, - "loss": 46.0, - "step": 24886 - }, - { - "epoch": 1.9027849456199706, - "grad_norm": 0.0007182261324487627, - "learning_rate": 0.0001999982147483144, - "loss": 46.0, - "step": 24887 - }, - { - "epoch": 1.9028614026033601, - "grad_norm": 0.0015613920986652374, - "learning_rate": 0.00019999821460478566, - "loss": 46.0, - "step": 24888 - }, - { - "epoch": 1.9029378595867499, - "grad_norm": 0.0016119254287332296, - "learning_rate": 0.00019999821446125114, - "loss": 46.0, - "step": 24889 - }, - { - "epoch": 1.9030143165701396, - "grad_norm": 0.0007292053196579218, - "learning_rate": 0.00019999821431771085, - "loss": 46.0, - "step": 24890 - }, - { - "epoch": 1.9030907735535294, - "grad_norm": 0.004297601990401745, - "learning_rate": 0.00019999821417416482, - "loss": 46.0, - "step": 24891 - }, - { - "epoch": 1.9031672305369192, - "grad_norm": 0.0006740153767168522, - "learning_rate": 0.000199998214030613, - "loss": 46.0, - "step": 24892 - }, - { - "epoch": 1.903243687520309, - "grad_norm": 0.0009984076023101807, - "learning_rate": 0.00019999821388705542, - "loss": 46.0, - "step": 24893 - }, - { - "epoch": 1.9033201445036987, - "grad_norm": 0.0015975938877090812, - "learning_rate": 0.00019999821374349207, - "loss": 46.0, - "step": 24894 - }, - { - "epoch": 1.9033966014870884, - "grad_norm": 0.001951965969055891, - "learning_rate": 0.00019999821359992294, - "loss": 46.0, - "step": 24895 - }, - { - "epoch": 1.9034730584704782, - "grad_norm": 0.004002948757261038, - "learning_rate": 0.00019999821345634803, - "loss": 46.0, - "step": 24896 - }, - { - "epoch": 1.9035495154538677, - "grad_norm": 0.0011364936362951994, - "learning_rate": 0.00019999821331276736, - "loss": 46.0, - "step": 24897 - }, - { - "epoch": 1.9036259724372575, - "grad_norm": 0.0010299276327714324, - "learning_rate": 0.00019999821316918093, - "loss": 46.0, - "step": 24898 - }, - { - "epoch": 1.903702429420647, - "grad_norm": 0.0002644795167725533, - "learning_rate": 0.0001999982130255887, - "loss": 46.0, - "step": 24899 - }, - { - "epoch": 1.9037788864040368, - "grad_norm": 0.008624094538390636, - "learning_rate": 0.00019999821288199074, - "loss": 46.0, - "step": 24900 - }, - { - "epoch": 1.9038553433874266, - "grad_norm": 0.0006809115293435752, - "learning_rate": 0.00019999821273838702, - "loss": 46.0, - "step": 24901 - }, - { - "epoch": 1.9039318003708163, - "grad_norm": 0.000670730834826827, - "learning_rate": 0.0001999982125947775, - "loss": 46.0, - "step": 24902 - }, - { - "epoch": 1.904008257354206, - "grad_norm": 0.0007364463526755571, - "learning_rate": 0.00019999821245116222, - "loss": 46.0, - "step": 24903 - }, - { - "epoch": 1.9040847143375959, - "grad_norm": 0.0006315343198366463, - "learning_rate": 0.00019999821230754115, - "loss": 46.0, - "step": 24904 - }, - { - "epoch": 1.9041611713209856, - "grad_norm": 0.0012751728063449264, - "learning_rate": 0.00019999821216391434, - "loss": 46.0, - "step": 24905 - }, - { - "epoch": 1.9042376283043754, - "grad_norm": 0.002455499954521656, - "learning_rate": 0.00019999821202028176, - "loss": 46.0, - "step": 24906 - }, - { - "epoch": 1.9043140852877651, - "grad_norm": 0.0006889952928759158, - "learning_rate": 0.00019999821187664338, - "loss": 46.0, - "step": 24907 - }, - { - "epoch": 1.9043905422711547, - "grad_norm": 0.0014112438075244427, - "learning_rate": 0.00019999821173299928, - "loss": 46.0, - "step": 24908 - }, - { - "epoch": 1.9044669992545444, - "grad_norm": 0.0009608845575712621, - "learning_rate": 0.00019999821158934937, - "loss": 46.0, - "step": 24909 - }, - { - "epoch": 1.904543456237934, - "grad_norm": 0.001845090533606708, - "learning_rate": 0.0001999982114456937, - "loss": 46.0, - "step": 24910 - }, - { - "epoch": 1.9046199132213237, - "grad_norm": 0.002541648456826806, - "learning_rate": 0.00019999821130203227, - "loss": 46.0, - "step": 24911 - }, - { - "epoch": 1.9046963702047135, - "grad_norm": 0.0008550227503292263, - "learning_rate": 0.00019999821115836505, - "loss": 46.0, - "step": 24912 - }, - { - "epoch": 1.9047728271881033, - "grad_norm": 0.0008201056625694036, - "learning_rate": 0.00019999821101469208, - "loss": 46.0, - "step": 24913 - }, - { - "epoch": 1.904849284171493, - "grad_norm": 0.001776192570105195, - "learning_rate": 0.00019999821087101334, - "loss": 46.0, - "step": 24914 - }, - { - "epoch": 1.9049257411548828, - "grad_norm": 0.00551563547924161, - "learning_rate": 0.00019999821072732882, - "loss": 46.0, - "step": 24915 - }, - { - "epoch": 1.9050021981382725, - "grad_norm": 0.0006977103766985238, - "learning_rate": 0.00019999821058363853, - "loss": 46.0, - "step": 24916 - }, - { - "epoch": 1.9050786551216623, - "grad_norm": 0.0027677600737661123, - "learning_rate": 0.0001999982104399425, - "loss": 46.0, - "step": 24917 - }, - { - "epoch": 1.9051551121050518, - "grad_norm": 0.0011622656602412462, - "learning_rate": 0.00019999821029624066, - "loss": 46.0, - "step": 24918 - }, - { - "epoch": 1.9052315690884416, - "grad_norm": 0.0006694824551232159, - "learning_rate": 0.00019999821015253305, - "loss": 46.0, - "step": 24919 - }, - { - "epoch": 1.9053080260718314, - "grad_norm": 0.007385769858956337, - "learning_rate": 0.00019999821000881972, - "loss": 46.0, - "step": 24920 - }, - { - "epoch": 1.905384483055221, - "grad_norm": 0.001618484384380281, - "learning_rate": 0.0001999982098651006, - "loss": 46.0, - "step": 24921 - }, - { - "epoch": 1.9054609400386107, - "grad_norm": 0.0033515943214297295, - "learning_rate": 0.0001999982097213757, - "loss": 46.0, - "step": 24922 - }, - { - "epoch": 1.9055373970220004, - "grad_norm": 0.001488825073465705, - "learning_rate": 0.000199998209577645, - "loss": 46.0, - "step": 24923 - }, - { - "epoch": 1.9056138540053902, - "grad_norm": 0.0005886798608116806, - "learning_rate": 0.00019999820943390856, - "loss": 46.0, - "step": 24924 - }, - { - "epoch": 1.90569031098878, - "grad_norm": 0.0011335702147334814, - "learning_rate": 0.00019999820929016634, - "loss": 46.0, - "step": 24925 - }, - { - "epoch": 1.9057667679721697, - "grad_norm": 0.0007771881064400077, - "learning_rate": 0.0001999982091464184, - "loss": 46.0, - "step": 24926 - }, - { - "epoch": 1.9058432249555595, - "grad_norm": 0.0014857404166832566, - "learning_rate": 0.00019999820900266463, - "loss": 46.0, - "step": 24927 - }, - { - "epoch": 1.9059196819389492, - "grad_norm": 0.0017242380417883396, - "learning_rate": 0.0001999982088589051, - "loss": 46.0, - "step": 24928 - }, - { - "epoch": 1.9059961389223388, - "grad_norm": 0.0013279590057209134, - "learning_rate": 0.00019999820871513982, - "loss": 46.0, - "step": 24929 - }, - { - "epoch": 1.9060725959057285, - "grad_norm": 0.0020307907834649086, - "learning_rate": 0.00019999820857136879, - "loss": 46.0, - "step": 24930 - }, - { - "epoch": 1.9061490528891183, - "grad_norm": 0.0022239654790610075, - "learning_rate": 0.00019999820842759195, - "loss": 46.0, - "step": 24931 - }, - { - "epoch": 1.9062255098725078, - "grad_norm": 0.0012013082159683108, - "learning_rate": 0.00019999820828380934, - "loss": 46.0, - "step": 24932 - }, - { - "epoch": 1.9063019668558976, - "grad_norm": 0.0005729928961955011, - "learning_rate": 0.00019999820814002098, - "loss": 46.0, - "step": 24933 - }, - { - "epoch": 1.9063784238392874, - "grad_norm": 0.000442824064521119, - "learning_rate": 0.00019999820799622686, - "loss": 46.0, - "step": 24934 - }, - { - "epoch": 1.9064548808226771, - "grad_norm": 0.00823877565562725, - "learning_rate": 0.00019999820785242698, - "loss": 46.0, - "step": 24935 - }, - { - "epoch": 1.9065313378060669, - "grad_norm": 0.002614240162074566, - "learning_rate": 0.0001999982077086213, - "loss": 46.0, - "step": 24936 - }, - { - "epoch": 1.9066077947894566, - "grad_norm": 0.0010755376424640417, - "learning_rate": 0.00019999820756480988, - "loss": 46.0, - "step": 24937 - }, - { - "epoch": 1.9066842517728464, - "grad_norm": 0.0008267826633527875, - "learning_rate": 0.00019999820742099263, - "loss": 46.0, - "step": 24938 - }, - { - "epoch": 1.9067607087562362, - "grad_norm": 0.001741600688546896, - "learning_rate": 0.00019999820727716966, - "loss": 46.0, - "step": 24939 - }, - { - "epoch": 1.9068371657396257, - "grad_norm": 0.0007187746232375503, - "learning_rate": 0.00019999820713334092, - "loss": 46.0, - "step": 24940 - }, - { - "epoch": 1.9069136227230155, - "grad_norm": 0.0011117152171209455, - "learning_rate": 0.00019999820698950643, - "loss": 46.0, - "step": 24941 - }, - { - "epoch": 1.9069900797064052, - "grad_norm": 0.003406405681744218, - "learning_rate": 0.00019999820684566612, - "loss": 46.0, - "step": 24942 - }, - { - "epoch": 1.9070665366897948, - "grad_norm": 0.004850083030760288, - "learning_rate": 0.00019999820670182005, - "loss": 46.0, - "step": 24943 - }, - { - "epoch": 1.9071429936731845, - "grad_norm": 0.0006139572360552847, - "learning_rate": 0.00019999820655796824, - "loss": 46.0, - "step": 24944 - }, - { - "epoch": 1.9072194506565743, - "grad_norm": 0.002164687728509307, - "learning_rate": 0.00019999820641411064, - "loss": 46.0, - "step": 24945 - }, - { - "epoch": 1.907295907639964, - "grad_norm": 0.0012175774900242686, - "learning_rate": 0.00019999820627024728, - "loss": 46.0, - "step": 24946 - }, - { - "epoch": 1.9073723646233538, - "grad_norm": 0.007076655514538288, - "learning_rate": 0.00019999820612637815, - "loss": 46.0, - "step": 24947 - }, - { - "epoch": 1.9074488216067436, - "grad_norm": 0.005158160347491503, - "learning_rate": 0.00019999820598250322, - "loss": 46.0, - "step": 24948 - }, - { - "epoch": 1.9075252785901333, - "grad_norm": 0.0009839009726420045, - "learning_rate": 0.0001999982058386226, - "loss": 46.0, - "step": 24949 - }, - { - "epoch": 1.907601735573523, - "grad_norm": 0.001031207968480885, - "learning_rate": 0.00019999820569473612, - "loss": 46.0, - "step": 24950 - }, - { - "epoch": 1.9076781925569126, - "grad_norm": 0.001690936041995883, - "learning_rate": 0.00019999820555084393, - "loss": 46.0, - "step": 24951 - }, - { - "epoch": 1.9077546495403024, - "grad_norm": 0.0012634992599487305, - "learning_rate": 0.00019999820540694593, - "loss": 46.0, - "step": 24952 - }, - { - "epoch": 1.9078311065236921, - "grad_norm": 0.000994466245174408, - "learning_rate": 0.0001999982052630422, - "loss": 46.0, - "step": 24953 - }, - { - "epoch": 1.9079075635070817, - "grad_norm": 0.0007749787764623761, - "learning_rate": 0.00019999820511913268, - "loss": 46.0, - "step": 24954 - }, - { - "epoch": 1.9079840204904714, - "grad_norm": 0.001949117286130786, - "learning_rate": 0.00019999820497521736, - "loss": 46.0, - "step": 24955 - }, - { - "epoch": 1.9080604774738612, - "grad_norm": 0.0007108062272891402, - "learning_rate": 0.00019999820483129633, - "loss": 46.0, - "step": 24956 - }, - { - "epoch": 1.908136934457251, - "grad_norm": 0.0007981735398061574, - "learning_rate": 0.0001999982046873695, - "loss": 46.0, - "step": 24957 - }, - { - "epoch": 1.9082133914406407, - "grad_norm": 0.0016814186237752438, - "learning_rate": 0.0001999982045434369, - "loss": 46.0, - "step": 24958 - }, - { - "epoch": 1.9082898484240305, - "grad_norm": 0.0019186069257557392, - "learning_rate": 0.00019999820439949853, - "loss": 46.0, - "step": 24959 - }, - { - "epoch": 1.9083663054074202, - "grad_norm": 0.0014015876222401857, - "learning_rate": 0.0001999982042555544, - "loss": 46.0, - "step": 24960 - }, - { - "epoch": 1.90844276239081, - "grad_norm": 0.0008946965681388974, - "learning_rate": 0.0001999982041116045, - "loss": 46.0, - "step": 24961 - }, - { - "epoch": 1.9085192193741995, - "grad_norm": 0.001257650787010789, - "learning_rate": 0.00019999820396764882, - "loss": 46.0, - "step": 24962 - }, - { - "epoch": 1.9085956763575893, - "grad_norm": 0.0005987566546536982, - "learning_rate": 0.0001999982038236874, - "loss": 46.0, - "step": 24963 - }, - { - "epoch": 1.908672133340979, - "grad_norm": 0.0006889775977469981, - "learning_rate": 0.00019999820367972015, - "loss": 46.0, - "step": 24964 - }, - { - "epoch": 1.9087485903243686, - "grad_norm": 0.010618265718221664, - "learning_rate": 0.00019999820353574718, - "loss": 46.0, - "step": 24965 - }, - { - "epoch": 1.9088250473077584, - "grad_norm": 0.0033406633883714676, - "learning_rate": 0.00019999820339176844, - "loss": 46.0, - "step": 24966 - }, - { - "epoch": 1.9089015042911481, - "grad_norm": 0.002791493898257613, - "learning_rate": 0.00019999820324778392, - "loss": 46.0, - "step": 24967 - }, - { - "epoch": 1.908977961274538, - "grad_norm": 0.0006676879711449146, - "learning_rate": 0.00019999820310379364, - "loss": 46.0, - "step": 24968 - }, - { - "epoch": 1.9090544182579277, - "grad_norm": 0.0012116502039134502, - "learning_rate": 0.00019999820295979757, - "loss": 46.0, - "step": 24969 - }, - { - "epoch": 1.9091308752413174, - "grad_norm": 0.0011264095082879066, - "learning_rate": 0.00019999820281579574, - "loss": 46.0, - "step": 24970 - }, - { - "epoch": 1.9092073322247072, - "grad_norm": 0.0005533446674235165, - "learning_rate": 0.00019999820267178816, - "loss": 46.0, - "step": 24971 - }, - { - "epoch": 1.909283789208097, - "grad_norm": 0.0004319793079048395, - "learning_rate": 0.00019999820252777478, - "loss": 46.0, - "step": 24972 - }, - { - "epoch": 1.9093602461914865, - "grad_norm": 0.0022293655201792717, - "learning_rate": 0.00019999820238375565, - "loss": 46.0, - "step": 24973 - }, - { - "epoch": 1.9094367031748762, - "grad_norm": 0.0009035140392370522, - "learning_rate": 0.00019999820223973075, - "loss": 46.0, - "step": 24974 - }, - { - "epoch": 1.909513160158266, - "grad_norm": 0.0008334499434567988, - "learning_rate": 0.00019999820209570004, - "loss": 46.0, - "step": 24975 - }, - { - "epoch": 1.9095896171416555, - "grad_norm": 0.0012106074718758464, - "learning_rate": 0.00019999820195166362, - "loss": 46.0, - "step": 24976 - }, - { - "epoch": 1.9096660741250453, - "grad_norm": 0.0011463393457233906, - "learning_rate": 0.0001999982018076214, - "loss": 46.0, - "step": 24977 - }, - { - "epoch": 1.909742531108435, - "grad_norm": 0.0005189673393033445, - "learning_rate": 0.0001999982016635734, - "loss": 46.0, - "step": 24978 - }, - { - "epoch": 1.9098189880918248, - "grad_norm": 0.0007190461037680507, - "learning_rate": 0.00019999820151951964, - "loss": 46.0, - "step": 24979 - }, - { - "epoch": 1.9098954450752146, - "grad_norm": 0.003450064454227686, - "learning_rate": 0.00019999820137546012, - "loss": 46.0, - "step": 24980 - }, - { - "epoch": 1.9099719020586043, - "grad_norm": 0.0008244753116741776, - "learning_rate": 0.00019999820123139483, - "loss": 46.0, - "step": 24981 - }, - { - "epoch": 1.910048359041994, - "grad_norm": 0.0006356416270136833, - "learning_rate": 0.0001999982010873238, - "loss": 46.0, - "step": 24982 - }, - { - "epoch": 1.9101248160253839, - "grad_norm": 0.0005068987375125289, - "learning_rate": 0.00019999820094324694, - "loss": 46.0, - "step": 24983 - }, - { - "epoch": 1.9102012730087734, - "grad_norm": 0.0022258213721215725, - "learning_rate": 0.00019999820079916436, - "loss": 46.0, - "step": 24984 - }, - { - "epoch": 1.9102777299921632, - "grad_norm": 0.003178951796144247, - "learning_rate": 0.00019999820065507597, - "loss": 46.0, - "step": 24985 - }, - { - "epoch": 1.910354186975553, - "grad_norm": 0.0010225767036899924, - "learning_rate": 0.00019999820051098185, - "loss": 46.0, - "step": 24986 - }, - { - "epoch": 1.9104306439589425, - "grad_norm": 0.005318465642631054, - "learning_rate": 0.00019999820036688192, - "loss": 46.0, - "step": 24987 - }, - { - "epoch": 1.9105071009423322, - "grad_norm": 0.0025950248818844557, - "learning_rate": 0.00019999820022277627, - "loss": 46.0, - "step": 24988 - }, - { - "epoch": 1.910583557925722, - "grad_norm": 0.006598325911909342, - "learning_rate": 0.00019999820007866482, - "loss": 46.0, - "step": 24989 - }, - { - "epoch": 1.9106600149091117, - "grad_norm": 0.0035286687780171633, - "learning_rate": 0.0001999981999345476, - "loss": 46.0, - "step": 24990 - }, - { - "epoch": 1.9107364718925015, - "grad_norm": 0.0010828207014128566, - "learning_rate": 0.0001999981997904246, - "loss": 46.0, - "step": 24991 - }, - { - "epoch": 1.9108129288758913, - "grad_norm": 0.0004757887218147516, - "learning_rate": 0.00019999819964629584, - "loss": 46.0, - "step": 24992 - }, - { - "epoch": 1.910889385859281, - "grad_norm": 0.003275728551670909, - "learning_rate": 0.00019999819950216135, - "loss": 46.0, - "step": 24993 - }, - { - "epoch": 1.9109658428426708, - "grad_norm": 0.0022997776977717876, - "learning_rate": 0.00019999819935802104, - "loss": 46.0, - "step": 24994 - }, - { - "epoch": 1.9110422998260603, - "grad_norm": 0.0006355869118124247, - "learning_rate": 0.000199998199213875, - "loss": 46.0, - "step": 24995 - }, - { - "epoch": 1.91111875680945, - "grad_norm": 0.000471688574180007, - "learning_rate": 0.00019999819906972314, - "loss": 46.0, - "step": 24996 - }, - { - "epoch": 1.9111952137928399, - "grad_norm": 0.0030428606551140547, - "learning_rate": 0.00019999819892556556, - "loss": 46.0, - "step": 24997 - }, - { - "epoch": 1.9112716707762294, - "grad_norm": 0.0025528762489557266, - "learning_rate": 0.00019999819878140218, - "loss": 46.0, - "step": 24998 - }, - { - "epoch": 1.9113481277596192, - "grad_norm": 0.0012705266708508134, - "learning_rate": 0.00019999819863723302, - "loss": 46.0, - "step": 24999 - }, - { - "epoch": 1.911424584743009, - "grad_norm": 0.004144930746406317, - "learning_rate": 0.00019999819849305812, - "loss": 46.0, - "step": 25000 - }, - { - "epoch": 1.9115010417263987, - "grad_norm": 0.001256758812814951, - "learning_rate": 0.00019999819834887745, - "loss": 46.0, - "step": 25001 - }, - { - "epoch": 1.9115774987097884, - "grad_norm": 0.0040659294463694096, - "learning_rate": 0.000199998198204691, - "loss": 46.0, - "step": 25002 - }, - { - "epoch": 1.9116539556931782, - "grad_norm": 0.0005735154845751822, - "learning_rate": 0.0001999981980604988, - "loss": 46.0, - "step": 25003 - }, - { - "epoch": 1.911730412676568, - "grad_norm": 0.0008928435272537172, - "learning_rate": 0.0001999981979163008, - "loss": 46.0, - "step": 25004 - }, - { - "epoch": 1.9118068696599577, - "grad_norm": 0.002758158603683114, - "learning_rate": 0.00019999819777209704, - "loss": 46.0, - "step": 25005 - }, - { - "epoch": 1.9118833266433473, - "grad_norm": 0.000967782165389508, - "learning_rate": 0.00019999819762788753, - "loss": 46.0, - "step": 25006 - }, - { - "epoch": 1.911959783626737, - "grad_norm": 0.011960156261920929, - "learning_rate": 0.00019999819748367222, - "loss": 46.0, - "step": 25007 - }, - { - "epoch": 1.9120362406101268, - "grad_norm": 0.0032648888882249594, - "learning_rate": 0.00019999819733945118, - "loss": 46.0, - "step": 25008 - }, - { - "epoch": 1.9121126975935163, - "grad_norm": 0.0015277514467015862, - "learning_rate": 0.00019999819719522432, - "loss": 46.0, - "step": 25009 - }, - { - "epoch": 1.912189154576906, - "grad_norm": 0.00117511919233948, - "learning_rate": 0.00019999819705099174, - "loss": 46.0, - "step": 25010 - }, - { - "epoch": 1.9122656115602958, - "grad_norm": 0.0008993755909614265, - "learning_rate": 0.00019999819690675333, - "loss": 46.0, - "step": 25011 - }, - { - "epoch": 1.9123420685436856, - "grad_norm": 0.003055133856832981, - "learning_rate": 0.0001999981967625092, - "loss": 46.0, - "step": 25012 - }, - { - "epoch": 1.9124185255270754, - "grad_norm": 0.006628992967307568, - "learning_rate": 0.0001999981966182593, - "loss": 46.0, - "step": 25013 - }, - { - "epoch": 1.9124949825104651, - "grad_norm": 0.004076141398400068, - "learning_rate": 0.00019999819647400363, - "loss": 46.0, - "step": 25014 - }, - { - "epoch": 1.9125714394938549, - "grad_norm": 0.0019407860236242414, - "learning_rate": 0.00019999819632974218, - "loss": 46.0, - "step": 25015 - }, - { - "epoch": 1.9126478964772446, - "grad_norm": 0.003689304692670703, - "learning_rate": 0.00019999819618547496, - "loss": 46.0, - "step": 25016 - }, - { - "epoch": 1.9127243534606342, - "grad_norm": 0.0009440301801078022, - "learning_rate": 0.00019999819604120197, - "loss": 46.0, - "step": 25017 - }, - { - "epoch": 1.912800810444024, - "grad_norm": 0.0006374557269737124, - "learning_rate": 0.0001999981958969232, - "loss": 46.0, - "step": 25018 - }, - { - "epoch": 1.9128772674274135, - "grad_norm": 0.0015290643787011504, - "learning_rate": 0.00019999819575263869, - "loss": 46.0, - "step": 25019 - }, - { - "epoch": 1.9129537244108032, - "grad_norm": 0.0005443200934678316, - "learning_rate": 0.00019999819560834837, - "loss": 46.0, - "step": 25020 - }, - { - "epoch": 1.913030181394193, - "grad_norm": 0.004492605570703745, - "learning_rate": 0.0001999981954640523, - "loss": 46.0, - "step": 25021 - }, - { - "epoch": 1.9131066383775828, - "grad_norm": 0.0010905354283750057, - "learning_rate": 0.0001999981953197505, - "loss": 46.0, - "step": 25022 - }, - { - "epoch": 1.9131830953609725, - "grad_norm": 0.0005812941235490143, - "learning_rate": 0.00019999819517544287, - "loss": 46.0, - "step": 25023 - }, - { - "epoch": 1.9132595523443623, - "grad_norm": 0.0013756934786215425, - "learning_rate": 0.00019999819503112952, - "loss": 46.0, - "step": 25024 - }, - { - "epoch": 1.913336009327752, - "grad_norm": 0.0013091216096654534, - "learning_rate": 0.00019999819488681036, - "loss": 46.0, - "step": 25025 - }, - { - "epoch": 1.9134124663111418, - "grad_norm": 0.013098536059260368, - "learning_rate": 0.00019999819474248546, - "loss": 46.0, - "step": 25026 - }, - { - "epoch": 1.9134889232945316, - "grad_norm": 0.0010521155782043934, - "learning_rate": 0.0001999981945981548, - "loss": 46.0, - "step": 25027 - }, - { - "epoch": 1.9135653802779211, - "grad_norm": 0.0008318438776768744, - "learning_rate": 0.00019999819445381834, - "loss": 46.0, - "step": 25028 - }, - { - "epoch": 1.9136418372613109, - "grad_norm": 0.0009173999424092472, - "learning_rate": 0.0001999981943094761, - "loss": 46.0, - "step": 25029 - }, - { - "epoch": 1.9137182942447004, - "grad_norm": 0.001803903141990304, - "learning_rate": 0.0001999981941651281, - "loss": 46.0, - "step": 25030 - }, - { - "epoch": 1.9137947512280902, - "grad_norm": 0.001230034395121038, - "learning_rate": 0.00019999819402077436, - "loss": 46.0, - "step": 25031 - }, - { - "epoch": 1.91387120821148, - "grad_norm": 0.002747897757217288, - "learning_rate": 0.00019999819387641482, - "loss": 46.0, - "step": 25032 - }, - { - "epoch": 1.9139476651948697, - "grad_norm": 0.0018156863516196609, - "learning_rate": 0.00019999819373204953, - "loss": 46.0, - "step": 25033 - }, - { - "epoch": 1.9140241221782595, - "grad_norm": 0.0005510213668458164, - "learning_rate": 0.00019999819358767847, - "loss": 46.0, - "step": 25034 - }, - { - "epoch": 1.9141005791616492, - "grad_norm": 0.003328816732391715, - "learning_rate": 0.00019999819344330164, - "loss": 46.0, - "step": 25035 - }, - { - "epoch": 1.914177036145039, - "grad_norm": 0.0007040451164357364, - "learning_rate": 0.00019999819329891903, - "loss": 46.0, - "step": 25036 - }, - { - "epoch": 1.9142534931284287, - "grad_norm": 0.0032834475859999657, - "learning_rate": 0.00019999819315453062, - "loss": 46.0, - "step": 25037 - }, - { - "epoch": 1.9143299501118185, - "grad_norm": 0.0008268989040516317, - "learning_rate": 0.0001999981930101365, - "loss": 46.0, - "step": 25038 - }, - { - "epoch": 1.914406407095208, - "grad_norm": 0.0016562157543376088, - "learning_rate": 0.00019999819286573657, - "loss": 46.0, - "step": 25039 - }, - { - "epoch": 1.9144828640785978, - "grad_norm": 0.0006790382321923971, - "learning_rate": 0.0001999981927213309, - "loss": 46.0, - "step": 25040 - }, - { - "epoch": 1.9145593210619873, - "grad_norm": 0.00041332311229780316, - "learning_rate": 0.00019999819257691945, - "loss": 46.0, - "step": 25041 - }, - { - "epoch": 1.914635778045377, - "grad_norm": 0.0013336250558495522, - "learning_rate": 0.00019999819243250223, - "loss": 46.0, - "step": 25042 - }, - { - "epoch": 1.9147122350287669, - "grad_norm": 0.0009243679814971983, - "learning_rate": 0.00019999819228807924, - "loss": 46.0, - "step": 25043 - }, - { - "epoch": 1.9147886920121566, - "grad_norm": 0.001260233111679554, - "learning_rate": 0.00019999819214365047, - "loss": 46.0, - "step": 25044 - }, - { - "epoch": 1.9148651489955464, - "grad_norm": 0.0013258402468636632, - "learning_rate": 0.00019999819199921596, - "loss": 46.0, - "step": 25045 - }, - { - "epoch": 1.9149416059789361, - "grad_norm": 0.005777197424322367, - "learning_rate": 0.00019999819185477567, - "loss": 46.0, - "step": 25046 - }, - { - "epoch": 1.915018062962326, - "grad_norm": 0.0008911778568290174, - "learning_rate": 0.00019999819171032956, - "loss": 46.0, - "step": 25047 - }, - { - "epoch": 1.9150945199457157, - "grad_norm": 0.0004140009405091405, - "learning_rate": 0.00019999819156587772, - "loss": 46.0, - "step": 25048 - }, - { - "epoch": 1.9151709769291054, - "grad_norm": 0.0008028841693885624, - "learning_rate": 0.00019999819142142014, - "loss": 46.0, - "step": 25049 - }, - { - "epoch": 1.915247433912495, - "grad_norm": 0.0010953544406220317, - "learning_rate": 0.00019999819127695676, - "loss": 46.0, - "step": 25050 - }, - { - "epoch": 1.9153238908958847, - "grad_norm": 0.0015279761282727122, - "learning_rate": 0.0001999981911324876, - "loss": 46.0, - "step": 25051 - }, - { - "epoch": 1.9154003478792743, - "grad_norm": 0.00328050390817225, - "learning_rate": 0.00019999819098801268, - "loss": 46.0, - "step": 25052 - }, - { - "epoch": 1.915476804862664, - "grad_norm": 0.0010097004706040025, - "learning_rate": 0.000199998190843532, - "loss": 46.0, - "step": 25053 - }, - { - "epoch": 1.9155532618460538, - "grad_norm": 0.0005634309491142631, - "learning_rate": 0.00019999819069904556, - "loss": 46.0, - "step": 25054 - }, - { - "epoch": 1.9156297188294436, - "grad_norm": 0.0018211569404229522, - "learning_rate": 0.00019999819055455334, - "loss": 46.0, - "step": 25055 - }, - { - "epoch": 1.9157061758128333, - "grad_norm": 0.0005940868286415935, - "learning_rate": 0.00019999819041005535, - "loss": 46.0, - "step": 25056 - }, - { - "epoch": 1.915782632796223, - "grad_norm": 0.0010370899690315127, - "learning_rate": 0.00019999819026555158, - "loss": 46.0, - "step": 25057 - }, - { - "epoch": 1.9158590897796128, - "grad_norm": 0.0007954982575029135, - "learning_rate": 0.00019999819012104207, - "loss": 46.0, - "step": 25058 - }, - { - "epoch": 1.9159355467630026, - "grad_norm": 0.001185583882033825, - "learning_rate": 0.00019999818997652676, - "loss": 46.0, - "step": 25059 - }, - { - "epoch": 1.9160120037463921, - "grad_norm": 0.0009369342587888241, - "learning_rate": 0.00019999818983200567, - "loss": 46.0, - "step": 25060 - }, - { - "epoch": 1.916088460729782, - "grad_norm": 0.001079972367733717, - "learning_rate": 0.00019999818968747884, - "loss": 46.0, - "step": 25061 - }, - { - "epoch": 1.9161649177131717, - "grad_norm": 0.00123128283303231, - "learning_rate": 0.00019999818954294623, - "loss": 46.0, - "step": 25062 - }, - { - "epoch": 1.9162413746965612, - "grad_norm": 0.0013428755337372422, - "learning_rate": 0.00019999818939840785, - "loss": 46.0, - "step": 25063 - }, - { - "epoch": 1.916317831679951, - "grad_norm": 0.0008612447418272495, - "learning_rate": 0.0001999981892538637, - "loss": 46.0, - "step": 25064 - }, - { - "epoch": 1.9163942886633407, - "grad_norm": 0.0007068425184115767, - "learning_rate": 0.00019999818910931377, - "loss": 46.0, - "step": 25065 - }, - { - "epoch": 1.9164707456467305, - "grad_norm": 0.004639300983399153, - "learning_rate": 0.0001999981889647581, - "loss": 46.0, - "step": 25066 - }, - { - "epoch": 1.9165472026301202, - "grad_norm": 0.003493841737508774, - "learning_rate": 0.00019999818882019663, - "loss": 46.0, - "step": 25067 - }, - { - "epoch": 1.91662365961351, - "grad_norm": 0.0006687366403639317, - "learning_rate": 0.00019999818867562943, - "loss": 46.0, - "step": 25068 - }, - { - "epoch": 1.9167001165968998, - "grad_norm": 0.0004994089249521494, - "learning_rate": 0.00019999818853105644, - "loss": 46.0, - "step": 25069 - }, - { - "epoch": 1.9167765735802895, - "grad_norm": 0.0008875160710886121, - "learning_rate": 0.00019999818838647765, - "loss": 46.0, - "step": 25070 - }, - { - "epoch": 1.916853030563679, - "grad_norm": 0.001617278321646154, - "learning_rate": 0.00019999818824189313, - "loss": 46.0, - "step": 25071 - }, - { - "epoch": 1.9169294875470688, - "grad_norm": 0.002318442100659013, - "learning_rate": 0.00019999818809730285, - "loss": 46.0, - "step": 25072 - }, - { - "epoch": 1.9170059445304586, - "grad_norm": 0.0011374695459380746, - "learning_rate": 0.00019999818795270676, - "loss": 46.0, - "step": 25073 - }, - { - "epoch": 1.9170824015138481, - "grad_norm": 0.0154255460947752, - "learning_rate": 0.00019999818780810493, - "loss": 46.0, - "step": 25074 - }, - { - "epoch": 1.9171588584972379, - "grad_norm": 0.0011985677992925048, - "learning_rate": 0.00019999818766349732, - "loss": 46.0, - "step": 25075 - }, - { - "epoch": 1.9172353154806276, - "grad_norm": 0.001337897963821888, - "learning_rate": 0.00019999818751888392, - "loss": 46.0, - "step": 25076 - }, - { - "epoch": 1.9173117724640174, - "grad_norm": 0.00039542795275337994, - "learning_rate": 0.00019999818737426476, - "loss": 46.0, - "step": 25077 - }, - { - "epoch": 1.9173882294474072, - "grad_norm": 0.00354016968049109, - "learning_rate": 0.00019999818722963986, - "loss": 46.0, - "step": 25078 - }, - { - "epoch": 1.917464686430797, - "grad_norm": 0.0007964569376781583, - "learning_rate": 0.00019999818708500917, - "loss": 46.0, - "step": 25079 - }, - { - "epoch": 1.9175411434141867, - "grad_norm": 0.0013906906824558973, - "learning_rate": 0.00019999818694037272, - "loss": 46.0, - "step": 25080 - }, - { - "epoch": 1.9176176003975764, - "grad_norm": 0.000562442815862596, - "learning_rate": 0.00019999818679573047, - "loss": 46.0, - "step": 25081 - }, - { - "epoch": 1.917694057380966, - "grad_norm": 0.0014158479170873761, - "learning_rate": 0.0001999981866510825, - "loss": 46.0, - "step": 25082 - }, - { - "epoch": 1.9177705143643557, - "grad_norm": 0.00161244987975806, - "learning_rate": 0.00019999818650642871, - "loss": 46.0, - "step": 25083 - }, - { - "epoch": 1.9178469713477455, - "grad_norm": 0.0005394053878262639, - "learning_rate": 0.0001999981863617692, - "loss": 46.0, - "step": 25084 - }, - { - "epoch": 1.917923428331135, - "grad_norm": 0.0004928641719743609, - "learning_rate": 0.0001999981862171039, - "loss": 46.0, - "step": 25085 - }, - { - "epoch": 1.9179998853145248, - "grad_norm": 0.0004640161932911724, - "learning_rate": 0.0001999981860724328, - "loss": 46.0, - "step": 25086 - }, - { - "epoch": 1.9180763422979146, - "grad_norm": 0.0010645025176927447, - "learning_rate": 0.000199998185927756, - "loss": 46.0, - "step": 25087 - }, - { - "epoch": 1.9181527992813043, - "grad_norm": 0.0006481001619249582, - "learning_rate": 0.00019999818578307337, - "loss": 46.0, - "step": 25088 - }, - { - "epoch": 1.918229256264694, - "grad_norm": 0.004494494758546352, - "learning_rate": 0.00019999818563838496, - "loss": 46.0, - "step": 25089 - }, - { - "epoch": 1.9183057132480839, - "grad_norm": 0.003421301953494549, - "learning_rate": 0.00019999818549369083, - "loss": 46.0, - "step": 25090 - }, - { - "epoch": 1.9183821702314736, - "grad_norm": 0.0030701770447194576, - "learning_rate": 0.0001999981853489909, - "loss": 46.0, - "step": 25091 - }, - { - "epoch": 1.9184586272148634, - "grad_norm": 0.0008201185846701264, - "learning_rate": 0.0001999981852042852, - "loss": 46.0, - "step": 25092 - }, - { - "epoch": 1.918535084198253, - "grad_norm": 0.000525744108017534, - "learning_rate": 0.00019999818505957376, - "loss": 46.0, - "step": 25093 - }, - { - "epoch": 1.9186115411816427, - "grad_norm": 0.0007714878884144127, - "learning_rate": 0.00019999818491485654, - "loss": 46.0, - "step": 25094 - }, - { - "epoch": 1.9186879981650324, - "grad_norm": 0.0017069801688194275, - "learning_rate": 0.00019999818477013355, - "loss": 46.0, - "step": 25095 - }, - { - "epoch": 1.918764455148422, - "grad_norm": 0.0008298956672661006, - "learning_rate": 0.00019999818462540479, - "loss": 46.0, - "step": 25096 - }, - { - "epoch": 1.9188409121318117, - "grad_norm": 0.0005693138227798045, - "learning_rate": 0.00019999818448067025, - "loss": 46.0, - "step": 25097 - }, - { - "epoch": 1.9189173691152015, - "grad_norm": 0.003054674481973052, - "learning_rate": 0.00019999818433592993, - "loss": 46.0, - "step": 25098 - }, - { - "epoch": 1.9189938260985913, - "grad_norm": 0.0007443517679348588, - "learning_rate": 0.00019999818419118385, - "loss": 46.0, - "step": 25099 - }, - { - "epoch": 1.919070283081981, - "grad_norm": 0.0006166899693198502, - "learning_rate": 0.000199998184046432, - "loss": 46.0, - "step": 25100 - }, - { - "epoch": 1.9191467400653708, - "grad_norm": 0.0007017873576842248, - "learning_rate": 0.00019999818390167438, - "loss": 46.0, - "step": 25101 - }, - { - "epoch": 1.9192231970487605, - "grad_norm": 0.000554573955014348, - "learning_rate": 0.000199998183756911, - "loss": 46.0, - "step": 25102 - }, - { - "epoch": 1.9192996540321503, - "grad_norm": 0.0008606591145507991, - "learning_rate": 0.00019999818361214185, - "loss": 46.0, - "step": 25103 - }, - { - "epoch": 1.9193761110155398, - "grad_norm": 0.0006248600548133254, - "learning_rate": 0.00019999818346736693, - "loss": 46.0, - "step": 25104 - }, - { - "epoch": 1.9194525679989296, - "grad_norm": 0.0015409360639750957, - "learning_rate": 0.00019999818332258626, - "loss": 46.0, - "step": 25105 - }, - { - "epoch": 1.9195290249823194, - "grad_norm": 0.0007284529856406152, - "learning_rate": 0.00019999818317779978, - "loss": 46.0, - "step": 25106 - }, - { - "epoch": 1.919605481965709, - "grad_norm": 0.0011467261938378215, - "learning_rate": 0.00019999818303300754, - "loss": 46.0, - "step": 25107 - }, - { - "epoch": 1.9196819389490987, - "grad_norm": 0.0019680545665323734, - "learning_rate": 0.00019999818288820955, - "loss": 46.0, - "step": 25108 - }, - { - "epoch": 1.9197583959324884, - "grad_norm": 0.0012068209471181035, - "learning_rate": 0.00019999818274340578, - "loss": 46.0, - "step": 25109 - }, - { - "epoch": 1.9198348529158782, - "grad_norm": 0.002536902204155922, - "learning_rate": 0.00019999818259859624, - "loss": 46.0, - "step": 25110 - }, - { - "epoch": 1.919911309899268, - "grad_norm": 0.0012823395663872361, - "learning_rate": 0.00019999818245378096, - "loss": 46.0, - "step": 25111 - }, - { - "epoch": 1.9199877668826577, - "grad_norm": 0.0014077186351642013, - "learning_rate": 0.00019999818230895987, - "loss": 46.0, - "step": 25112 - }, - { - "epoch": 1.9200642238660475, - "grad_norm": 0.0009194363374263048, - "learning_rate": 0.000199998182164133, - "loss": 46.0, - "step": 25113 - }, - { - "epoch": 1.9201406808494372, - "grad_norm": 0.0006999961915425956, - "learning_rate": 0.0001999981820193004, - "loss": 46.0, - "step": 25114 - }, - { - "epoch": 1.9202171378328268, - "grad_norm": 0.001703887595795095, - "learning_rate": 0.00019999818187446203, - "loss": 46.0, - "step": 25115 - }, - { - "epoch": 1.9202935948162165, - "grad_norm": 0.0011886197607964277, - "learning_rate": 0.00019999818172961788, - "loss": 46.0, - "step": 25116 - }, - { - "epoch": 1.9203700517996063, - "grad_norm": 0.0013008458772674203, - "learning_rate": 0.00019999818158476795, - "loss": 46.0, - "step": 25117 - }, - { - "epoch": 1.9204465087829958, - "grad_norm": 0.001087557291612029, - "learning_rate": 0.00019999818143991225, - "loss": 46.0, - "step": 25118 - }, - { - "epoch": 1.9205229657663856, - "grad_norm": 0.0013504237867891788, - "learning_rate": 0.00019999818129505078, - "loss": 46.0, - "step": 25119 - }, - { - "epoch": 1.9205994227497754, - "grad_norm": 0.007073573302477598, - "learning_rate": 0.00019999818115018356, - "loss": 46.0, - "step": 25120 - }, - { - "epoch": 1.9206758797331651, - "grad_norm": 0.003233939642086625, - "learning_rate": 0.00019999818100531057, - "loss": 46.0, - "step": 25121 - }, - { - "epoch": 1.9207523367165549, - "grad_norm": 0.0009536903235130012, - "learning_rate": 0.00019999818086043178, - "loss": 46.0, - "step": 25122 - }, - { - "epoch": 1.9208287936999446, - "grad_norm": 0.0018891231156885624, - "learning_rate": 0.00019999818071554727, - "loss": 46.0, - "step": 25123 - }, - { - "epoch": 1.9209052506833344, - "grad_norm": 0.0009204610832966864, - "learning_rate": 0.00019999818057065696, - "loss": 46.0, - "step": 25124 - }, - { - "epoch": 1.9209817076667242, - "grad_norm": 0.00030159568996168673, - "learning_rate": 0.00019999818042576087, - "loss": 46.0, - "step": 25125 - }, - { - "epoch": 1.9210581646501137, - "grad_norm": 0.0007492768345400691, - "learning_rate": 0.000199998180280859, - "loss": 46.0, - "step": 25126 - }, - { - "epoch": 1.9211346216335035, - "grad_norm": 0.0015603062929585576, - "learning_rate": 0.0001999981801359514, - "loss": 46.0, - "step": 25127 - }, - { - "epoch": 1.9212110786168932, - "grad_norm": 0.0006441359873861074, - "learning_rate": 0.00019999817999103803, - "loss": 46.0, - "step": 25128 - }, - { - "epoch": 1.9212875356002828, - "grad_norm": 0.0012904421892017126, - "learning_rate": 0.00019999817984611885, - "loss": 46.0, - "step": 25129 - }, - { - "epoch": 1.9213639925836725, - "grad_norm": 0.0012758687371388078, - "learning_rate": 0.00019999817970119395, - "loss": 46.0, - "step": 25130 - }, - { - "epoch": 1.9214404495670623, - "grad_norm": 0.007401332724839449, - "learning_rate": 0.00019999817955626323, - "loss": 46.0, - "step": 25131 - }, - { - "epoch": 1.921516906550452, - "grad_norm": 0.008546171709895134, - "learning_rate": 0.00019999817941132676, - "loss": 46.0, - "step": 25132 - }, - { - "epoch": 1.9215933635338418, - "grad_norm": 0.0012078093132004142, - "learning_rate": 0.00019999817926638454, - "loss": 46.0, - "step": 25133 - }, - { - "epoch": 1.9216698205172316, - "grad_norm": 0.0023466809652745724, - "learning_rate": 0.00019999817912143655, - "loss": 46.0, - "step": 25134 - }, - { - "epoch": 1.9217462775006213, - "grad_norm": 0.0008031139732338488, - "learning_rate": 0.00019999817897648278, - "loss": 46.0, - "step": 25135 - }, - { - "epoch": 1.921822734484011, - "grad_norm": 0.0016933862352743745, - "learning_rate": 0.00019999817883152324, - "loss": 46.0, - "step": 25136 - }, - { - "epoch": 1.9218991914674006, - "grad_norm": 0.0009341283002868295, - "learning_rate": 0.00019999817868655793, - "loss": 46.0, - "step": 25137 - }, - { - "epoch": 1.9219756484507904, - "grad_norm": 0.0019462230848148465, - "learning_rate": 0.00019999817854158685, - "loss": 46.0, - "step": 25138 - }, - { - "epoch": 1.9220521054341801, - "grad_norm": 0.0020560200791805983, - "learning_rate": 0.00019999817839661, - "loss": 46.0, - "step": 25139 - }, - { - "epoch": 1.9221285624175697, - "grad_norm": 0.0008402771200053394, - "learning_rate": 0.00019999817825162736, - "loss": 46.0, - "step": 25140 - }, - { - "epoch": 1.9222050194009594, - "grad_norm": 0.0008873976767063141, - "learning_rate": 0.00019999817810663898, - "loss": 46.0, - "step": 25141 - }, - { - "epoch": 1.9222814763843492, - "grad_norm": 0.0012485863408073783, - "learning_rate": 0.00019999817796164483, - "loss": 46.0, - "step": 25142 - }, - { - "epoch": 1.922357933367739, - "grad_norm": 0.0027492486406117678, - "learning_rate": 0.0001999981778166449, - "loss": 46.0, - "step": 25143 - }, - { - "epoch": 1.9224343903511287, - "grad_norm": 0.0022691525518894196, - "learning_rate": 0.0001999981776716392, - "loss": 46.0, - "step": 25144 - }, - { - "epoch": 1.9225108473345185, - "grad_norm": 0.0005444939597509801, - "learning_rate": 0.00019999817752662776, - "loss": 46.0, - "step": 25145 - }, - { - "epoch": 1.9225873043179083, - "grad_norm": 0.0011389129795134068, - "learning_rate": 0.00019999817738161052, - "loss": 46.0, - "step": 25146 - }, - { - "epoch": 1.922663761301298, - "grad_norm": 0.0013293647207319736, - "learning_rate": 0.00019999817723658753, - "loss": 46.0, - "step": 25147 - }, - { - "epoch": 1.9227402182846876, - "grad_norm": 0.005335854832082987, - "learning_rate": 0.00019999817709155873, - "loss": 46.0, - "step": 25148 - }, - { - "epoch": 1.9228166752680773, - "grad_norm": 0.0017869733273983002, - "learning_rate": 0.0001999981769465242, - "loss": 46.0, - "step": 25149 - }, - { - "epoch": 1.9228931322514669, - "grad_norm": 0.013788530603051186, - "learning_rate": 0.00019999817680148389, - "loss": 46.0, - "step": 25150 - }, - { - "epoch": 1.9229695892348566, - "grad_norm": 0.012004577554762363, - "learning_rate": 0.0001999981766564378, - "loss": 46.0, - "step": 25151 - }, - { - "epoch": 1.9230460462182464, - "grad_norm": 0.0015379858668893576, - "learning_rate": 0.00019999817651138594, - "loss": 46.0, - "step": 25152 - }, - { - "epoch": 1.9231225032016361, - "grad_norm": 0.0018959281733259559, - "learning_rate": 0.00019999817636632834, - "loss": 46.0, - "step": 25153 - }, - { - "epoch": 1.923198960185026, - "grad_norm": 0.0007205240544863045, - "learning_rate": 0.00019999817622126496, - "loss": 46.0, - "step": 25154 - }, - { - "epoch": 1.9232754171684157, - "grad_norm": 0.00045765176764689386, - "learning_rate": 0.0001999981760761958, - "loss": 46.0, - "step": 25155 - }, - { - "epoch": 1.9233518741518054, - "grad_norm": 0.0005802412633784115, - "learning_rate": 0.00019999817593112086, - "loss": 46.0, - "step": 25156 - }, - { - "epoch": 1.9234283311351952, - "grad_norm": 0.002795987529680133, - "learning_rate": 0.00019999817578604016, - "loss": 46.0, - "step": 25157 - }, - { - "epoch": 1.923504788118585, - "grad_norm": 0.0007610914763063192, - "learning_rate": 0.0001999981756409537, - "loss": 46.0, - "step": 25158 - }, - { - "epoch": 1.9235812451019745, - "grad_norm": 0.000974387745372951, - "learning_rate": 0.00019999817549586147, - "loss": 46.0, - "step": 25159 - }, - { - "epoch": 1.9236577020853642, - "grad_norm": 0.008340018801391125, - "learning_rate": 0.00019999817535076346, - "loss": 46.0, - "step": 25160 - }, - { - "epoch": 1.9237341590687538, - "grad_norm": 0.0003882547316607088, - "learning_rate": 0.00019999817520565966, - "loss": 46.0, - "step": 25161 - }, - { - "epoch": 1.9238106160521435, - "grad_norm": 0.0033759695943444967, - "learning_rate": 0.00019999817506055015, - "loss": 46.0, - "step": 25162 - }, - { - "epoch": 1.9238870730355333, - "grad_norm": 0.0009988202946260571, - "learning_rate": 0.00019999817491543482, - "loss": 46.0, - "step": 25163 - }, - { - "epoch": 1.923963530018923, - "grad_norm": 0.0021684356033802032, - "learning_rate": 0.00019999817477031373, - "loss": 46.0, - "step": 25164 - }, - { - "epoch": 1.9240399870023128, - "grad_norm": 0.0022273636423051357, - "learning_rate": 0.00019999817462518687, - "loss": 46.0, - "step": 25165 - }, - { - "epoch": 1.9241164439857026, - "grad_norm": 0.0047491323202848434, - "learning_rate": 0.00019999817448005427, - "loss": 46.0, - "step": 25166 - }, - { - "epoch": 1.9241929009690923, - "grad_norm": 0.0005047345766797662, - "learning_rate": 0.00019999817433491587, - "loss": 46.0, - "step": 25167 - }, - { - "epoch": 1.924269357952482, - "grad_norm": 0.0005172788514755666, - "learning_rate": 0.00019999817418977172, - "loss": 46.0, - "step": 25168 - }, - { - "epoch": 1.9243458149358719, - "grad_norm": 0.0019460206385701895, - "learning_rate": 0.00019999817404462176, - "loss": 46.0, - "step": 25169 - }, - { - "epoch": 1.9244222719192614, - "grad_norm": 0.0007274834788404405, - "learning_rate": 0.0001999981738994661, - "loss": 46.0, - "step": 25170 - }, - { - "epoch": 1.9244987289026512, - "grad_norm": 0.0011231759563088417, - "learning_rate": 0.00019999817375430462, - "loss": 46.0, - "step": 25171 - }, - { - "epoch": 1.9245751858860407, - "grad_norm": 0.0008739459444768727, - "learning_rate": 0.00019999817360913738, - "loss": 46.0, - "step": 25172 - }, - { - "epoch": 1.9246516428694305, - "grad_norm": 0.0007187564042396843, - "learning_rate": 0.0001999981734639644, - "loss": 46.0, - "step": 25173 - }, - { - "epoch": 1.9247280998528202, - "grad_norm": 0.0009323000558651984, - "learning_rate": 0.0001999981733187856, - "loss": 46.0, - "step": 25174 - }, - { - "epoch": 1.92480455683621, - "grad_norm": 0.001003554556518793, - "learning_rate": 0.00019999817317360106, - "loss": 46.0, - "step": 25175 - }, - { - "epoch": 1.9248810138195998, - "grad_norm": 0.004511723760515451, - "learning_rate": 0.00019999817302841075, - "loss": 46.0, - "step": 25176 - }, - { - "epoch": 1.9249574708029895, - "grad_norm": 0.000644420797470957, - "learning_rate": 0.00019999817288321467, - "loss": 46.0, - "step": 25177 - }, - { - "epoch": 1.9250339277863793, - "grad_norm": 0.0012313324259594083, - "learning_rate": 0.00019999817273801284, - "loss": 46.0, - "step": 25178 - }, - { - "epoch": 1.925110384769769, - "grad_norm": 0.0008592989179305732, - "learning_rate": 0.0001999981725928052, - "loss": 46.0, - "step": 25179 - }, - { - "epoch": 1.9251868417531588, - "grad_norm": 0.0007877004682086408, - "learning_rate": 0.0001999981724475918, - "loss": 46.0, - "step": 25180 - }, - { - "epoch": 1.9252632987365483, - "grad_norm": 0.0006632974254898727, - "learning_rate": 0.00019999817230237262, - "loss": 46.0, - "step": 25181 - }, - { - "epoch": 1.925339755719938, - "grad_norm": 0.0011624970939010382, - "learning_rate": 0.0001999981721571477, - "loss": 46.0, - "step": 25182 - }, - { - "epoch": 1.9254162127033276, - "grad_norm": 0.0034205024130642414, - "learning_rate": 0.000199998172011917, - "loss": 46.0, - "step": 25183 - }, - { - "epoch": 1.9254926696867174, - "grad_norm": 0.003955758176743984, - "learning_rate": 0.00019999817186668053, - "loss": 46.0, - "step": 25184 - }, - { - "epoch": 1.9255691266701072, - "grad_norm": 0.0015583012718707323, - "learning_rate": 0.0001999981717214383, - "loss": 46.0, - "step": 25185 - }, - { - "epoch": 1.925645583653497, - "grad_norm": 0.0016653392231091857, - "learning_rate": 0.00019999817157619027, - "loss": 46.0, - "step": 25186 - }, - { - "epoch": 1.9257220406368867, - "grad_norm": 0.0013273786753416061, - "learning_rate": 0.0001999981714309365, - "loss": 46.0, - "step": 25187 - }, - { - "epoch": 1.9257984976202764, - "grad_norm": 0.001043940195813775, - "learning_rate": 0.00019999817128567697, - "loss": 46.0, - "step": 25188 - }, - { - "epoch": 1.9258749546036662, - "grad_norm": 0.001366392825730145, - "learning_rate": 0.00019999817114041163, - "loss": 46.0, - "step": 25189 - }, - { - "epoch": 1.925951411587056, - "grad_norm": 0.001967476913705468, - "learning_rate": 0.00019999817099514055, - "loss": 46.0, - "step": 25190 - }, - { - "epoch": 1.9260278685704455, - "grad_norm": 0.0010032574646174908, - "learning_rate": 0.00019999817084986372, - "loss": 46.0, - "step": 25191 - }, - { - "epoch": 1.9261043255538353, - "grad_norm": 0.006519900634884834, - "learning_rate": 0.0001999981707045811, - "loss": 46.0, - "step": 25192 - }, - { - "epoch": 1.926180782537225, - "grad_norm": 0.00022109072597231716, - "learning_rate": 0.0001999981705592927, - "loss": 46.0, - "step": 25193 - }, - { - "epoch": 1.9262572395206146, - "grad_norm": 0.0020203767344355583, - "learning_rate": 0.0001999981704139985, - "loss": 46.0, - "step": 25194 - }, - { - "epoch": 1.9263336965040043, - "grad_norm": 0.0006868203636258841, - "learning_rate": 0.00019999817026869861, - "loss": 46.0, - "step": 25195 - }, - { - "epoch": 1.926410153487394, - "grad_norm": 0.0006123451748862863, - "learning_rate": 0.0001999981701233929, - "loss": 46.0, - "step": 25196 - }, - { - "epoch": 1.9264866104707838, - "grad_norm": 0.0004660837585106492, - "learning_rate": 0.00019999816997808142, - "loss": 46.0, - "step": 25197 - }, - { - "epoch": 1.9265630674541736, - "grad_norm": 0.0010515452595427632, - "learning_rate": 0.00019999816983276418, - "loss": 46.0, - "step": 25198 - }, - { - "epoch": 1.9266395244375634, - "grad_norm": 0.0011153577361255884, - "learning_rate": 0.00019999816968744116, - "loss": 46.0, - "step": 25199 - }, - { - "epoch": 1.9267159814209531, - "grad_norm": 0.004239494446665049, - "learning_rate": 0.0001999981695421124, - "loss": 46.0, - "step": 25200 - }, - { - "epoch": 1.9267924384043429, - "grad_norm": 0.001552197034470737, - "learning_rate": 0.00019999816939677783, - "loss": 46.0, - "step": 25201 - }, - { - "epoch": 1.9268688953877324, - "grad_norm": 0.0008156081312336028, - "learning_rate": 0.00019999816925143752, - "loss": 46.0, - "step": 25202 - }, - { - "epoch": 1.9269453523711222, - "grad_norm": 0.0005057839443907142, - "learning_rate": 0.00019999816910609144, - "loss": 46.0, - "step": 25203 - }, - { - "epoch": 1.927021809354512, - "grad_norm": 0.000987923238426447, - "learning_rate": 0.00019999816896073958, - "loss": 46.0, - "step": 25204 - }, - { - "epoch": 1.9270982663379015, - "grad_norm": 0.0010984322289004922, - "learning_rate": 0.00019999816881538195, - "loss": 46.0, - "step": 25205 - }, - { - "epoch": 1.9271747233212912, - "grad_norm": 0.0022356316912919283, - "learning_rate": 0.00019999816867001858, - "loss": 46.0, - "step": 25206 - }, - { - "epoch": 1.927251180304681, - "grad_norm": 0.004303027410060167, - "learning_rate": 0.0001999981685246494, - "loss": 46.0, - "step": 25207 - }, - { - "epoch": 1.9273276372880708, - "grad_norm": 0.0006896471022628248, - "learning_rate": 0.00019999816837927448, - "loss": 46.0, - "step": 25208 - }, - { - "epoch": 1.9274040942714605, - "grad_norm": 0.005964044947177172, - "learning_rate": 0.00019999816823389375, - "loss": 46.0, - "step": 25209 - }, - { - "epoch": 1.9274805512548503, - "grad_norm": 0.003415411338210106, - "learning_rate": 0.00019999816808850728, - "loss": 46.0, - "step": 25210 - }, - { - "epoch": 1.92755700823824, - "grad_norm": 0.002570156008005142, - "learning_rate": 0.00019999816794311507, - "loss": 46.0, - "step": 25211 - }, - { - "epoch": 1.9276334652216298, - "grad_norm": 0.0009039311553351581, - "learning_rate": 0.00019999816779771702, - "loss": 46.0, - "step": 25212 - }, - { - "epoch": 1.9277099222050194, - "grad_norm": 0.0007681567803956568, - "learning_rate": 0.00019999816765231326, - "loss": 46.0, - "step": 25213 - }, - { - "epoch": 1.9277863791884091, - "grad_norm": 0.0013136455090716481, - "learning_rate": 0.0001999981675069037, - "loss": 46.0, - "step": 25214 - }, - { - "epoch": 1.9278628361717989, - "grad_norm": 0.0033056868705898523, - "learning_rate": 0.0001999981673614884, - "loss": 46.0, - "step": 25215 - }, - { - "epoch": 1.9279392931551884, - "grad_norm": 0.0025418614968657494, - "learning_rate": 0.0001999981672160673, - "loss": 46.0, - "step": 25216 - }, - { - "epoch": 1.9280157501385782, - "grad_norm": 0.0019595385529100895, - "learning_rate": 0.00019999816707064042, - "loss": 46.0, - "step": 25217 - }, - { - "epoch": 1.928092207121968, - "grad_norm": 0.001870792475529015, - "learning_rate": 0.00019999816692520782, - "loss": 46.0, - "step": 25218 - }, - { - "epoch": 1.9281686641053577, - "grad_norm": 0.0021238909102976322, - "learning_rate": 0.00019999816677976942, - "loss": 46.0, - "step": 25219 - }, - { - "epoch": 1.9282451210887475, - "grad_norm": 0.0005851304158568382, - "learning_rate": 0.00019999816663432524, - "loss": 46.0, - "step": 25220 - }, - { - "epoch": 1.9283215780721372, - "grad_norm": 0.004765552934259176, - "learning_rate": 0.00019999816648887532, - "loss": 46.0, - "step": 25221 - }, - { - "epoch": 1.928398035055527, - "grad_norm": 0.0067898607812821865, - "learning_rate": 0.00019999816634341962, - "loss": 46.0, - "step": 25222 - }, - { - "epoch": 1.9284744920389167, - "grad_norm": 0.0009244895190931857, - "learning_rate": 0.00019999816619795815, - "loss": 46.0, - "step": 25223 - }, - { - "epoch": 1.9285509490223063, - "grad_norm": 0.0011424466501921415, - "learning_rate": 0.00019999816605249088, - "loss": 46.0, - "step": 25224 - }, - { - "epoch": 1.928627406005696, - "grad_norm": 0.0013715140521526337, - "learning_rate": 0.00019999816590701787, - "loss": 46.0, - "step": 25225 - }, - { - "epoch": 1.9287038629890858, - "grad_norm": 0.002194358967244625, - "learning_rate": 0.0001999981657615391, - "loss": 46.0, - "step": 25226 - }, - { - "epoch": 1.9287803199724753, - "grad_norm": 0.005988112650811672, - "learning_rate": 0.00019999816561605457, - "loss": 46.0, - "step": 25227 - }, - { - "epoch": 1.928856776955865, - "grad_norm": 0.0037977169267833233, - "learning_rate": 0.00019999816547056423, - "loss": 46.0, - "step": 25228 - }, - { - "epoch": 1.9289332339392549, - "grad_norm": 0.0004668365581892431, - "learning_rate": 0.00019999816532506812, - "loss": 46.0, - "step": 25229 - }, - { - "epoch": 1.9290096909226446, - "grad_norm": 0.0006393849616870284, - "learning_rate": 0.0001999981651795663, - "loss": 46.0, - "step": 25230 - }, - { - "epoch": 1.9290861479060344, - "grad_norm": 0.0017548591131344438, - "learning_rate": 0.00019999816503405864, - "loss": 46.0, - "step": 25231 - }, - { - "epoch": 1.9291626048894241, - "grad_norm": 0.0009567700326442719, - "learning_rate": 0.00019999816488854526, - "loss": 46.0, - "step": 25232 - }, - { - "epoch": 1.929239061872814, - "grad_norm": 0.0019936037715524435, - "learning_rate": 0.00019999816474302608, - "loss": 46.0, - "step": 25233 - }, - { - "epoch": 1.9293155188562037, - "grad_norm": 0.0008094929507933557, - "learning_rate": 0.00019999816459750113, - "loss": 46.0, - "step": 25234 - }, - { - "epoch": 1.9293919758395932, - "grad_norm": 0.0012513125548139215, - "learning_rate": 0.00019999816445197044, - "loss": 46.0, - "step": 25235 - }, - { - "epoch": 1.929468432822983, - "grad_norm": 0.0006199326599016786, - "learning_rate": 0.00019999816430643397, - "loss": 46.0, - "step": 25236 - }, - { - "epoch": 1.9295448898063727, - "grad_norm": 0.000641452323179692, - "learning_rate": 0.00019999816416089173, - "loss": 46.0, - "step": 25237 - }, - { - "epoch": 1.9296213467897623, - "grad_norm": 0.0013919707853347063, - "learning_rate": 0.0001999981640153437, - "loss": 46.0, - "step": 25238 - }, - { - "epoch": 1.929697803773152, - "grad_norm": 0.0026501931715756655, - "learning_rate": 0.00019999816386978992, - "loss": 46.0, - "step": 25239 - }, - { - "epoch": 1.9297742607565418, - "grad_norm": 0.0010532482992857695, - "learning_rate": 0.00019999816372423036, - "loss": 46.0, - "step": 25240 - }, - { - "epoch": 1.9298507177399316, - "grad_norm": 0.0013078592019155622, - "learning_rate": 0.00019999816357866505, - "loss": 46.0, - "step": 25241 - }, - { - "epoch": 1.9299271747233213, - "grad_norm": 0.008671666495501995, - "learning_rate": 0.00019999816343309394, - "loss": 46.0, - "step": 25242 - }, - { - "epoch": 1.930003631706711, - "grad_norm": 0.0004087663546670228, - "learning_rate": 0.0001999981632875171, - "loss": 46.0, - "step": 25243 - }, - { - "epoch": 1.9300800886901008, - "grad_norm": 0.00042735011084005237, - "learning_rate": 0.00019999816314193446, - "loss": 46.0, - "step": 25244 - }, - { - "epoch": 1.9301565456734906, - "grad_norm": 0.0017284011701121926, - "learning_rate": 0.00019999816299634605, - "loss": 46.0, - "step": 25245 - }, - { - "epoch": 1.9302330026568801, - "grad_norm": 0.0013545791152864695, - "learning_rate": 0.0001999981628507519, - "loss": 46.0, - "step": 25246 - }, - { - "epoch": 1.93030945964027, - "grad_norm": 0.000881209853105247, - "learning_rate": 0.00019999816270515198, - "loss": 46.0, - "step": 25247 - }, - { - "epoch": 1.9303859166236597, - "grad_norm": 0.0009371745400130749, - "learning_rate": 0.00019999816255954623, - "loss": 46.0, - "step": 25248 - }, - { - "epoch": 1.9304623736070492, - "grad_norm": 0.0008964454173110425, - "learning_rate": 0.00019999816241393476, - "loss": 46.0, - "step": 25249 - }, - { - "epoch": 1.930538830590439, - "grad_norm": 0.0014341735513880849, - "learning_rate": 0.00019999816226831752, - "loss": 46.0, - "step": 25250 - }, - { - "epoch": 1.9306152875738287, - "grad_norm": 0.0037198318168520927, - "learning_rate": 0.0001999981621226945, - "loss": 46.0, - "step": 25251 - }, - { - "epoch": 1.9306917445572185, - "grad_norm": 0.0015879302518442273, - "learning_rate": 0.00019999816197706572, - "loss": 46.0, - "step": 25252 - }, - { - "epoch": 1.9307682015406082, - "grad_norm": 0.0013303877785801888, - "learning_rate": 0.00019999816183143115, - "loss": 46.0, - "step": 25253 - }, - { - "epoch": 1.930844658523998, - "grad_norm": 0.0014115603407844901, - "learning_rate": 0.00019999816168579085, - "loss": 46.0, - "step": 25254 - }, - { - "epoch": 1.9309211155073878, - "grad_norm": 0.0012846238678321242, - "learning_rate": 0.00019999816154014474, - "loss": 46.0, - "step": 25255 - }, - { - "epoch": 1.9309975724907775, - "grad_norm": 0.001141722546890378, - "learning_rate": 0.00019999816139449288, - "loss": 46.0, - "step": 25256 - }, - { - "epoch": 1.931074029474167, - "grad_norm": 0.0007933792658150196, - "learning_rate": 0.00019999816124883525, - "loss": 46.0, - "step": 25257 - }, - { - "epoch": 1.9311504864575568, - "grad_norm": 0.0008432471659034491, - "learning_rate": 0.00019999816110317188, - "loss": 46.0, - "step": 25258 - }, - { - "epoch": 1.9312269434409466, - "grad_norm": 0.0018273926107212901, - "learning_rate": 0.0001999981609575027, - "loss": 46.0, - "step": 25259 - }, - { - "epoch": 1.9313034004243361, - "grad_norm": 0.0006823830190114677, - "learning_rate": 0.00019999816081182775, - "loss": 46.0, - "step": 25260 - }, - { - "epoch": 1.9313798574077259, - "grad_norm": 0.0030405069701373577, - "learning_rate": 0.00019999816066614706, - "loss": 46.0, - "step": 25261 - }, - { - "epoch": 1.9314563143911156, - "grad_norm": 0.0014062374830245972, - "learning_rate": 0.00019999816052046056, - "loss": 46.0, - "step": 25262 - }, - { - "epoch": 1.9315327713745054, - "grad_norm": 0.0016032709972932935, - "learning_rate": 0.0001999981603747683, - "loss": 46.0, - "step": 25263 - }, - { - "epoch": 1.9316092283578952, - "grad_norm": 0.00033817230723798275, - "learning_rate": 0.0001999981602290703, - "loss": 46.0, - "step": 25264 - }, - { - "epoch": 1.931685685341285, - "grad_norm": 0.001765438006259501, - "learning_rate": 0.00019999816008336652, - "loss": 46.0, - "step": 25265 - }, - { - "epoch": 1.9317621423246747, - "grad_norm": 0.0007238680846057832, - "learning_rate": 0.00019999815993765693, - "loss": 46.0, - "step": 25266 - }, - { - "epoch": 1.9318385993080645, - "grad_norm": 0.0012378082610666752, - "learning_rate": 0.00019999815979194162, - "loss": 46.0, - "step": 25267 - }, - { - "epoch": 1.931915056291454, - "grad_norm": 0.0016592685133218765, - "learning_rate": 0.00019999815964622054, - "loss": 46.0, - "step": 25268 - }, - { - "epoch": 1.9319915132748438, - "grad_norm": 0.00020512235641945153, - "learning_rate": 0.00019999815950049366, - "loss": 46.0, - "step": 25269 - }, - { - "epoch": 1.9320679702582335, - "grad_norm": 0.0008504390716552734, - "learning_rate": 0.00019999815935476103, - "loss": 46.0, - "step": 25270 - }, - { - "epoch": 1.932144427241623, - "grad_norm": 0.002243452239781618, - "learning_rate": 0.00019999815920902263, - "loss": 46.0, - "step": 25271 - }, - { - "epoch": 1.9322208842250128, - "grad_norm": 0.0007578788208775222, - "learning_rate": 0.00019999815906327845, - "loss": 46.0, - "step": 25272 - }, - { - "epoch": 1.9322973412084026, - "grad_norm": 0.0003603436052799225, - "learning_rate": 0.0001999981589175285, - "loss": 46.0, - "step": 25273 - }, - { - "epoch": 1.9323737981917923, - "grad_norm": 0.0021095096599310637, - "learning_rate": 0.00019999815877177278, - "loss": 46.0, - "step": 25274 - }, - { - "epoch": 1.932450255175182, - "grad_norm": 0.0007024008664302528, - "learning_rate": 0.0001999981586260113, - "loss": 46.0, - "step": 25275 - }, - { - "epoch": 1.9325267121585719, - "grad_norm": 0.001266497652977705, - "learning_rate": 0.00019999815848024407, - "loss": 46.0, - "step": 25276 - }, - { - "epoch": 1.9326031691419616, - "grad_norm": 0.0009625194361433387, - "learning_rate": 0.00019999815833447105, - "loss": 46.0, - "step": 25277 - }, - { - "epoch": 1.9326796261253514, - "grad_norm": 0.0005683636991307139, - "learning_rate": 0.00019999815818869227, - "loss": 46.0, - "step": 25278 - }, - { - "epoch": 1.932756083108741, - "grad_norm": 0.0009354943758808076, - "learning_rate": 0.0001999981580429077, - "loss": 46.0, - "step": 25279 - }, - { - "epoch": 1.9328325400921307, - "grad_norm": 0.0005600865697488189, - "learning_rate": 0.00019999815789711737, - "loss": 46.0, - "step": 25280 - }, - { - "epoch": 1.9329089970755204, - "grad_norm": 0.0012779291719198227, - "learning_rate": 0.0001999981577513213, - "loss": 46.0, - "step": 25281 - }, - { - "epoch": 1.93298545405891, - "grad_norm": 0.0018300687661394477, - "learning_rate": 0.00019999815760551943, - "loss": 46.0, - "step": 25282 - }, - { - "epoch": 1.9330619110422997, - "grad_norm": 0.0006032388191670179, - "learning_rate": 0.0001999981574597118, - "loss": 46.0, - "step": 25283 - }, - { - "epoch": 1.9331383680256895, - "grad_norm": 0.0010696015087887645, - "learning_rate": 0.0001999981573138984, - "loss": 46.0, - "step": 25284 - }, - { - "epoch": 1.9332148250090793, - "grad_norm": 0.0008619575528427958, - "learning_rate": 0.0001999981571680792, - "loss": 46.0, - "step": 25285 - }, - { - "epoch": 1.933291281992469, - "grad_norm": 0.0025029631797224283, - "learning_rate": 0.00019999815702225425, - "loss": 46.0, - "step": 25286 - }, - { - "epoch": 1.9333677389758588, - "grad_norm": 0.0016274285735562444, - "learning_rate": 0.00019999815687642356, - "loss": 46.0, - "step": 25287 - }, - { - "epoch": 1.9334441959592485, - "grad_norm": 0.0010962628293782473, - "learning_rate": 0.0001999981567305871, - "loss": 46.0, - "step": 25288 - }, - { - "epoch": 1.9335206529426383, - "grad_norm": 0.0013498312328010798, - "learning_rate": 0.00019999815658474482, - "loss": 46.0, - "step": 25289 - }, - { - "epoch": 1.9335971099260278, - "grad_norm": 0.0022390643134713173, - "learning_rate": 0.0001999981564388968, - "loss": 46.0, - "step": 25290 - }, - { - "epoch": 1.9336735669094176, - "grad_norm": 0.0022034659050405025, - "learning_rate": 0.00019999815629304302, - "loss": 46.0, - "step": 25291 - }, - { - "epoch": 1.9337500238928071, - "grad_norm": 0.008217648603022099, - "learning_rate": 0.00019999815614718346, - "loss": 46.0, - "step": 25292 - }, - { - "epoch": 1.933826480876197, - "grad_norm": 0.0010494983289390802, - "learning_rate": 0.00019999815600131812, - "loss": 46.0, - "step": 25293 - }, - { - "epoch": 1.9339029378595867, - "grad_norm": 0.00039278369513340294, - "learning_rate": 0.00019999815585544704, - "loss": 46.0, - "step": 25294 - }, - { - "epoch": 1.9339793948429764, - "grad_norm": 0.003571538021788001, - "learning_rate": 0.00019999815570957016, - "loss": 46.0, - "step": 25295 - }, - { - "epoch": 1.9340558518263662, - "grad_norm": 0.0031834174878895283, - "learning_rate": 0.00019999815556368753, - "loss": 46.0, - "step": 25296 - }, - { - "epoch": 1.934132308809756, - "grad_norm": 0.002159104449674487, - "learning_rate": 0.0001999981554177991, - "loss": 46.0, - "step": 25297 - }, - { - "epoch": 1.9342087657931457, - "grad_norm": 0.0008337416220456362, - "learning_rate": 0.00019999815527190493, - "loss": 46.0, - "step": 25298 - }, - { - "epoch": 1.9342852227765355, - "grad_norm": 0.0010350560769438744, - "learning_rate": 0.000199998155126005, - "loss": 46.0, - "step": 25299 - }, - { - "epoch": 1.9343616797599252, - "grad_norm": 0.0014388470444828272, - "learning_rate": 0.0001999981549800993, - "loss": 46.0, - "step": 25300 - }, - { - "epoch": 1.9344381367433148, - "grad_norm": 0.0008535103988833725, - "learning_rate": 0.0001999981548341878, - "loss": 46.0, - "step": 25301 - }, - { - "epoch": 1.9345145937267045, - "grad_norm": 0.0008447173167951405, - "learning_rate": 0.00019999815468827055, - "loss": 46.0, - "step": 25302 - }, - { - "epoch": 1.934591050710094, - "grad_norm": 0.0005892303888686001, - "learning_rate": 0.00019999815454234754, - "loss": 46.0, - "step": 25303 - }, - { - "epoch": 1.9346675076934838, - "grad_norm": 0.0025854837149381638, - "learning_rate": 0.00019999815439641872, - "loss": 46.0, - "step": 25304 - }, - { - "epoch": 1.9347439646768736, - "grad_norm": 0.002143304329365492, - "learning_rate": 0.0001999981542504842, - "loss": 46.0, - "step": 25305 - }, - { - "epoch": 1.9348204216602634, - "grad_norm": 0.0010795366251841187, - "learning_rate": 0.00019999815410454383, - "loss": 46.0, - "step": 25306 - }, - { - "epoch": 1.9348968786436531, - "grad_norm": 0.0010126135312020779, - "learning_rate": 0.00019999815395859772, - "loss": 46.0, - "step": 25307 - }, - { - "epoch": 1.9349733356270429, - "grad_norm": 0.0013496881583705544, - "learning_rate": 0.0001999981538126459, - "loss": 46.0, - "step": 25308 - }, - { - "epoch": 1.9350497926104326, - "grad_norm": 0.0009083639015443623, - "learning_rate": 0.00019999815366668824, - "loss": 46.0, - "step": 25309 - }, - { - "epoch": 1.9351262495938224, - "grad_norm": 0.0005320738418959081, - "learning_rate": 0.00019999815352072484, - "loss": 46.0, - "step": 25310 - }, - { - "epoch": 1.9352027065772122, - "grad_norm": 0.0005455210921354592, - "learning_rate": 0.00019999815337475564, - "loss": 46.0, - "step": 25311 - }, - { - "epoch": 1.9352791635606017, - "grad_norm": 0.0012280141236260533, - "learning_rate": 0.00019999815322878072, - "loss": 46.0, - "step": 25312 - }, - { - "epoch": 1.9353556205439915, - "grad_norm": 0.0026231713127344847, - "learning_rate": 0.0001999981530828, - "loss": 46.0, - "step": 25313 - }, - { - "epoch": 1.935432077527381, - "grad_norm": 0.0020567369647324085, - "learning_rate": 0.00019999815293681353, - "loss": 46.0, - "step": 25314 - }, - { - "epoch": 1.9355085345107708, - "grad_norm": 0.0010275328531861305, - "learning_rate": 0.00019999815279082126, - "loss": 46.0, - "step": 25315 - }, - { - "epoch": 1.9355849914941605, - "grad_norm": 0.0036778005305677652, - "learning_rate": 0.00019999815264482322, - "loss": 46.0, - "step": 25316 - }, - { - "epoch": 1.9356614484775503, - "grad_norm": 0.0015031290240585804, - "learning_rate": 0.00019999815249881943, - "loss": 46.0, - "step": 25317 - }, - { - "epoch": 1.93573790546094, - "grad_norm": 0.0015411186031997204, - "learning_rate": 0.00019999815235280987, - "loss": 46.0, - "step": 25318 - }, - { - "epoch": 1.9358143624443298, - "grad_norm": 0.001010417123325169, - "learning_rate": 0.00019999815220679454, - "loss": 46.0, - "step": 25319 - }, - { - "epoch": 1.9358908194277196, - "grad_norm": 0.0006620500353164971, - "learning_rate": 0.00019999815206077343, - "loss": 46.0, - "step": 25320 - }, - { - "epoch": 1.9359672764111093, - "grad_norm": 0.0012426540488377213, - "learning_rate": 0.00019999815191474658, - "loss": 46.0, - "step": 25321 - }, - { - "epoch": 1.9360437333944989, - "grad_norm": 0.0006940505118109286, - "learning_rate": 0.00019999815176871395, - "loss": 46.0, - "step": 25322 - }, - { - "epoch": 1.9361201903778886, - "grad_norm": 0.005281574092805386, - "learning_rate": 0.00019999815162267552, - "loss": 46.0, - "step": 25323 - }, - { - "epoch": 1.9361966473612784, - "grad_norm": 0.001288366038352251, - "learning_rate": 0.00019999815147663137, - "loss": 46.0, - "step": 25324 - }, - { - "epoch": 1.936273104344668, - "grad_norm": 0.0006116593722254038, - "learning_rate": 0.0001999981513305814, - "loss": 46.0, - "step": 25325 - }, - { - "epoch": 1.9363495613280577, - "grad_norm": 0.001009846106171608, - "learning_rate": 0.0001999981511845257, - "loss": 46.0, - "step": 25326 - }, - { - "epoch": 1.9364260183114475, - "grad_norm": 0.0007040408672764897, - "learning_rate": 0.0001999981510384642, - "loss": 46.0, - "step": 25327 - }, - { - "epoch": 1.9365024752948372, - "grad_norm": 0.006772223860025406, - "learning_rate": 0.00019999815089239692, - "loss": 46.0, - "step": 25328 - }, - { - "epoch": 1.936578932278227, - "grad_norm": 0.0005106626776978374, - "learning_rate": 0.0001999981507463239, - "loss": 46.0, - "step": 25329 - }, - { - "epoch": 1.9366553892616167, - "grad_norm": 0.0006273649632930756, - "learning_rate": 0.00019999815060024512, - "loss": 46.0, - "step": 25330 - }, - { - "epoch": 1.9367318462450065, - "grad_norm": 0.000693241716362536, - "learning_rate": 0.00019999815045416056, - "loss": 46.0, - "step": 25331 - }, - { - "epoch": 1.9368083032283963, - "grad_norm": 0.001070735976099968, - "learning_rate": 0.00019999815030807022, - "loss": 46.0, - "step": 25332 - }, - { - "epoch": 1.9368847602117858, - "grad_norm": 0.0012118174927309155, - "learning_rate": 0.00019999815016197414, - "loss": 46.0, - "step": 25333 - }, - { - "epoch": 1.9369612171951756, - "grad_norm": 0.008624296635389328, - "learning_rate": 0.00019999815001587226, - "loss": 46.0, - "step": 25334 - }, - { - "epoch": 1.9370376741785653, - "grad_norm": 0.0006888203206472099, - "learning_rate": 0.0001999981498697646, - "loss": 46.0, - "step": 25335 - }, - { - "epoch": 1.9371141311619549, - "grad_norm": 0.0005594720132648945, - "learning_rate": 0.0001999981497236512, - "loss": 46.0, - "step": 25336 - }, - { - "epoch": 1.9371905881453446, - "grad_norm": 0.0017651293892413378, - "learning_rate": 0.00019999814957753203, - "loss": 46.0, - "step": 25337 - }, - { - "epoch": 1.9372670451287344, - "grad_norm": 0.0004064141830895096, - "learning_rate": 0.00019999814943140706, - "loss": 46.0, - "step": 25338 - }, - { - "epoch": 1.9373435021121241, - "grad_norm": 0.0005659838207066059, - "learning_rate": 0.00019999814928527637, - "loss": 46.0, - "step": 25339 - }, - { - "epoch": 1.937419959095514, - "grad_norm": 0.0018348086159676313, - "learning_rate": 0.00019999814913913987, - "loss": 46.0, - "step": 25340 - }, - { - "epoch": 1.9374964160789037, - "grad_norm": 0.0003551933041308075, - "learning_rate": 0.00019999814899299763, - "loss": 46.0, - "step": 25341 - }, - { - "epoch": 1.9375728730622934, - "grad_norm": 0.0017288707895204425, - "learning_rate": 0.0001999981488468496, - "loss": 46.0, - "step": 25342 - }, - { - "epoch": 1.9376493300456832, - "grad_norm": 0.002727646380662918, - "learning_rate": 0.0001999981487006958, - "loss": 46.0, - "step": 25343 - }, - { - "epoch": 1.9377257870290727, - "grad_norm": 0.002783076139166951, - "learning_rate": 0.00019999814855453622, - "loss": 46.0, - "step": 25344 - }, - { - "epoch": 1.9378022440124625, - "grad_norm": 0.0007516831392422318, - "learning_rate": 0.00019999814840837088, - "loss": 46.0, - "step": 25345 - }, - { - "epoch": 1.9378787009958522, - "grad_norm": 0.0015455958200618625, - "learning_rate": 0.0001999981482621998, - "loss": 46.0, - "step": 25346 - }, - { - "epoch": 1.9379551579792418, - "grad_norm": 0.003510406706482172, - "learning_rate": 0.0001999981481160229, - "loss": 46.0, - "step": 25347 - }, - { - "epoch": 1.9380316149626315, - "grad_norm": 0.0008550632046535611, - "learning_rate": 0.0001999981479698403, - "loss": 46.0, - "step": 25348 - }, - { - "epoch": 1.9381080719460213, - "grad_norm": 0.0004901379579678178, - "learning_rate": 0.00019999814782365184, - "loss": 46.0, - "step": 25349 - }, - { - "epoch": 1.938184528929411, - "grad_norm": 0.0011447896249592304, - "learning_rate": 0.00019999814767745767, - "loss": 46.0, - "step": 25350 - }, - { - "epoch": 1.9382609859128008, - "grad_norm": 0.0068277595564723015, - "learning_rate": 0.00019999814753125775, - "loss": 46.0, - "step": 25351 - }, - { - "epoch": 1.9383374428961906, - "grad_norm": 0.002613328630104661, - "learning_rate": 0.00019999814738505203, - "loss": 46.0, - "step": 25352 - }, - { - "epoch": 1.9384138998795803, - "grad_norm": 0.001656971639022231, - "learning_rate": 0.0001999981472388405, - "loss": 46.0, - "step": 25353 - }, - { - "epoch": 1.93849035686297, - "grad_norm": 0.00023466994753107429, - "learning_rate": 0.00019999814709262327, - "loss": 46.0, - "step": 25354 - }, - { - "epoch": 1.9385668138463596, - "grad_norm": 0.001167806563898921, - "learning_rate": 0.00019999814694640026, - "loss": 46.0, - "step": 25355 - }, - { - "epoch": 1.9386432708297494, - "grad_norm": 0.001004403573460877, - "learning_rate": 0.00019999814680017144, - "loss": 46.0, - "step": 25356 - }, - { - "epoch": 1.9387197278131392, - "grad_norm": 0.001383994473144412, - "learning_rate": 0.00019999814665393686, - "loss": 46.0, - "step": 25357 - }, - { - "epoch": 1.9387961847965287, - "grad_norm": 0.0012152658309787512, - "learning_rate": 0.00019999814650769655, - "loss": 46.0, - "step": 25358 - }, - { - "epoch": 1.9388726417799185, - "grad_norm": 0.0019428218947723508, - "learning_rate": 0.00019999814636145042, - "loss": 46.0, - "step": 25359 - }, - { - "epoch": 1.9389490987633082, - "grad_norm": 0.0024508978240191936, - "learning_rate": 0.00019999814621519856, - "loss": 46.0, - "step": 25360 - }, - { - "epoch": 1.939025555746698, - "grad_norm": 0.001649106852710247, - "learning_rate": 0.00019999814606894088, - "loss": 46.0, - "step": 25361 - }, - { - "epoch": 1.9391020127300878, - "grad_norm": 0.0007924661040306091, - "learning_rate": 0.00019999814592267748, - "loss": 46.0, - "step": 25362 - }, - { - "epoch": 1.9391784697134775, - "grad_norm": 0.0015159822069108486, - "learning_rate": 0.0001999981457764083, - "loss": 46.0, - "step": 25363 - }, - { - "epoch": 1.9392549266968673, - "grad_norm": 0.00569218909367919, - "learning_rate": 0.00019999814563013336, - "loss": 46.0, - "step": 25364 - }, - { - "epoch": 1.939331383680257, - "grad_norm": 0.0020105738658457994, - "learning_rate": 0.00019999814548385264, - "loss": 46.0, - "step": 25365 - }, - { - "epoch": 1.9394078406636466, - "grad_norm": 0.0033240276388823986, - "learning_rate": 0.00019999814533756615, - "loss": 46.0, - "step": 25366 - }, - { - "epoch": 1.9394842976470363, - "grad_norm": 0.0016309726051986217, - "learning_rate": 0.00019999814519127389, - "loss": 46.0, - "step": 25367 - }, - { - "epoch": 1.939560754630426, - "grad_norm": 0.0010426866356283426, - "learning_rate": 0.00019999814504497585, - "loss": 46.0, - "step": 25368 - }, - { - "epoch": 1.9396372116138156, - "grad_norm": 0.0010331958765164018, - "learning_rate": 0.00019999814489867206, - "loss": 46.0, - "step": 25369 - }, - { - "epoch": 1.9397136685972054, - "grad_norm": 0.0015260276850312948, - "learning_rate": 0.0001999981447523625, - "loss": 46.0, - "step": 25370 - }, - { - "epoch": 1.9397901255805952, - "grad_norm": 0.005961915012449026, - "learning_rate": 0.00019999814460604717, - "loss": 46.0, - "step": 25371 - }, - { - "epoch": 1.939866582563985, - "grad_norm": 0.002002912340685725, - "learning_rate": 0.00019999814445972603, - "loss": 46.0, - "step": 25372 - }, - { - "epoch": 1.9399430395473747, - "grad_norm": 0.0018917162669822574, - "learning_rate": 0.00019999814431339918, - "loss": 46.0, - "step": 25373 - }, - { - "epoch": 1.9400194965307644, - "grad_norm": 0.0021304097026586533, - "learning_rate": 0.00019999814416706653, - "loss": 46.0, - "step": 25374 - }, - { - "epoch": 1.9400959535141542, - "grad_norm": 0.002490064362064004, - "learning_rate": 0.00019999814402072813, - "loss": 46.0, - "step": 25375 - }, - { - "epoch": 1.940172410497544, - "grad_norm": 0.009650619700551033, - "learning_rate": 0.0001999981438743839, - "loss": 46.0, - "step": 25376 - }, - { - "epoch": 1.9402488674809335, - "grad_norm": 0.001609961618669331, - "learning_rate": 0.00019999814372803396, - "loss": 46.0, - "step": 25377 - }, - { - "epoch": 1.9403253244643233, - "grad_norm": 0.0005743880174122751, - "learning_rate": 0.00019999814358167826, - "loss": 46.0, - "step": 25378 - }, - { - "epoch": 1.940401781447713, - "grad_norm": 0.001369268400594592, - "learning_rate": 0.00019999814343531677, - "loss": 46.0, - "step": 25379 - }, - { - "epoch": 1.9404782384311026, - "grad_norm": 0.0005733421421609819, - "learning_rate": 0.0001999981432889495, - "loss": 46.0, - "step": 25380 - }, - { - "epoch": 1.9405546954144923, - "grad_norm": 0.0019639874808490276, - "learning_rate": 0.00019999814314257646, - "loss": 46.0, - "step": 25381 - }, - { - "epoch": 1.940631152397882, - "grad_norm": 0.0008799354545772076, - "learning_rate": 0.00019999814299619768, - "loss": 46.0, - "step": 25382 - }, - { - "epoch": 1.9407076093812718, - "grad_norm": 0.00126199284568429, - "learning_rate": 0.0001999981428498131, - "loss": 46.0, - "step": 25383 - }, - { - "epoch": 1.9407840663646616, - "grad_norm": 0.0015734905609861016, - "learning_rate": 0.00019999814270342276, - "loss": 46.0, - "step": 25384 - }, - { - "epoch": 1.9408605233480514, - "grad_norm": 0.0005148928030394018, - "learning_rate": 0.00019999814255702665, - "loss": 46.0, - "step": 25385 - }, - { - "epoch": 1.9409369803314411, - "grad_norm": 0.0009339142707176507, - "learning_rate": 0.00019999814241062477, - "loss": 46.0, - "step": 25386 - }, - { - "epoch": 1.941013437314831, - "grad_norm": 0.004559422377496958, - "learning_rate": 0.00019999814226421712, - "loss": 46.0, - "step": 25387 - }, - { - "epoch": 1.9410898942982204, - "grad_norm": 0.0006120121688582003, - "learning_rate": 0.0001999981421178037, - "loss": 46.0, - "step": 25388 - }, - { - "epoch": 1.9411663512816102, - "grad_norm": 0.000705523241776973, - "learning_rate": 0.00019999814197138452, - "loss": 46.0, - "step": 25389 - }, - { - "epoch": 1.941242808265, - "grad_norm": 0.0014590610517188907, - "learning_rate": 0.00019999814182495958, - "loss": 46.0, - "step": 25390 - }, - { - "epoch": 1.9413192652483895, - "grad_norm": 0.004136715084314346, - "learning_rate": 0.00019999814167852886, - "loss": 46.0, - "step": 25391 - }, - { - "epoch": 1.9413957222317793, - "grad_norm": 0.0006367635796777904, - "learning_rate": 0.00019999814153209237, - "loss": 46.0, - "step": 25392 - }, - { - "epoch": 1.941472179215169, - "grad_norm": 0.002046600915491581, - "learning_rate": 0.00019999814138565007, - "loss": 46.0, - "step": 25393 - }, - { - "epoch": 1.9415486361985588, - "grad_norm": 0.001310759806074202, - "learning_rate": 0.00019999814123920206, - "loss": 46.0, - "step": 25394 - }, - { - "epoch": 1.9416250931819485, - "grad_norm": 0.0021803409326821566, - "learning_rate": 0.00019999814109274825, - "loss": 46.0, - "step": 25395 - }, - { - "epoch": 1.9417015501653383, - "grad_norm": 0.0022339874412864447, - "learning_rate": 0.0001999981409462887, - "loss": 46.0, - "step": 25396 - }, - { - "epoch": 1.941778007148728, - "grad_norm": 0.001016075606457889, - "learning_rate": 0.00019999814079982336, - "loss": 46.0, - "step": 25397 - }, - { - "epoch": 1.9418544641321178, - "grad_norm": 0.001984353642910719, - "learning_rate": 0.00019999814065335222, - "loss": 46.0, - "step": 25398 - }, - { - "epoch": 1.9419309211155074, - "grad_norm": 0.001188677386380732, - "learning_rate": 0.00019999814050687535, - "loss": 46.0, - "step": 25399 - }, - { - "epoch": 1.9420073780988971, - "grad_norm": 0.0018012761138379574, - "learning_rate": 0.0001999981403603927, - "loss": 46.0, - "step": 25400 - }, - { - "epoch": 1.9420838350822869, - "grad_norm": 0.0009258925565518439, - "learning_rate": 0.0001999981402139043, - "loss": 46.0, - "step": 25401 - }, - { - "epoch": 1.9421602920656764, - "grad_norm": 0.00034944259095937014, - "learning_rate": 0.00019999814006741012, - "loss": 46.0, - "step": 25402 - }, - { - "epoch": 1.9422367490490662, - "grad_norm": 0.0010209300089627504, - "learning_rate": 0.00019999813992091015, - "loss": 46.0, - "step": 25403 - }, - { - "epoch": 1.942313206032456, - "grad_norm": 0.0007083873497322202, - "learning_rate": 0.0001999981397744044, - "loss": 46.0, - "step": 25404 - }, - { - "epoch": 1.9423896630158457, - "grad_norm": 0.0010984893888235092, - "learning_rate": 0.00019999813962789294, - "loss": 46.0, - "step": 25405 - }, - { - "epoch": 1.9424661199992355, - "grad_norm": 0.0010761962039396167, - "learning_rate": 0.00019999813948137565, - "loss": 46.0, - "step": 25406 - }, - { - "epoch": 1.9425425769826252, - "grad_norm": 0.0017928986344486475, - "learning_rate": 0.00019999813933485263, - "loss": 46.0, - "step": 25407 - }, - { - "epoch": 1.942619033966015, - "grad_norm": 0.006016286555677652, - "learning_rate": 0.00019999813918832382, - "loss": 46.0, - "step": 25408 - }, - { - "epoch": 1.9426954909494047, - "grad_norm": 0.0014520544791594148, - "learning_rate": 0.00019999813904178924, - "loss": 46.0, - "step": 25409 - }, - { - "epoch": 1.9427719479327943, - "grad_norm": 0.0009543381165713072, - "learning_rate": 0.0001999981388952489, - "loss": 46.0, - "step": 25410 - }, - { - "epoch": 1.942848404916184, - "grad_norm": 0.0006197650800459087, - "learning_rate": 0.0001999981387487028, - "loss": 46.0, - "step": 25411 - }, - { - "epoch": 1.9429248618995738, - "grad_norm": 0.0014205684419721365, - "learning_rate": 0.00019999813860215092, - "loss": 46.0, - "step": 25412 - }, - { - "epoch": 1.9430013188829633, - "grad_norm": 0.0011642139870673418, - "learning_rate": 0.00019999813845559324, - "loss": 46.0, - "step": 25413 - }, - { - "epoch": 1.943077775866353, - "grad_norm": 0.0005908952443860471, - "learning_rate": 0.00019999813830902984, - "loss": 46.0, - "step": 25414 - }, - { - "epoch": 1.9431542328497429, - "grad_norm": 0.0007647752063348889, - "learning_rate": 0.00019999813816246067, - "loss": 46.0, - "step": 25415 - }, - { - "epoch": 1.9432306898331326, - "grad_norm": 0.0004617222584784031, - "learning_rate": 0.0001999981380158857, - "loss": 46.0, - "step": 25416 - }, - { - "epoch": 1.9433071468165224, - "grad_norm": 0.0007163193076848984, - "learning_rate": 0.00019999813786930498, - "loss": 46.0, - "step": 25417 - }, - { - "epoch": 1.9433836037999122, - "grad_norm": 0.0006509136292152107, - "learning_rate": 0.00019999813772271846, - "loss": 46.0, - "step": 25418 - }, - { - "epoch": 1.943460060783302, - "grad_norm": 0.0017256282735615969, - "learning_rate": 0.00019999813757612622, - "loss": 46.0, - "step": 25419 - }, - { - "epoch": 1.9435365177666917, - "grad_norm": 0.0009750222670845687, - "learning_rate": 0.00019999813742952818, - "loss": 46.0, - "step": 25420 - }, - { - "epoch": 1.9436129747500812, - "grad_norm": 0.0008193259709514678, - "learning_rate": 0.00019999813728292435, - "loss": 46.0, - "step": 25421 - }, - { - "epoch": 1.943689431733471, - "grad_norm": 0.0009368039900436997, - "learning_rate": 0.0001999981371363148, - "loss": 46.0, - "step": 25422 - }, - { - "epoch": 1.9437658887168605, - "grad_norm": 0.0009751998004503548, - "learning_rate": 0.00019999813698969946, - "loss": 46.0, - "step": 25423 - }, - { - "epoch": 1.9438423457002503, - "grad_norm": 0.003908711019903421, - "learning_rate": 0.00019999813684307832, - "loss": 46.0, - "step": 25424 - }, - { - "epoch": 1.94391880268364, - "grad_norm": 0.0008872703183442354, - "learning_rate": 0.00019999813669645145, - "loss": 46.0, - "step": 25425 - }, - { - "epoch": 1.9439952596670298, - "grad_norm": 0.003220543498173356, - "learning_rate": 0.0001999981365498188, - "loss": 46.0, - "step": 25426 - }, - { - "epoch": 1.9440717166504196, - "grad_norm": 0.002107213716953993, - "learning_rate": 0.0001999981364031804, - "loss": 46.0, - "step": 25427 - }, - { - "epoch": 1.9441481736338093, - "grad_norm": 0.0011610285146161914, - "learning_rate": 0.0001999981362565362, - "loss": 46.0, - "step": 25428 - }, - { - "epoch": 1.944224630617199, - "grad_norm": 0.0011864140396937728, - "learning_rate": 0.00019999813610988625, - "loss": 46.0, - "step": 25429 - }, - { - "epoch": 1.9443010876005888, - "grad_norm": 0.0018960388842970133, - "learning_rate": 0.00019999813596323048, - "loss": 46.0, - "step": 25430 - }, - { - "epoch": 1.9443775445839786, - "grad_norm": 0.0006463818717747927, - "learning_rate": 0.00019999813581656902, - "loss": 46.0, - "step": 25431 - }, - { - "epoch": 1.9444540015673681, - "grad_norm": 0.0008672216790728271, - "learning_rate": 0.00019999813566990175, - "loss": 46.0, - "step": 25432 - }, - { - "epoch": 1.944530458550758, - "grad_norm": 0.0021202166099101305, - "learning_rate": 0.0001999981355232287, - "loss": 46.0, - "step": 25433 - }, - { - "epoch": 1.9446069155341474, - "grad_norm": 0.001004306715913117, - "learning_rate": 0.0001999981353765499, - "loss": 46.0, - "step": 25434 - }, - { - "epoch": 1.9446833725175372, - "grad_norm": 0.0028675622306764126, - "learning_rate": 0.00019999813522986532, - "loss": 46.0, - "step": 25435 - }, - { - "epoch": 1.944759829500927, - "grad_norm": 0.000756549765355885, - "learning_rate": 0.00019999813508317498, - "loss": 46.0, - "step": 25436 - }, - { - "epoch": 1.9448362864843167, - "grad_norm": 0.0008775634923949838, - "learning_rate": 0.00019999813493647888, - "loss": 46.0, - "step": 25437 - }, - { - "epoch": 1.9449127434677065, - "grad_norm": 0.000666490406729281, - "learning_rate": 0.00019999813478977697, - "loss": 46.0, - "step": 25438 - }, - { - "epoch": 1.9449892004510962, - "grad_norm": 0.0005975561798550189, - "learning_rate": 0.00019999813464306932, - "loss": 46.0, - "step": 25439 - }, - { - "epoch": 1.945065657434486, - "grad_norm": 0.0012797012459486723, - "learning_rate": 0.00019999813449635593, - "loss": 46.0, - "step": 25440 - }, - { - "epoch": 1.9451421144178758, - "grad_norm": 0.0004020068736281246, - "learning_rate": 0.0001999981343496367, - "loss": 46.0, - "step": 25441 - }, - { - "epoch": 1.9452185714012655, - "grad_norm": 0.0005726832896471024, - "learning_rate": 0.00019999813420291176, - "loss": 46.0, - "step": 25442 - }, - { - "epoch": 1.945295028384655, - "grad_norm": 0.0012177075259387493, - "learning_rate": 0.000199998134056181, - "loss": 46.0, - "step": 25443 - }, - { - "epoch": 1.9453714853680448, - "grad_norm": 0.00281155901029706, - "learning_rate": 0.00019999813390944452, - "loss": 46.0, - "step": 25444 - }, - { - "epoch": 1.9454479423514344, - "grad_norm": 0.0035891542211174965, - "learning_rate": 0.00019999813376270225, - "loss": 46.0, - "step": 25445 - }, - { - "epoch": 1.9455243993348241, - "grad_norm": 0.0025906874798238277, - "learning_rate": 0.00019999813361595422, - "loss": 46.0, - "step": 25446 - }, - { - "epoch": 1.945600856318214, - "grad_norm": 0.000746597012039274, - "learning_rate": 0.0001999981334692004, - "loss": 46.0, - "step": 25447 - }, - { - "epoch": 1.9456773133016037, - "grad_norm": 0.0015209836419671774, - "learning_rate": 0.00019999813332244082, - "loss": 46.0, - "step": 25448 - }, - { - "epoch": 1.9457537702849934, - "grad_norm": 0.010853101499378681, - "learning_rate": 0.0001999981331756755, - "loss": 46.0, - "step": 25449 - }, - { - "epoch": 1.9458302272683832, - "grad_norm": 0.0025025890208780766, - "learning_rate": 0.00019999813302890438, - "loss": 46.0, - "step": 25450 - }, - { - "epoch": 1.945906684251773, - "grad_norm": 0.0013881336199119687, - "learning_rate": 0.00019999813288212748, - "loss": 46.0, - "step": 25451 - }, - { - "epoch": 1.9459831412351627, - "grad_norm": 0.0013710112543776631, - "learning_rate": 0.00019999813273534483, - "loss": 46.0, - "step": 25452 - }, - { - "epoch": 1.9460595982185522, - "grad_norm": 0.0022558008786290884, - "learning_rate": 0.0001999981325885564, - "loss": 46.0, - "step": 25453 - }, - { - "epoch": 1.946136055201942, - "grad_norm": 0.0016930341953411698, - "learning_rate": 0.00019999813244176223, - "loss": 46.0, - "step": 25454 - }, - { - "epoch": 1.9462125121853318, - "grad_norm": 0.004416623618453741, - "learning_rate": 0.00019999813229496224, - "loss": 46.0, - "step": 25455 - }, - { - "epoch": 1.9462889691687213, - "grad_norm": 0.0019515713211148977, - "learning_rate": 0.0001999981321481565, - "loss": 46.0, - "step": 25456 - }, - { - "epoch": 1.946365426152111, - "grad_norm": 0.0008018958615139127, - "learning_rate": 0.000199998132001345, - "loss": 46.0, - "step": 25457 - }, - { - "epoch": 1.9464418831355008, - "grad_norm": 0.0014892738545313478, - "learning_rate": 0.00019999813185452774, - "loss": 46.0, - "step": 25458 - }, - { - "epoch": 1.9465183401188906, - "grad_norm": 0.0015683931997045875, - "learning_rate": 0.0001999981317077047, - "loss": 46.0, - "step": 25459 - }, - { - "epoch": 1.9465947971022803, - "grad_norm": 0.0017586903413757682, - "learning_rate": 0.0001999981315608759, - "loss": 46.0, - "step": 25460 - }, - { - "epoch": 1.94667125408567, - "grad_norm": 0.003979188855737448, - "learning_rate": 0.0001999981314140413, - "loss": 46.0, - "step": 25461 - }, - { - "epoch": 1.9467477110690599, - "grad_norm": 0.00048734399024397135, - "learning_rate": 0.00019999813126720097, - "loss": 46.0, - "step": 25462 - }, - { - "epoch": 1.9468241680524496, - "grad_norm": 0.0008830137667246163, - "learning_rate": 0.00019999813112035484, - "loss": 46.0, - "step": 25463 - }, - { - "epoch": 1.9469006250358392, - "grad_norm": 0.0017410836881026626, - "learning_rate": 0.00019999813097350296, - "loss": 46.0, - "step": 25464 - }, - { - "epoch": 1.946977082019229, - "grad_norm": 0.0008175763650797307, - "learning_rate": 0.00019999813082664529, - "loss": 46.0, - "step": 25465 - }, - { - "epoch": 1.9470535390026187, - "grad_norm": 0.0017329795518890023, - "learning_rate": 0.0001999981306797819, - "loss": 46.0, - "step": 25466 - }, - { - "epoch": 1.9471299959860082, - "grad_norm": 0.001185034867376089, - "learning_rate": 0.0001999981305329127, - "loss": 46.0, - "step": 25467 - }, - { - "epoch": 1.947206452969398, - "grad_norm": 0.0005753164296038449, - "learning_rate": 0.00019999813038603772, - "loss": 46.0, - "step": 25468 - }, - { - "epoch": 1.9472829099527877, - "grad_norm": 0.0025529414415359497, - "learning_rate": 0.000199998130239157, - "loss": 46.0, - "step": 25469 - }, - { - "epoch": 1.9473593669361775, - "grad_norm": 0.0013759658904746175, - "learning_rate": 0.00019999813009227048, - "loss": 46.0, - "step": 25470 - }, - { - "epoch": 1.9474358239195673, - "grad_norm": 0.0007008545217104256, - "learning_rate": 0.00019999812994537822, - "loss": 46.0, - "step": 25471 - }, - { - "epoch": 1.947512280902957, - "grad_norm": 0.0006326701841317117, - "learning_rate": 0.00019999812979848018, - "loss": 46.0, - "step": 25472 - }, - { - "epoch": 1.9475887378863468, - "grad_norm": 0.0009554935386404395, - "learning_rate": 0.00019999812965157637, - "loss": 46.0, - "step": 25473 - }, - { - "epoch": 1.9476651948697365, - "grad_norm": 0.0008366239489987493, - "learning_rate": 0.00019999812950466676, - "loss": 46.0, - "step": 25474 - }, - { - "epoch": 1.947741651853126, - "grad_norm": 0.011264491826295853, - "learning_rate": 0.00019999812935775143, - "loss": 46.0, - "step": 25475 - }, - { - "epoch": 1.9478181088365158, - "grad_norm": 0.001632630592212081, - "learning_rate": 0.0001999981292108303, - "loss": 46.0, - "step": 25476 - }, - { - "epoch": 1.9478945658199056, - "grad_norm": 0.0018762430408969522, - "learning_rate": 0.00019999812906390342, - "loss": 46.0, - "step": 25477 - }, - { - "epoch": 1.9479710228032951, - "grad_norm": 0.0006108759553171694, - "learning_rate": 0.00019999812891697077, - "loss": 46.0, - "step": 25478 - }, - { - "epoch": 1.948047479786685, - "grad_norm": 0.006483633536845446, - "learning_rate": 0.00019999812877003235, - "loss": 46.0, - "step": 25479 - }, - { - "epoch": 1.9481239367700747, - "grad_norm": 0.001171313808299601, - "learning_rate": 0.00019999812862308812, - "loss": 46.0, - "step": 25480 - }, - { - "epoch": 1.9482003937534644, - "grad_norm": 0.00031468638917431235, - "learning_rate": 0.00019999812847613818, - "loss": 46.0, - "step": 25481 - }, - { - "epoch": 1.9482768507368542, - "grad_norm": 0.002096825046464801, - "learning_rate": 0.00019999812832918244, - "loss": 46.0, - "step": 25482 - }, - { - "epoch": 1.948353307720244, - "grad_norm": 0.0009315012721344829, - "learning_rate": 0.00019999812818222092, - "loss": 46.0, - "step": 25483 - }, - { - "epoch": 1.9484297647036337, - "grad_norm": 0.0004553587350528687, - "learning_rate": 0.00019999812803525366, - "loss": 46.0, - "step": 25484 - }, - { - "epoch": 1.9485062216870235, - "grad_norm": 0.0005571578512899578, - "learning_rate": 0.00019999812788828062, - "loss": 46.0, - "step": 25485 - }, - { - "epoch": 1.948582678670413, - "grad_norm": 0.0007103788084350526, - "learning_rate": 0.0001999981277413018, - "loss": 46.0, - "step": 25486 - }, - { - "epoch": 1.9486591356538028, - "grad_norm": 0.001896351808682084, - "learning_rate": 0.00019999812759431725, - "loss": 46.0, - "step": 25487 - }, - { - "epoch": 1.9487355926371925, - "grad_norm": 0.0005857925862073898, - "learning_rate": 0.00019999812744732687, - "loss": 46.0, - "step": 25488 - }, - { - "epoch": 1.948812049620582, - "grad_norm": 0.0006940364255569875, - "learning_rate": 0.00019999812730033076, - "loss": 46.0, - "step": 25489 - }, - { - "epoch": 1.9488885066039718, - "grad_norm": 0.0014010635204613209, - "learning_rate": 0.00019999812715332886, - "loss": 46.0, - "step": 25490 - }, - { - "epoch": 1.9489649635873616, - "grad_norm": 0.0005416049971245229, - "learning_rate": 0.0001999981270063212, - "loss": 46.0, - "step": 25491 - }, - { - "epoch": 1.9490414205707514, - "grad_norm": 0.003481562715023756, - "learning_rate": 0.00019999812685930776, - "loss": 46.0, - "step": 25492 - }, - { - "epoch": 1.9491178775541411, - "grad_norm": 0.0008785219397395849, - "learning_rate": 0.00019999812671228856, - "loss": 46.0, - "step": 25493 - }, - { - "epoch": 1.9491943345375309, - "grad_norm": 0.0005087285535410047, - "learning_rate": 0.0001999981265652636, - "loss": 46.0, - "step": 25494 - }, - { - "epoch": 1.9492707915209206, - "grad_norm": 0.0005706212832592428, - "learning_rate": 0.00019999812641823288, - "loss": 46.0, - "step": 25495 - }, - { - "epoch": 1.9493472485043104, - "grad_norm": 0.0011976402020081878, - "learning_rate": 0.00019999812627119639, - "loss": 46.0, - "step": 25496 - }, - { - "epoch": 1.9494237054877, - "grad_norm": 0.0029671434313058853, - "learning_rate": 0.0001999981261241541, - "loss": 46.0, - "step": 25497 - }, - { - "epoch": 1.9495001624710897, - "grad_norm": 0.000705836690030992, - "learning_rate": 0.00019999812597710606, - "loss": 46.0, - "step": 25498 - }, - { - "epoch": 1.9495766194544795, - "grad_norm": 0.0009436235413886607, - "learning_rate": 0.00019999812583005225, - "loss": 46.0, - "step": 25499 - }, - { - "epoch": 1.949653076437869, - "grad_norm": 0.001094362000003457, - "learning_rate": 0.00019999812568299266, - "loss": 46.0, - "step": 25500 - }, - { - "epoch": 1.9497295334212588, - "grad_norm": 0.0011653897818177938, - "learning_rate": 0.0001999981255359273, - "loss": 46.0, - "step": 25501 - }, - { - "epoch": 1.9498059904046485, - "grad_norm": 0.001219184254296124, - "learning_rate": 0.00019999812538885618, - "loss": 46.0, - "step": 25502 - }, - { - "epoch": 1.9498824473880383, - "grad_norm": 0.0019858512096107006, - "learning_rate": 0.0001999981252417793, - "loss": 46.0, - "step": 25503 - }, - { - "epoch": 1.949958904371428, - "grad_norm": 0.0009545744978822768, - "learning_rate": 0.00019999812509469665, - "loss": 46.0, - "step": 25504 - }, - { - "epoch": 1.9500353613548178, - "grad_norm": 0.0014235759153962135, - "learning_rate": 0.0001999981249476082, - "loss": 46.0, - "step": 25505 - }, - { - "epoch": 1.9501118183382076, - "grad_norm": 0.0008185433689504862, - "learning_rate": 0.000199998124800514, - "loss": 46.0, - "step": 25506 - }, - { - "epoch": 1.9501882753215973, - "grad_norm": 0.0014334002044051886, - "learning_rate": 0.00019999812465341403, - "loss": 46.0, - "step": 25507 - }, - { - "epoch": 1.9502647323049869, - "grad_norm": 0.0012235109461471438, - "learning_rate": 0.0001999981245063083, - "loss": 46.0, - "step": 25508 - }, - { - "epoch": 1.9503411892883766, - "grad_norm": 0.002971385605633259, - "learning_rate": 0.0001999981243591968, - "loss": 46.0, - "step": 25509 - }, - { - "epoch": 1.9504176462717664, - "grad_norm": 0.0035227371845394373, - "learning_rate": 0.0001999981242120795, - "loss": 46.0, - "step": 25510 - }, - { - "epoch": 1.950494103255156, - "grad_norm": 0.001821687095798552, - "learning_rate": 0.00019999812406495645, - "loss": 46.0, - "step": 25511 - }, - { - "epoch": 1.9505705602385457, - "grad_norm": 0.010380851104855537, - "learning_rate": 0.00019999812391782764, - "loss": 46.0, - "step": 25512 - }, - { - "epoch": 1.9506470172219355, - "grad_norm": 0.008459806442260742, - "learning_rate": 0.00019999812377069308, - "loss": 46.0, - "step": 25513 - }, - { - "epoch": 1.9507234742053252, - "grad_norm": 0.0032521216198801994, - "learning_rate": 0.00019999812362355272, - "loss": 46.0, - "step": 25514 - }, - { - "epoch": 1.950799931188715, - "grad_norm": 0.009646959602832794, - "learning_rate": 0.0001999981234764066, - "loss": 46.0, - "step": 25515 - }, - { - "epoch": 1.9508763881721047, - "grad_norm": 0.0008727413951419294, - "learning_rate": 0.0001999981233292547, - "loss": 46.0, - "step": 25516 - }, - { - "epoch": 1.9509528451554945, - "grad_norm": 0.0005606000777333975, - "learning_rate": 0.00019999812318209704, - "loss": 46.0, - "step": 25517 - }, - { - "epoch": 1.9510293021388843, - "grad_norm": 0.001337242778390646, - "learning_rate": 0.00019999812303493362, - "loss": 46.0, - "step": 25518 - }, - { - "epoch": 1.9511057591222738, - "grad_norm": 0.0008637694409117103, - "learning_rate": 0.0001999981228877644, - "loss": 46.0, - "step": 25519 - }, - { - "epoch": 1.9511822161056636, - "grad_norm": 0.0010775624541565776, - "learning_rate": 0.00019999812274058943, - "loss": 46.0, - "step": 25520 - }, - { - "epoch": 1.9512586730890533, - "grad_norm": 0.0016888573300093412, - "learning_rate": 0.00019999812259340868, - "loss": 46.0, - "step": 25521 - }, - { - "epoch": 1.9513351300724429, - "grad_norm": 0.0009854328818619251, - "learning_rate": 0.0001999981224462222, - "loss": 46.0, - "step": 25522 - }, - { - "epoch": 1.9514115870558326, - "grad_norm": 0.0014013720210641623, - "learning_rate": 0.0001999981222990299, - "loss": 46.0, - "step": 25523 - }, - { - "epoch": 1.9514880440392224, - "grad_norm": 0.0007134824991226196, - "learning_rate": 0.00019999812215183184, - "loss": 46.0, - "step": 25524 - }, - { - "epoch": 1.9515645010226121, - "grad_norm": 0.02319967746734619, - "learning_rate": 0.00019999812200462803, - "loss": 46.0, - "step": 25525 - }, - { - "epoch": 1.951640958006002, - "grad_norm": 0.001971699995920062, - "learning_rate": 0.00019999812185741848, - "loss": 46.0, - "step": 25526 - }, - { - "epoch": 1.9517174149893917, - "grad_norm": 0.0007795064593665302, - "learning_rate": 0.00019999812171020312, - "loss": 46.0, - "step": 25527 - }, - { - "epoch": 1.9517938719727814, - "grad_norm": 0.002493527950718999, - "learning_rate": 0.000199998121562982, - "loss": 46.0, - "step": 25528 - }, - { - "epoch": 1.9518703289561712, - "grad_norm": 0.0013603090774267912, - "learning_rate": 0.0001999981214157551, - "loss": 46.0, - "step": 25529 - }, - { - "epoch": 1.9519467859395607, - "grad_norm": 0.0010205727303400636, - "learning_rate": 0.0001999981212685224, - "loss": 46.0, - "step": 25530 - }, - { - "epoch": 1.9520232429229505, - "grad_norm": 0.0007125125266611576, - "learning_rate": 0.000199998121121284, - "loss": 46.0, - "step": 25531 - }, - { - "epoch": 1.9520996999063402, - "grad_norm": 0.0007476358441635966, - "learning_rate": 0.0001999981209740398, - "loss": 46.0, - "step": 25532 - }, - { - "epoch": 1.9521761568897298, - "grad_norm": 0.0009519326267763972, - "learning_rate": 0.00019999812082678982, - "loss": 46.0, - "step": 25533 - }, - { - "epoch": 1.9522526138731195, - "grad_norm": 0.0021328977309167385, - "learning_rate": 0.00019999812067953408, - "loss": 46.0, - "step": 25534 - }, - { - "epoch": 1.9523290708565093, - "grad_norm": 0.0036633401177823544, - "learning_rate": 0.00019999812053227256, - "loss": 46.0, - "step": 25535 - }, - { - "epoch": 1.952405527839899, - "grad_norm": 0.0006054287077859044, - "learning_rate": 0.0001999981203850053, - "loss": 46.0, - "step": 25536 - }, - { - "epoch": 1.9524819848232888, - "grad_norm": 0.0011634370312094688, - "learning_rate": 0.00019999812023773224, - "loss": 46.0, - "step": 25537 - }, - { - "epoch": 1.9525584418066786, - "grad_norm": 0.0015786882722750306, - "learning_rate": 0.0001999981200904534, - "loss": 46.0, - "step": 25538 - }, - { - "epoch": 1.9526348987900684, - "grad_norm": 0.002831559395417571, - "learning_rate": 0.00019999811994316882, - "loss": 46.0, - "step": 25539 - }, - { - "epoch": 1.9527113557734581, - "grad_norm": 0.0014576261164620519, - "learning_rate": 0.00019999811979587846, - "loss": 46.0, - "step": 25540 - }, - { - "epoch": 1.9527878127568477, - "grad_norm": 0.0006848961347714067, - "learning_rate": 0.00019999811964858233, - "loss": 46.0, - "step": 25541 - }, - { - "epoch": 1.9528642697402374, - "grad_norm": 0.0023620238061994314, - "learning_rate": 0.00019999811950128046, - "loss": 46.0, - "step": 25542 - }, - { - "epoch": 1.9529407267236272, - "grad_norm": 0.0006348474998958409, - "learning_rate": 0.00019999811935397278, - "loss": 46.0, - "step": 25543 - }, - { - "epoch": 1.9530171837070167, - "grad_norm": 0.007859431207180023, - "learning_rate": 0.00019999811920665933, - "loss": 46.0, - "step": 25544 - }, - { - "epoch": 1.9530936406904065, - "grad_norm": 0.0006576450541615486, - "learning_rate": 0.00019999811905934014, - "loss": 46.0, - "step": 25545 - }, - { - "epoch": 1.9531700976737962, - "grad_norm": 0.001759189646691084, - "learning_rate": 0.00019999811891201517, - "loss": 46.0, - "step": 25546 - }, - { - "epoch": 1.953246554657186, - "grad_norm": 0.0005286120576784015, - "learning_rate": 0.00019999811876468443, - "loss": 46.0, - "step": 25547 - }, - { - "epoch": 1.9533230116405758, - "grad_norm": 0.005120165646076202, - "learning_rate": 0.0001999981186173479, - "loss": 46.0, - "step": 25548 - }, - { - "epoch": 1.9533994686239655, - "grad_norm": 0.0009879935532808304, - "learning_rate": 0.00019999811847000562, - "loss": 46.0, - "step": 25549 - }, - { - "epoch": 1.9534759256073553, - "grad_norm": 0.0011777252657338977, - "learning_rate": 0.00019999811832265759, - "loss": 46.0, - "step": 25550 - }, - { - "epoch": 1.953552382590745, - "grad_norm": 0.000631751143373549, - "learning_rate": 0.00019999811817530375, - "loss": 46.0, - "step": 25551 - }, - { - "epoch": 1.9536288395741346, - "grad_norm": 0.0005040019750595093, - "learning_rate": 0.00019999811802794417, - "loss": 46.0, - "step": 25552 - }, - { - "epoch": 1.9537052965575243, - "grad_norm": 0.0011498959502205253, - "learning_rate": 0.0001999981178805788, - "loss": 46.0, - "step": 25553 - }, - { - "epoch": 1.9537817535409139, - "grad_norm": 0.0008133918745443225, - "learning_rate": 0.00019999811773320768, - "loss": 46.0, - "step": 25554 - }, - { - "epoch": 1.9538582105243036, - "grad_norm": 0.0009214473539032042, - "learning_rate": 0.0001999981175858308, - "loss": 46.0, - "step": 25555 - }, - { - "epoch": 1.9539346675076934, - "grad_norm": 0.000881068641319871, - "learning_rate": 0.00019999811743844813, - "loss": 46.0, - "step": 25556 - }, - { - "epoch": 1.9540111244910832, - "grad_norm": 0.0004997215000912547, - "learning_rate": 0.00019999811729105968, - "loss": 46.0, - "step": 25557 - }, - { - "epoch": 1.954087581474473, - "grad_norm": 0.01997225731611252, - "learning_rate": 0.0001999981171436655, - "loss": 46.0, - "step": 25558 - }, - { - "epoch": 1.9541640384578627, - "grad_norm": 0.001274542766623199, - "learning_rate": 0.00019999811699626552, - "loss": 46.0, - "step": 25559 - }, - { - "epoch": 1.9542404954412524, - "grad_norm": 0.009379551745951176, - "learning_rate": 0.00019999811684885975, - "loss": 46.0, - "step": 25560 - }, - { - "epoch": 1.9543169524246422, - "grad_norm": 0.0034876237623393536, - "learning_rate": 0.00019999811670144826, - "loss": 46.0, - "step": 25561 - }, - { - "epoch": 1.954393409408032, - "grad_norm": 0.000805043033324182, - "learning_rate": 0.00019999811655403094, - "loss": 46.0, - "step": 25562 - }, - { - "epoch": 1.9544698663914215, - "grad_norm": 0.0019923821091651917, - "learning_rate": 0.0001999981164066079, - "loss": 46.0, - "step": 25563 - }, - { - "epoch": 1.9545463233748113, - "grad_norm": 0.0010309857316315174, - "learning_rate": 0.00019999811625917907, - "loss": 46.0, - "step": 25564 - }, - { - "epoch": 1.9546227803582008, - "grad_norm": 0.01174786314368248, - "learning_rate": 0.0001999981161117445, - "loss": 46.0, - "step": 25565 - }, - { - "epoch": 1.9546992373415906, - "grad_norm": 0.0006500668241642416, - "learning_rate": 0.00019999811596430416, - "loss": 46.0, - "step": 25566 - }, - { - "epoch": 1.9547756943249803, - "grad_norm": 0.0038077477365732193, - "learning_rate": 0.000199998115816858, - "loss": 46.0, - "step": 25567 - }, - { - "epoch": 1.95485215130837, - "grad_norm": 0.0012108869850635529, - "learning_rate": 0.0001999981156694061, - "loss": 46.0, - "step": 25568 - }, - { - "epoch": 1.9549286082917599, - "grad_norm": 0.0035956581123173237, - "learning_rate": 0.00019999811552194846, - "loss": 46.0, - "step": 25569 - }, - { - "epoch": 1.9550050652751496, - "grad_norm": 0.0008804632816463709, - "learning_rate": 0.00019999811537448504, - "loss": 46.0, - "step": 25570 - }, - { - "epoch": 1.9550815222585394, - "grad_norm": 0.0004633062344510108, - "learning_rate": 0.0001999981152270158, - "loss": 46.0, - "step": 25571 - }, - { - "epoch": 1.9551579792419291, - "grad_norm": 0.0023061796091496944, - "learning_rate": 0.00019999811507954082, - "loss": 46.0, - "step": 25572 - }, - { - "epoch": 1.955234436225319, - "grad_norm": 0.001015381538309157, - "learning_rate": 0.00019999811493206008, - "loss": 46.0, - "step": 25573 - }, - { - "epoch": 1.9553108932087084, - "grad_norm": 0.0008870554156601429, - "learning_rate": 0.0001999981147845736, - "loss": 46.0, - "step": 25574 - }, - { - "epoch": 1.9553873501920982, - "grad_norm": 0.00047566034481860697, - "learning_rate": 0.0001999981146370813, - "loss": 46.0, - "step": 25575 - }, - { - "epoch": 1.9554638071754877, - "grad_norm": 0.0008879919187165797, - "learning_rate": 0.00019999811448958324, - "loss": 46.0, - "step": 25576 - }, - { - "epoch": 1.9555402641588775, - "grad_norm": 0.0009728758595883846, - "learning_rate": 0.0001999981143420794, - "loss": 46.0, - "step": 25577 - }, - { - "epoch": 1.9556167211422673, - "grad_norm": 0.000601473730057478, - "learning_rate": 0.00019999811419456982, - "loss": 46.0, - "step": 25578 - }, - { - "epoch": 1.955693178125657, - "grad_norm": 0.008816084824502468, - "learning_rate": 0.00019999811404705447, - "loss": 46.0, - "step": 25579 - }, - { - "epoch": 1.9557696351090468, - "grad_norm": 0.0013176584616303444, - "learning_rate": 0.0001999981138995333, - "loss": 46.0, - "step": 25580 - }, - { - "epoch": 1.9558460920924365, - "grad_norm": 0.0014934474602341652, - "learning_rate": 0.00019999811375200644, - "loss": 46.0, - "step": 25581 - }, - { - "epoch": 1.9559225490758263, - "grad_norm": 0.0017329715192317963, - "learning_rate": 0.00019999811360447376, - "loss": 46.0, - "step": 25582 - }, - { - "epoch": 1.955999006059216, - "grad_norm": 0.002628705697134137, - "learning_rate": 0.0001999981134569353, - "loss": 46.0, - "step": 25583 - }, - { - "epoch": 1.9560754630426056, - "grad_norm": 0.002184395445510745, - "learning_rate": 0.00019999811330939112, - "loss": 46.0, - "step": 25584 - }, - { - "epoch": 1.9561519200259954, - "grad_norm": 0.0011793995508924127, - "learning_rate": 0.00019999811316184115, - "loss": 46.0, - "step": 25585 - }, - { - "epoch": 1.9562283770093851, - "grad_norm": 0.003846992738544941, - "learning_rate": 0.00019999811301428538, - "loss": 46.0, - "step": 25586 - }, - { - "epoch": 1.9563048339927747, - "grad_norm": 0.0016753869131207466, - "learning_rate": 0.00019999811286672387, - "loss": 46.0, - "step": 25587 - }, - { - "epoch": 1.9563812909761644, - "grad_norm": 0.0011474102502688766, - "learning_rate": 0.00019999811271915658, - "loss": 46.0, - "step": 25588 - }, - { - "epoch": 1.9564577479595542, - "grad_norm": 0.003411001991480589, - "learning_rate": 0.00019999811257158352, - "loss": 46.0, - "step": 25589 - }, - { - "epoch": 1.956534204942944, - "grad_norm": 0.0010889185359701514, - "learning_rate": 0.0001999981124240047, - "loss": 46.0, - "step": 25590 - }, - { - "epoch": 1.9566106619263337, - "grad_norm": 0.0026897569186985493, - "learning_rate": 0.0001999981122764201, - "loss": 46.0, - "step": 25591 - }, - { - "epoch": 1.9566871189097235, - "grad_norm": 0.0013514735037460923, - "learning_rate": 0.00019999811212882975, - "loss": 46.0, - "step": 25592 - }, - { - "epoch": 1.9567635758931132, - "grad_norm": 0.006613563280552626, - "learning_rate": 0.00019999811198123362, - "loss": 46.0, - "step": 25593 - }, - { - "epoch": 1.956840032876503, - "grad_norm": 0.0008116142707876861, - "learning_rate": 0.00019999811183363174, - "loss": 46.0, - "step": 25594 - }, - { - "epoch": 1.9569164898598925, - "grad_norm": 0.0009808239992707968, - "learning_rate": 0.00019999811168602401, - "loss": 46.0, - "step": 25595 - }, - { - "epoch": 1.9569929468432823, - "grad_norm": 0.003019998548552394, - "learning_rate": 0.0001999981115384106, - "loss": 46.0, - "step": 25596 - }, - { - "epoch": 1.957069403826672, - "grad_norm": 0.0008172235684469342, - "learning_rate": 0.0001999981113907914, - "loss": 46.0, - "step": 25597 - }, - { - "epoch": 1.9571458608100616, - "grad_norm": 0.000792684790212661, - "learning_rate": 0.0001999981112431664, - "loss": 46.0, - "step": 25598 - }, - { - "epoch": 1.9572223177934513, - "grad_norm": 0.0031228649895638227, - "learning_rate": 0.00019999811109553566, - "loss": 46.0, - "step": 25599 - }, - { - "epoch": 1.957298774776841, - "grad_norm": 0.001094255829229951, - "learning_rate": 0.00019999811094789912, - "loss": 46.0, - "step": 25600 - }, - { - "epoch": 1.9573752317602309, - "grad_norm": 0.0005204302724450827, - "learning_rate": 0.00019999811080025686, - "loss": 46.0, - "step": 25601 - }, - { - "epoch": 1.9574516887436206, - "grad_norm": 0.0014343796065077186, - "learning_rate": 0.0001999981106526088, - "loss": 46.0, - "step": 25602 - }, - { - "epoch": 1.9575281457270104, - "grad_norm": 0.0008985139429569244, - "learning_rate": 0.000199998110504955, - "loss": 46.0, - "step": 25603 - }, - { - "epoch": 1.9576046027104002, - "grad_norm": 0.0005287400563247502, - "learning_rate": 0.00019999811035729538, - "loss": 46.0, - "step": 25604 - }, - { - "epoch": 1.95768105969379, - "grad_norm": 0.018086234107613564, - "learning_rate": 0.00019999811020963003, - "loss": 46.0, - "step": 25605 - }, - { - "epoch": 1.9577575166771795, - "grad_norm": 0.001463517313823104, - "learning_rate": 0.00019999811006195887, - "loss": 46.0, - "step": 25606 - }, - { - "epoch": 1.9578339736605692, - "grad_norm": 0.0005041114636696875, - "learning_rate": 0.00019999810991428197, - "loss": 46.0, - "step": 25607 - }, - { - "epoch": 1.957910430643959, - "grad_norm": 0.0006080283783376217, - "learning_rate": 0.00019999810976659933, - "loss": 46.0, - "step": 25608 - }, - { - "epoch": 1.9579868876273485, - "grad_norm": 0.0012026106705889106, - "learning_rate": 0.00019999810961891088, - "loss": 46.0, - "step": 25609 - }, - { - "epoch": 1.9580633446107383, - "grad_norm": 0.0009459232096560299, - "learning_rate": 0.00019999810947121666, - "loss": 46.0, - "step": 25610 - }, - { - "epoch": 1.958139801594128, - "grad_norm": 0.000572491786442697, - "learning_rate": 0.0001999981093235167, - "loss": 46.0, - "step": 25611 - }, - { - "epoch": 1.9582162585775178, - "grad_norm": 0.0014231883687898517, - "learning_rate": 0.00019999810917581092, - "loss": 46.0, - "step": 25612 - }, - { - "epoch": 1.9582927155609076, - "grad_norm": 0.0003486821660771966, - "learning_rate": 0.0001999981090280994, - "loss": 46.0, - "step": 25613 - }, - { - "epoch": 1.9583691725442973, - "grad_norm": 0.0021960726007819176, - "learning_rate": 0.00019999810888038212, - "loss": 46.0, - "step": 25614 - }, - { - "epoch": 1.958445629527687, - "grad_norm": 0.0007462743087671697, - "learning_rate": 0.00019999810873265906, - "loss": 46.0, - "step": 25615 - }, - { - "epoch": 1.9585220865110768, - "grad_norm": 0.0008867362048476934, - "learning_rate": 0.00019999810858493025, - "loss": 46.0, - "step": 25616 - }, - { - "epoch": 1.9585985434944664, - "grad_norm": 0.0010086533147841692, - "learning_rate": 0.00019999810843719564, - "loss": 46.0, - "step": 25617 - }, - { - "epoch": 1.9586750004778561, - "grad_norm": 0.0031216300558298826, - "learning_rate": 0.00019999810828945526, - "loss": 46.0, - "step": 25618 - }, - { - "epoch": 1.958751457461246, - "grad_norm": 0.001055656815879047, - "learning_rate": 0.00019999810814170913, - "loss": 46.0, - "step": 25619 - }, - { - "epoch": 1.9588279144446354, - "grad_norm": 0.002654892159625888, - "learning_rate": 0.00019999810799395723, - "loss": 46.0, - "step": 25620 - }, - { - "epoch": 1.9589043714280252, - "grad_norm": 0.0010736286640167236, - "learning_rate": 0.00019999810784619956, - "loss": 46.0, - "step": 25621 - }, - { - "epoch": 1.958980828411415, - "grad_norm": 0.000880467938259244, - "learning_rate": 0.0001999981076984361, - "loss": 46.0, - "step": 25622 - }, - { - "epoch": 1.9590572853948047, - "grad_norm": 0.0010455846786499023, - "learning_rate": 0.0001999981075506669, - "loss": 46.0, - "step": 25623 - }, - { - "epoch": 1.9591337423781945, - "grad_norm": 0.0007909988053143024, - "learning_rate": 0.0001999981074028919, - "loss": 46.0, - "step": 25624 - }, - { - "epoch": 1.9592101993615842, - "grad_norm": 0.0011371668661013246, - "learning_rate": 0.00019999810725511116, - "loss": 46.0, - "step": 25625 - }, - { - "epoch": 1.959286656344974, - "grad_norm": 0.0013723236043006182, - "learning_rate": 0.00019999810710732464, - "loss": 46.0, - "step": 25626 - }, - { - "epoch": 1.9593631133283638, - "grad_norm": 0.0006425781175494194, - "learning_rate": 0.00019999810695953235, - "loss": 46.0, - "step": 25627 - }, - { - "epoch": 1.9594395703117533, - "grad_norm": 0.0040087648667395115, - "learning_rate": 0.0001999981068117343, - "loss": 46.0, - "step": 25628 - }, - { - "epoch": 1.959516027295143, - "grad_norm": 0.0013753294479101896, - "learning_rate": 0.0001999981066639305, - "loss": 46.0, - "step": 25629 - }, - { - "epoch": 1.9595924842785328, - "grad_norm": 0.0009036906412802637, - "learning_rate": 0.00019999810651612088, - "loss": 46.0, - "step": 25630 - }, - { - "epoch": 1.9596689412619224, - "grad_norm": 0.0013251419877633452, - "learning_rate": 0.0001999981063683055, - "loss": 46.0, - "step": 25631 - }, - { - "epoch": 1.9597453982453121, - "grad_norm": 0.0032385559752583504, - "learning_rate": 0.00019999810622048437, - "loss": 46.0, - "step": 25632 - }, - { - "epoch": 1.959821855228702, - "grad_norm": 0.0002641786413732916, - "learning_rate": 0.00019999810607265744, - "loss": 46.0, - "step": 25633 - }, - { - "epoch": 1.9598983122120917, - "grad_norm": 0.001194876036606729, - "learning_rate": 0.00019999810592482477, - "loss": 46.0, - "step": 25634 - }, - { - "epoch": 1.9599747691954814, - "grad_norm": 0.004771309904754162, - "learning_rate": 0.00019999810577698635, - "loss": 46.0, - "step": 25635 - }, - { - "epoch": 1.9600512261788712, - "grad_norm": 0.0019493907457217574, - "learning_rate": 0.00019999810562914213, - "loss": 46.0, - "step": 25636 - }, - { - "epoch": 1.960127683162261, - "grad_norm": 0.0012141424231231213, - "learning_rate": 0.00019999810548129214, - "loss": 46.0, - "step": 25637 - }, - { - "epoch": 1.9602041401456507, - "grad_norm": 0.0014177298871800303, - "learning_rate": 0.0001999981053334364, - "loss": 46.0, - "step": 25638 - }, - { - "epoch": 1.9602805971290402, - "grad_norm": 0.0011091342894360423, - "learning_rate": 0.00019999810518557488, - "loss": 46.0, - "step": 25639 - }, - { - "epoch": 1.96035705411243, - "grad_norm": 0.001043921452946961, - "learning_rate": 0.00019999810503770757, - "loss": 46.0, - "step": 25640 - }, - { - "epoch": 1.9604335110958198, - "grad_norm": 0.0007383283227682114, - "learning_rate": 0.0001999981048898345, - "loss": 46.0, - "step": 25641 - }, - { - "epoch": 1.9605099680792093, - "grad_norm": 0.0012561003677546978, - "learning_rate": 0.00019999810474195567, - "loss": 46.0, - "step": 25642 - }, - { - "epoch": 1.960586425062599, - "grad_norm": 0.0009179412736557424, - "learning_rate": 0.0001999981045940711, - "loss": 46.0, - "step": 25643 - }, - { - "epoch": 1.9606628820459888, - "grad_norm": 0.0041306293569505215, - "learning_rate": 0.0001999981044461807, - "loss": 46.0, - "step": 25644 - }, - { - "epoch": 1.9607393390293786, - "grad_norm": 0.004089896101504564, - "learning_rate": 0.0001999981042982846, - "loss": 46.0, - "step": 25645 - }, - { - "epoch": 1.9608157960127683, - "grad_norm": 0.0046420409344136715, - "learning_rate": 0.00019999810415038266, - "loss": 46.0, - "step": 25646 - }, - { - "epoch": 1.960892252996158, - "grad_norm": 0.0018208050169050694, - "learning_rate": 0.00019999810400247499, - "loss": 46.0, - "step": 25647 - }, - { - "epoch": 1.9609687099795479, - "grad_norm": 0.0018417071551084518, - "learning_rate": 0.00019999810385456154, - "loss": 46.0, - "step": 25648 - }, - { - "epoch": 1.9610451669629376, - "grad_norm": 0.002569543430581689, - "learning_rate": 0.00019999810370664235, - "loss": 46.0, - "step": 25649 - }, - { - "epoch": 1.9611216239463272, - "grad_norm": 0.0021901200525462627, - "learning_rate": 0.00019999810355871732, - "loss": 46.0, - "step": 25650 - }, - { - "epoch": 1.961198080929717, - "grad_norm": 0.0031648639123886824, - "learning_rate": 0.00019999810341078659, - "loss": 46.0, - "step": 25651 - }, - { - "epoch": 1.9612745379131067, - "grad_norm": 0.0003894160327035934, - "learning_rate": 0.00019999810326285007, - "loss": 46.0, - "step": 25652 - }, - { - "epoch": 1.9613509948964962, - "grad_norm": 0.0010531754232943058, - "learning_rate": 0.00019999810311490776, - "loss": 46.0, - "step": 25653 - }, - { - "epoch": 1.961427451879886, - "grad_norm": 0.0009474078542552888, - "learning_rate": 0.0001999981029669597, - "loss": 46.0, - "step": 25654 - }, - { - "epoch": 1.9615039088632757, - "grad_norm": 0.0012509784428402781, - "learning_rate": 0.0001999981028190059, - "loss": 46.0, - "step": 25655 - }, - { - "epoch": 1.9615803658466655, - "grad_norm": 0.001959295244887471, - "learning_rate": 0.00019999810267104628, - "loss": 46.0, - "step": 25656 - }, - { - "epoch": 1.9616568228300553, - "grad_norm": 0.0005629324004985392, - "learning_rate": 0.0001999981025230809, - "loss": 46.0, - "step": 25657 - }, - { - "epoch": 1.961733279813445, - "grad_norm": 0.0011021456448361278, - "learning_rate": 0.00019999810237510975, - "loss": 46.0, - "step": 25658 - }, - { - "epoch": 1.9618097367968348, - "grad_norm": 0.0025703106075525284, - "learning_rate": 0.00019999810222713288, - "loss": 46.0, - "step": 25659 - }, - { - "epoch": 1.9618861937802246, - "grad_norm": 0.0005849493318237364, - "learning_rate": 0.0001999981020791502, - "loss": 46.0, - "step": 25660 - }, - { - "epoch": 1.961962650763614, - "grad_norm": 0.0006567466771230102, - "learning_rate": 0.00019999810193116173, - "loss": 46.0, - "step": 25661 - }, - { - "epoch": 1.9620391077470039, - "grad_norm": 0.0011481598485261202, - "learning_rate": 0.00019999810178316754, - "loss": 46.0, - "step": 25662 - }, - { - "epoch": 1.9621155647303936, - "grad_norm": 0.0009814308723434806, - "learning_rate": 0.00019999810163516752, - "loss": 46.0, - "step": 25663 - }, - { - "epoch": 1.9621920217137832, - "grad_norm": 0.0015112621476873755, - "learning_rate": 0.00019999810148716178, - "loss": 46.0, - "step": 25664 - }, - { - "epoch": 1.962268478697173, - "grad_norm": 0.0013320047874003649, - "learning_rate": 0.00019999810133915027, - "loss": 46.0, - "step": 25665 - }, - { - "epoch": 1.9623449356805627, - "grad_norm": 0.0009068603976629674, - "learning_rate": 0.00019999810119113295, - "loss": 46.0, - "step": 25666 - }, - { - "epoch": 1.9624213926639524, - "grad_norm": 0.00048425066052004695, - "learning_rate": 0.0001999981010431099, - "loss": 46.0, - "step": 25667 - }, - { - "epoch": 1.9624978496473422, - "grad_norm": 0.0020001623779535294, - "learning_rate": 0.00019999810089508106, - "loss": 46.0, - "step": 25668 - }, - { - "epoch": 1.962574306630732, - "grad_norm": 0.0005820230580866337, - "learning_rate": 0.00019999810074704645, - "loss": 46.0, - "step": 25669 - }, - { - "epoch": 1.9626507636141217, - "grad_norm": 0.002070485381409526, - "learning_rate": 0.0001999981005990061, - "loss": 46.0, - "step": 25670 - }, - { - "epoch": 1.9627272205975115, - "grad_norm": 0.0020292168483138084, - "learning_rate": 0.00019999810045095995, - "loss": 46.0, - "step": 25671 - }, - { - "epoch": 1.962803677580901, - "grad_norm": 0.0013971275184303522, - "learning_rate": 0.00019999810030290802, - "loss": 46.0, - "step": 25672 - }, - { - "epoch": 1.9628801345642908, - "grad_norm": 0.0020395535975694656, - "learning_rate": 0.00019999810015485037, - "loss": 46.0, - "step": 25673 - }, - { - "epoch": 1.9629565915476805, - "grad_norm": 0.0011849283473566175, - "learning_rate": 0.0001999981000067869, - "loss": 46.0, - "step": 25674 - }, - { - "epoch": 1.96303304853107, - "grad_norm": 0.0007762099266983569, - "learning_rate": 0.00019999809985871768, - "loss": 46.0, - "step": 25675 - }, - { - "epoch": 1.9631095055144598, - "grad_norm": 0.00038909068098291755, - "learning_rate": 0.0001999980997106427, - "loss": 46.0, - "step": 25676 - }, - { - "epoch": 1.9631859624978496, - "grad_norm": 0.0008692687260918319, - "learning_rate": 0.00019999809956256192, - "loss": 46.0, - "step": 25677 - }, - { - "epoch": 1.9632624194812394, - "grad_norm": 0.001047069556079805, - "learning_rate": 0.0001999980994144754, - "loss": 46.0, - "step": 25678 - }, - { - "epoch": 1.9633388764646291, - "grad_norm": 0.0035722774919122458, - "learning_rate": 0.00019999809926638312, - "loss": 46.0, - "step": 25679 - }, - { - "epoch": 1.9634153334480189, - "grad_norm": 0.001829311833716929, - "learning_rate": 0.00019999809911828504, - "loss": 46.0, - "step": 25680 - }, - { - "epoch": 1.9634917904314086, - "grad_norm": 0.0009578315657563508, - "learning_rate": 0.0001999980989701812, - "loss": 46.0, - "step": 25681 - }, - { - "epoch": 1.9635682474147984, - "grad_norm": 0.0008192209643311799, - "learning_rate": 0.0001999980988220716, - "loss": 46.0, - "step": 25682 - }, - { - "epoch": 1.963644704398188, - "grad_norm": 0.0016062733484432101, - "learning_rate": 0.00019999809867395625, - "loss": 46.0, - "step": 25683 - }, - { - "epoch": 1.9637211613815777, - "grad_norm": 0.0005128085031174123, - "learning_rate": 0.0001999980985258351, - "loss": 46.0, - "step": 25684 - }, - { - "epoch": 1.9637976183649672, - "grad_norm": 0.0018017567927017808, - "learning_rate": 0.0001999980983777082, - "loss": 46.0, - "step": 25685 - }, - { - "epoch": 1.963874075348357, - "grad_norm": 0.0028801364824175835, - "learning_rate": 0.0001999980982295755, - "loss": 46.0, - "step": 25686 - }, - { - "epoch": 1.9639505323317468, - "grad_norm": 0.0012952187098562717, - "learning_rate": 0.00019999809808143705, - "loss": 46.0, - "step": 25687 - }, - { - "epoch": 1.9640269893151365, - "grad_norm": 0.003607895690947771, - "learning_rate": 0.00019999809793329286, - "loss": 46.0, - "step": 25688 - }, - { - "epoch": 1.9641034462985263, - "grad_norm": 0.0013448529643937945, - "learning_rate": 0.00019999809778514284, - "loss": 46.0, - "step": 25689 - }, - { - "epoch": 1.964179903281916, - "grad_norm": 0.0013092899462208152, - "learning_rate": 0.0001999980976369871, - "loss": 46.0, - "step": 25690 - }, - { - "epoch": 1.9642563602653058, - "grad_norm": 0.000752260850276798, - "learning_rate": 0.00019999809748882553, - "loss": 46.0, - "step": 25691 - }, - { - "epoch": 1.9643328172486956, - "grad_norm": 0.0008124150335788727, - "learning_rate": 0.00019999809734065827, - "loss": 46.0, - "step": 25692 - }, - { - "epoch": 1.9644092742320853, - "grad_norm": 0.0008994932286441326, - "learning_rate": 0.00019999809719248519, - "loss": 46.0, - "step": 25693 - }, - { - "epoch": 1.9644857312154749, - "grad_norm": 0.0005908635212108493, - "learning_rate": 0.00019999809704430635, - "loss": 46.0, - "step": 25694 - }, - { - "epoch": 1.9645621881988646, - "grad_norm": 0.0008434213232249022, - "learning_rate": 0.00019999809689612175, - "loss": 46.0, - "step": 25695 - }, - { - "epoch": 1.9646386451822542, - "grad_norm": 0.00044164498103782535, - "learning_rate": 0.00019999809674793137, - "loss": 46.0, - "step": 25696 - }, - { - "epoch": 1.964715102165644, - "grad_norm": 0.0006314634811133146, - "learning_rate": 0.00019999809659973524, - "loss": 46.0, - "step": 25697 - }, - { - "epoch": 1.9647915591490337, - "grad_norm": 0.0005994917592033744, - "learning_rate": 0.00019999809645153334, - "loss": 46.0, - "step": 25698 - }, - { - "epoch": 1.9648680161324235, - "grad_norm": 0.000701598240993917, - "learning_rate": 0.00019999809630332562, - "loss": 46.0, - "step": 25699 - }, - { - "epoch": 1.9649444731158132, - "grad_norm": 0.00297808856703341, - "learning_rate": 0.00019999809615511217, - "loss": 46.0, - "step": 25700 - }, - { - "epoch": 1.965020930099203, - "grad_norm": 0.00306686176918447, - "learning_rate": 0.00019999809600689295, - "loss": 46.0, - "step": 25701 - }, - { - "epoch": 1.9650973870825927, - "grad_norm": 0.0013077683979645371, - "learning_rate": 0.000199998095858668, - "loss": 46.0, - "step": 25702 - }, - { - "epoch": 1.9651738440659825, - "grad_norm": 0.0027012452483177185, - "learning_rate": 0.00019999809571043722, - "loss": 46.0, - "step": 25703 - }, - { - "epoch": 1.9652503010493723, - "grad_norm": 0.0015396863454952836, - "learning_rate": 0.00019999809556220068, - "loss": 46.0, - "step": 25704 - }, - { - "epoch": 1.9653267580327618, - "grad_norm": 0.0005602787714451551, - "learning_rate": 0.00019999809541395837, - "loss": 46.0, - "step": 25705 - }, - { - "epoch": 1.9654032150161516, - "grad_norm": 0.00037825407343916595, - "learning_rate": 0.0001999980952657103, - "loss": 46.0, - "step": 25706 - }, - { - "epoch": 1.965479671999541, - "grad_norm": 0.0006492449319921434, - "learning_rate": 0.00019999809511745648, - "loss": 46.0, - "step": 25707 - }, - { - "epoch": 1.9655561289829309, - "grad_norm": 0.0038903469685465097, - "learning_rate": 0.00019999809496919685, - "loss": 46.0, - "step": 25708 - }, - { - "epoch": 1.9656325859663206, - "grad_norm": 0.0016727959737181664, - "learning_rate": 0.0001999980948209315, - "loss": 46.0, - "step": 25709 - }, - { - "epoch": 1.9657090429497104, - "grad_norm": 0.0006891209632158279, - "learning_rate": 0.00019999809467266034, - "loss": 46.0, - "step": 25710 - }, - { - "epoch": 1.9657854999331001, - "grad_norm": 0.003447185968980193, - "learning_rate": 0.00019999809452438342, - "loss": 46.0, - "step": 25711 - }, - { - "epoch": 1.96586195691649, - "grad_norm": 0.0005791171570308506, - "learning_rate": 0.00019999809437610074, - "loss": 46.0, - "step": 25712 - }, - { - "epoch": 1.9659384138998797, - "grad_norm": 0.0030269778799265623, - "learning_rate": 0.0001999980942278123, - "loss": 46.0, - "step": 25713 - }, - { - "epoch": 1.9660148708832694, - "grad_norm": 0.0016051674028858542, - "learning_rate": 0.00019999809407951808, - "loss": 46.0, - "step": 25714 - }, - { - "epoch": 1.966091327866659, - "grad_norm": 0.00045554747339338064, - "learning_rate": 0.0001999980939312181, - "loss": 46.0, - "step": 25715 - }, - { - "epoch": 1.9661677848500487, - "grad_norm": 0.0038114527706056833, - "learning_rate": 0.00019999809378291232, - "loss": 46.0, - "step": 25716 - }, - { - "epoch": 1.9662442418334385, - "grad_norm": 0.001503113890066743, - "learning_rate": 0.00019999809363460078, - "loss": 46.0, - "step": 25717 - }, - { - "epoch": 1.966320698816828, - "grad_norm": 0.0023027423303574324, - "learning_rate": 0.0001999980934862835, - "loss": 46.0, - "step": 25718 - }, - { - "epoch": 1.9663971558002178, - "grad_norm": 0.003781256265938282, - "learning_rate": 0.0001999980933379604, - "loss": 46.0, - "step": 25719 - }, - { - "epoch": 1.9664736127836075, - "grad_norm": 0.006362109910696745, - "learning_rate": 0.00019999809318963155, - "loss": 46.0, - "step": 25720 - }, - { - "epoch": 1.9665500697669973, - "grad_norm": 0.000668186810798943, - "learning_rate": 0.00019999809304129698, - "loss": 46.0, - "step": 25721 - }, - { - "epoch": 1.966626526750387, - "grad_norm": 0.0009403019212186337, - "learning_rate": 0.0001999980928929566, - "loss": 46.0, - "step": 25722 - }, - { - "epoch": 1.9667029837337768, - "grad_norm": 0.0006688606808893383, - "learning_rate": 0.00019999809274461045, - "loss": 46.0, - "step": 25723 - }, - { - "epoch": 1.9667794407171666, - "grad_norm": 0.0005047800950706005, - "learning_rate": 0.00019999809259625852, - "loss": 46.0, - "step": 25724 - }, - { - "epoch": 1.9668558977005564, - "grad_norm": 0.0005354401655495167, - "learning_rate": 0.00019999809244790085, - "loss": 46.0, - "step": 25725 - }, - { - "epoch": 1.966932354683946, - "grad_norm": 0.000846469949465245, - "learning_rate": 0.00019999809229953738, - "loss": 46.0, - "step": 25726 - }, - { - "epoch": 1.9670088116673357, - "grad_norm": 0.0009723450057208538, - "learning_rate": 0.00019999809215116816, - "loss": 46.0, - "step": 25727 - }, - { - "epoch": 1.9670852686507254, - "grad_norm": 0.001135780243203044, - "learning_rate": 0.00019999809200279317, - "loss": 46.0, - "step": 25728 - }, - { - "epoch": 1.967161725634115, - "grad_norm": 0.0013109707506373525, - "learning_rate": 0.0001999980918544124, - "loss": 46.0, - "step": 25729 - }, - { - "epoch": 1.9672381826175047, - "grad_norm": 0.00041831049020402133, - "learning_rate": 0.0001999980917060259, - "loss": 46.0, - "step": 25730 - }, - { - "epoch": 1.9673146396008945, - "grad_norm": 0.002529071643948555, - "learning_rate": 0.00019999809155763355, - "loss": 46.0, - "step": 25731 - }, - { - "epoch": 1.9673910965842842, - "grad_norm": 0.0008151988731697202, - "learning_rate": 0.00019999809140923552, - "loss": 46.0, - "step": 25732 - }, - { - "epoch": 1.967467553567674, - "grad_norm": 0.003350807586684823, - "learning_rate": 0.00019999809126083166, - "loss": 46.0, - "step": 25733 - }, - { - "epoch": 1.9675440105510638, - "grad_norm": 0.0011093176435679197, - "learning_rate": 0.00019999809111242206, - "loss": 46.0, - "step": 25734 - }, - { - "epoch": 1.9676204675344535, - "grad_norm": 0.0016163561958819628, - "learning_rate": 0.00019999809096400668, - "loss": 46.0, - "step": 25735 - }, - { - "epoch": 1.9676969245178433, - "grad_norm": 0.0030409004539251328, - "learning_rate": 0.00019999809081558553, - "loss": 46.0, - "step": 25736 - }, - { - "epoch": 1.9677733815012328, - "grad_norm": 0.0003842986188828945, - "learning_rate": 0.00019999809066715863, - "loss": 46.0, - "step": 25737 - }, - { - "epoch": 1.9678498384846226, - "grad_norm": 0.000913542287889868, - "learning_rate": 0.00019999809051872593, - "loss": 46.0, - "step": 25738 - }, - { - "epoch": 1.9679262954680123, - "grad_norm": 0.0011681531323119998, - "learning_rate": 0.00019999809037028748, - "loss": 46.0, - "step": 25739 - }, - { - "epoch": 1.9680027524514019, - "grad_norm": 0.0010768186766654253, - "learning_rate": 0.00019999809022184324, - "loss": 46.0, - "step": 25740 - }, - { - "epoch": 1.9680792094347916, - "grad_norm": 0.00041841319762170315, - "learning_rate": 0.00019999809007339322, - "loss": 46.0, - "step": 25741 - }, - { - "epoch": 1.9681556664181814, - "grad_norm": 0.005378073081374168, - "learning_rate": 0.00019999808992493748, - "loss": 46.0, - "step": 25742 - }, - { - "epoch": 1.9682321234015712, - "grad_norm": 0.0010246969759464264, - "learning_rate": 0.00019999808977647594, - "loss": 46.0, - "step": 25743 - }, - { - "epoch": 1.968308580384961, - "grad_norm": 0.0008413073373958468, - "learning_rate": 0.00019999808962800866, - "loss": 46.0, - "step": 25744 - }, - { - "epoch": 1.9683850373683507, - "grad_norm": 0.0009757535299286246, - "learning_rate": 0.00019999808947953555, - "loss": 46.0, - "step": 25745 - }, - { - "epoch": 1.9684614943517404, - "grad_norm": 0.0008605967741459608, - "learning_rate": 0.00019999808933105672, - "loss": 46.0, - "step": 25746 - }, - { - "epoch": 1.9685379513351302, - "grad_norm": 0.0011325961677357554, - "learning_rate": 0.0001999980891825721, - "loss": 46.0, - "step": 25747 - }, - { - "epoch": 1.9686144083185197, - "grad_norm": 0.0015184168005362153, - "learning_rate": 0.00019999808903408173, - "loss": 46.0, - "step": 25748 - }, - { - "epoch": 1.9686908653019095, - "grad_norm": 0.0010799546726047993, - "learning_rate": 0.00019999808888558558, - "loss": 46.0, - "step": 25749 - }, - { - "epoch": 1.9687673222852993, - "grad_norm": 0.0009057698771357536, - "learning_rate": 0.00019999808873708369, - "loss": 46.0, - "step": 25750 - }, - { - "epoch": 1.9688437792686888, - "grad_norm": 0.0006330139003694057, - "learning_rate": 0.000199998088588576, - "loss": 46.0, - "step": 25751 - }, - { - "epoch": 1.9689202362520786, - "grad_norm": 0.0008669638191349804, - "learning_rate": 0.00019999808844006252, - "loss": 46.0, - "step": 25752 - }, - { - "epoch": 1.9689966932354683, - "grad_norm": 0.0012531676329672337, - "learning_rate": 0.00019999808829154327, - "loss": 46.0, - "step": 25753 - }, - { - "epoch": 1.969073150218858, - "grad_norm": 0.0035813641734421253, - "learning_rate": 0.0001999980881430183, - "loss": 46.0, - "step": 25754 - }, - { - "epoch": 1.9691496072022479, - "grad_norm": 0.0034570202697068453, - "learning_rate": 0.00019999808799448752, - "loss": 46.0, - "step": 25755 - }, - { - "epoch": 1.9692260641856376, - "grad_norm": 0.0009613910806365311, - "learning_rate": 0.000199998087845951, - "loss": 46.0, - "step": 25756 - }, - { - "epoch": 1.9693025211690274, - "grad_norm": 0.00043255550554022193, - "learning_rate": 0.0001999980876974087, - "loss": 46.0, - "step": 25757 - }, - { - "epoch": 1.9693789781524171, - "grad_norm": 0.0005464975256472826, - "learning_rate": 0.0001999980875488606, - "loss": 46.0, - "step": 25758 - }, - { - "epoch": 1.9694554351358067, - "grad_norm": 0.0006330720498226583, - "learning_rate": 0.00019999808740030678, - "loss": 46.0, - "step": 25759 - }, - { - "epoch": 1.9695318921191964, - "grad_norm": 0.000557834398932755, - "learning_rate": 0.00019999808725174715, - "loss": 46.0, - "step": 25760 - }, - { - "epoch": 1.9696083491025862, - "grad_norm": 0.0012844385346397758, - "learning_rate": 0.00019999808710318177, - "loss": 46.0, - "step": 25761 - }, - { - "epoch": 1.9696848060859757, - "grad_norm": 0.0005189888179302216, - "learning_rate": 0.00019999808695461062, - "loss": 46.0, - "step": 25762 - }, - { - "epoch": 1.9697612630693655, - "grad_norm": 0.0010427827946841717, - "learning_rate": 0.0001999980868060337, - "loss": 46.0, - "step": 25763 - }, - { - "epoch": 1.9698377200527553, - "grad_norm": 0.0019316133111715317, - "learning_rate": 0.00019999808665745102, - "loss": 46.0, - "step": 25764 - }, - { - "epoch": 1.969914177036145, - "grad_norm": 0.0006965315551497042, - "learning_rate": 0.00019999808650886255, - "loss": 46.0, - "step": 25765 - }, - { - "epoch": 1.9699906340195348, - "grad_norm": 0.0007156726205721498, - "learning_rate": 0.0001999980863602683, - "loss": 46.0, - "step": 25766 - }, - { - "epoch": 1.9700670910029245, - "grad_norm": 0.0015348537126556039, - "learning_rate": 0.00019999808621166834, - "loss": 46.0, - "step": 25767 - }, - { - "epoch": 1.9701435479863143, - "grad_norm": 0.0017303547356277704, - "learning_rate": 0.00019999808606306255, - "loss": 46.0, - "step": 25768 - }, - { - "epoch": 1.970220004969704, - "grad_norm": 0.0033142436295747757, - "learning_rate": 0.00019999808591445101, - "loss": 46.0, - "step": 25769 - }, - { - "epoch": 1.9702964619530936, - "grad_norm": 0.0009024566388688982, - "learning_rate": 0.00019999808576583373, - "loss": 46.0, - "step": 25770 - }, - { - "epoch": 1.9703729189364834, - "grad_norm": 0.0009344329009763896, - "learning_rate": 0.00019999808561721065, - "loss": 46.0, - "step": 25771 - }, - { - "epoch": 1.9704493759198731, - "grad_norm": 0.0073862276040017605, - "learning_rate": 0.00019999808546858182, - "loss": 46.0, - "step": 25772 - }, - { - "epoch": 1.9705258329032627, - "grad_norm": 0.001017266302369535, - "learning_rate": 0.00019999808531994718, - "loss": 46.0, - "step": 25773 - }, - { - "epoch": 1.9706022898866524, - "grad_norm": 0.0009426310425624251, - "learning_rate": 0.0001999980851713068, - "loss": 46.0, - "step": 25774 - }, - { - "epoch": 1.9706787468700422, - "grad_norm": 0.0008773712906986475, - "learning_rate": 0.00019999808502266063, - "loss": 46.0, - "step": 25775 - }, - { - "epoch": 1.970755203853432, - "grad_norm": 0.0019621788524091244, - "learning_rate": 0.0001999980848740087, - "loss": 46.0, - "step": 25776 - }, - { - "epoch": 1.9708316608368217, - "grad_norm": 0.001762914820574224, - "learning_rate": 0.00019999808472535103, - "loss": 46.0, - "step": 25777 - }, - { - "epoch": 1.9709081178202115, - "grad_norm": 0.002573672216385603, - "learning_rate": 0.00019999808457668756, - "loss": 46.0, - "step": 25778 - }, - { - "epoch": 1.9709845748036012, - "grad_norm": 0.0005982061848044395, - "learning_rate": 0.00019999808442801835, - "loss": 46.0, - "step": 25779 - }, - { - "epoch": 1.971061031786991, - "grad_norm": 0.0006087124347686768, - "learning_rate": 0.00019999808427934333, - "loss": 46.0, - "step": 25780 - }, - { - "epoch": 1.9711374887703805, - "grad_norm": 0.001752177020534873, - "learning_rate": 0.00019999808413066257, - "loss": 46.0, - "step": 25781 - }, - { - "epoch": 1.9712139457537703, - "grad_norm": 0.0007067126571200788, - "learning_rate": 0.00019999808398197603, - "loss": 46.0, - "step": 25782 - }, - { - "epoch": 1.97129040273716, - "grad_norm": 0.0009051482775248587, - "learning_rate": 0.00019999808383328374, - "loss": 46.0, - "step": 25783 - }, - { - "epoch": 1.9713668597205496, - "grad_norm": 0.0013950142310932279, - "learning_rate": 0.00019999808368458566, - "loss": 46.0, - "step": 25784 - }, - { - "epoch": 1.9714433167039394, - "grad_norm": 0.0012019522255286574, - "learning_rate": 0.0001999980835358818, - "loss": 46.0, - "step": 25785 - }, - { - "epoch": 1.9715197736873291, - "grad_norm": 0.0009348238236270845, - "learning_rate": 0.00019999808338717217, - "loss": 46.0, - "step": 25786 - }, - { - "epoch": 1.9715962306707189, - "grad_norm": 0.0006031130906194448, - "learning_rate": 0.0001999980832384568, - "loss": 46.0, - "step": 25787 - }, - { - "epoch": 1.9716726876541086, - "grad_norm": 0.0023562414571642876, - "learning_rate": 0.00019999808308973565, - "loss": 46.0, - "step": 25788 - }, - { - "epoch": 1.9717491446374984, - "grad_norm": 0.00122918002307415, - "learning_rate": 0.00019999808294100872, - "loss": 46.0, - "step": 25789 - }, - { - "epoch": 1.9718256016208882, - "grad_norm": 0.0011660020099952817, - "learning_rate": 0.00019999808279227602, - "loss": 46.0, - "step": 25790 - }, - { - "epoch": 1.971902058604278, - "grad_norm": 0.0034926109947264194, - "learning_rate": 0.00019999808264353755, - "loss": 46.0, - "step": 25791 - }, - { - "epoch": 1.9719785155876675, - "grad_norm": 0.0026699271984398365, - "learning_rate": 0.00019999808249479334, - "loss": 46.0, - "step": 25792 - }, - { - "epoch": 1.9720549725710572, - "grad_norm": 0.01537258829921484, - "learning_rate": 0.00019999808234604335, - "loss": 46.0, - "step": 25793 - }, - { - "epoch": 1.972131429554447, - "grad_norm": 0.0008090611663646996, - "learning_rate": 0.00019999808219728756, - "loss": 46.0, - "step": 25794 - }, - { - "epoch": 1.9722078865378365, - "grad_norm": 0.001403270522132516, - "learning_rate": 0.00019999808204852602, - "loss": 46.0, - "step": 25795 - }, - { - "epoch": 1.9722843435212263, - "grad_norm": 0.00156811170745641, - "learning_rate": 0.0001999980818997587, - "loss": 46.0, - "step": 25796 - }, - { - "epoch": 1.972360800504616, - "grad_norm": 0.008453596383333206, - "learning_rate": 0.00019999808175098562, - "loss": 46.0, - "step": 25797 - }, - { - "epoch": 1.9724372574880058, - "grad_norm": 0.002186944941058755, - "learning_rate": 0.0001999980816022068, - "loss": 46.0, - "step": 25798 - }, - { - "epoch": 1.9725137144713956, - "grad_norm": 0.0006994829163886607, - "learning_rate": 0.0001999980814534222, - "loss": 46.0, - "step": 25799 - }, - { - "epoch": 1.9725901714547853, - "grad_norm": 0.0025082130450755358, - "learning_rate": 0.0001999980813046318, - "loss": 46.0, - "step": 25800 - }, - { - "epoch": 1.972666628438175, - "grad_norm": 0.0007570331799797714, - "learning_rate": 0.00019999808115583564, - "loss": 46.0, - "step": 25801 - }, - { - "epoch": 1.9727430854215648, - "grad_norm": 0.001030400162562728, - "learning_rate": 0.0001999980810070337, - "loss": 46.0, - "step": 25802 - }, - { - "epoch": 1.9728195424049544, - "grad_norm": 0.0006028040661476552, - "learning_rate": 0.000199998080858226, - "loss": 46.0, - "step": 25803 - }, - { - "epoch": 1.9728959993883441, - "grad_norm": 0.0006911895470693707, - "learning_rate": 0.00019999808070941252, - "loss": 46.0, - "step": 25804 - }, - { - "epoch": 1.972972456371734, - "grad_norm": 0.0027786351274698973, - "learning_rate": 0.0001999980805605933, - "loss": 46.0, - "step": 25805 - }, - { - "epoch": 1.9730489133551234, - "grad_norm": 0.00045096265967004, - "learning_rate": 0.00019999808041176831, - "loss": 46.0, - "step": 25806 - }, - { - "epoch": 1.9731253703385132, - "grad_norm": 0.00123696424998343, - "learning_rate": 0.00019999808026293755, - "loss": 46.0, - "step": 25807 - }, - { - "epoch": 1.973201827321903, - "grad_norm": 0.00045669314567930996, - "learning_rate": 0.000199998080114101, - "loss": 46.0, - "step": 25808 - }, - { - "epoch": 1.9732782843052927, - "grad_norm": 0.0012601757189258933, - "learning_rate": 0.00019999807996525868, - "loss": 46.0, - "step": 25809 - }, - { - "epoch": 1.9733547412886825, - "grad_norm": 0.0023155435919761658, - "learning_rate": 0.0001999980798164106, - "loss": 46.0, - "step": 25810 - }, - { - "epoch": 1.9734311982720723, - "grad_norm": 0.0015559191815555096, - "learning_rate": 0.00019999807966755674, - "loss": 46.0, - "step": 25811 - }, - { - "epoch": 1.973507655255462, - "grad_norm": 0.00292005087248981, - "learning_rate": 0.00019999807951869713, - "loss": 46.0, - "step": 25812 - }, - { - "epoch": 1.9735841122388518, - "grad_norm": 0.0007657215464860201, - "learning_rate": 0.00019999807936983176, - "loss": 46.0, - "step": 25813 - }, - { - "epoch": 1.9736605692222413, - "grad_norm": 0.0013588722795248032, - "learning_rate": 0.00019999807922096058, - "loss": 46.0, - "step": 25814 - }, - { - "epoch": 1.973737026205631, - "grad_norm": 0.0012404138687998056, - "learning_rate": 0.00019999807907208368, - "loss": 46.0, - "step": 25815 - }, - { - "epoch": 1.9738134831890206, - "grad_norm": 0.006692041177302599, - "learning_rate": 0.00019999807892320096, - "loss": 46.0, - "step": 25816 - }, - { - "epoch": 1.9738899401724104, - "grad_norm": 0.002543903887271881, - "learning_rate": 0.0001999980787743125, - "loss": 46.0, - "step": 25817 - }, - { - "epoch": 1.9739663971558001, - "grad_norm": 0.0012056382838636637, - "learning_rate": 0.00019999807862541827, - "loss": 46.0, - "step": 25818 - }, - { - "epoch": 1.97404285413919, - "grad_norm": 0.0008124654996208847, - "learning_rate": 0.00019999807847651826, - "loss": 46.0, - "step": 25819 - }, - { - "epoch": 1.9741193111225797, - "grad_norm": 0.01230429857969284, - "learning_rate": 0.0001999980783276125, - "loss": 46.0, - "step": 25820 - }, - { - "epoch": 1.9741957681059694, - "grad_norm": 0.0009111386025324464, - "learning_rate": 0.00019999807817870093, - "loss": 46.0, - "step": 25821 - }, - { - "epoch": 1.9742722250893592, - "grad_norm": 0.0012734156334772706, - "learning_rate": 0.00019999807802978362, - "loss": 46.0, - "step": 25822 - }, - { - "epoch": 1.974348682072749, - "grad_norm": 0.000698140705935657, - "learning_rate": 0.00019999807788086057, - "loss": 46.0, - "step": 25823 - }, - { - "epoch": 1.9744251390561387, - "grad_norm": 0.0029561358969658613, - "learning_rate": 0.00019999807773193168, - "loss": 46.0, - "step": 25824 - }, - { - "epoch": 1.9745015960395282, - "grad_norm": 0.001605145982466638, - "learning_rate": 0.00019999807758299708, - "loss": 46.0, - "step": 25825 - }, - { - "epoch": 1.974578053022918, - "grad_norm": 0.0010017509339377284, - "learning_rate": 0.00019999807743405668, - "loss": 46.0, - "step": 25826 - }, - { - "epoch": 1.9746545100063075, - "grad_norm": 0.0005134253297001123, - "learning_rate": 0.00019999807728511053, - "loss": 46.0, - "step": 25827 - }, - { - "epoch": 1.9747309669896973, - "grad_norm": 0.0010002549970522523, - "learning_rate": 0.0001999980771361586, - "loss": 46.0, - "step": 25828 - }, - { - "epoch": 1.974807423973087, - "grad_norm": 0.009470101445913315, - "learning_rate": 0.00019999807698720093, - "loss": 46.0, - "step": 25829 - }, - { - "epoch": 1.9748838809564768, - "grad_norm": 0.0013153848703950644, - "learning_rate": 0.00019999807683823744, - "loss": 46.0, - "step": 25830 - }, - { - "epoch": 1.9749603379398666, - "grad_norm": 0.0017125066369771957, - "learning_rate": 0.0001999980766892682, - "loss": 46.0, - "step": 25831 - }, - { - "epoch": 1.9750367949232563, - "grad_norm": 0.002290693111717701, - "learning_rate": 0.0001999980765402932, - "loss": 46.0, - "step": 25832 - }, - { - "epoch": 1.975113251906646, - "grad_norm": 0.002258292166516185, - "learning_rate": 0.00019999807639131242, - "loss": 46.0, - "step": 25833 - }, - { - "epoch": 1.9751897088900359, - "grad_norm": 0.0009066957863979042, - "learning_rate": 0.00019999807624232588, - "loss": 46.0, - "step": 25834 - }, - { - "epoch": 1.9752661658734256, - "grad_norm": 0.0003284249687567353, - "learning_rate": 0.00019999807609333357, - "loss": 46.0, - "step": 25835 - }, - { - "epoch": 1.9753426228568152, - "grad_norm": 0.0006234605680219829, - "learning_rate": 0.0001999980759443355, - "loss": 46.0, - "step": 25836 - }, - { - "epoch": 1.975419079840205, - "grad_norm": 0.002773670945316553, - "learning_rate": 0.00019999807579533163, - "loss": 46.0, - "step": 25837 - }, - { - "epoch": 1.9754955368235945, - "grad_norm": 0.0014588736230507493, - "learning_rate": 0.000199998075646322, - "loss": 46.0, - "step": 25838 - }, - { - "epoch": 1.9755719938069842, - "grad_norm": 0.0011562788859009743, - "learning_rate": 0.00019999807549730663, - "loss": 46.0, - "step": 25839 - }, - { - "epoch": 1.975648450790374, - "grad_norm": 0.0024272899609059095, - "learning_rate": 0.00019999807534828545, - "loss": 46.0, - "step": 25840 - }, - { - "epoch": 1.9757249077737637, - "grad_norm": 0.0009556411532685161, - "learning_rate": 0.00019999807519925853, - "loss": 46.0, - "step": 25841 - }, - { - "epoch": 1.9758013647571535, - "grad_norm": 0.0037731309421360493, - "learning_rate": 0.00019999807505022583, - "loss": 46.0, - "step": 25842 - }, - { - "epoch": 1.9758778217405433, - "grad_norm": 0.002451549516990781, - "learning_rate": 0.00019999807490118736, - "loss": 46.0, - "step": 25843 - }, - { - "epoch": 1.975954278723933, - "grad_norm": 0.0021754151675850153, - "learning_rate": 0.00019999807475214315, - "loss": 46.0, - "step": 25844 - }, - { - "epoch": 1.9760307357073228, - "grad_norm": 0.0030975828412920237, - "learning_rate": 0.00019999807460309313, - "loss": 46.0, - "step": 25845 - }, - { - "epoch": 1.9761071926907123, - "grad_norm": 0.008454015478491783, - "learning_rate": 0.00019999807445403734, - "loss": 46.0, - "step": 25846 - }, - { - "epoch": 1.976183649674102, - "grad_norm": 0.004546020179986954, - "learning_rate": 0.00019999807430497578, - "loss": 46.0, - "step": 25847 - }, - { - "epoch": 1.9762601066574919, - "grad_norm": 0.000900718558114022, - "learning_rate": 0.00019999807415590847, - "loss": 46.0, - "step": 25848 - }, - { - "epoch": 1.9763365636408814, - "grad_norm": 0.0016044018557295203, - "learning_rate": 0.0001999980740068354, - "loss": 46.0, - "step": 25849 - }, - { - "epoch": 1.9764130206242712, - "grad_norm": 0.0003852058725897223, - "learning_rate": 0.00019999807385775653, - "loss": 46.0, - "step": 25850 - }, - { - "epoch": 1.976489477607661, - "grad_norm": 0.003859543474391103, - "learning_rate": 0.00019999807370867193, - "loss": 46.0, - "step": 25851 - }, - { - "epoch": 1.9765659345910507, - "grad_norm": 0.0012387122260406613, - "learning_rate": 0.00019999807355958153, - "loss": 46.0, - "step": 25852 - }, - { - "epoch": 1.9766423915744404, - "grad_norm": 0.00046013304381631315, - "learning_rate": 0.00019999807341048538, - "loss": 46.0, - "step": 25853 - }, - { - "epoch": 1.9767188485578302, - "grad_norm": 0.001931617152877152, - "learning_rate": 0.00019999807326138346, - "loss": 46.0, - "step": 25854 - }, - { - "epoch": 1.97679530554122, - "grad_norm": 0.0021536045242100954, - "learning_rate": 0.00019999807311227573, - "loss": 46.0, - "step": 25855 - }, - { - "epoch": 1.9768717625246097, - "grad_norm": 0.0019783740863204002, - "learning_rate": 0.00019999807296316226, - "loss": 46.0, - "step": 25856 - }, - { - "epoch": 1.9769482195079993, - "grad_norm": 0.0029039422515779734, - "learning_rate": 0.00019999807281404302, - "loss": 46.0, - "step": 25857 - }, - { - "epoch": 1.977024676491389, - "grad_norm": 0.001038994058035314, - "learning_rate": 0.000199998072664918, - "loss": 46.0, - "step": 25858 - }, - { - "epoch": 1.9771011334747788, - "grad_norm": 0.0006801777053624392, - "learning_rate": 0.00019999807251578724, - "loss": 46.0, - "step": 25859 - }, - { - "epoch": 1.9771775904581683, - "grad_norm": 0.0006004204624332488, - "learning_rate": 0.00019999807236665068, - "loss": 46.0, - "step": 25860 - }, - { - "epoch": 1.977254047441558, - "grad_norm": 0.003392234444618225, - "learning_rate": 0.00019999807221750837, - "loss": 46.0, - "step": 25861 - }, - { - "epoch": 1.9773305044249478, - "grad_norm": 0.0020625144243240356, - "learning_rate": 0.0001999980720683603, - "loss": 46.0, - "step": 25862 - }, - { - "epoch": 1.9774069614083376, - "grad_norm": 0.0017339610494673252, - "learning_rate": 0.00019999807191920644, - "loss": 46.0, - "step": 25863 - }, - { - "epoch": 1.9774834183917274, - "grad_norm": 0.0004393297713249922, - "learning_rate": 0.00019999807177004678, - "loss": 46.0, - "step": 25864 - }, - { - "epoch": 1.9775598753751171, - "grad_norm": 0.0019282331923022866, - "learning_rate": 0.0001999980716208814, - "loss": 46.0, - "step": 25865 - }, - { - "epoch": 1.9776363323585069, - "grad_norm": 0.0029359781183302402, - "learning_rate": 0.00019999807147171023, - "loss": 46.0, - "step": 25866 - }, - { - "epoch": 1.9777127893418966, - "grad_norm": 0.000568628020118922, - "learning_rate": 0.00019999807132253333, - "loss": 46.0, - "step": 25867 - }, - { - "epoch": 1.9777892463252862, - "grad_norm": 0.001429157331585884, - "learning_rate": 0.00019999807117335064, - "loss": 46.0, - "step": 25868 - }, - { - "epoch": 1.977865703308676, - "grad_norm": 0.000368216511560604, - "learning_rate": 0.00019999807102416214, - "loss": 46.0, - "step": 25869 - }, - { - "epoch": 1.9779421602920657, - "grad_norm": 0.0009015920222736895, - "learning_rate": 0.0001999980708749679, - "loss": 46.0, - "step": 25870 - }, - { - "epoch": 1.9780186172754552, - "grad_norm": 0.008182347752153873, - "learning_rate": 0.0001999980707257679, - "loss": 46.0, - "step": 25871 - }, - { - "epoch": 1.978095074258845, - "grad_norm": 0.0006159769254736602, - "learning_rate": 0.0001999980705765621, - "loss": 46.0, - "step": 25872 - }, - { - "epoch": 1.9781715312422348, - "grad_norm": 0.0004768925136886537, - "learning_rate": 0.00019999807042735056, - "loss": 46.0, - "step": 25873 - }, - { - "epoch": 1.9782479882256245, - "grad_norm": 0.000916451623197645, - "learning_rate": 0.00019999807027813325, - "loss": 46.0, - "step": 25874 - }, - { - "epoch": 1.9783244452090143, - "grad_norm": 0.0027534232940524817, - "learning_rate": 0.00019999807012891017, - "loss": 46.0, - "step": 25875 - }, - { - "epoch": 1.978400902192404, - "grad_norm": 0.00047271931543946266, - "learning_rate": 0.0001999980699796813, - "loss": 46.0, - "step": 25876 - }, - { - "epoch": 1.9784773591757938, - "grad_norm": 0.000993673107586801, - "learning_rate": 0.00019999806983044666, - "loss": 46.0, - "step": 25877 - }, - { - "epoch": 1.9785538161591836, - "grad_norm": 0.0006514216074720025, - "learning_rate": 0.00019999806968120626, - "loss": 46.0, - "step": 25878 - }, - { - "epoch": 1.9786302731425731, - "grad_norm": 0.0008146577165462077, - "learning_rate": 0.0001999980695319601, - "loss": 46.0, - "step": 25879 - }, - { - "epoch": 1.9787067301259629, - "grad_norm": 0.0037862665485590696, - "learning_rate": 0.0001999980693827082, - "loss": 46.0, - "step": 25880 - }, - { - "epoch": 1.9787831871093526, - "grad_norm": 0.00133634137455374, - "learning_rate": 0.0001999980692334505, - "loss": 46.0, - "step": 25881 - }, - { - "epoch": 1.9788596440927422, - "grad_norm": 0.0007426688098348677, - "learning_rate": 0.00019999806908418703, - "loss": 46.0, - "step": 25882 - }, - { - "epoch": 1.978936101076132, - "grad_norm": 0.0014057334046810865, - "learning_rate": 0.00019999806893491778, - "loss": 46.0, - "step": 25883 - }, - { - "epoch": 1.9790125580595217, - "grad_norm": 0.001237798947840929, - "learning_rate": 0.00019999806878564277, - "loss": 46.0, - "step": 25884 - }, - { - "epoch": 1.9790890150429115, - "grad_norm": 0.0012995251454412937, - "learning_rate": 0.00019999806863636198, - "loss": 46.0, - "step": 25885 - }, - { - "epoch": 1.9791654720263012, - "grad_norm": 0.0014032222097739577, - "learning_rate": 0.00019999806848707542, - "loss": 46.0, - "step": 25886 - }, - { - "epoch": 1.979241929009691, - "grad_norm": 0.001716622500680387, - "learning_rate": 0.0001999980683377831, - "loss": 46.0, - "step": 25887 - }, - { - "epoch": 1.9793183859930807, - "grad_norm": 0.0006896345294080675, - "learning_rate": 0.000199998068188485, - "loss": 46.0, - "step": 25888 - }, - { - "epoch": 1.9793948429764705, - "grad_norm": 0.002268021460622549, - "learning_rate": 0.00019999806803918115, - "loss": 46.0, - "step": 25889 - }, - { - "epoch": 1.97947129995986, - "grad_norm": 0.0019452099222689867, - "learning_rate": 0.00019999806788987155, - "loss": 46.0, - "step": 25890 - }, - { - "epoch": 1.9795477569432498, - "grad_norm": 0.0024786870926618576, - "learning_rate": 0.00019999806774055614, - "loss": 46.0, - "step": 25891 - }, - { - "epoch": 1.9796242139266396, - "grad_norm": 0.0007155233179219067, - "learning_rate": 0.00019999806759123497, - "loss": 46.0, - "step": 25892 - }, - { - "epoch": 1.979700670910029, - "grad_norm": 0.002744987839832902, - "learning_rate": 0.00019999806744190802, - "loss": 46.0, - "step": 25893 - }, - { - "epoch": 1.9797771278934189, - "grad_norm": 0.0014073261991143227, - "learning_rate": 0.00019999806729257533, - "loss": 46.0, - "step": 25894 - }, - { - "epoch": 1.9798535848768086, - "grad_norm": 0.011025724932551384, - "learning_rate": 0.00019999806714323686, - "loss": 46.0, - "step": 25895 - }, - { - "epoch": 1.9799300418601984, - "grad_norm": 0.001961614703759551, - "learning_rate": 0.00019999806699389262, - "loss": 46.0, - "step": 25896 - }, - { - "epoch": 1.9800064988435881, - "grad_norm": 0.000389292516047135, - "learning_rate": 0.0001999980668445426, - "loss": 46.0, - "step": 25897 - }, - { - "epoch": 1.980082955826978, - "grad_norm": 0.0014290747931227088, - "learning_rate": 0.0001999980666951868, - "loss": 46.0, - "step": 25898 - }, - { - "epoch": 1.9801594128103677, - "grad_norm": 0.0033309024292975664, - "learning_rate": 0.00019999806654582525, - "loss": 46.0, - "step": 25899 - }, - { - "epoch": 1.9802358697937574, - "grad_norm": 0.002173813059926033, - "learning_rate": 0.00019999806639645792, - "loss": 46.0, - "step": 25900 - }, - { - "epoch": 1.980312326777147, - "grad_norm": 0.001985164824873209, - "learning_rate": 0.00019999806624708484, - "loss": 46.0, - "step": 25901 - }, - { - "epoch": 1.9803887837605367, - "grad_norm": 0.0027034750673919916, - "learning_rate": 0.00019999806609770598, - "loss": 46.0, - "step": 25902 - }, - { - "epoch": 1.9804652407439265, - "grad_norm": 0.0010117364581674337, - "learning_rate": 0.00019999806594832138, - "loss": 46.0, - "step": 25903 - }, - { - "epoch": 1.980541697727316, - "grad_norm": 0.0013257297687232494, - "learning_rate": 0.00019999806579893098, - "loss": 46.0, - "step": 25904 - }, - { - "epoch": 1.9806181547107058, - "grad_norm": 0.0014383801026269794, - "learning_rate": 0.00019999806564953478, - "loss": 46.0, - "step": 25905 - }, - { - "epoch": 1.9806946116940956, - "grad_norm": 0.0007488276460207999, - "learning_rate": 0.00019999806550013286, - "loss": 46.0, - "step": 25906 - }, - { - "epoch": 1.9807710686774853, - "grad_norm": 0.0008196709677577019, - "learning_rate": 0.00019999806535072513, - "loss": 46.0, - "step": 25907 - }, - { - "epoch": 1.980847525660875, - "grad_norm": 0.0027127836365252733, - "learning_rate": 0.00019999806520131167, - "loss": 46.0, - "step": 25908 - }, - { - "epoch": 1.9809239826442648, - "grad_norm": 0.0007803915650583804, - "learning_rate": 0.00019999806505189245, - "loss": 46.0, - "step": 25909 - }, - { - "epoch": 1.9810004396276546, - "grad_norm": 0.000966378313023597, - "learning_rate": 0.0001999980649024674, - "loss": 46.0, - "step": 25910 - }, - { - "epoch": 1.9810768966110444, - "grad_norm": 0.002839515684172511, - "learning_rate": 0.00019999806475303665, - "loss": 46.0, - "step": 25911 - }, - { - "epoch": 1.981153353594434, - "grad_norm": 0.004682973027229309, - "learning_rate": 0.00019999806460360006, - "loss": 46.0, - "step": 25912 - }, - { - "epoch": 1.9812298105778237, - "grad_norm": 0.0009504939080215991, - "learning_rate": 0.00019999806445415776, - "loss": 46.0, - "step": 25913 - }, - { - "epoch": 1.9813062675612134, - "grad_norm": 0.0019033750286325812, - "learning_rate": 0.00019999806430470967, - "loss": 46.0, - "step": 25914 - }, - { - "epoch": 1.981382724544603, - "grad_norm": 0.0028831204399466515, - "learning_rate": 0.00019999806415525582, - "loss": 46.0, - "step": 25915 - }, - { - "epoch": 1.9814591815279927, - "grad_norm": 0.0007136219064705074, - "learning_rate": 0.00019999806400579617, - "loss": 46.0, - "step": 25916 - }, - { - "epoch": 1.9815356385113825, - "grad_norm": 0.0005213957047089934, - "learning_rate": 0.00019999806385633076, - "loss": 46.0, - "step": 25917 - }, - { - "epoch": 1.9816120954947722, - "grad_norm": 0.001165029825642705, - "learning_rate": 0.00019999806370685962, - "loss": 46.0, - "step": 25918 - }, - { - "epoch": 1.981688552478162, - "grad_norm": 0.0018076051492244005, - "learning_rate": 0.0001999980635573827, - "loss": 46.0, - "step": 25919 - }, - { - "epoch": 1.9817650094615518, - "grad_norm": 0.004414466675370932, - "learning_rate": 0.00019999806340789995, - "loss": 46.0, - "step": 25920 - }, - { - "epoch": 1.9818414664449415, - "grad_norm": 0.0005241346079856157, - "learning_rate": 0.00019999806325841148, - "loss": 46.0, - "step": 25921 - }, - { - "epoch": 1.9819179234283313, - "grad_norm": 0.005071109160780907, - "learning_rate": 0.00019999806310891724, - "loss": 46.0, - "step": 25922 - }, - { - "epoch": 1.9819943804117208, - "grad_norm": 0.0006443731253966689, - "learning_rate": 0.00019999806295941723, - "loss": 46.0, - "step": 25923 - }, - { - "epoch": 1.9820708373951106, - "grad_norm": 0.0004258525150362402, - "learning_rate": 0.00019999806280991144, - "loss": 46.0, - "step": 25924 - }, - { - "epoch": 1.9821472943785003, - "grad_norm": 0.0004597913066390902, - "learning_rate": 0.00019999806266039988, - "loss": 46.0, - "step": 25925 - }, - { - "epoch": 1.9822237513618899, - "grad_norm": 0.0006636075559072196, - "learning_rate": 0.00019999806251088254, - "loss": 46.0, - "step": 25926 - }, - { - "epoch": 1.9823002083452796, - "grad_norm": 0.001049064681865275, - "learning_rate": 0.00019999806236135946, - "loss": 46.0, - "step": 25927 - }, - { - "epoch": 1.9823766653286694, - "grad_norm": 0.0014680858002975583, - "learning_rate": 0.0001999980622118306, - "loss": 46.0, - "step": 25928 - }, - { - "epoch": 1.9824531223120592, - "grad_norm": 0.0011501642875373363, - "learning_rate": 0.00019999806206229595, - "loss": 46.0, - "step": 25929 - }, - { - "epoch": 1.982529579295449, - "grad_norm": 0.0003905289340764284, - "learning_rate": 0.00019999806191275558, - "loss": 46.0, - "step": 25930 - }, - { - "epoch": 1.9826060362788387, - "grad_norm": 0.0021350185852497816, - "learning_rate": 0.00019999806176320938, - "loss": 46.0, - "step": 25931 - }, - { - "epoch": 1.9826824932622285, - "grad_norm": 0.0004402836493682116, - "learning_rate": 0.00019999806161365746, - "loss": 46.0, - "step": 25932 - }, - { - "epoch": 1.9827589502456182, - "grad_norm": 0.0006026846822351217, - "learning_rate": 0.00019999806146409977, - "loss": 46.0, - "step": 25933 - }, - { - "epoch": 1.9828354072290078, - "grad_norm": 0.0007048918050713837, - "learning_rate": 0.0001999980613145363, - "loss": 46.0, - "step": 25934 - }, - { - "epoch": 1.9829118642123975, - "grad_norm": 0.001930861035361886, - "learning_rate": 0.00019999806116496703, - "loss": 46.0, - "step": 25935 - }, - { - "epoch": 1.9829883211957873, - "grad_norm": 0.0017339850310236216, - "learning_rate": 0.00019999806101539202, - "loss": 46.0, - "step": 25936 - }, - { - "epoch": 1.9830647781791768, - "grad_norm": 0.004155580885708332, - "learning_rate": 0.00019999806086581123, - "loss": 46.0, - "step": 25937 - }, - { - "epoch": 1.9831412351625666, - "grad_norm": 0.0012208541156724095, - "learning_rate": 0.00019999806071622467, - "loss": 46.0, - "step": 25938 - }, - { - "epoch": 1.9832176921459563, - "grad_norm": 0.0019351525697857141, - "learning_rate": 0.00019999806056663234, - "loss": 46.0, - "step": 25939 - }, - { - "epoch": 1.983294149129346, - "grad_norm": 0.0014785719104111195, - "learning_rate": 0.00019999806041703426, - "loss": 46.0, - "step": 25940 - }, - { - "epoch": 1.9833706061127359, - "grad_norm": 0.0014351492282003164, - "learning_rate": 0.00019999806026743037, - "loss": 46.0, - "step": 25941 - }, - { - "epoch": 1.9834470630961256, - "grad_norm": 0.0004911868018098176, - "learning_rate": 0.00019999806011782077, - "loss": 46.0, - "step": 25942 - }, - { - "epoch": 1.9835235200795154, - "grad_norm": 0.00128077098634094, - "learning_rate": 0.00019999805996820532, - "loss": 46.0, - "step": 25943 - }, - { - "epoch": 1.9835999770629051, - "grad_norm": 0.0004483874363359064, - "learning_rate": 0.00019999805981858417, - "loss": 46.0, - "step": 25944 - }, - { - "epoch": 1.9836764340462947, - "grad_norm": 0.0012766679283231497, - "learning_rate": 0.00019999805966895723, - "loss": 46.0, - "step": 25945 - }, - { - "epoch": 1.9837528910296844, - "grad_norm": 0.0021675911266356707, - "learning_rate": 0.00019999805951932453, - "loss": 46.0, - "step": 25946 - }, - { - "epoch": 1.983829348013074, - "grad_norm": 0.0005191454547457397, - "learning_rate": 0.00019999805936968607, - "loss": 46.0, - "step": 25947 - }, - { - "epoch": 1.9839058049964637, - "grad_norm": 0.0016540009528398514, - "learning_rate": 0.0001999980592200418, - "loss": 46.0, - "step": 25948 - }, - { - "epoch": 1.9839822619798535, - "grad_norm": 0.0003261689271312207, - "learning_rate": 0.00019999805907039179, - "loss": 46.0, - "step": 25949 - }, - { - "epoch": 1.9840587189632433, - "grad_norm": 0.0013586319983005524, - "learning_rate": 0.00019999805892073597, - "loss": 46.0, - "step": 25950 - }, - { - "epoch": 1.984135175946633, - "grad_norm": 0.0015663780504837632, - "learning_rate": 0.00019999805877107444, - "loss": 46.0, - "step": 25951 - }, - { - "epoch": 1.9842116329300228, - "grad_norm": 0.0019280379638075829, - "learning_rate": 0.0001999980586214071, - "loss": 46.0, - "step": 25952 - }, - { - "epoch": 1.9842880899134125, - "grad_norm": 0.0028812652453780174, - "learning_rate": 0.000199998058471734, - "loss": 46.0, - "step": 25953 - }, - { - "epoch": 1.9843645468968023, - "grad_norm": 0.0006614577723667026, - "learning_rate": 0.00019999805832205514, - "loss": 46.0, - "step": 25954 - }, - { - "epoch": 1.984441003880192, - "grad_norm": 0.002217573346570134, - "learning_rate": 0.00019999805817237052, - "loss": 46.0, - "step": 25955 - }, - { - "epoch": 1.9845174608635816, - "grad_norm": 0.0008722925558686256, - "learning_rate": 0.0001999980580226801, - "loss": 46.0, - "step": 25956 - }, - { - "epoch": 1.9845939178469714, - "grad_norm": 0.0016122815432026982, - "learning_rate": 0.00019999805787298392, - "loss": 46.0, - "step": 25957 - }, - { - "epoch": 1.984670374830361, - "grad_norm": 0.0028325149323791265, - "learning_rate": 0.00019999805772328197, - "loss": 46.0, - "step": 25958 - }, - { - "epoch": 1.9847468318137507, - "grad_norm": 0.0017200781730934978, - "learning_rate": 0.00019999805757357425, - "loss": 46.0, - "step": 25959 - }, - { - "epoch": 1.9848232887971404, - "grad_norm": 0.001014393987134099, - "learning_rate": 0.00019999805742386078, - "loss": 46.0, - "step": 25960 - }, - { - "epoch": 1.9848997457805302, - "grad_norm": 0.0007479116902686656, - "learning_rate": 0.00019999805727414154, - "loss": 46.0, - "step": 25961 - }, - { - "epoch": 1.98497620276392, - "grad_norm": 0.0013300023274496198, - "learning_rate": 0.00019999805712441653, - "loss": 46.0, - "step": 25962 - }, - { - "epoch": 1.9850526597473097, - "grad_norm": 0.00508840149268508, - "learning_rate": 0.00019999805697468572, - "loss": 46.0, - "step": 25963 - }, - { - "epoch": 1.9851291167306995, - "grad_norm": 0.002394547453150153, - "learning_rate": 0.00019999805682494918, - "loss": 46.0, - "step": 25964 - }, - { - "epoch": 1.9852055737140892, - "grad_norm": 0.0009620438795536757, - "learning_rate": 0.00019999805667520682, - "loss": 46.0, - "step": 25965 - }, - { - "epoch": 1.985282030697479, - "grad_norm": 0.004395087715238333, - "learning_rate": 0.00019999805652545874, - "loss": 46.0, - "step": 25966 - }, - { - "epoch": 1.9853584876808685, - "grad_norm": 0.0007532541640102863, - "learning_rate": 0.0001999980563757049, - "loss": 46.0, - "step": 25967 - }, - { - "epoch": 1.9854349446642583, - "grad_norm": 0.0007761105662211776, - "learning_rate": 0.00019999805622594524, - "loss": 46.0, - "step": 25968 - }, - { - "epoch": 1.9855114016476478, - "grad_norm": 0.005108152516186237, - "learning_rate": 0.00019999805607617984, - "loss": 46.0, - "step": 25969 - }, - { - "epoch": 1.9855878586310376, - "grad_norm": 0.011084684170782566, - "learning_rate": 0.00019999805592640866, - "loss": 46.0, - "step": 25970 - }, - { - "epoch": 1.9856643156144274, - "grad_norm": 0.0010184103157371283, - "learning_rate": 0.00019999805577663172, - "loss": 46.0, - "step": 25971 - }, - { - "epoch": 1.9857407725978171, - "grad_norm": 0.0014622407034039497, - "learning_rate": 0.00019999805562684903, - "loss": 46.0, - "step": 25972 - }, - { - "epoch": 1.9858172295812069, - "grad_norm": 0.0006349731120280921, - "learning_rate": 0.00019999805547706053, - "loss": 46.0, - "step": 25973 - }, - { - "epoch": 1.9858936865645966, - "grad_norm": 0.001209433190524578, - "learning_rate": 0.00019999805532726627, - "loss": 46.0, - "step": 25974 - }, - { - "epoch": 1.9859701435479864, - "grad_norm": 0.001606301637366414, - "learning_rate": 0.00019999805517746623, - "loss": 46.0, - "step": 25975 - }, - { - "epoch": 1.9860466005313762, - "grad_norm": 0.0009062322205863893, - "learning_rate": 0.00019999805502766047, - "loss": 46.0, - "step": 25976 - }, - { - "epoch": 1.9861230575147657, - "grad_norm": 0.0009599516051821411, - "learning_rate": 0.00019999805487784888, - "loss": 46.0, - "step": 25977 - }, - { - "epoch": 1.9861995144981555, - "grad_norm": 0.0011032337788492441, - "learning_rate": 0.00019999805472803157, - "loss": 46.0, - "step": 25978 - }, - { - "epoch": 1.9862759714815452, - "grad_norm": 0.0007067614351399243, - "learning_rate": 0.0001999980545782085, - "loss": 46.0, - "step": 25979 - }, - { - "epoch": 1.9863524284649348, - "grad_norm": 0.0005981251015327871, - "learning_rate": 0.0001999980544283796, - "loss": 46.0, - "step": 25980 - }, - { - "epoch": 1.9864288854483245, - "grad_norm": 0.001199584687128663, - "learning_rate": 0.00019999805427854496, - "loss": 46.0, - "step": 25981 - }, - { - "epoch": 1.9865053424317143, - "grad_norm": 0.0008631689706817269, - "learning_rate": 0.00019999805412870456, - "loss": 46.0, - "step": 25982 - }, - { - "epoch": 1.986581799415104, - "grad_norm": 0.0036176980938762426, - "learning_rate": 0.0001999980539788584, - "loss": 46.0, - "step": 25983 - }, - { - "epoch": 1.9866582563984938, - "grad_norm": 0.0029676465783268213, - "learning_rate": 0.00019999805382900644, - "loss": 46.0, - "step": 25984 - }, - { - "epoch": 1.9867347133818836, - "grad_norm": 0.0018274352187290788, - "learning_rate": 0.00019999805367914872, - "loss": 46.0, - "step": 25985 - }, - { - "epoch": 1.9868111703652733, - "grad_norm": 0.0011082688579335809, - "learning_rate": 0.00019999805352928523, - "loss": 46.0, - "step": 25986 - }, - { - "epoch": 1.986887627348663, - "grad_norm": 0.001649734447710216, - "learning_rate": 0.000199998053379416, - "loss": 46.0, - "step": 25987 - }, - { - "epoch": 1.9869640843320526, - "grad_norm": 0.0005887331790290773, - "learning_rate": 0.00019999805322954095, - "loss": 46.0, - "step": 25988 - }, - { - "epoch": 1.9870405413154424, - "grad_norm": 0.0006896872655488551, - "learning_rate": 0.00019999805307966017, - "loss": 46.0, - "step": 25989 - }, - { - "epoch": 1.9871169982988321, - "grad_norm": 0.0018458407139405608, - "learning_rate": 0.0001999980529297736, - "loss": 46.0, - "step": 25990 - }, - { - "epoch": 1.9871934552822217, - "grad_norm": 0.0009387212339788675, - "learning_rate": 0.00019999805277988127, - "loss": 46.0, - "step": 25991 - }, - { - "epoch": 1.9872699122656114, - "grad_norm": 0.0010361146414652467, - "learning_rate": 0.00019999805262998317, - "loss": 46.0, - "step": 25992 - }, - { - "epoch": 1.9873463692490012, - "grad_norm": 0.0016938091721385717, - "learning_rate": 0.0001999980524800793, - "loss": 46.0, - "step": 25993 - }, - { - "epoch": 1.987422826232391, - "grad_norm": 0.0009540242026560009, - "learning_rate": 0.00019999805233016966, - "loss": 46.0, - "step": 25994 - }, - { - "epoch": 1.9874992832157807, - "grad_norm": 0.007173227611929178, - "learning_rate": 0.00019999805218025424, - "loss": 46.0, - "step": 25995 - }, - { - "epoch": 1.9875757401991705, - "grad_norm": 0.0010351529344916344, - "learning_rate": 0.0001999980520303331, - "loss": 46.0, - "step": 25996 - }, - { - "epoch": 1.9876521971825603, - "grad_norm": 0.0008168282802216709, - "learning_rate": 0.00019999805188040614, - "loss": 46.0, - "step": 25997 - }, - { - "epoch": 1.98772865416595, - "grad_norm": 0.001473135664127767, - "learning_rate": 0.00019999805173047343, - "loss": 46.0, - "step": 25998 - }, - { - "epoch": 1.9878051111493396, - "grad_norm": 0.0025913547724485397, - "learning_rate": 0.00019999805158053493, - "loss": 46.0, - "step": 25999 - }, - { - "epoch": 1.9878815681327293, - "grad_norm": 0.002686369465664029, - "learning_rate": 0.00019999805143059067, - "loss": 46.0, - "step": 26000 - }, - { - "epoch": 1.987958025116119, - "grad_norm": 0.0029672659002244473, - "learning_rate": 0.00019999805128064065, - "loss": 46.0, - "step": 26001 - }, - { - "epoch": 1.9880344820995086, - "grad_norm": 0.0016667507588863373, - "learning_rate": 0.00019999805113068487, - "loss": 46.0, - "step": 26002 - }, - { - "epoch": 1.9881109390828984, - "grad_norm": 0.0020327751990407705, - "learning_rate": 0.00019999805098072328, - "loss": 46.0, - "step": 26003 - }, - { - "epoch": 1.9881873960662881, - "grad_norm": 0.002505532931536436, - "learning_rate": 0.00019999805083075595, - "loss": 46.0, - "step": 26004 - }, - { - "epoch": 1.988263853049678, - "grad_norm": 0.002215274376794696, - "learning_rate": 0.00019999805068078284, - "loss": 46.0, - "step": 26005 - }, - { - "epoch": 1.9883403100330677, - "grad_norm": 0.0014072777703404427, - "learning_rate": 0.000199998050530804, - "loss": 46.0, - "step": 26006 - }, - { - "epoch": 1.9884167670164574, - "grad_norm": 0.0011279814643785357, - "learning_rate": 0.00019999805038081934, - "loss": 46.0, - "step": 26007 - }, - { - "epoch": 1.9884932239998472, - "grad_norm": 0.00045551860239356756, - "learning_rate": 0.00019999805023082894, - "loss": 46.0, - "step": 26008 - }, - { - "epoch": 1.988569680983237, - "grad_norm": 0.0014087604358792305, - "learning_rate": 0.00019999805008083277, - "loss": 46.0, - "step": 26009 - }, - { - "epoch": 1.9886461379666265, - "grad_norm": 0.0024430318735539913, - "learning_rate": 0.0001999980499308308, - "loss": 46.0, - "step": 26010 - }, - { - "epoch": 1.9887225949500162, - "grad_norm": 0.0013536142650991678, - "learning_rate": 0.0001999980497808231, - "loss": 46.0, - "step": 26011 - }, - { - "epoch": 1.988799051933406, - "grad_norm": 0.0013808598741889, - "learning_rate": 0.00019999804963080959, - "loss": 46.0, - "step": 26012 - }, - { - "epoch": 1.9888755089167955, - "grad_norm": 0.0016814154805615544, - "learning_rate": 0.00019999804948079035, - "loss": 46.0, - "step": 26013 - }, - { - "epoch": 1.9889519659001853, - "grad_norm": 0.0030319502111524343, - "learning_rate": 0.00019999804933076534, - "loss": 46.0, - "step": 26014 - }, - { - "epoch": 1.989028422883575, - "grad_norm": 0.0020242927130311728, - "learning_rate": 0.00019999804918073452, - "loss": 46.0, - "step": 26015 - }, - { - "epoch": 1.9891048798669648, - "grad_norm": 0.005415746476501226, - "learning_rate": 0.00019999804903069796, - "loss": 46.0, - "step": 26016 - }, - { - "epoch": 1.9891813368503546, - "grad_norm": 0.001192018506117165, - "learning_rate": 0.00019999804888065563, - "loss": 46.0, - "step": 26017 - }, - { - "epoch": 1.9892577938337443, - "grad_norm": 0.004027003422379494, - "learning_rate": 0.00019999804873060753, - "loss": 46.0, - "step": 26018 - }, - { - "epoch": 1.989334250817134, - "grad_norm": 0.0010035043815150857, - "learning_rate": 0.00019999804858055367, - "loss": 46.0, - "step": 26019 - }, - { - "epoch": 1.9894107078005239, - "grad_norm": 0.005634360946714878, - "learning_rate": 0.00019999804843049402, - "loss": 46.0, - "step": 26020 - }, - { - "epoch": 1.9894871647839134, - "grad_norm": 0.008986933156847954, - "learning_rate": 0.0001999980482804286, - "loss": 46.0, - "step": 26021 - }, - { - "epoch": 1.9895636217673032, - "grad_norm": 0.0012102812761440873, - "learning_rate": 0.00019999804813035743, - "loss": 46.0, - "step": 26022 - }, - { - "epoch": 1.989640078750693, - "grad_norm": 0.0015416034730151296, - "learning_rate": 0.00019999804798028045, - "loss": 46.0, - "step": 26023 - }, - { - "epoch": 1.9897165357340825, - "grad_norm": 0.0008822264499031007, - "learning_rate": 0.00019999804783019776, - "loss": 46.0, - "step": 26024 - }, - { - "epoch": 1.9897929927174722, - "grad_norm": 0.0008111972128972411, - "learning_rate": 0.00019999804768010927, - "loss": 46.0, - "step": 26025 - }, - { - "epoch": 1.989869449700862, - "grad_norm": 0.004551817663013935, - "learning_rate": 0.000199998047530015, - "loss": 46.0, - "step": 26026 - }, - { - "epoch": 1.9899459066842518, - "grad_norm": 0.0011565666645765305, - "learning_rate": 0.00019999804737991497, - "loss": 46.0, - "step": 26027 - }, - { - "epoch": 1.9900223636676415, - "grad_norm": 0.0027729745488613844, - "learning_rate": 0.00019999804722980918, - "loss": 46.0, - "step": 26028 - }, - { - "epoch": 1.9900988206510313, - "grad_norm": 0.00193033239338547, - "learning_rate": 0.0001999980470796976, - "loss": 46.0, - "step": 26029 - }, - { - "epoch": 1.990175277634421, - "grad_norm": 0.0007491422584280372, - "learning_rate": 0.0001999980469295803, - "loss": 46.0, - "step": 26030 - }, - { - "epoch": 1.9902517346178108, - "grad_norm": 0.004865617025643587, - "learning_rate": 0.00019999804677945716, - "loss": 46.0, - "step": 26031 - }, - { - "epoch": 1.9903281916012003, - "grad_norm": 0.008173923939466476, - "learning_rate": 0.0001999980466293283, - "loss": 46.0, - "step": 26032 - }, - { - "epoch": 1.99040464858459, - "grad_norm": 0.001339426962658763, - "learning_rate": 0.00019999804647919365, - "loss": 46.0, - "step": 26033 - }, - { - "epoch": 1.9904811055679799, - "grad_norm": 0.0027886289171874523, - "learning_rate": 0.00019999804632905326, - "loss": 46.0, - "step": 26034 - }, - { - "epoch": 1.9905575625513694, - "grad_norm": 0.01195152010768652, - "learning_rate": 0.00019999804617890706, - "loss": 46.0, - "step": 26035 - }, - { - "epoch": 1.9906340195347592, - "grad_norm": 0.0025208652950823307, - "learning_rate": 0.0001999980460287551, - "loss": 46.0, - "step": 26036 - }, - { - "epoch": 1.990710476518149, - "grad_norm": 0.000533510756213218, - "learning_rate": 0.0001999980458785974, - "loss": 46.0, - "step": 26037 - }, - { - "epoch": 1.9907869335015387, - "grad_norm": 0.005462232977151871, - "learning_rate": 0.0001999980457284339, - "loss": 46.0, - "step": 26038 - }, - { - "epoch": 1.9908633904849284, - "grad_norm": 0.00044619044638238847, - "learning_rate": 0.00019999804557826464, - "loss": 46.0, - "step": 26039 - }, - { - "epoch": 1.9909398474683182, - "grad_norm": 0.0014948435127735138, - "learning_rate": 0.0001999980454280896, - "loss": 46.0, - "step": 26040 - }, - { - "epoch": 1.991016304451708, - "grad_norm": 0.0008152392692863941, - "learning_rate": 0.00019999804527790881, - "loss": 46.0, - "step": 26041 - }, - { - "epoch": 1.9910927614350977, - "grad_norm": 0.0005629527731798589, - "learning_rate": 0.00019999804512772226, - "loss": 46.0, - "step": 26042 - }, - { - "epoch": 1.9911692184184873, - "grad_norm": 0.0007600269163958728, - "learning_rate": 0.00019999804497752993, - "loss": 46.0, - "step": 26043 - }, - { - "epoch": 1.991245675401877, - "grad_norm": 0.0010236066300421953, - "learning_rate": 0.0001999980448273318, - "loss": 46.0, - "step": 26044 - }, - { - "epoch": 1.9913221323852668, - "grad_norm": 0.0009042013552971184, - "learning_rate": 0.00019999804467712791, - "loss": 46.0, - "step": 26045 - }, - { - "epoch": 1.9913985893686563, - "grad_norm": 0.0013252947246655822, - "learning_rate": 0.0001999980445269183, - "loss": 46.0, - "step": 26046 - }, - { - "epoch": 1.991475046352046, - "grad_norm": 0.001456466969102621, - "learning_rate": 0.0001999980443767029, - "loss": 46.0, - "step": 26047 - }, - { - "epoch": 1.9915515033354358, - "grad_norm": 0.0011430311715230346, - "learning_rate": 0.00019999804422648167, - "loss": 46.0, - "step": 26048 - }, - { - "epoch": 1.9916279603188256, - "grad_norm": 0.0011806755792349577, - "learning_rate": 0.00019999804407625472, - "loss": 46.0, - "step": 26049 - }, - { - "epoch": 1.9917044173022154, - "grad_norm": 0.0009642531513236463, - "learning_rate": 0.000199998043926022, - "loss": 46.0, - "step": 26050 - }, - { - "epoch": 1.9917808742856051, - "grad_norm": 0.0027697968762367964, - "learning_rate": 0.00019999804377578354, - "loss": 46.0, - "step": 26051 - }, - { - "epoch": 1.991857331268995, - "grad_norm": 0.000881535466760397, - "learning_rate": 0.00019999804362553928, - "loss": 46.0, - "step": 26052 - }, - { - "epoch": 1.9919337882523847, - "grad_norm": 0.003264032071456313, - "learning_rate": 0.00019999804347528924, - "loss": 46.0, - "step": 26053 - }, - { - "epoch": 1.9920102452357742, - "grad_norm": 0.004112382885068655, - "learning_rate": 0.00019999804332503343, - "loss": 46.0, - "step": 26054 - }, - { - "epoch": 1.992086702219164, - "grad_norm": 0.0008862063987180591, - "learning_rate": 0.00019999804317477187, - "loss": 46.0, - "step": 26055 - }, - { - "epoch": 1.9921631592025537, - "grad_norm": 0.0017171716317534447, - "learning_rate": 0.00019999804302450454, - "loss": 46.0, - "step": 26056 - }, - { - "epoch": 1.9922396161859433, - "grad_norm": 0.001282471464946866, - "learning_rate": 0.0001999980428742314, - "loss": 46.0, - "step": 26057 - }, - { - "epoch": 1.992316073169333, - "grad_norm": 0.0034951879642903805, - "learning_rate": 0.00019999804272395255, - "loss": 46.0, - "step": 26058 - }, - { - "epoch": 1.9923925301527228, - "grad_norm": 0.0031374499667435884, - "learning_rate": 0.00019999804257366788, - "loss": 46.0, - "step": 26059 - }, - { - "epoch": 1.9924689871361125, - "grad_norm": 0.0011678824666887522, - "learning_rate": 0.00019999804242337748, - "loss": 46.0, - "step": 26060 - }, - { - "epoch": 1.9925454441195023, - "grad_norm": 0.0010851527331396937, - "learning_rate": 0.0001999980422730813, - "loss": 46.0, - "step": 26061 - }, - { - "epoch": 1.992621901102892, - "grad_norm": 0.0011215993436053395, - "learning_rate": 0.00019999804212277934, - "loss": 46.0, - "step": 26062 - }, - { - "epoch": 1.9926983580862818, - "grad_norm": 0.0023272421676665545, - "learning_rate": 0.00019999804197247162, - "loss": 46.0, - "step": 26063 - }, - { - "epoch": 1.9927748150696716, - "grad_norm": 0.0009803898865357041, - "learning_rate": 0.00019999804182215813, - "loss": 46.0, - "step": 26064 - }, - { - "epoch": 1.9928512720530611, - "grad_norm": 0.0008112943614833057, - "learning_rate": 0.00019999804167183886, - "loss": 46.0, - "step": 26065 - }, - { - "epoch": 1.9929277290364509, - "grad_norm": 0.00735689839348197, - "learning_rate": 0.00019999804152151383, - "loss": 46.0, - "step": 26066 - }, - { - "epoch": 1.9930041860198406, - "grad_norm": 0.003033566987141967, - "learning_rate": 0.00019999804137118301, - "loss": 46.0, - "step": 26067 - }, - { - "epoch": 1.9930806430032302, - "grad_norm": 0.0007896330207586288, - "learning_rate": 0.00019999804122084646, - "loss": 46.0, - "step": 26068 - }, - { - "epoch": 1.99315709998662, - "grad_norm": 0.0023039579391479492, - "learning_rate": 0.0001999980410705041, - "loss": 46.0, - "step": 26069 - }, - { - "epoch": 1.9932335569700097, - "grad_norm": 0.0030734494794160128, - "learning_rate": 0.00019999804092015602, - "loss": 46.0, - "step": 26070 - }, - { - "epoch": 1.9933100139533995, - "grad_norm": 0.0020111759658902884, - "learning_rate": 0.00019999804076980212, - "loss": 46.0, - "step": 26071 - }, - { - "epoch": 1.9933864709367892, - "grad_norm": 0.0006385320448316634, - "learning_rate": 0.00019999804061944247, - "loss": 46.0, - "step": 26072 - }, - { - "epoch": 1.993462927920179, - "grad_norm": 0.0008567767799831927, - "learning_rate": 0.00019999804046907707, - "loss": 46.0, - "step": 26073 - }, - { - "epoch": 1.9935393849035687, - "grad_norm": 0.0026293655391782522, - "learning_rate": 0.00019999804031870587, - "loss": 46.0, - "step": 26074 - }, - { - "epoch": 1.9936158418869585, - "grad_norm": 0.0008773842710070312, - "learning_rate": 0.00019999804016832893, - "loss": 46.0, - "step": 26075 - }, - { - "epoch": 1.993692298870348, - "grad_norm": 0.0010073985904455185, - "learning_rate": 0.0001999980400179462, - "loss": 46.0, - "step": 26076 - }, - { - "epoch": 1.9937687558537378, - "grad_norm": 0.0017932179616764188, - "learning_rate": 0.0001999980398675577, - "loss": 46.0, - "step": 26077 - }, - { - "epoch": 1.9938452128371273, - "grad_norm": 0.0030114867258816957, - "learning_rate": 0.00019999803971716346, - "loss": 46.0, - "step": 26078 - }, - { - "epoch": 1.993921669820517, - "grad_norm": 0.002553084399551153, - "learning_rate": 0.00019999803956676342, - "loss": 46.0, - "step": 26079 - }, - { - "epoch": 1.9939981268039069, - "grad_norm": 0.0013618904631584883, - "learning_rate": 0.0001999980394163576, - "loss": 46.0, - "step": 26080 - }, - { - "epoch": 1.9940745837872966, - "grad_norm": 0.0010776977287605405, - "learning_rate": 0.00019999803926594605, - "loss": 46.0, - "step": 26081 - }, - { - "epoch": 1.9941510407706864, - "grad_norm": 0.0017622618470340967, - "learning_rate": 0.0001999980391155287, - "loss": 46.0, - "step": 26082 - }, - { - "epoch": 1.9942274977540762, - "grad_norm": 0.0008909834432415664, - "learning_rate": 0.0001999980389651056, - "loss": 46.0, - "step": 26083 - }, - { - "epoch": 1.994303954737466, - "grad_norm": 0.0008263926720246673, - "learning_rate": 0.0001999980388146767, - "loss": 46.0, - "step": 26084 - }, - { - "epoch": 1.9943804117208557, - "grad_norm": 0.0010461872443556786, - "learning_rate": 0.00019999803866424206, - "loss": 46.0, - "step": 26085 - }, - { - "epoch": 1.9944568687042454, - "grad_norm": 0.0010621914407238364, - "learning_rate": 0.00019999803851380164, - "loss": 46.0, - "step": 26086 - }, - { - "epoch": 1.994533325687635, - "grad_norm": 0.00033374049235135317, - "learning_rate": 0.00019999803836335544, - "loss": 46.0, - "step": 26087 - }, - { - "epoch": 1.9946097826710247, - "grad_norm": 0.0009514071280136704, - "learning_rate": 0.0001999980382129035, - "loss": 46.0, - "step": 26088 - }, - { - "epoch": 1.9946862396544143, - "grad_norm": 0.0009135097498074174, - "learning_rate": 0.00019999803806244578, - "loss": 46.0, - "step": 26089 - }, - { - "epoch": 1.994762696637804, - "grad_norm": 0.0034728844184428453, - "learning_rate": 0.00019999803791198226, - "loss": 46.0, - "step": 26090 - }, - { - "epoch": 1.9948391536211938, - "grad_norm": 0.0013102206867188215, - "learning_rate": 0.000199998037761513, - "loss": 46.0, - "step": 26091 - }, - { - "epoch": 1.9949156106045836, - "grad_norm": 0.0035122144035995007, - "learning_rate": 0.00019999803761103796, - "loss": 46.0, - "step": 26092 - }, - { - "epoch": 1.9949920675879733, - "grad_norm": 0.0007056621252559125, - "learning_rate": 0.00019999803746055718, - "loss": 46.0, - "step": 26093 - }, - { - "epoch": 1.995068524571363, - "grad_norm": 0.0007049194537103176, - "learning_rate": 0.0001999980373100706, - "loss": 46.0, - "step": 26094 - }, - { - "epoch": 1.9951449815547528, - "grad_norm": 0.0006460077711381018, - "learning_rate": 0.00019999803715957826, - "loss": 46.0, - "step": 26095 - }, - { - "epoch": 1.9952214385381426, - "grad_norm": 0.0004916912293992937, - "learning_rate": 0.00019999803700908013, - "loss": 46.0, - "step": 26096 - }, - { - "epoch": 1.9952978955215324, - "grad_norm": 0.0008945545996539295, - "learning_rate": 0.00019999803685857625, - "loss": 46.0, - "step": 26097 - }, - { - "epoch": 1.995374352504922, - "grad_norm": 0.0024941160809248686, - "learning_rate": 0.0001999980367080666, - "loss": 46.0, - "step": 26098 - }, - { - "epoch": 1.9954508094883117, - "grad_norm": 0.0013370742090046406, - "learning_rate": 0.00019999803655755118, - "loss": 46.0, - "step": 26099 - }, - { - "epoch": 1.9955272664717012, - "grad_norm": 0.0011255971621721983, - "learning_rate": 0.00019999803640703, - "loss": 46.0, - "step": 26100 - }, - { - "epoch": 1.995603723455091, - "grad_norm": 0.00518334424123168, - "learning_rate": 0.00019999803625650304, - "loss": 46.0, - "step": 26101 - }, - { - "epoch": 1.9956801804384807, - "grad_norm": 0.0027693563606590033, - "learning_rate": 0.0001999980361059703, - "loss": 46.0, - "step": 26102 - }, - { - "epoch": 1.9957566374218705, - "grad_norm": 0.0012155806180089712, - "learning_rate": 0.0001999980359554318, - "loss": 46.0, - "step": 26103 - }, - { - "epoch": 1.9958330944052602, - "grad_norm": 0.0009681333904154599, - "learning_rate": 0.00019999803580488754, - "loss": 46.0, - "step": 26104 - }, - { - "epoch": 1.99590955138865, - "grad_norm": 0.0007021358469501138, - "learning_rate": 0.00019999803565433753, - "loss": 46.0, - "step": 26105 - }, - { - "epoch": 1.9959860083720398, - "grad_norm": 0.000937016389798373, - "learning_rate": 0.0001999980355037817, - "loss": 46.0, - "step": 26106 - }, - { - "epoch": 1.9960624653554295, - "grad_norm": 0.000996751245111227, - "learning_rate": 0.0001999980353532201, - "loss": 46.0, - "step": 26107 - }, - { - "epoch": 1.9961389223388193, - "grad_norm": 0.0006522854091599584, - "learning_rate": 0.00019999803520265278, - "loss": 46.0, - "step": 26108 - }, - { - "epoch": 1.9962153793222088, - "grad_norm": 0.001614025910384953, - "learning_rate": 0.00019999803505207968, - "loss": 46.0, - "step": 26109 - }, - { - "epoch": 1.9962918363055986, - "grad_norm": 0.0014393326127901673, - "learning_rate": 0.0001999980349015008, - "loss": 46.0, - "step": 26110 - }, - { - "epoch": 1.9963682932889881, - "grad_norm": 0.0014180174330249429, - "learning_rate": 0.00019999803475091615, - "loss": 46.0, - "step": 26111 - }, - { - "epoch": 1.9964447502723779, - "grad_norm": 0.0010683961445465684, - "learning_rate": 0.0001999980346003257, - "loss": 46.0, - "step": 26112 - }, - { - "epoch": 1.9965212072557676, - "grad_norm": 0.0029336409643292427, - "learning_rate": 0.0001999980344497295, - "loss": 46.0, - "step": 26113 - }, - { - "epoch": 1.9965976642391574, - "grad_norm": 0.0004483310040086508, - "learning_rate": 0.0001999980342991276, - "loss": 46.0, - "step": 26114 - }, - { - "epoch": 1.9966741212225472, - "grad_norm": 0.0025066423695534468, - "learning_rate": 0.00019999803414851982, - "loss": 46.0, - "step": 26115 - }, - { - "epoch": 1.996750578205937, - "grad_norm": 0.000863031717017293, - "learning_rate": 0.00019999803399790633, - "loss": 46.0, - "step": 26116 - }, - { - "epoch": 1.9968270351893267, - "grad_norm": 0.00048315984895452857, - "learning_rate": 0.00019999803384728707, - "loss": 46.0, - "step": 26117 - }, - { - "epoch": 1.9969034921727165, - "grad_norm": 0.0008483135025016963, - "learning_rate": 0.00019999803369666203, - "loss": 46.0, - "step": 26118 - }, - { - "epoch": 1.996979949156106, - "grad_norm": 0.0008925742004066706, - "learning_rate": 0.00019999803354603125, - "loss": 46.0, - "step": 26119 - }, - { - "epoch": 1.9970564061394958, - "grad_norm": 0.0008175320108421147, - "learning_rate": 0.00019999803339539464, - "loss": 46.0, - "step": 26120 - }, - { - "epoch": 1.9971328631228855, - "grad_norm": 0.002876169281080365, - "learning_rate": 0.0001999980332447523, - "loss": 46.0, - "step": 26121 - }, - { - "epoch": 1.997209320106275, - "grad_norm": 0.0006611901917494833, - "learning_rate": 0.00019999803309410418, - "loss": 46.0, - "step": 26122 - }, - { - "epoch": 1.9972857770896648, - "grad_norm": 0.0029720300808548927, - "learning_rate": 0.00019999803294345033, - "loss": 46.0, - "step": 26123 - }, - { - "epoch": 1.9973622340730546, - "grad_norm": 0.0038217692635953426, - "learning_rate": 0.00019999803279279065, - "loss": 46.0, - "step": 26124 - }, - { - "epoch": 1.9974386910564443, - "grad_norm": 0.0035990856122225523, - "learning_rate": 0.00019999803264212523, - "loss": 46.0, - "step": 26125 - }, - { - "epoch": 1.997515148039834, - "grad_norm": 0.0006057422142475843, - "learning_rate": 0.00019999803249145403, - "loss": 46.0, - "step": 26126 - }, - { - "epoch": 1.9975916050232239, - "grad_norm": 0.0006656552432104945, - "learning_rate": 0.0001999980323407771, - "loss": 46.0, - "step": 26127 - }, - { - "epoch": 1.9976680620066136, - "grad_norm": 0.0007760857697576284, - "learning_rate": 0.00019999803219009435, - "loss": 46.0, - "step": 26128 - }, - { - "epoch": 1.9977445189900034, - "grad_norm": 0.0007318463176488876, - "learning_rate": 0.00019999803203940583, - "loss": 46.0, - "step": 26129 - }, - { - "epoch": 1.997820975973393, - "grad_norm": 0.0005209353403188288, - "learning_rate": 0.00019999803188871157, - "loss": 46.0, - "step": 26130 - }, - { - "epoch": 1.9978974329567827, - "grad_norm": 0.0005699151079170406, - "learning_rate": 0.00019999803173801156, - "loss": 46.0, - "step": 26131 - }, - { - "epoch": 1.9979738899401724, - "grad_norm": 0.0031449166126549244, - "learning_rate": 0.00019999803158730572, - "loss": 46.0, - "step": 26132 - }, - { - "epoch": 1.998050346923562, - "grad_norm": 0.0007490452844649553, - "learning_rate": 0.00019999803143659414, - "loss": 46.0, - "step": 26133 - }, - { - "epoch": 1.9981268039069517, - "grad_norm": 0.0006352420896291733, - "learning_rate": 0.0001999980312858768, - "loss": 46.0, - "step": 26134 - }, - { - "epoch": 1.9982032608903415, - "grad_norm": 0.0009486954077146947, - "learning_rate": 0.0001999980311351537, - "loss": 46.0, - "step": 26135 - }, - { - "epoch": 1.9982797178737313, - "grad_norm": 0.0007956647896207869, - "learning_rate": 0.0001999980309844248, - "loss": 46.0, - "step": 26136 - }, - { - "epoch": 1.998356174857121, - "grad_norm": 0.0016845494974404573, - "learning_rate": 0.00019999803083369013, - "loss": 46.0, - "step": 26137 - }, - { - "epoch": 1.9984326318405108, - "grad_norm": 0.0012404262088239193, - "learning_rate": 0.00019999803068294973, - "loss": 46.0, - "step": 26138 - }, - { - "epoch": 1.9985090888239005, - "grad_norm": 0.0012078429572284222, - "learning_rate": 0.0001999980305322035, - "loss": 46.0, - "step": 26139 - }, - { - "epoch": 1.9985855458072903, - "grad_norm": 0.004205978475511074, - "learning_rate": 0.00019999803038145156, - "loss": 46.0, - "step": 26140 - }, - { - "epoch": 1.9986620027906798, - "grad_norm": 0.0024527739733457565, - "learning_rate": 0.00019999803023069382, - "loss": 46.0, - "step": 26141 - }, - { - "epoch": 1.9987384597740696, - "grad_norm": 0.002919460879638791, - "learning_rate": 0.00019999803007993033, - "loss": 46.0, - "step": 26142 - }, - { - "epoch": 1.9988149167574594, - "grad_norm": 0.0013226374285295606, - "learning_rate": 0.00019999802992916107, - "loss": 46.0, - "step": 26143 - }, - { - "epoch": 1.998891373740849, - "grad_norm": 0.0030463864095509052, - "learning_rate": 0.000199998029778386, - "loss": 46.0, - "step": 26144 - }, - { - "epoch": 1.9989678307242387, - "grad_norm": 0.0004927762784063816, - "learning_rate": 0.0001999980296276052, - "loss": 46.0, - "step": 26145 - }, - { - "epoch": 1.9990442877076284, - "grad_norm": 0.0010199008975178003, - "learning_rate": 0.00019999802947681861, - "loss": 46.0, - "step": 26146 - }, - { - "epoch": 1.9991207446910182, - "grad_norm": 0.0008828681311570108, - "learning_rate": 0.00019999802932602626, - "loss": 46.0, - "step": 26147 - }, - { - "epoch": 1.999197201674408, - "grad_norm": 0.0007143750553950667, - "learning_rate": 0.00019999802917522816, - "loss": 46.0, - "step": 26148 - }, - { - "epoch": 1.9992736586577977, - "grad_norm": 0.0005300296470522881, - "learning_rate": 0.00019999802902442428, - "loss": 46.0, - "step": 26149 - }, - { - "epoch": 1.9993501156411875, - "grad_norm": 0.0016304715536534786, - "learning_rate": 0.00019999802887361463, - "loss": 46.0, - "step": 26150 - }, - { - "epoch": 1.9994265726245772, - "grad_norm": 0.001515907235443592, - "learning_rate": 0.0001999980287227992, - "loss": 46.0, - "step": 26151 - }, - { - "epoch": 1.9995030296079668, - "grad_norm": 0.0008195392438210547, - "learning_rate": 0.00019999802857197801, - "loss": 46.0, - "step": 26152 - }, - { - "epoch": 1.9995794865913565, - "grad_norm": 0.0004347838694229722, - "learning_rate": 0.00019999802842115102, - "loss": 46.0, - "step": 26153 - }, - { - "epoch": 1.9996559435747463, - "grad_norm": 0.0007506748079322278, - "learning_rate": 0.0001999980282703183, - "loss": 46.0, - "step": 26154 - }, - { - "epoch": 1.9997324005581358, - "grad_norm": 0.0005916275549679995, - "learning_rate": 0.0001999980281194798, - "loss": 46.0, - "step": 26155 - }, - { - "epoch": 1.9998088575415256, - "grad_norm": 0.0009836844401434064, - "learning_rate": 0.00019999802796863555, - "loss": 46.0, - "step": 26156 - }, - { - "epoch": 1.9998853145249154, - "grad_norm": 0.0007093893946148455, - "learning_rate": 0.0001999980278177855, - "loss": 46.0, - "step": 26157 - }, - { - "epoch": 1.9999617715083051, - "grad_norm": 0.0006633367738686502, - "learning_rate": 0.00019999802766692968, - "loss": 46.0, - "step": 26158 - }, - { - "epoch": 1.9999617715083051, - "eval_loss": 11.5, - "eval_runtime": 31.9233, - "eval_samples_per_second": 172.538, - "eval_steps_per_second": 86.269, - "step": 26158 - }, - { - "epoch": 2.000038228491695, - "grad_norm": 0.004662771243602037, - "learning_rate": 0.0001999980275160681, - "loss": 46.0, - "step": 26159 - }, - { - "epoch": 2.0001146854750846, - "grad_norm": 0.0018271158915013075, - "learning_rate": 0.00019999802736520074, - "loss": 46.0, - "step": 26160 - }, - { - "epoch": 2.0001911424584744, - "grad_norm": 0.0010419727768748999, - "learning_rate": 0.00019999802721432764, - "loss": 46.0, - "step": 26161 - }, - { - "epoch": 2.000267599441864, - "grad_norm": 0.0009499929146841168, - "learning_rate": 0.00019999802706344874, - "loss": 46.0, - "step": 26162 - }, - { - "epoch": 2.000344056425254, - "grad_norm": 0.0011963375145569444, - "learning_rate": 0.0001999980269125641, - "loss": 46.0, - "step": 26163 - }, - { - "epoch": 2.0004205134086432, - "grad_norm": 0.0011605339823290706, - "learning_rate": 0.00019999802676167364, - "loss": 46.0, - "step": 26164 - }, - { - "epoch": 2.000496970392033, - "grad_norm": 0.0014711207477375865, - "learning_rate": 0.00019999802661077744, - "loss": 46.0, - "step": 26165 - }, - { - "epoch": 2.0005734273754228, - "grad_norm": 0.0037214034236967564, - "learning_rate": 0.00019999802645987547, - "loss": 46.0, - "step": 26166 - }, - { - "epoch": 2.0006498843588125, - "grad_norm": 0.0004070333670824766, - "learning_rate": 0.00019999802630896776, - "loss": 46.0, - "step": 26167 - }, - { - "epoch": 2.0007263413422023, - "grad_norm": 0.0008006632560864091, - "learning_rate": 0.00019999802615805424, - "loss": 46.0, - "step": 26168 - }, - { - "epoch": 2.000802798325592, - "grad_norm": 0.004580541513860226, - "learning_rate": 0.00019999802600713498, - "loss": 46.0, - "step": 26169 - }, - { - "epoch": 2.000879255308982, - "grad_norm": 0.0006492201937362552, - "learning_rate": 0.00019999802585620992, - "loss": 46.0, - "step": 26170 - }, - { - "epoch": 2.0009557122923716, - "grad_norm": 0.0020484321285039186, - "learning_rate": 0.0001999980257052791, - "loss": 46.0, - "step": 26171 - }, - { - "epoch": 2.0010321692757613, - "grad_norm": 0.0006456835544668138, - "learning_rate": 0.00019999802555434256, - "loss": 46.0, - "step": 26172 - }, - { - "epoch": 2.001108626259151, - "grad_norm": 0.0004316331760492176, - "learning_rate": 0.00019999802540340017, - "loss": 46.0, - "step": 26173 - }, - { - "epoch": 2.001185083242541, - "grad_norm": 0.0008028432494029403, - "learning_rate": 0.00019999802525245207, - "loss": 46.0, - "step": 26174 - }, - { - "epoch": 2.00126154022593, - "grad_norm": 0.0027233120054006577, - "learning_rate": 0.00019999802510149817, - "loss": 46.0, - "step": 26175 - }, - { - "epoch": 2.00133799720932, - "grad_norm": 0.0010254610097035766, - "learning_rate": 0.00019999802495053852, - "loss": 46.0, - "step": 26176 - }, - { - "epoch": 2.0014144541927097, - "grad_norm": 0.003934929613023996, - "learning_rate": 0.0001999980247995731, - "loss": 46.0, - "step": 26177 - }, - { - "epoch": 2.0014909111760995, - "grad_norm": 0.0006774758221581578, - "learning_rate": 0.0001999980246486019, - "loss": 46.0, - "step": 26178 - }, - { - "epoch": 2.001567368159489, - "grad_norm": 0.00043291132897138596, - "learning_rate": 0.00019999802449762494, - "loss": 46.0, - "step": 26179 - }, - { - "epoch": 2.001643825142879, - "grad_norm": 0.0008927814196795225, - "learning_rate": 0.0001999980243466422, - "loss": 46.0, - "step": 26180 - }, - { - "epoch": 2.0017202821262687, - "grad_norm": 0.0011041504330933094, - "learning_rate": 0.0001999980241956537, - "loss": 46.0, - "step": 26181 - }, - { - "epoch": 2.0017967391096585, - "grad_norm": 0.0015459402929991484, - "learning_rate": 0.00019999802404465942, - "loss": 46.0, - "step": 26182 - }, - { - "epoch": 2.0018731960930483, - "grad_norm": 0.0025369576178491116, - "learning_rate": 0.00019999802389365935, - "loss": 46.0, - "step": 26183 - }, - { - "epoch": 2.001949653076438, - "grad_norm": 0.001021634554490447, - "learning_rate": 0.00019999802374265355, - "loss": 46.0, - "step": 26184 - }, - { - "epoch": 2.002026110059828, - "grad_norm": 0.0010396811412647367, - "learning_rate": 0.00019999802359164196, - "loss": 46.0, - "step": 26185 - }, - { - "epoch": 2.002102567043217, - "grad_norm": 0.00226794951595366, - "learning_rate": 0.0001999980234406246, - "loss": 46.0, - "step": 26186 - }, - { - "epoch": 2.002179024026607, - "grad_norm": 0.002138368086889386, - "learning_rate": 0.00019999802328960148, - "loss": 46.0, - "step": 26187 - }, - { - "epoch": 2.0022554810099966, - "grad_norm": 0.0016221149126067758, - "learning_rate": 0.0001999980231385726, - "loss": 46.0, - "step": 26188 - }, - { - "epoch": 2.0023319379933864, - "grad_norm": 0.005963224917650223, - "learning_rate": 0.00019999802298753796, - "loss": 46.0, - "step": 26189 - }, - { - "epoch": 2.002408394976776, - "grad_norm": 0.0004824300413019955, - "learning_rate": 0.0001999980228364975, - "loss": 46.0, - "step": 26190 - }, - { - "epoch": 2.002484851960166, - "grad_norm": 0.002030147472396493, - "learning_rate": 0.00019999802268545131, - "loss": 46.0, - "step": 26191 - }, - { - "epoch": 2.0025613089435557, - "grad_norm": 0.005240280646830797, - "learning_rate": 0.00019999802253439935, - "loss": 46.0, - "step": 26192 - }, - { - "epoch": 2.0026377659269454, - "grad_norm": 0.0013183457776904106, - "learning_rate": 0.00019999802238334163, - "loss": 46.0, - "step": 26193 - }, - { - "epoch": 2.002714222910335, - "grad_norm": 0.0038624336011707783, - "learning_rate": 0.00019999802223227812, - "loss": 46.0, - "step": 26194 - }, - { - "epoch": 2.002790679893725, - "grad_norm": 0.001587224774993956, - "learning_rate": 0.0001999980220812088, - "loss": 46.0, - "step": 26195 - }, - { - "epoch": 2.0028671368771147, - "grad_norm": 0.0009375744266435504, - "learning_rate": 0.0001999980219301338, - "loss": 46.0, - "step": 26196 - }, - { - "epoch": 2.002943593860504, - "grad_norm": 0.006520427297800779, - "learning_rate": 0.00019999802177905296, - "loss": 46.0, - "step": 26197 - }, - { - "epoch": 2.003020050843894, - "grad_norm": 0.004415924660861492, - "learning_rate": 0.0001999980216279664, - "loss": 46.0, - "step": 26198 - }, - { - "epoch": 2.0030965078272835, - "grad_norm": 0.009158768691122532, - "learning_rate": 0.00019999802147687402, - "loss": 46.0, - "step": 26199 - }, - { - "epoch": 2.0031729648106733, - "grad_norm": 0.0010773178655654192, - "learning_rate": 0.0001999980213257759, - "loss": 46.0, - "step": 26200 - }, - { - "epoch": 2.003249421794063, - "grad_norm": 0.0032553726341575384, - "learning_rate": 0.00019999802117467202, - "loss": 46.0, - "step": 26201 - }, - { - "epoch": 2.003325878777453, - "grad_norm": 0.0009913387475535274, - "learning_rate": 0.00019999802102356235, - "loss": 46.0, - "step": 26202 - }, - { - "epoch": 2.0034023357608426, - "grad_norm": 0.000741445692256093, - "learning_rate": 0.00019999802087244692, - "loss": 46.0, - "step": 26203 - }, - { - "epoch": 2.0034787927442324, - "grad_norm": 0.0018948677461594343, - "learning_rate": 0.00019999802072132573, - "loss": 46.0, - "step": 26204 - }, - { - "epoch": 2.003555249727622, - "grad_norm": 0.002056448021903634, - "learning_rate": 0.00019999802057019873, - "loss": 46.0, - "step": 26205 - }, - { - "epoch": 2.003631706711012, - "grad_norm": 0.0007232400821521878, - "learning_rate": 0.000199998020419066, - "loss": 46.0, - "step": 26206 - }, - { - "epoch": 2.0037081636944016, - "grad_norm": 0.002205180935561657, - "learning_rate": 0.0001999980202679275, - "loss": 46.0, - "step": 26207 - }, - { - "epoch": 2.003784620677791, - "grad_norm": 0.0011409829603508115, - "learning_rate": 0.00019999802011678322, - "loss": 46.0, - "step": 26208 - }, - { - "epoch": 2.0038610776611807, - "grad_norm": 0.001275534275919199, - "learning_rate": 0.00019999801996563318, - "loss": 46.0, - "step": 26209 - }, - { - "epoch": 2.0039375346445705, - "grad_norm": 0.0022096901666373014, - "learning_rate": 0.00019999801981447735, - "loss": 46.0, - "step": 26210 - }, - { - "epoch": 2.0040139916279602, - "grad_norm": 0.0006243409006856382, - "learning_rate": 0.00019999801966331577, - "loss": 46.0, - "step": 26211 - }, - { - "epoch": 2.00409044861135, - "grad_norm": 0.0035669677890837193, - "learning_rate": 0.00019999801951214844, - "loss": 46.0, - "step": 26212 - }, - { - "epoch": 2.0041669055947398, - "grad_norm": 0.000551888020709157, - "learning_rate": 0.00019999801936097528, - "loss": 46.0, - "step": 26213 - }, - { - "epoch": 2.0042433625781295, - "grad_norm": 0.0009982648771256208, - "learning_rate": 0.00019999801920979638, - "loss": 46.0, - "step": 26214 - }, - { - "epoch": 2.0043198195615193, - "grad_norm": 0.0031910568941384554, - "learning_rate": 0.00019999801905861174, - "loss": 46.0, - "step": 26215 - }, - { - "epoch": 2.004396276544909, - "grad_norm": 0.0011441687820479274, - "learning_rate": 0.00019999801890742132, - "loss": 46.0, - "step": 26216 - }, - { - "epoch": 2.004472733528299, - "grad_norm": 0.0019939281046390533, - "learning_rate": 0.0001999980187562251, - "loss": 46.0, - "step": 26217 - }, - { - "epoch": 2.0045491905116886, - "grad_norm": 0.0009862144943326712, - "learning_rate": 0.00019999801860502313, - "loss": 46.0, - "step": 26218 - }, - { - "epoch": 2.004625647495078, - "grad_norm": 0.0004974405746906996, - "learning_rate": 0.00019999801845381539, - "loss": 46.0, - "step": 26219 - }, - { - "epoch": 2.0047021044784676, - "grad_norm": 0.0012085522757843137, - "learning_rate": 0.00019999801830260187, - "loss": 46.0, - "step": 26220 - }, - { - "epoch": 2.0047785614618574, - "grad_norm": 0.0007428917451761663, - "learning_rate": 0.00019999801815138258, - "loss": 46.0, - "step": 26221 - }, - { - "epoch": 2.004855018445247, - "grad_norm": 0.0011536243837326765, - "learning_rate": 0.00019999801800015755, - "loss": 46.0, - "step": 26222 - }, - { - "epoch": 2.004931475428637, - "grad_norm": 0.0007541900267824531, - "learning_rate": 0.00019999801784892674, - "loss": 46.0, - "step": 26223 - }, - { - "epoch": 2.0050079324120267, - "grad_norm": 0.0025192727334797382, - "learning_rate": 0.00019999801769769016, - "loss": 46.0, - "step": 26224 - }, - { - "epoch": 2.0050843893954164, - "grad_norm": 0.0008683687192387879, - "learning_rate": 0.00019999801754644778, - "loss": 46.0, - "step": 26225 - }, - { - "epoch": 2.005160846378806, - "grad_norm": 0.0012463143793866038, - "learning_rate": 0.00019999801739519965, - "loss": 46.0, - "step": 26226 - }, - { - "epoch": 2.005237303362196, - "grad_norm": 0.0021338139194995165, - "learning_rate": 0.00019999801724394578, - "loss": 46.0, - "step": 26227 - }, - { - "epoch": 2.0053137603455857, - "grad_norm": 0.0013347516069188714, - "learning_rate": 0.0001999980170926861, - "loss": 46.0, - "step": 26228 - }, - { - "epoch": 2.0053902173289755, - "grad_norm": 0.0004869582480750978, - "learning_rate": 0.00019999801694142066, - "loss": 46.0, - "step": 26229 - }, - { - "epoch": 2.005466674312365, - "grad_norm": 0.003374520456418395, - "learning_rate": 0.00019999801679014944, - "loss": 46.0, - "step": 26230 - }, - { - "epoch": 2.0055431312957546, - "grad_norm": 0.0006843886221759021, - "learning_rate": 0.0001999980166388725, - "loss": 46.0, - "step": 26231 - }, - { - "epoch": 2.0056195882791443, - "grad_norm": 0.0005083077703602612, - "learning_rate": 0.00019999801648758973, - "loss": 46.0, - "step": 26232 - }, - { - "epoch": 2.005696045262534, - "grad_norm": 0.0023535622749477625, - "learning_rate": 0.00019999801633630124, - "loss": 46.0, - "step": 26233 - }, - { - "epoch": 2.005772502245924, - "grad_norm": 0.00102468382101506, - "learning_rate": 0.00019999801618500695, - "loss": 46.0, - "step": 26234 - }, - { - "epoch": 2.0058489592293136, - "grad_norm": 0.0005148566560819745, - "learning_rate": 0.0001999980160337069, - "loss": 46.0, - "step": 26235 - }, - { - "epoch": 2.0059254162127034, - "grad_norm": 0.0004898388287983835, - "learning_rate": 0.00019999801588240109, - "loss": 46.0, - "step": 26236 - }, - { - "epoch": 2.006001873196093, - "grad_norm": 0.0011947706807404757, - "learning_rate": 0.0001999980157310895, - "loss": 46.0, - "step": 26237 - }, - { - "epoch": 2.006078330179483, - "grad_norm": 0.0011407856363803148, - "learning_rate": 0.00019999801557977212, - "loss": 46.0, - "step": 26238 - }, - { - "epoch": 2.0061547871628727, - "grad_norm": 0.0009961758041754365, - "learning_rate": 0.000199998015428449, - "loss": 46.0, - "step": 26239 - }, - { - "epoch": 2.0062312441462624, - "grad_norm": 0.0006893862737342715, - "learning_rate": 0.0001999980152771201, - "loss": 46.0, - "step": 26240 - }, - { - "epoch": 2.0063077011296517, - "grad_norm": 0.0017040036618709564, - "learning_rate": 0.00019999801512578545, - "loss": 46.0, - "step": 26241 - }, - { - "epoch": 2.0063841581130415, - "grad_norm": 0.003879678901284933, - "learning_rate": 0.000199998014974445, - "loss": 46.0, - "step": 26242 - }, - { - "epoch": 2.0064606150964313, - "grad_norm": 0.0016147332498803735, - "learning_rate": 0.0001999980148230988, - "loss": 46.0, - "step": 26243 - }, - { - "epoch": 2.006537072079821, - "grad_norm": 0.000772004306782037, - "learning_rate": 0.00019999801467174684, - "loss": 46.0, - "step": 26244 - }, - { - "epoch": 2.0066135290632108, - "grad_norm": 0.0011211322853341699, - "learning_rate": 0.0001999980145203891, - "loss": 46.0, - "step": 26245 - }, - { - "epoch": 2.0066899860466005, - "grad_norm": 0.00042306879186071455, - "learning_rate": 0.00019999801436902556, - "loss": 46.0, - "step": 26246 - }, - { - "epoch": 2.0067664430299903, - "grad_norm": 0.0012402822030708194, - "learning_rate": 0.0001999980142176563, - "loss": 46.0, - "step": 26247 - }, - { - "epoch": 2.00684290001338, - "grad_norm": 0.002053857082501054, - "learning_rate": 0.00019999801406628124, - "loss": 46.0, - "step": 26248 - }, - { - "epoch": 2.00691935699677, - "grad_norm": 0.0006379009573720396, - "learning_rate": 0.00019999801391490043, - "loss": 46.0, - "step": 26249 - }, - { - "epoch": 2.0069958139801596, - "grad_norm": 0.00084595667431131, - "learning_rate": 0.00019999801376351383, - "loss": 46.0, - "step": 26250 - }, - { - "epoch": 2.0070722709635493, - "grad_norm": 0.0027609895914793015, - "learning_rate": 0.00019999801361212147, - "loss": 46.0, - "step": 26251 - }, - { - "epoch": 2.0071487279469387, - "grad_norm": 0.0017518082167953253, - "learning_rate": 0.00019999801346072335, - "loss": 46.0, - "step": 26252 - }, - { - "epoch": 2.0072251849303284, - "grad_norm": 0.00341012142598629, - "learning_rate": 0.00019999801330931944, - "loss": 46.0, - "step": 26253 - }, - { - "epoch": 2.007301641913718, - "grad_norm": 0.0009974631248041987, - "learning_rate": 0.00019999801315790977, - "loss": 46.0, - "step": 26254 - }, - { - "epoch": 2.007378098897108, - "grad_norm": 0.0005263067432679236, - "learning_rate": 0.00019999801300649435, - "loss": 46.0, - "step": 26255 - }, - { - "epoch": 2.0074545558804977, - "grad_norm": 0.0030940650030970573, - "learning_rate": 0.00019999801285507313, - "loss": 46.0, - "step": 26256 - }, - { - "epoch": 2.0075310128638875, - "grad_norm": 0.0013548798160627484, - "learning_rate": 0.00019999801270364616, - "loss": 46.0, - "step": 26257 - }, - { - "epoch": 2.0076074698472772, - "grad_norm": 0.001492160721682012, - "learning_rate": 0.0001999980125522134, - "loss": 46.0, - "step": 26258 - }, - { - "epoch": 2.007683926830667, - "grad_norm": 0.0017141959397122264, - "learning_rate": 0.00019999801240077488, - "loss": 46.0, - "step": 26259 - }, - { - "epoch": 2.0077603838140567, - "grad_norm": 0.0003981981717515737, - "learning_rate": 0.00019999801224933062, - "loss": 46.0, - "step": 26260 - }, - { - "epoch": 2.0078368407974465, - "grad_norm": 0.0012443915475159883, - "learning_rate": 0.00019999801209788056, - "loss": 46.0, - "step": 26261 - }, - { - "epoch": 2.0079132977808363, - "grad_norm": 0.0016075114253908396, - "learning_rate": 0.00019999801194642473, - "loss": 46.0, - "step": 26262 - }, - { - "epoch": 2.0079897547642256, - "grad_norm": 0.002303424524143338, - "learning_rate": 0.00019999801179496312, - "loss": 46.0, - "step": 26263 - }, - { - "epoch": 2.0080662117476153, - "grad_norm": 0.0007340922602452338, - "learning_rate": 0.0001999980116434958, - "loss": 46.0, - "step": 26264 - }, - { - "epoch": 2.008142668731005, - "grad_norm": 0.0010445526568219066, - "learning_rate": 0.00019999801149202267, - "loss": 46.0, - "step": 26265 - }, - { - "epoch": 2.008219125714395, - "grad_norm": 0.003046773374080658, - "learning_rate": 0.00019999801134054374, - "loss": 46.0, - "step": 26266 - }, - { - "epoch": 2.0082955826977846, - "grad_norm": 0.0005591484368778765, - "learning_rate": 0.0001999980111890591, - "loss": 46.0, - "step": 26267 - }, - { - "epoch": 2.0083720396811744, - "grad_norm": 0.0007764684269204736, - "learning_rate": 0.00019999801103756862, - "loss": 46.0, - "step": 26268 - }, - { - "epoch": 2.008448496664564, - "grad_norm": 0.0018513009417802095, - "learning_rate": 0.00019999801088607243, - "loss": 46.0, - "step": 26269 - }, - { - "epoch": 2.008524953647954, - "grad_norm": 0.0007164459675550461, - "learning_rate": 0.00019999801073457046, - "loss": 46.0, - "step": 26270 - }, - { - "epoch": 2.0086014106313437, - "grad_norm": 0.0006123896455392241, - "learning_rate": 0.00019999801058306272, - "loss": 46.0, - "step": 26271 - }, - { - "epoch": 2.0086778676147334, - "grad_norm": 0.006820992566645145, - "learning_rate": 0.0001999980104315492, - "loss": 46.0, - "step": 26272 - }, - { - "epoch": 2.008754324598123, - "grad_norm": 0.0008229804807342589, - "learning_rate": 0.00019999801028002992, - "loss": 46.0, - "step": 26273 - }, - { - "epoch": 2.0088307815815125, - "grad_norm": 0.0026525594294071198, - "learning_rate": 0.00019999801012850487, - "loss": 46.0, - "step": 26274 - }, - { - "epoch": 2.0089072385649023, - "grad_norm": 0.0006436111871153116, - "learning_rate": 0.00019999800997697403, - "loss": 46.0, - "step": 26275 - }, - { - "epoch": 2.008983695548292, - "grad_norm": 0.005770350340753794, - "learning_rate": 0.00019999800982543743, - "loss": 46.0, - "step": 26276 - }, - { - "epoch": 2.009060152531682, - "grad_norm": 0.0012650461867451668, - "learning_rate": 0.00019999800967389507, - "loss": 46.0, - "step": 26277 - }, - { - "epoch": 2.0091366095150716, - "grad_norm": 0.002966303611174226, - "learning_rate": 0.00019999800952234695, - "loss": 46.0, - "step": 26278 - }, - { - "epoch": 2.0092130664984613, - "grad_norm": 0.002709926338866353, - "learning_rate": 0.00019999800937079305, - "loss": 46.0, - "step": 26279 - }, - { - "epoch": 2.009289523481851, - "grad_norm": 0.0008663387270644307, - "learning_rate": 0.00019999800921923337, - "loss": 46.0, - "step": 26280 - }, - { - "epoch": 2.009365980465241, - "grad_norm": 0.0016286926111206412, - "learning_rate": 0.00019999800906766795, - "loss": 46.0, - "step": 26281 - }, - { - "epoch": 2.0094424374486306, - "grad_norm": 0.00324411247856915, - "learning_rate": 0.00019999800891609673, - "loss": 46.0, - "step": 26282 - }, - { - "epoch": 2.0095188944320204, - "grad_norm": 0.0010447860695421696, - "learning_rate": 0.00019999800876451974, - "loss": 46.0, - "step": 26283 - }, - { - "epoch": 2.00959535141541, - "grad_norm": 0.0010546088451519608, - "learning_rate": 0.000199998008612937, - "loss": 46.0, - "step": 26284 - }, - { - "epoch": 2.0096718083987994, - "grad_norm": 0.0019695681985467672, - "learning_rate": 0.0001999980084613485, - "loss": 46.0, - "step": 26285 - }, - { - "epoch": 2.009748265382189, - "grad_norm": 0.00726426811888814, - "learning_rate": 0.0001999980083097542, - "loss": 46.0, - "step": 26286 - }, - { - "epoch": 2.009824722365579, - "grad_norm": 0.00047941997763700783, - "learning_rate": 0.00019999800815815414, - "loss": 46.0, - "step": 26287 - }, - { - "epoch": 2.0099011793489687, - "grad_norm": 0.0021181961055845022, - "learning_rate": 0.0001999980080065483, - "loss": 46.0, - "step": 26288 - }, - { - "epoch": 2.0099776363323585, - "grad_norm": 0.0018221414647996426, - "learning_rate": 0.00019999800785493673, - "loss": 46.0, - "step": 26289 - }, - { - "epoch": 2.0100540933157482, - "grad_norm": 0.0009569289977662265, - "learning_rate": 0.00019999800770331935, - "loss": 46.0, - "step": 26290 - }, - { - "epoch": 2.010130550299138, - "grad_norm": 0.0007281259750016034, - "learning_rate": 0.00019999800755169625, - "loss": 46.0, - "step": 26291 - }, - { - "epoch": 2.0102070072825278, - "grad_norm": 0.0034563683439046144, - "learning_rate": 0.0001999980074000673, - "loss": 46.0, - "step": 26292 - }, - { - "epoch": 2.0102834642659175, - "grad_norm": 0.0012553880224004388, - "learning_rate": 0.00019999800724843265, - "loss": 46.0, - "step": 26293 - }, - { - "epoch": 2.0103599212493073, - "grad_norm": 0.0018042916199192405, - "learning_rate": 0.0001999980070967922, - "loss": 46.0, - "step": 26294 - }, - { - "epoch": 2.0104363782326966, - "grad_norm": 0.0007203936693258584, - "learning_rate": 0.00019999800694514602, - "loss": 46.0, - "step": 26295 - }, - { - "epoch": 2.0105128352160864, - "grad_norm": 0.0013191327452659607, - "learning_rate": 0.00019999800679349402, - "loss": 46.0, - "step": 26296 - }, - { - "epoch": 2.010589292199476, - "grad_norm": 0.0009801177075132728, - "learning_rate": 0.00019999800664183628, - "loss": 46.0, - "step": 26297 - }, - { - "epoch": 2.010665749182866, - "grad_norm": 0.0008313339785672724, - "learning_rate": 0.00019999800649017274, - "loss": 46.0, - "step": 26298 - }, - { - "epoch": 2.0107422061662557, - "grad_norm": 0.003581333439797163, - "learning_rate": 0.00019999800633850346, - "loss": 46.0, - "step": 26299 - }, - { - "epoch": 2.0108186631496454, - "grad_norm": 0.0013816790888085961, - "learning_rate": 0.00019999800618682843, - "loss": 46.0, - "step": 26300 - }, - { - "epoch": 2.010895120133035, - "grad_norm": 0.0006482881144620478, - "learning_rate": 0.00019999800603514757, - "loss": 46.0, - "step": 26301 - }, - { - "epoch": 2.010971577116425, - "grad_norm": 0.0012095622951164842, - "learning_rate": 0.00019999800588346101, - "loss": 46.0, - "step": 26302 - }, - { - "epoch": 2.0110480340998147, - "grad_norm": 0.0011970933992415667, - "learning_rate": 0.0001999980057317686, - "loss": 46.0, - "step": 26303 - }, - { - "epoch": 2.0111244910832045, - "grad_norm": 0.0069392891600728035, - "learning_rate": 0.00019999800558007048, - "loss": 46.0, - "step": 26304 - }, - { - "epoch": 2.011200948066594, - "grad_norm": 0.0006005058530718088, - "learning_rate": 0.0001999980054283666, - "loss": 46.0, - "step": 26305 - }, - { - "epoch": 2.0112774050499835, - "grad_norm": 0.0013811110984534025, - "learning_rate": 0.0001999980052766569, - "loss": 46.0, - "step": 26306 - }, - { - "epoch": 2.0113538620333733, - "grad_norm": 0.0005704708746634424, - "learning_rate": 0.00019999800512494146, - "loss": 46.0, - "step": 26307 - }, - { - "epoch": 2.011430319016763, - "grad_norm": 0.0005890579777769744, - "learning_rate": 0.00019999800497322025, - "loss": 46.0, - "step": 26308 - }, - { - "epoch": 2.011506776000153, - "grad_norm": 0.002463872078806162, - "learning_rate": 0.00019999800482149328, - "loss": 46.0, - "step": 26309 - }, - { - "epoch": 2.0115832329835426, - "grad_norm": 0.0007687838515266776, - "learning_rate": 0.00019999800466976054, - "loss": 46.0, - "step": 26310 - }, - { - "epoch": 2.0116596899669323, - "grad_norm": 0.00044294408871792257, - "learning_rate": 0.000199998004518022, - "loss": 46.0, - "step": 26311 - }, - { - "epoch": 2.011736146950322, - "grad_norm": 0.0006065298803150654, - "learning_rate": 0.00019999800436627772, - "loss": 46.0, - "step": 26312 - }, - { - "epoch": 2.011812603933712, - "grad_norm": 0.0008302808273583651, - "learning_rate": 0.00019999800421452766, - "loss": 46.0, - "step": 26313 - }, - { - "epoch": 2.0118890609171016, - "grad_norm": 0.0008995538810268044, - "learning_rate": 0.00019999800406277183, - "loss": 46.0, - "step": 26314 - }, - { - "epoch": 2.0119655179004914, - "grad_norm": 0.0010585618438199162, - "learning_rate": 0.00019999800391101022, - "loss": 46.0, - "step": 26315 - }, - { - "epoch": 2.012041974883881, - "grad_norm": 0.00061994040152058, - "learning_rate": 0.00019999800375924287, - "loss": 46.0, - "step": 26316 - }, - { - "epoch": 2.0121184318672705, - "grad_norm": 0.000894365191925317, - "learning_rate": 0.00019999800360746974, - "loss": 46.0, - "step": 26317 - }, - { - "epoch": 2.0121948888506602, - "grad_norm": 0.0006692753522656858, - "learning_rate": 0.00019999800345569082, - "loss": 46.0, - "step": 26318 - }, - { - "epoch": 2.01227134583405, - "grad_norm": 0.0005519141559489071, - "learning_rate": 0.00019999800330390614, - "loss": 46.0, - "step": 26319 - }, - { - "epoch": 2.0123478028174397, - "grad_norm": 0.0019849473610520363, - "learning_rate": 0.00019999800315211573, - "loss": 46.0, - "step": 26320 - }, - { - "epoch": 2.0124242598008295, - "grad_norm": 0.0006093309493735433, - "learning_rate": 0.0001999980030003195, - "loss": 46.0, - "step": 26321 - }, - { - "epoch": 2.0125007167842193, - "grad_norm": 0.0018989529926329851, - "learning_rate": 0.00019999800284851752, - "loss": 46.0, - "step": 26322 - }, - { - "epoch": 2.012577173767609, - "grad_norm": 0.007206793874502182, - "learning_rate": 0.00019999800269670975, - "loss": 46.0, - "step": 26323 - }, - { - "epoch": 2.012653630750999, - "grad_norm": 0.0021626390516757965, - "learning_rate": 0.00019999800254489624, - "loss": 46.0, - "step": 26324 - }, - { - "epoch": 2.0127300877343886, - "grad_norm": 0.00046044550254009664, - "learning_rate": 0.00019999800239307693, - "loss": 46.0, - "step": 26325 - }, - { - "epoch": 2.0128065447177783, - "grad_norm": 0.001414434053003788, - "learning_rate": 0.0001999980022412519, - "loss": 46.0, - "step": 26326 - }, - { - "epoch": 2.012883001701168, - "grad_norm": 0.0021342714317142963, - "learning_rate": 0.00019999800208942106, - "loss": 46.0, - "step": 26327 - }, - { - "epoch": 2.0129594586845574, - "grad_norm": 0.001481067156419158, - "learning_rate": 0.00019999800193758446, - "loss": 46.0, - "step": 26328 - }, - { - "epoch": 2.013035915667947, - "grad_norm": 0.000655050971545279, - "learning_rate": 0.00019999800178574208, - "loss": 46.0, - "step": 26329 - }, - { - "epoch": 2.013112372651337, - "grad_norm": 0.015324120409786701, - "learning_rate": 0.00019999800163389395, - "loss": 46.0, - "step": 26330 - }, - { - "epoch": 2.0131888296347267, - "grad_norm": 0.015295472927391529, - "learning_rate": 0.00019999800148204005, - "loss": 46.0, - "step": 26331 - }, - { - "epoch": 2.0132652866181164, - "grad_norm": 0.0008215457201004028, - "learning_rate": 0.00019999800133018036, - "loss": 46.0, - "step": 26332 - }, - { - "epoch": 2.013341743601506, - "grad_norm": 0.0003924024640582502, - "learning_rate": 0.0001999980011783149, - "loss": 46.0, - "step": 26333 - }, - { - "epoch": 2.013418200584896, - "grad_norm": 0.0003850834327749908, - "learning_rate": 0.0001999980010264437, - "loss": 46.0, - "step": 26334 - }, - { - "epoch": 2.0134946575682857, - "grad_norm": 0.007263628300279379, - "learning_rate": 0.00019999800087456673, - "loss": 46.0, - "step": 26335 - }, - { - "epoch": 2.0135711145516755, - "grad_norm": 0.0025417886208742857, - "learning_rate": 0.000199998000722684, - "loss": 46.0, - "step": 26336 - }, - { - "epoch": 2.0136475715350652, - "grad_norm": 0.0022703923750668764, - "learning_rate": 0.00019999800057079545, - "loss": 46.0, - "step": 26337 - }, - { - "epoch": 2.013724028518455, - "grad_norm": 0.0010218846146017313, - "learning_rate": 0.00019999800041890117, - "loss": 46.0, - "step": 26338 - }, - { - "epoch": 2.0138004855018443, - "grad_norm": 0.0004849161487072706, - "learning_rate": 0.00019999800026700108, - "loss": 46.0, - "step": 26339 - }, - { - "epoch": 2.013876942485234, - "grad_norm": 0.001087265438400209, - "learning_rate": 0.00019999800011509528, - "loss": 46.0, - "step": 26340 - }, - { - "epoch": 2.013953399468624, - "grad_norm": 0.001947965007275343, - "learning_rate": 0.00019999799996318367, - "loss": 46.0, - "step": 26341 - }, - { - "epoch": 2.0140298564520136, - "grad_norm": 0.0014058618107810616, - "learning_rate": 0.0001999979998112663, - "loss": 46.0, - "step": 26342 - }, - { - "epoch": 2.0141063134354034, - "grad_norm": 0.0003884157631546259, - "learning_rate": 0.00019999799965934317, - "loss": 46.0, - "step": 26343 - }, - { - "epoch": 2.014182770418793, - "grad_norm": 0.0003111445694230497, - "learning_rate": 0.00019999799950741424, - "loss": 46.0, - "step": 26344 - }, - { - "epoch": 2.014259227402183, - "grad_norm": 0.0011368457926437259, - "learning_rate": 0.00019999799935547957, - "loss": 46.0, - "step": 26345 - }, - { - "epoch": 2.0143356843855726, - "grad_norm": 0.000923772226087749, - "learning_rate": 0.00019999799920353913, - "loss": 46.0, - "step": 26346 - }, - { - "epoch": 2.0144121413689624, - "grad_norm": 0.009267128072679043, - "learning_rate": 0.0001999979990515929, - "loss": 46.0, - "step": 26347 - }, - { - "epoch": 2.014488598352352, - "grad_norm": 0.0014039318775758147, - "learning_rate": 0.00019999799889964092, - "loss": 46.0, - "step": 26348 - }, - { - "epoch": 2.014565055335742, - "grad_norm": 0.0015578106977045536, - "learning_rate": 0.00019999799874768318, - "loss": 46.0, - "step": 26349 - }, - { - "epoch": 2.0146415123191312, - "grad_norm": 0.0006281944224610925, - "learning_rate": 0.00019999799859571964, - "loss": 46.0, - "step": 26350 - }, - { - "epoch": 2.014717969302521, - "grad_norm": 0.002535541309043765, - "learning_rate": 0.00019999799844375033, - "loss": 46.0, - "step": 26351 - }, - { - "epoch": 2.0147944262859108, - "grad_norm": 0.0014830816071480513, - "learning_rate": 0.00019999799829177527, - "loss": 46.0, - "step": 26352 - }, - { - "epoch": 2.0148708832693005, - "grad_norm": 0.0006594632286578417, - "learning_rate": 0.00019999799813979444, - "loss": 46.0, - "step": 26353 - }, - { - "epoch": 2.0149473402526903, - "grad_norm": 0.0027152851689606905, - "learning_rate": 0.00019999799798780786, - "loss": 46.0, - "step": 26354 - }, - { - "epoch": 2.01502379723608, - "grad_norm": 0.0011149500496685505, - "learning_rate": 0.00019999799783581548, - "loss": 46.0, - "step": 26355 - }, - { - "epoch": 2.01510025421947, - "grad_norm": 0.0028852594550698996, - "learning_rate": 0.00019999799768381733, - "loss": 46.0, - "step": 26356 - }, - { - "epoch": 2.0151767112028596, - "grad_norm": 0.002378114964812994, - "learning_rate": 0.00019999799753181343, - "loss": 46.0, - "step": 26357 - }, - { - "epoch": 2.0152531681862493, - "grad_norm": 0.0008286259835585952, - "learning_rate": 0.00019999799737980376, - "loss": 46.0, - "step": 26358 - }, - { - "epoch": 2.015329625169639, - "grad_norm": 0.0006026119226589799, - "learning_rate": 0.0001999979972277883, - "loss": 46.0, - "step": 26359 - }, - { - "epoch": 2.015406082153029, - "grad_norm": 0.0023870649747550488, - "learning_rate": 0.0001999979970757671, - "loss": 46.0, - "step": 26360 - }, - { - "epoch": 2.015482539136418, - "grad_norm": 0.0007586884312331676, - "learning_rate": 0.0001999979969237401, - "loss": 46.0, - "step": 26361 - }, - { - "epoch": 2.015558996119808, - "grad_norm": 0.0011644542682915926, - "learning_rate": 0.00019999799677170734, - "loss": 46.0, - "step": 26362 - }, - { - "epoch": 2.0156354531031977, - "grad_norm": 0.0014677379513159394, - "learning_rate": 0.00019999799661966883, - "loss": 46.0, - "step": 26363 - }, - { - "epoch": 2.0157119100865875, - "grad_norm": 0.0029217274859547615, - "learning_rate": 0.00019999799646762455, - "loss": 46.0, - "step": 26364 - }, - { - "epoch": 2.015788367069977, - "grad_norm": 0.002096845768392086, - "learning_rate": 0.00019999799631557446, - "loss": 46.0, - "step": 26365 - }, - { - "epoch": 2.015864824053367, - "grad_norm": 0.0031227206345647573, - "learning_rate": 0.00019999799616351863, - "loss": 46.0, - "step": 26366 - }, - { - "epoch": 2.0159412810367567, - "grad_norm": 0.0006880405708216131, - "learning_rate": 0.00019999799601145703, - "loss": 46.0, - "step": 26367 - }, - { - "epoch": 2.0160177380201465, - "grad_norm": 0.0011815892066806555, - "learning_rate": 0.00019999799585938965, - "loss": 46.0, - "step": 26368 - }, - { - "epoch": 2.0160941950035363, - "grad_norm": 0.0073180003091692924, - "learning_rate": 0.0001999979957073165, - "loss": 46.0, - "step": 26369 - }, - { - "epoch": 2.016170651986926, - "grad_norm": 0.002724348334595561, - "learning_rate": 0.0001999979955552376, - "loss": 46.0, - "step": 26370 - }, - { - "epoch": 2.016247108970316, - "grad_norm": 0.0005904012941755354, - "learning_rate": 0.0001999979954031529, - "loss": 46.0, - "step": 26371 - }, - { - "epoch": 2.016323565953705, - "grad_norm": 0.007443905342370272, - "learning_rate": 0.00019999799525106246, - "loss": 46.0, - "step": 26372 - }, - { - "epoch": 2.016400022937095, - "grad_norm": 0.0016939763445407152, - "learning_rate": 0.00019999799509896624, - "loss": 46.0, - "step": 26373 - }, - { - "epoch": 2.0164764799204846, - "grad_norm": 0.000984145444817841, - "learning_rate": 0.00019999799494686425, - "loss": 46.0, - "step": 26374 - }, - { - "epoch": 2.0165529369038744, - "grad_norm": 0.0018851279746741056, - "learning_rate": 0.00019999799479475652, - "loss": 46.0, - "step": 26375 - }, - { - "epoch": 2.016629393887264, - "grad_norm": 0.0027249157428741455, - "learning_rate": 0.00019999799464264298, - "loss": 46.0, - "step": 26376 - }, - { - "epoch": 2.016705850870654, - "grad_norm": 0.00255312561057508, - "learning_rate": 0.00019999799449052367, - "loss": 46.0, - "step": 26377 - }, - { - "epoch": 2.0167823078540437, - "grad_norm": 0.003908507991582155, - "learning_rate": 0.00019999799433839864, - "loss": 46.0, - "step": 26378 - }, - { - "epoch": 2.0168587648374334, - "grad_norm": 0.0006590376724489033, - "learning_rate": 0.00019999799418626778, - "loss": 46.0, - "step": 26379 - }, - { - "epoch": 2.016935221820823, - "grad_norm": 0.001372669474221766, - "learning_rate": 0.00019999799403413117, - "loss": 46.0, - "step": 26380 - }, - { - "epoch": 2.017011678804213, - "grad_norm": 0.0022291734348982573, - "learning_rate": 0.0001999979938819888, - "loss": 46.0, - "step": 26381 - }, - { - "epoch": 2.0170881357876027, - "grad_norm": 0.0011557965772226453, - "learning_rate": 0.00019999799372984067, - "loss": 46.0, - "step": 26382 - }, - { - "epoch": 2.017164592770992, - "grad_norm": 0.0014570147031918168, - "learning_rate": 0.00019999799357768675, - "loss": 46.0, - "step": 26383 - }, - { - "epoch": 2.017241049754382, - "grad_norm": 0.0031361987348645926, - "learning_rate": 0.00019999799342552708, - "loss": 46.0, - "step": 26384 - }, - { - "epoch": 2.0173175067377715, - "grad_norm": 0.0007653756183572114, - "learning_rate": 0.0001999979932733616, - "loss": 46.0, - "step": 26385 - }, - { - "epoch": 2.0173939637211613, - "grad_norm": 0.0031632056925445795, - "learning_rate": 0.0001999979931211904, - "loss": 46.0, - "step": 26386 - }, - { - "epoch": 2.017470420704551, - "grad_norm": 0.002237035194411874, - "learning_rate": 0.0001999979929690134, - "loss": 46.0, - "step": 26387 - }, - { - "epoch": 2.017546877687941, - "grad_norm": 0.0011668553343042731, - "learning_rate": 0.00019999799281683064, - "loss": 46.0, - "step": 26388 - }, - { - "epoch": 2.0176233346713306, - "grad_norm": 0.0007532284944318235, - "learning_rate": 0.00019999799266464213, - "loss": 46.0, - "step": 26389 - }, - { - "epoch": 2.0176997916547204, - "grad_norm": 0.0006863339222036302, - "learning_rate": 0.00019999799251244781, - "loss": 46.0, - "step": 26390 - }, - { - "epoch": 2.01777624863811, - "grad_norm": 0.0011143706506118178, - "learning_rate": 0.00019999799236024773, - "loss": 46.0, - "step": 26391 - }, - { - "epoch": 2.0178527056215, - "grad_norm": 0.0007110777078196406, - "learning_rate": 0.00019999799220804193, - "loss": 46.0, - "step": 26392 - }, - { - "epoch": 2.0179291626048896, - "grad_norm": 0.0007005173829384148, - "learning_rate": 0.00019999799205583032, - "loss": 46.0, - "step": 26393 - }, - { - "epoch": 2.018005619588279, - "grad_norm": 0.003621959825977683, - "learning_rate": 0.00019999799190361292, - "loss": 46.0, - "step": 26394 - }, - { - "epoch": 2.0180820765716687, - "grad_norm": 0.000936551543418318, - "learning_rate": 0.0001999979917513898, - "loss": 46.0, - "step": 26395 - }, - { - "epoch": 2.0181585335550585, - "grad_norm": 0.0008430216112174094, - "learning_rate": 0.00019999799159916087, - "loss": 46.0, - "step": 26396 - }, - { - "epoch": 2.0182349905384482, - "grad_norm": 0.0008361685322597623, - "learning_rate": 0.0001999979914469262, - "loss": 46.0, - "step": 26397 - }, - { - "epoch": 2.018311447521838, - "grad_norm": 0.0008514817454852164, - "learning_rate": 0.00019999799129468576, - "loss": 46.0, - "step": 26398 - }, - { - "epoch": 2.0183879045052278, - "grad_norm": 0.004284955561161041, - "learning_rate": 0.00019999799114243954, - "loss": 46.0, - "step": 26399 - }, - { - "epoch": 2.0184643614886175, - "grad_norm": 0.0005054807988926768, - "learning_rate": 0.00019999799099018752, - "loss": 46.0, - "step": 26400 - }, - { - "epoch": 2.0185408184720073, - "grad_norm": 0.0011327427346259356, - "learning_rate": 0.0001999979908379298, - "loss": 46.0, - "step": 26401 - }, - { - "epoch": 2.018617275455397, - "grad_norm": 0.0006181719363667071, - "learning_rate": 0.00019999799068566625, - "loss": 46.0, - "step": 26402 - }, - { - "epoch": 2.018693732438787, - "grad_norm": 0.002397105796262622, - "learning_rate": 0.00019999799053339694, - "loss": 46.0, - "step": 26403 - }, - { - "epoch": 2.0187701894221766, - "grad_norm": 0.002497076755389571, - "learning_rate": 0.00019999799038112186, - "loss": 46.0, - "step": 26404 - }, - { - "epoch": 2.018846646405566, - "grad_norm": 0.0006338916718959808, - "learning_rate": 0.00019999799022884105, - "loss": 46.0, - "step": 26405 - }, - { - "epoch": 2.0189231033889556, - "grad_norm": 0.0025392379611730576, - "learning_rate": 0.00019999799007655442, - "loss": 46.0, - "step": 26406 - }, - { - "epoch": 2.0189995603723454, - "grad_norm": 0.0035979480016976595, - "learning_rate": 0.00019999798992426207, - "loss": 46.0, - "step": 26407 - }, - { - "epoch": 2.019076017355735, - "grad_norm": 0.000816471700090915, - "learning_rate": 0.0001999979897719639, - "loss": 46.0, - "step": 26408 - }, - { - "epoch": 2.019152474339125, - "grad_norm": 0.005113798659294844, - "learning_rate": 0.00019999798961966, - "loss": 46.0, - "step": 26409 - }, - { - "epoch": 2.0192289313225147, - "grad_norm": 0.0008697230950929224, - "learning_rate": 0.0001999979894673503, - "loss": 46.0, - "step": 26410 - }, - { - "epoch": 2.0193053883059044, - "grad_norm": 0.0015793988713994622, - "learning_rate": 0.00019999798931503486, - "loss": 46.0, - "step": 26411 - }, - { - "epoch": 2.019381845289294, - "grad_norm": 0.0028395401313900948, - "learning_rate": 0.00019999798916271364, - "loss": 46.0, - "step": 26412 - }, - { - "epoch": 2.019458302272684, - "grad_norm": 0.000814637343864888, - "learning_rate": 0.00019999798901038665, - "loss": 46.0, - "step": 26413 - }, - { - "epoch": 2.0195347592560737, - "grad_norm": 0.0012641982175409794, - "learning_rate": 0.0001999979888580539, - "loss": 46.0, - "step": 26414 - }, - { - "epoch": 2.0196112162394635, - "grad_norm": 0.002331336960196495, - "learning_rate": 0.00019999798870571532, - "loss": 46.0, - "step": 26415 - }, - { - "epoch": 2.019687673222853, - "grad_norm": 0.0005343082593753934, - "learning_rate": 0.00019999798855337104, - "loss": 46.0, - "step": 26416 - }, - { - "epoch": 2.0197641302062426, - "grad_norm": 0.00045682990457862616, - "learning_rate": 0.00019999798840102099, - "loss": 46.0, - "step": 26417 - }, - { - "epoch": 2.0198405871896323, - "grad_norm": 0.0036687185056507587, - "learning_rate": 0.00019999798824866513, - "loss": 46.0, - "step": 26418 - }, - { - "epoch": 2.019917044173022, - "grad_norm": 0.0012728706933557987, - "learning_rate": 0.00019999798809630353, - "loss": 46.0, - "step": 26419 - }, - { - "epoch": 2.019993501156412, - "grad_norm": 0.001992689212784171, - "learning_rate": 0.00019999798794393615, - "loss": 46.0, - "step": 26420 - }, - { - "epoch": 2.0200699581398016, - "grad_norm": 0.0006950357928872108, - "learning_rate": 0.000199997987791563, - "loss": 46.0, - "step": 26421 - }, - { - "epoch": 2.0201464151231914, - "grad_norm": 0.003074574051424861, - "learning_rate": 0.00019999798763918408, - "loss": 46.0, - "step": 26422 - }, - { - "epoch": 2.020222872106581, - "grad_norm": 0.008224640972912312, - "learning_rate": 0.00019999798748679938, - "loss": 46.0, - "step": 26423 - }, - { - "epoch": 2.020299329089971, - "grad_norm": 0.000960544275585562, - "learning_rate": 0.00019999798733440894, - "loss": 46.0, - "step": 26424 - }, - { - "epoch": 2.0203757860733607, - "grad_norm": 0.005144823808223009, - "learning_rate": 0.0001999979871820127, - "loss": 46.0, - "step": 26425 - }, - { - "epoch": 2.0204522430567504, - "grad_norm": 0.0009282471728511155, - "learning_rate": 0.00019999798702961073, - "loss": 46.0, - "step": 26426 - }, - { - "epoch": 2.0205287000401397, - "grad_norm": 0.0011337038595229387, - "learning_rate": 0.00019999798687720297, - "loss": 46.0, - "step": 26427 - }, - { - "epoch": 2.0206051570235295, - "grad_norm": 0.0007164134876802564, - "learning_rate": 0.00019999798672478943, - "loss": 46.0, - "step": 26428 - }, - { - "epoch": 2.0206816140069193, - "grad_norm": 0.000708968669641763, - "learning_rate": 0.00019999798657237012, - "loss": 46.0, - "step": 26429 - }, - { - "epoch": 2.020758070990309, - "grad_norm": 0.0016046749660745263, - "learning_rate": 0.00019999798641994504, - "loss": 46.0, - "step": 26430 - }, - { - "epoch": 2.020834527973699, - "grad_norm": 0.0017448560101911426, - "learning_rate": 0.00019999798626751423, - "loss": 46.0, - "step": 26431 - }, - { - "epoch": 2.0209109849570885, - "grad_norm": 0.00240304390899837, - "learning_rate": 0.0001999979861150776, - "loss": 46.0, - "step": 26432 - }, - { - "epoch": 2.0209874419404783, - "grad_norm": 0.0013527818955481052, - "learning_rate": 0.0001999979859626352, - "loss": 46.0, - "step": 26433 - }, - { - "epoch": 2.021063898923868, - "grad_norm": 0.0011102689895778894, - "learning_rate": 0.00019999798581018708, - "loss": 46.0, - "step": 26434 - }, - { - "epoch": 2.021140355907258, - "grad_norm": 0.0008913030615076423, - "learning_rate": 0.00019999798565773316, - "loss": 46.0, - "step": 26435 - }, - { - "epoch": 2.0212168128906476, - "grad_norm": 0.0023256440181285143, - "learning_rate": 0.00019999798550527346, - "loss": 46.0, - "step": 26436 - }, - { - "epoch": 2.021293269874037, - "grad_norm": 0.0020580971613526344, - "learning_rate": 0.00019999798535280802, - "loss": 46.0, - "step": 26437 - }, - { - "epoch": 2.0213697268574267, - "grad_norm": 0.007729441858828068, - "learning_rate": 0.0001999979852003368, - "loss": 46.0, - "step": 26438 - }, - { - "epoch": 2.0214461838408164, - "grad_norm": 0.003236080752685666, - "learning_rate": 0.0001999979850478598, - "loss": 46.0, - "step": 26439 - }, - { - "epoch": 2.021522640824206, - "grad_norm": 0.0015236795879900455, - "learning_rate": 0.00019999798489537702, - "loss": 46.0, - "step": 26440 - }, - { - "epoch": 2.021599097807596, - "grad_norm": 0.0006803652504459023, - "learning_rate": 0.00019999798474288849, - "loss": 46.0, - "step": 26441 - }, - { - "epoch": 2.0216755547909857, - "grad_norm": 0.004668922629207373, - "learning_rate": 0.00019999798459039418, - "loss": 46.0, - "step": 26442 - }, - { - "epoch": 2.0217520117743755, - "grad_norm": 0.0008509972831234336, - "learning_rate": 0.00019999798443789412, - "loss": 46.0, - "step": 26443 - }, - { - "epoch": 2.0218284687577652, - "grad_norm": 0.0009522740147076547, - "learning_rate": 0.00019999798428538826, - "loss": 46.0, - "step": 26444 - }, - { - "epoch": 2.021904925741155, - "grad_norm": 0.000667439540848136, - "learning_rate": 0.00019999798413287663, - "loss": 46.0, - "step": 26445 - }, - { - "epoch": 2.0219813827245448, - "grad_norm": 0.0010644018184393644, - "learning_rate": 0.00019999798398035926, - "loss": 46.0, - "step": 26446 - }, - { - "epoch": 2.0220578397079345, - "grad_norm": 0.002345553133636713, - "learning_rate": 0.0001999979838278361, - "loss": 46.0, - "step": 26447 - }, - { - "epoch": 2.022134296691324, - "grad_norm": 0.000988130341283977, - "learning_rate": 0.0001999979836753072, - "loss": 46.0, - "step": 26448 - }, - { - "epoch": 2.0222107536747136, - "grad_norm": 0.0022990063298493624, - "learning_rate": 0.00019999798352277252, - "loss": 46.0, - "step": 26449 - }, - { - "epoch": 2.0222872106581034, - "grad_norm": 0.00030240416526794434, - "learning_rate": 0.00019999798337023208, - "loss": 46.0, - "step": 26450 - }, - { - "epoch": 2.022363667641493, - "grad_norm": 0.001114911399781704, - "learning_rate": 0.00019999798321768583, - "loss": 46.0, - "step": 26451 - }, - { - "epoch": 2.022440124624883, - "grad_norm": 0.002209915081039071, - "learning_rate": 0.00019999798306513384, - "loss": 46.0, - "step": 26452 - }, - { - "epoch": 2.0225165816082726, - "grad_norm": 0.0006973057752475142, - "learning_rate": 0.00019999798291257608, - "loss": 46.0, - "step": 26453 - }, - { - "epoch": 2.0225930385916624, - "grad_norm": 0.0023166206665337086, - "learning_rate": 0.00019999798276001255, - "loss": 46.0, - "step": 26454 - }, - { - "epoch": 2.022669495575052, - "grad_norm": 0.0005569932982325554, - "learning_rate": 0.00019999798260744324, - "loss": 46.0, - "step": 26455 - }, - { - "epoch": 2.022745952558442, - "grad_norm": 0.0023603159934282303, - "learning_rate": 0.00019999798245486815, - "loss": 46.0, - "step": 26456 - }, - { - "epoch": 2.0228224095418317, - "grad_norm": 0.0009010782232508063, - "learning_rate": 0.00019999798230228733, - "loss": 46.0, - "step": 26457 - }, - { - "epoch": 2.0228988665252214, - "grad_norm": 0.0006541848997585475, - "learning_rate": 0.00019999798214970072, - "loss": 46.0, - "step": 26458 - }, - { - "epoch": 2.0229753235086108, - "grad_norm": 0.005989361554384232, - "learning_rate": 0.00019999798199710832, - "loss": 46.0, - "step": 26459 - }, - { - "epoch": 2.0230517804920005, - "grad_norm": 0.0009209500858560205, - "learning_rate": 0.00019999798184451017, - "loss": 46.0, - "step": 26460 - }, - { - "epoch": 2.0231282374753903, - "grad_norm": 0.0010172826005145907, - "learning_rate": 0.00019999798169190625, - "loss": 46.0, - "step": 26461 - }, - { - "epoch": 2.02320469445878, - "grad_norm": 0.0018289603758603334, - "learning_rate": 0.00019999798153929655, - "loss": 46.0, - "step": 26462 - }, - { - "epoch": 2.02328115144217, - "grad_norm": 0.0005168657517060637, - "learning_rate": 0.0001999979813866811, - "loss": 46.0, - "step": 26463 - }, - { - "epoch": 2.0233576084255596, - "grad_norm": 0.0009984978241845965, - "learning_rate": 0.00019999798123405987, - "loss": 46.0, - "step": 26464 - }, - { - "epoch": 2.0234340654089493, - "grad_norm": 0.002276997547596693, - "learning_rate": 0.00019999798108143288, - "loss": 46.0, - "step": 26465 - }, - { - "epoch": 2.023510522392339, - "grad_norm": 0.0005757012986578047, - "learning_rate": 0.00019999798092880012, - "loss": 46.0, - "step": 26466 - }, - { - "epoch": 2.023586979375729, - "grad_norm": 0.0009865592001006007, - "learning_rate": 0.00019999798077616156, - "loss": 46.0, - "step": 26467 - }, - { - "epoch": 2.0236634363591186, - "grad_norm": 0.0007731883670203388, - "learning_rate": 0.00019999798062351727, - "loss": 46.0, - "step": 26468 - }, - { - "epoch": 2.0237398933425084, - "grad_norm": 0.0033960232976824045, - "learning_rate": 0.0001999979804708672, - "loss": 46.0, - "step": 26469 - }, - { - "epoch": 2.0238163503258977, - "grad_norm": 0.001232285751029849, - "learning_rate": 0.00019999798031821136, - "loss": 46.0, - "step": 26470 - }, - { - "epoch": 2.0238928073092874, - "grad_norm": 0.0022852083202451468, - "learning_rate": 0.00019999798016554973, - "loss": 46.0, - "step": 26471 - }, - { - "epoch": 2.023969264292677, - "grad_norm": 0.0045961979776620865, - "learning_rate": 0.00019999798001288236, - "loss": 46.0, - "step": 26472 - }, - { - "epoch": 2.024045721276067, - "grad_norm": 0.0006669955910183489, - "learning_rate": 0.00019999797986020918, - "loss": 46.0, - "step": 26473 - }, - { - "epoch": 2.0241221782594567, - "grad_norm": 0.002898002043366432, - "learning_rate": 0.00019999797970753026, - "loss": 46.0, - "step": 26474 - }, - { - "epoch": 2.0241986352428465, - "grad_norm": 0.0019249245524406433, - "learning_rate": 0.0001999979795548456, - "loss": 46.0, - "step": 26475 - }, - { - "epoch": 2.0242750922262363, - "grad_norm": 0.0028629314620047808, - "learning_rate": 0.00019999797940215512, - "loss": 46.0, - "step": 26476 - }, - { - "epoch": 2.024351549209626, - "grad_norm": 0.0016173362964764237, - "learning_rate": 0.00019999797924945888, - "loss": 46.0, - "step": 26477 - }, - { - "epoch": 2.0244280061930158, - "grad_norm": 0.0057194409891963005, - "learning_rate": 0.0001999979790967569, - "loss": 46.0, - "step": 26478 - }, - { - "epoch": 2.0245044631764055, - "grad_norm": 0.0009702262468636036, - "learning_rate": 0.00019999797894404913, - "loss": 46.0, - "step": 26479 - }, - { - "epoch": 2.0245809201597953, - "grad_norm": 0.002342892112210393, - "learning_rate": 0.00019999797879133557, - "loss": 46.0, - "step": 26480 - }, - { - "epoch": 2.0246573771431846, - "grad_norm": 0.0010883017675951123, - "learning_rate": 0.00019999797863861626, - "loss": 46.0, - "step": 26481 - }, - { - "epoch": 2.0247338341265744, - "grad_norm": 0.0021422302816063166, - "learning_rate": 0.00019999797848589118, - "loss": 46.0, - "step": 26482 - }, - { - "epoch": 2.024810291109964, - "grad_norm": 0.0022096848115324974, - "learning_rate": 0.00019999797833316035, - "loss": 46.0, - "step": 26483 - }, - { - "epoch": 2.024886748093354, - "grad_norm": 0.00527407368645072, - "learning_rate": 0.00019999797818042375, - "loss": 46.0, - "step": 26484 - }, - { - "epoch": 2.0249632050767437, - "grad_norm": 0.0013935323804616928, - "learning_rate": 0.00019999797802768137, - "loss": 46.0, - "step": 26485 - }, - { - "epoch": 2.0250396620601334, - "grad_norm": 0.0009756692452356219, - "learning_rate": 0.0001999979778749332, - "loss": 46.0, - "step": 26486 - }, - { - "epoch": 2.025116119043523, - "grad_norm": 0.0007487956318072975, - "learning_rate": 0.00019999797772217928, - "loss": 46.0, - "step": 26487 - }, - { - "epoch": 2.025192576026913, - "grad_norm": 0.0005695989821106195, - "learning_rate": 0.00019999797756941958, - "loss": 46.0, - "step": 26488 - }, - { - "epoch": 2.0252690330103027, - "grad_norm": 0.0005050969775766134, - "learning_rate": 0.00019999797741665414, - "loss": 46.0, - "step": 26489 - }, - { - "epoch": 2.0253454899936925, - "grad_norm": 0.0018592077540233731, - "learning_rate": 0.0001999979772638829, - "loss": 46.0, - "step": 26490 - }, - { - "epoch": 2.025421946977082, - "grad_norm": 0.0006684542750008404, - "learning_rate": 0.00019999797711110588, - "loss": 46.0, - "step": 26491 - }, - { - "epoch": 2.0254984039604715, - "grad_norm": 0.0018544172635301948, - "learning_rate": 0.00019999797695832312, - "loss": 46.0, - "step": 26492 - }, - { - "epoch": 2.0255748609438613, - "grad_norm": 0.01081087812781334, - "learning_rate": 0.0001999979768055346, - "loss": 46.0, - "step": 26493 - }, - { - "epoch": 2.025651317927251, - "grad_norm": 0.0010818091686815023, - "learning_rate": 0.00019999797665274028, - "loss": 46.0, - "step": 26494 - }, - { - "epoch": 2.025727774910641, - "grad_norm": 0.001547107007354498, - "learning_rate": 0.0001999979764999402, - "loss": 46.0, - "step": 26495 - }, - { - "epoch": 2.0258042318940306, - "grad_norm": 0.002483338350430131, - "learning_rate": 0.00019999797634713434, - "loss": 46.0, - "step": 26496 - }, - { - "epoch": 2.0258806888774203, - "grad_norm": 0.0010727066546678543, - "learning_rate": 0.00019999797619432274, - "loss": 46.0, - "step": 26497 - }, - { - "epoch": 2.02595714586081, - "grad_norm": 0.0017662536120042205, - "learning_rate": 0.00019999797604150534, - "loss": 46.0, - "step": 26498 - }, - { - "epoch": 2.0260336028442, - "grad_norm": 0.00038949030567891896, - "learning_rate": 0.0001999979758886822, - "loss": 46.0, - "step": 26499 - }, - { - "epoch": 2.0261100598275896, - "grad_norm": 0.0010763240279629827, - "learning_rate": 0.00019999797573585327, - "loss": 46.0, - "step": 26500 - }, - { - "epoch": 2.0261865168109794, - "grad_norm": 0.0010031070560216904, - "learning_rate": 0.00019999797558301857, - "loss": 46.0, - "step": 26501 - }, - { - "epoch": 2.026262973794369, - "grad_norm": 0.0021999813616275787, - "learning_rate": 0.0001999979754301781, - "loss": 46.0, - "step": 26502 - }, - { - "epoch": 2.0263394307777585, - "grad_norm": 0.0031785566825419664, - "learning_rate": 0.00019999797527733187, - "loss": 46.0, - "step": 26503 - }, - { - "epoch": 2.0264158877611482, - "grad_norm": 0.0025844541378319263, - "learning_rate": 0.00019999797512447988, - "loss": 46.0, - "step": 26504 - }, - { - "epoch": 2.026492344744538, - "grad_norm": 0.005652190186083317, - "learning_rate": 0.0001999979749716221, - "loss": 46.0, - "step": 26505 - }, - { - "epoch": 2.0265688017279277, - "grad_norm": 0.0024238815531134605, - "learning_rate": 0.00019999797481875855, - "loss": 46.0, - "step": 26506 - }, - { - "epoch": 2.0266452587113175, - "grad_norm": 0.0011899323435500264, - "learning_rate": 0.00019999797466588922, - "loss": 46.0, - "step": 26507 - }, - { - "epoch": 2.0267217156947073, - "grad_norm": 0.0012539083836600184, - "learning_rate": 0.00019999797451301417, - "loss": 46.0, - "step": 26508 - }, - { - "epoch": 2.026798172678097, - "grad_norm": 0.0009194174199365079, - "learning_rate": 0.0001999979743601333, - "loss": 46.0, - "step": 26509 - }, - { - "epoch": 2.026874629661487, - "grad_norm": 0.0006174949812702835, - "learning_rate": 0.00019999797420724668, - "loss": 46.0, - "step": 26510 - }, - { - "epoch": 2.0269510866448766, - "grad_norm": 0.0014386842958629131, - "learning_rate": 0.0001999979740543543, - "loss": 46.0, - "step": 26511 - }, - { - "epoch": 2.0270275436282663, - "grad_norm": 0.001159730483777821, - "learning_rate": 0.00019999797390145616, - "loss": 46.0, - "step": 26512 - }, - { - "epoch": 2.027104000611656, - "grad_norm": 0.0007140013622120023, - "learning_rate": 0.0001999979737485522, - "loss": 46.0, - "step": 26513 - }, - { - "epoch": 2.0271804575950454, - "grad_norm": 0.0007913279114291072, - "learning_rate": 0.00019999797359564254, - "loss": 46.0, - "step": 26514 - }, - { - "epoch": 2.027256914578435, - "grad_norm": 0.004414248745888472, - "learning_rate": 0.00019999797344272708, - "loss": 46.0, - "step": 26515 - }, - { - "epoch": 2.027333371561825, - "grad_norm": 0.0015020071296021342, - "learning_rate": 0.00019999797328980586, - "loss": 46.0, - "step": 26516 - }, - { - "epoch": 2.0274098285452147, - "grad_norm": 0.0007777907885611057, - "learning_rate": 0.00019999797313687885, - "loss": 46.0, - "step": 26517 - }, - { - "epoch": 2.0274862855286044, - "grad_norm": 0.0012671292060986161, - "learning_rate": 0.00019999797298394606, - "loss": 46.0, - "step": 26518 - }, - { - "epoch": 2.027562742511994, - "grad_norm": 0.0007127164863049984, - "learning_rate": 0.00019999797283100753, - "loss": 46.0, - "step": 26519 - }, - { - "epoch": 2.027639199495384, - "grad_norm": 0.0019728608895093203, - "learning_rate": 0.0001999979726780632, - "loss": 46.0, - "step": 26520 - }, - { - "epoch": 2.0277156564787737, - "grad_norm": 0.0012132716365158558, - "learning_rate": 0.0001999979725251131, - "loss": 46.0, - "step": 26521 - }, - { - "epoch": 2.0277921134621635, - "grad_norm": 0.0011979705886915326, - "learning_rate": 0.00019999797237215728, - "loss": 46.0, - "step": 26522 - }, - { - "epoch": 2.0278685704455532, - "grad_norm": 0.0006301268003880978, - "learning_rate": 0.00019999797221919566, - "loss": 46.0, - "step": 26523 - }, - { - "epoch": 2.027945027428943, - "grad_norm": 0.0036981524899601936, - "learning_rate": 0.00019999797206622825, - "loss": 46.0, - "step": 26524 - }, - { - "epoch": 2.0280214844123323, - "grad_norm": 0.0004456181777641177, - "learning_rate": 0.0001999979719132551, - "loss": 46.0, - "step": 26525 - }, - { - "epoch": 2.028097941395722, - "grad_norm": 0.0014028712175786495, - "learning_rate": 0.00019999797176027619, - "loss": 46.0, - "step": 26526 - }, - { - "epoch": 2.028174398379112, - "grad_norm": 0.0008825690601952374, - "learning_rate": 0.0001999979716072915, - "loss": 46.0, - "step": 26527 - }, - { - "epoch": 2.0282508553625016, - "grad_norm": 0.0006085058557800949, - "learning_rate": 0.00019999797145430102, - "loss": 46.0, - "step": 26528 - }, - { - "epoch": 2.0283273123458914, - "grad_norm": 0.0014870521845296025, - "learning_rate": 0.00019999797130130478, - "loss": 46.0, - "step": 26529 - }, - { - "epoch": 2.028403769329281, - "grad_norm": 0.0011964990990236402, - "learning_rate": 0.0001999979711483028, - "loss": 46.0, - "step": 26530 - }, - { - "epoch": 2.028480226312671, - "grad_norm": 0.0019460866460576653, - "learning_rate": 0.000199997970995295, - "loss": 46.0, - "step": 26531 - }, - { - "epoch": 2.0285566832960606, - "grad_norm": 0.0005429726443253458, - "learning_rate": 0.00019999797084228145, - "loss": 46.0, - "step": 26532 - }, - { - "epoch": 2.0286331402794504, - "grad_norm": 0.0006899991421960294, - "learning_rate": 0.00019999797068926217, - "loss": 46.0, - "step": 26533 - }, - { - "epoch": 2.02870959726284, - "grad_norm": 0.0005117070395499468, - "learning_rate": 0.00019999797053623706, - "loss": 46.0, - "step": 26534 - }, - { - "epoch": 2.02878605424623, - "grad_norm": 0.0006403716397471726, - "learning_rate": 0.00019999797038320623, - "loss": 46.0, - "step": 26535 - }, - { - "epoch": 2.0288625112296192, - "grad_norm": 0.0005592163070105016, - "learning_rate": 0.00019999797023016958, - "loss": 46.0, - "step": 26536 - }, - { - "epoch": 2.028938968213009, - "grad_norm": 0.001833010814152658, - "learning_rate": 0.0001999979700771272, - "loss": 46.0, - "step": 26537 - }, - { - "epoch": 2.0290154251963988, - "grad_norm": 0.00384948099963367, - "learning_rate": 0.00019999796992407903, - "loss": 46.0, - "step": 26538 - }, - { - "epoch": 2.0290918821797885, - "grad_norm": 0.0007120004738681018, - "learning_rate": 0.0001999979697710251, - "loss": 46.0, - "step": 26539 - }, - { - "epoch": 2.0291683391631783, - "grad_norm": 0.0012704110704362392, - "learning_rate": 0.00019999796961796542, - "loss": 46.0, - "step": 26540 - }, - { - "epoch": 2.029244796146568, - "grad_norm": 0.0012613125145435333, - "learning_rate": 0.00019999796946489995, - "loss": 46.0, - "step": 26541 - }, - { - "epoch": 2.029321253129958, - "grad_norm": 0.00263027916662395, - "learning_rate": 0.0001999979693118287, - "loss": 46.0, - "step": 26542 - }, - { - "epoch": 2.0293977101133476, - "grad_norm": 0.0010949325514957309, - "learning_rate": 0.0001999979691587517, - "loss": 46.0, - "step": 26543 - }, - { - "epoch": 2.0294741670967373, - "grad_norm": 0.0010598624357953668, - "learning_rate": 0.00019999796900566893, - "loss": 46.0, - "step": 26544 - }, - { - "epoch": 2.029550624080127, - "grad_norm": 0.0013497535837814212, - "learning_rate": 0.00019999796885258037, - "loss": 46.0, - "step": 26545 - }, - { - "epoch": 2.029627081063517, - "grad_norm": 0.0016930188285186887, - "learning_rate": 0.00019999796869948607, - "loss": 46.0, - "step": 26546 - }, - { - "epoch": 2.029703538046906, - "grad_norm": 0.0024236850440502167, - "learning_rate": 0.00019999796854638596, - "loss": 46.0, - "step": 26547 - }, - { - "epoch": 2.029779995030296, - "grad_norm": 0.0011239380110055208, - "learning_rate": 0.00019999796839328013, - "loss": 46.0, - "step": 26548 - }, - { - "epoch": 2.0298564520136857, - "grad_norm": 0.000773589825257659, - "learning_rate": 0.00019999796824016853, - "loss": 46.0, - "step": 26549 - }, - { - "epoch": 2.0299329089970755, - "grad_norm": 0.0020657656714320183, - "learning_rate": 0.0001999979680870511, - "loss": 46.0, - "step": 26550 - }, - { - "epoch": 2.030009365980465, - "grad_norm": 0.0006149280816316605, - "learning_rate": 0.00019999796793392796, - "loss": 46.0, - "step": 26551 - }, - { - "epoch": 2.030085822963855, - "grad_norm": 0.0023668883368372917, - "learning_rate": 0.000199997967780799, - "loss": 46.0, - "step": 26552 - }, - { - "epoch": 2.0301622799472447, - "grad_norm": 0.0012319053057581186, - "learning_rate": 0.00019999796762766432, - "loss": 46.0, - "step": 26553 - }, - { - "epoch": 2.0302387369306345, - "grad_norm": 0.0009415640961378813, - "learning_rate": 0.00019999796747452385, - "loss": 46.0, - "step": 26554 - }, - { - "epoch": 2.0303151939140243, - "grad_norm": 0.001029097125865519, - "learning_rate": 0.0001999979673213776, - "loss": 46.0, - "step": 26555 - }, - { - "epoch": 2.030391650897414, - "grad_norm": 0.0007834908901713789, - "learning_rate": 0.0001999979671682256, - "loss": 46.0, - "step": 26556 - }, - { - "epoch": 2.030468107880804, - "grad_norm": 0.0011065511498600245, - "learning_rate": 0.00019999796701506784, - "loss": 46.0, - "step": 26557 - }, - { - "epoch": 2.030544564864193, - "grad_norm": 0.0022991320583969355, - "learning_rate": 0.00019999796686190428, - "loss": 46.0, - "step": 26558 - }, - { - "epoch": 2.030621021847583, - "grad_norm": 0.0019270549528300762, - "learning_rate": 0.00019999796670873497, - "loss": 46.0, - "step": 26559 - }, - { - "epoch": 2.0306974788309726, - "grad_norm": 0.0003749753232114017, - "learning_rate": 0.0001999979665555599, - "loss": 46.0, - "step": 26560 - }, - { - "epoch": 2.0307739358143624, - "grad_norm": 0.0007511851144954562, - "learning_rate": 0.00019999796640237904, - "loss": 46.0, - "step": 26561 - }, - { - "epoch": 2.030850392797752, - "grad_norm": 0.0013289545895531774, - "learning_rate": 0.0001999979662491924, - "loss": 46.0, - "step": 26562 - }, - { - "epoch": 2.030926849781142, - "grad_norm": 0.00038059064536355436, - "learning_rate": 0.000199997966096, - "loss": 46.0, - "step": 26563 - }, - { - "epoch": 2.0310033067645317, - "grad_norm": 0.0013287510955706239, - "learning_rate": 0.00019999796594280184, - "loss": 46.0, - "step": 26564 - }, - { - "epoch": 2.0310797637479214, - "grad_norm": 0.000757434987463057, - "learning_rate": 0.00019999796578959794, - "loss": 46.0, - "step": 26565 - }, - { - "epoch": 2.031156220731311, - "grad_norm": 0.0007932082517072558, - "learning_rate": 0.00019999796563638822, - "loss": 46.0, - "step": 26566 - }, - { - "epoch": 2.031232677714701, - "grad_norm": 0.0010606384603306651, - "learning_rate": 0.00019999796548317273, - "loss": 46.0, - "step": 26567 - }, - { - "epoch": 2.0313091346980903, - "grad_norm": 0.0013175703352317214, - "learning_rate": 0.00019999796532995152, - "loss": 46.0, - "step": 26568 - }, - { - "epoch": 2.03138559168148, - "grad_norm": 0.001010414445772767, - "learning_rate": 0.00019999796517672448, - "loss": 46.0, - "step": 26569 - }, - { - "epoch": 2.03146204866487, - "grad_norm": 0.0046905288472771645, - "learning_rate": 0.00019999796502349172, - "loss": 46.0, - "step": 26570 - }, - { - "epoch": 2.0315385056482596, - "grad_norm": 0.0006144028156995773, - "learning_rate": 0.00019999796487025318, - "loss": 46.0, - "step": 26571 - }, - { - "epoch": 2.0316149626316493, - "grad_norm": 0.0019732704386115074, - "learning_rate": 0.00019999796471700885, - "loss": 46.0, - "step": 26572 - }, - { - "epoch": 2.031691419615039, - "grad_norm": 0.001210829708725214, - "learning_rate": 0.00019999796456375877, - "loss": 46.0, - "step": 26573 - }, - { - "epoch": 2.031767876598429, - "grad_norm": 0.0018193984869867563, - "learning_rate": 0.00019999796441050292, - "loss": 46.0, - "step": 26574 - }, - { - "epoch": 2.0318443335818186, - "grad_norm": 0.0006971064722165465, - "learning_rate": 0.0001999979642572413, - "loss": 46.0, - "step": 26575 - }, - { - "epoch": 2.0319207905652084, - "grad_norm": 0.00032437010668218136, - "learning_rate": 0.0001999979641039739, - "loss": 46.0, - "step": 26576 - }, - { - "epoch": 2.031997247548598, - "grad_norm": 0.001501981751061976, - "learning_rate": 0.00019999796395070072, - "loss": 46.0, - "step": 26577 - }, - { - "epoch": 2.032073704531988, - "grad_norm": 0.00476000877097249, - "learning_rate": 0.0001999979637974218, - "loss": 46.0, - "step": 26578 - }, - { - "epoch": 2.032150161515377, - "grad_norm": 0.001412849291227758, - "learning_rate": 0.0001999979636441371, - "loss": 46.0, - "step": 26579 - }, - { - "epoch": 2.032226618498767, - "grad_norm": 0.0006279490771703422, - "learning_rate": 0.0001999979634908466, - "loss": 46.0, - "step": 26580 - }, - { - "epoch": 2.0323030754821567, - "grad_norm": 0.00045324102393351495, - "learning_rate": 0.0001999979633375504, - "loss": 46.0, - "step": 26581 - }, - { - "epoch": 2.0323795324655465, - "grad_norm": 0.0009774421341717243, - "learning_rate": 0.00019999796318424836, - "loss": 46.0, - "step": 26582 - }, - { - "epoch": 2.0324559894489362, - "grad_norm": 0.00042095532990060747, - "learning_rate": 0.00019999796303094057, - "loss": 46.0, - "step": 26583 - }, - { - "epoch": 2.032532446432326, - "grad_norm": 0.0012581833871081471, - "learning_rate": 0.00019999796287762704, - "loss": 46.0, - "step": 26584 - }, - { - "epoch": 2.0326089034157158, - "grad_norm": 0.0007525114924646914, - "learning_rate": 0.00019999796272430773, - "loss": 46.0, - "step": 26585 - }, - { - "epoch": 2.0326853603991055, - "grad_norm": 0.0008561808499507606, - "learning_rate": 0.00019999796257098265, - "loss": 46.0, - "step": 26586 - }, - { - "epoch": 2.0327618173824953, - "grad_norm": 0.0007320060394704342, - "learning_rate": 0.00019999796241765177, - "loss": 46.0, - "step": 26587 - }, - { - "epoch": 2.032838274365885, - "grad_norm": 0.0029086056165397167, - "learning_rate": 0.00019999796226431515, - "loss": 46.0, - "step": 26588 - }, - { - "epoch": 2.032914731349275, - "grad_norm": 0.002113952534273267, - "learning_rate": 0.00019999796211097275, - "loss": 46.0, - "step": 26589 - }, - { - "epoch": 2.032991188332664, - "grad_norm": 0.000662376347463578, - "learning_rate": 0.0001999979619576246, - "loss": 46.0, - "step": 26590 - }, - { - "epoch": 2.033067645316054, - "grad_norm": 0.00044667781912721694, - "learning_rate": 0.00019999796180427066, - "loss": 46.0, - "step": 26591 - }, - { - "epoch": 2.0331441022994436, - "grad_norm": 0.0007176919025368989, - "learning_rate": 0.00019999796165091094, - "loss": 46.0, - "step": 26592 - }, - { - "epoch": 2.0332205592828334, - "grad_norm": 0.00146351323928684, - "learning_rate": 0.00019999796149754547, - "loss": 46.0, - "step": 26593 - }, - { - "epoch": 2.033297016266223, - "grad_norm": 0.0025684009306132793, - "learning_rate": 0.00019999796134417423, - "loss": 46.0, - "step": 26594 - }, - { - "epoch": 2.033373473249613, - "grad_norm": 0.00047959969379007816, - "learning_rate": 0.00019999796119079722, - "loss": 46.0, - "step": 26595 - }, - { - "epoch": 2.0334499302330027, - "grad_norm": 0.0003634125750977546, - "learning_rate": 0.00019999796103741443, - "loss": 46.0, - "step": 26596 - }, - { - "epoch": 2.0335263872163925, - "grad_norm": 0.0009037366835400462, - "learning_rate": 0.0001999979608840259, - "loss": 46.0, - "step": 26597 - }, - { - "epoch": 2.033602844199782, - "grad_norm": 0.0010114137548953295, - "learning_rate": 0.00019999796073063157, - "loss": 46.0, - "step": 26598 - }, - { - "epoch": 2.033679301183172, - "grad_norm": 0.001232185517437756, - "learning_rate": 0.0001999979605772315, - "loss": 46.0, - "step": 26599 - }, - { - "epoch": 2.0337557581665617, - "grad_norm": 0.008454413153231144, - "learning_rate": 0.0001999979604238256, - "loss": 46.0, - "step": 26600 - }, - { - "epoch": 2.033832215149951, - "grad_norm": 0.0003661074733827263, - "learning_rate": 0.000199997960270414, - "loss": 46.0, - "step": 26601 - }, - { - "epoch": 2.033908672133341, - "grad_norm": 0.0030759908258914948, - "learning_rate": 0.0001999979601169966, - "loss": 46.0, - "step": 26602 - }, - { - "epoch": 2.0339851291167306, - "grad_norm": 0.0008564048912376165, - "learning_rate": 0.0001999979599635734, - "loss": 46.0, - "step": 26603 - }, - { - "epoch": 2.0340615861001203, - "grad_norm": 0.009459891356527805, - "learning_rate": 0.00019999795981014452, - "loss": 46.0, - "step": 26604 - }, - { - "epoch": 2.03413804308351, - "grad_norm": 0.001192210242152214, - "learning_rate": 0.00019999795965670977, - "loss": 46.0, - "step": 26605 - }, - { - "epoch": 2.0342145000669, - "grad_norm": 0.001824631355702877, - "learning_rate": 0.0001999979595032693, - "loss": 46.0, - "step": 26606 - }, - { - "epoch": 2.0342909570502896, - "grad_norm": 0.0004505351826082915, - "learning_rate": 0.0001999979593498231, - "loss": 46.0, - "step": 26607 - }, - { - "epoch": 2.0343674140336794, - "grad_norm": 0.0012984368950128555, - "learning_rate": 0.00019999795919637106, - "loss": 46.0, - "step": 26608 - }, - { - "epoch": 2.034443871017069, - "grad_norm": 0.0022749153431504965, - "learning_rate": 0.00019999795904291327, - "loss": 46.0, - "step": 26609 - }, - { - "epoch": 2.034520328000459, - "grad_norm": 0.0014079343527555466, - "learning_rate": 0.0001999979588894497, - "loss": 46.0, - "step": 26610 - }, - { - "epoch": 2.0345967849838487, - "grad_norm": 0.00256090285256505, - "learning_rate": 0.0001999979587359804, - "loss": 46.0, - "step": 26611 - }, - { - "epoch": 2.034673241967238, - "grad_norm": 0.00040165524114854634, - "learning_rate": 0.00019999795858250533, - "loss": 46.0, - "step": 26612 - }, - { - "epoch": 2.0347496989506277, - "grad_norm": 0.002713286317884922, - "learning_rate": 0.00019999795842902445, - "loss": 46.0, - "step": 26613 - }, - { - "epoch": 2.0348261559340175, - "grad_norm": 0.0014820380602031946, - "learning_rate": 0.00019999795827553782, - "loss": 46.0, - "step": 26614 - }, - { - "epoch": 2.0349026129174073, - "grad_norm": 0.00043587872642092407, - "learning_rate": 0.00019999795812204542, - "loss": 46.0, - "step": 26615 - }, - { - "epoch": 2.034979069900797, - "grad_norm": 0.0004651909985113889, - "learning_rate": 0.00019999795796854725, - "loss": 46.0, - "step": 26616 - }, - { - "epoch": 2.035055526884187, - "grad_norm": 0.002295827027410269, - "learning_rate": 0.0001999979578150433, - "loss": 46.0, - "step": 26617 - }, - { - "epoch": 2.0351319838675765, - "grad_norm": 0.0010976034682244062, - "learning_rate": 0.00019999795766153361, - "loss": 46.0, - "step": 26618 - }, - { - "epoch": 2.0352084408509663, - "grad_norm": 0.002214741660282016, - "learning_rate": 0.00019999795750801815, - "loss": 46.0, - "step": 26619 - }, - { - "epoch": 2.035284897834356, - "grad_norm": 0.0007417284068651497, - "learning_rate": 0.00019999795735449688, - "loss": 46.0, - "step": 26620 - }, - { - "epoch": 2.035361354817746, - "grad_norm": 0.0004341478052083403, - "learning_rate": 0.00019999795720096987, - "loss": 46.0, - "step": 26621 - }, - { - "epoch": 2.0354378118011356, - "grad_norm": 0.0005533367511816323, - "learning_rate": 0.00019999795704743708, - "loss": 46.0, - "step": 26622 - }, - { - "epoch": 2.035514268784525, - "grad_norm": 0.0007011029520072043, - "learning_rate": 0.00019999795689389855, - "loss": 46.0, - "step": 26623 - }, - { - "epoch": 2.0355907257679147, - "grad_norm": 0.0004201651900075376, - "learning_rate": 0.00019999795674035422, - "loss": 46.0, - "step": 26624 - }, - { - "epoch": 2.0356671827513044, - "grad_norm": 0.0004260960849933326, - "learning_rate": 0.00019999795658680412, - "loss": 46.0, - "step": 26625 - }, - { - "epoch": 2.035743639734694, - "grad_norm": 0.0026603899896144867, - "learning_rate": 0.0001999979564332483, - "loss": 46.0, - "step": 26626 - }, - { - "epoch": 2.035820096718084, - "grad_norm": 0.0056581273674964905, - "learning_rate": 0.00019999795627968664, - "loss": 46.0, - "step": 26627 - }, - { - "epoch": 2.0358965537014737, - "grad_norm": 0.0002495713997632265, - "learning_rate": 0.00019999795612611924, - "loss": 46.0, - "step": 26628 - }, - { - "epoch": 2.0359730106848635, - "grad_norm": 0.00373982940800488, - "learning_rate": 0.00019999795597254607, - "loss": 46.0, - "step": 26629 - }, - { - "epoch": 2.0360494676682532, - "grad_norm": 0.0006167826941236854, - "learning_rate": 0.00019999795581896712, - "loss": 46.0, - "step": 26630 - }, - { - "epoch": 2.036125924651643, - "grad_norm": 0.0005011427565477788, - "learning_rate": 0.00019999795566538243, - "loss": 46.0, - "step": 26631 - }, - { - "epoch": 2.0362023816350328, - "grad_norm": 0.0005400745430961251, - "learning_rate": 0.00019999795551179194, - "loss": 46.0, - "step": 26632 - }, - { - "epoch": 2.0362788386184225, - "grad_norm": 0.0008155073737725616, - "learning_rate": 0.0001999979553581957, - "loss": 46.0, - "step": 26633 - }, - { - "epoch": 2.036355295601812, - "grad_norm": 0.000484047137433663, - "learning_rate": 0.0001999979552045937, - "loss": 46.0, - "step": 26634 - }, - { - "epoch": 2.0364317525852016, - "grad_norm": 0.0004688260378316045, - "learning_rate": 0.00019999795505098588, - "loss": 46.0, - "step": 26635 - }, - { - "epoch": 2.0365082095685914, - "grad_norm": 0.0009017328848131001, - "learning_rate": 0.00019999795489737234, - "loss": 46.0, - "step": 26636 - }, - { - "epoch": 2.036584666551981, - "grad_norm": 0.001145393936894834, - "learning_rate": 0.000199997954743753, - "loss": 46.0, - "step": 26637 - }, - { - "epoch": 2.036661123535371, - "grad_norm": 0.0014703063061460853, - "learning_rate": 0.0001999979545901279, - "loss": 46.0, - "step": 26638 - }, - { - "epoch": 2.0367375805187606, - "grad_norm": 0.0009786536684259772, - "learning_rate": 0.00019999795443649708, - "loss": 46.0, - "step": 26639 - }, - { - "epoch": 2.0368140375021504, - "grad_norm": 0.0030558928847312927, - "learning_rate": 0.0001999979542828604, - "loss": 46.0, - "step": 26640 - }, - { - "epoch": 2.03689049448554, - "grad_norm": 0.0013135335175320506, - "learning_rate": 0.00019999795412921803, - "loss": 46.0, - "step": 26641 - }, - { - "epoch": 2.03696695146893, - "grad_norm": 0.0017956553492695093, - "learning_rate": 0.00019999795397556986, - "loss": 46.0, - "step": 26642 - }, - { - "epoch": 2.0370434084523197, - "grad_norm": 0.0018976174760609865, - "learning_rate": 0.00019999795382191592, - "loss": 46.0, - "step": 26643 - }, - { - "epoch": 2.0371198654357094, - "grad_norm": 0.0019661334808915854, - "learning_rate": 0.0001999979536682562, - "loss": 46.0, - "step": 26644 - }, - { - "epoch": 2.0371963224190988, - "grad_norm": 0.0005052143824286759, - "learning_rate": 0.00019999795351459073, - "loss": 46.0, - "step": 26645 - }, - { - "epoch": 2.0372727794024885, - "grad_norm": 0.00033888386678881943, - "learning_rate": 0.0001999979533609195, - "loss": 46.0, - "step": 26646 - }, - { - "epoch": 2.0373492363858783, - "grad_norm": 0.006462957710027695, - "learning_rate": 0.00019999795320724246, - "loss": 46.0, - "step": 26647 - }, - { - "epoch": 2.037425693369268, - "grad_norm": 0.001122289919294417, - "learning_rate": 0.00019999795305355967, - "loss": 46.0, - "step": 26648 - }, - { - "epoch": 2.037502150352658, - "grad_norm": 0.0006262047681957483, - "learning_rate": 0.00019999795289987114, - "loss": 46.0, - "step": 26649 - }, - { - "epoch": 2.0375786073360476, - "grad_norm": 0.005542945582419634, - "learning_rate": 0.00019999795274617678, - "loss": 46.0, - "step": 26650 - }, - { - "epoch": 2.0376550643194373, - "grad_norm": 0.005652573890984058, - "learning_rate": 0.0001999979525924767, - "loss": 46.0, - "step": 26651 - }, - { - "epoch": 2.037731521302827, - "grad_norm": 0.004041920881718397, - "learning_rate": 0.00019999795243877083, - "loss": 46.0, - "step": 26652 - }, - { - "epoch": 2.037807978286217, - "grad_norm": 0.0023524577263742685, - "learning_rate": 0.0001999979522850592, - "loss": 46.0, - "step": 26653 - }, - { - "epoch": 2.0378844352696066, - "grad_norm": 0.004859734792262316, - "learning_rate": 0.0001999979521313418, - "loss": 46.0, - "step": 26654 - }, - { - "epoch": 2.0379608922529964, - "grad_norm": 0.0008747361716814339, - "learning_rate": 0.00019999795197761863, - "loss": 46.0, - "step": 26655 - }, - { - "epoch": 2.0380373492363857, - "grad_norm": 0.0009978747693821788, - "learning_rate": 0.0001999979518238897, - "loss": 46.0, - "step": 26656 - }, - { - "epoch": 2.0381138062197754, - "grad_norm": 0.0006871107034385204, - "learning_rate": 0.00019999795167015497, - "loss": 46.0, - "step": 26657 - }, - { - "epoch": 2.038190263203165, - "grad_norm": 0.0004938003839924932, - "learning_rate": 0.0001999979515164145, - "loss": 46.0, - "step": 26658 - }, - { - "epoch": 2.038266720186555, - "grad_norm": 0.0009424231830053031, - "learning_rate": 0.00019999795136266824, - "loss": 46.0, - "step": 26659 - }, - { - "epoch": 2.0383431771699447, - "grad_norm": 0.0007297934498637915, - "learning_rate": 0.00019999795120891623, - "loss": 46.0, - "step": 26660 - }, - { - "epoch": 2.0384196341533345, - "grad_norm": 0.0016007061349228024, - "learning_rate": 0.00019999795105515844, - "loss": 46.0, - "step": 26661 - }, - { - "epoch": 2.0384960911367243, - "grad_norm": 0.0010401232866570354, - "learning_rate": 0.0001999979509013949, - "loss": 46.0, - "step": 26662 - }, - { - "epoch": 2.038572548120114, - "grad_norm": 0.0026519561652094126, - "learning_rate": 0.00019999795074762556, - "loss": 46.0, - "step": 26663 - }, - { - "epoch": 2.0386490051035038, - "grad_norm": 0.001126496703363955, - "learning_rate": 0.00019999795059385048, - "loss": 46.0, - "step": 26664 - }, - { - "epoch": 2.0387254620868935, - "grad_norm": 0.001208712114021182, - "learning_rate": 0.0001999979504400696, - "loss": 46.0, - "step": 26665 - }, - { - "epoch": 2.0388019190702833, - "grad_norm": 0.0011292635463178158, - "learning_rate": 0.00019999795028628295, - "loss": 46.0, - "step": 26666 - }, - { - "epoch": 2.0388783760536726, - "grad_norm": 0.0005419857916422188, - "learning_rate": 0.00019999795013249055, - "loss": 46.0, - "step": 26667 - }, - { - "epoch": 2.0389548330370624, - "grad_norm": 0.0007096214685589075, - "learning_rate": 0.00019999794997869238, - "loss": 46.0, - "step": 26668 - }, - { - "epoch": 2.039031290020452, - "grad_norm": 0.00209606159478426, - "learning_rate": 0.0001999979498248884, - "loss": 46.0, - "step": 26669 - }, - { - "epoch": 2.039107747003842, - "grad_norm": 0.0011689257808029652, - "learning_rate": 0.00019999794967107872, - "loss": 46.0, - "step": 26670 - }, - { - "epoch": 2.0391842039872317, - "grad_norm": 0.0006484262412413955, - "learning_rate": 0.00019999794951726325, - "loss": 46.0, - "step": 26671 - }, - { - "epoch": 2.0392606609706214, - "grad_norm": 0.0007338208379223943, - "learning_rate": 0.000199997949363442, - "loss": 46.0, - "step": 26672 - }, - { - "epoch": 2.039337117954011, - "grad_norm": 0.004225425887852907, - "learning_rate": 0.00019999794920961498, - "loss": 46.0, - "step": 26673 - }, - { - "epoch": 2.039413574937401, - "grad_norm": 0.0009118360467255116, - "learning_rate": 0.00019999794905578217, - "loss": 46.0, - "step": 26674 - }, - { - "epoch": 2.0394900319207907, - "grad_norm": 0.0022862653713673353, - "learning_rate": 0.00019999794890194364, - "loss": 46.0, - "step": 26675 - }, - { - "epoch": 2.0395664889041805, - "grad_norm": 0.000911420735064894, - "learning_rate": 0.0001999979487480993, - "loss": 46.0, - "step": 26676 - }, - { - "epoch": 2.03964294588757, - "grad_norm": 0.0006587571115233004, - "learning_rate": 0.00019999794859424917, - "loss": 46.0, - "step": 26677 - }, - { - "epoch": 2.0397194028709595, - "grad_norm": 0.0024143436457961798, - "learning_rate": 0.00019999794844039332, - "loss": 46.0, - "step": 26678 - }, - { - "epoch": 2.0397958598543493, - "grad_norm": 0.0011215313570573926, - "learning_rate": 0.0001999979482865317, - "loss": 46.0, - "step": 26679 - }, - { - "epoch": 2.039872316837739, - "grad_norm": 0.002950820606201887, - "learning_rate": 0.00019999794813266428, - "loss": 46.0, - "step": 26680 - }, - { - "epoch": 2.039948773821129, - "grad_norm": 0.0012147912057116628, - "learning_rate": 0.0001999979479787911, - "loss": 46.0, - "step": 26681 - }, - { - "epoch": 2.0400252308045186, - "grad_norm": 0.0009096599533222616, - "learning_rate": 0.00019999794782491216, - "loss": 46.0, - "step": 26682 - }, - { - "epoch": 2.0401016877879083, - "grad_norm": 0.004348806571215391, - "learning_rate": 0.00019999794767102747, - "loss": 46.0, - "step": 26683 - }, - { - "epoch": 2.040178144771298, - "grad_norm": 0.0007725459872744977, - "learning_rate": 0.00019999794751713695, - "loss": 46.0, - "step": 26684 - }, - { - "epoch": 2.040254601754688, - "grad_norm": 0.0008875320781953633, - "learning_rate": 0.00019999794736324074, - "loss": 46.0, - "step": 26685 - }, - { - "epoch": 2.0403310587380776, - "grad_norm": 0.0004521760274656117, - "learning_rate": 0.00019999794720933868, - "loss": 46.0, - "step": 26686 - }, - { - "epoch": 2.0404075157214674, - "grad_norm": 0.0004650182672776282, - "learning_rate": 0.00019999794705543092, - "loss": 46.0, - "step": 26687 - }, - { - "epoch": 2.040483972704857, - "grad_norm": 0.003060281975194812, - "learning_rate": 0.00019999794690151736, - "loss": 46.0, - "step": 26688 - }, - { - "epoch": 2.0405604296882465, - "grad_norm": 0.0014394642785191536, - "learning_rate": 0.00019999794674759803, - "loss": 46.0, - "step": 26689 - }, - { - "epoch": 2.0406368866716362, - "grad_norm": 0.0010106581030413508, - "learning_rate": 0.00019999794659367293, - "loss": 46.0, - "step": 26690 - }, - { - "epoch": 2.040713343655026, - "grad_norm": 0.0006022258894518018, - "learning_rate": 0.00019999794643974205, - "loss": 46.0, - "step": 26691 - }, - { - "epoch": 2.0407898006384158, - "grad_norm": 0.002769224112853408, - "learning_rate": 0.00019999794628580543, - "loss": 46.0, - "step": 26692 - }, - { - "epoch": 2.0408662576218055, - "grad_norm": 0.0011104485020041466, - "learning_rate": 0.00019999794613186303, - "loss": 46.0, - "step": 26693 - }, - { - "epoch": 2.0409427146051953, - "grad_norm": 0.0010501760989427567, - "learning_rate": 0.00019999794597791483, - "loss": 46.0, - "step": 26694 - }, - { - "epoch": 2.041019171588585, - "grad_norm": 0.0013500908389687538, - "learning_rate": 0.00019999794582396086, - "loss": 46.0, - "step": 26695 - }, - { - "epoch": 2.041095628571975, - "grad_norm": 0.013040841557085514, - "learning_rate": 0.00019999794567000117, - "loss": 46.0, - "step": 26696 - }, - { - "epoch": 2.0411720855553646, - "grad_norm": 0.003713401732966304, - "learning_rate": 0.00019999794551603568, - "loss": 46.0, - "step": 26697 - }, - { - "epoch": 2.0412485425387543, - "grad_norm": 0.0008403608808293939, - "learning_rate": 0.00019999794536206444, - "loss": 46.0, - "step": 26698 - }, - { - "epoch": 2.0413249995221436, - "grad_norm": 0.0003959627647418529, - "learning_rate": 0.0001999979452080874, - "loss": 46.0, - "step": 26699 - }, - { - "epoch": 2.0414014565055334, - "grad_norm": 0.0018761565443128347, - "learning_rate": 0.0001999979450541046, - "loss": 46.0, - "step": 26700 - }, - { - "epoch": 2.041477913488923, - "grad_norm": 0.0012406965252012014, - "learning_rate": 0.00019999794490011604, - "loss": 46.0, - "step": 26701 - }, - { - "epoch": 2.041554370472313, - "grad_norm": 0.0005860960809513927, - "learning_rate": 0.00019999794474612174, - "loss": 46.0, - "step": 26702 - }, - { - "epoch": 2.0416308274557027, - "grad_norm": 0.0024115475825965405, - "learning_rate": 0.00019999794459212163, - "loss": 46.0, - "step": 26703 - }, - { - "epoch": 2.0417072844390924, - "grad_norm": 0.0008953747455962002, - "learning_rate": 0.00019999794443811576, - "loss": 46.0, - "step": 26704 - }, - { - "epoch": 2.041783741422482, - "grad_norm": 0.0035076530184596777, - "learning_rate": 0.00019999794428410413, - "loss": 46.0, - "step": 26705 - }, - { - "epoch": 2.041860198405872, - "grad_norm": 0.0011120779672637582, - "learning_rate": 0.00019999794413008674, - "loss": 46.0, - "step": 26706 - }, - { - "epoch": 2.0419366553892617, - "grad_norm": 0.009908235631883144, - "learning_rate": 0.00019999794397606354, - "loss": 46.0, - "step": 26707 - }, - { - "epoch": 2.0420131123726515, - "grad_norm": 0.004595461301505566, - "learning_rate": 0.0001999979438220346, - "loss": 46.0, - "step": 26708 - }, - { - "epoch": 2.0420895693560412, - "grad_norm": 0.005109483376145363, - "learning_rate": 0.00019999794366799988, - "loss": 46.0, - "step": 26709 - }, - { - "epoch": 2.0421660263394306, - "grad_norm": 0.0023741349577903748, - "learning_rate": 0.0001999979435139594, - "loss": 46.0, - "step": 26710 - }, - { - "epoch": 2.0422424833228203, - "grad_norm": 0.0020227041095495224, - "learning_rate": 0.00019999794335991312, - "loss": 46.0, - "step": 26711 - }, - { - "epoch": 2.04231894030621, - "grad_norm": 0.0009598316974006593, - "learning_rate": 0.0001999979432058611, - "loss": 46.0, - "step": 26712 - }, - { - "epoch": 2.0423953972896, - "grad_norm": 0.0011091948254033923, - "learning_rate": 0.0001999979430518033, - "loss": 46.0, - "step": 26713 - }, - { - "epoch": 2.0424718542729896, - "grad_norm": 0.0007525385008193552, - "learning_rate": 0.00019999794289773975, - "loss": 46.0, - "step": 26714 - }, - { - "epoch": 2.0425483112563794, - "grad_norm": 0.0006028954521752894, - "learning_rate": 0.00019999794274367042, - "loss": 46.0, - "step": 26715 - }, - { - "epoch": 2.042624768239769, - "grad_norm": 0.0011807328555732965, - "learning_rate": 0.0001999979425895953, - "loss": 46.0, - "step": 26716 - }, - { - "epoch": 2.042701225223159, - "grad_norm": 0.0008310778066515923, - "learning_rate": 0.00019999794243551444, - "loss": 46.0, - "step": 26717 - }, - { - "epoch": 2.0427776822065487, - "grad_norm": 0.000946046260651201, - "learning_rate": 0.00019999794228142781, - "loss": 46.0, - "step": 26718 - }, - { - "epoch": 2.0428541391899384, - "grad_norm": 0.0010109165450558066, - "learning_rate": 0.0001999979421273354, - "loss": 46.0, - "step": 26719 - }, - { - "epoch": 2.042930596173328, - "grad_norm": 0.001046556979417801, - "learning_rate": 0.00019999794197323722, - "loss": 46.0, - "step": 26720 - }, - { - "epoch": 2.0430070531567175, - "grad_norm": 0.002121644327417016, - "learning_rate": 0.00019999794181913328, - "loss": 46.0, - "step": 26721 - }, - { - "epoch": 2.0430835101401073, - "grad_norm": 0.0036012011114507914, - "learning_rate": 0.00019999794166502353, - "loss": 46.0, - "step": 26722 - }, - { - "epoch": 2.043159967123497, - "grad_norm": 0.0028198163490742445, - "learning_rate": 0.00019999794151090807, - "loss": 46.0, - "step": 26723 - }, - { - "epoch": 2.0432364241068868, - "grad_norm": 0.0006680210353806615, - "learning_rate": 0.00019999794135678684, - "loss": 46.0, - "step": 26724 - }, - { - "epoch": 2.0433128810902765, - "grad_norm": 0.0026276058051735163, - "learning_rate": 0.00019999794120265977, - "loss": 46.0, - "step": 26725 - }, - { - "epoch": 2.0433893380736663, - "grad_norm": 0.0033334163017570972, - "learning_rate": 0.000199997941048527, - "loss": 46.0, - "step": 26726 - }, - { - "epoch": 2.043465795057056, - "grad_norm": 0.00047325206105597317, - "learning_rate": 0.00019999794089438843, - "loss": 46.0, - "step": 26727 - }, - { - "epoch": 2.043542252040446, - "grad_norm": 0.0005689062527380884, - "learning_rate": 0.0001999979407402441, - "loss": 46.0, - "step": 26728 - }, - { - "epoch": 2.0436187090238356, - "grad_norm": 0.0008415696211159229, - "learning_rate": 0.00019999794058609397, - "loss": 46.0, - "step": 26729 - }, - { - "epoch": 2.0436951660072253, - "grad_norm": 0.0009475558181293309, - "learning_rate": 0.00019999794043193812, - "loss": 46.0, - "step": 26730 - }, - { - "epoch": 2.043771622990615, - "grad_norm": 0.000825090566650033, - "learning_rate": 0.00019999794027777647, - "loss": 46.0, - "step": 26731 - }, - { - "epoch": 2.0438480799740044, - "grad_norm": 0.005847132299095392, - "learning_rate": 0.00019999794012360905, - "loss": 46.0, - "step": 26732 - }, - { - "epoch": 2.043924536957394, - "grad_norm": 0.004025956150144339, - "learning_rate": 0.00019999793996943588, - "loss": 46.0, - "step": 26733 - }, - { - "epoch": 2.044000993940784, - "grad_norm": 0.000576301827095449, - "learning_rate": 0.00019999793981525694, - "loss": 46.0, - "step": 26734 - }, - { - "epoch": 2.0440774509241737, - "grad_norm": 0.0006756465299986303, - "learning_rate": 0.00019999793966107222, - "loss": 46.0, - "step": 26735 - }, - { - "epoch": 2.0441539079075635, - "grad_norm": 0.0028010420501232147, - "learning_rate": 0.00019999793950688173, - "loss": 46.0, - "step": 26736 - }, - { - "epoch": 2.0442303648909532, - "grad_norm": 0.001140445121563971, - "learning_rate": 0.00019999793935268547, - "loss": 46.0, - "step": 26737 - }, - { - "epoch": 2.044306821874343, - "grad_norm": 0.0012476647971197963, - "learning_rate": 0.00019999793919848343, - "loss": 46.0, - "step": 26738 - }, - { - "epoch": 2.0443832788577327, - "grad_norm": 0.005917396396398544, - "learning_rate": 0.00019999793904427565, - "loss": 46.0, - "step": 26739 - }, - { - "epoch": 2.0444597358411225, - "grad_norm": 0.0010810368694365025, - "learning_rate": 0.0001999979388900621, - "loss": 46.0, - "step": 26740 - }, - { - "epoch": 2.0445361928245123, - "grad_norm": 0.0007767686038278043, - "learning_rate": 0.00019999793873584274, - "loss": 46.0, - "step": 26741 - }, - { - "epoch": 2.044612649807902, - "grad_norm": 0.0006513047264888883, - "learning_rate": 0.00019999793858161764, - "loss": 46.0, - "step": 26742 - }, - { - "epoch": 2.0446891067912913, - "grad_norm": 0.00045319818309508264, - "learning_rate": 0.00019999793842738676, - "loss": 46.0, - "step": 26743 - }, - { - "epoch": 2.044765563774681, - "grad_norm": 0.004594389349222183, - "learning_rate": 0.0001999979382731501, - "loss": 46.0, - "step": 26744 - }, - { - "epoch": 2.044842020758071, - "grad_norm": 0.01075744442641735, - "learning_rate": 0.00019999793811890771, - "loss": 46.0, - "step": 26745 - }, - { - "epoch": 2.0449184777414606, - "grad_norm": 0.0008175843977369368, - "learning_rate": 0.00019999793796465954, - "loss": 46.0, - "step": 26746 - }, - { - "epoch": 2.0449949347248504, - "grad_norm": 0.0009589049150235951, - "learning_rate": 0.0001999979378104056, - "loss": 46.0, - "step": 26747 - }, - { - "epoch": 2.04507139170824, - "grad_norm": 0.0006627555703744292, - "learning_rate": 0.00019999793765614589, - "loss": 46.0, - "step": 26748 - }, - { - "epoch": 2.04514784869163, - "grad_norm": 0.0018860474228858948, - "learning_rate": 0.0001999979375018804, - "loss": 46.0, - "step": 26749 - }, - { - "epoch": 2.0452243056750197, - "grad_norm": 0.0005355579778552055, - "learning_rate": 0.00019999793734760913, - "loss": 46.0, - "step": 26750 - }, - { - "epoch": 2.0453007626584094, - "grad_norm": 0.001369601464830339, - "learning_rate": 0.0001999979371933321, - "loss": 46.0, - "step": 26751 - }, - { - "epoch": 2.045377219641799, - "grad_norm": 0.0010594576597213745, - "learning_rate": 0.0001999979370390493, - "loss": 46.0, - "step": 26752 - }, - { - "epoch": 2.045453676625189, - "grad_norm": 0.0010313743259757757, - "learning_rate": 0.0001999979368847607, - "loss": 46.0, - "step": 26753 - }, - { - "epoch": 2.0455301336085783, - "grad_norm": 0.00456597562879324, - "learning_rate": 0.00019999793673046638, - "loss": 46.0, - "step": 26754 - }, - { - "epoch": 2.045606590591968, - "grad_norm": 0.0008752791327424347, - "learning_rate": 0.00019999793657616627, - "loss": 46.0, - "step": 26755 - }, - { - "epoch": 2.045683047575358, - "grad_norm": 0.0017183005111292005, - "learning_rate": 0.0001999979364218604, - "loss": 46.0, - "step": 26756 - }, - { - "epoch": 2.0457595045587476, - "grad_norm": 0.0005333643639460206, - "learning_rate": 0.00019999793626754878, - "loss": 46.0, - "step": 26757 - }, - { - "epoch": 2.0458359615421373, - "grad_norm": 0.0005705517833121121, - "learning_rate": 0.00019999793611323135, - "loss": 46.0, - "step": 26758 - }, - { - "epoch": 2.045912418525527, - "grad_norm": 0.0025102447252720594, - "learning_rate": 0.00019999793595890813, - "loss": 46.0, - "step": 26759 - }, - { - "epoch": 2.045988875508917, - "grad_norm": 0.0011639269068837166, - "learning_rate": 0.0001999979358045792, - "loss": 46.0, - "step": 26760 - }, - { - "epoch": 2.0460653324923066, - "grad_norm": 0.0006512021063826978, - "learning_rate": 0.00019999793565024447, - "loss": 46.0, - "step": 26761 - }, - { - "epoch": 2.0461417894756964, - "grad_norm": 0.0011443783296272159, - "learning_rate": 0.000199997935495904, - "loss": 46.0, - "step": 26762 - }, - { - "epoch": 2.046218246459086, - "grad_norm": 0.01710735820233822, - "learning_rate": 0.00019999793534155772, - "loss": 46.0, - "step": 26763 - }, - { - "epoch": 2.046294703442476, - "grad_norm": 0.0020361868664622307, - "learning_rate": 0.0001999979351872057, - "loss": 46.0, - "step": 26764 - }, - { - "epoch": 2.046371160425865, - "grad_norm": 0.001960717374458909, - "learning_rate": 0.0001999979350328479, - "loss": 46.0, - "step": 26765 - }, - { - "epoch": 2.046447617409255, - "grad_norm": 0.0013600252568721771, - "learning_rate": 0.00019999793487848435, - "loss": 46.0, - "step": 26766 - }, - { - "epoch": 2.0465240743926447, - "grad_norm": 0.00043563719373196363, - "learning_rate": 0.000199997934724115, - "loss": 46.0, - "step": 26767 - }, - { - "epoch": 2.0466005313760345, - "grad_norm": 0.0008381299558095634, - "learning_rate": 0.0001999979345697399, - "loss": 46.0, - "step": 26768 - }, - { - "epoch": 2.0466769883594242, - "grad_norm": 0.0010570789454504848, - "learning_rate": 0.00019999793441535902, - "loss": 46.0, - "step": 26769 - }, - { - "epoch": 2.046753445342814, - "grad_norm": 0.00193371984641999, - "learning_rate": 0.00019999793426097237, - "loss": 46.0, - "step": 26770 - }, - { - "epoch": 2.0468299023262038, - "grad_norm": 0.0004712466325145215, - "learning_rate": 0.00019999793410657994, - "loss": 46.0, - "step": 26771 - }, - { - "epoch": 2.0469063593095935, - "grad_norm": 0.01838589459657669, - "learning_rate": 0.00019999793395218175, - "loss": 46.0, - "step": 26772 - }, - { - "epoch": 2.0469828162929833, - "grad_norm": 0.002186743775382638, - "learning_rate": 0.0001999979337977778, - "loss": 46.0, - "step": 26773 - }, - { - "epoch": 2.047059273276373, - "grad_norm": 0.000854240613989532, - "learning_rate": 0.0001999979336433681, - "loss": 46.0, - "step": 26774 - }, - { - "epoch": 2.047135730259763, - "grad_norm": 0.0005020123207941651, - "learning_rate": 0.0001999979334889526, - "loss": 46.0, - "step": 26775 - }, - { - "epoch": 2.047212187243152, - "grad_norm": 0.0009910749504342675, - "learning_rate": 0.00019999793333453134, - "loss": 46.0, - "step": 26776 - }, - { - "epoch": 2.047288644226542, - "grad_norm": 0.0016292175278067589, - "learning_rate": 0.0001999979331801043, - "loss": 46.0, - "step": 26777 - }, - { - "epoch": 2.0473651012099316, - "grad_norm": 0.0026920894160866737, - "learning_rate": 0.00019999793302567152, - "loss": 46.0, - "step": 26778 - }, - { - "epoch": 2.0474415581933214, - "grad_norm": 0.0006900280131958425, - "learning_rate": 0.00019999793287123294, - "loss": 46.0, - "step": 26779 - }, - { - "epoch": 2.047518015176711, - "grad_norm": 0.00046262776595540345, - "learning_rate": 0.00019999793271678859, - "loss": 46.0, - "step": 26780 - }, - { - "epoch": 2.047594472160101, - "grad_norm": 0.0025357462000101805, - "learning_rate": 0.00019999793256233848, - "loss": 46.0, - "step": 26781 - }, - { - "epoch": 2.0476709291434907, - "grad_norm": 0.0013350378721952438, - "learning_rate": 0.00019999793240788264, - "loss": 46.0, - "step": 26782 - }, - { - "epoch": 2.0477473861268805, - "grad_norm": 0.0007526238914579153, - "learning_rate": 0.000199997932253421, - "loss": 46.0, - "step": 26783 - }, - { - "epoch": 2.04782384311027, - "grad_norm": 0.0009977235458791256, - "learning_rate": 0.00019999793209895357, - "loss": 46.0, - "step": 26784 - }, - { - "epoch": 2.04790030009366, - "grad_norm": 0.000511665886733681, - "learning_rate": 0.00019999793194448037, - "loss": 46.0, - "step": 26785 - }, - { - "epoch": 2.0479767570770497, - "grad_norm": 0.0018593334825709462, - "learning_rate": 0.0001999979317900014, - "loss": 46.0, - "step": 26786 - }, - { - "epoch": 2.048053214060439, - "grad_norm": 0.0023004096001386642, - "learning_rate": 0.0001999979316355167, - "loss": 46.0, - "step": 26787 - }, - { - "epoch": 2.048129671043829, - "grad_norm": 0.0007995264022611082, - "learning_rate": 0.0001999979314810262, - "loss": 46.0, - "step": 26788 - }, - { - "epoch": 2.0482061280272186, - "grad_norm": 0.0011476317886263132, - "learning_rate": 0.00019999793132652994, - "loss": 46.0, - "step": 26789 - }, - { - "epoch": 2.0482825850106083, - "grad_norm": 0.0014823433011770248, - "learning_rate": 0.0001999979311720279, - "loss": 46.0, - "step": 26790 - }, - { - "epoch": 2.048359041993998, - "grad_norm": 0.000912494317162782, - "learning_rate": 0.0001999979310175201, - "loss": 46.0, - "step": 26791 - }, - { - "epoch": 2.048435498977388, - "grad_norm": 0.0040137311443686485, - "learning_rate": 0.00019999793086300654, - "loss": 46.0, - "step": 26792 - }, - { - "epoch": 2.0485119559607776, - "grad_norm": 0.0028996344190090895, - "learning_rate": 0.00019999793070848718, - "loss": 46.0, - "step": 26793 - }, - { - "epoch": 2.0485884129441674, - "grad_norm": 0.0004226558667141944, - "learning_rate": 0.00019999793055396208, - "loss": 46.0, - "step": 26794 - }, - { - "epoch": 2.048664869927557, - "grad_norm": 0.0003900914452970028, - "learning_rate": 0.0001999979303994312, - "loss": 46.0, - "step": 26795 - }, - { - "epoch": 2.048741326910947, - "grad_norm": 0.005706121679395437, - "learning_rate": 0.00019999793024489456, - "loss": 46.0, - "step": 26796 - }, - { - "epoch": 2.0488177838943367, - "grad_norm": 0.0014819680945947766, - "learning_rate": 0.00019999793009035214, - "loss": 46.0, - "step": 26797 - }, - { - "epoch": 2.048894240877726, - "grad_norm": 0.0008054747595451772, - "learning_rate": 0.00019999792993580394, - "loss": 46.0, - "step": 26798 - }, - { - "epoch": 2.0489706978611157, - "grad_norm": 0.0024097184650599957, - "learning_rate": 0.00019999792978124997, - "loss": 46.0, - "step": 26799 - }, - { - "epoch": 2.0490471548445055, - "grad_norm": 0.0009792178170755506, - "learning_rate": 0.00019999792962669026, - "loss": 46.0, - "step": 26800 - }, - { - "epoch": 2.0491236118278953, - "grad_norm": 0.0010091076837852597, - "learning_rate": 0.00019999792947212477, - "loss": 46.0, - "step": 26801 - }, - { - "epoch": 2.049200068811285, - "grad_norm": 0.0008666398352943361, - "learning_rate": 0.0001999979293175535, - "loss": 46.0, - "step": 26802 - }, - { - "epoch": 2.049276525794675, - "grad_norm": 0.005858314223587513, - "learning_rate": 0.00019999792916297645, - "loss": 46.0, - "step": 26803 - }, - { - "epoch": 2.0493529827780645, - "grad_norm": 0.0013598664663732052, - "learning_rate": 0.00019999792900839367, - "loss": 46.0, - "step": 26804 - }, - { - "epoch": 2.0494294397614543, - "grad_norm": 0.0007814023410901427, - "learning_rate": 0.0001999979288538051, - "loss": 46.0, - "step": 26805 - }, - { - "epoch": 2.049505896744844, - "grad_norm": 0.002629644935950637, - "learning_rate": 0.00019999792869921076, - "loss": 46.0, - "step": 26806 - }, - { - "epoch": 2.049582353728234, - "grad_norm": 0.00041831322596408427, - "learning_rate": 0.00019999792854461063, - "loss": 46.0, - "step": 26807 - }, - { - "epoch": 2.0496588107116236, - "grad_norm": 0.005221270490437746, - "learning_rate": 0.00019999792839000478, - "loss": 46.0, - "step": 26808 - }, - { - "epoch": 2.049735267695013, - "grad_norm": 0.002187157981097698, - "learning_rate": 0.0001999979282353931, - "loss": 46.0, - "step": 26809 - }, - { - "epoch": 2.0498117246784027, - "grad_norm": 0.0015697296475991607, - "learning_rate": 0.00019999792808077569, - "loss": 46.0, - "step": 26810 - }, - { - "epoch": 2.0498881816617924, - "grad_norm": 0.0010987631976604462, - "learning_rate": 0.00019999792792615252, - "loss": 46.0, - "step": 26811 - }, - { - "epoch": 2.049964638645182, - "grad_norm": 0.0005440912209451199, - "learning_rate": 0.00019999792777152355, - "loss": 46.0, - "step": 26812 - }, - { - "epoch": 2.050041095628572, - "grad_norm": 0.0017145854653790593, - "learning_rate": 0.0001999979276168888, - "loss": 46.0, - "step": 26813 - }, - { - "epoch": 2.0501175526119617, - "grad_norm": 0.003489426104351878, - "learning_rate": 0.00019999792746224832, - "loss": 46.0, - "step": 26814 - }, - { - "epoch": 2.0501940095953515, - "grad_norm": 0.0069366986863315105, - "learning_rate": 0.00019999792730760206, - "loss": 46.0, - "step": 26815 - }, - { - "epoch": 2.0502704665787412, - "grad_norm": 0.0020684825722128153, - "learning_rate": 0.00019999792715295, - "loss": 46.0, - "step": 26816 - }, - { - "epoch": 2.050346923562131, - "grad_norm": 0.0003260245139244944, - "learning_rate": 0.00019999792699829222, - "loss": 46.0, - "step": 26817 - }, - { - "epoch": 2.0504233805455208, - "grad_norm": 0.000811265257652849, - "learning_rate": 0.00019999792684362864, - "loss": 46.0, - "step": 26818 - }, - { - "epoch": 2.0504998375289105, - "grad_norm": 0.004917887505143881, - "learning_rate": 0.00019999792668895928, - "loss": 46.0, - "step": 26819 - }, - { - "epoch": 2.0505762945123, - "grad_norm": 0.0010727765038609505, - "learning_rate": 0.00019999792653428418, - "loss": 46.0, - "step": 26820 - }, - { - "epoch": 2.0506527514956896, - "grad_norm": 0.0016743084415793419, - "learning_rate": 0.0001999979263796033, - "loss": 46.0, - "step": 26821 - }, - { - "epoch": 2.0507292084790794, - "grad_norm": 0.012910761870443821, - "learning_rate": 0.00019999792622491666, - "loss": 46.0, - "step": 26822 - }, - { - "epoch": 2.050805665462469, - "grad_norm": 0.0011691940017044544, - "learning_rate": 0.00019999792607022424, - "loss": 46.0, - "step": 26823 - }, - { - "epoch": 2.050882122445859, - "grad_norm": 0.00044540679664351046, - "learning_rate": 0.00019999792591552604, - "loss": 46.0, - "step": 26824 - }, - { - "epoch": 2.0509585794292486, - "grad_norm": 0.000470242666779086, - "learning_rate": 0.00019999792576082208, - "loss": 46.0, - "step": 26825 - }, - { - "epoch": 2.0510350364126384, - "grad_norm": 0.001184387132525444, - "learning_rate": 0.00019999792560611236, - "loss": 46.0, - "step": 26826 - }, - { - "epoch": 2.051111493396028, - "grad_norm": 0.0026979998219758272, - "learning_rate": 0.00019999792545139687, - "loss": 46.0, - "step": 26827 - }, - { - "epoch": 2.051187950379418, - "grad_norm": 0.0015030615031719208, - "learning_rate": 0.0001999979252966756, - "loss": 46.0, - "step": 26828 - }, - { - "epoch": 2.0512644073628077, - "grad_norm": 0.008087236434221268, - "learning_rate": 0.00019999792514194855, - "loss": 46.0, - "step": 26829 - }, - { - "epoch": 2.051340864346197, - "grad_norm": 0.0014332369901239872, - "learning_rate": 0.00019999792498721575, - "loss": 46.0, - "step": 26830 - }, - { - "epoch": 2.0514173213295868, - "grad_norm": 0.0005862611578777432, - "learning_rate": 0.00019999792483247716, - "loss": 46.0, - "step": 26831 - }, - { - "epoch": 2.0514937783129765, - "grad_norm": 0.00040685906424187124, - "learning_rate": 0.0001999979246777328, - "loss": 46.0, - "step": 26832 - }, - { - "epoch": 2.0515702352963663, - "grad_norm": 0.0007863244391046464, - "learning_rate": 0.0001999979245229827, - "loss": 46.0, - "step": 26833 - }, - { - "epoch": 2.051646692279756, - "grad_norm": 0.0012992905685678124, - "learning_rate": 0.00019999792436822684, - "loss": 46.0, - "step": 26834 - }, - { - "epoch": 2.051723149263146, - "grad_norm": 0.0007701580179855227, - "learning_rate": 0.00019999792421346516, - "loss": 46.0, - "step": 26835 - }, - { - "epoch": 2.0517996062465356, - "grad_norm": 0.0012683391105383635, - "learning_rate": 0.00019999792405869777, - "loss": 46.0, - "step": 26836 - }, - { - "epoch": 2.0518760632299253, - "grad_norm": 0.0012288318248465657, - "learning_rate": 0.00019999792390392455, - "loss": 46.0, - "step": 26837 - }, - { - "epoch": 2.051952520213315, - "grad_norm": 0.0022708745673298836, - "learning_rate": 0.0001999979237491456, - "loss": 46.0, - "step": 26838 - }, - { - "epoch": 2.052028977196705, - "grad_norm": 0.0011641462333500385, - "learning_rate": 0.0001999979235943609, - "loss": 46.0, - "step": 26839 - }, - { - "epoch": 2.0521054341800946, - "grad_norm": 0.0005502661224454641, - "learning_rate": 0.00019999792343957038, - "loss": 46.0, - "step": 26840 - }, - { - "epoch": 2.052181891163484, - "grad_norm": 0.0011832385789602995, - "learning_rate": 0.00019999792328477412, - "loss": 46.0, - "step": 26841 - }, - { - "epoch": 2.0522583481468737, - "grad_norm": 0.002460158197209239, - "learning_rate": 0.00019999792312997208, - "loss": 46.0, - "step": 26842 - }, - { - "epoch": 2.0523348051302635, - "grad_norm": 0.0019206901779398322, - "learning_rate": 0.00019999792297516428, - "loss": 46.0, - "step": 26843 - }, - { - "epoch": 2.052411262113653, - "grad_norm": 0.0009876237018033862, - "learning_rate": 0.0001999979228203507, - "loss": 46.0, - "step": 26844 - }, - { - "epoch": 2.052487719097043, - "grad_norm": 0.0011415391927585006, - "learning_rate": 0.00019999792266553134, - "loss": 46.0, - "step": 26845 - }, - { - "epoch": 2.0525641760804327, - "grad_norm": 0.0009885606123134494, - "learning_rate": 0.00019999792251070622, - "loss": 46.0, - "step": 26846 - }, - { - "epoch": 2.0526406330638225, - "grad_norm": 0.000393895257730037, - "learning_rate": 0.00019999792235587537, - "loss": 46.0, - "step": 26847 - }, - { - "epoch": 2.0527170900472123, - "grad_norm": 0.0018497485434636474, - "learning_rate": 0.00019999792220103867, - "loss": 46.0, - "step": 26848 - }, - { - "epoch": 2.052793547030602, - "grad_norm": 0.0012509553926065564, - "learning_rate": 0.00019999792204619627, - "loss": 46.0, - "step": 26849 - }, - { - "epoch": 2.052870004013992, - "grad_norm": 0.0013123966054990888, - "learning_rate": 0.00019999792189134808, - "loss": 46.0, - "step": 26850 - }, - { - "epoch": 2.0529464609973815, - "grad_norm": 0.0008878799853846431, - "learning_rate": 0.00019999792173649409, - "loss": 46.0, - "step": 26851 - }, - { - "epoch": 2.053022917980771, - "grad_norm": 0.0007017144234851003, - "learning_rate": 0.00019999792158163437, - "loss": 46.0, - "step": 26852 - }, - { - "epoch": 2.0530993749641606, - "grad_norm": 0.0009450880461372435, - "learning_rate": 0.00019999792142676889, - "loss": 46.0, - "step": 26853 - }, - { - "epoch": 2.0531758319475504, - "grad_norm": 0.0024252634029835463, - "learning_rate": 0.0001999979212718976, - "loss": 46.0, - "step": 26854 - }, - { - "epoch": 2.05325228893094, - "grad_norm": 0.00022634721244685352, - "learning_rate": 0.00019999792111702057, - "loss": 46.0, - "step": 26855 - }, - { - "epoch": 2.05332874591433, - "grad_norm": 0.001143791014328599, - "learning_rate": 0.00019999792096213776, - "loss": 46.0, - "step": 26856 - }, - { - "epoch": 2.0534052028977197, - "grad_norm": 0.0012385756708681583, - "learning_rate": 0.0001999979208072492, - "loss": 46.0, - "step": 26857 - }, - { - "epoch": 2.0534816598811094, - "grad_norm": 0.0010120879160240293, - "learning_rate": 0.00019999792065235482, - "loss": 46.0, - "step": 26858 - }, - { - "epoch": 2.053558116864499, - "grad_norm": 0.0013031127164140344, - "learning_rate": 0.00019999792049745475, - "loss": 46.0, - "step": 26859 - }, - { - "epoch": 2.053634573847889, - "grad_norm": 0.0009092942927964032, - "learning_rate": 0.00019999792034254882, - "loss": 46.0, - "step": 26860 - }, - { - "epoch": 2.0537110308312787, - "grad_norm": 0.0010208553867414594, - "learning_rate": 0.00019999792018763718, - "loss": 46.0, - "step": 26861 - }, - { - "epoch": 2.0537874878146685, - "grad_norm": 0.004471789114177227, - "learning_rate": 0.00019999792003271976, - "loss": 46.0, - "step": 26862 - }, - { - "epoch": 2.053863944798058, - "grad_norm": 0.0008084700675681233, - "learning_rate": 0.00019999791987779654, - "loss": 46.0, - "step": 26863 - }, - { - "epoch": 2.0539404017814475, - "grad_norm": 0.008358980529010296, - "learning_rate": 0.0001999979197228676, - "loss": 46.0, - "step": 26864 - }, - { - "epoch": 2.0540168587648373, - "grad_norm": 0.0011034226045012474, - "learning_rate": 0.00019999791956793286, - "loss": 46.0, - "step": 26865 - }, - { - "epoch": 2.054093315748227, - "grad_norm": 0.004931580275297165, - "learning_rate": 0.00019999791941299234, - "loss": 46.0, - "step": 26866 - }, - { - "epoch": 2.054169772731617, - "grad_norm": 0.0019216890214011073, - "learning_rate": 0.00019999791925804608, - "loss": 46.0, - "step": 26867 - }, - { - "epoch": 2.0542462297150066, - "grad_norm": 0.0012013522209599614, - "learning_rate": 0.00019999791910309408, - "loss": 46.0, - "step": 26868 - }, - { - "epoch": 2.0543226866983963, - "grad_norm": 0.0013623957056552172, - "learning_rate": 0.00019999791894813624, - "loss": 46.0, - "step": 26869 - }, - { - "epoch": 2.054399143681786, - "grad_norm": 0.0010508917039260268, - "learning_rate": 0.00019999791879317266, - "loss": 46.0, - "step": 26870 - }, - { - "epoch": 2.054475600665176, - "grad_norm": 0.0007067056722007692, - "learning_rate": 0.0001999979186382033, - "loss": 46.0, - "step": 26871 - }, - { - "epoch": 2.0545520576485656, - "grad_norm": 0.0008177933050319552, - "learning_rate": 0.00019999791848322818, - "loss": 46.0, - "step": 26872 - }, - { - "epoch": 2.0546285146319554, - "grad_norm": 0.009712925180792809, - "learning_rate": 0.0001999979183282473, - "loss": 46.0, - "step": 26873 - }, - { - "epoch": 2.0547049716153447, - "grad_norm": 0.000902174215298146, - "learning_rate": 0.00019999791817326066, - "loss": 46.0, - "step": 26874 - }, - { - "epoch": 2.0547814285987345, - "grad_norm": 0.0019085563253611326, - "learning_rate": 0.00019999791801826824, - "loss": 46.0, - "step": 26875 - }, - { - "epoch": 2.0548578855821242, - "grad_norm": 0.0025579489301890135, - "learning_rate": 0.00019999791786327005, - "loss": 46.0, - "step": 26876 - }, - { - "epoch": 2.054934342565514, - "grad_norm": 0.0005497814272530377, - "learning_rate": 0.00019999791770826609, - "loss": 46.0, - "step": 26877 - }, - { - "epoch": 2.0550107995489038, - "grad_norm": 0.0005591757362708449, - "learning_rate": 0.00019999791755325635, - "loss": 46.0, - "step": 26878 - }, - { - "epoch": 2.0550872565322935, - "grad_norm": 0.0006346144364215434, - "learning_rate": 0.00019999791739824083, - "loss": 46.0, - "step": 26879 - }, - { - "epoch": 2.0551637135156833, - "grad_norm": 0.0009246206027455628, - "learning_rate": 0.00019999791724321955, - "loss": 46.0, - "step": 26880 - }, - { - "epoch": 2.055240170499073, - "grad_norm": 0.0008889971068128943, - "learning_rate": 0.00019999791708819254, - "loss": 46.0, - "step": 26881 - }, - { - "epoch": 2.055316627482463, - "grad_norm": 0.0010018158936873078, - "learning_rate": 0.0001999979169331597, - "loss": 46.0, - "step": 26882 - }, - { - "epoch": 2.0553930844658526, - "grad_norm": 0.0008579955901950598, - "learning_rate": 0.00019999791677812115, - "loss": 46.0, - "step": 26883 - }, - { - "epoch": 2.0554695414492423, - "grad_norm": 0.0028697752859443426, - "learning_rate": 0.00019999791662307677, - "loss": 46.0, - "step": 26884 - }, - { - "epoch": 2.0555459984326316, - "grad_norm": 0.0020914908964186907, - "learning_rate": 0.00019999791646802665, - "loss": 46.0, - "step": 26885 - }, - { - "epoch": 2.0556224554160214, - "grad_norm": 0.0008215868147090077, - "learning_rate": 0.00019999791631297077, - "loss": 46.0, - "step": 26886 - }, - { - "epoch": 2.055698912399411, - "grad_norm": 0.0004442485223989934, - "learning_rate": 0.00019999791615790913, - "loss": 46.0, - "step": 26887 - }, - { - "epoch": 2.055775369382801, - "grad_norm": 0.006440585479140282, - "learning_rate": 0.0001999979160028417, - "loss": 46.0, - "step": 26888 - }, - { - "epoch": 2.0558518263661907, - "grad_norm": 0.0020210817456245422, - "learning_rate": 0.0001999979158477685, - "loss": 46.0, - "step": 26889 - }, - { - "epoch": 2.0559282833495804, - "grad_norm": 0.0007858136668801308, - "learning_rate": 0.00019999791569268952, - "loss": 46.0, - "step": 26890 - }, - { - "epoch": 2.05600474033297, - "grad_norm": 0.0006385630113072693, - "learning_rate": 0.00019999791553760478, - "loss": 46.0, - "step": 26891 - }, - { - "epoch": 2.05608119731636, - "grad_norm": 0.0007511741714552045, - "learning_rate": 0.0001999979153825143, - "loss": 46.0, - "step": 26892 - }, - { - "epoch": 2.0561576542997497, - "grad_norm": 0.0018868870101869106, - "learning_rate": 0.000199997915227418, - "loss": 46.0, - "step": 26893 - }, - { - "epoch": 2.0562341112831395, - "grad_norm": 0.0011995711829513311, - "learning_rate": 0.00019999791507231595, - "loss": 46.0, - "step": 26894 - }, - { - "epoch": 2.0563105682665292, - "grad_norm": 0.0007007892127148807, - "learning_rate": 0.00019999791491720817, - "loss": 46.0, - "step": 26895 - }, - { - "epoch": 2.0563870252499186, - "grad_norm": 0.00204948871396482, - "learning_rate": 0.00019999791476209457, - "loss": 46.0, - "step": 26896 - }, - { - "epoch": 2.0564634822333083, - "grad_norm": 0.0017381446668878198, - "learning_rate": 0.00019999791460697521, - "loss": 46.0, - "step": 26897 - }, - { - "epoch": 2.056539939216698, - "grad_norm": 0.0002926558372564614, - "learning_rate": 0.0001999979144518501, - "loss": 46.0, - "step": 26898 - }, - { - "epoch": 2.056616396200088, - "grad_norm": 0.0029764098580926657, - "learning_rate": 0.00019999791429671922, - "loss": 46.0, - "step": 26899 - }, - { - "epoch": 2.0566928531834776, - "grad_norm": 0.0014221267774701118, - "learning_rate": 0.00019999791414158254, - "loss": 46.0, - "step": 26900 - }, - { - "epoch": 2.0567693101668674, - "grad_norm": 0.0029510289896279573, - "learning_rate": 0.00019999791398644012, - "loss": 46.0, - "step": 26901 - }, - { - "epoch": 2.056845767150257, - "grad_norm": 0.0007957772468216717, - "learning_rate": 0.0001999979138312919, - "loss": 46.0, - "step": 26902 - }, - { - "epoch": 2.056922224133647, - "grad_norm": 0.0019415791612118483, - "learning_rate": 0.00019999791367613797, - "loss": 46.0, - "step": 26903 - }, - { - "epoch": 2.0569986811170367, - "grad_norm": 0.004954686388373375, - "learning_rate": 0.00019999791352097823, - "loss": 46.0, - "step": 26904 - }, - { - "epoch": 2.0570751381004264, - "grad_norm": 0.0011634246911853552, - "learning_rate": 0.00019999791336581271, - "loss": 46.0, - "step": 26905 - }, - { - "epoch": 2.057151595083816, - "grad_norm": 0.0009029583306983113, - "learning_rate": 0.00019999791321064145, - "loss": 46.0, - "step": 26906 - }, - { - "epoch": 2.0572280520672055, - "grad_norm": 0.00045587020576931536, - "learning_rate": 0.00019999791305546437, - "loss": 46.0, - "step": 26907 - }, - { - "epoch": 2.0573045090505953, - "grad_norm": 0.0006289492012001574, - "learning_rate": 0.0001999979129002816, - "loss": 46.0, - "step": 26908 - }, - { - "epoch": 2.057380966033985, - "grad_norm": 0.0009635791648179293, - "learning_rate": 0.000199997912745093, - "loss": 46.0, - "step": 26909 - }, - { - "epoch": 2.0574574230173748, - "grad_norm": 0.0024948331993073225, - "learning_rate": 0.00019999791258989866, - "loss": 46.0, - "step": 26910 - }, - { - "epoch": 2.0575338800007645, - "grad_norm": 0.0032892015296965837, - "learning_rate": 0.00019999791243469853, - "loss": 46.0, - "step": 26911 - }, - { - "epoch": 2.0576103369841543, - "grad_norm": 0.0022792054805904627, - "learning_rate": 0.00019999791227949266, - "loss": 46.0, - "step": 26912 - }, - { - "epoch": 2.057686793967544, - "grad_norm": 0.0016760226571932435, - "learning_rate": 0.000199997912124281, - "loss": 46.0, - "step": 26913 - }, - { - "epoch": 2.057763250950934, - "grad_norm": 0.0016270535998046398, - "learning_rate": 0.00019999791196906354, - "loss": 46.0, - "step": 26914 - }, - { - "epoch": 2.0578397079343236, - "grad_norm": 0.0012395388912409544, - "learning_rate": 0.00019999791181384032, - "loss": 46.0, - "step": 26915 - }, - { - "epoch": 2.0579161649177133, - "grad_norm": 0.0005843533435836434, - "learning_rate": 0.00019999791165861138, - "loss": 46.0, - "step": 26916 - }, - { - "epoch": 2.057992621901103, - "grad_norm": 0.000997414579614997, - "learning_rate": 0.00019999791150337665, - "loss": 46.0, - "step": 26917 - }, - { - "epoch": 2.0580690788844924, - "grad_norm": 0.001252759713679552, - "learning_rate": 0.00019999791134813613, - "loss": 46.0, - "step": 26918 - }, - { - "epoch": 2.058145535867882, - "grad_norm": 0.0006714845658279955, - "learning_rate": 0.00019999791119288985, - "loss": 46.0, - "step": 26919 - }, - { - "epoch": 2.058221992851272, - "grad_norm": 0.002894036937505007, - "learning_rate": 0.00019999791103763782, - "loss": 46.0, - "step": 26920 - }, - { - "epoch": 2.0582984498346617, - "grad_norm": 0.00043962011113762856, - "learning_rate": 0.00019999791088238, - "loss": 46.0, - "step": 26921 - }, - { - "epoch": 2.0583749068180515, - "grad_norm": 0.0007253745570778847, - "learning_rate": 0.00019999791072711643, - "loss": 46.0, - "step": 26922 - }, - { - "epoch": 2.0584513638014412, - "grad_norm": 0.0009464692557230592, - "learning_rate": 0.00019999791057184708, - "loss": 46.0, - "step": 26923 - }, - { - "epoch": 2.058527820784831, - "grad_norm": 0.0027029828634113073, - "learning_rate": 0.00019999791041657195, - "loss": 46.0, - "step": 26924 - }, - { - "epoch": 2.0586042777682207, - "grad_norm": 0.0006274234619922936, - "learning_rate": 0.00019999791026129106, - "loss": 46.0, - "step": 26925 - }, - { - "epoch": 2.0586807347516105, - "grad_norm": 0.01396503672003746, - "learning_rate": 0.00019999791010600438, - "loss": 46.0, - "step": 26926 - }, - { - "epoch": 2.0587571917350003, - "grad_norm": 0.012848246842622757, - "learning_rate": 0.00019999790995071194, - "loss": 46.0, - "step": 26927 - }, - { - "epoch": 2.05883364871839, - "grad_norm": 0.00488661602139473, - "learning_rate": 0.00019999790979541375, - "loss": 46.0, - "step": 26928 - }, - { - "epoch": 2.0589101057017793, - "grad_norm": 0.0011477720690891147, - "learning_rate": 0.00019999790964010978, - "loss": 46.0, - "step": 26929 - }, - { - "epoch": 2.058986562685169, - "grad_norm": 0.0011884690029546618, - "learning_rate": 0.00019999790948480004, - "loss": 46.0, - "step": 26930 - }, - { - "epoch": 2.059063019668559, - "grad_norm": 0.0014746342785656452, - "learning_rate": 0.00019999790932948456, - "loss": 46.0, - "step": 26931 - }, - { - "epoch": 2.0591394766519486, - "grad_norm": 0.0021771288011223078, - "learning_rate": 0.00019999790917416324, - "loss": 46.0, - "step": 26932 - }, - { - "epoch": 2.0592159336353384, - "grad_norm": 0.0027020389679819345, - "learning_rate": 0.00019999790901883619, - "loss": 46.0, - "step": 26933 - }, - { - "epoch": 2.059292390618728, - "grad_norm": 0.0006614235462620854, - "learning_rate": 0.00019999790886350338, - "loss": 46.0, - "step": 26934 - }, - { - "epoch": 2.059368847602118, - "grad_norm": 0.0030041332356631756, - "learning_rate": 0.0001999979087081648, - "loss": 46.0, - "step": 26935 - }, - { - "epoch": 2.0594453045855077, - "grad_norm": 0.0033881342969834805, - "learning_rate": 0.00019999790855282042, - "loss": 46.0, - "step": 26936 - }, - { - "epoch": 2.0595217615688974, - "grad_norm": 0.0007130822050385177, - "learning_rate": 0.00019999790839747033, - "loss": 46.0, - "step": 26937 - }, - { - "epoch": 2.059598218552287, - "grad_norm": 0.0012841123389080167, - "learning_rate": 0.00019999790824211443, - "loss": 46.0, - "step": 26938 - }, - { - "epoch": 2.059674675535677, - "grad_norm": 0.00044165831059217453, - "learning_rate": 0.00019999790808675276, - "loss": 46.0, - "step": 26939 - }, - { - "epoch": 2.0597511325190663, - "grad_norm": 0.000506731157656759, - "learning_rate": 0.00019999790793138534, - "loss": 46.0, - "step": 26940 - }, - { - "epoch": 2.059827589502456, - "grad_norm": 0.0023624610621482134, - "learning_rate": 0.00019999790777601212, - "loss": 46.0, - "step": 26941 - }, - { - "epoch": 2.059904046485846, - "grad_norm": 0.0014661407331004739, - "learning_rate": 0.00019999790762063318, - "loss": 46.0, - "step": 26942 - }, - { - "epoch": 2.0599805034692356, - "grad_norm": 0.0004016837337985635, - "learning_rate": 0.00019999790746524842, - "loss": 46.0, - "step": 26943 - }, - { - "epoch": 2.0600569604526253, - "grad_norm": 0.0009443744784221053, - "learning_rate": 0.0001999979073098579, - "loss": 46.0, - "step": 26944 - }, - { - "epoch": 2.060133417436015, - "grad_norm": 0.0008529173210263252, - "learning_rate": 0.00019999790715446162, - "loss": 46.0, - "step": 26945 - }, - { - "epoch": 2.060209874419405, - "grad_norm": 0.0005631794920191169, - "learning_rate": 0.0001999979069990596, - "loss": 46.0, - "step": 26946 - }, - { - "epoch": 2.0602863314027946, - "grad_norm": 0.00044998229714110494, - "learning_rate": 0.00019999790684365176, - "loss": 46.0, - "step": 26947 - }, - { - "epoch": 2.0603627883861844, - "grad_norm": 0.0022053855936974287, - "learning_rate": 0.00019999790668823818, - "loss": 46.0, - "step": 26948 - }, - { - "epoch": 2.060439245369574, - "grad_norm": 0.0008079966064542532, - "learning_rate": 0.00019999790653281883, - "loss": 46.0, - "step": 26949 - }, - { - "epoch": 2.060515702352964, - "grad_norm": 0.0005781477666459978, - "learning_rate": 0.0001999979063773937, - "loss": 46.0, - "step": 26950 - }, - { - "epoch": 2.060592159336353, - "grad_norm": 0.002677736571058631, - "learning_rate": 0.0001999979062219628, - "loss": 46.0, - "step": 26951 - }, - { - "epoch": 2.060668616319743, - "grad_norm": 0.0018682688241824508, - "learning_rate": 0.00019999790606652616, - "loss": 46.0, - "step": 26952 - }, - { - "epoch": 2.0607450733031327, - "grad_norm": 0.007014586590230465, - "learning_rate": 0.0001999979059110837, - "loss": 46.0, - "step": 26953 - }, - { - "epoch": 2.0608215302865225, - "grad_norm": 0.000522536167409271, - "learning_rate": 0.0001999979057556355, - "loss": 46.0, - "step": 26954 - }, - { - "epoch": 2.0608979872699122, - "grad_norm": 0.0007256384706124663, - "learning_rate": 0.00019999790560018153, - "loss": 46.0, - "step": 26955 - }, - { - "epoch": 2.060974444253302, - "grad_norm": 0.0012019777204841375, - "learning_rate": 0.0001999979054447218, - "loss": 46.0, - "step": 26956 - }, - { - "epoch": 2.0610509012366918, - "grad_norm": 0.001009106868878007, - "learning_rate": 0.00019999790528925625, - "loss": 46.0, - "step": 26957 - }, - { - "epoch": 2.0611273582200815, - "grad_norm": 0.000709208135958761, - "learning_rate": 0.00019999790513378497, - "loss": 46.0, - "step": 26958 - }, - { - "epoch": 2.0612038152034713, - "grad_norm": 0.0006043802131898701, - "learning_rate": 0.00019999790497830794, - "loss": 46.0, - "step": 26959 - }, - { - "epoch": 2.061280272186861, - "grad_norm": 0.0005293880822136998, - "learning_rate": 0.00019999790482282513, - "loss": 46.0, - "step": 26960 - }, - { - "epoch": 2.0613567291702504, - "grad_norm": 0.0008860472589731216, - "learning_rate": 0.00019999790466733656, - "loss": 46.0, - "step": 26961 - }, - { - "epoch": 2.06143318615364, - "grad_norm": 0.004374388605356216, - "learning_rate": 0.00019999790451184218, - "loss": 46.0, - "step": 26962 - }, - { - "epoch": 2.06150964313703, - "grad_norm": 0.0013512099394574761, - "learning_rate": 0.00019999790435634205, - "loss": 46.0, - "step": 26963 - }, - { - "epoch": 2.0615861001204197, - "grad_norm": 0.0013697885442525148, - "learning_rate": 0.00019999790420083616, - "loss": 46.0, - "step": 26964 - }, - { - "epoch": 2.0616625571038094, - "grad_norm": 0.0012314216000959277, - "learning_rate": 0.0001999979040453245, - "loss": 46.0, - "step": 26965 - }, - { - "epoch": 2.061739014087199, - "grad_norm": 0.010410306043922901, - "learning_rate": 0.00019999790388980707, - "loss": 46.0, - "step": 26966 - }, - { - "epoch": 2.061815471070589, - "grad_norm": 0.00029443763196468353, - "learning_rate": 0.00019999790373428385, - "loss": 46.0, - "step": 26967 - }, - { - "epoch": 2.0618919280539787, - "grad_norm": 0.0008221578318625689, - "learning_rate": 0.00019999790357875489, - "loss": 46.0, - "step": 26968 - }, - { - "epoch": 2.0619683850373685, - "grad_norm": 0.00038452661829069257, - "learning_rate": 0.00019999790342322015, - "loss": 46.0, - "step": 26969 - }, - { - "epoch": 2.062044842020758, - "grad_norm": 0.0008576579275541008, - "learning_rate": 0.0001999979032676796, - "loss": 46.0, - "step": 26970 - }, - { - "epoch": 2.062121299004148, - "grad_norm": 0.0021967049688100815, - "learning_rate": 0.00019999790311213332, - "loss": 46.0, - "step": 26971 - }, - { - "epoch": 2.0621977559875373, - "grad_norm": 0.0037471975665539503, - "learning_rate": 0.00019999790295658127, - "loss": 46.0, - "step": 26972 - }, - { - "epoch": 2.062274212970927, - "grad_norm": 0.001114453887566924, - "learning_rate": 0.00019999790280102346, - "loss": 46.0, - "step": 26973 - }, - { - "epoch": 2.062350669954317, - "grad_norm": 0.01322798803448677, - "learning_rate": 0.00019999790264545989, - "loss": 46.0, - "step": 26974 - }, - { - "epoch": 2.0624271269377066, - "grad_norm": 0.00037531653651967645, - "learning_rate": 0.0001999979024898905, - "loss": 46.0, - "step": 26975 - }, - { - "epoch": 2.0625035839210963, - "grad_norm": 0.002553671132773161, - "learning_rate": 0.00019999790233431538, - "loss": 46.0, - "step": 26976 - }, - { - "epoch": 2.062580040904486, - "grad_norm": 0.002734871581196785, - "learning_rate": 0.0001999979021787345, - "loss": 46.0, - "step": 26977 - }, - { - "epoch": 2.062656497887876, - "grad_norm": 0.0004395974974613637, - "learning_rate": 0.00019999790202314782, - "loss": 46.0, - "step": 26978 - }, - { - "epoch": 2.0627329548712656, - "grad_norm": 0.0006870915531180799, - "learning_rate": 0.00019999790186755537, - "loss": 46.0, - "step": 26979 - }, - { - "epoch": 2.0628094118546554, - "grad_norm": 0.001701031462289393, - "learning_rate": 0.00019999790171195718, - "loss": 46.0, - "step": 26980 - }, - { - "epoch": 2.062885868838045, - "grad_norm": 0.0014159544371068478, - "learning_rate": 0.00019999790155635322, - "loss": 46.0, - "step": 26981 - }, - { - "epoch": 2.062962325821435, - "grad_norm": 0.008852141909301281, - "learning_rate": 0.00019999790140074345, - "loss": 46.0, - "step": 26982 - }, - { - "epoch": 2.0630387828048242, - "grad_norm": 0.0014562713913619518, - "learning_rate": 0.00019999790124512794, - "loss": 46.0, - "step": 26983 - }, - { - "epoch": 2.063115239788214, - "grad_norm": 0.0004403952043503523, - "learning_rate": 0.00019999790108950666, - "loss": 46.0, - "step": 26984 - }, - { - "epoch": 2.0631916967716037, - "grad_norm": 0.00234870333224535, - "learning_rate": 0.00019999790093387963, - "loss": 46.0, - "step": 26985 - }, - { - "epoch": 2.0632681537549935, - "grad_norm": 0.0035048872232437134, - "learning_rate": 0.0001999979007782468, - "loss": 46.0, - "step": 26986 - }, - { - "epoch": 2.0633446107383833, - "grad_norm": 0.0006169637781567872, - "learning_rate": 0.00019999790062260822, - "loss": 46.0, - "step": 26987 - }, - { - "epoch": 2.063421067721773, - "grad_norm": 0.001449449686333537, - "learning_rate": 0.00019999790046696384, - "loss": 46.0, - "step": 26988 - }, - { - "epoch": 2.063497524705163, - "grad_norm": 0.0023134795483201742, - "learning_rate": 0.00019999790031131372, - "loss": 46.0, - "step": 26989 - }, - { - "epoch": 2.0635739816885525, - "grad_norm": 0.0005178030696697533, - "learning_rate": 0.00019999790015565782, - "loss": 46.0, - "step": 26990 - }, - { - "epoch": 2.0636504386719423, - "grad_norm": 0.001162930391728878, - "learning_rate": 0.00019999789999999615, - "loss": 46.0, - "step": 26991 - }, - { - "epoch": 2.063726895655332, - "grad_norm": 0.005006987601518631, - "learning_rate": 0.0001999978998443287, - "loss": 46.0, - "step": 26992 - }, - { - "epoch": 2.063803352638722, - "grad_norm": 0.0010016504675149918, - "learning_rate": 0.0001999978996886555, - "loss": 46.0, - "step": 26993 - }, - { - "epoch": 2.063879809622111, - "grad_norm": 0.002906529698520899, - "learning_rate": 0.00019999789953297652, - "loss": 46.0, - "step": 26994 - }, - { - "epoch": 2.063956266605501, - "grad_norm": 0.0005867416621185839, - "learning_rate": 0.0001999978993772918, - "loss": 46.0, - "step": 26995 - }, - { - "epoch": 2.0640327235888907, - "grad_norm": 0.00039372959872707725, - "learning_rate": 0.00019999789922160125, - "loss": 46.0, - "step": 26996 - }, - { - "epoch": 2.0641091805722804, - "grad_norm": 0.001543025253340602, - "learning_rate": 0.000199997899065905, - "loss": 46.0, - "step": 26997 - }, - { - "epoch": 2.06418563755567, - "grad_norm": 0.0013029432157054543, - "learning_rate": 0.00019999789891020294, - "loss": 46.0, - "step": 26998 - }, - { - "epoch": 2.06426209453906, - "grad_norm": 0.0007536399061791599, - "learning_rate": 0.00019999789875449508, - "loss": 46.0, - "step": 26999 - }, - { - "epoch": 2.0643385515224497, - "grad_norm": 0.0012177103199064732, - "learning_rate": 0.0001999978985987815, - "loss": 46.0, - "step": 27000 - }, - { - "epoch": 2.0644150085058395, - "grad_norm": 0.0004639303660951555, - "learning_rate": 0.00019999789844306215, - "loss": 46.0, - "step": 27001 - }, - { - "epoch": 2.0644914654892292, - "grad_norm": 0.0006161812343634665, - "learning_rate": 0.00019999789828733703, - "loss": 46.0, - "step": 27002 - }, - { - "epoch": 2.064567922472619, - "grad_norm": 0.0008269344107247889, - "learning_rate": 0.0001999978981316061, - "loss": 46.0, - "step": 27003 - }, - { - "epoch": 2.0646443794560088, - "grad_norm": 0.0009487119968980551, - "learning_rate": 0.00019999789797586943, - "loss": 46.0, - "step": 27004 - }, - { - "epoch": 2.064720836439398, - "grad_norm": 0.0010664633009582758, - "learning_rate": 0.000199997897820127, - "loss": 46.0, - "step": 27005 - }, - { - "epoch": 2.064797293422788, - "grad_norm": 0.002439675386995077, - "learning_rate": 0.0001999978976643788, - "loss": 46.0, - "step": 27006 - }, - { - "epoch": 2.0648737504061776, - "grad_norm": 0.001499276957474649, - "learning_rate": 0.00019999789750862484, - "loss": 46.0, - "step": 27007 - }, - { - "epoch": 2.0649502073895674, - "grad_norm": 0.0008093096548691392, - "learning_rate": 0.00019999789735286507, - "loss": 46.0, - "step": 27008 - }, - { - "epoch": 2.065026664372957, - "grad_norm": 0.001459756982512772, - "learning_rate": 0.00019999789719709956, - "loss": 46.0, - "step": 27009 - }, - { - "epoch": 2.065103121356347, - "grad_norm": 0.0008315164595842361, - "learning_rate": 0.00019999789704132828, - "loss": 46.0, - "step": 27010 - }, - { - "epoch": 2.0651795783397366, - "grad_norm": 0.000978629570454359, - "learning_rate": 0.00019999789688555122, - "loss": 46.0, - "step": 27011 - }, - { - "epoch": 2.0652560353231264, - "grad_norm": 0.0014649737859144807, - "learning_rate": 0.0001999978967297684, - "loss": 46.0, - "step": 27012 - }, - { - "epoch": 2.065332492306516, - "grad_norm": 0.0019375067204236984, - "learning_rate": 0.00019999789657397982, - "loss": 46.0, - "step": 27013 - }, - { - "epoch": 2.065408949289906, - "grad_norm": 0.0020096905063837767, - "learning_rate": 0.00019999789641818544, - "loss": 46.0, - "step": 27014 - }, - { - "epoch": 2.0654854062732957, - "grad_norm": 0.0005466511356644332, - "learning_rate": 0.0001999978962623853, - "loss": 46.0, - "step": 27015 - }, - { - "epoch": 2.065561863256685, - "grad_norm": 0.0015550549142062664, - "learning_rate": 0.0001999978961065794, - "loss": 46.0, - "step": 27016 - }, - { - "epoch": 2.0656383202400748, - "grad_norm": 0.0024740565568208694, - "learning_rate": 0.00019999789595076775, - "loss": 46.0, - "step": 27017 - }, - { - "epoch": 2.0657147772234645, - "grad_norm": 0.0009245170513167977, - "learning_rate": 0.0001999978957949503, - "loss": 46.0, - "step": 27018 - }, - { - "epoch": 2.0657912342068543, - "grad_norm": 0.004177370108664036, - "learning_rate": 0.0001999978956391271, - "loss": 46.0, - "step": 27019 - }, - { - "epoch": 2.065867691190244, - "grad_norm": 0.0007365798228420317, - "learning_rate": 0.0001999978954832981, - "loss": 46.0, - "step": 27020 - }, - { - "epoch": 2.065944148173634, - "grad_norm": 0.0013109984574839473, - "learning_rate": 0.00019999789532746336, - "loss": 46.0, - "step": 27021 - }, - { - "epoch": 2.0660206051570236, - "grad_norm": 0.0008615059196017683, - "learning_rate": 0.00019999789517162283, - "loss": 46.0, - "step": 27022 - }, - { - "epoch": 2.0660970621404133, - "grad_norm": 0.0005882181576453149, - "learning_rate": 0.00019999789501577654, - "loss": 46.0, - "step": 27023 - }, - { - "epoch": 2.066173519123803, - "grad_norm": 0.0008883198606781662, - "learning_rate": 0.00019999789485992449, - "loss": 46.0, - "step": 27024 - }, - { - "epoch": 2.066249976107193, - "grad_norm": 0.0009025599574670196, - "learning_rate": 0.00019999789470406666, - "loss": 46.0, - "step": 27025 - }, - { - "epoch": 2.0663264330905826, - "grad_norm": 0.0005582623416557908, - "learning_rate": 0.00019999789454820305, - "loss": 46.0, - "step": 27026 - }, - { - "epoch": 2.066402890073972, - "grad_norm": 0.003043276024982333, - "learning_rate": 0.0001999978943923337, - "loss": 46.0, - "step": 27027 - }, - { - "epoch": 2.0664793470573617, - "grad_norm": 0.0008435086929239333, - "learning_rate": 0.00019999789423645855, - "loss": 46.0, - "step": 27028 - }, - { - "epoch": 2.0665558040407515, - "grad_norm": 0.0020478428341448307, - "learning_rate": 0.00019999789408057766, - "loss": 46.0, - "step": 27029 - }, - { - "epoch": 2.066632261024141, - "grad_norm": 0.0008808975107967854, - "learning_rate": 0.000199997893924691, - "loss": 46.0, - "step": 27030 - }, - { - "epoch": 2.066708718007531, - "grad_norm": 0.0030391705222427845, - "learning_rate": 0.00019999789376879857, - "loss": 46.0, - "step": 27031 - }, - { - "epoch": 2.0667851749909207, - "grad_norm": 0.0026454650796949863, - "learning_rate": 0.00019999789361290033, - "loss": 46.0, - "step": 27032 - }, - { - "epoch": 2.0668616319743105, - "grad_norm": 0.0004930328577756882, - "learning_rate": 0.00019999789345699636, - "loss": 46.0, - "step": 27033 - }, - { - "epoch": 2.0669380889577003, - "grad_norm": 0.0008861568639986217, - "learning_rate": 0.0001999978933010866, - "loss": 46.0, - "step": 27034 - }, - { - "epoch": 2.06701454594109, - "grad_norm": 0.0018538491567596793, - "learning_rate": 0.0001999978931451711, - "loss": 46.0, - "step": 27035 - }, - { - "epoch": 2.06709100292448, - "grad_norm": 0.0005827159038744867, - "learning_rate": 0.0001999978929892498, - "loss": 46.0, - "step": 27036 - }, - { - "epoch": 2.0671674599078695, - "grad_norm": 0.0014680143212899566, - "learning_rate": 0.00019999789283332273, - "loss": 46.0, - "step": 27037 - }, - { - "epoch": 2.067243916891259, - "grad_norm": 0.0009790032636374235, - "learning_rate": 0.0001999978926773899, - "loss": 46.0, - "step": 27038 - }, - { - "epoch": 2.0673203738746486, - "grad_norm": 0.0013785073533654213, - "learning_rate": 0.00019999789252145132, - "loss": 46.0, - "step": 27039 - }, - { - "epoch": 2.0673968308580384, - "grad_norm": 0.0017776767490431666, - "learning_rate": 0.00019999789236550694, - "loss": 46.0, - "step": 27040 - }, - { - "epoch": 2.067473287841428, - "grad_norm": 0.00031912574195303023, - "learning_rate": 0.00019999789220955682, - "loss": 46.0, - "step": 27041 - }, - { - "epoch": 2.067549744824818, - "grad_norm": 0.0017054181080311537, - "learning_rate": 0.00019999789205360093, - "loss": 46.0, - "step": 27042 - }, - { - "epoch": 2.0676262018082077, - "grad_norm": 0.0004103182291146368, - "learning_rate": 0.00019999789189763926, - "loss": 46.0, - "step": 27043 - }, - { - "epoch": 2.0677026587915974, - "grad_norm": 0.002302873879671097, - "learning_rate": 0.0001999978917416718, - "loss": 46.0, - "step": 27044 - }, - { - "epoch": 2.067779115774987, - "grad_norm": 0.0009202692890539765, - "learning_rate": 0.0001999978915856986, - "loss": 46.0, - "step": 27045 - }, - { - "epoch": 2.067855572758377, - "grad_norm": 0.0006078872247599065, - "learning_rate": 0.0001999978914297196, - "loss": 46.0, - "step": 27046 - }, - { - "epoch": 2.0679320297417667, - "grad_norm": 0.0006707943975925446, - "learning_rate": 0.00019999789127373485, - "loss": 46.0, - "step": 27047 - }, - { - "epoch": 2.0680084867251565, - "grad_norm": 0.001298778923228383, - "learning_rate": 0.00019999789111774434, - "loss": 46.0, - "step": 27048 - }, - { - "epoch": 2.068084943708546, - "grad_norm": 0.0007975633488968015, - "learning_rate": 0.00019999789096174805, - "loss": 46.0, - "step": 27049 - }, - { - "epoch": 2.0681614006919355, - "grad_norm": 0.0030256900936365128, - "learning_rate": 0.000199997890805746, - "loss": 46.0, - "step": 27050 - }, - { - "epoch": 2.0682378576753253, - "grad_norm": 0.0021144249476492405, - "learning_rate": 0.00019999789064973817, - "loss": 46.0, - "step": 27051 - }, - { - "epoch": 2.068314314658715, - "grad_norm": 0.0005385276162996888, - "learning_rate": 0.00019999789049372457, - "loss": 46.0, - "step": 27052 - }, - { - "epoch": 2.068390771642105, - "grad_norm": 0.0002385624684393406, - "learning_rate": 0.0001999978903377052, - "loss": 46.0, - "step": 27053 - }, - { - "epoch": 2.0684672286254946, - "grad_norm": 0.0005000167293474078, - "learning_rate": 0.00019999789018168004, - "loss": 46.0, - "step": 27054 - }, - { - "epoch": 2.0685436856088844, - "grad_norm": 0.0010154960909858346, - "learning_rate": 0.00019999789002564914, - "loss": 46.0, - "step": 27055 - }, - { - "epoch": 2.068620142592274, - "grad_norm": 0.002301678992807865, - "learning_rate": 0.00019999788986961248, - "loss": 46.0, - "step": 27056 - }, - { - "epoch": 2.068696599575664, - "grad_norm": 0.0034767414908856153, - "learning_rate": 0.00019999788971357003, - "loss": 46.0, - "step": 27057 - }, - { - "epoch": 2.0687730565590536, - "grad_norm": 0.00034108993713743985, - "learning_rate": 0.00019999788955752182, - "loss": 46.0, - "step": 27058 - }, - { - "epoch": 2.0688495135424434, - "grad_norm": 0.0010133845498785377, - "learning_rate": 0.00019999788940146786, - "loss": 46.0, - "step": 27059 - }, - { - "epoch": 2.0689259705258327, - "grad_norm": 0.0009166516829282045, - "learning_rate": 0.00019999788924540807, - "loss": 46.0, - "step": 27060 - }, - { - "epoch": 2.0690024275092225, - "grad_norm": 0.0005939473048783839, - "learning_rate": 0.00019999788908934256, - "loss": 46.0, - "step": 27061 - }, - { - "epoch": 2.0690788844926122, - "grad_norm": 0.0009271614253520966, - "learning_rate": 0.00019999788893327127, - "loss": 46.0, - "step": 27062 - }, - { - "epoch": 2.069155341476002, - "grad_norm": 0.0013534929603338242, - "learning_rate": 0.00019999788877719422, - "loss": 46.0, - "step": 27063 - }, - { - "epoch": 2.0692317984593918, - "grad_norm": 0.0010275986278429627, - "learning_rate": 0.00019999788862111136, - "loss": 46.0, - "step": 27064 - }, - { - "epoch": 2.0693082554427815, - "grad_norm": 0.0010461921337991953, - "learning_rate": 0.0001999978884650228, - "loss": 46.0, - "step": 27065 - }, - { - "epoch": 2.0693847124261713, - "grad_norm": 0.0014470914611592889, - "learning_rate": 0.0001999978883089284, - "loss": 46.0, - "step": 27066 - }, - { - "epoch": 2.069461169409561, - "grad_norm": 0.0006521940813399851, - "learning_rate": 0.00019999788815282826, - "loss": 46.0, - "step": 27067 - }, - { - "epoch": 2.069537626392951, - "grad_norm": 0.0011575151002034545, - "learning_rate": 0.00019999788799672237, - "loss": 46.0, - "step": 27068 - }, - { - "epoch": 2.0696140833763406, - "grad_norm": 0.0009787343442440033, - "learning_rate": 0.0001999978878406107, - "loss": 46.0, - "step": 27069 - }, - { - "epoch": 2.06969054035973, - "grad_norm": 0.0006229884456843138, - "learning_rate": 0.00019999788768449323, - "loss": 46.0, - "step": 27070 - }, - { - "epoch": 2.0697669973431196, - "grad_norm": 0.005218871869146824, - "learning_rate": 0.00019999788752837004, - "loss": 46.0, - "step": 27071 - }, - { - "epoch": 2.0698434543265094, - "grad_norm": 0.011910800822079182, - "learning_rate": 0.00019999788737224105, - "loss": 46.0, - "step": 27072 - }, - { - "epoch": 2.069919911309899, - "grad_norm": 0.0005000763921998441, - "learning_rate": 0.00019999788721610632, - "loss": 46.0, - "step": 27073 - }, - { - "epoch": 2.069996368293289, - "grad_norm": 0.0006613932782784104, - "learning_rate": 0.00019999788705996578, - "loss": 46.0, - "step": 27074 - }, - { - "epoch": 2.0700728252766787, - "grad_norm": 0.0008409673464484513, - "learning_rate": 0.0001999978869038195, - "loss": 46.0, - "step": 27075 - }, - { - "epoch": 2.0701492822600684, - "grad_norm": 0.00089884246699512, - "learning_rate": 0.00019999788674766742, - "loss": 46.0, - "step": 27076 - }, - { - "epoch": 2.070225739243458, - "grad_norm": 0.002012914279475808, - "learning_rate": 0.0001999978865915096, - "loss": 46.0, - "step": 27077 - }, - { - "epoch": 2.070302196226848, - "grad_norm": 0.001167898066341877, - "learning_rate": 0.000199997886435346, - "loss": 46.0, - "step": 27078 - }, - { - "epoch": 2.0703786532102377, - "grad_norm": 0.0015673909801989794, - "learning_rate": 0.0001999978862791766, - "loss": 46.0, - "step": 27079 - }, - { - "epoch": 2.0704551101936275, - "grad_norm": 0.023026851937174797, - "learning_rate": 0.0001999978861230015, - "loss": 46.0, - "step": 27080 - }, - { - "epoch": 2.0705315671770173, - "grad_norm": 0.00261838105507195, - "learning_rate": 0.00019999788596682057, - "loss": 46.0, - "step": 27081 - }, - { - "epoch": 2.0706080241604066, - "grad_norm": 0.0005020395037718117, - "learning_rate": 0.0001999978858106339, - "loss": 46.0, - "step": 27082 - }, - { - "epoch": 2.0706844811437963, - "grad_norm": 0.002186959143728018, - "learning_rate": 0.00019999788565444143, - "loss": 46.0, - "step": 27083 - }, - { - "epoch": 2.070760938127186, - "grad_norm": 0.000831065874081105, - "learning_rate": 0.00019999788549824321, - "loss": 46.0, - "step": 27084 - }, - { - "epoch": 2.070837395110576, - "grad_norm": 0.000870771415065974, - "learning_rate": 0.00019999788534203923, - "loss": 46.0, - "step": 27085 - }, - { - "epoch": 2.0709138520939656, - "grad_norm": 0.00061557482695207, - "learning_rate": 0.00019999788518582946, - "loss": 46.0, - "step": 27086 - }, - { - "epoch": 2.0709903090773554, - "grad_norm": 0.0012566782534122467, - "learning_rate": 0.00019999788502961396, - "loss": 46.0, - "step": 27087 - }, - { - "epoch": 2.071066766060745, - "grad_norm": 0.0012375015066936612, - "learning_rate": 0.00019999788487339265, - "loss": 46.0, - "step": 27088 - }, - { - "epoch": 2.071143223044135, - "grad_norm": 0.0006211168947629631, - "learning_rate": 0.00019999788471716562, - "loss": 46.0, - "step": 27089 - }, - { - "epoch": 2.0712196800275247, - "grad_norm": 0.0004771646053995937, - "learning_rate": 0.00019999788456093276, - "loss": 46.0, - "step": 27090 - }, - { - "epoch": 2.0712961370109144, - "grad_norm": 0.002767386846244335, - "learning_rate": 0.0001999978844046942, - "loss": 46.0, - "step": 27091 - }, - { - "epoch": 2.0713725939943037, - "grad_norm": 0.003315454814583063, - "learning_rate": 0.0001999978842484498, - "loss": 46.0, - "step": 27092 - }, - { - "epoch": 2.0714490509776935, - "grad_norm": 0.0018309837905690074, - "learning_rate": 0.00019999788409219967, - "loss": 46.0, - "step": 27093 - }, - { - "epoch": 2.0715255079610833, - "grad_norm": 0.0011916599469259381, - "learning_rate": 0.00019999788393594374, - "loss": 46.0, - "step": 27094 - }, - { - "epoch": 2.071601964944473, - "grad_norm": 0.000992544344626367, - "learning_rate": 0.00019999788377968208, - "loss": 46.0, - "step": 27095 - }, - { - "epoch": 2.071678421927863, - "grad_norm": 0.0011875139316543937, - "learning_rate": 0.0001999978836234146, - "loss": 46.0, - "step": 27096 - }, - { - "epoch": 2.0717548789112525, - "grad_norm": 0.0010922644287347794, - "learning_rate": 0.0001999978834671414, - "loss": 46.0, - "step": 27097 - }, - { - "epoch": 2.0718313358946423, - "grad_norm": 0.002796332584694028, - "learning_rate": 0.0001999978833108624, - "loss": 46.0, - "step": 27098 - }, - { - "epoch": 2.071907792878032, - "grad_norm": 0.0022023599594831467, - "learning_rate": 0.00019999788315457767, - "loss": 46.0, - "step": 27099 - }, - { - "epoch": 2.071984249861422, - "grad_norm": 0.0005100789712741971, - "learning_rate": 0.00019999788299828714, - "loss": 46.0, - "step": 27100 - }, - { - "epoch": 2.0720607068448116, - "grad_norm": 0.0017492339247837663, - "learning_rate": 0.00019999788284199083, - "loss": 46.0, - "step": 27101 - }, - { - "epoch": 2.0721371638282013, - "grad_norm": 0.0030357837677001953, - "learning_rate": 0.00019999788268568877, - "loss": 46.0, - "step": 27102 - }, - { - "epoch": 2.072213620811591, - "grad_norm": 0.0005458348896354437, - "learning_rate": 0.00019999788252938094, - "loss": 46.0, - "step": 27103 - }, - { - "epoch": 2.0722900777949804, - "grad_norm": 0.001348218065686524, - "learning_rate": 0.00019999788237306734, - "loss": 46.0, - "step": 27104 - }, - { - "epoch": 2.07236653477837, - "grad_norm": 0.0007464727968908846, - "learning_rate": 0.00019999788221674797, - "loss": 46.0, - "step": 27105 - }, - { - "epoch": 2.07244299176176, - "grad_norm": 0.00191450584679842, - "learning_rate": 0.00019999788206042282, - "loss": 46.0, - "step": 27106 - }, - { - "epoch": 2.0725194487451497, - "grad_norm": 0.0019547617994248867, - "learning_rate": 0.00019999788190409193, - "loss": 46.0, - "step": 27107 - }, - { - "epoch": 2.0725959057285395, - "grad_norm": 0.000526728224940598, - "learning_rate": 0.00019999788174775526, - "loss": 46.0, - "step": 27108 - }, - { - "epoch": 2.0726723627119292, - "grad_norm": 0.000797897286247462, - "learning_rate": 0.0001999978815914128, - "loss": 46.0, - "step": 27109 - }, - { - "epoch": 2.072748819695319, - "grad_norm": 0.001858215662650764, - "learning_rate": 0.00019999788143506457, - "loss": 46.0, - "step": 27110 - }, - { - "epoch": 2.0728252766787088, - "grad_norm": 0.003177137114107609, - "learning_rate": 0.0001999978812787106, - "loss": 46.0, - "step": 27111 - }, - { - "epoch": 2.0729017336620985, - "grad_norm": 0.0003472694952506572, - "learning_rate": 0.0001999978811223508, - "loss": 46.0, - "step": 27112 - }, - { - "epoch": 2.0729781906454883, - "grad_norm": 0.0010311231017112732, - "learning_rate": 0.0001999978809659853, - "loss": 46.0, - "step": 27113 - }, - { - "epoch": 2.0730546476288776, - "grad_norm": 0.0006290294695645571, - "learning_rate": 0.00019999788080961398, - "loss": 46.0, - "step": 27114 - }, - { - "epoch": 2.0731311046122674, - "grad_norm": 0.0008104062872007489, - "learning_rate": 0.00019999788065323693, - "loss": 46.0, - "step": 27115 - }, - { - "epoch": 2.073207561595657, - "grad_norm": 0.000582873763050884, - "learning_rate": 0.00019999788049685407, - "loss": 46.0, - "step": 27116 - }, - { - "epoch": 2.073284018579047, - "grad_norm": 0.0006381890270859003, - "learning_rate": 0.0001999978803404655, - "loss": 46.0, - "step": 27117 - }, - { - "epoch": 2.0733604755624366, - "grad_norm": 0.0008024231647141278, - "learning_rate": 0.00019999788018407113, - "loss": 46.0, - "step": 27118 - }, - { - "epoch": 2.0734369325458264, - "grad_norm": 0.0009202121291309595, - "learning_rate": 0.00019999788002767098, - "loss": 46.0, - "step": 27119 - }, - { - "epoch": 2.073513389529216, - "grad_norm": 0.000793017097748816, - "learning_rate": 0.00019999787987126506, - "loss": 46.0, - "step": 27120 - }, - { - "epoch": 2.073589846512606, - "grad_norm": 0.0007888094405643642, - "learning_rate": 0.0001999978797148534, - "loss": 46.0, - "step": 27121 - }, - { - "epoch": 2.0736663034959957, - "grad_norm": 0.00149096071254462, - "learning_rate": 0.00019999787955843595, - "loss": 46.0, - "step": 27122 - }, - { - "epoch": 2.0737427604793854, - "grad_norm": 0.0005638212896883488, - "learning_rate": 0.0001999978794020127, - "loss": 46.0, - "step": 27123 - }, - { - "epoch": 2.073819217462775, - "grad_norm": 0.0013729806523770094, - "learning_rate": 0.00019999787924558372, - "loss": 46.0, - "step": 27124 - }, - { - "epoch": 2.0738956744461645, - "grad_norm": 0.00045726681128144264, - "learning_rate": 0.00019999787908914896, - "loss": 46.0, - "step": 27125 - }, - { - "epoch": 2.0739721314295543, - "grad_norm": 0.0016534518217667937, - "learning_rate": 0.00019999787893270842, - "loss": 46.0, - "step": 27126 - }, - { - "epoch": 2.074048588412944, - "grad_norm": 0.0011107750469818711, - "learning_rate": 0.00019999787877626212, - "loss": 46.0, - "step": 27127 - }, - { - "epoch": 2.074125045396334, - "grad_norm": 0.0006969883688725531, - "learning_rate": 0.00019999787861981006, - "loss": 46.0, - "step": 27128 - }, - { - "epoch": 2.0742015023797236, - "grad_norm": 0.0012100323801860213, - "learning_rate": 0.00019999787846335223, - "loss": 46.0, - "step": 27129 - }, - { - "epoch": 2.0742779593631133, - "grad_norm": 0.0011009123409166932, - "learning_rate": 0.00019999787830688863, - "loss": 46.0, - "step": 27130 - }, - { - "epoch": 2.074354416346503, - "grad_norm": 0.003460992593318224, - "learning_rate": 0.00019999787815041926, - "loss": 46.0, - "step": 27131 - }, - { - "epoch": 2.074430873329893, - "grad_norm": 0.0018930698279291391, - "learning_rate": 0.0001999978779939441, - "loss": 46.0, - "step": 27132 - }, - { - "epoch": 2.0745073303132826, - "grad_norm": 0.0025584097020328045, - "learning_rate": 0.0001999978778374632, - "loss": 46.0, - "step": 27133 - }, - { - "epoch": 2.0745837872966724, - "grad_norm": 0.0021529991645365953, - "learning_rate": 0.0001999978776809765, - "loss": 46.0, - "step": 27134 - }, - { - "epoch": 2.074660244280062, - "grad_norm": 0.001232546172104776, - "learning_rate": 0.00019999787752448406, - "loss": 46.0, - "step": 27135 - }, - { - "epoch": 2.0747367012634514, - "grad_norm": 0.0013623720733448863, - "learning_rate": 0.00019999787736798584, - "loss": 46.0, - "step": 27136 - }, - { - "epoch": 2.074813158246841, - "grad_norm": 0.001318274182267487, - "learning_rate": 0.00019999787721148186, - "loss": 46.0, - "step": 27137 - }, - { - "epoch": 2.074889615230231, - "grad_norm": 0.002330901101231575, - "learning_rate": 0.0001999978770549721, - "loss": 46.0, - "step": 27138 - }, - { - "epoch": 2.0749660722136207, - "grad_norm": 0.002453487366437912, - "learning_rate": 0.00019999787689845656, - "loss": 46.0, - "step": 27139 - }, - { - "epoch": 2.0750425291970105, - "grad_norm": 0.0031309437472373247, - "learning_rate": 0.00019999787674193525, - "loss": 46.0, - "step": 27140 - }, - { - "epoch": 2.0751189861804002, - "grad_norm": 0.001266809063963592, - "learning_rate": 0.00019999787658540817, - "loss": 46.0, - "step": 27141 - }, - { - "epoch": 2.07519544316379, - "grad_norm": 0.0007018725736998022, - "learning_rate": 0.00019999787642887532, - "loss": 46.0, - "step": 27142 - }, - { - "epoch": 2.0752719001471798, - "grad_norm": 0.0005419120425358415, - "learning_rate": 0.00019999787627233672, - "loss": 46.0, - "step": 27143 - }, - { - "epoch": 2.0753483571305695, - "grad_norm": 0.03149529919028282, - "learning_rate": 0.00019999787611579234, - "loss": 46.0, - "step": 27144 - }, - { - "epoch": 2.0754248141139593, - "grad_norm": 0.000695561699103564, - "learning_rate": 0.00019999787595924222, - "loss": 46.0, - "step": 27145 - }, - { - "epoch": 2.075501271097349, - "grad_norm": 0.00033221839112229645, - "learning_rate": 0.00019999787580268633, - "loss": 46.0, - "step": 27146 - }, - { - "epoch": 2.0755777280807384, - "grad_norm": 0.0008184429025277495, - "learning_rate": 0.0001999978756461246, - "loss": 46.0, - "step": 27147 - }, - { - "epoch": 2.075654185064128, - "grad_norm": 0.0005841483362019062, - "learning_rate": 0.00019999787548955714, - "loss": 46.0, - "step": 27148 - }, - { - "epoch": 2.075730642047518, - "grad_norm": 0.0008841967792250216, - "learning_rate": 0.00019999787533298396, - "loss": 46.0, - "step": 27149 - }, - { - "epoch": 2.0758070990309077, - "grad_norm": 0.0010597413638606668, - "learning_rate": 0.00019999787517640494, - "loss": 46.0, - "step": 27150 - }, - { - "epoch": 2.0758835560142974, - "grad_norm": 0.0016691063065081835, - "learning_rate": 0.00019999787501982018, - "loss": 46.0, - "step": 27151 - }, - { - "epoch": 2.075960012997687, - "grad_norm": 0.003026694757863879, - "learning_rate": 0.00019999787486322967, - "loss": 46.0, - "step": 27152 - }, - { - "epoch": 2.076036469981077, - "grad_norm": 0.0014178812270984054, - "learning_rate": 0.00019999787470663337, - "loss": 46.0, - "step": 27153 - }, - { - "epoch": 2.0761129269644667, - "grad_norm": 0.004958159755915403, - "learning_rate": 0.00019999787455003126, - "loss": 46.0, - "step": 27154 - }, - { - "epoch": 2.0761893839478565, - "grad_norm": 0.0016772547969594598, - "learning_rate": 0.00019999787439342343, - "loss": 46.0, - "step": 27155 - }, - { - "epoch": 2.076265840931246, - "grad_norm": 0.0008967339526861906, - "learning_rate": 0.00019999787423680983, - "loss": 46.0, - "step": 27156 - }, - { - "epoch": 2.076342297914636, - "grad_norm": 0.0015132429543882608, - "learning_rate": 0.00019999787408019046, - "loss": 46.0, - "step": 27157 - }, - { - "epoch": 2.0764187548980253, - "grad_norm": 0.003755326149985194, - "learning_rate": 0.00019999787392356534, - "loss": 46.0, - "step": 27158 - }, - { - "epoch": 2.076495211881415, - "grad_norm": 0.0023535825312137604, - "learning_rate": 0.0001999978737669344, - "loss": 46.0, - "step": 27159 - }, - { - "epoch": 2.076571668864805, - "grad_norm": 0.0011980890994891524, - "learning_rate": 0.00019999787361029773, - "loss": 46.0, - "step": 27160 - }, - { - "epoch": 2.0766481258481946, - "grad_norm": 0.0011827265843749046, - "learning_rate": 0.00019999787345365526, - "loss": 46.0, - "step": 27161 - }, - { - "epoch": 2.0767245828315843, - "grad_norm": 0.00028965191449970007, - "learning_rate": 0.00019999787329700705, - "loss": 46.0, - "step": 27162 - }, - { - "epoch": 2.076801039814974, - "grad_norm": 0.0016712151700630784, - "learning_rate": 0.00019999787314035303, - "loss": 46.0, - "step": 27163 - }, - { - "epoch": 2.076877496798364, - "grad_norm": 0.0021500373259186745, - "learning_rate": 0.00019999787298369327, - "loss": 46.0, - "step": 27164 - }, - { - "epoch": 2.0769539537817536, - "grad_norm": 0.0015568399103358388, - "learning_rate": 0.00019999787282702777, - "loss": 46.0, - "step": 27165 - }, - { - "epoch": 2.0770304107651434, - "grad_norm": 0.0004950611037202179, - "learning_rate": 0.00019999787267035646, - "loss": 46.0, - "step": 27166 - }, - { - "epoch": 2.077106867748533, - "grad_norm": 0.0026304114144295454, - "learning_rate": 0.00019999787251367938, - "loss": 46.0, - "step": 27167 - }, - { - "epoch": 2.077183324731923, - "grad_norm": 0.0010290222708135843, - "learning_rate": 0.00019999787235699655, - "loss": 46.0, - "step": 27168 - }, - { - "epoch": 2.0772597817153122, - "grad_norm": 0.0009328462765552104, - "learning_rate": 0.00019999787220030793, - "loss": 46.0, - "step": 27169 - }, - { - "epoch": 2.077336238698702, - "grad_norm": 0.0008161712903529406, - "learning_rate": 0.00019999787204361352, - "loss": 46.0, - "step": 27170 - }, - { - "epoch": 2.0774126956820917, - "grad_norm": 0.0015659695491194725, - "learning_rate": 0.0001999978718869134, - "loss": 46.0, - "step": 27171 - }, - { - "epoch": 2.0774891526654815, - "grad_norm": 0.0008939250255934894, - "learning_rate": 0.00019999787173020749, - "loss": 46.0, - "step": 27172 - }, - { - "epoch": 2.0775656096488713, - "grad_norm": 0.0009759397944435477, - "learning_rate": 0.0001999978715734958, - "loss": 46.0, - "step": 27173 - }, - { - "epoch": 2.077642066632261, - "grad_norm": 0.0008491888293065131, - "learning_rate": 0.00019999787141677833, - "loss": 46.0, - "step": 27174 - }, - { - "epoch": 2.077718523615651, - "grad_norm": 0.0013619356323033571, - "learning_rate": 0.0001999978712600551, - "loss": 46.0, - "step": 27175 - }, - { - "epoch": 2.0777949805990406, - "grad_norm": 0.000696946750395, - "learning_rate": 0.00019999787110332613, - "loss": 46.0, - "step": 27176 - }, - { - "epoch": 2.0778714375824303, - "grad_norm": 0.0015801778063178062, - "learning_rate": 0.00019999787094659134, - "loss": 46.0, - "step": 27177 - }, - { - "epoch": 2.07794789456582, - "grad_norm": 0.0004969353904016316, - "learning_rate": 0.0001999978707898508, - "loss": 46.0, - "step": 27178 - }, - { - "epoch": 2.07802435154921, - "grad_norm": 0.001622940762899816, - "learning_rate": 0.0001999978706331045, - "loss": 46.0, - "step": 27179 - }, - { - "epoch": 2.078100808532599, - "grad_norm": 0.0004925756948068738, - "learning_rate": 0.00019999787047635245, - "loss": 46.0, - "step": 27180 - }, - { - "epoch": 2.078177265515989, - "grad_norm": 0.001371089369058609, - "learning_rate": 0.0001999978703195946, - "loss": 46.0, - "step": 27181 - }, - { - "epoch": 2.0782537224993787, - "grad_norm": 0.0013874656287953258, - "learning_rate": 0.00019999787016283097, - "loss": 46.0, - "step": 27182 - }, - { - "epoch": 2.0783301794827684, - "grad_norm": 0.0007348840590566397, - "learning_rate": 0.0001999978700060616, - "loss": 46.0, - "step": 27183 - }, - { - "epoch": 2.078406636466158, - "grad_norm": 0.0009697225177660584, - "learning_rate": 0.00019999786984928645, - "loss": 46.0, - "step": 27184 - }, - { - "epoch": 2.078483093449548, - "grad_norm": 0.0007268933695740998, - "learning_rate": 0.00019999786969250556, - "loss": 46.0, - "step": 27185 - }, - { - "epoch": 2.0785595504329377, - "grad_norm": 0.0007455849554389715, - "learning_rate": 0.00019999786953571884, - "loss": 46.0, - "step": 27186 - }, - { - "epoch": 2.0786360074163275, - "grad_norm": 0.0007582865655422211, - "learning_rate": 0.0001999978693789264, - "loss": 46.0, - "step": 27187 - }, - { - "epoch": 2.0787124643997172, - "grad_norm": 0.0033378128428012133, - "learning_rate": 0.00019999786922212818, - "loss": 46.0, - "step": 27188 - }, - { - "epoch": 2.078788921383107, - "grad_norm": 0.0017556612147018313, - "learning_rate": 0.00019999786906532417, - "loss": 46.0, - "step": 27189 - }, - { - "epoch": 2.0788653783664968, - "grad_norm": 0.0025836050044745207, - "learning_rate": 0.0001999978689085144, - "loss": 46.0, - "step": 27190 - }, - { - "epoch": 2.078941835349886, - "grad_norm": 0.0014807026600465178, - "learning_rate": 0.00019999786875169888, - "loss": 46.0, - "step": 27191 - }, - { - "epoch": 2.079018292333276, - "grad_norm": 0.0008699865429662168, - "learning_rate": 0.00019999786859487757, - "loss": 46.0, - "step": 27192 - }, - { - "epoch": 2.0790947493166656, - "grad_norm": 0.005615171976387501, - "learning_rate": 0.0001999978684380505, - "loss": 46.0, - "step": 27193 - }, - { - "epoch": 2.0791712063000554, - "grad_norm": 0.005202721804380417, - "learning_rate": 0.00019999786828121764, - "loss": 46.0, - "step": 27194 - }, - { - "epoch": 2.079247663283445, - "grad_norm": 0.00414247065782547, - "learning_rate": 0.00019999786812437904, - "loss": 46.0, - "step": 27195 - }, - { - "epoch": 2.079324120266835, - "grad_norm": 0.0024178854655474424, - "learning_rate": 0.00019999786796753467, - "loss": 46.0, - "step": 27196 - }, - { - "epoch": 2.0794005772502246, - "grad_norm": 0.0006391534116119146, - "learning_rate": 0.0001999978678106845, - "loss": 46.0, - "step": 27197 - }, - { - "epoch": 2.0794770342336144, - "grad_norm": 0.0013488776748999953, - "learning_rate": 0.0001999978676538286, - "loss": 46.0, - "step": 27198 - }, - { - "epoch": 2.079553491217004, - "grad_norm": 0.0010994477197527885, - "learning_rate": 0.00019999786749696688, - "loss": 46.0, - "step": 27199 - }, - { - "epoch": 2.079629948200394, - "grad_norm": 0.0019452725537121296, - "learning_rate": 0.00019999786734009944, - "loss": 46.0, - "step": 27200 - }, - { - "epoch": 2.0797064051837832, - "grad_norm": 0.002070052782073617, - "learning_rate": 0.0001999978671832262, - "loss": 46.0, - "step": 27201 - }, - { - "epoch": 2.079782862167173, - "grad_norm": 0.0009187899995595217, - "learning_rate": 0.0001999978670263472, - "loss": 46.0, - "step": 27202 - }, - { - "epoch": 2.0798593191505628, - "grad_norm": 0.0007883846410550177, - "learning_rate": 0.00019999786686946246, - "loss": 46.0, - "step": 27203 - }, - { - "epoch": 2.0799357761339525, - "grad_norm": 0.0005143506568856537, - "learning_rate": 0.00019999786671257193, - "loss": 46.0, - "step": 27204 - }, - { - "epoch": 2.0800122331173423, - "grad_norm": 0.0021083930041640997, - "learning_rate": 0.0001999978665556756, - "loss": 46.0, - "step": 27205 - }, - { - "epoch": 2.080088690100732, - "grad_norm": 0.000846718205139041, - "learning_rate": 0.00019999786639877351, - "loss": 46.0, - "step": 27206 - }, - { - "epoch": 2.080165147084122, - "grad_norm": 0.0009303530096076429, - "learning_rate": 0.0001999978662418657, - "loss": 46.0, - "step": 27207 - }, - { - "epoch": 2.0802416040675116, - "grad_norm": 0.001431467360816896, - "learning_rate": 0.00019999786608495206, - "loss": 46.0, - "step": 27208 - }, - { - "epoch": 2.0803180610509013, - "grad_norm": 0.000938993995077908, - "learning_rate": 0.0001999978659280327, - "loss": 46.0, - "step": 27209 - }, - { - "epoch": 2.080394518034291, - "grad_norm": 0.0013045774539932609, - "learning_rate": 0.00019999786577110755, - "loss": 46.0, - "step": 27210 - }, - { - "epoch": 2.080470975017681, - "grad_norm": 0.00042622911860235035, - "learning_rate": 0.00019999786561417663, - "loss": 46.0, - "step": 27211 - }, - { - "epoch": 2.0805474320010706, - "grad_norm": 0.0006025473121553659, - "learning_rate": 0.0001999978654572399, - "loss": 46.0, - "step": 27212 - }, - { - "epoch": 2.08062388898446, - "grad_norm": 0.00041579693788662553, - "learning_rate": 0.00019999786530029744, - "loss": 46.0, - "step": 27213 - }, - { - "epoch": 2.0807003459678497, - "grad_norm": 0.0020275823771953583, - "learning_rate": 0.00019999786514334923, - "loss": 46.0, - "step": 27214 - }, - { - "epoch": 2.0807768029512395, - "grad_norm": 0.0013582655228674412, - "learning_rate": 0.00019999786498639524, - "loss": 46.0, - "step": 27215 - }, - { - "epoch": 2.080853259934629, - "grad_norm": 0.0008413522155024111, - "learning_rate": 0.00019999786482943549, - "loss": 46.0, - "step": 27216 - }, - { - "epoch": 2.080929716918019, - "grad_norm": 0.0008687196532264352, - "learning_rate": 0.00019999786467246993, - "loss": 46.0, - "step": 27217 - }, - { - "epoch": 2.0810061739014087, - "grad_norm": 0.0009470437071286142, - "learning_rate": 0.00019999786451549862, - "loss": 46.0, - "step": 27218 - }, - { - "epoch": 2.0810826308847985, - "grad_norm": 0.0009760914836078882, - "learning_rate": 0.00019999786435852154, - "loss": 46.0, - "step": 27219 - }, - { - "epoch": 2.0811590878681883, - "grad_norm": 0.0015747162979096174, - "learning_rate": 0.0001999978642015387, - "loss": 46.0, - "step": 27220 - }, - { - "epoch": 2.081235544851578, - "grad_norm": 0.0014986732276156545, - "learning_rate": 0.00019999786404455006, - "loss": 46.0, - "step": 27221 - }, - { - "epoch": 2.081312001834968, - "grad_norm": 0.002020235639065504, - "learning_rate": 0.0001999978638875557, - "loss": 46.0, - "step": 27222 - }, - { - "epoch": 2.081388458818357, - "grad_norm": 0.011122310534119606, - "learning_rate": 0.00019999786373055555, - "loss": 46.0, - "step": 27223 - }, - { - "epoch": 2.081464915801747, - "grad_norm": 0.0009557961602695286, - "learning_rate": 0.00019999786357354963, - "loss": 46.0, - "step": 27224 - }, - { - "epoch": 2.0815413727851366, - "grad_norm": 0.0012390066403895617, - "learning_rate": 0.00019999786341653794, - "loss": 46.0, - "step": 27225 - }, - { - "epoch": 2.0816178297685264, - "grad_norm": 0.003337208181619644, - "learning_rate": 0.00019999786325952047, - "loss": 46.0, - "step": 27226 - }, - { - "epoch": 2.081694286751916, - "grad_norm": 0.0016678700922057033, - "learning_rate": 0.00019999786310249723, - "loss": 46.0, - "step": 27227 - }, - { - "epoch": 2.081770743735306, - "grad_norm": 0.005351881962269545, - "learning_rate": 0.00019999786294546822, - "loss": 46.0, - "step": 27228 - }, - { - "epoch": 2.0818472007186957, - "grad_norm": 0.0004929532879032195, - "learning_rate": 0.0001999978627884335, - "loss": 46.0, - "step": 27229 - }, - { - "epoch": 2.0819236577020854, - "grad_norm": 0.0010795847047120333, - "learning_rate": 0.00019999786263139293, - "loss": 46.0, - "step": 27230 - }, - { - "epoch": 2.082000114685475, - "grad_norm": 0.0016849403036758304, - "learning_rate": 0.00019999786247434662, - "loss": 46.0, - "step": 27231 - }, - { - "epoch": 2.082076571668865, - "grad_norm": 0.002279861830174923, - "learning_rate": 0.00019999786231729455, - "loss": 46.0, - "step": 27232 - }, - { - "epoch": 2.0821530286522547, - "grad_norm": 0.0023578687105327845, - "learning_rate": 0.0001999978621602367, - "loss": 46.0, - "step": 27233 - }, - { - "epoch": 2.0822294856356445, - "grad_norm": 0.0023254305124282837, - "learning_rate": 0.0001999978620031731, - "loss": 46.0, - "step": 27234 - }, - { - "epoch": 2.082305942619034, - "grad_norm": 0.0010756539413705468, - "learning_rate": 0.0001999978618461037, - "loss": 46.0, - "step": 27235 - }, - { - "epoch": 2.0823823996024236, - "grad_norm": 0.0006331177428364754, - "learning_rate": 0.00019999786168902855, - "loss": 46.0, - "step": 27236 - }, - { - "epoch": 2.0824588565858133, - "grad_norm": 0.001233407179825008, - "learning_rate": 0.0001999978615319476, - "loss": 46.0, - "step": 27237 - }, - { - "epoch": 2.082535313569203, - "grad_norm": 0.0005494873621501029, - "learning_rate": 0.00019999786137486094, - "loss": 46.0, - "step": 27238 - }, - { - "epoch": 2.082611770552593, - "grad_norm": 0.000868365284986794, - "learning_rate": 0.00019999786121776848, - "loss": 46.0, - "step": 27239 - }, - { - "epoch": 2.0826882275359826, - "grad_norm": 0.0011070248438045382, - "learning_rate": 0.0001999978610606702, - "loss": 46.0, - "step": 27240 - }, - { - "epoch": 2.0827646845193724, - "grad_norm": 0.001520760590210557, - "learning_rate": 0.00019999786090356625, - "loss": 46.0, - "step": 27241 - }, - { - "epoch": 2.082841141502762, - "grad_norm": 0.0016706987516954541, - "learning_rate": 0.00019999786074645647, - "loss": 46.0, - "step": 27242 - }, - { - "epoch": 2.082917598486152, - "grad_norm": 0.0006322450935840607, - "learning_rate": 0.00019999786058934094, - "loss": 46.0, - "step": 27243 - }, - { - "epoch": 2.0829940554695416, - "grad_norm": 0.003286790568381548, - "learning_rate": 0.0001999978604322196, - "loss": 46.0, - "step": 27244 - }, - { - "epoch": 2.083070512452931, - "grad_norm": 0.000545597868040204, - "learning_rate": 0.00019999786027509253, - "loss": 46.0, - "step": 27245 - }, - { - "epoch": 2.0831469694363207, - "grad_norm": 0.0008285172516480088, - "learning_rate": 0.0001999978601179597, - "loss": 46.0, - "step": 27246 - }, - { - "epoch": 2.0832234264197105, - "grad_norm": 0.004713290371000767, - "learning_rate": 0.00019999785996082108, - "loss": 46.0, - "step": 27247 - }, - { - "epoch": 2.0832998834031002, - "grad_norm": 0.0027769531589001417, - "learning_rate": 0.00019999785980367668, - "loss": 46.0, - "step": 27248 - }, - { - "epoch": 2.08337634038649, - "grad_norm": 0.00078834273153916, - "learning_rate": 0.00019999785964652653, - "loss": 46.0, - "step": 27249 - }, - { - "epoch": 2.0834527973698798, - "grad_norm": 0.0006405291496776044, - "learning_rate": 0.0001999978594893706, - "loss": 46.0, - "step": 27250 - }, - { - "epoch": 2.0835292543532695, - "grad_norm": 0.013986635021865368, - "learning_rate": 0.0001999978593322089, - "loss": 46.0, - "step": 27251 - }, - { - "epoch": 2.0836057113366593, - "grad_norm": 0.003889655228704214, - "learning_rate": 0.00019999785917504143, - "loss": 46.0, - "step": 27252 - }, - { - "epoch": 2.083682168320049, - "grad_norm": 0.0031468758825212717, - "learning_rate": 0.0001999978590178682, - "loss": 46.0, - "step": 27253 - }, - { - "epoch": 2.083758625303439, - "grad_norm": 0.0005284941871650517, - "learning_rate": 0.0001999978588606892, - "loss": 46.0, - "step": 27254 - }, - { - "epoch": 2.0838350822868286, - "grad_norm": 0.0021087052300572395, - "learning_rate": 0.00019999785870350442, - "loss": 46.0, - "step": 27255 - }, - { - "epoch": 2.083911539270218, - "grad_norm": 0.0007566118729300797, - "learning_rate": 0.0001999978585463139, - "loss": 46.0, - "step": 27256 - }, - { - "epoch": 2.0839879962536076, - "grad_norm": 0.0006520853494293988, - "learning_rate": 0.0001999978583891176, - "loss": 46.0, - "step": 27257 - }, - { - "epoch": 2.0840644532369974, - "grad_norm": 0.0004206908051855862, - "learning_rate": 0.0001999978582319155, - "loss": 46.0, - "step": 27258 - }, - { - "epoch": 2.084140910220387, - "grad_norm": 0.0018530928064137697, - "learning_rate": 0.00019999785807470763, - "loss": 46.0, - "step": 27259 - }, - { - "epoch": 2.084217367203777, - "grad_norm": 0.0024774286430329084, - "learning_rate": 0.00019999785791749403, - "loss": 46.0, - "step": 27260 - }, - { - "epoch": 2.0842938241871667, - "grad_norm": 0.0017375156749039888, - "learning_rate": 0.00019999785776027464, - "loss": 46.0, - "step": 27261 - }, - { - "epoch": 2.0843702811705564, - "grad_norm": 0.0033782843966037035, - "learning_rate": 0.0001999978576030495, - "loss": 46.0, - "step": 27262 - }, - { - "epoch": 2.084446738153946, - "grad_norm": 0.0007590474560856819, - "learning_rate": 0.00019999785744581855, - "loss": 46.0, - "step": 27263 - }, - { - "epoch": 2.084523195137336, - "grad_norm": 0.012711228802800179, - "learning_rate": 0.00019999785728858186, - "loss": 46.0, - "step": 27264 - }, - { - "epoch": 2.0845996521207257, - "grad_norm": 0.0006222252268344164, - "learning_rate": 0.00019999785713133942, - "loss": 46.0, - "step": 27265 - }, - { - "epoch": 2.0846761091041155, - "grad_norm": 0.002791031962260604, - "learning_rate": 0.00019999785697409115, - "loss": 46.0, - "step": 27266 - }, - { - "epoch": 2.084752566087505, - "grad_norm": 0.0009851750219240785, - "learning_rate": 0.00019999785681683717, - "loss": 46.0, - "step": 27267 - }, - { - "epoch": 2.0848290230708946, - "grad_norm": 0.0007425334770232439, - "learning_rate": 0.0001999978566595774, - "loss": 46.0, - "step": 27268 - }, - { - "epoch": 2.0849054800542843, - "grad_norm": 0.0029472720343619585, - "learning_rate": 0.00019999785650231185, - "loss": 46.0, - "step": 27269 - }, - { - "epoch": 2.084981937037674, - "grad_norm": 0.0027116015553474426, - "learning_rate": 0.00019999785634504052, - "loss": 46.0, - "step": 27270 - }, - { - "epoch": 2.085058394021064, - "grad_norm": 0.0015690447762608528, - "learning_rate": 0.00019999785618776344, - "loss": 46.0, - "step": 27271 - }, - { - "epoch": 2.0851348510044536, - "grad_norm": 0.001972117694094777, - "learning_rate": 0.0001999978560304806, - "loss": 46.0, - "step": 27272 - }, - { - "epoch": 2.0852113079878434, - "grad_norm": 0.0009207900729961693, - "learning_rate": 0.00019999785587319197, - "loss": 46.0, - "step": 27273 - }, - { - "epoch": 2.085287764971233, - "grad_norm": 0.000603479624260217, - "learning_rate": 0.0001999978557158976, - "loss": 46.0, - "step": 27274 - }, - { - "epoch": 2.085364221954623, - "grad_norm": 0.0006268219440244138, - "learning_rate": 0.00019999785555859743, - "loss": 46.0, - "step": 27275 - }, - { - "epoch": 2.0854406789380127, - "grad_norm": 0.0023255066480487585, - "learning_rate": 0.0001999978554012915, - "loss": 46.0, - "step": 27276 - }, - { - "epoch": 2.0855171359214024, - "grad_norm": 0.0021955957636237144, - "learning_rate": 0.0001999978552439798, - "loss": 46.0, - "step": 27277 - }, - { - "epoch": 2.0855935929047917, - "grad_norm": 0.00640465784817934, - "learning_rate": 0.00019999785508666233, - "loss": 46.0, - "step": 27278 - }, - { - "epoch": 2.0856700498881815, - "grad_norm": 0.001450004754588008, - "learning_rate": 0.0001999978549293391, - "loss": 46.0, - "step": 27279 - }, - { - "epoch": 2.0857465068715713, - "grad_norm": 0.002285077003762126, - "learning_rate": 0.0001999978547720101, - "loss": 46.0, - "step": 27280 - }, - { - "epoch": 2.085822963854961, - "grad_norm": 0.0024938893038779497, - "learning_rate": 0.00019999785461467532, - "loss": 46.0, - "step": 27281 - }, - { - "epoch": 2.085899420838351, - "grad_norm": 0.0012369889300316572, - "learning_rate": 0.0001999978544573348, - "loss": 46.0, - "step": 27282 - }, - { - "epoch": 2.0859758778217405, - "grad_norm": 0.005502518266439438, - "learning_rate": 0.00019999785429998846, - "loss": 46.0, - "step": 27283 - }, - { - "epoch": 2.0860523348051303, - "grad_norm": 0.00030840406543575227, - "learning_rate": 0.00019999785414263638, - "loss": 46.0, - "step": 27284 - }, - { - "epoch": 2.08612879178852, - "grad_norm": 0.0007111412123776972, - "learning_rate": 0.00019999785398527853, - "loss": 46.0, - "step": 27285 - }, - { - "epoch": 2.08620524877191, - "grad_norm": 0.0018708074931055307, - "learning_rate": 0.00019999785382791488, - "loss": 46.0, - "step": 27286 - }, - { - "epoch": 2.0862817057552996, - "grad_norm": 0.00042130364454351366, - "learning_rate": 0.00019999785367054554, - "loss": 46.0, - "step": 27287 - }, - { - "epoch": 2.0863581627386893, - "grad_norm": 0.005188572220504284, - "learning_rate": 0.00019999785351317034, - "loss": 46.0, - "step": 27288 - }, - { - "epoch": 2.0864346197220787, - "grad_norm": 0.00044203459401614964, - "learning_rate": 0.00019999785335578945, - "loss": 46.0, - "step": 27289 - }, - { - "epoch": 2.0865110767054684, - "grad_norm": 0.0005602473393082619, - "learning_rate": 0.0001999978531984027, - "loss": 46.0, - "step": 27290 - }, - { - "epoch": 2.086587533688858, - "grad_norm": 0.0008185104816220701, - "learning_rate": 0.00019999785304101027, - "loss": 46.0, - "step": 27291 - }, - { - "epoch": 2.086663990672248, - "grad_norm": 0.00395533861592412, - "learning_rate": 0.00019999785288361203, - "loss": 46.0, - "step": 27292 - }, - { - "epoch": 2.0867404476556377, - "grad_norm": 0.002584238536655903, - "learning_rate": 0.00019999785272620805, - "loss": 46.0, - "step": 27293 - }, - { - "epoch": 2.0868169046390275, - "grad_norm": 0.0016792984679341316, - "learning_rate": 0.00019999785256879824, - "loss": 46.0, - "step": 27294 - }, - { - "epoch": 2.0868933616224172, - "grad_norm": 0.0009061884484253824, - "learning_rate": 0.0001999978524113827, - "loss": 46.0, - "step": 27295 - }, - { - "epoch": 2.086969818605807, - "grad_norm": 0.000508524535689503, - "learning_rate": 0.0001999978522539614, - "loss": 46.0, - "step": 27296 - }, - { - "epoch": 2.0870462755891968, - "grad_norm": 0.0006886657210998237, - "learning_rate": 0.0001999978520965343, - "loss": 46.0, - "step": 27297 - }, - { - "epoch": 2.0871227325725865, - "grad_norm": 0.011873285286128521, - "learning_rate": 0.00019999785193910142, - "loss": 46.0, - "step": 27298 - }, - { - "epoch": 2.0871991895559763, - "grad_norm": 0.002773172687739134, - "learning_rate": 0.00019999785178166282, - "loss": 46.0, - "step": 27299 - }, - { - "epoch": 2.0872756465393656, - "grad_norm": 0.005620944779366255, - "learning_rate": 0.00019999785162421843, - "loss": 46.0, - "step": 27300 - }, - { - "epoch": 2.0873521035227554, - "grad_norm": 0.0004794311826117337, - "learning_rate": 0.00019999785146676828, - "loss": 46.0, - "step": 27301 - }, - { - "epoch": 2.087428560506145, - "grad_norm": 0.0017960167024284601, - "learning_rate": 0.00019999785130931234, - "loss": 46.0, - "step": 27302 - }, - { - "epoch": 2.087505017489535, - "grad_norm": 0.0016684964066371322, - "learning_rate": 0.00019999785115185062, - "loss": 46.0, - "step": 27303 - }, - { - "epoch": 2.0875814744729246, - "grad_norm": 0.00072661874582991, - "learning_rate": 0.0001999978509943832, - "loss": 46.0, - "step": 27304 - }, - { - "epoch": 2.0876579314563144, - "grad_norm": 0.00037797962431795895, - "learning_rate": 0.00019999785083690995, - "loss": 46.0, - "step": 27305 - }, - { - "epoch": 2.087734388439704, - "grad_norm": 0.002528723794966936, - "learning_rate": 0.00019999785067943094, - "loss": 46.0, - "step": 27306 - }, - { - "epoch": 2.087810845423094, - "grad_norm": 0.00035425880923867226, - "learning_rate": 0.00019999785052194616, - "loss": 46.0, - "step": 27307 - }, - { - "epoch": 2.0878873024064837, - "grad_norm": 0.0007851978298276663, - "learning_rate": 0.0001999978503644556, - "loss": 46.0, - "step": 27308 - }, - { - "epoch": 2.0879637593898734, - "grad_norm": 0.0005193176330067217, - "learning_rate": 0.00019999785020695927, - "loss": 46.0, - "step": 27309 - }, - { - "epoch": 2.088040216373263, - "grad_norm": 0.0006123346392996609, - "learning_rate": 0.00019999785004945722, - "loss": 46.0, - "step": 27310 - }, - { - "epoch": 2.0881166733566525, - "grad_norm": 0.0011612738016992807, - "learning_rate": 0.00019999784989194934, - "loss": 46.0, - "step": 27311 - }, - { - "epoch": 2.0881931303400423, - "grad_norm": 0.0014939234824851155, - "learning_rate": 0.00019999784973443572, - "loss": 46.0, - "step": 27312 - }, - { - "epoch": 2.088269587323432, - "grad_norm": 0.006138111464679241, - "learning_rate": 0.00019999784957691632, - "loss": 46.0, - "step": 27313 - }, - { - "epoch": 2.088346044306822, - "grad_norm": 0.0014626990305259824, - "learning_rate": 0.00019999784941939118, - "loss": 46.0, - "step": 27314 - }, - { - "epoch": 2.0884225012902116, - "grad_norm": 0.0016980463406071067, - "learning_rate": 0.00019999784926186026, - "loss": 46.0, - "step": 27315 - }, - { - "epoch": 2.0884989582736013, - "grad_norm": 0.0005950651247985661, - "learning_rate": 0.00019999784910432354, - "loss": 46.0, - "step": 27316 - }, - { - "epoch": 2.088575415256991, - "grad_norm": 0.0050689708441495895, - "learning_rate": 0.00019999784894678105, - "loss": 46.0, - "step": 27317 - }, - { - "epoch": 2.088651872240381, - "grad_norm": 0.01390141248703003, - "learning_rate": 0.00019999784878923282, - "loss": 46.0, - "step": 27318 - }, - { - "epoch": 2.0887283292237706, - "grad_norm": 0.0019448251696303487, - "learning_rate": 0.0001999978486316788, - "loss": 46.0, - "step": 27319 - }, - { - "epoch": 2.0888047862071604, - "grad_norm": 0.0018287294078618288, - "learning_rate": 0.00019999784847411905, - "loss": 46.0, - "step": 27320 - }, - { - "epoch": 2.08888124319055, - "grad_norm": 0.0015632157446816564, - "learning_rate": 0.0001999978483165535, - "loss": 46.0, - "step": 27321 - }, - { - "epoch": 2.0889577001739394, - "grad_norm": 0.0005896036163903773, - "learning_rate": 0.00019999784815898216, - "loss": 46.0, - "step": 27322 - }, - { - "epoch": 2.089034157157329, - "grad_norm": 0.0005592514644376934, - "learning_rate": 0.0001999978480014051, - "loss": 46.0, - "step": 27323 - }, - { - "epoch": 2.089110614140719, - "grad_norm": 0.0008798418566584587, - "learning_rate": 0.00019999784784382224, - "loss": 46.0, - "step": 27324 - }, - { - "epoch": 2.0891870711241087, - "grad_norm": 0.001165579305961728, - "learning_rate": 0.0001999978476862336, - "loss": 46.0, - "step": 27325 - }, - { - "epoch": 2.0892635281074985, - "grad_norm": 0.0003788323374465108, - "learning_rate": 0.00019999784752863922, - "loss": 46.0, - "step": 27326 - }, - { - "epoch": 2.0893399850908883, - "grad_norm": 0.001779798767529428, - "learning_rate": 0.00019999784737103905, - "loss": 46.0, - "step": 27327 - }, - { - "epoch": 2.089416442074278, - "grad_norm": 0.001192900468595326, - "learning_rate": 0.0001999978472134331, - "loss": 46.0, - "step": 27328 - }, - { - "epoch": 2.0894928990576678, - "grad_norm": 0.0009551694965921342, - "learning_rate": 0.00019999784705582142, - "loss": 46.0, - "step": 27329 - }, - { - "epoch": 2.0895693560410575, - "grad_norm": 0.0009529570234008133, - "learning_rate": 0.00019999784689820393, - "loss": 46.0, - "step": 27330 - }, - { - "epoch": 2.0896458130244473, - "grad_norm": 0.0014148111222311854, - "learning_rate": 0.00019999784674058072, - "loss": 46.0, - "step": 27331 - }, - { - "epoch": 2.0897222700078366, - "grad_norm": 0.0022507377434521914, - "learning_rate": 0.00019999784658295168, - "loss": 46.0, - "step": 27332 - }, - { - "epoch": 2.0897987269912264, - "grad_norm": 0.0004598718660417944, - "learning_rate": 0.0001999978464253169, - "loss": 46.0, - "step": 27333 - }, - { - "epoch": 2.089875183974616, - "grad_norm": 0.0006176439928822219, - "learning_rate": 0.00019999784626767637, - "loss": 46.0, - "step": 27334 - }, - { - "epoch": 2.089951640958006, - "grad_norm": 0.0007755841943435371, - "learning_rate": 0.00019999784611003004, - "loss": 46.0, - "step": 27335 - }, - { - "epoch": 2.0900280979413957, - "grad_norm": 0.017024213448166847, - "learning_rate": 0.00019999784595237796, - "loss": 46.0, - "step": 27336 - }, - { - "epoch": 2.0901045549247854, - "grad_norm": 0.001571474364027381, - "learning_rate": 0.00019999784579472009, - "loss": 46.0, - "step": 27337 - }, - { - "epoch": 2.090181011908175, - "grad_norm": 0.004843530245125294, - "learning_rate": 0.00019999784563705646, - "loss": 46.0, - "step": 27338 - }, - { - "epoch": 2.090257468891565, - "grad_norm": 0.0032593111973255873, - "learning_rate": 0.00019999784547938707, - "loss": 46.0, - "step": 27339 - }, - { - "epoch": 2.0903339258749547, - "grad_norm": 0.0005252194823697209, - "learning_rate": 0.0001999978453217119, - "loss": 46.0, - "step": 27340 - }, - { - "epoch": 2.0904103828583445, - "grad_norm": 0.0029758221935480833, - "learning_rate": 0.00019999784516403098, - "loss": 46.0, - "step": 27341 - }, - { - "epoch": 2.0904868398417342, - "grad_norm": 0.0007039497722871602, - "learning_rate": 0.00019999784500634426, - "loss": 46.0, - "step": 27342 - }, - { - "epoch": 2.090563296825124, - "grad_norm": 0.004084882326424122, - "learning_rate": 0.00019999784484865177, - "loss": 46.0, - "step": 27343 - }, - { - "epoch": 2.0906397538085133, - "grad_norm": 0.0009567401721142232, - "learning_rate": 0.00019999784469095354, - "loss": 46.0, - "step": 27344 - }, - { - "epoch": 2.090716210791903, - "grad_norm": 0.0023373793810606003, - "learning_rate": 0.00019999784453324956, - "loss": 46.0, - "step": 27345 - }, - { - "epoch": 2.090792667775293, - "grad_norm": 0.0024321535602211952, - "learning_rate": 0.00019999784437553975, - "loss": 46.0, - "step": 27346 - }, - { - "epoch": 2.0908691247586826, - "grad_norm": 0.0017820857465267181, - "learning_rate": 0.00019999784421782422, - "loss": 46.0, - "step": 27347 - }, - { - "epoch": 2.0909455817420723, - "grad_norm": 0.000932272698264569, - "learning_rate": 0.0001999978440601029, - "loss": 46.0, - "step": 27348 - }, - { - "epoch": 2.091022038725462, - "grad_norm": 0.0011680368334054947, - "learning_rate": 0.00019999784390237579, - "loss": 46.0, - "step": 27349 - }, - { - "epoch": 2.091098495708852, - "grad_norm": 0.0011892203474417329, - "learning_rate": 0.00019999784374464294, - "loss": 46.0, - "step": 27350 - }, - { - "epoch": 2.0911749526922416, - "grad_norm": 0.0010113869793713093, - "learning_rate": 0.00019999784358690431, - "loss": 46.0, - "step": 27351 - }, - { - "epoch": 2.0912514096756314, - "grad_norm": 0.0007817111327312887, - "learning_rate": 0.00019999784342915992, - "loss": 46.0, - "step": 27352 - }, - { - "epoch": 2.091327866659021, - "grad_norm": 0.0012770001776516438, - "learning_rate": 0.00019999784327140975, - "loss": 46.0, - "step": 27353 - }, - { - "epoch": 2.0914043236424105, - "grad_norm": 0.0009075301932170987, - "learning_rate": 0.0001999978431136538, - "loss": 46.0, - "step": 27354 - }, - { - "epoch": 2.0914807806258002, - "grad_norm": 0.005038647912442684, - "learning_rate": 0.0001999978429558921, - "loss": 46.0, - "step": 27355 - }, - { - "epoch": 2.09155723760919, - "grad_norm": 0.0027564982883632183, - "learning_rate": 0.00019999784279812463, - "loss": 46.0, - "step": 27356 - }, - { - "epoch": 2.0916336945925798, - "grad_norm": 0.00037014728877693415, - "learning_rate": 0.0001999978426403514, - "loss": 46.0, - "step": 27357 - }, - { - "epoch": 2.0917101515759695, - "grad_norm": 0.0010014689760282636, - "learning_rate": 0.00019999784248257236, - "loss": 46.0, - "step": 27358 - }, - { - "epoch": 2.0917866085593593, - "grad_norm": 0.0021338353399187326, - "learning_rate": 0.0001999978423247876, - "loss": 46.0, - "step": 27359 - }, - { - "epoch": 2.091863065542749, - "grad_norm": 0.0012704251566901803, - "learning_rate": 0.00019999784216699704, - "loss": 46.0, - "step": 27360 - }, - { - "epoch": 2.091939522526139, - "grad_norm": 0.0008454549824818969, - "learning_rate": 0.00019999784200920072, - "loss": 46.0, - "step": 27361 - }, - { - "epoch": 2.0920159795095286, - "grad_norm": 0.0013233498902991414, - "learning_rate": 0.0001999978418513986, - "loss": 46.0, - "step": 27362 - }, - { - "epoch": 2.0920924364929183, - "grad_norm": 0.000730192638002336, - "learning_rate": 0.00019999784169359076, - "loss": 46.0, - "step": 27363 - }, - { - "epoch": 2.092168893476308, - "grad_norm": 0.003717245766893029, - "learning_rate": 0.00019999784153577714, - "loss": 46.0, - "step": 27364 - }, - { - "epoch": 2.092245350459698, - "grad_norm": 0.002286767354235053, - "learning_rate": 0.00019999784137795775, - "loss": 46.0, - "step": 27365 - }, - { - "epoch": 2.092321807443087, - "grad_norm": 0.000832725374493748, - "learning_rate": 0.00019999784122013258, - "loss": 46.0, - "step": 27366 - }, - { - "epoch": 2.092398264426477, - "grad_norm": 0.0010958941420540214, - "learning_rate": 0.00019999784106230163, - "loss": 46.0, - "step": 27367 - }, - { - "epoch": 2.0924747214098667, - "grad_norm": 0.0005153437377884984, - "learning_rate": 0.00019999784090446495, - "loss": 46.0, - "step": 27368 - }, - { - "epoch": 2.0925511783932564, - "grad_norm": 0.0010657175444066525, - "learning_rate": 0.00019999784074662246, - "loss": 46.0, - "step": 27369 - }, - { - "epoch": 2.092627635376646, - "grad_norm": 0.0005108549376018345, - "learning_rate": 0.00019999784058877422, - "loss": 46.0, - "step": 27370 - }, - { - "epoch": 2.092704092360036, - "grad_norm": 0.0005293979193083942, - "learning_rate": 0.0001999978404309202, - "loss": 46.0, - "step": 27371 - }, - { - "epoch": 2.0927805493434257, - "grad_norm": 0.0012036376865580678, - "learning_rate": 0.0001999978402730604, - "loss": 46.0, - "step": 27372 - }, - { - "epoch": 2.0928570063268155, - "grad_norm": 0.0007132362807169557, - "learning_rate": 0.00019999784011519488, - "loss": 46.0, - "step": 27373 - }, - { - "epoch": 2.0929334633102052, - "grad_norm": 0.0008389372960664332, - "learning_rate": 0.00019999783995732355, - "loss": 46.0, - "step": 27374 - }, - { - "epoch": 2.093009920293595, - "grad_norm": 0.001961604692041874, - "learning_rate": 0.00019999783979944647, - "loss": 46.0, - "step": 27375 - }, - { - "epoch": 2.0930863772769843, - "grad_norm": 0.0014159164857119322, - "learning_rate": 0.0001999978396415636, - "loss": 46.0, - "step": 27376 - }, - { - "epoch": 2.093162834260374, - "grad_norm": 0.0018381391419097781, - "learning_rate": 0.00019999783948367497, - "loss": 46.0, - "step": 27377 - }, - { - "epoch": 2.093239291243764, - "grad_norm": 0.0009232028387486935, - "learning_rate": 0.00019999783932578055, - "loss": 46.0, - "step": 27378 - }, - { - "epoch": 2.0933157482271536, - "grad_norm": 0.0007812344119884074, - "learning_rate": 0.00019999783916788038, - "loss": 46.0, - "step": 27379 - }, - { - "epoch": 2.0933922052105434, - "grad_norm": 0.0005640701856464148, - "learning_rate": 0.00019999783900997447, - "loss": 46.0, - "step": 27380 - }, - { - "epoch": 2.093468662193933, - "grad_norm": 0.02172602340579033, - "learning_rate": 0.00019999783885206273, - "loss": 46.0, - "step": 27381 - }, - { - "epoch": 2.093545119177323, - "grad_norm": 0.0021992987021803856, - "learning_rate": 0.0001999978386941453, - "loss": 46.0, - "step": 27382 - }, - { - "epoch": 2.0936215761607126, - "grad_norm": 0.0011721898335963488, - "learning_rate": 0.000199997838536222, - "loss": 46.0, - "step": 27383 - }, - { - "epoch": 2.0936980331441024, - "grad_norm": 0.0007042097277007997, - "learning_rate": 0.000199997838378293, - "loss": 46.0, - "step": 27384 - }, - { - "epoch": 2.093774490127492, - "grad_norm": 0.0006267200224101543, - "learning_rate": 0.0001999978382203582, - "loss": 46.0, - "step": 27385 - }, - { - "epoch": 2.093850947110882, - "grad_norm": 0.0007163263508118689, - "learning_rate": 0.00019999783806241768, - "loss": 46.0, - "step": 27386 - }, - { - "epoch": 2.0939274040942712, - "grad_norm": 0.010717884637415409, - "learning_rate": 0.00019999783790447136, - "loss": 46.0, - "step": 27387 - }, - { - "epoch": 2.094003861077661, - "grad_norm": 0.00101911136880517, - "learning_rate": 0.00019999783774651925, - "loss": 46.0, - "step": 27388 - }, - { - "epoch": 2.0940803180610508, - "grad_norm": 0.0009173436556011438, - "learning_rate": 0.00019999783758856138, - "loss": 46.0, - "step": 27389 - }, - { - "epoch": 2.0941567750444405, - "grad_norm": 0.0005543183069676161, - "learning_rate": 0.00019999783743059776, - "loss": 46.0, - "step": 27390 - }, - { - "epoch": 2.0942332320278303, - "grad_norm": 0.00109396583866328, - "learning_rate": 0.00019999783727262836, - "loss": 46.0, - "step": 27391 - }, - { - "epoch": 2.09430968901122, - "grad_norm": 0.0013153445906937122, - "learning_rate": 0.0001999978371146532, - "loss": 46.0, - "step": 27392 - }, - { - "epoch": 2.09438614599461, - "grad_norm": 0.0007452231948263943, - "learning_rate": 0.00019999783695667225, - "loss": 46.0, - "step": 27393 - }, - { - "epoch": 2.0944626029779996, - "grad_norm": 0.0012657151091843843, - "learning_rate": 0.00019999783679868554, - "loss": 46.0, - "step": 27394 - }, - { - "epoch": 2.0945390599613893, - "grad_norm": 0.0019953420851379633, - "learning_rate": 0.00019999783664069307, - "loss": 46.0, - "step": 27395 - }, - { - "epoch": 2.094615516944779, - "grad_norm": 0.000672145513817668, - "learning_rate": 0.0001999978364826948, - "loss": 46.0, - "step": 27396 - }, - { - "epoch": 2.094691973928169, - "grad_norm": 0.0007700035930611193, - "learning_rate": 0.00019999783632469078, - "loss": 46.0, - "step": 27397 - }, - { - "epoch": 2.094768430911558, - "grad_norm": 0.005130458157509565, - "learning_rate": 0.000199997836166681, - "loss": 46.0, - "step": 27398 - }, - { - "epoch": 2.094844887894948, - "grad_norm": 0.0005214900593273342, - "learning_rate": 0.00019999783600866544, - "loss": 46.0, - "step": 27399 - }, - { - "epoch": 2.0949213448783377, - "grad_norm": 0.0025057243183255196, - "learning_rate": 0.0001999978358506441, - "loss": 46.0, - "step": 27400 - }, - { - "epoch": 2.0949978018617275, - "grad_norm": 0.0017635300755500793, - "learning_rate": 0.00019999783569261704, - "loss": 46.0, - "step": 27401 - }, - { - "epoch": 2.095074258845117, - "grad_norm": 0.001078938483260572, - "learning_rate": 0.00019999783553458416, - "loss": 46.0, - "step": 27402 - }, - { - "epoch": 2.095150715828507, - "grad_norm": 0.0021621212363243103, - "learning_rate": 0.00019999783537654552, - "loss": 46.0, - "step": 27403 - }, - { - "epoch": 2.0952271728118967, - "grad_norm": 0.0026818534824997187, - "learning_rate": 0.00019999783521850112, - "loss": 46.0, - "step": 27404 - }, - { - "epoch": 2.0953036297952865, - "grad_norm": 0.0007877573953010142, - "learning_rate": 0.00019999783506045095, - "loss": 46.0, - "step": 27405 - }, - { - "epoch": 2.0953800867786763, - "grad_norm": 0.0025736417155712843, - "learning_rate": 0.00019999783490239504, - "loss": 46.0, - "step": 27406 - }, - { - "epoch": 2.095456543762066, - "grad_norm": 0.0005176902050152421, - "learning_rate": 0.00019999783474433332, - "loss": 46.0, - "step": 27407 - }, - { - "epoch": 2.095533000745456, - "grad_norm": 0.003798508085310459, - "learning_rate": 0.0001999978345862658, - "loss": 46.0, - "step": 27408 - }, - { - "epoch": 2.095609457728845, - "grad_norm": 0.005138091742992401, - "learning_rate": 0.00019999783442819257, - "loss": 46.0, - "step": 27409 - }, - { - "epoch": 2.095685914712235, - "grad_norm": 0.0003937718574889004, - "learning_rate": 0.00019999783427011357, - "loss": 46.0, - "step": 27410 - }, - { - "epoch": 2.0957623716956246, - "grad_norm": 0.002517223358154297, - "learning_rate": 0.00019999783411202876, - "loss": 46.0, - "step": 27411 - }, - { - "epoch": 2.0958388286790144, - "grad_norm": 0.0006115194410085678, - "learning_rate": 0.0001999978339539382, - "loss": 46.0, - "step": 27412 - }, - { - "epoch": 2.095915285662404, - "grad_norm": 0.002805655123665929, - "learning_rate": 0.0001999978337958419, - "loss": 46.0, - "step": 27413 - }, - { - "epoch": 2.095991742645794, - "grad_norm": 0.0009261516388505697, - "learning_rate": 0.0001999978336377398, - "loss": 46.0, - "step": 27414 - }, - { - "epoch": 2.0960681996291837, - "grad_norm": 0.001984564121812582, - "learning_rate": 0.00019999783347963193, - "loss": 46.0, - "step": 27415 - }, - { - "epoch": 2.0961446566125734, - "grad_norm": 0.0007494467427022755, - "learning_rate": 0.0001999978333215183, - "loss": 46.0, - "step": 27416 - }, - { - "epoch": 2.096221113595963, - "grad_norm": 0.009969832375645638, - "learning_rate": 0.00019999783316339888, - "loss": 46.0, - "step": 27417 - }, - { - "epoch": 2.096297570579353, - "grad_norm": 0.0009585009538568556, - "learning_rate": 0.00019999783300527372, - "loss": 46.0, - "step": 27418 - }, - { - "epoch": 2.0963740275627427, - "grad_norm": 0.0005163499736227095, - "learning_rate": 0.00019999783284714278, - "loss": 46.0, - "step": 27419 - }, - { - "epoch": 2.096450484546132, - "grad_norm": 0.004340521991252899, - "learning_rate": 0.00019999783268900606, - "loss": 46.0, - "step": 27420 - }, - { - "epoch": 2.096526941529522, - "grad_norm": 0.0008606056217104197, - "learning_rate": 0.00019999783253086357, - "loss": 46.0, - "step": 27421 - }, - { - "epoch": 2.0966033985129116, - "grad_norm": 0.002138251205906272, - "learning_rate": 0.0001999978323727153, - "loss": 46.0, - "step": 27422 - }, - { - "epoch": 2.0966798554963013, - "grad_norm": 0.0006655004690401256, - "learning_rate": 0.00019999783221456133, - "loss": 46.0, - "step": 27423 - }, - { - "epoch": 2.096756312479691, - "grad_norm": 0.0015485911862924695, - "learning_rate": 0.00019999783205640152, - "loss": 46.0, - "step": 27424 - }, - { - "epoch": 2.096832769463081, - "grad_norm": 0.002647699322551489, - "learning_rate": 0.00019999783189823597, - "loss": 46.0, - "step": 27425 - }, - { - "epoch": 2.0969092264464706, - "grad_norm": 0.0024343065451830626, - "learning_rate": 0.00019999783174006464, - "loss": 46.0, - "step": 27426 - }, - { - "epoch": 2.0969856834298604, - "grad_norm": 0.005338119808584452, - "learning_rate": 0.00019999783158188754, - "loss": 46.0, - "step": 27427 - }, - { - "epoch": 2.09706214041325, - "grad_norm": 0.018196184188127518, - "learning_rate": 0.00019999783142370467, - "loss": 46.0, - "step": 27428 - }, - { - "epoch": 2.09713859739664, - "grad_norm": 0.0008322685025632381, - "learning_rate": 0.00019999783126551605, - "loss": 46.0, - "step": 27429 - }, - { - "epoch": 2.0972150543800296, - "grad_norm": 0.0009649796411395073, - "learning_rate": 0.00019999783110732165, - "loss": 46.0, - "step": 27430 - }, - { - "epoch": 2.097291511363419, - "grad_norm": 0.0008222355972975492, - "learning_rate": 0.00019999783094912146, - "loss": 46.0, - "step": 27431 - }, - { - "epoch": 2.0973679683468087, - "grad_norm": 0.0009169584955088794, - "learning_rate": 0.00019999783079091552, - "loss": 46.0, - "step": 27432 - }, - { - "epoch": 2.0974444253301985, - "grad_norm": 0.0006126546650193632, - "learning_rate": 0.0001999978306327038, - "loss": 46.0, - "step": 27433 - }, - { - "epoch": 2.0975208823135882, - "grad_norm": 0.0005853792536072433, - "learning_rate": 0.00019999783047448632, - "loss": 46.0, - "step": 27434 - }, - { - "epoch": 2.097597339296978, - "grad_norm": 0.0007943316013552248, - "learning_rate": 0.00019999783031626308, - "loss": 46.0, - "step": 27435 - }, - { - "epoch": 2.0976737962803678, - "grad_norm": 0.0010237219976261258, - "learning_rate": 0.00019999783015803405, - "loss": 46.0, - "step": 27436 - }, - { - "epoch": 2.0977502532637575, - "grad_norm": 0.0032180820126086473, - "learning_rate": 0.00019999782999979927, - "loss": 46.0, - "step": 27437 - }, - { - "epoch": 2.0978267102471473, - "grad_norm": 0.0026606442406773567, - "learning_rate": 0.0001999978298415587, - "loss": 46.0, - "step": 27438 - }, - { - "epoch": 2.097903167230537, - "grad_norm": 0.005456660874187946, - "learning_rate": 0.00019999782968331239, - "loss": 46.0, - "step": 27439 - }, - { - "epoch": 2.097979624213927, - "grad_norm": 0.0027833774220198393, - "learning_rate": 0.00019999782952506028, - "loss": 46.0, - "step": 27440 - }, - { - "epoch": 2.0980560811973166, - "grad_norm": 0.0013499169144779444, - "learning_rate": 0.00019999782936680244, - "loss": 46.0, - "step": 27441 - }, - { - "epoch": 2.098132538180706, - "grad_norm": 0.004375061485916376, - "learning_rate": 0.0001999978292085388, - "loss": 46.0, - "step": 27442 - }, - { - "epoch": 2.0982089951640956, - "grad_norm": 0.0013925025705248117, - "learning_rate": 0.00019999782905026937, - "loss": 46.0, - "step": 27443 - }, - { - "epoch": 2.0982854521474854, - "grad_norm": 0.0011829192517325282, - "learning_rate": 0.00019999782889199418, - "loss": 46.0, - "step": 27444 - }, - { - "epoch": 2.098361909130875, - "grad_norm": 0.0022398512810468674, - "learning_rate": 0.00019999782873371324, - "loss": 46.0, - "step": 27445 - }, - { - "epoch": 2.098438366114265, - "grad_norm": 0.004042470827698708, - "learning_rate": 0.00019999782857542652, - "loss": 46.0, - "step": 27446 - }, - { - "epoch": 2.0985148230976547, - "grad_norm": 0.002573939273133874, - "learning_rate": 0.00019999782841713403, - "loss": 46.0, - "step": 27447 - }, - { - "epoch": 2.0985912800810445, - "grad_norm": 0.0006682987441308796, - "learning_rate": 0.0001999978282588358, - "loss": 46.0, - "step": 27448 - }, - { - "epoch": 2.098667737064434, - "grad_norm": 0.0019486340461298823, - "learning_rate": 0.0001999978281005318, - "loss": 46.0, - "step": 27449 - }, - { - "epoch": 2.098744194047824, - "grad_norm": 0.0011153511004522443, - "learning_rate": 0.00019999782794222196, - "loss": 46.0, - "step": 27450 - }, - { - "epoch": 2.0988206510312137, - "grad_norm": 0.001907104509882629, - "learning_rate": 0.00019999782778390643, - "loss": 46.0, - "step": 27451 - }, - { - "epoch": 2.0988971080146035, - "grad_norm": 0.0010149977169930935, - "learning_rate": 0.0001999978276255851, - "loss": 46.0, - "step": 27452 - }, - { - "epoch": 2.098973564997993, - "grad_norm": 0.00487825833261013, - "learning_rate": 0.00019999782746725798, - "loss": 46.0, - "step": 27453 - }, - { - "epoch": 2.0990500219813826, - "grad_norm": 0.0009998572058975697, - "learning_rate": 0.00019999782730892513, - "loss": 46.0, - "step": 27454 - }, - { - "epoch": 2.0991264789647723, - "grad_norm": 0.00228071678429842, - "learning_rate": 0.0001999978271505865, - "loss": 46.0, - "step": 27455 - }, - { - "epoch": 2.099202935948162, - "grad_norm": 0.0023862337693572044, - "learning_rate": 0.00019999782699224206, - "loss": 46.0, - "step": 27456 - }, - { - "epoch": 2.099279392931552, - "grad_norm": 0.0014261300675570965, - "learning_rate": 0.0001999978268338919, - "loss": 46.0, - "step": 27457 - }, - { - "epoch": 2.0993558499149416, - "grad_norm": 0.0011226380011066794, - "learning_rate": 0.00019999782667553596, - "loss": 46.0, - "step": 27458 - }, - { - "epoch": 2.0994323068983314, - "grad_norm": 0.001110877376049757, - "learning_rate": 0.00019999782651717424, - "loss": 46.0, - "step": 27459 - }, - { - "epoch": 2.099508763881721, - "grad_norm": 0.0034316410310566425, - "learning_rate": 0.00019999782635880676, - "loss": 46.0, - "step": 27460 - }, - { - "epoch": 2.099585220865111, - "grad_norm": 0.0014385415706783533, - "learning_rate": 0.0001999978262004335, - "loss": 46.0, - "step": 27461 - }, - { - "epoch": 2.0996616778485007, - "grad_norm": 0.006219201255589724, - "learning_rate": 0.0001999978260420545, - "loss": 46.0, - "step": 27462 - }, - { - "epoch": 2.09973813483189, - "grad_norm": 0.0006632667500525713, - "learning_rate": 0.00019999782588366968, - "loss": 46.0, - "step": 27463 - }, - { - "epoch": 2.0998145918152797, - "grad_norm": 0.00036832300247624516, - "learning_rate": 0.00019999782572527913, - "loss": 46.0, - "step": 27464 - }, - { - "epoch": 2.0998910487986695, - "grad_norm": 0.0010592605685815215, - "learning_rate": 0.0001999978255668828, - "loss": 46.0, - "step": 27465 - }, - { - "epoch": 2.0999675057820593, - "grad_norm": 0.00135167280677706, - "learning_rate": 0.0001999978254084807, - "loss": 46.0, - "step": 27466 - }, - { - "epoch": 2.100043962765449, - "grad_norm": 0.0005404517869465053, - "learning_rate": 0.00019999782525007283, - "loss": 46.0, - "step": 27467 - }, - { - "epoch": 2.100120419748839, - "grad_norm": 0.0008791881846264005, - "learning_rate": 0.00019999782509165918, - "loss": 46.0, - "step": 27468 - }, - { - "epoch": 2.1001968767322285, - "grad_norm": 0.0017522121779620647, - "learning_rate": 0.00019999782493323976, - "loss": 46.0, - "step": 27469 - }, - { - "epoch": 2.1002733337156183, - "grad_norm": 0.0006317515508271754, - "learning_rate": 0.00019999782477481462, - "loss": 46.0, - "step": 27470 - }, - { - "epoch": 2.100349790699008, - "grad_norm": 0.0007047151448205113, - "learning_rate": 0.00019999782461638366, - "loss": 46.0, - "step": 27471 - }, - { - "epoch": 2.100426247682398, - "grad_norm": 0.0013135392218828201, - "learning_rate": 0.00019999782445794692, - "loss": 46.0, - "step": 27472 - }, - { - "epoch": 2.1005027046657876, - "grad_norm": 0.002107311738654971, - "learning_rate": 0.00019999782429950446, - "loss": 46.0, - "step": 27473 - }, - { - "epoch": 2.1005791616491774, - "grad_norm": 0.0007449019467458129, - "learning_rate": 0.0001999978241410562, - "loss": 46.0, - "step": 27474 - }, - { - "epoch": 2.1006556186325667, - "grad_norm": 0.0005109795019961894, - "learning_rate": 0.0001999978239826022, - "loss": 46.0, - "step": 27475 - }, - { - "epoch": 2.1007320756159564, - "grad_norm": 0.0011709368554875255, - "learning_rate": 0.00019999782382414236, - "loss": 46.0, - "step": 27476 - }, - { - "epoch": 2.100808532599346, - "grad_norm": 0.0006436691619455814, - "learning_rate": 0.0001999978236656768, - "loss": 46.0, - "step": 27477 - }, - { - "epoch": 2.100884989582736, - "grad_norm": 0.0023157019168138504, - "learning_rate": 0.00019999782350720548, - "loss": 46.0, - "step": 27478 - }, - { - "epoch": 2.1009614465661257, - "grad_norm": 0.0009241512161679566, - "learning_rate": 0.00019999782334872838, - "loss": 46.0, - "step": 27479 - }, - { - "epoch": 2.1010379035495155, - "grad_norm": 0.004700734280049801, - "learning_rate": 0.0001999978231902455, - "loss": 46.0, - "step": 27480 - }, - { - "epoch": 2.1011143605329052, - "grad_norm": 0.0006546802469529212, - "learning_rate": 0.00019999782303175686, - "loss": 46.0, - "step": 27481 - }, - { - "epoch": 2.101190817516295, - "grad_norm": 0.0028919437900185585, - "learning_rate": 0.00019999782287326247, - "loss": 46.0, - "step": 27482 - }, - { - "epoch": 2.1012672744996848, - "grad_norm": 0.00415746821090579, - "learning_rate": 0.00019999782271476227, - "loss": 46.0, - "step": 27483 - }, - { - "epoch": 2.1013437314830745, - "grad_norm": 0.0029722987674176693, - "learning_rate": 0.00019999782255625633, - "loss": 46.0, - "step": 27484 - }, - { - "epoch": 2.101420188466464, - "grad_norm": 0.0011102004209533334, - "learning_rate": 0.0001999978223977446, - "loss": 46.0, - "step": 27485 - }, - { - "epoch": 2.1014966454498536, - "grad_norm": 0.00449033547192812, - "learning_rate": 0.0001999978222392271, - "loss": 46.0, - "step": 27486 - }, - { - "epoch": 2.1015731024332434, - "grad_norm": 0.00145477254409343, - "learning_rate": 0.00019999782208070385, - "loss": 46.0, - "step": 27487 - }, - { - "epoch": 2.101649559416633, - "grad_norm": 0.0008745861705392599, - "learning_rate": 0.00019999782192217484, - "loss": 46.0, - "step": 27488 - }, - { - "epoch": 2.101726016400023, - "grad_norm": 0.003108362201601267, - "learning_rate": 0.00019999782176364006, - "loss": 46.0, - "step": 27489 - }, - { - "epoch": 2.1018024733834126, - "grad_norm": 0.000680555880535394, - "learning_rate": 0.00019999782160509948, - "loss": 46.0, - "step": 27490 - }, - { - "epoch": 2.1018789303668024, - "grad_norm": 0.0017704834463074803, - "learning_rate": 0.00019999782144655316, - "loss": 46.0, - "step": 27491 - }, - { - "epoch": 2.101955387350192, - "grad_norm": 0.0005636248970404267, - "learning_rate": 0.00019999782128800103, - "loss": 46.0, - "step": 27492 - }, - { - "epoch": 2.102031844333582, - "grad_norm": 0.0006046387716196477, - "learning_rate": 0.00019999782112944318, - "loss": 46.0, - "step": 27493 - }, - { - "epoch": 2.1021083013169717, - "grad_norm": 0.001230064663104713, - "learning_rate": 0.00019999782097087954, - "loss": 46.0, - "step": 27494 - }, - { - "epoch": 2.1021847583003614, - "grad_norm": 0.0015667584957554936, - "learning_rate": 0.00019999782081231012, - "loss": 46.0, - "step": 27495 - }, - { - "epoch": 2.102261215283751, - "grad_norm": 0.0008459329837933183, - "learning_rate": 0.00019999782065373493, - "loss": 46.0, - "step": 27496 - }, - { - "epoch": 2.1023376722671405, - "grad_norm": 0.0007667461177334189, - "learning_rate": 0.00019999782049515399, - "loss": 46.0, - "step": 27497 - }, - { - "epoch": 2.1024141292505303, - "grad_norm": 0.0020758130121976137, - "learning_rate": 0.00019999782033656727, - "loss": 46.0, - "step": 27498 - }, - { - "epoch": 2.10249058623392, - "grad_norm": 0.0034676725044846535, - "learning_rate": 0.00019999782017797476, - "loss": 46.0, - "step": 27499 - }, - { - "epoch": 2.10256704321731, - "grad_norm": 0.00042554130777716637, - "learning_rate": 0.00019999782001937653, - "loss": 46.0, - "step": 27500 - }, - { - "epoch": 2.1026435002006996, - "grad_norm": 0.0011001051170751452, - "learning_rate": 0.0001999978198607725, - "loss": 46.0, - "step": 27501 - }, - { - "epoch": 2.1027199571840893, - "grad_norm": 0.0012970971874892712, - "learning_rate": 0.00019999781970216272, - "loss": 46.0, - "step": 27502 - }, - { - "epoch": 2.102796414167479, - "grad_norm": 0.002104746177792549, - "learning_rate": 0.00019999781954354714, - "loss": 46.0, - "step": 27503 - }, - { - "epoch": 2.102872871150869, - "grad_norm": 0.0006146163213998079, - "learning_rate": 0.0001999978193849258, - "loss": 46.0, - "step": 27504 - }, - { - "epoch": 2.1029493281342586, - "grad_norm": 0.00045064990990795195, - "learning_rate": 0.0001999978192262987, - "loss": 46.0, - "step": 27505 - }, - { - "epoch": 2.1030257851176484, - "grad_norm": 0.0005981334252282977, - "learning_rate": 0.0001999978190676658, - "loss": 46.0, - "step": 27506 - }, - { - "epoch": 2.1031022421010377, - "grad_norm": 0.0009634100715629756, - "learning_rate": 0.00019999781890902717, - "loss": 46.0, - "step": 27507 - }, - { - "epoch": 2.1031786990844274, - "grad_norm": 0.0005045508733019233, - "learning_rate": 0.00019999781875038278, - "loss": 46.0, - "step": 27508 - }, - { - "epoch": 2.103255156067817, - "grad_norm": 0.006769275292754173, - "learning_rate": 0.00019999781859173256, - "loss": 46.0, - "step": 27509 - }, - { - "epoch": 2.103331613051207, - "grad_norm": 0.0006632230360992253, - "learning_rate": 0.00019999781843307664, - "loss": 46.0, - "step": 27510 - }, - { - "epoch": 2.1034080700345967, - "grad_norm": 0.0021935333497822285, - "learning_rate": 0.00019999781827441493, - "loss": 46.0, - "step": 27511 - }, - { - "epoch": 2.1034845270179865, - "grad_norm": 0.0008420352824032307, - "learning_rate": 0.00019999781811574742, - "loss": 46.0, - "step": 27512 - }, - { - "epoch": 2.1035609840013763, - "grad_norm": 0.0014697188744321465, - "learning_rate": 0.00019999781795707416, - "loss": 46.0, - "step": 27513 - }, - { - "epoch": 2.103637440984766, - "grad_norm": 0.0006531400722451508, - "learning_rate": 0.00019999781779839513, - "loss": 46.0, - "step": 27514 - }, - { - "epoch": 2.1037138979681558, - "grad_norm": 0.005551649257540703, - "learning_rate": 0.00019999781763971032, - "loss": 46.0, - "step": 27515 - }, - { - "epoch": 2.1037903549515455, - "grad_norm": 0.0033420671243220568, - "learning_rate": 0.00019999781748101974, - "loss": 46.0, - "step": 27516 - }, - { - "epoch": 2.1038668119349353, - "grad_norm": 0.0027969086077064276, - "learning_rate": 0.00019999781732232344, - "loss": 46.0, - "step": 27517 - }, - { - "epoch": 2.1039432689183246, - "grad_norm": 0.00125506438780576, - "learning_rate": 0.00019999781716362135, - "loss": 46.0, - "step": 27518 - }, - { - "epoch": 2.1040197259017144, - "grad_norm": 0.0017572084907442331, - "learning_rate": 0.00019999781700491345, - "loss": 46.0, - "step": 27519 - }, - { - "epoch": 2.104096182885104, - "grad_norm": 0.0008902869885787368, - "learning_rate": 0.0001999978168461998, - "loss": 46.0, - "step": 27520 - }, - { - "epoch": 2.104172639868494, - "grad_norm": 0.0009279965306632221, - "learning_rate": 0.00019999781668748038, - "loss": 46.0, - "step": 27521 - }, - { - "epoch": 2.1042490968518837, - "grad_norm": 0.00045384641271084547, - "learning_rate": 0.00019999781652875522, - "loss": 46.0, - "step": 27522 - }, - { - "epoch": 2.1043255538352734, - "grad_norm": 0.0006511066458187997, - "learning_rate": 0.00019999781637002425, - "loss": 46.0, - "step": 27523 - }, - { - "epoch": 2.104402010818663, - "grad_norm": 0.0017435166519135237, - "learning_rate": 0.0001999978162112875, - "loss": 46.0, - "step": 27524 - }, - { - "epoch": 2.104478467802053, - "grad_norm": 0.0006301420507952571, - "learning_rate": 0.00019999781605254503, - "loss": 46.0, - "step": 27525 - }, - { - "epoch": 2.1045549247854427, - "grad_norm": 0.0015568938106298447, - "learning_rate": 0.00019999781589379677, - "loss": 46.0, - "step": 27526 - }, - { - "epoch": 2.1046313817688325, - "grad_norm": 0.0011748585384339094, - "learning_rate": 0.00019999781573504274, - "loss": 46.0, - "step": 27527 - }, - { - "epoch": 2.1047078387522222, - "grad_norm": 0.0005240196478553116, - "learning_rate": 0.00019999781557628293, - "loss": 46.0, - "step": 27528 - }, - { - "epoch": 2.1047842957356115, - "grad_norm": 0.00037409065407700837, - "learning_rate": 0.00019999781541751738, - "loss": 46.0, - "step": 27529 - }, - { - "epoch": 2.1048607527190013, - "grad_norm": 0.0031022352632135153, - "learning_rate": 0.00019999781525874605, - "loss": 46.0, - "step": 27530 - }, - { - "epoch": 2.104937209702391, - "grad_norm": 0.0019445521757006645, - "learning_rate": 0.00019999781509996893, - "loss": 46.0, - "step": 27531 - }, - { - "epoch": 2.105013666685781, - "grad_norm": 0.001530882902443409, - "learning_rate": 0.00019999781494118608, - "loss": 46.0, - "step": 27532 - }, - { - "epoch": 2.1050901236691706, - "grad_norm": 0.0040167272090911865, - "learning_rate": 0.0001999978147823974, - "loss": 46.0, - "step": 27533 - }, - { - "epoch": 2.1051665806525603, - "grad_norm": 0.0012179792392998934, - "learning_rate": 0.00019999781462360302, - "loss": 46.0, - "step": 27534 - }, - { - "epoch": 2.10524303763595, - "grad_norm": 0.0013874232536181808, - "learning_rate": 0.00019999781446480283, - "loss": 46.0, - "step": 27535 - }, - { - "epoch": 2.10531949461934, - "grad_norm": 0.004843584261834621, - "learning_rate": 0.00019999781430599686, - "loss": 46.0, - "step": 27536 - }, - { - "epoch": 2.1053959516027296, - "grad_norm": 0.003989243879914284, - "learning_rate": 0.00019999781414718515, - "loss": 46.0, - "step": 27537 - }, - { - "epoch": 2.1054724085861194, - "grad_norm": 0.0006604177760891616, - "learning_rate": 0.00019999781398836767, - "loss": 46.0, - "step": 27538 - }, - { - "epoch": 2.105548865569509, - "grad_norm": 0.0009026466286741197, - "learning_rate": 0.00019999781382954438, - "loss": 46.0, - "step": 27539 - }, - { - "epoch": 2.1056253225528985, - "grad_norm": 0.0026330859400331974, - "learning_rate": 0.00019999781367071538, - "loss": 46.0, - "step": 27540 - }, - { - "epoch": 2.1057017795362882, - "grad_norm": 0.001139921136200428, - "learning_rate": 0.00019999781351188057, - "loss": 46.0, - "step": 27541 - }, - { - "epoch": 2.105778236519678, - "grad_norm": 0.0007302314625121653, - "learning_rate": 0.00019999781335304, - "loss": 46.0, - "step": 27542 - }, - { - "epoch": 2.1058546935030678, - "grad_norm": 0.001985919428989291, - "learning_rate": 0.00019999781319419367, - "loss": 46.0, - "step": 27543 - }, - { - "epoch": 2.1059311504864575, - "grad_norm": 0.0006481681484729052, - "learning_rate": 0.00019999781303534154, - "loss": 46.0, - "step": 27544 - }, - { - "epoch": 2.1060076074698473, - "grad_norm": 0.0011985978344455361, - "learning_rate": 0.00019999781287648367, - "loss": 46.0, - "step": 27545 - }, - { - "epoch": 2.106084064453237, - "grad_norm": 0.0006719738594256341, - "learning_rate": 0.00019999781271762003, - "loss": 46.0, - "step": 27546 - }, - { - "epoch": 2.106160521436627, - "grad_norm": 0.0028443941846489906, - "learning_rate": 0.0001999978125587506, - "loss": 46.0, - "step": 27547 - }, - { - "epoch": 2.1062369784200166, - "grad_norm": 0.0007125627598725259, - "learning_rate": 0.00019999781239987542, - "loss": 46.0, - "step": 27548 - }, - { - "epoch": 2.1063134354034063, - "grad_norm": 0.000829347234684974, - "learning_rate": 0.00019999781224099445, - "loss": 46.0, - "step": 27549 - }, - { - "epoch": 2.106389892386796, - "grad_norm": 0.003277435200288892, - "learning_rate": 0.00019999781208210774, - "loss": 46.0, - "step": 27550 - }, - { - "epoch": 2.1064663493701854, - "grad_norm": 0.0051928856410086155, - "learning_rate": 0.00019999781192321525, - "loss": 46.0, - "step": 27551 - }, - { - "epoch": 2.106542806353575, - "grad_norm": 0.0010621713008731604, - "learning_rate": 0.000199997811764317, - "loss": 46.0, - "step": 27552 - }, - { - "epoch": 2.106619263336965, - "grad_norm": 0.0022010935936123133, - "learning_rate": 0.00019999781160541297, - "loss": 46.0, - "step": 27553 - }, - { - "epoch": 2.1066957203203547, - "grad_norm": 0.010605143383145332, - "learning_rate": 0.00019999781144650313, - "loss": 46.0, - "step": 27554 - }, - { - "epoch": 2.1067721773037444, - "grad_norm": 0.001072065089829266, - "learning_rate": 0.00019999781128758758, - "loss": 46.0, - "step": 27555 - }, - { - "epoch": 2.106848634287134, - "grad_norm": 0.002340103732421994, - "learning_rate": 0.00019999781112866623, - "loss": 46.0, - "step": 27556 - }, - { - "epoch": 2.106925091270524, - "grad_norm": 0.0019150434527546167, - "learning_rate": 0.00019999781096973913, - "loss": 46.0, - "step": 27557 - }, - { - "epoch": 2.1070015482539137, - "grad_norm": 0.0013690509367734194, - "learning_rate": 0.00019999781081080626, - "loss": 46.0, - "step": 27558 - }, - { - "epoch": 2.1070780052373035, - "grad_norm": 0.000352658040355891, - "learning_rate": 0.0001999978106518676, - "loss": 46.0, - "step": 27559 - }, - { - "epoch": 2.1071544622206932, - "grad_norm": 0.0019908156245946884, - "learning_rate": 0.0001999978104929232, - "loss": 46.0, - "step": 27560 - }, - { - "epoch": 2.107230919204083, - "grad_norm": 0.005864780396223068, - "learning_rate": 0.000199997810333973, - "loss": 46.0, - "step": 27561 - }, - { - "epoch": 2.1073073761874723, - "grad_norm": 0.000879982253536582, - "learning_rate": 0.00019999781017501704, - "loss": 46.0, - "step": 27562 - }, - { - "epoch": 2.107383833170862, - "grad_norm": 0.0015050513902679086, - "learning_rate": 0.00019999781001605533, - "loss": 46.0, - "step": 27563 - }, - { - "epoch": 2.107460290154252, - "grad_norm": 0.0012313692132011056, - "learning_rate": 0.00019999780985708782, - "loss": 46.0, - "step": 27564 - }, - { - "epoch": 2.1075367471376416, - "grad_norm": 0.00040437025018036366, - "learning_rate": 0.00019999780969811454, - "loss": 46.0, - "step": 27565 - }, - { - "epoch": 2.1076132041210314, - "grad_norm": 0.0006390642956830561, - "learning_rate": 0.00019999780953913553, - "loss": 46.0, - "step": 27566 - }, - { - "epoch": 2.107689661104421, - "grad_norm": 0.000998621340841055, - "learning_rate": 0.0001999978093801507, - "loss": 46.0, - "step": 27567 - }, - { - "epoch": 2.107766118087811, - "grad_norm": 0.0014330863486975431, - "learning_rate": 0.00019999780922116015, - "loss": 46.0, - "step": 27568 - }, - { - "epoch": 2.1078425750712007, - "grad_norm": 0.0013231082120910287, - "learning_rate": 0.0001999978090621638, - "loss": 46.0, - "step": 27569 - }, - { - "epoch": 2.1079190320545904, - "grad_norm": 0.000830391189083457, - "learning_rate": 0.0001999978089031617, - "loss": 46.0, - "step": 27570 - }, - { - "epoch": 2.10799548903798, - "grad_norm": 0.002778175286948681, - "learning_rate": 0.0001999978087441538, - "loss": 46.0, - "step": 27571 - }, - { - "epoch": 2.10807194602137, - "grad_norm": 0.0007058788323774934, - "learning_rate": 0.00019999780858514016, - "loss": 46.0, - "step": 27572 - }, - { - "epoch": 2.1081484030047593, - "grad_norm": 0.0005704647046513855, - "learning_rate": 0.00019999780842612074, - "loss": 46.0, - "step": 27573 - }, - { - "epoch": 2.108224859988149, - "grad_norm": 0.0006529319216497242, - "learning_rate": 0.00019999780826709555, - "loss": 46.0, - "step": 27574 - }, - { - "epoch": 2.1083013169715388, - "grad_norm": 0.010321738198399544, - "learning_rate": 0.00019999780810806459, - "loss": 46.0, - "step": 27575 - }, - { - "epoch": 2.1083777739549285, - "grad_norm": 0.0006926292553544044, - "learning_rate": 0.0001999978079490279, - "loss": 46.0, - "step": 27576 - }, - { - "epoch": 2.1084542309383183, - "grad_norm": 0.0008111309725791216, - "learning_rate": 0.0001999978077899854, - "loss": 46.0, - "step": 27577 - }, - { - "epoch": 2.108530687921708, - "grad_norm": 0.0013768266653642058, - "learning_rate": 0.0001999978076309371, - "loss": 46.0, - "step": 27578 - }, - { - "epoch": 2.108607144905098, - "grad_norm": 0.002121939091011882, - "learning_rate": 0.00019999780747188308, - "loss": 46.0, - "step": 27579 - }, - { - "epoch": 2.1086836018884876, - "grad_norm": 0.0009338922682218254, - "learning_rate": 0.0001999978073128233, - "loss": 46.0, - "step": 27580 - }, - { - "epoch": 2.1087600588718773, - "grad_norm": 0.0012911935336887836, - "learning_rate": 0.0001999978071537577, - "loss": 46.0, - "step": 27581 - }, - { - "epoch": 2.108836515855267, - "grad_norm": 0.0007539521902799606, - "learning_rate": 0.00019999780699468634, - "loss": 46.0, - "step": 27582 - }, - { - "epoch": 2.108912972838657, - "grad_norm": 0.0004395316354930401, - "learning_rate": 0.00019999780683560925, - "loss": 46.0, - "step": 27583 - }, - { - "epoch": 2.108989429822046, - "grad_norm": 0.0007877614698372781, - "learning_rate": 0.00019999780667652637, - "loss": 46.0, - "step": 27584 - }, - { - "epoch": 2.109065886805436, - "grad_norm": 0.00099780666641891, - "learning_rate": 0.0001999978065174377, - "loss": 46.0, - "step": 27585 - }, - { - "epoch": 2.1091423437888257, - "grad_norm": 0.0016647278098389506, - "learning_rate": 0.00019999780635834329, - "loss": 46.0, - "step": 27586 - }, - { - "epoch": 2.1092188007722155, - "grad_norm": 0.004062425810843706, - "learning_rate": 0.00019999780619924312, - "loss": 46.0, - "step": 27587 - }, - { - "epoch": 2.1092952577556052, - "grad_norm": 0.0009334741625934839, - "learning_rate": 0.00019999780604013716, - "loss": 46.0, - "step": 27588 - }, - { - "epoch": 2.109371714738995, - "grad_norm": 0.0007265450549311936, - "learning_rate": 0.00019999780588102542, - "loss": 46.0, - "step": 27589 - }, - { - "epoch": 2.1094481717223847, - "grad_norm": 0.0015771707985550165, - "learning_rate": 0.00019999780572190794, - "loss": 46.0, - "step": 27590 - }, - { - "epoch": 2.1095246287057745, - "grad_norm": 0.0020960504189133644, - "learning_rate": 0.00019999780556278465, - "loss": 46.0, - "step": 27591 - }, - { - "epoch": 2.1096010856891643, - "grad_norm": 0.0007886430830694735, - "learning_rate": 0.00019999780540365562, - "loss": 46.0, - "step": 27592 - }, - { - "epoch": 2.109677542672554, - "grad_norm": 0.0016777862329035997, - "learning_rate": 0.00019999780524452082, - "loss": 46.0, - "step": 27593 - }, - { - "epoch": 2.1097539996559433, - "grad_norm": 0.0007015528390184045, - "learning_rate": 0.00019999780508538024, - "loss": 46.0, - "step": 27594 - }, - { - "epoch": 2.109830456639333, - "grad_norm": 0.0005086369928903878, - "learning_rate": 0.0001999978049262339, - "loss": 46.0, - "step": 27595 - }, - { - "epoch": 2.109906913622723, - "grad_norm": 0.0009788682218641043, - "learning_rate": 0.0001999978047670818, - "loss": 46.0, - "step": 27596 - }, - { - "epoch": 2.1099833706061126, - "grad_norm": 0.0019625723361968994, - "learning_rate": 0.0001999978046079239, - "loss": 46.0, - "step": 27597 - }, - { - "epoch": 2.1100598275895024, - "grad_norm": 0.0022894656285643578, - "learning_rate": 0.00019999780444876025, - "loss": 46.0, - "step": 27598 - }, - { - "epoch": 2.110136284572892, - "grad_norm": 0.003526546061038971, - "learning_rate": 0.00019999780428959084, - "loss": 46.0, - "step": 27599 - }, - { - "epoch": 2.110212741556282, - "grad_norm": 0.004093230236321688, - "learning_rate": 0.00019999780413041562, - "loss": 46.0, - "step": 27600 - }, - { - "epoch": 2.1102891985396717, - "grad_norm": 0.0010514372261241078, - "learning_rate": 0.00019999780397123468, - "loss": 46.0, - "step": 27601 - }, - { - "epoch": 2.1103656555230614, - "grad_norm": 0.0004950147122144699, - "learning_rate": 0.00019999780381204794, - "loss": 46.0, - "step": 27602 - }, - { - "epoch": 2.110442112506451, - "grad_norm": 0.0010895001469179988, - "learning_rate": 0.00019999780365285546, - "loss": 46.0, - "step": 27603 - }, - { - "epoch": 2.110518569489841, - "grad_norm": 0.003022734308615327, - "learning_rate": 0.0001999978034936572, - "loss": 46.0, - "step": 27604 - }, - { - "epoch": 2.1105950264732307, - "grad_norm": 0.002471886109560728, - "learning_rate": 0.00019999780333445315, - "loss": 46.0, - "step": 27605 - }, - { - "epoch": 2.11067148345662, - "grad_norm": 0.002287641167640686, - "learning_rate": 0.00019999780317524332, - "loss": 46.0, - "step": 27606 - }, - { - "epoch": 2.11074794044001, - "grad_norm": 0.002804981777444482, - "learning_rate": 0.00019999780301602774, - "loss": 46.0, - "step": 27607 - }, - { - "epoch": 2.1108243974233996, - "grad_norm": 0.001787775312550366, - "learning_rate": 0.00019999780285680642, - "loss": 46.0, - "step": 27608 - }, - { - "epoch": 2.1109008544067893, - "grad_norm": 0.0010987324640154839, - "learning_rate": 0.0001999978026975793, - "loss": 46.0, - "step": 27609 - }, - { - "epoch": 2.110977311390179, - "grad_norm": 0.001675209030508995, - "learning_rate": 0.00019999780253834642, - "loss": 46.0, - "step": 27610 - }, - { - "epoch": 2.111053768373569, - "grad_norm": 0.00035454094177111983, - "learning_rate": 0.00019999780237910778, - "loss": 46.0, - "step": 27611 - }, - { - "epoch": 2.1111302253569586, - "grad_norm": 0.0007373957778327167, - "learning_rate": 0.00019999780221986334, - "loss": 46.0, - "step": 27612 - }, - { - "epoch": 2.1112066823403484, - "grad_norm": 0.0004193242348264903, - "learning_rate": 0.00019999780206061317, - "loss": 46.0, - "step": 27613 - }, - { - "epoch": 2.111283139323738, - "grad_norm": 0.002905827248468995, - "learning_rate": 0.00019999780190135718, - "loss": 46.0, - "step": 27614 - }, - { - "epoch": 2.111359596307128, - "grad_norm": 0.0011484738206490874, - "learning_rate": 0.00019999780174209545, - "loss": 46.0, - "step": 27615 - }, - { - "epoch": 2.111436053290517, - "grad_norm": 0.0032124840654432774, - "learning_rate": 0.00019999780158282796, - "loss": 46.0, - "step": 27616 - }, - { - "epoch": 2.111512510273907, - "grad_norm": 0.0008766617393121123, - "learning_rate": 0.00019999780142355468, - "loss": 46.0, - "step": 27617 - }, - { - "epoch": 2.1115889672572967, - "grad_norm": 0.0007554818294011056, - "learning_rate": 0.00019999780126427565, - "loss": 46.0, - "step": 27618 - }, - { - "epoch": 2.1116654242406865, - "grad_norm": 0.000889607414137572, - "learning_rate": 0.00019999780110499085, - "loss": 46.0, - "step": 27619 - }, - { - "epoch": 2.1117418812240762, - "grad_norm": 0.0013870007824152708, - "learning_rate": 0.00019999780094570027, - "loss": 46.0, - "step": 27620 - }, - { - "epoch": 2.111818338207466, - "grad_norm": 0.0020611132495105267, - "learning_rate": 0.00019999780078640392, - "loss": 46.0, - "step": 27621 - }, - { - "epoch": 2.1118947951908558, - "grad_norm": 0.006050414405763149, - "learning_rate": 0.0001999978006271018, - "loss": 46.0, - "step": 27622 - }, - { - "epoch": 2.1119712521742455, - "grad_norm": 0.0011397033231332898, - "learning_rate": 0.0001999978004677939, - "loss": 46.0, - "step": 27623 - }, - { - "epoch": 2.1120477091576353, - "grad_norm": 0.007902242243289948, - "learning_rate": 0.00019999780030848028, - "loss": 46.0, - "step": 27624 - }, - { - "epoch": 2.112124166141025, - "grad_norm": 0.0010233998764306307, - "learning_rate": 0.00019999780014916084, - "loss": 46.0, - "step": 27625 - }, - { - "epoch": 2.112200623124415, - "grad_norm": 0.002083076862618327, - "learning_rate": 0.00019999779998983565, - "loss": 46.0, - "step": 27626 - }, - { - "epoch": 2.1122770801078046, - "grad_norm": 0.006393703632056713, - "learning_rate": 0.0001999977998305047, - "loss": 46.0, - "step": 27627 - }, - { - "epoch": 2.112353537091194, - "grad_norm": 0.0009162043570540845, - "learning_rate": 0.00019999779967116795, - "loss": 46.0, - "step": 27628 - }, - { - "epoch": 2.1124299940745837, - "grad_norm": 0.000548450043424964, - "learning_rate": 0.00019999779951182544, - "loss": 46.0, - "step": 27629 - }, - { - "epoch": 2.1125064510579734, - "grad_norm": 0.0003302285913378, - "learning_rate": 0.00019999779935247718, - "loss": 46.0, - "step": 27630 - }, - { - "epoch": 2.112582908041363, - "grad_norm": 0.002078081015497446, - "learning_rate": 0.00019999779919312313, - "loss": 46.0, - "step": 27631 - }, - { - "epoch": 2.112659365024753, - "grad_norm": 0.0022325501777231693, - "learning_rate": 0.00019999779903376332, - "loss": 46.0, - "step": 27632 - }, - { - "epoch": 2.1127358220081427, - "grad_norm": 0.0017688550287857652, - "learning_rate": 0.00019999779887439775, - "loss": 46.0, - "step": 27633 - }, - { - "epoch": 2.1128122789915325, - "grad_norm": 0.0003088069788645953, - "learning_rate": 0.00019999779871502643, - "loss": 46.0, - "step": 27634 - }, - { - "epoch": 2.112888735974922, - "grad_norm": 0.0013076105387881398, - "learning_rate": 0.00019999779855564928, - "loss": 46.0, - "step": 27635 - }, - { - "epoch": 2.112965192958312, - "grad_norm": 0.0015641204081475735, - "learning_rate": 0.0001999977983962664, - "loss": 46.0, - "step": 27636 - }, - { - "epoch": 2.1130416499417017, - "grad_norm": 0.008307659067213535, - "learning_rate": 0.00019999779823687774, - "loss": 46.0, - "step": 27637 - }, - { - "epoch": 2.113118106925091, - "grad_norm": 0.013898754492402077, - "learning_rate": 0.00019999779807748332, - "loss": 46.0, - "step": 27638 - }, - { - "epoch": 2.113194563908481, - "grad_norm": 0.0004593391786329448, - "learning_rate": 0.00019999779791808313, - "loss": 46.0, - "step": 27639 - }, - { - "epoch": 2.1132710208918706, - "grad_norm": 0.0010538318892940879, - "learning_rate": 0.00019999779775867714, - "loss": 46.0, - "step": 27640 - }, - { - "epoch": 2.1133474778752603, - "grad_norm": 0.0004920208011753857, - "learning_rate": 0.00019999779759926543, - "loss": 46.0, - "step": 27641 - }, - { - "epoch": 2.11342393485865, - "grad_norm": 0.002719099400565028, - "learning_rate": 0.00019999779743984792, - "loss": 46.0, - "step": 27642 - }, - { - "epoch": 2.11350039184204, - "grad_norm": 0.001338088302873075, - "learning_rate": 0.00019999779728042464, - "loss": 46.0, - "step": 27643 - }, - { - "epoch": 2.1135768488254296, - "grad_norm": 0.001461145468056202, - "learning_rate": 0.0001999977971209956, - "loss": 46.0, - "step": 27644 - }, - { - "epoch": 2.1136533058088194, - "grad_norm": 0.0006191398133523762, - "learning_rate": 0.00019999779696156083, - "loss": 46.0, - "step": 27645 - }, - { - "epoch": 2.113729762792209, - "grad_norm": 0.0007986398413777351, - "learning_rate": 0.00019999779680212023, - "loss": 46.0, - "step": 27646 - }, - { - "epoch": 2.113806219775599, - "grad_norm": 0.0018254269380122423, - "learning_rate": 0.00019999779664267388, - "loss": 46.0, - "step": 27647 - }, - { - "epoch": 2.1138826767589887, - "grad_norm": 0.0023519706446677446, - "learning_rate": 0.00019999779648322176, - "loss": 46.0, - "step": 27648 - }, - { - "epoch": 2.113959133742378, - "grad_norm": 0.0011385384714230895, - "learning_rate": 0.00019999779632376386, - "loss": 46.0, - "step": 27649 - }, - { - "epoch": 2.1140355907257677, - "grad_norm": 0.0014540132833644748, - "learning_rate": 0.00019999779616430022, - "loss": 46.0, - "step": 27650 - }, - { - "epoch": 2.1141120477091575, - "grad_norm": 0.0033372421748936176, - "learning_rate": 0.0001999977960048308, - "loss": 46.0, - "step": 27651 - }, - { - "epoch": 2.1141885046925473, - "grad_norm": 0.00061256461776793, - "learning_rate": 0.00019999779584535562, - "loss": 46.0, - "step": 27652 - }, - { - "epoch": 2.114264961675937, - "grad_norm": 0.0029663462191820145, - "learning_rate": 0.00019999779568587465, - "loss": 46.0, - "step": 27653 - }, - { - "epoch": 2.114341418659327, - "grad_norm": 0.004290277138352394, - "learning_rate": 0.00019999779552638792, - "loss": 46.0, - "step": 27654 - }, - { - "epoch": 2.1144178756427165, - "grad_norm": 0.0018869865452870727, - "learning_rate": 0.0001999977953668954, - "loss": 46.0, - "step": 27655 - }, - { - "epoch": 2.1144943326261063, - "grad_norm": 0.0005279462784528732, - "learning_rate": 0.00019999779520739713, - "loss": 46.0, - "step": 27656 - }, - { - "epoch": 2.114570789609496, - "grad_norm": 0.0009307373547926545, - "learning_rate": 0.00019999779504789307, - "loss": 46.0, - "step": 27657 - }, - { - "epoch": 2.114647246592886, - "grad_norm": 0.0015755825443193316, - "learning_rate": 0.00019999779488838327, - "loss": 46.0, - "step": 27658 - }, - { - "epoch": 2.1147237035762756, - "grad_norm": 0.0016170954331755638, - "learning_rate": 0.0001999977947288677, - "loss": 46.0, - "step": 27659 - }, - { - "epoch": 2.114800160559665, - "grad_norm": 0.0008384857792407274, - "learning_rate": 0.00019999779456934634, - "loss": 46.0, - "step": 27660 - }, - { - "epoch": 2.1148766175430547, - "grad_norm": 0.0005755027523264289, - "learning_rate": 0.0001999977944098192, - "loss": 46.0, - "step": 27661 - }, - { - "epoch": 2.1149530745264444, - "grad_norm": 0.0016407616203650832, - "learning_rate": 0.00019999779425028632, - "loss": 46.0, - "step": 27662 - }, - { - "epoch": 2.115029531509834, - "grad_norm": 0.0009915753034874797, - "learning_rate": 0.00019999779409074768, - "loss": 46.0, - "step": 27663 - }, - { - "epoch": 2.115105988493224, - "grad_norm": 0.000602319254539907, - "learning_rate": 0.00019999779393120324, - "loss": 46.0, - "step": 27664 - }, - { - "epoch": 2.1151824454766137, - "grad_norm": 0.0016069767298176885, - "learning_rate": 0.00019999779377165305, - "loss": 46.0, - "step": 27665 - }, - { - "epoch": 2.1152589024600035, - "grad_norm": 0.004144642502069473, - "learning_rate": 0.00019999779361209706, - "loss": 46.0, - "step": 27666 - }, - { - "epoch": 2.1153353594433932, - "grad_norm": 0.0019507389515638351, - "learning_rate": 0.00019999779345253535, - "loss": 46.0, - "step": 27667 - }, - { - "epoch": 2.115411816426783, - "grad_norm": 0.0011293807765468955, - "learning_rate": 0.00019999779329296784, - "loss": 46.0, - "step": 27668 - }, - { - "epoch": 2.1154882734101728, - "grad_norm": 0.001420993241481483, - "learning_rate": 0.00019999779313339456, - "loss": 46.0, - "step": 27669 - }, - { - "epoch": 2.1155647303935625, - "grad_norm": 0.0008896767976693809, - "learning_rate": 0.00019999779297381553, - "loss": 46.0, - "step": 27670 - }, - { - "epoch": 2.115641187376952, - "grad_norm": 0.0018266815459355712, - "learning_rate": 0.0001999977928142307, - "loss": 46.0, - "step": 27671 - }, - { - "epoch": 2.1157176443603416, - "grad_norm": 0.0008133428054861724, - "learning_rate": 0.00019999779265464013, - "loss": 46.0, - "step": 27672 - }, - { - "epoch": 2.1157941013437314, - "grad_norm": 0.00037843038444407284, - "learning_rate": 0.00019999779249504375, - "loss": 46.0, - "step": 27673 - }, - { - "epoch": 2.115870558327121, - "grad_norm": 0.0010723500745370984, - "learning_rate": 0.00019999779233544166, - "loss": 46.0, - "step": 27674 - }, - { - "epoch": 2.115947015310511, - "grad_norm": 0.000778036832343787, - "learning_rate": 0.00019999779217583376, - "loss": 46.0, - "step": 27675 - }, - { - "epoch": 2.1160234722939006, - "grad_norm": 0.0008204008336178958, - "learning_rate": 0.00019999779201622012, - "loss": 46.0, - "step": 27676 - }, - { - "epoch": 2.1160999292772904, - "grad_norm": 0.004079892765730619, - "learning_rate": 0.00019999779185660068, - "loss": 46.0, - "step": 27677 - }, - { - "epoch": 2.11617638626068, - "grad_norm": 0.0034579394850879908, - "learning_rate": 0.0001999977916969755, - "loss": 46.0, - "step": 27678 - }, - { - "epoch": 2.11625284324407, - "grad_norm": 0.0021807660814374685, - "learning_rate": 0.0001999977915373445, - "loss": 46.0, - "step": 27679 - }, - { - "epoch": 2.1163293002274597, - "grad_norm": 0.0017785833915695548, - "learning_rate": 0.00019999779137770777, - "loss": 46.0, - "step": 27680 - }, - { - "epoch": 2.1164057572108494, - "grad_norm": 0.001374895335175097, - "learning_rate": 0.00019999779121806528, - "loss": 46.0, - "step": 27681 - }, - { - "epoch": 2.1164822141942388, - "grad_norm": 0.0007049769628793001, - "learning_rate": 0.00019999779105841698, - "loss": 46.0, - "step": 27682 - }, - { - "epoch": 2.1165586711776285, - "grad_norm": 0.002382354810833931, - "learning_rate": 0.00019999779089876295, - "loss": 46.0, - "step": 27683 - }, - { - "epoch": 2.1166351281610183, - "grad_norm": 0.0006863875314593315, - "learning_rate": 0.00019999779073910312, - "loss": 46.0, - "step": 27684 - }, - { - "epoch": 2.116711585144408, - "grad_norm": 0.0005150326760485768, - "learning_rate": 0.00019999779057943754, - "loss": 46.0, - "step": 27685 - }, - { - "epoch": 2.116788042127798, - "grad_norm": 0.0013440201291814446, - "learning_rate": 0.0001999977904197662, - "loss": 46.0, - "step": 27686 - }, - { - "epoch": 2.1168644991111876, - "grad_norm": 0.0017601497238501906, - "learning_rate": 0.00019999779026008907, - "loss": 46.0, - "step": 27687 - }, - { - "epoch": 2.1169409560945773, - "grad_norm": 0.0005629589431919158, - "learning_rate": 0.00019999779010040618, - "loss": 46.0, - "step": 27688 - }, - { - "epoch": 2.117017413077967, - "grad_norm": 0.000687082763761282, - "learning_rate": 0.0001999977899407175, - "loss": 46.0, - "step": 27689 - }, - { - "epoch": 2.117093870061357, - "grad_norm": 0.0006971581024117768, - "learning_rate": 0.0001999977897810231, - "loss": 46.0, - "step": 27690 - }, - { - "epoch": 2.1171703270447466, - "grad_norm": 0.0018247404368594289, - "learning_rate": 0.0001999977896213229, - "loss": 46.0, - "step": 27691 - }, - { - "epoch": 2.1172467840281364, - "grad_norm": 0.002912582363933325, - "learning_rate": 0.00019999778946161692, - "loss": 46.0, - "step": 27692 - }, - { - "epoch": 2.1173232410115257, - "grad_norm": 0.0009077168069779873, - "learning_rate": 0.00019999778930190518, - "loss": 46.0, - "step": 27693 - }, - { - "epoch": 2.1173996979949155, - "grad_norm": 0.00042187661165371537, - "learning_rate": 0.00019999778914218767, - "loss": 46.0, - "step": 27694 - }, - { - "epoch": 2.117476154978305, - "grad_norm": 0.0013393854023888707, - "learning_rate": 0.0001999977889824644, - "loss": 46.0, - "step": 27695 - }, - { - "epoch": 2.117552611961695, - "grad_norm": 0.0012687164125964046, - "learning_rate": 0.00019999778882273534, - "loss": 46.0, - "step": 27696 - }, - { - "epoch": 2.1176290689450847, - "grad_norm": 0.0006825268501415849, - "learning_rate": 0.00019999778866300054, - "loss": 46.0, - "step": 27697 - }, - { - "epoch": 2.1177055259284745, - "grad_norm": 0.0008617136627435684, - "learning_rate": 0.00019999778850325996, - "loss": 46.0, - "step": 27698 - }, - { - "epoch": 2.1177819829118643, - "grad_norm": 0.0017924109706655145, - "learning_rate": 0.00019999778834351361, - "loss": 46.0, - "step": 27699 - }, - { - "epoch": 2.117858439895254, - "grad_norm": 0.00128801423124969, - "learning_rate": 0.0001999977881837615, - "loss": 46.0, - "step": 27700 - }, - { - "epoch": 2.117934896878644, - "grad_norm": 0.0009495370904915035, - "learning_rate": 0.0001999977880240036, - "loss": 46.0, - "step": 27701 - }, - { - "epoch": 2.1180113538620335, - "grad_norm": 0.003119442379102111, - "learning_rate": 0.00019999778786423993, - "loss": 46.0, - "step": 27702 - }, - { - "epoch": 2.1180878108454233, - "grad_norm": 0.0013680238043889403, - "learning_rate": 0.0001999977877044705, - "loss": 46.0, - "step": 27703 - }, - { - "epoch": 2.1181642678288126, - "grad_norm": 0.0006327729788608849, - "learning_rate": 0.0001999977875446953, - "loss": 46.0, - "step": 27704 - }, - { - "epoch": 2.1182407248122024, - "grad_norm": 0.0007384635391645133, - "learning_rate": 0.00019999778738491434, - "loss": 46.0, - "step": 27705 - }, - { - "epoch": 2.118317181795592, - "grad_norm": 0.0009168931283056736, - "learning_rate": 0.00019999778722512758, - "loss": 46.0, - "step": 27706 - }, - { - "epoch": 2.118393638778982, - "grad_norm": 0.001216028118506074, - "learning_rate": 0.00019999778706533507, - "loss": 46.0, - "step": 27707 - }, - { - "epoch": 2.1184700957623717, - "grad_norm": 0.001020344439893961, - "learning_rate": 0.00019999778690553681, - "loss": 46.0, - "step": 27708 - }, - { - "epoch": 2.1185465527457614, - "grad_norm": 0.0010994928888976574, - "learning_rate": 0.00019999778674573276, - "loss": 46.0, - "step": 27709 - }, - { - "epoch": 2.118623009729151, - "grad_norm": 0.0005078541580587626, - "learning_rate": 0.00019999778658592293, - "loss": 46.0, - "step": 27710 - }, - { - "epoch": 2.118699466712541, - "grad_norm": 0.0018513964023441076, - "learning_rate": 0.00019999778642610733, - "loss": 46.0, - "step": 27711 - }, - { - "epoch": 2.1187759236959307, - "grad_norm": 0.0009828828042373061, - "learning_rate": 0.00019999778626628598, - "loss": 46.0, - "step": 27712 - }, - { - "epoch": 2.1188523806793205, - "grad_norm": 0.0010659564286470413, - "learning_rate": 0.00019999778610645886, - "loss": 46.0, - "step": 27713 - }, - { - "epoch": 2.1189288376627102, - "grad_norm": 0.002268376061692834, - "learning_rate": 0.00019999778594662597, - "loss": 46.0, - "step": 27714 - }, - { - "epoch": 2.1190052946460995, - "grad_norm": 0.002673167735338211, - "learning_rate": 0.00019999778578678733, - "loss": 46.0, - "step": 27715 - }, - { - "epoch": 2.1190817516294893, - "grad_norm": 0.0028183767572045326, - "learning_rate": 0.00019999778562694286, - "loss": 46.0, - "step": 27716 - }, - { - "epoch": 2.119158208612879, - "grad_norm": 0.0013293303782120347, - "learning_rate": 0.0001999977854670927, - "loss": 46.0, - "step": 27717 - }, - { - "epoch": 2.119234665596269, - "grad_norm": 0.0037902083713561296, - "learning_rate": 0.0001999977853072367, - "loss": 46.0, - "step": 27718 - }, - { - "epoch": 2.1193111225796586, - "grad_norm": 0.001904433243907988, - "learning_rate": 0.00019999778514737497, - "loss": 46.0, - "step": 27719 - }, - { - "epoch": 2.1193875795630484, - "grad_norm": 0.0004010601551271975, - "learning_rate": 0.00019999778498750744, - "loss": 46.0, - "step": 27720 - }, - { - "epoch": 2.119464036546438, - "grad_norm": 0.0017100354889407754, - "learning_rate": 0.00019999778482763416, - "loss": 46.0, - "step": 27721 - }, - { - "epoch": 2.119540493529828, - "grad_norm": 0.0010594935156404972, - "learning_rate": 0.0001999977846677551, - "loss": 46.0, - "step": 27722 - }, - { - "epoch": 2.1196169505132176, - "grad_norm": 0.0007256284006871283, - "learning_rate": 0.0001999977845078703, - "loss": 46.0, - "step": 27723 - }, - { - "epoch": 2.1196934074966074, - "grad_norm": 0.002242007292807102, - "learning_rate": 0.0001999977843479797, - "loss": 46.0, - "step": 27724 - }, - { - "epoch": 2.119769864479997, - "grad_norm": 0.0021430812776088715, - "learning_rate": 0.00019999778418808338, - "loss": 46.0, - "step": 27725 - }, - { - "epoch": 2.1198463214633865, - "grad_norm": 0.0014775723684579134, - "learning_rate": 0.00019999778402818123, - "loss": 46.0, - "step": 27726 - }, - { - "epoch": 2.1199227784467762, - "grad_norm": 0.0010617939988151193, - "learning_rate": 0.00019999778386827334, - "loss": 46.0, - "step": 27727 - }, - { - "epoch": 2.119999235430166, - "grad_norm": 0.0015301000094041228, - "learning_rate": 0.00019999778370835967, - "loss": 46.0, - "step": 27728 - }, - { - "epoch": 2.1200756924135558, - "grad_norm": 0.0012853695079684258, - "learning_rate": 0.00019999778354844026, - "loss": 46.0, - "step": 27729 - }, - { - "epoch": 2.1201521493969455, - "grad_norm": 0.0005558666889555752, - "learning_rate": 0.00019999778338851504, - "loss": 46.0, - "step": 27730 - }, - { - "epoch": 2.1202286063803353, - "grad_norm": 0.0022962314542382956, - "learning_rate": 0.00019999778322858406, - "loss": 46.0, - "step": 27731 - }, - { - "epoch": 2.120305063363725, - "grad_norm": 0.0018421991262584925, - "learning_rate": 0.00019999778306864735, - "loss": 46.0, - "step": 27732 - }, - { - "epoch": 2.120381520347115, - "grad_norm": 0.0019613695330917835, - "learning_rate": 0.00019999778290870484, - "loss": 46.0, - "step": 27733 - }, - { - "epoch": 2.1204579773305046, - "grad_norm": 0.0026510327588766813, - "learning_rate": 0.00019999778274875653, - "loss": 46.0, - "step": 27734 - }, - { - "epoch": 2.1205344343138943, - "grad_norm": 0.020627731457352638, - "learning_rate": 0.00019999778258880248, - "loss": 46.0, - "step": 27735 - }, - { - "epoch": 2.120610891297284, - "grad_norm": 0.007026944309473038, - "learning_rate": 0.00019999778242884268, - "loss": 46.0, - "step": 27736 - }, - { - "epoch": 2.1206873482806734, - "grad_norm": 0.0020663077011704445, - "learning_rate": 0.00019999778226887708, - "loss": 46.0, - "step": 27737 - }, - { - "epoch": 2.120763805264063, - "grad_norm": 0.0009447532356716692, - "learning_rate": 0.0001999977821089057, - "loss": 46.0, - "step": 27738 - }, - { - "epoch": 2.120840262247453, - "grad_norm": 0.0006349161267280579, - "learning_rate": 0.00019999778194892858, - "loss": 46.0, - "step": 27739 - }, - { - "epoch": 2.1209167192308427, - "grad_norm": 0.0012721461243927479, - "learning_rate": 0.00019999778178894572, - "loss": 46.0, - "step": 27740 - }, - { - "epoch": 2.1209931762142324, - "grad_norm": 0.0034698189701884985, - "learning_rate": 0.00019999778162895705, - "loss": 46.0, - "step": 27741 - }, - { - "epoch": 2.121069633197622, - "grad_norm": 0.0009711988968774676, - "learning_rate": 0.0001999977814689626, - "loss": 46.0, - "step": 27742 - }, - { - "epoch": 2.121146090181012, - "grad_norm": 0.0008573653176426888, - "learning_rate": 0.0001999977813089624, - "loss": 46.0, - "step": 27743 - }, - { - "epoch": 2.1212225471644017, - "grad_norm": 0.0014904525596648455, - "learning_rate": 0.0001999977811489564, - "loss": 46.0, - "step": 27744 - }, - { - "epoch": 2.1212990041477915, - "grad_norm": 0.002192295854911208, - "learning_rate": 0.00019999778098894467, - "loss": 46.0, - "step": 27745 - }, - { - "epoch": 2.1213754611311813, - "grad_norm": 0.00025476867449469864, - "learning_rate": 0.00019999778082892714, - "loss": 46.0, - "step": 27746 - }, - { - "epoch": 2.1214519181145706, - "grad_norm": 0.0009082362521439791, - "learning_rate": 0.0001999977806689039, - "loss": 46.0, - "step": 27747 - }, - { - "epoch": 2.1215283750979603, - "grad_norm": 0.0007706684991717339, - "learning_rate": 0.00019999778050887483, - "loss": 46.0, - "step": 27748 - }, - { - "epoch": 2.12160483208135, - "grad_norm": 0.0011777285253629088, - "learning_rate": 0.00019999778034884003, - "loss": 46.0, - "step": 27749 - }, - { - "epoch": 2.12168128906474, - "grad_norm": 0.0007248474285006523, - "learning_rate": 0.00019999778018879943, - "loss": 46.0, - "step": 27750 - }, - { - "epoch": 2.1217577460481296, - "grad_norm": 0.0019676508381962776, - "learning_rate": 0.00019999778002875306, - "loss": 46.0, - "step": 27751 - }, - { - "epoch": 2.1218342030315194, - "grad_norm": 0.0009099239250645041, - "learning_rate": 0.00019999777986870094, - "loss": 46.0, - "step": 27752 - }, - { - "epoch": 2.121910660014909, - "grad_norm": 0.0005753159057348967, - "learning_rate": 0.00019999777970864304, - "loss": 46.0, - "step": 27753 - }, - { - "epoch": 2.121987116998299, - "grad_norm": 0.001207655994221568, - "learning_rate": 0.00019999777954857938, - "loss": 46.0, - "step": 27754 - }, - { - "epoch": 2.1220635739816887, - "grad_norm": 0.0003887069469783455, - "learning_rate": 0.00019999777938850994, - "loss": 46.0, - "step": 27755 - }, - { - "epoch": 2.1221400309650784, - "grad_norm": 0.007342228665947914, - "learning_rate": 0.00019999777922843475, - "loss": 46.0, - "step": 27756 - }, - { - "epoch": 2.122216487948468, - "grad_norm": 0.0007428711396642029, - "learning_rate": 0.00019999777906835376, - "loss": 46.0, - "step": 27757 - }, - { - "epoch": 2.122292944931858, - "grad_norm": 0.0020686613861471415, - "learning_rate": 0.000199997778908267, - "loss": 46.0, - "step": 27758 - }, - { - "epoch": 2.1223694019152473, - "grad_norm": 0.0007537698256783187, - "learning_rate": 0.0001999977787481745, - "loss": 46.0, - "step": 27759 - }, - { - "epoch": 2.122445858898637, - "grad_norm": 0.0020727405790239573, - "learning_rate": 0.00019999777858807622, - "loss": 46.0, - "step": 27760 - }, - { - "epoch": 2.1225223158820268, - "grad_norm": 0.0022943192161619663, - "learning_rate": 0.00019999777842797216, - "loss": 46.0, - "step": 27761 - }, - { - "epoch": 2.1225987728654165, - "grad_norm": 0.001227724482305348, - "learning_rate": 0.00019999777826786234, - "loss": 46.0, - "step": 27762 - }, - { - "epoch": 2.1226752298488063, - "grad_norm": 0.00046310090692713857, - "learning_rate": 0.00019999777810774676, - "loss": 46.0, - "step": 27763 - }, - { - "epoch": 2.122751686832196, - "grad_norm": 0.00034540367778390646, - "learning_rate": 0.00019999777794762536, - "loss": 46.0, - "step": 27764 - }, - { - "epoch": 2.122828143815586, - "grad_norm": 0.0015350074972957373, - "learning_rate": 0.00019999777778749827, - "loss": 46.0, - "step": 27765 - }, - { - "epoch": 2.1229046007989756, - "grad_norm": 0.00046227211714722216, - "learning_rate": 0.00019999777762736535, - "loss": 46.0, - "step": 27766 - }, - { - "epoch": 2.1229810577823653, - "grad_norm": 0.0007318872376345098, - "learning_rate": 0.00019999777746722668, - "loss": 46.0, - "step": 27767 - }, - { - "epoch": 2.123057514765755, - "grad_norm": 0.005794634576886892, - "learning_rate": 0.00019999777730708224, - "loss": 46.0, - "step": 27768 - }, - { - "epoch": 2.1231339717491444, - "grad_norm": 0.001248258980922401, - "learning_rate": 0.00019999777714693203, - "loss": 46.0, - "step": 27769 - }, - { - "epoch": 2.123210428732534, - "grad_norm": 0.0023213066160678864, - "learning_rate": 0.00019999777698677607, - "loss": 46.0, - "step": 27770 - }, - { - "epoch": 2.123286885715924, - "grad_norm": 0.0003884672769345343, - "learning_rate": 0.0001999977768266143, - "loss": 46.0, - "step": 27771 - }, - { - "epoch": 2.1233633426993137, - "grad_norm": 0.0006161300116218626, - "learning_rate": 0.0001999977766664468, - "loss": 46.0, - "step": 27772 - }, - { - "epoch": 2.1234397996827035, - "grad_norm": 0.0007281790021806955, - "learning_rate": 0.00019999777650627355, - "loss": 46.0, - "step": 27773 - }, - { - "epoch": 2.1235162566660932, - "grad_norm": 0.0017733452841639519, - "learning_rate": 0.00019999777634609447, - "loss": 46.0, - "step": 27774 - }, - { - "epoch": 2.123592713649483, - "grad_norm": 0.0006097950972616673, - "learning_rate": 0.00019999777618590964, - "loss": 46.0, - "step": 27775 - }, - { - "epoch": 2.1236691706328727, - "grad_norm": 0.0017350472044199705, - "learning_rate": 0.00019999777602571904, - "loss": 46.0, - "step": 27776 - }, - { - "epoch": 2.1237456276162625, - "grad_norm": 0.0023962666746228933, - "learning_rate": 0.0001999977758655227, - "loss": 46.0, - "step": 27777 - }, - { - "epoch": 2.1238220845996523, - "grad_norm": 0.001431509735994041, - "learning_rate": 0.00019999777570532058, - "loss": 46.0, - "step": 27778 - }, - { - "epoch": 2.123898541583042, - "grad_norm": 0.0013171338941901922, - "learning_rate": 0.00019999777554511266, - "loss": 46.0, - "step": 27779 - }, - { - "epoch": 2.1239749985664313, - "grad_norm": 0.0022890930995345116, - "learning_rate": 0.000199997775384899, - "loss": 46.0, - "step": 27780 - }, - { - "epoch": 2.124051455549821, - "grad_norm": 0.0018152977572754025, - "learning_rate": 0.00019999777522467955, - "loss": 46.0, - "step": 27781 - }, - { - "epoch": 2.124127912533211, - "grad_norm": 0.0008151123765856028, - "learning_rate": 0.00019999777506445434, - "loss": 46.0, - "step": 27782 - }, - { - "epoch": 2.1242043695166006, - "grad_norm": 0.0008217376307584345, - "learning_rate": 0.00019999777490422338, - "loss": 46.0, - "step": 27783 - }, - { - "epoch": 2.1242808264999904, - "grad_norm": 0.0020327442325651646, - "learning_rate": 0.00019999777474398665, - "loss": 46.0, - "step": 27784 - }, - { - "epoch": 2.12435728348338, - "grad_norm": 0.0020759673789143562, - "learning_rate": 0.0001999977745837441, - "loss": 46.0, - "step": 27785 - }, - { - "epoch": 2.12443374046677, - "grad_norm": 0.0007270768983289599, - "learning_rate": 0.00019999777442349583, - "loss": 46.0, - "step": 27786 - }, - { - "epoch": 2.1245101974501597, - "grad_norm": 0.0008436166681349277, - "learning_rate": 0.00019999777426324178, - "loss": 46.0, - "step": 27787 - }, - { - "epoch": 2.1245866544335494, - "grad_norm": 0.0007066066027618945, - "learning_rate": 0.00019999777410298195, - "loss": 46.0, - "step": 27788 - }, - { - "epoch": 2.124663111416939, - "grad_norm": 0.002340213628485799, - "learning_rate": 0.00019999777394271635, - "loss": 46.0, - "step": 27789 - }, - { - "epoch": 2.124739568400329, - "grad_norm": 0.0012920309090986848, - "learning_rate": 0.000199997773782445, - "loss": 46.0, - "step": 27790 - }, - { - "epoch": 2.1248160253837183, - "grad_norm": 0.002201956696808338, - "learning_rate": 0.00019999777362216786, - "loss": 46.0, - "step": 27791 - }, - { - "epoch": 2.124892482367108, - "grad_norm": 0.0011296358425170183, - "learning_rate": 0.00019999777346188497, - "loss": 46.0, - "step": 27792 - }, - { - "epoch": 2.124968939350498, - "grad_norm": 0.0017244359478354454, - "learning_rate": 0.0001999977733015963, - "loss": 46.0, - "step": 27793 - }, - { - "epoch": 2.1250453963338876, - "grad_norm": 0.001204316969960928, - "learning_rate": 0.00019999777314130186, - "loss": 46.0, - "step": 27794 - }, - { - "epoch": 2.1251218533172773, - "grad_norm": 0.0012584576616063714, - "learning_rate": 0.00019999777298100165, - "loss": 46.0, - "step": 27795 - }, - { - "epoch": 2.125198310300667, - "grad_norm": 0.003922926727682352, - "learning_rate": 0.00019999777282069564, - "loss": 46.0, - "step": 27796 - }, - { - "epoch": 2.125274767284057, - "grad_norm": 0.0008311591227538884, - "learning_rate": 0.0001999977726603839, - "loss": 46.0, - "step": 27797 - }, - { - "epoch": 2.1253512242674466, - "grad_norm": 0.0012705157278105617, - "learning_rate": 0.0001999977725000664, - "loss": 46.0, - "step": 27798 - }, - { - "epoch": 2.1254276812508364, - "grad_norm": 0.0012697366764768958, - "learning_rate": 0.0001999977723397431, - "loss": 46.0, - "step": 27799 - }, - { - "epoch": 2.125504138234226, - "grad_norm": 0.002598720835521817, - "learning_rate": 0.00019999777217941404, - "loss": 46.0, - "step": 27800 - }, - { - "epoch": 2.125580595217616, - "grad_norm": 0.0006474059773609042, - "learning_rate": 0.00019999777201907924, - "loss": 46.0, - "step": 27801 - }, - { - "epoch": 2.125657052201005, - "grad_norm": 0.002168721053749323, - "learning_rate": 0.00019999777185873864, - "loss": 46.0, - "step": 27802 - }, - { - "epoch": 2.125733509184395, - "grad_norm": 0.0018245772225782275, - "learning_rate": 0.00019999777169839227, - "loss": 46.0, - "step": 27803 - }, - { - "epoch": 2.1258099661677847, - "grad_norm": 0.0015790582401677966, - "learning_rate": 0.00019999777153804012, - "loss": 46.0, - "step": 27804 - }, - { - "epoch": 2.1258864231511745, - "grad_norm": 0.0008621436427347362, - "learning_rate": 0.00019999777137768223, - "loss": 46.0, - "step": 27805 - }, - { - "epoch": 2.1259628801345642, - "grad_norm": 0.0017095734365284443, - "learning_rate": 0.00019999777121731857, - "loss": 46.0, - "step": 27806 - }, - { - "epoch": 2.126039337117954, - "grad_norm": 0.0005265677464194596, - "learning_rate": 0.00019999777105694913, - "loss": 46.0, - "step": 27807 - }, - { - "epoch": 2.1261157941013438, - "grad_norm": 0.0013383154291659594, - "learning_rate": 0.0001999977708965739, - "loss": 46.0, - "step": 27808 - }, - { - "epoch": 2.1261922510847335, - "grad_norm": 0.0014789337292313576, - "learning_rate": 0.00019999777073619296, - "loss": 46.0, - "step": 27809 - }, - { - "epoch": 2.1262687080681233, - "grad_norm": 0.001086424570530653, - "learning_rate": 0.00019999777057580617, - "loss": 46.0, - "step": 27810 - }, - { - "epoch": 2.126345165051513, - "grad_norm": 0.0009949494851753116, - "learning_rate": 0.00019999777041541366, - "loss": 46.0, - "step": 27811 - }, - { - "epoch": 2.126421622034903, - "grad_norm": 0.0010510762222111225, - "learning_rate": 0.00019999777025501536, - "loss": 46.0, - "step": 27812 - }, - { - "epoch": 2.126498079018292, - "grad_norm": 0.0009002738515846431, - "learning_rate": 0.0001999977700946113, - "loss": 46.0, - "step": 27813 - }, - { - "epoch": 2.126574536001682, - "grad_norm": 0.0006245758268050849, - "learning_rate": 0.0001999977699342015, - "loss": 46.0, - "step": 27814 - }, - { - "epoch": 2.1266509929850717, - "grad_norm": 0.0006332048214972019, - "learning_rate": 0.0001999977697737859, - "loss": 46.0, - "step": 27815 - }, - { - "epoch": 2.1267274499684614, - "grad_norm": 0.0009491315577179193, - "learning_rate": 0.00019999776961336454, - "loss": 46.0, - "step": 27816 - }, - { - "epoch": 2.126803906951851, - "grad_norm": 0.0008021583780646324, - "learning_rate": 0.0001999977694529374, - "loss": 46.0, - "step": 27817 - }, - { - "epoch": 2.126880363935241, - "grad_norm": 0.008494246751070023, - "learning_rate": 0.0001999977692925045, - "loss": 46.0, - "step": 27818 - }, - { - "epoch": 2.1269568209186307, - "grad_norm": 0.0004216600500512868, - "learning_rate": 0.0001999977691320658, - "loss": 46.0, - "step": 27819 - }, - { - "epoch": 2.1270332779020205, - "grad_norm": 0.0015819082036614418, - "learning_rate": 0.0001999977689716214, - "loss": 46.0, - "step": 27820 - }, - { - "epoch": 2.12710973488541, - "grad_norm": 0.0010109863942489028, - "learning_rate": 0.00019999776881117116, - "loss": 46.0, - "step": 27821 - }, - { - "epoch": 2.1271861918688, - "grad_norm": 0.000561410328373313, - "learning_rate": 0.00019999776865071517, - "loss": 46.0, - "step": 27822 - }, - { - "epoch": 2.1272626488521897, - "grad_norm": 0.0018921643495559692, - "learning_rate": 0.0001999977684902534, - "loss": 46.0, - "step": 27823 - }, - { - "epoch": 2.127339105835579, - "grad_norm": 0.0008970113121904433, - "learning_rate": 0.0001999977683297859, - "loss": 46.0, - "step": 27824 - }, - { - "epoch": 2.127415562818969, - "grad_norm": 0.0017257422441616654, - "learning_rate": 0.00019999776816931263, - "loss": 46.0, - "step": 27825 - }, - { - "epoch": 2.1274920198023586, - "grad_norm": 0.002155852038413286, - "learning_rate": 0.00019999776800883357, - "loss": 46.0, - "step": 27826 - }, - { - "epoch": 2.1275684767857483, - "grad_norm": 0.0019253874197602272, - "learning_rate": 0.00019999776784834875, - "loss": 46.0, - "step": 27827 - }, - { - "epoch": 2.127644933769138, - "grad_norm": 0.001639417139813304, - "learning_rate": 0.00019999776768785815, - "loss": 46.0, - "step": 27828 - }, - { - "epoch": 2.127721390752528, - "grad_norm": 0.0009411929058842361, - "learning_rate": 0.00019999776752736178, - "loss": 46.0, - "step": 27829 - }, - { - "epoch": 2.1277978477359176, - "grad_norm": 0.0006266910932026803, - "learning_rate": 0.00019999776736685963, - "loss": 46.0, - "step": 27830 - }, - { - "epoch": 2.1278743047193074, - "grad_norm": 0.000506626209244132, - "learning_rate": 0.00019999776720635171, - "loss": 46.0, - "step": 27831 - }, - { - "epoch": 2.127950761702697, - "grad_norm": 0.0017454660264775157, - "learning_rate": 0.00019999776704583805, - "loss": 46.0, - "step": 27832 - }, - { - "epoch": 2.128027218686087, - "grad_norm": 0.0007652774220332503, - "learning_rate": 0.0001999977668853186, - "loss": 46.0, - "step": 27833 - }, - { - "epoch": 2.1281036756694767, - "grad_norm": 0.00030271473224274814, - "learning_rate": 0.0001999977667247934, - "loss": 46.0, - "step": 27834 - }, - { - "epoch": 2.128180132652866, - "grad_norm": 0.0005601273733191192, - "learning_rate": 0.00019999776656426241, - "loss": 46.0, - "step": 27835 - }, - { - "epoch": 2.1282565896362557, - "grad_norm": 0.001075144624337554, - "learning_rate": 0.00019999776640372568, - "loss": 46.0, - "step": 27836 - }, - { - "epoch": 2.1283330466196455, - "grad_norm": 0.0016779261641204357, - "learning_rate": 0.00019999776624318315, - "loss": 46.0, - "step": 27837 - }, - { - "epoch": 2.1284095036030353, - "grad_norm": 0.0013826611684635282, - "learning_rate": 0.00019999776608263487, - "loss": 46.0, - "step": 27838 - }, - { - "epoch": 2.128485960586425, - "grad_norm": 0.002510193968191743, - "learning_rate": 0.00019999776592208082, - "loss": 46.0, - "step": 27839 - }, - { - "epoch": 2.128562417569815, - "grad_norm": 0.0008974681841209531, - "learning_rate": 0.000199997765761521, - "loss": 46.0, - "step": 27840 - }, - { - "epoch": 2.1286388745532046, - "grad_norm": 0.0010345522314310074, - "learning_rate": 0.00019999776560095537, - "loss": 46.0, - "step": 27841 - }, - { - "epoch": 2.1287153315365943, - "grad_norm": 0.0006212533335201442, - "learning_rate": 0.000199997765440384, - "loss": 46.0, - "step": 27842 - }, - { - "epoch": 2.128791788519984, - "grad_norm": 0.0011552287032827735, - "learning_rate": 0.00019999776527980688, - "loss": 46.0, - "step": 27843 - }, - { - "epoch": 2.128868245503374, - "grad_norm": 0.0013556129997596145, - "learning_rate": 0.00019999776511922396, - "loss": 46.0, - "step": 27844 - }, - { - "epoch": 2.1289447024867636, - "grad_norm": 0.0008895316859707236, - "learning_rate": 0.00019999776495863527, - "loss": 46.0, - "step": 27845 - }, - { - "epoch": 2.129021159470153, - "grad_norm": 0.003528506960719824, - "learning_rate": 0.00019999776479804083, - "loss": 46.0, - "step": 27846 - }, - { - "epoch": 2.1290976164535427, - "grad_norm": 0.0007977734203450382, - "learning_rate": 0.00019999776463744062, - "loss": 46.0, - "step": 27847 - }, - { - "epoch": 2.1291740734369324, - "grad_norm": 0.0010544314282014966, - "learning_rate": 0.00019999776447683464, - "loss": 46.0, - "step": 27848 - }, - { - "epoch": 2.129250530420322, - "grad_norm": 0.0011289088288322091, - "learning_rate": 0.00019999776431622288, - "loss": 46.0, - "step": 27849 - }, - { - "epoch": 2.129326987403712, - "grad_norm": 0.001412665005773306, - "learning_rate": 0.00019999776415560537, - "loss": 46.0, - "step": 27850 - }, - { - "epoch": 2.1294034443871017, - "grad_norm": 0.0014999600825831294, - "learning_rate": 0.00019999776399498207, - "loss": 46.0, - "step": 27851 - }, - { - "epoch": 2.1294799013704915, - "grad_norm": 0.0008316526073031127, - "learning_rate": 0.000199997763834353, - "loss": 46.0, - "step": 27852 - }, - { - "epoch": 2.1295563583538812, - "grad_norm": 0.0014714428689330816, - "learning_rate": 0.0001999977636737182, - "loss": 46.0, - "step": 27853 - }, - { - "epoch": 2.129632815337271, - "grad_norm": 0.0010015941224992275, - "learning_rate": 0.0001999977635130776, - "loss": 46.0, - "step": 27854 - }, - { - "epoch": 2.1297092723206608, - "grad_norm": 0.0007377064903266728, - "learning_rate": 0.00019999776335243122, - "loss": 46.0, - "step": 27855 - }, - { - "epoch": 2.12978572930405, - "grad_norm": 0.00147099862806499, - "learning_rate": 0.00019999776319177908, - "loss": 46.0, - "step": 27856 - }, - { - "epoch": 2.12986218628744, - "grad_norm": 0.0007974232430569828, - "learning_rate": 0.00019999776303112116, - "loss": 46.0, - "step": 27857 - }, - { - "epoch": 2.1299386432708296, - "grad_norm": 0.001324589946307242, - "learning_rate": 0.0001999977628704575, - "loss": 46.0, - "step": 27858 - }, - { - "epoch": 2.1300151002542194, - "grad_norm": 0.001063237781636417, - "learning_rate": 0.00019999776270978806, - "loss": 46.0, - "step": 27859 - }, - { - "epoch": 2.130091557237609, - "grad_norm": 0.0014845584519207478, - "learning_rate": 0.00019999776254911282, - "loss": 46.0, - "step": 27860 - }, - { - "epoch": 2.130168014220999, - "grad_norm": 0.0006285892450250685, - "learning_rate": 0.00019999776238843183, - "loss": 46.0, - "step": 27861 - }, - { - "epoch": 2.1302444712043886, - "grad_norm": 0.0018885077442973852, - "learning_rate": 0.00019999776222774508, - "loss": 46.0, - "step": 27862 - }, - { - "epoch": 2.1303209281877784, - "grad_norm": 0.001775530632585287, - "learning_rate": 0.00019999776206705257, - "loss": 46.0, - "step": 27863 - }, - { - "epoch": 2.130397385171168, - "grad_norm": 0.003006625920534134, - "learning_rate": 0.00019999776190635427, - "loss": 46.0, - "step": 27864 - }, - { - "epoch": 2.130473842154558, - "grad_norm": 0.0011765756644308567, - "learning_rate": 0.00019999776174565022, - "loss": 46.0, - "step": 27865 - }, - { - "epoch": 2.1305502991379477, - "grad_norm": 0.0007958024507388473, - "learning_rate": 0.0001999977615849404, - "loss": 46.0, - "step": 27866 - }, - { - "epoch": 2.1306267561213375, - "grad_norm": 0.0006438548443838954, - "learning_rate": 0.00019999776142422477, - "loss": 46.0, - "step": 27867 - }, - { - "epoch": 2.1307032131047268, - "grad_norm": 0.0017352061113342643, - "learning_rate": 0.00019999776126350342, - "loss": 46.0, - "step": 27868 - }, - { - "epoch": 2.1307796700881165, - "grad_norm": 0.0014211511006578803, - "learning_rate": 0.00019999776110277625, - "loss": 46.0, - "step": 27869 - }, - { - "epoch": 2.1308561270715063, - "grad_norm": 0.0027469047345221043, - "learning_rate": 0.00019999776094204336, - "loss": 46.0, - "step": 27870 - }, - { - "epoch": 2.130932584054896, - "grad_norm": 0.0008085627341642976, - "learning_rate": 0.0001999977607813047, - "loss": 46.0, - "step": 27871 - }, - { - "epoch": 2.131009041038286, - "grad_norm": 0.0008705726359039545, - "learning_rate": 0.00019999776062056023, - "loss": 46.0, - "step": 27872 - }, - { - "epoch": 2.1310854980216756, - "grad_norm": 0.0011628699721768498, - "learning_rate": 0.00019999776045981002, - "loss": 46.0, - "step": 27873 - }, - { - "epoch": 2.1311619550050653, - "grad_norm": 0.0017635178519412875, - "learning_rate": 0.00019999776029905404, - "loss": 46.0, - "step": 27874 - }, - { - "epoch": 2.131238411988455, - "grad_norm": 0.0007235931698232889, - "learning_rate": 0.00019999776013829228, - "loss": 46.0, - "step": 27875 - }, - { - "epoch": 2.131314868971845, - "grad_norm": 0.0007826391374692321, - "learning_rate": 0.00019999775997752478, - "loss": 46.0, - "step": 27876 - }, - { - "epoch": 2.1313913259552346, - "grad_norm": 0.0008739493787288666, - "learning_rate": 0.00019999775981675147, - "loss": 46.0, - "step": 27877 - }, - { - "epoch": 2.131467782938624, - "grad_norm": 0.0006460209260694683, - "learning_rate": 0.0001999977596559724, - "loss": 46.0, - "step": 27878 - }, - { - "epoch": 2.1315442399220137, - "grad_norm": 0.0008441006066277623, - "learning_rate": 0.00019999775949518757, - "loss": 46.0, - "step": 27879 - }, - { - "epoch": 2.1316206969054035, - "grad_norm": 0.002067247871309519, - "learning_rate": 0.00019999775933439697, - "loss": 46.0, - "step": 27880 - }, - { - "epoch": 2.131697153888793, - "grad_norm": 0.0008495355723425746, - "learning_rate": 0.0001999977591736006, - "loss": 46.0, - "step": 27881 - }, - { - "epoch": 2.131773610872183, - "grad_norm": 0.0007296452531591058, - "learning_rate": 0.00019999775901279846, - "loss": 46.0, - "step": 27882 - }, - { - "epoch": 2.1318500678555727, - "grad_norm": 0.0008365765097551048, - "learning_rate": 0.00019999775885199054, - "loss": 46.0, - "step": 27883 - }, - { - "epoch": 2.1319265248389625, - "grad_norm": 0.00044905408867634833, - "learning_rate": 0.00019999775869117685, - "loss": 46.0, - "step": 27884 - }, - { - "epoch": 2.1320029818223523, - "grad_norm": 0.0013494088780134916, - "learning_rate": 0.0001999977585303574, - "loss": 46.0, - "step": 27885 - }, - { - "epoch": 2.132079438805742, - "grad_norm": 0.0014080164255574346, - "learning_rate": 0.00019999775836953217, - "loss": 46.0, - "step": 27886 - }, - { - "epoch": 2.132155895789132, - "grad_norm": 0.0013624605489894748, - "learning_rate": 0.00019999775820870122, - "loss": 46.0, - "step": 27887 - }, - { - "epoch": 2.1322323527725215, - "grad_norm": 0.0010071718133985996, - "learning_rate": 0.00019999775804786446, - "loss": 46.0, - "step": 27888 - }, - { - "epoch": 2.1323088097559113, - "grad_norm": 0.001538605079986155, - "learning_rate": 0.00019999775788702193, - "loss": 46.0, - "step": 27889 - }, - { - "epoch": 2.1323852667393006, - "grad_norm": 0.0038290575612336397, - "learning_rate": 0.00019999775772617365, - "loss": 46.0, - "step": 27890 - }, - { - "epoch": 2.1324617237226904, - "grad_norm": 0.00046507318620570004, - "learning_rate": 0.00019999775756531957, - "loss": 46.0, - "step": 27891 - }, - { - "epoch": 2.13253818070608, - "grad_norm": 0.0015731814783066511, - "learning_rate": 0.00019999775740445975, - "loss": 46.0, - "step": 27892 - }, - { - "epoch": 2.13261463768947, - "grad_norm": 0.0016571552259847522, - "learning_rate": 0.00019999775724359413, - "loss": 46.0, - "step": 27893 - }, - { - "epoch": 2.1326910946728597, - "grad_norm": 0.001173077616840601, - "learning_rate": 0.00019999775708272278, - "loss": 46.0, - "step": 27894 - }, - { - "epoch": 2.1327675516562494, - "grad_norm": 0.0004726675688289106, - "learning_rate": 0.00019999775692184564, - "loss": 46.0, - "step": 27895 - }, - { - "epoch": 2.132844008639639, - "grad_norm": 0.0013790634693577886, - "learning_rate": 0.0001999977567609627, - "loss": 46.0, - "step": 27896 - }, - { - "epoch": 2.132920465623029, - "grad_norm": 0.0020232738461345434, - "learning_rate": 0.00019999775660007403, - "loss": 46.0, - "step": 27897 - }, - { - "epoch": 2.1329969226064187, - "grad_norm": 0.0009763463749550283, - "learning_rate": 0.00019999775643917957, - "loss": 46.0, - "step": 27898 - }, - { - "epoch": 2.1330733795898085, - "grad_norm": 0.0006836850079707801, - "learning_rate": 0.00019999775627827936, - "loss": 46.0, - "step": 27899 - }, - { - "epoch": 2.133149836573198, - "grad_norm": 0.0038131983019411564, - "learning_rate": 0.00019999775611737337, - "loss": 46.0, - "step": 27900 - }, - { - "epoch": 2.1332262935565875, - "grad_norm": 0.0021967804059386253, - "learning_rate": 0.0001999977559564616, - "loss": 46.0, - "step": 27901 - }, - { - "epoch": 2.1333027505399773, - "grad_norm": 0.004178035072982311, - "learning_rate": 0.00019999775579554409, - "loss": 46.0, - "step": 27902 - }, - { - "epoch": 2.133379207523367, - "grad_norm": 0.001492820680141449, - "learning_rate": 0.00019999775563462078, - "loss": 46.0, - "step": 27903 - }, - { - "epoch": 2.133455664506757, - "grad_norm": 0.0011182373855262995, - "learning_rate": 0.00019999775547369173, - "loss": 46.0, - "step": 27904 - }, - { - "epoch": 2.1335321214901466, - "grad_norm": 0.00037381317815743387, - "learning_rate": 0.00019999775531275688, - "loss": 46.0, - "step": 27905 - }, - { - "epoch": 2.1336085784735364, - "grad_norm": 0.0016465885564684868, - "learning_rate": 0.00019999775515181626, - "loss": 46.0, - "step": 27906 - }, - { - "epoch": 2.133685035456926, - "grad_norm": 0.0010482021607458591, - "learning_rate": 0.0001999977549908699, - "loss": 46.0, - "step": 27907 - }, - { - "epoch": 2.133761492440316, - "grad_norm": 0.001215518917888403, - "learning_rate": 0.00019999775482991777, - "loss": 46.0, - "step": 27908 - }, - { - "epoch": 2.1338379494237056, - "grad_norm": 0.010715700685977936, - "learning_rate": 0.00019999775466895985, - "loss": 46.0, - "step": 27909 - }, - { - "epoch": 2.1339144064070954, - "grad_norm": 0.0006658357451669872, - "learning_rate": 0.00019999775450799616, - "loss": 46.0, - "step": 27910 - }, - { - "epoch": 2.133990863390485, - "grad_norm": 0.0007772714598104358, - "learning_rate": 0.00019999775434702673, - "loss": 46.0, - "step": 27911 - }, - { - "epoch": 2.1340673203738745, - "grad_norm": 0.0009552838164381683, - "learning_rate": 0.00019999775418605152, - "loss": 46.0, - "step": 27912 - }, - { - "epoch": 2.1341437773572642, - "grad_norm": 0.0040089404210448265, - "learning_rate": 0.0001999977540250705, - "loss": 46.0, - "step": 27913 - }, - { - "epoch": 2.134220234340654, - "grad_norm": 0.0014650429366156459, - "learning_rate": 0.00019999775386408378, - "loss": 46.0, - "step": 27914 - }, - { - "epoch": 2.1342966913240438, - "grad_norm": 0.0007803139160387218, - "learning_rate": 0.00019999775370309122, - "loss": 46.0, - "step": 27915 - }, - { - "epoch": 2.1343731483074335, - "grad_norm": 0.0016963358502835035, - "learning_rate": 0.00019999775354209294, - "loss": 46.0, - "step": 27916 - }, - { - "epoch": 2.1344496052908233, - "grad_norm": 0.0009868739871308208, - "learning_rate": 0.00019999775338108887, - "loss": 46.0, - "step": 27917 - }, - { - "epoch": 2.134526062274213, - "grad_norm": 0.0005647193756885827, - "learning_rate": 0.00019999775322007902, - "loss": 46.0, - "step": 27918 - }, - { - "epoch": 2.134602519257603, - "grad_norm": 0.0013062083162367344, - "learning_rate": 0.00019999775305906342, - "loss": 46.0, - "step": 27919 - }, - { - "epoch": 2.1346789762409926, - "grad_norm": 0.0013398105511441827, - "learning_rate": 0.00019999775289804202, - "loss": 46.0, - "step": 27920 - }, - { - "epoch": 2.1347554332243823, - "grad_norm": 0.007981443777680397, - "learning_rate": 0.00019999775273701488, - "loss": 46.0, - "step": 27921 - }, - { - "epoch": 2.1348318902077716, - "grad_norm": 0.00045654072891920805, - "learning_rate": 0.000199997752575982, - "loss": 46.0, - "step": 27922 - }, - { - "epoch": 2.1349083471911614, - "grad_norm": 0.0006403446313925087, - "learning_rate": 0.00019999775241494333, - "loss": 46.0, - "step": 27923 - }, - { - "epoch": 2.134984804174551, - "grad_norm": 0.0004985718405805528, - "learning_rate": 0.00019999775225389886, - "loss": 46.0, - "step": 27924 - }, - { - "epoch": 2.135061261157941, - "grad_norm": 0.0005125298630446196, - "learning_rate": 0.00019999775209284863, - "loss": 46.0, - "step": 27925 - }, - { - "epoch": 2.1351377181413307, - "grad_norm": 0.000692663190420717, - "learning_rate": 0.00019999775193179264, - "loss": 46.0, - "step": 27926 - }, - { - "epoch": 2.1352141751247204, - "grad_norm": 0.0012574001448228955, - "learning_rate": 0.0001999977517707309, - "loss": 46.0, - "step": 27927 - }, - { - "epoch": 2.13529063210811, - "grad_norm": 0.0015778085216879845, - "learning_rate": 0.00019999775160966333, - "loss": 46.0, - "step": 27928 - }, - { - "epoch": 2.1353670890915, - "grad_norm": 0.0018670731224119663, - "learning_rate": 0.00019999775144859006, - "loss": 46.0, - "step": 27929 - }, - { - "epoch": 2.1354435460748897, - "grad_norm": 0.001294909161515534, - "learning_rate": 0.000199997751287511, - "loss": 46.0, - "step": 27930 - }, - { - "epoch": 2.1355200030582795, - "grad_norm": 0.008364435285329819, - "learning_rate": 0.00019999775112642616, - "loss": 46.0, - "step": 27931 - }, - { - "epoch": 2.1355964600416693, - "grad_norm": 0.0007175628561526537, - "learning_rate": 0.00019999775096533556, - "loss": 46.0, - "step": 27932 - }, - { - "epoch": 2.1356729170250586, - "grad_norm": 0.0007800923776812851, - "learning_rate": 0.00019999775080423916, - "loss": 46.0, - "step": 27933 - }, - { - "epoch": 2.1357493740084483, - "grad_norm": 0.0006643543601967394, - "learning_rate": 0.00019999775064313702, - "loss": 46.0, - "step": 27934 - }, - { - "epoch": 2.135825830991838, - "grad_norm": 0.0008329321863129735, - "learning_rate": 0.00019999775048202908, - "loss": 46.0, - "step": 27935 - }, - { - "epoch": 2.135902287975228, - "grad_norm": 0.0005805922555737197, - "learning_rate": 0.00019999775032091544, - "loss": 46.0, - "step": 27936 - }, - { - "epoch": 2.1359787449586176, - "grad_norm": 0.0008534258813597262, - "learning_rate": 0.00019999775015979595, - "loss": 46.0, - "step": 27937 - }, - { - "epoch": 2.1360552019420074, - "grad_norm": 0.004057834390550852, - "learning_rate": 0.00019999774999867077, - "loss": 46.0, - "step": 27938 - }, - { - "epoch": 2.136131658925397, - "grad_norm": 0.0008537324029020965, - "learning_rate": 0.00019999774983753973, - "loss": 46.0, - "step": 27939 - }, - { - "epoch": 2.136208115908787, - "grad_norm": 0.0014889901503920555, - "learning_rate": 0.000199997749676403, - "loss": 46.0, - "step": 27940 - }, - { - "epoch": 2.1362845728921767, - "grad_norm": 0.001632365514524281, - "learning_rate": 0.00019999774951526047, - "loss": 46.0, - "step": 27941 - }, - { - "epoch": 2.1363610298755664, - "grad_norm": 0.0012648382689803839, - "learning_rate": 0.00019999774935411217, - "loss": 46.0, - "step": 27942 - }, - { - "epoch": 2.136437486858956, - "grad_norm": 0.000894739234354347, - "learning_rate": 0.0001999977491929581, - "loss": 46.0, - "step": 27943 - }, - { - "epoch": 2.1365139438423455, - "grad_norm": 0.006497282534837723, - "learning_rate": 0.00019999774903179827, - "loss": 46.0, - "step": 27944 - }, - { - "epoch": 2.1365904008257353, - "grad_norm": 0.001762912841513753, - "learning_rate": 0.00019999774887063265, - "loss": 46.0, - "step": 27945 - }, - { - "epoch": 2.136666857809125, - "grad_norm": 0.0006510132807306945, - "learning_rate": 0.00019999774870946125, - "loss": 46.0, - "step": 27946 - }, - { - "epoch": 2.136743314792515, - "grad_norm": 0.0003557926102075726, - "learning_rate": 0.0001999977485482841, - "loss": 46.0, - "step": 27947 - }, - { - "epoch": 2.1368197717759045, - "grad_norm": 0.000626552093308419, - "learning_rate": 0.00019999774838710122, - "loss": 46.0, - "step": 27948 - }, - { - "epoch": 2.1368962287592943, - "grad_norm": 0.001279408810660243, - "learning_rate": 0.0001999977482259125, - "loss": 46.0, - "step": 27949 - }, - { - "epoch": 2.136972685742684, - "grad_norm": 0.0012648004340007901, - "learning_rate": 0.00019999774806471807, - "loss": 46.0, - "step": 27950 - }, - { - "epoch": 2.137049142726074, - "grad_norm": 0.0004089291614945978, - "learning_rate": 0.00019999774790351783, - "loss": 46.0, - "step": 27951 - }, - { - "epoch": 2.1371255997094636, - "grad_norm": 0.000834234815556556, - "learning_rate": 0.00019999774774231182, - "loss": 46.0, - "step": 27952 - }, - { - "epoch": 2.1372020566928533, - "grad_norm": 0.003803718602284789, - "learning_rate": 0.00019999774758110007, - "loss": 46.0, - "step": 27953 - }, - { - "epoch": 2.137278513676243, - "grad_norm": 0.0020154034718871117, - "learning_rate": 0.00019999774741988257, - "loss": 46.0, - "step": 27954 - }, - { - "epoch": 2.1373549706596324, - "grad_norm": 0.006825027987360954, - "learning_rate": 0.00019999774725865926, - "loss": 46.0, - "step": 27955 - }, - { - "epoch": 2.137431427643022, - "grad_norm": 0.0014343821676447988, - "learning_rate": 0.0001999977470974302, - "loss": 46.0, - "step": 27956 - }, - { - "epoch": 2.137507884626412, - "grad_norm": 0.004281510133296251, - "learning_rate": 0.00019999774693619534, - "loss": 46.0, - "step": 27957 - }, - { - "epoch": 2.1375843416098017, - "grad_norm": 0.001521748723462224, - "learning_rate": 0.00019999774677495474, - "loss": 46.0, - "step": 27958 - }, - { - "epoch": 2.1376607985931915, - "grad_norm": 0.0005761711508966982, - "learning_rate": 0.00019999774661370835, - "loss": 46.0, - "step": 27959 - }, - { - "epoch": 2.1377372555765812, - "grad_norm": 0.0015655792085453868, - "learning_rate": 0.0001999977464524562, - "loss": 46.0, - "step": 27960 - }, - { - "epoch": 2.137813712559971, - "grad_norm": 0.016118254512548447, - "learning_rate": 0.0001999977462911983, - "loss": 46.0, - "step": 27961 - }, - { - "epoch": 2.1378901695433608, - "grad_norm": 0.0006259109941311181, - "learning_rate": 0.0001999977461299346, - "loss": 46.0, - "step": 27962 - }, - { - "epoch": 2.1379666265267505, - "grad_norm": 0.0008122969884425402, - "learning_rate": 0.00019999774596866514, - "loss": 46.0, - "step": 27963 - }, - { - "epoch": 2.1380430835101403, - "grad_norm": 0.0012238132767379284, - "learning_rate": 0.00019999774580738993, - "loss": 46.0, - "step": 27964 - }, - { - "epoch": 2.13811954049353, - "grad_norm": 0.0007204969879239798, - "learning_rate": 0.00019999774564610895, - "loss": 46.0, - "step": 27965 - }, - { - "epoch": 2.1381959974769194, - "grad_norm": 0.000909200927708298, - "learning_rate": 0.00019999774548482216, - "loss": 46.0, - "step": 27966 - }, - { - "epoch": 2.138272454460309, - "grad_norm": 0.003151579527184367, - "learning_rate": 0.00019999774532352964, - "loss": 46.0, - "step": 27967 - }, - { - "epoch": 2.138348911443699, - "grad_norm": 0.002466138917952776, - "learning_rate": 0.00019999774516223133, - "loss": 46.0, - "step": 27968 - }, - { - "epoch": 2.1384253684270886, - "grad_norm": 0.0014224511105567217, - "learning_rate": 0.00019999774500092726, - "loss": 46.0, - "step": 27969 - }, - { - "epoch": 2.1385018254104784, - "grad_norm": 0.0007494601304642856, - "learning_rate": 0.0001999977448396174, - "loss": 46.0, - "step": 27970 - }, - { - "epoch": 2.138578282393868, - "grad_norm": 0.0014879771042615175, - "learning_rate": 0.00019999774467830181, - "loss": 46.0, - "step": 27971 - }, - { - "epoch": 2.138654739377258, - "grad_norm": 0.00064283941173926, - "learning_rate": 0.00019999774451698042, - "loss": 46.0, - "step": 27972 - }, - { - "epoch": 2.1387311963606477, - "grad_norm": 0.033043231815099716, - "learning_rate": 0.00019999774435565328, - "loss": 46.0, - "step": 27973 - }, - { - "epoch": 2.1388076533440374, - "grad_norm": 0.0009749619639478624, - "learning_rate": 0.00019999774419432036, - "loss": 46.0, - "step": 27974 - }, - { - "epoch": 2.138884110327427, - "grad_norm": 0.0006747089792042971, - "learning_rate": 0.00019999774403298167, - "loss": 46.0, - "step": 27975 - }, - { - "epoch": 2.138960567310817, - "grad_norm": 0.010180780664086342, - "learning_rate": 0.0001999977438716372, - "loss": 46.0, - "step": 27976 - }, - { - "epoch": 2.1390370242942063, - "grad_norm": 0.006115399766713381, - "learning_rate": 0.000199997743710287, - "loss": 46.0, - "step": 27977 - }, - { - "epoch": 2.139113481277596, - "grad_norm": 0.0011515038786455989, - "learning_rate": 0.000199997743548931, - "loss": 46.0, - "step": 27978 - }, - { - "epoch": 2.139189938260986, - "grad_norm": 0.0011794361053034663, - "learning_rate": 0.00019999774338756924, - "loss": 46.0, - "step": 27979 - }, - { - "epoch": 2.1392663952443756, - "grad_norm": 0.007787879090756178, - "learning_rate": 0.00019999774322620168, - "loss": 46.0, - "step": 27980 - }, - { - "epoch": 2.1393428522277653, - "grad_norm": 0.0015810028417035937, - "learning_rate": 0.00019999774306482838, - "loss": 46.0, - "step": 27981 - }, - { - "epoch": 2.139419309211155, - "grad_norm": 0.0005167325725778937, - "learning_rate": 0.00019999774290344933, - "loss": 46.0, - "step": 27982 - }, - { - "epoch": 2.139495766194545, - "grad_norm": 0.0005183730972930789, - "learning_rate": 0.00019999774274206446, - "loss": 46.0, - "step": 27983 - }, - { - "epoch": 2.1395722231779346, - "grad_norm": 0.0016227837186306715, - "learning_rate": 0.00019999774258067386, - "loss": 46.0, - "step": 27984 - }, - { - "epoch": 2.1396486801613244, - "grad_norm": 0.0008283095667138696, - "learning_rate": 0.00019999774241927747, - "loss": 46.0, - "step": 27985 - }, - { - "epoch": 2.139725137144714, - "grad_norm": 0.0029107846785336733, - "learning_rate": 0.00019999774225787532, - "loss": 46.0, - "step": 27986 - }, - { - "epoch": 2.1398015941281034, - "grad_norm": 0.0008983362349681556, - "learning_rate": 0.0001999977420964674, - "loss": 46.0, - "step": 27987 - }, - { - "epoch": 2.139878051111493, - "grad_norm": 0.002306839916855097, - "learning_rate": 0.00019999774193505372, - "loss": 46.0, - "step": 27988 - }, - { - "epoch": 2.139954508094883, - "grad_norm": 0.0008934852667152882, - "learning_rate": 0.00019999774177363426, - "loss": 46.0, - "step": 27989 - }, - { - "epoch": 2.1400309650782727, - "grad_norm": 0.0009797102538868785, - "learning_rate": 0.00019999774161220902, - "loss": 46.0, - "step": 27990 - }, - { - "epoch": 2.1401074220616625, - "grad_norm": 0.006482865661382675, - "learning_rate": 0.00019999774145077804, - "loss": 46.0, - "step": 27991 - }, - { - "epoch": 2.1401838790450523, - "grad_norm": 0.003567473264411092, - "learning_rate": 0.0001999977412893413, - "loss": 46.0, - "step": 27992 - }, - { - "epoch": 2.140260336028442, - "grad_norm": 0.0028472740668803453, - "learning_rate": 0.00019999774112789873, - "loss": 46.0, - "step": 27993 - }, - { - "epoch": 2.1403367930118318, - "grad_norm": 0.0009431094513274729, - "learning_rate": 0.00019999774096645043, - "loss": 46.0, - "step": 27994 - }, - { - "epoch": 2.1404132499952215, - "grad_norm": 0.003307847073301673, - "learning_rate": 0.00019999774080499636, - "loss": 46.0, - "step": 27995 - }, - { - "epoch": 2.1404897069786113, - "grad_norm": 0.00043861649464815855, - "learning_rate": 0.0001999977406435365, - "loss": 46.0, - "step": 27996 - }, - { - "epoch": 2.140566163962001, - "grad_norm": 0.0011678007431328297, - "learning_rate": 0.00019999774048207089, - "loss": 46.0, - "step": 27997 - }, - { - "epoch": 2.140642620945391, - "grad_norm": 0.0013952864101156592, - "learning_rate": 0.00019999774032059952, - "loss": 46.0, - "step": 27998 - }, - { - "epoch": 2.14071907792878, - "grad_norm": 0.00679080281406641, - "learning_rate": 0.00019999774015912235, - "loss": 46.0, - "step": 27999 - }, - { - "epoch": 2.14079553491217, - "grad_norm": 0.001105370232835412, - "learning_rate": 0.00019999773999763943, - "loss": 46.0, - "step": 28000 - }, - { - "epoch": 2.1408719918955597, - "grad_norm": 0.0013363149482756853, - "learning_rate": 0.00019999773983615075, - "loss": 46.0, - "step": 28001 - }, - { - "epoch": 2.1409484488789494, - "grad_norm": 0.0008694211137481034, - "learning_rate": 0.00019999773967465629, - "loss": 46.0, - "step": 28002 - }, - { - "epoch": 2.141024905862339, - "grad_norm": 0.0006769180181436241, - "learning_rate": 0.00019999773951315608, - "loss": 46.0, - "step": 28003 - }, - { - "epoch": 2.141101362845729, - "grad_norm": 0.0027024210430681705, - "learning_rate": 0.00019999773935165007, - "loss": 46.0, - "step": 28004 - }, - { - "epoch": 2.1411778198291187, - "grad_norm": 0.0007253543008118868, - "learning_rate": 0.0001999977391901383, - "loss": 46.0, - "step": 28005 - }, - { - "epoch": 2.1412542768125085, - "grad_norm": 0.0038207811303436756, - "learning_rate": 0.00019999773902862076, - "loss": 46.0, - "step": 28006 - }, - { - "epoch": 2.1413307337958982, - "grad_norm": 0.0024014287628233433, - "learning_rate": 0.00019999773886709746, - "loss": 46.0, - "step": 28007 - }, - { - "epoch": 2.141407190779288, - "grad_norm": 0.0011907346779480577, - "learning_rate": 0.00019999773870556838, - "loss": 46.0, - "step": 28008 - }, - { - "epoch": 2.1414836477626773, - "grad_norm": 0.0012580914190039039, - "learning_rate": 0.0001999977385440335, - "loss": 46.0, - "step": 28009 - }, - { - "epoch": 2.141560104746067, - "grad_norm": 0.0008508822065778077, - "learning_rate": 0.00019999773838249291, - "loss": 46.0, - "step": 28010 - }, - { - "epoch": 2.141636561729457, - "grad_norm": 0.0010860462207347155, - "learning_rate": 0.00019999773822094655, - "loss": 46.0, - "step": 28011 - }, - { - "epoch": 2.1417130187128466, - "grad_norm": 0.0021899437997490168, - "learning_rate": 0.00019999773805939438, - "loss": 46.0, - "step": 28012 - }, - { - "epoch": 2.1417894756962363, - "grad_norm": 0.0044187684543430805, - "learning_rate": 0.00019999773789783646, - "loss": 46.0, - "step": 28013 - }, - { - "epoch": 2.141865932679626, - "grad_norm": 0.0007306295447051525, - "learning_rate": 0.00019999773773627278, - "loss": 46.0, - "step": 28014 - }, - { - "epoch": 2.141942389663016, - "grad_norm": 0.0011351504363119602, - "learning_rate": 0.00019999773757470332, - "loss": 46.0, - "step": 28015 - }, - { - "epoch": 2.1420188466464056, - "grad_norm": 0.0031075968872755766, - "learning_rate": 0.00019999773741312808, - "loss": 46.0, - "step": 28016 - }, - { - "epoch": 2.1420953036297954, - "grad_norm": 0.0008862606482580304, - "learning_rate": 0.0001999977372515471, - "loss": 46.0, - "step": 28017 - }, - { - "epoch": 2.142171760613185, - "grad_norm": 0.008570067584514618, - "learning_rate": 0.0001999977370899603, - "loss": 46.0, - "step": 28018 - }, - { - "epoch": 2.142248217596575, - "grad_norm": 0.001295009278692305, - "learning_rate": 0.0001999977369283678, - "loss": 46.0, - "step": 28019 - }, - { - "epoch": 2.1423246745799647, - "grad_norm": 0.0007180798565968871, - "learning_rate": 0.00019999773676676946, - "loss": 46.0, - "step": 28020 - }, - { - "epoch": 2.142401131563354, - "grad_norm": 0.001086206641048193, - "learning_rate": 0.0001999977366051654, - "loss": 46.0, - "step": 28021 - }, - { - "epoch": 2.1424775885467437, - "grad_norm": 0.0005852627218700945, - "learning_rate": 0.00019999773644355554, - "loss": 46.0, - "step": 28022 - }, - { - "epoch": 2.1425540455301335, - "grad_norm": 0.0008816480403766036, - "learning_rate": 0.00019999773628193995, - "loss": 46.0, - "step": 28023 - }, - { - "epoch": 2.1426305025135233, - "grad_norm": 0.0017591941868886352, - "learning_rate": 0.00019999773612031855, - "loss": 46.0, - "step": 28024 - }, - { - "epoch": 2.142706959496913, - "grad_norm": 0.0010571813909336925, - "learning_rate": 0.0001999977359586914, - "loss": 46.0, - "step": 28025 - }, - { - "epoch": 2.142783416480303, - "grad_norm": 0.0016324438620358706, - "learning_rate": 0.00019999773579705847, - "loss": 46.0, - "step": 28026 - }, - { - "epoch": 2.1428598734636926, - "grad_norm": 0.0010612218175083399, - "learning_rate": 0.00019999773563541976, - "loss": 46.0, - "step": 28027 - }, - { - "epoch": 2.1429363304470823, - "grad_norm": 0.0007551426533609629, - "learning_rate": 0.00019999773547377532, - "loss": 46.0, - "step": 28028 - }, - { - "epoch": 2.143012787430472, - "grad_norm": 0.0011655011912807822, - "learning_rate": 0.0001999977353121251, - "loss": 46.0, - "step": 28029 - }, - { - "epoch": 2.143089244413862, - "grad_norm": 0.000542212335858494, - "learning_rate": 0.00019999773515046908, - "loss": 46.0, - "step": 28030 - }, - { - "epoch": 2.143165701397251, - "grad_norm": 0.0006167124374769628, - "learning_rate": 0.00019999773498880733, - "loss": 46.0, - "step": 28031 - }, - { - "epoch": 2.143242158380641, - "grad_norm": 0.0026519224047660828, - "learning_rate": 0.00019999773482713977, - "loss": 46.0, - "step": 28032 - }, - { - "epoch": 2.1433186153640307, - "grad_norm": 0.001772786257788539, - "learning_rate": 0.00019999773466546645, - "loss": 46.0, - "step": 28033 - }, - { - "epoch": 2.1433950723474204, - "grad_norm": 0.0009836714016273618, - "learning_rate": 0.00019999773450378737, - "loss": 46.0, - "step": 28034 - }, - { - "epoch": 2.14347152933081, - "grad_norm": 0.005014047026634216, - "learning_rate": 0.00019999773434210255, - "loss": 46.0, - "step": 28035 - }, - { - "epoch": 2.1435479863142, - "grad_norm": 0.007744866423308849, - "learning_rate": 0.00019999773418041193, - "loss": 46.0, - "step": 28036 - }, - { - "epoch": 2.1436244432975897, - "grad_norm": 0.0008506677113473415, - "learning_rate": 0.00019999773401871554, - "loss": 46.0, - "step": 28037 - }, - { - "epoch": 2.1437009002809795, - "grad_norm": 0.0007242302526719868, - "learning_rate": 0.00019999773385701337, - "loss": 46.0, - "step": 28038 - }, - { - "epoch": 2.1437773572643692, - "grad_norm": 0.0009783850982785225, - "learning_rate": 0.00019999773369530545, - "loss": 46.0, - "step": 28039 - }, - { - "epoch": 2.143853814247759, - "grad_norm": 0.0013943061931058764, - "learning_rate": 0.00019999773353359177, - "loss": 46.0, - "step": 28040 - }, - { - "epoch": 2.1439302712311488, - "grad_norm": 0.0013013249263167381, - "learning_rate": 0.00019999773337187228, - "loss": 46.0, - "step": 28041 - }, - { - "epoch": 2.1440067282145385, - "grad_norm": 0.00034890859387815, - "learning_rate": 0.00019999773321014707, - "loss": 46.0, - "step": 28042 - }, - { - "epoch": 2.144083185197928, - "grad_norm": 0.0010609349701553583, - "learning_rate": 0.00019999773304841607, - "loss": 46.0, - "step": 28043 - }, - { - "epoch": 2.1441596421813176, - "grad_norm": 0.0006238010246306658, - "learning_rate": 0.00019999773288667929, - "loss": 46.0, - "step": 28044 - }, - { - "epoch": 2.1442360991647074, - "grad_norm": 0.0022068372927606106, - "learning_rate": 0.00019999773272493676, - "loss": 46.0, - "step": 28045 - }, - { - "epoch": 2.144312556148097, - "grad_norm": 0.000992662156932056, - "learning_rate": 0.00019999773256318846, - "loss": 46.0, - "step": 28046 - }, - { - "epoch": 2.144389013131487, - "grad_norm": 0.003069251775741577, - "learning_rate": 0.00019999773240143438, - "loss": 46.0, - "step": 28047 - }, - { - "epoch": 2.1444654701148766, - "grad_norm": 0.0011823850218206644, - "learning_rate": 0.00019999773223967454, - "loss": 46.0, - "step": 28048 - }, - { - "epoch": 2.1445419270982664, - "grad_norm": 0.0007566019776277244, - "learning_rate": 0.00019999773207790892, - "loss": 46.0, - "step": 28049 - }, - { - "epoch": 2.144618384081656, - "grad_norm": 0.0004884752561338246, - "learning_rate": 0.00019999773191613752, - "loss": 46.0, - "step": 28050 - }, - { - "epoch": 2.144694841065046, - "grad_norm": 0.0006829904159530997, - "learning_rate": 0.00019999773175436033, - "loss": 46.0, - "step": 28051 - }, - { - "epoch": 2.1447712980484357, - "grad_norm": 0.001857191789895296, - "learning_rate": 0.00019999773159257744, - "loss": 46.0, - "step": 28052 - }, - { - "epoch": 2.144847755031825, - "grad_norm": 0.000579994055442512, - "learning_rate": 0.00019999773143078873, - "loss": 46.0, - "step": 28053 - }, - { - "epoch": 2.1449242120152148, - "grad_norm": 0.0011155155953019857, - "learning_rate": 0.00019999773126899427, - "loss": 46.0, - "step": 28054 - }, - { - "epoch": 2.1450006689986045, - "grad_norm": 0.0009850916685536504, - "learning_rate": 0.00019999773110719403, - "loss": 46.0, - "step": 28055 - }, - { - "epoch": 2.1450771259819943, - "grad_norm": 0.0007776428828947246, - "learning_rate": 0.00019999773094538803, - "loss": 46.0, - "step": 28056 - }, - { - "epoch": 2.145153582965384, - "grad_norm": 0.0023823254741728306, - "learning_rate": 0.00019999773078357627, - "loss": 46.0, - "step": 28057 - }, - { - "epoch": 2.145230039948774, - "grad_norm": 0.0007415247382596135, - "learning_rate": 0.00019999773062175872, - "loss": 46.0, - "step": 28058 - }, - { - "epoch": 2.1453064969321636, - "grad_norm": 0.0011347433319315314, - "learning_rate": 0.00019999773045993542, - "loss": 46.0, - "step": 28059 - }, - { - "epoch": 2.1453829539155533, - "grad_norm": 0.00033677570172585547, - "learning_rate": 0.00019999773029810635, - "loss": 46.0, - "step": 28060 - }, - { - "epoch": 2.145459410898943, - "grad_norm": 0.0017952433554455638, - "learning_rate": 0.00019999773013627147, - "loss": 46.0, - "step": 28061 - }, - { - "epoch": 2.145535867882333, - "grad_norm": 0.0005006867577321827, - "learning_rate": 0.00019999772997443085, - "loss": 46.0, - "step": 28062 - }, - { - "epoch": 2.1456123248657226, - "grad_norm": 0.003177864011377096, - "learning_rate": 0.0001999977298125845, - "loss": 46.0, - "step": 28063 - }, - { - "epoch": 2.145688781849112, - "grad_norm": 0.0009311573230661452, - "learning_rate": 0.00019999772965073232, - "loss": 46.0, - "step": 28064 - }, - { - "epoch": 2.1457652388325017, - "grad_norm": 0.001280890661291778, - "learning_rate": 0.00019999772948887438, - "loss": 46.0, - "step": 28065 - }, - { - "epoch": 2.1458416958158915, - "grad_norm": 0.0011088397586718202, - "learning_rate": 0.0001999977293270107, - "loss": 46.0, - "step": 28066 - }, - { - "epoch": 2.145918152799281, - "grad_norm": 0.0009297026554122567, - "learning_rate": 0.00019999772916514123, - "loss": 46.0, - "step": 28067 - }, - { - "epoch": 2.145994609782671, - "grad_norm": 0.0015929682413116097, - "learning_rate": 0.000199997729003266, - "loss": 46.0, - "step": 28068 - }, - { - "epoch": 2.1460710667660607, - "grad_norm": 0.000654333911370486, - "learning_rate": 0.000199997728841385, - "loss": 46.0, - "step": 28069 - }, - { - "epoch": 2.1461475237494505, - "grad_norm": 0.0017585826572030783, - "learning_rate": 0.00019999772867949821, - "loss": 46.0, - "step": 28070 - }, - { - "epoch": 2.1462239807328403, - "grad_norm": 0.0013783980393782258, - "learning_rate": 0.0001999977285176057, - "loss": 46.0, - "step": 28071 - }, - { - "epoch": 2.14630043771623, - "grad_norm": 0.0007270924979820848, - "learning_rate": 0.00019999772835570736, - "loss": 46.0, - "step": 28072 - }, - { - "epoch": 2.14637689469962, - "grad_norm": 0.0006456988048739731, - "learning_rate": 0.0001999977281938033, - "loss": 46.0, - "step": 28073 - }, - { - "epoch": 2.1464533516830095, - "grad_norm": 0.001488837180659175, - "learning_rate": 0.00019999772803189344, - "loss": 46.0, - "step": 28074 - }, - { - "epoch": 2.146529808666399, - "grad_norm": 0.0008940851548686624, - "learning_rate": 0.00019999772786997782, - "loss": 46.0, - "step": 28075 - }, - { - "epoch": 2.1466062656497886, - "grad_norm": 0.0016282738652080297, - "learning_rate": 0.00019999772770805643, - "loss": 46.0, - "step": 28076 - }, - { - "epoch": 2.1466827226331784, - "grad_norm": 0.0012227408587932587, - "learning_rate": 0.00019999772754612923, - "loss": 46.0, - "step": 28077 - }, - { - "epoch": 2.146759179616568, - "grad_norm": 0.001382127869874239, - "learning_rate": 0.00019999772738419632, - "loss": 46.0, - "step": 28078 - }, - { - "epoch": 2.146835636599958, - "grad_norm": 0.0005174451507627964, - "learning_rate": 0.00019999772722225764, - "loss": 46.0, - "step": 28079 - }, - { - "epoch": 2.1469120935833477, - "grad_norm": 0.0015492273960262537, - "learning_rate": 0.00019999772706031318, - "loss": 46.0, - "step": 28080 - }, - { - "epoch": 2.1469885505667374, - "grad_norm": 0.000808349228464067, - "learning_rate": 0.00019999772689836294, - "loss": 46.0, - "step": 28081 - }, - { - "epoch": 2.147065007550127, - "grad_norm": 0.0008739153272472322, - "learning_rate": 0.00019999772673640694, - "loss": 46.0, - "step": 28082 - }, - { - "epoch": 2.147141464533517, - "grad_norm": 0.004506739787757397, - "learning_rate": 0.00019999772657444516, - "loss": 46.0, - "step": 28083 - }, - { - "epoch": 2.1472179215169067, - "grad_norm": 0.0005172499804757535, - "learning_rate": 0.0001999977264124776, - "loss": 46.0, - "step": 28084 - }, - { - "epoch": 2.1472943785002965, - "grad_norm": 0.0008176789269782603, - "learning_rate": 0.0001999977262505043, - "loss": 46.0, - "step": 28085 - }, - { - "epoch": 2.147370835483686, - "grad_norm": 0.0012802501441910863, - "learning_rate": 0.0001999977260885252, - "loss": 46.0, - "step": 28086 - }, - { - "epoch": 2.1474472924670756, - "grad_norm": 0.0011771807912737131, - "learning_rate": 0.00019999772592654036, - "loss": 46.0, - "step": 28087 - }, - { - "epoch": 2.1475237494504653, - "grad_norm": 0.0016017784364521503, - "learning_rate": 0.00019999772576454974, - "loss": 46.0, - "step": 28088 - }, - { - "epoch": 2.147600206433855, - "grad_norm": 0.0008139270939864218, - "learning_rate": 0.00019999772560255335, - "loss": 46.0, - "step": 28089 - }, - { - "epoch": 2.147676663417245, - "grad_norm": 0.001272616907954216, - "learning_rate": 0.00019999772544055118, - "loss": 46.0, - "step": 28090 - }, - { - "epoch": 2.1477531204006346, - "grad_norm": 0.002162357559427619, - "learning_rate": 0.00019999772527854327, - "loss": 46.0, - "step": 28091 - }, - { - "epoch": 2.1478295773840244, - "grad_norm": 0.0013778364518657327, - "learning_rate": 0.00019999772511652955, - "loss": 46.0, - "step": 28092 - }, - { - "epoch": 2.147906034367414, - "grad_norm": 0.0012272101594135165, - "learning_rate": 0.0001999977249545101, - "loss": 46.0, - "step": 28093 - }, - { - "epoch": 2.147982491350804, - "grad_norm": 0.003676793770864606, - "learning_rate": 0.00019999772479248486, - "loss": 46.0, - "step": 28094 - }, - { - "epoch": 2.1480589483341936, - "grad_norm": 0.0011014972114935517, - "learning_rate": 0.00019999772463045386, - "loss": 46.0, - "step": 28095 - }, - { - "epoch": 2.1481354053175834, - "grad_norm": 0.0036270723212510347, - "learning_rate": 0.00019999772446841708, - "loss": 46.0, - "step": 28096 - }, - { - "epoch": 2.1482118623009727, - "grad_norm": 0.0010349606163799763, - "learning_rate": 0.00019999772430637452, - "loss": 46.0, - "step": 28097 - }, - { - "epoch": 2.1482883192843625, - "grad_norm": 0.000926801934838295, - "learning_rate": 0.00019999772414432623, - "loss": 46.0, - "step": 28098 - }, - { - "epoch": 2.1483647762677522, - "grad_norm": 0.0006331445183604956, - "learning_rate": 0.00019999772398227213, - "loss": 46.0, - "step": 28099 - }, - { - "epoch": 2.148441233251142, - "grad_norm": 0.005006878636777401, - "learning_rate": 0.00019999772382021228, - "loss": 46.0, - "step": 28100 - }, - { - "epoch": 2.1485176902345318, - "grad_norm": 0.001148741808719933, - "learning_rate": 0.00019999772365814663, - "loss": 46.0, - "step": 28101 - }, - { - "epoch": 2.1485941472179215, - "grad_norm": 0.00098808400798589, - "learning_rate": 0.00019999772349607527, - "loss": 46.0, - "step": 28102 - }, - { - "epoch": 2.1486706042013113, - "grad_norm": 0.0012053870595991611, - "learning_rate": 0.0001999977233339981, - "loss": 46.0, - "step": 28103 - }, - { - "epoch": 2.148747061184701, - "grad_norm": 0.0019479095935821533, - "learning_rate": 0.0001999977231719152, - "loss": 46.0, - "step": 28104 - }, - { - "epoch": 2.148823518168091, - "grad_norm": 0.0005970261408947408, - "learning_rate": 0.00019999772300982648, - "loss": 46.0, - "step": 28105 - }, - { - "epoch": 2.1488999751514806, - "grad_norm": 0.0034951060079038143, - "learning_rate": 0.00019999772284773202, - "loss": 46.0, - "step": 28106 - }, - { - "epoch": 2.1489764321348703, - "grad_norm": 0.0025081115309149027, - "learning_rate": 0.00019999772268563176, - "loss": 46.0, - "step": 28107 - }, - { - "epoch": 2.1490528891182596, - "grad_norm": 0.0004526465490926057, - "learning_rate": 0.00019999772252352575, - "loss": 46.0, - "step": 28108 - }, - { - "epoch": 2.1491293461016494, - "grad_norm": 0.0007097624475136399, - "learning_rate": 0.00019999772236141397, - "loss": 46.0, - "step": 28109 - }, - { - "epoch": 2.149205803085039, - "grad_norm": 0.0007064482779242098, - "learning_rate": 0.00019999772219929645, - "loss": 46.0, - "step": 28110 - }, - { - "epoch": 2.149282260068429, - "grad_norm": 0.0005932310596108437, - "learning_rate": 0.0001999977220371731, - "loss": 46.0, - "step": 28111 - }, - { - "epoch": 2.1493587170518187, - "grad_norm": 0.0026616195682436228, - "learning_rate": 0.00019999772187504405, - "loss": 46.0, - "step": 28112 - }, - { - "epoch": 2.1494351740352085, - "grad_norm": 0.0016878944588825107, - "learning_rate": 0.00019999772171290918, - "loss": 46.0, - "step": 28113 - }, - { - "epoch": 2.149511631018598, - "grad_norm": 0.0006121466867625713, - "learning_rate": 0.00019999772155076856, - "loss": 46.0, - "step": 28114 - }, - { - "epoch": 2.149588088001988, - "grad_norm": 0.0008225361234508455, - "learning_rate": 0.00019999772138862217, - "loss": 46.0, - "step": 28115 - }, - { - "epoch": 2.1496645449853777, - "grad_norm": 0.0010969842551276088, - "learning_rate": 0.00019999772122647, - "loss": 46.0, - "step": 28116 - }, - { - "epoch": 2.1497410019687675, - "grad_norm": 0.0011599557474255562, - "learning_rate": 0.00019999772106431206, - "loss": 46.0, - "step": 28117 - }, - { - "epoch": 2.149817458952157, - "grad_norm": 0.0009778441162779927, - "learning_rate": 0.00019999772090214838, - "loss": 46.0, - "step": 28118 - }, - { - "epoch": 2.1498939159355466, - "grad_norm": 0.002477919915691018, - "learning_rate": 0.0001999977207399789, - "loss": 46.0, - "step": 28119 - }, - { - "epoch": 2.1499703729189363, - "grad_norm": 0.000579033512622118, - "learning_rate": 0.00019999772057780366, - "loss": 46.0, - "step": 28120 - }, - { - "epoch": 2.150046829902326, - "grad_norm": 0.0004702067526523024, - "learning_rate": 0.00019999772041562265, - "loss": 46.0, - "step": 28121 - }, - { - "epoch": 2.150123286885716, - "grad_norm": 0.0004753461398649961, - "learning_rate": 0.00019999772025343585, - "loss": 46.0, - "step": 28122 - }, - { - "epoch": 2.1501997438691056, - "grad_norm": 0.000491965445689857, - "learning_rate": 0.00019999772009124332, - "loss": 46.0, - "step": 28123 - }, - { - "epoch": 2.1502762008524954, - "grad_norm": 0.0008067890885286033, - "learning_rate": 0.000199997719929045, - "loss": 46.0, - "step": 28124 - }, - { - "epoch": 2.150352657835885, - "grad_norm": 0.0021548105869442225, - "learning_rate": 0.00019999771976684093, - "loss": 46.0, - "step": 28125 - }, - { - "epoch": 2.150429114819275, - "grad_norm": 0.0007861893973313272, - "learning_rate": 0.00019999771960463108, - "loss": 46.0, - "step": 28126 - }, - { - "epoch": 2.1505055718026647, - "grad_norm": 0.0009069948573596776, - "learning_rate": 0.00019999771944241544, - "loss": 46.0, - "step": 28127 - }, - { - "epoch": 2.1505820287860544, - "grad_norm": 0.001138515304774046, - "learning_rate": 0.00019999771928019404, - "loss": 46.0, - "step": 28128 - }, - { - "epoch": 2.150658485769444, - "grad_norm": 0.0009136605658568442, - "learning_rate": 0.00019999771911796688, - "loss": 46.0, - "step": 28129 - }, - { - "epoch": 2.1507349427528335, - "grad_norm": 0.003166587557643652, - "learning_rate": 0.00019999771895573397, - "loss": 46.0, - "step": 28130 - }, - { - "epoch": 2.1508113997362233, - "grad_norm": 0.0015549215022474527, - "learning_rate": 0.00019999771879349525, - "loss": 46.0, - "step": 28131 - }, - { - "epoch": 2.150887856719613, - "grad_norm": 0.0008785352692939341, - "learning_rate": 0.00019999771863125077, - "loss": 46.0, - "step": 28132 - }, - { - "epoch": 2.150964313703003, - "grad_norm": 0.0017593599623069167, - "learning_rate": 0.00019999771846900054, - "loss": 46.0, - "step": 28133 - }, - { - "epoch": 2.1510407706863925, - "grad_norm": 0.0013651741901412606, - "learning_rate": 0.00019999771830674453, - "loss": 46.0, - "step": 28134 - }, - { - "epoch": 2.1511172276697823, - "grad_norm": 0.0009699792135506868, - "learning_rate": 0.00019999771814448273, - "loss": 46.0, - "step": 28135 - }, - { - "epoch": 2.151193684653172, - "grad_norm": 0.000791808997746557, - "learning_rate": 0.00019999771798221517, - "loss": 46.0, - "step": 28136 - }, - { - "epoch": 2.151270141636562, - "grad_norm": 0.0008816052577458322, - "learning_rate": 0.00019999771781994188, - "loss": 46.0, - "step": 28137 - }, - { - "epoch": 2.1513465986199516, - "grad_norm": 0.0015526552451774478, - "learning_rate": 0.0001999977176576628, - "loss": 46.0, - "step": 28138 - }, - { - "epoch": 2.1514230556033413, - "grad_norm": 0.0015143491327762604, - "learning_rate": 0.00019999771749537793, - "loss": 46.0, - "step": 28139 - }, - { - "epoch": 2.1514995125867307, - "grad_norm": 0.0009356113150715828, - "learning_rate": 0.00019999771733308732, - "loss": 46.0, - "step": 28140 - }, - { - "epoch": 2.1515759695701204, - "grad_norm": 0.0009212291915901005, - "learning_rate": 0.00019999771717079092, - "loss": 46.0, - "step": 28141 - }, - { - "epoch": 2.15165242655351, - "grad_norm": 0.0007717022090218961, - "learning_rate": 0.00019999771700848876, - "loss": 46.0, - "step": 28142 - }, - { - "epoch": 2.1517288835369, - "grad_norm": 0.0004468267725314945, - "learning_rate": 0.0001999977168461808, - "loss": 46.0, - "step": 28143 - }, - { - "epoch": 2.1518053405202897, - "grad_norm": 0.003542429767549038, - "learning_rate": 0.0001999977166838671, - "loss": 46.0, - "step": 28144 - }, - { - "epoch": 2.1518817975036795, - "grad_norm": 0.0007410286925733089, - "learning_rate": 0.00019999771652154765, - "loss": 46.0, - "step": 28145 - }, - { - "epoch": 2.1519582544870692, - "grad_norm": 0.000999771524220705, - "learning_rate": 0.0001999977163592224, - "loss": 46.0, - "step": 28146 - }, - { - "epoch": 2.152034711470459, - "grad_norm": 0.0009272146853618324, - "learning_rate": 0.0001999977161968914, - "loss": 46.0, - "step": 28147 - }, - { - "epoch": 2.1521111684538488, - "grad_norm": 0.0006438328418880701, - "learning_rate": 0.0001999977160345546, - "loss": 46.0, - "step": 28148 - }, - { - "epoch": 2.1521876254372385, - "grad_norm": 0.002356426790356636, - "learning_rate": 0.00019999771587221206, - "loss": 46.0, - "step": 28149 - }, - { - "epoch": 2.1522640824206283, - "grad_norm": 0.0007545550470240414, - "learning_rate": 0.00019999771570986373, - "loss": 46.0, - "step": 28150 - }, - { - "epoch": 2.152340539404018, - "grad_norm": 0.0020118916872888803, - "learning_rate": 0.00019999771554750963, - "loss": 46.0, - "step": 28151 - }, - { - "epoch": 2.1524169963874074, - "grad_norm": 0.0011059088865295053, - "learning_rate": 0.00019999771538514976, - "loss": 46.0, - "step": 28152 - }, - { - "epoch": 2.152493453370797, - "grad_norm": 0.006322795990854502, - "learning_rate": 0.00019999771522278414, - "loss": 46.0, - "step": 28153 - }, - { - "epoch": 2.152569910354187, - "grad_norm": 0.0011724461801350117, - "learning_rate": 0.00019999771506041275, - "loss": 46.0, - "step": 28154 - }, - { - "epoch": 2.1526463673375766, - "grad_norm": 0.0015976238064467907, - "learning_rate": 0.00019999771489803562, - "loss": 46.0, - "step": 28155 - }, - { - "epoch": 2.1527228243209664, - "grad_norm": 0.0013990487204864621, - "learning_rate": 0.00019999771473565265, - "loss": 46.0, - "step": 28156 - }, - { - "epoch": 2.152799281304356, - "grad_norm": 0.0005238635931164026, - "learning_rate": 0.00019999771457326394, - "loss": 46.0, - "step": 28157 - }, - { - "epoch": 2.152875738287746, - "grad_norm": 0.006004489026963711, - "learning_rate": 0.00019999771441086948, - "loss": 46.0, - "step": 28158 - }, - { - "epoch": 2.1529521952711357, - "grad_norm": 0.0007889384869486094, - "learning_rate": 0.00019999771424846925, - "loss": 46.0, - "step": 28159 - }, - { - "epoch": 2.1530286522545254, - "grad_norm": 0.0021235414315015078, - "learning_rate": 0.00019999771408606322, - "loss": 46.0, - "step": 28160 - }, - { - "epoch": 2.153105109237915, - "grad_norm": 0.005783233791589737, - "learning_rate": 0.00019999771392365144, - "loss": 46.0, - "step": 28161 - }, - { - "epoch": 2.1531815662213045, - "grad_norm": 0.0008668258087709546, - "learning_rate": 0.0001999977137612339, - "loss": 46.0, - "step": 28162 - }, - { - "epoch": 2.1532580232046943, - "grad_norm": 0.0010577006032690406, - "learning_rate": 0.00019999771359881057, - "loss": 46.0, - "step": 28163 - }, - { - "epoch": 2.153334480188084, - "grad_norm": 0.001113516860641539, - "learning_rate": 0.00019999771343638147, - "loss": 46.0, - "step": 28164 - }, - { - "epoch": 2.153410937171474, - "grad_norm": 0.002328047063201666, - "learning_rate": 0.0001999977132739466, - "loss": 46.0, - "step": 28165 - }, - { - "epoch": 2.1534873941548636, - "grad_norm": 0.0016389908269047737, - "learning_rate": 0.00019999771311150598, - "loss": 46.0, - "step": 28166 - }, - { - "epoch": 2.1535638511382533, - "grad_norm": 0.0022578593343496323, - "learning_rate": 0.00019999771294905961, - "loss": 46.0, - "step": 28167 - }, - { - "epoch": 2.153640308121643, - "grad_norm": 0.0009417764958925545, - "learning_rate": 0.00019999771278660742, - "loss": 46.0, - "step": 28168 - }, - { - "epoch": 2.153716765105033, - "grad_norm": 0.0007314240210689604, - "learning_rate": 0.0001999977126241495, - "loss": 46.0, - "step": 28169 - }, - { - "epoch": 2.1537932220884226, - "grad_norm": 0.005837566219270229, - "learning_rate": 0.0001999977124616858, - "loss": 46.0, - "step": 28170 - }, - { - "epoch": 2.1538696790718124, - "grad_norm": 0.0014265247154980898, - "learning_rate": 0.00019999771229921632, - "loss": 46.0, - "step": 28171 - }, - { - "epoch": 2.153946136055202, - "grad_norm": 0.002703160047531128, - "learning_rate": 0.00019999771213674109, - "loss": 46.0, - "step": 28172 - }, - { - "epoch": 2.154022593038592, - "grad_norm": 0.0010520732030272484, - "learning_rate": 0.00019999771197426008, - "loss": 46.0, - "step": 28173 - }, - { - "epoch": 2.154099050021981, - "grad_norm": 0.001062014140188694, - "learning_rate": 0.00019999771181177328, - "loss": 46.0, - "step": 28174 - }, - { - "epoch": 2.154175507005371, - "grad_norm": 0.0011998831760138273, - "learning_rate": 0.00019999771164928073, - "loss": 46.0, - "step": 28175 - }, - { - "epoch": 2.1542519639887607, - "grad_norm": 0.007597212214022875, - "learning_rate": 0.0001999977114867824, - "loss": 46.0, - "step": 28176 - }, - { - "epoch": 2.1543284209721505, - "grad_norm": 0.0011730515398085117, - "learning_rate": 0.0001999977113242783, - "loss": 46.0, - "step": 28177 - }, - { - "epoch": 2.1544048779555403, - "grad_norm": 0.0007938652997836471, - "learning_rate": 0.00019999771116176843, - "loss": 46.0, - "step": 28178 - }, - { - "epoch": 2.15448133493893, - "grad_norm": 0.002646838780492544, - "learning_rate": 0.00019999771099925282, - "loss": 46.0, - "step": 28179 - }, - { - "epoch": 2.1545577919223198, - "grad_norm": 0.001280545606277883, - "learning_rate": 0.00019999771083673143, - "loss": 46.0, - "step": 28180 - }, - { - "epoch": 2.1546342489057095, - "grad_norm": 0.0018157809972763062, - "learning_rate": 0.00019999771067420426, - "loss": 46.0, - "step": 28181 - }, - { - "epoch": 2.1547107058890993, - "grad_norm": 0.0014921803958714008, - "learning_rate": 0.0001999977105116713, - "loss": 46.0, - "step": 28182 - }, - { - "epoch": 2.154787162872489, - "grad_norm": 0.0003949670644942671, - "learning_rate": 0.00019999771034913261, - "loss": 46.0, - "step": 28183 - }, - { - "epoch": 2.1548636198558784, - "grad_norm": 0.0010988599387928843, - "learning_rate": 0.00019999771018658813, - "loss": 46.0, - "step": 28184 - }, - { - "epoch": 2.154940076839268, - "grad_norm": 0.001070576487109065, - "learning_rate": 0.0001999977100240379, - "loss": 46.0, - "step": 28185 - }, - { - "epoch": 2.155016533822658, - "grad_norm": 0.000707069702912122, - "learning_rate": 0.00019999770986148184, - "loss": 46.0, - "step": 28186 - }, - { - "epoch": 2.1550929908060477, - "grad_norm": 0.0013154692715033889, - "learning_rate": 0.0001999977096989201, - "loss": 46.0, - "step": 28187 - }, - { - "epoch": 2.1551694477894374, - "grad_norm": 0.0007828108500689268, - "learning_rate": 0.00019999770953635254, - "loss": 46.0, - "step": 28188 - }, - { - "epoch": 2.155245904772827, - "grad_norm": 0.0027716923505067825, - "learning_rate": 0.00019999770937377922, - "loss": 46.0, - "step": 28189 - }, - { - "epoch": 2.155322361756217, - "grad_norm": 0.0010585255222395062, - "learning_rate": 0.00019999770921120012, - "loss": 46.0, - "step": 28190 - }, - { - "epoch": 2.1553988187396067, - "grad_norm": 0.0016748480265960097, - "learning_rate": 0.00019999770904861525, - "loss": 46.0, - "step": 28191 - }, - { - "epoch": 2.1554752757229965, - "grad_norm": 0.0008144259918481112, - "learning_rate": 0.00019999770888602463, - "loss": 46.0, - "step": 28192 - }, - { - "epoch": 2.1555517327063862, - "grad_norm": 0.0015611799899488688, - "learning_rate": 0.00019999770872342821, - "loss": 46.0, - "step": 28193 - }, - { - "epoch": 2.155628189689776, - "grad_norm": 0.0008937178063206375, - "learning_rate": 0.00019999770856082605, - "loss": 46.0, - "step": 28194 - }, - { - "epoch": 2.1557046466731653, - "grad_norm": 0.003062192350625992, - "learning_rate": 0.0001999977083982181, - "loss": 46.0, - "step": 28195 - }, - { - "epoch": 2.155781103656555, - "grad_norm": 0.0013641052646562457, - "learning_rate": 0.00019999770823560443, - "loss": 46.0, - "step": 28196 - }, - { - "epoch": 2.155857560639945, - "grad_norm": 0.002345721935853362, - "learning_rate": 0.00019999770807298492, - "loss": 46.0, - "step": 28197 - }, - { - "epoch": 2.1559340176233346, - "grad_norm": 0.0016041523776948452, - "learning_rate": 0.0001999977079103597, - "loss": 46.0, - "step": 28198 - }, - { - "epoch": 2.1560104746067243, - "grad_norm": 0.002357837976887822, - "learning_rate": 0.00019999770774772869, - "loss": 46.0, - "step": 28199 - }, - { - "epoch": 2.156086931590114, - "grad_norm": 0.00382078904658556, - "learning_rate": 0.00019999770758509188, - "loss": 46.0, - "step": 28200 - }, - { - "epoch": 2.156163388573504, - "grad_norm": 0.0013290635542944074, - "learning_rate": 0.00019999770742244933, - "loss": 46.0, - "step": 28201 - }, - { - "epoch": 2.1562398455568936, - "grad_norm": 0.0005938831018283963, - "learning_rate": 0.000199997707259801, - "loss": 46.0, - "step": 28202 - }, - { - "epoch": 2.1563163025402834, - "grad_norm": 0.0008766081882640719, - "learning_rate": 0.0001999977070971469, - "loss": 46.0, - "step": 28203 - }, - { - "epoch": 2.156392759523673, - "grad_norm": 0.0023452858440577984, - "learning_rate": 0.00019999770693448704, - "loss": 46.0, - "step": 28204 - }, - { - "epoch": 2.156469216507063, - "grad_norm": 0.00046968087553977966, - "learning_rate": 0.00019999770677182142, - "loss": 46.0, - "step": 28205 - }, - { - "epoch": 2.1565456734904522, - "grad_norm": 0.0012829380575567484, - "learning_rate": 0.00019999770660915003, - "loss": 46.0, - "step": 28206 - }, - { - "epoch": 2.156622130473842, - "grad_norm": 0.001255856710486114, - "learning_rate": 0.00019999770644647284, - "loss": 46.0, - "step": 28207 - }, - { - "epoch": 2.1566985874572318, - "grad_norm": 0.0016059057088568807, - "learning_rate": 0.00019999770628378988, - "loss": 46.0, - "step": 28208 - }, - { - "epoch": 2.1567750444406215, - "grad_norm": 0.00527126295492053, - "learning_rate": 0.00019999770612110117, - "loss": 46.0, - "step": 28209 - }, - { - "epoch": 2.1568515014240113, - "grad_norm": 0.0012687172275036573, - "learning_rate": 0.0001999977059584067, - "loss": 46.0, - "step": 28210 - }, - { - "epoch": 2.156927958407401, - "grad_norm": 0.001291608321480453, - "learning_rate": 0.00019999770579570648, - "loss": 46.0, - "step": 28211 - }, - { - "epoch": 2.157004415390791, - "grad_norm": 0.0018162453779950738, - "learning_rate": 0.00019999770563300045, - "loss": 46.0, - "step": 28212 - }, - { - "epoch": 2.1570808723741806, - "grad_norm": 0.0008445921703241765, - "learning_rate": 0.00019999770547028868, - "loss": 46.0, - "step": 28213 - }, - { - "epoch": 2.1571573293575703, - "grad_norm": 0.001552355708554387, - "learning_rate": 0.0001999977053075711, - "loss": 46.0, - "step": 28214 - }, - { - "epoch": 2.15723378634096, - "grad_norm": 0.0003529755340423435, - "learning_rate": 0.0001999977051448478, - "loss": 46.0, - "step": 28215 - }, - { - "epoch": 2.15731024332435, - "grad_norm": 0.0006778242532163858, - "learning_rate": 0.00019999770498211868, - "loss": 46.0, - "step": 28216 - }, - { - "epoch": 2.157386700307739, - "grad_norm": 0.001351646613329649, - "learning_rate": 0.00019999770481938384, - "loss": 46.0, - "step": 28217 - }, - { - "epoch": 2.157463157291129, - "grad_norm": 0.0038238682318478823, - "learning_rate": 0.0001999977046566432, - "loss": 46.0, - "step": 28218 - }, - { - "epoch": 2.1575396142745187, - "grad_norm": 0.0014866123674437404, - "learning_rate": 0.00019999770449389678, - "loss": 46.0, - "step": 28219 - }, - { - "epoch": 2.1576160712579084, - "grad_norm": 0.0018138274317607284, - "learning_rate": 0.0001999977043311446, - "loss": 46.0, - "step": 28220 - }, - { - "epoch": 2.157692528241298, - "grad_norm": 0.0011176791740581393, - "learning_rate": 0.00019999770416838668, - "loss": 46.0, - "step": 28221 - }, - { - "epoch": 2.157768985224688, - "grad_norm": 0.0014468267327174544, - "learning_rate": 0.00019999770400562297, - "loss": 46.0, - "step": 28222 - }, - { - "epoch": 2.1578454422080777, - "grad_norm": 0.0005226039793342352, - "learning_rate": 0.00019999770384285348, - "loss": 46.0, - "step": 28223 - }, - { - "epoch": 2.1579218991914675, - "grad_norm": 0.0016708774492144585, - "learning_rate": 0.00019999770368007825, - "loss": 46.0, - "step": 28224 - }, - { - "epoch": 2.1579983561748572, - "grad_norm": 0.000577491067815572, - "learning_rate": 0.0001999977035172972, - "loss": 46.0, - "step": 28225 - }, - { - "epoch": 2.158074813158247, - "grad_norm": 0.0012306488351896405, - "learning_rate": 0.00019999770335451042, - "loss": 46.0, - "step": 28226 - }, - { - "epoch": 2.1581512701416368, - "grad_norm": 0.0006994212744757533, - "learning_rate": 0.00019999770319171787, - "loss": 46.0, - "step": 28227 - }, - { - "epoch": 2.158227727125026, - "grad_norm": 0.0019818840082734823, - "learning_rate": 0.00019999770302891955, - "loss": 46.0, - "step": 28228 - }, - { - "epoch": 2.158304184108416, - "grad_norm": 0.0004970363224856555, - "learning_rate": 0.00019999770286611542, - "loss": 46.0, - "step": 28229 - }, - { - "epoch": 2.1583806410918056, - "grad_norm": 0.0019437340088188648, - "learning_rate": 0.00019999770270330558, - "loss": 46.0, - "step": 28230 - }, - { - "epoch": 2.1584570980751954, - "grad_norm": 0.0007514884928241372, - "learning_rate": 0.00019999770254048994, - "loss": 46.0, - "step": 28231 - }, - { - "epoch": 2.158533555058585, - "grad_norm": 0.001444541267119348, - "learning_rate": 0.00019999770237766855, - "loss": 46.0, - "step": 28232 - }, - { - "epoch": 2.158610012041975, - "grad_norm": 0.0014975356170907617, - "learning_rate": 0.00019999770221484139, - "loss": 46.0, - "step": 28233 - }, - { - "epoch": 2.1586864690253647, - "grad_norm": 0.0008805756224319339, - "learning_rate": 0.00019999770205200842, - "loss": 46.0, - "step": 28234 - }, - { - "epoch": 2.1587629260087544, - "grad_norm": 0.0008572934893891215, - "learning_rate": 0.0001999977018891697, - "loss": 46.0, - "step": 28235 - }, - { - "epoch": 2.158839382992144, - "grad_norm": 0.0011078552342951298, - "learning_rate": 0.00019999770172632523, - "loss": 46.0, - "step": 28236 - }, - { - "epoch": 2.158915839975534, - "grad_norm": 0.0005424771225079894, - "learning_rate": 0.000199997701563475, - "loss": 46.0, - "step": 28237 - }, - { - "epoch": 2.1589922969589237, - "grad_norm": 0.0015068185748532414, - "learning_rate": 0.00019999770140061894, - "loss": 46.0, - "step": 28238 - }, - { - "epoch": 2.159068753942313, - "grad_norm": 0.002984751481562853, - "learning_rate": 0.0001999977012377572, - "loss": 46.0, - "step": 28239 - }, - { - "epoch": 2.1591452109257028, - "grad_norm": 0.0026946093421429396, - "learning_rate": 0.00019999770107488962, - "loss": 46.0, - "step": 28240 - }, - { - "epoch": 2.1592216679090925, - "grad_norm": 0.0006708583678118885, - "learning_rate": 0.0001999977009120163, - "loss": 46.0, - "step": 28241 - }, - { - "epoch": 2.1592981248924823, - "grad_norm": 0.0010932415025308728, - "learning_rate": 0.0001999977007491372, - "loss": 46.0, - "step": 28242 - }, - { - "epoch": 2.159374581875872, - "grad_norm": 0.0019411728717386723, - "learning_rate": 0.00019999770058625233, - "loss": 46.0, - "step": 28243 - }, - { - "epoch": 2.159451038859262, - "grad_norm": 0.0012245491379871964, - "learning_rate": 0.0001999977004233617, - "loss": 46.0, - "step": 28244 - }, - { - "epoch": 2.1595274958426516, - "grad_norm": 0.0008670325041748583, - "learning_rate": 0.0001999977002604653, - "loss": 46.0, - "step": 28245 - }, - { - "epoch": 2.1596039528260413, - "grad_norm": 0.0008080180268734694, - "learning_rate": 0.0001999977000975631, - "loss": 46.0, - "step": 28246 - }, - { - "epoch": 2.159680409809431, - "grad_norm": 0.0008524100994691253, - "learning_rate": 0.00019999769993465515, - "loss": 46.0, - "step": 28247 - }, - { - "epoch": 2.159756866792821, - "grad_norm": 0.001763631938956678, - "learning_rate": 0.00019999769977174146, - "loss": 46.0, - "step": 28248 - }, - { - "epoch": 2.15983332377621, - "grad_norm": 0.0016128664137795568, - "learning_rate": 0.00019999769960882198, - "loss": 46.0, - "step": 28249 - }, - { - "epoch": 2.1599097807596, - "grad_norm": 0.0004104067920707166, - "learning_rate": 0.00019999769944589673, - "loss": 46.0, - "step": 28250 - }, - { - "epoch": 2.1599862377429897, - "grad_norm": 0.00031212990870699286, - "learning_rate": 0.0001999976992829657, - "loss": 46.0, - "step": 28251 - }, - { - "epoch": 2.1600626947263795, - "grad_norm": 0.0012686926638707519, - "learning_rate": 0.00019999769912002892, - "loss": 46.0, - "step": 28252 - }, - { - "epoch": 2.1601391517097692, - "grad_norm": 0.0009924668120220304, - "learning_rate": 0.00019999769895708634, - "loss": 46.0, - "step": 28253 - }, - { - "epoch": 2.160215608693159, - "grad_norm": 0.000501585251186043, - "learning_rate": 0.00019999769879413802, - "loss": 46.0, - "step": 28254 - }, - { - "epoch": 2.1602920656765487, - "grad_norm": 0.0016998882638290524, - "learning_rate": 0.00019999769863118393, - "loss": 46.0, - "step": 28255 - }, - { - "epoch": 2.1603685226599385, - "grad_norm": 0.0015705376863479614, - "learning_rate": 0.00019999769846822406, - "loss": 46.0, - "step": 28256 - }, - { - "epoch": 2.1604449796433283, - "grad_norm": 0.0014292121632024646, - "learning_rate": 0.00019999769830525844, - "loss": 46.0, - "step": 28257 - }, - { - "epoch": 2.160521436626718, - "grad_norm": 0.0019807922653853893, - "learning_rate": 0.00019999769814228703, - "loss": 46.0, - "step": 28258 - }, - { - "epoch": 2.160597893610108, - "grad_norm": 0.0015184932854026556, - "learning_rate": 0.00019999769797930984, - "loss": 46.0, - "step": 28259 - }, - { - "epoch": 2.1606743505934976, - "grad_norm": 0.0026066794525831938, - "learning_rate": 0.0001999976978163269, - "loss": 46.0, - "step": 28260 - }, - { - "epoch": 2.160750807576887, - "grad_norm": 0.004086014349013567, - "learning_rate": 0.0001999976976533382, - "loss": 46.0, - "step": 28261 - }, - { - "epoch": 2.1608272645602766, - "grad_norm": 0.0009970151586458087, - "learning_rate": 0.00019999769749034368, - "loss": 46.0, - "step": 28262 - }, - { - "epoch": 2.1609037215436664, - "grad_norm": 0.002410683548077941, - "learning_rate": 0.00019999769732734345, - "loss": 46.0, - "step": 28263 - }, - { - "epoch": 2.160980178527056, - "grad_norm": 0.0016978831263259053, - "learning_rate": 0.00019999769716433743, - "loss": 46.0, - "step": 28264 - }, - { - "epoch": 2.161056635510446, - "grad_norm": 0.0009385807788930833, - "learning_rate": 0.00019999769700132562, - "loss": 46.0, - "step": 28265 - }, - { - "epoch": 2.1611330924938357, - "grad_norm": 0.000730443571228534, - "learning_rate": 0.00019999769683830807, - "loss": 46.0, - "step": 28266 - }, - { - "epoch": 2.1612095494772254, - "grad_norm": 0.0020312333945184946, - "learning_rate": 0.00019999769667528475, - "loss": 46.0, - "step": 28267 - }, - { - "epoch": 2.161286006460615, - "grad_norm": 0.000674801180139184, - "learning_rate": 0.00019999769651225563, - "loss": 46.0, - "step": 28268 - }, - { - "epoch": 2.161362463444005, - "grad_norm": 0.001050399150699377, - "learning_rate": 0.00019999769634922076, - "loss": 46.0, - "step": 28269 - }, - { - "epoch": 2.1614389204273947, - "grad_norm": 0.0018419305561110377, - "learning_rate": 0.00019999769618618015, - "loss": 46.0, - "step": 28270 - }, - { - "epoch": 2.161515377410784, - "grad_norm": 0.0010177392978221178, - "learning_rate": 0.00019999769602313373, - "loss": 46.0, - "step": 28271 - }, - { - "epoch": 2.161591834394174, - "grad_norm": 0.0005326177342794836, - "learning_rate": 0.00019999769586008154, - "loss": 46.0, - "step": 28272 - }, - { - "epoch": 2.1616682913775636, - "grad_norm": 0.0018263114616274834, - "learning_rate": 0.00019999769569702363, - "loss": 46.0, - "step": 28273 - }, - { - "epoch": 2.1617447483609533, - "grad_norm": 0.001399523694999516, - "learning_rate": 0.00019999769553395987, - "loss": 46.0, - "step": 28274 - }, - { - "epoch": 2.161821205344343, - "grad_norm": 0.002858817344531417, - "learning_rate": 0.00019999769537089042, - "loss": 46.0, - "step": 28275 - }, - { - "epoch": 2.161897662327733, - "grad_norm": 0.002031118143349886, - "learning_rate": 0.00019999769520781516, - "loss": 46.0, - "step": 28276 - }, - { - "epoch": 2.1619741193111226, - "grad_norm": 0.0006862554582767189, - "learning_rate": 0.00019999769504473413, - "loss": 46.0, - "step": 28277 - }, - { - "epoch": 2.1620505762945124, - "grad_norm": 0.0006814321968704462, - "learning_rate": 0.00019999769488164733, - "loss": 46.0, - "step": 28278 - }, - { - "epoch": 2.162127033277902, - "grad_norm": 0.00224114628508687, - "learning_rate": 0.00019999769471855475, - "loss": 46.0, - "step": 28279 - }, - { - "epoch": 2.162203490261292, - "grad_norm": 0.00044544244883581996, - "learning_rate": 0.00019999769455545646, - "loss": 46.0, - "step": 28280 - }, - { - "epoch": 2.1622799472446816, - "grad_norm": 0.0017573442310094833, - "learning_rate": 0.00019999769439235234, - "loss": 46.0, - "step": 28281 - }, - { - "epoch": 2.1623564042280714, - "grad_norm": 0.00034093158319592476, - "learning_rate": 0.0001999976942292425, - "loss": 46.0, - "step": 28282 - }, - { - "epoch": 2.1624328612114607, - "grad_norm": 0.0009124795324169099, - "learning_rate": 0.00019999769406612683, - "loss": 46.0, - "step": 28283 - }, - { - "epoch": 2.1625093181948505, - "grad_norm": 0.0010847555240616202, - "learning_rate": 0.00019999769390300544, - "loss": 46.0, - "step": 28284 - }, - { - "epoch": 2.1625857751782402, - "grad_norm": 0.0007826763321645558, - "learning_rate": 0.00019999769373987825, - "loss": 46.0, - "step": 28285 - }, - { - "epoch": 2.16266223216163, - "grad_norm": 0.0010230930056422949, - "learning_rate": 0.00019999769357674532, - "loss": 46.0, - "step": 28286 - }, - { - "epoch": 2.1627386891450198, - "grad_norm": 0.0009571608970873058, - "learning_rate": 0.00019999769341360658, - "loss": 46.0, - "step": 28287 - }, - { - "epoch": 2.1628151461284095, - "grad_norm": 0.000638592173345387, - "learning_rate": 0.0001999976932504621, - "loss": 46.0, - "step": 28288 - }, - { - "epoch": 2.1628916031117993, - "grad_norm": 0.00393934641033411, - "learning_rate": 0.00019999769308731184, - "loss": 46.0, - "step": 28289 - }, - { - "epoch": 2.162968060095189, - "grad_norm": 0.0010481675853952765, - "learning_rate": 0.00019999769292415581, - "loss": 46.0, - "step": 28290 - }, - { - "epoch": 2.163044517078579, - "grad_norm": 0.0017305562505498528, - "learning_rate": 0.00019999769276099404, - "loss": 46.0, - "step": 28291 - }, - { - "epoch": 2.1631209740619686, - "grad_norm": 0.001234453055076301, - "learning_rate": 0.00019999769259782646, - "loss": 46.0, - "step": 28292 - }, - { - "epoch": 2.163197431045358, - "grad_norm": 0.001165780471637845, - "learning_rate": 0.00019999769243465314, - "loss": 46.0, - "step": 28293 - }, - { - "epoch": 2.1632738880287476, - "grad_norm": 0.0007124131661839783, - "learning_rate": 0.00019999769227147405, - "loss": 46.0, - "step": 28294 - }, - { - "epoch": 2.1633503450121374, - "grad_norm": 0.0036853512283414602, - "learning_rate": 0.00019999769210828915, - "loss": 46.0, - "step": 28295 - }, - { - "epoch": 2.163426801995527, - "grad_norm": 0.0039886184968054295, - "learning_rate": 0.0001999976919450985, - "loss": 46.0, - "step": 28296 - }, - { - "epoch": 2.163503258978917, - "grad_norm": 0.004854655358940363, - "learning_rate": 0.0001999976917819021, - "loss": 46.0, - "step": 28297 - }, - { - "epoch": 2.1635797159623067, - "grad_norm": 0.0009602952050045133, - "learning_rate": 0.00019999769161869994, - "loss": 46.0, - "step": 28298 - }, - { - "epoch": 2.1636561729456965, - "grad_norm": 0.0008612944511696696, - "learning_rate": 0.00019999769145549197, - "loss": 46.0, - "step": 28299 - }, - { - "epoch": 2.163732629929086, - "grad_norm": 0.001840696088038385, - "learning_rate": 0.00019999769129227827, - "loss": 46.0, - "step": 28300 - }, - { - "epoch": 2.163809086912476, - "grad_norm": 0.0011351652210578322, - "learning_rate": 0.00019999769112905878, - "loss": 46.0, - "step": 28301 - }, - { - "epoch": 2.1638855438958657, - "grad_norm": 0.0006566941738128662, - "learning_rate": 0.00019999769096583353, - "loss": 46.0, - "step": 28302 - }, - { - "epoch": 2.1639620008792555, - "grad_norm": 0.0008486962760798633, - "learning_rate": 0.0001999976908026025, - "loss": 46.0, - "step": 28303 - }, - { - "epoch": 2.1640384578626453, - "grad_norm": 0.001236417330801487, - "learning_rate": 0.0001999976906393657, - "loss": 46.0, - "step": 28304 - }, - { - "epoch": 2.1641149148460346, - "grad_norm": 0.0004845599760301411, - "learning_rate": 0.00019999769047612315, - "loss": 46.0, - "step": 28305 - }, - { - "epoch": 2.1641913718294243, - "grad_norm": 0.0007713251980021596, - "learning_rate": 0.0001999976903128748, - "loss": 46.0, - "step": 28306 - }, - { - "epoch": 2.164267828812814, - "grad_norm": 0.0032170279882848263, - "learning_rate": 0.0001999976901496207, - "loss": 46.0, - "step": 28307 - }, - { - "epoch": 2.164344285796204, - "grad_norm": 0.001863210811279714, - "learning_rate": 0.00019999768998636084, - "loss": 46.0, - "step": 28308 - }, - { - "epoch": 2.1644207427795936, - "grad_norm": 0.0010804088087752461, - "learning_rate": 0.0001999976898230952, - "loss": 46.0, - "step": 28309 - }, - { - "epoch": 2.1644971997629834, - "grad_norm": 0.001021967502310872, - "learning_rate": 0.00019999768965982376, - "loss": 46.0, - "step": 28310 - }, - { - "epoch": 2.164573656746373, - "grad_norm": 0.0004115740885026753, - "learning_rate": 0.0001999976894965466, - "loss": 46.0, - "step": 28311 - }, - { - "epoch": 2.164650113729763, - "grad_norm": 0.0034954624716192484, - "learning_rate": 0.00019999768933326366, - "loss": 46.0, - "step": 28312 - }, - { - "epoch": 2.1647265707131527, - "grad_norm": 0.0009458708227612078, - "learning_rate": 0.00019999768916997496, - "loss": 46.0, - "step": 28313 - }, - { - "epoch": 2.1648030276965424, - "grad_norm": 0.001612994121387601, - "learning_rate": 0.00019999768900668045, - "loss": 46.0, - "step": 28314 - }, - { - "epoch": 2.1648794846799317, - "grad_norm": 0.0025195502676069736, - "learning_rate": 0.0001999976888433802, - "loss": 46.0, - "step": 28315 - }, - { - "epoch": 2.1649559416633215, - "grad_norm": 0.0018592727137729526, - "learning_rate": 0.00019999768868007417, - "loss": 46.0, - "step": 28316 - }, - { - "epoch": 2.1650323986467113, - "grad_norm": 0.0014857631176710129, - "learning_rate": 0.00019999768851676234, - "loss": 46.0, - "step": 28317 - }, - { - "epoch": 2.165108855630101, - "grad_norm": 0.0009768782183527946, - "learning_rate": 0.00019999768835344482, - "loss": 46.0, - "step": 28318 - }, - { - "epoch": 2.165185312613491, - "grad_norm": 0.0011763478396460414, - "learning_rate": 0.00019999768819012147, - "loss": 46.0, - "step": 28319 - }, - { - "epoch": 2.1652617695968805, - "grad_norm": 0.0009184136870317161, - "learning_rate": 0.00019999768802679238, - "loss": 46.0, - "step": 28320 - }, - { - "epoch": 2.1653382265802703, - "grad_norm": 0.0015678530326113105, - "learning_rate": 0.00019999768786345748, - "loss": 46.0, - "step": 28321 - }, - { - "epoch": 2.16541468356366, - "grad_norm": 0.0006505833007395267, - "learning_rate": 0.00019999768770011684, - "loss": 46.0, - "step": 28322 - }, - { - "epoch": 2.16549114054705, - "grad_norm": 0.00388232059776783, - "learning_rate": 0.00019999768753677045, - "loss": 46.0, - "step": 28323 - }, - { - "epoch": 2.1655675975304396, - "grad_norm": 0.0018786248983815312, - "learning_rate": 0.00019999768737341827, - "loss": 46.0, - "step": 28324 - }, - { - "epoch": 2.1656440545138294, - "grad_norm": 0.0014186324551701546, - "learning_rate": 0.0001999976872100603, - "loss": 46.0, - "step": 28325 - }, - { - "epoch": 2.1657205114972187, - "grad_norm": 0.0012227930128574371, - "learning_rate": 0.0001999976870466966, - "loss": 46.0, - "step": 28326 - }, - { - "epoch": 2.1657969684806084, - "grad_norm": 0.0009038235293701291, - "learning_rate": 0.0001999976868833271, - "loss": 46.0, - "step": 28327 - }, - { - "epoch": 2.165873425463998, - "grad_norm": 0.0005395268672145903, - "learning_rate": 0.00019999768671995183, - "loss": 46.0, - "step": 28328 - }, - { - "epoch": 2.165949882447388, - "grad_norm": 0.0006006858311593533, - "learning_rate": 0.0001999976865565708, - "loss": 46.0, - "step": 28329 - }, - { - "epoch": 2.1660263394307777, - "grad_norm": 0.0006018558633513749, - "learning_rate": 0.000199997686393184, - "loss": 46.0, - "step": 28330 - }, - { - "epoch": 2.1661027964141675, - "grad_norm": 0.001946346485055983, - "learning_rate": 0.00019999768622979143, - "loss": 46.0, - "step": 28331 - }, - { - "epoch": 2.1661792533975572, - "grad_norm": 0.0005670227692462504, - "learning_rate": 0.0001999976860663931, - "loss": 46.0, - "step": 28332 - }, - { - "epoch": 2.166255710380947, - "grad_norm": 0.002674239221960306, - "learning_rate": 0.000199997685902989, - "loss": 46.0, - "step": 28333 - }, - { - "epoch": 2.1663321673643368, - "grad_norm": 0.0036192391999065876, - "learning_rate": 0.00019999768573957912, - "loss": 46.0, - "step": 28334 - }, - { - "epoch": 2.1664086243477265, - "grad_norm": 0.0012197496835142374, - "learning_rate": 0.00019999768557616348, - "loss": 46.0, - "step": 28335 - }, - { - "epoch": 2.1664850813311163, - "grad_norm": 0.0017973009962588549, - "learning_rate": 0.00019999768541274207, - "loss": 46.0, - "step": 28336 - }, - { - "epoch": 2.1665615383145056, - "grad_norm": 0.0008222817559726536, - "learning_rate": 0.00019999768524931488, - "loss": 46.0, - "step": 28337 - }, - { - "epoch": 2.1666379952978954, - "grad_norm": 0.0007087428239174187, - "learning_rate": 0.00019999768508588192, - "loss": 46.0, - "step": 28338 - }, - { - "epoch": 2.166714452281285, - "grad_norm": 0.0010377614526078105, - "learning_rate": 0.00019999768492244321, - "loss": 46.0, - "step": 28339 - }, - { - "epoch": 2.166790909264675, - "grad_norm": 0.00105758267454803, - "learning_rate": 0.00019999768475899873, - "loss": 46.0, - "step": 28340 - }, - { - "epoch": 2.1668673662480646, - "grad_norm": 0.001613438711501658, - "learning_rate": 0.00019999768459554845, - "loss": 46.0, - "step": 28341 - }, - { - "epoch": 2.1669438232314544, - "grad_norm": 0.0007211464690044522, - "learning_rate": 0.00019999768443209243, - "loss": 46.0, - "step": 28342 - }, - { - "epoch": 2.167020280214844, - "grad_norm": 0.0007866926025599241, - "learning_rate": 0.00019999768426863062, - "loss": 46.0, - "step": 28343 - }, - { - "epoch": 2.167096737198234, - "grad_norm": 0.0007860049372538924, - "learning_rate": 0.00019999768410516305, - "loss": 46.0, - "step": 28344 - }, - { - "epoch": 2.1671731941816237, - "grad_norm": 0.013147357851266861, - "learning_rate": 0.00019999768394168973, - "loss": 46.0, - "step": 28345 - }, - { - "epoch": 2.1672496511650134, - "grad_norm": 0.0018320149974897504, - "learning_rate": 0.0001999976837782106, - "loss": 46.0, - "step": 28346 - }, - { - "epoch": 2.167326108148403, - "grad_norm": 0.0005081528215669096, - "learning_rate": 0.00019999768361472574, - "loss": 46.0, - "step": 28347 - }, - { - "epoch": 2.1674025651317925, - "grad_norm": 0.0013384188059717417, - "learning_rate": 0.0001999976834512351, - "loss": 46.0, - "step": 28348 - }, - { - "epoch": 2.1674790221151823, - "grad_norm": 0.001732338685542345, - "learning_rate": 0.0001999976832877387, - "loss": 46.0, - "step": 28349 - }, - { - "epoch": 2.167555479098572, - "grad_norm": 0.011148832738399506, - "learning_rate": 0.0001999976831242365, - "loss": 46.0, - "step": 28350 - }, - { - "epoch": 2.167631936081962, - "grad_norm": 0.0025218259543180466, - "learning_rate": 0.00019999768296072854, - "loss": 46.0, - "step": 28351 - }, - { - "epoch": 2.1677083930653516, - "grad_norm": 0.0008667050278745592, - "learning_rate": 0.00019999768279721484, - "loss": 46.0, - "step": 28352 - }, - { - "epoch": 2.1677848500487413, - "grad_norm": 0.0012461276492103934, - "learning_rate": 0.00019999768263369533, - "loss": 46.0, - "step": 28353 - }, - { - "epoch": 2.167861307032131, - "grad_norm": 0.0017693970585241914, - "learning_rate": 0.00019999768247017007, - "loss": 46.0, - "step": 28354 - }, - { - "epoch": 2.167937764015521, - "grad_norm": 0.0010632730554789305, - "learning_rate": 0.00019999768230663905, - "loss": 46.0, - "step": 28355 - }, - { - "epoch": 2.1680142209989106, - "grad_norm": 0.00891291256994009, - "learning_rate": 0.00019999768214310225, - "loss": 46.0, - "step": 28356 - }, - { - "epoch": 2.1680906779823004, - "grad_norm": 0.0016254974761977792, - "learning_rate": 0.00019999768197955967, - "loss": 46.0, - "step": 28357 - }, - { - "epoch": 2.16816713496569, - "grad_norm": 0.0012648567790165544, - "learning_rate": 0.00019999768181601133, - "loss": 46.0, - "step": 28358 - }, - { - "epoch": 2.1682435919490795, - "grad_norm": 0.0013248417526483536, - "learning_rate": 0.00019999768165245723, - "loss": 46.0, - "step": 28359 - }, - { - "epoch": 2.168320048932469, - "grad_norm": 0.0006009713397361338, - "learning_rate": 0.00019999768148889734, - "loss": 46.0, - "step": 28360 - }, - { - "epoch": 2.168396505915859, - "grad_norm": 0.0029473367612808943, - "learning_rate": 0.0001999976813253317, - "loss": 46.0, - "step": 28361 - }, - { - "epoch": 2.1684729628992487, - "grad_norm": 0.00048392993630841374, - "learning_rate": 0.0001999976811617603, - "loss": 46.0, - "step": 28362 - }, - { - "epoch": 2.1685494198826385, - "grad_norm": 0.0031633959151804447, - "learning_rate": 0.00019999768099818313, - "loss": 46.0, - "step": 28363 - }, - { - "epoch": 2.1686258768660283, - "grad_norm": 0.000784774252679199, - "learning_rate": 0.00019999768083460017, - "loss": 46.0, - "step": 28364 - }, - { - "epoch": 2.168702333849418, - "grad_norm": 0.00042832139297388494, - "learning_rate": 0.00019999768067101143, - "loss": 46.0, - "step": 28365 - }, - { - "epoch": 2.168778790832808, - "grad_norm": 0.0007260804995894432, - "learning_rate": 0.00019999768050741695, - "loss": 46.0, - "step": 28366 - }, - { - "epoch": 2.1688552478161975, - "grad_norm": 0.0040352558717131615, - "learning_rate": 0.0001999976803438167, - "loss": 46.0, - "step": 28367 - }, - { - "epoch": 2.1689317047995873, - "grad_norm": 0.0009794466895982623, - "learning_rate": 0.00019999768018021065, - "loss": 46.0, - "step": 28368 - }, - { - "epoch": 2.169008161782977, - "grad_norm": 0.0014764280058443546, - "learning_rate": 0.00019999768001659885, - "loss": 46.0, - "step": 28369 - }, - { - "epoch": 2.1690846187663664, - "grad_norm": 0.0005682627088390291, - "learning_rate": 0.00019999767985298127, - "loss": 46.0, - "step": 28370 - }, - { - "epoch": 2.169161075749756, - "grad_norm": 0.0026647301856428385, - "learning_rate": 0.00019999767968935795, - "loss": 46.0, - "step": 28371 - }, - { - "epoch": 2.169237532733146, - "grad_norm": 0.0009426974575035274, - "learning_rate": 0.00019999767952572883, - "loss": 46.0, - "step": 28372 - }, - { - "epoch": 2.1693139897165357, - "grad_norm": 0.00038993547786958516, - "learning_rate": 0.00019999767936209394, - "loss": 46.0, - "step": 28373 - }, - { - "epoch": 2.1693904466999254, - "grad_norm": 0.0008578879642300308, - "learning_rate": 0.0001999976791984533, - "loss": 46.0, - "step": 28374 - }, - { - "epoch": 2.169466903683315, - "grad_norm": 0.001049996237270534, - "learning_rate": 0.0001999976790348069, - "loss": 46.0, - "step": 28375 - }, - { - "epoch": 2.169543360666705, - "grad_norm": 0.0007479878258891404, - "learning_rate": 0.0001999976788711547, - "loss": 46.0, - "step": 28376 - }, - { - "epoch": 2.1696198176500947, - "grad_norm": 0.001156297861598432, - "learning_rate": 0.00019999767870749674, - "loss": 46.0, - "step": 28377 - }, - { - "epoch": 2.1696962746334845, - "grad_norm": 0.0023184586316347122, - "learning_rate": 0.00019999767854383304, - "loss": 46.0, - "step": 28378 - }, - { - "epoch": 2.1697727316168742, - "grad_norm": 0.0008291049744002521, - "learning_rate": 0.00019999767838016356, - "loss": 46.0, - "step": 28379 - }, - { - "epoch": 2.1698491886002635, - "grad_norm": 0.001846388797275722, - "learning_rate": 0.00019999767821648825, - "loss": 46.0, - "step": 28380 - }, - { - "epoch": 2.1699256455836533, - "grad_norm": 0.0034819217398762703, - "learning_rate": 0.00019999767805280722, - "loss": 46.0, - "step": 28381 - }, - { - "epoch": 2.170002102567043, - "grad_norm": 0.000577624246943742, - "learning_rate": 0.00019999767788912042, - "loss": 46.0, - "step": 28382 - }, - { - "epoch": 2.170078559550433, - "grad_norm": 0.0009315473726019263, - "learning_rate": 0.00019999767772542785, - "loss": 46.0, - "step": 28383 - }, - { - "epoch": 2.1701550165338226, - "grad_norm": 0.0022571387235075235, - "learning_rate": 0.0001999976775617295, - "loss": 46.0, - "step": 28384 - }, - { - "epoch": 2.1702314735172124, - "grad_norm": 0.0006864149472676218, - "learning_rate": 0.00019999767739802538, - "loss": 46.0, - "step": 28385 - }, - { - "epoch": 2.170307930500602, - "grad_norm": 0.000870219839271158, - "learning_rate": 0.00019999767723431552, - "loss": 46.0, - "step": 28386 - }, - { - "epoch": 2.170384387483992, - "grad_norm": 0.002720368094742298, - "learning_rate": 0.00019999767707059988, - "loss": 46.0, - "step": 28387 - }, - { - "epoch": 2.1704608444673816, - "grad_norm": 0.0020148123148828745, - "learning_rate": 0.00019999767690687844, - "loss": 46.0, - "step": 28388 - }, - { - "epoch": 2.1705373014507714, - "grad_norm": 0.0012028336059302092, - "learning_rate": 0.00019999767674315128, - "loss": 46.0, - "step": 28389 - }, - { - "epoch": 2.170613758434161, - "grad_norm": 0.0016353317769244313, - "learning_rate": 0.00019999767657941832, - "loss": 46.0, - "step": 28390 - }, - { - "epoch": 2.170690215417551, - "grad_norm": 0.0004611596232280135, - "learning_rate": 0.0001999976764156796, - "loss": 46.0, - "step": 28391 - }, - { - "epoch": 2.1707666724009402, - "grad_norm": 0.004697082564234734, - "learning_rate": 0.0001999976762519351, - "loss": 46.0, - "step": 28392 - }, - { - "epoch": 2.17084312938433, - "grad_norm": 0.002459216397255659, - "learning_rate": 0.00019999767608818486, - "loss": 46.0, - "step": 28393 - }, - { - "epoch": 2.1709195863677198, - "grad_norm": 0.0006335514481179416, - "learning_rate": 0.00019999767592442883, - "loss": 46.0, - "step": 28394 - }, - { - "epoch": 2.1709960433511095, - "grad_norm": 0.0007775943377055228, - "learning_rate": 0.000199997675760667, - "loss": 46.0, - "step": 28395 - }, - { - "epoch": 2.1710725003344993, - "grad_norm": 0.0010786314960569143, - "learning_rate": 0.00019999767559689943, - "loss": 46.0, - "step": 28396 - }, - { - "epoch": 2.171148957317889, - "grad_norm": 0.0008986023021861911, - "learning_rate": 0.00019999767543312609, - "loss": 46.0, - "step": 28397 - }, - { - "epoch": 2.171225414301279, - "grad_norm": 0.0007970093283802271, - "learning_rate": 0.00019999767526934697, - "loss": 46.0, - "step": 28398 - }, - { - "epoch": 2.1713018712846686, - "grad_norm": 0.0006517599103972316, - "learning_rate": 0.0001999976751055621, - "loss": 46.0, - "step": 28399 - }, - { - "epoch": 2.1713783282680583, - "grad_norm": 0.0005501593695953488, - "learning_rate": 0.00019999767494177143, - "loss": 46.0, - "step": 28400 - }, - { - "epoch": 2.171454785251448, - "grad_norm": 0.0023823336232453585, - "learning_rate": 0.00019999767477797502, - "loss": 46.0, - "step": 28401 - }, - { - "epoch": 2.1715312422348374, - "grad_norm": 0.0012514422414824367, - "learning_rate": 0.00019999767461417284, - "loss": 46.0, - "step": 28402 - }, - { - "epoch": 2.171607699218227, - "grad_norm": 0.0027574875857681036, - "learning_rate": 0.00019999767445036488, - "loss": 46.0, - "step": 28403 - }, - { - "epoch": 2.171684156201617, - "grad_norm": 0.0008573058294132352, - "learning_rate": 0.00019999767428655117, - "loss": 46.0, - "step": 28404 - }, - { - "epoch": 2.1717606131850067, - "grad_norm": 0.0004672650247812271, - "learning_rate": 0.00019999767412273167, - "loss": 46.0, - "step": 28405 - }, - { - "epoch": 2.1718370701683964, - "grad_norm": 0.014130059629678726, - "learning_rate": 0.0001999976739589064, - "loss": 46.0, - "step": 28406 - }, - { - "epoch": 2.171913527151786, - "grad_norm": 0.0010685843881219625, - "learning_rate": 0.00019999767379507536, - "loss": 46.0, - "step": 28407 - }, - { - "epoch": 2.171989984135176, - "grad_norm": 0.0009260507067665458, - "learning_rate": 0.00019999767363123856, - "loss": 46.0, - "step": 28408 - }, - { - "epoch": 2.1720664411185657, - "grad_norm": 0.005498750600963831, - "learning_rate": 0.00019999767346739596, - "loss": 46.0, - "step": 28409 - }, - { - "epoch": 2.1721428981019555, - "grad_norm": 0.0010454320581629872, - "learning_rate": 0.00019999767330354764, - "loss": 46.0, - "step": 28410 - }, - { - "epoch": 2.1722193550853452, - "grad_norm": 0.0003078437002841383, - "learning_rate": 0.00019999767313969352, - "loss": 46.0, - "step": 28411 - }, - { - "epoch": 2.172295812068735, - "grad_norm": 0.0008880148525349796, - "learning_rate": 0.00019999767297583363, - "loss": 46.0, - "step": 28412 - }, - { - "epoch": 2.1723722690521248, - "grad_norm": 0.0009869198547676206, - "learning_rate": 0.000199997672811968, - "loss": 46.0, - "step": 28413 - }, - { - "epoch": 2.172448726035514, - "grad_norm": 0.0009513136465102434, - "learning_rate": 0.00019999767264809658, - "loss": 46.0, - "step": 28414 - }, - { - "epoch": 2.172525183018904, - "grad_norm": 0.0027022380381822586, - "learning_rate": 0.00019999767248421937, - "loss": 46.0, - "step": 28415 - }, - { - "epoch": 2.1726016400022936, - "grad_norm": 0.0009560069884173572, - "learning_rate": 0.0001999976723203364, - "loss": 46.0, - "step": 28416 - }, - { - "epoch": 2.1726780969856834, - "grad_norm": 0.002306233858689666, - "learning_rate": 0.00019999767215644768, - "loss": 46.0, - "step": 28417 - }, - { - "epoch": 2.172754553969073, - "grad_norm": 0.0020294920541346073, - "learning_rate": 0.0001999976719925532, - "loss": 46.0, - "step": 28418 - }, - { - "epoch": 2.172831010952463, - "grad_norm": 0.015760544687509537, - "learning_rate": 0.00019999767182865292, - "loss": 46.0, - "step": 28419 - }, - { - "epoch": 2.1729074679358527, - "grad_norm": 0.0006957454606890678, - "learning_rate": 0.0001999976716647469, - "loss": 46.0, - "step": 28420 - }, - { - "epoch": 2.1729839249192424, - "grad_norm": 0.00290909456089139, - "learning_rate": 0.0001999976715008351, - "loss": 46.0, - "step": 28421 - }, - { - "epoch": 2.173060381902632, - "grad_norm": 0.001131995115429163, - "learning_rate": 0.00019999767133691752, - "loss": 46.0, - "step": 28422 - }, - { - "epoch": 2.173136838886022, - "grad_norm": 0.0005287793464958668, - "learning_rate": 0.00019999767117299418, - "loss": 46.0, - "step": 28423 - }, - { - "epoch": 2.1732132958694113, - "grad_norm": 0.0012256193440407515, - "learning_rate": 0.00019999767100906506, - "loss": 46.0, - "step": 28424 - }, - { - "epoch": 2.173289752852801, - "grad_norm": 0.000694221118465066, - "learning_rate": 0.00019999767084513017, - "loss": 46.0, - "step": 28425 - }, - { - "epoch": 2.1733662098361908, - "grad_norm": 0.000910363916773349, - "learning_rate": 0.0001999976706811895, - "loss": 46.0, - "step": 28426 - }, - { - "epoch": 2.1734426668195805, - "grad_norm": 0.004417065065354109, - "learning_rate": 0.0001999976705172431, - "loss": 46.0, - "step": 28427 - }, - { - "epoch": 2.1735191238029703, - "grad_norm": 0.0019813564140349627, - "learning_rate": 0.0001999976703532909, - "loss": 46.0, - "step": 28428 - }, - { - "epoch": 2.17359558078636, - "grad_norm": 0.0007425614167004824, - "learning_rate": 0.00019999767018933295, - "loss": 46.0, - "step": 28429 - }, - { - "epoch": 2.17367203776975, - "grad_norm": 0.0013123922981321812, - "learning_rate": 0.00019999767002536922, - "loss": 46.0, - "step": 28430 - }, - { - "epoch": 2.1737484947531396, - "grad_norm": 0.0014108201721683145, - "learning_rate": 0.0001999976698613997, - "loss": 46.0, - "step": 28431 - }, - { - "epoch": 2.1738249517365293, - "grad_norm": 0.0011263181222602725, - "learning_rate": 0.00019999766969742446, - "loss": 46.0, - "step": 28432 - }, - { - "epoch": 2.173901408719919, - "grad_norm": 0.0019500887719914317, - "learning_rate": 0.00019999766953344343, - "loss": 46.0, - "step": 28433 - }, - { - "epoch": 2.173977865703309, - "grad_norm": 0.0007986238924786448, - "learning_rate": 0.0001999976693694566, - "loss": 46.0, - "step": 28434 - }, - { - "epoch": 2.1740543226866986, - "grad_norm": 0.003296899376437068, - "learning_rate": 0.00019999766920546404, - "loss": 46.0, - "step": 28435 - }, - { - "epoch": 2.174130779670088, - "grad_norm": 0.0010163268307223916, - "learning_rate": 0.0001999976690414657, - "loss": 46.0, - "step": 28436 - }, - { - "epoch": 2.1742072366534777, - "grad_norm": 0.000838159816339612, - "learning_rate": 0.00019999766887746157, - "loss": 46.0, - "step": 28437 - }, - { - "epoch": 2.1742836936368675, - "grad_norm": 0.0005727846873924136, - "learning_rate": 0.00019999766871345168, - "loss": 46.0, - "step": 28438 - }, - { - "epoch": 2.1743601506202572, - "grad_norm": 0.0019931441638618708, - "learning_rate": 0.00019999766854943602, - "loss": 46.0, - "step": 28439 - }, - { - "epoch": 2.174436607603647, - "grad_norm": 0.0007575023337267339, - "learning_rate": 0.0001999976683854146, - "loss": 46.0, - "step": 28440 - }, - { - "epoch": 2.1745130645870367, - "grad_norm": 0.0005464000278152525, - "learning_rate": 0.00019999766822138742, - "loss": 46.0, - "step": 28441 - }, - { - "epoch": 2.1745895215704265, - "grad_norm": 0.0010065704118460417, - "learning_rate": 0.00019999766805735446, - "loss": 46.0, - "step": 28442 - }, - { - "epoch": 2.1746659785538163, - "grad_norm": 0.0011554675875231624, - "learning_rate": 0.00019999766789331573, - "loss": 46.0, - "step": 28443 - }, - { - "epoch": 2.174742435537206, - "grad_norm": 0.0018307155696675181, - "learning_rate": 0.00019999766772927125, - "loss": 46.0, - "step": 28444 - }, - { - "epoch": 2.174818892520596, - "grad_norm": 0.0019894202705472708, - "learning_rate": 0.00019999766756522095, - "loss": 46.0, - "step": 28445 - }, - { - "epoch": 2.174895349503985, - "grad_norm": 0.0007236563833430409, - "learning_rate": 0.00019999766740116492, - "loss": 46.0, - "step": 28446 - }, - { - "epoch": 2.174971806487375, - "grad_norm": 0.0011490915203467011, - "learning_rate": 0.00019999766723710312, - "loss": 46.0, - "step": 28447 - }, - { - "epoch": 2.1750482634707646, - "grad_norm": 0.0015396373346447945, - "learning_rate": 0.00019999766707303555, - "loss": 46.0, - "step": 28448 - }, - { - "epoch": 2.1751247204541544, - "grad_norm": 0.0019520844798535109, - "learning_rate": 0.0001999976669089622, - "loss": 46.0, - "step": 28449 - }, - { - "epoch": 2.175201177437544, - "grad_norm": 0.0009827141184359789, - "learning_rate": 0.00019999766674488306, - "loss": 46.0, - "step": 28450 - }, - { - "epoch": 2.175277634420934, - "grad_norm": 0.001469287439249456, - "learning_rate": 0.0001999976665807982, - "loss": 46.0, - "step": 28451 - }, - { - "epoch": 2.1753540914043237, - "grad_norm": 0.0007992509636096656, - "learning_rate": 0.00019999766641670753, - "loss": 46.0, - "step": 28452 - }, - { - "epoch": 2.1754305483877134, - "grad_norm": 0.0008464643033221364, - "learning_rate": 0.00019999766625261112, - "loss": 46.0, - "step": 28453 - }, - { - "epoch": 2.175507005371103, - "grad_norm": 0.0010568229481577873, - "learning_rate": 0.0001999976660885089, - "loss": 46.0, - "step": 28454 - }, - { - "epoch": 2.175583462354493, - "grad_norm": 0.001394504914060235, - "learning_rate": 0.00019999766592440098, - "loss": 46.0, - "step": 28455 - }, - { - "epoch": 2.1756599193378827, - "grad_norm": 0.0011778074549511075, - "learning_rate": 0.00019999766576028725, - "loss": 46.0, - "step": 28456 - }, - { - "epoch": 2.175736376321272, - "grad_norm": 0.001161605236120522, - "learning_rate": 0.00019999766559616772, - "loss": 46.0, - "step": 28457 - }, - { - "epoch": 2.175812833304662, - "grad_norm": 0.0010076920734718442, - "learning_rate": 0.00019999766543204247, - "loss": 46.0, - "step": 28458 - }, - { - "epoch": 2.1758892902880516, - "grad_norm": 0.0009201422799378633, - "learning_rate": 0.00019999766526791141, - "loss": 46.0, - "step": 28459 - }, - { - "epoch": 2.1759657472714413, - "grad_norm": 0.0005596308619715273, - "learning_rate": 0.00019999766510377462, - "loss": 46.0, - "step": 28460 - }, - { - "epoch": 2.176042204254831, - "grad_norm": 0.0011208391515538096, - "learning_rate": 0.00019999766493963205, - "loss": 46.0, - "step": 28461 - }, - { - "epoch": 2.176118661238221, - "grad_norm": 0.0027786337304860353, - "learning_rate": 0.00019999766477548367, - "loss": 46.0, - "step": 28462 - }, - { - "epoch": 2.1761951182216106, - "grad_norm": 0.0011292733252048492, - "learning_rate": 0.00019999766461132956, - "loss": 46.0, - "step": 28463 - }, - { - "epoch": 2.1762715752050004, - "grad_norm": 0.0004198331735096872, - "learning_rate": 0.00019999766444716966, - "loss": 46.0, - "step": 28464 - }, - { - "epoch": 2.17634803218839, - "grad_norm": 0.0017612830270081758, - "learning_rate": 0.00019999766428300403, - "loss": 46.0, - "step": 28465 - }, - { - "epoch": 2.17642448917178, - "grad_norm": 0.00228533404879272, - "learning_rate": 0.00019999766411883262, - "loss": 46.0, - "step": 28466 - }, - { - "epoch": 2.1765009461551696, - "grad_norm": 0.005738257430493832, - "learning_rate": 0.00019999766395465543, - "loss": 46.0, - "step": 28467 - }, - { - "epoch": 2.176577403138559, - "grad_norm": 0.0017074800562113523, - "learning_rate": 0.00019999766379047245, - "loss": 46.0, - "step": 28468 - }, - { - "epoch": 2.1766538601219487, - "grad_norm": 0.0009029096108861268, - "learning_rate": 0.00019999766362628372, - "loss": 46.0, - "step": 28469 - }, - { - "epoch": 2.1767303171053385, - "grad_norm": 0.006646163761615753, - "learning_rate": 0.0001999976634620892, - "loss": 46.0, - "step": 28470 - }, - { - "epoch": 2.1768067740887282, - "grad_norm": 0.0008539999253116548, - "learning_rate": 0.00019999766329788893, - "loss": 46.0, - "step": 28471 - }, - { - "epoch": 2.176883231072118, - "grad_norm": 0.0013579073129221797, - "learning_rate": 0.00019999766313368288, - "loss": 46.0, - "step": 28472 - }, - { - "epoch": 2.1769596880555078, - "grad_norm": 0.0014794875169172883, - "learning_rate": 0.00019999766296947109, - "loss": 46.0, - "step": 28473 - }, - { - "epoch": 2.1770361450388975, - "grad_norm": 0.006017064675688744, - "learning_rate": 0.00019999766280525352, - "loss": 46.0, - "step": 28474 - }, - { - "epoch": 2.1771126020222873, - "grad_norm": 0.0022261340636759996, - "learning_rate": 0.00019999766264103017, - "loss": 46.0, - "step": 28475 - }, - { - "epoch": 2.177189059005677, - "grad_norm": 0.00023276920546777546, - "learning_rate": 0.00019999766247680105, - "loss": 46.0, - "step": 28476 - }, - { - "epoch": 2.177265515989067, - "grad_norm": 0.0015872431686148047, - "learning_rate": 0.00019999766231256616, - "loss": 46.0, - "step": 28477 - }, - { - "epoch": 2.1773419729724566, - "grad_norm": 0.0007817122968845069, - "learning_rate": 0.0001999976621483255, - "loss": 46.0, - "step": 28478 - }, - { - "epoch": 2.177418429955846, - "grad_norm": 0.00046454579569399357, - "learning_rate": 0.0001999976619840791, - "loss": 46.0, - "step": 28479 - }, - { - "epoch": 2.1774948869392357, - "grad_norm": 0.0019957246258854866, - "learning_rate": 0.00019999766181982688, - "loss": 46.0, - "step": 28480 - }, - { - "epoch": 2.1775713439226254, - "grad_norm": 0.0014977352693676949, - "learning_rate": 0.0001999976616555689, - "loss": 46.0, - "step": 28481 - }, - { - "epoch": 2.177647800906015, - "grad_norm": 0.000837024359498173, - "learning_rate": 0.0001999976614913052, - "loss": 46.0, - "step": 28482 - }, - { - "epoch": 2.177724257889405, - "grad_norm": 0.0029853328596800566, - "learning_rate": 0.00019999766132703568, - "loss": 46.0, - "step": 28483 - }, - { - "epoch": 2.1778007148727947, - "grad_norm": 0.001866753795184195, - "learning_rate": 0.0001999976611627604, - "loss": 46.0, - "step": 28484 - }, - { - "epoch": 2.1778771718561845, - "grad_norm": 0.0015747282886877656, - "learning_rate": 0.00019999766099847936, - "loss": 46.0, - "step": 28485 - }, - { - "epoch": 2.177953628839574, - "grad_norm": 0.0005321996286511421, - "learning_rate": 0.00019999766083419256, - "loss": 46.0, - "step": 28486 - }, - { - "epoch": 2.178030085822964, - "grad_norm": 0.0024297041818499565, - "learning_rate": 0.0001999976606699, - "loss": 46.0, - "step": 28487 - }, - { - "epoch": 2.1781065428063537, - "grad_norm": 0.0009199777850881219, - "learning_rate": 0.00019999766050560162, - "loss": 46.0, - "step": 28488 - }, - { - "epoch": 2.1781829997897435, - "grad_norm": 0.0015354490606114268, - "learning_rate": 0.0001999976603412975, - "loss": 46.0, - "step": 28489 - }, - { - "epoch": 2.178259456773133, - "grad_norm": 0.0014288178645074368, - "learning_rate": 0.0001999976601769876, - "loss": 46.0, - "step": 28490 - }, - { - "epoch": 2.1783359137565226, - "grad_norm": 0.0006210981518961489, - "learning_rate": 0.00019999766001267197, - "loss": 46.0, - "step": 28491 - }, - { - "epoch": 2.1784123707399123, - "grad_norm": 0.0014171134680509567, - "learning_rate": 0.00019999765984835054, - "loss": 46.0, - "step": 28492 - }, - { - "epoch": 2.178488827723302, - "grad_norm": 0.0013052221620455384, - "learning_rate": 0.00019999765968402333, - "loss": 46.0, - "step": 28493 - }, - { - "epoch": 2.178565284706692, - "grad_norm": 0.001749410410411656, - "learning_rate": 0.00019999765951969037, - "loss": 46.0, - "step": 28494 - }, - { - "epoch": 2.1786417416900816, - "grad_norm": 0.0008194524561986327, - "learning_rate": 0.00019999765935535164, - "loss": 46.0, - "step": 28495 - }, - { - "epoch": 2.1787181986734714, - "grad_norm": 0.0043754540383815765, - "learning_rate": 0.00019999765919100713, - "loss": 46.0, - "step": 28496 - }, - { - "epoch": 2.178794655656861, - "grad_norm": 0.0006392080103978515, - "learning_rate": 0.00019999765902665686, - "loss": 46.0, - "step": 28497 - }, - { - "epoch": 2.178871112640251, - "grad_norm": 0.001968333264812827, - "learning_rate": 0.0001999976588623008, - "loss": 46.0, - "step": 28498 - }, - { - "epoch": 2.1789475696236407, - "grad_norm": 0.0005172672681510448, - "learning_rate": 0.000199997658697939, - "loss": 46.0, - "step": 28499 - }, - { - "epoch": 2.1790240266070304, - "grad_norm": 0.0004186072910670191, - "learning_rate": 0.0001999976585335714, - "loss": 46.0, - "step": 28500 - }, - { - "epoch": 2.1791004835904197, - "grad_norm": 0.001641149865463376, - "learning_rate": 0.00019999765836919807, - "loss": 46.0, - "step": 28501 - }, - { - "epoch": 2.1791769405738095, - "grad_norm": 0.0024131264071911573, - "learning_rate": 0.00019999765820481895, - "loss": 46.0, - "step": 28502 - }, - { - "epoch": 2.1792533975571993, - "grad_norm": 0.0015256662154570222, - "learning_rate": 0.00019999765804043404, - "loss": 46.0, - "step": 28503 - }, - { - "epoch": 2.179329854540589, - "grad_norm": 0.0029041327070444822, - "learning_rate": 0.0001999976578760434, - "loss": 46.0, - "step": 28504 - }, - { - "epoch": 2.179406311523979, - "grad_norm": 0.0011511488119140267, - "learning_rate": 0.00019999765771164696, - "loss": 46.0, - "step": 28505 - }, - { - "epoch": 2.1794827685073686, - "grad_norm": 0.0019296801183372736, - "learning_rate": 0.00019999765754724478, - "loss": 46.0, - "step": 28506 - }, - { - "epoch": 2.1795592254907583, - "grad_norm": 0.001309265848249197, - "learning_rate": 0.00019999765738283682, - "loss": 46.0, - "step": 28507 - }, - { - "epoch": 2.179635682474148, - "grad_norm": 0.0017570970812812448, - "learning_rate": 0.00019999765721842306, - "loss": 46.0, - "step": 28508 - }, - { - "epoch": 2.179712139457538, - "grad_norm": 0.0012124072527512908, - "learning_rate": 0.00019999765705400356, - "loss": 46.0, - "step": 28509 - }, - { - "epoch": 2.1797885964409276, - "grad_norm": 0.0014030297752469778, - "learning_rate": 0.00019999765688957828, - "loss": 46.0, - "step": 28510 - }, - { - "epoch": 2.179865053424317, - "grad_norm": 0.0018287461716681719, - "learning_rate": 0.00019999765672514726, - "loss": 46.0, - "step": 28511 - }, - { - "epoch": 2.1799415104077067, - "grad_norm": 0.0009697952191345394, - "learning_rate": 0.00019999765656071044, - "loss": 46.0, - "step": 28512 - }, - { - "epoch": 2.1800179673910964, - "grad_norm": 0.0011770287528634071, - "learning_rate": 0.00019999765639626787, - "loss": 46.0, - "step": 28513 - }, - { - "epoch": 2.180094424374486, - "grad_norm": 0.0009654753957875073, - "learning_rate": 0.0001999976562318195, - "loss": 46.0, - "step": 28514 - }, - { - "epoch": 2.180170881357876, - "grad_norm": 0.00402452889829874, - "learning_rate": 0.00019999765606736538, - "loss": 46.0, - "step": 28515 - }, - { - "epoch": 2.1802473383412657, - "grad_norm": 0.0029834695160388947, - "learning_rate": 0.0001999976559029055, - "loss": 46.0, - "step": 28516 - }, - { - "epoch": 2.1803237953246555, - "grad_norm": 0.003193502314388752, - "learning_rate": 0.00019999765573843983, - "loss": 46.0, - "step": 28517 - }, - { - "epoch": 2.1804002523080452, - "grad_norm": 0.001085283001884818, - "learning_rate": 0.0001999976555739684, - "loss": 46.0, - "step": 28518 - }, - { - "epoch": 2.180476709291435, - "grad_norm": 0.0005285979132167995, - "learning_rate": 0.0001999976554094912, - "loss": 46.0, - "step": 28519 - }, - { - "epoch": 2.1805531662748248, - "grad_norm": 0.005591289605945349, - "learning_rate": 0.00019999765524500825, - "loss": 46.0, - "step": 28520 - }, - { - "epoch": 2.1806296232582145, - "grad_norm": 0.0013330476358532906, - "learning_rate": 0.0001999976550805195, - "loss": 46.0, - "step": 28521 - }, - { - "epoch": 2.1807060802416043, - "grad_norm": 0.0007984755211509764, - "learning_rate": 0.000199997654916025, - "loss": 46.0, - "step": 28522 - }, - { - "epoch": 2.1807825372249936, - "grad_norm": 0.0009099752642214298, - "learning_rate": 0.00019999765475152472, - "loss": 46.0, - "step": 28523 - }, - { - "epoch": 2.1808589942083834, - "grad_norm": 0.0015721693634986877, - "learning_rate": 0.00019999765458701867, - "loss": 46.0, - "step": 28524 - }, - { - "epoch": 2.180935451191773, - "grad_norm": 0.0005699247121810913, - "learning_rate": 0.00019999765442250684, - "loss": 46.0, - "step": 28525 - }, - { - "epoch": 2.181011908175163, - "grad_norm": 0.0009928439976647496, - "learning_rate": 0.00019999765425798927, - "loss": 46.0, - "step": 28526 - }, - { - "epoch": 2.1810883651585526, - "grad_norm": 0.0010869466932490468, - "learning_rate": 0.0001999976540934659, - "loss": 46.0, - "step": 28527 - }, - { - "epoch": 2.1811648221419424, - "grad_norm": 0.0005256158183328807, - "learning_rate": 0.0001999976539289368, - "loss": 46.0, - "step": 28528 - }, - { - "epoch": 2.181241279125332, - "grad_norm": 0.0007434640428982675, - "learning_rate": 0.0001999976537644019, - "loss": 46.0, - "step": 28529 - }, - { - "epoch": 2.181317736108722, - "grad_norm": 0.0010680626146495342, - "learning_rate": 0.00019999765359986123, - "loss": 46.0, - "step": 28530 - }, - { - "epoch": 2.1813941930921117, - "grad_norm": 0.0009314455091953278, - "learning_rate": 0.00019999765343531483, - "loss": 46.0, - "step": 28531 - }, - { - "epoch": 2.1814706500755014, - "grad_norm": 0.005388634745031595, - "learning_rate": 0.00019999765327076262, - "loss": 46.0, - "step": 28532 - }, - { - "epoch": 2.1815471070588908, - "grad_norm": 0.0010014433646574616, - "learning_rate": 0.00019999765310620463, - "loss": 46.0, - "step": 28533 - }, - { - "epoch": 2.1816235640422805, - "grad_norm": 0.0009149159304797649, - "learning_rate": 0.0001999976529416409, - "loss": 46.0, - "step": 28534 - }, - { - "epoch": 2.1817000210256703, - "grad_norm": 0.000616028846707195, - "learning_rate": 0.0001999976527770714, - "loss": 46.0, - "step": 28535 - }, - { - "epoch": 2.18177647800906, - "grad_norm": 0.0008973000221885741, - "learning_rate": 0.00019999765261249612, - "loss": 46.0, - "step": 28536 - }, - { - "epoch": 2.18185293499245, - "grad_norm": 0.001988670090213418, - "learning_rate": 0.00019999765244791507, - "loss": 46.0, - "step": 28537 - }, - { - "epoch": 2.1819293919758396, - "grad_norm": 0.0006010684883221984, - "learning_rate": 0.00019999765228332825, - "loss": 46.0, - "step": 28538 - }, - { - "epoch": 2.1820058489592293, - "grad_norm": 0.0014233929105103016, - "learning_rate": 0.00019999765211873566, - "loss": 46.0, - "step": 28539 - }, - { - "epoch": 2.182082305942619, - "grad_norm": 0.001085110241547227, - "learning_rate": 0.00019999765195413734, - "loss": 46.0, - "step": 28540 - }, - { - "epoch": 2.182158762926009, - "grad_norm": 0.0005333789740689099, - "learning_rate": 0.00019999765178953317, - "loss": 46.0, - "step": 28541 - }, - { - "epoch": 2.1822352199093986, - "grad_norm": 0.0011887053260579705, - "learning_rate": 0.0001999976516249233, - "loss": 46.0, - "step": 28542 - }, - { - "epoch": 2.1823116768927884, - "grad_norm": 0.0009352117776870728, - "learning_rate": 0.00019999765146030764, - "loss": 46.0, - "step": 28543 - }, - { - "epoch": 2.182388133876178, - "grad_norm": 0.0012499524746090174, - "learning_rate": 0.0001999976512956862, - "loss": 46.0, - "step": 28544 - }, - { - "epoch": 2.1824645908595675, - "grad_norm": 0.0006399460835382342, - "learning_rate": 0.00019999765113105903, - "loss": 46.0, - "step": 28545 - }, - { - "epoch": 2.182541047842957, - "grad_norm": 0.0006755090435035527, - "learning_rate": 0.00019999765096642604, - "loss": 46.0, - "step": 28546 - }, - { - "epoch": 2.182617504826347, - "grad_norm": 0.0009542009211145341, - "learning_rate": 0.0001999976508017873, - "loss": 46.0, - "step": 28547 - }, - { - "epoch": 2.1826939618097367, - "grad_norm": 0.0008284849463962018, - "learning_rate": 0.00019999765063714279, - "loss": 46.0, - "step": 28548 - }, - { - "epoch": 2.1827704187931265, - "grad_norm": 0.0009998140158131719, - "learning_rate": 0.0001999976504724925, - "loss": 46.0, - "step": 28549 - }, - { - "epoch": 2.1828468757765163, - "grad_norm": 0.0027649146504700184, - "learning_rate": 0.00019999765030783646, - "loss": 46.0, - "step": 28550 - }, - { - "epoch": 2.182923332759906, - "grad_norm": 0.003018118441104889, - "learning_rate": 0.00019999765014317464, - "loss": 46.0, - "step": 28551 - }, - { - "epoch": 2.182999789743296, - "grad_norm": 0.0004947337438352406, - "learning_rate": 0.00019999764997850704, - "loss": 46.0, - "step": 28552 - }, - { - "epoch": 2.1830762467266855, - "grad_norm": 0.002311462303623557, - "learning_rate": 0.0001999976498138337, - "loss": 46.0, - "step": 28553 - }, - { - "epoch": 2.1831527037100753, - "grad_norm": 0.0008055961225181818, - "learning_rate": 0.00019999764964915458, - "loss": 46.0, - "step": 28554 - }, - { - "epoch": 2.1832291606934646, - "grad_norm": 0.0026294426061213017, - "learning_rate": 0.0001999976494844697, - "loss": 46.0, - "step": 28555 - }, - { - "epoch": 2.1833056176768544, - "grad_norm": 0.0018904200987890363, - "learning_rate": 0.00019999764931977903, - "loss": 46.0, - "step": 28556 - }, - { - "epoch": 2.183382074660244, - "grad_norm": 0.0013703533913940191, - "learning_rate": 0.0001999976491550826, - "loss": 46.0, - "step": 28557 - }, - { - "epoch": 2.183458531643634, - "grad_norm": 0.0007844749488867819, - "learning_rate": 0.0001999976489903804, - "loss": 46.0, - "step": 28558 - }, - { - "epoch": 2.1835349886270237, - "grad_norm": 0.0003083696065004915, - "learning_rate": 0.00019999764882567243, - "loss": 46.0, - "step": 28559 - }, - { - "epoch": 2.1836114456104134, - "grad_norm": 0.0007838538731448352, - "learning_rate": 0.00019999764866095868, - "loss": 46.0, - "step": 28560 - }, - { - "epoch": 2.183687902593803, - "grad_norm": 0.0008964691660366952, - "learning_rate": 0.00019999764849623917, - "loss": 46.0, - "step": 28561 - }, - { - "epoch": 2.183764359577193, - "grad_norm": 0.0011745627271011472, - "learning_rate": 0.0001999976483315139, - "loss": 46.0, - "step": 28562 - }, - { - "epoch": 2.1838408165605827, - "grad_norm": 0.0009995668660849333, - "learning_rate": 0.00019999764816678285, - "loss": 46.0, - "step": 28563 - }, - { - "epoch": 2.1839172735439725, - "grad_norm": 0.0007942367228679359, - "learning_rate": 0.000199997648002046, - "loss": 46.0, - "step": 28564 - }, - { - "epoch": 2.1839937305273622, - "grad_norm": 0.0010753748938441277, - "learning_rate": 0.00019999764783730343, - "loss": 46.0, - "step": 28565 - }, - { - "epoch": 2.184070187510752, - "grad_norm": 0.00035657055559568107, - "learning_rate": 0.0001999976476725551, - "loss": 46.0, - "step": 28566 - }, - { - "epoch": 2.1841466444941413, - "grad_norm": 0.0006955236895009875, - "learning_rate": 0.00019999764750780095, - "loss": 46.0, - "step": 28567 - }, - { - "epoch": 2.184223101477531, - "grad_norm": 0.0009154097642749548, - "learning_rate": 0.00019999764734304106, - "loss": 46.0, - "step": 28568 - }, - { - "epoch": 2.184299558460921, - "grad_norm": 0.0010483013466000557, - "learning_rate": 0.0001999976471782754, - "loss": 46.0, - "step": 28569 - }, - { - "epoch": 2.1843760154443106, - "grad_norm": 0.0007674329681321979, - "learning_rate": 0.00019999764701350396, - "loss": 46.0, - "step": 28570 - }, - { - "epoch": 2.1844524724277004, - "grad_norm": 0.0013115676119923592, - "learning_rate": 0.00019999764684872675, - "loss": 46.0, - "step": 28571 - }, - { - "epoch": 2.18452892941109, - "grad_norm": 0.0006922197644598782, - "learning_rate": 0.00019999764668394377, - "loss": 46.0, - "step": 28572 - }, - { - "epoch": 2.18460538639448, - "grad_norm": 0.0006658604834228754, - "learning_rate": 0.00019999764651915504, - "loss": 46.0, - "step": 28573 - }, - { - "epoch": 2.1846818433778696, - "grad_norm": 0.0010801381431519985, - "learning_rate": 0.0001999976463543605, - "loss": 46.0, - "step": 28574 - }, - { - "epoch": 2.1847583003612594, - "grad_norm": 0.0011674728011712432, - "learning_rate": 0.00019999764618956024, - "loss": 46.0, - "step": 28575 - }, - { - "epoch": 2.184834757344649, - "grad_norm": 0.0015218290500342846, - "learning_rate": 0.0001999976460247542, - "loss": 46.0, - "step": 28576 - }, - { - "epoch": 2.1849112143280385, - "grad_norm": 0.00114711734931916, - "learning_rate": 0.00019999764585994237, - "loss": 46.0, - "step": 28577 - }, - { - "epoch": 2.1849876713114282, - "grad_norm": 0.0013971454463899136, - "learning_rate": 0.00019999764569512477, - "loss": 46.0, - "step": 28578 - }, - { - "epoch": 2.185064128294818, - "grad_norm": 0.0008302445057779551, - "learning_rate": 0.00019999764553030143, - "loss": 46.0, - "step": 28579 - }, - { - "epoch": 2.1851405852782078, - "grad_norm": 0.004947067238390446, - "learning_rate": 0.0001999976453654723, - "loss": 46.0, - "step": 28580 - }, - { - "epoch": 2.1852170422615975, - "grad_norm": 0.0009899416472762823, - "learning_rate": 0.0001999976452006374, - "loss": 46.0, - "step": 28581 - }, - { - "epoch": 2.1852934992449873, - "grad_norm": 0.0005146818002685905, - "learning_rate": 0.00019999764503579674, - "loss": 46.0, - "step": 28582 - }, - { - "epoch": 2.185369956228377, - "grad_norm": 0.003303535282611847, - "learning_rate": 0.00019999764487095033, - "loss": 46.0, - "step": 28583 - }, - { - "epoch": 2.185446413211767, - "grad_norm": 0.0007727764314040542, - "learning_rate": 0.0001999976447060981, - "loss": 46.0, - "step": 28584 - }, - { - "epoch": 2.1855228701951566, - "grad_norm": 0.0003680314985103905, - "learning_rate": 0.0001999976445412401, - "loss": 46.0, - "step": 28585 - }, - { - "epoch": 2.1855993271785463, - "grad_norm": 0.0013785776682198048, - "learning_rate": 0.00019999764437637638, - "loss": 46.0, - "step": 28586 - }, - { - "epoch": 2.185675784161936, - "grad_norm": 0.0007073109154589474, - "learning_rate": 0.00019999764421150688, - "loss": 46.0, - "step": 28587 - }, - { - "epoch": 2.185752241145326, - "grad_norm": 0.0008490776526741683, - "learning_rate": 0.00019999764404663158, - "loss": 46.0, - "step": 28588 - }, - { - "epoch": 2.185828698128715, - "grad_norm": 0.0005670891259796917, - "learning_rate": 0.00019999764388175053, - "loss": 46.0, - "step": 28589 - }, - { - "epoch": 2.185905155112105, - "grad_norm": 0.0008611117373220623, - "learning_rate": 0.0001999976437168637, - "loss": 46.0, - "step": 28590 - }, - { - "epoch": 2.1859816120954947, - "grad_norm": 0.0037224180996418, - "learning_rate": 0.00019999764355197112, - "loss": 46.0, - "step": 28591 - }, - { - "epoch": 2.1860580690788844, - "grad_norm": 0.0009443138842470944, - "learning_rate": 0.00019999764338707277, - "loss": 46.0, - "step": 28592 - }, - { - "epoch": 2.186134526062274, - "grad_norm": 0.0009225825197063386, - "learning_rate": 0.00019999764322216863, - "loss": 46.0, - "step": 28593 - }, - { - "epoch": 2.186210983045664, - "grad_norm": 0.0017345858504995704, - "learning_rate": 0.00019999764305725874, - "loss": 46.0, - "step": 28594 - }, - { - "epoch": 2.1862874400290537, - "grad_norm": 0.0009863072773441672, - "learning_rate": 0.00019999764289234308, - "loss": 46.0, - "step": 28595 - }, - { - "epoch": 2.1863638970124435, - "grad_norm": 0.0015016990946605802, - "learning_rate": 0.00019999764272742162, - "loss": 46.0, - "step": 28596 - }, - { - "epoch": 2.1864403539958333, - "grad_norm": 0.0006458009593188763, - "learning_rate": 0.00019999764256249444, - "loss": 46.0, - "step": 28597 - }, - { - "epoch": 2.186516810979223, - "grad_norm": 0.0031094634905457497, - "learning_rate": 0.00019999764239756146, - "loss": 46.0, - "step": 28598 - }, - { - "epoch": 2.1865932679626123, - "grad_norm": 0.001075044390745461, - "learning_rate": 0.0001999976422326227, - "loss": 46.0, - "step": 28599 - }, - { - "epoch": 2.186669724946002, - "grad_norm": 0.002897440455853939, - "learning_rate": 0.0001999976420676782, - "loss": 46.0, - "step": 28600 - }, - { - "epoch": 2.186746181929392, - "grad_norm": 0.0005038680392317474, - "learning_rate": 0.00019999764190272793, - "loss": 46.0, - "step": 28601 - }, - { - "epoch": 2.1868226389127816, - "grad_norm": 0.0014954719226807356, - "learning_rate": 0.00019999764173777188, - "loss": 46.0, - "step": 28602 - }, - { - "epoch": 2.1868990958961714, - "grad_norm": 0.0006701614474877715, - "learning_rate": 0.00019999764157281006, - "loss": 46.0, - "step": 28603 - }, - { - "epoch": 2.186975552879561, - "grad_norm": 0.0012092175893485546, - "learning_rate": 0.00019999764140784246, - "loss": 46.0, - "step": 28604 - }, - { - "epoch": 2.187052009862951, - "grad_norm": 0.0021120295859873295, - "learning_rate": 0.0001999976412428691, - "loss": 46.0, - "step": 28605 - }, - { - "epoch": 2.1871284668463407, - "grad_norm": 0.0011620563454926014, - "learning_rate": 0.00019999764107788995, - "loss": 46.0, - "step": 28606 - }, - { - "epoch": 2.1872049238297304, - "grad_norm": 0.0010778659489005804, - "learning_rate": 0.00019999764091290507, - "loss": 46.0, - "step": 28607 - }, - { - "epoch": 2.18728138081312, - "grad_norm": 0.0008230854291468859, - "learning_rate": 0.0001999976407479144, - "loss": 46.0, - "step": 28608 - }, - { - "epoch": 2.18735783779651, - "grad_norm": 0.003926537930965424, - "learning_rate": 0.00019999764058291797, - "loss": 46.0, - "step": 28609 - }, - { - "epoch": 2.1874342947798993, - "grad_norm": 0.0014673509867861867, - "learning_rate": 0.00019999764041791576, - "loss": 46.0, - "step": 28610 - }, - { - "epoch": 2.187510751763289, - "grad_norm": 0.0023358173202723265, - "learning_rate": 0.00019999764025290778, - "loss": 46.0, - "step": 28611 - }, - { - "epoch": 2.187587208746679, - "grad_norm": 0.0008677556761540473, - "learning_rate": 0.00019999764008789403, - "loss": 46.0, - "step": 28612 - }, - { - "epoch": 2.1876636657300685, - "grad_norm": 0.002461050171405077, - "learning_rate": 0.00019999763992287453, - "loss": 46.0, - "step": 28613 - }, - { - "epoch": 2.1877401227134583, - "grad_norm": 0.0013192816404625773, - "learning_rate": 0.00019999763975784925, - "loss": 46.0, - "step": 28614 - }, - { - "epoch": 2.187816579696848, - "grad_norm": 0.000389408553019166, - "learning_rate": 0.00019999763959281818, - "loss": 46.0, - "step": 28615 - }, - { - "epoch": 2.187893036680238, - "grad_norm": 0.00046909047523513436, - "learning_rate": 0.00019999763942778138, - "loss": 46.0, - "step": 28616 - }, - { - "epoch": 2.1879694936636276, - "grad_norm": 0.0029299843590706587, - "learning_rate": 0.0001999976392627388, - "loss": 46.0, - "step": 28617 - }, - { - "epoch": 2.1880459506470173, - "grad_norm": 0.0012349144089967012, - "learning_rate": 0.0001999976390976904, - "loss": 46.0, - "step": 28618 - }, - { - "epoch": 2.188122407630407, - "grad_norm": 0.0012928008800372481, - "learning_rate": 0.00019999763893263628, - "loss": 46.0, - "step": 28619 - }, - { - "epoch": 2.188198864613797, - "grad_norm": 0.0014898685039952397, - "learning_rate": 0.00019999763876757637, - "loss": 46.0, - "step": 28620 - }, - { - "epoch": 2.188275321597186, - "grad_norm": 0.0006685965345241129, - "learning_rate": 0.0001999976386025107, - "loss": 46.0, - "step": 28621 - }, - { - "epoch": 2.188351778580576, - "grad_norm": 0.0011451664613559842, - "learning_rate": 0.00019999763843743927, - "loss": 46.0, - "step": 28622 - }, - { - "epoch": 2.1884282355639657, - "grad_norm": 0.0012365257134661078, - "learning_rate": 0.00019999763827236206, - "loss": 46.0, - "step": 28623 - }, - { - "epoch": 2.1885046925473555, - "grad_norm": 0.0016647852025926113, - "learning_rate": 0.00019999763810727908, - "loss": 46.0, - "step": 28624 - }, - { - "epoch": 2.1885811495307452, - "grad_norm": 0.0007989972946234047, - "learning_rate": 0.00019999763794219036, - "loss": 46.0, - "step": 28625 - }, - { - "epoch": 2.188657606514135, - "grad_norm": 0.0036217966116964817, - "learning_rate": 0.00019999763777709583, - "loss": 46.0, - "step": 28626 - }, - { - "epoch": 2.1887340634975248, - "grad_norm": 0.0012161037884652615, - "learning_rate": 0.00019999763761199555, - "loss": 46.0, - "step": 28627 - }, - { - "epoch": 2.1888105204809145, - "grad_norm": 0.0006489575025625527, - "learning_rate": 0.00019999763744688948, - "loss": 46.0, - "step": 28628 - }, - { - "epoch": 2.1888869774643043, - "grad_norm": 0.0014411701122298837, - "learning_rate": 0.00019999763728177766, - "loss": 46.0, - "step": 28629 - }, - { - "epoch": 2.188963434447694, - "grad_norm": 0.0010443803621456027, - "learning_rate": 0.0001999976371166601, - "loss": 46.0, - "step": 28630 - }, - { - "epoch": 2.189039891431084, - "grad_norm": 0.015116575174033642, - "learning_rate": 0.0001999976369515367, - "loss": 46.0, - "step": 28631 - }, - { - "epoch": 2.189116348414473, - "grad_norm": 0.0015881045255810022, - "learning_rate": 0.00019999763678640758, - "loss": 46.0, - "step": 28632 - }, - { - "epoch": 2.189192805397863, - "grad_norm": 0.00038838779437355697, - "learning_rate": 0.00019999763662127267, - "loss": 46.0, - "step": 28633 - }, - { - "epoch": 2.1892692623812526, - "grad_norm": 0.0012234424939379096, - "learning_rate": 0.000199997636456132, - "loss": 46.0, - "step": 28634 - }, - { - "epoch": 2.1893457193646424, - "grad_norm": 0.004736605565994978, - "learning_rate": 0.00019999763629098557, - "loss": 46.0, - "step": 28635 - }, - { - "epoch": 2.189422176348032, - "grad_norm": 0.0003342684358358383, - "learning_rate": 0.00019999763612583337, - "loss": 46.0, - "step": 28636 - }, - { - "epoch": 2.189498633331422, - "grad_norm": 0.0005885021528229117, - "learning_rate": 0.0001999976359606754, - "loss": 46.0, - "step": 28637 - }, - { - "epoch": 2.1895750903148117, - "grad_norm": 0.0011080928379669785, - "learning_rate": 0.00019999763579551163, - "loss": 46.0, - "step": 28638 - }, - { - "epoch": 2.1896515472982014, - "grad_norm": 0.0019398547010496259, - "learning_rate": 0.00019999763563034213, - "loss": 46.0, - "step": 28639 - }, - { - "epoch": 2.189728004281591, - "grad_norm": 0.0007282826700247824, - "learning_rate": 0.00019999763546516683, - "loss": 46.0, - "step": 28640 - }, - { - "epoch": 2.189804461264981, - "grad_norm": 0.0006332109333015978, - "learning_rate": 0.00019999763529998576, - "loss": 46.0, - "step": 28641 - }, - { - "epoch": 2.1898809182483703, - "grad_norm": 0.0010016696760430932, - "learning_rate": 0.00019999763513479894, - "loss": 46.0, - "step": 28642 - }, - { - "epoch": 2.18995737523176, - "grad_norm": 0.002773863961920142, - "learning_rate": 0.00019999763496960634, - "loss": 46.0, - "step": 28643 - }, - { - "epoch": 2.19003383221515, - "grad_norm": 0.0007875406881794333, - "learning_rate": 0.000199997634804408, - "loss": 46.0, - "step": 28644 - }, - { - "epoch": 2.1901102891985396, - "grad_norm": 0.0017082927515730262, - "learning_rate": 0.00019999763463920384, - "loss": 46.0, - "step": 28645 - }, - { - "epoch": 2.1901867461819293, - "grad_norm": 0.0016531051369383931, - "learning_rate": 0.00019999763447399395, - "loss": 46.0, - "step": 28646 - }, - { - "epoch": 2.190263203165319, - "grad_norm": 0.0011235586134716868, - "learning_rate": 0.0001999976343087783, - "loss": 46.0, - "step": 28647 - }, - { - "epoch": 2.190339660148709, - "grad_norm": 0.0009194666054099798, - "learning_rate": 0.00019999763414355686, - "loss": 46.0, - "step": 28648 - }, - { - "epoch": 2.1904161171320986, - "grad_norm": 0.0006754841306246817, - "learning_rate": 0.00019999763397832965, - "loss": 46.0, - "step": 28649 - }, - { - "epoch": 2.1904925741154884, - "grad_norm": 0.0021153809502720833, - "learning_rate": 0.00019999763381309667, - "loss": 46.0, - "step": 28650 - }, - { - "epoch": 2.190569031098878, - "grad_norm": 0.0015513041289523244, - "learning_rate": 0.00019999763364785791, - "loss": 46.0, - "step": 28651 - }, - { - "epoch": 2.190645488082268, - "grad_norm": 0.0005015857750549912, - "learning_rate": 0.0001999976334826134, - "loss": 46.0, - "step": 28652 - }, - { - "epoch": 2.1907219450656576, - "grad_norm": 0.0014262673212215304, - "learning_rate": 0.0001999976333173631, - "loss": 46.0, - "step": 28653 - }, - { - "epoch": 2.190798402049047, - "grad_norm": 0.0010433244751766324, - "learning_rate": 0.00019999763315210704, - "loss": 46.0, - "step": 28654 - }, - { - "epoch": 2.1908748590324367, - "grad_norm": 0.0009178290492855012, - "learning_rate": 0.00019999763298684522, - "loss": 46.0, - "step": 28655 - }, - { - "epoch": 2.1909513160158265, - "grad_norm": 0.0038189892657101154, - "learning_rate": 0.00019999763282157763, - "loss": 46.0, - "step": 28656 - }, - { - "epoch": 2.1910277729992162, - "grad_norm": 0.013782191090285778, - "learning_rate": 0.00019999763265630426, - "loss": 46.0, - "step": 28657 - }, - { - "epoch": 2.191104229982606, - "grad_norm": 0.0006278374930843711, - "learning_rate": 0.00019999763249102512, - "loss": 46.0, - "step": 28658 - }, - { - "epoch": 2.1911806869659958, - "grad_norm": 0.0021108014043420553, - "learning_rate": 0.00019999763232574023, - "loss": 46.0, - "step": 28659 - }, - { - "epoch": 2.1912571439493855, - "grad_norm": 0.006868427619338036, - "learning_rate": 0.00019999763216044952, - "loss": 46.0, - "step": 28660 - }, - { - "epoch": 2.1913336009327753, - "grad_norm": 0.0005290281842462718, - "learning_rate": 0.0001999976319951531, - "loss": 46.0, - "step": 28661 - }, - { - "epoch": 2.191410057916165, - "grad_norm": 0.002222876762971282, - "learning_rate": 0.0001999976318298509, - "loss": 46.0, - "step": 28662 - }, - { - "epoch": 2.191486514899555, - "grad_norm": 0.0008497189846821129, - "learning_rate": 0.00019999763166454293, - "loss": 46.0, - "step": 28663 - }, - { - "epoch": 2.191562971882944, - "grad_norm": 0.0006918266881257296, - "learning_rate": 0.00019999763149922917, - "loss": 46.0, - "step": 28664 - }, - { - "epoch": 2.191639428866334, - "grad_norm": 0.0006093996344134212, - "learning_rate": 0.00019999763133390965, - "loss": 46.0, - "step": 28665 - }, - { - "epoch": 2.1917158858497237, - "grad_norm": 0.0003278770309407264, - "learning_rate": 0.00019999763116858437, - "loss": 46.0, - "step": 28666 - }, - { - "epoch": 2.1917923428331134, - "grad_norm": 0.0008522662683390081, - "learning_rate": 0.0001999976310032533, - "loss": 46.0, - "step": 28667 - }, - { - "epoch": 2.191868799816503, - "grad_norm": 0.0012082374887540936, - "learning_rate": 0.00019999763083791648, - "loss": 46.0, - "step": 28668 - }, - { - "epoch": 2.191945256799893, - "grad_norm": 0.0005280483746901155, - "learning_rate": 0.0001999976306725739, - "loss": 46.0, - "step": 28669 - }, - { - "epoch": 2.1920217137832827, - "grad_norm": 0.0009032447705976665, - "learning_rate": 0.00019999763050722552, - "loss": 46.0, - "step": 28670 - }, - { - "epoch": 2.1920981707666725, - "grad_norm": 0.0006261604139581323, - "learning_rate": 0.00019999763034187138, - "loss": 46.0, - "step": 28671 - }, - { - "epoch": 2.192174627750062, - "grad_norm": 0.0012686256086453795, - "learning_rate": 0.00019999763017651147, - "loss": 46.0, - "step": 28672 - }, - { - "epoch": 2.192251084733452, - "grad_norm": 0.0008066724985837936, - "learning_rate": 0.0001999976300111458, - "loss": 46.0, - "step": 28673 - }, - { - "epoch": 2.1923275417168417, - "grad_norm": 0.0007947124540805817, - "learning_rate": 0.00019999762984577438, - "loss": 46.0, - "step": 28674 - }, - { - "epoch": 2.1924039987002315, - "grad_norm": 0.002620997140184045, - "learning_rate": 0.00019999762968039714, - "loss": 46.0, - "step": 28675 - }, - { - "epoch": 2.192480455683621, - "grad_norm": 0.0011536844540387392, - "learning_rate": 0.00019999762951501416, - "loss": 46.0, - "step": 28676 - }, - { - "epoch": 2.1925569126670106, - "grad_norm": 0.0018091228557750583, - "learning_rate": 0.00019999762934962544, - "loss": 46.0, - "step": 28677 - }, - { - "epoch": 2.1926333696504003, - "grad_norm": 0.002807512879371643, - "learning_rate": 0.00019999762918423088, - "loss": 46.0, - "step": 28678 - }, - { - "epoch": 2.19270982663379, - "grad_norm": 0.0012814572546631098, - "learning_rate": 0.0001999976290188306, - "loss": 46.0, - "step": 28679 - }, - { - "epoch": 2.19278628361718, - "grad_norm": 0.0015938919968903065, - "learning_rate": 0.00019999762885342454, - "loss": 46.0, - "step": 28680 - }, - { - "epoch": 2.1928627406005696, - "grad_norm": 0.0003120073233731091, - "learning_rate": 0.00019999762868801272, - "loss": 46.0, - "step": 28681 - }, - { - "epoch": 2.1929391975839594, - "grad_norm": 0.0014784664381295443, - "learning_rate": 0.00019999762852259512, - "loss": 46.0, - "step": 28682 - }, - { - "epoch": 2.193015654567349, - "grad_norm": 0.00027768261497840285, - "learning_rate": 0.00019999762835717176, - "loss": 46.0, - "step": 28683 - }, - { - "epoch": 2.193092111550739, - "grad_norm": 0.0013586970744654536, - "learning_rate": 0.00019999762819174262, - "loss": 46.0, - "step": 28684 - }, - { - "epoch": 2.1931685685341287, - "grad_norm": 0.0025398428551852703, - "learning_rate": 0.0001999976280263077, - "loss": 46.0, - "step": 28685 - }, - { - "epoch": 2.193245025517518, - "grad_norm": 0.0019675600342452526, - "learning_rate": 0.00019999762786086702, - "loss": 46.0, - "step": 28686 - }, - { - "epoch": 2.1933214825009077, - "grad_norm": 0.0013736619148403406, - "learning_rate": 0.0001999976276954206, - "loss": 46.0, - "step": 28687 - }, - { - "epoch": 2.1933979394842975, - "grad_norm": 0.0007126835407689214, - "learning_rate": 0.00019999762752996838, - "loss": 46.0, - "step": 28688 - }, - { - "epoch": 2.1934743964676873, - "grad_norm": 0.0012680370127782226, - "learning_rate": 0.0001999976273645104, - "loss": 46.0, - "step": 28689 - }, - { - "epoch": 2.193550853451077, - "grad_norm": 0.0031738809775561094, - "learning_rate": 0.00019999762719904665, - "loss": 46.0, - "step": 28690 - }, - { - "epoch": 2.193627310434467, - "grad_norm": 0.0006710441666655242, - "learning_rate": 0.00019999762703357712, - "loss": 46.0, - "step": 28691 - }, - { - "epoch": 2.1937037674178566, - "grad_norm": 0.0011365828104317188, - "learning_rate": 0.00019999762686810182, - "loss": 46.0, - "step": 28692 - }, - { - "epoch": 2.1937802244012463, - "grad_norm": 0.008483314886689186, - "learning_rate": 0.00019999762670262078, - "loss": 46.0, - "step": 28693 - }, - { - "epoch": 2.193856681384636, - "grad_norm": 0.0013568695867434144, - "learning_rate": 0.00019999762653713393, - "loss": 46.0, - "step": 28694 - }, - { - "epoch": 2.193933138368026, - "grad_norm": 0.0020197569392621517, - "learning_rate": 0.00019999762637164134, - "loss": 46.0, - "step": 28695 - }, - { - "epoch": 2.1940095953514156, - "grad_norm": 0.0010159629164263606, - "learning_rate": 0.00019999762620614297, - "loss": 46.0, - "step": 28696 - }, - { - "epoch": 2.1940860523348054, - "grad_norm": 0.0005594915128313005, - "learning_rate": 0.00019999762604063883, - "loss": 46.0, - "step": 28697 - }, - { - "epoch": 2.1941625093181947, - "grad_norm": 0.0014561618445441127, - "learning_rate": 0.00019999762587512895, - "loss": 46.0, - "step": 28698 - }, - { - "epoch": 2.1942389663015844, - "grad_norm": 0.0015211966820061207, - "learning_rate": 0.00019999762570961326, - "loss": 46.0, - "step": 28699 - }, - { - "epoch": 2.194315423284974, - "grad_norm": 0.0023055304773151875, - "learning_rate": 0.0001999976255440918, - "loss": 46.0, - "step": 28700 - }, - { - "epoch": 2.194391880268364, - "grad_norm": 0.0005494029610417783, - "learning_rate": 0.0001999976253785646, - "loss": 46.0, - "step": 28701 - }, - { - "epoch": 2.1944683372517537, - "grad_norm": 0.0007750372169539332, - "learning_rate": 0.0001999976252130316, - "loss": 46.0, - "step": 28702 - }, - { - "epoch": 2.1945447942351435, - "grad_norm": 0.0037118406035006046, - "learning_rate": 0.00019999762504749287, - "loss": 46.0, - "step": 28703 - }, - { - "epoch": 2.1946212512185332, - "grad_norm": 0.00036844678106717765, - "learning_rate": 0.00019999762488194834, - "loss": 46.0, - "step": 28704 - }, - { - "epoch": 2.194697708201923, - "grad_norm": 0.0020455995108932257, - "learning_rate": 0.00019999762471639804, - "loss": 46.0, - "step": 28705 - }, - { - "epoch": 2.1947741651853128, - "grad_norm": 0.0008475796785205603, - "learning_rate": 0.000199997624550842, - "loss": 46.0, - "step": 28706 - }, - { - "epoch": 2.1948506221687025, - "grad_norm": 0.0011455765925347805, - "learning_rate": 0.00019999762438528015, - "loss": 46.0, - "step": 28707 - }, - { - "epoch": 2.194927079152092, - "grad_norm": 0.0022386317141354084, - "learning_rate": 0.00019999762421971256, - "loss": 46.0, - "step": 28708 - }, - { - "epoch": 2.1950035361354816, - "grad_norm": 0.0007858487660996616, - "learning_rate": 0.0001999976240541392, - "loss": 46.0, - "step": 28709 - }, - { - "epoch": 2.1950799931188714, - "grad_norm": 0.0014285441720858216, - "learning_rate": 0.00019999762388856005, - "loss": 46.0, - "step": 28710 - }, - { - "epoch": 2.195156450102261, - "grad_norm": 0.0006505746277980506, - "learning_rate": 0.0001999976237229751, - "loss": 46.0, - "step": 28711 - }, - { - "epoch": 2.195232907085651, - "grad_norm": 0.0007188899908214808, - "learning_rate": 0.00019999762355738448, - "loss": 46.0, - "step": 28712 - }, - { - "epoch": 2.1953093640690406, - "grad_norm": 0.002463469048961997, - "learning_rate": 0.000199997623391788, - "loss": 46.0, - "step": 28713 - }, - { - "epoch": 2.1953858210524304, - "grad_norm": 0.0006235355394892395, - "learning_rate": 0.00019999762322618582, - "loss": 46.0, - "step": 28714 - }, - { - "epoch": 2.19546227803582, - "grad_norm": 0.004243816249072552, - "learning_rate": 0.0001999976230605778, - "loss": 46.0, - "step": 28715 - }, - { - "epoch": 2.19553873501921, - "grad_norm": 0.0006481556920334697, - "learning_rate": 0.00019999762289496406, - "loss": 46.0, - "step": 28716 - }, - { - "epoch": 2.1956151920025997, - "grad_norm": 0.0009381757699884474, - "learning_rate": 0.00019999762272934453, - "loss": 46.0, - "step": 28717 - }, - { - "epoch": 2.1956916489859895, - "grad_norm": 0.0008142460137605667, - "learning_rate": 0.00019999762256371924, - "loss": 46.0, - "step": 28718 - }, - { - "epoch": 2.195768105969379, - "grad_norm": 0.008301754482090473, - "learning_rate": 0.00019999762239808816, - "loss": 46.0, - "step": 28719 - }, - { - "epoch": 2.1958445629527685, - "grad_norm": 0.0015477570705115795, - "learning_rate": 0.00019999762223245134, - "loss": 46.0, - "step": 28720 - }, - { - "epoch": 2.1959210199361583, - "grad_norm": 0.0016006340738385916, - "learning_rate": 0.00019999762206680875, - "loss": 46.0, - "step": 28721 - }, - { - "epoch": 2.195997476919548, - "grad_norm": 0.0005535492091439664, - "learning_rate": 0.0001999976219011604, - "loss": 46.0, - "step": 28722 - }, - { - "epoch": 2.196073933902938, - "grad_norm": 0.004451784770935774, - "learning_rate": 0.00019999762173550622, - "loss": 46.0, - "step": 28723 - }, - { - "epoch": 2.1961503908863276, - "grad_norm": 0.0018984171329066157, - "learning_rate": 0.00019999762156984634, - "loss": 46.0, - "step": 28724 - }, - { - "epoch": 2.1962268478697173, - "grad_norm": 0.0003904541954398155, - "learning_rate": 0.00019999762140418065, - "loss": 46.0, - "step": 28725 - }, - { - "epoch": 2.196303304853107, - "grad_norm": 0.0025313321966677904, - "learning_rate": 0.0001999976212385092, - "loss": 46.0, - "step": 28726 - }, - { - "epoch": 2.196379761836497, - "grad_norm": 0.00069714046549052, - "learning_rate": 0.000199997621072832, - "loss": 46.0, - "step": 28727 - }, - { - "epoch": 2.1964562188198866, - "grad_norm": 0.0022827403154224157, - "learning_rate": 0.000199997620907149, - "loss": 46.0, - "step": 28728 - }, - { - "epoch": 2.1965326758032764, - "grad_norm": 0.0012537987204268575, - "learning_rate": 0.00019999762074146026, - "loss": 46.0, - "step": 28729 - }, - { - "epoch": 2.1966091327866657, - "grad_norm": 0.00026411254657432437, - "learning_rate": 0.00019999762057576573, - "loss": 46.0, - "step": 28730 - }, - { - "epoch": 2.1966855897700555, - "grad_norm": 0.0024505213368684053, - "learning_rate": 0.00019999762041006543, - "loss": 46.0, - "step": 28731 - }, - { - "epoch": 2.196762046753445, - "grad_norm": 0.004176511894911528, - "learning_rate": 0.00019999762024435936, - "loss": 46.0, - "step": 28732 - }, - { - "epoch": 2.196838503736835, - "grad_norm": 0.0022932779975235462, - "learning_rate": 0.00019999762007864752, - "loss": 46.0, - "step": 28733 - }, - { - "epoch": 2.1969149607202247, - "grad_norm": 0.0028575395699590445, - "learning_rate": 0.00019999761991292992, - "loss": 46.0, - "step": 28734 - }, - { - "epoch": 2.1969914177036145, - "grad_norm": 0.0026604263111948967, - "learning_rate": 0.00019999761974720656, - "loss": 46.0, - "step": 28735 - }, - { - "epoch": 2.1970678746870043, - "grad_norm": 0.0012218424817547202, - "learning_rate": 0.0001999976195814774, - "loss": 46.0, - "step": 28736 - }, - { - "epoch": 2.197144331670394, - "grad_norm": 0.00360929686576128, - "learning_rate": 0.0001999976194157425, - "loss": 46.0, - "step": 28737 - }, - { - "epoch": 2.197220788653784, - "grad_norm": 0.001043299213051796, - "learning_rate": 0.00019999761925000185, - "loss": 46.0, - "step": 28738 - }, - { - "epoch": 2.1972972456371735, - "grad_norm": 0.00041641900315880775, - "learning_rate": 0.0001999976190842554, - "loss": 46.0, - "step": 28739 - }, - { - "epoch": 2.1973737026205633, - "grad_norm": 0.0025470878463238478, - "learning_rate": 0.00019999761891850316, - "loss": 46.0, - "step": 28740 - }, - { - "epoch": 2.1974501596039526, - "grad_norm": 0.002362113678827882, - "learning_rate": 0.00019999761875274516, - "loss": 46.0, - "step": 28741 - }, - { - "epoch": 2.1975266165873424, - "grad_norm": 0.004090269096195698, - "learning_rate": 0.0001999976185869814, - "loss": 46.0, - "step": 28742 - }, - { - "epoch": 2.197603073570732, - "grad_norm": 0.002057581441476941, - "learning_rate": 0.0001999976184212119, - "loss": 46.0, - "step": 28743 - }, - { - "epoch": 2.197679530554122, - "grad_norm": 0.0011703399941325188, - "learning_rate": 0.0001999976182554366, - "loss": 46.0, - "step": 28744 - }, - { - "epoch": 2.1977559875375117, - "grad_norm": 0.002911994932219386, - "learning_rate": 0.00019999761808965554, - "loss": 46.0, - "step": 28745 - }, - { - "epoch": 2.1978324445209014, - "grad_norm": 0.0007452918798662722, - "learning_rate": 0.0001999976179238687, - "loss": 46.0, - "step": 28746 - }, - { - "epoch": 2.197908901504291, - "grad_norm": 0.0007457157480530441, - "learning_rate": 0.0001999976177580761, - "loss": 46.0, - "step": 28747 - }, - { - "epoch": 2.197985358487681, - "grad_norm": 0.0027052657678723335, - "learning_rate": 0.0001999976175922777, - "loss": 46.0, - "step": 28748 - }, - { - "epoch": 2.1980618154710707, - "grad_norm": 0.0007662579300813377, - "learning_rate": 0.00019999761742647357, - "loss": 46.0, - "step": 28749 - }, - { - "epoch": 2.1981382724544605, - "grad_norm": 0.001998987514525652, - "learning_rate": 0.0001999976172606637, - "loss": 46.0, - "step": 28750 - }, - { - "epoch": 2.1982147294378502, - "grad_norm": 0.0020735764410346746, - "learning_rate": 0.00019999761709484798, - "loss": 46.0, - "step": 28751 - }, - { - "epoch": 2.1982911864212396, - "grad_norm": 0.0017626654589548707, - "learning_rate": 0.00019999761692902654, - "loss": 46.0, - "step": 28752 - }, - { - "epoch": 2.1983676434046293, - "grad_norm": 0.001942440983839333, - "learning_rate": 0.0001999976167631993, - "loss": 46.0, - "step": 28753 - }, - { - "epoch": 2.198444100388019, - "grad_norm": 0.0012866543838754296, - "learning_rate": 0.00019999761659736634, - "loss": 46.0, - "step": 28754 - }, - { - "epoch": 2.198520557371409, - "grad_norm": 0.001886275364086032, - "learning_rate": 0.00019999761643152758, - "loss": 46.0, - "step": 28755 - }, - { - "epoch": 2.1985970143547986, - "grad_norm": 0.002110939472913742, - "learning_rate": 0.00019999761626568306, - "loss": 46.0, - "step": 28756 - }, - { - "epoch": 2.1986734713381884, - "grad_norm": 0.00474103819578886, - "learning_rate": 0.00019999761609983273, - "loss": 46.0, - "step": 28757 - }, - { - "epoch": 2.198749928321578, - "grad_norm": 0.0020059135276824236, - "learning_rate": 0.0001999976159339767, - "loss": 46.0, - "step": 28758 - }, - { - "epoch": 2.198826385304968, - "grad_norm": 0.007026014383882284, - "learning_rate": 0.00019999761576811485, - "loss": 46.0, - "step": 28759 - }, - { - "epoch": 2.1989028422883576, - "grad_norm": 0.000674641109071672, - "learning_rate": 0.00019999761560224725, - "loss": 46.0, - "step": 28760 - }, - { - "epoch": 2.1989792992717474, - "grad_norm": 0.00117670523468405, - "learning_rate": 0.00019999761543637386, - "loss": 46.0, - "step": 28761 - }, - { - "epoch": 2.199055756255137, - "grad_norm": 0.00044244524906389415, - "learning_rate": 0.00019999761527049473, - "loss": 46.0, - "step": 28762 - }, - { - "epoch": 2.1991322132385265, - "grad_norm": 0.00045616025454364717, - "learning_rate": 0.00019999761510460981, - "loss": 46.0, - "step": 28763 - }, - { - "epoch": 2.1992086702219162, - "grad_norm": 0.001300415606237948, - "learning_rate": 0.00019999761493871913, - "loss": 46.0, - "step": 28764 - }, - { - "epoch": 2.199285127205306, - "grad_norm": 0.0009729649173095822, - "learning_rate": 0.00019999761477282267, - "loss": 46.0, - "step": 28765 - }, - { - "epoch": 2.1993615841886958, - "grad_norm": 0.010112939402461052, - "learning_rate": 0.00019999761460692047, - "loss": 46.0, - "step": 28766 - }, - { - "epoch": 2.1994380411720855, - "grad_norm": 0.0027120918966829777, - "learning_rate": 0.0001999976144410125, - "loss": 46.0, - "step": 28767 - }, - { - "epoch": 2.1995144981554753, - "grad_norm": 0.0008452770416624844, - "learning_rate": 0.00019999761427509874, - "loss": 46.0, - "step": 28768 - }, - { - "epoch": 2.199590955138865, - "grad_norm": 0.0013440067414194345, - "learning_rate": 0.0001999976141091792, - "loss": 46.0, - "step": 28769 - }, - { - "epoch": 2.199667412122255, - "grad_norm": 0.0008116482640616596, - "learning_rate": 0.0001999976139432539, - "loss": 46.0, - "step": 28770 - }, - { - "epoch": 2.1997438691056446, - "grad_norm": 0.002448539948090911, - "learning_rate": 0.00019999761377732282, - "loss": 46.0, - "step": 28771 - }, - { - "epoch": 2.1998203260890343, - "grad_norm": 0.0006727167638018727, - "learning_rate": 0.000199997613611386, - "loss": 46.0, - "step": 28772 - }, - { - "epoch": 2.1998967830724236, - "grad_norm": 0.002355123171582818, - "learning_rate": 0.00019999761344544338, - "loss": 46.0, - "step": 28773 - }, - { - "epoch": 2.1999732400558134, - "grad_norm": 0.0008522709831595421, - "learning_rate": 0.000199997613279495, - "loss": 46.0, - "step": 28774 - }, - { - "epoch": 2.200049697039203, - "grad_norm": 0.0040439129807055, - "learning_rate": 0.00019999761311354086, - "loss": 46.0, - "step": 28775 - }, - { - "epoch": 2.200126154022593, - "grad_norm": 0.0015502751339226961, - "learning_rate": 0.00019999761294758095, - "loss": 46.0, - "step": 28776 - }, - { - "epoch": 2.2002026110059827, - "grad_norm": 0.0007321261800825596, - "learning_rate": 0.00019999761278161526, - "loss": 46.0, - "step": 28777 - }, - { - "epoch": 2.2002790679893725, - "grad_norm": 0.002785300835967064, - "learning_rate": 0.00019999761261564383, - "loss": 46.0, - "step": 28778 - }, - { - "epoch": 2.200355524972762, - "grad_norm": 0.0008571309153921902, - "learning_rate": 0.00019999761244966663, - "loss": 46.0, - "step": 28779 - }, - { - "epoch": 2.200431981956152, - "grad_norm": 0.005570200737565756, - "learning_rate": 0.00019999761228368362, - "loss": 46.0, - "step": 28780 - }, - { - "epoch": 2.2005084389395417, - "grad_norm": 0.0031015167478471994, - "learning_rate": 0.00019999761211769485, - "loss": 46.0, - "step": 28781 - }, - { - "epoch": 2.2005848959229315, - "grad_norm": 0.0013768210774287581, - "learning_rate": 0.00019999761195170032, - "loss": 46.0, - "step": 28782 - }, - { - "epoch": 2.2006613529063213, - "grad_norm": 0.000893348129466176, - "learning_rate": 0.00019999761178570002, - "loss": 46.0, - "step": 28783 - }, - { - "epoch": 2.200737809889711, - "grad_norm": 0.0005879162927158177, - "learning_rate": 0.00019999761161969395, - "loss": 46.0, - "step": 28784 - }, - { - "epoch": 2.2008142668731003, - "grad_norm": 0.0035728267394006252, - "learning_rate": 0.0001999976114536821, - "loss": 46.0, - "step": 28785 - }, - { - "epoch": 2.20089072385649, - "grad_norm": 0.0007705323514528573, - "learning_rate": 0.00019999761128766452, - "loss": 46.0, - "step": 28786 - }, - { - "epoch": 2.20096718083988, - "grad_norm": 0.0020545287989079952, - "learning_rate": 0.00019999761112164116, - "loss": 46.0, - "step": 28787 - }, - { - "epoch": 2.2010436378232696, - "grad_norm": 0.0018090162193402648, - "learning_rate": 0.000199997610955612, - "loss": 46.0, - "step": 28788 - }, - { - "epoch": 2.2011200948066594, - "grad_norm": 0.005108046345412731, - "learning_rate": 0.00019999761078957708, - "loss": 46.0, - "step": 28789 - }, - { - "epoch": 2.201196551790049, - "grad_norm": 0.0011435793712735176, - "learning_rate": 0.0001999976106235364, - "loss": 46.0, - "step": 28790 - }, - { - "epoch": 2.201273008773439, - "grad_norm": 0.001082267495803535, - "learning_rate": 0.0001999976104574899, - "loss": 46.0, - "step": 28791 - }, - { - "epoch": 2.2013494657568287, - "grad_norm": 0.004285803530365229, - "learning_rate": 0.00019999761029143774, - "loss": 46.0, - "step": 28792 - }, - { - "epoch": 2.2014259227402184, - "grad_norm": 0.0010516910115256906, - "learning_rate": 0.00019999761012537973, - "loss": 46.0, - "step": 28793 - }, - { - "epoch": 2.201502379723608, - "grad_norm": 0.005702539347112179, - "learning_rate": 0.00019999760995931598, - "loss": 46.0, - "step": 28794 - }, - { - "epoch": 2.2015788367069975, - "grad_norm": 0.006272371392697096, - "learning_rate": 0.00019999760979324643, - "loss": 46.0, - "step": 28795 - }, - { - "epoch": 2.2016552936903873, - "grad_norm": 0.00568053312599659, - "learning_rate": 0.00019999760962717113, - "loss": 46.0, - "step": 28796 - }, - { - "epoch": 2.201731750673777, - "grad_norm": 0.0018560357857495546, - "learning_rate": 0.00019999760946109006, - "loss": 46.0, - "step": 28797 - }, - { - "epoch": 2.201808207657167, - "grad_norm": 0.0004826985823456198, - "learning_rate": 0.00019999760929500325, - "loss": 46.0, - "step": 28798 - }, - { - "epoch": 2.2018846646405565, - "grad_norm": 0.00208675442263484, - "learning_rate": 0.00019999760912891063, - "loss": 46.0, - "step": 28799 - }, - { - "epoch": 2.2019611216239463, - "grad_norm": 0.00037602230440825224, - "learning_rate": 0.00019999760896281227, - "loss": 46.0, - "step": 28800 - }, - { - "epoch": 2.202037578607336, - "grad_norm": 0.0011239483719691634, - "learning_rate": 0.0001999976087967081, - "loss": 46.0, - "step": 28801 - }, - { - "epoch": 2.202114035590726, - "grad_norm": 0.0009544836357235909, - "learning_rate": 0.00019999760863059817, - "loss": 46.0, - "step": 28802 - }, - { - "epoch": 2.2021904925741156, - "grad_norm": 0.0008614493417553604, - "learning_rate": 0.0001999976084644825, - "loss": 46.0, - "step": 28803 - }, - { - "epoch": 2.2022669495575053, - "grad_norm": 0.0009722508257254958, - "learning_rate": 0.00019999760829836105, - "loss": 46.0, - "step": 28804 - }, - { - "epoch": 2.202343406540895, - "grad_norm": 0.0010264882585033774, - "learning_rate": 0.0001999976081322338, - "loss": 46.0, - "step": 28805 - }, - { - "epoch": 2.202419863524285, - "grad_norm": 0.005427997559309006, - "learning_rate": 0.00019999760796610082, - "loss": 46.0, - "step": 28806 - }, - { - "epoch": 2.202496320507674, - "grad_norm": 0.0008990340866148472, - "learning_rate": 0.00019999760779996207, - "loss": 46.0, - "step": 28807 - }, - { - "epoch": 2.202572777491064, - "grad_norm": 0.001176820253022015, - "learning_rate": 0.00019999760763381752, - "loss": 46.0, - "step": 28808 - }, - { - "epoch": 2.2026492344744537, - "grad_norm": 0.0008145632455125451, - "learning_rate": 0.00019999760746766722, - "loss": 46.0, - "step": 28809 - }, - { - "epoch": 2.2027256914578435, - "grad_norm": 0.0014016643399372697, - "learning_rate": 0.00019999760730151115, - "loss": 46.0, - "step": 28810 - }, - { - "epoch": 2.2028021484412332, - "grad_norm": 0.0002928642206825316, - "learning_rate": 0.0001999976071353493, - "loss": 46.0, - "step": 28811 - }, - { - "epoch": 2.202878605424623, - "grad_norm": 0.0014874918852001429, - "learning_rate": 0.0001999976069691817, - "loss": 46.0, - "step": 28812 - }, - { - "epoch": 2.2029550624080128, - "grad_norm": 0.0008088365430012345, - "learning_rate": 0.00019999760680300833, - "loss": 46.0, - "step": 28813 - }, - { - "epoch": 2.2030315193914025, - "grad_norm": 0.0005311599816195667, - "learning_rate": 0.0001999976066368292, - "loss": 46.0, - "step": 28814 - }, - { - "epoch": 2.2031079763747923, - "grad_norm": 0.002118383301422, - "learning_rate": 0.00019999760647064428, - "loss": 46.0, - "step": 28815 - }, - { - "epoch": 2.203184433358182, - "grad_norm": 0.0006980802863836288, - "learning_rate": 0.0001999976063044536, - "loss": 46.0, - "step": 28816 - }, - { - "epoch": 2.2032608903415714, - "grad_norm": 0.0014909004094079137, - "learning_rate": 0.00019999760613825714, - "loss": 46.0, - "step": 28817 - }, - { - "epoch": 2.203337347324961, - "grad_norm": 0.0007738404674455523, - "learning_rate": 0.0001999976059720549, - "loss": 46.0, - "step": 28818 - }, - { - "epoch": 2.203413804308351, - "grad_norm": 0.0008588539203628898, - "learning_rate": 0.0001999976058058469, - "loss": 46.0, - "step": 28819 - }, - { - "epoch": 2.2034902612917406, - "grad_norm": 0.0014501033583655953, - "learning_rate": 0.00019999760563963313, - "loss": 46.0, - "step": 28820 - }, - { - "epoch": 2.2035667182751304, - "grad_norm": 0.0010203676065430045, - "learning_rate": 0.0001999976054734136, - "loss": 46.0, - "step": 28821 - }, - { - "epoch": 2.20364317525852, - "grad_norm": 0.0009154072031378746, - "learning_rate": 0.00019999760530718831, - "loss": 46.0, - "step": 28822 - }, - { - "epoch": 2.20371963224191, - "grad_norm": 0.001322239637374878, - "learning_rate": 0.00019999760514095724, - "loss": 46.0, - "step": 28823 - }, - { - "epoch": 2.2037960892252997, - "grad_norm": 0.001761665684171021, - "learning_rate": 0.0001999976049747204, - "loss": 46.0, - "step": 28824 - }, - { - "epoch": 2.2038725462086894, - "grad_norm": 0.0014802577206864953, - "learning_rate": 0.00019999760480847778, - "loss": 46.0, - "step": 28825 - }, - { - "epoch": 2.203949003192079, - "grad_norm": 0.0011306633241474628, - "learning_rate": 0.00019999760464222942, - "loss": 46.0, - "step": 28826 - }, - { - "epoch": 2.204025460175469, - "grad_norm": 0.0028315153904259205, - "learning_rate": 0.00019999760447597526, - "loss": 46.0, - "step": 28827 - }, - { - "epoch": 2.2041019171588587, - "grad_norm": 0.00026699324371293187, - "learning_rate": 0.00019999760430971535, - "loss": 46.0, - "step": 28828 - }, - { - "epoch": 2.204178374142248, - "grad_norm": 0.0008715919102542102, - "learning_rate": 0.00019999760414344967, - "loss": 46.0, - "step": 28829 - }, - { - "epoch": 2.204254831125638, - "grad_norm": 0.0008248778758570552, - "learning_rate": 0.0001999976039771782, - "loss": 46.0, - "step": 28830 - }, - { - "epoch": 2.2043312881090276, - "grad_norm": 0.0011730906553566456, - "learning_rate": 0.00019999760381090098, - "loss": 46.0, - "step": 28831 - }, - { - "epoch": 2.2044077450924173, - "grad_norm": 0.0004987865104340017, - "learning_rate": 0.00019999760364461798, - "loss": 46.0, - "step": 28832 - }, - { - "epoch": 2.204484202075807, - "grad_norm": 0.0008527519530616701, - "learning_rate": 0.0001999976034783292, - "loss": 46.0, - "step": 28833 - }, - { - "epoch": 2.204560659059197, - "grad_norm": 0.001204493921250105, - "learning_rate": 0.00019999760331203468, - "loss": 46.0, - "step": 28834 - }, - { - "epoch": 2.2046371160425866, - "grad_norm": 0.000427691062213853, - "learning_rate": 0.00019999760314573438, - "loss": 46.0, - "step": 28835 - }, - { - "epoch": 2.2047135730259764, - "grad_norm": 0.001846226747147739, - "learning_rate": 0.0001999976029794283, - "loss": 46.0, - "step": 28836 - }, - { - "epoch": 2.204790030009366, - "grad_norm": 0.002269761636853218, - "learning_rate": 0.00019999760281311647, - "loss": 46.0, - "step": 28837 - }, - { - "epoch": 2.204866486992756, - "grad_norm": 0.0025457690935581923, - "learning_rate": 0.00019999760264679885, - "loss": 46.0, - "step": 28838 - }, - { - "epoch": 2.204942943976145, - "grad_norm": 0.0004558280052151531, - "learning_rate": 0.00019999760248047546, - "loss": 46.0, - "step": 28839 - }, - { - "epoch": 2.205019400959535, - "grad_norm": 0.0012941573513671756, - "learning_rate": 0.00019999760231414633, - "loss": 46.0, - "step": 28840 - }, - { - "epoch": 2.2050958579429247, - "grad_norm": 0.00124286487698555, - "learning_rate": 0.00019999760214781142, - "loss": 46.0, - "step": 28841 - }, - { - "epoch": 2.2051723149263145, - "grad_norm": 0.0010324915638193488, - "learning_rate": 0.00019999760198147074, - "loss": 46.0, - "step": 28842 - }, - { - "epoch": 2.2052487719097043, - "grad_norm": 0.0014269310049712658, - "learning_rate": 0.00019999760181512425, - "loss": 46.0, - "step": 28843 - }, - { - "epoch": 2.205325228893094, - "grad_norm": 0.002782462863251567, - "learning_rate": 0.00019999760164877202, - "loss": 46.0, - "step": 28844 - }, - { - "epoch": 2.2054016858764838, - "grad_norm": 0.0006852689548395574, - "learning_rate": 0.00019999760148241405, - "loss": 46.0, - "step": 28845 - }, - { - "epoch": 2.2054781428598735, - "grad_norm": 0.0009855644311755896, - "learning_rate": 0.00019999760131605027, - "loss": 46.0, - "step": 28846 - }, - { - "epoch": 2.2055545998432633, - "grad_norm": 0.0015077366260811687, - "learning_rate": 0.00019999760114968072, - "loss": 46.0, - "step": 28847 - }, - { - "epoch": 2.205631056826653, - "grad_norm": 0.0008704654173925519, - "learning_rate": 0.00019999760098330543, - "loss": 46.0, - "step": 28848 - }, - { - "epoch": 2.205707513810043, - "grad_norm": 0.0008795936009846628, - "learning_rate": 0.00019999760081692439, - "loss": 46.0, - "step": 28849 - }, - { - "epoch": 2.2057839707934326, - "grad_norm": 0.0019369556102901697, - "learning_rate": 0.00019999760065053752, - "loss": 46.0, - "step": 28850 - }, - { - "epoch": 2.205860427776822, - "grad_norm": 0.0008396393968723714, - "learning_rate": 0.0001999976004841449, - "loss": 46.0, - "step": 28851 - }, - { - "epoch": 2.2059368847602117, - "grad_norm": 0.0009077504510059953, - "learning_rate": 0.00019999760031774654, - "loss": 46.0, - "step": 28852 - }, - { - "epoch": 2.2060133417436014, - "grad_norm": 0.00030257628532126546, - "learning_rate": 0.0001999976001513424, - "loss": 46.0, - "step": 28853 - }, - { - "epoch": 2.206089798726991, - "grad_norm": 0.0011218987638130784, - "learning_rate": 0.00019999759998493247, - "loss": 46.0, - "step": 28854 - }, - { - "epoch": 2.206166255710381, - "grad_norm": 0.0005522216088138521, - "learning_rate": 0.00019999759981851676, - "loss": 46.0, - "step": 28855 - }, - { - "epoch": 2.2062427126937707, - "grad_norm": 0.0035300019662827253, - "learning_rate": 0.0001999975996520953, - "loss": 46.0, - "step": 28856 - }, - { - "epoch": 2.2063191696771605, - "grad_norm": 0.0007878244505263865, - "learning_rate": 0.00019999759948566807, - "loss": 46.0, - "step": 28857 - }, - { - "epoch": 2.2063956266605502, - "grad_norm": 0.00852157361805439, - "learning_rate": 0.00019999759931923507, - "loss": 46.0, - "step": 28858 - }, - { - "epoch": 2.20647208364394, - "grad_norm": 0.0022826530039310455, - "learning_rate": 0.00019999759915279632, - "loss": 46.0, - "step": 28859 - }, - { - "epoch": 2.2065485406273297, - "grad_norm": 0.0010222750715911388, - "learning_rate": 0.0001999975989863518, - "loss": 46.0, - "step": 28860 - }, - { - "epoch": 2.206624997610719, - "grad_norm": 0.0007364195771515369, - "learning_rate": 0.00019999759881990148, - "loss": 46.0, - "step": 28861 - }, - { - "epoch": 2.206701454594109, - "grad_norm": 0.0007557598291896284, - "learning_rate": 0.0001999975986534454, - "loss": 46.0, - "step": 28862 - }, - { - "epoch": 2.2067779115774986, - "grad_norm": 0.0010475916787981987, - "learning_rate": 0.00019999759848698357, - "loss": 46.0, - "step": 28863 - }, - { - "epoch": 2.2068543685608883, - "grad_norm": 0.0005715079605579376, - "learning_rate": 0.00019999759832051595, - "loss": 46.0, - "step": 28864 - }, - { - "epoch": 2.206930825544278, - "grad_norm": 0.0008115984383039176, - "learning_rate": 0.00019999759815404256, - "loss": 46.0, - "step": 28865 - }, - { - "epoch": 2.207007282527668, - "grad_norm": 0.0007341348100453615, - "learning_rate": 0.0001999975979875634, - "loss": 46.0, - "step": 28866 - }, - { - "epoch": 2.2070837395110576, - "grad_norm": 0.0010737431002780795, - "learning_rate": 0.0001999975978210785, - "loss": 46.0, - "step": 28867 - }, - { - "epoch": 2.2071601964944474, - "grad_norm": 0.0015940858284011483, - "learning_rate": 0.0001999975976545878, - "loss": 46.0, - "step": 28868 - }, - { - "epoch": 2.207236653477837, - "grad_norm": 0.0010333394166082144, - "learning_rate": 0.00019999759748809135, - "loss": 46.0, - "step": 28869 - }, - { - "epoch": 2.207313110461227, - "grad_norm": 0.0005897369119338691, - "learning_rate": 0.00019999759732158913, - "loss": 46.0, - "step": 28870 - }, - { - "epoch": 2.2073895674446167, - "grad_norm": 0.0035926199052482843, - "learning_rate": 0.0001999975971550811, - "loss": 46.0, - "step": 28871 - }, - { - "epoch": 2.207466024428006, - "grad_norm": 0.0009241283987648785, - "learning_rate": 0.00019999759698856735, - "loss": 46.0, - "step": 28872 - }, - { - "epoch": 2.2075424814113958, - "grad_norm": 0.0006126143271103501, - "learning_rate": 0.00019999759682204783, - "loss": 46.0, - "step": 28873 - }, - { - "epoch": 2.2076189383947855, - "grad_norm": 0.00040630382136441767, - "learning_rate": 0.00019999759665552248, - "loss": 46.0, - "step": 28874 - }, - { - "epoch": 2.2076953953781753, - "grad_norm": 0.0035690891090780497, - "learning_rate": 0.00019999759648899144, - "loss": 46.0, - "step": 28875 - }, - { - "epoch": 2.207771852361565, - "grad_norm": 0.0008592213271185756, - "learning_rate": 0.0001999975963224546, - "loss": 46.0, - "step": 28876 - }, - { - "epoch": 2.207848309344955, - "grad_norm": 0.0007553239120170474, - "learning_rate": 0.00019999759615591195, - "loss": 46.0, - "step": 28877 - }, - { - "epoch": 2.2079247663283446, - "grad_norm": 0.0004961151862516999, - "learning_rate": 0.0001999975959893636, - "loss": 46.0, - "step": 28878 - }, - { - "epoch": 2.2080012233117343, - "grad_norm": 0.0014502119738608599, - "learning_rate": 0.00019999759582280943, - "loss": 46.0, - "step": 28879 - }, - { - "epoch": 2.208077680295124, - "grad_norm": 0.0011711064726114273, - "learning_rate": 0.00019999759565624952, - "loss": 46.0, - "step": 28880 - }, - { - "epoch": 2.208154137278514, - "grad_norm": 0.00160564377438277, - "learning_rate": 0.0001999975954896838, - "loss": 46.0, - "step": 28881 - }, - { - "epoch": 2.2082305942619036, - "grad_norm": 0.001099162152968347, - "learning_rate": 0.00019999759532311236, - "loss": 46.0, - "step": 28882 - }, - { - "epoch": 2.208307051245293, - "grad_norm": 0.004697258118540049, - "learning_rate": 0.00019999759515653513, - "loss": 46.0, - "step": 28883 - }, - { - "epoch": 2.2083835082286827, - "grad_norm": 0.0012617643224075437, - "learning_rate": 0.00019999759498995215, - "loss": 46.0, - "step": 28884 - }, - { - "epoch": 2.2084599652120724, - "grad_norm": 0.0015428911428898573, - "learning_rate": 0.00019999759482336335, - "loss": 46.0, - "step": 28885 - }, - { - "epoch": 2.208536422195462, - "grad_norm": 0.00045823512482456863, - "learning_rate": 0.00019999759465676883, - "loss": 46.0, - "step": 28886 - }, - { - "epoch": 2.208612879178852, - "grad_norm": 0.0012080523883923888, - "learning_rate": 0.0001999975944901685, - "loss": 46.0, - "step": 28887 - }, - { - "epoch": 2.2086893361622417, - "grad_norm": 0.0011397481430321932, - "learning_rate": 0.00019999759432356244, - "loss": 46.0, - "step": 28888 - }, - { - "epoch": 2.2087657931456315, - "grad_norm": 0.0012885655742138624, - "learning_rate": 0.0001999975941569506, - "loss": 46.0, - "step": 28889 - }, - { - "epoch": 2.2088422501290212, - "grad_norm": 0.00044631215860135853, - "learning_rate": 0.00019999759399033298, - "loss": 46.0, - "step": 28890 - }, - { - "epoch": 2.208918707112411, - "grad_norm": 0.0004790983221028, - "learning_rate": 0.0001999975938237096, - "loss": 46.0, - "step": 28891 - }, - { - "epoch": 2.2089951640958008, - "grad_norm": 0.0013729401398450136, - "learning_rate": 0.00019999759365708043, - "loss": 46.0, - "step": 28892 - }, - { - "epoch": 2.2090716210791905, - "grad_norm": 0.0013881829800084233, - "learning_rate": 0.0001999975934904455, - "loss": 46.0, - "step": 28893 - }, - { - "epoch": 2.20914807806258, - "grad_norm": 0.00031469701207242906, - "learning_rate": 0.00019999759332380484, - "loss": 46.0, - "step": 28894 - }, - { - "epoch": 2.2092245350459696, - "grad_norm": 0.0004291992518119514, - "learning_rate": 0.00019999759315715836, - "loss": 46.0, - "step": 28895 - }, - { - "epoch": 2.2093009920293594, - "grad_norm": 0.0068331751972436905, - "learning_rate": 0.00019999759299050613, - "loss": 46.0, - "step": 28896 - }, - { - "epoch": 2.209377449012749, - "grad_norm": 0.000925276952330023, - "learning_rate": 0.00019999759282384813, - "loss": 46.0, - "step": 28897 - }, - { - "epoch": 2.209453905996139, - "grad_norm": 0.00033266397076658905, - "learning_rate": 0.00019999759265718436, - "loss": 46.0, - "step": 28898 - }, - { - "epoch": 2.2095303629795287, - "grad_norm": 0.001193828647956252, - "learning_rate": 0.0001999975924905148, - "loss": 46.0, - "step": 28899 - }, - { - "epoch": 2.2096068199629184, - "grad_norm": 0.0005277470336295664, - "learning_rate": 0.00019999759232383952, - "loss": 46.0, - "step": 28900 - }, - { - "epoch": 2.209683276946308, - "grad_norm": 0.0014322582865133882, - "learning_rate": 0.00019999759215715842, - "loss": 46.0, - "step": 28901 - }, - { - "epoch": 2.209759733929698, - "grad_norm": 0.0012266046833246946, - "learning_rate": 0.00019999759199047158, - "loss": 46.0, - "step": 28902 - }, - { - "epoch": 2.2098361909130877, - "grad_norm": 0.002536962041631341, - "learning_rate": 0.00019999759182377897, - "loss": 46.0, - "step": 28903 - }, - { - "epoch": 2.209912647896477, - "grad_norm": 0.0010576860513538122, - "learning_rate": 0.00019999759165708058, - "loss": 46.0, - "step": 28904 - }, - { - "epoch": 2.2099891048798668, - "grad_norm": 0.0016458620084449649, - "learning_rate": 0.00019999759149037644, - "loss": 46.0, - "step": 28905 - }, - { - "epoch": 2.2100655618632565, - "grad_norm": 0.0017055054195225239, - "learning_rate": 0.0001999975913236665, - "loss": 46.0, - "step": 28906 - }, - { - "epoch": 2.2101420188466463, - "grad_norm": 0.0030669074039906263, - "learning_rate": 0.0001999975911569508, - "loss": 46.0, - "step": 28907 - }, - { - "epoch": 2.210218475830036, - "grad_norm": 0.0017010248266160488, - "learning_rate": 0.00019999759099022937, - "loss": 46.0, - "step": 28908 - }, - { - "epoch": 2.210294932813426, - "grad_norm": 0.0011826349655166268, - "learning_rate": 0.00019999759082350212, - "loss": 46.0, - "step": 28909 - }, - { - "epoch": 2.2103713897968156, - "grad_norm": 0.0007568614673800766, - "learning_rate": 0.00019999759065676912, - "loss": 46.0, - "step": 28910 - }, - { - "epoch": 2.2104478467802053, - "grad_norm": 0.0037644265685230494, - "learning_rate": 0.00019999759049003034, - "loss": 46.0, - "step": 28911 - }, - { - "epoch": 2.210524303763595, - "grad_norm": 0.0008171837544068694, - "learning_rate": 0.0001999975903232858, - "loss": 46.0, - "step": 28912 - }, - { - "epoch": 2.210600760746985, - "grad_norm": 0.0005448766751214862, - "learning_rate": 0.0001999975901565355, - "loss": 46.0, - "step": 28913 - }, - { - "epoch": 2.2106772177303746, - "grad_norm": 0.0006229626596905291, - "learning_rate": 0.00019999758998977943, - "loss": 46.0, - "step": 28914 - }, - { - "epoch": 2.2107536747137644, - "grad_norm": 0.0005808335263282061, - "learning_rate": 0.00019999758982301757, - "loss": 46.0, - "step": 28915 - }, - { - "epoch": 2.2108301316971537, - "grad_norm": 0.0008625047048553824, - "learning_rate": 0.00019999758965624995, - "loss": 46.0, - "step": 28916 - }, - { - "epoch": 2.2109065886805435, - "grad_norm": 0.0015434701927006245, - "learning_rate": 0.00019999758948947656, - "loss": 46.0, - "step": 28917 - }, - { - "epoch": 2.210983045663933, - "grad_norm": 0.0006651518633589149, - "learning_rate": 0.0001999975893226974, - "loss": 46.0, - "step": 28918 - }, - { - "epoch": 2.211059502647323, - "grad_norm": 0.0009781579719856381, - "learning_rate": 0.0001999975891559125, - "loss": 46.0, - "step": 28919 - }, - { - "epoch": 2.2111359596307127, - "grad_norm": 0.0038510675076395273, - "learning_rate": 0.0001999975889891218, - "loss": 46.0, - "step": 28920 - }, - { - "epoch": 2.2112124166141025, - "grad_norm": 0.0006015028338879347, - "learning_rate": 0.00019999758882232533, - "loss": 46.0, - "step": 28921 - }, - { - "epoch": 2.2112888735974923, - "grad_norm": 0.00036297718179412186, - "learning_rate": 0.0001999975886555231, - "loss": 46.0, - "step": 28922 - }, - { - "epoch": 2.211365330580882, - "grad_norm": 0.0009397089597769082, - "learning_rate": 0.00019999758848871508, - "loss": 46.0, - "step": 28923 - }, - { - "epoch": 2.211441787564272, - "grad_norm": 0.0017561400309205055, - "learning_rate": 0.00019999758832190133, - "loss": 46.0, - "step": 28924 - }, - { - "epoch": 2.2115182445476615, - "grad_norm": 0.0021246513351798058, - "learning_rate": 0.00019999758815508178, - "loss": 46.0, - "step": 28925 - }, - { - "epoch": 2.211594701531051, - "grad_norm": 0.0005786185502074659, - "learning_rate": 0.00019999758798825646, - "loss": 46.0, - "step": 28926 - }, - { - "epoch": 2.2116711585144406, - "grad_norm": 0.0005074609653092921, - "learning_rate": 0.0001999975878214254, - "loss": 46.0, - "step": 28927 - }, - { - "epoch": 2.2117476154978304, - "grad_norm": 0.0008231866522692144, - "learning_rate": 0.00019999758765458853, - "loss": 46.0, - "step": 28928 - }, - { - "epoch": 2.21182407248122, - "grad_norm": 0.00392834423109889, - "learning_rate": 0.0001999975874877459, - "loss": 46.0, - "step": 28929 - }, - { - "epoch": 2.21190052946461, - "grad_norm": 0.0005317357718013227, - "learning_rate": 0.00019999758732089753, - "loss": 46.0, - "step": 28930 - }, - { - "epoch": 2.2119769864479997, - "grad_norm": 0.0017007219139486551, - "learning_rate": 0.00019999758715404337, - "loss": 46.0, - "step": 28931 - }, - { - "epoch": 2.2120534434313894, - "grad_norm": 0.002958127995952964, - "learning_rate": 0.00019999758698718346, - "loss": 46.0, - "step": 28932 - }, - { - "epoch": 2.212129900414779, - "grad_norm": 0.00095657620113343, - "learning_rate": 0.00019999758682031775, - "loss": 46.0, - "step": 28933 - }, - { - "epoch": 2.212206357398169, - "grad_norm": 0.0016081796493381262, - "learning_rate": 0.0001999975866534463, - "loss": 46.0, - "step": 28934 - }, - { - "epoch": 2.2122828143815587, - "grad_norm": 0.0012840284034609795, - "learning_rate": 0.00019999758648656904, - "loss": 46.0, - "step": 28935 - }, - { - "epoch": 2.2123592713649485, - "grad_norm": 0.0019641744438558817, - "learning_rate": 0.00019999758631968604, - "loss": 46.0, - "step": 28936 - }, - { - "epoch": 2.2124357283483382, - "grad_norm": 0.0012826048769056797, - "learning_rate": 0.00019999758615279727, - "loss": 46.0, - "step": 28937 - }, - { - "epoch": 2.2125121853317276, - "grad_norm": 0.0023787023965269327, - "learning_rate": 0.00019999758598590272, - "loss": 46.0, - "step": 28938 - }, - { - "epoch": 2.2125886423151173, - "grad_norm": 0.0016007553786039352, - "learning_rate": 0.0001999975858190024, - "loss": 46.0, - "step": 28939 - }, - { - "epoch": 2.212665099298507, - "grad_norm": 0.0034340412821620703, - "learning_rate": 0.00019999758565209633, - "loss": 46.0, - "step": 28940 - }, - { - "epoch": 2.212741556281897, - "grad_norm": 0.0003411919460631907, - "learning_rate": 0.0001999975854851845, - "loss": 46.0, - "step": 28941 - }, - { - "epoch": 2.2128180132652866, - "grad_norm": 0.0008884069975465536, - "learning_rate": 0.00019999758531826688, - "loss": 46.0, - "step": 28942 - }, - { - "epoch": 2.2128944702486764, - "grad_norm": 0.0008395428885705769, - "learning_rate": 0.0001999975851513435, - "loss": 46.0, - "step": 28943 - }, - { - "epoch": 2.212970927232066, - "grad_norm": 0.0028906133957207203, - "learning_rate": 0.00019999758498441433, - "loss": 46.0, - "step": 28944 - }, - { - "epoch": 2.213047384215456, - "grad_norm": 0.0013134530745446682, - "learning_rate": 0.0001999975848174794, - "loss": 46.0, - "step": 28945 - }, - { - "epoch": 2.2131238411988456, - "grad_norm": 0.001592721207998693, - "learning_rate": 0.0001999975846505387, - "loss": 46.0, - "step": 28946 - }, - { - "epoch": 2.2132002981822354, - "grad_norm": 0.0007467641262337565, - "learning_rate": 0.00019999758448359224, - "loss": 46.0, - "step": 28947 - }, - { - "epoch": 2.2132767551656247, - "grad_norm": 0.00161384092643857, - "learning_rate": 0.00019999758431663998, - "loss": 46.0, - "step": 28948 - }, - { - "epoch": 2.2133532121490145, - "grad_norm": 0.0009858588455244899, - "learning_rate": 0.00019999758414968198, - "loss": 46.0, - "step": 28949 - }, - { - "epoch": 2.2134296691324042, - "grad_norm": 0.00833367183804512, - "learning_rate": 0.0001999975839827182, - "loss": 46.0, - "step": 28950 - }, - { - "epoch": 2.213506126115794, - "grad_norm": 0.0013271848438307643, - "learning_rate": 0.00019999758381574866, - "loss": 46.0, - "step": 28951 - }, - { - "epoch": 2.2135825830991838, - "grad_norm": 0.0022770448122173548, - "learning_rate": 0.00019999758364877334, - "loss": 46.0, - "step": 28952 - }, - { - "epoch": 2.2136590400825735, - "grad_norm": 0.002240579342469573, - "learning_rate": 0.00019999758348179228, - "loss": 46.0, - "step": 28953 - }, - { - "epoch": 2.2137354970659633, - "grad_norm": 0.0006508915685117245, - "learning_rate": 0.00019999758331480544, - "loss": 46.0, - "step": 28954 - }, - { - "epoch": 2.213811954049353, - "grad_norm": 0.0009769409662112594, - "learning_rate": 0.0001999975831478128, - "loss": 46.0, - "step": 28955 - }, - { - "epoch": 2.213888411032743, - "grad_norm": 0.0014440763043239713, - "learning_rate": 0.0001999975829808144, - "loss": 46.0, - "step": 28956 - }, - { - "epoch": 2.2139648680161326, - "grad_norm": 0.0008397704223170877, - "learning_rate": 0.00019999758281381028, - "loss": 46.0, - "step": 28957 - }, - { - "epoch": 2.2140413249995223, - "grad_norm": 0.0013478489127010107, - "learning_rate": 0.00019999758264680032, - "loss": 46.0, - "step": 28958 - }, - { - "epoch": 2.214117781982912, - "grad_norm": 0.001117086154408753, - "learning_rate": 0.0001999975824797846, - "loss": 46.0, - "step": 28959 - }, - { - "epoch": 2.2141942389663014, - "grad_norm": 0.0009121336042881012, - "learning_rate": 0.00019999758231276316, - "loss": 46.0, - "step": 28960 - }, - { - "epoch": 2.214270695949691, - "grad_norm": 0.001009747269563377, - "learning_rate": 0.00019999758214573593, - "loss": 46.0, - "step": 28961 - }, - { - "epoch": 2.214347152933081, - "grad_norm": 0.0005775009631179273, - "learning_rate": 0.0001999975819787029, - "loss": 46.0, - "step": 28962 - }, - { - "epoch": 2.2144236099164707, - "grad_norm": 0.004599041771143675, - "learning_rate": 0.00019999758181166416, - "loss": 46.0, - "step": 28963 - }, - { - "epoch": 2.2145000668998605, - "grad_norm": 0.0018458062550053, - "learning_rate": 0.0001999975816446196, - "loss": 46.0, - "step": 28964 - }, - { - "epoch": 2.21457652388325, - "grad_norm": 0.0008384262910112739, - "learning_rate": 0.0001999975814775693, - "loss": 46.0, - "step": 28965 - }, - { - "epoch": 2.21465298086664, - "grad_norm": 0.0021188645623624325, - "learning_rate": 0.0001999975813105132, - "loss": 46.0, - "step": 28966 - }, - { - "epoch": 2.2147294378500297, - "grad_norm": 0.0008339123451150954, - "learning_rate": 0.00019999758114345136, - "loss": 46.0, - "step": 28967 - }, - { - "epoch": 2.2148058948334195, - "grad_norm": 0.001448485883884132, - "learning_rate": 0.00019999758097638372, - "loss": 46.0, - "step": 28968 - }, - { - "epoch": 2.2148823518168093, - "grad_norm": 0.0024356681387871504, - "learning_rate": 0.00019999758080931033, - "loss": 46.0, - "step": 28969 - }, - { - "epoch": 2.2149588088001986, - "grad_norm": 0.0020377894397825003, - "learning_rate": 0.00019999758064223117, - "loss": 46.0, - "step": 28970 - }, - { - "epoch": 2.2150352657835883, - "grad_norm": 0.0015785840805619955, - "learning_rate": 0.00019999758047514624, - "loss": 46.0, - "step": 28971 - }, - { - "epoch": 2.215111722766978, - "grad_norm": 0.0015167194651439786, - "learning_rate": 0.00019999758030805556, - "loss": 46.0, - "step": 28972 - }, - { - "epoch": 2.215188179750368, - "grad_norm": 0.0036254897713661194, - "learning_rate": 0.00019999758014095908, - "loss": 46.0, - "step": 28973 - }, - { - "epoch": 2.2152646367337576, - "grad_norm": 0.0017698084702715278, - "learning_rate": 0.00019999757997385685, - "loss": 46.0, - "step": 28974 - }, - { - "epoch": 2.2153410937171474, - "grad_norm": 0.0010463435901328921, - "learning_rate": 0.00019999757980674883, - "loss": 46.0, - "step": 28975 - }, - { - "epoch": 2.215417550700537, - "grad_norm": 0.001757300109602511, - "learning_rate": 0.00019999757963963505, - "loss": 46.0, - "step": 28976 - }, - { - "epoch": 2.215494007683927, - "grad_norm": 0.0006821682909503579, - "learning_rate": 0.0001999975794725155, - "loss": 46.0, - "step": 28977 - }, - { - "epoch": 2.2155704646673167, - "grad_norm": 0.005596705246716738, - "learning_rate": 0.0001999975793053902, - "loss": 46.0, - "step": 28978 - }, - { - "epoch": 2.2156469216507064, - "grad_norm": 0.0012318710796535015, - "learning_rate": 0.00019999757913825912, - "loss": 46.0, - "step": 28979 - }, - { - "epoch": 2.215723378634096, - "grad_norm": 0.0012017672415822744, - "learning_rate": 0.00019999757897112225, - "loss": 46.0, - "step": 28980 - }, - { - "epoch": 2.215799835617486, - "grad_norm": 0.0015774633502587676, - "learning_rate": 0.00019999757880397962, - "loss": 46.0, - "step": 28981 - }, - { - "epoch": 2.2158762926008753, - "grad_norm": 0.002926749177277088, - "learning_rate": 0.00019999757863683123, - "loss": 46.0, - "step": 28982 - }, - { - "epoch": 2.215952749584265, - "grad_norm": 0.0011569702764973044, - "learning_rate": 0.00019999757846967707, - "loss": 46.0, - "step": 28983 - }, - { - "epoch": 2.216029206567655, - "grad_norm": 0.0007350613013841212, - "learning_rate": 0.00019999757830251716, - "loss": 46.0, - "step": 28984 - }, - { - "epoch": 2.2161056635510445, - "grad_norm": 0.0005084079457446933, - "learning_rate": 0.00019999757813535146, - "loss": 46.0, - "step": 28985 - }, - { - "epoch": 2.2161821205344343, - "grad_norm": 0.0005607547936961055, - "learning_rate": 0.00019999757796817998, - "loss": 46.0, - "step": 28986 - }, - { - "epoch": 2.216258577517824, - "grad_norm": 0.0009682882810011506, - "learning_rate": 0.00019999757780100275, - "loss": 46.0, - "step": 28987 - }, - { - "epoch": 2.216335034501214, - "grad_norm": 0.00064097746508196, - "learning_rate": 0.00019999757763381975, - "loss": 46.0, - "step": 28988 - }, - { - "epoch": 2.2164114914846036, - "grad_norm": 0.002359879668802023, - "learning_rate": 0.00019999757746663095, - "loss": 46.0, - "step": 28989 - }, - { - "epoch": 2.2164879484679934, - "grad_norm": 0.0016890749102458358, - "learning_rate": 0.00019999757729943643, - "loss": 46.0, - "step": 28990 - }, - { - "epoch": 2.216564405451383, - "grad_norm": 0.0005565067403949797, - "learning_rate": 0.0001999975771322361, - "loss": 46.0, - "step": 28991 - }, - { - "epoch": 2.2166408624347724, - "grad_norm": 0.0008554342784918845, - "learning_rate": 0.00019999757696503002, - "loss": 46.0, - "step": 28992 - }, - { - "epoch": 2.216717319418162, - "grad_norm": 0.002095424570143223, - "learning_rate": 0.00019999757679781816, - "loss": 46.0, - "step": 28993 - }, - { - "epoch": 2.216793776401552, - "grad_norm": 0.0017710519023239613, - "learning_rate": 0.00019999757663060054, - "loss": 46.0, - "step": 28994 - }, - { - "epoch": 2.2168702333849417, - "grad_norm": 0.003226671600714326, - "learning_rate": 0.00019999757646337713, - "loss": 46.0, - "step": 28995 - }, - { - "epoch": 2.2169466903683315, - "grad_norm": 0.0006709518493153155, - "learning_rate": 0.00019999757629614797, - "loss": 46.0, - "step": 28996 - }, - { - "epoch": 2.2170231473517212, - "grad_norm": 0.0033667893148958683, - "learning_rate": 0.00019999757612891307, - "loss": 46.0, - "step": 28997 - }, - { - "epoch": 2.217099604335111, - "grad_norm": 0.0008563104784116149, - "learning_rate": 0.00019999757596167233, - "loss": 46.0, - "step": 28998 - }, - { - "epoch": 2.2171760613185008, - "grad_norm": 0.0011014026822522283, - "learning_rate": 0.00019999757579442585, - "loss": 46.0, - "step": 28999 - }, - { - "epoch": 2.2172525183018905, - "grad_norm": 0.0014451269526034594, - "learning_rate": 0.00019999757562717363, - "loss": 46.0, - "step": 29000 - }, - { - "epoch": 2.2173289752852803, - "grad_norm": 0.0015442516887560487, - "learning_rate": 0.00019999757545991563, - "loss": 46.0, - "step": 29001 - }, - { - "epoch": 2.21740543226867, - "grad_norm": 0.0016162347747012973, - "learning_rate": 0.00019999757529265183, - "loss": 46.0, - "step": 29002 - }, - { - "epoch": 2.2174818892520594, - "grad_norm": 0.00045074548688717186, - "learning_rate": 0.00019999757512538228, - "loss": 46.0, - "step": 29003 - }, - { - "epoch": 2.217558346235449, - "grad_norm": 0.001895284396596253, - "learning_rate": 0.000199997574958107, - "loss": 46.0, - "step": 29004 - }, - { - "epoch": 2.217634803218839, - "grad_norm": 0.0005610543303191662, - "learning_rate": 0.0001999975747908259, - "loss": 46.0, - "step": 29005 - }, - { - "epoch": 2.2177112602022286, - "grad_norm": 0.0018726419657468796, - "learning_rate": 0.00019999757462353906, - "loss": 46.0, - "step": 29006 - }, - { - "epoch": 2.2177877171856184, - "grad_norm": 0.0012618129840120673, - "learning_rate": 0.0001999975744562464, - "loss": 46.0, - "step": 29007 - }, - { - "epoch": 2.217864174169008, - "grad_norm": 0.002981960540637374, - "learning_rate": 0.00019999757428894804, - "loss": 46.0, - "step": 29008 - }, - { - "epoch": 2.217940631152398, - "grad_norm": 0.005760539788752794, - "learning_rate": 0.00019999757412164385, - "loss": 46.0, - "step": 29009 - }, - { - "epoch": 2.2180170881357877, - "grad_norm": 0.0030491300858557224, - "learning_rate": 0.00019999757395433392, - "loss": 46.0, - "step": 29010 - }, - { - "epoch": 2.2180935451191774, - "grad_norm": 0.0012788968160748482, - "learning_rate": 0.00019999757378701821, - "loss": 46.0, - "step": 29011 - }, - { - "epoch": 2.218170002102567, - "grad_norm": 0.0007759974105283618, - "learning_rate": 0.00019999757361969676, - "loss": 46.0, - "step": 29012 - }, - { - "epoch": 2.218246459085957, - "grad_norm": 0.00045431137550622225, - "learning_rate": 0.00019999757345236948, - "loss": 46.0, - "step": 29013 - }, - { - "epoch": 2.2183229160693463, - "grad_norm": 0.0020434728357940912, - "learning_rate": 0.0001999975732850365, - "loss": 46.0, - "step": 29014 - }, - { - "epoch": 2.218399373052736, - "grad_norm": 0.0011778234038501978, - "learning_rate": 0.0001999975731176977, - "loss": 46.0, - "step": 29015 - }, - { - "epoch": 2.218475830036126, - "grad_norm": 0.0006551499245688319, - "learning_rate": 0.00019999757295035314, - "loss": 46.0, - "step": 29016 - }, - { - "epoch": 2.2185522870195156, - "grad_norm": 0.0012449778150767088, - "learning_rate": 0.00019999757278300285, - "loss": 46.0, - "step": 29017 - }, - { - "epoch": 2.2186287440029053, - "grad_norm": 0.0014794125454500318, - "learning_rate": 0.00019999757261564676, - "loss": 46.0, - "step": 29018 - }, - { - "epoch": 2.218705200986295, - "grad_norm": 0.001708697876892984, - "learning_rate": 0.0001999975724482849, - "loss": 46.0, - "step": 29019 - }, - { - "epoch": 2.218781657969685, - "grad_norm": 0.0008790239808149636, - "learning_rate": 0.00019999757228091728, - "loss": 46.0, - "step": 29020 - }, - { - "epoch": 2.2188581149530746, - "grad_norm": 0.0005551482900045812, - "learning_rate": 0.0001999975721135439, - "loss": 46.0, - "step": 29021 - }, - { - "epoch": 2.2189345719364644, - "grad_norm": 0.0006564059294760227, - "learning_rate": 0.00019999757194616473, - "loss": 46.0, - "step": 29022 - }, - { - "epoch": 2.219011028919854, - "grad_norm": 0.0008292549755424261, - "learning_rate": 0.00019999757177877978, - "loss": 46.0, - "step": 29023 - }, - { - "epoch": 2.219087485903244, - "grad_norm": 0.0009482022142037749, - "learning_rate": 0.00019999757161138907, - "loss": 46.0, - "step": 29024 - }, - { - "epoch": 2.219163942886633, - "grad_norm": 0.0020089088939130306, - "learning_rate": 0.0001999975714439926, - "loss": 46.0, - "step": 29025 - }, - { - "epoch": 2.219240399870023, - "grad_norm": 0.00059704011073336, - "learning_rate": 0.00019999757127659037, - "loss": 46.0, - "step": 29026 - }, - { - "epoch": 2.2193168568534127, - "grad_norm": 0.0024245691020041704, - "learning_rate": 0.00019999757110918237, - "loss": 46.0, - "step": 29027 - }, - { - "epoch": 2.2193933138368025, - "grad_norm": 0.0011015565833076835, - "learning_rate": 0.00019999757094176857, - "loss": 46.0, - "step": 29028 - }, - { - "epoch": 2.2194697708201923, - "grad_norm": 0.0008510947227478027, - "learning_rate": 0.00019999757077434903, - "loss": 46.0, - "step": 29029 - }, - { - "epoch": 2.219546227803582, - "grad_norm": 0.0020030790474265814, - "learning_rate": 0.0001999975706069237, - "loss": 46.0, - "step": 29030 - }, - { - "epoch": 2.219622684786972, - "grad_norm": 0.0009097789879888296, - "learning_rate": 0.00019999757043949261, - "loss": 46.0, - "step": 29031 - }, - { - "epoch": 2.2196991417703615, - "grad_norm": 0.0007160550449043512, - "learning_rate": 0.00019999757027205575, - "loss": 46.0, - "step": 29032 - }, - { - "epoch": 2.2197755987537513, - "grad_norm": 0.0007419795147143304, - "learning_rate": 0.00019999757010461314, - "loss": 46.0, - "step": 29033 - }, - { - "epoch": 2.219852055737141, - "grad_norm": 0.001492612063884735, - "learning_rate": 0.00019999756993716475, - "loss": 46.0, - "step": 29034 - }, - { - "epoch": 2.2199285127205304, - "grad_norm": 0.0010232396889477968, - "learning_rate": 0.00019999756976971057, - "loss": 46.0, - "step": 29035 - }, - { - "epoch": 2.22000496970392, - "grad_norm": 0.0006374738295562565, - "learning_rate": 0.00019999756960225064, - "loss": 46.0, - "step": 29036 - }, - { - "epoch": 2.22008142668731, - "grad_norm": 0.0018267721170559525, - "learning_rate": 0.0001999975694347849, - "loss": 46.0, - "step": 29037 - }, - { - "epoch": 2.2201578836706997, - "grad_norm": 0.001536606578156352, - "learning_rate": 0.00019999756926731345, - "loss": 46.0, - "step": 29038 - }, - { - "epoch": 2.2202343406540894, - "grad_norm": 0.000566108908969909, - "learning_rate": 0.00019999756909983623, - "loss": 46.0, - "step": 29039 - }, - { - "epoch": 2.220310797637479, - "grad_norm": 0.002099972916767001, - "learning_rate": 0.0001999975689323532, - "loss": 46.0, - "step": 29040 - }, - { - "epoch": 2.220387254620869, - "grad_norm": 0.0007780368905514479, - "learning_rate": 0.0001999975687648644, - "loss": 46.0, - "step": 29041 - }, - { - "epoch": 2.2204637116042587, - "grad_norm": 0.01432411465793848, - "learning_rate": 0.00019999756859736986, - "loss": 46.0, - "step": 29042 - }, - { - "epoch": 2.2205401685876485, - "grad_norm": 0.002702204743400216, - "learning_rate": 0.00019999756842986954, - "loss": 46.0, - "step": 29043 - }, - { - "epoch": 2.2206166255710382, - "grad_norm": 0.001272039138711989, - "learning_rate": 0.00019999756826236348, - "loss": 46.0, - "step": 29044 - }, - { - "epoch": 2.220693082554428, - "grad_norm": 0.000633472518529743, - "learning_rate": 0.00019999756809485159, - "loss": 46.0, - "step": 29045 - }, - { - "epoch": 2.2207695395378177, - "grad_norm": 0.0009532385738566518, - "learning_rate": 0.00019999756792733398, - "loss": 46.0, - "step": 29046 - }, - { - "epoch": 2.220845996521207, - "grad_norm": 0.0037663697730749846, - "learning_rate": 0.0001999975677598106, - "loss": 46.0, - "step": 29047 - }, - { - "epoch": 2.220922453504597, - "grad_norm": 0.001251509878784418, - "learning_rate": 0.00019999756759228138, - "loss": 46.0, - "step": 29048 - }, - { - "epoch": 2.2209989104879866, - "grad_norm": 0.002210004720836878, - "learning_rate": 0.00019999756742474647, - "loss": 46.0, - "step": 29049 - }, - { - "epoch": 2.2210753674713763, - "grad_norm": 0.003042635740712285, - "learning_rate": 0.00019999756725720577, - "loss": 46.0, - "step": 29050 - }, - { - "epoch": 2.221151824454766, - "grad_norm": 0.0010396906873211265, - "learning_rate": 0.0001999975670896593, - "loss": 46.0, - "step": 29051 - }, - { - "epoch": 2.221228281438156, - "grad_norm": 0.0018654249142855406, - "learning_rate": 0.000199997566922107, - "loss": 46.0, - "step": 29052 - }, - { - "epoch": 2.2213047384215456, - "grad_norm": 0.0015053300885483623, - "learning_rate": 0.00019999756675454902, - "loss": 46.0, - "step": 29053 - }, - { - "epoch": 2.2213811954049354, - "grad_norm": 0.0016937172040343285, - "learning_rate": 0.00019999756658698522, - "loss": 46.0, - "step": 29054 - }, - { - "epoch": 2.221457652388325, - "grad_norm": 0.004003510810434818, - "learning_rate": 0.0001999975664194157, - "loss": 46.0, - "step": 29055 - }, - { - "epoch": 2.221534109371715, - "grad_norm": 0.0014084666036069393, - "learning_rate": 0.00019999756625184038, - "loss": 46.0, - "step": 29056 - }, - { - "epoch": 2.2216105663551042, - "grad_norm": 0.0011506223818287253, - "learning_rate": 0.0001999975660842593, - "loss": 46.0, - "step": 29057 - }, - { - "epoch": 2.221687023338494, - "grad_norm": 0.0011702999472618103, - "learning_rate": 0.00019999756591667243, - "loss": 46.0, - "step": 29058 - }, - { - "epoch": 2.2217634803218838, - "grad_norm": 0.0009672439773567021, - "learning_rate": 0.0001999975657490798, - "loss": 46.0, - "step": 29059 - }, - { - "epoch": 2.2218399373052735, - "grad_norm": 0.0016482521314173937, - "learning_rate": 0.0001999975655814814, - "loss": 46.0, - "step": 29060 - }, - { - "epoch": 2.2219163942886633, - "grad_norm": 0.002098256256431341, - "learning_rate": 0.00019999756541387722, - "loss": 46.0, - "step": 29061 - }, - { - "epoch": 2.221992851272053, - "grad_norm": 0.0007959646754898131, - "learning_rate": 0.0001999975652462673, - "loss": 46.0, - "step": 29062 - }, - { - "epoch": 2.222069308255443, - "grad_norm": 0.0004244661540724337, - "learning_rate": 0.00019999756507865159, - "loss": 46.0, - "step": 29063 - }, - { - "epoch": 2.2221457652388326, - "grad_norm": 0.011796372942626476, - "learning_rate": 0.0001999975649110301, - "loss": 46.0, - "step": 29064 - }, - { - "epoch": 2.2222222222222223, - "grad_norm": 0.0017134278314188123, - "learning_rate": 0.00019999756474340286, - "loss": 46.0, - "step": 29065 - }, - { - "epoch": 2.222298679205612, - "grad_norm": 0.0015598268946632743, - "learning_rate": 0.00019999756457576986, - "loss": 46.0, - "step": 29066 - }, - { - "epoch": 2.222375136189002, - "grad_norm": 0.0009513128316029906, - "learning_rate": 0.00019999756440813106, - "loss": 46.0, - "step": 29067 - }, - { - "epoch": 2.2224515931723916, - "grad_norm": 0.0017385525861755013, - "learning_rate": 0.0001999975642404865, - "loss": 46.0, - "step": 29068 - }, - { - "epoch": 2.222528050155781, - "grad_norm": 0.0072024790570139885, - "learning_rate": 0.00019999756407283617, - "loss": 46.0, - "step": 29069 - }, - { - "epoch": 2.2226045071391707, - "grad_norm": 0.0007386407232843339, - "learning_rate": 0.0001999975639051801, - "loss": 46.0, - "step": 29070 - }, - { - "epoch": 2.2226809641225604, - "grad_norm": 0.0009203071822412312, - "learning_rate": 0.00019999756373751824, - "loss": 46.0, - "step": 29071 - }, - { - "epoch": 2.22275742110595, - "grad_norm": 0.0005235082353465259, - "learning_rate": 0.0001999975635698506, - "loss": 46.0, - "step": 29072 - }, - { - "epoch": 2.22283387808934, - "grad_norm": 0.0011627032654359937, - "learning_rate": 0.00019999756340217722, - "loss": 46.0, - "step": 29073 - }, - { - "epoch": 2.2229103350727297, - "grad_norm": 0.005454846657812595, - "learning_rate": 0.00019999756323449804, - "loss": 46.0, - "step": 29074 - }, - { - "epoch": 2.2229867920561195, - "grad_norm": 0.0014561025891453028, - "learning_rate": 0.0001999975630668131, - "loss": 46.0, - "step": 29075 - }, - { - "epoch": 2.2230632490395092, - "grad_norm": 0.0006992297130636871, - "learning_rate": 0.00019999756289912238, - "loss": 46.0, - "step": 29076 - }, - { - "epoch": 2.223139706022899, - "grad_norm": 0.000693952664732933, - "learning_rate": 0.00019999756273142593, - "loss": 46.0, - "step": 29077 - }, - { - "epoch": 2.2232161630062888, - "grad_norm": 0.0017759924521669745, - "learning_rate": 0.00019999756256372365, - "loss": 46.0, - "step": 29078 - }, - { - "epoch": 2.223292619989678, - "grad_norm": 0.0012003574520349503, - "learning_rate": 0.00019999756239601565, - "loss": 46.0, - "step": 29079 - }, - { - "epoch": 2.223369076973068, - "grad_norm": 0.0021588949020951986, - "learning_rate": 0.00019999756222830185, - "loss": 46.0, - "step": 29080 - }, - { - "epoch": 2.2234455339564576, - "grad_norm": 0.0017198417335748672, - "learning_rate": 0.0001999975620605823, - "loss": 46.0, - "step": 29081 - }, - { - "epoch": 2.2235219909398474, - "grad_norm": 0.0013162444811314344, - "learning_rate": 0.000199997561892857, - "loss": 46.0, - "step": 29082 - }, - { - "epoch": 2.223598447923237, - "grad_norm": 0.0019963777158409357, - "learning_rate": 0.0001999975617251259, - "loss": 46.0, - "step": 29083 - }, - { - "epoch": 2.223674904906627, - "grad_norm": 0.0010876356391236186, - "learning_rate": 0.000199997561557389, - "loss": 46.0, - "step": 29084 - }, - { - "epoch": 2.2237513618900167, - "grad_norm": 0.0005942288553342223, - "learning_rate": 0.0001999975613896464, - "loss": 46.0, - "step": 29085 - }, - { - "epoch": 2.2238278188734064, - "grad_norm": 0.0017909345915541053, - "learning_rate": 0.00019999756122189802, - "loss": 46.0, - "step": 29086 - }, - { - "epoch": 2.223904275856796, - "grad_norm": 0.0018780564423650503, - "learning_rate": 0.00019999756105414383, - "loss": 46.0, - "step": 29087 - }, - { - "epoch": 2.223980732840186, - "grad_norm": 0.001375261228531599, - "learning_rate": 0.00019999756088638388, - "loss": 46.0, - "step": 29088 - }, - { - "epoch": 2.2240571898235757, - "grad_norm": 0.0004752881359308958, - "learning_rate": 0.0001999975607186182, - "loss": 46.0, - "step": 29089 - }, - { - "epoch": 2.2241336468069655, - "grad_norm": 0.0005805412074550986, - "learning_rate": 0.0001999975605508467, - "loss": 46.0, - "step": 29090 - }, - { - "epoch": 2.2242101037903548, - "grad_norm": 0.00048358997446484864, - "learning_rate": 0.00019999756038306945, - "loss": 46.0, - "step": 29091 - }, - { - "epoch": 2.2242865607737445, - "grad_norm": 0.0012624766677618027, - "learning_rate": 0.00019999756021528645, - "loss": 46.0, - "step": 29092 - }, - { - "epoch": 2.2243630177571343, - "grad_norm": 0.001781711122021079, - "learning_rate": 0.00019999756004749765, - "loss": 46.0, - "step": 29093 - }, - { - "epoch": 2.224439474740524, - "grad_norm": 0.0008308349642902613, - "learning_rate": 0.00019999755987970308, - "loss": 46.0, - "step": 29094 - }, - { - "epoch": 2.224515931723914, - "grad_norm": 0.0009148496319539845, - "learning_rate": 0.00019999755971190276, - "loss": 46.0, - "step": 29095 - }, - { - "epoch": 2.2245923887073036, - "grad_norm": 0.0010498544434085488, - "learning_rate": 0.0001999975595440967, - "loss": 46.0, - "step": 29096 - }, - { - "epoch": 2.2246688456906933, - "grad_norm": 0.0008421143284067512, - "learning_rate": 0.0001999975593762848, - "loss": 46.0, - "step": 29097 - }, - { - "epoch": 2.224745302674083, - "grad_norm": 0.0019818521104753017, - "learning_rate": 0.00019999755920846717, - "loss": 46.0, - "step": 29098 - }, - { - "epoch": 2.224821759657473, - "grad_norm": 0.0006081559113226831, - "learning_rate": 0.00019999755904064376, - "loss": 46.0, - "step": 29099 - }, - { - "epoch": 2.2248982166408626, - "grad_norm": 0.000737370690330863, - "learning_rate": 0.0001999975588728146, - "loss": 46.0, - "step": 29100 - }, - { - "epoch": 2.224974673624252, - "grad_norm": 0.0006839355337433517, - "learning_rate": 0.00019999755870497968, - "loss": 46.0, - "step": 29101 - }, - { - "epoch": 2.2250511306076417, - "grad_norm": 0.0008955879602581263, - "learning_rate": 0.00019999755853713895, - "loss": 46.0, - "step": 29102 - }, - { - "epoch": 2.2251275875910315, - "grad_norm": 0.0004602540866471827, - "learning_rate": 0.00019999755836929247, - "loss": 46.0, - "step": 29103 - }, - { - "epoch": 2.2252040445744212, - "grad_norm": 0.0018893477972596884, - "learning_rate": 0.00019999755820144022, - "loss": 46.0, - "step": 29104 - }, - { - "epoch": 2.225280501557811, - "grad_norm": 0.0012615799205377698, - "learning_rate": 0.00019999755803358222, - "loss": 46.0, - "step": 29105 - }, - { - "epoch": 2.2253569585412007, - "grad_norm": 0.002261328510940075, - "learning_rate": 0.0001999975578657184, - "loss": 46.0, - "step": 29106 - }, - { - "epoch": 2.2254334155245905, - "grad_norm": 0.000585398986004293, - "learning_rate": 0.00019999755769784888, - "loss": 46.0, - "step": 29107 - }, - { - "epoch": 2.2255098725079803, - "grad_norm": 0.003862884361296892, - "learning_rate": 0.00019999755752997357, - "loss": 46.0, - "step": 29108 - }, - { - "epoch": 2.22558632949137, - "grad_norm": 0.0008283432689495385, - "learning_rate": 0.00019999755736209245, - "loss": 46.0, - "step": 29109 - }, - { - "epoch": 2.22566278647476, - "grad_norm": 0.0007984499097801745, - "learning_rate": 0.00019999755719420556, - "loss": 46.0, - "step": 29110 - }, - { - "epoch": 2.2257392434581496, - "grad_norm": 0.0005845974083058536, - "learning_rate": 0.00019999755702631292, - "loss": 46.0, - "step": 29111 - }, - { - "epoch": 2.2258157004415393, - "grad_norm": 0.0010624047135934234, - "learning_rate": 0.00019999755685841454, - "loss": 46.0, - "step": 29112 - }, - { - "epoch": 2.2258921574249286, - "grad_norm": 0.0006716938805766404, - "learning_rate": 0.00019999755669051036, - "loss": 46.0, - "step": 29113 - }, - { - "epoch": 2.2259686144083184, - "grad_norm": 0.00025363240274600685, - "learning_rate": 0.00019999755652260043, - "loss": 46.0, - "step": 29114 - }, - { - "epoch": 2.226045071391708, - "grad_norm": 0.0028277297969907522, - "learning_rate": 0.00019999755635468472, - "loss": 46.0, - "step": 29115 - }, - { - "epoch": 2.226121528375098, - "grad_norm": 0.0010492609580978751, - "learning_rate": 0.00019999755618676325, - "loss": 46.0, - "step": 29116 - }, - { - "epoch": 2.2261979853584877, - "grad_norm": 0.0014806295512244105, - "learning_rate": 0.000199997556018836, - "loss": 46.0, - "step": 29117 - }, - { - "epoch": 2.2262744423418774, - "grad_norm": 0.0009304140694439411, - "learning_rate": 0.00019999755585090297, - "loss": 46.0, - "step": 29118 - }, - { - "epoch": 2.226350899325267, - "grad_norm": 0.0007762485765852034, - "learning_rate": 0.00019999755568296418, - "loss": 46.0, - "step": 29119 - }, - { - "epoch": 2.226427356308657, - "grad_norm": 0.0019231531769037247, - "learning_rate": 0.0001999975555150196, - "loss": 46.0, - "step": 29120 - }, - { - "epoch": 2.2265038132920467, - "grad_norm": 0.0030659798067063093, - "learning_rate": 0.0001999975553470693, - "loss": 46.0, - "step": 29121 - }, - { - "epoch": 2.2265802702754365, - "grad_norm": 0.0014600735157728195, - "learning_rate": 0.0001999975551791132, - "loss": 46.0, - "step": 29122 - }, - { - "epoch": 2.226656727258826, - "grad_norm": 0.0010399551829323173, - "learning_rate": 0.00019999755501115134, - "loss": 46.0, - "step": 29123 - }, - { - "epoch": 2.2267331842422156, - "grad_norm": 0.00101527851074934, - "learning_rate": 0.0001999975548431837, - "loss": 46.0, - "step": 29124 - }, - { - "epoch": 2.2268096412256053, - "grad_norm": 0.0012097390135750175, - "learning_rate": 0.0001999975546752103, - "loss": 46.0, - "step": 29125 - }, - { - "epoch": 2.226886098208995, - "grad_norm": 0.0029957378283143044, - "learning_rate": 0.0001999975545072311, - "loss": 46.0, - "step": 29126 - }, - { - "epoch": 2.226962555192385, - "grad_norm": 0.003678811015561223, - "learning_rate": 0.00019999755433924618, - "loss": 46.0, - "step": 29127 - }, - { - "epoch": 2.2270390121757746, - "grad_norm": 0.0009390611667186022, - "learning_rate": 0.00019999755417125548, - "loss": 46.0, - "step": 29128 - }, - { - "epoch": 2.2271154691591644, - "grad_norm": 0.0006514430278912187, - "learning_rate": 0.000199997554003259, - "loss": 46.0, - "step": 29129 - }, - { - "epoch": 2.227191926142554, - "grad_norm": 0.0013060710625723004, - "learning_rate": 0.00019999755383525673, - "loss": 46.0, - "step": 29130 - }, - { - "epoch": 2.227268383125944, - "grad_norm": 0.0007545781554654241, - "learning_rate": 0.0001999975536672487, - "loss": 46.0, - "step": 29131 - }, - { - "epoch": 2.2273448401093336, - "grad_norm": 0.0017160180723294616, - "learning_rate": 0.00019999755349923493, - "loss": 46.0, - "step": 29132 - }, - { - "epoch": 2.2274212970927234, - "grad_norm": 0.0004708092601504177, - "learning_rate": 0.00019999755333121536, - "loss": 46.0, - "step": 29133 - }, - { - "epoch": 2.2274977540761127, - "grad_norm": 0.00128403480630368, - "learning_rate": 0.00019999755316319005, - "loss": 46.0, - "step": 29134 - }, - { - "epoch": 2.2275742110595025, - "grad_norm": 0.0048139686696231365, - "learning_rate": 0.00019999755299515893, - "loss": 46.0, - "step": 29135 - }, - { - "epoch": 2.2276506680428922, - "grad_norm": 0.0017914820928126574, - "learning_rate": 0.00019999755282712207, - "loss": 46.0, - "step": 29136 - }, - { - "epoch": 2.227727125026282, - "grad_norm": 0.0016948640113696456, - "learning_rate": 0.00019999755265907943, - "loss": 46.0, - "step": 29137 - }, - { - "epoch": 2.2278035820096718, - "grad_norm": 0.0007627106970176101, - "learning_rate": 0.00019999755249103105, - "loss": 46.0, - "step": 29138 - }, - { - "epoch": 2.2278800389930615, - "grad_norm": 0.0066308495588600636, - "learning_rate": 0.00019999755232297687, - "loss": 46.0, - "step": 29139 - }, - { - "epoch": 2.2279564959764513, - "grad_norm": 0.0007879217155277729, - "learning_rate": 0.00019999755215491694, - "loss": 46.0, - "step": 29140 - }, - { - "epoch": 2.228032952959841, - "grad_norm": 0.0016604687552899122, - "learning_rate": 0.00019999755198685118, - "loss": 46.0, - "step": 29141 - }, - { - "epoch": 2.228109409943231, - "grad_norm": 0.005728233605623245, - "learning_rate": 0.00019999755181877973, - "loss": 46.0, - "step": 29142 - }, - { - "epoch": 2.2281858669266206, - "grad_norm": 0.0014835888287052512, - "learning_rate": 0.00019999755165070248, - "loss": 46.0, - "step": 29143 - }, - { - "epoch": 2.2282623239100103, - "grad_norm": 0.0017513331258669496, - "learning_rate": 0.00019999755148261946, - "loss": 46.0, - "step": 29144 - }, - { - "epoch": 2.2283387808933997, - "grad_norm": 0.0019309240160509944, - "learning_rate": 0.00019999755131453067, - "loss": 46.0, - "step": 29145 - }, - { - "epoch": 2.2284152378767894, - "grad_norm": 0.0015076907584443688, - "learning_rate": 0.0001999975511464361, - "loss": 46.0, - "step": 29146 - }, - { - "epoch": 2.228491694860179, - "grad_norm": 0.002651678863912821, - "learning_rate": 0.00019999755097833578, - "loss": 46.0, - "step": 29147 - }, - { - "epoch": 2.228568151843569, - "grad_norm": 0.00253850850276649, - "learning_rate": 0.0001999975508102297, - "loss": 46.0, - "step": 29148 - }, - { - "epoch": 2.2286446088269587, - "grad_norm": 0.001062647090293467, - "learning_rate": 0.0001999975506421178, - "loss": 46.0, - "step": 29149 - }, - { - "epoch": 2.2287210658103485, - "grad_norm": 0.0008598386775702238, - "learning_rate": 0.00019999755047400017, - "loss": 46.0, - "step": 29150 - }, - { - "epoch": 2.228797522793738, - "grad_norm": 0.0016063852235674858, - "learning_rate": 0.00019999755030587676, - "loss": 46.0, - "step": 29151 - }, - { - "epoch": 2.228873979777128, - "grad_norm": 0.0008654667180962861, - "learning_rate": 0.0001999975501377476, - "loss": 46.0, - "step": 29152 - }, - { - "epoch": 2.2289504367605177, - "grad_norm": 0.0009056190610863268, - "learning_rate": 0.00019999754996961265, - "loss": 46.0, - "step": 29153 - }, - { - "epoch": 2.2290268937439075, - "grad_norm": 0.0034059497993439436, - "learning_rate": 0.00019999754980147194, - "loss": 46.0, - "step": 29154 - }, - { - "epoch": 2.2291033507272973, - "grad_norm": 0.0016089368145912886, - "learning_rate": 0.00019999754963332544, - "loss": 46.0, - "step": 29155 - }, - { - "epoch": 2.2291798077106866, - "grad_norm": 0.0010404606582596898, - "learning_rate": 0.0001999975494651732, - "loss": 46.0, - "step": 29156 - }, - { - "epoch": 2.2292562646940763, - "grad_norm": 0.0008481297409161925, - "learning_rate": 0.00019999754929701517, - "loss": 46.0, - "step": 29157 - }, - { - "epoch": 2.229332721677466, - "grad_norm": 0.0007183749112300575, - "learning_rate": 0.00019999754912885138, - "loss": 46.0, - "step": 29158 - }, - { - "epoch": 2.229409178660856, - "grad_norm": 0.0015572463162243366, - "learning_rate": 0.0001999975489606818, - "loss": 46.0, - "step": 29159 - }, - { - "epoch": 2.2294856356442456, - "grad_norm": 0.001245118328370154, - "learning_rate": 0.0001999975487925065, - "loss": 46.0, - "step": 29160 - }, - { - "epoch": 2.2295620926276354, - "grad_norm": 0.001970109064131975, - "learning_rate": 0.00019999754862432538, - "loss": 46.0, - "step": 29161 - }, - { - "epoch": 2.229638549611025, - "grad_norm": 0.0010958451312035322, - "learning_rate": 0.0001999975484561385, - "loss": 46.0, - "step": 29162 - }, - { - "epoch": 2.229715006594415, - "grad_norm": 0.0012278254143893719, - "learning_rate": 0.00019999754828794588, - "loss": 46.0, - "step": 29163 - }, - { - "epoch": 2.2297914635778047, - "grad_norm": 0.0021249623969197273, - "learning_rate": 0.00019999754811974747, - "loss": 46.0, - "step": 29164 - }, - { - "epoch": 2.2298679205611944, - "grad_norm": 0.0006810056511312723, - "learning_rate": 0.0001999975479515433, - "loss": 46.0, - "step": 29165 - }, - { - "epoch": 2.2299443775445837, - "grad_norm": 0.0012682306114584208, - "learning_rate": 0.00019999754778333336, - "loss": 46.0, - "step": 29166 - }, - { - "epoch": 2.2300208345279735, - "grad_norm": 0.001094619045034051, - "learning_rate": 0.00019999754761511763, - "loss": 46.0, - "step": 29167 - }, - { - "epoch": 2.2300972915113633, - "grad_norm": 0.0009834646480157971, - "learning_rate": 0.00019999754744689615, - "loss": 46.0, - "step": 29168 - }, - { - "epoch": 2.230173748494753, - "grad_norm": 0.007009634282439947, - "learning_rate": 0.0001999975472786689, - "loss": 46.0, - "step": 29169 - }, - { - "epoch": 2.230250205478143, - "grad_norm": 0.0008895605569705367, - "learning_rate": 0.00019999754711043588, - "loss": 46.0, - "step": 29170 - }, - { - "epoch": 2.2303266624615325, - "grad_norm": 0.003990990109741688, - "learning_rate": 0.0001999975469421971, - "loss": 46.0, - "step": 29171 - }, - { - "epoch": 2.2304031194449223, - "grad_norm": 0.0022045692894607782, - "learning_rate": 0.0001999975467739525, - "loss": 46.0, - "step": 29172 - }, - { - "epoch": 2.230479576428312, - "grad_norm": 0.0006977233570069075, - "learning_rate": 0.0001999975466057022, - "loss": 46.0, - "step": 29173 - }, - { - "epoch": 2.230556033411702, - "grad_norm": 0.000602979795075953, - "learning_rate": 0.0001999975464374461, - "loss": 46.0, - "step": 29174 - }, - { - "epoch": 2.2306324903950916, - "grad_norm": 0.0009595765150152147, - "learning_rate": 0.00019999754626918423, - "loss": 46.0, - "step": 29175 - }, - { - "epoch": 2.2307089473784814, - "grad_norm": 0.0008591891964897513, - "learning_rate": 0.00019999754610091657, - "loss": 46.0, - "step": 29176 - }, - { - "epoch": 2.230785404361871, - "grad_norm": 0.0005220207385718822, - "learning_rate": 0.00019999754593264318, - "loss": 46.0, - "step": 29177 - }, - { - "epoch": 2.2308618613452604, - "grad_norm": 0.001243693521246314, - "learning_rate": 0.000199997545764364, - "loss": 46.0, - "step": 29178 - }, - { - "epoch": 2.23093831832865, - "grad_norm": 0.002552089747041464, - "learning_rate": 0.00019999754559607907, - "loss": 46.0, - "step": 29179 - }, - { - "epoch": 2.23101477531204, - "grad_norm": 0.0009439396671950817, - "learning_rate": 0.00019999754542778835, - "loss": 46.0, - "step": 29180 - }, - { - "epoch": 2.2310912322954297, - "grad_norm": 0.0016397950239479542, - "learning_rate": 0.00019999754525949187, - "loss": 46.0, - "step": 29181 - }, - { - "epoch": 2.2311676892788195, - "grad_norm": 0.00040711203473620117, - "learning_rate": 0.0001999975450911896, - "loss": 46.0, - "step": 29182 - }, - { - "epoch": 2.2312441462622092, - "grad_norm": 0.0015877274563536048, - "learning_rate": 0.00019999754492288157, - "loss": 46.0, - "step": 29183 - }, - { - "epoch": 2.231320603245599, - "grad_norm": 0.0008642931352369487, - "learning_rate": 0.00019999754475456775, - "loss": 46.0, - "step": 29184 - }, - { - "epoch": 2.2313970602289888, - "grad_norm": 0.0043951366096735, - "learning_rate": 0.0001999975445862482, - "loss": 46.0, - "step": 29185 - }, - { - "epoch": 2.2314735172123785, - "grad_norm": 0.0015020916471257806, - "learning_rate": 0.00019999754441792287, - "loss": 46.0, - "step": 29186 - }, - { - "epoch": 2.2315499741957683, - "grad_norm": 0.00326914363540709, - "learning_rate": 0.00019999754424959178, - "loss": 46.0, - "step": 29187 - }, - { - "epoch": 2.2316264311791576, - "grad_norm": 0.0157815869897604, - "learning_rate": 0.00019999754408125492, - "loss": 46.0, - "step": 29188 - }, - { - "epoch": 2.2317028881625474, - "grad_norm": 0.0005216411082074046, - "learning_rate": 0.00019999754391291226, - "loss": 46.0, - "step": 29189 - }, - { - "epoch": 2.231779345145937, - "grad_norm": 0.0023198905400931835, - "learning_rate": 0.00019999754374456385, - "loss": 46.0, - "step": 29190 - }, - { - "epoch": 2.231855802129327, - "grad_norm": 0.0006171483546495438, - "learning_rate": 0.00019999754357620967, - "loss": 46.0, - "step": 29191 - }, - { - "epoch": 2.2319322591127166, - "grad_norm": 0.001198531361296773, - "learning_rate": 0.00019999754340784974, - "loss": 46.0, - "step": 29192 - }, - { - "epoch": 2.2320087160961064, - "grad_norm": 0.0006718302611261606, - "learning_rate": 0.000199997543239484, - "loss": 46.0, - "step": 29193 - }, - { - "epoch": 2.232085173079496, - "grad_norm": 0.0009702850366011262, - "learning_rate": 0.0001999975430711125, - "loss": 46.0, - "step": 29194 - }, - { - "epoch": 2.232161630062886, - "grad_norm": 0.0006308432784862816, - "learning_rate": 0.00019999754290273526, - "loss": 46.0, - "step": 29195 - }, - { - "epoch": 2.2322380870462757, - "grad_norm": 0.0014636690029874444, - "learning_rate": 0.00019999754273435224, - "loss": 46.0, - "step": 29196 - }, - { - "epoch": 2.2323145440296654, - "grad_norm": 0.0006831357604824007, - "learning_rate": 0.00019999754256596345, - "loss": 46.0, - "step": 29197 - }, - { - "epoch": 2.232391001013055, - "grad_norm": 0.0012689635623246431, - "learning_rate": 0.0001999975423975689, - "loss": 46.0, - "step": 29198 - }, - { - "epoch": 2.232467457996445, - "grad_norm": 0.0027095209807157516, - "learning_rate": 0.00019999754222916856, - "loss": 46.0, - "step": 29199 - }, - { - "epoch": 2.2325439149798343, - "grad_norm": 0.0010933097219094634, - "learning_rate": 0.00019999754206076245, - "loss": 46.0, - "step": 29200 - }, - { - "epoch": 2.232620371963224, - "grad_norm": 0.003304373938590288, - "learning_rate": 0.0001999975418923506, - "loss": 46.0, - "step": 29201 - }, - { - "epoch": 2.232696828946614, - "grad_norm": 0.004600857384502888, - "learning_rate": 0.00019999754172393295, - "loss": 46.0, - "step": 29202 - }, - { - "epoch": 2.2327732859300036, - "grad_norm": 0.0017135116504505277, - "learning_rate": 0.00019999754155550955, - "loss": 46.0, - "step": 29203 - }, - { - "epoch": 2.2328497429133933, - "grad_norm": 0.0021816326770931482, - "learning_rate": 0.00019999754138708036, - "loss": 46.0, - "step": 29204 - }, - { - "epoch": 2.232926199896783, - "grad_norm": 0.0009967110818251967, - "learning_rate": 0.0001999975412186454, - "loss": 46.0, - "step": 29205 - }, - { - "epoch": 2.233002656880173, - "grad_norm": 0.0008666834910400212, - "learning_rate": 0.00019999754105020468, - "loss": 46.0, - "step": 29206 - }, - { - "epoch": 2.2330791138635626, - "grad_norm": 0.0010114125907421112, - "learning_rate": 0.00019999754088175818, - "loss": 46.0, - "step": 29207 - }, - { - "epoch": 2.2331555708469524, - "grad_norm": 0.001203992054797709, - "learning_rate": 0.00019999754071330593, - "loss": 46.0, - "step": 29208 - }, - { - "epoch": 2.233232027830342, - "grad_norm": 0.0034374005626887083, - "learning_rate": 0.00019999754054484789, - "loss": 46.0, - "step": 29209 - }, - { - "epoch": 2.2333084848137315, - "grad_norm": 0.0025958274491131306, - "learning_rate": 0.00019999754037638412, - "loss": 46.0, - "step": 29210 - }, - { - "epoch": 2.233384941797121, - "grad_norm": 0.00039328786078840494, - "learning_rate": 0.00019999754020791455, - "loss": 46.0, - "step": 29211 - }, - { - "epoch": 2.233461398780511, - "grad_norm": 0.0014856280758976936, - "learning_rate": 0.0001999975400394392, - "loss": 46.0, - "step": 29212 - }, - { - "epoch": 2.2335378557639007, - "grad_norm": 0.005227528512477875, - "learning_rate": 0.0001999975398709581, - "loss": 46.0, - "step": 29213 - }, - { - "epoch": 2.2336143127472905, - "grad_norm": 0.000613532611168921, - "learning_rate": 0.00019999753970247123, - "loss": 46.0, - "step": 29214 - }, - { - "epoch": 2.2336907697306803, - "grad_norm": 0.0010660778498277068, - "learning_rate": 0.0001999975395339786, - "loss": 46.0, - "step": 29215 - }, - { - "epoch": 2.23376722671407, - "grad_norm": 0.0008584194001741707, - "learning_rate": 0.0001999975393654802, - "loss": 46.0, - "step": 29216 - }, - { - "epoch": 2.23384368369746, - "grad_norm": 0.0007386516663245857, - "learning_rate": 0.00019999753919697598, - "loss": 46.0, - "step": 29217 - }, - { - "epoch": 2.2339201406808495, - "grad_norm": 0.001097518834285438, - "learning_rate": 0.00019999753902846606, - "loss": 46.0, - "step": 29218 - }, - { - "epoch": 2.2339965976642393, - "grad_norm": 0.0004880745545960963, - "learning_rate": 0.00019999753885995033, - "loss": 46.0, - "step": 29219 - }, - { - "epoch": 2.234073054647629, - "grad_norm": 0.0006976108998060226, - "learning_rate": 0.00019999753869142883, - "loss": 46.0, - "step": 29220 - }, - { - "epoch": 2.234149511631019, - "grad_norm": 0.0009242414962500334, - "learning_rate": 0.0001999975385229016, - "loss": 46.0, - "step": 29221 - }, - { - "epoch": 2.234225968614408, - "grad_norm": 0.0010113274911418557, - "learning_rate": 0.00019999753835436853, - "loss": 46.0, - "step": 29222 - }, - { - "epoch": 2.234302425597798, - "grad_norm": 0.0007292842492461205, - "learning_rate": 0.00019999753818582977, - "loss": 46.0, - "step": 29223 - }, - { - "epoch": 2.2343788825811877, - "grad_norm": 0.0007697870023548603, - "learning_rate": 0.0001999975380172852, - "loss": 46.0, - "step": 29224 - }, - { - "epoch": 2.2344553395645774, - "grad_norm": 0.0007101548253558576, - "learning_rate": 0.00019999753784873489, - "loss": 46.0, - "step": 29225 - }, - { - "epoch": 2.234531796547967, - "grad_norm": 0.002976537449285388, - "learning_rate": 0.00019999753768017874, - "loss": 46.0, - "step": 29226 - }, - { - "epoch": 2.234608253531357, - "grad_norm": 0.0007559265941381454, - "learning_rate": 0.00019999753751161688, - "loss": 46.0, - "step": 29227 - }, - { - "epoch": 2.2346847105147467, - "grad_norm": 0.0008199620060622692, - "learning_rate": 0.00019999753734304925, - "loss": 46.0, - "step": 29228 - }, - { - "epoch": 2.2347611674981365, - "grad_norm": 0.0014298021560534835, - "learning_rate": 0.00019999753717447582, - "loss": 46.0, - "step": 29229 - }, - { - "epoch": 2.2348376244815262, - "grad_norm": 0.004141592886298895, - "learning_rate": 0.00019999753700589664, - "loss": 46.0, - "step": 29230 - }, - { - "epoch": 2.234914081464916, - "grad_norm": 0.003073490224778652, - "learning_rate": 0.0001999975368373117, - "loss": 46.0, - "step": 29231 - }, - { - "epoch": 2.2349905384483053, - "grad_norm": 0.0032897889614105225, - "learning_rate": 0.00019999753666872095, - "loss": 46.0, - "step": 29232 - }, - { - "epoch": 2.235066995431695, - "grad_norm": 0.0016638361848890781, - "learning_rate": 0.00019999753650012448, - "loss": 46.0, - "step": 29233 - }, - { - "epoch": 2.235143452415085, - "grad_norm": 0.00111988908611238, - "learning_rate": 0.0001999975363315222, - "loss": 46.0, - "step": 29234 - }, - { - "epoch": 2.2352199093984746, - "grad_norm": 0.00046848802594468, - "learning_rate": 0.00019999753616291419, - "loss": 46.0, - "step": 29235 - }, - { - "epoch": 2.2352963663818644, - "grad_norm": 0.0004590329772327095, - "learning_rate": 0.0001999975359943004, - "loss": 46.0, - "step": 29236 - }, - { - "epoch": 2.235372823365254, - "grad_norm": 0.0006543307099491358, - "learning_rate": 0.00019999753582568083, - "loss": 46.0, - "step": 29237 - }, - { - "epoch": 2.235449280348644, - "grad_norm": 0.0008996034739539027, - "learning_rate": 0.0001999975356570555, - "loss": 46.0, - "step": 29238 - }, - { - "epoch": 2.2355257373320336, - "grad_norm": 0.00228469748981297, - "learning_rate": 0.0001999975354884244, - "loss": 46.0, - "step": 29239 - }, - { - "epoch": 2.2356021943154234, - "grad_norm": 0.00202162005007267, - "learning_rate": 0.0001999975353197875, - "loss": 46.0, - "step": 29240 - }, - { - "epoch": 2.235678651298813, - "grad_norm": 0.000999077339656651, - "learning_rate": 0.00019999753515114485, - "loss": 46.0, - "step": 29241 - }, - { - "epoch": 2.235755108282203, - "grad_norm": 0.0009139621397480369, - "learning_rate": 0.00019999753498249644, - "loss": 46.0, - "step": 29242 - }, - { - "epoch": 2.2358315652655927, - "grad_norm": 0.0007274738745763898, - "learning_rate": 0.00019999753481384226, - "loss": 46.0, - "step": 29243 - }, - { - "epoch": 2.235908022248982, - "grad_norm": 0.0013336505508050323, - "learning_rate": 0.0001999975346451823, - "loss": 46.0, - "step": 29244 - }, - { - "epoch": 2.2359844792323718, - "grad_norm": 0.0009994828142225742, - "learning_rate": 0.00019999753447651658, - "loss": 46.0, - "step": 29245 - }, - { - "epoch": 2.2360609362157615, - "grad_norm": 0.00039548170752823353, - "learning_rate": 0.0001999975343078451, - "loss": 46.0, - "step": 29246 - }, - { - "epoch": 2.2361373931991513, - "grad_norm": 0.0018002608558163047, - "learning_rate": 0.00019999753413916784, - "loss": 46.0, - "step": 29247 - }, - { - "epoch": 2.236213850182541, - "grad_norm": 0.0008931010379455984, - "learning_rate": 0.00019999753397048482, - "loss": 46.0, - "step": 29248 - }, - { - "epoch": 2.236290307165931, - "grad_norm": 0.0009544584318064153, - "learning_rate": 0.000199997533801796, - "loss": 46.0, - "step": 29249 - }, - { - "epoch": 2.2363667641493206, - "grad_norm": 0.0008065239526331425, - "learning_rate": 0.00019999753363310146, - "loss": 46.0, - "step": 29250 - }, - { - "epoch": 2.2364432211327103, - "grad_norm": 0.0009010698995552957, - "learning_rate": 0.00019999753346440112, - "loss": 46.0, - "step": 29251 - }, - { - "epoch": 2.2365196781161, - "grad_norm": 0.002153923502191901, - "learning_rate": 0.00019999753329569503, - "loss": 46.0, - "step": 29252 - }, - { - "epoch": 2.23659613509949, - "grad_norm": 0.0018601641058921814, - "learning_rate": 0.00019999753312698311, - "loss": 46.0, - "step": 29253 - }, - { - "epoch": 2.236672592082879, - "grad_norm": 0.0007505971589125693, - "learning_rate": 0.00019999753295826545, - "loss": 46.0, - "step": 29254 - }, - { - "epoch": 2.236749049066269, - "grad_norm": 0.0007172297919169068, - "learning_rate": 0.00019999753278954207, - "loss": 46.0, - "step": 29255 - }, - { - "epoch": 2.2368255060496587, - "grad_norm": 0.0005753936129622161, - "learning_rate": 0.00019999753262081287, - "loss": 46.0, - "step": 29256 - }, - { - "epoch": 2.2369019630330484, - "grad_norm": 0.000982632045634091, - "learning_rate": 0.00019999753245207794, - "loss": 46.0, - "step": 29257 - }, - { - "epoch": 2.236978420016438, - "grad_norm": 0.0021230040583759546, - "learning_rate": 0.00019999753228333722, - "loss": 46.0, - "step": 29258 - }, - { - "epoch": 2.237054876999828, - "grad_norm": 0.0005564973107539117, - "learning_rate": 0.00019999753211459071, - "loss": 46.0, - "step": 29259 - }, - { - "epoch": 2.2371313339832177, - "grad_norm": 0.0007678851834498346, - "learning_rate": 0.00019999753194583847, - "loss": 46.0, - "step": 29260 - }, - { - "epoch": 2.2372077909666075, - "grad_norm": 0.0006714258925057948, - "learning_rate": 0.00019999753177708042, - "loss": 46.0, - "step": 29261 - }, - { - "epoch": 2.2372842479499973, - "grad_norm": 0.0012710849987342954, - "learning_rate": 0.00019999753160831663, - "loss": 46.0, - "step": 29262 - }, - { - "epoch": 2.237360704933387, - "grad_norm": 0.0015369082102552056, - "learning_rate": 0.00019999753143954706, - "loss": 46.0, - "step": 29263 - }, - { - "epoch": 2.2374371619167768, - "grad_norm": 0.0006091243121773005, - "learning_rate": 0.00019999753127077172, - "loss": 46.0, - "step": 29264 - }, - { - "epoch": 2.237513618900166, - "grad_norm": 0.0011071724584326148, - "learning_rate": 0.0001999975311019906, - "loss": 46.0, - "step": 29265 - }, - { - "epoch": 2.237590075883556, - "grad_norm": 0.0009000373538583517, - "learning_rate": 0.00019999753093320372, - "loss": 46.0, - "step": 29266 - }, - { - "epoch": 2.2376665328669456, - "grad_norm": 0.0009503897745162249, - "learning_rate": 0.0001999975307644111, - "loss": 46.0, - "step": 29267 - }, - { - "epoch": 2.2377429898503354, - "grad_norm": 0.0017044448759406805, - "learning_rate": 0.00019999753059561268, - "loss": 46.0, - "step": 29268 - }, - { - "epoch": 2.237819446833725, - "grad_norm": 0.003015250666067004, - "learning_rate": 0.00019999753042680848, - "loss": 46.0, - "step": 29269 - }, - { - "epoch": 2.237895903817115, - "grad_norm": 0.0005690613761544228, - "learning_rate": 0.00019999753025799852, - "loss": 46.0, - "step": 29270 - }, - { - "epoch": 2.2379723608005047, - "grad_norm": 0.0005843983963131905, - "learning_rate": 0.0001999975300891828, - "loss": 46.0, - "step": 29271 - }, - { - "epoch": 2.2380488177838944, - "grad_norm": 0.00123932387214154, - "learning_rate": 0.00019999752992036132, - "loss": 46.0, - "step": 29272 - }, - { - "epoch": 2.238125274767284, - "grad_norm": 0.0008801360381767154, - "learning_rate": 0.00019999752975153405, - "loss": 46.0, - "step": 29273 - }, - { - "epoch": 2.238201731750674, - "grad_norm": 0.005004067439585924, - "learning_rate": 0.000199997529582701, - "loss": 46.0, - "step": 29274 - }, - { - "epoch": 2.2382781887340637, - "grad_norm": 0.0057562850415706635, - "learning_rate": 0.0001999975294138622, - "loss": 46.0, - "step": 29275 - }, - { - "epoch": 2.238354645717453, - "grad_norm": 0.0020179220009595156, - "learning_rate": 0.00019999752924501765, - "loss": 46.0, - "step": 29276 - }, - { - "epoch": 2.238431102700843, - "grad_norm": 0.0020850191358476877, - "learning_rate": 0.0001999975290761673, - "loss": 46.0, - "step": 29277 - }, - { - "epoch": 2.2385075596842325, - "grad_norm": 0.0006886178161948919, - "learning_rate": 0.0001999975289073112, - "loss": 46.0, - "step": 29278 - }, - { - "epoch": 2.2385840166676223, - "grad_norm": 0.0004178480012342334, - "learning_rate": 0.00019999752873844933, - "loss": 46.0, - "step": 29279 - }, - { - "epoch": 2.238660473651012, - "grad_norm": 0.0009546743240207434, - "learning_rate": 0.00019999752856958167, - "loss": 46.0, - "step": 29280 - }, - { - "epoch": 2.238736930634402, - "grad_norm": 0.004087148234248161, - "learning_rate": 0.00019999752840070824, - "loss": 46.0, - "step": 29281 - }, - { - "epoch": 2.2388133876177916, - "grad_norm": 0.0008135377429425716, - "learning_rate": 0.00019999752823182906, - "loss": 46.0, - "step": 29282 - }, - { - "epoch": 2.2388898446011813, - "grad_norm": 0.0008384595275856555, - "learning_rate": 0.0001999975280629441, - "loss": 46.0, - "step": 29283 - }, - { - "epoch": 2.238966301584571, - "grad_norm": 0.0008722853963263333, - "learning_rate": 0.00019999752789405338, - "loss": 46.0, - "step": 29284 - }, - { - "epoch": 2.239042758567961, - "grad_norm": 0.0008590986835770309, - "learning_rate": 0.00019999752772515688, - "loss": 46.0, - "step": 29285 - }, - { - "epoch": 2.2391192155513506, - "grad_norm": 0.0006694166222587228, - "learning_rate": 0.00019999752755625464, - "loss": 46.0, - "step": 29286 - }, - { - "epoch": 2.23919567253474, - "grad_norm": 0.0012403350556269288, - "learning_rate": 0.0001999975273873466, - "loss": 46.0, - "step": 29287 - }, - { - "epoch": 2.2392721295181297, - "grad_norm": 0.0006181875360198319, - "learning_rate": 0.0001999975272184328, - "loss": 46.0, - "step": 29288 - }, - { - "epoch": 2.2393485865015195, - "grad_norm": 0.0008266624645330012, - "learning_rate": 0.00019999752704951323, - "loss": 46.0, - "step": 29289 - }, - { - "epoch": 2.2394250434849092, - "grad_norm": 0.002642162377014756, - "learning_rate": 0.0001999975268805879, - "loss": 46.0, - "step": 29290 - }, - { - "epoch": 2.239501500468299, - "grad_norm": 0.0012388363247737288, - "learning_rate": 0.00019999752671165676, - "loss": 46.0, - "step": 29291 - }, - { - "epoch": 2.2395779574516888, - "grad_norm": 0.0012843264266848564, - "learning_rate": 0.0001999975265427199, - "loss": 46.0, - "step": 29292 - }, - { - "epoch": 2.2396544144350785, - "grad_norm": 0.0009894461836665869, - "learning_rate": 0.00019999752637377724, - "loss": 46.0, - "step": 29293 - }, - { - "epoch": 2.2397308714184683, - "grad_norm": 0.0005327691906131804, - "learning_rate": 0.00019999752620482883, - "loss": 46.0, - "step": 29294 - }, - { - "epoch": 2.239807328401858, - "grad_norm": 0.000824386312160641, - "learning_rate": 0.00019999752603587463, - "loss": 46.0, - "step": 29295 - }, - { - "epoch": 2.239883785385248, - "grad_norm": 0.001715023652650416, - "learning_rate": 0.00019999752586691467, - "loss": 46.0, - "step": 29296 - }, - { - "epoch": 2.239960242368637, - "grad_norm": 0.0012950344244018197, - "learning_rate": 0.00019999752569794897, - "loss": 46.0, - "step": 29297 - }, - { - "epoch": 2.240036699352027, - "grad_norm": 0.0022716624662280083, - "learning_rate": 0.00019999752552897748, - "loss": 46.0, - "step": 29298 - }, - { - "epoch": 2.2401131563354166, - "grad_norm": 0.0012147424276918173, - "learning_rate": 0.0001999975253600002, - "loss": 46.0, - "step": 29299 - }, - { - "epoch": 2.2401896133188064, - "grad_norm": 0.0008777669863775373, - "learning_rate": 0.00019999752519101716, - "loss": 46.0, - "step": 29300 - }, - { - "epoch": 2.240266070302196, - "grad_norm": 0.000599419348873198, - "learning_rate": 0.00019999752502202836, - "loss": 46.0, - "step": 29301 - }, - { - "epoch": 2.240342527285586, - "grad_norm": 0.0009029298089444637, - "learning_rate": 0.00019999752485303377, - "loss": 46.0, - "step": 29302 - }, - { - "epoch": 2.2404189842689757, - "grad_norm": 0.001753746997565031, - "learning_rate": 0.00019999752468403346, - "loss": 46.0, - "step": 29303 - }, - { - "epoch": 2.2404954412523654, - "grad_norm": 0.0020846514962613583, - "learning_rate": 0.00019999752451502735, - "loss": 46.0, - "step": 29304 - }, - { - "epoch": 2.240571898235755, - "grad_norm": 0.0007304241880774498, - "learning_rate": 0.00019999752434601546, - "loss": 46.0, - "step": 29305 - }, - { - "epoch": 2.240648355219145, - "grad_norm": 0.003506737295538187, - "learning_rate": 0.0001999975241769978, - "loss": 46.0, - "step": 29306 - }, - { - "epoch": 2.2407248122025347, - "grad_norm": 0.001534824026748538, - "learning_rate": 0.00019999752400797437, - "loss": 46.0, - "step": 29307 - }, - { - "epoch": 2.2408012691859245, - "grad_norm": 0.0008726065279915929, - "learning_rate": 0.0001999975238389452, - "loss": 46.0, - "step": 29308 - }, - { - "epoch": 2.240877726169314, - "grad_norm": 0.0010125860571861267, - "learning_rate": 0.00019999752366991024, - "loss": 46.0, - "step": 29309 - }, - { - "epoch": 2.2409541831527036, - "grad_norm": 0.004232482519000769, - "learning_rate": 0.00019999752350086954, - "loss": 46.0, - "step": 29310 - }, - { - "epoch": 2.2410306401360933, - "grad_norm": 0.002225664909929037, - "learning_rate": 0.00019999752333182302, - "loss": 46.0, - "step": 29311 - }, - { - "epoch": 2.241107097119483, - "grad_norm": 0.0006565770599991083, - "learning_rate": 0.00019999752316277074, - "loss": 46.0, - "step": 29312 - }, - { - "epoch": 2.241183554102873, - "grad_norm": 0.0008778393967077136, - "learning_rate": 0.00019999752299371273, - "loss": 46.0, - "step": 29313 - }, - { - "epoch": 2.2412600110862626, - "grad_norm": 0.0012631692225113511, - "learning_rate": 0.0001999975228246489, - "loss": 46.0, - "step": 29314 - }, - { - "epoch": 2.2413364680696524, - "grad_norm": 0.000767264689784497, - "learning_rate": 0.00019999752265557934, - "loss": 46.0, - "step": 29315 - }, - { - "epoch": 2.241412925053042, - "grad_norm": 0.0009724089177325368, - "learning_rate": 0.000199997522486504, - "loss": 46.0, - "step": 29316 - }, - { - "epoch": 2.241489382036432, - "grad_norm": 0.0004554348415695131, - "learning_rate": 0.0001999975223174229, - "loss": 46.0, - "step": 29317 - }, - { - "epoch": 2.2415658390198216, - "grad_norm": 0.0011803876841440797, - "learning_rate": 0.000199997522148336, - "loss": 46.0, - "step": 29318 - }, - { - "epoch": 2.241642296003211, - "grad_norm": 0.0009750897879712284, - "learning_rate": 0.00019999752197924335, - "loss": 46.0, - "step": 29319 - }, - { - "epoch": 2.2417187529866007, - "grad_norm": 0.00057305145310238, - "learning_rate": 0.00019999752181014494, - "loss": 46.0, - "step": 29320 - }, - { - "epoch": 2.2417952099699905, - "grad_norm": 0.0006997028249315917, - "learning_rate": 0.00019999752164104076, - "loss": 46.0, - "step": 29321 - }, - { - "epoch": 2.2418716669533802, - "grad_norm": 0.0007159091182984412, - "learning_rate": 0.00019999752147193078, - "loss": 46.0, - "step": 29322 - }, - { - "epoch": 2.24194812393677, - "grad_norm": 0.0004743227909784764, - "learning_rate": 0.00019999752130281506, - "loss": 46.0, - "step": 29323 - }, - { - "epoch": 2.2420245809201598, - "grad_norm": 0.0013039861805737019, - "learning_rate": 0.00019999752113369356, - "loss": 46.0, - "step": 29324 - }, - { - "epoch": 2.2421010379035495, - "grad_norm": 0.0015581886982545257, - "learning_rate": 0.0001999975209645663, - "loss": 46.0, - "step": 29325 - }, - { - "epoch": 2.2421774948869393, - "grad_norm": 0.0008446983410976827, - "learning_rate": 0.00019999752079543327, - "loss": 46.0, - "step": 29326 - }, - { - "epoch": 2.242253951870329, - "grad_norm": 0.001050122664310038, - "learning_rate": 0.00019999752062629445, - "loss": 46.0, - "step": 29327 - }, - { - "epoch": 2.242330408853719, - "grad_norm": 0.0005471666227094829, - "learning_rate": 0.0001999975204571499, - "loss": 46.0, - "step": 29328 - }, - { - "epoch": 2.2424068658371086, - "grad_norm": 0.003242134116590023, - "learning_rate": 0.00019999752028799955, - "loss": 46.0, - "step": 29329 - }, - { - "epoch": 2.2424833228204983, - "grad_norm": 0.001610635663382709, - "learning_rate": 0.00019999752011884344, - "loss": 46.0, - "step": 29330 - }, - { - "epoch": 2.2425597798038877, - "grad_norm": 0.009515034966170788, - "learning_rate": 0.00019999751994968155, - "loss": 46.0, - "step": 29331 - }, - { - "epoch": 2.2426362367872774, - "grad_norm": 0.0009892922826111317, - "learning_rate": 0.0001999975197805139, - "loss": 46.0, - "step": 29332 - }, - { - "epoch": 2.242712693770667, - "grad_norm": 0.0009540737955830991, - "learning_rate": 0.0001999975196113405, - "loss": 46.0, - "step": 29333 - }, - { - "epoch": 2.242789150754057, - "grad_norm": 0.0009146829834207892, - "learning_rate": 0.00019999751944216128, - "loss": 46.0, - "step": 29334 - }, - { - "epoch": 2.2428656077374467, - "grad_norm": 0.0017535848310217261, - "learning_rate": 0.0001999975192729763, - "loss": 46.0, - "step": 29335 - }, - { - "epoch": 2.2429420647208365, - "grad_norm": 0.003172148484736681, - "learning_rate": 0.00019999751910378558, - "loss": 46.0, - "step": 29336 - }, - { - "epoch": 2.243018521704226, - "grad_norm": 0.0013302986044436693, - "learning_rate": 0.00019999751893458908, - "loss": 46.0, - "step": 29337 - }, - { - "epoch": 2.243094978687616, - "grad_norm": 0.0008674630662426353, - "learning_rate": 0.0001999975187653868, - "loss": 46.0, - "step": 29338 - }, - { - "epoch": 2.2431714356710057, - "grad_norm": 0.0010425063082948327, - "learning_rate": 0.0001999975185961788, - "loss": 46.0, - "step": 29339 - }, - { - "epoch": 2.2432478926543955, - "grad_norm": 0.0006177789182402194, - "learning_rate": 0.000199997518426965, - "loss": 46.0, - "step": 29340 - }, - { - "epoch": 2.243324349637785, - "grad_norm": 0.0006449943757615983, - "learning_rate": 0.0001999975182577454, - "loss": 46.0, - "step": 29341 - }, - { - "epoch": 2.2434008066211746, - "grad_norm": 0.0010228544706478715, - "learning_rate": 0.00019999751808852007, - "loss": 46.0, - "step": 29342 - }, - { - "epoch": 2.2434772636045643, - "grad_norm": 0.0019454704597592354, - "learning_rate": 0.00019999751791928896, - "loss": 46.0, - "step": 29343 - }, - { - "epoch": 2.243553720587954, - "grad_norm": 0.0006619360647164285, - "learning_rate": 0.00019999751775005208, - "loss": 46.0, - "step": 29344 - }, - { - "epoch": 2.243630177571344, - "grad_norm": 0.0011464472627267241, - "learning_rate": 0.0001999975175808094, - "loss": 46.0, - "step": 29345 - }, - { - "epoch": 2.2437066345547336, - "grad_norm": 0.0009071673848666251, - "learning_rate": 0.000199997517411561, - "loss": 46.0, - "step": 29346 - }, - { - "epoch": 2.2437830915381234, - "grad_norm": 0.0016402152832597494, - "learning_rate": 0.0001999975172423068, - "loss": 46.0, - "step": 29347 - }, - { - "epoch": 2.243859548521513, - "grad_norm": 0.0016108324052765965, - "learning_rate": 0.00019999751707304683, - "loss": 46.0, - "step": 29348 - }, - { - "epoch": 2.243936005504903, - "grad_norm": 0.0009354056091979146, - "learning_rate": 0.0001999975169037811, - "loss": 46.0, - "step": 29349 - }, - { - "epoch": 2.2440124624882927, - "grad_norm": 0.0011478504166007042, - "learning_rate": 0.0001999975167345096, - "loss": 46.0, - "step": 29350 - }, - { - "epoch": 2.2440889194716824, - "grad_norm": 0.0016720503335818648, - "learning_rate": 0.00019999751656523234, - "loss": 46.0, - "step": 29351 - }, - { - "epoch": 2.244165376455072, - "grad_norm": 0.0006281096138991416, - "learning_rate": 0.0001999975163959493, - "loss": 46.0, - "step": 29352 - }, - { - "epoch": 2.2442418334384615, - "grad_norm": 0.00035824498627334833, - "learning_rate": 0.0001999975162266605, - "loss": 46.0, - "step": 29353 - }, - { - "epoch": 2.2443182904218513, - "grad_norm": 0.0005668998346664011, - "learning_rate": 0.0001999975160573659, - "loss": 46.0, - "step": 29354 - }, - { - "epoch": 2.244394747405241, - "grad_norm": 0.0008568732882849872, - "learning_rate": 0.00019999751588806557, - "loss": 46.0, - "step": 29355 - }, - { - "epoch": 2.244471204388631, - "grad_norm": 0.0013889207039028406, - "learning_rate": 0.00019999751571875946, - "loss": 46.0, - "step": 29356 - }, - { - "epoch": 2.2445476613720206, - "grad_norm": 0.001104099559597671, - "learning_rate": 0.00019999751554944758, - "loss": 46.0, - "step": 29357 - }, - { - "epoch": 2.2446241183554103, - "grad_norm": 0.0008470212342217565, - "learning_rate": 0.00019999751538012992, - "loss": 46.0, - "step": 29358 - }, - { - "epoch": 2.2447005753388, - "grad_norm": 0.001955494051799178, - "learning_rate": 0.0001999975152108065, - "loss": 46.0, - "step": 29359 - }, - { - "epoch": 2.24477703232219, - "grad_norm": 0.001402291003614664, - "learning_rate": 0.0001999975150414773, - "loss": 46.0, - "step": 29360 - }, - { - "epoch": 2.2448534893055796, - "grad_norm": 0.0017629220383241773, - "learning_rate": 0.00019999751487214233, - "loss": 46.0, - "step": 29361 - }, - { - "epoch": 2.2449299462889694, - "grad_norm": 0.0005411452730186284, - "learning_rate": 0.0001999975147028016, - "loss": 46.0, - "step": 29362 - }, - { - "epoch": 2.2450064032723587, - "grad_norm": 0.0006768542807549238, - "learning_rate": 0.0001999975145334551, - "loss": 46.0, - "step": 29363 - }, - { - "epoch": 2.2450828602557484, - "grad_norm": 0.0007872622227296233, - "learning_rate": 0.00019999751436410287, - "loss": 46.0, - "step": 29364 - }, - { - "epoch": 2.245159317239138, - "grad_norm": 0.0003998048778157681, - "learning_rate": 0.00019999751419474482, - "loss": 46.0, - "step": 29365 - }, - { - "epoch": 2.245235774222528, - "grad_norm": 0.0008432682952843606, - "learning_rate": 0.000199997514025381, - "loss": 46.0, - "step": 29366 - }, - { - "epoch": 2.2453122312059177, - "grad_norm": 0.0018499908037483692, - "learning_rate": 0.00019999751385601141, - "loss": 46.0, - "step": 29367 - }, - { - "epoch": 2.2453886881893075, - "grad_norm": 0.0022874511778354645, - "learning_rate": 0.00019999751368663605, - "loss": 46.0, - "step": 29368 - }, - { - "epoch": 2.2454651451726972, - "grad_norm": 0.0006427046610042453, - "learning_rate": 0.00019999751351725494, - "loss": 46.0, - "step": 29369 - }, - { - "epoch": 2.245541602156087, - "grad_norm": 0.0009616664028726518, - "learning_rate": 0.00019999751334786808, - "loss": 46.0, - "step": 29370 - }, - { - "epoch": 2.2456180591394768, - "grad_norm": 0.0009112671250477433, - "learning_rate": 0.00019999751317847543, - "loss": 46.0, - "step": 29371 - }, - { - "epoch": 2.2456945161228665, - "grad_norm": 0.00039321009535342455, - "learning_rate": 0.000199997513009077, - "loss": 46.0, - "step": 29372 - }, - { - "epoch": 2.2457709731062563, - "grad_norm": 0.0014207576168701053, - "learning_rate": 0.00019999751283967282, - "loss": 46.0, - "step": 29373 - }, - { - "epoch": 2.245847430089646, - "grad_norm": 0.0010622048284858465, - "learning_rate": 0.00019999751267026284, - "loss": 46.0, - "step": 29374 - }, - { - "epoch": 2.2459238870730354, - "grad_norm": 0.0010640205582603812, - "learning_rate": 0.0001999975125008471, - "loss": 46.0, - "step": 29375 - }, - { - "epoch": 2.246000344056425, - "grad_norm": 0.000943817023653537, - "learning_rate": 0.0001999975123314256, - "loss": 46.0, - "step": 29376 - }, - { - "epoch": 2.246076801039815, - "grad_norm": 0.0022350195795297623, - "learning_rate": 0.00019999751216199832, - "loss": 46.0, - "step": 29377 - }, - { - "epoch": 2.2461532580232046, - "grad_norm": 0.0010578795336186886, - "learning_rate": 0.00019999751199256528, - "loss": 46.0, - "step": 29378 - }, - { - "epoch": 2.2462297150065944, - "grad_norm": 0.0008161651203408837, - "learning_rate": 0.0001999975118231265, - "loss": 46.0, - "step": 29379 - }, - { - "epoch": 2.246306171989984, - "grad_norm": 0.0018506267806515098, - "learning_rate": 0.0001999975116536819, - "loss": 46.0, - "step": 29380 - }, - { - "epoch": 2.246382628973374, - "grad_norm": 0.00091931241331622, - "learning_rate": 0.00019999751148423153, - "loss": 46.0, - "step": 29381 - }, - { - "epoch": 2.2464590859567637, - "grad_norm": 0.0011082744458690286, - "learning_rate": 0.00019999751131477545, - "loss": 46.0, - "step": 29382 - }, - { - "epoch": 2.2465355429401535, - "grad_norm": 0.0008622603490948677, - "learning_rate": 0.00019999751114531354, - "loss": 46.0, - "step": 29383 - }, - { - "epoch": 2.246611999923543, - "grad_norm": 0.0023591257631778717, - "learning_rate": 0.00019999751097584588, - "loss": 46.0, - "step": 29384 - }, - { - "epoch": 2.2466884569069325, - "grad_norm": 0.0021558767184615135, - "learning_rate": 0.00019999751080637248, - "loss": 46.0, - "step": 29385 - }, - { - "epoch": 2.2467649138903223, - "grad_norm": 0.004051904659718275, - "learning_rate": 0.00019999751063689327, - "loss": 46.0, - "step": 29386 - }, - { - "epoch": 2.246841370873712, - "grad_norm": 0.00044367031659930944, - "learning_rate": 0.00019999751046740832, - "loss": 46.0, - "step": 29387 - }, - { - "epoch": 2.246917827857102, - "grad_norm": 0.002209751633927226, - "learning_rate": 0.00019999751029791757, - "loss": 46.0, - "step": 29388 - }, - { - "epoch": 2.2469942848404916, - "grad_norm": 0.00037307501770555973, - "learning_rate": 0.00019999751012842108, - "loss": 46.0, - "step": 29389 - }, - { - "epoch": 2.2470707418238813, - "grad_norm": 0.0007075180765241385, - "learning_rate": 0.0001999975099589188, - "loss": 46.0, - "step": 29390 - }, - { - "epoch": 2.247147198807271, - "grad_norm": 0.0005778814665973186, - "learning_rate": 0.00019999750978941076, - "loss": 46.0, - "step": 29391 - }, - { - "epoch": 2.247223655790661, - "grad_norm": 0.001449332688935101, - "learning_rate": 0.00019999750961989694, - "loss": 46.0, - "step": 29392 - }, - { - "epoch": 2.2473001127740506, - "grad_norm": 0.0008784688543528318, - "learning_rate": 0.00019999750945037738, - "loss": 46.0, - "step": 29393 - }, - { - "epoch": 2.2473765697574404, - "grad_norm": 0.0005454125930555165, - "learning_rate": 0.00019999750928085202, - "loss": 46.0, - "step": 29394 - }, - { - "epoch": 2.24745302674083, - "grad_norm": 0.0008241348550654948, - "learning_rate": 0.0001999975091113209, - "loss": 46.0, - "step": 29395 - }, - { - "epoch": 2.2475294837242195, - "grad_norm": 0.0003697400097735226, - "learning_rate": 0.00019999750894178402, - "loss": 46.0, - "step": 29396 - }, - { - "epoch": 2.247605940707609, - "grad_norm": 0.0016068790573626757, - "learning_rate": 0.00019999750877224134, - "loss": 46.0, - "step": 29397 - }, - { - "epoch": 2.247682397690999, - "grad_norm": 0.0020260708406567574, - "learning_rate": 0.00019999750860269294, - "loss": 46.0, - "step": 29398 - }, - { - "epoch": 2.2477588546743887, - "grad_norm": 0.0008248375379480422, - "learning_rate": 0.00019999750843313873, - "loss": 46.0, - "step": 29399 - }, - { - "epoch": 2.2478353116577785, - "grad_norm": 0.00041719956789165735, - "learning_rate": 0.00019999750826357878, - "loss": 46.0, - "step": 29400 - }, - { - "epoch": 2.2479117686411683, - "grad_norm": 0.003310014959424734, - "learning_rate": 0.00019999750809401303, - "loss": 46.0, - "step": 29401 - }, - { - "epoch": 2.247988225624558, - "grad_norm": 0.0011237786384299397, - "learning_rate": 0.0001999975079244415, - "loss": 46.0, - "step": 29402 - }, - { - "epoch": 2.248064682607948, - "grad_norm": 0.0049889967776834965, - "learning_rate": 0.00019999750775486427, - "loss": 46.0, - "step": 29403 - }, - { - "epoch": 2.2481411395913375, - "grad_norm": 0.0010788104264065623, - "learning_rate": 0.00019999750758528122, - "loss": 46.0, - "step": 29404 - }, - { - "epoch": 2.2482175965747273, - "grad_norm": 0.0011255888966843486, - "learning_rate": 0.0001999975074156924, - "loss": 46.0, - "step": 29405 - }, - { - "epoch": 2.248294053558117, - "grad_norm": 0.0010205833241343498, - "learning_rate": 0.00019999750724609782, - "loss": 46.0, - "step": 29406 - }, - { - "epoch": 2.2483705105415064, - "grad_norm": 0.0005901691038161516, - "learning_rate": 0.00019999750707649748, - "loss": 46.0, - "step": 29407 - }, - { - "epoch": 2.248446967524896, - "grad_norm": 0.0007039479096420109, - "learning_rate": 0.00019999750690689134, - "loss": 46.0, - "step": 29408 - }, - { - "epoch": 2.248523424508286, - "grad_norm": 0.0026410738937556744, - "learning_rate": 0.00019999750673727946, - "loss": 46.0, - "step": 29409 - }, - { - "epoch": 2.2485998814916757, - "grad_norm": 0.0024696753825992346, - "learning_rate": 0.0001999975065676618, - "loss": 46.0, - "step": 29410 - }, - { - "epoch": 2.2486763384750654, - "grad_norm": 0.000584301189519465, - "learning_rate": 0.00019999750639803837, - "loss": 46.0, - "step": 29411 - }, - { - "epoch": 2.248752795458455, - "grad_norm": 0.0004448372928891331, - "learning_rate": 0.00019999750622840917, - "loss": 46.0, - "step": 29412 - }, - { - "epoch": 2.248829252441845, - "grad_norm": 0.004667339380830526, - "learning_rate": 0.00019999750605877422, - "loss": 46.0, - "step": 29413 - }, - { - "epoch": 2.2489057094252347, - "grad_norm": 0.0008403750834986567, - "learning_rate": 0.0001999975058891335, - "loss": 46.0, - "step": 29414 - }, - { - "epoch": 2.2489821664086245, - "grad_norm": 0.0008512880303896964, - "learning_rate": 0.000199997505719487, - "loss": 46.0, - "step": 29415 - }, - { - "epoch": 2.2490586233920142, - "grad_norm": 0.0006080466555431485, - "learning_rate": 0.0001999975055498347, - "loss": 46.0, - "step": 29416 - }, - { - "epoch": 2.249135080375404, - "grad_norm": 0.001602043048478663, - "learning_rate": 0.00019999750538017666, - "loss": 46.0, - "step": 29417 - }, - { - "epoch": 2.2492115373587933, - "grad_norm": 0.0003134894941467792, - "learning_rate": 0.00019999750521051285, - "loss": 46.0, - "step": 29418 - }, - { - "epoch": 2.249287994342183, - "grad_norm": 0.000761925708502531, - "learning_rate": 0.00019999750504084328, - "loss": 46.0, - "step": 29419 - }, - { - "epoch": 2.249364451325573, - "grad_norm": 0.0009567033848725259, - "learning_rate": 0.00019999750487116792, - "loss": 46.0, - "step": 29420 - }, - { - "epoch": 2.2494409083089626, - "grad_norm": 0.0008765193051658571, - "learning_rate": 0.0001999975047014868, - "loss": 46.0, - "step": 29421 - }, - { - "epoch": 2.2495173652923524, - "grad_norm": 0.0009923727484419942, - "learning_rate": 0.0001999975045317999, - "loss": 46.0, - "step": 29422 - }, - { - "epoch": 2.249593822275742, - "grad_norm": 0.003081100294366479, - "learning_rate": 0.00019999750436210724, - "loss": 46.0, - "step": 29423 - }, - { - "epoch": 2.249670279259132, - "grad_norm": 0.0005716641899198294, - "learning_rate": 0.00019999750419240884, - "loss": 46.0, - "step": 29424 - }, - { - "epoch": 2.2497467362425216, - "grad_norm": 0.001090982579626143, - "learning_rate": 0.00019999750402270464, - "loss": 46.0, - "step": 29425 - }, - { - "epoch": 2.2498231932259114, - "grad_norm": 0.0011637975694611669, - "learning_rate": 0.00019999750385299466, - "loss": 46.0, - "step": 29426 - }, - { - "epoch": 2.249899650209301, - "grad_norm": 0.0017158568371087313, - "learning_rate": 0.0001999975036832789, - "loss": 46.0, - "step": 29427 - }, - { - "epoch": 2.2499761071926905, - "grad_norm": 0.0006134028662927449, - "learning_rate": 0.00019999750351355742, - "loss": 46.0, - "step": 29428 - }, - { - "epoch": 2.2500525641760802, - "grad_norm": 0.0015880957944318652, - "learning_rate": 0.00019999750334383015, - "loss": 46.0, - "step": 29429 - }, - { - "epoch": 2.25012902115947, - "grad_norm": 0.0008633694960735738, - "learning_rate": 0.00019999750317409708, - "loss": 46.0, - "step": 29430 - }, - { - "epoch": 2.2502054781428598, - "grad_norm": 0.001385657349601388, - "learning_rate": 0.0001999975030043583, - "loss": 46.0, - "step": 29431 - }, - { - "epoch": 2.2502819351262495, - "grad_norm": 0.0020005065016448498, - "learning_rate": 0.00019999750283461373, - "loss": 46.0, - "step": 29432 - }, - { - "epoch": 2.2503583921096393, - "grad_norm": 0.0009481015149503946, - "learning_rate": 0.00019999750266486334, - "loss": 46.0, - "step": 29433 - }, - { - "epoch": 2.250434849093029, - "grad_norm": 0.0007318708812817931, - "learning_rate": 0.00019999750249510723, - "loss": 46.0, - "step": 29434 - }, - { - "epoch": 2.250511306076419, - "grad_norm": 0.0009338194504380226, - "learning_rate": 0.00019999750232534534, - "loss": 46.0, - "step": 29435 - }, - { - "epoch": 2.2505877630598086, - "grad_norm": 0.003099459456279874, - "learning_rate": 0.0001999975021555777, - "loss": 46.0, - "step": 29436 - }, - { - "epoch": 2.2506642200431983, - "grad_norm": 0.0013390317326411605, - "learning_rate": 0.00019999750198580426, - "loss": 46.0, - "step": 29437 - }, - { - "epoch": 2.250740677026588, - "grad_norm": 0.0012756186770275235, - "learning_rate": 0.00019999750181602503, - "loss": 46.0, - "step": 29438 - }, - { - "epoch": 2.250817134009978, - "grad_norm": 0.0015720747178420424, - "learning_rate": 0.00019999750164624008, - "loss": 46.0, - "step": 29439 - }, - { - "epoch": 2.250893590993367, - "grad_norm": 0.012928523123264313, - "learning_rate": 0.00019999750147644933, - "loss": 46.0, - "step": 29440 - }, - { - "epoch": 2.250970047976757, - "grad_norm": 0.0005817175842821598, - "learning_rate": 0.00019999750130665284, - "loss": 46.0, - "step": 29441 - }, - { - "epoch": 2.2510465049601467, - "grad_norm": 0.0013524278765544295, - "learning_rate": 0.00019999750113685057, - "loss": 46.0, - "step": 29442 - }, - { - "epoch": 2.2511229619435364, - "grad_norm": 0.0006919489824213088, - "learning_rate": 0.0001999975009670425, - "loss": 46.0, - "step": 29443 - }, - { - "epoch": 2.251199418926926, - "grad_norm": 0.0015825749142095447, - "learning_rate": 0.0001999975007972287, - "loss": 46.0, - "step": 29444 - }, - { - "epoch": 2.251275875910316, - "grad_norm": 0.0010053508449345827, - "learning_rate": 0.00019999750062740912, - "loss": 46.0, - "step": 29445 - }, - { - "epoch": 2.2513523328937057, - "grad_norm": 0.0017596713732928038, - "learning_rate": 0.00019999750045758376, - "loss": 46.0, - "step": 29446 - }, - { - "epoch": 2.2514287898770955, - "grad_norm": 0.001610561739653349, - "learning_rate": 0.00019999750028775265, - "loss": 46.0, - "step": 29447 - }, - { - "epoch": 2.2515052468604853, - "grad_norm": 0.001406658673658967, - "learning_rate": 0.00019999750011791574, - "loss": 46.0, - "step": 29448 - }, - { - "epoch": 2.251581703843875, - "grad_norm": 0.0011525223962962627, - "learning_rate": 0.00019999749994807308, - "loss": 46.0, - "step": 29449 - }, - { - "epoch": 2.2516581608272643, - "grad_norm": 0.000749699305742979, - "learning_rate": 0.00019999749977822465, - "loss": 46.0, - "step": 29450 - }, - { - "epoch": 2.251734617810654, - "grad_norm": 0.001271737739443779, - "learning_rate": 0.00019999749960837045, - "loss": 46.0, - "step": 29451 - }, - { - "epoch": 2.251811074794044, - "grad_norm": 0.0007768102223053575, - "learning_rate": 0.0001999974994385105, - "loss": 46.0, - "step": 29452 - }, - { - "epoch": 2.2518875317774336, - "grad_norm": 0.0006906852358952165, - "learning_rate": 0.00019999749926864475, - "loss": 46.0, - "step": 29453 - }, - { - "epoch": 2.2519639887608234, - "grad_norm": 0.0012180572375655174, - "learning_rate": 0.00019999749909877323, - "loss": 46.0, - "step": 29454 - }, - { - "epoch": 2.252040445744213, - "grad_norm": 0.0012188206892460585, - "learning_rate": 0.00019999749892889596, - "loss": 46.0, - "step": 29455 - }, - { - "epoch": 2.252116902727603, - "grad_norm": 0.0030461689457297325, - "learning_rate": 0.00019999749875901292, - "loss": 46.0, - "step": 29456 - }, - { - "epoch": 2.2521933597109927, - "grad_norm": 0.0017065851716324687, - "learning_rate": 0.0001999974985891241, - "loss": 46.0, - "step": 29457 - }, - { - "epoch": 2.2522698166943824, - "grad_norm": 0.001509082387201488, - "learning_rate": 0.00019999749841922952, - "loss": 46.0, - "step": 29458 - }, - { - "epoch": 2.252346273677772, - "grad_norm": 0.000760240713134408, - "learning_rate": 0.00019999749824932915, - "loss": 46.0, - "step": 29459 - }, - { - "epoch": 2.252422730661162, - "grad_norm": 0.0006394116207957268, - "learning_rate": 0.00019999749807942302, - "loss": 46.0, - "step": 29460 - }, - { - "epoch": 2.2524991876445517, - "grad_norm": 0.0008832362364046276, - "learning_rate": 0.00019999749790951116, - "loss": 46.0, - "step": 29461 - }, - { - "epoch": 2.252575644627941, - "grad_norm": 0.0010208101011812687, - "learning_rate": 0.00019999749773959348, - "loss": 46.0, - "step": 29462 - }, - { - "epoch": 2.252652101611331, - "grad_norm": 0.000825251976493746, - "learning_rate": 0.00019999749756967005, - "loss": 46.0, - "step": 29463 - }, - { - "epoch": 2.2527285585947205, - "grad_norm": 0.0006938123260624707, - "learning_rate": 0.00019999749739974085, - "loss": 46.0, - "step": 29464 - }, - { - "epoch": 2.2528050155781103, - "grad_norm": 0.004282000940293074, - "learning_rate": 0.0001999974972298059, - "loss": 46.0, - "step": 29465 - }, - { - "epoch": 2.2528814725615, - "grad_norm": 0.0006187624530866742, - "learning_rate": 0.00019999749705986515, - "loss": 46.0, - "step": 29466 - }, - { - "epoch": 2.25295792954489, - "grad_norm": 0.0004884095978923142, - "learning_rate": 0.00019999749688991863, - "loss": 46.0, - "step": 29467 - }, - { - "epoch": 2.2530343865282796, - "grad_norm": 0.0007862441707402468, - "learning_rate": 0.00019999749671996634, - "loss": 46.0, - "step": 29468 - }, - { - "epoch": 2.2531108435116693, - "grad_norm": 0.002405275823548436, - "learning_rate": 0.0001999974965500083, - "loss": 46.0, - "step": 29469 - }, - { - "epoch": 2.253187300495059, - "grad_norm": 0.0012418583501130342, - "learning_rate": 0.0001999974963800445, - "loss": 46.0, - "step": 29470 - }, - { - "epoch": 2.253263757478449, - "grad_norm": 0.0008651570533402264, - "learning_rate": 0.00019999749621007492, - "loss": 46.0, - "step": 29471 - }, - { - "epoch": 2.253340214461838, - "grad_norm": 0.003094903426244855, - "learning_rate": 0.00019999749604009956, - "loss": 46.0, - "step": 29472 - }, - { - "epoch": 2.253416671445228, - "grad_norm": 0.0023649903014302254, - "learning_rate": 0.00019999749587011842, - "loss": 46.0, - "step": 29473 - }, - { - "epoch": 2.2534931284286177, - "grad_norm": 0.0015670970315113664, - "learning_rate": 0.00019999749570013157, - "loss": 46.0, - "step": 29474 - }, - { - "epoch": 2.2535695854120075, - "grad_norm": 0.0008354040328413248, - "learning_rate": 0.00019999749553013886, - "loss": 46.0, - "step": 29475 - }, - { - "epoch": 2.2536460423953972, - "grad_norm": 0.0007118411595001817, - "learning_rate": 0.00019999749536014046, - "loss": 46.0, - "step": 29476 - }, - { - "epoch": 2.253722499378787, - "grad_norm": 0.0009362486889585853, - "learning_rate": 0.00019999749519013625, - "loss": 46.0, - "step": 29477 - }, - { - "epoch": 2.2537989563621768, - "grad_norm": 0.0008002620888873935, - "learning_rate": 0.00019999749502012625, - "loss": 46.0, - "step": 29478 - }, - { - "epoch": 2.2538754133455665, - "grad_norm": 0.000947941851336509, - "learning_rate": 0.00019999749485011053, - "loss": 46.0, - "step": 29479 - }, - { - "epoch": 2.2539518703289563, - "grad_norm": 0.0005985077586956322, - "learning_rate": 0.000199997494680089, - "loss": 46.0, - "step": 29480 - }, - { - "epoch": 2.254028327312346, - "grad_norm": 0.0011883239494636655, - "learning_rate": 0.00019999749451006174, - "loss": 46.0, - "step": 29481 - }, - { - "epoch": 2.254104784295736, - "grad_norm": 0.001346498727798462, - "learning_rate": 0.00019999749434002867, - "loss": 46.0, - "step": 29482 - }, - { - "epoch": 2.2541812412791256, - "grad_norm": 0.0012821832206100225, - "learning_rate": 0.00019999749416998988, - "loss": 46.0, - "step": 29483 - }, - { - "epoch": 2.254257698262515, - "grad_norm": 0.0011586984619498253, - "learning_rate": 0.0001999974939999453, - "loss": 46.0, - "step": 29484 - }, - { - "epoch": 2.2543341552459046, - "grad_norm": 0.0006998831522651017, - "learning_rate": 0.00019999749382989493, - "loss": 46.0, - "step": 29485 - }, - { - "epoch": 2.2544106122292944, - "grad_norm": 0.0012054620310664177, - "learning_rate": 0.00019999749365983883, - "loss": 46.0, - "step": 29486 - }, - { - "epoch": 2.254487069212684, - "grad_norm": 0.0005119956331327558, - "learning_rate": 0.0001999974934897769, - "loss": 46.0, - "step": 29487 - }, - { - "epoch": 2.254563526196074, - "grad_norm": 0.006121808663010597, - "learning_rate": 0.00019999749331970926, - "loss": 46.0, - "step": 29488 - }, - { - "epoch": 2.2546399831794637, - "grad_norm": 0.002351575531065464, - "learning_rate": 0.00019999749314963584, - "loss": 46.0, - "step": 29489 - }, - { - "epoch": 2.2547164401628534, - "grad_norm": 0.0008991486392915249, - "learning_rate": 0.0001999974929795566, - "loss": 46.0, - "step": 29490 - }, - { - "epoch": 2.254792897146243, - "grad_norm": 0.001591112231835723, - "learning_rate": 0.00019999749280947163, - "loss": 46.0, - "step": 29491 - }, - { - "epoch": 2.254869354129633, - "grad_norm": 0.0016514176968485117, - "learning_rate": 0.0001999974926393809, - "loss": 46.0, - "step": 29492 - }, - { - "epoch": 2.2549458111130227, - "grad_norm": 0.0009082417236641049, - "learning_rate": 0.0001999974924692844, - "loss": 46.0, - "step": 29493 - }, - { - "epoch": 2.255022268096412, - "grad_norm": 0.0008060195250436664, - "learning_rate": 0.00019999749229918212, - "loss": 46.0, - "step": 29494 - }, - { - "epoch": 2.255098725079802, - "grad_norm": 0.0004528650897555053, - "learning_rate": 0.00019999749212907405, - "loss": 46.0, - "step": 29495 - }, - { - "epoch": 2.2551751820631916, - "grad_norm": 0.0017089232569560409, - "learning_rate": 0.0001999974919589602, - "loss": 46.0, - "step": 29496 - }, - { - "epoch": 2.2552516390465813, - "grad_norm": 0.0009664823883213103, - "learning_rate": 0.00019999749178884065, - "loss": 46.0, - "step": 29497 - }, - { - "epoch": 2.255328096029971, - "grad_norm": 0.005389222875237465, - "learning_rate": 0.0001999974916187153, - "loss": 46.0, - "step": 29498 - }, - { - "epoch": 2.255404553013361, - "grad_norm": 0.0006029255455359817, - "learning_rate": 0.00019999749144858418, - "loss": 46.0, - "step": 29499 - }, - { - "epoch": 2.2554810099967506, - "grad_norm": 0.0005662412731908262, - "learning_rate": 0.00019999749127844727, - "loss": 46.0, - "step": 29500 - }, - { - "epoch": 2.2555574669801404, - "grad_norm": 0.0008485973230563104, - "learning_rate": 0.0001999974911083046, - "loss": 46.0, - "step": 29501 - }, - { - "epoch": 2.25563392396353, - "grad_norm": 0.00564415380358696, - "learning_rate": 0.00019999749093815617, - "loss": 46.0, - "step": 29502 - }, - { - "epoch": 2.25571038094692, - "grad_norm": 0.0012757977237924933, - "learning_rate": 0.00019999749076800196, - "loss": 46.0, - "step": 29503 - }, - { - "epoch": 2.2557868379303097, - "grad_norm": 0.0005799197242595255, - "learning_rate": 0.000199997490597842, - "loss": 46.0, - "step": 29504 - }, - { - "epoch": 2.2558632949136994, - "grad_norm": 0.0013380179880186915, - "learning_rate": 0.00019999749042767624, - "loss": 46.0, - "step": 29505 - }, - { - "epoch": 2.2559397518970887, - "grad_norm": 0.0011360276257619262, - "learning_rate": 0.00019999749025750475, - "loss": 46.0, - "step": 29506 - }, - { - "epoch": 2.2560162088804785, - "grad_norm": 0.0008341424399986863, - "learning_rate": 0.00019999749008732745, - "loss": 46.0, - "step": 29507 - }, - { - "epoch": 2.2560926658638683, - "grad_norm": 0.0003634248860180378, - "learning_rate": 0.00019999748991714439, - "loss": 46.0, - "step": 29508 - }, - { - "epoch": 2.256169122847258, - "grad_norm": 0.00027449720073491335, - "learning_rate": 0.0001999974897469556, - "loss": 46.0, - "step": 29509 - }, - { - "epoch": 2.2562455798306478, - "grad_norm": 0.0008891826728358865, - "learning_rate": 0.00019999748957676098, - "loss": 46.0, - "step": 29510 - }, - { - "epoch": 2.2563220368140375, - "grad_norm": 0.002426140708848834, - "learning_rate": 0.00019999748940656065, - "loss": 46.0, - "step": 29511 - }, - { - "epoch": 2.2563984937974273, - "grad_norm": 0.0028874347917735577, - "learning_rate": 0.00019999748923635454, - "loss": 46.0, - "step": 29512 - }, - { - "epoch": 2.256474950780817, - "grad_norm": 0.0008574674720875919, - "learning_rate": 0.0001999974890661426, - "loss": 46.0, - "step": 29513 - }, - { - "epoch": 2.256551407764207, - "grad_norm": 0.0010328348726034164, - "learning_rate": 0.00019999748889592495, - "loss": 46.0, - "step": 29514 - }, - { - "epoch": 2.256627864747596, - "grad_norm": 0.0008847320568747818, - "learning_rate": 0.00019999748872570153, - "loss": 46.0, - "step": 29515 - }, - { - "epoch": 2.256704321730986, - "grad_norm": 0.0007886847597546875, - "learning_rate": 0.0001999974885554723, - "loss": 46.0, - "step": 29516 - }, - { - "epoch": 2.2567807787143757, - "grad_norm": 0.0005528398905880749, - "learning_rate": 0.00019999748838523735, - "loss": 46.0, - "step": 29517 - }, - { - "epoch": 2.2568572356977654, - "grad_norm": 0.001189409988000989, - "learning_rate": 0.0001999974882149966, - "loss": 46.0, - "step": 29518 - }, - { - "epoch": 2.256933692681155, - "grad_norm": 0.0007394845597445965, - "learning_rate": 0.00019999748804475008, - "loss": 46.0, - "step": 29519 - }, - { - "epoch": 2.257010149664545, - "grad_norm": 0.004855920094996691, - "learning_rate": 0.00019999748787449782, - "loss": 46.0, - "step": 29520 - }, - { - "epoch": 2.2570866066479347, - "grad_norm": 0.0017416721675544977, - "learning_rate": 0.00019999748770423975, - "loss": 46.0, - "step": 29521 - }, - { - "epoch": 2.2571630636313245, - "grad_norm": 0.001229771296493709, - "learning_rate": 0.00019999748753397593, - "loss": 46.0, - "step": 29522 - }, - { - "epoch": 2.2572395206147142, - "grad_norm": 0.0015130923129618168, - "learning_rate": 0.00019999748736370635, - "loss": 46.0, - "step": 29523 - }, - { - "epoch": 2.257315977598104, - "grad_norm": 0.0007134611369110644, - "learning_rate": 0.00019999748719343096, - "loss": 46.0, - "step": 29524 - }, - { - "epoch": 2.2573924345814937, - "grad_norm": 0.0020269188098609447, - "learning_rate": 0.00019999748702314985, - "loss": 46.0, - "step": 29525 - }, - { - "epoch": 2.2574688915648835, - "grad_norm": 0.001218091114424169, - "learning_rate": 0.00019999748685286295, - "loss": 46.0, - "step": 29526 - }, - { - "epoch": 2.2575453485482733, - "grad_norm": 0.0012776233488693833, - "learning_rate": 0.0001999974866825703, - "loss": 46.0, - "step": 29527 - }, - { - "epoch": 2.2576218055316626, - "grad_norm": 0.0025559107307344675, - "learning_rate": 0.00019999748651227184, - "loss": 46.0, - "step": 29528 - }, - { - "epoch": 2.2576982625150523, - "grad_norm": 0.0003450391814112663, - "learning_rate": 0.00019999748634196764, - "loss": 46.0, - "step": 29529 - }, - { - "epoch": 2.257774719498442, - "grad_norm": 0.0008357316837646067, - "learning_rate": 0.0001999974861716577, - "loss": 46.0, - "step": 29530 - }, - { - "epoch": 2.257851176481832, - "grad_norm": 0.0022005855571478605, - "learning_rate": 0.00019999748600134192, - "loss": 46.0, - "step": 29531 - }, - { - "epoch": 2.2579276334652216, - "grad_norm": 0.003092424478381872, - "learning_rate": 0.00019999748583102042, - "loss": 46.0, - "step": 29532 - }, - { - "epoch": 2.2580040904486114, - "grad_norm": 0.0006877164705656469, - "learning_rate": 0.00019999748566069313, - "loss": 46.0, - "step": 29533 - }, - { - "epoch": 2.258080547432001, - "grad_norm": 0.0008767228573560715, - "learning_rate": 0.0001999974854903601, - "loss": 46.0, - "step": 29534 - }, - { - "epoch": 2.258157004415391, - "grad_norm": 0.002584712579846382, - "learning_rate": 0.00019999748532002125, - "loss": 46.0, - "step": 29535 - }, - { - "epoch": 2.2582334613987807, - "grad_norm": 0.002083809347823262, - "learning_rate": 0.00019999748514967666, - "loss": 46.0, - "step": 29536 - }, - { - "epoch": 2.25830991838217, - "grad_norm": 0.0006496751448139548, - "learning_rate": 0.0001999974849793263, - "loss": 46.0, - "step": 29537 - }, - { - "epoch": 2.2583863753655598, - "grad_norm": 0.0010729285422712564, - "learning_rate": 0.00019999748480897017, - "loss": 46.0, - "step": 29538 - }, - { - "epoch": 2.2584628323489495, - "grad_norm": 0.0014099609106779099, - "learning_rate": 0.00019999748463860826, - "loss": 46.0, - "step": 29539 - }, - { - "epoch": 2.2585392893323393, - "grad_norm": 0.007729626260697842, - "learning_rate": 0.0001999974844682406, - "loss": 46.0, - "step": 29540 - }, - { - "epoch": 2.258615746315729, - "grad_norm": 0.002664318773895502, - "learning_rate": 0.00019999748429786718, - "loss": 46.0, - "step": 29541 - }, - { - "epoch": 2.258692203299119, - "grad_norm": 0.0011940925614908338, - "learning_rate": 0.00019999748412748795, - "loss": 46.0, - "step": 29542 - }, - { - "epoch": 2.2587686602825086, - "grad_norm": 0.0060840873047709465, - "learning_rate": 0.00019999748395710298, - "loss": 46.0, - "step": 29543 - }, - { - "epoch": 2.2588451172658983, - "grad_norm": 0.0009045965853147209, - "learning_rate": 0.00019999748378671223, - "loss": 46.0, - "step": 29544 - }, - { - "epoch": 2.258921574249288, - "grad_norm": 0.0004799450689461082, - "learning_rate": 0.00019999748361631574, - "loss": 46.0, - "step": 29545 - }, - { - "epoch": 2.258998031232678, - "grad_norm": 0.0007900987402535975, - "learning_rate": 0.00019999748344591342, - "loss": 46.0, - "step": 29546 - }, - { - "epoch": 2.2590744882160676, - "grad_norm": 0.0010051662102341652, - "learning_rate": 0.00019999748327550538, - "loss": 46.0, - "step": 29547 - }, - { - "epoch": 2.2591509451994574, - "grad_norm": 0.0010583508992567658, - "learning_rate": 0.00019999748310509156, - "loss": 46.0, - "step": 29548 - }, - { - "epoch": 2.259227402182847, - "grad_norm": 0.001418265514075756, - "learning_rate": 0.00019999748293467198, - "loss": 46.0, - "step": 29549 - }, - { - "epoch": 2.2593038591662364, - "grad_norm": 0.0005542614962905645, - "learning_rate": 0.00019999748276424662, - "loss": 46.0, - "step": 29550 - }, - { - "epoch": 2.259380316149626, - "grad_norm": 0.003746501635760069, - "learning_rate": 0.00019999748259381548, - "loss": 46.0, - "step": 29551 - }, - { - "epoch": 2.259456773133016, - "grad_norm": 0.0011258306913077831, - "learning_rate": 0.00019999748242337858, - "loss": 46.0, - "step": 29552 - }, - { - "epoch": 2.2595332301164057, - "grad_norm": 0.0018026726320385933, - "learning_rate": 0.0001999974822529359, - "loss": 46.0, - "step": 29553 - }, - { - "epoch": 2.2596096870997955, - "grad_norm": 0.0005133642116561532, - "learning_rate": 0.00019999748208248747, - "loss": 46.0, - "step": 29554 - }, - { - "epoch": 2.2596861440831852, - "grad_norm": 0.0009562474442645907, - "learning_rate": 0.00019999748191203327, - "loss": 46.0, - "step": 29555 - }, - { - "epoch": 2.259762601066575, - "grad_norm": 0.0010139537043869495, - "learning_rate": 0.0001999974817415733, - "loss": 46.0, - "step": 29556 - }, - { - "epoch": 2.2598390580499648, - "grad_norm": 0.0009557768935337663, - "learning_rate": 0.00019999748157110752, - "loss": 46.0, - "step": 29557 - }, - { - "epoch": 2.2599155150333545, - "grad_norm": 0.00045353241148404777, - "learning_rate": 0.000199997481400636, - "loss": 46.0, - "step": 29558 - }, - { - "epoch": 2.259991972016744, - "grad_norm": 0.006289840210229158, - "learning_rate": 0.00019999748123015874, - "loss": 46.0, - "step": 29559 - }, - { - "epoch": 2.2600684290001336, - "grad_norm": 0.0006640045321546495, - "learning_rate": 0.00019999748105967567, - "loss": 46.0, - "step": 29560 - }, - { - "epoch": 2.2601448859835234, - "grad_norm": 0.0006483257166109979, - "learning_rate": 0.00019999748088918686, - "loss": 46.0, - "step": 29561 - }, - { - "epoch": 2.260221342966913, - "grad_norm": 0.0006216985639184713, - "learning_rate": 0.00019999748071869227, - "loss": 46.0, - "step": 29562 - }, - { - "epoch": 2.260297799950303, - "grad_norm": 0.0009260012884624302, - "learning_rate": 0.00019999748054819189, - "loss": 46.0, - "step": 29563 - }, - { - "epoch": 2.2603742569336926, - "grad_norm": 0.019574521109461784, - "learning_rate": 0.00019999748037768578, - "loss": 46.0, - "step": 29564 - }, - { - "epoch": 2.2604507139170824, - "grad_norm": 0.000898151658475399, - "learning_rate": 0.00019999748020717387, - "loss": 46.0, - "step": 29565 - }, - { - "epoch": 2.260527170900472, - "grad_norm": 0.00130720273591578, - "learning_rate": 0.0001999974800366562, - "loss": 46.0, - "step": 29566 - }, - { - "epoch": 2.260603627883862, - "grad_norm": 0.0009814626537263393, - "learning_rate": 0.00019999747986613277, - "loss": 46.0, - "step": 29567 - }, - { - "epoch": 2.2606800848672517, - "grad_norm": 0.001772459247149527, - "learning_rate": 0.00019999747969560354, - "loss": 46.0, - "step": 29568 - }, - { - "epoch": 2.2607565418506415, - "grad_norm": 0.0010695073287934065, - "learning_rate": 0.00019999747952506857, - "loss": 46.0, - "step": 29569 - }, - { - "epoch": 2.260832998834031, - "grad_norm": 0.005123097449541092, - "learning_rate": 0.00019999747935452785, - "loss": 46.0, - "step": 29570 - }, - { - "epoch": 2.2609094558174205, - "grad_norm": 0.003967008087784052, - "learning_rate": 0.00019999747918398133, - "loss": 46.0, - "step": 29571 - }, - { - "epoch": 2.2609859128008103, - "grad_norm": 0.0028595540206879377, - "learning_rate": 0.00019999747901342904, - "loss": 46.0, - "step": 29572 - }, - { - "epoch": 2.2610623697842, - "grad_norm": 0.0009488065261393785, - "learning_rate": 0.00019999747884287097, - "loss": 46.0, - "step": 29573 - }, - { - "epoch": 2.26113882676759, - "grad_norm": 0.001119833905249834, - "learning_rate": 0.00019999747867230716, - "loss": 46.0, - "step": 29574 - }, - { - "epoch": 2.2612152837509796, - "grad_norm": 0.0013063232181593776, - "learning_rate": 0.00019999747850173754, - "loss": 46.0, - "step": 29575 - }, - { - "epoch": 2.2612917407343693, - "grad_norm": 0.0007169015007093549, - "learning_rate": 0.00019999747833116218, - "loss": 46.0, - "step": 29576 - }, - { - "epoch": 2.261368197717759, - "grad_norm": 0.0009960820898413658, - "learning_rate": 0.00019999747816058105, - "loss": 46.0, - "step": 29577 - }, - { - "epoch": 2.261444654701149, - "grad_norm": 0.0028823399916291237, - "learning_rate": 0.00019999747798999415, - "loss": 46.0, - "step": 29578 - }, - { - "epoch": 2.2615211116845386, - "grad_norm": 0.0024333784822374582, - "learning_rate": 0.00019999747781940147, - "loss": 46.0, - "step": 29579 - }, - { - "epoch": 2.2615975686679284, - "grad_norm": 0.0026083190459758043, - "learning_rate": 0.00019999747764880304, - "loss": 46.0, - "step": 29580 - }, - { - "epoch": 2.2616740256513177, - "grad_norm": 0.002803124487400055, - "learning_rate": 0.00019999747747819884, - "loss": 46.0, - "step": 29581 - }, - { - "epoch": 2.2617504826347075, - "grad_norm": 0.0011537878308445215, - "learning_rate": 0.00019999747730758887, - "loss": 46.0, - "step": 29582 - }, - { - "epoch": 2.261826939618097, - "grad_norm": 0.0011096703819930553, - "learning_rate": 0.00019999747713697312, - "loss": 46.0, - "step": 29583 - }, - { - "epoch": 2.261903396601487, - "grad_norm": 0.0015065460465848446, - "learning_rate": 0.0001999974769663516, - "loss": 46.0, - "step": 29584 - }, - { - "epoch": 2.2619798535848767, - "grad_norm": 0.0005231752875261009, - "learning_rate": 0.0001999974767957243, - "loss": 46.0, - "step": 29585 - }, - { - "epoch": 2.2620563105682665, - "grad_norm": 0.002661655889824033, - "learning_rate": 0.00019999747662509125, - "loss": 46.0, - "step": 29586 - }, - { - "epoch": 2.2621327675516563, - "grad_norm": 0.0010993279283866286, - "learning_rate": 0.0001999974764544524, - "loss": 46.0, - "step": 29587 - }, - { - "epoch": 2.262209224535046, - "grad_norm": 0.0008182556484825909, - "learning_rate": 0.00019999747628380782, - "loss": 46.0, - "step": 29588 - }, - { - "epoch": 2.262285681518436, - "grad_norm": 0.0008605673792771995, - "learning_rate": 0.00019999747611315746, - "loss": 46.0, - "step": 29589 - }, - { - "epoch": 2.2623621385018255, - "grad_norm": 0.0006910401862114668, - "learning_rate": 0.0001999974759425013, - "loss": 46.0, - "step": 29590 - }, - { - "epoch": 2.2624385954852153, - "grad_norm": 0.0030075605027377605, - "learning_rate": 0.00019999747577183942, - "loss": 46.0, - "step": 29591 - }, - { - "epoch": 2.262515052468605, - "grad_norm": 0.000518039392773062, - "learning_rate": 0.00019999747560117174, - "loss": 46.0, - "step": 29592 - }, - { - "epoch": 2.2625915094519944, - "grad_norm": 0.0009006580803543329, - "learning_rate": 0.0001999974754304983, - "loss": 46.0, - "step": 29593 - }, - { - "epoch": 2.262667966435384, - "grad_norm": 0.0022540846839547157, - "learning_rate": 0.0001999974752598191, - "loss": 46.0, - "step": 29594 - }, - { - "epoch": 2.262744423418774, - "grad_norm": 0.0008996726246550679, - "learning_rate": 0.00019999747508913412, - "loss": 46.0, - "step": 29595 - }, - { - "epoch": 2.2628208804021637, - "grad_norm": 0.0013292315416038036, - "learning_rate": 0.00019999747491844337, - "loss": 46.0, - "step": 29596 - }, - { - "epoch": 2.2628973373855534, - "grad_norm": 0.0029233135282993317, - "learning_rate": 0.00019999747474774683, - "loss": 46.0, - "step": 29597 - }, - { - "epoch": 2.262973794368943, - "grad_norm": 0.00048096824320964515, - "learning_rate": 0.00019999747457704456, - "loss": 46.0, - "step": 29598 - }, - { - "epoch": 2.263050251352333, - "grad_norm": 0.0011139829875901341, - "learning_rate": 0.00019999747440633652, - "loss": 46.0, - "step": 29599 - }, - { - "epoch": 2.2631267083357227, - "grad_norm": 0.0020469920709729195, - "learning_rate": 0.00019999747423562268, - "loss": 46.0, - "step": 29600 - }, - { - "epoch": 2.2632031653191125, - "grad_norm": 0.0010408778907731175, - "learning_rate": 0.00019999747406490307, - "loss": 46.0, - "step": 29601 - }, - { - "epoch": 2.2632796223025022, - "grad_norm": 0.0006072634132578969, - "learning_rate": 0.00019999747389417771, - "loss": 46.0, - "step": 29602 - }, - { - "epoch": 2.2633560792858916, - "grad_norm": 0.0003907477075699717, - "learning_rate": 0.00019999747372344658, - "loss": 46.0, - "step": 29603 - }, - { - "epoch": 2.2634325362692813, - "grad_norm": 0.001082484144717455, - "learning_rate": 0.00019999747355270968, - "loss": 46.0, - "step": 29604 - }, - { - "epoch": 2.263508993252671, - "grad_norm": 0.0016665211878716946, - "learning_rate": 0.000199997473381967, - "loss": 46.0, - "step": 29605 - }, - { - "epoch": 2.263585450236061, - "grad_norm": 0.0007992112077772617, - "learning_rate": 0.00019999747321121854, - "loss": 46.0, - "step": 29606 - }, - { - "epoch": 2.2636619072194506, - "grad_norm": 0.0005896664806641638, - "learning_rate": 0.00019999747304046435, - "loss": 46.0, - "step": 29607 - }, - { - "epoch": 2.2637383642028404, - "grad_norm": 0.0008752181311137974, - "learning_rate": 0.00019999747286970437, - "loss": 46.0, - "step": 29608 - }, - { - "epoch": 2.26381482118623, - "grad_norm": 0.0006907433271408081, - "learning_rate": 0.0001999974726989386, - "loss": 46.0, - "step": 29609 - }, - { - "epoch": 2.26389127816962, - "grad_norm": 0.011877243407070637, - "learning_rate": 0.00019999747252816708, - "loss": 46.0, - "step": 29610 - }, - { - "epoch": 2.2639677351530096, - "grad_norm": 0.0005057640955783427, - "learning_rate": 0.00019999747235738982, - "loss": 46.0, - "step": 29611 - }, - { - "epoch": 2.2640441921363994, - "grad_norm": 0.0006477448623627424, - "learning_rate": 0.00019999747218660673, - "loss": 46.0, - "step": 29612 - }, - { - "epoch": 2.264120649119789, - "grad_norm": 0.0009521179017610848, - "learning_rate": 0.00019999747201581794, - "loss": 46.0, - "step": 29613 - }, - { - "epoch": 2.264197106103179, - "grad_norm": 0.012359128333628178, - "learning_rate": 0.00019999747184502333, - "loss": 46.0, - "step": 29614 - }, - { - "epoch": 2.2642735630865682, - "grad_norm": 0.005482463166117668, - "learning_rate": 0.00019999747167422297, - "loss": 46.0, - "step": 29615 - }, - { - "epoch": 2.264350020069958, - "grad_norm": 0.0012245887191966176, - "learning_rate": 0.00019999747150341684, - "loss": 46.0, - "step": 29616 - }, - { - "epoch": 2.2644264770533478, - "grad_norm": 0.0010517871705815196, - "learning_rate": 0.00019999747133260493, - "loss": 46.0, - "step": 29617 - }, - { - "epoch": 2.2645029340367375, - "grad_norm": 0.0007861704798415303, - "learning_rate": 0.00019999747116178726, - "loss": 46.0, - "step": 29618 - }, - { - "epoch": 2.2645793910201273, - "grad_norm": 0.0008534854860045016, - "learning_rate": 0.0001999974709909638, - "loss": 46.0, - "step": 29619 - }, - { - "epoch": 2.264655848003517, - "grad_norm": 0.0005096105160191655, - "learning_rate": 0.0001999974708201346, - "loss": 46.0, - "step": 29620 - }, - { - "epoch": 2.264732304986907, - "grad_norm": 0.0011729886755347252, - "learning_rate": 0.0001999974706492996, - "loss": 46.0, - "step": 29621 - }, - { - "epoch": 2.2648087619702966, - "grad_norm": 0.0016736198449507356, - "learning_rate": 0.00019999747047845886, - "loss": 46.0, - "step": 29622 - }, - { - "epoch": 2.2648852189536863, - "grad_norm": 0.0012622796930372715, - "learning_rate": 0.00019999747030761234, - "loss": 46.0, - "step": 29623 - }, - { - "epoch": 2.264961675937076, - "grad_norm": 0.0012547977967187762, - "learning_rate": 0.00019999747013676005, - "loss": 46.0, - "step": 29624 - }, - { - "epoch": 2.2650381329204654, - "grad_norm": 0.0004887098912149668, - "learning_rate": 0.00019999746996590199, - "loss": 46.0, - "step": 29625 - }, - { - "epoch": 2.265114589903855, - "grad_norm": 0.0005050490726716816, - "learning_rate": 0.00019999746979503815, - "loss": 46.0, - "step": 29626 - }, - { - "epoch": 2.265191046887245, - "grad_norm": 0.0016263237921521068, - "learning_rate": 0.00019999746962416856, - "loss": 46.0, - "step": 29627 - }, - { - "epoch": 2.2652675038706347, - "grad_norm": 0.002395712537690997, - "learning_rate": 0.0001999974694532932, - "loss": 46.0, - "step": 29628 - }, - { - "epoch": 2.2653439608540245, - "grad_norm": 0.0006180467898957431, - "learning_rate": 0.00019999746928241205, - "loss": 46.0, - "step": 29629 - }, - { - "epoch": 2.265420417837414, - "grad_norm": 0.0007490097777917981, - "learning_rate": 0.00019999746911152514, - "loss": 46.0, - "step": 29630 - }, - { - "epoch": 2.265496874820804, - "grad_norm": 0.002582067623734474, - "learning_rate": 0.00019999746894063246, - "loss": 46.0, - "step": 29631 - }, - { - "epoch": 2.2655733318041937, - "grad_norm": 0.0018171604024246335, - "learning_rate": 0.00019999746876973404, - "loss": 46.0, - "step": 29632 - }, - { - "epoch": 2.2656497887875835, - "grad_norm": 0.0010933029698207974, - "learning_rate": 0.0001999974685988298, - "loss": 46.0, - "step": 29633 - }, - { - "epoch": 2.2657262457709733, - "grad_norm": 0.0016335133695974946, - "learning_rate": 0.00019999746842791981, - "loss": 46.0, - "step": 29634 - }, - { - "epoch": 2.265802702754363, - "grad_norm": 0.0020243460312485695, - "learning_rate": 0.0001999974682570041, - "loss": 46.0, - "step": 29635 - }, - { - "epoch": 2.265879159737753, - "grad_norm": 0.0008935952791944146, - "learning_rate": 0.00019999746808608255, - "loss": 46.0, - "step": 29636 - }, - { - "epoch": 2.265955616721142, - "grad_norm": 0.0014319374458864331, - "learning_rate": 0.0001999974679151553, - "loss": 46.0, - "step": 29637 - }, - { - "epoch": 2.266032073704532, - "grad_norm": 0.000753024301957339, - "learning_rate": 0.0001999974677442222, - "loss": 46.0, - "step": 29638 - }, - { - "epoch": 2.2661085306879216, - "grad_norm": 0.0008614014368504286, - "learning_rate": 0.00019999746757328336, - "loss": 46.0, - "step": 29639 - }, - { - "epoch": 2.2661849876713114, - "grad_norm": 0.0008059954270720482, - "learning_rate": 0.00019999746740233877, - "loss": 46.0, - "step": 29640 - }, - { - "epoch": 2.266261444654701, - "grad_norm": 0.0008873986080288887, - "learning_rate": 0.00019999746723138842, - "loss": 46.0, - "step": 29641 - }, - { - "epoch": 2.266337901638091, - "grad_norm": 0.0017346162348985672, - "learning_rate": 0.00019999746706043228, - "loss": 46.0, - "step": 29642 - }, - { - "epoch": 2.2664143586214807, - "grad_norm": 0.0013345525367185473, - "learning_rate": 0.00019999746688947035, - "loss": 46.0, - "step": 29643 - }, - { - "epoch": 2.2664908156048704, - "grad_norm": 0.0009429907659068704, - "learning_rate": 0.00019999746671850267, - "loss": 46.0, - "step": 29644 - }, - { - "epoch": 2.26656727258826, - "grad_norm": 0.0003901784948538989, - "learning_rate": 0.00019999746654752925, - "loss": 46.0, - "step": 29645 - }, - { - "epoch": 2.2666437295716495, - "grad_norm": 0.0008872750331647694, - "learning_rate": 0.00019999746637655003, - "loss": 46.0, - "step": 29646 - }, - { - "epoch": 2.2667201865550393, - "grad_norm": 0.010518618859350681, - "learning_rate": 0.00019999746620556503, - "loss": 46.0, - "step": 29647 - }, - { - "epoch": 2.266796643538429, - "grad_norm": 0.0004517077759373933, - "learning_rate": 0.0001999974660345743, - "loss": 46.0, - "step": 29648 - }, - { - "epoch": 2.266873100521819, - "grad_norm": 0.0004741284647025168, - "learning_rate": 0.00019999746586357777, - "loss": 46.0, - "step": 29649 - }, - { - "epoch": 2.2669495575052085, - "grad_norm": 0.0011188186472281814, - "learning_rate": 0.00019999746569257547, - "loss": 46.0, - "step": 29650 - }, - { - "epoch": 2.2670260144885983, - "grad_norm": 0.0005691573023796082, - "learning_rate": 0.0001999974655215674, - "loss": 46.0, - "step": 29651 - }, - { - "epoch": 2.267102471471988, - "grad_norm": 0.0015496093546971679, - "learning_rate": 0.0001999974653505536, - "loss": 46.0, - "step": 29652 - }, - { - "epoch": 2.267178928455378, - "grad_norm": 0.0010420939652249217, - "learning_rate": 0.000199997465179534, - "loss": 46.0, - "step": 29653 - }, - { - "epoch": 2.2672553854387676, - "grad_norm": 0.0010665485169738531, - "learning_rate": 0.00019999746500850863, - "loss": 46.0, - "step": 29654 - }, - { - "epoch": 2.2673318424221574, - "grad_norm": 0.0026481342501938343, - "learning_rate": 0.00019999746483747747, - "loss": 46.0, - "step": 29655 - }, - { - "epoch": 2.267408299405547, - "grad_norm": 0.0008155222167260945, - "learning_rate": 0.00019999746466644057, - "loss": 46.0, - "step": 29656 - }, - { - "epoch": 2.267484756388937, - "grad_norm": 0.0023945928551256657, - "learning_rate": 0.0001999974644953979, - "loss": 46.0, - "step": 29657 - }, - { - "epoch": 2.2675612133723266, - "grad_norm": 0.0009923020843416452, - "learning_rate": 0.00019999746432434944, - "loss": 46.0, - "step": 29658 - }, - { - "epoch": 2.267637670355716, - "grad_norm": 0.0012029190547764301, - "learning_rate": 0.00019999746415329522, - "loss": 46.0, - "step": 29659 - }, - { - "epoch": 2.2677141273391057, - "grad_norm": 0.0007509376155212522, - "learning_rate": 0.00019999746398223524, - "loss": 46.0, - "step": 29660 - }, - { - "epoch": 2.2677905843224955, - "grad_norm": 0.0009993338026106358, - "learning_rate": 0.00019999746381116947, - "loss": 46.0, - "step": 29661 - }, - { - "epoch": 2.2678670413058852, - "grad_norm": 0.0031778153497725725, - "learning_rate": 0.00019999746364009796, - "loss": 46.0, - "step": 29662 - }, - { - "epoch": 2.267943498289275, - "grad_norm": 0.0007198613602668047, - "learning_rate": 0.00019999746346902067, - "loss": 46.0, - "step": 29663 - }, - { - "epoch": 2.2680199552726648, - "grad_norm": 0.0012843615841120481, - "learning_rate": 0.0001999974632979376, - "loss": 46.0, - "step": 29664 - }, - { - "epoch": 2.2680964122560545, - "grad_norm": 0.0006143125938251615, - "learning_rate": 0.00019999746312684876, - "loss": 46.0, - "step": 29665 - }, - { - "epoch": 2.2681728692394443, - "grad_norm": 0.003900025272741914, - "learning_rate": 0.00019999746295575418, - "loss": 46.0, - "step": 29666 - }, - { - "epoch": 2.268249326222834, - "grad_norm": 0.0013759088469669223, - "learning_rate": 0.0001999974627846538, - "loss": 46.0, - "step": 29667 - }, - { - "epoch": 2.2683257832062234, - "grad_norm": 0.0015312323812395334, - "learning_rate": 0.00019999746261354766, - "loss": 46.0, - "step": 29668 - }, - { - "epoch": 2.268402240189613, - "grad_norm": 0.0011515525402501225, - "learning_rate": 0.00019999746244243576, - "loss": 46.0, - "step": 29669 - }, - { - "epoch": 2.268478697173003, - "grad_norm": 0.0018562345067039132, - "learning_rate": 0.00019999746227131808, - "loss": 46.0, - "step": 29670 - }, - { - "epoch": 2.2685551541563926, - "grad_norm": 0.0018017544643953443, - "learning_rate": 0.00019999746210019463, - "loss": 46.0, - "step": 29671 - }, - { - "epoch": 2.2686316111397824, - "grad_norm": 0.005346236284822226, - "learning_rate": 0.0001999974619290654, - "loss": 46.0, - "step": 29672 - }, - { - "epoch": 2.268708068123172, - "grad_norm": 0.0018701773369684815, - "learning_rate": 0.0001999974617579304, - "loss": 46.0, - "step": 29673 - }, - { - "epoch": 2.268784525106562, - "grad_norm": 0.0005911273183301091, - "learning_rate": 0.00019999746158678967, - "loss": 46.0, - "step": 29674 - }, - { - "epoch": 2.2688609820899517, - "grad_norm": 0.001537785166874528, - "learning_rate": 0.00019999746141564315, - "loss": 46.0, - "step": 29675 - }, - { - "epoch": 2.2689374390733414, - "grad_norm": 0.0005459311651065946, - "learning_rate": 0.00019999746124449086, - "loss": 46.0, - "step": 29676 - }, - { - "epoch": 2.269013896056731, - "grad_norm": 0.0008583724847994745, - "learning_rate": 0.0001999974610733328, - "loss": 46.0, - "step": 29677 - }, - { - "epoch": 2.269090353040121, - "grad_norm": 0.0016862861812114716, - "learning_rate": 0.00019999746090216896, - "loss": 46.0, - "step": 29678 - }, - { - "epoch": 2.2691668100235107, - "grad_norm": 0.0015586403897032142, - "learning_rate": 0.00019999746073099935, - "loss": 46.0, - "step": 29679 - }, - { - "epoch": 2.2692432670069005, - "grad_norm": 0.009733383543789387, - "learning_rate": 0.000199997460559824, - "loss": 46.0, - "step": 29680 - }, - { - "epoch": 2.26931972399029, - "grad_norm": 0.0011432974133640528, - "learning_rate": 0.00019999746038864283, - "loss": 46.0, - "step": 29681 - }, - { - "epoch": 2.2693961809736796, - "grad_norm": 0.0027637737803161144, - "learning_rate": 0.00019999746021745593, - "loss": 46.0, - "step": 29682 - }, - { - "epoch": 2.2694726379570693, - "grad_norm": 0.0008523364667780697, - "learning_rate": 0.00019999746004626325, - "loss": 46.0, - "step": 29683 - }, - { - "epoch": 2.269549094940459, - "grad_norm": 0.005892177112400532, - "learning_rate": 0.00019999745987506483, - "loss": 46.0, - "step": 29684 - }, - { - "epoch": 2.269625551923849, - "grad_norm": 0.00045390776358544827, - "learning_rate": 0.00019999745970386058, - "loss": 46.0, - "step": 29685 - }, - { - "epoch": 2.2697020089072386, - "grad_norm": 0.0009760102839209139, - "learning_rate": 0.00019999745953265058, - "loss": 46.0, - "step": 29686 - }, - { - "epoch": 2.2697784658906284, - "grad_norm": 0.0030714794993400574, - "learning_rate": 0.00019999745936143484, - "loss": 46.0, - "step": 29687 - }, - { - "epoch": 2.269854922874018, - "grad_norm": 0.0006153856520541012, - "learning_rate": 0.00019999745919021332, - "loss": 46.0, - "step": 29688 - }, - { - "epoch": 2.269931379857408, - "grad_norm": 0.0010291390353813767, - "learning_rate": 0.000199997459018986, - "loss": 46.0, - "step": 29689 - }, - { - "epoch": 2.270007836840797, - "grad_norm": 0.0010182630503550172, - "learning_rate": 0.00019999745884775297, - "loss": 46.0, - "step": 29690 - }, - { - "epoch": 2.270084293824187, - "grad_norm": 0.001276559429243207, - "learning_rate": 0.00019999745867651413, - "loss": 46.0, - "step": 29691 - }, - { - "epoch": 2.2701607508075767, - "grad_norm": 0.0018659690394997597, - "learning_rate": 0.00019999745850526952, - "loss": 46.0, - "step": 29692 - }, - { - "epoch": 2.2702372077909665, - "grad_norm": 0.002090269234031439, - "learning_rate": 0.00019999745833401914, - "loss": 46.0, - "step": 29693 - }, - { - "epoch": 2.2703136647743563, - "grad_norm": 0.0008636604179628193, - "learning_rate": 0.000199997458162763, - "loss": 46.0, - "step": 29694 - }, - { - "epoch": 2.270390121757746, - "grad_norm": 0.000776358472649008, - "learning_rate": 0.0001999974579915011, - "loss": 46.0, - "step": 29695 - }, - { - "epoch": 2.2704665787411358, - "grad_norm": 0.0010542470263317227, - "learning_rate": 0.0001999974578202334, - "loss": 46.0, - "step": 29696 - }, - { - "epoch": 2.2705430357245255, - "grad_norm": 0.0009724535630084574, - "learning_rate": 0.00019999745764895995, - "loss": 46.0, - "step": 29697 - }, - { - "epoch": 2.2706194927079153, - "grad_norm": 0.0012763264821842313, - "learning_rate": 0.00019999745747768075, - "loss": 46.0, - "step": 29698 - }, - { - "epoch": 2.270695949691305, - "grad_norm": 0.003610215848311782, - "learning_rate": 0.00019999745730639576, - "loss": 46.0, - "step": 29699 - }, - { - "epoch": 2.270772406674695, - "grad_norm": 0.0007914112065918744, - "learning_rate": 0.00019999745713510501, - "loss": 46.0, - "step": 29700 - }, - { - "epoch": 2.2708488636580846, - "grad_norm": 0.00998303759843111, - "learning_rate": 0.00019999745696380847, - "loss": 46.0, - "step": 29701 - }, - { - "epoch": 2.270925320641474, - "grad_norm": 0.0024524356704205275, - "learning_rate": 0.00019999745679250615, - "loss": 46.0, - "step": 29702 - }, - { - "epoch": 2.2710017776248637, - "grad_norm": 0.0009396827081218362, - "learning_rate": 0.00019999745662119812, - "loss": 46.0, - "step": 29703 - }, - { - "epoch": 2.2710782346082534, - "grad_norm": 0.0023927337024360895, - "learning_rate": 0.00019999745644988428, - "loss": 46.0, - "step": 29704 - }, - { - "epoch": 2.271154691591643, - "grad_norm": 0.0009745231363922358, - "learning_rate": 0.00019999745627856467, - "loss": 46.0, - "step": 29705 - }, - { - "epoch": 2.271231148575033, - "grad_norm": 0.0026453144382685423, - "learning_rate": 0.0001999974561072393, - "loss": 46.0, - "step": 29706 - }, - { - "epoch": 2.2713076055584227, - "grad_norm": 0.002038449514657259, - "learning_rate": 0.00019999745593590816, - "loss": 46.0, - "step": 29707 - }, - { - "epoch": 2.2713840625418125, - "grad_norm": 0.0005880210665054619, - "learning_rate": 0.00019999745576457123, - "loss": 46.0, - "step": 29708 - }, - { - "epoch": 2.2714605195252022, - "grad_norm": 0.001248908112756908, - "learning_rate": 0.00019999745559322855, - "loss": 46.0, - "step": 29709 - }, - { - "epoch": 2.271536976508592, - "grad_norm": 0.0015942917671054602, - "learning_rate": 0.0001999974554218801, - "loss": 46.0, - "step": 29710 - }, - { - "epoch": 2.2716134334919817, - "grad_norm": 0.0035309777595102787, - "learning_rate": 0.00019999745525052588, - "loss": 46.0, - "step": 29711 - }, - { - "epoch": 2.271689890475371, - "grad_norm": 0.00039428056334145367, - "learning_rate": 0.0001999974550791659, - "loss": 46.0, - "step": 29712 - }, - { - "epoch": 2.271766347458761, - "grad_norm": 0.0013230836484581232, - "learning_rate": 0.00019999745490780014, - "loss": 46.0, - "step": 29713 - }, - { - "epoch": 2.2718428044421506, - "grad_norm": 0.0016719972481951118, - "learning_rate": 0.00019999745473642862, - "loss": 46.0, - "step": 29714 - }, - { - "epoch": 2.2719192614255403, - "grad_norm": 0.0009867926128208637, - "learning_rate": 0.00019999745456505133, - "loss": 46.0, - "step": 29715 - }, - { - "epoch": 2.27199571840893, - "grad_norm": 0.0016819201409816742, - "learning_rate": 0.00019999745439366824, - "loss": 46.0, - "step": 29716 - }, - { - "epoch": 2.27207217539232, - "grad_norm": 0.003248332068324089, - "learning_rate": 0.00019999745422227943, - "loss": 46.0, - "step": 29717 - }, - { - "epoch": 2.2721486323757096, - "grad_norm": 0.002380089135840535, - "learning_rate": 0.00019999745405088482, - "loss": 46.0, - "step": 29718 - }, - { - "epoch": 2.2722250893590994, - "grad_norm": 0.0023042226675897837, - "learning_rate": 0.00019999745387948444, - "loss": 46.0, - "step": 29719 - }, - { - "epoch": 2.272301546342489, - "grad_norm": 0.0011556106619536877, - "learning_rate": 0.00019999745370807829, - "loss": 46.0, - "step": 29720 - }, - { - "epoch": 2.272378003325879, - "grad_norm": 0.0018479221034795046, - "learning_rate": 0.00019999745353666638, - "loss": 46.0, - "step": 29721 - }, - { - "epoch": 2.2724544603092687, - "grad_norm": 0.0017148521728813648, - "learning_rate": 0.0001999974533652487, - "loss": 46.0, - "step": 29722 - }, - { - "epoch": 2.2725309172926584, - "grad_norm": 0.0037179149221628904, - "learning_rate": 0.00019999745319382526, - "loss": 46.0, - "step": 29723 - }, - { - "epoch": 2.2726073742760478, - "grad_norm": 0.0006437920383177698, - "learning_rate": 0.00019999745302239603, - "loss": 46.0, - "step": 29724 - }, - { - "epoch": 2.2726838312594375, - "grad_norm": 0.0008031605393625796, - "learning_rate": 0.00019999745285096104, - "loss": 46.0, - "step": 29725 - }, - { - "epoch": 2.2727602882428273, - "grad_norm": 0.001416450017131865, - "learning_rate": 0.00019999745267952027, - "loss": 46.0, - "step": 29726 - }, - { - "epoch": 2.272836745226217, - "grad_norm": 0.0010032163700088859, - "learning_rate": 0.00019999745250807375, - "loss": 46.0, - "step": 29727 - }, - { - "epoch": 2.272913202209607, - "grad_norm": 0.000730924482923001, - "learning_rate": 0.00019999745233662144, - "loss": 46.0, - "step": 29728 - }, - { - "epoch": 2.2729896591929966, - "grad_norm": 0.003745750989764929, - "learning_rate": 0.0001999974521651634, - "loss": 46.0, - "step": 29729 - }, - { - "epoch": 2.2730661161763863, - "grad_norm": 0.0022143807727843523, - "learning_rate": 0.00019999745199369954, - "loss": 46.0, - "step": 29730 - }, - { - "epoch": 2.273142573159776, - "grad_norm": 0.0009323981939814985, - "learning_rate": 0.00019999745182222993, - "loss": 46.0, - "step": 29731 - }, - { - "epoch": 2.273219030143166, - "grad_norm": 0.0009212127770297229, - "learning_rate": 0.00019999745165075457, - "loss": 46.0, - "step": 29732 - }, - { - "epoch": 2.2732954871265556, - "grad_norm": 0.000590279814787209, - "learning_rate": 0.00019999745147927342, - "loss": 46.0, - "step": 29733 - }, - { - "epoch": 2.273371944109945, - "grad_norm": 0.0015829089097678661, - "learning_rate": 0.00019999745130778651, - "loss": 46.0, - "step": 29734 - }, - { - "epoch": 2.2734484010933347, - "grad_norm": 0.0006811293424107134, - "learning_rate": 0.0001999974511362938, - "loss": 46.0, - "step": 29735 - }, - { - "epoch": 2.2735248580767244, - "grad_norm": 0.0013754547107964754, - "learning_rate": 0.00019999745096479536, - "loss": 46.0, - "step": 29736 - }, - { - "epoch": 2.273601315060114, - "grad_norm": 0.0018407799070701003, - "learning_rate": 0.00019999745079329114, - "loss": 46.0, - "step": 29737 - }, - { - "epoch": 2.273677772043504, - "grad_norm": 0.001791534828953445, - "learning_rate": 0.00019999745062178117, - "loss": 46.0, - "step": 29738 - }, - { - "epoch": 2.2737542290268937, - "grad_norm": 0.0009379560360684991, - "learning_rate": 0.0001999974504502654, - "loss": 46.0, - "step": 29739 - }, - { - "epoch": 2.2738306860102835, - "grad_norm": 0.0031715580262243748, - "learning_rate": 0.0001999974502787439, - "loss": 46.0, - "step": 29740 - }, - { - "epoch": 2.2739071429936732, - "grad_norm": 0.001070274505764246, - "learning_rate": 0.00019999745010721654, - "loss": 46.0, - "step": 29741 - }, - { - "epoch": 2.273983599977063, - "grad_norm": 0.0007721201982349157, - "learning_rate": 0.0001999974499356835, - "loss": 46.0, - "step": 29742 - }, - { - "epoch": 2.2740600569604528, - "grad_norm": 0.0006021598819643259, - "learning_rate": 0.00019999744976414467, - "loss": 46.0, - "step": 29743 - }, - { - "epoch": 2.2741365139438425, - "grad_norm": 0.0005464857094921172, - "learning_rate": 0.00019999744959260006, - "loss": 46.0, - "step": 29744 - }, - { - "epoch": 2.2742129709272323, - "grad_norm": 0.0006201923824846745, - "learning_rate": 0.00019999744942104968, - "loss": 46.0, - "step": 29745 - }, - { - "epoch": 2.2742894279106216, - "grad_norm": 0.000782308226916939, - "learning_rate": 0.00019999744924949353, - "loss": 46.0, - "step": 29746 - }, - { - "epoch": 2.2743658848940114, - "grad_norm": 0.0025512855499982834, - "learning_rate": 0.00019999744907793162, - "loss": 46.0, - "step": 29747 - }, - { - "epoch": 2.274442341877401, - "grad_norm": 0.0008598458371125162, - "learning_rate": 0.00019999744890636395, - "loss": 46.0, - "step": 29748 - }, - { - "epoch": 2.274518798860791, - "grad_norm": 0.0007506250403821468, - "learning_rate": 0.00019999744873479047, - "loss": 46.0, - "step": 29749 - }, - { - "epoch": 2.2745952558441807, - "grad_norm": 0.0023013518657535315, - "learning_rate": 0.00019999744856321125, - "loss": 46.0, - "step": 29750 - }, - { - "epoch": 2.2746717128275704, - "grad_norm": 0.0005325338570401073, - "learning_rate": 0.00019999744839162625, - "loss": 46.0, - "step": 29751 - }, - { - "epoch": 2.27474816981096, - "grad_norm": 0.0015212559374049306, - "learning_rate": 0.0001999974482200355, - "loss": 46.0, - "step": 29752 - }, - { - "epoch": 2.27482462679435, - "grad_norm": 0.000996324117295444, - "learning_rate": 0.00019999744804843897, - "loss": 46.0, - "step": 29753 - }, - { - "epoch": 2.2749010837777397, - "grad_norm": 0.0012367131421342492, - "learning_rate": 0.00019999744787683668, - "loss": 46.0, - "step": 29754 - }, - { - "epoch": 2.2749775407611295, - "grad_norm": 0.0016539925709366798, - "learning_rate": 0.00019999744770522862, - "loss": 46.0, - "step": 29755 - }, - { - "epoch": 2.2750539977445188, - "grad_norm": 0.0005481173284351826, - "learning_rate": 0.00019999744753361478, - "loss": 46.0, - "step": 29756 - }, - { - "epoch": 2.2751304547279085, - "grad_norm": 0.0004594842903316021, - "learning_rate": 0.00019999744736199518, - "loss": 46.0, - "step": 29757 - }, - { - "epoch": 2.2752069117112983, - "grad_norm": 0.0009141605696640909, - "learning_rate": 0.0001999974471903698, - "loss": 46.0, - "step": 29758 - }, - { - "epoch": 2.275283368694688, - "grad_norm": 0.0008808863931335509, - "learning_rate": 0.00019999744701873864, - "loss": 46.0, - "step": 29759 - }, - { - "epoch": 2.275359825678078, - "grad_norm": 0.003569790394976735, - "learning_rate": 0.0001999974468471017, - "loss": 46.0, - "step": 29760 - }, - { - "epoch": 2.2754362826614676, - "grad_norm": 0.0015856667887419462, - "learning_rate": 0.00019999744667545903, - "loss": 46.0, - "step": 29761 - }, - { - "epoch": 2.2755127396448573, - "grad_norm": 0.0010833556298166513, - "learning_rate": 0.00019999744650381059, - "loss": 46.0, - "step": 29762 - }, - { - "epoch": 2.275589196628247, - "grad_norm": 0.0008403917890973389, - "learning_rate": 0.00019999744633215636, - "loss": 46.0, - "step": 29763 - }, - { - "epoch": 2.275665653611637, - "grad_norm": 0.0012686635600402951, - "learning_rate": 0.00019999744616049637, - "loss": 46.0, - "step": 29764 - }, - { - "epoch": 2.2757421105950266, - "grad_norm": 0.0013573673786595464, - "learning_rate": 0.0001999974459888306, - "loss": 46.0, - "step": 29765 - }, - { - "epoch": 2.2758185675784164, - "grad_norm": 0.006290150340646505, - "learning_rate": 0.00019999744581715906, - "loss": 46.0, - "step": 29766 - }, - { - "epoch": 2.275895024561806, - "grad_norm": 0.0023067155852913857, - "learning_rate": 0.00019999744564548174, - "loss": 46.0, - "step": 29767 - }, - { - "epoch": 2.2759714815451955, - "grad_norm": 0.000956893723923713, - "learning_rate": 0.00019999744547379868, - "loss": 46.0, - "step": 29768 - }, - { - "epoch": 2.2760479385285852, - "grad_norm": 0.0006701195961795747, - "learning_rate": 0.00019999744530210985, - "loss": 46.0, - "step": 29769 - }, - { - "epoch": 2.276124395511975, - "grad_norm": 0.0016100106295198202, - "learning_rate": 0.00019999744513041524, - "loss": 46.0, - "step": 29770 - }, - { - "epoch": 2.2762008524953647, - "grad_norm": 0.000908153597265482, - "learning_rate": 0.00019999744495871486, - "loss": 46.0, - "step": 29771 - }, - { - "epoch": 2.2762773094787545, - "grad_norm": 0.0010683792643249035, - "learning_rate": 0.00019999744478700873, - "loss": 46.0, - "step": 29772 - }, - { - "epoch": 2.2763537664621443, - "grad_norm": 0.0010068045230582356, - "learning_rate": 0.0001999974446152968, - "loss": 46.0, - "step": 29773 - }, - { - "epoch": 2.276430223445534, - "grad_norm": 0.0007091158186085522, - "learning_rate": 0.00019999744444357912, - "loss": 46.0, - "step": 29774 - }, - { - "epoch": 2.276506680428924, - "grad_norm": 0.0006944980123080313, - "learning_rate": 0.00019999744427185565, - "loss": 46.0, - "step": 29775 - }, - { - "epoch": 2.2765831374123136, - "grad_norm": 0.0012299803784117103, - "learning_rate": 0.00019999744410012645, - "loss": 46.0, - "step": 29776 - }, - { - "epoch": 2.276659594395703, - "grad_norm": 0.0010052475845441222, - "learning_rate": 0.00019999744392839143, - "loss": 46.0, - "step": 29777 - }, - { - "epoch": 2.2767360513790926, - "grad_norm": 0.000735848443582654, - "learning_rate": 0.0001999974437566507, - "loss": 46.0, - "step": 29778 - }, - { - "epoch": 2.2768125083624824, - "grad_norm": 0.0009324339334852993, - "learning_rate": 0.00019999744358490412, - "loss": 46.0, - "step": 29779 - }, - { - "epoch": 2.276888965345872, - "grad_norm": 0.0026561864651739597, - "learning_rate": 0.00019999744341315183, - "loss": 46.0, - "step": 29780 - }, - { - "epoch": 2.276965422329262, - "grad_norm": 0.0018196672899648547, - "learning_rate": 0.00019999744324139375, - "loss": 46.0, - "step": 29781 - }, - { - "epoch": 2.2770418793126517, - "grad_norm": 0.0010685429442673922, - "learning_rate": 0.00019999744306962994, - "loss": 46.0, - "step": 29782 - }, - { - "epoch": 2.2771183362960414, - "grad_norm": 0.0009594897856004536, - "learning_rate": 0.00019999744289786033, - "loss": 46.0, - "step": 29783 - }, - { - "epoch": 2.277194793279431, - "grad_norm": 0.0013892094139009714, - "learning_rate": 0.00019999744272608495, - "loss": 46.0, - "step": 29784 - }, - { - "epoch": 2.277271250262821, - "grad_norm": 0.0011564657324925065, - "learning_rate": 0.0001999974425543038, - "loss": 46.0, - "step": 29785 - }, - { - "epoch": 2.2773477072462107, - "grad_norm": 0.0018852401990443468, - "learning_rate": 0.00019999744238251687, - "loss": 46.0, - "step": 29786 - }, - { - "epoch": 2.2774241642296005, - "grad_norm": 0.0007022134959697723, - "learning_rate": 0.0001999974422107242, - "loss": 46.0, - "step": 29787 - }, - { - "epoch": 2.2775006212129902, - "grad_norm": 0.0003709019219968468, - "learning_rate": 0.00019999744203892572, - "loss": 46.0, - "step": 29788 - }, - { - "epoch": 2.27757707819638, - "grad_norm": 0.0013929371489211917, - "learning_rate": 0.0001999974418671215, - "loss": 46.0, - "step": 29789 - }, - { - "epoch": 2.2776535351797693, - "grad_norm": 0.007136743050068617, - "learning_rate": 0.00019999744169531153, - "loss": 46.0, - "step": 29790 - }, - { - "epoch": 2.277729992163159, - "grad_norm": 0.003079803427681327, - "learning_rate": 0.00019999744152349576, - "loss": 46.0, - "step": 29791 - }, - { - "epoch": 2.277806449146549, - "grad_norm": 0.00047877951874397695, - "learning_rate": 0.00019999744135167422, - "loss": 46.0, - "step": 29792 - }, - { - "epoch": 2.2778829061299386, - "grad_norm": 0.0004885787493549287, - "learning_rate": 0.0001999974411798469, - "loss": 46.0, - "step": 29793 - }, - { - "epoch": 2.2779593631133284, - "grad_norm": 0.001778580597601831, - "learning_rate": 0.00019999744100801384, - "loss": 46.0, - "step": 29794 - }, - { - "epoch": 2.278035820096718, - "grad_norm": 0.0012993629788979888, - "learning_rate": 0.000199997440836175, - "loss": 46.0, - "step": 29795 - }, - { - "epoch": 2.278112277080108, - "grad_norm": 0.0024956779088824987, - "learning_rate": 0.0001999974406643304, - "loss": 46.0, - "step": 29796 - }, - { - "epoch": 2.2781887340634976, - "grad_norm": 0.0010920710628852248, - "learning_rate": 0.00019999744049248, - "loss": 46.0, - "step": 29797 - }, - { - "epoch": 2.2782651910468874, - "grad_norm": 0.002022413071244955, - "learning_rate": 0.00019999744032062386, - "loss": 46.0, - "step": 29798 - }, - { - "epoch": 2.2783416480302767, - "grad_norm": 0.002443901728838682, - "learning_rate": 0.00019999744014876194, - "loss": 46.0, - "step": 29799 - }, - { - "epoch": 2.2784181050136665, - "grad_norm": 0.003246761392802, - "learning_rate": 0.00019999743997689424, - "loss": 46.0, - "step": 29800 - }, - { - "epoch": 2.2784945619970562, - "grad_norm": 0.0005772531148977578, - "learning_rate": 0.0001999974398050208, - "loss": 46.0, - "step": 29801 - }, - { - "epoch": 2.278571018980446, - "grad_norm": 0.0007120491936802864, - "learning_rate": 0.00019999743963314157, - "loss": 46.0, - "step": 29802 - }, - { - "epoch": 2.2786474759638358, - "grad_norm": 0.0008428823784925044, - "learning_rate": 0.00019999743946125657, - "loss": 46.0, - "step": 29803 - }, - { - "epoch": 2.2787239329472255, - "grad_norm": 0.00045496365055441856, - "learning_rate": 0.0001999974392893658, - "loss": 46.0, - "step": 29804 - }, - { - "epoch": 2.2788003899306153, - "grad_norm": 0.0004526723932940513, - "learning_rate": 0.00019999743911746926, - "loss": 46.0, - "step": 29805 - }, - { - "epoch": 2.278876846914005, - "grad_norm": 0.000582497101277113, - "learning_rate": 0.00019999743894556695, - "loss": 46.0, - "step": 29806 - }, - { - "epoch": 2.278953303897395, - "grad_norm": 0.0008856842177920043, - "learning_rate": 0.0001999974387736589, - "loss": 46.0, - "step": 29807 - }, - { - "epoch": 2.2790297608807846, - "grad_norm": 0.0016849333187565207, - "learning_rate": 0.00019999743860174505, - "loss": 46.0, - "step": 29808 - }, - { - "epoch": 2.2791062178641743, - "grad_norm": 0.0005244106287136674, - "learning_rate": 0.00019999743842982545, - "loss": 46.0, - "step": 29809 - }, - { - "epoch": 2.279182674847564, - "grad_norm": 0.0010295978281646967, - "learning_rate": 0.00019999743825790004, - "loss": 46.0, - "step": 29810 - }, - { - "epoch": 2.279259131830954, - "grad_norm": 0.004000594839453697, - "learning_rate": 0.0001999974380859689, - "loss": 46.0, - "step": 29811 - }, - { - "epoch": 2.279335588814343, - "grad_norm": 0.0008389134891331196, - "learning_rate": 0.00019999743791403198, - "loss": 46.0, - "step": 29812 - }, - { - "epoch": 2.279412045797733, - "grad_norm": 0.00045328133273869753, - "learning_rate": 0.00019999743774208928, - "loss": 46.0, - "step": 29813 - }, - { - "epoch": 2.2794885027811227, - "grad_norm": 0.0017156020039692521, - "learning_rate": 0.0001999974375701408, - "loss": 46.0, - "step": 29814 - }, - { - "epoch": 2.2795649597645125, - "grad_norm": 0.0011768981348723173, - "learning_rate": 0.0001999974373981866, - "loss": 46.0, - "step": 29815 - }, - { - "epoch": 2.279641416747902, - "grad_norm": 0.0009523766930215061, - "learning_rate": 0.00019999743722622662, - "loss": 46.0, - "step": 29816 - }, - { - "epoch": 2.279717873731292, - "grad_norm": 0.0010894424049183726, - "learning_rate": 0.00019999743705426083, - "loss": 46.0, - "step": 29817 - }, - { - "epoch": 2.2797943307146817, - "grad_norm": 0.001336152316071093, - "learning_rate": 0.00019999743688228931, - "loss": 46.0, - "step": 29818 - }, - { - "epoch": 2.2798707876980715, - "grad_norm": 0.0010350003140047193, - "learning_rate": 0.000199997436710312, - "loss": 46.0, - "step": 29819 - }, - { - "epoch": 2.2799472446814613, - "grad_norm": 0.0022319364361464977, - "learning_rate": 0.00019999743653832894, - "loss": 46.0, - "step": 29820 - }, - { - "epoch": 2.2800237016648506, - "grad_norm": 0.0007240134291350842, - "learning_rate": 0.00019999743636634008, - "loss": 46.0, - "step": 29821 - }, - { - "epoch": 2.2801001586482403, - "grad_norm": 0.0012203151127323508, - "learning_rate": 0.00019999743619434547, - "loss": 46.0, - "step": 29822 - }, - { - "epoch": 2.28017661563163, - "grad_norm": 0.0013332647504284978, - "learning_rate": 0.0001999974360223451, - "loss": 46.0, - "step": 29823 - }, - { - "epoch": 2.28025307261502, - "grad_norm": 0.003884687786921859, - "learning_rate": 0.00019999743585033894, - "loss": 46.0, - "step": 29824 - }, - { - "epoch": 2.2803295295984096, - "grad_norm": 0.0010043522343039513, - "learning_rate": 0.000199997435678327, - "loss": 46.0, - "step": 29825 - }, - { - "epoch": 2.2804059865817994, - "grad_norm": 0.0025026064831763506, - "learning_rate": 0.0001999974355063093, - "loss": 46.0, - "step": 29826 - }, - { - "epoch": 2.280482443565189, - "grad_norm": 0.000705933605786413, - "learning_rate": 0.00019999743533428586, - "loss": 46.0, - "step": 29827 - }, - { - "epoch": 2.280558900548579, - "grad_norm": 0.0018747919239103794, - "learning_rate": 0.00019999743516225664, - "loss": 46.0, - "step": 29828 - }, - { - "epoch": 2.2806353575319687, - "grad_norm": 0.00440255319699645, - "learning_rate": 0.00019999743499022165, - "loss": 46.0, - "step": 29829 - }, - { - "epoch": 2.2807118145153584, - "grad_norm": 0.0008105953456833959, - "learning_rate": 0.00019999743481818088, - "loss": 46.0, - "step": 29830 - }, - { - "epoch": 2.280788271498748, - "grad_norm": 0.0013962170341983438, - "learning_rate": 0.00019999743464613434, - "loss": 46.0, - "step": 29831 - }, - { - "epoch": 2.280864728482138, - "grad_norm": 0.000740594114176929, - "learning_rate": 0.00019999743447408203, - "loss": 46.0, - "step": 29832 - }, - { - "epoch": 2.2809411854655273, - "grad_norm": 0.0007576592615805566, - "learning_rate": 0.00019999743430202397, - "loss": 46.0, - "step": 29833 - }, - { - "epoch": 2.281017642448917, - "grad_norm": 0.0009967153891921043, - "learning_rate": 0.0001999974341299601, - "loss": 46.0, - "step": 29834 - }, - { - "epoch": 2.281094099432307, - "grad_norm": 0.0013766296906396747, - "learning_rate": 0.0001999974339578905, - "loss": 46.0, - "step": 29835 - }, - { - "epoch": 2.2811705564156965, - "grad_norm": 0.001410242053680122, - "learning_rate": 0.00019999743378581512, - "loss": 46.0, - "step": 29836 - }, - { - "epoch": 2.2812470133990863, - "grad_norm": 0.0006855498650111258, - "learning_rate": 0.00019999743361373397, - "loss": 46.0, - "step": 29837 - }, - { - "epoch": 2.281323470382476, - "grad_norm": 0.00066139898262918, - "learning_rate": 0.00019999743344164707, - "loss": 46.0, - "step": 29838 - }, - { - "epoch": 2.281399927365866, - "grad_norm": 0.0007244086591526866, - "learning_rate": 0.00019999743326955437, - "loss": 46.0, - "step": 29839 - }, - { - "epoch": 2.2814763843492556, - "grad_norm": 0.0006686818669550121, - "learning_rate": 0.0001999974330974559, - "loss": 46.0, - "step": 29840 - }, - { - "epoch": 2.2815528413326454, - "grad_norm": 0.00043235751218162477, - "learning_rate": 0.00019999743292535164, - "loss": 46.0, - "step": 29841 - }, - { - "epoch": 2.281629298316035, - "grad_norm": 0.0006182743818499148, - "learning_rate": 0.00019999743275324165, - "loss": 46.0, - "step": 29842 - }, - { - "epoch": 2.2817057552994244, - "grad_norm": 0.001644460717216134, - "learning_rate": 0.00019999743258112588, - "loss": 46.0, - "step": 29843 - }, - { - "epoch": 2.281782212282814, - "grad_norm": 0.002482478739693761, - "learning_rate": 0.00019999743240900434, - "loss": 46.0, - "step": 29844 - }, - { - "epoch": 2.281858669266204, - "grad_norm": 0.002156356815248728, - "learning_rate": 0.00019999743223687703, - "loss": 46.0, - "step": 29845 - }, - { - "epoch": 2.2819351262495937, - "grad_norm": 0.0010309579083696008, - "learning_rate": 0.00019999743206474397, - "loss": 46.0, - "step": 29846 - }, - { - "epoch": 2.2820115832329835, - "grad_norm": 0.0010222181444987655, - "learning_rate": 0.00019999743189260514, - "loss": 46.0, - "step": 29847 - }, - { - "epoch": 2.2820880402163732, - "grad_norm": 0.0021566657815128565, - "learning_rate": 0.0001999974317204605, - "loss": 46.0, - "step": 29848 - }, - { - "epoch": 2.282164497199763, - "grad_norm": 0.002492171246558428, - "learning_rate": 0.00019999743154831012, - "loss": 46.0, - "step": 29849 - }, - { - "epoch": 2.2822409541831528, - "grad_norm": 0.0013407922815531492, - "learning_rate": 0.000199997431376154, - "loss": 46.0, - "step": 29850 - }, - { - "epoch": 2.2823174111665425, - "grad_norm": 0.0008965315064415336, - "learning_rate": 0.00019999743120399207, - "loss": 46.0, - "step": 29851 - }, - { - "epoch": 2.2823938681499323, - "grad_norm": 0.002933842595666647, - "learning_rate": 0.00019999743103182434, - "loss": 46.0, - "step": 29852 - }, - { - "epoch": 2.282470325133322, - "grad_norm": 0.0007745855255052447, - "learning_rate": 0.00019999743085965087, - "loss": 46.0, - "step": 29853 - }, - { - "epoch": 2.282546782116712, - "grad_norm": 0.00517926923930645, - "learning_rate": 0.00019999743068747168, - "loss": 46.0, - "step": 29854 - }, - { - "epoch": 2.282623239100101, - "grad_norm": 0.0013732551597058773, - "learning_rate": 0.00019999743051528666, - "loss": 46.0, - "step": 29855 - }, - { - "epoch": 2.282699696083491, - "grad_norm": 0.0012999350437894464, - "learning_rate": 0.0001999974303430959, - "loss": 46.0, - "step": 29856 - }, - { - "epoch": 2.2827761530668806, - "grad_norm": 0.0008606440969742835, - "learning_rate": 0.00019999743017089935, - "loss": 46.0, - "step": 29857 - }, - { - "epoch": 2.2828526100502704, - "grad_norm": 0.0005423149559646845, - "learning_rate": 0.00019999742999869704, - "loss": 46.0, - "step": 29858 - }, - { - "epoch": 2.28292906703366, - "grad_norm": 0.0017095900839194655, - "learning_rate": 0.00019999742982648898, - "loss": 46.0, - "step": 29859 - }, - { - "epoch": 2.28300552401705, - "grad_norm": 0.0012269478756934404, - "learning_rate": 0.00019999742965427512, - "loss": 46.0, - "step": 29860 - }, - { - "epoch": 2.2830819810004397, - "grad_norm": 0.0009281620150431991, - "learning_rate": 0.0001999974294820555, - "loss": 46.0, - "step": 29861 - }, - { - "epoch": 2.2831584379838294, - "grad_norm": 0.000761642586439848, - "learning_rate": 0.0001999974293098301, - "loss": 46.0, - "step": 29862 - }, - { - "epoch": 2.283234894967219, - "grad_norm": 0.0007000572513788939, - "learning_rate": 0.00019999742913759895, - "loss": 46.0, - "step": 29863 - }, - { - "epoch": 2.283311351950609, - "grad_norm": 0.0002810922160279006, - "learning_rate": 0.00019999742896536205, - "loss": 46.0, - "step": 29864 - }, - { - "epoch": 2.2833878089339983, - "grad_norm": 0.0009073476539924741, - "learning_rate": 0.00019999742879311935, - "loss": 46.0, - "step": 29865 - }, - { - "epoch": 2.283464265917388, - "grad_norm": 0.0023306068032979965, - "learning_rate": 0.00019999742862087088, - "loss": 46.0, - "step": 29866 - }, - { - "epoch": 2.283540722900778, - "grad_norm": 0.0026636889670044184, - "learning_rate": 0.00019999742844861666, - "loss": 46.0, - "step": 29867 - }, - { - "epoch": 2.2836171798841676, - "grad_norm": 0.0037422257009893656, - "learning_rate": 0.00019999742827635667, - "loss": 46.0, - "step": 29868 - }, - { - "epoch": 2.2836936368675573, - "grad_norm": 0.0008414930780418217, - "learning_rate": 0.00019999742810409088, - "loss": 46.0, - "step": 29869 - }, - { - "epoch": 2.283770093850947, - "grad_norm": 0.0005929945036768913, - "learning_rate": 0.00019999742793181934, - "loss": 46.0, - "step": 29870 - }, - { - "epoch": 2.283846550834337, - "grad_norm": 0.0012171786511316895, - "learning_rate": 0.00019999742775954205, - "loss": 46.0, - "step": 29871 - }, - { - "epoch": 2.2839230078177266, - "grad_norm": 0.0004863725043833256, - "learning_rate": 0.00019999742758725896, - "loss": 46.0, - "step": 29872 - }, - { - "epoch": 2.2839994648011164, - "grad_norm": 0.0014412231976166368, - "learning_rate": 0.0001999974274149701, - "loss": 46.0, - "step": 29873 - }, - { - "epoch": 2.284075921784506, - "grad_norm": 0.001976309111341834, - "learning_rate": 0.0001999974272426755, - "loss": 46.0, - "step": 29874 - }, - { - "epoch": 2.284152378767896, - "grad_norm": 0.0014832784654572606, - "learning_rate": 0.0001999974270703751, - "loss": 46.0, - "step": 29875 - }, - { - "epoch": 2.2842288357512857, - "grad_norm": 0.0011276789009571075, - "learning_rate": 0.00019999742689806894, - "loss": 46.0, - "step": 29876 - }, - { - "epoch": 2.284305292734675, - "grad_norm": 0.0005336818867363036, - "learning_rate": 0.00019999742672575704, - "loss": 46.0, - "step": 29877 - }, - { - "epoch": 2.2843817497180647, - "grad_norm": 0.0009416636312380433, - "learning_rate": 0.00019999742655343934, - "loss": 46.0, - "step": 29878 - }, - { - "epoch": 2.2844582067014545, - "grad_norm": 0.0009630148415453732, - "learning_rate": 0.00019999742638111587, - "loss": 46.0, - "step": 29879 - }, - { - "epoch": 2.2845346636848443, - "grad_norm": 0.0008959217229858041, - "learning_rate": 0.00019999742620878665, - "loss": 46.0, - "step": 29880 - }, - { - "epoch": 2.284611120668234, - "grad_norm": 0.0006849021883681417, - "learning_rate": 0.00019999742603645163, - "loss": 46.0, - "step": 29881 - }, - { - "epoch": 2.284687577651624, - "grad_norm": 0.0010131739545613527, - "learning_rate": 0.00019999742586411087, - "loss": 46.0, - "step": 29882 - }, - { - "epoch": 2.2847640346350135, - "grad_norm": 0.001503929146565497, - "learning_rate": 0.00019999742569176433, - "loss": 46.0, - "step": 29883 - }, - { - "epoch": 2.2848404916184033, - "grad_norm": 0.0009931280510500073, - "learning_rate": 0.000199997425519412, - "loss": 46.0, - "step": 29884 - }, - { - "epoch": 2.284916948601793, - "grad_norm": 0.0009015887626446784, - "learning_rate": 0.00019999742534705393, - "loss": 46.0, - "step": 29885 - }, - { - "epoch": 2.284993405585183, - "grad_norm": 0.0009522573673166335, - "learning_rate": 0.0001999974251746901, - "loss": 46.0, - "step": 29886 - }, - { - "epoch": 2.285069862568572, - "grad_norm": 0.0006824164884164929, - "learning_rate": 0.0001999974250023205, - "loss": 46.0, - "step": 29887 - }, - { - "epoch": 2.285146319551962, - "grad_norm": 0.0007158354856073856, - "learning_rate": 0.00019999742482994508, - "loss": 46.0, - "step": 29888 - }, - { - "epoch": 2.2852227765353517, - "grad_norm": 0.0004895995953120291, - "learning_rate": 0.00019999742465756393, - "loss": 46.0, - "step": 29889 - }, - { - "epoch": 2.2852992335187414, - "grad_norm": 0.008395830169320107, - "learning_rate": 0.000199997424485177, - "loss": 46.0, - "step": 29890 - }, - { - "epoch": 2.285375690502131, - "grad_norm": 0.000889509916305542, - "learning_rate": 0.00019999742431278433, - "loss": 46.0, - "step": 29891 - }, - { - "epoch": 2.285452147485521, - "grad_norm": 0.0013936301693320274, - "learning_rate": 0.00019999742414038583, - "loss": 46.0, - "step": 29892 - }, - { - "epoch": 2.2855286044689107, - "grad_norm": 0.0006106882356107235, - "learning_rate": 0.0001999974239679816, - "loss": 46.0, - "step": 29893 - }, - { - "epoch": 2.2856050614523005, - "grad_norm": 0.006154028233140707, - "learning_rate": 0.0001999974237955716, - "loss": 46.0, - "step": 29894 - }, - { - "epoch": 2.2856815184356902, - "grad_norm": 0.0005006272112950683, - "learning_rate": 0.00019999742362315583, - "loss": 46.0, - "step": 29895 - }, - { - "epoch": 2.28575797541908, - "grad_norm": 0.0012299169320613146, - "learning_rate": 0.0001999974234507343, - "loss": 46.0, - "step": 29896 - }, - { - "epoch": 2.2858344324024698, - "grad_norm": 0.001086096977815032, - "learning_rate": 0.00019999742327830698, - "loss": 46.0, - "step": 29897 - }, - { - "epoch": 2.2859108893858595, - "grad_norm": 0.0012332922779023647, - "learning_rate": 0.0001999974231058739, - "loss": 46.0, - "step": 29898 - }, - { - "epoch": 2.285987346369249, - "grad_norm": 0.0012582505587488413, - "learning_rate": 0.00019999742293343506, - "loss": 46.0, - "step": 29899 - }, - { - "epoch": 2.2860638033526386, - "grad_norm": 0.0005635573179461062, - "learning_rate": 0.00019999742276099043, - "loss": 46.0, - "step": 29900 - }, - { - "epoch": 2.2861402603360284, - "grad_norm": 0.0013460407499223948, - "learning_rate": 0.00019999742258854005, - "loss": 46.0, - "step": 29901 - }, - { - "epoch": 2.286216717319418, - "grad_norm": 0.00185186299495399, - "learning_rate": 0.00019999742241608387, - "loss": 46.0, - "step": 29902 - }, - { - "epoch": 2.286293174302808, - "grad_norm": 0.0011935721850022674, - "learning_rate": 0.00019999742224362197, - "loss": 46.0, - "step": 29903 - }, - { - "epoch": 2.2863696312861976, - "grad_norm": 0.0013290493516251445, - "learning_rate": 0.00019999742207115425, - "loss": 46.0, - "step": 29904 - }, - { - "epoch": 2.2864460882695874, - "grad_norm": 0.0009953428525477648, - "learning_rate": 0.00019999742189868078, - "loss": 46.0, - "step": 29905 - }, - { - "epoch": 2.286522545252977, - "grad_norm": 0.0010958951897919178, - "learning_rate": 0.00019999742172620156, - "loss": 46.0, - "step": 29906 - }, - { - "epoch": 2.286599002236367, - "grad_norm": 0.00038658553967252374, - "learning_rate": 0.00019999742155371654, - "loss": 46.0, - "step": 29907 - }, - { - "epoch": 2.2866754592197567, - "grad_norm": 0.003159634303301573, - "learning_rate": 0.00019999742138122577, - "loss": 46.0, - "step": 29908 - }, - { - "epoch": 2.286751916203146, - "grad_norm": 0.0006177702452987432, - "learning_rate": 0.00019999742120872923, - "loss": 46.0, - "step": 29909 - }, - { - "epoch": 2.2868283731865358, - "grad_norm": 0.000541712564881891, - "learning_rate": 0.00019999742103622695, - "loss": 46.0, - "step": 29910 - }, - { - "epoch": 2.2869048301699255, - "grad_norm": 0.0014085343573242426, - "learning_rate": 0.00019999742086371884, - "loss": 46.0, - "step": 29911 - }, - { - "epoch": 2.2869812871533153, - "grad_norm": 0.0007358420989476144, - "learning_rate": 0.000199997420691205, - "loss": 46.0, - "step": 29912 - }, - { - "epoch": 2.287057744136705, - "grad_norm": 0.005819522775709629, - "learning_rate": 0.00019999742051868537, - "loss": 46.0, - "step": 29913 - }, - { - "epoch": 2.287134201120095, - "grad_norm": 0.00030125753255560994, - "learning_rate": 0.00019999742034616, - "loss": 46.0, - "step": 29914 - }, - { - "epoch": 2.2872106581034846, - "grad_norm": 0.002479326678439975, - "learning_rate": 0.00019999742017362884, - "loss": 46.0, - "step": 29915 - }, - { - "epoch": 2.2872871150868743, - "grad_norm": 0.0004563711991067976, - "learning_rate": 0.0001999974200010919, - "loss": 46.0, - "step": 29916 - }, - { - "epoch": 2.287363572070264, - "grad_norm": 0.0006198675255291164, - "learning_rate": 0.0001999974198285492, - "loss": 46.0, - "step": 29917 - }, - { - "epoch": 2.287440029053654, - "grad_norm": 0.0020556203089654446, - "learning_rate": 0.00019999741965600075, - "loss": 46.0, - "step": 29918 - }, - { - "epoch": 2.2875164860370436, - "grad_norm": 0.004435187671333551, - "learning_rate": 0.0001999974194834465, - "loss": 46.0, - "step": 29919 - }, - { - "epoch": 2.2875929430204334, - "grad_norm": 0.0008035156643018126, - "learning_rate": 0.00019999741931088648, - "loss": 46.0, - "step": 29920 - }, - { - "epoch": 2.2876694000038227, - "grad_norm": 0.0015783481067046523, - "learning_rate": 0.00019999741913832072, - "loss": 46.0, - "step": 29921 - }, - { - "epoch": 2.2877458569872124, - "grad_norm": 0.0007616088259965181, - "learning_rate": 0.00019999741896574918, - "loss": 46.0, - "step": 29922 - }, - { - "epoch": 2.287822313970602, - "grad_norm": 0.0054249633103609085, - "learning_rate": 0.00019999741879317187, - "loss": 46.0, - "step": 29923 - }, - { - "epoch": 2.287898770953992, - "grad_norm": 0.0009115795837715268, - "learning_rate": 0.00019999741862058879, - "loss": 46.0, - "step": 29924 - }, - { - "epoch": 2.2879752279373817, - "grad_norm": 0.0009051746455952525, - "learning_rate": 0.00019999741844799993, - "loss": 46.0, - "step": 29925 - }, - { - "epoch": 2.2880516849207715, - "grad_norm": 0.0028160931542515755, - "learning_rate": 0.00019999741827540532, - "loss": 46.0, - "step": 29926 - }, - { - "epoch": 2.2881281419041613, - "grad_norm": 0.0008904538699425757, - "learning_rate": 0.00019999741810280492, - "loss": 46.0, - "step": 29927 - }, - { - "epoch": 2.288204598887551, - "grad_norm": 0.0006686669075861573, - "learning_rate": 0.00019999741793019874, - "loss": 46.0, - "step": 29928 - }, - { - "epoch": 2.2882810558709408, - "grad_norm": 0.0006663257372565567, - "learning_rate": 0.00019999741775758681, - "loss": 46.0, - "step": 29929 - }, - { - "epoch": 2.28835751285433, - "grad_norm": 0.0008810656727291644, - "learning_rate": 0.00019999741758496912, - "loss": 46.0, - "step": 29930 - }, - { - "epoch": 2.28843396983772, - "grad_norm": 0.0005971870268695056, - "learning_rate": 0.00019999741741234564, - "loss": 46.0, - "step": 29931 - }, - { - "epoch": 2.2885104268211096, - "grad_norm": 0.0008884619455784559, - "learning_rate": 0.00019999741723971643, - "loss": 46.0, - "step": 29932 - }, - { - "epoch": 2.2885868838044994, - "grad_norm": 0.0011491224868223071, - "learning_rate": 0.00019999741706708144, - "loss": 46.0, - "step": 29933 - }, - { - "epoch": 2.288663340787889, - "grad_norm": 0.001505630207248032, - "learning_rate": 0.00019999741689444064, - "loss": 46.0, - "step": 29934 - }, - { - "epoch": 2.288739797771279, - "grad_norm": 0.0007792623364366591, - "learning_rate": 0.0001999974167217941, - "loss": 46.0, - "step": 29935 - }, - { - "epoch": 2.2888162547546687, - "grad_norm": 0.0008154707029461861, - "learning_rate": 0.0001999974165491418, - "loss": 46.0, - "step": 29936 - }, - { - "epoch": 2.2888927117380584, - "grad_norm": 0.000983793055638671, - "learning_rate": 0.00019999741637648374, - "loss": 46.0, - "step": 29937 - }, - { - "epoch": 2.288969168721448, - "grad_norm": 0.0006218686467036605, - "learning_rate": 0.00019999741620381985, - "loss": 46.0, - "step": 29938 - }, - { - "epoch": 2.289045625704838, - "grad_norm": 0.0004598811501637101, - "learning_rate": 0.00019999741603115022, - "loss": 46.0, - "step": 29939 - }, - { - "epoch": 2.2891220826882277, - "grad_norm": 0.0019994915928691626, - "learning_rate": 0.00019999741585847484, - "loss": 46.0, - "step": 29940 - }, - { - "epoch": 2.2891985396716175, - "grad_norm": 0.0004396119329612702, - "learning_rate": 0.0001999974156857937, - "loss": 46.0, - "step": 29941 - }, - { - "epoch": 2.289274996655007, - "grad_norm": 0.0009876680560410023, - "learning_rate": 0.00019999741551310677, - "loss": 46.0, - "step": 29942 - }, - { - "epoch": 2.2893514536383965, - "grad_norm": 0.0024848051834851503, - "learning_rate": 0.00019999741534041404, - "loss": 46.0, - "step": 29943 - }, - { - "epoch": 2.2894279106217863, - "grad_norm": 0.0008179487776942551, - "learning_rate": 0.00019999741516771557, - "loss": 46.0, - "step": 29944 - }, - { - "epoch": 2.289504367605176, - "grad_norm": 0.001497873105108738, - "learning_rate": 0.00019999741499501138, - "loss": 46.0, - "step": 29945 - }, - { - "epoch": 2.289580824588566, - "grad_norm": 0.0014988635666668415, - "learning_rate": 0.00019999741482230134, - "loss": 46.0, - "step": 29946 - }, - { - "epoch": 2.2896572815719556, - "grad_norm": 0.0013089224230498075, - "learning_rate": 0.00019999741464958557, - "loss": 46.0, - "step": 29947 - }, - { - "epoch": 2.2897337385553453, - "grad_norm": 0.0023359693586826324, - "learning_rate": 0.000199997414476864, - "loss": 46.0, - "step": 29948 - }, - { - "epoch": 2.289810195538735, - "grad_norm": 0.002481769537553191, - "learning_rate": 0.00019999741430413672, - "loss": 46.0, - "step": 29949 - }, - { - "epoch": 2.289886652522125, - "grad_norm": 0.0012910236837342381, - "learning_rate": 0.00019999741413140364, - "loss": 46.0, - "step": 29950 - }, - { - "epoch": 2.2899631095055146, - "grad_norm": 0.0023895101621747017, - "learning_rate": 0.00019999741395866475, - "loss": 46.0, - "step": 29951 - }, - { - "epoch": 2.290039566488904, - "grad_norm": 0.0013047148240730166, - "learning_rate": 0.00019999741378592015, - "loss": 46.0, - "step": 29952 - }, - { - "epoch": 2.2901160234722937, - "grad_norm": 0.002602041931822896, - "learning_rate": 0.00019999741361316975, - "loss": 46.0, - "step": 29953 - }, - { - "epoch": 2.2901924804556835, - "grad_norm": 0.0013167491415515542, - "learning_rate": 0.0001999974134404136, - "loss": 46.0, - "step": 29954 - }, - { - "epoch": 2.2902689374390732, - "grad_norm": 0.003955171909183264, - "learning_rate": 0.00019999741326765167, - "loss": 46.0, - "step": 29955 - }, - { - "epoch": 2.290345394422463, - "grad_norm": 0.0011904009152203798, - "learning_rate": 0.00019999741309488397, - "loss": 46.0, - "step": 29956 - }, - { - "epoch": 2.2904218514058527, - "grad_norm": 0.00259603769518435, - "learning_rate": 0.0001999974129221105, - "loss": 46.0, - "step": 29957 - }, - { - "epoch": 2.2904983083892425, - "grad_norm": 0.0013402254553511739, - "learning_rate": 0.00019999741274933126, - "loss": 46.0, - "step": 29958 - }, - { - "epoch": 2.2905747653726323, - "grad_norm": 0.0010348311625421047, - "learning_rate": 0.00019999741257654624, - "loss": 46.0, - "step": 29959 - }, - { - "epoch": 2.290651222356022, - "grad_norm": 0.0031335444655269384, - "learning_rate": 0.00019999741240375548, - "loss": 46.0, - "step": 29960 - }, - { - "epoch": 2.290727679339412, - "grad_norm": 0.0008222724427469075, - "learning_rate": 0.0001999974122309589, - "loss": 46.0, - "step": 29961 - }, - { - "epoch": 2.2908041363228016, - "grad_norm": 0.0014036670327186584, - "learning_rate": 0.0001999974120581566, - "loss": 46.0, - "step": 29962 - }, - { - "epoch": 2.2908805933061913, - "grad_norm": 0.0012424660380929708, - "learning_rate": 0.00019999741188534852, - "loss": 46.0, - "step": 29963 - }, - { - "epoch": 2.2909570502895806, - "grad_norm": 0.0011386418482288718, - "learning_rate": 0.0001999974117125347, - "loss": 46.0, - "step": 29964 - }, - { - "epoch": 2.2910335072729704, - "grad_norm": 0.0006336446385830641, - "learning_rate": 0.00019999741153971506, - "loss": 46.0, - "step": 29965 - }, - { - "epoch": 2.29110996425636, - "grad_norm": 0.0011672864202409983, - "learning_rate": 0.00019999741136688965, - "loss": 46.0, - "step": 29966 - }, - { - "epoch": 2.29118642123975, - "grad_norm": 0.00043369957711547613, - "learning_rate": 0.00019999741119405847, - "loss": 46.0, - "step": 29967 - }, - { - "epoch": 2.2912628782231397, - "grad_norm": 0.0011581830913200974, - "learning_rate": 0.00019999741102122155, - "loss": 46.0, - "step": 29968 - }, - { - "epoch": 2.2913393352065294, - "grad_norm": 0.0007346579222939909, - "learning_rate": 0.00019999741084837885, - "loss": 46.0, - "step": 29969 - }, - { - "epoch": 2.291415792189919, - "grad_norm": 0.0009424564777873456, - "learning_rate": 0.00019999741067553038, - "loss": 46.0, - "step": 29970 - }, - { - "epoch": 2.291492249173309, - "grad_norm": 0.0012498552678152919, - "learning_rate": 0.00019999741050267617, - "loss": 46.0, - "step": 29971 - }, - { - "epoch": 2.2915687061566987, - "grad_norm": 0.0029973573982715607, - "learning_rate": 0.00019999741032981615, - "loss": 46.0, - "step": 29972 - }, - { - "epoch": 2.2916451631400885, - "grad_norm": 0.0024053265806287527, - "learning_rate": 0.00019999741015695036, - "loss": 46.0, - "step": 29973 - }, - { - "epoch": 2.291721620123478, - "grad_norm": 0.0004428261308930814, - "learning_rate": 0.00019999740998407882, - "loss": 46.0, - "step": 29974 - }, - { - "epoch": 2.2917980771068676, - "grad_norm": 0.0013902232749387622, - "learning_rate": 0.0001999974098112015, - "loss": 46.0, - "step": 29975 - }, - { - "epoch": 2.2918745340902573, - "grad_norm": 0.002455491106957197, - "learning_rate": 0.0001999974096383184, - "loss": 46.0, - "step": 29976 - }, - { - "epoch": 2.291950991073647, - "grad_norm": 0.0016400108579546213, - "learning_rate": 0.00019999740946542954, - "loss": 46.0, - "step": 29977 - }, - { - "epoch": 2.292027448057037, - "grad_norm": 0.0013364371843636036, - "learning_rate": 0.0001999974092925349, - "loss": 46.0, - "step": 29978 - }, - { - "epoch": 2.2921039050404266, - "grad_norm": 0.0009853814262896776, - "learning_rate": 0.00019999740911963454, - "loss": 46.0, - "step": 29979 - }, - { - "epoch": 2.2921803620238164, - "grad_norm": 0.002387967659160495, - "learning_rate": 0.00019999740894672839, - "loss": 46.0, - "step": 29980 - }, - { - "epoch": 2.292256819007206, - "grad_norm": 0.0014814838068559766, - "learning_rate": 0.00019999740877381643, - "loss": 46.0, - "step": 29981 - }, - { - "epoch": 2.292333275990596, - "grad_norm": 0.008097806014120579, - "learning_rate": 0.00019999740860089874, - "loss": 46.0, - "step": 29982 - }, - { - "epoch": 2.2924097329739856, - "grad_norm": 0.0017949416069313884, - "learning_rate": 0.00019999740842797527, - "loss": 46.0, - "step": 29983 - }, - { - "epoch": 2.2924861899573754, - "grad_norm": 0.0006521407631225884, - "learning_rate": 0.00019999740825504602, - "loss": 46.0, - "step": 29984 - }, - { - "epoch": 2.292562646940765, - "grad_norm": 0.0005579882999882102, - "learning_rate": 0.00019999740808211103, - "loss": 46.0, - "step": 29985 - }, - { - "epoch": 2.2926391039241545, - "grad_norm": 0.0008918361854739487, - "learning_rate": 0.00019999740790917024, - "loss": 46.0, - "step": 29986 - }, - { - "epoch": 2.2927155609075442, - "grad_norm": 0.0007091266452334821, - "learning_rate": 0.0001999974077362237, - "loss": 46.0, - "step": 29987 - }, - { - "epoch": 2.292792017890934, - "grad_norm": 0.0009522182517684996, - "learning_rate": 0.0001999974075632714, - "loss": 46.0, - "step": 29988 - }, - { - "epoch": 2.2928684748743238, - "grad_norm": 0.00140350719448179, - "learning_rate": 0.0001999974073903133, - "loss": 46.0, - "step": 29989 - }, - { - "epoch": 2.2929449318577135, - "grad_norm": 0.0009052487439475954, - "learning_rate": 0.00019999740721734943, - "loss": 46.0, - "step": 29990 - }, - { - "epoch": 2.2930213888411033, - "grad_norm": 0.00048189322114922106, - "learning_rate": 0.0001999974070443798, - "loss": 46.0, - "step": 29991 - }, - { - "epoch": 2.293097845824493, - "grad_norm": 0.0009244322427548468, - "learning_rate": 0.00019999740687140442, - "loss": 46.0, - "step": 29992 - }, - { - "epoch": 2.293174302807883, - "grad_norm": 0.0008392860763706267, - "learning_rate": 0.00019999740669842325, - "loss": 46.0, - "step": 29993 - }, - { - "epoch": 2.2932507597912726, - "grad_norm": 0.0004440682241693139, - "learning_rate": 0.00019999740652543632, - "loss": 46.0, - "step": 29994 - }, - { - "epoch": 2.2933272167746623, - "grad_norm": 0.0014964850852265954, - "learning_rate": 0.0001999974063524436, - "loss": 46.0, - "step": 29995 - }, - { - "epoch": 2.2934036737580517, - "grad_norm": 0.0008686116780154407, - "learning_rate": 0.00019999740617944513, - "loss": 46.0, - "step": 29996 - }, - { - "epoch": 2.2934801307414414, - "grad_norm": 0.0015177861787378788, - "learning_rate": 0.0001999974060064409, - "loss": 46.0, - "step": 29997 - }, - { - "epoch": 2.293556587724831, - "grad_norm": 0.0005570792127400637, - "learning_rate": 0.0001999974058334309, - "loss": 46.0, - "step": 29998 - }, - { - "epoch": 2.293633044708221, - "grad_norm": 0.0007198775419965386, - "learning_rate": 0.0001999974056604151, - "loss": 46.0, - "step": 29999 - }, - { - "epoch": 2.2937095016916107, - "grad_norm": 0.000669146073050797, - "learning_rate": 0.00019999740548739357, - "loss": 46.0, - "step": 30000 - }, - { - "epoch": 2.2937859586750005, - "grad_norm": 0.0008395511540584266, - "learning_rate": 0.00019999740531436623, - "loss": 46.0, - "step": 30001 - }, - { - "epoch": 2.29386241565839, - "grad_norm": 0.0005476570804603398, - "learning_rate": 0.00019999740514133315, - "loss": 46.0, - "step": 30002 - }, - { - "epoch": 2.29393887264178, - "grad_norm": 0.0007812397088855505, - "learning_rate": 0.00019999740496829432, - "loss": 46.0, - "step": 30003 - }, - { - "epoch": 2.2940153296251697, - "grad_norm": 0.001501706661656499, - "learning_rate": 0.00019999740479524966, - "loss": 46.0, - "step": 30004 - }, - { - "epoch": 2.2940917866085595, - "grad_norm": 0.0033552004024386406, - "learning_rate": 0.00019999740462219926, - "loss": 46.0, - "step": 30005 - }, - { - "epoch": 2.2941682435919493, - "grad_norm": 0.0019420883618295193, - "learning_rate": 0.0001999974044491431, - "loss": 46.0, - "step": 30006 - }, - { - "epoch": 2.294244700575339, - "grad_norm": 0.000641855294816196, - "learning_rate": 0.0001999974042760812, - "loss": 46.0, - "step": 30007 - }, - { - "epoch": 2.2943211575587283, - "grad_norm": 0.0012845391174778342, - "learning_rate": 0.00019999740410301347, - "loss": 46.0, - "step": 30008 - }, - { - "epoch": 2.294397614542118, - "grad_norm": 0.000756037246901542, - "learning_rate": 0.00019999740392994, - "loss": 46.0, - "step": 30009 - }, - { - "epoch": 2.294474071525508, - "grad_norm": 0.0076445769518613815, - "learning_rate": 0.00019999740375686075, - "loss": 46.0, - "step": 30010 - }, - { - "epoch": 2.2945505285088976, - "grad_norm": 0.000882270629517734, - "learning_rate": 0.00019999740358377574, - "loss": 46.0, - "step": 30011 - }, - { - "epoch": 2.2946269854922874, - "grad_norm": 0.003852242836728692, - "learning_rate": 0.00019999740341068495, - "loss": 46.0, - "step": 30012 - }, - { - "epoch": 2.294703442475677, - "grad_norm": 0.0011800319189205766, - "learning_rate": 0.0001999974032375884, - "loss": 46.0, - "step": 30013 - }, - { - "epoch": 2.294779899459067, - "grad_norm": 0.005899304524064064, - "learning_rate": 0.0001999974030644861, - "loss": 46.0, - "step": 30014 - }, - { - "epoch": 2.2948563564424567, - "grad_norm": 0.005453599151223898, - "learning_rate": 0.00019999740289137802, - "loss": 46.0, - "step": 30015 - }, - { - "epoch": 2.2949328134258464, - "grad_norm": 0.0016312829684466124, - "learning_rate": 0.00019999740271826416, - "loss": 46.0, - "step": 30016 - }, - { - "epoch": 2.295009270409236, - "grad_norm": 0.0029654079116880894, - "learning_rate": 0.0001999974025451445, - "loss": 46.0, - "step": 30017 - }, - { - "epoch": 2.2950857273926255, - "grad_norm": 0.0005732237477786839, - "learning_rate": 0.0001999974023720191, - "loss": 46.0, - "step": 30018 - }, - { - "epoch": 2.2951621843760153, - "grad_norm": 0.0008385609253309667, - "learning_rate": 0.00019999740219888795, - "loss": 46.0, - "step": 30019 - }, - { - "epoch": 2.295238641359405, - "grad_norm": 0.0013872620183974504, - "learning_rate": 0.000199997402025751, - "loss": 46.0, - "step": 30020 - }, - { - "epoch": 2.295315098342795, - "grad_norm": 0.0018239635974168777, - "learning_rate": 0.0001999974018526083, - "loss": 46.0, - "step": 30021 - }, - { - "epoch": 2.2953915553261846, - "grad_norm": 0.001474417862482369, - "learning_rate": 0.00019999740167945984, - "loss": 46.0, - "step": 30022 - }, - { - "epoch": 2.2954680123095743, - "grad_norm": 0.0007369461818598211, - "learning_rate": 0.0001999974015063056, - "loss": 46.0, - "step": 30023 - }, - { - "epoch": 2.295544469292964, - "grad_norm": 0.002494187094271183, - "learning_rate": 0.0001999974013331456, - "loss": 46.0, - "step": 30024 - }, - { - "epoch": 2.295620926276354, - "grad_norm": 0.0020532524213194847, - "learning_rate": 0.0001999974011599798, - "loss": 46.0, - "step": 30025 - }, - { - "epoch": 2.2956973832597436, - "grad_norm": 0.001050082384608686, - "learning_rate": 0.00019999740098680823, - "loss": 46.0, - "step": 30026 - }, - { - "epoch": 2.2957738402431334, - "grad_norm": 0.0012227309634909034, - "learning_rate": 0.00019999740081363095, - "loss": 46.0, - "step": 30027 - }, - { - "epoch": 2.295850297226523, - "grad_norm": 0.0008813955937512219, - "learning_rate": 0.00019999740064044784, - "loss": 46.0, - "step": 30028 - }, - { - "epoch": 2.295926754209913, - "grad_norm": 0.0013336681295186281, - "learning_rate": 0.00019999740046725898, - "loss": 46.0, - "step": 30029 - }, - { - "epoch": 2.296003211193302, - "grad_norm": 0.00067457917612046, - "learning_rate": 0.00019999740029406435, - "loss": 46.0, - "step": 30030 - }, - { - "epoch": 2.296079668176692, - "grad_norm": 0.000672497961204499, - "learning_rate": 0.00019999740012086395, - "loss": 46.0, - "step": 30031 - }, - { - "epoch": 2.2961561251600817, - "grad_norm": 0.001501060905866325, - "learning_rate": 0.00019999739994765777, - "loss": 46.0, - "step": 30032 - }, - { - "epoch": 2.2962325821434715, - "grad_norm": 0.0044987439177930355, - "learning_rate": 0.00019999739977444585, - "loss": 46.0, - "step": 30033 - }, - { - "epoch": 2.2963090391268612, - "grad_norm": 0.01607019267976284, - "learning_rate": 0.00019999739960122815, - "loss": 46.0, - "step": 30034 - }, - { - "epoch": 2.296385496110251, - "grad_norm": 0.0008642894681543112, - "learning_rate": 0.00019999739942800466, - "loss": 46.0, - "step": 30035 - }, - { - "epoch": 2.2964619530936408, - "grad_norm": 0.0005976841785013676, - "learning_rate": 0.00019999739925477544, - "loss": 46.0, - "step": 30036 - }, - { - "epoch": 2.2965384100770305, - "grad_norm": 0.0015923274913802743, - "learning_rate": 0.00019999739908154043, - "loss": 46.0, - "step": 30037 - }, - { - "epoch": 2.2966148670604203, - "grad_norm": 0.0007830019458197057, - "learning_rate": 0.00019999739890829964, - "loss": 46.0, - "step": 30038 - }, - { - "epoch": 2.29669132404381, - "grad_norm": 0.0009042581077665091, - "learning_rate": 0.00019999739873505308, - "loss": 46.0, - "step": 30039 - }, - { - "epoch": 2.2967677810271994, - "grad_norm": 0.000887769041582942, - "learning_rate": 0.00019999739856180074, - "loss": 46.0, - "step": 30040 - }, - { - "epoch": 2.296844238010589, - "grad_norm": 0.000370532157830894, - "learning_rate": 0.00019999739838854268, - "loss": 46.0, - "step": 30041 - }, - { - "epoch": 2.296920694993979, - "grad_norm": 0.003807772183790803, - "learning_rate": 0.0001999973982152788, - "loss": 46.0, - "step": 30042 - }, - { - "epoch": 2.2969971519773686, - "grad_norm": 0.002727210521697998, - "learning_rate": 0.00019999739804200917, - "loss": 46.0, - "step": 30043 - }, - { - "epoch": 2.2970736089607584, - "grad_norm": 0.0009425010648556054, - "learning_rate": 0.00019999739786873377, - "loss": 46.0, - "step": 30044 - }, - { - "epoch": 2.297150065944148, - "grad_norm": 0.00043616388575173914, - "learning_rate": 0.0001999973976954526, - "loss": 46.0, - "step": 30045 - }, - { - "epoch": 2.297226522927538, - "grad_norm": 0.0006302734836935997, - "learning_rate": 0.00019999739752216567, - "loss": 46.0, - "step": 30046 - }, - { - "epoch": 2.2973029799109277, - "grad_norm": 0.0007072797161526978, - "learning_rate": 0.00019999739734887295, - "loss": 46.0, - "step": 30047 - }, - { - "epoch": 2.2973794368943175, - "grad_norm": 0.0006940692546777427, - "learning_rate": 0.00019999739717557448, - "loss": 46.0, - "step": 30048 - }, - { - "epoch": 2.297455893877707, - "grad_norm": 0.008013441227376461, - "learning_rate": 0.00019999739700227024, - "loss": 46.0, - "step": 30049 - }, - { - "epoch": 2.297532350861097, - "grad_norm": 0.00067564332857728, - "learning_rate": 0.00019999739682896022, - "loss": 46.0, - "step": 30050 - }, - { - "epoch": 2.2976088078444867, - "grad_norm": 0.0023563748691231012, - "learning_rate": 0.00019999739665564446, - "loss": 46.0, - "step": 30051 - }, - { - "epoch": 2.297685264827876, - "grad_norm": 0.001248229993507266, - "learning_rate": 0.0001999973964823229, - "loss": 46.0, - "step": 30052 - }, - { - "epoch": 2.297761721811266, - "grad_norm": 0.0003808919573202729, - "learning_rate": 0.00019999739630899557, - "loss": 46.0, - "step": 30053 - }, - { - "epoch": 2.2978381787946556, - "grad_norm": 0.006725207436829805, - "learning_rate": 0.00019999739613566248, - "loss": 46.0, - "step": 30054 - }, - { - "epoch": 2.2979146357780453, - "grad_norm": 0.00045433282502926886, - "learning_rate": 0.00019999739596232363, - "loss": 46.0, - "step": 30055 - }, - { - "epoch": 2.297991092761435, - "grad_norm": 0.0012498340802267194, - "learning_rate": 0.000199997395788979, - "loss": 46.0, - "step": 30056 - }, - { - "epoch": 2.298067549744825, - "grad_norm": 0.0007556340424343944, - "learning_rate": 0.0001999973956156286, - "loss": 46.0, - "step": 30057 - }, - { - "epoch": 2.2981440067282146, - "grad_norm": 0.003670429578050971, - "learning_rate": 0.00019999739544227242, - "loss": 46.0, - "step": 30058 - }, - { - "epoch": 2.2982204637116044, - "grad_norm": 0.0035471406299620867, - "learning_rate": 0.00019999739526891047, - "loss": 46.0, - "step": 30059 - }, - { - "epoch": 2.298296920694994, - "grad_norm": 0.0005399485817179084, - "learning_rate": 0.00019999739509554275, - "loss": 46.0, - "step": 30060 - }, - { - "epoch": 2.2983733776783835, - "grad_norm": 0.003899995470419526, - "learning_rate": 0.0001999973949221693, - "loss": 46.0, - "step": 30061 - }, - { - "epoch": 2.298449834661773, - "grad_norm": 0.0012264401884749532, - "learning_rate": 0.00019999739474879004, - "loss": 46.0, - "step": 30062 - }, - { - "epoch": 2.298526291645163, - "grad_norm": 0.000918879231903702, - "learning_rate": 0.00019999739457540505, - "loss": 46.0, - "step": 30063 - }, - { - "epoch": 2.2986027486285527, - "grad_norm": 0.0006566160591319203, - "learning_rate": 0.00019999739440201424, - "loss": 46.0, - "step": 30064 - }, - { - "epoch": 2.2986792056119425, - "grad_norm": 0.001869792933575809, - "learning_rate": 0.0001999973942286177, - "loss": 46.0, - "step": 30065 - }, - { - "epoch": 2.2987556625953323, - "grad_norm": 0.0034287371672689915, - "learning_rate": 0.00019999739405521537, - "loss": 46.0, - "step": 30066 - }, - { - "epoch": 2.298832119578722, - "grad_norm": 0.004114525858312845, - "learning_rate": 0.0001999973938818073, - "loss": 46.0, - "step": 30067 - }, - { - "epoch": 2.298908576562112, - "grad_norm": 0.0008481629774905741, - "learning_rate": 0.00019999739370839343, - "loss": 46.0, - "step": 30068 - }, - { - "epoch": 2.2989850335455015, - "grad_norm": 0.000555143051315099, - "learning_rate": 0.0001999973935349738, - "loss": 46.0, - "step": 30069 - }, - { - "epoch": 2.2990614905288913, - "grad_norm": 0.0005480466643348336, - "learning_rate": 0.0001999973933615484, - "loss": 46.0, - "step": 30070 - }, - { - "epoch": 2.299137947512281, - "grad_norm": 0.0005450470489449799, - "learning_rate": 0.0001999973931881172, - "loss": 46.0, - "step": 30071 - }, - { - "epoch": 2.299214404495671, - "grad_norm": 0.0013974080793559551, - "learning_rate": 0.00019999739301468028, - "loss": 46.0, - "step": 30072 - }, - { - "epoch": 2.2992908614790606, - "grad_norm": 0.001103778020478785, - "learning_rate": 0.00019999739284123758, - "loss": 46.0, - "step": 30073 - }, - { - "epoch": 2.29936731846245, - "grad_norm": 0.000586771231610328, - "learning_rate": 0.00019999739266778906, - "loss": 46.0, - "step": 30074 - }, - { - "epoch": 2.2994437754458397, - "grad_norm": 0.005902382545173168, - "learning_rate": 0.00019999739249433485, - "loss": 46.0, - "step": 30075 - }, - { - "epoch": 2.2995202324292294, - "grad_norm": 0.00202909461222589, - "learning_rate": 0.00019999739232087483, - "loss": 46.0, - "step": 30076 - }, - { - "epoch": 2.299596689412619, - "grad_norm": 0.0013913969742134213, - "learning_rate": 0.00019999739214740904, - "loss": 46.0, - "step": 30077 - }, - { - "epoch": 2.299673146396009, - "grad_norm": 0.0008264286443591118, - "learning_rate": 0.00019999739197393748, - "loss": 46.0, - "step": 30078 - }, - { - "epoch": 2.2997496033793987, - "grad_norm": 0.0007550848531536758, - "learning_rate": 0.00019999739180046015, - "loss": 46.0, - "step": 30079 - }, - { - "epoch": 2.2998260603627885, - "grad_norm": 0.0009506526403129101, - "learning_rate": 0.00019999739162697707, - "loss": 46.0, - "step": 30080 - }, - { - "epoch": 2.2999025173461782, - "grad_norm": 0.0008553520892746747, - "learning_rate": 0.00019999739145348818, - "loss": 46.0, - "step": 30081 - }, - { - "epoch": 2.299978974329568, - "grad_norm": 0.0010694566881284118, - "learning_rate": 0.00019999739127999358, - "loss": 46.0, - "step": 30082 - }, - { - "epoch": 2.3000554313129573, - "grad_norm": 0.0004899316118098795, - "learning_rate": 0.00019999739110649318, - "loss": 46.0, - "step": 30083 - }, - { - "epoch": 2.300131888296347, - "grad_norm": 0.0008950239280238748, - "learning_rate": 0.000199997390932987, - "loss": 46.0, - "step": 30084 - }, - { - "epoch": 2.300208345279737, - "grad_norm": 0.001813948038034141, - "learning_rate": 0.00019999739075947506, - "loss": 46.0, - "step": 30085 - }, - { - "epoch": 2.3002848022631266, - "grad_norm": 0.0028806107584387064, - "learning_rate": 0.00019999739058595734, - "loss": 46.0, - "step": 30086 - }, - { - "epoch": 2.3003612592465164, - "grad_norm": 0.0012276624329388142, - "learning_rate": 0.00019999739041243387, - "loss": 46.0, - "step": 30087 - }, - { - "epoch": 2.300437716229906, - "grad_norm": 0.0006821006536483765, - "learning_rate": 0.00019999739023890463, - "loss": 46.0, - "step": 30088 - }, - { - "epoch": 2.300514173213296, - "grad_norm": 0.0035636688116937876, - "learning_rate": 0.0001999973900653696, - "loss": 46.0, - "step": 30089 - }, - { - "epoch": 2.3005906301966856, - "grad_norm": 0.0020399398636072874, - "learning_rate": 0.0001999973898918288, - "loss": 46.0, - "step": 30090 - }, - { - "epoch": 2.3006670871800754, - "grad_norm": 0.001916010514833033, - "learning_rate": 0.00019999738971828227, - "loss": 46.0, - "step": 30091 - }, - { - "epoch": 2.300743544163465, - "grad_norm": 0.0010494361631572247, - "learning_rate": 0.00019999738954472993, - "loss": 46.0, - "step": 30092 - }, - { - "epoch": 2.300820001146855, - "grad_norm": 0.0022276821546256542, - "learning_rate": 0.00019999738937117182, - "loss": 46.0, - "step": 30093 - }, - { - "epoch": 2.3008964581302447, - "grad_norm": 0.00485273590311408, - "learning_rate": 0.00019999738919760797, - "loss": 46.0, - "step": 30094 - }, - { - "epoch": 2.300972915113634, - "grad_norm": 0.002339188475161791, - "learning_rate": 0.00019999738902403834, - "loss": 46.0, - "step": 30095 - }, - { - "epoch": 2.3010493720970238, - "grad_norm": 0.00916279386729002, - "learning_rate": 0.00019999738885046294, - "loss": 46.0, - "step": 30096 - }, - { - "epoch": 2.3011258290804135, - "grad_norm": 0.001643507624976337, - "learning_rate": 0.0001999973886768818, - "loss": 46.0, - "step": 30097 - }, - { - "epoch": 2.3012022860638033, - "grad_norm": 0.00224167387932539, - "learning_rate": 0.00019999738850329485, - "loss": 46.0, - "step": 30098 - }, - { - "epoch": 2.301278743047193, - "grad_norm": 0.0005773654556833208, - "learning_rate": 0.00019999738832970212, - "loss": 46.0, - "step": 30099 - }, - { - "epoch": 2.301355200030583, - "grad_norm": 0.0008862800314091146, - "learning_rate": 0.00019999738815610366, - "loss": 46.0, - "step": 30100 - }, - { - "epoch": 2.3014316570139726, - "grad_norm": 0.000557686376851052, - "learning_rate": 0.00019999738798249942, - "loss": 46.0, - "step": 30101 - }, - { - "epoch": 2.3015081139973623, - "grad_norm": 0.005363051779568195, - "learning_rate": 0.00019999738780888937, - "loss": 46.0, - "step": 30102 - }, - { - "epoch": 2.301584570980752, - "grad_norm": 0.0007143226102925837, - "learning_rate": 0.0001999973876352736, - "loss": 46.0, - "step": 30103 - }, - { - "epoch": 2.301661027964142, - "grad_norm": 0.0008837385685183108, - "learning_rate": 0.00019999738746165205, - "loss": 46.0, - "step": 30104 - }, - { - "epoch": 2.301737484947531, - "grad_norm": 0.0028602525126188993, - "learning_rate": 0.00019999738728802472, - "loss": 46.0, - "step": 30105 - }, - { - "epoch": 2.301813941930921, - "grad_norm": 0.001361931674182415, - "learning_rate": 0.0001999973871143916, - "loss": 46.0, - "step": 30106 - }, - { - "epoch": 2.3018903989143107, - "grad_norm": 0.0005813190946355462, - "learning_rate": 0.00019999738694075273, - "loss": 46.0, - "step": 30107 - }, - { - "epoch": 2.3019668558977004, - "grad_norm": 0.0005225312779657543, - "learning_rate": 0.0001999973867671081, - "loss": 46.0, - "step": 30108 - }, - { - "epoch": 2.30204331288109, - "grad_norm": 0.0018029806669801474, - "learning_rate": 0.0001999973865934577, - "loss": 46.0, - "step": 30109 - }, - { - "epoch": 2.30211976986448, - "grad_norm": 0.00046599714551120996, - "learning_rate": 0.00019999738641980153, - "loss": 46.0, - "step": 30110 - }, - { - "epoch": 2.3021962268478697, - "grad_norm": 0.000968014937825501, - "learning_rate": 0.0001999973862461396, - "loss": 46.0, - "step": 30111 - }, - { - "epoch": 2.3022726838312595, - "grad_norm": 0.0008470361935906112, - "learning_rate": 0.0001999973860724719, - "loss": 46.0, - "step": 30112 - }, - { - "epoch": 2.3023491408146493, - "grad_norm": 0.001534096198156476, - "learning_rate": 0.0001999973858987984, - "loss": 46.0, - "step": 30113 - }, - { - "epoch": 2.302425597798039, - "grad_norm": 0.0013971490552648902, - "learning_rate": 0.00019999738572511915, - "loss": 46.0, - "step": 30114 - }, - { - "epoch": 2.3025020547814288, - "grad_norm": 0.0011443597031757236, - "learning_rate": 0.0001999973855514341, - "loss": 46.0, - "step": 30115 - }, - { - "epoch": 2.3025785117648185, - "grad_norm": 0.0007007725071161985, - "learning_rate": 0.00019999738537774335, - "loss": 46.0, - "step": 30116 - }, - { - "epoch": 2.302654968748208, - "grad_norm": 0.0005706321680918336, - "learning_rate": 0.0001999973852040468, - "loss": 46.0, - "step": 30117 - }, - { - "epoch": 2.3027314257315976, - "grad_norm": 0.0025433956179767847, - "learning_rate": 0.00019999738503034443, - "loss": 46.0, - "step": 30118 - }, - { - "epoch": 2.3028078827149874, - "grad_norm": 0.0020307658705860376, - "learning_rate": 0.00019999738485663638, - "loss": 46.0, - "step": 30119 - }, - { - "epoch": 2.302884339698377, - "grad_norm": 0.0009990239050239325, - "learning_rate": 0.0001999973846829225, - "loss": 46.0, - "step": 30120 - }, - { - "epoch": 2.302960796681767, - "grad_norm": 0.0009324668790213764, - "learning_rate": 0.00019999738450920287, - "loss": 46.0, - "step": 30121 - }, - { - "epoch": 2.3030372536651567, - "grad_norm": 0.0004060252394992858, - "learning_rate": 0.00019999738433547747, - "loss": 46.0, - "step": 30122 - }, - { - "epoch": 2.3031137106485464, - "grad_norm": 0.0008862410904839635, - "learning_rate": 0.00019999738416174627, - "loss": 46.0, - "step": 30123 - }, - { - "epoch": 2.303190167631936, - "grad_norm": 0.0026099178940057755, - "learning_rate": 0.00019999738398800935, - "loss": 46.0, - "step": 30124 - }, - { - "epoch": 2.303266624615326, - "grad_norm": 0.011006048880517483, - "learning_rate": 0.00019999738381426663, - "loss": 46.0, - "step": 30125 - }, - { - "epoch": 2.3033430815987157, - "grad_norm": 0.0055942474864423275, - "learning_rate": 0.00019999738364051813, - "loss": 46.0, - "step": 30126 - }, - { - "epoch": 2.303419538582105, - "grad_norm": 0.0008677778532728553, - "learning_rate": 0.0001999973834667639, - "loss": 46.0, - "step": 30127 - }, - { - "epoch": 2.303495995565495, - "grad_norm": 0.0008999879937618971, - "learning_rate": 0.00019999738329300385, - "loss": 46.0, - "step": 30128 - }, - { - "epoch": 2.3035724525488845, - "grad_norm": 0.0031719410326331854, - "learning_rate": 0.0001999973831192381, - "loss": 46.0, - "step": 30129 - }, - { - "epoch": 2.3036489095322743, - "grad_norm": 0.0007733224774710834, - "learning_rate": 0.00019999738294546653, - "loss": 46.0, - "step": 30130 - }, - { - "epoch": 2.303725366515664, - "grad_norm": 0.0013872962445020676, - "learning_rate": 0.0001999973827716892, - "loss": 46.0, - "step": 30131 - }, - { - "epoch": 2.303801823499054, - "grad_norm": 0.001186657347716391, - "learning_rate": 0.0001999973825979061, - "loss": 46.0, - "step": 30132 - }, - { - "epoch": 2.3038782804824436, - "grad_norm": 0.0025625890120863914, - "learning_rate": 0.00019999738242411724, - "loss": 46.0, - "step": 30133 - }, - { - "epoch": 2.3039547374658333, - "grad_norm": 0.0007266376633197069, - "learning_rate": 0.00019999738225032258, - "loss": 46.0, - "step": 30134 - }, - { - "epoch": 2.304031194449223, - "grad_norm": 0.00029289996018633246, - "learning_rate": 0.00019999738207652218, - "loss": 46.0, - "step": 30135 - }, - { - "epoch": 2.304107651432613, - "grad_norm": 0.0008302509668283165, - "learning_rate": 0.000199997381902716, - "loss": 46.0, - "step": 30136 - }, - { - "epoch": 2.3041841084160026, - "grad_norm": 0.0012541902251541615, - "learning_rate": 0.00019999738172890406, - "loss": 46.0, - "step": 30137 - }, - { - "epoch": 2.3042605653993924, - "grad_norm": 0.001695367624051869, - "learning_rate": 0.00019999738155508637, - "loss": 46.0, - "step": 30138 - }, - { - "epoch": 2.3043370223827817, - "grad_norm": 0.0007267678156495094, - "learning_rate": 0.00019999738138126288, - "loss": 46.0, - "step": 30139 - }, - { - "epoch": 2.3044134793661715, - "grad_norm": 0.0007607286097481847, - "learning_rate": 0.0001999973812074336, - "loss": 46.0, - "step": 30140 - }, - { - "epoch": 2.3044899363495612, - "grad_norm": 0.0027221804484725, - "learning_rate": 0.00019999738103359862, - "loss": 46.0, - "step": 30141 - }, - { - "epoch": 2.304566393332951, - "grad_norm": 0.0008816447807475924, - "learning_rate": 0.0001999973808597578, - "loss": 46.0, - "step": 30142 - }, - { - "epoch": 2.3046428503163408, - "grad_norm": 0.00118237582501024, - "learning_rate": 0.00019999738068591125, - "loss": 46.0, - "step": 30143 - }, - { - "epoch": 2.3047193072997305, - "grad_norm": 0.002199642825871706, - "learning_rate": 0.00019999738051205892, - "loss": 46.0, - "step": 30144 - }, - { - "epoch": 2.3047957642831203, - "grad_norm": 0.0005205171182751656, - "learning_rate": 0.00019999738033820084, - "loss": 46.0, - "step": 30145 - }, - { - "epoch": 2.30487222126651, - "grad_norm": 0.0010612175101414323, - "learning_rate": 0.00019999738016433696, - "loss": 46.0, - "step": 30146 - }, - { - "epoch": 2.3049486782499, - "grad_norm": 0.0012619533808901906, - "learning_rate": 0.00019999737999046733, - "loss": 46.0, - "step": 30147 - }, - { - "epoch": 2.3050251352332896, - "grad_norm": 0.0014017487410455942, - "learning_rate": 0.0001999973798165919, - "loss": 46.0, - "step": 30148 - }, - { - "epoch": 2.305101592216679, - "grad_norm": 0.0009200768545269966, - "learning_rate": 0.00019999737964271073, - "loss": 46.0, - "step": 30149 - }, - { - "epoch": 2.3051780492000686, - "grad_norm": 0.0013504025992006063, - "learning_rate": 0.00019999737946882378, - "loss": 46.0, - "step": 30150 - }, - { - "epoch": 2.3052545061834584, - "grad_norm": 0.0005463776760734618, - "learning_rate": 0.00019999737929493106, - "loss": 46.0, - "step": 30151 - }, - { - "epoch": 2.305330963166848, - "grad_norm": 0.0007936308393254876, - "learning_rate": 0.0001999973791210326, - "loss": 46.0, - "step": 30152 - }, - { - "epoch": 2.305407420150238, - "grad_norm": 0.00023521768162027001, - "learning_rate": 0.00019999737894712833, - "loss": 46.0, - "step": 30153 - }, - { - "epoch": 2.3054838771336277, - "grad_norm": 0.0022964964155107737, - "learning_rate": 0.00019999737877321835, - "loss": 46.0, - "step": 30154 - }, - { - "epoch": 2.3055603341170174, - "grad_norm": 0.0014007611898705363, - "learning_rate": 0.00019999737859930253, - "loss": 46.0, - "step": 30155 - }, - { - "epoch": 2.305636791100407, - "grad_norm": 0.011091357097029686, - "learning_rate": 0.00019999737842538097, - "loss": 46.0, - "step": 30156 - }, - { - "epoch": 2.305713248083797, - "grad_norm": 0.002516264794394374, - "learning_rate": 0.00019999737825145364, - "loss": 46.0, - "step": 30157 - }, - { - "epoch": 2.3057897050671867, - "grad_norm": 0.0006208026316016912, - "learning_rate": 0.00019999737807752053, - "loss": 46.0, - "step": 30158 - }, - { - "epoch": 2.3058661620505765, - "grad_norm": 0.001432916265912354, - "learning_rate": 0.00019999737790358165, - "loss": 46.0, - "step": 30159 - }, - { - "epoch": 2.3059426190339662, - "grad_norm": 0.00058855174575001, - "learning_rate": 0.00019999737772963703, - "loss": 46.0, - "step": 30160 - }, - { - "epoch": 2.3060190760173556, - "grad_norm": 0.0009267079294659197, - "learning_rate": 0.00019999737755568663, - "loss": 46.0, - "step": 30161 - }, - { - "epoch": 2.3060955330007453, - "grad_norm": 0.0009682028321549296, - "learning_rate": 0.00019999737738173046, - "loss": 46.0, - "step": 30162 - }, - { - "epoch": 2.306171989984135, - "grad_norm": 0.002434520749375224, - "learning_rate": 0.0001999973772077685, - "loss": 46.0, - "step": 30163 - }, - { - "epoch": 2.306248446967525, - "grad_norm": 0.0008917953819036484, - "learning_rate": 0.0001999973770338008, - "loss": 46.0, - "step": 30164 - }, - { - "epoch": 2.3063249039509146, - "grad_norm": 0.0004295255639590323, - "learning_rate": 0.0001999973768598273, - "loss": 46.0, - "step": 30165 - }, - { - "epoch": 2.3064013609343044, - "grad_norm": 0.0009161210618913174, - "learning_rate": 0.00019999737668584806, - "loss": 46.0, - "step": 30166 - }, - { - "epoch": 2.306477817917694, - "grad_norm": 0.010465409606695175, - "learning_rate": 0.00019999737651186304, - "loss": 46.0, - "step": 30167 - }, - { - "epoch": 2.306554274901084, - "grad_norm": 0.0020112800411880016, - "learning_rate": 0.00019999737633787223, - "loss": 46.0, - "step": 30168 - }, - { - "epoch": 2.3066307318844737, - "grad_norm": 0.0007780776941217482, - "learning_rate": 0.00019999737616387567, - "loss": 46.0, - "step": 30169 - }, - { - "epoch": 2.3067071888678634, - "grad_norm": 0.0005456142826005816, - "learning_rate": 0.00019999737598987337, - "loss": 46.0, - "step": 30170 - }, - { - "epoch": 2.3067836458512527, - "grad_norm": 0.0008832073071971536, - "learning_rate": 0.00019999737581586523, - "loss": 46.0, - "step": 30171 - }, - { - "epoch": 2.3068601028346425, - "grad_norm": 0.0006058014114387333, - "learning_rate": 0.00019999737564185138, - "loss": 46.0, - "step": 30172 - }, - { - "epoch": 2.3069365598180323, - "grad_norm": 0.001385160954669118, - "learning_rate": 0.00019999737546783176, - "loss": 46.0, - "step": 30173 - }, - { - "epoch": 2.307013016801422, - "grad_norm": 0.002277539111673832, - "learning_rate": 0.00019999737529380633, - "loss": 46.0, - "step": 30174 - }, - { - "epoch": 2.3070894737848118, - "grad_norm": 0.0012423406587913632, - "learning_rate": 0.00019999737511977516, - "loss": 46.0, - "step": 30175 - }, - { - "epoch": 2.3071659307682015, - "grad_norm": 0.00147822720464319, - "learning_rate": 0.0001999973749457382, - "loss": 46.0, - "step": 30176 - }, - { - "epoch": 2.3072423877515913, - "grad_norm": 0.0013297604164108634, - "learning_rate": 0.0001999973747716955, - "loss": 46.0, - "step": 30177 - }, - { - "epoch": 2.307318844734981, - "grad_norm": 0.0011833177413791418, - "learning_rate": 0.000199997374597647, - "loss": 46.0, - "step": 30178 - }, - { - "epoch": 2.307395301718371, - "grad_norm": 0.0004478892660699785, - "learning_rate": 0.00019999737442359273, - "loss": 46.0, - "step": 30179 - }, - { - "epoch": 2.3074717587017606, - "grad_norm": 0.0011697242734953761, - "learning_rate": 0.00019999737424953272, - "loss": 46.0, - "step": 30180 - }, - { - "epoch": 2.3075482156851503, - "grad_norm": 0.00387997692450881, - "learning_rate": 0.00019999737407546694, - "loss": 46.0, - "step": 30181 - }, - { - "epoch": 2.30762467266854, - "grad_norm": 0.0017362211365252733, - "learning_rate": 0.00019999737390139538, - "loss": 46.0, - "step": 30182 - }, - { - "epoch": 2.3077011296519294, - "grad_norm": 0.001383223687298596, - "learning_rate": 0.00019999737372731804, - "loss": 46.0, - "step": 30183 - }, - { - "epoch": 2.307777586635319, - "grad_norm": 0.001065898803062737, - "learning_rate": 0.00019999737355323494, - "loss": 46.0, - "step": 30184 - }, - { - "epoch": 2.307854043618709, - "grad_norm": 0.002347080735489726, - "learning_rate": 0.00019999737337914606, - "loss": 46.0, - "step": 30185 - }, - { - "epoch": 2.3079305006020987, - "grad_norm": 0.0007108405698090792, - "learning_rate": 0.00019999737320505143, - "loss": 46.0, - "step": 30186 - }, - { - "epoch": 2.3080069575854885, - "grad_norm": 0.0005572242080233991, - "learning_rate": 0.000199997373030951, - "loss": 46.0, - "step": 30187 - }, - { - "epoch": 2.3080834145688782, - "grad_norm": 0.0014329697005450726, - "learning_rate": 0.00019999737285684484, - "loss": 46.0, - "step": 30188 - }, - { - "epoch": 2.308159871552268, - "grad_norm": 0.008112660609185696, - "learning_rate": 0.0001999973726827329, - "loss": 46.0, - "step": 30189 - }, - { - "epoch": 2.3082363285356577, - "grad_norm": 0.0018196408636868, - "learning_rate": 0.00019999737250861517, - "loss": 46.0, - "step": 30190 - }, - { - "epoch": 2.3083127855190475, - "grad_norm": 0.002917099976912141, - "learning_rate": 0.00019999737233449168, - "loss": 46.0, - "step": 30191 - }, - { - "epoch": 2.308389242502437, - "grad_norm": 0.00043530986295081675, - "learning_rate": 0.00019999737216036244, - "loss": 46.0, - "step": 30192 - }, - { - "epoch": 2.3084656994858266, - "grad_norm": 0.000667903630528599, - "learning_rate": 0.0001999973719862274, - "loss": 46.0, - "step": 30193 - }, - { - "epoch": 2.3085421564692163, - "grad_norm": 0.0007341289892792702, - "learning_rate": 0.00019999737181208662, - "loss": 46.0, - "step": 30194 - }, - { - "epoch": 2.308618613452606, - "grad_norm": 0.004593354184180498, - "learning_rate": 0.00019999737163794006, - "loss": 46.0, - "step": 30195 - }, - { - "epoch": 2.308695070435996, - "grad_norm": 0.004649362992495298, - "learning_rate": 0.00019999737146378772, - "loss": 46.0, - "step": 30196 - }, - { - "epoch": 2.3087715274193856, - "grad_norm": 0.0009565045475028455, - "learning_rate": 0.00019999737128962962, - "loss": 46.0, - "step": 30197 - }, - { - "epoch": 2.3088479844027754, - "grad_norm": 0.00112698704469949, - "learning_rate": 0.0001999973711154657, - "loss": 46.0, - "step": 30198 - }, - { - "epoch": 2.308924441386165, - "grad_norm": 0.0011483555426821113, - "learning_rate": 0.00019999737094129612, - "loss": 46.0, - "step": 30199 - }, - { - "epoch": 2.309000898369555, - "grad_norm": 0.0005525632295757532, - "learning_rate": 0.0001999973707671207, - "loss": 46.0, - "step": 30200 - }, - { - "epoch": 2.3090773553529447, - "grad_norm": 0.0005164967733435333, - "learning_rate": 0.00019999737059293952, - "loss": 46.0, - "step": 30201 - }, - { - "epoch": 2.3091538123363344, - "grad_norm": 0.0015221787616610527, - "learning_rate": 0.00019999737041875255, - "loss": 46.0, - "step": 30202 - }, - { - "epoch": 2.309230269319724, - "grad_norm": 0.0009435153915546834, - "learning_rate": 0.00019999737024455985, - "loss": 46.0, - "step": 30203 - }, - { - "epoch": 2.309306726303114, - "grad_norm": 0.0009867754997685552, - "learning_rate": 0.00019999737007036136, - "loss": 46.0, - "step": 30204 - }, - { - "epoch": 2.3093831832865033, - "grad_norm": 0.004971676971763372, - "learning_rate": 0.00019999736989615712, - "loss": 46.0, - "step": 30205 - }, - { - "epoch": 2.309459640269893, - "grad_norm": 0.0011034803465008736, - "learning_rate": 0.00019999736972194706, - "loss": 46.0, - "step": 30206 - }, - { - "epoch": 2.309536097253283, - "grad_norm": 0.0004129599255975336, - "learning_rate": 0.0001999973695477313, - "loss": 46.0, - "step": 30207 - }, - { - "epoch": 2.3096125542366726, - "grad_norm": 0.001159068662673235, - "learning_rate": 0.00019999736937350971, - "loss": 46.0, - "step": 30208 - }, - { - "epoch": 2.3096890112200623, - "grad_norm": 0.0007293835515156388, - "learning_rate": 0.00019999736919928238, - "loss": 46.0, - "step": 30209 - }, - { - "epoch": 2.309765468203452, - "grad_norm": 0.001638144487515092, - "learning_rate": 0.00019999736902504928, - "loss": 46.0, - "step": 30210 - }, - { - "epoch": 2.309841925186842, - "grad_norm": 0.0009650388965383172, - "learning_rate": 0.00019999736885081043, - "loss": 46.0, - "step": 30211 - }, - { - "epoch": 2.3099183821702316, - "grad_norm": 0.0005872561596333981, - "learning_rate": 0.00019999736867656575, - "loss": 46.0, - "step": 30212 - }, - { - "epoch": 2.3099948391536214, - "grad_norm": 0.0011244539637118578, - "learning_rate": 0.00019999736850231538, - "loss": 46.0, - "step": 30213 - }, - { - "epoch": 2.3100712961370107, - "grad_norm": 0.0007432249840348959, - "learning_rate": 0.00019999736832805918, - "loss": 46.0, - "step": 30214 - }, - { - "epoch": 2.3101477531204004, - "grad_norm": 0.0015721962554380298, - "learning_rate": 0.0001999973681537972, - "loss": 46.0, - "step": 30215 - }, - { - "epoch": 2.31022421010379, - "grad_norm": 0.0021371503826230764, - "learning_rate": 0.00019999736797952951, - "loss": 46.0, - "step": 30216 - }, - { - "epoch": 2.31030066708718, - "grad_norm": 0.0011967411264777184, - "learning_rate": 0.00019999736780525602, - "loss": 46.0, - "step": 30217 - }, - { - "epoch": 2.3103771240705697, - "grad_norm": 0.0012975335121154785, - "learning_rate": 0.00019999736763097676, - "loss": 46.0, - "step": 30218 - }, - { - "epoch": 2.3104535810539595, - "grad_norm": 0.0028240846004337072, - "learning_rate": 0.00019999736745669172, - "loss": 46.0, - "step": 30219 - }, - { - "epoch": 2.3105300380373492, - "grad_norm": 0.003750396892428398, - "learning_rate": 0.00019999736728240093, - "loss": 46.0, - "step": 30220 - }, - { - "epoch": 2.310606495020739, - "grad_norm": 0.0035218799021095037, - "learning_rate": 0.00019999736710810438, - "loss": 46.0, - "step": 30221 - }, - { - "epoch": 2.3106829520041288, - "grad_norm": 0.0011214811820536852, - "learning_rate": 0.00019999736693380204, - "loss": 46.0, - "step": 30222 - }, - { - "epoch": 2.3107594089875185, - "grad_norm": 0.0009723555995151401, - "learning_rate": 0.0001999973667594939, - "loss": 46.0, - "step": 30223 - }, - { - "epoch": 2.3108358659709083, - "grad_norm": 0.0012814803048968315, - "learning_rate": 0.00019999736658518006, - "loss": 46.0, - "step": 30224 - }, - { - "epoch": 2.310912322954298, - "grad_norm": 0.0007300298311747611, - "learning_rate": 0.0001999973664108604, - "loss": 46.0, - "step": 30225 - }, - { - "epoch": 2.3109887799376874, - "grad_norm": 0.0017820047214627266, - "learning_rate": 0.000199997366236535, - "loss": 46.0, - "step": 30226 - }, - { - "epoch": 2.311065236921077, - "grad_norm": 0.0025835237465798855, - "learning_rate": 0.00019999736606220384, - "loss": 46.0, - "step": 30227 - }, - { - "epoch": 2.311141693904467, - "grad_norm": 0.0013711490901187062, - "learning_rate": 0.0001999973658878669, - "loss": 46.0, - "step": 30228 - }, - { - "epoch": 2.3112181508878566, - "grad_norm": 0.0012380421394482255, - "learning_rate": 0.00019999736571352418, - "loss": 46.0, - "step": 30229 - }, - { - "epoch": 2.3112946078712464, - "grad_norm": 0.0014767007669433951, - "learning_rate": 0.0001999973655391757, - "loss": 46.0, - "step": 30230 - }, - { - "epoch": 2.311371064854636, - "grad_norm": 0.00156877760309726, - "learning_rate": 0.00019999736536482142, - "loss": 46.0, - "step": 30231 - }, - { - "epoch": 2.311447521838026, - "grad_norm": 0.00189289974514395, - "learning_rate": 0.00019999736519046138, - "loss": 46.0, - "step": 30232 - }, - { - "epoch": 2.3115239788214157, - "grad_norm": 0.0013805158669129014, - "learning_rate": 0.00019999736501609557, - "loss": 46.0, - "step": 30233 - }, - { - "epoch": 2.3116004358048055, - "grad_norm": 0.0008209001389332116, - "learning_rate": 0.00019999736484172401, - "loss": 46.0, - "step": 30234 - }, - { - "epoch": 2.311676892788195, - "grad_norm": 0.002141595119610429, - "learning_rate": 0.00019999736466734666, - "loss": 46.0, - "step": 30235 - }, - { - "epoch": 2.3117533497715845, - "grad_norm": 0.0018898362759500742, - "learning_rate": 0.00019999736449296358, - "loss": 46.0, - "step": 30236 - }, - { - "epoch": 2.3118298067549743, - "grad_norm": 0.0008827374549582601, - "learning_rate": 0.0001999973643185747, - "loss": 46.0, - "step": 30237 - }, - { - "epoch": 2.311906263738364, - "grad_norm": 0.0011403128737583756, - "learning_rate": 0.00019999736414418005, - "loss": 46.0, - "step": 30238 - }, - { - "epoch": 2.311982720721754, - "grad_norm": 0.0005950057529844344, - "learning_rate": 0.00019999736396977965, - "loss": 46.0, - "step": 30239 - }, - { - "epoch": 2.3120591777051436, - "grad_norm": 0.0008201288874261081, - "learning_rate": 0.00019999736379537345, - "loss": 46.0, - "step": 30240 - }, - { - "epoch": 2.3121356346885333, - "grad_norm": 0.0006285528652369976, - "learning_rate": 0.0001999973636209615, - "loss": 46.0, - "step": 30241 - }, - { - "epoch": 2.312212091671923, - "grad_norm": 0.0007488488918170333, - "learning_rate": 0.0001999973634465438, - "loss": 46.0, - "step": 30242 - }, - { - "epoch": 2.312288548655313, - "grad_norm": 0.001049170969054103, - "learning_rate": 0.00019999736327212027, - "loss": 46.0, - "step": 30243 - }, - { - "epoch": 2.3123650056387026, - "grad_norm": 0.001010696403682232, - "learning_rate": 0.00019999736309769104, - "loss": 46.0, - "step": 30244 - }, - { - "epoch": 2.3124414626220924, - "grad_norm": 0.0008575752144679427, - "learning_rate": 0.00019999736292325603, - "loss": 46.0, - "step": 30245 - }, - { - "epoch": 2.312517919605482, - "grad_norm": 0.001815955387428403, - "learning_rate": 0.0001999973627488152, - "loss": 46.0, - "step": 30246 - }, - { - "epoch": 2.312594376588872, - "grad_norm": 0.0006540436297655106, - "learning_rate": 0.00019999736257436866, - "loss": 46.0, - "step": 30247 - }, - { - "epoch": 2.312670833572261, - "grad_norm": 0.0007758033461868763, - "learning_rate": 0.0001999973623999163, - "loss": 46.0, - "step": 30248 - }, - { - "epoch": 2.312747290555651, - "grad_norm": 0.001631412305869162, - "learning_rate": 0.0001999973622254582, - "loss": 46.0, - "step": 30249 - }, - { - "epoch": 2.3128237475390407, - "grad_norm": 0.001589313498698175, - "learning_rate": 0.00019999736205099432, - "loss": 46.0, - "step": 30250 - }, - { - "epoch": 2.3129002045224305, - "grad_norm": 0.0008404984255321324, - "learning_rate": 0.0001999973618765247, - "loss": 46.0, - "step": 30251 - }, - { - "epoch": 2.3129766615058203, - "grad_norm": 0.0006540753529407084, - "learning_rate": 0.00019999736170204927, - "loss": 46.0, - "step": 30252 - }, - { - "epoch": 2.31305311848921, - "grad_norm": 0.006734836380928755, - "learning_rate": 0.0001999973615275681, - "loss": 46.0, - "step": 30253 - }, - { - "epoch": 2.3131295754726, - "grad_norm": 0.0005335891037248075, - "learning_rate": 0.00019999736135308113, - "loss": 46.0, - "step": 30254 - }, - { - "epoch": 2.3132060324559895, - "grad_norm": 0.008970017544925213, - "learning_rate": 0.0001999973611785884, - "loss": 46.0, - "step": 30255 - }, - { - "epoch": 2.3132824894393793, - "grad_norm": 0.0017543798312544823, - "learning_rate": 0.00019999736100408992, - "loss": 46.0, - "step": 30256 - }, - { - "epoch": 2.313358946422769, - "grad_norm": 0.0038674629759043455, - "learning_rate": 0.00019999736082958566, - "loss": 46.0, - "step": 30257 - }, - { - "epoch": 2.3134354034061584, - "grad_norm": 0.0016310069477185607, - "learning_rate": 0.00019999736065507564, - "loss": 46.0, - "step": 30258 - }, - { - "epoch": 2.313511860389548, - "grad_norm": 0.0008364637033082545, - "learning_rate": 0.00019999736048055983, - "loss": 46.0, - "step": 30259 - }, - { - "epoch": 2.313588317372938, - "grad_norm": 0.0006872367230243981, - "learning_rate": 0.00019999736030603825, - "loss": 46.0, - "step": 30260 - }, - { - "epoch": 2.3136647743563277, - "grad_norm": 0.0005224893102422357, - "learning_rate": 0.00019999736013151092, - "loss": 46.0, - "step": 30261 - }, - { - "epoch": 2.3137412313397174, - "grad_norm": 0.001109803793951869, - "learning_rate": 0.00019999735995697782, - "loss": 46.0, - "step": 30262 - }, - { - "epoch": 2.313817688323107, - "grad_norm": 0.000571669836062938, - "learning_rate": 0.00019999735978243894, - "loss": 46.0, - "step": 30263 - }, - { - "epoch": 2.313894145306497, - "grad_norm": 0.0007968081044964492, - "learning_rate": 0.0001999973596078943, - "loss": 46.0, - "step": 30264 - }, - { - "epoch": 2.3139706022898867, - "grad_norm": 0.002043564571067691, - "learning_rate": 0.0001999973594333439, - "loss": 46.0, - "step": 30265 - }, - { - "epoch": 2.3140470592732765, - "grad_norm": 0.0011303542414680123, - "learning_rate": 0.0001999973592587877, - "loss": 46.0, - "step": 30266 - }, - { - "epoch": 2.3141235162566662, - "grad_norm": 0.0017548424657434225, - "learning_rate": 0.00019999735908422575, - "loss": 46.0, - "step": 30267 - }, - { - "epoch": 2.314199973240056, - "grad_norm": 0.002858856227248907, - "learning_rate": 0.00019999735890965803, - "loss": 46.0, - "step": 30268 - }, - { - "epoch": 2.3142764302234458, - "grad_norm": 0.0018452861113473773, - "learning_rate": 0.00019999735873508454, - "loss": 46.0, - "step": 30269 - }, - { - "epoch": 2.314352887206835, - "grad_norm": 0.0005100212874822319, - "learning_rate": 0.00019999735856050528, - "loss": 46.0, - "step": 30270 - }, - { - "epoch": 2.314429344190225, - "grad_norm": 0.00043524097418412566, - "learning_rate": 0.00019999735838592024, - "loss": 46.0, - "step": 30271 - }, - { - "epoch": 2.3145058011736146, - "grad_norm": 0.003048122860491276, - "learning_rate": 0.00019999735821132943, - "loss": 46.0, - "step": 30272 - }, - { - "epoch": 2.3145822581570044, - "grad_norm": 0.0009016020921990275, - "learning_rate": 0.00019999735803673287, - "loss": 46.0, - "step": 30273 - }, - { - "epoch": 2.314658715140394, - "grad_norm": 0.0008780946373008192, - "learning_rate": 0.00019999735786213054, - "loss": 46.0, - "step": 30274 - }, - { - "epoch": 2.314735172123784, - "grad_norm": 0.0017649485962465405, - "learning_rate": 0.00019999735768752244, - "loss": 46.0, - "step": 30275 - }, - { - "epoch": 2.3148116291071736, - "grad_norm": 0.0006517200963571668, - "learning_rate": 0.00019999735751290856, - "loss": 46.0, - "step": 30276 - }, - { - "epoch": 2.3148880860905634, - "grad_norm": 0.0012541132746264338, - "learning_rate": 0.0001999973573382889, - "loss": 46.0, - "step": 30277 - }, - { - "epoch": 2.314964543073953, - "grad_norm": 0.0021966828498989344, - "learning_rate": 0.0001999973571636635, - "loss": 46.0, - "step": 30278 - }, - { - "epoch": 2.315041000057343, - "grad_norm": 0.0016574910841882229, - "learning_rate": 0.0001999973569890323, - "loss": 46.0, - "step": 30279 - }, - { - "epoch": 2.3151174570407322, - "grad_norm": 0.0009617339237593114, - "learning_rate": 0.00019999735681439535, - "loss": 46.0, - "step": 30280 - }, - { - "epoch": 2.315193914024122, - "grad_norm": 0.0018893566448241472, - "learning_rate": 0.00019999735663975263, - "loss": 46.0, - "step": 30281 - }, - { - "epoch": 2.3152703710075118, - "grad_norm": 0.0006024938193149865, - "learning_rate": 0.00019999735646510414, - "loss": 46.0, - "step": 30282 - }, - { - "epoch": 2.3153468279909015, - "grad_norm": 0.002040133811533451, - "learning_rate": 0.00019999735629044988, - "loss": 46.0, - "step": 30283 - }, - { - "epoch": 2.3154232849742913, - "grad_norm": 0.0017384991515427828, - "learning_rate": 0.00019999735611578981, - "loss": 46.0, - "step": 30284 - }, - { - "epoch": 2.315499741957681, - "grad_norm": 0.0019039979670196772, - "learning_rate": 0.00019999735594112403, - "loss": 46.0, - "step": 30285 - }, - { - "epoch": 2.315576198941071, - "grad_norm": 0.001144712558016181, - "learning_rate": 0.00019999735576645247, - "loss": 46.0, - "step": 30286 - }, - { - "epoch": 2.3156526559244606, - "grad_norm": 0.0006676096236333251, - "learning_rate": 0.00019999735559177512, - "loss": 46.0, - "step": 30287 - }, - { - "epoch": 2.3157291129078503, - "grad_norm": 0.000968686887063086, - "learning_rate": 0.00019999735541709201, - "loss": 46.0, - "step": 30288 - }, - { - "epoch": 2.31580556989124, - "grad_norm": 0.002427425468340516, - "learning_rate": 0.00019999735524240314, - "loss": 46.0, - "step": 30289 - }, - { - "epoch": 2.31588202687463, - "grad_norm": 0.0005142023437656462, - "learning_rate": 0.0001999973550677085, - "loss": 46.0, - "step": 30290 - }, - { - "epoch": 2.3159584838580196, - "grad_norm": 0.000832767691463232, - "learning_rate": 0.0001999973548930081, - "loss": 46.0, - "step": 30291 - }, - { - "epoch": 2.316034940841409, - "grad_norm": 0.0015518913278356194, - "learning_rate": 0.0001999973547183019, - "loss": 46.0, - "step": 30292 - }, - { - "epoch": 2.3161113978247987, - "grad_norm": 0.0012677236227318645, - "learning_rate": 0.00019999735454358995, - "loss": 46.0, - "step": 30293 - }, - { - "epoch": 2.3161878548081885, - "grad_norm": 0.003694751998409629, - "learning_rate": 0.0001999973543688722, - "loss": 46.0, - "step": 30294 - }, - { - "epoch": 2.316264311791578, - "grad_norm": 0.0014075635699555278, - "learning_rate": 0.00019999735419414872, - "loss": 46.0, - "step": 30295 - }, - { - "epoch": 2.316340768774968, - "grad_norm": 0.0017439830116927624, - "learning_rate": 0.00019999735401941945, - "loss": 46.0, - "step": 30296 - }, - { - "epoch": 2.3164172257583577, - "grad_norm": 0.0006086484063416719, - "learning_rate": 0.00019999735384468445, - "loss": 46.0, - "step": 30297 - }, - { - "epoch": 2.3164936827417475, - "grad_norm": 0.0037569128908216953, - "learning_rate": 0.00019999735366994364, - "loss": 46.0, - "step": 30298 - }, - { - "epoch": 2.3165701397251373, - "grad_norm": 0.0025564441457390785, - "learning_rate": 0.00019999735349519705, - "loss": 46.0, - "step": 30299 - }, - { - "epoch": 2.316646596708527, - "grad_norm": 0.0008942708373069763, - "learning_rate": 0.00019999735332044472, - "loss": 46.0, - "step": 30300 - }, - { - "epoch": 2.316723053691917, - "grad_norm": 0.0012030428042635322, - "learning_rate": 0.0001999973531456866, - "loss": 46.0, - "step": 30301 - }, - { - "epoch": 2.316799510675306, - "grad_norm": 0.002030459465458989, - "learning_rate": 0.00019999735297092272, - "loss": 46.0, - "step": 30302 - }, - { - "epoch": 2.316875967658696, - "grad_norm": 0.0016904083313420415, - "learning_rate": 0.0001999973527961531, - "loss": 46.0, - "step": 30303 - }, - { - "epoch": 2.3169524246420856, - "grad_norm": 0.004592102952301502, - "learning_rate": 0.00019999735262137767, - "loss": 46.0, - "step": 30304 - }, - { - "epoch": 2.3170288816254754, - "grad_norm": 0.0010771737433969975, - "learning_rate": 0.00019999735244659647, - "loss": 46.0, - "step": 30305 - }, - { - "epoch": 2.317105338608865, - "grad_norm": 0.0024316797498613596, - "learning_rate": 0.0001999973522718095, - "loss": 46.0, - "step": 30306 - }, - { - "epoch": 2.317181795592255, - "grad_norm": 0.0016414186684414744, - "learning_rate": 0.0001999973520970168, - "loss": 46.0, - "step": 30307 - }, - { - "epoch": 2.3172582525756447, - "grad_norm": 0.0006019505672156811, - "learning_rate": 0.0001999973519222183, - "loss": 46.0, - "step": 30308 - }, - { - "epoch": 2.3173347095590344, - "grad_norm": 0.0006140986224636436, - "learning_rate": 0.00019999735174741404, - "loss": 46.0, - "step": 30309 - }, - { - "epoch": 2.317411166542424, - "grad_norm": 0.0009695172775536776, - "learning_rate": 0.000199997351572604, - "loss": 46.0, - "step": 30310 - }, - { - "epoch": 2.317487623525814, - "grad_norm": 0.0035742600448429585, - "learning_rate": 0.0001999973513977882, - "loss": 46.0, - "step": 30311 - }, - { - "epoch": 2.3175640805092037, - "grad_norm": 0.004137310199439526, - "learning_rate": 0.0001999973512229666, - "loss": 46.0, - "step": 30312 - }, - { - "epoch": 2.3176405374925935, - "grad_norm": 0.0005126777687110007, - "learning_rate": 0.00019999735104813928, - "loss": 46.0, - "step": 30313 - }, - { - "epoch": 2.317716994475983, - "grad_norm": 0.003727610455825925, - "learning_rate": 0.00019999735087330617, - "loss": 46.0, - "step": 30314 - }, - { - "epoch": 2.3177934514593725, - "grad_norm": 0.0030770886223763227, - "learning_rate": 0.00019999735069846727, - "loss": 46.0, - "step": 30315 - }, - { - "epoch": 2.3178699084427623, - "grad_norm": 0.0015235097380355, - "learning_rate": 0.00019999735052362265, - "loss": 46.0, - "step": 30316 - }, - { - "epoch": 2.317946365426152, - "grad_norm": 0.0004589337622746825, - "learning_rate": 0.00019999735034877223, - "loss": 46.0, - "step": 30317 - }, - { - "epoch": 2.318022822409542, - "grad_norm": 0.0015530851669609547, - "learning_rate": 0.00019999735017391603, - "loss": 46.0, - "step": 30318 - }, - { - "epoch": 2.3180992793929316, - "grad_norm": 0.0016325052129104733, - "learning_rate": 0.0001999973499990541, - "loss": 46.0, - "step": 30319 - }, - { - "epoch": 2.3181757363763213, - "grad_norm": 0.0013463397044688463, - "learning_rate": 0.00019999734982418637, - "loss": 46.0, - "step": 30320 - }, - { - "epoch": 2.318252193359711, - "grad_norm": 0.0008545163436792791, - "learning_rate": 0.00019999734964931288, - "loss": 46.0, - "step": 30321 - }, - { - "epoch": 2.318328650343101, - "grad_norm": 0.0005678696907125413, - "learning_rate": 0.0001999973494744336, - "loss": 46.0, - "step": 30322 - }, - { - "epoch": 2.31840510732649, - "grad_norm": 0.0007805890636518598, - "learning_rate": 0.00019999734929954856, - "loss": 46.0, - "step": 30323 - }, - { - "epoch": 2.31848156430988, - "grad_norm": 0.0005209120572544634, - "learning_rate": 0.00019999734912465775, - "loss": 46.0, - "step": 30324 - }, - { - "epoch": 2.3185580212932697, - "grad_norm": 0.0008189770742319524, - "learning_rate": 0.0001999973489497612, - "loss": 46.0, - "step": 30325 - }, - { - "epoch": 2.3186344782766595, - "grad_norm": 0.0010674988152459264, - "learning_rate": 0.00019999734877485886, - "loss": 46.0, - "step": 30326 - }, - { - "epoch": 2.3187109352600492, - "grad_norm": 0.0014612135710194707, - "learning_rate": 0.0001999973485999507, - "loss": 46.0, - "step": 30327 - }, - { - "epoch": 2.318787392243439, - "grad_norm": 0.001121841254644096, - "learning_rate": 0.00019999734842503686, - "loss": 46.0, - "step": 30328 - }, - { - "epoch": 2.3188638492268288, - "grad_norm": 0.001231951406225562, - "learning_rate": 0.0001999973482501172, - "loss": 46.0, - "step": 30329 - }, - { - "epoch": 2.3189403062102185, - "grad_norm": 0.006180086638778448, - "learning_rate": 0.00019999734807519176, - "loss": 46.0, - "step": 30330 - }, - { - "epoch": 2.3190167631936083, - "grad_norm": 0.001452418277040124, - "learning_rate": 0.0001999973479002606, - "loss": 46.0, - "step": 30331 - }, - { - "epoch": 2.319093220176998, - "grad_norm": 0.0003985106013715267, - "learning_rate": 0.00019999734772532365, - "loss": 46.0, - "step": 30332 - }, - { - "epoch": 2.319169677160388, - "grad_norm": 0.0009089506347663701, - "learning_rate": 0.0001999973475503809, - "loss": 46.0, - "step": 30333 - }, - { - "epoch": 2.3192461341437776, - "grad_norm": 0.0010232562199234962, - "learning_rate": 0.00019999734737543242, - "loss": 46.0, - "step": 30334 - }, - { - "epoch": 2.3193225911271673, - "grad_norm": 0.0010310267098248005, - "learning_rate": 0.00019999734720047813, - "loss": 46.0, - "step": 30335 - }, - { - "epoch": 2.3193990481105566, - "grad_norm": 0.0007289510103873909, - "learning_rate": 0.00019999734702551812, - "loss": 46.0, - "step": 30336 - }, - { - "epoch": 2.3194755050939464, - "grad_norm": 0.000700947770383209, - "learning_rate": 0.0001999973468505523, - "loss": 46.0, - "step": 30337 - }, - { - "epoch": 2.319551962077336, - "grad_norm": 0.0006614306475967169, - "learning_rate": 0.00019999734667558073, - "loss": 46.0, - "step": 30338 - }, - { - "epoch": 2.319628419060726, - "grad_norm": 0.0013760696165263653, - "learning_rate": 0.0001999973465006034, - "loss": 46.0, - "step": 30339 - }, - { - "epoch": 2.3197048760441157, - "grad_norm": 0.001355270855128765, - "learning_rate": 0.00019999734632562027, - "loss": 46.0, - "step": 30340 - }, - { - "epoch": 2.3197813330275054, - "grad_norm": 0.006319751963019371, - "learning_rate": 0.0001999973461506314, - "loss": 46.0, - "step": 30341 - }, - { - "epoch": 2.319857790010895, - "grad_norm": 0.0006171645945869386, - "learning_rate": 0.00019999734597563675, - "loss": 46.0, - "step": 30342 - }, - { - "epoch": 2.319934246994285, - "grad_norm": 0.00040689727757126093, - "learning_rate": 0.00019999734580063632, - "loss": 46.0, - "step": 30343 - }, - { - "epoch": 2.3200107039776747, - "grad_norm": 0.0015397823881357908, - "learning_rate": 0.00019999734562563016, - "loss": 46.0, - "step": 30344 - }, - { - "epoch": 2.320087160961064, - "grad_norm": 0.00041277610580436885, - "learning_rate": 0.0001999973454506182, - "loss": 46.0, - "step": 30345 - }, - { - "epoch": 2.320163617944454, - "grad_norm": 0.003013300010934472, - "learning_rate": 0.00019999734527560045, - "loss": 46.0, - "step": 30346 - }, - { - "epoch": 2.3202400749278436, - "grad_norm": 0.003307922510430217, - "learning_rate": 0.00019999734510057696, - "loss": 46.0, - "step": 30347 - }, - { - "epoch": 2.3203165319112333, - "grad_norm": 0.003326849080622196, - "learning_rate": 0.00019999734492554767, - "loss": 46.0, - "step": 30348 - }, - { - "epoch": 2.320392988894623, - "grad_norm": 0.002616100711748004, - "learning_rate": 0.00019999734475051266, - "loss": 46.0, - "step": 30349 - }, - { - "epoch": 2.320469445878013, - "grad_norm": 0.0008141118451021612, - "learning_rate": 0.00019999734457547182, - "loss": 46.0, - "step": 30350 - }, - { - "epoch": 2.3205459028614026, - "grad_norm": 0.003728854237124324, - "learning_rate": 0.00019999734440042527, - "loss": 46.0, - "step": 30351 - }, - { - "epoch": 2.3206223598447924, - "grad_norm": 0.0004859961336478591, - "learning_rate": 0.00019999734422537294, - "loss": 46.0, - "step": 30352 - }, - { - "epoch": 2.320698816828182, - "grad_norm": 0.0008103257860057056, - "learning_rate": 0.0001999973440503148, - "loss": 46.0, - "step": 30353 - }, - { - "epoch": 2.320775273811572, - "grad_norm": 0.0008832439198158681, - "learning_rate": 0.00019999734387525094, - "loss": 46.0, - "step": 30354 - }, - { - "epoch": 2.3208517307949617, - "grad_norm": 0.0009649523999541998, - "learning_rate": 0.0001999973437001813, - "loss": 46.0, - "step": 30355 - }, - { - "epoch": 2.3209281877783514, - "grad_norm": 0.0007761421729810536, - "learning_rate": 0.00019999734352510587, - "loss": 46.0, - "step": 30356 - }, - { - "epoch": 2.3210046447617407, - "grad_norm": 0.0006364434957504272, - "learning_rate": 0.00019999734335002467, - "loss": 46.0, - "step": 30357 - }, - { - "epoch": 2.3210811017451305, - "grad_norm": 0.0007939972565509379, - "learning_rate": 0.0001999973431749377, - "loss": 46.0, - "step": 30358 - }, - { - "epoch": 2.3211575587285203, - "grad_norm": 0.0008202563039958477, - "learning_rate": 0.000199997342999845, - "loss": 46.0, - "step": 30359 - }, - { - "epoch": 2.32123401571191, - "grad_norm": 0.0007119790534488857, - "learning_rate": 0.0001999973428247465, - "loss": 46.0, - "step": 30360 - }, - { - "epoch": 2.3213104726952998, - "grad_norm": 0.001329778111539781, - "learning_rate": 0.0001999973426496422, - "loss": 46.0, - "step": 30361 - }, - { - "epoch": 2.3213869296786895, - "grad_norm": 0.001179746468551457, - "learning_rate": 0.0001999973424745322, - "loss": 46.0, - "step": 30362 - }, - { - "epoch": 2.3214633866620793, - "grad_norm": 0.000469493301352486, - "learning_rate": 0.00019999734229941637, - "loss": 46.0, - "step": 30363 - }, - { - "epoch": 2.321539843645469, - "grad_norm": 0.000981647172011435, - "learning_rate": 0.00019999734212429479, - "loss": 46.0, - "step": 30364 - }, - { - "epoch": 2.321616300628859, - "grad_norm": 0.001795116811990738, - "learning_rate": 0.00019999734194916746, - "loss": 46.0, - "step": 30365 - }, - { - "epoch": 2.3216927576122486, - "grad_norm": 0.0005095938104204834, - "learning_rate": 0.00019999734177403433, - "loss": 46.0, - "step": 30366 - }, - { - "epoch": 2.321769214595638, - "grad_norm": 0.0017778949113562703, - "learning_rate": 0.00019999734159889545, - "loss": 46.0, - "step": 30367 - }, - { - "epoch": 2.3218456715790277, - "grad_norm": 0.0010609703604131937, - "learning_rate": 0.0001999973414237508, - "loss": 46.0, - "step": 30368 - }, - { - "epoch": 2.3219221285624174, - "grad_norm": 0.0007412207778543234, - "learning_rate": 0.00019999734124860038, - "loss": 46.0, - "step": 30369 - }, - { - "epoch": 2.321998585545807, - "grad_norm": 0.008994394913315773, - "learning_rate": 0.0001999973410734442, - "loss": 46.0, - "step": 30370 - }, - { - "epoch": 2.322075042529197, - "grad_norm": 0.0006412541260942817, - "learning_rate": 0.00019999734089828222, - "loss": 46.0, - "step": 30371 - }, - { - "epoch": 2.3221514995125867, - "grad_norm": 0.0005678889574483037, - "learning_rate": 0.00019999734072311448, - "loss": 46.0, - "step": 30372 - }, - { - "epoch": 2.3222279564959765, - "grad_norm": 0.0014511931221932173, - "learning_rate": 0.000199997340547941, - "loss": 46.0, - "step": 30373 - }, - { - "epoch": 2.3223044134793662, - "grad_norm": 0.0009684332180768251, - "learning_rate": 0.00019999734037276173, - "loss": 46.0, - "step": 30374 - }, - { - "epoch": 2.322380870462756, - "grad_norm": 0.005044231191277504, - "learning_rate": 0.00019999734019757667, - "loss": 46.0, - "step": 30375 - }, - { - "epoch": 2.3224573274461457, - "grad_norm": 0.0008116128738038242, - "learning_rate": 0.0001999973400223859, - "loss": 46.0, - "step": 30376 - }, - { - "epoch": 2.3225337844295355, - "grad_norm": 0.0010314596584066749, - "learning_rate": 0.0001999973398471893, - "loss": 46.0, - "step": 30377 - }, - { - "epoch": 2.3226102414129253, - "grad_norm": 0.00035514350747689605, - "learning_rate": 0.00019999733967198698, - "loss": 46.0, - "step": 30378 - }, - { - "epoch": 2.3226866983963146, - "grad_norm": 0.012497498653829098, - "learning_rate": 0.00019999733949677885, - "loss": 46.0, - "step": 30379 - }, - { - "epoch": 2.3227631553797043, - "grad_norm": 0.0017736826557666063, - "learning_rate": 0.00019999733932156498, - "loss": 46.0, - "step": 30380 - }, - { - "epoch": 2.322839612363094, - "grad_norm": 0.0007666827295906842, - "learning_rate": 0.0001999973391463453, - "loss": 46.0, - "step": 30381 - }, - { - "epoch": 2.322916069346484, - "grad_norm": 0.00315473391674459, - "learning_rate": 0.00019999733897111988, - "loss": 46.0, - "step": 30382 - }, - { - "epoch": 2.3229925263298736, - "grad_norm": 0.005299501121044159, - "learning_rate": 0.00019999733879588869, - "loss": 46.0, - "step": 30383 - }, - { - "epoch": 2.3230689833132634, - "grad_norm": 0.0009433921659365296, - "learning_rate": 0.00019999733862065172, - "loss": 46.0, - "step": 30384 - }, - { - "epoch": 2.323145440296653, - "grad_norm": 0.002504811156541109, - "learning_rate": 0.000199997338445409, - "loss": 46.0, - "step": 30385 - }, - { - "epoch": 2.323221897280043, - "grad_norm": 0.001130260294303298, - "learning_rate": 0.0001999973382701605, - "loss": 46.0, - "step": 30386 - }, - { - "epoch": 2.3232983542634327, - "grad_norm": 0.0006547889206558466, - "learning_rate": 0.00019999733809490623, - "loss": 46.0, - "step": 30387 - }, - { - "epoch": 2.3233748112468224, - "grad_norm": 0.0007459830376319587, - "learning_rate": 0.0001999973379196462, - "loss": 46.0, - "step": 30388 - }, - { - "epoch": 2.3234512682302118, - "grad_norm": 0.0011226703645661473, - "learning_rate": 0.00019999733774438039, - "loss": 46.0, - "step": 30389 - }, - { - "epoch": 2.3235277252136015, - "grad_norm": 0.0005145056638866663, - "learning_rate": 0.0001999973375691088, - "loss": 46.0, - "step": 30390 - }, - { - "epoch": 2.3236041821969913, - "grad_norm": 0.0028296527452766895, - "learning_rate": 0.00019999733739383145, - "loss": 46.0, - "step": 30391 - }, - { - "epoch": 2.323680639180381, - "grad_norm": 0.0005512085044756532, - "learning_rate": 0.00019999733721854835, - "loss": 46.0, - "step": 30392 - }, - { - "epoch": 2.323757096163771, - "grad_norm": 0.0010922551155090332, - "learning_rate": 0.00019999733704325948, - "loss": 46.0, - "step": 30393 - }, - { - "epoch": 2.3238335531471606, - "grad_norm": 0.0026275126729160547, - "learning_rate": 0.0001999973368679648, - "loss": 46.0, - "step": 30394 - }, - { - "epoch": 2.3239100101305503, - "grad_norm": 0.0015638680197298527, - "learning_rate": 0.00019999733669266438, - "loss": 46.0, - "step": 30395 - }, - { - "epoch": 2.32398646711394, - "grad_norm": 0.0012028798228129745, - "learning_rate": 0.00019999733651735819, - "loss": 46.0, - "step": 30396 - }, - { - "epoch": 2.32406292409733, - "grad_norm": 0.0008055296493694186, - "learning_rate": 0.00019999733634204622, - "loss": 46.0, - "step": 30397 - }, - { - "epoch": 2.3241393810807196, - "grad_norm": 0.0015386966988444328, - "learning_rate": 0.00019999733616672848, - "loss": 46.0, - "step": 30398 - }, - { - "epoch": 2.3242158380641094, - "grad_norm": 0.00240634148940444, - "learning_rate": 0.000199997335991405, - "loss": 46.0, - "step": 30399 - }, - { - "epoch": 2.324292295047499, - "grad_norm": 0.0007274376112036407, - "learning_rate": 0.00019999733581607573, - "loss": 46.0, - "step": 30400 - }, - { - "epoch": 2.3243687520308884, - "grad_norm": 0.0027278985362499952, - "learning_rate": 0.00019999733564074067, - "loss": 46.0, - "step": 30401 - }, - { - "epoch": 2.324445209014278, - "grad_norm": 0.00022958245244808495, - "learning_rate": 0.00019999733546539986, - "loss": 46.0, - "step": 30402 - }, - { - "epoch": 2.324521665997668, - "grad_norm": 0.0007272228831425309, - "learning_rate": 0.0001999973352900533, - "loss": 46.0, - "step": 30403 - }, - { - "epoch": 2.3245981229810577, - "grad_norm": 0.002035551704466343, - "learning_rate": 0.00019999733511470095, - "loss": 46.0, - "step": 30404 - }, - { - "epoch": 2.3246745799644475, - "grad_norm": 0.00048469213652424514, - "learning_rate": 0.00019999733493934282, - "loss": 46.0, - "step": 30405 - }, - { - "epoch": 2.3247510369478372, - "grad_norm": 0.0007801530882716179, - "learning_rate": 0.00019999733476397895, - "loss": 46.0, - "step": 30406 - }, - { - "epoch": 2.324827493931227, - "grad_norm": 0.0012340234825387597, - "learning_rate": 0.0001999973345886093, - "loss": 46.0, - "step": 30407 - }, - { - "epoch": 2.3249039509146168, - "grad_norm": 0.003101398004218936, - "learning_rate": 0.00019999733441323386, - "loss": 46.0, - "step": 30408 - }, - { - "epoch": 2.3249804078980065, - "grad_norm": 0.0011867587454617023, - "learning_rate": 0.00019999733423785266, - "loss": 46.0, - "step": 30409 - }, - { - "epoch": 2.3250568648813963, - "grad_norm": 0.0005943225114606321, - "learning_rate": 0.00019999733406246572, - "loss": 46.0, - "step": 30410 - }, - { - "epoch": 2.3251333218647856, - "grad_norm": 0.0003382488212082535, - "learning_rate": 0.00019999733388707298, - "loss": 46.0, - "step": 30411 - }, - { - "epoch": 2.3252097788481754, - "grad_norm": 0.001074868137948215, - "learning_rate": 0.00019999733371167447, - "loss": 46.0, - "step": 30412 - }, - { - "epoch": 2.325286235831565, - "grad_norm": 0.0004787916550412774, - "learning_rate": 0.00019999733353627018, - "loss": 46.0, - "step": 30413 - }, - { - "epoch": 2.325362692814955, - "grad_norm": 0.0014202031306922436, - "learning_rate": 0.00019999733336086015, - "loss": 46.0, - "step": 30414 - }, - { - "epoch": 2.3254391497983447, - "grad_norm": 0.0015058716526255012, - "learning_rate": 0.00019999733318544437, - "loss": 46.0, - "step": 30415 - }, - { - "epoch": 2.3255156067817344, - "grad_norm": 0.0023006699047982693, - "learning_rate": 0.00019999733301002276, - "loss": 46.0, - "step": 30416 - }, - { - "epoch": 2.325592063765124, - "grad_norm": 0.0005122700240463018, - "learning_rate": 0.00019999733283459543, - "loss": 46.0, - "step": 30417 - }, - { - "epoch": 2.325668520748514, - "grad_norm": 0.0009394968510605395, - "learning_rate": 0.0001999973326591623, - "loss": 46.0, - "step": 30418 - }, - { - "epoch": 2.3257449777319037, - "grad_norm": 0.0005133896484039724, - "learning_rate": 0.00019999733248372343, - "loss": 46.0, - "step": 30419 - }, - { - "epoch": 2.3258214347152935, - "grad_norm": 0.001428685849532485, - "learning_rate": 0.00019999733230827876, - "loss": 46.0, - "step": 30420 - }, - { - "epoch": 2.325897891698683, - "grad_norm": 0.0014269306557253003, - "learning_rate": 0.00019999733213282834, - "loss": 46.0, - "step": 30421 - }, - { - "epoch": 2.325974348682073, - "grad_norm": 0.001305045560002327, - "learning_rate": 0.00019999733195737217, - "loss": 46.0, - "step": 30422 - }, - { - "epoch": 2.3260508056654623, - "grad_norm": 0.0014530970947816968, - "learning_rate": 0.00019999733178191018, - "loss": 46.0, - "step": 30423 - }, - { - "epoch": 2.326127262648852, - "grad_norm": 0.0005421048845164478, - "learning_rate": 0.00019999733160644246, - "loss": 46.0, - "step": 30424 - }, - { - "epoch": 2.326203719632242, - "grad_norm": 0.0009188229450955987, - "learning_rate": 0.00019999733143096892, - "loss": 46.0, - "step": 30425 - }, - { - "epoch": 2.3262801766156316, - "grad_norm": 0.0007503820233978331, - "learning_rate": 0.00019999733125548966, - "loss": 46.0, - "step": 30426 - }, - { - "epoch": 2.3263566335990213, - "grad_norm": 0.0008856317726895213, - "learning_rate": 0.00019999733108000463, - "loss": 46.0, - "step": 30427 - }, - { - "epoch": 2.326433090582411, - "grad_norm": 0.002229111734777689, - "learning_rate": 0.00019999733090451382, - "loss": 46.0, - "step": 30428 - }, - { - "epoch": 2.326509547565801, - "grad_norm": 0.0005680381436832249, - "learning_rate": 0.00019999733072901724, - "loss": 46.0, - "step": 30429 - }, - { - "epoch": 2.3265860045491906, - "grad_norm": 0.0021651163697242737, - "learning_rate": 0.00019999733055351492, - "loss": 46.0, - "step": 30430 - }, - { - "epoch": 2.3266624615325804, - "grad_norm": 0.0011214156402274966, - "learning_rate": 0.00019999733037800676, - "loss": 46.0, - "step": 30431 - }, - { - "epoch": 2.32673891851597, - "grad_norm": 0.001414934522472322, - "learning_rate": 0.0001999973302024929, - "loss": 46.0, - "step": 30432 - }, - { - "epoch": 2.3268153754993595, - "grad_norm": 0.0006146717933006585, - "learning_rate": 0.00019999733002697324, - "loss": 46.0, - "step": 30433 - }, - { - "epoch": 2.3268918324827492, - "grad_norm": 0.003162570297718048, - "learning_rate": 0.0001999973298514478, - "loss": 46.0, - "step": 30434 - }, - { - "epoch": 2.326968289466139, - "grad_norm": 0.0012074160622432828, - "learning_rate": 0.0001999973296759166, - "loss": 46.0, - "step": 30435 - }, - { - "epoch": 2.3270447464495287, - "grad_norm": 0.001848422223702073, - "learning_rate": 0.00019999732950037963, - "loss": 46.0, - "step": 30436 - }, - { - "epoch": 2.3271212034329185, - "grad_norm": 0.0008912977646104991, - "learning_rate": 0.0001999973293248369, - "loss": 46.0, - "step": 30437 - }, - { - "epoch": 2.3271976604163083, - "grad_norm": 0.0007331767701543868, - "learning_rate": 0.00019999732914928838, - "loss": 46.0, - "step": 30438 - }, - { - "epoch": 2.327274117399698, - "grad_norm": 0.0008977054385468364, - "learning_rate": 0.00019999732897373412, - "loss": 46.0, - "step": 30439 - }, - { - "epoch": 2.327350574383088, - "grad_norm": 0.0007164858980104327, - "learning_rate": 0.0001999973287981741, - "loss": 46.0, - "step": 30440 - }, - { - "epoch": 2.3274270313664776, - "grad_norm": 0.0018716392805799842, - "learning_rate": 0.00019999732862260828, - "loss": 46.0, - "step": 30441 - }, - { - "epoch": 2.3275034883498673, - "grad_norm": 0.0040169828571379185, - "learning_rate": 0.0001999973284470367, - "loss": 46.0, - "step": 30442 - }, - { - "epoch": 2.327579945333257, - "grad_norm": 0.0009843767620623112, - "learning_rate": 0.00019999732827145935, - "loss": 46.0, - "step": 30443 - }, - { - "epoch": 2.327656402316647, - "grad_norm": 0.0008170006331056356, - "learning_rate": 0.00019999732809587622, - "loss": 46.0, - "step": 30444 - }, - { - "epoch": 2.327732859300036, - "grad_norm": 0.0007758899009786546, - "learning_rate": 0.00019999732792028735, - "loss": 46.0, - "step": 30445 - }, - { - "epoch": 2.327809316283426, - "grad_norm": 0.005486639216542244, - "learning_rate": 0.00019999732774469268, - "loss": 46.0, - "step": 30446 - }, - { - "epoch": 2.3278857732668157, - "grad_norm": 0.0006798380054533482, - "learning_rate": 0.00019999732756909223, - "loss": 46.0, - "step": 30447 - }, - { - "epoch": 2.3279622302502054, - "grad_norm": 0.0021661140490323305, - "learning_rate": 0.00019999732739348604, - "loss": 46.0, - "step": 30448 - }, - { - "epoch": 2.328038687233595, - "grad_norm": 0.00602521700784564, - "learning_rate": 0.00019999732721787407, - "loss": 46.0, - "step": 30449 - }, - { - "epoch": 2.328115144216985, - "grad_norm": 0.0005183838657103479, - "learning_rate": 0.00019999732704225636, - "loss": 46.0, - "step": 30450 - }, - { - "epoch": 2.3281916012003747, - "grad_norm": 0.0010052935685962439, - "learning_rate": 0.00019999732686663287, - "loss": 46.0, - "step": 30451 - }, - { - "epoch": 2.3282680581837645, - "grad_norm": 0.0010794798145070672, - "learning_rate": 0.00019999732669100358, - "loss": 46.0, - "step": 30452 - }, - { - "epoch": 2.3283445151671542, - "grad_norm": 0.0012344920542091131, - "learning_rate": 0.00019999732651536852, - "loss": 46.0, - "step": 30453 - }, - { - "epoch": 2.3284209721505436, - "grad_norm": 0.017593327909708023, - "learning_rate": 0.00019999732633972772, - "loss": 46.0, - "step": 30454 - }, - { - "epoch": 2.3284974291339333, - "grad_norm": 0.0006167201208882034, - "learning_rate": 0.00019999732616408114, - "loss": 46.0, - "step": 30455 - }, - { - "epoch": 2.328573886117323, - "grad_norm": 0.004931359086185694, - "learning_rate": 0.00019999732598842879, - "loss": 46.0, - "step": 30456 - }, - { - "epoch": 2.328650343100713, - "grad_norm": 0.007100431248545647, - "learning_rate": 0.00019999732581277066, - "loss": 46.0, - "step": 30457 - }, - { - "epoch": 2.3287268000841026, - "grad_norm": 0.0004416010924614966, - "learning_rate": 0.0001999973256371068, - "loss": 46.0, - "step": 30458 - }, - { - "epoch": 2.3288032570674924, - "grad_norm": 0.00205968483351171, - "learning_rate": 0.00019999732546143711, - "loss": 46.0, - "step": 30459 - }, - { - "epoch": 2.328879714050882, - "grad_norm": 0.0005541814607568085, - "learning_rate": 0.0001999973252857617, - "loss": 46.0, - "step": 30460 - }, - { - "epoch": 2.328956171034272, - "grad_norm": 0.0046439687721431255, - "learning_rate": 0.0001999973251100805, - "loss": 46.0, - "step": 30461 - }, - { - "epoch": 2.3290326280176616, - "grad_norm": 0.0010102344676852226, - "learning_rate": 0.0001999973249343935, - "loss": 46.0, - "step": 30462 - }, - { - "epoch": 2.3291090850010514, - "grad_norm": 0.0005489391623996198, - "learning_rate": 0.0001999973247587008, - "loss": 46.0, - "step": 30463 - }, - { - "epoch": 2.329185541984441, - "grad_norm": 0.0008963418076746166, - "learning_rate": 0.00019999732458300228, - "loss": 46.0, - "step": 30464 - }, - { - "epoch": 2.329261998967831, - "grad_norm": 0.0007536340854130685, - "learning_rate": 0.000199997324407298, - "loss": 46.0, - "step": 30465 - }, - { - "epoch": 2.3293384559512207, - "grad_norm": 0.0007768994546495378, - "learning_rate": 0.00019999732423158797, - "loss": 46.0, - "step": 30466 - }, - { - "epoch": 2.32941491293461, - "grad_norm": 0.000318201316986233, - "learning_rate": 0.00019999732405587216, - "loss": 46.0, - "step": 30467 - }, - { - "epoch": 2.3294913699179998, - "grad_norm": 0.0025257545057684183, - "learning_rate": 0.00019999732388015055, - "loss": 46.0, - "step": 30468 - }, - { - "epoch": 2.3295678269013895, - "grad_norm": 0.0012237076880410314, - "learning_rate": 0.0001999973237044232, - "loss": 46.0, - "step": 30469 - }, - { - "epoch": 2.3296442838847793, - "grad_norm": 0.0021016024984419346, - "learning_rate": 0.0001999973235286901, - "loss": 46.0, - "step": 30470 - }, - { - "epoch": 2.329720740868169, - "grad_norm": 0.00078096758807078, - "learning_rate": 0.0001999973233529512, - "loss": 46.0, - "step": 30471 - }, - { - "epoch": 2.329797197851559, - "grad_norm": 0.0003566005907487124, - "learning_rate": 0.00019999732317720653, - "loss": 46.0, - "step": 30472 - }, - { - "epoch": 2.3298736548349486, - "grad_norm": 0.0019797333516180515, - "learning_rate": 0.0001999973230014561, - "loss": 46.0, - "step": 30473 - }, - { - "epoch": 2.3299501118183383, - "grad_norm": 0.002198479138314724, - "learning_rate": 0.00019999732282569992, - "loss": 46.0, - "step": 30474 - }, - { - "epoch": 2.330026568801728, - "grad_norm": 0.001873291446827352, - "learning_rate": 0.00019999732264993795, - "loss": 46.0, - "step": 30475 - }, - { - "epoch": 2.3301030257851174, - "grad_norm": 0.0012714068870991468, - "learning_rate": 0.0001999973224741702, - "loss": 46.0, - "step": 30476 - }, - { - "epoch": 2.330179482768507, - "grad_norm": 0.0009153428836725652, - "learning_rate": 0.00019999732229839673, - "loss": 46.0, - "step": 30477 - }, - { - "epoch": 2.330255939751897, - "grad_norm": 0.0014175878604874015, - "learning_rate": 0.00019999732212261741, - "loss": 46.0, - "step": 30478 - }, - { - "epoch": 2.3303323967352867, - "grad_norm": 0.0028817395213991404, - "learning_rate": 0.00019999732194683238, - "loss": 46.0, - "step": 30479 - }, - { - "epoch": 2.3304088537186765, - "grad_norm": 0.0017851786687970161, - "learning_rate": 0.00019999732177104158, - "loss": 46.0, - "step": 30480 - }, - { - "epoch": 2.330485310702066, - "grad_norm": 0.0026716215070337057, - "learning_rate": 0.00019999732159524497, - "loss": 46.0, - "step": 30481 - }, - { - "epoch": 2.330561767685456, - "grad_norm": 0.0009459813009016216, - "learning_rate": 0.00019999732141944262, - "loss": 46.0, - "step": 30482 - }, - { - "epoch": 2.3306382246688457, - "grad_norm": 0.0034814453683793545, - "learning_rate": 0.0001999973212436345, - "loss": 46.0, - "step": 30483 - }, - { - "epoch": 2.3307146816522355, - "grad_norm": 0.0011281903134658933, - "learning_rate": 0.00019999732106782062, - "loss": 46.0, - "step": 30484 - }, - { - "epoch": 2.3307911386356253, - "grad_norm": 0.0005879094824194908, - "learning_rate": 0.00019999732089200095, - "loss": 46.0, - "step": 30485 - }, - { - "epoch": 2.330867595619015, - "grad_norm": 0.002650426933541894, - "learning_rate": 0.00019999732071617553, - "loss": 46.0, - "step": 30486 - }, - { - "epoch": 2.330944052602405, - "grad_norm": 0.0069006881676614285, - "learning_rate": 0.0001999973205403443, - "loss": 46.0, - "step": 30487 - }, - { - "epoch": 2.331020509585794, - "grad_norm": 0.0012131606927141547, - "learning_rate": 0.00019999732036450735, - "loss": 46.0, - "step": 30488 - }, - { - "epoch": 2.331096966569184, - "grad_norm": 0.0006248114514164627, - "learning_rate": 0.0001999973201886646, - "loss": 46.0, - "step": 30489 - }, - { - "epoch": 2.3311734235525736, - "grad_norm": 0.0011029633460566401, - "learning_rate": 0.0001999973200128161, - "loss": 46.0, - "step": 30490 - }, - { - "epoch": 2.3312498805359634, - "grad_norm": 0.0009441353613510728, - "learning_rate": 0.00019999731983696184, - "loss": 46.0, - "step": 30491 - }, - { - "epoch": 2.331326337519353, - "grad_norm": 0.0024268615525215864, - "learning_rate": 0.0001999973196611018, - "loss": 46.0, - "step": 30492 - }, - { - "epoch": 2.331402794502743, - "grad_norm": 0.00297020236030221, - "learning_rate": 0.000199997319485236, - "loss": 46.0, - "step": 30493 - }, - { - "epoch": 2.3314792514861327, - "grad_norm": 0.0012593739666044712, - "learning_rate": 0.0001999973193093644, - "loss": 46.0, - "step": 30494 - }, - { - "epoch": 2.3315557084695224, - "grad_norm": 0.0007747412309981883, - "learning_rate": 0.00019999731913348704, - "loss": 46.0, - "step": 30495 - }, - { - "epoch": 2.331632165452912, - "grad_norm": 0.0008016584906727076, - "learning_rate": 0.00019999731895760392, - "loss": 46.0, - "step": 30496 - }, - { - "epoch": 2.331708622436302, - "grad_norm": 0.00738663412630558, - "learning_rate": 0.00019999731878171502, - "loss": 46.0, - "step": 30497 - }, - { - "epoch": 2.3317850794196913, - "grad_norm": 0.000793817569501698, - "learning_rate": 0.00019999731860582037, - "loss": 46.0, - "step": 30498 - }, - { - "epoch": 2.331861536403081, - "grad_norm": 0.0009280915837734938, - "learning_rate": 0.00019999731842991993, - "loss": 46.0, - "step": 30499 - }, - { - "epoch": 2.331937993386471, - "grad_norm": 0.0012025247560814023, - "learning_rate": 0.00019999731825401374, - "loss": 46.0, - "step": 30500 - }, - { - "epoch": 2.3320144503698605, - "grad_norm": 0.0007714900420978665, - "learning_rate": 0.00019999731807810177, - "loss": 46.0, - "step": 30501 - }, - { - "epoch": 2.3320909073532503, - "grad_norm": 0.0005416963831521571, - "learning_rate": 0.00019999731790218403, - "loss": 46.0, - "step": 30502 - }, - { - "epoch": 2.33216736433664, - "grad_norm": 0.001500443322584033, - "learning_rate": 0.00019999731772626052, - "loss": 46.0, - "step": 30503 - }, - { - "epoch": 2.33224382132003, - "grad_norm": 0.0024475310929119587, - "learning_rate": 0.0001999973175503312, - "loss": 46.0, - "step": 30504 - }, - { - "epoch": 2.3323202783034196, - "grad_norm": 0.000529143784660846, - "learning_rate": 0.00019999731737439618, - "loss": 46.0, - "step": 30505 - }, - { - "epoch": 2.3323967352868094, - "grad_norm": 0.0009378319373354316, - "learning_rate": 0.00019999731719845537, - "loss": 46.0, - "step": 30506 - }, - { - "epoch": 2.332473192270199, - "grad_norm": 0.0016075520543381572, - "learning_rate": 0.0001999973170225088, - "loss": 46.0, - "step": 30507 - }, - { - "epoch": 2.332549649253589, - "grad_norm": 0.0009560227626934648, - "learning_rate": 0.00019999731684655641, - "loss": 46.0, - "step": 30508 - }, - { - "epoch": 2.3326261062369786, - "grad_norm": 0.002297154162079096, - "learning_rate": 0.00019999731667059832, - "loss": 46.0, - "step": 30509 - }, - { - "epoch": 2.332702563220368, - "grad_norm": 0.0015520333545282483, - "learning_rate": 0.0001999973164946344, - "loss": 46.0, - "step": 30510 - }, - { - "epoch": 2.3327790202037577, - "grad_norm": 0.0014315292937681079, - "learning_rate": 0.00019999731631866477, - "loss": 46.0, - "step": 30511 - }, - { - "epoch": 2.3328554771871475, - "grad_norm": 0.0010838517919182777, - "learning_rate": 0.0001999973161426893, - "loss": 46.0, - "step": 30512 - }, - { - "epoch": 2.3329319341705372, - "grad_norm": 0.0006422838196158409, - "learning_rate": 0.0001999973159667081, - "loss": 46.0, - "step": 30513 - }, - { - "epoch": 2.333008391153927, - "grad_norm": 0.0005534583469852805, - "learning_rate": 0.00019999731579072117, - "loss": 46.0, - "step": 30514 - }, - { - "epoch": 2.3330848481373168, - "grad_norm": 0.0018934594700112939, - "learning_rate": 0.0001999973156147284, - "loss": 46.0, - "step": 30515 - }, - { - "epoch": 2.3331613051207065, - "grad_norm": 0.0005745832459069788, - "learning_rate": 0.0001999973154387299, - "loss": 46.0, - "step": 30516 - }, - { - "epoch": 2.3332377621040963, - "grad_norm": 0.000632535433396697, - "learning_rate": 0.0001999973152627256, - "loss": 46.0, - "step": 30517 - }, - { - "epoch": 2.333314219087486, - "grad_norm": 0.0062083289958536625, - "learning_rate": 0.00019999731508671558, - "loss": 46.0, - "step": 30518 - }, - { - "epoch": 2.333390676070876, - "grad_norm": 0.00106734037399292, - "learning_rate": 0.00019999731491069975, - "loss": 46.0, - "step": 30519 - }, - { - "epoch": 2.333467133054265, - "grad_norm": 0.0006080340826883912, - "learning_rate": 0.00019999731473467817, - "loss": 46.0, - "step": 30520 - }, - { - "epoch": 2.333543590037655, - "grad_norm": 0.00070382677949965, - "learning_rate": 0.00019999731455865082, - "loss": 46.0, - "step": 30521 - }, - { - "epoch": 2.3336200470210446, - "grad_norm": 0.0015601797495037317, - "learning_rate": 0.0001999973143826177, - "loss": 46.0, - "step": 30522 - }, - { - "epoch": 2.3336965040044344, - "grad_norm": 0.0027359300293028355, - "learning_rate": 0.0001999973142065788, - "loss": 46.0, - "step": 30523 - }, - { - "epoch": 2.333772960987824, - "grad_norm": 0.000908554473426193, - "learning_rate": 0.00019999731403053415, - "loss": 46.0, - "step": 30524 - }, - { - "epoch": 2.333849417971214, - "grad_norm": 0.001058922614902258, - "learning_rate": 0.0001999973138544837, - "loss": 46.0, - "step": 30525 - }, - { - "epoch": 2.3339258749546037, - "grad_norm": 0.0007167409639805555, - "learning_rate": 0.0001999973136784275, - "loss": 46.0, - "step": 30526 - }, - { - "epoch": 2.3340023319379934, - "grad_norm": 0.0019888204988092184, - "learning_rate": 0.00019999731350236552, - "loss": 46.0, - "step": 30527 - }, - { - "epoch": 2.334078788921383, - "grad_norm": 0.00045928670442663133, - "learning_rate": 0.00019999731332629779, - "loss": 46.0, - "step": 30528 - }, - { - "epoch": 2.334155245904773, - "grad_norm": 0.0015401450218632817, - "learning_rate": 0.00019999731315022427, - "loss": 46.0, - "step": 30529 - }, - { - "epoch": 2.3342317028881627, - "grad_norm": 0.001193465432152152, - "learning_rate": 0.000199997312974145, - "loss": 46.0, - "step": 30530 - }, - { - "epoch": 2.3343081598715525, - "grad_norm": 0.0012732665054500103, - "learning_rate": 0.00019999731279805993, - "loss": 46.0, - "step": 30531 - }, - { - "epoch": 2.334384616854942, - "grad_norm": 0.0010085650719702244, - "learning_rate": 0.00019999731262196913, - "loss": 46.0, - "step": 30532 - }, - { - "epoch": 2.3344610738383316, - "grad_norm": 0.0021177898161113262, - "learning_rate": 0.00019999731244587255, - "loss": 46.0, - "step": 30533 - }, - { - "epoch": 2.3345375308217213, - "grad_norm": 0.003935427404940128, - "learning_rate": 0.0001999973122697702, - "loss": 46.0, - "step": 30534 - }, - { - "epoch": 2.334613987805111, - "grad_norm": 0.0009917697170749307, - "learning_rate": 0.00019999731209366205, - "loss": 46.0, - "step": 30535 - }, - { - "epoch": 2.334690444788501, - "grad_norm": 0.002426746068522334, - "learning_rate": 0.00019999731191754815, - "loss": 46.0, - "step": 30536 - }, - { - "epoch": 2.3347669017718906, - "grad_norm": 0.0019559075590223074, - "learning_rate": 0.00019999731174142848, - "loss": 46.0, - "step": 30537 - }, - { - "epoch": 2.3348433587552804, - "grad_norm": 0.0004813242703676224, - "learning_rate": 0.00019999731156530306, - "loss": 46.0, - "step": 30538 - }, - { - "epoch": 2.33491981573867, - "grad_norm": 0.0017152774380519986, - "learning_rate": 0.00019999731138917187, - "loss": 46.0, - "step": 30539 - }, - { - "epoch": 2.33499627272206, - "grad_norm": 0.0010211910121142864, - "learning_rate": 0.00019999731121303488, - "loss": 46.0, - "step": 30540 - }, - { - "epoch": 2.3350727297054497, - "grad_norm": 0.002255661878734827, - "learning_rate": 0.00019999731103689214, - "loss": 46.0, - "step": 30541 - }, - { - "epoch": 2.335149186688839, - "grad_norm": 0.0010235972004011273, - "learning_rate": 0.00019999731086074363, - "loss": 46.0, - "step": 30542 - }, - { - "epoch": 2.3352256436722287, - "grad_norm": 0.0012261184165254235, - "learning_rate": 0.00019999731068458935, - "loss": 46.0, - "step": 30543 - }, - { - "epoch": 2.3353021006556185, - "grad_norm": 0.001387918833643198, - "learning_rate": 0.00019999731050842932, - "loss": 46.0, - "step": 30544 - }, - { - "epoch": 2.3353785576390083, - "grad_norm": 0.0010396098950877786, - "learning_rate": 0.00019999731033226348, - "loss": 46.0, - "step": 30545 - }, - { - "epoch": 2.335455014622398, - "grad_norm": 0.0004188910825178027, - "learning_rate": 0.0001999973101560919, - "loss": 46.0, - "step": 30546 - }, - { - "epoch": 2.335531471605788, - "grad_norm": 0.00046145892702043056, - "learning_rate": 0.00019999730997991456, - "loss": 46.0, - "step": 30547 - }, - { - "epoch": 2.3356079285891775, - "grad_norm": 0.0006948307272978127, - "learning_rate": 0.0001999973098037314, - "loss": 46.0, - "step": 30548 - }, - { - "epoch": 2.3356843855725673, - "grad_norm": 0.002568683587014675, - "learning_rate": 0.00019999730962754253, - "loss": 46.0, - "step": 30549 - }, - { - "epoch": 2.335760842555957, - "grad_norm": 0.0007333027315326035, - "learning_rate": 0.00019999730945134786, - "loss": 46.0, - "step": 30550 - }, - { - "epoch": 2.335837299539347, - "grad_norm": 0.0020218323916196823, - "learning_rate": 0.00019999730927514745, - "loss": 46.0, - "step": 30551 - }, - { - "epoch": 2.3359137565227366, - "grad_norm": 0.00773219857364893, - "learning_rate": 0.00019999730909894123, - "loss": 46.0, - "step": 30552 - }, - { - "epoch": 2.3359902135061263, - "grad_norm": 0.0006330051692202687, - "learning_rate": 0.00019999730892272927, - "loss": 46.0, - "step": 30553 - }, - { - "epoch": 2.3360666704895157, - "grad_norm": 0.001696947729215026, - "learning_rate": 0.0001999973087465115, - "loss": 46.0, - "step": 30554 - }, - { - "epoch": 2.3361431274729054, - "grad_norm": 0.004732252564281225, - "learning_rate": 0.000199997308570288, - "loss": 46.0, - "step": 30555 - }, - { - "epoch": 2.336219584456295, - "grad_norm": 0.0006153184804134071, - "learning_rate": 0.00019999730839405873, - "loss": 46.0, - "step": 30556 - }, - { - "epoch": 2.336296041439685, - "grad_norm": 0.0025214930064976215, - "learning_rate": 0.00019999730821782365, - "loss": 46.0, - "step": 30557 - }, - { - "epoch": 2.3363724984230747, - "grad_norm": 0.0012366330483928323, - "learning_rate": 0.00019999730804158285, - "loss": 46.0, - "step": 30558 - }, - { - "epoch": 2.3364489554064645, - "grad_norm": 0.0008409314905293286, - "learning_rate": 0.00019999730786533624, - "loss": 46.0, - "step": 30559 - }, - { - "epoch": 2.3365254123898542, - "grad_norm": 0.0023845331743359566, - "learning_rate": 0.0001999973076890839, - "loss": 46.0, - "step": 30560 - }, - { - "epoch": 2.336601869373244, - "grad_norm": 0.0009311073808930814, - "learning_rate": 0.00019999730751282577, - "loss": 46.0, - "step": 30561 - }, - { - "epoch": 2.3366783263566338, - "grad_norm": 0.0021973440889269114, - "learning_rate": 0.00019999730733656187, - "loss": 46.0, - "step": 30562 - }, - { - "epoch": 2.3367547833400235, - "grad_norm": 0.0005608109058812261, - "learning_rate": 0.00019999730716029223, - "loss": 46.0, - "step": 30563 - }, - { - "epoch": 2.336831240323413, - "grad_norm": 0.0018422291614115238, - "learning_rate": 0.00019999730698401678, - "loss": 46.0, - "step": 30564 - }, - { - "epoch": 2.3369076973068026, - "grad_norm": 0.0034021951723843813, - "learning_rate": 0.00019999730680773557, - "loss": 46.0, - "step": 30565 - }, - { - "epoch": 2.3369841542901924, - "grad_norm": 0.001579405041411519, - "learning_rate": 0.00019999730663144857, - "loss": 46.0, - "step": 30566 - }, - { - "epoch": 2.337060611273582, - "grad_norm": 0.000303540175082162, - "learning_rate": 0.00019999730645515584, - "loss": 46.0, - "step": 30567 - }, - { - "epoch": 2.337137068256972, - "grad_norm": 0.0019256646046414971, - "learning_rate": 0.00019999730627885733, - "loss": 46.0, - "step": 30568 - }, - { - "epoch": 2.3372135252403616, - "grad_norm": 0.0006757780211046338, - "learning_rate": 0.00019999730610255304, - "loss": 46.0, - "step": 30569 - }, - { - "epoch": 2.3372899822237514, - "grad_norm": 0.0009310618625022471, - "learning_rate": 0.000199997305926243, - "loss": 46.0, - "step": 30570 - }, - { - "epoch": 2.337366439207141, - "grad_norm": 0.0010758353164419532, - "learning_rate": 0.00019999730574992718, - "loss": 46.0, - "step": 30571 - }, - { - "epoch": 2.337442896190531, - "grad_norm": 0.0011591686634346843, - "learning_rate": 0.00019999730557360558, - "loss": 46.0, - "step": 30572 - }, - { - "epoch": 2.3375193531739207, - "grad_norm": 0.002370295813307166, - "learning_rate": 0.00019999730539727823, - "loss": 46.0, - "step": 30573 - }, - { - "epoch": 2.3375958101573104, - "grad_norm": 0.0007798952865414321, - "learning_rate": 0.00019999730522094508, - "loss": 46.0, - "step": 30574 - }, - { - "epoch": 2.3376722671407, - "grad_norm": 0.001364949275739491, - "learning_rate": 0.00019999730504460618, - "loss": 46.0, - "step": 30575 - }, - { - "epoch": 2.3377487241240895, - "grad_norm": 0.0006470885127782822, - "learning_rate": 0.00019999730486826154, - "loss": 46.0, - "step": 30576 - }, - { - "epoch": 2.3378251811074793, - "grad_norm": 0.005174380727112293, - "learning_rate": 0.00019999730469191112, - "loss": 46.0, - "step": 30577 - }, - { - "epoch": 2.337901638090869, - "grad_norm": 0.0017438264330849051, - "learning_rate": 0.0001999973045155549, - "loss": 46.0, - "step": 30578 - }, - { - "epoch": 2.337978095074259, - "grad_norm": 0.0007920488133095205, - "learning_rate": 0.00019999730433919292, - "loss": 46.0, - "step": 30579 - }, - { - "epoch": 2.3380545520576486, - "grad_norm": 0.0006459720898419619, - "learning_rate": 0.00019999730416282518, - "loss": 46.0, - "step": 30580 - }, - { - "epoch": 2.3381310090410383, - "grad_norm": 0.001057083485648036, - "learning_rate": 0.00019999730398645164, - "loss": 46.0, - "step": 30581 - }, - { - "epoch": 2.338207466024428, - "grad_norm": 0.0011769833508878946, - "learning_rate": 0.00019999730381007239, - "loss": 46.0, - "step": 30582 - }, - { - "epoch": 2.338283923007818, - "grad_norm": 0.000645259628072381, - "learning_rate": 0.00019999730363368733, - "loss": 46.0, - "step": 30583 - }, - { - "epoch": 2.3383603799912076, - "grad_norm": 0.0006173406145535409, - "learning_rate": 0.0001999973034572965, - "loss": 46.0, - "step": 30584 - }, - { - "epoch": 2.338436836974597, - "grad_norm": 0.0008263772469945252, - "learning_rate": 0.00019999730328089992, - "loss": 46.0, - "step": 30585 - }, - { - "epoch": 2.3385132939579867, - "grad_norm": 0.0009204766829498112, - "learning_rate": 0.00019999730310449755, - "loss": 46.0, - "step": 30586 - }, - { - "epoch": 2.3385897509413764, - "grad_norm": 0.0016777734272181988, - "learning_rate": 0.00019999730292808942, - "loss": 46.0, - "step": 30587 - }, - { - "epoch": 2.338666207924766, - "grad_norm": 0.0012596884043887258, - "learning_rate": 0.00019999730275167555, - "loss": 46.0, - "step": 30588 - }, - { - "epoch": 2.338742664908156, - "grad_norm": 0.0017181789735332131, - "learning_rate": 0.00019999730257525586, - "loss": 46.0, - "step": 30589 - }, - { - "epoch": 2.3388191218915457, - "grad_norm": 0.0006443418678827584, - "learning_rate": 0.0001999973023988304, - "loss": 46.0, - "step": 30590 - }, - { - "epoch": 2.3388955788749355, - "grad_norm": 0.0016996599733829498, - "learning_rate": 0.00019999730222239922, - "loss": 46.0, - "step": 30591 - }, - { - "epoch": 2.3389720358583252, - "grad_norm": 0.0005018014926463366, - "learning_rate": 0.00019999730204596223, - "loss": 46.0, - "step": 30592 - }, - { - "epoch": 2.339048492841715, - "grad_norm": 0.0034854840487241745, - "learning_rate": 0.0001999973018695195, - "loss": 46.0, - "step": 30593 - }, - { - "epoch": 2.3391249498251048, - "grad_norm": 0.004720806144177914, - "learning_rate": 0.000199997301693071, - "loss": 46.0, - "step": 30594 - }, - { - "epoch": 2.3392014068084945, - "grad_norm": 0.0018677798798307776, - "learning_rate": 0.00019999730151661668, - "loss": 46.0, - "step": 30595 - }, - { - "epoch": 2.3392778637918843, - "grad_norm": 0.001091250916942954, - "learning_rate": 0.00019999730134015665, - "loss": 46.0, - "step": 30596 - }, - { - "epoch": 2.339354320775274, - "grad_norm": 0.0006587326060980558, - "learning_rate": 0.00019999730116369082, - "loss": 46.0, - "step": 30597 - }, - { - "epoch": 2.3394307777586634, - "grad_norm": 0.00306577212177217, - "learning_rate": 0.00019999730098721921, - "loss": 46.0, - "step": 30598 - }, - { - "epoch": 2.339507234742053, - "grad_norm": 0.0012429801281541586, - "learning_rate": 0.00019999730081074186, - "loss": 46.0, - "step": 30599 - }, - { - "epoch": 2.339583691725443, - "grad_norm": 0.0006678019417449832, - "learning_rate": 0.00019999730063425872, - "loss": 46.0, - "step": 30600 - }, - { - "epoch": 2.3396601487088327, - "grad_norm": 0.0007223449647426605, - "learning_rate": 0.00019999730045776985, - "loss": 46.0, - "step": 30601 - }, - { - "epoch": 2.3397366056922224, - "grad_norm": 0.001051132334396243, - "learning_rate": 0.00019999730028127518, - "loss": 46.0, - "step": 30602 - }, - { - "epoch": 2.339813062675612, - "grad_norm": 0.0008924936992116272, - "learning_rate": 0.00019999730010477473, - "loss": 46.0, - "step": 30603 - }, - { - "epoch": 2.339889519659002, - "grad_norm": 0.0009778476087376475, - "learning_rate": 0.00019999729992826852, - "loss": 46.0, - "step": 30604 - }, - { - "epoch": 2.3399659766423917, - "grad_norm": 0.0033746790140867233, - "learning_rate": 0.00019999729975175655, - "loss": 46.0, - "step": 30605 - }, - { - "epoch": 2.3400424336257815, - "grad_norm": 0.0012777198571711779, - "learning_rate": 0.00019999729957523882, - "loss": 46.0, - "step": 30606 - }, - { - "epoch": 2.3401188906091708, - "grad_norm": 0.0009875097312033176, - "learning_rate": 0.00019999729939871528, - "loss": 46.0, - "step": 30607 - }, - { - "epoch": 2.3401953475925605, - "grad_norm": 0.0009483841131441295, - "learning_rate": 0.000199997299222186, - "loss": 46.0, - "step": 30608 - }, - { - "epoch": 2.3402718045759503, - "grad_norm": 0.000711943837814033, - "learning_rate": 0.00019999729904565094, - "loss": 46.0, - "step": 30609 - }, - { - "epoch": 2.34034826155934, - "grad_norm": 0.00043770045158453286, - "learning_rate": 0.00019999729886911011, - "loss": 46.0, - "step": 30610 - }, - { - "epoch": 2.34042471854273, - "grad_norm": 0.0006217218469828367, - "learning_rate": 0.0001999972986925635, - "loss": 46.0, - "step": 30611 - }, - { - "epoch": 2.3405011755261196, - "grad_norm": 0.0036873382050544024, - "learning_rate": 0.00019999729851601116, - "loss": 46.0, - "step": 30612 - }, - { - "epoch": 2.3405776325095093, - "grad_norm": 0.011502964422106743, - "learning_rate": 0.00019999729833945304, - "loss": 46.0, - "step": 30613 - }, - { - "epoch": 2.340654089492899, - "grad_norm": 0.0013821851462125778, - "learning_rate": 0.00019999729816288912, - "loss": 46.0, - "step": 30614 - }, - { - "epoch": 2.340730546476289, - "grad_norm": 0.002367577748373151, - "learning_rate": 0.00019999729798631947, - "loss": 46.0, - "step": 30615 - }, - { - "epoch": 2.3408070034596786, - "grad_norm": 0.001113171805627644, - "learning_rate": 0.00019999729780974403, - "loss": 46.0, - "step": 30616 - }, - { - "epoch": 2.3408834604430684, - "grad_norm": 0.0007057064794935286, - "learning_rate": 0.00019999729763316282, - "loss": 46.0, - "step": 30617 - }, - { - "epoch": 2.340959917426458, - "grad_norm": 0.0005223373300395906, - "learning_rate": 0.00019999729745657585, - "loss": 46.0, - "step": 30618 - }, - { - "epoch": 2.341036374409848, - "grad_norm": 0.0008474922506138682, - "learning_rate": 0.00019999729727998312, - "loss": 46.0, - "step": 30619 - }, - { - "epoch": 2.3411128313932372, - "grad_norm": 0.0010565832490101457, - "learning_rate": 0.0001999972971033846, - "loss": 46.0, - "step": 30620 - }, - { - "epoch": 2.341189288376627, - "grad_norm": 0.001985379261896014, - "learning_rate": 0.0001999972969267803, - "loss": 46.0, - "step": 30621 - }, - { - "epoch": 2.3412657453600167, - "grad_norm": 0.0007389218080788851, - "learning_rate": 0.00019999729675017024, - "loss": 46.0, - "step": 30622 - }, - { - "epoch": 2.3413422023434065, - "grad_norm": 0.0008347560069523752, - "learning_rate": 0.00019999729657355441, - "loss": 46.0, - "step": 30623 - }, - { - "epoch": 2.3414186593267963, - "grad_norm": 0.0028940513730049133, - "learning_rate": 0.0001999972963969328, - "loss": 46.0, - "step": 30624 - }, - { - "epoch": 2.341495116310186, - "grad_norm": 0.0005485435831360519, - "learning_rate": 0.00019999729622030546, - "loss": 46.0, - "step": 30625 - }, - { - "epoch": 2.341571573293576, - "grad_norm": 0.0010249732295051217, - "learning_rate": 0.0001999972960436723, - "loss": 46.0, - "step": 30626 - }, - { - "epoch": 2.3416480302769656, - "grad_norm": 0.0017250453820452094, - "learning_rate": 0.00019999729586703344, - "loss": 46.0, - "step": 30627 - }, - { - "epoch": 2.3417244872603553, - "grad_norm": 0.00275959400460124, - "learning_rate": 0.00019999729569038875, - "loss": 46.0, - "step": 30628 - }, - { - "epoch": 2.3418009442437446, - "grad_norm": 0.0008972191135399044, - "learning_rate": 0.0001999972955137383, - "loss": 46.0, - "step": 30629 - }, - { - "epoch": 2.3418774012271344, - "grad_norm": 0.005577577743679285, - "learning_rate": 0.00019999729533708212, - "loss": 46.0, - "step": 30630 - }, - { - "epoch": 2.341953858210524, - "grad_norm": 0.006267010234296322, - "learning_rate": 0.00019999729516042013, - "loss": 46.0, - "step": 30631 - }, - { - "epoch": 2.342030315193914, - "grad_norm": 0.003595035057514906, - "learning_rate": 0.0001999972949837524, - "loss": 46.0, - "step": 30632 - }, - { - "epoch": 2.3421067721773037, - "grad_norm": 0.005955283064395189, - "learning_rate": 0.00019999729480707888, - "loss": 46.0, - "step": 30633 - }, - { - "epoch": 2.3421832291606934, - "grad_norm": 0.0007644465076737106, - "learning_rate": 0.00019999729463039958, - "loss": 46.0, - "step": 30634 - }, - { - "epoch": 2.342259686144083, - "grad_norm": 0.0017080981051549315, - "learning_rate": 0.00019999729445371452, - "loss": 46.0, - "step": 30635 - }, - { - "epoch": 2.342336143127473, - "grad_norm": 0.0015398006653413177, - "learning_rate": 0.0001999972942770237, - "loss": 46.0, - "step": 30636 - }, - { - "epoch": 2.3424126001108627, - "grad_norm": 0.0013983638491481543, - "learning_rate": 0.00019999729410032712, - "loss": 46.0, - "step": 30637 - }, - { - "epoch": 2.3424890570942525, - "grad_norm": 0.0028409857768565416, - "learning_rate": 0.00019999729392362474, - "loss": 46.0, - "step": 30638 - }, - { - "epoch": 2.3425655140776422, - "grad_norm": 0.002657357370480895, - "learning_rate": 0.0001999972937469166, - "loss": 46.0, - "step": 30639 - }, - { - "epoch": 2.342641971061032, - "grad_norm": 0.0007117083296179771, - "learning_rate": 0.00019999729357020272, - "loss": 46.0, - "step": 30640 - }, - { - "epoch": 2.3427184280444213, - "grad_norm": 0.0010999025544151664, - "learning_rate": 0.00019999729339348303, - "loss": 46.0, - "step": 30641 - }, - { - "epoch": 2.342794885027811, - "grad_norm": 0.0006137785385362804, - "learning_rate": 0.00019999729321675759, - "loss": 46.0, - "step": 30642 - }, - { - "epoch": 2.342871342011201, - "grad_norm": 0.00030042778234928846, - "learning_rate": 0.00019999729304002637, - "loss": 46.0, - "step": 30643 - }, - { - "epoch": 2.3429477989945906, - "grad_norm": 0.001172129064798355, - "learning_rate": 0.0001999972928632894, - "loss": 46.0, - "step": 30644 - }, - { - "epoch": 2.3430242559779804, - "grad_norm": 0.0009331195615231991, - "learning_rate": 0.00019999729268654665, - "loss": 46.0, - "step": 30645 - }, - { - "epoch": 2.34310071296137, - "grad_norm": 0.0014410910662263632, - "learning_rate": 0.00019999729250979814, - "loss": 46.0, - "step": 30646 - }, - { - "epoch": 2.34317716994476, - "grad_norm": 0.0015979704912751913, - "learning_rate": 0.00019999729233304386, - "loss": 46.0, - "step": 30647 - }, - { - "epoch": 2.3432536269281496, - "grad_norm": 0.0007211298798210919, - "learning_rate": 0.00019999729215628377, - "loss": 46.0, - "step": 30648 - }, - { - "epoch": 2.3433300839115394, - "grad_norm": 0.0012526677455753088, - "learning_rate": 0.00019999729197951797, - "loss": 46.0, - "step": 30649 - }, - { - "epoch": 2.343406540894929, - "grad_norm": 0.0009590137051418424, - "learning_rate": 0.00019999729180274637, - "loss": 46.0, - "step": 30650 - }, - { - "epoch": 2.3434829978783185, - "grad_norm": 0.0009515249985270202, - "learning_rate": 0.000199997291625969, - "loss": 46.0, - "step": 30651 - }, - { - "epoch": 2.3435594548617082, - "grad_norm": 0.0008460519602522254, - "learning_rate": 0.00019999729144918587, - "loss": 46.0, - "step": 30652 - }, - { - "epoch": 2.343635911845098, - "grad_norm": 0.0012833202490583062, - "learning_rate": 0.00019999729127239698, - "loss": 46.0, - "step": 30653 - }, - { - "epoch": 2.3437123688284878, - "grad_norm": 0.0005600783042609692, - "learning_rate": 0.0001999972910956023, - "loss": 46.0, - "step": 30654 - }, - { - "epoch": 2.3437888258118775, - "grad_norm": 0.0024408174213021994, - "learning_rate": 0.00019999729091880187, - "loss": 46.0, - "step": 30655 - }, - { - "epoch": 2.3438652827952673, - "grad_norm": 0.001440030406229198, - "learning_rate": 0.00019999729074199565, - "loss": 46.0, - "step": 30656 - }, - { - "epoch": 2.343941739778657, - "grad_norm": 0.0006131033296696842, - "learning_rate": 0.0001999972905651837, - "loss": 46.0, - "step": 30657 - }, - { - "epoch": 2.344018196762047, - "grad_norm": 0.0018768934532999992, - "learning_rate": 0.00019999729038836593, - "loss": 46.0, - "step": 30658 - }, - { - "epoch": 2.3440946537454366, - "grad_norm": 0.0006309091695584357, - "learning_rate": 0.0001999972902115424, - "loss": 46.0, - "step": 30659 - }, - { - "epoch": 2.3441711107288263, - "grad_norm": 0.001732588279992342, - "learning_rate": 0.00019999729003471311, - "loss": 46.0, - "step": 30660 - }, - { - "epoch": 2.344247567712216, - "grad_norm": 0.0013158574001863599, - "learning_rate": 0.00019999728985787803, - "loss": 46.0, - "step": 30661 - }, - { - "epoch": 2.344324024695606, - "grad_norm": 0.000650020781904459, - "learning_rate": 0.00019999728968103723, - "loss": 46.0, - "step": 30662 - }, - { - "epoch": 2.344400481678995, - "grad_norm": 0.0009020527359098196, - "learning_rate": 0.00019999728950419063, - "loss": 46.0, - "step": 30663 - }, - { - "epoch": 2.344476938662385, - "grad_norm": 0.0011322556529194117, - "learning_rate": 0.00019999728932733825, - "loss": 46.0, - "step": 30664 - }, - { - "epoch": 2.3445533956457747, - "grad_norm": 0.0035704541951417923, - "learning_rate": 0.00019999728915048013, - "loss": 46.0, - "step": 30665 - }, - { - "epoch": 2.3446298526291645, - "grad_norm": 0.004053534008562565, - "learning_rate": 0.00019999728897361624, - "loss": 46.0, - "step": 30666 - }, - { - "epoch": 2.344706309612554, - "grad_norm": 0.0008521229028701782, - "learning_rate": 0.00019999728879674654, - "loss": 46.0, - "step": 30667 - }, - { - "epoch": 2.344782766595944, - "grad_norm": 0.00043917031143791974, - "learning_rate": 0.0001999972886198711, - "loss": 46.0, - "step": 30668 - }, - { - "epoch": 2.3448592235793337, - "grad_norm": 0.0017025087727233768, - "learning_rate": 0.0001999972884429899, - "loss": 46.0, - "step": 30669 - }, - { - "epoch": 2.3449356805627235, - "grad_norm": 0.0006187063409015536, - "learning_rate": 0.00019999728826610293, - "loss": 46.0, - "step": 30670 - }, - { - "epoch": 2.3450121375461133, - "grad_norm": 0.0034894514828920364, - "learning_rate": 0.00019999728808921016, - "loss": 46.0, - "step": 30671 - }, - { - "epoch": 2.345088594529503, - "grad_norm": 0.0006457815179601312, - "learning_rate": 0.00019999728791231166, - "loss": 46.0, - "step": 30672 - }, - { - "epoch": 2.3451650515128923, - "grad_norm": 0.0014341780915856361, - "learning_rate": 0.00019999728773540735, - "loss": 46.0, - "step": 30673 - }, - { - "epoch": 2.345241508496282, - "grad_norm": 0.003676380729302764, - "learning_rate": 0.00019999728755849732, - "loss": 46.0, - "step": 30674 - }, - { - "epoch": 2.345317965479672, - "grad_norm": 0.0005658547743223608, - "learning_rate": 0.00019999728738158147, - "loss": 46.0, - "step": 30675 - }, - { - "epoch": 2.3453944224630616, - "grad_norm": 0.0010032716672867537, - "learning_rate": 0.00019999728720465986, - "loss": 46.0, - "step": 30676 - }, - { - "epoch": 2.3454708794464514, - "grad_norm": 0.0024425850715488195, - "learning_rate": 0.00019999728702773252, - "loss": 46.0, - "step": 30677 - }, - { - "epoch": 2.345547336429841, - "grad_norm": 0.00108078692574054, - "learning_rate": 0.00019999728685079937, - "loss": 46.0, - "step": 30678 - }, - { - "epoch": 2.345623793413231, - "grad_norm": 0.0026095963548868895, - "learning_rate": 0.00019999728667386048, - "loss": 46.0, - "step": 30679 - }, - { - "epoch": 2.3457002503966207, - "grad_norm": 0.0005333885783329606, - "learning_rate": 0.00019999728649691578, - "loss": 46.0, - "step": 30680 - }, - { - "epoch": 2.3457767073800104, - "grad_norm": 0.006590796168893576, - "learning_rate": 0.00019999728631996534, - "loss": 46.0, - "step": 30681 - }, - { - "epoch": 2.3458531643634, - "grad_norm": 0.0011731353588402271, - "learning_rate": 0.00019999728614300915, - "loss": 46.0, - "step": 30682 - }, - { - "epoch": 2.34592962134679, - "grad_norm": 0.001045377692207694, - "learning_rate": 0.00019999728596604714, - "loss": 46.0, - "step": 30683 - }, - { - "epoch": 2.3460060783301797, - "grad_norm": 0.0006777209928259254, - "learning_rate": 0.0001999972857890794, - "loss": 46.0, - "step": 30684 - }, - { - "epoch": 2.346082535313569, - "grad_norm": 0.0013066051760688424, - "learning_rate": 0.0001999972856121059, - "loss": 46.0, - "step": 30685 - }, - { - "epoch": 2.346158992296959, - "grad_norm": 0.0022873030975461006, - "learning_rate": 0.0001999972854351266, - "loss": 46.0, - "step": 30686 - }, - { - "epoch": 2.3462354492803486, - "grad_norm": 0.000904332147911191, - "learning_rate": 0.00019999728525814153, - "loss": 46.0, - "step": 30687 - }, - { - "epoch": 2.3463119062637383, - "grad_norm": 0.0005471340846270323, - "learning_rate": 0.00019999728508115073, - "loss": 46.0, - "step": 30688 - }, - { - "epoch": 2.346388363247128, - "grad_norm": 0.000711886037606746, - "learning_rate": 0.00019999728490415413, - "loss": 46.0, - "step": 30689 - }, - { - "epoch": 2.346464820230518, - "grad_norm": 0.001956287072971463, - "learning_rate": 0.00019999728472715176, - "loss": 46.0, - "step": 30690 - }, - { - "epoch": 2.3465412772139076, - "grad_norm": 0.001109390170313418, - "learning_rate": 0.0001999972845501436, - "loss": 46.0, - "step": 30691 - }, - { - "epoch": 2.3466177341972974, - "grad_norm": 0.0011955811642110348, - "learning_rate": 0.0001999972843731297, - "loss": 46.0, - "step": 30692 - }, - { - "epoch": 2.346694191180687, - "grad_norm": 0.0011027324944734573, - "learning_rate": 0.00019999728419611002, - "loss": 46.0, - "step": 30693 - }, - { - "epoch": 2.346770648164077, - "grad_norm": 0.0012277988716959953, - "learning_rate": 0.0001999972840190846, - "loss": 46.0, - "step": 30694 - }, - { - "epoch": 2.346847105147466, - "grad_norm": 0.001902602263726294, - "learning_rate": 0.00019999728384205337, - "loss": 46.0, - "step": 30695 - }, - { - "epoch": 2.346923562130856, - "grad_norm": 0.0032249605283141136, - "learning_rate": 0.0001999972836650164, - "loss": 46.0, - "step": 30696 - }, - { - "epoch": 2.3470000191142457, - "grad_norm": 0.0006361831328831613, - "learning_rate": 0.00019999728348797365, - "loss": 46.0, - "step": 30697 - }, - { - "epoch": 2.3470764760976355, - "grad_norm": 0.001081528957001865, - "learning_rate": 0.0001999972833109251, - "loss": 46.0, - "step": 30698 - }, - { - "epoch": 2.3471529330810252, - "grad_norm": 0.0009883008897304535, - "learning_rate": 0.00019999728313387083, - "loss": 46.0, - "step": 30699 - }, - { - "epoch": 2.347229390064415, - "grad_norm": 0.0011008074507117271, - "learning_rate": 0.00019999728295681078, - "loss": 46.0, - "step": 30700 - }, - { - "epoch": 2.3473058470478048, - "grad_norm": 0.0003863166202791035, - "learning_rate": 0.00019999728277974495, - "loss": 46.0, - "step": 30701 - }, - { - "epoch": 2.3473823040311945, - "grad_norm": 0.0012528032530099154, - "learning_rate": 0.00019999728260267335, - "loss": 46.0, - "step": 30702 - }, - { - "epoch": 2.3474587610145843, - "grad_norm": 0.0021434552036225796, - "learning_rate": 0.00019999728242559597, - "loss": 46.0, - "step": 30703 - }, - { - "epoch": 2.347535217997974, - "grad_norm": 0.0015423644799739122, - "learning_rate": 0.00019999728224851283, - "loss": 46.0, - "step": 30704 - }, - { - "epoch": 2.347611674981364, - "grad_norm": 0.0014932374469935894, - "learning_rate": 0.0001999972820714239, - "loss": 46.0, - "step": 30705 - }, - { - "epoch": 2.3476881319647536, - "grad_norm": 0.0009682325180619955, - "learning_rate": 0.00019999728189432924, - "loss": 46.0, - "step": 30706 - }, - { - "epoch": 2.347764588948143, - "grad_norm": 0.0006935708224773407, - "learning_rate": 0.0001999972817172288, - "loss": 46.0, - "step": 30707 - }, - { - "epoch": 2.3478410459315326, - "grad_norm": 0.0032118032686412334, - "learning_rate": 0.00019999728154012258, - "loss": 46.0, - "step": 30708 - }, - { - "epoch": 2.3479175029149224, - "grad_norm": 0.005844541825354099, - "learning_rate": 0.00019999728136301062, - "loss": 46.0, - "step": 30709 - }, - { - "epoch": 2.347993959898312, - "grad_norm": 0.0003876216651406139, - "learning_rate": 0.00019999728118589284, - "loss": 46.0, - "step": 30710 - }, - { - "epoch": 2.348070416881702, - "grad_norm": 0.0008173697860911489, - "learning_rate": 0.00019999728100876933, - "loss": 46.0, - "step": 30711 - }, - { - "epoch": 2.3481468738650917, - "grad_norm": 0.0008290798286907375, - "learning_rate": 0.00019999728083164005, - "loss": 46.0, - "step": 30712 - }, - { - "epoch": 2.3482233308484814, - "grad_norm": 0.0008743872749619186, - "learning_rate": 0.00019999728065450497, - "loss": 46.0, - "step": 30713 - }, - { - "epoch": 2.348299787831871, - "grad_norm": 0.0017585615860298276, - "learning_rate": 0.00019999728047736414, - "loss": 46.0, - "step": 30714 - }, - { - "epoch": 2.348376244815261, - "grad_norm": 0.0006434962269850075, - "learning_rate": 0.00019999728030021757, - "loss": 46.0, - "step": 30715 - }, - { - "epoch": 2.3484527017986503, - "grad_norm": 0.0011547869071364403, - "learning_rate": 0.00019999728012306517, - "loss": 46.0, - "step": 30716 - }, - { - "epoch": 2.34852915878204, - "grad_norm": 0.004181582015007734, - "learning_rate": 0.00019999727994590705, - "loss": 46.0, - "step": 30717 - }, - { - "epoch": 2.34860561576543, - "grad_norm": 0.002044013002887368, - "learning_rate": 0.00019999727976874313, - "loss": 46.0, - "step": 30718 - }, - { - "epoch": 2.3486820727488196, - "grad_norm": 0.0012599320616573095, - "learning_rate": 0.00019999727959157346, - "loss": 46.0, - "step": 30719 - }, - { - "epoch": 2.3487585297322093, - "grad_norm": 0.0017522042617201805, - "learning_rate": 0.00019999727941439802, - "loss": 46.0, - "step": 30720 - }, - { - "epoch": 2.348834986715599, - "grad_norm": 0.0011873355833813548, - "learning_rate": 0.0001999972792372168, - "loss": 46.0, - "step": 30721 - }, - { - "epoch": 2.348911443698989, - "grad_norm": 0.002941085258498788, - "learning_rate": 0.00019999727906002982, - "loss": 46.0, - "step": 30722 - }, - { - "epoch": 2.3489879006823786, - "grad_norm": 0.006120025645941496, - "learning_rate": 0.00019999727888283706, - "loss": 46.0, - "step": 30723 - }, - { - "epoch": 2.3490643576657684, - "grad_norm": 0.0012597902677953243, - "learning_rate": 0.00019999727870563853, - "loss": 46.0, - "step": 30724 - }, - { - "epoch": 2.349140814649158, - "grad_norm": 0.0005356183391995728, - "learning_rate": 0.00019999727852843425, - "loss": 46.0, - "step": 30725 - }, - { - "epoch": 2.349217271632548, - "grad_norm": 0.000579779502004385, - "learning_rate": 0.0001999972783512242, - "loss": 46.0, - "step": 30726 - }, - { - "epoch": 2.3492937286159377, - "grad_norm": 0.0021668465342372656, - "learning_rate": 0.00019999727817400834, - "loss": 46.0, - "step": 30727 - }, - { - "epoch": 2.3493701855993274, - "grad_norm": 0.00046065717469900846, - "learning_rate": 0.00019999727799678674, - "loss": 46.0, - "step": 30728 - }, - { - "epoch": 2.3494466425827167, - "grad_norm": 0.0023027784191071987, - "learning_rate": 0.0001999972778195594, - "loss": 46.0, - "step": 30729 - }, - { - "epoch": 2.3495230995661065, - "grad_norm": 0.0004900216590613127, - "learning_rate": 0.00019999727764232622, - "loss": 46.0, - "step": 30730 - }, - { - "epoch": 2.3495995565494963, - "grad_norm": 0.0006098304875195026, - "learning_rate": 0.00019999727746508735, - "loss": 46.0, - "step": 30731 - }, - { - "epoch": 2.349676013532886, - "grad_norm": 0.0008547514444217086, - "learning_rate": 0.00019999727728784266, - "loss": 46.0, - "step": 30732 - }, - { - "epoch": 2.349752470516276, - "grad_norm": 0.0005831779562868178, - "learning_rate": 0.0001999972771105922, - "loss": 46.0, - "step": 30733 - }, - { - "epoch": 2.3498289274996655, - "grad_norm": 0.0009844928281381726, - "learning_rate": 0.00019999727693333598, - "loss": 46.0, - "step": 30734 - }, - { - "epoch": 2.3499053844830553, - "grad_norm": 0.0020535937510430813, - "learning_rate": 0.000199997276756074, - "loss": 46.0, - "step": 30735 - }, - { - "epoch": 2.349981841466445, - "grad_norm": 0.0007150209858082235, - "learning_rate": 0.00019999727657880626, - "loss": 46.0, - "step": 30736 - }, - { - "epoch": 2.350058298449835, - "grad_norm": 0.001040019909851253, - "learning_rate": 0.00019999727640153273, - "loss": 46.0, - "step": 30737 - }, - { - "epoch": 2.350134755433224, - "grad_norm": 0.003776006633415818, - "learning_rate": 0.00019999727622425345, - "loss": 46.0, - "step": 30738 - }, - { - "epoch": 2.350211212416614, - "grad_norm": 0.0020182861480861902, - "learning_rate": 0.0001999972760469684, - "loss": 46.0, - "step": 30739 - }, - { - "epoch": 2.3502876694000037, - "grad_norm": 0.0010680132545530796, - "learning_rate": 0.00019999727586967754, - "loss": 46.0, - "step": 30740 - }, - { - "epoch": 2.3503641263833934, - "grad_norm": 0.0028938716277480125, - "learning_rate": 0.00019999727569238094, - "loss": 46.0, - "step": 30741 - }, - { - "epoch": 2.350440583366783, - "grad_norm": 0.009856638498604298, - "learning_rate": 0.00019999727551507857, - "loss": 46.0, - "step": 30742 - }, - { - "epoch": 2.350517040350173, - "grad_norm": 0.0007910510757938027, - "learning_rate": 0.00019999727533777045, - "loss": 46.0, - "step": 30743 - }, - { - "epoch": 2.3505934973335627, - "grad_norm": 0.0017557626124471426, - "learning_rate": 0.00019999727516045653, - "loss": 46.0, - "step": 30744 - }, - { - "epoch": 2.3506699543169525, - "grad_norm": 0.0006075821002013981, - "learning_rate": 0.00019999727498313684, - "loss": 46.0, - "step": 30745 - }, - { - "epoch": 2.3507464113003422, - "grad_norm": 0.0007803900516591966, - "learning_rate": 0.0001999972748058114, - "loss": 46.0, - "step": 30746 - }, - { - "epoch": 2.350822868283732, - "grad_norm": 0.0017525721341371536, - "learning_rate": 0.00019999727462848016, - "loss": 46.0, - "step": 30747 - }, - { - "epoch": 2.3508993252671218, - "grad_norm": 0.002435565460473299, - "learning_rate": 0.00019999727445114317, - "loss": 46.0, - "step": 30748 - }, - { - "epoch": 2.3509757822505115, - "grad_norm": 0.000667648680973798, - "learning_rate": 0.00019999727427380044, - "loss": 46.0, - "step": 30749 - }, - { - "epoch": 2.3510522392339013, - "grad_norm": 0.0017121753189712763, - "learning_rate": 0.00019999727409645193, - "loss": 46.0, - "step": 30750 - }, - { - "epoch": 2.3511286962172906, - "grad_norm": 0.0009407647303305566, - "learning_rate": 0.00019999727391909763, - "loss": 46.0, - "step": 30751 - }, - { - "epoch": 2.3512051532006804, - "grad_norm": 0.0024833460338413715, - "learning_rate": 0.00019999727374173755, - "loss": 46.0, - "step": 30752 - }, - { - "epoch": 2.35128161018407, - "grad_norm": 0.001971642719581723, - "learning_rate": 0.00019999727356437172, - "loss": 46.0, - "step": 30753 - }, - { - "epoch": 2.35135806716746, - "grad_norm": 0.0005710643017664552, - "learning_rate": 0.00019999727338700012, - "loss": 46.0, - "step": 30754 - }, - { - "epoch": 2.3514345241508496, - "grad_norm": 0.0008880415698513389, - "learning_rate": 0.00019999727320962275, - "loss": 46.0, - "step": 30755 - }, - { - "epoch": 2.3515109811342394, - "grad_norm": 0.0020124532748013735, - "learning_rate": 0.0001999972730322396, - "loss": 46.0, - "step": 30756 - }, - { - "epoch": 2.351587438117629, - "grad_norm": 0.001457576989196241, - "learning_rate": 0.0001999972728548507, - "loss": 46.0, - "step": 30757 - }, - { - "epoch": 2.351663895101019, - "grad_norm": 0.001222244231030345, - "learning_rate": 0.00019999727267745602, - "loss": 46.0, - "step": 30758 - }, - { - "epoch": 2.3517403520844087, - "grad_norm": 0.00220334529876709, - "learning_rate": 0.00019999727250005558, - "loss": 46.0, - "step": 30759 - }, - { - "epoch": 2.351816809067798, - "grad_norm": 0.0007857642485760152, - "learning_rate": 0.00019999727232264936, - "loss": 46.0, - "step": 30760 - }, - { - "epoch": 2.3518932660511878, - "grad_norm": 0.0033274961169809103, - "learning_rate": 0.00019999727214523735, - "loss": 46.0, - "step": 30761 - }, - { - "epoch": 2.3519697230345775, - "grad_norm": 0.0010401670588180423, - "learning_rate": 0.00019999727196781962, - "loss": 46.0, - "step": 30762 - }, - { - "epoch": 2.3520461800179673, - "grad_norm": 0.0006546966033056378, - "learning_rate": 0.0001999972717903961, - "loss": 46.0, - "step": 30763 - }, - { - "epoch": 2.352122637001357, - "grad_norm": 0.001543274731375277, - "learning_rate": 0.00019999727161296678, - "loss": 46.0, - "step": 30764 - }, - { - "epoch": 2.352199093984747, - "grad_norm": 0.0011219036532565951, - "learning_rate": 0.00019999727143553173, - "loss": 46.0, - "step": 30765 - }, - { - "epoch": 2.3522755509681366, - "grad_norm": 0.0004796836001332849, - "learning_rate": 0.0001999972712580909, - "loss": 46.0, - "step": 30766 - }, - { - "epoch": 2.3523520079515263, - "grad_norm": 0.0017966593150049448, - "learning_rate": 0.00019999727108064428, - "loss": 46.0, - "step": 30767 - }, - { - "epoch": 2.352428464934916, - "grad_norm": 0.0005488636088557541, - "learning_rate": 0.0001999972709031919, - "loss": 46.0, - "step": 30768 - }, - { - "epoch": 2.352504921918306, - "grad_norm": 0.002760576317086816, - "learning_rate": 0.00019999727072573379, - "loss": 46.0, - "step": 30769 - }, - { - "epoch": 2.3525813789016956, - "grad_norm": 0.001007545622996986, - "learning_rate": 0.00019999727054826987, - "loss": 46.0, - "step": 30770 - }, - { - "epoch": 2.3526578358850854, - "grad_norm": 0.0010352828539907932, - "learning_rate": 0.00019999727037080017, - "loss": 46.0, - "step": 30771 - }, - { - "epoch": 2.3527342928684747, - "grad_norm": 0.0012859228299930692, - "learning_rate": 0.00019999727019332473, - "loss": 46.0, - "step": 30772 - }, - { - "epoch": 2.3528107498518644, - "grad_norm": 0.000690045184455812, - "learning_rate": 0.00019999727001584352, - "loss": 46.0, - "step": 30773 - }, - { - "epoch": 2.352887206835254, - "grad_norm": 0.002257169224321842, - "learning_rate": 0.00019999726983835654, - "loss": 46.0, - "step": 30774 - }, - { - "epoch": 2.352963663818644, - "grad_norm": 0.0005929278559051454, - "learning_rate": 0.00019999726966086375, - "loss": 46.0, - "step": 30775 - }, - { - "epoch": 2.3530401208020337, - "grad_norm": 0.0006345452857203782, - "learning_rate": 0.00019999726948336525, - "loss": 46.0, - "step": 30776 - }, - { - "epoch": 2.3531165777854235, - "grad_norm": 0.0011484071146696806, - "learning_rate": 0.00019999726930586094, - "loss": 46.0, - "step": 30777 - }, - { - "epoch": 2.3531930347688133, - "grad_norm": 0.00044060597429051995, - "learning_rate": 0.0001999972691283509, - "loss": 46.0, - "step": 30778 - }, - { - "epoch": 2.353269491752203, - "grad_norm": 0.0034412231761962175, - "learning_rate": 0.00019999726895083506, - "loss": 46.0, - "step": 30779 - }, - { - "epoch": 2.3533459487355928, - "grad_norm": 0.0036489868070930243, - "learning_rate": 0.00019999726877331344, - "loss": 46.0, - "step": 30780 - }, - { - "epoch": 2.3534224057189825, - "grad_norm": 0.000718778814189136, - "learning_rate": 0.00019999726859578606, - "loss": 46.0, - "step": 30781 - }, - { - "epoch": 2.353498862702372, - "grad_norm": 0.0014931521145626903, - "learning_rate": 0.00019999726841825292, - "loss": 46.0, - "step": 30782 - }, - { - "epoch": 2.3535753196857616, - "grad_norm": 0.001067790319211781, - "learning_rate": 0.00019999726824071403, - "loss": 46.0, - "step": 30783 - }, - { - "epoch": 2.3536517766691514, - "grad_norm": 0.0009619550546631217, - "learning_rate": 0.00019999726806316933, - "loss": 46.0, - "step": 30784 - }, - { - "epoch": 2.353728233652541, - "grad_norm": 0.0009539675083942711, - "learning_rate": 0.00019999726788561887, - "loss": 46.0, - "step": 30785 - }, - { - "epoch": 2.353804690635931, - "grad_norm": 0.0015302995452657342, - "learning_rate": 0.00019999726770806266, - "loss": 46.0, - "step": 30786 - }, - { - "epoch": 2.3538811476193207, - "grad_norm": 0.0007947874255478382, - "learning_rate": 0.00019999726753050067, - "loss": 46.0, - "step": 30787 - }, - { - "epoch": 2.3539576046027104, - "grad_norm": 0.0013851206749677658, - "learning_rate": 0.0001999972673529329, - "loss": 46.0, - "step": 30788 - }, - { - "epoch": 2.3540340615861, - "grad_norm": 0.003003871301189065, - "learning_rate": 0.00019999726717535938, - "loss": 46.0, - "step": 30789 - }, - { - "epoch": 2.35411051856949, - "grad_norm": 0.0026445782277733088, - "learning_rate": 0.0001999972669977801, - "loss": 46.0, - "step": 30790 - }, - { - "epoch": 2.3541869755528797, - "grad_norm": 0.0009551442926749587, - "learning_rate": 0.00019999726682019505, - "loss": 46.0, - "step": 30791 - }, - { - "epoch": 2.3542634325362695, - "grad_norm": 0.0006128668901510537, - "learning_rate": 0.0001999972666426042, - "loss": 46.0, - "step": 30792 - }, - { - "epoch": 2.3543398895196592, - "grad_norm": 0.011979698203504086, - "learning_rate": 0.0001999972664650076, - "loss": 46.0, - "step": 30793 - }, - { - "epoch": 2.3544163465030485, - "grad_norm": 0.0013513767626136541, - "learning_rate": 0.00019999726628740523, - "loss": 46.0, - "step": 30794 - }, - { - "epoch": 2.3544928034864383, - "grad_norm": 0.0025088328402489424, - "learning_rate": 0.00019999726610979708, - "loss": 46.0, - "step": 30795 - }, - { - "epoch": 2.354569260469828, - "grad_norm": 0.0006789755425415933, - "learning_rate": 0.00019999726593218316, - "loss": 46.0, - "step": 30796 - }, - { - "epoch": 2.354645717453218, - "grad_norm": 0.000706896185874939, - "learning_rate": 0.00019999726575456347, - "loss": 46.0, - "step": 30797 - }, - { - "epoch": 2.3547221744366076, - "grad_norm": 0.0007117788773030043, - "learning_rate": 0.00019999726557693803, - "loss": 46.0, - "step": 30798 - }, - { - "epoch": 2.3547986314199973, - "grad_norm": 0.0007525378605350852, - "learning_rate": 0.00019999726539930682, - "loss": 46.0, - "step": 30799 - }, - { - "epoch": 2.354875088403387, - "grad_norm": 0.0009500780142843723, - "learning_rate": 0.0001999972652216698, - "loss": 46.0, - "step": 30800 - }, - { - "epoch": 2.354951545386777, - "grad_norm": 0.004723402205854654, - "learning_rate": 0.00019999726504402708, - "loss": 46.0, - "step": 30801 - }, - { - "epoch": 2.3550280023701666, - "grad_norm": 0.0009589608525857329, - "learning_rate": 0.00019999726486637852, - "loss": 46.0, - "step": 30802 - }, - { - "epoch": 2.3551044593535564, - "grad_norm": 0.0011295626172795892, - "learning_rate": 0.00019999726468872422, - "loss": 46.0, - "step": 30803 - }, - { - "epoch": 2.3551809163369457, - "grad_norm": 0.0011377278715372086, - "learning_rate": 0.00019999726451106416, - "loss": 46.0, - "step": 30804 - }, - { - "epoch": 2.3552573733203355, - "grad_norm": 0.007689039688557386, - "learning_rate": 0.0001999972643333983, - "loss": 46.0, - "step": 30805 - }, - { - "epoch": 2.3553338303037252, - "grad_norm": 0.0011764847440645099, - "learning_rate": 0.00019999726415572671, - "loss": 46.0, - "step": 30806 - }, - { - "epoch": 2.355410287287115, - "grad_norm": 0.0006372727802954614, - "learning_rate": 0.00019999726397804934, - "loss": 46.0, - "step": 30807 - }, - { - "epoch": 2.3554867442705048, - "grad_norm": 0.004856351763010025, - "learning_rate": 0.0001999972638003662, - "loss": 46.0, - "step": 30808 - }, - { - "epoch": 2.3555632012538945, - "grad_norm": 0.001070099649950862, - "learning_rate": 0.00019999726362267728, - "loss": 46.0, - "step": 30809 - }, - { - "epoch": 2.3556396582372843, - "grad_norm": 0.0009453344391658902, - "learning_rate": 0.00019999726344498262, - "loss": 46.0, - "step": 30810 - }, - { - "epoch": 2.355716115220674, - "grad_norm": 0.0009171488345600665, - "learning_rate": 0.00019999726326728215, - "loss": 46.0, - "step": 30811 - }, - { - "epoch": 2.355792572204064, - "grad_norm": 0.003414018778130412, - "learning_rate": 0.00019999726308957594, - "loss": 46.0, - "step": 30812 - }, - { - "epoch": 2.3558690291874536, - "grad_norm": 0.0006449449574574828, - "learning_rate": 0.00019999726291186395, - "loss": 46.0, - "step": 30813 - }, - { - "epoch": 2.3559454861708433, - "grad_norm": 0.0008448041626252234, - "learning_rate": 0.0001999972627341462, - "loss": 46.0, - "step": 30814 - }, - { - "epoch": 2.356021943154233, - "grad_norm": 0.0008702003979124129, - "learning_rate": 0.00019999726255642266, - "loss": 46.0, - "step": 30815 - }, - { - "epoch": 2.3560984001376224, - "grad_norm": 0.0020965328440070152, - "learning_rate": 0.00019999726237869336, - "loss": 46.0, - "step": 30816 - }, - { - "epoch": 2.356174857121012, - "grad_norm": 0.0018821465782821178, - "learning_rate": 0.00019999726220095828, - "loss": 46.0, - "step": 30817 - }, - { - "epoch": 2.356251314104402, - "grad_norm": 0.002290338510647416, - "learning_rate": 0.00019999726202321746, - "loss": 46.0, - "step": 30818 - }, - { - "epoch": 2.3563277710877917, - "grad_norm": 0.0006560833426192403, - "learning_rate": 0.00019999726184547086, - "loss": 46.0, - "step": 30819 - }, - { - "epoch": 2.3564042280711814, - "grad_norm": 0.0010972610907629132, - "learning_rate": 0.00019999726166771846, - "loss": 46.0, - "step": 30820 - }, - { - "epoch": 2.356480685054571, - "grad_norm": 0.0018406495219096541, - "learning_rate": 0.00019999726148996034, - "loss": 46.0, - "step": 30821 - }, - { - "epoch": 2.356557142037961, - "grad_norm": 0.0008733987342566252, - "learning_rate": 0.0001999972613121964, - "loss": 46.0, - "step": 30822 - }, - { - "epoch": 2.3566335990213507, - "grad_norm": 0.0029574641957879066, - "learning_rate": 0.00019999726113442673, - "loss": 46.0, - "step": 30823 - }, - { - "epoch": 2.3567100560047405, - "grad_norm": 0.0005557197728194296, - "learning_rate": 0.00019999726095665127, - "loss": 46.0, - "step": 30824 - }, - { - "epoch": 2.3567865129881302, - "grad_norm": 0.0033350992016494274, - "learning_rate": 0.00019999726077887003, - "loss": 46.0, - "step": 30825 - }, - { - "epoch": 2.3568629699715196, - "grad_norm": 0.0005353637970983982, - "learning_rate": 0.00019999726060108305, - "loss": 46.0, - "step": 30826 - }, - { - "epoch": 2.3569394269549093, - "grad_norm": 0.000887516827788204, - "learning_rate": 0.0001999972604232903, - "loss": 46.0, - "step": 30827 - }, - { - "epoch": 2.357015883938299, - "grad_norm": 0.0009481757879257202, - "learning_rate": 0.00019999726024549176, - "loss": 46.0, - "step": 30828 - }, - { - "epoch": 2.357092340921689, - "grad_norm": 0.005767423659563065, - "learning_rate": 0.00019999726006768745, - "loss": 46.0, - "step": 30829 - }, - { - "epoch": 2.3571687979050786, - "grad_norm": 0.0010970557341352105, - "learning_rate": 0.00019999725988987738, - "loss": 46.0, - "step": 30830 - }, - { - "epoch": 2.3572452548884684, - "grad_norm": 0.007884407415986061, - "learning_rate": 0.00019999725971206158, - "loss": 46.0, - "step": 30831 - }, - { - "epoch": 2.357321711871858, - "grad_norm": 0.0010746211046352983, - "learning_rate": 0.00019999725953423995, - "loss": 46.0, - "step": 30832 - }, - { - "epoch": 2.357398168855248, - "grad_norm": 0.0007399875321425498, - "learning_rate": 0.00019999725935641258, - "loss": 46.0, - "step": 30833 - }, - { - "epoch": 2.3574746258386376, - "grad_norm": 0.0010197452502325177, - "learning_rate": 0.00019999725917857944, - "loss": 46.0, - "step": 30834 - }, - { - "epoch": 2.3575510828220274, - "grad_norm": 0.0006012238445691764, - "learning_rate": 0.00019999725900074052, - "loss": 46.0, - "step": 30835 - }, - { - "epoch": 2.357627539805417, - "grad_norm": 0.0015155320288613439, - "learning_rate": 0.00019999725882289583, - "loss": 46.0, - "step": 30836 - }, - { - "epoch": 2.357703996788807, - "grad_norm": 0.0023198137059807777, - "learning_rate": 0.00019999725864504537, - "loss": 46.0, - "step": 30837 - }, - { - "epoch": 2.3577804537721962, - "grad_norm": 0.0015870064962655306, - "learning_rate": 0.00019999725846718916, - "loss": 46.0, - "step": 30838 - }, - { - "epoch": 2.357856910755586, - "grad_norm": 0.000814481230918318, - "learning_rate": 0.00019999725828932717, - "loss": 46.0, - "step": 30839 - }, - { - "epoch": 2.3579333677389758, - "grad_norm": 0.0036859349347651005, - "learning_rate": 0.0001999972581114594, - "loss": 46.0, - "step": 30840 - }, - { - "epoch": 2.3580098247223655, - "grad_norm": 0.0014893336920067668, - "learning_rate": 0.00019999725793358588, - "loss": 46.0, - "step": 30841 - }, - { - "epoch": 2.3580862817057553, - "grad_norm": 0.0007407942903228104, - "learning_rate": 0.00019999725775570658, - "loss": 46.0, - "step": 30842 - }, - { - "epoch": 2.358162738689145, - "grad_norm": 0.0024777348153293133, - "learning_rate": 0.0001999972575778215, - "loss": 46.0, - "step": 30843 - }, - { - "epoch": 2.358239195672535, - "grad_norm": 0.0009335758513770998, - "learning_rate": 0.00019999725739993068, - "loss": 46.0, - "step": 30844 - }, - { - "epoch": 2.3583156526559246, - "grad_norm": 0.0009884078754112124, - "learning_rate": 0.00019999725722203405, - "loss": 46.0, - "step": 30845 - }, - { - "epoch": 2.3583921096393143, - "grad_norm": 0.002203387673944235, - "learning_rate": 0.00019999725704413168, - "loss": 46.0, - "step": 30846 - }, - { - "epoch": 2.3584685666227037, - "grad_norm": 0.0014249550877138972, - "learning_rate": 0.00019999725686622354, - "loss": 46.0, - "step": 30847 - }, - { - "epoch": 2.3585450236060934, - "grad_norm": 0.0006944107008166611, - "learning_rate": 0.00019999725668830962, - "loss": 46.0, - "step": 30848 - }, - { - "epoch": 2.358621480589483, - "grad_norm": 0.0008494915091432631, - "learning_rate": 0.00019999725651038993, - "loss": 46.0, - "step": 30849 - }, - { - "epoch": 2.358697937572873, - "grad_norm": 0.001505842199549079, - "learning_rate": 0.0001999972563324645, - "loss": 46.0, - "step": 30850 - }, - { - "epoch": 2.3587743945562627, - "grad_norm": 0.0015603555366396904, - "learning_rate": 0.00019999725615453326, - "loss": 46.0, - "step": 30851 - }, - { - "epoch": 2.3588508515396525, - "grad_norm": 0.0012774834176525474, - "learning_rate": 0.00019999725597659627, - "loss": 46.0, - "step": 30852 - }, - { - "epoch": 2.358927308523042, - "grad_norm": 0.006070606876164675, - "learning_rate": 0.0001999972557986535, - "loss": 46.0, - "step": 30853 - }, - { - "epoch": 2.359003765506432, - "grad_norm": 0.0010035528102889657, - "learning_rate": 0.00019999725562070496, - "loss": 46.0, - "step": 30854 - }, - { - "epoch": 2.3590802224898217, - "grad_norm": 0.0014161423314362764, - "learning_rate": 0.00019999725544275068, - "loss": 46.0, - "step": 30855 - }, - { - "epoch": 2.3591566794732115, - "grad_norm": 0.0005423652473837137, - "learning_rate": 0.00019999725526479058, - "loss": 46.0, - "step": 30856 - }, - { - "epoch": 2.3592331364566013, - "grad_norm": 0.0007155163330025971, - "learning_rate": 0.00019999725508682475, - "loss": 46.0, - "step": 30857 - }, - { - "epoch": 2.359309593439991, - "grad_norm": 0.001379706314764917, - "learning_rate": 0.00019999725490885316, - "loss": 46.0, - "step": 30858 - }, - { - "epoch": 2.359386050423381, - "grad_norm": 0.0008177123381756246, - "learning_rate": 0.00019999725473087576, - "loss": 46.0, - "step": 30859 - }, - { - "epoch": 2.35946250740677, - "grad_norm": 0.0007992482278496027, - "learning_rate": 0.00019999725455289262, - "loss": 46.0, - "step": 30860 - }, - { - "epoch": 2.35953896439016, - "grad_norm": 0.0012482131132856011, - "learning_rate": 0.0001999972543749037, - "loss": 46.0, - "step": 30861 - }, - { - "epoch": 2.3596154213735496, - "grad_norm": 0.0006626953254453838, - "learning_rate": 0.00019999725419690904, - "loss": 46.0, - "step": 30862 - }, - { - "epoch": 2.3596918783569394, - "grad_norm": 0.0007058997871354222, - "learning_rate": 0.00019999725401890857, - "loss": 46.0, - "step": 30863 - }, - { - "epoch": 2.359768335340329, - "grad_norm": 0.0005448069423437119, - "learning_rate": 0.00019999725384090233, - "loss": 46.0, - "step": 30864 - }, - { - "epoch": 2.359844792323719, - "grad_norm": 0.002846070798113942, - "learning_rate": 0.00019999725366289035, - "loss": 46.0, - "step": 30865 - }, - { - "epoch": 2.3599212493071087, - "grad_norm": 0.0016631190665066242, - "learning_rate": 0.0001999972534848726, - "loss": 46.0, - "step": 30866 - }, - { - "epoch": 2.3599977062904984, - "grad_norm": 0.0006251556915231049, - "learning_rate": 0.00019999725330684904, - "loss": 46.0, - "step": 30867 - }, - { - "epoch": 2.360074163273888, - "grad_norm": 0.0035454677417874336, - "learning_rate": 0.00019999725312881976, - "loss": 46.0, - "step": 30868 - }, - { - "epoch": 2.3601506202572775, - "grad_norm": 0.0012839263072237372, - "learning_rate": 0.00019999725295078466, - "loss": 46.0, - "step": 30869 - }, - { - "epoch": 2.3602270772406673, - "grad_norm": 0.0020389000419527292, - "learning_rate": 0.00019999725277274383, - "loss": 46.0, - "step": 30870 - }, - { - "epoch": 2.360303534224057, - "grad_norm": 0.0011894642375409603, - "learning_rate": 0.00019999725259469724, - "loss": 46.0, - "step": 30871 - }, - { - "epoch": 2.360379991207447, - "grad_norm": 0.002931153168901801, - "learning_rate": 0.00019999725241664487, - "loss": 46.0, - "step": 30872 - }, - { - "epoch": 2.3604564481908366, - "grad_norm": 0.0009152429993264377, - "learning_rate": 0.0001999972522385867, - "loss": 46.0, - "step": 30873 - }, - { - "epoch": 2.3605329051742263, - "grad_norm": 0.003977891523391008, - "learning_rate": 0.0001999972520605228, - "loss": 46.0, - "step": 30874 - }, - { - "epoch": 2.360609362157616, - "grad_norm": 0.0004825768992304802, - "learning_rate": 0.00019999725188245312, - "loss": 46.0, - "step": 30875 - }, - { - "epoch": 2.360685819141006, - "grad_norm": 0.0015788174932822585, - "learning_rate": 0.00019999725170437765, - "loss": 46.0, - "step": 30876 - }, - { - "epoch": 2.3607622761243956, - "grad_norm": 0.006124661769717932, - "learning_rate": 0.0001999972515262964, - "loss": 46.0, - "step": 30877 - }, - { - "epoch": 2.3608387331077854, - "grad_norm": 0.004751675296574831, - "learning_rate": 0.00019999725134820943, - "loss": 46.0, - "step": 30878 - }, - { - "epoch": 2.360915190091175, - "grad_norm": 0.0005537885590456426, - "learning_rate": 0.00019999725117011665, - "loss": 46.0, - "step": 30879 - }, - { - "epoch": 2.360991647074565, - "grad_norm": 0.002137881936505437, - "learning_rate": 0.00019999725099201815, - "loss": 46.0, - "step": 30880 - }, - { - "epoch": 2.3610681040579546, - "grad_norm": 0.0030518837738782167, - "learning_rate": 0.00019999725081391382, - "loss": 46.0, - "step": 30881 - }, - { - "epoch": 2.361144561041344, - "grad_norm": 0.0020048003643751144, - "learning_rate": 0.00019999725063580374, - "loss": 46.0, - "step": 30882 - }, - { - "epoch": 2.3612210180247337, - "grad_norm": 0.0003562805359251797, - "learning_rate": 0.00019999725045768792, - "loss": 46.0, - "step": 30883 - }, - { - "epoch": 2.3612974750081235, - "grad_norm": 0.0008817478665150702, - "learning_rate": 0.0001999972502795663, - "loss": 46.0, - "step": 30884 - }, - { - "epoch": 2.3613739319915132, - "grad_norm": 0.001112551661208272, - "learning_rate": 0.00019999725010143892, - "loss": 46.0, - "step": 30885 - }, - { - "epoch": 2.361450388974903, - "grad_norm": 0.002130249049514532, - "learning_rate": 0.00019999724992330578, - "loss": 46.0, - "step": 30886 - }, - { - "epoch": 2.3615268459582928, - "grad_norm": 0.002317998791113496, - "learning_rate": 0.00019999724974516684, - "loss": 46.0, - "step": 30887 - }, - { - "epoch": 2.3616033029416825, - "grad_norm": 0.0006509704398922622, - "learning_rate": 0.00019999724956702215, - "loss": 46.0, - "step": 30888 - }, - { - "epoch": 2.3616797599250723, - "grad_norm": 0.0009209293057210743, - "learning_rate": 0.0001999972493888717, - "loss": 46.0, - "step": 30889 - }, - { - "epoch": 2.361756216908462, - "grad_norm": 0.003643839852884412, - "learning_rate": 0.00019999724921071548, - "loss": 46.0, - "step": 30890 - }, - { - "epoch": 2.3618326738918514, - "grad_norm": 0.002169376937672496, - "learning_rate": 0.0001999972490325535, - "loss": 46.0, - "step": 30891 - }, - { - "epoch": 2.361909130875241, - "grad_norm": 0.000851595017593354, - "learning_rate": 0.0001999972488543857, - "loss": 46.0, - "step": 30892 - }, - { - "epoch": 2.361985587858631, - "grad_norm": 0.003256110940128565, - "learning_rate": 0.00019999724867621218, - "loss": 46.0, - "step": 30893 - }, - { - "epoch": 2.3620620448420206, - "grad_norm": 0.002151092980057001, - "learning_rate": 0.0001999972484980329, - "loss": 46.0, - "step": 30894 - }, - { - "epoch": 2.3621385018254104, - "grad_norm": 0.0015356120420619845, - "learning_rate": 0.0001999972483198478, - "loss": 46.0, - "step": 30895 - }, - { - "epoch": 2.3622149588088, - "grad_norm": 0.0007646828889846802, - "learning_rate": 0.00019999724814165698, - "loss": 46.0, - "step": 30896 - }, - { - "epoch": 2.36229141579219, - "grad_norm": 0.0017656392883509398, - "learning_rate": 0.00019999724796346036, - "loss": 46.0, - "step": 30897 - }, - { - "epoch": 2.3623678727755797, - "grad_norm": 0.0050153667107224464, - "learning_rate": 0.00019999724778525796, - "loss": 46.0, - "step": 30898 - }, - { - "epoch": 2.3624443297589695, - "grad_norm": 0.0005469491006806493, - "learning_rate": 0.00019999724760704984, - "loss": 46.0, - "step": 30899 - }, - { - "epoch": 2.362520786742359, - "grad_norm": 0.0013224628055468202, - "learning_rate": 0.0001999972474288359, - "loss": 46.0, - "step": 30900 - }, - { - "epoch": 2.362597243725749, - "grad_norm": 0.0009792111814022064, - "learning_rate": 0.0001999972472506162, - "loss": 46.0, - "step": 30901 - }, - { - "epoch": 2.3626737007091387, - "grad_norm": 0.004378196317702532, - "learning_rate": 0.00019999724707239075, - "loss": 46.0, - "step": 30902 - }, - { - "epoch": 2.362750157692528, - "grad_norm": 0.00042918085819110274, - "learning_rate": 0.00019999724689415954, - "loss": 46.0, - "step": 30903 - }, - { - "epoch": 2.362826614675918, - "grad_norm": 0.0004521795781329274, - "learning_rate": 0.00019999724671592253, - "loss": 46.0, - "step": 30904 - }, - { - "epoch": 2.3629030716593076, - "grad_norm": 0.002102494239807129, - "learning_rate": 0.00019999724653767977, - "loss": 46.0, - "step": 30905 - }, - { - "epoch": 2.3629795286426973, - "grad_norm": 0.006573577877134085, - "learning_rate": 0.00019999724635943124, - "loss": 46.0, - "step": 30906 - }, - { - "epoch": 2.363055985626087, - "grad_norm": 0.001122870366089046, - "learning_rate": 0.00019999724618117694, - "loss": 46.0, - "step": 30907 - }, - { - "epoch": 2.363132442609477, - "grad_norm": 0.0008915234357118607, - "learning_rate": 0.00019999724600291687, - "loss": 46.0, - "step": 30908 - }, - { - "epoch": 2.3632088995928666, - "grad_norm": 0.004498935304582119, - "learning_rate": 0.00019999724582465102, - "loss": 46.0, - "step": 30909 - }, - { - "epoch": 2.3632853565762564, - "grad_norm": 0.0027440243866294622, - "learning_rate": 0.0001999972456463794, - "loss": 46.0, - "step": 30910 - }, - { - "epoch": 2.363361813559646, - "grad_norm": 0.005329929757863283, - "learning_rate": 0.00019999724546810202, - "loss": 46.0, - "step": 30911 - }, - { - "epoch": 2.363438270543036, - "grad_norm": 0.002080376725643873, - "learning_rate": 0.00019999724528981888, - "loss": 46.0, - "step": 30912 - }, - { - "epoch": 2.363514727526425, - "grad_norm": 0.0010124585824087262, - "learning_rate": 0.00019999724511152994, - "loss": 46.0, - "step": 30913 - }, - { - "epoch": 2.363591184509815, - "grad_norm": 0.0005254840361885726, - "learning_rate": 0.00019999724493323525, - "loss": 46.0, - "step": 30914 - }, - { - "epoch": 2.3636676414932047, - "grad_norm": 0.0017286852234974504, - "learning_rate": 0.0001999972447549348, - "loss": 46.0, - "step": 30915 - }, - { - "epoch": 2.3637440984765945, - "grad_norm": 0.0022400347515940666, - "learning_rate": 0.00019999724457662858, - "loss": 46.0, - "step": 30916 - }, - { - "epoch": 2.3638205554599843, - "grad_norm": 0.0013412097468972206, - "learning_rate": 0.00019999724439831657, - "loss": 46.0, - "step": 30917 - }, - { - "epoch": 2.363897012443374, - "grad_norm": 0.001063137548044324, - "learning_rate": 0.0001999972442199988, - "loss": 46.0, - "step": 30918 - }, - { - "epoch": 2.363973469426764, - "grad_norm": 0.0012534894049167633, - "learning_rate": 0.00019999724404167526, - "loss": 46.0, - "step": 30919 - }, - { - "epoch": 2.3640499264101535, - "grad_norm": 0.001384580391459167, - "learning_rate": 0.00019999724386334596, - "loss": 46.0, - "step": 30920 - }, - { - "epoch": 2.3641263833935433, - "grad_norm": 0.0021318020299077034, - "learning_rate": 0.00019999724368501088, - "loss": 46.0, - "step": 30921 - }, - { - "epoch": 2.364202840376933, - "grad_norm": 0.0035450602881610394, - "learning_rate": 0.00019999724350667006, - "loss": 46.0, - "step": 30922 - }, - { - "epoch": 2.364279297360323, - "grad_norm": 0.0009228177950717509, - "learning_rate": 0.00019999724332832344, - "loss": 46.0, - "step": 30923 - }, - { - "epoch": 2.3643557543437126, - "grad_norm": 0.0019502609502524137, - "learning_rate": 0.00019999724314997104, - "loss": 46.0, - "step": 30924 - }, - { - "epoch": 2.364432211327102, - "grad_norm": 0.0012009326601400971, - "learning_rate": 0.0001999972429716129, - "loss": 46.0, - "step": 30925 - }, - { - "epoch": 2.3645086683104917, - "grad_norm": 0.0007818982703611255, - "learning_rate": 0.00019999724279324898, - "loss": 46.0, - "step": 30926 - }, - { - "epoch": 2.3645851252938814, - "grad_norm": 0.0011191205121576786, - "learning_rate": 0.0001999972426148793, - "loss": 46.0, - "step": 30927 - }, - { - "epoch": 2.364661582277271, - "grad_norm": 0.00046191579895094037, - "learning_rate": 0.00019999724243650383, - "loss": 46.0, - "step": 30928 - }, - { - "epoch": 2.364738039260661, - "grad_norm": 0.0007101739174686372, - "learning_rate": 0.0001999972422581226, - "loss": 46.0, - "step": 30929 - }, - { - "epoch": 2.3648144962440507, - "grad_norm": 0.0007685082382522523, - "learning_rate": 0.00019999724207973561, - "loss": 46.0, - "step": 30930 - }, - { - "epoch": 2.3648909532274405, - "grad_norm": 0.0011201307643204927, - "learning_rate": 0.00019999724190134283, - "loss": 46.0, - "step": 30931 - }, - { - "epoch": 2.3649674102108302, - "grad_norm": 0.0009113825508393347, - "learning_rate": 0.0001999972417229443, - "loss": 46.0, - "step": 30932 - }, - { - "epoch": 2.36504386719422, - "grad_norm": 0.007421477697789669, - "learning_rate": 0.00019999724154454, - "loss": 46.0, - "step": 30933 - }, - { - "epoch": 2.3651203241776098, - "grad_norm": 0.0011129294289276004, - "learning_rate": 0.00019999724136612993, - "loss": 46.0, - "step": 30934 - }, - { - "epoch": 2.365196781160999, - "grad_norm": 0.0013804876944050193, - "learning_rate": 0.00019999724118771405, - "loss": 46.0, - "step": 30935 - }, - { - "epoch": 2.365273238144389, - "grad_norm": 0.000568172603379935, - "learning_rate": 0.00019999724100929246, - "loss": 46.0, - "step": 30936 - }, - { - "epoch": 2.3653496951277786, - "grad_norm": 0.0005792875890620053, - "learning_rate": 0.00019999724083086506, - "loss": 46.0, - "step": 30937 - }, - { - "epoch": 2.3654261521111684, - "grad_norm": 0.0008729451801627874, - "learning_rate": 0.00019999724065243192, - "loss": 46.0, - "step": 30938 - }, - { - "epoch": 2.365502609094558, - "grad_norm": 0.0023423947859555483, - "learning_rate": 0.000199997240473993, - "loss": 46.0, - "step": 30939 - }, - { - "epoch": 2.365579066077948, - "grad_norm": 0.0012942733010277152, - "learning_rate": 0.00019999724029554831, - "loss": 46.0, - "step": 30940 - }, - { - "epoch": 2.3656555230613376, - "grad_norm": 0.0011621961602941155, - "learning_rate": 0.00019999724011709785, - "loss": 46.0, - "step": 30941 - }, - { - "epoch": 2.3657319800447274, - "grad_norm": 0.0010152551112696528, - "learning_rate": 0.00019999723993864162, - "loss": 46.0, - "step": 30942 - }, - { - "epoch": 2.365808437028117, - "grad_norm": 0.0011088368482887745, - "learning_rate": 0.0001999972397601796, - "loss": 46.0, - "step": 30943 - }, - { - "epoch": 2.365884894011507, - "grad_norm": 0.001341847237199545, - "learning_rate": 0.00019999723958171185, - "loss": 46.0, - "step": 30944 - }, - { - "epoch": 2.3659613509948967, - "grad_norm": 0.0016284238081425428, - "learning_rate": 0.00019999723940323833, - "loss": 46.0, - "step": 30945 - }, - { - "epoch": 2.3660378079782864, - "grad_norm": 0.0007567990687675774, - "learning_rate": 0.00019999723922475902, - "loss": 46.0, - "step": 30946 - }, - { - "epoch": 2.3661142649616758, - "grad_norm": 0.002202435629442334, - "learning_rate": 0.00019999723904627392, - "loss": 46.0, - "step": 30947 - }, - { - "epoch": 2.3661907219450655, - "grad_norm": 0.0023028121795505285, - "learning_rate": 0.0001999972388677831, - "loss": 46.0, - "step": 30948 - }, - { - "epoch": 2.3662671789284553, - "grad_norm": 0.004321018699556589, - "learning_rate": 0.00019999723868928645, - "loss": 46.0, - "step": 30949 - }, - { - "epoch": 2.366343635911845, - "grad_norm": 0.0028072157874703407, - "learning_rate": 0.00019999723851078408, - "loss": 46.0, - "step": 30950 - }, - { - "epoch": 2.366420092895235, - "grad_norm": 0.0012792878551408648, - "learning_rate": 0.00019999723833227594, - "loss": 46.0, - "step": 30951 - }, - { - "epoch": 2.3664965498786246, - "grad_norm": 0.0009372485219500959, - "learning_rate": 0.000199997238153762, - "loss": 46.0, - "step": 30952 - }, - { - "epoch": 2.3665730068620143, - "grad_norm": 0.0024277525953948498, - "learning_rate": 0.0001999972379752423, - "loss": 46.0, - "step": 30953 - }, - { - "epoch": 2.366649463845404, - "grad_norm": 0.0006208621198311448, - "learning_rate": 0.00019999723779671685, - "loss": 46.0, - "step": 30954 - }, - { - "epoch": 2.366725920828794, - "grad_norm": 0.002180324401706457, - "learning_rate": 0.00019999723761818562, - "loss": 46.0, - "step": 30955 - }, - { - "epoch": 2.3668023778121836, - "grad_norm": 0.0007477523759007454, - "learning_rate": 0.00019999723743964863, - "loss": 46.0, - "step": 30956 - }, - { - "epoch": 2.366878834795573, - "grad_norm": 0.0010377010330557823, - "learning_rate": 0.00019999723726110585, - "loss": 46.0, - "step": 30957 - }, - { - "epoch": 2.3669552917789627, - "grad_norm": 0.0016980805667117238, - "learning_rate": 0.0001999972370825573, - "loss": 46.0, - "step": 30958 - }, - { - "epoch": 2.3670317487623524, - "grad_norm": 0.000663291197270155, - "learning_rate": 0.00019999723690400302, - "loss": 46.0, - "step": 30959 - }, - { - "epoch": 2.367108205745742, - "grad_norm": 0.003171132877469063, - "learning_rate": 0.00019999723672544292, - "loss": 46.0, - "step": 30960 - }, - { - "epoch": 2.367184662729132, - "grad_norm": 0.0006985990912653506, - "learning_rate": 0.0001999972365468771, - "loss": 46.0, - "step": 30961 - }, - { - "epoch": 2.3672611197125217, - "grad_norm": 0.00045496723032556474, - "learning_rate": 0.00019999723636830548, - "loss": 46.0, - "step": 30962 - }, - { - "epoch": 2.3673375766959115, - "grad_norm": 0.004115582909435034, - "learning_rate": 0.00019999723618972808, - "loss": 46.0, - "step": 30963 - }, - { - "epoch": 2.3674140336793013, - "grad_norm": 0.0023013551253825426, - "learning_rate": 0.00019999723601114492, - "loss": 46.0, - "step": 30964 - }, - { - "epoch": 2.367490490662691, - "grad_norm": 0.0006369391339831054, - "learning_rate": 0.000199997235832556, - "loss": 46.0, - "step": 30965 - }, - { - "epoch": 2.3675669476460808, - "grad_norm": 0.0007399871246889234, - "learning_rate": 0.0001999972356539613, - "loss": 46.0, - "step": 30966 - }, - { - "epoch": 2.3676434046294705, - "grad_norm": 0.0016781376907601953, - "learning_rate": 0.00019999723547536085, - "loss": 46.0, - "step": 30967 - }, - { - "epoch": 2.3677198616128603, - "grad_norm": 0.0019182696705684066, - "learning_rate": 0.00019999723529675462, - "loss": 46.0, - "step": 30968 - }, - { - "epoch": 2.3677963185962496, - "grad_norm": 0.0011726620141416788, - "learning_rate": 0.00019999723511814264, - "loss": 46.0, - "step": 30969 - }, - { - "epoch": 2.3678727755796394, - "grad_norm": 0.0006101656472310424, - "learning_rate": 0.00019999723493952486, - "loss": 46.0, - "step": 30970 - }, - { - "epoch": 2.367949232563029, - "grad_norm": 0.0014591108774766326, - "learning_rate": 0.00019999723476090133, - "loss": 46.0, - "step": 30971 - }, - { - "epoch": 2.368025689546419, - "grad_norm": 0.0007382669136859477, - "learning_rate": 0.000199997234582272, - "loss": 46.0, - "step": 30972 - }, - { - "epoch": 2.3681021465298087, - "grad_norm": 0.0012290574377402663, - "learning_rate": 0.00019999723440363693, - "loss": 46.0, - "step": 30973 - }, - { - "epoch": 2.3681786035131984, - "grad_norm": 0.00162209733389318, - "learning_rate": 0.0001999972342249961, - "loss": 46.0, - "step": 30974 - }, - { - "epoch": 2.368255060496588, - "grad_norm": 0.0007517147460021079, - "learning_rate": 0.00019999723404634946, - "loss": 46.0, - "step": 30975 - }, - { - "epoch": 2.368331517479978, - "grad_norm": 0.006431076675653458, - "learning_rate": 0.0001999972338676971, - "loss": 46.0, - "step": 30976 - }, - { - "epoch": 2.3684079744633677, - "grad_norm": 0.0002466006553731859, - "learning_rate": 0.00019999723368903892, - "loss": 46.0, - "step": 30977 - }, - { - "epoch": 2.368484431446757, - "grad_norm": 0.0009315300267189741, - "learning_rate": 0.000199997233510375, - "loss": 46.0, - "step": 30978 - }, - { - "epoch": 2.368560888430147, - "grad_norm": 0.002660072175785899, - "learning_rate": 0.00019999723333170532, - "loss": 46.0, - "step": 30979 - }, - { - "epoch": 2.3686373454135365, - "grad_norm": 0.0045874943025410175, - "learning_rate": 0.00019999723315302986, - "loss": 46.0, - "step": 30980 - }, - { - "epoch": 2.3687138023969263, - "grad_norm": 0.000573175901081413, - "learning_rate": 0.0001999972329743486, - "loss": 46.0, - "step": 30981 - }, - { - "epoch": 2.368790259380316, - "grad_norm": 0.0015418661059811711, - "learning_rate": 0.00019999723279566162, - "loss": 46.0, - "step": 30982 - }, - { - "epoch": 2.368866716363706, - "grad_norm": 0.0032072793692350388, - "learning_rate": 0.00019999723261696884, - "loss": 46.0, - "step": 30983 - }, - { - "epoch": 2.3689431733470956, - "grad_norm": 0.002618050202727318, - "learning_rate": 0.0001999972324382703, - "loss": 46.0, - "step": 30984 - }, - { - "epoch": 2.3690196303304853, - "grad_norm": 0.0007513040909543633, - "learning_rate": 0.00019999723225956598, - "loss": 46.0, - "step": 30985 - }, - { - "epoch": 2.369096087313875, - "grad_norm": 0.0012579533504322171, - "learning_rate": 0.0001999972320808559, - "loss": 46.0, - "step": 30986 - }, - { - "epoch": 2.369172544297265, - "grad_norm": 0.0029055820778012276, - "learning_rate": 0.00019999723190214006, - "loss": 46.0, - "step": 30987 - }, - { - "epoch": 2.3692490012806546, - "grad_norm": 0.0009218064951710403, - "learning_rate": 0.00019999723172341844, - "loss": 46.0, - "step": 30988 - }, - { - "epoch": 2.3693254582640444, - "grad_norm": 0.002410757588222623, - "learning_rate": 0.00019999723154469104, - "loss": 46.0, - "step": 30989 - }, - { - "epoch": 2.369401915247434, - "grad_norm": 0.0009782094275578856, - "learning_rate": 0.0001999972313659579, - "loss": 46.0, - "step": 30990 - }, - { - "epoch": 2.3694783722308235, - "grad_norm": 0.0027987691573798656, - "learning_rate": 0.000199997231187219, - "loss": 46.0, - "step": 30991 - }, - { - "epoch": 2.3695548292142132, - "grad_norm": 0.0019433513516560197, - "learning_rate": 0.0001999972310084743, - "loss": 46.0, - "step": 30992 - }, - { - "epoch": 2.369631286197603, - "grad_norm": 0.002832096768543124, - "learning_rate": 0.00019999723082972381, - "loss": 46.0, - "step": 30993 - }, - { - "epoch": 2.3697077431809928, - "grad_norm": 0.0005691216210834682, - "learning_rate": 0.0001999972306509676, - "loss": 46.0, - "step": 30994 - }, - { - "epoch": 2.3697842001643825, - "grad_norm": 0.0015089535154402256, - "learning_rate": 0.00019999723047220557, - "loss": 46.0, - "step": 30995 - }, - { - "epoch": 2.3698606571477723, - "grad_norm": 0.0014779954217374325, - "learning_rate": 0.00019999723029343782, - "loss": 46.0, - "step": 30996 - }, - { - "epoch": 2.369937114131162, - "grad_norm": 0.0012628003023564816, - "learning_rate": 0.0001999972301146643, - "loss": 46.0, - "step": 30997 - }, - { - "epoch": 2.370013571114552, - "grad_norm": 0.0011182072339579463, - "learning_rate": 0.00019999722993588496, - "loss": 46.0, - "step": 30998 - }, - { - "epoch": 2.3700900280979416, - "grad_norm": 0.0027189922984689474, - "learning_rate": 0.0001999972297570999, - "loss": 46.0, - "step": 30999 - }, - { - "epoch": 2.370166485081331, - "grad_norm": 0.0006680736551061273, - "learning_rate": 0.00019999722957830904, - "loss": 46.0, - "step": 31000 - }, - { - "epoch": 2.3702429420647206, - "grad_norm": 0.0004944147076457739, - "learning_rate": 0.00019999722939951242, - "loss": 46.0, - "step": 31001 - }, - { - "epoch": 2.3703193990481104, - "grad_norm": 0.0015551430406048894, - "learning_rate": 0.00019999722922071006, - "loss": 46.0, - "step": 31002 - }, - { - "epoch": 2.3703958560315, - "grad_norm": 0.000798713939730078, - "learning_rate": 0.0001999972290419019, - "loss": 46.0, - "step": 31003 - }, - { - "epoch": 2.37047231301489, - "grad_norm": 0.0010620767716318369, - "learning_rate": 0.00019999722886308795, - "loss": 46.0, - "step": 31004 - }, - { - "epoch": 2.3705487699982797, - "grad_norm": 0.0005952194333076477, - "learning_rate": 0.00019999722868426826, - "loss": 46.0, - "step": 31005 - }, - { - "epoch": 2.3706252269816694, - "grad_norm": 0.002582385204732418, - "learning_rate": 0.00019999722850544277, - "loss": 46.0, - "step": 31006 - }, - { - "epoch": 2.370701683965059, - "grad_norm": 0.0011500270338729024, - "learning_rate": 0.00019999722832661157, - "loss": 46.0, - "step": 31007 - }, - { - "epoch": 2.370778140948449, - "grad_norm": 0.0010102280648425221, - "learning_rate": 0.00019999722814777456, - "loss": 46.0, - "step": 31008 - }, - { - "epoch": 2.3708545979318387, - "grad_norm": 0.0017857919447124004, - "learning_rate": 0.00019999722796893175, - "loss": 46.0, - "step": 31009 - }, - { - "epoch": 2.3709310549152285, - "grad_norm": 0.0013549618888646364, - "learning_rate": 0.00019999722779008323, - "loss": 46.0, - "step": 31010 - }, - { - "epoch": 2.3710075118986182, - "grad_norm": 0.0007024784572422504, - "learning_rate": 0.00019999722761122893, - "loss": 46.0, - "step": 31011 - }, - { - "epoch": 2.371083968882008, - "grad_norm": 0.001392704900354147, - "learning_rate": 0.00019999722743236883, - "loss": 46.0, - "step": 31012 - }, - { - "epoch": 2.3711604258653973, - "grad_norm": 0.000905881286598742, - "learning_rate": 0.00019999722725350298, - "loss": 46.0, - "step": 31013 - }, - { - "epoch": 2.371236882848787, - "grad_norm": 0.0008513322682119906, - "learning_rate": 0.00019999722707463139, - "loss": 46.0, - "step": 31014 - }, - { - "epoch": 2.371313339832177, - "grad_norm": 0.0006747301667928696, - "learning_rate": 0.000199997226895754, - "loss": 46.0, - "step": 31015 - }, - { - "epoch": 2.3713897968155666, - "grad_norm": 0.0037178483325988054, - "learning_rate": 0.00019999722671687083, - "loss": 46.0, - "step": 31016 - }, - { - "epoch": 2.3714662537989564, - "grad_norm": 0.000788997916970402, - "learning_rate": 0.0001999972265379819, - "loss": 46.0, - "step": 31017 - }, - { - "epoch": 2.371542710782346, - "grad_norm": 0.0034271508920937777, - "learning_rate": 0.0001999972263590872, - "loss": 46.0, - "step": 31018 - }, - { - "epoch": 2.371619167765736, - "grad_norm": 0.005774281453341246, - "learning_rate": 0.00019999722618018674, - "loss": 46.0, - "step": 31019 - }, - { - "epoch": 2.3716956247491257, - "grad_norm": 0.0017899870872497559, - "learning_rate": 0.00019999722600128053, - "loss": 46.0, - "step": 31020 - }, - { - "epoch": 2.3717720817325154, - "grad_norm": 0.0028651640750467777, - "learning_rate": 0.0001999972258223685, - "loss": 46.0, - "step": 31021 - }, - { - "epoch": 2.3718485387159047, - "grad_norm": 0.007358357310295105, - "learning_rate": 0.00019999722564345075, - "loss": 46.0, - "step": 31022 - }, - { - "epoch": 2.3719249956992945, - "grad_norm": 0.0011318784672766924, - "learning_rate": 0.0001999972254645272, - "loss": 46.0, - "step": 31023 - }, - { - "epoch": 2.3720014526826843, - "grad_norm": 0.001023461576551199, - "learning_rate": 0.00019999722528559786, - "loss": 46.0, - "step": 31024 - }, - { - "epoch": 2.372077909666074, - "grad_norm": 0.0014502619160339236, - "learning_rate": 0.00019999722510666282, - "loss": 46.0, - "step": 31025 - }, - { - "epoch": 2.3721543666494638, - "grad_norm": 0.002782789058983326, - "learning_rate": 0.00019999722492772195, - "loss": 46.0, - "step": 31026 - }, - { - "epoch": 2.3722308236328535, - "grad_norm": 0.0050197262316942215, - "learning_rate": 0.00019999722474877533, - "loss": 46.0, - "step": 31027 - }, - { - "epoch": 2.3723072806162433, - "grad_norm": 0.00089174514869228, - "learning_rate": 0.00019999722456982296, - "loss": 46.0, - "step": 31028 - }, - { - "epoch": 2.372383737599633, - "grad_norm": 0.0007411700207740068, - "learning_rate": 0.00019999722439086477, - "loss": 46.0, - "step": 31029 - }, - { - "epoch": 2.372460194583023, - "grad_norm": 0.0029685255140066147, - "learning_rate": 0.00019999722421190085, - "loss": 46.0, - "step": 31030 - }, - { - "epoch": 2.3725366515664126, - "grad_norm": 0.0009744045091792941, - "learning_rate": 0.00019999722403293114, - "loss": 46.0, - "step": 31031 - }, - { - "epoch": 2.3726131085498023, - "grad_norm": 0.0021773381158709526, - "learning_rate": 0.0001999972238539557, - "loss": 46.0, - "step": 31032 - }, - { - "epoch": 2.372689565533192, - "grad_norm": 0.0009067789651453495, - "learning_rate": 0.00019999722367497445, - "loss": 46.0, - "step": 31033 - }, - { - "epoch": 2.3727660225165814, - "grad_norm": 0.0006509035592898726, - "learning_rate": 0.00019999722349598744, - "loss": 46.0, - "step": 31034 - }, - { - "epoch": 2.372842479499971, - "grad_norm": 0.001086916890926659, - "learning_rate": 0.00019999722331699466, - "loss": 46.0, - "step": 31035 - }, - { - "epoch": 2.372918936483361, - "grad_norm": 0.0010047780815511942, - "learning_rate": 0.00019999722313799613, - "loss": 46.0, - "step": 31036 - }, - { - "epoch": 2.3729953934667507, - "grad_norm": 0.0004870621196459979, - "learning_rate": 0.00019999722295899183, - "loss": 46.0, - "step": 31037 - }, - { - "epoch": 2.3730718504501405, - "grad_norm": 0.0014111370546743274, - "learning_rate": 0.00019999722277998173, - "loss": 46.0, - "step": 31038 - }, - { - "epoch": 2.3731483074335302, - "grad_norm": 0.000663279031869024, - "learning_rate": 0.00019999722260096586, - "loss": 46.0, - "step": 31039 - }, - { - "epoch": 2.37322476441692, - "grad_norm": 0.0018719497602432966, - "learning_rate": 0.00019999722242194427, - "loss": 46.0, - "step": 31040 - }, - { - "epoch": 2.3733012214003097, - "grad_norm": 0.0011070228647440672, - "learning_rate": 0.00019999722224291688, - "loss": 46.0, - "step": 31041 - }, - { - "epoch": 2.3733776783836995, - "grad_norm": 0.0011250664247199893, - "learning_rate": 0.0001999972220638837, - "loss": 46.0, - "step": 31042 - }, - { - "epoch": 2.3734541353670893, - "grad_norm": 0.0011409572325646877, - "learning_rate": 0.0001999972218848448, - "loss": 46.0, - "step": 31043 - }, - { - "epoch": 2.3735305923504786, - "grad_norm": 0.0025951277930289507, - "learning_rate": 0.0001999972217058001, - "loss": 46.0, - "step": 31044 - }, - { - "epoch": 2.3736070493338683, - "grad_norm": 0.0009983801282942295, - "learning_rate": 0.00019999722152674962, - "loss": 46.0, - "step": 31045 - }, - { - "epoch": 2.373683506317258, - "grad_norm": 0.001446805545128882, - "learning_rate": 0.0001999972213476934, - "loss": 46.0, - "step": 31046 - }, - { - "epoch": 2.373759963300648, - "grad_norm": 0.0011241381289437413, - "learning_rate": 0.00019999722116863139, - "loss": 46.0, - "step": 31047 - }, - { - "epoch": 2.3738364202840376, - "grad_norm": 0.0019013514975085855, - "learning_rate": 0.0001999972209895636, - "loss": 46.0, - "step": 31048 - }, - { - "epoch": 2.3739128772674274, - "grad_norm": 0.005932185333222151, - "learning_rate": 0.00019999722081049005, - "loss": 46.0, - "step": 31049 - }, - { - "epoch": 2.373989334250817, - "grad_norm": 0.0008484215359203517, - "learning_rate": 0.00019999722063141075, - "loss": 46.0, - "step": 31050 - }, - { - "epoch": 2.374065791234207, - "grad_norm": 0.0006371770286932588, - "learning_rate": 0.00019999722045232568, - "loss": 46.0, - "step": 31051 - }, - { - "epoch": 2.3741422482175967, - "grad_norm": 0.0008268074016086757, - "learning_rate": 0.0001999972202732348, - "loss": 46.0, - "step": 31052 - }, - { - "epoch": 2.3742187052009864, - "grad_norm": 0.001894623157568276, - "learning_rate": 0.0001999972200941382, - "loss": 46.0, - "step": 31053 - }, - { - "epoch": 2.374295162184376, - "grad_norm": 0.0017251019598916173, - "learning_rate": 0.00019999721991503582, - "loss": 46.0, - "step": 31054 - }, - { - "epoch": 2.374371619167766, - "grad_norm": 0.0010152189061045647, - "learning_rate": 0.00019999721973592763, - "loss": 46.0, - "step": 31055 - }, - { - "epoch": 2.3744480761511553, - "grad_norm": 0.0009373384527862072, - "learning_rate": 0.00019999721955681372, - "loss": 46.0, - "step": 31056 - }, - { - "epoch": 2.374524533134545, - "grad_norm": 0.0018818433163687587, - "learning_rate": 0.000199997219377694, - "loss": 46.0, - "step": 31057 - }, - { - "epoch": 2.374600990117935, - "grad_norm": 0.0017436678754165769, - "learning_rate": 0.00019999721919856855, - "loss": 46.0, - "step": 31058 - }, - { - "epoch": 2.3746774471013246, - "grad_norm": 0.0021920257713645697, - "learning_rate": 0.00019999721901943731, - "loss": 46.0, - "step": 31059 - }, - { - "epoch": 2.3747539040847143, - "grad_norm": 0.0013635688228532672, - "learning_rate": 0.0001999972188403003, - "loss": 46.0, - "step": 31060 - }, - { - "epoch": 2.374830361068104, - "grad_norm": 0.005182044580578804, - "learning_rate": 0.00019999721866115753, - "loss": 46.0, - "step": 31061 - }, - { - "epoch": 2.374906818051494, - "grad_norm": 0.001008194638416171, - "learning_rate": 0.00019999721848200898, - "loss": 46.0, - "step": 31062 - }, - { - "epoch": 2.3749832750348836, - "grad_norm": 0.0006955301505513489, - "learning_rate": 0.00019999721830285468, - "loss": 46.0, - "step": 31063 - }, - { - "epoch": 2.3750597320182734, - "grad_norm": 0.001902335905469954, - "learning_rate": 0.00019999721812369458, - "loss": 46.0, - "step": 31064 - }, - { - "epoch": 2.375136189001663, - "grad_norm": 0.001501190010458231, - "learning_rate": 0.00019999721794452873, - "loss": 46.0, - "step": 31065 - }, - { - "epoch": 2.3752126459850524, - "grad_norm": 0.0017575160600245, - "learning_rate": 0.00019999721776535711, - "loss": 46.0, - "step": 31066 - }, - { - "epoch": 2.375289102968442, - "grad_norm": 0.0008912758203223348, - "learning_rate": 0.00019999721758617972, - "loss": 46.0, - "step": 31067 - }, - { - "epoch": 2.375365559951832, - "grad_norm": 0.0009623733349144459, - "learning_rate": 0.00019999721740699658, - "loss": 46.0, - "step": 31068 - }, - { - "epoch": 2.3754420169352217, - "grad_norm": 0.0036720200441777706, - "learning_rate": 0.00019999721722780764, - "loss": 46.0, - "step": 31069 - }, - { - "epoch": 2.3755184739186115, - "grad_norm": 0.0041097113862633705, - "learning_rate": 0.00019999721704861293, - "loss": 46.0, - "step": 31070 - }, - { - "epoch": 2.3755949309020012, - "grad_norm": 0.000994296045973897, - "learning_rate": 0.00019999721686941247, - "loss": 46.0, - "step": 31071 - }, - { - "epoch": 2.375671387885391, - "grad_norm": 0.0012296505738049746, - "learning_rate": 0.00019999721669020621, - "loss": 46.0, - "step": 31072 - }, - { - "epoch": 2.3757478448687808, - "grad_norm": 0.001984257949516177, - "learning_rate": 0.0001999972165109942, - "loss": 46.0, - "step": 31073 - }, - { - "epoch": 2.3758243018521705, - "grad_norm": 0.000604613742325455, - "learning_rate": 0.00019999721633177643, - "loss": 46.0, - "step": 31074 - }, - { - "epoch": 2.3759007588355603, - "grad_norm": 0.00041230194619856775, - "learning_rate": 0.0001999972161525529, - "loss": 46.0, - "step": 31075 - }, - { - "epoch": 2.37597721581895, - "grad_norm": 0.0019305472960695624, - "learning_rate": 0.00019999721597332358, - "loss": 46.0, - "step": 31076 - }, - { - "epoch": 2.37605367280234, - "grad_norm": 0.0006674590986222029, - "learning_rate": 0.0001999972157940885, - "loss": 46.0, - "step": 31077 - }, - { - "epoch": 2.376130129785729, - "grad_norm": 0.0015203512739390135, - "learning_rate": 0.00019999721561484766, - "loss": 46.0, - "step": 31078 - }, - { - "epoch": 2.376206586769119, - "grad_norm": 0.0005426773568615317, - "learning_rate": 0.00019999721543560102, - "loss": 46.0, - "step": 31079 - }, - { - "epoch": 2.3762830437525087, - "grad_norm": 0.0009335912181995809, - "learning_rate": 0.00019999721525634862, - "loss": 46.0, - "step": 31080 - }, - { - "epoch": 2.3763595007358984, - "grad_norm": 0.0007931062136776745, - "learning_rate": 0.00019999721507709046, - "loss": 46.0, - "step": 31081 - }, - { - "epoch": 2.376435957719288, - "grad_norm": 0.0015477658016607165, - "learning_rate": 0.00019999721489782655, - "loss": 46.0, - "step": 31082 - }, - { - "epoch": 2.376512414702678, - "grad_norm": 0.0005699508474208415, - "learning_rate": 0.00019999721471855683, - "loss": 46.0, - "step": 31083 - }, - { - "epoch": 2.3765888716860677, - "grad_norm": 0.0011459199013188481, - "learning_rate": 0.00019999721453928135, - "loss": 46.0, - "step": 31084 - }, - { - "epoch": 2.3766653286694575, - "grad_norm": 0.002297770930454135, - "learning_rate": 0.00019999721436000014, - "loss": 46.0, - "step": 31085 - }, - { - "epoch": 2.376741785652847, - "grad_norm": 0.000576640188228339, - "learning_rate": 0.0001999972141807131, - "loss": 46.0, - "step": 31086 - }, - { - "epoch": 2.376818242636237, - "grad_norm": 0.0005016205250285566, - "learning_rate": 0.00019999721400142036, - "loss": 46.0, - "step": 31087 - }, - { - "epoch": 2.3768946996196263, - "grad_norm": 0.0010746658081188798, - "learning_rate": 0.0001999972138221218, - "loss": 46.0, - "step": 31088 - }, - { - "epoch": 2.376971156603016, - "grad_norm": 0.0003642332158051431, - "learning_rate": 0.00019999721364281748, - "loss": 46.0, - "step": 31089 - }, - { - "epoch": 2.377047613586406, - "grad_norm": 0.0005703719216398895, - "learning_rate": 0.00019999721346350738, - "loss": 46.0, - "step": 31090 - }, - { - "epoch": 2.3771240705697956, - "grad_norm": 0.0012409646296873689, - "learning_rate": 0.00019999721328419154, - "loss": 46.0, - "step": 31091 - }, - { - "epoch": 2.3772005275531853, - "grad_norm": 0.0014029587619006634, - "learning_rate": 0.00019999721310486992, - "loss": 46.0, - "step": 31092 - }, - { - "epoch": 2.377276984536575, - "grad_norm": 0.0009414395317435265, - "learning_rate": 0.00019999721292554253, - "loss": 46.0, - "step": 31093 - }, - { - "epoch": 2.377353441519965, - "grad_norm": 0.0009052905370481312, - "learning_rate": 0.00019999721274620934, - "loss": 46.0, - "step": 31094 - }, - { - "epoch": 2.3774298985033546, - "grad_norm": 0.0005171510856598616, - "learning_rate": 0.00019999721256687043, - "loss": 46.0, - "step": 31095 - }, - { - "epoch": 2.3775063554867444, - "grad_norm": 0.000886720372363925, - "learning_rate": 0.00019999721238752574, - "loss": 46.0, - "step": 31096 - }, - { - "epoch": 2.377582812470134, - "grad_norm": 0.0028995713219046593, - "learning_rate": 0.00019999721220817526, - "loss": 46.0, - "step": 31097 - }, - { - "epoch": 2.377659269453524, - "grad_norm": 0.006990660913288593, - "learning_rate": 0.00019999721202881903, - "loss": 46.0, - "step": 31098 - }, - { - "epoch": 2.3777357264369137, - "grad_norm": 0.0026172089856117964, - "learning_rate": 0.00019999721184945702, - "loss": 46.0, - "step": 31099 - }, - { - "epoch": 2.377812183420303, - "grad_norm": 0.0028872620314359665, - "learning_rate": 0.00019999721167008921, - "loss": 46.0, - "step": 31100 - }, - { - "epoch": 2.3778886404036927, - "grad_norm": 0.00242223241366446, - "learning_rate": 0.0001999972114907157, - "loss": 46.0, - "step": 31101 - }, - { - "epoch": 2.3779650973870825, - "grad_norm": 0.0010536222252994776, - "learning_rate": 0.00019999721131133637, - "loss": 46.0, - "step": 31102 - }, - { - "epoch": 2.3780415543704723, - "grad_norm": 0.0017761014169082046, - "learning_rate": 0.0001999972111319513, - "loss": 46.0, - "step": 31103 - }, - { - "epoch": 2.378118011353862, - "grad_norm": 0.0007009514956735075, - "learning_rate": 0.00019999721095256042, - "loss": 46.0, - "step": 31104 - }, - { - "epoch": 2.378194468337252, - "grad_norm": 0.0007399828173220158, - "learning_rate": 0.0001999972107731638, - "loss": 46.0, - "step": 31105 - }, - { - "epoch": 2.3782709253206415, - "grad_norm": 0.0005016362993046641, - "learning_rate": 0.00019999721059376139, - "loss": 46.0, - "step": 31106 - }, - { - "epoch": 2.3783473823040313, - "grad_norm": 0.0009125939104706049, - "learning_rate": 0.00019999721041435325, - "loss": 46.0, - "step": 31107 - }, - { - "epoch": 2.378423839287421, - "grad_norm": 0.0016421690816059709, - "learning_rate": 0.0001999972102349393, - "loss": 46.0, - "step": 31108 - }, - { - "epoch": 2.3785002962708104, - "grad_norm": 0.0006779235554859042, - "learning_rate": 0.00019999721005551963, - "loss": 46.0, - "step": 31109 - }, - { - "epoch": 2.3785767532542, - "grad_norm": 0.0011737538734450936, - "learning_rate": 0.00019999720987609414, - "loss": 46.0, - "step": 31110 - }, - { - "epoch": 2.37865321023759, - "grad_norm": 0.000546792522072792, - "learning_rate": 0.0001999972096966629, - "loss": 46.0, - "step": 31111 - }, - { - "epoch": 2.3787296672209797, - "grad_norm": 0.0015708922874182463, - "learning_rate": 0.0001999972095172259, - "loss": 46.0, - "step": 31112 - }, - { - "epoch": 2.3788061242043694, - "grad_norm": 0.0027509615756571293, - "learning_rate": 0.0001999972093377831, - "loss": 46.0, - "step": 31113 - }, - { - "epoch": 2.378882581187759, - "grad_norm": 0.0005737630417570472, - "learning_rate": 0.00019999720915833455, - "loss": 46.0, - "step": 31114 - }, - { - "epoch": 2.378959038171149, - "grad_norm": 0.003620326053351164, - "learning_rate": 0.00019999720897888022, - "loss": 46.0, - "step": 31115 - }, - { - "epoch": 2.3790354951545387, - "grad_norm": 0.0005674313870258629, - "learning_rate": 0.00019999720879942015, - "loss": 46.0, - "step": 31116 - }, - { - "epoch": 2.3791119521379285, - "grad_norm": 0.00027439510449767113, - "learning_rate": 0.0001999972086199543, - "loss": 46.0, - "step": 31117 - }, - { - "epoch": 2.3791884091213182, - "grad_norm": 0.001495744800195098, - "learning_rate": 0.0001999972084404827, - "loss": 46.0, - "step": 31118 - }, - { - "epoch": 2.379264866104708, - "grad_norm": 0.0009910111548379064, - "learning_rate": 0.00019999720826100527, - "loss": 46.0, - "step": 31119 - }, - { - "epoch": 2.3793413230880978, - "grad_norm": 0.0007201422704383731, - "learning_rate": 0.0001999972080815221, - "loss": 46.0, - "step": 31120 - }, - { - "epoch": 2.3794177800714875, - "grad_norm": 0.0007935430039651692, - "learning_rate": 0.00019999720790203317, - "loss": 46.0, - "step": 31121 - }, - { - "epoch": 2.379494237054877, - "grad_norm": 0.0013482357608154416, - "learning_rate": 0.0001999972077225385, - "loss": 46.0, - "step": 31122 - }, - { - "epoch": 2.3795706940382666, - "grad_norm": 0.0011176994303241372, - "learning_rate": 0.000199997207543038, - "loss": 46.0, - "step": 31123 - }, - { - "epoch": 2.3796471510216564, - "grad_norm": 0.0024398467503488064, - "learning_rate": 0.00019999720736353177, - "loss": 46.0, - "step": 31124 - }, - { - "epoch": 2.379723608005046, - "grad_norm": 0.002355023752897978, - "learning_rate": 0.00019999720718401974, - "loss": 46.0, - "step": 31125 - }, - { - "epoch": 2.379800064988436, - "grad_norm": 0.0017233600374311209, - "learning_rate": 0.00019999720700450196, - "loss": 46.0, - "step": 31126 - }, - { - "epoch": 2.3798765219718256, - "grad_norm": 0.0014562989817932248, - "learning_rate": 0.0001999972068249784, - "loss": 46.0, - "step": 31127 - }, - { - "epoch": 2.3799529789552154, - "grad_norm": 0.0007559538935311139, - "learning_rate": 0.0001999972066454491, - "loss": 46.0, - "step": 31128 - }, - { - "epoch": 2.380029435938605, - "grad_norm": 0.0008023447007872164, - "learning_rate": 0.00019999720646591402, - "loss": 46.0, - "step": 31129 - }, - { - "epoch": 2.380105892921995, - "grad_norm": 0.0030296442564576864, - "learning_rate": 0.00019999720628637317, - "loss": 46.0, - "step": 31130 - }, - { - "epoch": 2.3801823499053842, - "grad_norm": 0.0007195067591965199, - "learning_rate": 0.00019999720610682653, - "loss": 46.0, - "step": 31131 - }, - { - "epoch": 2.380258806888774, - "grad_norm": 0.00210877344943583, - "learning_rate": 0.00019999720592727416, - "loss": 46.0, - "step": 31132 - }, - { - "epoch": 2.3803352638721638, - "grad_norm": 0.0011491795303300023, - "learning_rate": 0.00019999720574771597, - "loss": 46.0, - "step": 31133 - }, - { - "epoch": 2.3804117208555535, - "grad_norm": 0.0038050892762839794, - "learning_rate": 0.00019999720556815204, - "loss": 46.0, - "step": 31134 - }, - { - "epoch": 2.3804881778389433, - "grad_norm": 0.0013043897924944758, - "learning_rate": 0.00019999720538858235, - "loss": 46.0, - "step": 31135 - }, - { - "epoch": 2.380564634822333, - "grad_norm": 0.0022914758883416653, - "learning_rate": 0.00019999720520900687, - "loss": 46.0, - "step": 31136 - }, - { - "epoch": 2.380641091805723, - "grad_norm": 0.0007555970805697143, - "learning_rate": 0.00019999720502942564, - "loss": 46.0, - "step": 31137 - }, - { - "epoch": 2.3807175487891126, - "grad_norm": 0.0010308842174708843, - "learning_rate": 0.0001999972048498386, - "loss": 46.0, - "step": 31138 - }, - { - "epoch": 2.3807940057725023, - "grad_norm": 0.001108479336835444, - "learning_rate": 0.00019999720467024583, - "loss": 46.0, - "step": 31139 - }, - { - "epoch": 2.380870462755892, - "grad_norm": 0.0010350996162742376, - "learning_rate": 0.0001999972044906473, - "loss": 46.0, - "step": 31140 - }, - { - "epoch": 2.380946919739282, - "grad_norm": 0.0009307586005888879, - "learning_rate": 0.00019999720431104295, - "loss": 46.0, - "step": 31141 - }, - { - "epoch": 2.3810233767226716, - "grad_norm": 0.0016070916317403316, - "learning_rate": 0.00019999720413143288, - "loss": 46.0, - "step": 31142 - }, - { - "epoch": 2.3810998337060614, - "grad_norm": 0.0019747591577470303, - "learning_rate": 0.000199997203951817, - "loss": 46.0, - "step": 31143 - }, - { - "epoch": 2.3811762906894507, - "grad_norm": 0.0006214067107066512, - "learning_rate": 0.0001999972037721954, - "loss": 46.0, - "step": 31144 - }, - { - "epoch": 2.3812527476728405, - "grad_norm": 0.001856733695603907, - "learning_rate": 0.000199997203592568, - "loss": 46.0, - "step": 31145 - }, - { - "epoch": 2.38132920465623, - "grad_norm": 0.0006737332441844046, - "learning_rate": 0.00019999720341293484, - "loss": 46.0, - "step": 31146 - }, - { - "epoch": 2.38140566163962, - "grad_norm": 0.001253923517651856, - "learning_rate": 0.0001999972032332959, - "loss": 46.0, - "step": 31147 - }, - { - "epoch": 2.3814821186230097, - "grad_norm": 0.0016853242414072156, - "learning_rate": 0.0001999972030536512, - "loss": 46.0, - "step": 31148 - }, - { - "epoch": 2.3815585756063995, - "grad_norm": 0.0011951141059398651, - "learning_rate": 0.0001999972028740007, - "loss": 46.0, - "step": 31149 - }, - { - "epoch": 2.3816350325897893, - "grad_norm": 0.00036048179026693106, - "learning_rate": 0.00019999720269434448, - "loss": 46.0, - "step": 31150 - }, - { - "epoch": 2.381711489573179, - "grad_norm": 0.000941413571126759, - "learning_rate": 0.00019999720251468245, - "loss": 46.0, - "step": 31151 - }, - { - "epoch": 2.381787946556569, - "grad_norm": 0.0020912436302751303, - "learning_rate": 0.0001999972023350147, - "loss": 46.0, - "step": 31152 - }, - { - "epoch": 2.381864403539958, - "grad_norm": 0.005619356874376535, - "learning_rate": 0.00019999720215534112, - "loss": 46.0, - "step": 31153 - }, - { - "epoch": 2.381940860523348, - "grad_norm": 0.0009027887717820704, - "learning_rate": 0.0001999972019756618, - "loss": 46.0, - "step": 31154 - }, - { - "epoch": 2.3820173175067376, - "grad_norm": 0.0007508570561185479, - "learning_rate": 0.00019999720179597673, - "loss": 46.0, - "step": 31155 - }, - { - "epoch": 2.3820937744901274, - "grad_norm": 0.0010588339064270258, - "learning_rate": 0.00019999720161628588, - "loss": 46.0, - "step": 31156 - }, - { - "epoch": 2.382170231473517, - "grad_norm": 0.006090869195759296, - "learning_rate": 0.00019999720143658921, - "loss": 46.0, - "step": 31157 - }, - { - "epoch": 2.382246688456907, - "grad_norm": 0.0016812618123367429, - "learning_rate": 0.00019999720125688682, - "loss": 46.0, - "step": 31158 - }, - { - "epoch": 2.3823231454402967, - "grad_norm": 0.0006664101383648813, - "learning_rate": 0.0001999972010771787, - "loss": 46.0, - "step": 31159 - }, - { - "epoch": 2.3823996024236864, - "grad_norm": 0.0016779318684712052, - "learning_rate": 0.00019999720089746475, - "loss": 46.0, - "step": 31160 - }, - { - "epoch": 2.382476059407076, - "grad_norm": 0.0023215990513563156, - "learning_rate": 0.00019999720071774504, - "loss": 46.0, - "step": 31161 - }, - { - "epoch": 2.382552516390466, - "grad_norm": 0.0011817141203209758, - "learning_rate": 0.00019999720053801956, - "loss": 46.0, - "step": 31162 - }, - { - "epoch": 2.3826289733738557, - "grad_norm": 0.0021164584904909134, - "learning_rate": 0.0001999972003582883, - "loss": 46.0, - "step": 31163 - }, - { - "epoch": 2.3827054303572455, - "grad_norm": 0.00023572304053232074, - "learning_rate": 0.0001999972001785513, - "loss": 46.0, - "step": 31164 - }, - { - "epoch": 2.382781887340635, - "grad_norm": 0.001020329655148089, - "learning_rate": 0.00019999719999880852, - "loss": 46.0, - "step": 31165 - }, - { - "epoch": 2.3828583443240245, - "grad_norm": 0.0002638470323290676, - "learning_rate": 0.00019999719981905994, - "loss": 46.0, - "step": 31166 - }, - { - "epoch": 2.3829348013074143, - "grad_norm": 0.0009640008211135864, - "learning_rate": 0.00019999719963930565, - "loss": 46.0, - "step": 31167 - }, - { - "epoch": 2.383011258290804, - "grad_norm": 0.0006028738571330905, - "learning_rate": 0.00019999719945954555, - "loss": 46.0, - "step": 31168 - }, - { - "epoch": 2.383087715274194, - "grad_norm": 0.004470419138669968, - "learning_rate": 0.00019999719927977968, - "loss": 46.0, - "step": 31169 - }, - { - "epoch": 2.3831641722575836, - "grad_norm": 0.0014518130337819457, - "learning_rate": 0.00019999719910000806, - "loss": 46.0, - "step": 31170 - }, - { - "epoch": 2.3832406292409734, - "grad_norm": 0.004461311735212803, - "learning_rate": 0.00019999719892023067, - "loss": 46.0, - "step": 31171 - }, - { - "epoch": 2.383317086224363, - "grad_norm": 0.0008494371431879699, - "learning_rate": 0.0001999971987404475, - "loss": 46.0, - "step": 31172 - }, - { - "epoch": 2.383393543207753, - "grad_norm": 0.0008391728042624891, - "learning_rate": 0.00019999719856065857, - "loss": 46.0, - "step": 31173 - }, - { - "epoch": 2.3834700001911426, - "grad_norm": 0.0011764371301978827, - "learning_rate": 0.00019999719838086386, - "loss": 46.0, - "step": 31174 - }, - { - "epoch": 2.383546457174532, - "grad_norm": 0.0017578359693288803, - "learning_rate": 0.00019999719820106338, - "loss": 46.0, - "step": 31175 - }, - { - "epoch": 2.3836229141579217, - "grad_norm": 0.0004938683705404401, - "learning_rate": 0.00019999719802125715, - "loss": 46.0, - "step": 31176 - }, - { - "epoch": 2.3836993711413115, - "grad_norm": 0.000595057150349021, - "learning_rate": 0.00019999719784144512, - "loss": 46.0, - "step": 31177 - }, - { - "epoch": 2.3837758281247012, - "grad_norm": 0.0013430517865344882, - "learning_rate": 0.00019999719766162734, - "loss": 46.0, - "step": 31178 - }, - { - "epoch": 2.383852285108091, - "grad_norm": 0.0007416472653858364, - "learning_rate": 0.0001999971974818038, - "loss": 46.0, - "step": 31179 - }, - { - "epoch": 2.3839287420914808, - "grad_norm": 0.0010170770110562444, - "learning_rate": 0.00019999719730197447, - "loss": 46.0, - "step": 31180 - }, - { - "epoch": 2.3840051990748705, - "grad_norm": 0.0012651252327486873, - "learning_rate": 0.00019999719712213937, - "loss": 46.0, - "step": 31181 - }, - { - "epoch": 2.3840816560582603, - "grad_norm": 0.0011170330690219998, - "learning_rate": 0.0001999971969422985, - "loss": 46.0, - "step": 31182 - }, - { - "epoch": 2.38415811304165, - "grad_norm": 0.0007763446774333715, - "learning_rate": 0.0001999971967624519, - "loss": 46.0, - "step": 31183 - }, - { - "epoch": 2.38423457002504, - "grad_norm": 0.002614803845062852, - "learning_rate": 0.00019999719658259947, - "loss": 46.0, - "step": 31184 - }, - { - "epoch": 2.3843110270084296, - "grad_norm": 0.001550038461573422, - "learning_rate": 0.00019999719640274134, - "loss": 46.0, - "step": 31185 - }, - { - "epoch": 2.3843874839918193, - "grad_norm": 0.0008787338738329709, - "learning_rate": 0.00019999719622287737, - "loss": 46.0, - "step": 31186 - }, - { - "epoch": 2.3844639409752086, - "grad_norm": 0.0003545415820553899, - "learning_rate": 0.0001999971960430077, - "loss": 46.0, - "step": 31187 - }, - { - "epoch": 2.3845403979585984, - "grad_norm": 0.0014985463349148631, - "learning_rate": 0.00019999719586313218, - "loss": 46.0, - "step": 31188 - }, - { - "epoch": 2.384616854941988, - "grad_norm": 0.0008877535001374781, - "learning_rate": 0.00019999719568325098, - "loss": 46.0, - "step": 31189 - }, - { - "epoch": 2.384693311925378, - "grad_norm": 0.0011304514482617378, - "learning_rate": 0.00019999719550336392, - "loss": 46.0, - "step": 31190 - }, - { - "epoch": 2.3847697689087677, - "grad_norm": 0.0015305752167478204, - "learning_rate": 0.00019999719532347117, - "loss": 46.0, - "step": 31191 - }, - { - "epoch": 2.3848462258921574, - "grad_norm": 0.0006367014721035957, - "learning_rate": 0.0001999971951435726, - "loss": 46.0, - "step": 31192 - }, - { - "epoch": 2.384922682875547, - "grad_norm": 0.0004974103649146855, - "learning_rate": 0.0001999971949636683, - "loss": 46.0, - "step": 31193 - }, - { - "epoch": 2.384999139858937, - "grad_norm": 0.004650414921343327, - "learning_rate": 0.00019999719478375818, - "loss": 46.0, - "step": 31194 - }, - { - "epoch": 2.3850755968423267, - "grad_norm": 0.0012259904760867357, - "learning_rate": 0.0001999971946038423, - "loss": 46.0, - "step": 31195 - }, - { - "epoch": 2.3851520538257165, - "grad_norm": 0.0017547912430018187, - "learning_rate": 0.0001999971944239207, - "loss": 46.0, - "step": 31196 - }, - { - "epoch": 2.385228510809106, - "grad_norm": 0.000748968857806176, - "learning_rate": 0.0001999971942439933, - "loss": 46.0, - "step": 31197 - }, - { - "epoch": 2.3853049677924956, - "grad_norm": 0.001265846542082727, - "learning_rate": 0.00019999719406406014, - "loss": 46.0, - "step": 31198 - }, - { - "epoch": 2.3853814247758853, - "grad_norm": 0.0012564935022965074, - "learning_rate": 0.0001999971938841212, - "loss": 46.0, - "step": 31199 - }, - { - "epoch": 2.385457881759275, - "grad_norm": 0.0007321368320845068, - "learning_rate": 0.0001999971937041765, - "loss": 46.0, - "step": 31200 - }, - { - "epoch": 2.385534338742665, - "grad_norm": 0.001971021294593811, - "learning_rate": 0.000199997193524226, - "loss": 46.0, - "step": 31201 - }, - { - "epoch": 2.3856107957260546, - "grad_norm": 0.0019034226424992085, - "learning_rate": 0.00019999719334426976, - "loss": 46.0, - "step": 31202 - }, - { - "epoch": 2.3856872527094444, - "grad_norm": 0.0008287046803161502, - "learning_rate": 0.00019999719316430773, - "loss": 46.0, - "step": 31203 - }, - { - "epoch": 2.385763709692834, - "grad_norm": 0.0008830353617668152, - "learning_rate": 0.00019999719298433998, - "loss": 46.0, - "step": 31204 - }, - { - "epoch": 2.385840166676224, - "grad_norm": 0.0006927649956196547, - "learning_rate": 0.00019999719280436643, - "loss": 46.0, - "step": 31205 - }, - { - "epoch": 2.3859166236596137, - "grad_norm": 0.0009765359573066235, - "learning_rate": 0.0001999971926243871, - "loss": 46.0, - "step": 31206 - }, - { - "epoch": 2.3859930806430034, - "grad_norm": 0.0032678490970283747, - "learning_rate": 0.000199997192444402, - "loss": 46.0, - "step": 31207 - }, - { - "epoch": 2.386069537626393, - "grad_norm": 0.000645723775960505, - "learning_rate": 0.00019999719226441114, - "loss": 46.0, - "step": 31208 - }, - { - "epoch": 2.3861459946097825, - "grad_norm": 0.0046941922046244144, - "learning_rate": 0.0001999971920844145, - "loss": 46.0, - "step": 31209 - }, - { - "epoch": 2.3862224515931723, - "grad_norm": 0.0006700173835270107, - "learning_rate": 0.0001999971919044121, - "loss": 46.0, - "step": 31210 - }, - { - "epoch": 2.386298908576562, - "grad_norm": 0.0007795327110216022, - "learning_rate": 0.00019999719172440395, - "loss": 46.0, - "step": 31211 - }, - { - "epoch": 2.3863753655599518, - "grad_norm": 0.001132732955738902, - "learning_rate": 0.00019999719154438998, - "loss": 46.0, - "step": 31212 - }, - { - "epoch": 2.3864518225433415, - "grad_norm": 0.0007726813782937825, - "learning_rate": 0.0001999971913643703, - "loss": 46.0, - "step": 31213 - }, - { - "epoch": 2.3865282795267313, - "grad_norm": 0.000650379981379956, - "learning_rate": 0.00019999719118434482, - "loss": 46.0, - "step": 31214 - }, - { - "epoch": 2.386604736510121, - "grad_norm": 0.0025822732131928205, - "learning_rate": 0.00019999719100431356, - "loss": 46.0, - "step": 31215 - }, - { - "epoch": 2.386681193493511, - "grad_norm": 0.0006342643173411489, - "learning_rate": 0.00019999719082427654, - "loss": 46.0, - "step": 31216 - }, - { - "epoch": 2.3867576504769006, - "grad_norm": 0.0015285132685676217, - "learning_rate": 0.00019999719064423376, - "loss": 46.0, - "step": 31217 - }, - { - "epoch": 2.3868341074602903, - "grad_norm": 0.0009955320274457335, - "learning_rate": 0.0001999971904641852, - "loss": 46.0, - "step": 31218 - }, - { - "epoch": 2.3869105644436797, - "grad_norm": 0.0007325439946725965, - "learning_rate": 0.0001999971902841309, - "loss": 46.0, - "step": 31219 - }, - { - "epoch": 2.3869870214270694, - "grad_norm": 0.0027244549710303545, - "learning_rate": 0.0001999971901040708, - "loss": 46.0, - "step": 31220 - }, - { - "epoch": 2.387063478410459, - "grad_norm": 0.0036848834715783596, - "learning_rate": 0.00019999718992400493, - "loss": 46.0, - "step": 31221 - }, - { - "epoch": 2.387139935393849, - "grad_norm": 0.002210303209722042, - "learning_rate": 0.0001999971897439333, - "loss": 46.0, - "step": 31222 - }, - { - "epoch": 2.3872163923772387, - "grad_norm": 0.002620500046759844, - "learning_rate": 0.0001999971895638559, - "loss": 46.0, - "step": 31223 - }, - { - "epoch": 2.3872928493606285, - "grad_norm": 0.0015748973237350583, - "learning_rate": 0.00019999718938377273, - "loss": 46.0, - "step": 31224 - }, - { - "epoch": 2.3873693063440182, - "grad_norm": 0.0005100901471450925, - "learning_rate": 0.00019999718920368377, - "loss": 46.0, - "step": 31225 - }, - { - "epoch": 2.387445763327408, - "grad_norm": 0.0011751210549846292, - "learning_rate": 0.0001999971890235891, - "loss": 46.0, - "step": 31226 - }, - { - "epoch": 2.3875222203107977, - "grad_norm": 0.0005510689807124436, - "learning_rate": 0.00019999718884348858, - "loss": 46.0, - "step": 31227 - }, - { - "epoch": 2.3875986772941875, - "grad_norm": 0.0006459723808802664, - "learning_rate": 0.00019999718866338235, - "loss": 46.0, - "step": 31228 - }, - { - "epoch": 2.3876751342775773, - "grad_norm": 0.00062156020430848, - "learning_rate": 0.00019999718848327035, - "loss": 46.0, - "step": 31229 - }, - { - "epoch": 2.387751591260967, - "grad_norm": 0.0003964526695199311, - "learning_rate": 0.00019999718830315255, - "loss": 46.0, - "step": 31230 - }, - { - "epoch": 2.3878280482443563, - "grad_norm": 0.0015967487124726176, - "learning_rate": 0.000199997188123029, - "loss": 46.0, - "step": 31231 - }, - { - "epoch": 2.387904505227746, - "grad_norm": 0.0016473009018227458, - "learning_rate": 0.00019999718794289968, - "loss": 46.0, - "step": 31232 - }, - { - "epoch": 2.387980962211136, - "grad_norm": 0.000670640030875802, - "learning_rate": 0.00019999718776276458, - "loss": 46.0, - "step": 31233 - }, - { - "epoch": 2.3880574191945256, - "grad_norm": 0.0016202578553929925, - "learning_rate": 0.0001999971875826237, - "loss": 46.0, - "step": 31234 - }, - { - "epoch": 2.3881338761779154, - "grad_norm": 0.0014540303964167833, - "learning_rate": 0.00019999718740247707, - "loss": 46.0, - "step": 31235 - }, - { - "epoch": 2.388210333161305, - "grad_norm": 0.009749515913426876, - "learning_rate": 0.00019999718722232468, - "loss": 46.0, - "step": 31236 - }, - { - "epoch": 2.388286790144695, - "grad_norm": 0.0029620288405567408, - "learning_rate": 0.0001999971870421665, - "loss": 46.0, - "step": 31237 - }, - { - "epoch": 2.3883632471280847, - "grad_norm": 0.0009739661472849548, - "learning_rate": 0.00019999718686200256, - "loss": 46.0, - "step": 31238 - }, - { - "epoch": 2.3884397041114744, - "grad_norm": 0.0006199827766977251, - "learning_rate": 0.00019999718668183285, - "loss": 46.0, - "step": 31239 - }, - { - "epoch": 2.3885161610948638, - "grad_norm": 0.000636140292044729, - "learning_rate": 0.00019999718650165737, - "loss": 46.0, - "step": 31240 - }, - { - "epoch": 2.3885926180782535, - "grad_norm": 0.00253242626786232, - "learning_rate": 0.00019999718632147611, - "loss": 46.0, - "step": 31241 - }, - { - "epoch": 2.3886690750616433, - "grad_norm": 0.0029478955548256636, - "learning_rate": 0.0001999971861412891, - "loss": 46.0, - "step": 31242 - }, - { - "epoch": 2.388745532045033, - "grad_norm": 0.001852401182986796, - "learning_rate": 0.00019999718596109634, - "loss": 46.0, - "step": 31243 - }, - { - "epoch": 2.388821989028423, - "grad_norm": 0.000414897920563817, - "learning_rate": 0.00019999718578089776, - "loss": 46.0, - "step": 31244 - }, - { - "epoch": 2.3888984460118126, - "grad_norm": 0.0010077353799715638, - "learning_rate": 0.00019999718560069341, - "loss": 46.0, - "step": 31245 - }, - { - "epoch": 2.3889749029952023, - "grad_norm": 0.0010178263764828444, - "learning_rate": 0.00019999718542048335, - "loss": 46.0, - "step": 31246 - }, - { - "epoch": 2.389051359978592, - "grad_norm": 0.000524612027220428, - "learning_rate": 0.00019999718524026748, - "loss": 46.0, - "step": 31247 - }, - { - "epoch": 2.389127816961982, - "grad_norm": 0.001703913207165897, - "learning_rate": 0.00019999718506004586, - "loss": 46.0, - "step": 31248 - }, - { - "epoch": 2.3892042739453716, - "grad_norm": 0.0011087754974141717, - "learning_rate": 0.00019999718487981845, - "loss": 46.0, - "step": 31249 - }, - { - "epoch": 2.3892807309287614, - "grad_norm": 0.0014830634463578463, - "learning_rate": 0.00019999718469958526, - "loss": 46.0, - "step": 31250 - }, - { - "epoch": 2.389357187912151, - "grad_norm": 0.0029669131617993116, - "learning_rate": 0.00019999718451934633, - "loss": 46.0, - "step": 31251 - }, - { - "epoch": 2.389433644895541, - "grad_norm": 0.0004268599150236696, - "learning_rate": 0.00019999718433910162, - "loss": 46.0, - "step": 31252 - }, - { - "epoch": 2.38951010187893, - "grad_norm": 0.000671941670589149, - "learning_rate": 0.00019999718415885114, - "loss": 46.0, - "step": 31253 - }, - { - "epoch": 2.38958655886232, - "grad_norm": 0.0008306749514304101, - "learning_rate": 0.00019999718397859488, - "loss": 46.0, - "step": 31254 - }, - { - "epoch": 2.3896630158457097, - "grad_norm": 0.0003019349533133209, - "learning_rate": 0.00019999718379833288, - "loss": 46.0, - "step": 31255 - }, - { - "epoch": 2.3897394728290995, - "grad_norm": 0.0019008255330845714, - "learning_rate": 0.0001999971836180651, - "loss": 46.0, - "step": 31256 - }, - { - "epoch": 2.3898159298124892, - "grad_norm": 0.001142167137004435, - "learning_rate": 0.00019999718343779153, - "loss": 46.0, - "step": 31257 - }, - { - "epoch": 2.389892386795879, - "grad_norm": 0.0017596747493371367, - "learning_rate": 0.0001999971832575122, - "loss": 46.0, - "step": 31258 - }, - { - "epoch": 2.3899688437792688, - "grad_norm": 0.0011836143676191568, - "learning_rate": 0.00019999718307722711, - "loss": 46.0, - "step": 31259 - }, - { - "epoch": 2.3900453007626585, - "grad_norm": 0.0012896500993520021, - "learning_rate": 0.00019999718289693625, - "loss": 46.0, - "step": 31260 - }, - { - "epoch": 2.3901217577460483, - "grad_norm": 0.0018199884798377752, - "learning_rate": 0.0001999971827166396, - "loss": 46.0, - "step": 31261 - }, - { - "epoch": 2.3901982147294376, - "grad_norm": 0.0008378308266401291, - "learning_rate": 0.0001999971825363372, - "loss": 46.0, - "step": 31262 - }, - { - "epoch": 2.3902746717128274, - "grad_norm": 0.0016890474362298846, - "learning_rate": 0.00019999718235602903, - "loss": 46.0, - "step": 31263 - }, - { - "epoch": 2.390351128696217, - "grad_norm": 0.004787810146808624, - "learning_rate": 0.00019999718217571507, - "loss": 46.0, - "step": 31264 - }, - { - "epoch": 2.390427585679607, - "grad_norm": 0.00042374138138256967, - "learning_rate": 0.00019999718199539536, - "loss": 46.0, - "step": 31265 - }, - { - "epoch": 2.3905040426629967, - "grad_norm": 0.0007421452319249511, - "learning_rate": 0.00019999718181506988, - "loss": 46.0, - "step": 31266 - }, - { - "epoch": 2.3905804996463864, - "grad_norm": 0.0014471496688202024, - "learning_rate": 0.00019999718163473865, - "loss": 46.0, - "step": 31267 - }, - { - "epoch": 2.390656956629776, - "grad_norm": 0.000660435005556792, - "learning_rate": 0.00019999718145440162, - "loss": 46.0, - "step": 31268 - }, - { - "epoch": 2.390733413613166, - "grad_norm": 0.00048395595513284206, - "learning_rate": 0.00019999718127405885, - "loss": 46.0, - "step": 31269 - }, - { - "epoch": 2.3908098705965557, - "grad_norm": 0.0012016019318252802, - "learning_rate": 0.00019999718109371028, - "loss": 46.0, - "step": 31270 - }, - { - "epoch": 2.3908863275799455, - "grad_norm": 0.0011572001967579126, - "learning_rate": 0.00019999718091335595, - "loss": 46.0, - "step": 31271 - }, - { - "epoch": 2.390962784563335, - "grad_norm": 0.0034466516226530075, - "learning_rate": 0.00019999718073299586, - "loss": 46.0, - "step": 31272 - }, - { - "epoch": 2.391039241546725, - "grad_norm": 0.0009810482151806355, - "learning_rate": 0.00019999718055263, - "loss": 46.0, - "step": 31273 - }, - { - "epoch": 2.3911156985301147, - "grad_norm": 0.0008015355560928583, - "learning_rate": 0.00019999718037225835, - "loss": 46.0, - "step": 31274 - }, - { - "epoch": 2.391192155513504, - "grad_norm": 0.0008540457347407937, - "learning_rate": 0.00019999718019188096, - "loss": 46.0, - "step": 31275 - }, - { - "epoch": 2.391268612496894, - "grad_norm": 0.0015199249610304832, - "learning_rate": 0.00019999718001149778, - "loss": 46.0, - "step": 31276 - }, - { - "epoch": 2.3913450694802836, - "grad_norm": 0.001088008750230074, - "learning_rate": 0.00019999717983110884, - "loss": 46.0, - "step": 31277 - }, - { - "epoch": 2.3914215264636733, - "grad_norm": 0.0005309219122864306, - "learning_rate": 0.0001999971796507141, - "loss": 46.0, - "step": 31278 - }, - { - "epoch": 2.391497983447063, - "grad_norm": 0.0019616049248725176, - "learning_rate": 0.00019999717947031365, - "loss": 46.0, - "step": 31279 - }, - { - "epoch": 2.391574440430453, - "grad_norm": 0.007848215289413929, - "learning_rate": 0.0001999971792899074, - "loss": 46.0, - "step": 31280 - }, - { - "epoch": 2.3916508974138426, - "grad_norm": 0.0005991210928186774, - "learning_rate": 0.00019999717910949535, - "loss": 46.0, - "step": 31281 - }, - { - "epoch": 2.3917273543972324, - "grad_norm": 0.0008754156297072768, - "learning_rate": 0.0001999971789290776, - "loss": 46.0, - "step": 31282 - }, - { - "epoch": 2.391803811380622, - "grad_norm": 0.0019264626316726208, - "learning_rate": 0.00019999717874865402, - "loss": 46.0, - "step": 31283 - }, - { - "epoch": 2.3918802683640115, - "grad_norm": 0.0007541723316535354, - "learning_rate": 0.00019999717856822468, - "loss": 46.0, - "step": 31284 - }, - { - "epoch": 2.3919567253474012, - "grad_norm": 0.0006770867039449513, - "learning_rate": 0.0001999971783877896, - "loss": 46.0, - "step": 31285 - }, - { - "epoch": 2.392033182330791, - "grad_norm": 0.001182405510917306, - "learning_rate": 0.00019999717820734874, - "loss": 46.0, - "step": 31286 - }, - { - "epoch": 2.3921096393141807, - "grad_norm": 0.0008292681304737926, - "learning_rate": 0.0001999971780269021, - "loss": 46.0, - "step": 31287 - }, - { - "epoch": 2.3921860962975705, - "grad_norm": 0.0008599006687290967, - "learning_rate": 0.0001999971778464497, - "loss": 46.0, - "step": 31288 - }, - { - "epoch": 2.3922625532809603, - "grad_norm": 0.0004576443461701274, - "learning_rate": 0.0001999971776659915, - "loss": 46.0, - "step": 31289 - }, - { - "epoch": 2.39233901026435, - "grad_norm": 0.001664156443439424, - "learning_rate": 0.00019999717748552757, - "loss": 46.0, - "step": 31290 - }, - { - "epoch": 2.39241546724774, - "grad_norm": 0.0005999609711579978, - "learning_rate": 0.00019999717730505786, - "loss": 46.0, - "step": 31291 - }, - { - "epoch": 2.3924919242311296, - "grad_norm": 0.0020037388894706964, - "learning_rate": 0.00019999717712458238, - "loss": 46.0, - "step": 31292 - }, - { - "epoch": 2.3925683812145193, - "grad_norm": 0.0004422641941346228, - "learning_rate": 0.00019999717694410115, - "loss": 46.0, - "step": 31293 - }, - { - "epoch": 2.392644838197909, - "grad_norm": 0.0010603275150060654, - "learning_rate": 0.0001999971767636141, - "loss": 46.0, - "step": 31294 - }, - { - "epoch": 2.392721295181299, - "grad_norm": 0.001079891575500369, - "learning_rate": 0.00019999717658312132, - "loss": 46.0, - "step": 31295 - }, - { - "epoch": 2.392797752164688, - "grad_norm": 0.004955118987709284, - "learning_rate": 0.00019999717640262278, - "loss": 46.0, - "step": 31296 - }, - { - "epoch": 2.392874209148078, - "grad_norm": 0.0013935695169493556, - "learning_rate": 0.00019999717622211843, - "loss": 46.0, - "step": 31297 - }, - { - "epoch": 2.3929506661314677, - "grad_norm": 0.0029185789171606302, - "learning_rate": 0.00019999717604160834, - "loss": 46.0, - "step": 31298 - }, - { - "epoch": 2.3930271231148574, - "grad_norm": 0.0003364873700775206, - "learning_rate": 0.00019999717586109247, - "loss": 46.0, - "step": 31299 - }, - { - "epoch": 2.393103580098247, - "grad_norm": 0.002857153071090579, - "learning_rate": 0.00019999717568057083, - "loss": 46.0, - "step": 31300 - }, - { - "epoch": 2.393180037081637, - "grad_norm": 0.0005324921803548932, - "learning_rate": 0.00019999717550004342, - "loss": 46.0, - "step": 31301 - }, - { - "epoch": 2.3932564940650267, - "grad_norm": 0.003195501398295164, - "learning_rate": 0.00019999717531951023, - "loss": 46.0, - "step": 31302 - }, - { - "epoch": 2.3933329510484165, - "grad_norm": 0.0012877669651061296, - "learning_rate": 0.0001999971751389713, - "loss": 46.0, - "step": 31303 - }, - { - "epoch": 2.3934094080318062, - "grad_norm": 0.0008315282175317407, - "learning_rate": 0.0001999971749584266, - "loss": 46.0, - "step": 31304 - }, - { - "epoch": 2.393485865015196, - "grad_norm": 0.0009507263312116265, - "learning_rate": 0.0001999971747778761, - "loss": 46.0, - "step": 31305 - }, - { - "epoch": 2.3935623219985853, - "grad_norm": 0.002657354576513171, - "learning_rate": 0.00019999717459731983, - "loss": 46.0, - "step": 31306 - }, - { - "epoch": 2.393638778981975, - "grad_norm": 0.002530923578888178, - "learning_rate": 0.00019999717441675783, - "loss": 46.0, - "step": 31307 - }, - { - "epoch": 2.393715235965365, - "grad_norm": 0.0006451071822084486, - "learning_rate": 0.00019999717423619005, - "loss": 46.0, - "step": 31308 - }, - { - "epoch": 2.3937916929487546, - "grad_norm": 0.004086668603122234, - "learning_rate": 0.00019999717405561648, - "loss": 46.0, - "step": 31309 - }, - { - "epoch": 2.3938681499321444, - "grad_norm": 0.0005853670299984515, - "learning_rate": 0.00019999717387503716, - "loss": 46.0, - "step": 31310 - }, - { - "epoch": 2.393944606915534, - "grad_norm": 0.001546251936815679, - "learning_rate": 0.00019999717369445207, - "loss": 46.0, - "step": 31311 - }, - { - "epoch": 2.394021063898924, - "grad_norm": 0.00194163725245744, - "learning_rate": 0.00019999717351386117, - "loss": 46.0, - "step": 31312 - }, - { - "epoch": 2.3940975208823136, - "grad_norm": 0.0006438237614929676, - "learning_rate": 0.00019999717333326453, - "loss": 46.0, - "step": 31313 - }, - { - "epoch": 2.3941739778657034, - "grad_norm": 0.0033377467188984156, - "learning_rate": 0.00019999717315266212, - "loss": 46.0, - "step": 31314 - }, - { - "epoch": 2.394250434849093, - "grad_norm": 0.0006141770863905549, - "learning_rate": 0.00019999717297205396, - "loss": 46.0, - "step": 31315 - }, - { - "epoch": 2.394326891832483, - "grad_norm": 0.0017294621793553233, - "learning_rate": 0.00019999717279144, - "loss": 46.0, - "step": 31316 - }, - { - "epoch": 2.3944033488158727, - "grad_norm": 0.00232842774130404, - "learning_rate": 0.0001999971726108203, - "loss": 46.0, - "step": 31317 - }, - { - "epoch": 2.394479805799262, - "grad_norm": 0.000787562457844615, - "learning_rate": 0.0001999971724301948, - "loss": 46.0, - "step": 31318 - }, - { - "epoch": 2.3945562627826518, - "grad_norm": 0.002118197735399008, - "learning_rate": 0.0001999971722495636, - "loss": 46.0, - "step": 31319 - }, - { - "epoch": 2.3946327197660415, - "grad_norm": 0.0007089497521519661, - "learning_rate": 0.00019999717206892653, - "loss": 46.0, - "step": 31320 - }, - { - "epoch": 2.3947091767494313, - "grad_norm": 0.0035323509946465492, - "learning_rate": 0.00019999717188828376, - "loss": 46.0, - "step": 31321 - }, - { - "epoch": 2.394785633732821, - "grad_norm": 0.0014900483656674623, - "learning_rate": 0.0001999971717076352, - "loss": 46.0, - "step": 31322 - }, - { - "epoch": 2.394862090716211, - "grad_norm": 0.00037516685551963747, - "learning_rate": 0.00019999717152698084, - "loss": 46.0, - "step": 31323 - }, - { - "epoch": 2.3949385476996006, - "grad_norm": 0.001524752820841968, - "learning_rate": 0.00019999717134632077, - "loss": 46.0, - "step": 31324 - }, - { - "epoch": 2.3950150046829903, - "grad_norm": 0.001080443151295185, - "learning_rate": 0.00019999717116565488, - "loss": 46.0, - "step": 31325 - }, - { - "epoch": 2.39509146166638, - "grad_norm": 0.0011418763315305114, - "learning_rate": 0.00019999717098498324, - "loss": 46.0, - "step": 31326 - }, - { - "epoch": 2.39516791864977, - "grad_norm": 0.0006253799074329436, - "learning_rate": 0.00019999717080430586, - "loss": 46.0, - "step": 31327 - }, - { - "epoch": 2.395244375633159, - "grad_norm": 0.0028666167054325342, - "learning_rate": 0.00019999717062362267, - "loss": 46.0, - "step": 31328 - }, - { - "epoch": 2.395320832616549, - "grad_norm": 0.0007449330296367407, - "learning_rate": 0.0001999971704429337, - "loss": 46.0, - "step": 31329 - }, - { - "epoch": 2.3953972895999387, - "grad_norm": 0.001074870233424008, - "learning_rate": 0.000199997170262239, - "loss": 46.0, - "step": 31330 - }, - { - "epoch": 2.3954737465833285, - "grad_norm": 0.0013929917477071285, - "learning_rate": 0.00019999717008153852, - "loss": 46.0, - "step": 31331 - }, - { - "epoch": 2.395550203566718, - "grad_norm": 0.0007614702917635441, - "learning_rate": 0.00019999716990083227, - "loss": 46.0, - "step": 31332 - }, - { - "epoch": 2.395626660550108, - "grad_norm": 0.0010670649353414774, - "learning_rate": 0.00019999716972012027, - "loss": 46.0, - "step": 31333 - }, - { - "epoch": 2.3957031175334977, - "grad_norm": 0.0053705996833741665, - "learning_rate": 0.00019999716953940247, - "loss": 46.0, - "step": 31334 - }, - { - "epoch": 2.3957795745168875, - "grad_norm": 0.001210033311508596, - "learning_rate": 0.0001999971693586789, - "loss": 46.0, - "step": 31335 - }, - { - "epoch": 2.3958560315002773, - "grad_norm": 0.0006040573352947831, - "learning_rate": 0.00019999716917794955, - "loss": 46.0, - "step": 31336 - }, - { - "epoch": 2.395932488483667, - "grad_norm": 0.0007626016740687191, - "learning_rate": 0.00019999716899721446, - "loss": 46.0, - "step": 31337 - }, - { - "epoch": 2.396008945467057, - "grad_norm": 0.0016253925859928131, - "learning_rate": 0.0001999971688164736, - "loss": 46.0, - "step": 31338 - }, - { - "epoch": 2.3960854024504465, - "grad_norm": 0.0010328862117603421, - "learning_rate": 0.00019999716863572695, - "loss": 46.0, - "step": 31339 - }, - { - "epoch": 2.396161859433836, - "grad_norm": 0.000635054602753371, - "learning_rate": 0.00019999716845497454, - "loss": 46.0, - "step": 31340 - }, - { - "epoch": 2.3962383164172256, - "grad_norm": 0.0006187382969073951, - "learning_rate": 0.00019999716827421638, - "loss": 46.0, - "step": 31341 - }, - { - "epoch": 2.3963147734006154, - "grad_norm": 0.0009499857551418245, - "learning_rate": 0.00019999716809345242, - "loss": 46.0, - "step": 31342 - }, - { - "epoch": 2.396391230384005, - "grad_norm": 0.002331551630049944, - "learning_rate": 0.00019999716791268272, - "loss": 46.0, - "step": 31343 - }, - { - "epoch": 2.396467687367395, - "grad_norm": 0.0027765617705881596, - "learning_rate": 0.0001999971677319072, - "loss": 46.0, - "step": 31344 - }, - { - "epoch": 2.3965441443507847, - "grad_norm": 0.0008973247604444623, - "learning_rate": 0.00019999716755112596, - "loss": 46.0, - "step": 31345 - }, - { - "epoch": 2.3966206013341744, - "grad_norm": 0.0007501794607378542, - "learning_rate": 0.00019999716737033893, - "loss": 46.0, - "step": 31346 - }, - { - "epoch": 2.396697058317564, - "grad_norm": 0.0018190309638157487, - "learning_rate": 0.00019999716718954616, - "loss": 46.0, - "step": 31347 - }, - { - "epoch": 2.396773515300954, - "grad_norm": 0.00607098825275898, - "learning_rate": 0.0001999971670087476, - "loss": 46.0, - "step": 31348 - }, - { - "epoch": 2.3968499722843437, - "grad_norm": 0.0010250707855448127, - "learning_rate": 0.00019999716682794324, - "loss": 46.0, - "step": 31349 - }, - { - "epoch": 2.396926429267733, - "grad_norm": 0.001407513045705855, - "learning_rate": 0.00019999716664713315, - "loss": 46.0, - "step": 31350 - }, - { - "epoch": 2.397002886251123, - "grad_norm": 0.001078450120985508, - "learning_rate": 0.00019999716646631728, - "loss": 46.0, - "step": 31351 - }, - { - "epoch": 2.3970793432345125, - "grad_norm": 0.0011363965459167957, - "learning_rate": 0.00019999716628549562, - "loss": 46.0, - "step": 31352 - }, - { - "epoch": 2.3971558002179023, - "grad_norm": 0.0013813456753268838, - "learning_rate": 0.00019999716610466823, - "loss": 46.0, - "step": 31353 - }, - { - "epoch": 2.397232257201292, - "grad_norm": 0.0007420259062200785, - "learning_rate": 0.00019999716592383507, - "loss": 46.0, - "step": 31354 - }, - { - "epoch": 2.397308714184682, - "grad_norm": 0.0005797110497951508, - "learning_rate": 0.0001999971657429961, - "loss": 46.0, - "step": 31355 - }, - { - "epoch": 2.3973851711680716, - "grad_norm": 0.0020147799514234066, - "learning_rate": 0.0001999971655621514, - "loss": 46.0, - "step": 31356 - }, - { - "epoch": 2.3974616281514614, - "grad_norm": 0.0009422108996659517, - "learning_rate": 0.0001999971653813009, - "loss": 46.0, - "step": 31357 - }, - { - "epoch": 2.397538085134851, - "grad_norm": 0.0028956353198736906, - "learning_rate": 0.00019999716520044465, - "loss": 46.0, - "step": 31358 - }, - { - "epoch": 2.397614542118241, - "grad_norm": 0.0019463779171928763, - "learning_rate": 0.00019999716501958262, - "loss": 46.0, - "step": 31359 - }, - { - "epoch": 2.3976909991016306, - "grad_norm": 0.0006795019144192338, - "learning_rate": 0.00019999716483871482, - "loss": 46.0, - "step": 31360 - }, - { - "epoch": 2.3977674560850204, - "grad_norm": 0.0010647271992638707, - "learning_rate": 0.00019999716465784125, - "loss": 46.0, - "step": 31361 - }, - { - "epoch": 2.3978439130684097, - "grad_norm": 0.004848768934607506, - "learning_rate": 0.00019999716447696193, - "loss": 46.0, - "step": 31362 - }, - { - "epoch": 2.3979203700517995, - "grad_norm": 0.0045928750187158585, - "learning_rate": 0.00019999716429607681, - "loss": 46.0, - "step": 31363 - }, - { - "epoch": 2.3979968270351892, - "grad_norm": 0.0007254926022142172, - "learning_rate": 0.00019999716411518598, - "loss": 46.0, - "step": 31364 - }, - { - "epoch": 2.398073284018579, - "grad_norm": 0.0014428653521463275, - "learning_rate": 0.00019999716393428934, - "loss": 46.0, - "step": 31365 - }, - { - "epoch": 2.3981497410019688, - "grad_norm": 0.002059647347778082, - "learning_rate": 0.0001999971637533869, - "loss": 46.0, - "step": 31366 - }, - { - "epoch": 2.3982261979853585, - "grad_norm": 0.0028466309886425734, - "learning_rate": 0.00019999716357247874, - "loss": 46.0, - "step": 31367 - }, - { - "epoch": 2.3983026549687483, - "grad_norm": 0.0023598556872457266, - "learning_rate": 0.00019999716339156478, - "loss": 46.0, - "step": 31368 - }, - { - "epoch": 2.398379111952138, - "grad_norm": 0.0007187400478869677, - "learning_rate": 0.00019999716321064508, - "loss": 46.0, - "step": 31369 - }, - { - "epoch": 2.398455568935528, - "grad_norm": 0.003509592032060027, - "learning_rate": 0.00019999716302971957, - "loss": 46.0, - "step": 31370 - }, - { - "epoch": 2.398532025918917, - "grad_norm": 0.0012699612416327, - "learning_rate": 0.00019999716284878832, - "loss": 46.0, - "step": 31371 - }, - { - "epoch": 2.398608482902307, - "grad_norm": 0.0013038552133366466, - "learning_rate": 0.0001999971626678513, - "loss": 46.0, - "step": 31372 - }, - { - "epoch": 2.3986849398856966, - "grad_norm": 0.002157720737159252, - "learning_rate": 0.0001999971624869085, - "loss": 46.0, - "step": 31373 - }, - { - "epoch": 2.3987613968690864, - "grad_norm": 0.0009187221294268966, - "learning_rate": 0.00019999716230595995, - "loss": 46.0, - "step": 31374 - }, - { - "epoch": 2.398837853852476, - "grad_norm": 0.0021000653505325317, - "learning_rate": 0.00019999716212500558, - "loss": 46.0, - "step": 31375 - }, - { - "epoch": 2.398914310835866, - "grad_norm": 0.0013093932066112757, - "learning_rate": 0.00019999716194404548, - "loss": 46.0, - "step": 31376 - }, - { - "epoch": 2.3989907678192557, - "grad_norm": 0.0006359801045618951, - "learning_rate": 0.00019999716176307962, - "loss": 46.0, - "step": 31377 - }, - { - "epoch": 2.3990672248026454, - "grad_norm": 0.0007734121754765511, - "learning_rate": 0.000199997161582108, - "loss": 46.0, - "step": 31378 - }, - { - "epoch": 2.399143681786035, - "grad_norm": 0.0008498116512782872, - "learning_rate": 0.0001999971614011306, - "loss": 46.0, - "step": 31379 - }, - { - "epoch": 2.399220138769425, - "grad_norm": 0.004063235595822334, - "learning_rate": 0.0001999971612201474, - "loss": 46.0, - "step": 31380 - }, - { - "epoch": 2.3992965957528147, - "grad_norm": 0.004265876021236181, - "learning_rate": 0.00019999716103915845, - "loss": 46.0, - "step": 31381 - }, - { - "epoch": 2.3993730527362045, - "grad_norm": 0.0007822285988368094, - "learning_rate": 0.00019999716085816372, - "loss": 46.0, - "step": 31382 - }, - { - "epoch": 2.3994495097195943, - "grad_norm": 0.0044850800186395645, - "learning_rate": 0.00019999716067716324, - "loss": 46.0, - "step": 31383 - }, - { - "epoch": 2.3995259667029836, - "grad_norm": 0.0004107136046513915, - "learning_rate": 0.000199997160496157, - "loss": 46.0, - "step": 31384 - }, - { - "epoch": 2.3996024236863733, - "grad_norm": 0.0012389696203172207, - "learning_rate": 0.00019999716031514496, - "loss": 46.0, - "step": 31385 - }, - { - "epoch": 2.399678880669763, - "grad_norm": 0.0011143486481159925, - "learning_rate": 0.00019999716013412714, - "loss": 46.0, - "step": 31386 - }, - { - "epoch": 2.399755337653153, - "grad_norm": 0.001441480708308518, - "learning_rate": 0.0001999971599531036, - "loss": 46.0, - "step": 31387 - }, - { - "epoch": 2.3998317946365426, - "grad_norm": 0.0011616770643740892, - "learning_rate": 0.00019999715977207428, - "loss": 46.0, - "step": 31388 - }, - { - "epoch": 2.3999082516199324, - "grad_norm": 0.0009899999713525176, - "learning_rate": 0.00019999715959103919, - "loss": 46.0, - "step": 31389 - }, - { - "epoch": 2.399984708603322, - "grad_norm": 0.000577994214836508, - "learning_rate": 0.0001999971594099983, - "loss": 46.0, - "step": 31390 - }, - { - "epoch": 2.400061165586712, - "grad_norm": 0.0006987660308368504, - "learning_rate": 0.00019999715922895166, - "loss": 46.0, - "step": 31391 - }, - { - "epoch": 2.4001376225701017, - "grad_norm": 0.0009973723208531737, - "learning_rate": 0.00019999715904789924, - "loss": 46.0, - "step": 31392 - }, - { - "epoch": 2.400214079553491, - "grad_norm": 0.000840356049593538, - "learning_rate": 0.00019999715886684106, - "loss": 46.0, - "step": 31393 - }, - { - "epoch": 2.4002905365368807, - "grad_norm": 0.001468954375013709, - "learning_rate": 0.00019999715868577707, - "loss": 46.0, - "step": 31394 - }, - { - "epoch": 2.4003669935202705, - "grad_norm": 0.0015069774817675352, - "learning_rate": 0.0001999971585047074, - "loss": 46.0, - "step": 31395 - }, - { - "epoch": 2.4004434505036603, - "grad_norm": 0.0023700129240751266, - "learning_rate": 0.0001999971583236319, - "loss": 46.0, - "step": 31396 - }, - { - "epoch": 2.40051990748705, - "grad_norm": 0.005073704291135073, - "learning_rate": 0.00019999715814255064, - "loss": 46.0, - "step": 31397 - }, - { - "epoch": 2.40059636447044, - "grad_norm": 0.00040106047526933253, - "learning_rate": 0.00019999715796146361, - "loss": 46.0, - "step": 31398 - }, - { - "epoch": 2.4006728214538295, - "grad_norm": 0.0010780381271615624, - "learning_rate": 0.00019999715778037082, - "loss": 46.0, - "step": 31399 - }, - { - "epoch": 2.4007492784372193, - "grad_norm": 0.0011490309843793511, - "learning_rate": 0.00019999715759927224, - "loss": 46.0, - "step": 31400 - }, - { - "epoch": 2.400825735420609, - "grad_norm": 0.002880827756598592, - "learning_rate": 0.0001999971574181679, - "loss": 46.0, - "step": 31401 - }, - { - "epoch": 2.400902192403999, - "grad_norm": 0.0007725796313025057, - "learning_rate": 0.0001999971572370578, - "loss": 46.0, - "step": 31402 - }, - { - "epoch": 2.4009786493873886, - "grad_norm": 0.0008708566310815513, - "learning_rate": 0.00019999715705594192, - "loss": 46.0, - "step": 31403 - }, - { - "epoch": 2.4010551063707783, - "grad_norm": 0.0014673697296530008, - "learning_rate": 0.00019999715687482028, - "loss": 46.0, - "step": 31404 - }, - { - "epoch": 2.401131563354168, - "grad_norm": 0.0008105520391836762, - "learning_rate": 0.00019999715669369287, - "loss": 46.0, - "step": 31405 - }, - { - "epoch": 2.4012080203375574, - "grad_norm": 0.0025470424443483353, - "learning_rate": 0.0001999971565125597, - "loss": 46.0, - "step": 31406 - }, - { - "epoch": 2.401284477320947, - "grad_norm": 0.0012722652172669768, - "learning_rate": 0.00019999715633142073, - "loss": 46.0, - "step": 31407 - }, - { - "epoch": 2.401360934304337, - "grad_norm": 0.0007586120627820492, - "learning_rate": 0.00019999715615027602, - "loss": 46.0, - "step": 31408 - }, - { - "epoch": 2.4014373912877267, - "grad_norm": 0.0020619595889002085, - "learning_rate": 0.00019999715596912554, - "loss": 46.0, - "step": 31409 - }, - { - "epoch": 2.4015138482711165, - "grad_norm": 0.0012643811060115695, - "learning_rate": 0.00019999715578796927, - "loss": 46.0, - "step": 31410 - }, - { - "epoch": 2.4015903052545062, - "grad_norm": 0.0006271416204981506, - "learning_rate": 0.00019999715560680724, - "loss": 46.0, - "step": 31411 - }, - { - "epoch": 2.401666762237896, - "grad_norm": 0.0019694161601364613, - "learning_rate": 0.00019999715542563947, - "loss": 46.0, - "step": 31412 - }, - { - "epoch": 2.4017432192212858, - "grad_norm": 0.002762384945526719, - "learning_rate": 0.0001999971552444659, - "loss": 46.0, - "step": 31413 - }, - { - "epoch": 2.4018196762046755, - "grad_norm": 0.002677842741832137, - "learning_rate": 0.00019999715506328655, - "loss": 46.0, - "step": 31414 - }, - { - "epoch": 2.401896133188065, - "grad_norm": 0.004689258988946676, - "learning_rate": 0.00019999715488210144, - "loss": 46.0, - "step": 31415 - }, - { - "epoch": 2.4019725901714546, - "grad_norm": 0.000655218493193388, - "learning_rate": 0.00019999715470091057, - "loss": 46.0, - "step": 31416 - }, - { - "epoch": 2.4020490471548444, - "grad_norm": 0.0021223174408078194, - "learning_rate": 0.00019999715451971394, - "loss": 46.0, - "step": 31417 - }, - { - "epoch": 2.402125504138234, - "grad_norm": 0.020028213039040565, - "learning_rate": 0.00019999715433851152, - "loss": 46.0, - "step": 31418 - }, - { - "epoch": 2.402201961121624, - "grad_norm": 0.0004493211454246193, - "learning_rate": 0.00019999715415730334, - "loss": 46.0, - "step": 31419 - }, - { - "epoch": 2.4022784181050136, - "grad_norm": 0.005673614796251059, - "learning_rate": 0.00019999715397608938, - "loss": 46.0, - "step": 31420 - }, - { - "epoch": 2.4023548750884034, - "grad_norm": 0.0017824191600084305, - "learning_rate": 0.00019999715379486968, - "loss": 46.0, - "step": 31421 - }, - { - "epoch": 2.402431332071793, - "grad_norm": 0.000902598025277257, - "learning_rate": 0.00019999715361364417, - "loss": 46.0, - "step": 31422 - }, - { - "epoch": 2.402507789055183, - "grad_norm": 0.0007216076483018696, - "learning_rate": 0.0001999971534324129, - "loss": 46.0, - "step": 31423 - }, - { - "epoch": 2.4025842460385727, - "grad_norm": 0.004235380329191685, - "learning_rate": 0.00019999715325117587, - "loss": 46.0, - "step": 31424 - }, - { - "epoch": 2.4026607030219624, - "grad_norm": 0.003838630858808756, - "learning_rate": 0.00019999715306993307, - "loss": 46.0, - "step": 31425 - }, - { - "epoch": 2.402737160005352, - "grad_norm": 0.0007197781815193594, - "learning_rate": 0.00019999715288868453, - "loss": 46.0, - "step": 31426 - }, - { - "epoch": 2.4028136169887415, - "grad_norm": 0.0013190425233915448, - "learning_rate": 0.0001999971527074302, - "loss": 46.0, - "step": 31427 - }, - { - "epoch": 2.4028900739721313, - "grad_norm": 0.00269205286167562, - "learning_rate": 0.0001999971525261701, - "loss": 46.0, - "step": 31428 - }, - { - "epoch": 2.402966530955521, - "grad_norm": 0.0007927425322122872, - "learning_rate": 0.0001999971523449042, - "loss": 46.0, - "step": 31429 - }, - { - "epoch": 2.403042987938911, - "grad_norm": 0.0002986110339406878, - "learning_rate": 0.00019999715216363257, - "loss": 46.0, - "step": 31430 - }, - { - "epoch": 2.4031194449223006, - "grad_norm": 0.0008843218674883246, - "learning_rate": 0.00019999715198235513, - "loss": 46.0, - "step": 31431 - }, - { - "epoch": 2.4031959019056903, - "grad_norm": 0.0009437150438316166, - "learning_rate": 0.00019999715180107197, - "loss": 46.0, - "step": 31432 - }, - { - "epoch": 2.40327235888908, - "grad_norm": 0.00034062284976243973, - "learning_rate": 0.00019999715161978302, - "loss": 46.0, - "step": 31433 - }, - { - "epoch": 2.40334881587247, - "grad_norm": 0.0017825865652412176, - "learning_rate": 0.00019999715143848829, - "loss": 46.0, - "step": 31434 - }, - { - "epoch": 2.4034252728558596, - "grad_norm": 0.002644026419147849, - "learning_rate": 0.00019999715125718778, - "loss": 46.0, - "step": 31435 - }, - { - "epoch": 2.4035017298392494, - "grad_norm": 0.0011010251473635435, - "learning_rate": 0.00019999715107588153, - "loss": 46.0, - "step": 31436 - }, - { - "epoch": 2.4035781868226387, - "grad_norm": 0.0040678055956959724, - "learning_rate": 0.0001999971508945695, - "loss": 46.0, - "step": 31437 - }, - { - "epoch": 2.4036546438060284, - "grad_norm": 0.0007749042706564069, - "learning_rate": 0.0001999971507132517, - "loss": 46.0, - "step": 31438 - }, - { - "epoch": 2.403731100789418, - "grad_norm": 0.005407374817878008, - "learning_rate": 0.00019999715053192817, - "loss": 46.0, - "step": 31439 - }, - { - "epoch": 2.403807557772808, - "grad_norm": 0.0012303779367357492, - "learning_rate": 0.0001999971503505988, - "loss": 46.0, - "step": 31440 - }, - { - "epoch": 2.4038840147561977, - "grad_norm": 0.0011702467454597354, - "learning_rate": 0.0001999971501692637, - "loss": 46.0, - "step": 31441 - }, - { - "epoch": 2.4039604717395875, - "grad_norm": 0.0011009284062311053, - "learning_rate": 0.00019999714998792281, - "loss": 46.0, - "step": 31442 - }, - { - "epoch": 2.4040369287229773, - "grad_norm": 0.0022076440509408712, - "learning_rate": 0.00019999714980657618, - "loss": 46.0, - "step": 31443 - }, - { - "epoch": 2.404113385706367, - "grad_norm": 0.0006162030040286481, - "learning_rate": 0.00019999714962522377, - "loss": 46.0, - "step": 31444 - }, - { - "epoch": 2.4041898426897568, - "grad_norm": 0.0013319840654730797, - "learning_rate": 0.00019999714944386558, - "loss": 46.0, - "step": 31445 - }, - { - "epoch": 2.4042662996731465, - "grad_norm": 0.0016527213156223297, - "learning_rate": 0.00019999714926250163, - "loss": 46.0, - "step": 31446 - }, - { - "epoch": 2.4043427566565363, - "grad_norm": 0.00266616465523839, - "learning_rate": 0.00019999714908113192, - "loss": 46.0, - "step": 31447 - }, - { - "epoch": 2.404419213639926, - "grad_norm": 0.000726548838429153, - "learning_rate": 0.00019999714889975642, - "loss": 46.0, - "step": 31448 - }, - { - "epoch": 2.4044956706233154, - "grad_norm": 0.0028805367182940245, - "learning_rate": 0.00019999714871837517, - "loss": 46.0, - "step": 31449 - }, - { - "epoch": 2.404572127606705, - "grad_norm": 0.0006409891066141427, - "learning_rate": 0.00019999714853698812, - "loss": 46.0, - "step": 31450 - }, - { - "epoch": 2.404648584590095, - "grad_norm": 0.0016447784146293998, - "learning_rate": 0.00019999714835559532, - "loss": 46.0, - "step": 31451 - }, - { - "epoch": 2.4047250415734847, - "grad_norm": 0.0009670992731116712, - "learning_rate": 0.00019999714817419678, - "loss": 46.0, - "step": 31452 - }, - { - "epoch": 2.4048014985568744, - "grad_norm": 0.0010341479210183024, - "learning_rate": 0.0001999971479927924, - "loss": 46.0, - "step": 31453 - }, - { - "epoch": 2.404877955540264, - "grad_norm": 0.0007617168012075126, - "learning_rate": 0.00019999714781138232, - "loss": 46.0, - "step": 31454 - }, - { - "epoch": 2.404954412523654, - "grad_norm": 0.0035740381572395563, - "learning_rate": 0.00019999714762996643, - "loss": 46.0, - "step": 31455 - }, - { - "epoch": 2.4050308695070437, - "grad_norm": 0.0007605867576785386, - "learning_rate": 0.0001999971474485448, - "loss": 46.0, - "step": 31456 - }, - { - "epoch": 2.4051073264904335, - "grad_norm": 0.00036915618693456054, - "learning_rate": 0.00019999714726711738, - "loss": 46.0, - "step": 31457 - }, - { - "epoch": 2.4051837834738232, - "grad_norm": 0.0008888214360922575, - "learning_rate": 0.0001999971470856842, - "loss": 46.0, - "step": 31458 - }, - { - "epoch": 2.4052602404572125, - "grad_norm": 0.0006965924403630197, - "learning_rate": 0.00019999714690424524, - "loss": 46.0, - "step": 31459 - }, - { - "epoch": 2.4053366974406023, - "grad_norm": 0.0020298114977777004, - "learning_rate": 0.0001999971467228005, - "loss": 46.0, - "step": 31460 - }, - { - "epoch": 2.405413154423992, - "grad_norm": 0.0011782522778958082, - "learning_rate": 0.00019999714654135003, - "loss": 46.0, - "step": 31461 - }, - { - "epoch": 2.405489611407382, - "grad_norm": 0.0005383405950851738, - "learning_rate": 0.00019999714635989376, - "loss": 46.0, - "step": 31462 - }, - { - "epoch": 2.4055660683907716, - "grad_norm": 0.0018920161528512836, - "learning_rate": 0.00019999714617843173, - "loss": 46.0, - "step": 31463 - }, - { - "epoch": 2.4056425253741613, - "grad_norm": 0.0005845411214977503, - "learning_rate": 0.00019999714599696394, - "loss": 46.0, - "step": 31464 - }, - { - "epoch": 2.405718982357551, - "grad_norm": 0.0007085256511345506, - "learning_rate": 0.00019999714581549034, - "loss": 46.0, - "step": 31465 - }, - { - "epoch": 2.405795439340941, - "grad_norm": 0.0014195661060512066, - "learning_rate": 0.00019999714563401102, - "loss": 46.0, - "step": 31466 - }, - { - "epoch": 2.4058718963243306, - "grad_norm": 0.004898398648947477, - "learning_rate": 0.0001999971454525259, - "loss": 46.0, - "step": 31467 - }, - { - "epoch": 2.4059483533077204, - "grad_norm": 0.0015270456206053495, - "learning_rate": 0.00019999714527103502, - "loss": 46.0, - "step": 31468 - }, - { - "epoch": 2.40602481029111, - "grad_norm": 0.0007033307920210063, - "learning_rate": 0.00019999714508953838, - "loss": 46.0, - "step": 31469 - }, - { - "epoch": 2.4061012672745, - "grad_norm": 0.00536193186417222, - "learning_rate": 0.00019999714490803597, - "loss": 46.0, - "step": 31470 - }, - { - "epoch": 2.4061777242578892, - "grad_norm": 0.0017196249682456255, - "learning_rate": 0.00019999714472652776, - "loss": 46.0, - "step": 31471 - }, - { - "epoch": 2.406254181241279, - "grad_norm": 0.0007414100109599531, - "learning_rate": 0.0001999971445450138, - "loss": 46.0, - "step": 31472 - }, - { - "epoch": 2.4063306382246687, - "grad_norm": 0.0031941295601427555, - "learning_rate": 0.0001999971443634941, - "loss": 46.0, - "step": 31473 - }, - { - "epoch": 2.4064070952080585, - "grad_norm": 0.0009738372173160315, - "learning_rate": 0.00019999714418196863, - "loss": 46.0, - "step": 31474 - }, - { - "epoch": 2.4064835521914483, - "grad_norm": 0.0012634678278118372, - "learning_rate": 0.00019999714400043735, - "loss": 46.0, - "step": 31475 - }, - { - "epoch": 2.406560009174838, - "grad_norm": 0.0009300952078774571, - "learning_rate": 0.0001999971438189003, - "loss": 46.0, - "step": 31476 - }, - { - "epoch": 2.406636466158228, - "grad_norm": 0.001526662497781217, - "learning_rate": 0.0001999971436373575, - "loss": 46.0, - "step": 31477 - }, - { - "epoch": 2.4067129231416176, - "grad_norm": 0.0007352812099270523, - "learning_rate": 0.00019999714345580896, - "loss": 46.0, - "step": 31478 - }, - { - "epoch": 2.4067893801250073, - "grad_norm": 0.001667393953539431, - "learning_rate": 0.00019999714327425462, - "loss": 46.0, - "step": 31479 - }, - { - "epoch": 2.406865837108397, - "grad_norm": 0.0012034173123538494, - "learning_rate": 0.00019999714309269448, - "loss": 46.0, - "step": 31480 - }, - { - "epoch": 2.4069422940917864, - "grad_norm": 0.0015440764836966991, - "learning_rate": 0.0001999971429111286, - "loss": 46.0, - "step": 31481 - }, - { - "epoch": 2.407018751075176, - "grad_norm": 0.0015276235062628984, - "learning_rate": 0.00019999714272955698, - "loss": 46.0, - "step": 31482 - }, - { - "epoch": 2.407095208058566, - "grad_norm": 0.0011639675358310342, - "learning_rate": 0.00019999714254797954, - "loss": 46.0, - "step": 31483 - }, - { - "epoch": 2.4071716650419557, - "grad_norm": 0.0016474663279950619, - "learning_rate": 0.00019999714236639638, - "loss": 46.0, - "step": 31484 - }, - { - "epoch": 2.4072481220253454, - "grad_norm": 0.001146140624769032, - "learning_rate": 0.00019999714218480743, - "loss": 46.0, - "step": 31485 - }, - { - "epoch": 2.407324579008735, - "grad_norm": 0.0010244814911857247, - "learning_rate": 0.0001999971420032127, - "loss": 46.0, - "step": 31486 - }, - { - "epoch": 2.407401035992125, - "grad_norm": 0.0009179611806757748, - "learning_rate": 0.0001999971418216122, - "loss": 46.0, - "step": 31487 - }, - { - "epoch": 2.4074774929755147, - "grad_norm": 0.0006006889743730426, - "learning_rate": 0.00019999714164000595, - "loss": 46.0, - "step": 31488 - }, - { - "epoch": 2.4075539499589045, - "grad_norm": 0.0005202093161642551, - "learning_rate": 0.0001999971414583939, - "loss": 46.0, - "step": 31489 - }, - { - "epoch": 2.4076304069422942, - "grad_norm": 0.0145442895591259, - "learning_rate": 0.0001999971412767761, - "loss": 46.0, - "step": 31490 - }, - { - "epoch": 2.407706863925684, - "grad_norm": 0.0003983795759268105, - "learning_rate": 0.00019999714109515253, - "loss": 46.0, - "step": 31491 - }, - { - "epoch": 2.4077833209090738, - "grad_norm": 0.0032133013010025024, - "learning_rate": 0.0001999971409135232, - "loss": 46.0, - "step": 31492 - }, - { - "epoch": 2.407859777892463, - "grad_norm": 0.0011245921486988664, - "learning_rate": 0.0001999971407318881, - "loss": 46.0, - "step": 31493 - }, - { - "epoch": 2.407936234875853, - "grad_norm": 0.0017752742860466242, - "learning_rate": 0.0001999971405502472, - "loss": 46.0, - "step": 31494 - }, - { - "epoch": 2.4080126918592426, - "grad_norm": 0.00025600663502700627, - "learning_rate": 0.00019999714036860055, - "loss": 46.0, - "step": 31495 - }, - { - "epoch": 2.4080891488426324, - "grad_norm": 0.004592161625623703, - "learning_rate": 0.00019999714018694814, - "loss": 46.0, - "step": 31496 - }, - { - "epoch": 2.408165605826022, - "grad_norm": 0.0018526840722188354, - "learning_rate": 0.00019999714000528993, - "loss": 46.0, - "step": 31497 - }, - { - "epoch": 2.408242062809412, - "grad_norm": 0.0015600936021655798, - "learning_rate": 0.000199997139823626, - "loss": 46.0, - "step": 31498 - }, - { - "epoch": 2.4083185197928016, - "grad_norm": 0.0012808283790946007, - "learning_rate": 0.00019999713964195625, - "loss": 46.0, - "step": 31499 - }, - { - "epoch": 2.4083949767761914, - "grad_norm": 0.00048348886775784194, - "learning_rate": 0.00019999713946028077, - "loss": 46.0, - "step": 31500 - }, - { - "epoch": 2.408471433759581, - "grad_norm": 0.0006938119768165052, - "learning_rate": 0.00019999713927859952, - "loss": 46.0, - "step": 31501 - }, - { - "epoch": 2.4085478907429705, - "grad_norm": 0.0008967588655650616, - "learning_rate": 0.00019999713909691247, - "loss": 46.0, - "step": 31502 - }, - { - "epoch": 2.4086243477263602, - "grad_norm": 0.0019483311334624887, - "learning_rate": 0.00019999713891521968, - "loss": 46.0, - "step": 31503 - }, - { - "epoch": 2.40870080470975, - "grad_norm": 0.000997532275505364, - "learning_rate": 0.0001999971387335211, - "loss": 46.0, - "step": 31504 - }, - { - "epoch": 2.4087772616931398, - "grad_norm": 0.0006950461538508534, - "learning_rate": 0.00019999713855181676, - "loss": 46.0, - "step": 31505 - }, - { - "epoch": 2.4088537186765295, - "grad_norm": 0.0004208044265396893, - "learning_rate": 0.00019999713837010665, - "loss": 46.0, - "step": 31506 - }, - { - "epoch": 2.4089301756599193, - "grad_norm": 0.0014009278966113925, - "learning_rate": 0.00019999713818839076, - "loss": 46.0, - "step": 31507 - }, - { - "epoch": 2.409006632643309, - "grad_norm": 0.002767933765426278, - "learning_rate": 0.0001999971380066691, - "loss": 46.0, - "step": 31508 - }, - { - "epoch": 2.409083089626699, - "grad_norm": 0.0007349203224293888, - "learning_rate": 0.00019999713782494172, - "loss": 46.0, - "step": 31509 - }, - { - "epoch": 2.4091595466100886, - "grad_norm": 0.0011516865342855453, - "learning_rate": 0.0001999971376432085, - "loss": 46.0, - "step": 31510 - }, - { - "epoch": 2.4092360035934783, - "grad_norm": 0.002653359202668071, - "learning_rate": 0.00019999713746146955, - "loss": 46.0, - "step": 31511 - }, - { - "epoch": 2.409312460576868, - "grad_norm": 0.0011696862056851387, - "learning_rate": 0.00019999713727972482, - "loss": 46.0, - "step": 31512 - }, - { - "epoch": 2.409388917560258, - "grad_norm": 0.0016497414326295257, - "learning_rate": 0.00019999713709797435, - "loss": 46.0, - "step": 31513 - }, - { - "epoch": 2.4094653745436476, - "grad_norm": 0.0007007354288361967, - "learning_rate": 0.0001999971369162181, - "loss": 46.0, - "step": 31514 - }, - { - "epoch": 2.409541831527037, - "grad_norm": 0.0008845037664286792, - "learning_rate": 0.00019999713673445605, - "loss": 46.0, - "step": 31515 - }, - { - "epoch": 2.4096182885104267, - "grad_norm": 0.0003855802060570568, - "learning_rate": 0.00019999713655268823, - "loss": 46.0, - "step": 31516 - }, - { - "epoch": 2.4096947454938165, - "grad_norm": 0.001040634699165821, - "learning_rate": 0.00019999713637091466, - "loss": 46.0, - "step": 31517 - }, - { - "epoch": 2.409771202477206, - "grad_norm": 0.0004562884569168091, - "learning_rate": 0.00019999713618913532, - "loss": 46.0, - "step": 31518 - }, - { - "epoch": 2.409847659460596, - "grad_norm": 0.00244263862259686, - "learning_rate": 0.00019999713600735023, - "loss": 46.0, - "step": 31519 - }, - { - "epoch": 2.4099241164439857, - "grad_norm": 0.000925388652831316, - "learning_rate": 0.0001999971358255593, - "loss": 46.0, - "step": 31520 - }, - { - "epoch": 2.4100005734273755, - "grad_norm": 0.0005213592085056007, - "learning_rate": 0.00019999713564376268, - "loss": 46.0, - "step": 31521 - }, - { - "epoch": 2.4100770304107653, - "grad_norm": 0.0021028933115303516, - "learning_rate": 0.00019999713546196027, - "loss": 46.0, - "step": 31522 - }, - { - "epoch": 2.410153487394155, - "grad_norm": 0.0018643634393811226, - "learning_rate": 0.00019999713528015206, - "loss": 46.0, - "step": 31523 - }, - { - "epoch": 2.4102299443775443, - "grad_norm": 0.00029715648270212114, - "learning_rate": 0.0001999971350983381, - "loss": 46.0, - "step": 31524 - }, - { - "epoch": 2.410306401360934, - "grad_norm": 0.0009367327438667417, - "learning_rate": 0.0001999971349165184, - "loss": 46.0, - "step": 31525 - }, - { - "epoch": 2.410382858344324, - "grad_norm": 0.0008120790007524192, - "learning_rate": 0.0001999971347346929, - "loss": 46.0, - "step": 31526 - }, - { - "epoch": 2.4104593153277136, - "grad_norm": 0.004285277798771858, - "learning_rate": 0.00019999713455286163, - "loss": 46.0, - "step": 31527 - }, - { - "epoch": 2.4105357723111034, - "grad_norm": 0.004256781190633774, - "learning_rate": 0.00019999713437102458, - "loss": 46.0, - "step": 31528 - }, - { - "epoch": 2.410612229294493, - "grad_norm": 0.0008301177294924855, - "learning_rate": 0.0001999971341891818, - "loss": 46.0, - "step": 31529 - }, - { - "epoch": 2.410688686277883, - "grad_norm": 0.0032910851296037436, - "learning_rate": 0.00019999713400733321, - "loss": 46.0, - "step": 31530 - }, - { - "epoch": 2.4107651432612727, - "grad_norm": 0.0005402807146310806, - "learning_rate": 0.00019999713382547887, - "loss": 46.0, - "step": 31531 - }, - { - "epoch": 2.4108416002446624, - "grad_norm": 0.004251751117408276, - "learning_rate": 0.00019999713364361876, - "loss": 46.0, - "step": 31532 - }, - { - "epoch": 2.410918057228052, - "grad_norm": 0.0009280891972593963, - "learning_rate": 0.0001999971334617529, - "loss": 46.0, - "step": 31533 - }, - { - "epoch": 2.410994514211442, - "grad_norm": 0.000361142068868503, - "learning_rate": 0.0001999971332798812, - "loss": 46.0, - "step": 31534 - }, - { - "epoch": 2.4110709711948317, - "grad_norm": 0.0009967322694137692, - "learning_rate": 0.0001999971330980038, - "loss": 46.0, - "step": 31535 - }, - { - "epoch": 2.4111474281782215, - "grad_norm": 0.0011202595196664333, - "learning_rate": 0.00019999713291612064, - "loss": 46.0, - "step": 31536 - }, - { - "epoch": 2.411223885161611, - "grad_norm": 0.0006760488613508642, - "learning_rate": 0.00019999713273423166, - "loss": 46.0, - "step": 31537 - }, - { - "epoch": 2.4113003421450006, - "grad_norm": 0.0012034527026116848, - "learning_rate": 0.0001999971325523369, - "loss": 46.0, - "step": 31538 - }, - { - "epoch": 2.4113767991283903, - "grad_norm": 0.0009123107302002609, - "learning_rate": 0.00019999713237043643, - "loss": 46.0, - "step": 31539 - }, - { - "epoch": 2.41145325611178, - "grad_norm": 0.0012663532979786396, - "learning_rate": 0.00019999713218853016, - "loss": 46.0, - "step": 31540 - }, - { - "epoch": 2.41152971309517, - "grad_norm": 0.002751997672021389, - "learning_rate": 0.00019999713200661814, - "loss": 46.0, - "step": 31541 - }, - { - "epoch": 2.4116061700785596, - "grad_norm": 0.0032665308099240065, - "learning_rate": 0.00019999713182470032, - "loss": 46.0, - "step": 31542 - }, - { - "epoch": 2.4116826270619494, - "grad_norm": 0.0006853691302239895, - "learning_rate": 0.00019999713164277675, - "loss": 46.0, - "step": 31543 - }, - { - "epoch": 2.411759084045339, - "grad_norm": 0.001948764780536294, - "learning_rate": 0.0001999971314608474, - "loss": 46.0, - "step": 31544 - }, - { - "epoch": 2.411835541028729, - "grad_norm": 0.0012749229790642858, - "learning_rate": 0.00019999713127891232, - "loss": 46.0, - "step": 31545 - }, - { - "epoch": 2.411911998012118, - "grad_norm": 0.0024692656006664038, - "learning_rate": 0.0001999971310969714, - "loss": 46.0, - "step": 31546 - }, - { - "epoch": 2.411988454995508, - "grad_norm": 0.0035269109066575766, - "learning_rate": 0.00019999713091502477, - "loss": 46.0, - "step": 31547 - }, - { - "epoch": 2.4120649119788977, - "grad_norm": 0.0007178937084972858, - "learning_rate": 0.00019999713073307236, - "loss": 46.0, - "step": 31548 - }, - { - "epoch": 2.4121413689622875, - "grad_norm": 0.0016796267591416836, - "learning_rate": 0.00019999713055111415, - "loss": 46.0, - "step": 31549 - }, - { - "epoch": 2.4122178259456772, - "grad_norm": 0.00041162510751746595, - "learning_rate": 0.0001999971303691502, - "loss": 46.0, - "step": 31550 - }, - { - "epoch": 2.412294282929067, - "grad_norm": 0.001185059198178351, - "learning_rate": 0.00019999713018718047, - "loss": 46.0, - "step": 31551 - }, - { - "epoch": 2.4123707399124568, - "grad_norm": 0.0041820150800049305, - "learning_rate": 0.00019999713000520497, - "loss": 46.0, - "step": 31552 - }, - { - "epoch": 2.4124471968958465, - "grad_norm": 0.0005849146982654929, - "learning_rate": 0.00019999712982322372, - "loss": 46.0, - "step": 31553 - }, - { - "epoch": 2.4125236538792363, - "grad_norm": 0.0012389356270432472, - "learning_rate": 0.00019999712964123667, - "loss": 46.0, - "step": 31554 - }, - { - "epoch": 2.412600110862626, - "grad_norm": 0.0004523008537944406, - "learning_rate": 0.00019999712945924385, - "loss": 46.0, - "step": 31555 - }, - { - "epoch": 2.412676567846016, - "grad_norm": 0.0015283380635082722, - "learning_rate": 0.0001999971292772453, - "loss": 46.0, - "step": 31556 - }, - { - "epoch": 2.4127530248294056, - "grad_norm": 0.0006729109445586801, - "learning_rate": 0.00019999712909524094, - "loss": 46.0, - "step": 31557 - }, - { - "epoch": 2.412829481812795, - "grad_norm": 0.004310677759349346, - "learning_rate": 0.00019999712891323083, - "loss": 46.0, - "step": 31558 - }, - { - "epoch": 2.4129059387961846, - "grad_norm": 0.003804714884608984, - "learning_rate": 0.00019999712873121494, - "loss": 46.0, - "step": 31559 - }, - { - "epoch": 2.4129823957795744, - "grad_norm": 0.002762932563200593, - "learning_rate": 0.00019999712854919328, - "loss": 46.0, - "step": 31560 - }, - { - "epoch": 2.413058852762964, - "grad_norm": 0.004486938007175922, - "learning_rate": 0.00019999712836716587, - "loss": 46.0, - "step": 31561 - }, - { - "epoch": 2.413135309746354, - "grad_norm": 0.0009690411970950663, - "learning_rate": 0.00019999712818513266, - "loss": 46.0, - "step": 31562 - }, - { - "epoch": 2.4132117667297437, - "grad_norm": 0.002592853968963027, - "learning_rate": 0.0001999971280030937, - "loss": 46.0, - "step": 31563 - }, - { - "epoch": 2.4132882237131335, - "grad_norm": 0.0028453718405216932, - "learning_rate": 0.00019999712782104898, - "loss": 46.0, - "step": 31564 - }, - { - "epoch": 2.413364680696523, - "grad_norm": 0.002826140960678458, - "learning_rate": 0.00019999712763899848, - "loss": 46.0, - "step": 31565 - }, - { - "epoch": 2.413441137679913, - "grad_norm": 0.0014487982261925936, - "learning_rate": 0.00019999712745694223, - "loss": 46.0, - "step": 31566 - }, - { - "epoch": 2.4135175946633027, - "grad_norm": 0.0006863312446512282, - "learning_rate": 0.00019999712727488019, - "loss": 46.0, - "step": 31567 - }, - { - "epoch": 2.413594051646692, - "grad_norm": 0.0010164452251046896, - "learning_rate": 0.0001999971270928124, - "loss": 46.0, - "step": 31568 - }, - { - "epoch": 2.413670508630082, - "grad_norm": 0.0006221099756658077, - "learning_rate": 0.0001999971269107388, - "loss": 46.0, - "step": 31569 - }, - { - "epoch": 2.4137469656134716, - "grad_norm": 0.0015117890434339643, - "learning_rate": 0.00019999712672865946, - "loss": 46.0, - "step": 31570 - }, - { - "epoch": 2.4138234225968613, - "grad_norm": 0.0005253482377156615, - "learning_rate": 0.00019999712654657437, - "loss": 46.0, - "step": 31571 - }, - { - "epoch": 2.413899879580251, - "grad_norm": 0.001459247781895101, - "learning_rate": 0.00019999712636448346, - "loss": 46.0, - "step": 31572 - }, - { - "epoch": 2.413976336563641, - "grad_norm": 0.0011814262252300978, - "learning_rate": 0.00019999712618238682, - "loss": 46.0, - "step": 31573 - }, - { - "epoch": 2.4140527935470306, - "grad_norm": 0.0007734876126050949, - "learning_rate": 0.0001999971260002844, - "loss": 46.0, - "step": 31574 - }, - { - "epoch": 2.4141292505304204, - "grad_norm": 0.0020652583334594965, - "learning_rate": 0.0001999971258181762, - "loss": 46.0, - "step": 31575 - }, - { - "epoch": 2.41420570751381, - "grad_norm": 0.0021665054373443127, - "learning_rate": 0.00019999712563606225, - "loss": 46.0, - "step": 31576 - }, - { - "epoch": 2.4142821644972, - "grad_norm": 0.0009689588914625347, - "learning_rate": 0.00019999712545394253, - "loss": 46.0, - "step": 31577 - }, - { - "epoch": 2.4143586214805897, - "grad_norm": 0.0005599790019914508, - "learning_rate": 0.000199997125271817, - "loss": 46.0, - "step": 31578 - }, - { - "epoch": 2.4144350784639794, - "grad_norm": 0.0007454440928995609, - "learning_rate": 0.00019999712508968575, - "loss": 46.0, - "step": 31579 - }, - { - "epoch": 2.4145115354473687, - "grad_norm": 0.0014415301848202944, - "learning_rate": 0.0001999971249075487, - "loss": 46.0, - "step": 31580 - }, - { - "epoch": 2.4145879924307585, - "grad_norm": 0.0008488707826472819, - "learning_rate": 0.0001999971247254059, - "loss": 46.0, - "step": 31581 - }, - { - "epoch": 2.4146644494141483, - "grad_norm": 0.0015603253850713372, - "learning_rate": 0.00019999712454325732, - "loss": 46.0, - "step": 31582 - }, - { - "epoch": 2.414740906397538, - "grad_norm": 0.0016115342732518911, - "learning_rate": 0.00019999712436110295, - "loss": 46.0, - "step": 31583 - }, - { - "epoch": 2.414817363380928, - "grad_norm": 0.0008736657910048962, - "learning_rate": 0.00019999712417894286, - "loss": 46.0, - "step": 31584 - }, - { - "epoch": 2.4148938203643175, - "grad_norm": 0.0008387601701542735, - "learning_rate": 0.00019999712399677697, - "loss": 46.0, - "step": 31585 - }, - { - "epoch": 2.4149702773477073, - "grad_norm": 0.0005933020147494972, - "learning_rate": 0.0001999971238146053, - "loss": 46.0, - "step": 31586 - }, - { - "epoch": 2.415046734331097, - "grad_norm": 0.0011197460116818547, - "learning_rate": 0.00019999712363242788, - "loss": 46.0, - "step": 31587 - }, - { - "epoch": 2.415123191314487, - "grad_norm": 0.0008185844053514302, - "learning_rate": 0.0001999971234502447, - "loss": 46.0, - "step": 31588 - }, - { - "epoch": 2.4151996482978766, - "grad_norm": 0.0007765372283756733, - "learning_rate": 0.00019999712326805575, - "loss": 46.0, - "step": 31589 - }, - { - "epoch": 2.415276105281266, - "grad_norm": 0.003065679920837283, - "learning_rate": 0.000199997123085861, - "loss": 46.0, - "step": 31590 - }, - { - "epoch": 2.4153525622646557, - "grad_norm": 0.0013270934578031301, - "learning_rate": 0.00019999712290366052, - "loss": 46.0, - "step": 31591 - }, - { - "epoch": 2.4154290192480454, - "grad_norm": 0.0016827181680127978, - "learning_rate": 0.00019999712272145424, - "loss": 46.0, - "step": 31592 - }, - { - "epoch": 2.415505476231435, - "grad_norm": 0.0006743364501744509, - "learning_rate": 0.0001999971225392422, - "loss": 46.0, - "step": 31593 - }, - { - "epoch": 2.415581933214825, - "grad_norm": 0.0009680672665126622, - "learning_rate": 0.0001999971223570244, - "loss": 46.0, - "step": 31594 - }, - { - "epoch": 2.4156583901982147, - "grad_norm": 0.0009353538043797016, - "learning_rate": 0.0001999971221748008, - "loss": 46.0, - "step": 31595 - }, - { - "epoch": 2.4157348471816045, - "grad_norm": 0.001025684643536806, - "learning_rate": 0.00019999712199257147, - "loss": 46.0, - "step": 31596 - }, - { - "epoch": 2.4158113041649942, - "grad_norm": 0.003344607073813677, - "learning_rate": 0.00019999712181033636, - "loss": 46.0, - "step": 31597 - }, - { - "epoch": 2.415887761148384, - "grad_norm": 0.0011846135603263974, - "learning_rate": 0.00019999712162809547, - "loss": 46.0, - "step": 31598 - }, - { - "epoch": 2.4159642181317738, - "grad_norm": 0.00117380334995687, - "learning_rate": 0.00019999712144584884, - "loss": 46.0, - "step": 31599 - }, - { - "epoch": 2.4160406751151635, - "grad_norm": 0.0008556417305953801, - "learning_rate": 0.0001999971212635964, - "loss": 46.0, - "step": 31600 - }, - { - "epoch": 2.4161171320985533, - "grad_norm": 0.0029806990642100573, - "learning_rate": 0.00019999712108133822, - "loss": 46.0, - "step": 31601 - }, - { - "epoch": 2.4161935890819426, - "grad_norm": 0.0006079701706767082, - "learning_rate": 0.00019999712089907424, - "loss": 46.0, - "step": 31602 - }, - { - "epoch": 2.4162700460653324, - "grad_norm": 0.0009088863735087216, - "learning_rate": 0.0001999971207168045, - "loss": 46.0, - "step": 31603 - }, - { - "epoch": 2.416346503048722, - "grad_norm": 0.001433251309208572, - "learning_rate": 0.00019999712053452902, - "loss": 46.0, - "step": 31604 - }, - { - "epoch": 2.416422960032112, - "grad_norm": 0.0022963511291891336, - "learning_rate": 0.00019999712035224777, - "loss": 46.0, - "step": 31605 - }, - { - "epoch": 2.4164994170155016, - "grad_norm": 0.0035245544277131557, - "learning_rate": 0.0001999971201699607, - "loss": 46.0, - "step": 31606 - }, - { - "epoch": 2.4165758739988914, - "grad_norm": 0.0010022609494626522, - "learning_rate": 0.00019999711998766788, - "loss": 46.0, - "step": 31607 - }, - { - "epoch": 2.416652330982281, - "grad_norm": 0.0012003815500065684, - "learning_rate": 0.0001999971198053693, - "loss": 46.0, - "step": 31608 - }, - { - "epoch": 2.416728787965671, - "grad_norm": 0.0019965777173638344, - "learning_rate": 0.00019999711962306494, - "loss": 46.0, - "step": 31609 - }, - { - "epoch": 2.4168052449490607, - "grad_norm": 0.00037816326948814094, - "learning_rate": 0.00019999711944075486, - "loss": 46.0, - "step": 31610 - }, - { - "epoch": 2.4168817019324504, - "grad_norm": 0.0005169878713786602, - "learning_rate": 0.00019999711925843897, - "loss": 46.0, - "step": 31611 - }, - { - "epoch": 2.4169581589158398, - "grad_norm": 0.0006264549447223544, - "learning_rate": 0.0001999971190761173, - "loss": 46.0, - "step": 31612 - }, - { - "epoch": 2.4170346158992295, - "grad_norm": 0.001726500573568046, - "learning_rate": 0.0001999971188937899, - "loss": 46.0, - "step": 31613 - }, - { - "epoch": 2.4171110728826193, - "grad_norm": 0.0012484944891184568, - "learning_rate": 0.0001999971187114567, - "loss": 46.0, - "step": 31614 - }, - { - "epoch": 2.417187529866009, - "grad_norm": 0.00048276971210725605, - "learning_rate": 0.00019999711852911774, - "loss": 46.0, - "step": 31615 - }, - { - "epoch": 2.417263986849399, - "grad_norm": 0.00045784362009726465, - "learning_rate": 0.000199997118346773, - "loss": 46.0, - "step": 31616 - }, - { - "epoch": 2.4173404438327886, - "grad_norm": 0.001244532992132008, - "learning_rate": 0.0001999971181644225, - "loss": 46.0, - "step": 31617 - }, - { - "epoch": 2.4174169008161783, - "grad_norm": 0.0007591007743030787, - "learning_rate": 0.00019999711798206624, - "loss": 46.0, - "step": 31618 - }, - { - "epoch": 2.417493357799568, - "grad_norm": 0.000927189365029335, - "learning_rate": 0.0001999971177997042, - "loss": 46.0, - "step": 31619 - }, - { - "epoch": 2.417569814782958, - "grad_norm": 0.0026697805151343346, - "learning_rate": 0.0001999971176173364, - "loss": 46.0, - "step": 31620 - }, - { - "epoch": 2.4176462717663476, - "grad_norm": 0.0009276054333895445, - "learning_rate": 0.0001999971174349628, - "loss": 46.0, - "step": 31621 - }, - { - "epoch": 2.4177227287497374, - "grad_norm": 0.0027608326636254787, - "learning_rate": 0.00019999711725258345, - "loss": 46.0, - "step": 31622 - }, - { - "epoch": 2.417799185733127, - "grad_norm": 0.002007543807849288, - "learning_rate": 0.00019999711707019833, - "loss": 46.0, - "step": 31623 - }, - { - "epoch": 2.4178756427165164, - "grad_norm": 0.0006276968051679432, - "learning_rate": 0.00019999711688780745, - "loss": 46.0, - "step": 31624 - }, - { - "epoch": 2.417952099699906, - "grad_norm": 0.0011470308527350426, - "learning_rate": 0.0001999971167054108, - "loss": 46.0, - "step": 31625 - }, - { - "epoch": 2.418028556683296, - "grad_norm": 0.00213835877366364, - "learning_rate": 0.00019999711652300835, - "loss": 46.0, - "step": 31626 - }, - { - "epoch": 2.4181050136666857, - "grad_norm": 0.0007017204770818353, - "learning_rate": 0.00019999711634060017, - "loss": 46.0, - "step": 31627 - }, - { - "epoch": 2.4181814706500755, - "grad_norm": 0.0052144769579172134, - "learning_rate": 0.00019999711615818622, - "loss": 46.0, - "step": 31628 - }, - { - "epoch": 2.4182579276334653, - "grad_norm": 0.0019888682290911674, - "learning_rate": 0.0001999971159757665, - "loss": 46.0, - "step": 31629 - }, - { - "epoch": 2.418334384616855, - "grad_norm": 0.00046531064435839653, - "learning_rate": 0.00019999711579334097, - "loss": 46.0, - "step": 31630 - }, - { - "epoch": 2.4184108416002448, - "grad_norm": 0.0008483295678161085, - "learning_rate": 0.0001999971156109097, - "loss": 46.0, - "step": 31631 - }, - { - "epoch": 2.4184872985836345, - "grad_norm": 0.004493400454521179, - "learning_rate": 0.00019999711542847268, - "loss": 46.0, - "step": 31632 - }, - { - "epoch": 2.418563755567024, - "grad_norm": 0.0008407704881392419, - "learning_rate": 0.00019999711524602986, - "loss": 46.0, - "step": 31633 - }, - { - "epoch": 2.4186402125504136, - "grad_norm": 0.0018366235308349133, - "learning_rate": 0.00019999711506358127, - "loss": 46.0, - "step": 31634 - }, - { - "epoch": 2.4187166695338034, - "grad_norm": 0.0008160322322510183, - "learning_rate": 0.0001999971148811269, - "loss": 46.0, - "step": 31635 - }, - { - "epoch": 2.418793126517193, - "grad_norm": 0.0033823465928435326, - "learning_rate": 0.00019999711469866681, - "loss": 46.0, - "step": 31636 - }, - { - "epoch": 2.418869583500583, - "grad_norm": 0.0017869259463623166, - "learning_rate": 0.0001999971145162009, - "loss": 46.0, - "step": 31637 - }, - { - "epoch": 2.4189460404839727, - "grad_norm": 0.0011285413056612015, - "learning_rate": 0.00019999711433372927, - "loss": 46.0, - "step": 31638 - }, - { - "epoch": 2.4190224974673624, - "grad_norm": 0.0016802398022264242, - "learning_rate": 0.00019999711415125184, - "loss": 46.0, - "step": 31639 - }, - { - "epoch": 2.419098954450752, - "grad_norm": 0.001447243383154273, - "learning_rate": 0.00019999711396876863, - "loss": 46.0, - "step": 31640 - }, - { - "epoch": 2.419175411434142, - "grad_norm": 0.0010327561758458614, - "learning_rate": 0.00019999711378627968, - "loss": 46.0, - "step": 31641 - }, - { - "epoch": 2.4192518684175317, - "grad_norm": 0.0011783323716372252, - "learning_rate": 0.00019999711360378495, - "loss": 46.0, - "step": 31642 - }, - { - "epoch": 2.4193283254009215, - "grad_norm": 0.0013162004761397839, - "learning_rate": 0.00019999711342128445, - "loss": 46.0, - "step": 31643 - }, - { - "epoch": 2.4194047823843112, - "grad_norm": 0.0014117922401055694, - "learning_rate": 0.00019999711323877818, - "loss": 46.0, - "step": 31644 - }, - { - "epoch": 2.419481239367701, - "grad_norm": 0.0011919975513592362, - "learning_rate": 0.0001999971130562661, - "loss": 46.0, - "step": 31645 - }, - { - "epoch": 2.4195576963510903, - "grad_norm": 0.0003853417874779552, - "learning_rate": 0.00019999711287374831, - "loss": 46.0, - "step": 31646 - }, - { - "epoch": 2.41963415333448, - "grad_norm": 0.002028072252869606, - "learning_rate": 0.00019999711269122475, - "loss": 46.0, - "step": 31647 - }, - { - "epoch": 2.41971061031787, - "grad_norm": 0.0008285519434139132, - "learning_rate": 0.00019999711250869538, - "loss": 46.0, - "step": 31648 - }, - { - "epoch": 2.4197870673012596, - "grad_norm": 0.002793597988784313, - "learning_rate": 0.00019999711232616027, - "loss": 46.0, - "step": 31649 - }, - { - "epoch": 2.4198635242846493, - "grad_norm": 0.003838328877463937, - "learning_rate": 0.00019999711214361939, - "loss": 46.0, - "step": 31650 - }, - { - "epoch": 2.419939981268039, - "grad_norm": 0.0007785335765220225, - "learning_rate": 0.00019999711196107273, - "loss": 46.0, - "step": 31651 - }, - { - "epoch": 2.420016438251429, - "grad_norm": 0.0006616014870814979, - "learning_rate": 0.0001999971117785203, - "loss": 46.0, - "step": 31652 - }, - { - "epoch": 2.4200928952348186, - "grad_norm": 0.0004947613342665136, - "learning_rate": 0.00019999711159596212, - "loss": 46.0, - "step": 31653 - }, - { - "epoch": 2.4201693522182084, - "grad_norm": 0.0010030142730101943, - "learning_rate": 0.00019999711141339814, - "loss": 46.0, - "step": 31654 - }, - { - "epoch": 2.4202458092015977, - "grad_norm": 0.00097366125555709, - "learning_rate": 0.0001999971112308284, - "loss": 46.0, - "step": 31655 - }, - { - "epoch": 2.4203222661849875, - "grad_norm": 0.00045676305308006704, - "learning_rate": 0.00019999711104825289, - "loss": 46.0, - "step": 31656 - }, - { - "epoch": 2.4203987231683772, - "grad_norm": 0.001204445376060903, - "learning_rate": 0.00019999711086567164, - "loss": 46.0, - "step": 31657 - }, - { - "epoch": 2.420475180151767, - "grad_norm": 0.005395912565290928, - "learning_rate": 0.0001999971106830846, - "loss": 46.0, - "step": 31658 - }, - { - "epoch": 2.4205516371351568, - "grad_norm": 0.001471759402193129, - "learning_rate": 0.00019999711050049178, - "loss": 46.0, - "step": 31659 - }, - { - "epoch": 2.4206280941185465, - "grad_norm": 0.0007064960664138198, - "learning_rate": 0.0001999971103178932, - "loss": 46.0, - "step": 31660 - }, - { - "epoch": 2.4207045511019363, - "grad_norm": 0.0021935170516371727, - "learning_rate": 0.00019999711013528884, - "loss": 46.0, - "step": 31661 - }, - { - "epoch": 2.420781008085326, - "grad_norm": 0.003960418980568647, - "learning_rate": 0.00019999710995267873, - "loss": 46.0, - "step": 31662 - }, - { - "epoch": 2.420857465068716, - "grad_norm": 0.0005817844066768885, - "learning_rate": 0.00019999710977006285, - "loss": 46.0, - "step": 31663 - }, - { - "epoch": 2.4209339220521056, - "grad_norm": 0.0022377900313585997, - "learning_rate": 0.0001999971095874412, - "loss": 46.0, - "step": 31664 - }, - { - "epoch": 2.4210103790354953, - "grad_norm": 0.0008167008636519313, - "learning_rate": 0.00019999710940481376, - "loss": 46.0, - "step": 31665 - }, - { - "epoch": 2.421086836018885, - "grad_norm": 0.0031989584676921368, - "learning_rate": 0.00019999710922218055, - "loss": 46.0, - "step": 31666 - }, - { - "epoch": 2.421163293002275, - "grad_norm": 0.00047056740731932223, - "learning_rate": 0.00019999710903954157, - "loss": 46.0, - "step": 31667 - }, - { - "epoch": 2.421239749985664, - "grad_norm": 0.0010123159736394882, - "learning_rate": 0.00019999710885689687, - "loss": 46.0, - "step": 31668 - }, - { - "epoch": 2.421316206969054, - "grad_norm": 0.0006796225789003074, - "learning_rate": 0.00019999710867424635, - "loss": 46.0, - "step": 31669 - }, - { - "epoch": 2.4213926639524437, - "grad_norm": 0.0011805481044575572, - "learning_rate": 0.00019999710849159008, - "loss": 46.0, - "step": 31670 - }, - { - "epoch": 2.4214691209358334, - "grad_norm": 0.0012977055739611387, - "learning_rate": 0.00019999710830892803, - "loss": 46.0, - "step": 31671 - }, - { - "epoch": 2.421545577919223, - "grad_norm": 0.0007809889502823353, - "learning_rate": 0.0001999971081262602, - "loss": 46.0, - "step": 31672 - }, - { - "epoch": 2.421622034902613, - "grad_norm": 0.0019299163250252604, - "learning_rate": 0.00019999710794358665, - "loss": 46.0, - "step": 31673 - }, - { - "epoch": 2.4216984918860027, - "grad_norm": 0.0006584340590052307, - "learning_rate": 0.0001999971077609073, - "loss": 46.0, - "step": 31674 - }, - { - "epoch": 2.4217749488693925, - "grad_norm": 0.0007480731001123786, - "learning_rate": 0.00019999710757822217, - "loss": 46.0, - "step": 31675 - }, - { - "epoch": 2.4218514058527822, - "grad_norm": 0.002124340273439884, - "learning_rate": 0.0001999971073955313, - "loss": 46.0, - "step": 31676 - }, - { - "epoch": 2.4219278628361716, - "grad_norm": 0.004308842122554779, - "learning_rate": 0.00019999710721283463, - "loss": 46.0, - "step": 31677 - }, - { - "epoch": 2.4220043198195613, - "grad_norm": 0.0022425497882068157, - "learning_rate": 0.0001999971070301322, - "loss": 46.0, - "step": 31678 - }, - { - "epoch": 2.422080776802951, - "grad_norm": 0.0007315670954994857, - "learning_rate": 0.00019999710684742402, - "loss": 46.0, - "step": 31679 - }, - { - "epoch": 2.422157233786341, - "grad_norm": 0.0017121689161285758, - "learning_rate": 0.00019999710666471004, - "loss": 46.0, - "step": 31680 - }, - { - "epoch": 2.4222336907697306, - "grad_norm": 0.0007860715268179774, - "learning_rate": 0.0001999971064819903, - "loss": 46.0, - "step": 31681 - }, - { - "epoch": 2.4223101477531204, - "grad_norm": 0.00105209369212389, - "learning_rate": 0.0001999971062992648, - "loss": 46.0, - "step": 31682 - }, - { - "epoch": 2.42238660473651, - "grad_norm": 0.0011122459545731544, - "learning_rate": 0.00019999710611653352, - "loss": 46.0, - "step": 31683 - }, - { - "epoch": 2.4224630617199, - "grad_norm": 0.0007105217082425952, - "learning_rate": 0.00019999710593379647, - "loss": 46.0, - "step": 31684 - }, - { - "epoch": 2.4225395187032897, - "grad_norm": 0.0014697462320327759, - "learning_rate": 0.00019999710575105366, - "loss": 46.0, - "step": 31685 - }, - { - "epoch": 2.4226159756866794, - "grad_norm": 0.003164944937452674, - "learning_rate": 0.00019999710556830506, - "loss": 46.0, - "step": 31686 - }, - { - "epoch": 2.422692432670069, - "grad_norm": 0.0006941268220543861, - "learning_rate": 0.00019999710538555073, - "loss": 46.0, - "step": 31687 - }, - { - "epoch": 2.422768889653459, - "grad_norm": 0.0008146260515786707, - "learning_rate": 0.00019999710520279061, - "loss": 46.0, - "step": 31688 - }, - { - "epoch": 2.4228453466368483, - "grad_norm": 0.0024288722779601812, - "learning_rate": 0.0001999971050200247, - "loss": 46.0, - "step": 31689 - }, - { - "epoch": 2.422921803620238, - "grad_norm": 0.0005013248883187771, - "learning_rate": 0.00019999710483725304, - "loss": 46.0, - "step": 31690 - }, - { - "epoch": 2.4229982606036278, - "grad_norm": 0.0020023786928504705, - "learning_rate": 0.00019999710465447564, - "loss": 46.0, - "step": 31691 - }, - { - "epoch": 2.4230747175870175, - "grad_norm": 0.0013715730747208, - "learning_rate": 0.00019999710447169244, - "loss": 46.0, - "step": 31692 - }, - { - "epoch": 2.4231511745704073, - "grad_norm": 0.03180105984210968, - "learning_rate": 0.00019999710428890346, - "loss": 46.0, - "step": 31693 - }, - { - "epoch": 2.423227631553797, - "grad_norm": 0.0007216735975816846, - "learning_rate": 0.00019999710410610873, - "loss": 46.0, - "step": 31694 - }, - { - "epoch": 2.423304088537187, - "grad_norm": 0.0008931442280299962, - "learning_rate": 0.0001999971039233082, - "loss": 46.0, - "step": 31695 - }, - { - "epoch": 2.4233805455205766, - "grad_norm": 0.002911068731918931, - "learning_rate": 0.00019999710374050196, - "loss": 46.0, - "step": 31696 - }, - { - "epoch": 2.4234570025039663, - "grad_norm": 0.0006665420951321721, - "learning_rate": 0.0001999971035576899, - "loss": 46.0, - "step": 31697 - }, - { - "epoch": 2.423533459487356, - "grad_norm": 0.0019252111669629812, - "learning_rate": 0.00019999710337487207, - "loss": 46.0, - "step": 31698 - }, - { - "epoch": 2.4236099164707454, - "grad_norm": 0.001246387604624033, - "learning_rate": 0.00019999710319204848, - "loss": 46.0, - "step": 31699 - }, - { - "epoch": 2.423686373454135, - "grad_norm": 0.0019187004072591662, - "learning_rate": 0.00019999710300921915, - "loss": 46.0, - "step": 31700 - }, - { - "epoch": 2.423762830437525, - "grad_norm": 0.0011649333173409104, - "learning_rate": 0.000199997102826384, - "loss": 46.0, - "step": 31701 - }, - { - "epoch": 2.4238392874209147, - "grad_norm": 0.005542614497244358, - "learning_rate": 0.00019999710264354312, - "loss": 46.0, - "step": 31702 - }, - { - "epoch": 2.4239157444043045, - "grad_norm": 0.0004525418335106224, - "learning_rate": 0.00019999710246069647, - "loss": 46.0, - "step": 31703 - }, - { - "epoch": 2.4239922013876942, - "grad_norm": 0.0006581030320376158, - "learning_rate": 0.00019999710227784406, - "loss": 46.0, - "step": 31704 - }, - { - "epoch": 2.424068658371084, - "grad_norm": 0.0010272247018292546, - "learning_rate": 0.00019999710209498586, - "loss": 46.0, - "step": 31705 - }, - { - "epoch": 2.4241451153544737, - "grad_norm": 0.001188663300126791, - "learning_rate": 0.00019999710191212188, - "loss": 46.0, - "step": 31706 - }, - { - "epoch": 2.4242215723378635, - "grad_norm": 0.0005571183864958584, - "learning_rate": 0.00019999710172925215, - "loss": 46.0, - "step": 31707 - }, - { - "epoch": 2.4242980293212533, - "grad_norm": 0.0007985879783518612, - "learning_rate": 0.00019999710154637663, - "loss": 46.0, - "step": 31708 - }, - { - "epoch": 2.424374486304643, - "grad_norm": 0.0010718220146372914, - "learning_rate": 0.00019999710136349536, - "loss": 46.0, - "step": 31709 - }, - { - "epoch": 2.424450943288033, - "grad_norm": 0.0004937925841659307, - "learning_rate": 0.00019999710118060831, - "loss": 46.0, - "step": 31710 - }, - { - "epoch": 2.424527400271422, - "grad_norm": 0.0007205493748188019, - "learning_rate": 0.00019999710099771552, - "loss": 46.0, - "step": 31711 - }, - { - "epoch": 2.424603857254812, - "grad_norm": 0.0011248005321249366, - "learning_rate": 0.00019999710081481693, - "loss": 46.0, - "step": 31712 - }, - { - "epoch": 2.4246803142382016, - "grad_norm": 0.0012535803252831101, - "learning_rate": 0.00019999710063191257, - "loss": 46.0, - "step": 31713 - }, - { - "epoch": 2.4247567712215914, - "grad_norm": 0.001577549846842885, - "learning_rate": 0.00019999710044900243, - "loss": 46.0, - "step": 31714 - }, - { - "epoch": 2.424833228204981, - "grad_norm": 0.002583804540336132, - "learning_rate": 0.00019999710026608655, - "loss": 46.0, - "step": 31715 - }, - { - "epoch": 2.424909685188371, - "grad_norm": 0.0013151037273928523, - "learning_rate": 0.0001999971000831649, - "loss": 46.0, - "step": 31716 - }, - { - "epoch": 2.4249861421717607, - "grad_norm": 0.0008083076681941748, - "learning_rate": 0.00019999709990023746, - "loss": 46.0, - "step": 31717 - }, - { - "epoch": 2.4250625991551504, - "grad_norm": 0.0055492958053946495, - "learning_rate": 0.00019999709971730428, - "loss": 46.0, - "step": 31718 - }, - { - "epoch": 2.42513905613854, - "grad_norm": 0.0009085440542548895, - "learning_rate": 0.0001999970995343653, - "loss": 46.0, - "step": 31719 - }, - { - "epoch": 2.42521551312193, - "grad_norm": 0.0007682053255848587, - "learning_rate": 0.00019999709935142055, - "loss": 46.0, - "step": 31720 - }, - { - "epoch": 2.4252919701053193, - "grad_norm": 0.0011408563004806638, - "learning_rate": 0.00019999709916847006, - "loss": 46.0, - "step": 31721 - }, - { - "epoch": 2.425368427088709, - "grad_norm": 0.003419312182813883, - "learning_rate": 0.00019999709898551379, - "loss": 46.0, - "step": 31722 - }, - { - "epoch": 2.425444884072099, - "grad_norm": 0.0007048809784464538, - "learning_rate": 0.00019999709880255174, - "loss": 46.0, - "step": 31723 - }, - { - "epoch": 2.4255213410554886, - "grad_norm": 0.001151367207057774, - "learning_rate": 0.00019999709861958392, - "loss": 46.0, - "step": 31724 - }, - { - "epoch": 2.4255977980388783, - "grad_norm": 0.0008718199096620083, - "learning_rate": 0.0001999970984366103, - "loss": 46.0, - "step": 31725 - }, - { - "epoch": 2.425674255022268, - "grad_norm": 0.001035741763189435, - "learning_rate": 0.00019999709825363097, - "loss": 46.0, - "step": 31726 - }, - { - "epoch": 2.425750712005658, - "grad_norm": 0.0016902287025004625, - "learning_rate": 0.00019999709807064586, - "loss": 46.0, - "step": 31727 - }, - { - "epoch": 2.4258271689890476, - "grad_norm": 0.007216343656182289, - "learning_rate": 0.00019999709788765495, - "loss": 46.0, - "step": 31728 - }, - { - "epoch": 2.4259036259724374, - "grad_norm": 0.0051716906018555164, - "learning_rate": 0.00019999709770465832, - "loss": 46.0, - "step": 31729 - }, - { - "epoch": 2.425980082955827, - "grad_norm": 0.0012299424270167947, - "learning_rate": 0.00019999709752165586, - "loss": 46.0, - "step": 31730 - }, - { - "epoch": 2.426056539939217, - "grad_norm": 0.0009762330446392298, - "learning_rate": 0.00019999709733864766, - "loss": 46.0, - "step": 31731 - }, - { - "epoch": 2.4261329969226066, - "grad_norm": 0.000712329987436533, - "learning_rate": 0.0001999970971556337, - "loss": 46.0, - "step": 31732 - }, - { - "epoch": 2.426209453905996, - "grad_norm": 0.0041829003021121025, - "learning_rate": 0.00019999709697261398, - "loss": 46.0, - "step": 31733 - }, - { - "epoch": 2.4262859108893857, - "grad_norm": 0.0012989091919735074, - "learning_rate": 0.00019999709678958843, - "loss": 46.0, - "step": 31734 - }, - { - "epoch": 2.4263623678727755, - "grad_norm": 0.001802045269869268, - "learning_rate": 0.0001999970966065572, - "loss": 46.0, - "step": 31735 - }, - { - "epoch": 2.4264388248561652, - "grad_norm": 0.004469194449484348, - "learning_rate": 0.00019999709642352015, - "loss": 46.0, - "step": 31736 - }, - { - "epoch": 2.426515281839555, - "grad_norm": 0.0012777147348970175, - "learning_rate": 0.0001999970962404773, - "loss": 46.0, - "step": 31737 - }, - { - "epoch": 2.4265917388229448, - "grad_norm": 0.0007960747461766005, - "learning_rate": 0.0001999970960574287, - "loss": 46.0, - "step": 31738 - }, - { - "epoch": 2.4266681958063345, - "grad_norm": 0.0008298384491354227, - "learning_rate": 0.00019999709587437437, - "loss": 46.0, - "step": 31739 - }, - { - "epoch": 2.4267446527897243, - "grad_norm": 0.0012634429149329662, - "learning_rate": 0.00019999709569131424, - "loss": 46.0, - "step": 31740 - }, - { - "epoch": 2.426821109773114, - "grad_norm": 0.0007753687677904963, - "learning_rate": 0.00019999709550824835, - "loss": 46.0, - "step": 31741 - }, - { - "epoch": 2.426897566756504, - "grad_norm": 0.0021268592681735754, - "learning_rate": 0.00019999709532517667, - "loss": 46.0, - "step": 31742 - }, - { - "epoch": 2.426974023739893, - "grad_norm": 0.004711328539997339, - "learning_rate": 0.00019999709514209927, - "loss": 46.0, - "step": 31743 - }, - { - "epoch": 2.427050480723283, - "grad_norm": 0.0008751682471483946, - "learning_rate": 0.00019999709495901606, - "loss": 46.0, - "step": 31744 - }, - { - "epoch": 2.4271269377066726, - "grad_norm": 0.001242566853761673, - "learning_rate": 0.00019999709477592709, - "loss": 46.0, - "step": 31745 - }, - { - "epoch": 2.4272033946900624, - "grad_norm": 0.0014638626016676426, - "learning_rate": 0.00019999709459283236, - "loss": 46.0, - "step": 31746 - }, - { - "epoch": 2.427279851673452, - "grad_norm": 0.0010472440626472235, - "learning_rate": 0.00019999709440973184, - "loss": 46.0, - "step": 31747 - }, - { - "epoch": 2.427356308656842, - "grad_norm": 0.0005147071206010878, - "learning_rate": 0.00019999709422662557, - "loss": 46.0, - "step": 31748 - }, - { - "epoch": 2.4274327656402317, - "grad_norm": 0.0014069130411371589, - "learning_rate": 0.0001999970940435135, - "loss": 46.0, - "step": 31749 - }, - { - "epoch": 2.4275092226236215, - "grad_norm": 0.0008542793802917004, - "learning_rate": 0.0001999970938603957, - "loss": 46.0, - "step": 31750 - }, - { - "epoch": 2.427585679607011, - "grad_norm": 0.001664007198996842, - "learning_rate": 0.00019999709367727212, - "loss": 46.0, - "step": 31751 - }, - { - "epoch": 2.427662136590401, - "grad_norm": 0.0010284207528457046, - "learning_rate": 0.00019999709349414278, - "loss": 46.0, - "step": 31752 - }, - { - "epoch": 2.4277385935737907, - "grad_norm": 0.0014334612060338259, - "learning_rate": 0.00019999709331100762, - "loss": 46.0, - "step": 31753 - }, - { - "epoch": 2.4278150505571805, - "grad_norm": 0.0014943047426640987, - "learning_rate": 0.00019999709312786674, - "loss": 46.0, - "step": 31754 - }, - { - "epoch": 2.42789150754057, - "grad_norm": 0.0010804033372551203, - "learning_rate": 0.00019999709294472008, - "loss": 46.0, - "step": 31755 - }, - { - "epoch": 2.4279679645239596, - "grad_norm": 0.0009538778685964644, - "learning_rate": 0.00019999709276156765, - "loss": 46.0, - "step": 31756 - }, - { - "epoch": 2.4280444215073493, - "grad_norm": 0.0008481864933855832, - "learning_rate": 0.00019999709257840945, - "loss": 46.0, - "step": 31757 - }, - { - "epoch": 2.428120878490739, - "grad_norm": 0.005737654864788055, - "learning_rate": 0.00019999709239524547, - "loss": 46.0, - "step": 31758 - }, - { - "epoch": 2.428197335474129, - "grad_norm": 0.003212561132386327, - "learning_rate": 0.00019999709221207575, - "loss": 46.0, - "step": 31759 - }, - { - "epoch": 2.4282737924575186, - "grad_norm": 0.004905946087092161, - "learning_rate": 0.00019999709202890022, - "loss": 46.0, - "step": 31760 - }, - { - "epoch": 2.4283502494409084, - "grad_norm": 0.0010980576043948531, - "learning_rate": 0.00019999709184571893, - "loss": 46.0, - "step": 31761 - }, - { - "epoch": 2.428426706424298, - "grad_norm": 0.0005742415669374168, - "learning_rate": 0.0001999970916625319, - "loss": 46.0, - "step": 31762 - }, - { - "epoch": 2.428503163407688, - "grad_norm": 0.0009037034469656646, - "learning_rate": 0.00019999709147933907, - "loss": 46.0, - "step": 31763 - }, - { - "epoch": 2.428579620391077, - "grad_norm": 0.0034793277736753225, - "learning_rate": 0.0001999970912961405, - "loss": 46.0, - "step": 31764 - }, - { - "epoch": 2.428656077374467, - "grad_norm": 0.0029805137310177088, - "learning_rate": 0.00019999709111293611, - "loss": 46.0, - "step": 31765 - }, - { - "epoch": 2.4287325343578567, - "grad_norm": 0.0028320078272372484, - "learning_rate": 0.000199997090929726, - "loss": 46.0, - "step": 31766 - }, - { - "epoch": 2.4288089913412465, - "grad_norm": 0.0004579621599987149, - "learning_rate": 0.00019999709074651012, - "loss": 46.0, - "step": 31767 - }, - { - "epoch": 2.4288854483246363, - "grad_norm": 0.002480153925716877, - "learning_rate": 0.00019999709056328847, - "loss": 46.0, - "step": 31768 - }, - { - "epoch": 2.428961905308026, - "grad_norm": 0.0014389334246516228, - "learning_rate": 0.00019999709038006104, - "loss": 46.0, - "step": 31769 - }, - { - "epoch": 2.429038362291416, - "grad_norm": 0.0009162189671769738, - "learning_rate": 0.0001999970901968278, - "loss": 46.0, - "step": 31770 - }, - { - "epoch": 2.4291148192748055, - "grad_norm": 0.001437582541257143, - "learning_rate": 0.00019999709001358883, - "loss": 46.0, - "step": 31771 - }, - { - "epoch": 2.4291912762581953, - "grad_norm": 0.0005465935682877898, - "learning_rate": 0.0001999970898303441, - "loss": 46.0, - "step": 31772 - }, - { - "epoch": 2.429267733241585, - "grad_norm": 0.0018559836316853762, - "learning_rate": 0.0001999970896470936, - "loss": 46.0, - "step": 31773 - }, - { - "epoch": 2.429344190224975, - "grad_norm": 0.0007934711175039411, - "learning_rate": 0.00019999708946383732, - "loss": 46.0, - "step": 31774 - }, - { - "epoch": 2.4294206472083646, - "grad_norm": 0.0009905733168125153, - "learning_rate": 0.00019999708928057527, - "loss": 46.0, - "step": 31775 - }, - { - "epoch": 2.4294971041917544, - "grad_norm": 0.00230024172924459, - "learning_rate": 0.00019999708909730743, - "loss": 46.0, - "step": 31776 - }, - { - "epoch": 2.4295735611751437, - "grad_norm": 0.0014973955694586039, - "learning_rate": 0.00019999708891403387, - "loss": 46.0, - "step": 31777 - }, - { - "epoch": 2.4296500181585334, - "grad_norm": 0.0024959947913885117, - "learning_rate": 0.0001999970887307545, - "loss": 46.0, - "step": 31778 - }, - { - "epoch": 2.429726475141923, - "grad_norm": 0.012260590679943562, - "learning_rate": 0.0001999970885474694, - "loss": 46.0, - "step": 31779 - }, - { - "epoch": 2.429802932125313, - "grad_norm": 0.0025973531883209944, - "learning_rate": 0.00019999708836417849, - "loss": 46.0, - "step": 31780 - }, - { - "epoch": 2.4298793891087027, - "grad_norm": 0.001155383768491447, - "learning_rate": 0.00019999708818088183, - "loss": 46.0, - "step": 31781 - }, - { - "epoch": 2.4299558460920925, - "grad_norm": 0.0009758244268596172, - "learning_rate": 0.0001999970879975794, - "loss": 46.0, - "step": 31782 - }, - { - "epoch": 2.4300323030754822, - "grad_norm": 0.0007594484486617148, - "learning_rate": 0.00019999708781427117, - "loss": 46.0, - "step": 31783 - }, - { - "epoch": 2.430108760058872, - "grad_norm": 0.0009277135832235217, - "learning_rate": 0.00019999708763095722, - "loss": 46.0, - "step": 31784 - }, - { - "epoch": 2.4301852170422618, - "grad_norm": 0.003238823963329196, - "learning_rate": 0.00019999708744763747, - "loss": 46.0, - "step": 31785 - }, - { - "epoch": 2.430261674025651, - "grad_norm": 0.0010456267045810819, - "learning_rate": 0.00019999708726431195, - "loss": 46.0, - "step": 31786 - }, - { - "epoch": 2.430338131009041, - "grad_norm": 0.0018340791575610638, - "learning_rate": 0.00019999708708098068, - "loss": 46.0, - "step": 31787 - }, - { - "epoch": 2.4304145879924306, - "grad_norm": 0.006502361968159676, - "learning_rate": 0.00019999708689764364, - "loss": 46.0, - "step": 31788 - }, - { - "epoch": 2.4304910449758204, - "grad_norm": 0.0012338829692453146, - "learning_rate": 0.0001999970867143008, - "loss": 46.0, - "step": 31789 - }, - { - "epoch": 2.43056750195921, - "grad_norm": 0.0012245511170476675, - "learning_rate": 0.0001999970865309522, - "loss": 46.0, - "step": 31790 - }, - { - "epoch": 2.4306439589426, - "grad_norm": 0.0005599466385319829, - "learning_rate": 0.00019999708634759787, - "loss": 46.0, - "step": 31791 - }, - { - "epoch": 2.4307204159259896, - "grad_norm": 0.001547261024825275, - "learning_rate": 0.00019999708616423774, - "loss": 46.0, - "step": 31792 - }, - { - "epoch": 2.4307968729093794, - "grad_norm": 0.002378972480073571, - "learning_rate": 0.00019999708598087183, - "loss": 46.0, - "step": 31793 - }, - { - "epoch": 2.430873329892769, - "grad_norm": 0.0019048320828005672, - "learning_rate": 0.00019999708579750017, - "loss": 46.0, - "step": 31794 - }, - { - "epoch": 2.430949786876159, - "grad_norm": 0.0014762452337890863, - "learning_rate": 0.00019999708561412274, - "loss": 46.0, - "step": 31795 - }, - { - "epoch": 2.4310262438595487, - "grad_norm": 0.0006561970221810043, - "learning_rate": 0.00019999708543073954, - "loss": 46.0, - "step": 31796 - }, - { - "epoch": 2.4311027008429384, - "grad_norm": 0.008589284494519234, - "learning_rate": 0.00019999708524735056, - "loss": 46.0, - "step": 31797 - }, - { - "epoch": 2.431179157826328, - "grad_norm": 0.001867231447249651, - "learning_rate": 0.00019999708506395581, - "loss": 46.0, - "step": 31798 - }, - { - "epoch": 2.4312556148097175, - "grad_norm": 0.0009940499439835548, - "learning_rate": 0.00019999708488055532, - "loss": 46.0, - "step": 31799 - }, - { - "epoch": 2.4313320717931073, - "grad_norm": 0.0003697279025800526, - "learning_rate": 0.00019999708469714902, - "loss": 46.0, - "step": 31800 - }, - { - "epoch": 2.431408528776497, - "grad_norm": 0.000913189840503037, - "learning_rate": 0.00019999708451373698, - "loss": 46.0, - "step": 31801 - }, - { - "epoch": 2.431484985759887, - "grad_norm": 0.00109881442040205, - "learning_rate": 0.00019999708433031917, - "loss": 46.0, - "step": 31802 - }, - { - "epoch": 2.4315614427432766, - "grad_norm": 0.0009672069572843611, - "learning_rate": 0.00019999708414689558, - "loss": 46.0, - "step": 31803 - }, - { - "epoch": 2.4316378997266663, - "grad_norm": 0.0010592021280899644, - "learning_rate": 0.00019999708396346621, - "loss": 46.0, - "step": 31804 - }, - { - "epoch": 2.431714356710056, - "grad_norm": 0.0005268522654660046, - "learning_rate": 0.00019999708378003108, - "loss": 46.0, - "step": 31805 - }, - { - "epoch": 2.431790813693446, - "grad_norm": 0.00042245833901688457, - "learning_rate": 0.00019999708359659017, - "loss": 46.0, - "step": 31806 - }, - { - "epoch": 2.4318672706768356, - "grad_norm": 0.003212742740288377, - "learning_rate": 0.00019999708341314354, - "loss": 46.0, - "step": 31807 - }, - { - "epoch": 2.431943727660225, - "grad_norm": 0.00044132917537353933, - "learning_rate": 0.00019999708322969109, - "loss": 46.0, - "step": 31808 - }, - { - "epoch": 2.4320201846436147, - "grad_norm": 0.0009283123072236776, - "learning_rate": 0.00019999708304623288, - "loss": 46.0, - "step": 31809 - }, - { - "epoch": 2.4320966416270045, - "grad_norm": 0.0013121285010129213, - "learning_rate": 0.0001999970828627689, - "loss": 46.0, - "step": 31810 - }, - { - "epoch": 2.432173098610394, - "grad_norm": 0.0009874447714537382, - "learning_rate": 0.0001999970826792992, - "loss": 46.0, - "step": 31811 - }, - { - "epoch": 2.432249555593784, - "grad_norm": 0.0009543704800307751, - "learning_rate": 0.00019999708249582364, - "loss": 46.0, - "step": 31812 - }, - { - "epoch": 2.4323260125771737, - "grad_norm": 0.0035185085143893957, - "learning_rate": 0.00019999708231234237, - "loss": 46.0, - "step": 31813 - }, - { - "epoch": 2.4324024695605635, - "grad_norm": 0.0032497625797986984, - "learning_rate": 0.0001999970821288553, - "loss": 46.0, - "step": 31814 - }, - { - "epoch": 2.4324789265439533, - "grad_norm": 0.0007659861003048718, - "learning_rate": 0.0001999970819453625, - "loss": 46.0, - "step": 31815 - }, - { - "epoch": 2.432555383527343, - "grad_norm": 0.0014378949999809265, - "learning_rate": 0.00019999708176186392, - "loss": 46.0, - "step": 31816 - }, - { - "epoch": 2.432631840510733, - "grad_norm": 0.008012588135898113, - "learning_rate": 0.00019999708157835956, - "loss": 46.0, - "step": 31817 - }, - { - "epoch": 2.4327082974941225, - "grad_norm": 0.001989668468013406, - "learning_rate": 0.00019999708139484943, - "loss": 46.0, - "step": 31818 - }, - { - "epoch": 2.4327847544775123, - "grad_norm": 0.0018083906034007668, - "learning_rate": 0.00019999708121133352, - "loss": 46.0, - "step": 31819 - }, - { - "epoch": 2.4328612114609016, - "grad_norm": 0.0019533506128937006, - "learning_rate": 0.00019999708102781186, - "loss": 46.0, - "step": 31820 - }, - { - "epoch": 2.4329376684442914, - "grad_norm": 0.0029634260572493076, - "learning_rate": 0.0001999970808442844, - "loss": 46.0, - "step": 31821 - }, - { - "epoch": 2.433014125427681, - "grad_norm": 0.0013277167454361916, - "learning_rate": 0.00019999708066075123, - "loss": 46.0, - "step": 31822 - }, - { - "epoch": 2.433090582411071, - "grad_norm": 0.0011007209541276097, - "learning_rate": 0.00019999708047721226, - "loss": 46.0, - "step": 31823 - }, - { - "epoch": 2.4331670393944607, - "grad_norm": 0.0014727492816746235, - "learning_rate": 0.0001999970802936675, - "loss": 46.0, - "step": 31824 - }, - { - "epoch": 2.4332434963778504, - "grad_norm": 0.0006560039473697543, - "learning_rate": 0.00019999708011011696, - "loss": 46.0, - "step": 31825 - }, - { - "epoch": 2.43331995336124, - "grad_norm": 0.0007435241132043302, - "learning_rate": 0.0001999970799265607, - "loss": 46.0, - "step": 31826 - }, - { - "epoch": 2.43339641034463, - "grad_norm": 0.0004581456014420837, - "learning_rate": 0.00019999707974299865, - "loss": 46.0, - "step": 31827 - }, - { - "epoch": 2.4334728673280197, - "grad_norm": 0.0006350258481688797, - "learning_rate": 0.00019999707955943084, - "loss": 46.0, - "step": 31828 - }, - { - "epoch": 2.4335493243114095, - "grad_norm": 0.0023034026380628347, - "learning_rate": 0.00019999707937585725, - "loss": 46.0, - "step": 31829 - }, - { - "epoch": 2.433625781294799, - "grad_norm": 0.0016607922734692693, - "learning_rate": 0.0001999970791922779, - "loss": 46.0, - "step": 31830 - }, - { - "epoch": 2.4337022382781885, - "grad_norm": 0.001769884373061359, - "learning_rate": 0.00019999707900869275, - "loss": 46.0, - "step": 31831 - }, - { - "epoch": 2.4337786952615783, - "grad_norm": 0.0013325974578037858, - "learning_rate": 0.00019999707882510187, - "loss": 46.0, - "step": 31832 - }, - { - "epoch": 2.433855152244968, - "grad_norm": 0.0011648869840428233, - "learning_rate": 0.0001999970786415052, - "loss": 46.0, - "step": 31833 - }, - { - "epoch": 2.433931609228358, - "grad_norm": 0.000647606560960412, - "learning_rate": 0.00019999707845790273, - "loss": 46.0, - "step": 31834 - }, - { - "epoch": 2.4340080662117476, - "grad_norm": 0.0010726067703217268, - "learning_rate": 0.00019999707827429456, - "loss": 46.0, - "step": 31835 - }, - { - "epoch": 2.4340845231951374, - "grad_norm": 0.0014563523000106215, - "learning_rate": 0.00019999707809068059, - "loss": 46.0, - "step": 31836 - }, - { - "epoch": 2.434160980178527, - "grad_norm": 0.0017075929790735245, - "learning_rate": 0.00019999707790706084, - "loss": 46.0, - "step": 31837 - }, - { - "epoch": 2.434237437161917, - "grad_norm": 0.0021645165979862213, - "learning_rate": 0.00019999707772343532, - "loss": 46.0, - "step": 31838 - }, - { - "epoch": 2.4343138941453066, - "grad_norm": 0.0009498960571363568, - "learning_rate": 0.00019999707753980402, - "loss": 46.0, - "step": 31839 - }, - { - "epoch": 2.4343903511286964, - "grad_norm": 0.002539821667596698, - "learning_rate": 0.00019999707735616698, - "loss": 46.0, - "step": 31840 - }, - { - "epoch": 2.434466808112086, - "grad_norm": 0.0008213865221478045, - "learning_rate": 0.00019999707717252417, - "loss": 46.0, - "step": 31841 - }, - { - "epoch": 2.4345432650954755, - "grad_norm": 0.003667982295155525, - "learning_rate": 0.00019999707698887558, - "loss": 46.0, - "step": 31842 - }, - { - "epoch": 2.4346197220788652, - "grad_norm": 0.0010683451546356082, - "learning_rate": 0.0001999970768052212, - "loss": 46.0, - "step": 31843 - }, - { - "epoch": 2.434696179062255, - "grad_norm": 0.0005103599978610873, - "learning_rate": 0.00019999707662156108, - "loss": 46.0, - "step": 31844 - }, - { - "epoch": 2.4347726360456448, - "grad_norm": 0.0010878884932026267, - "learning_rate": 0.00019999707643789517, - "loss": 46.0, - "step": 31845 - }, - { - "epoch": 2.4348490930290345, - "grad_norm": 0.002139989286661148, - "learning_rate": 0.0001999970762542235, - "loss": 46.0, - "step": 31846 - }, - { - "epoch": 2.4349255500124243, - "grad_norm": 0.0013321478618308902, - "learning_rate": 0.00019999707607054606, - "loss": 46.0, - "step": 31847 - }, - { - "epoch": 2.435002006995814, - "grad_norm": 0.0012961842585355043, - "learning_rate": 0.00019999707588686286, - "loss": 46.0, - "step": 31848 - }, - { - "epoch": 2.435078463979204, - "grad_norm": 0.0013146473793312907, - "learning_rate": 0.0001999970757031739, - "loss": 46.0, - "step": 31849 - }, - { - "epoch": 2.4351549209625936, - "grad_norm": 0.0008391342125833035, - "learning_rate": 0.00019999707551947914, - "loss": 46.0, - "step": 31850 - }, - { - "epoch": 2.4352313779459833, - "grad_norm": 0.0009723032708279788, - "learning_rate": 0.00019999707533577862, - "loss": 46.0, - "step": 31851 - }, - { - "epoch": 2.4353078349293726, - "grad_norm": 0.0024366267025470734, - "learning_rate": 0.00019999707515207233, - "loss": 46.0, - "step": 31852 - }, - { - "epoch": 2.4353842919127624, - "grad_norm": 0.0009454222745262086, - "learning_rate": 0.0001999970749683603, - "loss": 46.0, - "step": 31853 - }, - { - "epoch": 2.435460748896152, - "grad_norm": 0.0012927548959851265, - "learning_rate": 0.00019999707478464247, - "loss": 46.0, - "step": 31854 - }, - { - "epoch": 2.435537205879542, - "grad_norm": 0.0013167349388822913, - "learning_rate": 0.00019999707460091888, - "loss": 46.0, - "step": 31855 - }, - { - "epoch": 2.4356136628629317, - "grad_norm": 0.00020622643933165818, - "learning_rate": 0.00019999707441718952, - "loss": 46.0, - "step": 31856 - }, - { - "epoch": 2.4356901198463214, - "grad_norm": 0.000532140547875315, - "learning_rate": 0.0001999970742334544, - "loss": 46.0, - "step": 31857 - }, - { - "epoch": 2.435766576829711, - "grad_norm": 0.0008245834615081549, - "learning_rate": 0.0001999970740497135, - "loss": 46.0, - "step": 31858 - }, - { - "epoch": 2.435843033813101, - "grad_norm": 0.00235170335508883, - "learning_rate": 0.00019999707386596683, - "loss": 46.0, - "step": 31859 - }, - { - "epoch": 2.4359194907964907, - "grad_norm": 0.0011770181590691209, - "learning_rate": 0.0001999970736822144, - "loss": 46.0, - "step": 31860 - }, - { - "epoch": 2.4359959477798805, - "grad_norm": 0.0007052259752526879, - "learning_rate": 0.00019999707349845617, - "loss": 46.0, - "step": 31861 - }, - { - "epoch": 2.4360724047632702, - "grad_norm": 0.003323481883853674, - "learning_rate": 0.0001999970733146922, - "loss": 46.0, - "step": 31862 - }, - { - "epoch": 2.43614886174666, - "grad_norm": 0.0009671826846897602, - "learning_rate": 0.00019999707313092245, - "loss": 46.0, - "step": 31863 - }, - { - "epoch": 2.4362253187300493, - "grad_norm": 0.0006834216765128076, - "learning_rate": 0.00019999707294714693, - "loss": 46.0, - "step": 31864 - }, - { - "epoch": 2.436301775713439, - "grad_norm": 0.0007340940064750612, - "learning_rate": 0.00019999707276336566, - "loss": 46.0, - "step": 31865 - }, - { - "epoch": 2.436378232696829, - "grad_norm": 0.0011766324751079082, - "learning_rate": 0.0001999970725795786, - "loss": 46.0, - "step": 31866 - }, - { - "epoch": 2.4364546896802186, - "grad_norm": 0.00044746213825419545, - "learning_rate": 0.00019999707239578578, - "loss": 46.0, - "step": 31867 - }, - { - "epoch": 2.4365311466636084, - "grad_norm": 0.0004813310515601188, - "learning_rate": 0.0001999970722119872, - "loss": 46.0, - "step": 31868 - }, - { - "epoch": 2.436607603646998, - "grad_norm": 0.0016076306346803904, - "learning_rate": 0.00019999707202818283, - "loss": 46.0, - "step": 31869 - }, - { - "epoch": 2.436684060630388, - "grad_norm": 0.0016828869702294469, - "learning_rate": 0.0001999970718443727, - "loss": 46.0, - "step": 31870 - }, - { - "epoch": 2.4367605176137777, - "grad_norm": 0.003962460905313492, - "learning_rate": 0.0001999970716605568, - "loss": 46.0, - "step": 31871 - }, - { - "epoch": 2.4368369745971674, - "grad_norm": 0.0010938819032162428, - "learning_rate": 0.0001999970714767351, - "loss": 46.0, - "step": 31872 - }, - { - "epoch": 2.436913431580557, - "grad_norm": 0.0016832476248964667, - "learning_rate": 0.0001999970712929077, - "loss": 46.0, - "step": 31873 - }, - { - "epoch": 2.4369898885639465, - "grad_norm": 0.0013671672204509377, - "learning_rate": 0.00019999707110907448, - "loss": 46.0, - "step": 31874 - }, - { - "epoch": 2.4370663455473363, - "grad_norm": 0.001036601490341127, - "learning_rate": 0.0001999970709252355, - "loss": 46.0, - "step": 31875 - }, - { - "epoch": 2.437142802530726, - "grad_norm": 0.0007080077775754035, - "learning_rate": 0.00019999707074139076, - "loss": 46.0, - "step": 31876 - }, - { - "epoch": 2.4372192595141158, - "grad_norm": 0.0007337298011407256, - "learning_rate": 0.00019999707055754024, - "loss": 46.0, - "step": 31877 - }, - { - "epoch": 2.4372957164975055, - "grad_norm": 0.0007968803984113038, - "learning_rate": 0.00019999707037368395, - "loss": 46.0, - "step": 31878 - }, - { - "epoch": 2.4373721734808953, - "grad_norm": 0.0016598189249634743, - "learning_rate": 0.0001999970701898219, - "loss": 46.0, - "step": 31879 - }, - { - "epoch": 2.437448630464285, - "grad_norm": 0.0009609736152924597, - "learning_rate": 0.0001999970700059541, - "loss": 46.0, - "step": 31880 - }, - { - "epoch": 2.437525087447675, - "grad_norm": 0.0015193286817520857, - "learning_rate": 0.00019999706982208048, - "loss": 46.0, - "step": 31881 - }, - { - "epoch": 2.4376015444310646, - "grad_norm": 0.0011460300302132964, - "learning_rate": 0.00019999706963820112, - "loss": 46.0, - "step": 31882 - }, - { - "epoch": 2.4376780014144543, - "grad_norm": 0.0017795903841033578, - "learning_rate": 0.00019999706945431599, - "loss": 46.0, - "step": 31883 - }, - { - "epoch": 2.437754458397844, - "grad_norm": 0.0004253470688126981, - "learning_rate": 0.00019999706927042508, - "loss": 46.0, - "step": 31884 - }, - { - "epoch": 2.437830915381234, - "grad_norm": 0.0007202128763310611, - "learning_rate": 0.00019999706908652843, - "loss": 46.0, - "step": 31885 - }, - { - "epoch": 2.437907372364623, - "grad_norm": 0.0009381232084706426, - "learning_rate": 0.00019999706890262597, - "loss": 46.0, - "step": 31886 - }, - { - "epoch": 2.437983829348013, - "grad_norm": 0.0008243191405199468, - "learning_rate": 0.00019999706871871777, - "loss": 46.0, - "step": 31887 - }, - { - "epoch": 2.4380602863314027, - "grad_norm": 0.0009223247179761529, - "learning_rate": 0.00019999706853480377, - "loss": 46.0, - "step": 31888 - }, - { - "epoch": 2.4381367433147925, - "grad_norm": 0.0010832598200067878, - "learning_rate": 0.00019999706835088405, - "loss": 46.0, - "step": 31889 - }, - { - "epoch": 2.4382132002981822, - "grad_norm": 0.0009400818380527198, - "learning_rate": 0.0001999970681669585, - "loss": 46.0, - "step": 31890 - }, - { - "epoch": 2.438289657281572, - "grad_norm": 0.001397757907398045, - "learning_rate": 0.00019999706798302724, - "loss": 46.0, - "step": 31891 - }, - { - "epoch": 2.4383661142649617, - "grad_norm": 0.0007155849016271532, - "learning_rate": 0.0001999970677990902, - "loss": 46.0, - "step": 31892 - }, - { - "epoch": 2.4384425712483515, - "grad_norm": 0.0024621887132525444, - "learning_rate": 0.00019999706761514736, - "loss": 46.0, - "step": 31893 - }, - { - "epoch": 2.4385190282317413, - "grad_norm": 0.002815013052895665, - "learning_rate": 0.00019999706743119877, - "loss": 46.0, - "step": 31894 - }, - { - "epoch": 2.438595485215131, - "grad_norm": 0.0024654981680214405, - "learning_rate": 0.0001999970672472444, - "loss": 46.0, - "step": 31895 - }, - { - "epoch": 2.4386719421985203, - "grad_norm": 0.0006057376158423722, - "learning_rate": 0.00019999706706328428, - "loss": 46.0, - "step": 31896 - }, - { - "epoch": 2.43874839918191, - "grad_norm": 0.000602005748078227, - "learning_rate": 0.0001999970668793184, - "loss": 46.0, - "step": 31897 - }, - { - "epoch": 2.4388248561653, - "grad_norm": 0.0019210334867238998, - "learning_rate": 0.0001999970666953467, - "loss": 46.0, - "step": 31898 - }, - { - "epoch": 2.4389013131486896, - "grad_norm": 0.0010675546946004033, - "learning_rate": 0.00019999706651136924, - "loss": 46.0, - "step": 31899 - }, - { - "epoch": 2.4389777701320794, - "grad_norm": 0.0012831983622163534, - "learning_rate": 0.00019999706632738606, - "loss": 46.0, - "step": 31900 - }, - { - "epoch": 2.439054227115469, - "grad_norm": 0.0009948968654498458, - "learning_rate": 0.00019999706614339706, - "loss": 46.0, - "step": 31901 - }, - { - "epoch": 2.439130684098859, - "grad_norm": 0.0006226995028555393, - "learning_rate": 0.00019999706595940232, - "loss": 46.0, - "step": 31902 - }, - { - "epoch": 2.4392071410822487, - "grad_norm": 0.0006625099340453744, - "learning_rate": 0.0001999970657754018, - "loss": 46.0, - "step": 31903 - }, - { - "epoch": 2.4392835980656384, - "grad_norm": 0.0006312231998890638, - "learning_rate": 0.0001999970655913955, - "loss": 46.0, - "step": 31904 - }, - { - "epoch": 2.439360055049028, - "grad_norm": 0.0005924067227169871, - "learning_rate": 0.00019999706540738346, - "loss": 46.0, - "step": 31905 - }, - { - "epoch": 2.439436512032418, - "grad_norm": 0.0010023250943049788, - "learning_rate": 0.00019999706522336562, - "loss": 46.0, - "step": 31906 - }, - { - "epoch": 2.4395129690158077, - "grad_norm": 0.002129745204001665, - "learning_rate": 0.00019999706503934204, - "loss": 46.0, - "step": 31907 - }, - { - "epoch": 2.439589425999197, - "grad_norm": 0.0006177141331136227, - "learning_rate": 0.00019999706485531268, - "loss": 46.0, - "step": 31908 - }, - { - "epoch": 2.439665882982587, - "grad_norm": 0.0008443251717835665, - "learning_rate": 0.00019999706467127755, - "loss": 46.0, - "step": 31909 - }, - { - "epoch": 2.4397423399659766, - "grad_norm": 0.002235991880297661, - "learning_rate": 0.00019999706448723664, - "loss": 46.0, - "step": 31910 - }, - { - "epoch": 2.4398187969493663, - "grad_norm": 0.0009709433652460575, - "learning_rate": 0.00019999706430318996, - "loss": 46.0, - "step": 31911 - }, - { - "epoch": 2.439895253932756, - "grad_norm": 0.0016714951489120722, - "learning_rate": 0.00019999706411913753, - "loss": 46.0, - "step": 31912 - }, - { - "epoch": 2.439971710916146, - "grad_norm": 0.0006552743725478649, - "learning_rate": 0.0001999970639350793, - "loss": 46.0, - "step": 31913 - }, - { - "epoch": 2.4400481678995356, - "grad_norm": 0.0012795705115422606, - "learning_rate": 0.00019999706375101533, - "loss": 46.0, - "step": 31914 - }, - { - "epoch": 2.4401246248829254, - "grad_norm": 0.0016064875526353717, - "learning_rate": 0.00019999706356694561, - "loss": 46.0, - "step": 31915 - }, - { - "epoch": 2.440201081866315, - "grad_norm": 0.002970742527395487, - "learning_rate": 0.00019999706338287007, - "loss": 46.0, - "step": 31916 - }, - { - "epoch": 2.4402775388497044, - "grad_norm": 0.0008429615991190076, - "learning_rate": 0.0001999970631987888, - "loss": 46.0, - "step": 31917 - }, - { - "epoch": 2.440353995833094, - "grad_norm": 0.014287672936916351, - "learning_rate": 0.00019999706301470174, - "loss": 46.0, - "step": 31918 - }, - { - "epoch": 2.440430452816484, - "grad_norm": 0.0012704402906820178, - "learning_rate": 0.0001999970628306089, - "loss": 46.0, - "step": 31919 - }, - { - "epoch": 2.4405069097998737, - "grad_norm": 0.0016309116035699844, - "learning_rate": 0.00019999706264651034, - "loss": 46.0, - "step": 31920 - }, - { - "epoch": 2.4405833667832635, - "grad_norm": 0.0007852125563658774, - "learning_rate": 0.00019999706246240595, - "loss": 46.0, - "step": 31921 - }, - { - "epoch": 2.4406598237666532, - "grad_norm": 0.0008507322636432946, - "learning_rate": 0.0001999970622782958, - "loss": 46.0, - "step": 31922 - }, - { - "epoch": 2.440736280750043, - "grad_norm": 0.0010519071947783232, - "learning_rate": 0.0001999970620941799, - "loss": 46.0, - "step": 31923 - }, - { - "epoch": 2.4408127377334328, - "grad_norm": 0.0009500946616753936, - "learning_rate": 0.00019999706191005823, - "loss": 46.0, - "step": 31924 - }, - { - "epoch": 2.4408891947168225, - "grad_norm": 0.0014651718083769083, - "learning_rate": 0.0001999970617259308, - "loss": 46.0, - "step": 31925 - }, - { - "epoch": 2.4409656517002123, - "grad_norm": 0.0013047963147982955, - "learning_rate": 0.00019999706154179758, - "loss": 46.0, - "step": 31926 - }, - { - "epoch": 2.441042108683602, - "grad_norm": 0.0008419928490184247, - "learning_rate": 0.0001999970613576586, - "loss": 46.0, - "step": 31927 - }, - { - "epoch": 2.441118565666992, - "grad_norm": 0.0018451045034453273, - "learning_rate": 0.00019999706117351384, - "loss": 46.0, - "step": 31928 - }, - { - "epoch": 2.4411950226503816, - "grad_norm": 0.0006400843267329037, - "learning_rate": 0.00019999706098936332, - "loss": 46.0, - "step": 31929 - }, - { - "epoch": 2.441271479633771, - "grad_norm": 0.0015969921369105577, - "learning_rate": 0.00019999706080520705, - "loss": 46.0, - "step": 31930 - }, - { - "epoch": 2.4413479366171607, - "grad_norm": 0.000968107720836997, - "learning_rate": 0.00019999706062104498, - "loss": 46.0, - "step": 31931 - }, - { - "epoch": 2.4414243936005504, - "grad_norm": 0.0012496842537075281, - "learning_rate": 0.00019999706043687717, - "loss": 46.0, - "step": 31932 - }, - { - "epoch": 2.44150085058394, - "grad_norm": 0.0011838324135169387, - "learning_rate": 0.00019999706025270356, - "loss": 46.0, - "step": 31933 - }, - { - "epoch": 2.44157730756733, - "grad_norm": 0.0006128470413386822, - "learning_rate": 0.0001999970600685242, - "loss": 46.0, - "step": 31934 - }, - { - "epoch": 2.4416537645507197, - "grad_norm": 0.0011169547215104103, - "learning_rate": 0.00019999705988433907, - "loss": 46.0, - "step": 31935 - }, - { - "epoch": 2.4417302215341095, - "grad_norm": 0.0014762532664462924, - "learning_rate": 0.00019999705970014816, - "loss": 46.0, - "step": 31936 - }, - { - "epoch": 2.441806678517499, - "grad_norm": 0.0013938657939434052, - "learning_rate": 0.00019999705951595148, - "loss": 46.0, - "step": 31937 - }, - { - "epoch": 2.441883135500889, - "grad_norm": 0.0011981924762949347, - "learning_rate": 0.00019999705933174906, - "loss": 46.0, - "step": 31938 - }, - { - "epoch": 2.4419595924842783, - "grad_norm": 0.0009762713452801108, - "learning_rate": 0.00019999705914754086, - "loss": 46.0, - "step": 31939 - }, - { - "epoch": 2.442036049467668, - "grad_norm": 0.0007055072346702218, - "learning_rate": 0.00019999705896332686, - "loss": 46.0, - "step": 31940 - }, - { - "epoch": 2.442112506451058, - "grad_norm": 0.00048238105955533683, - "learning_rate": 0.00019999705877910711, - "loss": 46.0, - "step": 31941 - }, - { - "epoch": 2.4421889634344476, - "grad_norm": 0.0010008387034758925, - "learning_rate": 0.0001999970585948816, - "loss": 46.0, - "step": 31942 - }, - { - "epoch": 2.4422654204178373, - "grad_norm": 0.0013541887747123837, - "learning_rate": 0.0001999970584106503, - "loss": 46.0, - "step": 31943 - }, - { - "epoch": 2.442341877401227, - "grad_norm": 0.000608786242082715, - "learning_rate": 0.00019999705822641324, - "loss": 46.0, - "step": 31944 - }, - { - "epoch": 2.442418334384617, - "grad_norm": 0.001000485965050757, - "learning_rate": 0.00019999705804217042, - "loss": 46.0, - "step": 31945 - }, - { - "epoch": 2.4424947913680066, - "grad_norm": 0.0013240780681371689, - "learning_rate": 0.0001999970578579218, - "loss": 46.0, - "step": 31946 - }, - { - "epoch": 2.4425712483513964, - "grad_norm": 0.00046530243707820773, - "learning_rate": 0.00019999705767366745, - "loss": 46.0, - "step": 31947 - }, - { - "epoch": 2.442647705334786, - "grad_norm": 0.0012926728231832385, - "learning_rate": 0.00019999705748940732, - "loss": 46.0, - "step": 31948 - }, - { - "epoch": 2.442724162318176, - "grad_norm": 0.0011619871947914362, - "learning_rate": 0.00019999705730514142, - "loss": 46.0, - "step": 31949 - }, - { - "epoch": 2.4428006193015657, - "grad_norm": 0.0077623589895665646, - "learning_rate": 0.00019999705712086976, - "loss": 46.0, - "step": 31950 - }, - { - "epoch": 2.442877076284955, - "grad_norm": 0.0014879575464874506, - "learning_rate": 0.00019999705693659228, - "loss": 46.0, - "step": 31951 - }, - { - "epoch": 2.4429535332683447, - "grad_norm": 0.0017783952644094825, - "learning_rate": 0.00019999705675230909, - "loss": 46.0, - "step": 31952 - }, - { - "epoch": 2.4430299902517345, - "grad_norm": 0.0004658265970647335, - "learning_rate": 0.00019999705656802011, - "loss": 46.0, - "step": 31953 - }, - { - "epoch": 2.4431064472351243, - "grad_norm": 0.01797652244567871, - "learning_rate": 0.00019999705638372537, - "loss": 46.0, - "step": 31954 - }, - { - "epoch": 2.443182904218514, - "grad_norm": 0.0008855718770064414, - "learning_rate": 0.00019999705619942485, - "loss": 46.0, - "step": 31955 - }, - { - "epoch": 2.443259361201904, - "grad_norm": 0.001002999721094966, - "learning_rate": 0.00019999705601511853, - "loss": 46.0, - "step": 31956 - }, - { - "epoch": 2.4433358181852936, - "grad_norm": 0.0015826683957129717, - "learning_rate": 0.0001999970558308065, - "loss": 46.0, - "step": 31957 - }, - { - "epoch": 2.4434122751686833, - "grad_norm": 0.009470977820456028, - "learning_rate": 0.00019999705564648865, - "loss": 46.0, - "step": 31958 - }, - { - "epoch": 2.443488732152073, - "grad_norm": 0.003207790432497859, - "learning_rate": 0.00019999705546216507, - "loss": 46.0, - "step": 31959 - }, - { - "epoch": 2.443565189135463, - "grad_norm": 0.002510436112061143, - "learning_rate": 0.0001999970552778357, - "loss": 46.0, - "step": 31960 - }, - { - "epoch": 2.443641646118852, - "grad_norm": 0.0008366541005671024, - "learning_rate": 0.00019999705509350055, - "loss": 46.0, - "step": 31961 - }, - { - "epoch": 2.443718103102242, - "grad_norm": 0.001121058245189488, - "learning_rate": 0.00019999705490915965, - "loss": 46.0, - "step": 31962 - }, - { - "epoch": 2.4437945600856317, - "grad_norm": 0.003746144473552704, - "learning_rate": 0.000199997054724813, - "loss": 46.0, - "step": 31963 - }, - { - "epoch": 2.4438710170690214, - "grad_norm": 0.0009716642089188099, - "learning_rate": 0.00019999705454046054, - "loss": 46.0, - "step": 31964 - }, - { - "epoch": 2.443947474052411, - "grad_norm": 0.0008370826835744083, - "learning_rate": 0.00019999705435610232, - "loss": 46.0, - "step": 31965 - }, - { - "epoch": 2.444023931035801, - "grad_norm": 0.001802515471354127, - "learning_rate": 0.00019999705417173832, - "loss": 46.0, - "step": 31966 - }, - { - "epoch": 2.4441003880191907, - "grad_norm": 0.0009477935382165015, - "learning_rate": 0.00019999705398736857, - "loss": 46.0, - "step": 31967 - }, - { - "epoch": 2.4441768450025805, - "grad_norm": 0.00043646173435263336, - "learning_rate": 0.00019999705380299308, - "loss": 46.0, - "step": 31968 - }, - { - "epoch": 2.4442533019859702, - "grad_norm": 0.007541113067418337, - "learning_rate": 0.00019999705361861176, - "loss": 46.0, - "step": 31969 - }, - { - "epoch": 2.44432975896936, - "grad_norm": 0.003198739606887102, - "learning_rate": 0.00019999705343422473, - "loss": 46.0, - "step": 31970 - }, - { - "epoch": 2.4444062159527498, - "grad_norm": 0.0025840254966169596, - "learning_rate": 0.0001999970532498319, - "loss": 46.0, - "step": 31971 - }, - { - "epoch": 2.4444826729361395, - "grad_norm": 0.0018801280530169606, - "learning_rate": 0.00019999705306543328, - "loss": 46.0, - "step": 31972 - }, - { - "epoch": 2.444559129919529, - "grad_norm": 0.015308624133467674, - "learning_rate": 0.00019999705288102892, - "loss": 46.0, - "step": 31973 - }, - { - "epoch": 2.4446355869029186, - "grad_norm": 0.0012719205114990473, - "learning_rate": 0.0001999970526966188, - "loss": 46.0, - "step": 31974 - }, - { - "epoch": 2.4447120438863084, - "grad_norm": 0.0006378758698701859, - "learning_rate": 0.00019999705251220288, - "loss": 46.0, - "step": 31975 - }, - { - "epoch": 2.444788500869698, - "grad_norm": 0.0011293942807242274, - "learning_rate": 0.0001999970523277812, - "loss": 46.0, - "step": 31976 - }, - { - "epoch": 2.444864957853088, - "grad_norm": 0.002601534128189087, - "learning_rate": 0.00019999705214335375, - "loss": 46.0, - "step": 31977 - }, - { - "epoch": 2.4449414148364776, - "grad_norm": 0.00045958292321301997, - "learning_rate": 0.00019999705195892053, - "loss": 46.0, - "step": 31978 - }, - { - "epoch": 2.4450178718198674, - "grad_norm": 0.0007823935593478382, - "learning_rate": 0.00019999705177448158, - "loss": 46.0, - "step": 31979 - }, - { - "epoch": 2.445094328803257, - "grad_norm": 0.0014765918022021651, - "learning_rate": 0.0001999970515900368, - "loss": 46.0, - "step": 31980 - }, - { - "epoch": 2.445170785786647, - "grad_norm": 0.0008068651659414172, - "learning_rate": 0.0001999970514055863, - "loss": 46.0, - "step": 31981 - }, - { - "epoch": 2.4452472427700367, - "grad_norm": 0.0008751056739129126, - "learning_rate": 0.00019999705122113, - "loss": 46.0, - "step": 31982 - }, - { - "epoch": 2.445323699753426, - "grad_norm": 0.00212300568819046, - "learning_rate": 0.00019999705103666794, - "loss": 46.0, - "step": 31983 - }, - { - "epoch": 2.4454001567368158, - "grad_norm": 0.0007084000390022993, - "learning_rate": 0.00019999705085220013, - "loss": 46.0, - "step": 31984 - }, - { - "epoch": 2.4454766137202055, - "grad_norm": 0.001924376585520804, - "learning_rate": 0.00019999705066772651, - "loss": 46.0, - "step": 31985 - }, - { - "epoch": 2.4455530707035953, - "grad_norm": 0.0019496403401717544, - "learning_rate": 0.00019999705048324716, - "loss": 46.0, - "step": 31986 - }, - { - "epoch": 2.445629527686985, - "grad_norm": 0.001012491062283516, - "learning_rate": 0.00019999705029876203, - "loss": 46.0, - "step": 31987 - }, - { - "epoch": 2.445705984670375, - "grad_norm": 0.0011541333515197039, - "learning_rate": 0.0001999970501142711, - "loss": 46.0, - "step": 31988 - }, - { - "epoch": 2.4457824416537646, - "grad_norm": 0.0011622386518865824, - "learning_rate": 0.00019999704992977444, - "loss": 46.0, - "step": 31989 - }, - { - "epoch": 2.4458588986371543, - "grad_norm": 0.00046954042045399547, - "learning_rate": 0.000199997049745272, - "loss": 46.0, - "step": 31990 - }, - { - "epoch": 2.445935355620544, - "grad_norm": 0.0008914373465813696, - "learning_rate": 0.00019999704956076377, - "loss": 46.0, - "step": 31991 - }, - { - "epoch": 2.446011812603934, - "grad_norm": 0.0016433949349448085, - "learning_rate": 0.0001999970493762498, - "loss": 46.0, - "step": 31992 - }, - { - "epoch": 2.4460882695873236, - "grad_norm": 0.0010150804882869124, - "learning_rate": 0.00019999704919173002, - "loss": 46.0, - "step": 31993 - }, - { - "epoch": 2.4461647265707134, - "grad_norm": 0.001598947448655963, - "learning_rate": 0.0001999970490072045, - "loss": 46.0, - "step": 31994 - }, - { - "epoch": 2.4462411835541027, - "grad_norm": 0.001388934557326138, - "learning_rate": 0.00019999704882267322, - "loss": 46.0, - "step": 31995 - }, - { - "epoch": 2.4463176405374925, - "grad_norm": 0.0008281638729386032, - "learning_rate": 0.00019999704863813615, - "loss": 46.0, - "step": 31996 - }, - { - "epoch": 2.446394097520882, - "grad_norm": 0.0008215660927817225, - "learning_rate": 0.00019999704845359334, - "loss": 46.0, - "step": 31997 - }, - { - "epoch": 2.446470554504272, - "grad_norm": 0.009521810337901115, - "learning_rate": 0.00019999704826904473, - "loss": 46.0, - "step": 31998 - }, - { - "epoch": 2.4465470114876617, - "grad_norm": 0.0006725895800627768, - "learning_rate": 0.00019999704808449034, - "loss": 46.0, - "step": 31999 - }, - { - "epoch": 2.4466234684710515, - "grad_norm": 0.001011132262647152, - "learning_rate": 0.00019999704789993024, - "loss": 46.0, - "step": 32000 - }, - { - "epoch": 2.4466999254544413, - "grad_norm": 0.0009333459311164916, - "learning_rate": 0.0001999970477153643, - "loss": 46.0, - "step": 32001 - }, - { - "epoch": 2.446776382437831, - "grad_norm": 0.0007062758086249232, - "learning_rate": 0.00019999704753079263, - "loss": 46.0, - "step": 32002 - }, - { - "epoch": 2.446852839421221, - "grad_norm": 0.0009965809294953942, - "learning_rate": 0.0001999970473462152, - "loss": 46.0, - "step": 32003 - }, - { - "epoch": 2.4469292964046105, - "grad_norm": 0.0012880159774795175, - "learning_rate": 0.00019999704716163196, - "loss": 46.0, - "step": 32004 - }, - { - "epoch": 2.447005753388, - "grad_norm": 0.0006319022504612803, - "learning_rate": 0.000199997046977043, - "loss": 46.0, - "step": 32005 - }, - { - "epoch": 2.4470822103713896, - "grad_norm": 0.0012123904889449477, - "learning_rate": 0.00019999704679244824, - "loss": 46.0, - "step": 32006 - }, - { - "epoch": 2.4471586673547794, - "grad_norm": 0.0016980203799903393, - "learning_rate": 0.0001999970466078477, - "loss": 46.0, - "step": 32007 - }, - { - "epoch": 2.447235124338169, - "grad_norm": 0.0016341886948794127, - "learning_rate": 0.0001999970464232414, - "loss": 46.0, - "step": 32008 - }, - { - "epoch": 2.447311581321559, - "grad_norm": 0.0005806373665109277, - "learning_rate": 0.00019999704623862937, - "loss": 46.0, - "step": 32009 - }, - { - "epoch": 2.4473880383049487, - "grad_norm": 0.0018561262404546142, - "learning_rate": 0.00019999704605401153, - "loss": 46.0, - "step": 32010 - }, - { - "epoch": 2.4474644952883384, - "grad_norm": 0.001861725701019168, - "learning_rate": 0.00019999704586938792, - "loss": 46.0, - "step": 32011 - }, - { - "epoch": 2.447540952271728, - "grad_norm": 0.0013776309788227081, - "learning_rate": 0.00019999704568475856, - "loss": 46.0, - "step": 32012 - }, - { - "epoch": 2.447617409255118, - "grad_norm": 0.012885675765573978, - "learning_rate": 0.00019999704550012343, - "loss": 46.0, - "step": 32013 - }, - { - "epoch": 2.4476938662385077, - "grad_norm": 0.0007430001278407872, - "learning_rate": 0.0001999970453154825, - "loss": 46.0, - "step": 32014 - }, - { - "epoch": 2.4477703232218975, - "grad_norm": 0.0009364989236928523, - "learning_rate": 0.00019999704513083583, - "loss": 46.0, - "step": 32015 - }, - { - "epoch": 2.4478467802052872, - "grad_norm": 0.00456291763111949, - "learning_rate": 0.00019999704494618337, - "loss": 46.0, - "step": 32016 - }, - { - "epoch": 2.4479232371886765, - "grad_norm": 0.0014080549590289593, - "learning_rate": 0.00019999704476152518, - "loss": 46.0, - "step": 32017 - }, - { - "epoch": 2.4479996941720663, - "grad_norm": 0.0025584816467016935, - "learning_rate": 0.00019999704457686118, - "loss": 46.0, - "step": 32018 - }, - { - "epoch": 2.448076151155456, - "grad_norm": 0.002959862118586898, - "learning_rate": 0.00019999704439219144, - "loss": 46.0, - "step": 32019 - }, - { - "epoch": 2.448152608138846, - "grad_norm": 0.0004960898659192026, - "learning_rate": 0.0001999970442075159, - "loss": 46.0, - "step": 32020 - }, - { - "epoch": 2.4482290651222356, - "grad_norm": 0.004734597634524107, - "learning_rate": 0.0001999970440228346, - "loss": 46.0, - "step": 32021 - }, - { - "epoch": 2.4483055221056254, - "grad_norm": 0.0010306901531293988, - "learning_rate": 0.00019999704383814754, - "loss": 46.0, - "step": 32022 - }, - { - "epoch": 2.448381979089015, - "grad_norm": 0.00044556776992976665, - "learning_rate": 0.00019999704365345473, - "loss": 46.0, - "step": 32023 - }, - { - "epoch": 2.448458436072405, - "grad_norm": 0.0014852783642709255, - "learning_rate": 0.00019999704346875614, - "loss": 46.0, - "step": 32024 - }, - { - "epoch": 2.4485348930557946, - "grad_norm": 0.001416479586623609, - "learning_rate": 0.00019999704328405176, - "loss": 46.0, - "step": 32025 - }, - { - "epoch": 2.4486113500391844, - "grad_norm": 0.003507385728880763, - "learning_rate": 0.00019999704309934163, - "loss": 46.0, - "step": 32026 - }, - { - "epoch": 2.4486878070225737, - "grad_norm": 0.00270731165073812, - "learning_rate": 0.0001999970429146257, - "loss": 46.0, - "step": 32027 - }, - { - "epoch": 2.4487642640059635, - "grad_norm": 0.0009261043160222471, - "learning_rate": 0.00019999704272990402, - "loss": 46.0, - "step": 32028 - }, - { - "epoch": 2.4488407209893532, - "grad_norm": 0.0010700608836486936, - "learning_rate": 0.00019999704254517657, - "loss": 46.0, - "step": 32029 - }, - { - "epoch": 2.448917177972743, - "grad_norm": 0.001064253505319357, - "learning_rate": 0.00019999704236044338, - "loss": 46.0, - "step": 32030 - }, - { - "epoch": 2.4489936349561328, - "grad_norm": 0.00033997936407104135, - "learning_rate": 0.00019999704217570438, - "loss": 46.0, - "step": 32031 - }, - { - "epoch": 2.4490700919395225, - "grad_norm": 0.001571160857565701, - "learning_rate": 0.00019999704199095964, - "loss": 46.0, - "step": 32032 - }, - { - "epoch": 2.4491465489229123, - "grad_norm": 0.0011864836560562253, - "learning_rate": 0.00019999704180620912, - "loss": 46.0, - "step": 32033 - }, - { - "epoch": 2.449223005906302, - "grad_norm": 0.0008409509900957346, - "learning_rate": 0.0001999970416214528, - "loss": 46.0, - "step": 32034 - }, - { - "epoch": 2.449299462889692, - "grad_norm": 0.000771131890360266, - "learning_rate": 0.00019999704143669074, - "loss": 46.0, - "step": 32035 - }, - { - "epoch": 2.4493759198730816, - "grad_norm": 0.004988211207091808, - "learning_rate": 0.0001999970412519229, - "loss": 46.0, - "step": 32036 - }, - { - "epoch": 2.4494523768564713, - "grad_norm": 0.0006533483974635601, - "learning_rate": 0.00019999704106714932, - "loss": 46.0, - "step": 32037 - }, - { - "epoch": 2.449528833839861, - "grad_norm": 0.0023822709918022156, - "learning_rate": 0.00019999704088236993, - "loss": 46.0, - "step": 32038 - }, - { - "epoch": 2.4496052908232504, - "grad_norm": 0.0010483047226443887, - "learning_rate": 0.0001999970406975848, - "loss": 46.0, - "step": 32039 - }, - { - "epoch": 2.44968174780664, - "grad_norm": 0.0023576875682920218, - "learning_rate": 0.0001999970405127939, - "loss": 46.0, - "step": 32040 - }, - { - "epoch": 2.44975820479003, - "grad_norm": 0.0004939362406730652, - "learning_rate": 0.00019999704032799722, - "loss": 46.0, - "step": 32041 - }, - { - "epoch": 2.4498346617734197, - "grad_norm": 0.0010357382707297802, - "learning_rate": 0.00019999704014319475, - "loss": 46.0, - "step": 32042 - }, - { - "epoch": 2.4499111187568094, - "grad_norm": 0.0008340787026099861, - "learning_rate": 0.00019999703995838655, - "loss": 46.0, - "step": 32043 - }, - { - "epoch": 2.449987575740199, - "grad_norm": 0.0011341251665726304, - "learning_rate": 0.00019999703977357255, - "loss": 46.0, - "step": 32044 - }, - { - "epoch": 2.450064032723589, - "grad_norm": 0.0005242135957814753, - "learning_rate": 0.0001999970395887528, - "loss": 46.0, - "step": 32045 - }, - { - "epoch": 2.4501404897069787, - "grad_norm": 0.003730755066499114, - "learning_rate": 0.0001999970394039273, - "loss": 46.0, - "step": 32046 - }, - { - "epoch": 2.4502169466903685, - "grad_norm": 0.0014987685717642307, - "learning_rate": 0.000199997039219096, - "loss": 46.0, - "step": 32047 - }, - { - "epoch": 2.450293403673758, - "grad_norm": 0.006126203574240208, - "learning_rate": 0.00019999703903425891, - "loss": 46.0, - "step": 32048 - }, - { - "epoch": 2.4503698606571476, - "grad_norm": 0.0012287484714761376, - "learning_rate": 0.0001999970388494161, - "loss": 46.0, - "step": 32049 - }, - { - "epoch": 2.4504463176405373, - "grad_norm": 0.002660485915839672, - "learning_rate": 0.0001999970386645675, - "loss": 46.0, - "step": 32050 - }, - { - "epoch": 2.450522774623927, - "grad_norm": 0.0015183178475126624, - "learning_rate": 0.0001999970384797131, - "loss": 46.0, - "step": 32051 - }, - { - "epoch": 2.450599231607317, - "grad_norm": 0.0007381152245216072, - "learning_rate": 0.00019999703829485296, - "loss": 46.0, - "step": 32052 - }, - { - "epoch": 2.4506756885907066, - "grad_norm": 0.0008989167399704456, - "learning_rate": 0.00019999703810998708, - "loss": 46.0, - "step": 32053 - }, - { - "epoch": 2.4507521455740964, - "grad_norm": 0.002915708813816309, - "learning_rate": 0.0001999970379251154, - "loss": 46.0, - "step": 32054 - }, - { - "epoch": 2.450828602557486, - "grad_norm": 0.0012920403387397528, - "learning_rate": 0.00019999703774023795, - "loss": 46.0, - "step": 32055 - }, - { - "epoch": 2.450905059540876, - "grad_norm": 0.0013374312547966838, - "learning_rate": 0.00019999703755535473, - "loss": 46.0, - "step": 32056 - }, - { - "epoch": 2.4509815165242657, - "grad_norm": 0.0007244454463943839, - "learning_rate": 0.00019999703737046573, - "loss": 46.0, - "step": 32057 - }, - { - "epoch": 2.4510579735076554, - "grad_norm": 0.00081466423580423, - "learning_rate": 0.00019999703718557096, - "loss": 46.0, - "step": 32058 - }, - { - "epoch": 2.451134430491045, - "grad_norm": 0.0006333947530947626, - "learning_rate": 0.00019999703700067045, - "loss": 46.0, - "step": 32059 - }, - { - "epoch": 2.451210887474435, - "grad_norm": 0.0007041457574814558, - "learning_rate": 0.00019999703681576416, - "loss": 46.0, - "step": 32060 - }, - { - "epoch": 2.4512873444578243, - "grad_norm": 0.0004056443285662681, - "learning_rate": 0.0001999970366308521, - "loss": 46.0, - "step": 32061 - }, - { - "epoch": 2.451363801441214, - "grad_norm": 0.0014375589089468122, - "learning_rate": 0.00019999703644593426, - "loss": 46.0, - "step": 32062 - }, - { - "epoch": 2.451440258424604, - "grad_norm": 0.0017896929057314992, - "learning_rate": 0.00019999703626101068, - "loss": 46.0, - "step": 32063 - }, - { - "epoch": 2.4515167154079935, - "grad_norm": 0.004768447019159794, - "learning_rate": 0.0001999970360760813, - "loss": 46.0, - "step": 32064 - }, - { - "epoch": 2.4515931723913833, - "grad_norm": 0.0019432223634794354, - "learning_rate": 0.00019999703589114614, - "loss": 46.0, - "step": 32065 - }, - { - "epoch": 2.451669629374773, - "grad_norm": 0.0008068630704656243, - "learning_rate": 0.00019999703570620523, - "loss": 46.0, - "step": 32066 - }, - { - "epoch": 2.451746086358163, - "grad_norm": 0.0009804224828258157, - "learning_rate": 0.00019999703552125856, - "loss": 46.0, - "step": 32067 - }, - { - "epoch": 2.4518225433415526, - "grad_norm": 0.0006549140089191496, - "learning_rate": 0.00019999703533630608, - "loss": 46.0, - "step": 32068 - }, - { - "epoch": 2.4518990003249423, - "grad_norm": 0.0006274455226957798, - "learning_rate": 0.00019999703515134789, - "loss": 46.0, - "step": 32069 - }, - { - "epoch": 2.4519754573083317, - "grad_norm": 0.0029337084852159023, - "learning_rate": 0.00019999703496638392, - "loss": 46.0, - "step": 32070 - }, - { - "epoch": 2.4520519142917214, - "grad_norm": 0.0013975371839478612, - "learning_rate": 0.00019999703478141415, - "loss": 46.0, - "step": 32071 - }, - { - "epoch": 2.452128371275111, - "grad_norm": 0.001947292941622436, - "learning_rate": 0.0001999970345964386, - "loss": 46.0, - "step": 32072 - }, - { - "epoch": 2.452204828258501, - "grad_norm": 0.001765129272826016, - "learning_rate": 0.00019999703441145732, - "loss": 46.0, - "step": 32073 - }, - { - "epoch": 2.4522812852418907, - "grad_norm": 0.0006401055143214762, - "learning_rate": 0.00019999703422647028, - "loss": 46.0, - "step": 32074 - }, - { - "epoch": 2.4523577422252805, - "grad_norm": 0.001285024918615818, - "learning_rate": 0.00019999703404147742, - "loss": 46.0, - "step": 32075 - }, - { - "epoch": 2.4524341992086702, - "grad_norm": 0.001259715761989355, - "learning_rate": 0.0001999970338564788, - "loss": 46.0, - "step": 32076 - }, - { - "epoch": 2.45251065619206, - "grad_norm": 0.0008648103685118258, - "learning_rate": 0.00019999703367147445, - "loss": 46.0, - "step": 32077 - }, - { - "epoch": 2.4525871131754498, - "grad_norm": 0.0014680068707093596, - "learning_rate": 0.0001999970334864643, - "loss": 46.0, - "step": 32078 - }, - { - "epoch": 2.4526635701588395, - "grad_norm": 0.00038193483487702906, - "learning_rate": 0.0001999970333014484, - "loss": 46.0, - "step": 32079 - }, - { - "epoch": 2.4527400271422293, - "grad_norm": 0.0006948375958018005, - "learning_rate": 0.0001999970331164267, - "loss": 46.0, - "step": 32080 - }, - { - "epoch": 2.452816484125619, - "grad_norm": 0.0009676854824647307, - "learning_rate": 0.00019999703293139927, - "loss": 46.0, - "step": 32081 - }, - { - "epoch": 2.4528929411090084, - "grad_norm": 0.0006656266050413251, - "learning_rate": 0.00019999703274636605, - "loss": 46.0, - "step": 32082 - }, - { - "epoch": 2.452969398092398, - "grad_norm": 0.004371342249214649, - "learning_rate": 0.00019999703256132705, - "loss": 46.0, - "step": 32083 - }, - { - "epoch": 2.453045855075788, - "grad_norm": 0.0012889942154288292, - "learning_rate": 0.0001999970323762823, - "loss": 46.0, - "step": 32084 - }, - { - "epoch": 2.4531223120591776, - "grad_norm": 0.0029644868336617947, - "learning_rate": 0.00019999703219123177, - "loss": 46.0, - "step": 32085 - }, - { - "epoch": 2.4531987690425674, - "grad_norm": 0.0009314757189713418, - "learning_rate": 0.00019999703200617548, - "loss": 46.0, - "step": 32086 - }, - { - "epoch": 2.453275226025957, - "grad_norm": 0.0008678769809193909, - "learning_rate": 0.00019999703182111341, - "loss": 46.0, - "step": 32087 - }, - { - "epoch": 2.453351683009347, - "grad_norm": 0.0007079067872837186, - "learning_rate": 0.00019999703163604558, - "loss": 46.0, - "step": 32088 - }, - { - "epoch": 2.4534281399927367, - "grad_norm": 0.0004546970303636044, - "learning_rate": 0.00019999703145097197, - "loss": 46.0, - "step": 32089 - }, - { - "epoch": 2.4535045969761264, - "grad_norm": 0.0015264780959114432, - "learning_rate": 0.0001999970312658926, - "loss": 46.0, - "step": 32090 - }, - { - "epoch": 2.453581053959516, - "grad_norm": 0.0009667367557995021, - "learning_rate": 0.00019999703108080746, - "loss": 46.0, - "step": 32091 - }, - { - "epoch": 2.4536575109429055, - "grad_norm": 0.0011149076744914055, - "learning_rate": 0.00019999703089571656, - "loss": 46.0, - "step": 32092 - }, - { - "epoch": 2.4537339679262953, - "grad_norm": 0.0019979476928710938, - "learning_rate": 0.00019999703071061985, - "loss": 46.0, - "step": 32093 - }, - { - "epoch": 2.453810424909685, - "grad_norm": 0.0008110285270959139, - "learning_rate": 0.0001999970305255174, - "loss": 46.0, - "step": 32094 - }, - { - "epoch": 2.453886881893075, - "grad_norm": 0.0009752092300914228, - "learning_rate": 0.00019999703034040918, - "loss": 46.0, - "step": 32095 - }, - { - "epoch": 2.4539633388764646, - "grad_norm": 0.0010237341048195958, - "learning_rate": 0.00019999703015529522, - "loss": 46.0, - "step": 32096 - }, - { - "epoch": 2.4540397958598543, - "grad_norm": 0.0045866151340305805, - "learning_rate": 0.00019999702997017545, - "loss": 46.0, - "step": 32097 - }, - { - "epoch": 2.454116252843244, - "grad_norm": 0.0014634841354563832, - "learning_rate": 0.00019999702978504993, - "loss": 46.0, - "step": 32098 - }, - { - "epoch": 2.454192709826634, - "grad_norm": 0.0004835967265535146, - "learning_rate": 0.00019999702959991862, - "loss": 46.0, - "step": 32099 - }, - { - "epoch": 2.4542691668100236, - "grad_norm": 0.0005293546710163355, - "learning_rate": 0.00019999702941478155, - "loss": 46.0, - "step": 32100 - }, - { - "epoch": 2.4543456237934134, - "grad_norm": 0.00044838787289336324, - "learning_rate": 0.00019999702922963872, - "loss": 46.0, - "step": 32101 - }, - { - "epoch": 2.454422080776803, - "grad_norm": 0.0010153127368539572, - "learning_rate": 0.0001999970290444901, - "loss": 46.0, - "step": 32102 - }, - { - "epoch": 2.454498537760193, - "grad_norm": 0.0007873608265072107, - "learning_rate": 0.00019999702885933575, - "loss": 46.0, - "step": 32103 - }, - { - "epoch": 2.454574994743582, - "grad_norm": 0.002770470455288887, - "learning_rate": 0.0001999970286741756, - "loss": 46.0, - "step": 32104 - }, - { - "epoch": 2.454651451726972, - "grad_norm": 0.00040425301995128393, - "learning_rate": 0.00019999702848900967, - "loss": 46.0, - "step": 32105 - }, - { - "epoch": 2.4547279087103617, - "grad_norm": 0.0010005495278164744, - "learning_rate": 0.000199997028303838, - "loss": 46.0, - "step": 32106 - }, - { - "epoch": 2.4548043656937515, - "grad_norm": 0.0009729396551847458, - "learning_rate": 0.00019999702811866055, - "loss": 46.0, - "step": 32107 - }, - { - "epoch": 2.4548808226771412, - "grad_norm": 0.0011714320862665772, - "learning_rate": 0.00019999702793347735, - "loss": 46.0, - "step": 32108 - }, - { - "epoch": 2.454957279660531, - "grad_norm": 0.0004402663034852594, - "learning_rate": 0.00019999702774828836, - "loss": 46.0, - "step": 32109 - }, - { - "epoch": 2.4550337366439208, - "grad_norm": 0.0009628739207983017, - "learning_rate": 0.0001999970275630936, - "loss": 46.0, - "step": 32110 - }, - { - "epoch": 2.4551101936273105, - "grad_norm": 0.0004306513292249292, - "learning_rate": 0.00019999702737789305, - "loss": 46.0, - "step": 32111 - }, - { - "epoch": 2.4551866506107003, - "grad_norm": 0.0023378280457109213, - "learning_rate": 0.00019999702719268676, - "loss": 46.0, - "step": 32112 - }, - { - "epoch": 2.45526310759409, - "grad_norm": 0.0012850608909502625, - "learning_rate": 0.0001999970270074747, - "loss": 46.0, - "step": 32113 - }, - { - "epoch": 2.4553395645774794, - "grad_norm": 0.001222437946125865, - "learning_rate": 0.00019999702682225686, - "loss": 46.0, - "step": 32114 - }, - { - "epoch": 2.455416021560869, - "grad_norm": 0.0012058448046445847, - "learning_rate": 0.00019999702663703323, - "loss": 46.0, - "step": 32115 - }, - { - "epoch": 2.455492478544259, - "grad_norm": 0.001976849278435111, - "learning_rate": 0.00019999702645180387, - "loss": 46.0, - "step": 32116 - }, - { - "epoch": 2.4555689355276487, - "grad_norm": 0.0006149174296297133, - "learning_rate": 0.00019999702626656874, - "loss": 46.0, - "step": 32117 - }, - { - "epoch": 2.4556453925110384, - "grad_norm": 0.00037595003959722817, - "learning_rate": 0.00019999702608132781, - "loss": 46.0, - "step": 32118 - }, - { - "epoch": 2.455721849494428, - "grad_norm": 0.0008315707091242075, - "learning_rate": 0.00019999702589608114, - "loss": 46.0, - "step": 32119 - }, - { - "epoch": 2.455798306477818, - "grad_norm": 0.0006714237970300019, - "learning_rate": 0.00019999702571082866, - "loss": 46.0, - "step": 32120 - }, - { - "epoch": 2.4558747634612077, - "grad_norm": 0.001386646879836917, - "learning_rate": 0.00019999702552557044, - "loss": 46.0, - "step": 32121 - }, - { - "epoch": 2.4559512204445975, - "grad_norm": 0.0036568334326148033, - "learning_rate": 0.00019999702534030648, - "loss": 46.0, - "step": 32122 - }, - { - "epoch": 2.456027677427987, - "grad_norm": 0.00029667068156413734, - "learning_rate": 0.0001999970251550367, - "loss": 46.0, - "step": 32123 - }, - { - "epoch": 2.456104134411377, - "grad_norm": 0.0011132522486150265, - "learning_rate": 0.0001999970249697612, - "loss": 46.0, - "step": 32124 - }, - { - "epoch": 2.4561805913947667, - "grad_norm": 0.0009052717359736562, - "learning_rate": 0.00019999702478447988, - "loss": 46.0, - "step": 32125 - }, - { - "epoch": 2.456257048378156, - "grad_norm": 0.0037274565547704697, - "learning_rate": 0.00019999702459919282, - "loss": 46.0, - "step": 32126 - }, - { - "epoch": 2.456333505361546, - "grad_norm": 0.0019299163250252604, - "learning_rate": 0.00019999702441389998, - "loss": 46.0, - "step": 32127 - }, - { - "epoch": 2.4564099623449356, - "grad_norm": 0.0007135948399081826, - "learning_rate": 0.00019999702422860137, - "loss": 46.0, - "step": 32128 - }, - { - "epoch": 2.4564864193283253, - "grad_norm": 0.0008471620385535061, - "learning_rate": 0.000199997024043297, - "loss": 46.0, - "step": 32129 - }, - { - "epoch": 2.456562876311715, - "grad_norm": 0.0017200012225657701, - "learning_rate": 0.00019999702385798686, - "loss": 46.0, - "step": 32130 - }, - { - "epoch": 2.456639333295105, - "grad_norm": 0.010277921333909035, - "learning_rate": 0.00019999702367267094, - "loss": 46.0, - "step": 32131 - }, - { - "epoch": 2.4567157902784946, - "grad_norm": 0.0011906875297427177, - "learning_rate": 0.00019999702348734926, - "loss": 46.0, - "step": 32132 - }, - { - "epoch": 2.4567922472618844, - "grad_norm": 0.0069778901524841785, - "learning_rate": 0.00019999702330202181, - "loss": 46.0, - "step": 32133 - }, - { - "epoch": 2.456868704245274, - "grad_norm": 0.001418792293407023, - "learning_rate": 0.00019999702311668857, - "loss": 46.0, - "step": 32134 - }, - { - "epoch": 2.456945161228664, - "grad_norm": 0.002820760477334261, - "learning_rate": 0.0001999970229313496, - "loss": 46.0, - "step": 32135 - }, - { - "epoch": 2.4570216182120532, - "grad_norm": 0.0032284879125654697, - "learning_rate": 0.00019999702274600483, - "loss": 46.0, - "step": 32136 - }, - { - "epoch": 2.457098075195443, - "grad_norm": 0.0012881056172773242, - "learning_rate": 0.00019999702256065432, - "loss": 46.0, - "step": 32137 - }, - { - "epoch": 2.4571745321788327, - "grad_norm": 0.0013070888817310333, - "learning_rate": 0.000199997022375298, - "loss": 46.0, - "step": 32138 - }, - { - "epoch": 2.4572509891622225, - "grad_norm": 0.0015307943103834987, - "learning_rate": 0.0001999970221899359, - "loss": 46.0, - "step": 32139 - }, - { - "epoch": 2.4573274461456123, - "grad_norm": 0.000834343780297786, - "learning_rate": 0.00019999702200456808, - "loss": 46.0, - "step": 32140 - }, - { - "epoch": 2.457403903129002, - "grad_norm": 0.0012775223003700376, - "learning_rate": 0.0001999970218191945, - "loss": 46.0, - "step": 32141 - }, - { - "epoch": 2.457480360112392, - "grad_norm": 0.0007632126798853278, - "learning_rate": 0.00019999702163381512, - "loss": 46.0, - "step": 32142 - }, - { - "epoch": 2.4575568170957816, - "grad_norm": 0.003969459794461727, - "learning_rate": 0.00019999702144842996, - "loss": 46.0, - "step": 32143 - }, - { - "epoch": 2.4576332740791713, - "grad_norm": 0.0008371869334951043, - "learning_rate": 0.00019999702126303906, - "loss": 46.0, - "step": 32144 - }, - { - "epoch": 2.457709731062561, - "grad_norm": 0.0017724273493513465, - "learning_rate": 0.0001999970210776424, - "loss": 46.0, - "step": 32145 - }, - { - "epoch": 2.457786188045951, - "grad_norm": 0.001332386047579348, - "learning_rate": 0.00019999702089223994, - "loss": 46.0, - "step": 32146 - }, - { - "epoch": 2.4578626450293406, - "grad_norm": 0.0004408008244354278, - "learning_rate": 0.00019999702070683172, - "loss": 46.0, - "step": 32147 - }, - { - "epoch": 2.45793910201273, - "grad_norm": 0.0024061615113168955, - "learning_rate": 0.0001999970205214177, - "loss": 46.0, - "step": 32148 - }, - { - "epoch": 2.4580155589961197, - "grad_norm": 0.0027121463790535927, - "learning_rate": 0.00019999702033599793, - "loss": 46.0, - "step": 32149 - }, - { - "epoch": 2.4580920159795094, - "grad_norm": 0.004833335522562265, - "learning_rate": 0.0001999970201505724, - "loss": 46.0, - "step": 32150 - }, - { - "epoch": 2.458168472962899, - "grad_norm": 0.0013423613272607327, - "learning_rate": 0.0001999970199651411, - "loss": 46.0, - "step": 32151 - }, - { - "epoch": 2.458244929946289, - "grad_norm": 0.0016987406415864825, - "learning_rate": 0.00019999701977970404, - "loss": 46.0, - "step": 32152 - }, - { - "epoch": 2.4583213869296787, - "grad_norm": 0.0005409973673522472, - "learning_rate": 0.0001999970195942612, - "loss": 46.0, - "step": 32153 - }, - { - "epoch": 2.4583978439130685, - "grad_norm": 0.0005198657163418829, - "learning_rate": 0.0001999970194088126, - "loss": 46.0, - "step": 32154 - }, - { - "epoch": 2.4584743008964582, - "grad_norm": 0.0006227836711332202, - "learning_rate": 0.00019999701922335822, - "loss": 46.0, - "step": 32155 - }, - { - "epoch": 2.458550757879848, - "grad_norm": 0.004347793757915497, - "learning_rate": 0.00019999701903789806, - "loss": 46.0, - "step": 32156 - }, - { - "epoch": 2.4586272148632378, - "grad_norm": 0.0011652481043711305, - "learning_rate": 0.00019999701885243216, - "loss": 46.0, - "step": 32157 - }, - { - "epoch": 2.458703671846627, - "grad_norm": 0.0013047493994235992, - "learning_rate": 0.00019999701866696046, - "loss": 46.0, - "step": 32158 - }, - { - "epoch": 2.458780128830017, - "grad_norm": 0.0013005641521885991, - "learning_rate": 0.00019999701848148302, - "loss": 46.0, - "step": 32159 - }, - { - "epoch": 2.4588565858134066, - "grad_norm": 0.0007573722396045923, - "learning_rate": 0.0001999970182959998, - "loss": 46.0, - "step": 32160 - }, - { - "epoch": 2.4589330427967964, - "grad_norm": 0.0008397951605729759, - "learning_rate": 0.0001999970181105108, - "loss": 46.0, - "step": 32161 - }, - { - "epoch": 2.459009499780186, - "grad_norm": 0.0004184510326012969, - "learning_rate": 0.00019999701792501603, - "loss": 46.0, - "step": 32162 - }, - { - "epoch": 2.459085956763576, - "grad_norm": 0.0016234355280175805, - "learning_rate": 0.00019999701773951552, - "loss": 46.0, - "step": 32163 - }, - { - "epoch": 2.4591624137469656, - "grad_norm": 0.0015076295239850879, - "learning_rate": 0.0001999970175540092, - "loss": 46.0, - "step": 32164 - }, - { - "epoch": 2.4592388707303554, - "grad_norm": 0.0006178086623549461, - "learning_rate": 0.00019999701736849715, - "loss": 46.0, - "step": 32165 - }, - { - "epoch": 2.459315327713745, - "grad_norm": 0.001316750654950738, - "learning_rate": 0.00019999701718297929, - "loss": 46.0, - "step": 32166 - }, - { - "epoch": 2.459391784697135, - "grad_norm": 0.0028804135508835316, - "learning_rate": 0.0001999970169974557, - "loss": 46.0, - "step": 32167 - }, - { - "epoch": 2.4594682416805247, - "grad_norm": 0.0008769866544753313, - "learning_rate": 0.00019999701681192633, - "loss": 46.0, - "step": 32168 - }, - { - "epoch": 2.4595446986639145, - "grad_norm": 0.0012958385050296783, - "learning_rate": 0.00019999701662639117, - "loss": 46.0, - "step": 32169 - }, - { - "epoch": 2.4596211556473038, - "grad_norm": 0.0011356809409335256, - "learning_rate": 0.00019999701644085024, - "loss": 46.0, - "step": 32170 - }, - { - "epoch": 2.4596976126306935, - "grad_norm": 0.00117954658344388, - "learning_rate": 0.00019999701625530357, - "loss": 46.0, - "step": 32171 - }, - { - "epoch": 2.4597740696140833, - "grad_norm": 0.007854013703763485, - "learning_rate": 0.00019999701606975112, - "loss": 46.0, - "step": 32172 - }, - { - "epoch": 2.459850526597473, - "grad_norm": 0.0016878879396244884, - "learning_rate": 0.00019999701588419288, - "loss": 46.0, - "step": 32173 - }, - { - "epoch": 2.459926983580863, - "grad_norm": 0.001979435095563531, - "learning_rate": 0.0001999970156986289, - "loss": 46.0, - "step": 32174 - }, - { - "epoch": 2.4600034405642526, - "grad_norm": 0.002631532261148095, - "learning_rate": 0.00019999701551305914, - "loss": 46.0, - "step": 32175 - }, - { - "epoch": 2.4600798975476423, - "grad_norm": 0.003942139912396669, - "learning_rate": 0.0001999970153274836, - "loss": 46.0, - "step": 32176 - }, - { - "epoch": 2.460156354531032, - "grad_norm": 0.0004847529053222388, - "learning_rate": 0.0001999970151419023, - "loss": 46.0, - "step": 32177 - }, - { - "epoch": 2.460232811514422, - "grad_norm": 0.0006172974244691432, - "learning_rate": 0.00019999701495631526, - "loss": 46.0, - "step": 32178 - }, - { - "epoch": 2.460309268497811, - "grad_norm": 0.0007586546125821769, - "learning_rate": 0.0001999970147707224, - "loss": 46.0, - "step": 32179 - }, - { - "epoch": 2.460385725481201, - "grad_norm": 0.0016714805969968438, - "learning_rate": 0.0001999970145851238, - "loss": 46.0, - "step": 32180 - }, - { - "epoch": 2.4604621824645907, - "grad_norm": 0.0025886481162160635, - "learning_rate": 0.00019999701439951938, - "loss": 46.0, - "step": 32181 - }, - { - "epoch": 2.4605386394479805, - "grad_norm": 0.0012383094290271401, - "learning_rate": 0.00019999701421390925, - "loss": 46.0, - "step": 32182 - }, - { - "epoch": 2.46061509643137, - "grad_norm": 0.0012315765488892794, - "learning_rate": 0.00019999701402829333, - "loss": 46.0, - "step": 32183 - }, - { - "epoch": 2.46069155341476, - "grad_norm": 0.0004620544204954058, - "learning_rate": 0.00019999701384267165, - "loss": 46.0, - "step": 32184 - }, - { - "epoch": 2.4607680103981497, - "grad_norm": 0.0013298518024384975, - "learning_rate": 0.0001999970136570442, - "loss": 46.0, - "step": 32185 - }, - { - "epoch": 2.4608444673815395, - "grad_norm": 0.002070858608931303, - "learning_rate": 0.000199997013471411, - "loss": 46.0, - "step": 32186 - }, - { - "epoch": 2.4609209243649293, - "grad_norm": 0.0007270520436577499, - "learning_rate": 0.000199997013285772, - "loss": 46.0, - "step": 32187 - }, - { - "epoch": 2.460997381348319, - "grad_norm": 0.0015808738535270095, - "learning_rate": 0.00019999701310012723, - "loss": 46.0, - "step": 32188 - }, - { - "epoch": 2.461073838331709, - "grad_norm": 0.000589851348195225, - "learning_rate": 0.0001999970129144767, - "loss": 46.0, - "step": 32189 - }, - { - "epoch": 2.4611502953150985, - "grad_norm": 0.0008861060487106442, - "learning_rate": 0.00019999701272882037, - "loss": 46.0, - "step": 32190 - }, - { - "epoch": 2.4612267522984883, - "grad_norm": 0.0009151807171292603, - "learning_rate": 0.00019999701254315831, - "loss": 46.0, - "step": 32191 - }, - { - "epoch": 2.4613032092818776, - "grad_norm": 0.0012511750683188438, - "learning_rate": 0.00019999701235749048, - "loss": 46.0, - "step": 32192 - }, - { - "epoch": 2.4613796662652674, - "grad_norm": 0.0006788653554394841, - "learning_rate": 0.00019999701217181688, - "loss": 46.0, - "step": 32193 - }, - { - "epoch": 2.461456123248657, - "grad_norm": 0.0006716499337926507, - "learning_rate": 0.0001999970119861375, - "loss": 46.0, - "step": 32194 - }, - { - "epoch": 2.461532580232047, - "grad_norm": 0.0012442311272025108, - "learning_rate": 0.00019999701180045234, - "loss": 46.0, - "step": 32195 - }, - { - "epoch": 2.4616090372154367, - "grad_norm": 0.0007843896746635437, - "learning_rate": 0.00019999701161476142, - "loss": 46.0, - "step": 32196 - }, - { - "epoch": 2.4616854941988264, - "grad_norm": 0.0032779022585600615, - "learning_rate": 0.00019999701142906474, - "loss": 46.0, - "step": 32197 - }, - { - "epoch": 2.461761951182216, - "grad_norm": 0.0005988853517919779, - "learning_rate": 0.00019999701124336227, - "loss": 46.0, - "step": 32198 - }, - { - "epoch": 2.461838408165606, - "grad_norm": 0.0013059888733550906, - "learning_rate": 0.00019999701105765405, - "loss": 46.0, - "step": 32199 - }, - { - "epoch": 2.4619148651489957, - "grad_norm": 0.001087350188754499, - "learning_rate": 0.00019999701087194006, - "loss": 46.0, - "step": 32200 - }, - { - "epoch": 2.461991322132385, - "grad_norm": 0.0027107377536594868, - "learning_rate": 0.0001999970106862203, - "loss": 46.0, - "step": 32201 - }, - { - "epoch": 2.462067779115775, - "grad_norm": 0.0014383131638169289, - "learning_rate": 0.00019999701050049478, - "loss": 46.0, - "step": 32202 - }, - { - "epoch": 2.4621442360991646, - "grad_norm": 0.0004719188145827502, - "learning_rate": 0.00019999701031476346, - "loss": 46.0, - "step": 32203 - }, - { - "epoch": 2.4622206930825543, - "grad_norm": 0.0006269064615480602, - "learning_rate": 0.00019999701012902638, - "loss": 46.0, - "step": 32204 - }, - { - "epoch": 2.462297150065944, - "grad_norm": 0.0017841834342107177, - "learning_rate": 0.00019999700994328354, - "loss": 46.0, - "step": 32205 - }, - { - "epoch": 2.462373607049334, - "grad_norm": 0.000889175571501255, - "learning_rate": 0.00019999700975753494, - "loss": 46.0, - "step": 32206 - }, - { - "epoch": 2.4624500640327236, - "grad_norm": 0.0005687976372428238, - "learning_rate": 0.00019999700957178056, - "loss": 46.0, - "step": 32207 - }, - { - "epoch": 2.4625265210161134, - "grad_norm": 0.0019288725452497602, - "learning_rate": 0.0001999970093860204, - "loss": 46.0, - "step": 32208 - }, - { - "epoch": 2.462602977999503, - "grad_norm": 0.0008934037177823484, - "learning_rate": 0.0001999970092002545, - "loss": 46.0, - "step": 32209 - }, - { - "epoch": 2.462679434982893, - "grad_norm": 0.0010685424786061049, - "learning_rate": 0.0001999970090144828, - "loss": 46.0, - "step": 32210 - }, - { - "epoch": 2.4627558919662826, - "grad_norm": 0.0014368095435202122, - "learning_rate": 0.00019999700882870536, - "loss": 46.0, - "step": 32211 - }, - { - "epoch": 2.4628323489496724, - "grad_norm": 0.0029335259459912777, - "learning_rate": 0.00019999700864292212, - "loss": 46.0, - "step": 32212 - }, - { - "epoch": 2.4629088059330617, - "grad_norm": 0.0017619087593629956, - "learning_rate": 0.00019999700845713312, - "loss": 46.0, - "step": 32213 - }, - { - "epoch": 2.4629852629164515, - "grad_norm": 0.0010946551337838173, - "learning_rate": 0.00019999700827133836, - "loss": 46.0, - "step": 32214 - }, - { - "epoch": 2.4630617198998412, - "grad_norm": 0.002092518378049135, - "learning_rate": 0.00019999700808553782, - "loss": 46.0, - "step": 32215 - }, - { - "epoch": 2.463138176883231, - "grad_norm": 0.0003836005926132202, - "learning_rate": 0.00019999700789973153, - "loss": 46.0, - "step": 32216 - }, - { - "epoch": 2.4632146338666208, - "grad_norm": 0.0011468147858977318, - "learning_rate": 0.00019999700771391944, - "loss": 46.0, - "step": 32217 - }, - { - "epoch": 2.4632910908500105, - "grad_norm": 0.0011520790867507458, - "learning_rate": 0.00019999700752810159, - "loss": 46.0, - "step": 32218 - }, - { - "epoch": 2.4633675478334003, - "grad_norm": 0.0026980803813785315, - "learning_rate": 0.000199997007342278, - "loss": 46.0, - "step": 32219 - }, - { - "epoch": 2.46344400481679, - "grad_norm": 0.002163398778066039, - "learning_rate": 0.00019999700715644863, - "loss": 46.0, - "step": 32220 - }, - { - "epoch": 2.46352046180018, - "grad_norm": 0.002358921803534031, - "learning_rate": 0.00019999700697061347, - "loss": 46.0, - "step": 32221 - }, - { - "epoch": 2.4635969187835696, - "grad_norm": 0.0031822770833969116, - "learning_rate": 0.00019999700678477255, - "loss": 46.0, - "step": 32222 - }, - { - "epoch": 2.463673375766959, - "grad_norm": 0.0005781924701295793, - "learning_rate": 0.00019999700659892585, - "loss": 46.0, - "step": 32223 - }, - { - "epoch": 2.4637498327503486, - "grad_norm": 0.001497967285104096, - "learning_rate": 0.0001999970064130734, - "loss": 46.0, - "step": 32224 - }, - { - "epoch": 2.4638262897337384, - "grad_norm": 0.0009857508121058345, - "learning_rate": 0.00019999700622721518, - "loss": 46.0, - "step": 32225 - }, - { - "epoch": 2.463902746717128, - "grad_norm": 0.0014936871593818069, - "learning_rate": 0.0001999970060413512, - "loss": 46.0, - "step": 32226 - }, - { - "epoch": 2.463979203700518, - "grad_norm": 0.0029334549326449633, - "learning_rate": 0.00019999700585548143, - "loss": 46.0, - "step": 32227 - }, - { - "epoch": 2.4640556606839077, - "grad_norm": 0.0009575043222866952, - "learning_rate": 0.0001999970056696059, - "loss": 46.0, - "step": 32228 - }, - { - "epoch": 2.4641321176672975, - "grad_norm": 0.0009826376335695386, - "learning_rate": 0.00019999700548372457, - "loss": 46.0, - "step": 32229 - }, - { - "epoch": 2.464208574650687, - "grad_norm": 0.0008980318089015782, - "learning_rate": 0.00019999700529783752, - "loss": 46.0, - "step": 32230 - }, - { - "epoch": 2.464285031634077, - "grad_norm": 0.0011363662779331207, - "learning_rate": 0.00019999700511194468, - "loss": 46.0, - "step": 32231 - }, - { - "epoch": 2.4643614886174667, - "grad_norm": 0.000897760153748095, - "learning_rate": 0.00019999700492604605, - "loss": 46.0, - "step": 32232 - }, - { - "epoch": 2.4644379456008565, - "grad_norm": 0.001164128421805799, - "learning_rate": 0.0001999970047401417, - "loss": 46.0, - "step": 32233 - }, - { - "epoch": 2.4645144025842463, - "grad_norm": 0.0026480199303478003, - "learning_rate": 0.00019999700455423152, - "loss": 46.0, - "step": 32234 - }, - { - "epoch": 2.4645908595676356, - "grad_norm": 0.0019794886466115713, - "learning_rate": 0.00019999700436831562, - "loss": 46.0, - "step": 32235 - }, - { - "epoch": 2.4646673165510253, - "grad_norm": 0.001332507119514048, - "learning_rate": 0.00019999700418239392, - "loss": 46.0, - "step": 32236 - }, - { - "epoch": 2.464743773534415, - "grad_norm": 0.001517377095296979, - "learning_rate": 0.00019999700399646645, - "loss": 46.0, - "step": 32237 - }, - { - "epoch": 2.464820230517805, - "grad_norm": 0.0037037094589322805, - "learning_rate": 0.00019999700381053323, - "loss": 46.0, - "step": 32238 - }, - { - "epoch": 2.4648966875011946, - "grad_norm": 0.0012010071659460664, - "learning_rate": 0.00019999700362459424, - "loss": 46.0, - "step": 32239 - }, - { - "epoch": 2.4649731444845844, - "grad_norm": 0.005148399155586958, - "learning_rate": 0.00019999700343864947, - "loss": 46.0, - "step": 32240 - }, - { - "epoch": 2.465049601467974, - "grad_norm": 0.0004465868987608701, - "learning_rate": 0.00019999700325269894, - "loss": 46.0, - "step": 32241 - }, - { - "epoch": 2.465126058451364, - "grad_norm": 0.0009997423039749265, - "learning_rate": 0.00019999700306674262, - "loss": 46.0, - "step": 32242 - }, - { - "epoch": 2.4652025154347537, - "grad_norm": 0.001208460540510714, - "learning_rate": 0.00019999700288078056, - "loss": 46.0, - "step": 32243 - }, - { - "epoch": 2.4652789724181434, - "grad_norm": 0.0018073919927701354, - "learning_rate": 0.0001999970026948127, - "loss": 46.0, - "step": 32244 - }, - { - "epoch": 2.4653554294015327, - "grad_norm": 0.0026139733381569386, - "learning_rate": 0.0001999970025088391, - "loss": 46.0, - "step": 32245 - }, - { - "epoch": 2.4654318863849225, - "grad_norm": 0.0010302651207894087, - "learning_rate": 0.00019999700232285972, - "loss": 46.0, - "step": 32246 - }, - { - "epoch": 2.4655083433683123, - "grad_norm": 0.0005748834810219705, - "learning_rate": 0.00019999700213687457, - "loss": 46.0, - "step": 32247 - }, - { - "epoch": 2.465584800351702, - "grad_norm": 0.0008782865479588509, - "learning_rate": 0.00019999700195088367, - "loss": 46.0, - "step": 32248 - }, - { - "epoch": 2.465661257335092, - "grad_norm": 0.0010622115805745125, - "learning_rate": 0.00019999700176488697, - "loss": 46.0, - "step": 32249 - }, - { - "epoch": 2.4657377143184815, - "grad_norm": 0.0006868946366012096, - "learning_rate": 0.0001999970015788845, - "loss": 46.0, - "step": 32250 - }, - { - "epoch": 2.4658141713018713, - "grad_norm": 0.0005327576072886586, - "learning_rate": 0.00019999700139287628, - "loss": 46.0, - "step": 32251 - }, - { - "epoch": 2.465890628285261, - "grad_norm": 0.0008218061411753297, - "learning_rate": 0.0001999970012068623, - "loss": 46.0, - "step": 32252 - }, - { - "epoch": 2.465967085268651, - "grad_norm": 0.0008427886059507728, - "learning_rate": 0.0001999970010208425, - "loss": 46.0, - "step": 32253 - }, - { - "epoch": 2.4660435422520406, - "grad_norm": 0.0016122242668643594, - "learning_rate": 0.00019999700083481696, - "loss": 46.0, - "step": 32254 - }, - { - "epoch": 2.4661199992354303, - "grad_norm": 0.0029113860800862312, - "learning_rate": 0.00019999700064878568, - "loss": 46.0, - "step": 32255 - }, - { - "epoch": 2.46619645621882, - "grad_norm": 0.0021183029748499393, - "learning_rate": 0.0001999970004627486, - "loss": 46.0, - "step": 32256 - }, - { - "epoch": 2.4662729132022094, - "grad_norm": 0.0018390774494037032, - "learning_rate": 0.00019999700027670576, - "loss": 46.0, - "step": 32257 - }, - { - "epoch": 2.466349370185599, - "grad_norm": 0.002160990610718727, - "learning_rate": 0.00019999700009065713, - "loss": 46.0, - "step": 32258 - }, - { - "epoch": 2.466425827168989, - "grad_norm": 0.0007847305969335139, - "learning_rate": 0.00019999699990460275, - "loss": 46.0, - "step": 32259 - }, - { - "epoch": 2.4665022841523787, - "grad_norm": 0.0011799189960584044, - "learning_rate": 0.0001999969997185426, - "loss": 46.0, - "step": 32260 - }, - { - "epoch": 2.4665787411357685, - "grad_norm": 0.0003658886707853526, - "learning_rate": 0.00019999699953247667, - "loss": 46.0, - "step": 32261 - }, - { - "epoch": 2.4666551981191582, - "grad_norm": 0.000489134166855365, - "learning_rate": 0.000199996999346405, - "loss": 46.0, - "step": 32262 - }, - { - "epoch": 2.466731655102548, - "grad_norm": 0.004200296476483345, - "learning_rate": 0.00019999699916032753, - "loss": 46.0, - "step": 32263 - }, - { - "epoch": 2.4668081120859378, - "grad_norm": 0.0016868250677362084, - "learning_rate": 0.0001999969989742443, - "loss": 46.0, - "step": 32264 - }, - { - "epoch": 2.4668845690693275, - "grad_norm": 0.0009139256435446441, - "learning_rate": 0.00019999699878815532, - "loss": 46.0, - "step": 32265 - }, - { - "epoch": 2.4669610260527173, - "grad_norm": 0.0009249647846445441, - "learning_rate": 0.00019999699860206053, - "loss": 46.0, - "step": 32266 - }, - { - "epoch": 2.4670374830361066, - "grad_norm": 0.0003955393913201988, - "learning_rate": 0.00019999699841596002, - "loss": 46.0, - "step": 32267 - }, - { - "epoch": 2.4671139400194964, - "grad_norm": 0.0006288831355050206, - "learning_rate": 0.0001999969982298537, - "loss": 46.0, - "step": 32268 - }, - { - "epoch": 2.467190397002886, - "grad_norm": 0.00339855276979506, - "learning_rate": 0.00019999699804374165, - "loss": 46.0, - "step": 32269 - }, - { - "epoch": 2.467266853986276, - "grad_norm": 0.0021211400162428617, - "learning_rate": 0.0001999969978576238, - "loss": 46.0, - "step": 32270 - }, - { - "epoch": 2.4673433109696656, - "grad_norm": 0.00104645942337811, - "learning_rate": 0.00019999699767150018, - "loss": 46.0, - "step": 32271 - }, - { - "epoch": 2.4674197679530554, - "grad_norm": 0.0008200169540941715, - "learning_rate": 0.00019999699748537078, - "loss": 46.0, - "step": 32272 - }, - { - "epoch": 2.467496224936445, - "grad_norm": 0.0027899937704205513, - "learning_rate": 0.00019999699729923563, - "loss": 46.0, - "step": 32273 - }, - { - "epoch": 2.467572681919835, - "grad_norm": 0.003018836723640561, - "learning_rate": 0.00019999699711309473, - "loss": 46.0, - "step": 32274 - }, - { - "epoch": 2.4676491389032247, - "grad_norm": 0.002071422291919589, - "learning_rate": 0.00019999699692694803, - "loss": 46.0, - "step": 32275 - }, - { - "epoch": 2.4677255958866144, - "grad_norm": 0.0008218807051889598, - "learning_rate": 0.00019999699674079556, - "loss": 46.0, - "step": 32276 - }, - { - "epoch": 2.467802052870004, - "grad_norm": 0.0008420305675826967, - "learning_rate": 0.00019999699655463731, - "loss": 46.0, - "step": 32277 - }, - { - "epoch": 2.467878509853394, - "grad_norm": 0.002019083360210061, - "learning_rate": 0.00019999699636847335, - "loss": 46.0, - "step": 32278 - }, - { - "epoch": 2.4679549668367833, - "grad_norm": 0.0021862878929823637, - "learning_rate": 0.00019999699618230356, - "loss": 46.0, - "step": 32279 - }, - { - "epoch": 2.468031423820173, - "grad_norm": 0.004516915418207645, - "learning_rate": 0.00019999699599612805, - "loss": 46.0, - "step": 32280 - }, - { - "epoch": 2.468107880803563, - "grad_norm": 0.0007316782139241695, - "learning_rate": 0.00019999699580994674, - "loss": 46.0, - "step": 32281 - }, - { - "epoch": 2.4681843377869526, - "grad_norm": 0.0011412501335144043, - "learning_rate": 0.00019999699562375965, - "loss": 46.0, - "step": 32282 - }, - { - "epoch": 2.4682607947703423, - "grad_norm": 0.0019098496995866299, - "learning_rate": 0.0001999969954375668, - "loss": 46.0, - "step": 32283 - }, - { - "epoch": 2.468337251753732, - "grad_norm": 0.0008229966042563319, - "learning_rate": 0.0001999969952513682, - "loss": 46.0, - "step": 32284 - }, - { - "epoch": 2.468413708737122, - "grad_norm": 0.0008402031962759793, - "learning_rate": 0.0001999969950651638, - "loss": 46.0, - "step": 32285 - }, - { - "epoch": 2.4684901657205116, - "grad_norm": 0.001198038924485445, - "learning_rate": 0.00019999699487895363, - "loss": 46.0, - "step": 32286 - }, - { - "epoch": 2.4685666227039014, - "grad_norm": 0.002581742126494646, - "learning_rate": 0.00019999699469273774, - "loss": 46.0, - "step": 32287 - }, - { - "epoch": 2.468643079687291, - "grad_norm": 0.0008640863816253841, - "learning_rate": 0.00019999699450651607, - "loss": 46.0, - "step": 32288 - }, - { - "epoch": 2.4687195366706804, - "grad_norm": 0.0006658991915173829, - "learning_rate": 0.00019999699432028857, - "loss": 46.0, - "step": 32289 - }, - { - "epoch": 2.46879599365407, - "grad_norm": 0.0011746136005967855, - "learning_rate": 0.00019999699413405535, - "loss": 46.0, - "step": 32290 - }, - { - "epoch": 2.46887245063746, - "grad_norm": 0.0010299982968717813, - "learning_rate": 0.00019999699394781636, - "loss": 46.0, - "step": 32291 - }, - { - "epoch": 2.4689489076208497, - "grad_norm": 0.0011975272791460156, - "learning_rate": 0.0001999969937615716, - "loss": 46.0, - "step": 32292 - }, - { - "epoch": 2.4690253646042395, - "grad_norm": 0.0013336162082850933, - "learning_rate": 0.00019999699357532106, - "loss": 46.0, - "step": 32293 - }, - { - "epoch": 2.4691018215876293, - "grad_norm": 0.0005860582459717989, - "learning_rate": 0.00019999699338906475, - "loss": 46.0, - "step": 32294 - }, - { - "epoch": 2.469178278571019, - "grad_norm": 0.0009031447698362172, - "learning_rate": 0.00019999699320280266, - "loss": 46.0, - "step": 32295 - }, - { - "epoch": 2.4692547355544088, - "grad_norm": 0.0010756520787253976, - "learning_rate": 0.0001999969930165348, - "loss": 46.0, - "step": 32296 - }, - { - "epoch": 2.4693311925377985, - "grad_norm": 0.0005827349959872663, - "learning_rate": 0.0001999969928302612, - "loss": 46.0, - "step": 32297 - }, - { - "epoch": 2.4694076495211883, - "grad_norm": 0.0012503070756793022, - "learning_rate": 0.00019999699264398182, - "loss": 46.0, - "step": 32298 - }, - { - "epoch": 2.469484106504578, - "grad_norm": 0.0012438365956768394, - "learning_rate": 0.00019999699245769667, - "loss": 46.0, - "step": 32299 - }, - { - "epoch": 2.469560563487968, - "grad_norm": 0.0010974142933264375, - "learning_rate": 0.00019999699227140575, - "loss": 46.0, - "step": 32300 - }, - { - "epoch": 2.469637020471357, - "grad_norm": 0.0007490361458621919, - "learning_rate": 0.00019999699208510908, - "loss": 46.0, - "step": 32301 - }, - { - "epoch": 2.469713477454747, - "grad_norm": 0.0009076794958673418, - "learning_rate": 0.00019999699189880658, - "loss": 46.0, - "step": 32302 - }, - { - "epoch": 2.4697899344381367, - "grad_norm": 0.004136491566896439, - "learning_rate": 0.00019999699171249836, - "loss": 46.0, - "step": 32303 - }, - { - "epoch": 2.4698663914215264, - "grad_norm": 0.0006584388902410865, - "learning_rate": 0.00019999699152618437, - "loss": 46.0, - "step": 32304 - }, - { - "epoch": 2.469942848404916, - "grad_norm": 0.005605303682386875, - "learning_rate": 0.00019999699133986458, - "loss": 46.0, - "step": 32305 - }, - { - "epoch": 2.470019305388306, - "grad_norm": 0.0013109392020851374, - "learning_rate": 0.00019999699115353905, - "loss": 46.0, - "step": 32306 - }, - { - "epoch": 2.4700957623716957, - "grad_norm": 0.0028625130653381348, - "learning_rate": 0.00019999699096720773, - "loss": 46.0, - "step": 32307 - }, - { - "epoch": 2.4701722193550855, - "grad_norm": 0.0007773717516101897, - "learning_rate": 0.00019999699078087065, - "loss": 46.0, - "step": 32308 - }, - { - "epoch": 2.4702486763384752, - "grad_norm": 0.0008473945199511945, - "learning_rate": 0.00019999699059452782, - "loss": 46.0, - "step": 32309 - }, - { - "epoch": 2.4703251333218645, - "grad_norm": 0.0017666032072156668, - "learning_rate": 0.00019999699040817922, - "loss": 46.0, - "step": 32310 - }, - { - "epoch": 2.4704015903052543, - "grad_norm": 0.0006012325175106525, - "learning_rate": 0.00019999699022182484, - "loss": 46.0, - "step": 32311 - }, - { - "epoch": 2.470478047288644, - "grad_norm": 0.0017275470308959484, - "learning_rate": 0.00019999699003546466, - "loss": 46.0, - "step": 32312 - }, - { - "epoch": 2.470554504272034, - "grad_norm": 0.0014314352301880717, - "learning_rate": 0.00019999698984909877, - "loss": 46.0, - "step": 32313 - }, - { - "epoch": 2.4706309612554236, - "grad_norm": 0.0008748232503421605, - "learning_rate": 0.00019999698966272704, - "loss": 46.0, - "step": 32314 - }, - { - "epoch": 2.4707074182388133, - "grad_norm": 0.0006330252508632839, - "learning_rate": 0.0001999969894763496, - "loss": 46.0, - "step": 32315 - }, - { - "epoch": 2.470783875222203, - "grad_norm": 0.0024749613367021084, - "learning_rate": 0.00019999698928996638, - "loss": 46.0, - "step": 32316 - }, - { - "epoch": 2.470860332205593, - "grad_norm": 0.0007952415617182851, - "learning_rate": 0.00019999698910357736, - "loss": 46.0, - "step": 32317 - }, - { - "epoch": 2.4709367891889826, - "grad_norm": 0.0006256563356146216, - "learning_rate": 0.0001999969889171826, - "loss": 46.0, - "step": 32318 - }, - { - "epoch": 2.4710132461723724, - "grad_norm": 0.0018135980935767293, - "learning_rate": 0.00019999698873078206, - "loss": 46.0, - "step": 32319 - }, - { - "epoch": 2.471089703155762, - "grad_norm": 0.0026998789981007576, - "learning_rate": 0.00019999698854437575, - "loss": 46.0, - "step": 32320 - }, - { - "epoch": 2.471166160139152, - "grad_norm": 0.0007628350285813212, - "learning_rate": 0.00019999698835796367, - "loss": 46.0, - "step": 32321 - }, - { - "epoch": 2.4712426171225417, - "grad_norm": 0.0013211965560913086, - "learning_rate": 0.0001999969881715458, - "loss": 46.0, - "step": 32322 - }, - { - "epoch": 2.471319074105931, - "grad_norm": 0.0022015448193997145, - "learning_rate": 0.0001999969879851222, - "loss": 46.0, - "step": 32323 - }, - { - "epoch": 2.4713955310893208, - "grad_norm": 0.0012848052429035306, - "learning_rate": 0.00019999698779869283, - "loss": 46.0, - "step": 32324 - }, - { - "epoch": 2.4714719880727105, - "grad_norm": 0.0006338043604046106, - "learning_rate": 0.00019999698761225768, - "loss": 46.0, - "step": 32325 - }, - { - "epoch": 2.4715484450561003, - "grad_norm": 0.0014433825854212046, - "learning_rate": 0.00019999698742581673, - "loss": 46.0, - "step": 32326 - }, - { - "epoch": 2.47162490203949, - "grad_norm": 0.001359913614578545, - "learning_rate": 0.00019999698723937006, - "loss": 46.0, - "step": 32327 - }, - { - "epoch": 2.47170135902288, - "grad_norm": 0.0005980661371722817, - "learning_rate": 0.0001999969870529176, - "loss": 46.0, - "step": 32328 - }, - { - "epoch": 2.4717778160062696, - "grad_norm": 0.004556672647595406, - "learning_rate": 0.00019999698686645935, - "loss": 46.0, - "step": 32329 - }, - { - "epoch": 2.4718542729896593, - "grad_norm": 0.0006020395085215569, - "learning_rate": 0.00019999698667999536, - "loss": 46.0, - "step": 32330 - }, - { - "epoch": 2.471930729973049, - "grad_norm": 0.0012320332461968064, - "learning_rate": 0.0001999969864935256, - "loss": 46.0, - "step": 32331 - }, - { - "epoch": 2.4720071869564384, - "grad_norm": 0.0012373302597552538, - "learning_rate": 0.00019999698630705006, - "loss": 46.0, - "step": 32332 - }, - { - "epoch": 2.472083643939828, - "grad_norm": 0.0025837216526269913, - "learning_rate": 0.00019999698612056875, - "loss": 46.0, - "step": 32333 - }, - { - "epoch": 2.472160100923218, - "grad_norm": 0.0013923693913966417, - "learning_rate": 0.00019999698593408164, - "loss": 46.0, - "step": 32334 - }, - { - "epoch": 2.4722365579066077, - "grad_norm": 0.0004421780467964709, - "learning_rate": 0.00019999698574758883, - "loss": 46.0, - "step": 32335 - }, - { - "epoch": 2.4723130148899974, - "grad_norm": 0.001366337644867599, - "learning_rate": 0.00019999698556109018, - "loss": 46.0, - "step": 32336 - }, - { - "epoch": 2.472389471873387, - "grad_norm": 0.0003482487518340349, - "learning_rate": 0.00019999698537458583, - "loss": 46.0, - "step": 32337 - }, - { - "epoch": 2.472465928856777, - "grad_norm": 0.004511781502515078, - "learning_rate": 0.00019999698518807565, - "loss": 46.0, - "step": 32338 - }, - { - "epoch": 2.4725423858401667, - "grad_norm": 0.0009244129760190845, - "learning_rate": 0.00019999698500155973, - "loss": 46.0, - "step": 32339 - }, - { - "epoch": 2.4726188428235565, - "grad_norm": 0.0006865226314403117, - "learning_rate": 0.00019999698481503803, - "loss": 46.0, - "step": 32340 - }, - { - "epoch": 2.4726952998069462, - "grad_norm": 0.0011483271373435855, - "learning_rate": 0.00019999698462851056, - "loss": 46.0, - "step": 32341 - }, - { - "epoch": 2.472771756790336, - "grad_norm": 0.001501023885793984, - "learning_rate": 0.00019999698444197735, - "loss": 46.0, - "step": 32342 - }, - { - "epoch": 2.4728482137737258, - "grad_norm": 0.0008674286073073745, - "learning_rate": 0.00019999698425543833, - "loss": 46.0, - "step": 32343 - }, - { - "epoch": 2.472924670757115, - "grad_norm": 0.0015875682001933455, - "learning_rate": 0.00019999698406889357, - "loss": 46.0, - "step": 32344 - }, - { - "epoch": 2.473001127740505, - "grad_norm": 0.0008716161246411502, - "learning_rate": 0.00019999698388234303, - "loss": 46.0, - "step": 32345 - }, - { - "epoch": 2.4730775847238946, - "grad_norm": 0.009208821691572666, - "learning_rate": 0.00019999698369578672, - "loss": 46.0, - "step": 32346 - }, - { - "epoch": 2.4731540417072844, - "grad_norm": 0.002032272983342409, - "learning_rate": 0.00019999698350922464, - "loss": 46.0, - "step": 32347 - }, - { - "epoch": 2.473230498690674, - "grad_norm": 0.0009493536781519651, - "learning_rate": 0.0001999969833226568, - "loss": 46.0, - "step": 32348 - }, - { - "epoch": 2.473306955674064, - "grad_norm": 0.0016980364453047514, - "learning_rate": 0.00019999698313608318, - "loss": 46.0, - "step": 32349 - }, - { - "epoch": 2.4733834126574537, - "grad_norm": 0.003632499137893319, - "learning_rate": 0.00019999698294950377, - "loss": 46.0, - "step": 32350 - }, - { - "epoch": 2.4734598696408434, - "grad_norm": 0.0018604836659505963, - "learning_rate": 0.00019999698276291863, - "loss": 46.0, - "step": 32351 - }, - { - "epoch": 2.473536326624233, - "grad_norm": 0.0009248324204236269, - "learning_rate": 0.0001999969825763277, - "loss": 46.0, - "step": 32352 - }, - { - "epoch": 2.473612783607623, - "grad_norm": 0.0007362568285316229, - "learning_rate": 0.000199996982389731, - "loss": 46.0, - "step": 32353 - }, - { - "epoch": 2.4736892405910123, - "grad_norm": 0.0016131887678056955, - "learning_rate": 0.00019999698220312854, - "loss": 46.0, - "step": 32354 - }, - { - "epoch": 2.473765697574402, - "grad_norm": 0.002820591675117612, - "learning_rate": 0.0001999969820165203, - "loss": 46.0, - "step": 32355 - }, - { - "epoch": 2.4738421545577918, - "grad_norm": 0.002857802202925086, - "learning_rate": 0.0001999969818299063, - "loss": 46.0, - "step": 32356 - }, - { - "epoch": 2.4739186115411815, - "grad_norm": 0.0010428531095385551, - "learning_rate": 0.00019999698164328654, - "loss": 46.0, - "step": 32357 - }, - { - "epoch": 2.4739950685245713, - "grad_norm": 0.001030318788252771, - "learning_rate": 0.00019999698145666098, - "loss": 46.0, - "step": 32358 - }, - { - "epoch": 2.474071525507961, - "grad_norm": 0.016375398263335228, - "learning_rate": 0.0001999969812700297, - "loss": 46.0, - "step": 32359 - }, - { - "epoch": 2.474147982491351, - "grad_norm": 0.0018805437721312046, - "learning_rate": 0.0001999969810833926, - "loss": 46.0, - "step": 32360 - }, - { - "epoch": 2.4742244394747406, - "grad_norm": 0.001070512575097382, - "learning_rate": 0.00019999698089674976, - "loss": 46.0, - "step": 32361 - }, - { - "epoch": 2.4743008964581303, - "grad_norm": 0.002054716693237424, - "learning_rate": 0.00019999698071010116, - "loss": 46.0, - "step": 32362 - }, - { - "epoch": 2.47437735344152, - "grad_norm": 0.0006869182689115405, - "learning_rate": 0.00019999698052344675, - "loss": 46.0, - "step": 32363 - }, - { - "epoch": 2.47445381042491, - "grad_norm": 0.0006861745496280491, - "learning_rate": 0.0001999969803367866, - "loss": 46.0, - "step": 32364 - }, - { - "epoch": 2.4745302674082996, - "grad_norm": 0.0037119807675480843, - "learning_rate": 0.00019999698015012068, - "loss": 46.0, - "step": 32365 - }, - { - "epoch": 2.474606724391689, - "grad_norm": 0.001989027950912714, - "learning_rate": 0.00019999697996344898, - "loss": 46.0, - "step": 32366 - }, - { - "epoch": 2.4746831813750787, - "grad_norm": 0.001872391439974308, - "learning_rate": 0.00019999697977677152, - "loss": 46.0, - "step": 32367 - }, - { - "epoch": 2.4747596383584685, - "grad_norm": 0.0007466906099580228, - "learning_rate": 0.00019999697959008827, - "loss": 46.0, - "step": 32368 - }, - { - "epoch": 2.474836095341858, - "grad_norm": 0.002406191546469927, - "learning_rate": 0.00019999697940339928, - "loss": 46.0, - "step": 32369 - }, - { - "epoch": 2.474912552325248, - "grad_norm": 0.0009625056991353631, - "learning_rate": 0.0001999969792167045, - "loss": 46.0, - "step": 32370 - }, - { - "epoch": 2.4749890093086377, - "grad_norm": 0.0009684687829576433, - "learning_rate": 0.00019999697903000396, - "loss": 46.0, - "step": 32371 - }, - { - "epoch": 2.4750654662920275, - "grad_norm": 0.0006879267748445272, - "learning_rate": 0.00019999697884329765, - "loss": 46.0, - "step": 32372 - }, - { - "epoch": 2.4751419232754173, - "grad_norm": 0.0007508319686166942, - "learning_rate": 0.00019999697865658557, - "loss": 46.0, - "step": 32373 - }, - { - "epoch": 2.475218380258807, - "grad_norm": 0.0012423944426700473, - "learning_rate": 0.00019999697846986774, - "loss": 46.0, - "step": 32374 - }, - { - "epoch": 2.475294837242197, - "grad_norm": 0.001323615200817585, - "learning_rate": 0.0001999969782831441, - "loss": 46.0, - "step": 32375 - }, - { - "epoch": 2.475371294225586, - "grad_norm": 0.0038680562283843756, - "learning_rate": 0.00019999697809641473, - "loss": 46.0, - "step": 32376 - }, - { - "epoch": 2.475447751208976, - "grad_norm": 0.000774978194385767, - "learning_rate": 0.00019999697790967959, - "loss": 46.0, - "step": 32377 - }, - { - "epoch": 2.4755242081923656, - "grad_norm": 0.0005598383140750229, - "learning_rate": 0.00019999697772293864, - "loss": 46.0, - "step": 32378 - }, - { - "epoch": 2.4756006651757554, - "grad_norm": 0.0015458286507055163, - "learning_rate": 0.00019999697753619194, - "loss": 46.0, - "step": 32379 - }, - { - "epoch": 2.475677122159145, - "grad_norm": 0.0006524888449348509, - "learning_rate": 0.00019999697734943947, - "loss": 46.0, - "step": 32380 - }, - { - "epoch": 2.475753579142535, - "grad_norm": 0.00337987975217402, - "learning_rate": 0.00019999697716268123, - "loss": 46.0, - "step": 32381 - }, - { - "epoch": 2.4758300361259247, - "grad_norm": 0.0012290806043893099, - "learning_rate": 0.00019999697697591724, - "loss": 46.0, - "step": 32382 - }, - { - "epoch": 2.4759064931093144, - "grad_norm": 0.0008876342908479273, - "learning_rate": 0.00019999697678914745, - "loss": 46.0, - "step": 32383 - }, - { - "epoch": 2.475982950092704, - "grad_norm": 0.0016609154408797622, - "learning_rate": 0.00019999697660237194, - "loss": 46.0, - "step": 32384 - }, - { - "epoch": 2.476059407076094, - "grad_norm": 0.005838238634169102, - "learning_rate": 0.0001999969764155906, - "loss": 46.0, - "step": 32385 - }, - { - "epoch": 2.4761358640594837, - "grad_norm": 0.004494909197092056, - "learning_rate": 0.00019999697622880353, - "loss": 46.0, - "step": 32386 - }, - { - "epoch": 2.4762123210428735, - "grad_norm": 0.0010775546543300152, - "learning_rate": 0.0001999969760420107, - "loss": 46.0, - "step": 32387 - }, - { - "epoch": 2.476288778026263, - "grad_norm": 0.0007386936922557652, - "learning_rate": 0.00019999697585521207, - "loss": 46.0, - "step": 32388 - }, - { - "epoch": 2.4763652350096526, - "grad_norm": 0.0006629854906350374, - "learning_rate": 0.00019999697566840767, - "loss": 46.0, - "step": 32389 - }, - { - "epoch": 2.4764416919930423, - "grad_norm": 0.0007553491741418839, - "learning_rate": 0.00019999697548159752, - "loss": 46.0, - "step": 32390 - }, - { - "epoch": 2.476518148976432, - "grad_norm": 0.0013514121528714895, - "learning_rate": 0.0001999969752947816, - "loss": 46.0, - "step": 32391 - }, - { - "epoch": 2.476594605959822, - "grad_norm": 0.000956163217779249, - "learning_rate": 0.0001999969751079599, - "loss": 46.0, - "step": 32392 - }, - { - "epoch": 2.4766710629432116, - "grad_norm": 0.0015494560357183218, - "learning_rate": 0.00019999697492113243, - "loss": 46.0, - "step": 32393 - }, - { - "epoch": 2.4767475199266014, - "grad_norm": 0.0021596322767436504, - "learning_rate": 0.0001999969747342992, - "loss": 46.0, - "step": 32394 - }, - { - "epoch": 2.476823976909991, - "grad_norm": 0.003033431014046073, - "learning_rate": 0.00019999697454746017, - "loss": 46.0, - "step": 32395 - }, - { - "epoch": 2.476900433893381, - "grad_norm": 0.0135241923853755, - "learning_rate": 0.0001999969743606154, - "loss": 46.0, - "step": 32396 - }, - { - "epoch": 2.4769768908767706, - "grad_norm": 0.0014106598682701588, - "learning_rate": 0.00019999697417376488, - "loss": 46.0, - "step": 32397 - }, - { - "epoch": 2.47705334786016, - "grad_norm": 0.0006915717967785895, - "learning_rate": 0.00019999697398690857, - "loss": 46.0, - "step": 32398 - }, - { - "epoch": 2.4771298048435497, - "grad_norm": 0.0007915030582807958, - "learning_rate": 0.0001999969738000465, - "loss": 46.0, - "step": 32399 - }, - { - "epoch": 2.4772062618269395, - "grad_norm": 0.0016740431310608983, - "learning_rate": 0.00019999697361317863, - "loss": 46.0, - "step": 32400 - }, - { - "epoch": 2.4772827188103292, - "grad_norm": 0.0011863181134685874, - "learning_rate": 0.000199996973426305, - "loss": 46.0, - "step": 32401 - }, - { - "epoch": 2.477359175793719, - "grad_norm": 0.0049256556667387486, - "learning_rate": 0.0001999969732394256, - "loss": 46.0, - "step": 32402 - }, - { - "epoch": 2.4774356327771088, - "grad_norm": 0.008531459607183933, - "learning_rate": 0.00019999697305254045, - "loss": 46.0, - "step": 32403 - }, - { - "epoch": 2.4775120897604985, - "grad_norm": 0.0011433891486376524, - "learning_rate": 0.00019999697286564953, - "loss": 46.0, - "step": 32404 - }, - { - "epoch": 2.4775885467438883, - "grad_norm": 0.0006535463035106659, - "learning_rate": 0.00019999697267875284, - "loss": 46.0, - "step": 32405 - }, - { - "epoch": 2.477665003727278, - "grad_norm": 0.0004138807416893542, - "learning_rate": 0.00019999697249185037, - "loss": 46.0, - "step": 32406 - }, - { - "epoch": 2.477741460710668, - "grad_norm": 0.0014902279945090413, - "learning_rate": 0.00019999697230494215, - "loss": 46.0, - "step": 32407 - }, - { - "epoch": 2.4778179176940576, - "grad_norm": 0.0012376923114061356, - "learning_rate": 0.00019999697211802814, - "loss": 46.0, - "step": 32408 - }, - { - "epoch": 2.4778943746774473, - "grad_norm": 0.0007314681424759328, - "learning_rate": 0.00019999697193110835, - "loss": 46.0, - "step": 32409 - }, - { - "epoch": 2.4779708316608366, - "grad_norm": 0.00043874586117453873, - "learning_rate": 0.00019999697174418282, - "loss": 46.0, - "step": 32410 - }, - { - "epoch": 2.4780472886442264, - "grad_norm": 0.0008751305867917836, - "learning_rate": 0.0001999969715572515, - "loss": 46.0, - "step": 32411 - }, - { - "epoch": 2.478123745627616, - "grad_norm": 0.0021014243829995394, - "learning_rate": 0.00019999697137031443, - "loss": 46.0, - "step": 32412 - }, - { - "epoch": 2.478200202611006, - "grad_norm": 0.0017999489791691303, - "learning_rate": 0.00019999697118337157, - "loss": 46.0, - "step": 32413 - }, - { - "epoch": 2.4782766595943957, - "grad_norm": 0.0012681579682976007, - "learning_rate": 0.00019999697099642294, - "loss": 46.0, - "step": 32414 - }, - { - "epoch": 2.4783531165777855, - "grad_norm": 0.005125392694026232, - "learning_rate": 0.00019999697080946854, - "loss": 46.0, - "step": 32415 - }, - { - "epoch": 2.478429573561175, - "grad_norm": 0.0006486070342361927, - "learning_rate": 0.0001999969706225084, - "loss": 46.0, - "step": 32416 - }, - { - "epoch": 2.478506030544565, - "grad_norm": 0.0009953194530680776, - "learning_rate": 0.00019999697043554247, - "loss": 46.0, - "step": 32417 - }, - { - "epoch": 2.4785824875279547, - "grad_norm": 0.0013166008284315467, - "learning_rate": 0.00019999697024857075, - "loss": 46.0, - "step": 32418 - }, - { - "epoch": 2.4786589445113445, - "grad_norm": 0.0005559337441809475, - "learning_rate": 0.0001999969700615933, - "loss": 46.0, - "step": 32419 - }, - { - "epoch": 2.478735401494734, - "grad_norm": 0.001008525025099516, - "learning_rate": 0.00019999696987461007, - "loss": 46.0, - "step": 32420 - }, - { - "epoch": 2.4788118584781236, - "grad_norm": 0.0012607964454218745, - "learning_rate": 0.00019999696968762108, - "loss": 46.0, - "step": 32421 - }, - { - "epoch": 2.4788883154615133, - "grad_norm": 0.0005901282420381904, - "learning_rate": 0.0001999969695006263, - "loss": 46.0, - "step": 32422 - }, - { - "epoch": 2.478964772444903, - "grad_norm": 0.0042494540102779865, - "learning_rate": 0.00019999696931362576, - "loss": 46.0, - "step": 32423 - }, - { - "epoch": 2.479041229428293, - "grad_norm": 0.0010255294619128108, - "learning_rate": 0.00019999696912661942, - "loss": 46.0, - "step": 32424 - }, - { - "epoch": 2.4791176864116826, - "grad_norm": 0.0007286560721695423, - "learning_rate": 0.00019999696893960734, - "loss": 46.0, - "step": 32425 - }, - { - "epoch": 2.4791941433950724, - "grad_norm": 0.0015877713449299335, - "learning_rate": 0.00019999696875258951, - "loss": 46.0, - "step": 32426 - }, - { - "epoch": 2.479270600378462, - "grad_norm": 0.001166089903563261, - "learning_rate": 0.00019999696856556586, - "loss": 46.0, - "step": 32427 - }, - { - "epoch": 2.479347057361852, - "grad_norm": 0.0010971451411023736, - "learning_rate": 0.00019999696837853649, - "loss": 46.0, - "step": 32428 - }, - { - "epoch": 2.4794235143452417, - "grad_norm": 0.0034140772186219692, - "learning_rate": 0.0001999969681915013, - "loss": 46.0, - "step": 32429 - }, - { - "epoch": 2.4794999713286314, - "grad_norm": 0.00035172211937606335, - "learning_rate": 0.0001999969680044604, - "loss": 46.0, - "step": 32430 - }, - { - "epoch": 2.479576428312021, - "grad_norm": 0.005393309984356165, - "learning_rate": 0.0001999969678174137, - "loss": 46.0, - "step": 32431 - }, - { - "epoch": 2.4796528852954105, - "grad_norm": 0.0007292910595424473, - "learning_rate": 0.00019999696763036125, - "loss": 46.0, - "step": 32432 - }, - { - "epoch": 2.4797293422788003, - "grad_norm": 0.0033124315086752176, - "learning_rate": 0.000199996967443303, - "loss": 46.0, - "step": 32433 - }, - { - "epoch": 2.47980579926219, - "grad_norm": 0.0006346868467517197, - "learning_rate": 0.000199996967256239, - "loss": 46.0, - "step": 32434 - }, - { - "epoch": 2.47988225624558, - "grad_norm": 0.00042091519571840763, - "learning_rate": 0.0001999969670691692, - "loss": 46.0, - "step": 32435 - }, - { - "epoch": 2.4799587132289695, - "grad_norm": 0.0014532859204337, - "learning_rate": 0.00019999696688209365, - "loss": 46.0, - "step": 32436 - }, - { - "epoch": 2.4800351702123593, - "grad_norm": 0.0007465852540917695, - "learning_rate": 0.00019999696669501234, - "loss": 46.0, - "step": 32437 - }, - { - "epoch": 2.480111627195749, - "grad_norm": 0.0013091983273625374, - "learning_rate": 0.0001999969665079253, - "loss": 46.0, - "step": 32438 - }, - { - "epoch": 2.480188084179139, - "grad_norm": 0.00368438265286386, - "learning_rate": 0.00019999696632083243, - "loss": 46.0, - "step": 32439 - }, - { - "epoch": 2.4802645411625286, - "grad_norm": 0.0008188743959181011, - "learning_rate": 0.0001999969661337338, - "loss": 46.0, - "step": 32440 - }, - { - "epoch": 2.480340998145918, - "grad_norm": 0.0009064456098712981, - "learning_rate": 0.00019999696594662943, - "loss": 46.0, - "step": 32441 - }, - { - "epoch": 2.4804174551293077, - "grad_norm": 0.0004914067685604095, - "learning_rate": 0.00019999696575951926, - "loss": 46.0, - "step": 32442 - }, - { - "epoch": 2.4804939121126974, - "grad_norm": 0.001155688427388668, - "learning_rate": 0.0001999969655724033, - "loss": 46.0, - "step": 32443 - }, - { - "epoch": 2.480570369096087, - "grad_norm": 0.0013556908816099167, - "learning_rate": 0.00019999696538528162, - "loss": 46.0, - "step": 32444 - }, - { - "epoch": 2.480646826079477, - "grad_norm": 0.0025988768320530653, - "learning_rate": 0.00019999696519815415, - "loss": 46.0, - "step": 32445 - }, - { - "epoch": 2.4807232830628667, - "grad_norm": 0.0007351776584982872, - "learning_rate": 0.0001999969650110209, - "loss": 46.0, - "step": 32446 - }, - { - "epoch": 2.4807997400462565, - "grad_norm": 0.001170598203316331, - "learning_rate": 0.0001999969648238819, - "loss": 46.0, - "step": 32447 - }, - { - "epoch": 2.4808761970296462, - "grad_norm": 0.0006654290482401848, - "learning_rate": 0.00019999696463673713, - "loss": 46.0, - "step": 32448 - }, - { - "epoch": 2.480952654013036, - "grad_norm": 0.0014254014240577817, - "learning_rate": 0.00019999696444958657, - "loss": 46.0, - "step": 32449 - }, - { - "epoch": 2.4810291109964258, - "grad_norm": 0.0009064446203410625, - "learning_rate": 0.0001999969642624303, - "loss": 46.0, - "step": 32450 - }, - { - "epoch": 2.4811055679798155, - "grad_norm": 0.0004398701130412519, - "learning_rate": 0.00019999696407526819, - "loss": 46.0, - "step": 32451 - }, - { - "epoch": 2.4811820249632053, - "grad_norm": 0.0022492967545986176, - "learning_rate": 0.00019999696388810033, - "loss": 46.0, - "step": 32452 - }, - { - "epoch": 2.481258481946595, - "grad_norm": 0.004112998489290476, - "learning_rate": 0.0001999969637009267, - "loss": 46.0, - "step": 32453 - }, - { - "epoch": 2.4813349389299844, - "grad_norm": 0.000455722794868052, - "learning_rate": 0.0001999969635137473, - "loss": 46.0, - "step": 32454 - }, - { - "epoch": 2.481411395913374, - "grad_norm": 0.0008955828379839659, - "learning_rate": 0.00019999696332656216, - "loss": 46.0, - "step": 32455 - }, - { - "epoch": 2.481487852896764, - "grad_norm": 0.0007259807898662984, - "learning_rate": 0.0001999969631393712, - "loss": 46.0, - "step": 32456 - }, - { - "epoch": 2.4815643098801536, - "grad_norm": 0.0006770557956770062, - "learning_rate": 0.00019999696295217452, - "loss": 46.0, - "step": 32457 - }, - { - "epoch": 2.4816407668635434, - "grad_norm": 0.005172649398446083, - "learning_rate": 0.00019999696276497208, - "loss": 46.0, - "step": 32458 - }, - { - "epoch": 2.481717223846933, - "grad_norm": 0.0006946736248210073, - "learning_rate": 0.00019999696257776384, - "loss": 46.0, - "step": 32459 - }, - { - "epoch": 2.481793680830323, - "grad_norm": 0.0003239812212996185, - "learning_rate": 0.00019999696239054982, - "loss": 46.0, - "step": 32460 - }, - { - "epoch": 2.4818701378137127, - "grad_norm": 0.0033833985216915607, - "learning_rate": 0.00019999696220333003, - "loss": 46.0, - "step": 32461 - }, - { - "epoch": 2.4819465947971024, - "grad_norm": 0.0011916621588170528, - "learning_rate": 0.0001999969620161045, - "loss": 46.0, - "step": 32462 - }, - { - "epoch": 2.4820230517804918, - "grad_norm": 0.0005481817643158138, - "learning_rate": 0.00019999696182887317, - "loss": 46.0, - "step": 32463 - }, - { - "epoch": 2.4820995087638815, - "grad_norm": 0.0004768413200508803, - "learning_rate": 0.0001999969616416361, - "loss": 46.0, - "step": 32464 - }, - { - "epoch": 2.4821759657472713, - "grad_norm": 0.0014096852391958237, - "learning_rate": 0.00019999696145439323, - "loss": 46.0, - "step": 32465 - }, - { - "epoch": 2.482252422730661, - "grad_norm": 0.005327824503183365, - "learning_rate": 0.00019999696126714463, - "loss": 46.0, - "step": 32466 - }, - { - "epoch": 2.482328879714051, - "grad_norm": 0.0014724304201081395, - "learning_rate": 0.00019999696107989023, - "loss": 46.0, - "step": 32467 - }, - { - "epoch": 2.4824053366974406, - "grad_norm": 0.0009439281420782208, - "learning_rate": 0.00019999696089263006, - "loss": 46.0, - "step": 32468 - }, - { - "epoch": 2.4824817936808303, - "grad_norm": 0.0021287312265485525, - "learning_rate": 0.00019999696070536414, - "loss": 46.0, - "step": 32469 - }, - { - "epoch": 2.48255825066422, - "grad_norm": 0.0019370531663298607, - "learning_rate": 0.00019999696051809242, - "loss": 46.0, - "step": 32470 - }, - { - "epoch": 2.48263470764761, - "grad_norm": 0.000743091746699065, - "learning_rate": 0.00019999696033081495, - "loss": 46.0, - "step": 32471 - }, - { - "epoch": 2.4827111646309996, - "grad_norm": 0.0014310280093923211, - "learning_rate": 0.00019999696014353174, - "loss": 46.0, - "step": 32472 - }, - { - "epoch": 2.4827876216143894, - "grad_norm": 0.0010180834215134382, - "learning_rate": 0.0001999969599562427, - "loss": 46.0, - "step": 32473 - }, - { - "epoch": 2.482864078597779, - "grad_norm": 0.0030324929393827915, - "learning_rate": 0.00019999695976894794, - "loss": 46.0, - "step": 32474 - }, - { - "epoch": 2.4829405355811685, - "grad_norm": 0.0007764458423480392, - "learning_rate": 0.00019999695958164738, - "loss": 46.0, - "step": 32475 - }, - { - "epoch": 2.483016992564558, - "grad_norm": 0.0027090120129287243, - "learning_rate": 0.00019999695939434107, - "loss": 46.0, - "step": 32476 - }, - { - "epoch": 2.483093449547948, - "grad_norm": 0.0004728749336209148, - "learning_rate": 0.000199996959207029, - "loss": 46.0, - "step": 32477 - }, - { - "epoch": 2.4831699065313377, - "grad_norm": 0.0012124466011300683, - "learning_rate": 0.00019999695901971114, - "loss": 46.0, - "step": 32478 - }, - { - "epoch": 2.4832463635147275, - "grad_norm": 0.012036803178489208, - "learning_rate": 0.0001999969588323875, - "loss": 46.0, - "step": 32479 - }, - { - "epoch": 2.4833228204981173, - "grad_norm": 0.0007612515473738313, - "learning_rate": 0.0001999969586450581, - "loss": 46.0, - "step": 32480 - }, - { - "epoch": 2.483399277481507, - "grad_norm": 0.0007346108322963119, - "learning_rate": 0.00019999695845772294, - "loss": 46.0, - "step": 32481 - }, - { - "epoch": 2.483475734464897, - "grad_norm": 0.0005843139952048659, - "learning_rate": 0.00019999695827038202, - "loss": 46.0, - "step": 32482 - }, - { - "epoch": 2.4835521914482865, - "grad_norm": 0.0017749707913026214, - "learning_rate": 0.00019999695808303533, - "loss": 46.0, - "step": 32483 - }, - { - "epoch": 2.4836286484316763, - "grad_norm": 0.013531950302422047, - "learning_rate": 0.00019999695789568286, - "loss": 46.0, - "step": 32484 - }, - { - "epoch": 2.4837051054150656, - "grad_norm": 0.0015886970795691013, - "learning_rate": 0.00019999695770832462, - "loss": 46.0, - "step": 32485 - }, - { - "epoch": 2.4837815623984554, - "grad_norm": 0.0021449862979352474, - "learning_rate": 0.0001999969575209606, - "loss": 46.0, - "step": 32486 - }, - { - "epoch": 2.483858019381845, - "grad_norm": 0.000996781513094902, - "learning_rate": 0.00019999695733359082, - "loss": 46.0, - "step": 32487 - }, - { - "epoch": 2.483934476365235, - "grad_norm": 0.0012609054101631045, - "learning_rate": 0.0001999969571462153, - "loss": 46.0, - "step": 32488 - }, - { - "epoch": 2.4840109333486247, - "grad_norm": 0.0006104173371568322, - "learning_rate": 0.00019999695695883398, - "loss": 46.0, - "step": 32489 - }, - { - "epoch": 2.4840873903320144, - "grad_norm": 0.0028106090612709522, - "learning_rate": 0.0001999969567714469, - "loss": 46.0, - "step": 32490 - }, - { - "epoch": 2.484163847315404, - "grad_norm": 0.0007372854161076248, - "learning_rate": 0.00019999695658405405, - "loss": 46.0, - "step": 32491 - }, - { - "epoch": 2.484240304298794, - "grad_norm": 0.001557126292027533, - "learning_rate": 0.00019999695639665542, - "loss": 46.0, - "step": 32492 - }, - { - "epoch": 2.4843167612821837, - "grad_norm": 0.000501113070640713, - "learning_rate": 0.000199996956209251, - "loss": 46.0, - "step": 32493 - }, - { - "epoch": 2.4843932182655735, - "grad_norm": 0.0021587435621768236, - "learning_rate": 0.00019999695602184082, - "loss": 46.0, - "step": 32494 - }, - { - "epoch": 2.4844696752489632, - "grad_norm": 0.0008666223147884011, - "learning_rate": 0.00019999695583442493, - "loss": 46.0, - "step": 32495 - }, - { - "epoch": 2.484546132232353, - "grad_norm": 0.000774035113863647, - "learning_rate": 0.0001999969556470032, - "loss": 46.0, - "step": 32496 - }, - { - "epoch": 2.4846225892157423, - "grad_norm": 0.00021022098371759057, - "learning_rate": 0.00019999695545957574, - "loss": 46.0, - "step": 32497 - }, - { - "epoch": 2.484699046199132, - "grad_norm": 0.0007480334024876356, - "learning_rate": 0.0001999969552721425, - "loss": 46.0, - "step": 32498 - }, - { - "epoch": 2.484775503182522, - "grad_norm": 0.000816861167550087, - "learning_rate": 0.0001999969550847035, - "loss": 46.0, - "step": 32499 - }, - { - "epoch": 2.4848519601659116, - "grad_norm": 0.0007491225842386484, - "learning_rate": 0.0001999969548972587, - "loss": 46.0, - "step": 32500 - }, - { - "epoch": 2.4849284171493013, - "grad_norm": 0.000617528916336596, - "learning_rate": 0.00019999695470980817, - "loss": 46.0, - "step": 32501 - }, - { - "epoch": 2.485004874132691, - "grad_norm": 0.008154521696269512, - "learning_rate": 0.00019999695452235187, - "loss": 46.0, - "step": 32502 - }, - { - "epoch": 2.485081331116081, - "grad_norm": 0.0012328510638326406, - "learning_rate": 0.00019999695433488976, - "loss": 46.0, - "step": 32503 - }, - { - "epoch": 2.4851577880994706, - "grad_norm": 0.0009552455157972872, - "learning_rate": 0.0001999969541474219, - "loss": 46.0, - "step": 32504 - }, - { - "epoch": 2.4852342450828604, - "grad_norm": 0.0008059277315624058, - "learning_rate": 0.00019999695395994828, - "loss": 46.0, - "step": 32505 - }, - { - "epoch": 2.48531070206625, - "grad_norm": 0.0009990391554310918, - "learning_rate": 0.0001999969537724689, - "loss": 46.0, - "step": 32506 - }, - { - "epoch": 2.4853871590496395, - "grad_norm": 0.004519835114479065, - "learning_rate": 0.0001999969535849837, - "loss": 46.0, - "step": 32507 - }, - { - "epoch": 2.4854636160330292, - "grad_norm": 0.0011185647454112768, - "learning_rate": 0.0001999969533974928, - "loss": 46.0, - "step": 32508 - }, - { - "epoch": 2.485540073016419, - "grad_norm": 0.005013096146285534, - "learning_rate": 0.00019999695320999607, - "loss": 46.0, - "step": 32509 - }, - { - "epoch": 2.4856165299998088, - "grad_norm": 0.006383952219039202, - "learning_rate": 0.0001999969530224936, - "loss": 46.0, - "step": 32510 - }, - { - "epoch": 2.4856929869831985, - "grad_norm": 0.0010234022047370672, - "learning_rate": 0.0001999969528349854, - "loss": 46.0, - "step": 32511 - }, - { - "epoch": 2.4857694439665883, - "grad_norm": 0.0013509561540558934, - "learning_rate": 0.00019999695264747138, - "loss": 46.0, - "step": 32512 - }, - { - "epoch": 2.485845900949978, - "grad_norm": 0.0020701014436781406, - "learning_rate": 0.0001999969524599516, - "loss": 46.0, - "step": 32513 - }, - { - "epoch": 2.485922357933368, - "grad_norm": 0.0007009868859313428, - "learning_rate": 0.00019999695227242606, - "loss": 46.0, - "step": 32514 - }, - { - "epoch": 2.4859988149167576, - "grad_norm": 0.0017209473298862576, - "learning_rate": 0.00019999695208489473, - "loss": 46.0, - "step": 32515 - }, - { - "epoch": 2.4860752719001473, - "grad_norm": 0.0010355832055211067, - "learning_rate": 0.00019999695189735762, - "loss": 46.0, - "step": 32516 - }, - { - "epoch": 2.486151728883537, - "grad_norm": 0.0010259883711114526, - "learning_rate": 0.00019999695170981477, - "loss": 46.0, - "step": 32517 - }, - { - "epoch": 2.486228185866927, - "grad_norm": 0.0019412415567785501, - "learning_rate": 0.00019999695152226617, - "loss": 46.0, - "step": 32518 - }, - { - "epoch": 2.486304642850316, - "grad_norm": 0.0009146252414211631, - "learning_rate": 0.00019999695133471175, - "loss": 46.0, - "step": 32519 - }, - { - "epoch": 2.486381099833706, - "grad_norm": 0.003369443817064166, - "learning_rate": 0.0001999969511471516, - "loss": 46.0, - "step": 32520 - }, - { - "epoch": 2.4864575568170957, - "grad_norm": 0.0010440386831760406, - "learning_rate": 0.00019999695095958565, - "loss": 46.0, - "step": 32521 - }, - { - "epoch": 2.4865340138004854, - "grad_norm": 0.0016332159284502268, - "learning_rate": 0.00019999695077201394, - "loss": 46.0, - "step": 32522 - }, - { - "epoch": 2.486610470783875, - "grad_norm": 0.0008343071676790714, - "learning_rate": 0.00019999695058443647, - "loss": 46.0, - "step": 32523 - }, - { - "epoch": 2.486686927767265, - "grad_norm": 0.0004725205944851041, - "learning_rate": 0.00019999695039685323, - "loss": 46.0, - "step": 32524 - }, - { - "epoch": 2.4867633847506547, - "grad_norm": 0.0013226979644969106, - "learning_rate": 0.00019999695020926425, - "loss": 46.0, - "step": 32525 - }, - { - "epoch": 2.4868398417340445, - "grad_norm": 0.00045004760613664985, - "learning_rate": 0.00019999695002166946, - "loss": 46.0, - "step": 32526 - }, - { - "epoch": 2.4869162987174342, - "grad_norm": 0.0012962721521034837, - "learning_rate": 0.0001999969498340689, - "loss": 46.0, - "step": 32527 - }, - { - "epoch": 2.486992755700824, - "grad_norm": 0.001267138752155006, - "learning_rate": 0.0001999969496464626, - "loss": 46.0, - "step": 32528 - }, - { - "epoch": 2.4870692126842133, - "grad_norm": 0.0006998016033321619, - "learning_rate": 0.0001999969494588505, - "loss": 46.0, - "step": 32529 - }, - { - "epoch": 2.487145669667603, - "grad_norm": 0.0007355526322498918, - "learning_rate": 0.00019999694927123264, - "loss": 46.0, - "step": 32530 - }, - { - "epoch": 2.487222126650993, - "grad_norm": 0.0008501976262778044, - "learning_rate": 0.000199996949083609, - "loss": 46.0, - "step": 32531 - }, - { - "epoch": 2.4872985836343826, - "grad_norm": 0.0025960856582969427, - "learning_rate": 0.00019999694889597964, - "loss": 46.0, - "step": 32532 - }, - { - "epoch": 2.4873750406177724, - "grad_norm": 0.003075724234804511, - "learning_rate": 0.00019999694870834444, - "loss": 46.0, - "step": 32533 - }, - { - "epoch": 2.487451497601162, - "grad_norm": 0.0011970649939030409, - "learning_rate": 0.00019999694852070352, - "loss": 46.0, - "step": 32534 - }, - { - "epoch": 2.487527954584552, - "grad_norm": 0.0022439074236899614, - "learning_rate": 0.00019999694833305683, - "loss": 46.0, - "step": 32535 - }, - { - "epoch": 2.4876044115679417, - "grad_norm": 0.0016282013384625316, - "learning_rate": 0.00019999694814540434, - "loss": 46.0, - "step": 32536 - }, - { - "epoch": 2.4876808685513314, - "grad_norm": 0.0013592778705060482, - "learning_rate": 0.0001999969479577461, - "loss": 46.0, - "step": 32537 - }, - { - "epoch": 2.487757325534721, - "grad_norm": 0.0005730963894166052, - "learning_rate": 0.0001999969477700821, - "loss": 46.0, - "step": 32538 - }, - { - "epoch": 2.487833782518111, - "grad_norm": 0.0007011077832430601, - "learning_rate": 0.00019999694758241233, - "loss": 46.0, - "step": 32539 - }, - { - "epoch": 2.4879102395015007, - "grad_norm": 0.0015100507298484445, - "learning_rate": 0.00019999694739473677, - "loss": 46.0, - "step": 32540 - }, - { - "epoch": 2.48798669648489, - "grad_norm": 0.0009014597744680941, - "learning_rate": 0.00019999694720705544, - "loss": 46.0, - "step": 32541 - }, - { - "epoch": 2.4880631534682798, - "grad_norm": 0.001240005251020193, - "learning_rate": 0.00019999694701936836, - "loss": 46.0, - "step": 32542 - }, - { - "epoch": 2.4881396104516695, - "grad_norm": 0.001145404065027833, - "learning_rate": 0.00019999694683167548, - "loss": 46.0, - "step": 32543 - }, - { - "epoch": 2.4882160674350593, - "grad_norm": 0.0012710869777947664, - "learning_rate": 0.00019999694664397688, - "loss": 46.0, - "step": 32544 - }, - { - "epoch": 2.488292524418449, - "grad_norm": 0.0004915580502711236, - "learning_rate": 0.0001999969464562725, - "loss": 46.0, - "step": 32545 - }, - { - "epoch": 2.488368981401839, - "grad_norm": 0.0012591304257512093, - "learning_rate": 0.00019999694626856231, - "loss": 46.0, - "step": 32546 - }, - { - "epoch": 2.4884454383852286, - "grad_norm": 0.0006453980458900332, - "learning_rate": 0.00019999694608084637, - "loss": 46.0, - "step": 32547 - }, - { - "epoch": 2.4885218953686183, - "grad_norm": 0.0010155484778806567, - "learning_rate": 0.00019999694589312468, - "loss": 46.0, - "step": 32548 - }, - { - "epoch": 2.488598352352008, - "grad_norm": 0.0007393939304165542, - "learning_rate": 0.0001999969457053972, - "loss": 46.0, - "step": 32549 - }, - { - "epoch": 2.488674809335398, - "grad_norm": 0.0004635357763618231, - "learning_rate": 0.00019999694551766397, - "loss": 46.0, - "step": 32550 - }, - { - "epoch": 2.488751266318787, - "grad_norm": 0.002516088541597128, - "learning_rate": 0.00019999694532992494, - "loss": 46.0, - "step": 32551 - }, - { - "epoch": 2.488827723302177, - "grad_norm": 0.0008175940602086484, - "learning_rate": 0.00019999694514218018, - "loss": 46.0, - "step": 32552 - }, - { - "epoch": 2.4889041802855667, - "grad_norm": 0.0004407418891787529, - "learning_rate": 0.00019999694495442962, - "loss": 46.0, - "step": 32553 - }, - { - "epoch": 2.4889806372689565, - "grad_norm": 0.00406007980927825, - "learning_rate": 0.0001999969447666733, - "loss": 46.0, - "step": 32554 - }, - { - "epoch": 2.4890570942523462, - "grad_norm": 0.0010890637058764696, - "learning_rate": 0.0001999969445789112, - "loss": 46.0, - "step": 32555 - }, - { - "epoch": 2.489133551235736, - "grad_norm": 0.0007092013256624341, - "learning_rate": 0.00019999694439114336, - "loss": 46.0, - "step": 32556 - }, - { - "epoch": 2.4892100082191257, - "grad_norm": 0.0005649402737617493, - "learning_rate": 0.00019999694420336973, - "loss": 46.0, - "step": 32557 - }, - { - "epoch": 2.4892864652025155, - "grad_norm": 0.0011889153392985463, - "learning_rate": 0.00019999694401559034, - "loss": 46.0, - "step": 32558 - }, - { - "epoch": 2.4893629221859053, - "grad_norm": 0.00298025063239038, - "learning_rate": 0.00019999694382780516, - "loss": 46.0, - "step": 32559 - }, - { - "epoch": 2.489439379169295, - "grad_norm": 0.0008088881731964648, - "learning_rate": 0.00019999694364001422, - "loss": 46.0, - "step": 32560 - }, - { - "epoch": 2.489515836152685, - "grad_norm": 0.0007073302404023707, - "learning_rate": 0.00019999694345221753, - "loss": 46.0, - "step": 32561 - }, - { - "epoch": 2.4895922931360746, - "grad_norm": 0.001739811385050416, - "learning_rate": 0.00019999694326441504, - "loss": 46.0, - "step": 32562 - }, - { - "epoch": 2.489668750119464, - "grad_norm": 0.0009057508432306349, - "learning_rate": 0.0001999969430766068, - "loss": 46.0, - "step": 32563 - }, - { - "epoch": 2.4897452071028536, - "grad_norm": 0.00041170476470142603, - "learning_rate": 0.00019999694288879281, - "loss": 46.0, - "step": 32564 - }, - { - "epoch": 2.4898216640862434, - "grad_norm": 0.001177989412099123, - "learning_rate": 0.000199996942700973, - "loss": 46.0, - "step": 32565 - }, - { - "epoch": 2.489898121069633, - "grad_norm": 0.0005341498763300478, - "learning_rate": 0.00019999694251314747, - "loss": 46.0, - "step": 32566 - }, - { - "epoch": 2.489974578053023, - "grad_norm": 0.0008835952612571418, - "learning_rate": 0.00019999694232531614, - "loss": 46.0, - "step": 32567 - }, - { - "epoch": 2.4900510350364127, - "grad_norm": 0.004304313100874424, - "learning_rate": 0.00019999694213747904, - "loss": 46.0, - "step": 32568 - }, - { - "epoch": 2.4901274920198024, - "grad_norm": 0.0006942129693925381, - "learning_rate": 0.00019999694194963618, - "loss": 46.0, - "step": 32569 - }, - { - "epoch": 2.490203949003192, - "grad_norm": 0.0007404296775348485, - "learning_rate": 0.00019999694176178756, - "loss": 46.0, - "step": 32570 - }, - { - "epoch": 2.490280405986582, - "grad_norm": 0.001132192905060947, - "learning_rate": 0.00019999694157393316, - "loss": 46.0, - "step": 32571 - }, - { - "epoch": 2.4903568629699713, - "grad_norm": 0.0007190343458205462, - "learning_rate": 0.000199996941386073, - "loss": 46.0, - "step": 32572 - }, - { - "epoch": 2.490433319953361, - "grad_norm": 0.0016687260940670967, - "learning_rate": 0.00019999694119820707, - "loss": 46.0, - "step": 32573 - }, - { - "epoch": 2.490509776936751, - "grad_norm": 0.001053826417773962, - "learning_rate": 0.00019999694101033538, - "loss": 46.0, - "step": 32574 - }, - { - "epoch": 2.4905862339201406, - "grad_norm": 0.004366938024759293, - "learning_rate": 0.0001999969408224579, - "loss": 46.0, - "step": 32575 - }, - { - "epoch": 2.4906626909035303, - "grad_norm": 0.0012544711353257298, - "learning_rate": 0.00019999694063457465, - "loss": 46.0, - "step": 32576 - }, - { - "epoch": 2.49073914788692, - "grad_norm": 0.00032960408134385943, - "learning_rate": 0.00019999694044668562, - "loss": 46.0, - "step": 32577 - }, - { - "epoch": 2.49081560487031, - "grad_norm": 0.001531166024506092, - "learning_rate": 0.00019999694025879086, - "loss": 46.0, - "step": 32578 - }, - { - "epoch": 2.4908920618536996, - "grad_norm": 0.000752673193346709, - "learning_rate": 0.0001999969400708903, - "loss": 46.0, - "step": 32579 - }, - { - "epoch": 2.4909685188370894, - "grad_norm": 0.0005099460249766707, - "learning_rate": 0.00019999693988298397, - "loss": 46.0, - "step": 32580 - }, - { - "epoch": 2.491044975820479, - "grad_norm": 0.0019086366519331932, - "learning_rate": 0.00019999693969507186, - "loss": 46.0, - "step": 32581 - }, - { - "epoch": 2.491121432803869, - "grad_norm": 0.004595336504280567, - "learning_rate": 0.00019999693950715401, - "loss": 46.0, - "step": 32582 - }, - { - "epoch": 2.4911978897872586, - "grad_norm": 0.0014849234139546752, - "learning_rate": 0.0001999969393192304, - "loss": 46.0, - "step": 32583 - }, - { - "epoch": 2.4912743467706484, - "grad_norm": 0.0008845271659083664, - "learning_rate": 0.000199996939131301, - "loss": 46.0, - "step": 32584 - }, - { - "epoch": 2.4913508037540377, - "grad_norm": 0.0021731401793658733, - "learning_rate": 0.00019999693894336585, - "loss": 46.0, - "step": 32585 - }, - { - "epoch": 2.4914272607374275, - "grad_norm": 0.0014632310485467315, - "learning_rate": 0.00019999693875542488, - "loss": 46.0, - "step": 32586 - }, - { - "epoch": 2.4915037177208172, - "grad_norm": 0.001202008337713778, - "learning_rate": 0.0001999969385674782, - "loss": 46.0, - "step": 32587 - }, - { - "epoch": 2.491580174704207, - "grad_norm": 0.0009073092369362712, - "learning_rate": 0.00019999693837952572, - "loss": 46.0, - "step": 32588 - }, - { - "epoch": 2.4916566316875968, - "grad_norm": 0.0008802267257124186, - "learning_rate": 0.00019999693819156749, - "loss": 46.0, - "step": 32589 - }, - { - "epoch": 2.4917330886709865, - "grad_norm": 0.0012836562236770988, - "learning_rate": 0.00019999693800360347, - "loss": 46.0, - "step": 32590 - }, - { - "epoch": 2.4918095456543763, - "grad_norm": 0.001667350996285677, - "learning_rate": 0.00019999693781563366, - "loss": 46.0, - "step": 32591 - }, - { - "epoch": 2.491886002637766, - "grad_norm": 0.0005499919061549008, - "learning_rate": 0.00019999693762765813, - "loss": 46.0, - "step": 32592 - }, - { - "epoch": 2.491962459621156, - "grad_norm": 0.0009524726192466915, - "learning_rate": 0.0001999969374396768, - "loss": 46.0, - "step": 32593 - }, - { - "epoch": 2.492038916604545, - "grad_norm": 0.0006169872940517962, - "learning_rate": 0.0001999969372516897, - "loss": 46.0, - "step": 32594 - }, - { - "epoch": 2.492115373587935, - "grad_norm": 0.0004173332708887756, - "learning_rate": 0.00019999693706369685, - "loss": 46.0, - "step": 32595 - }, - { - "epoch": 2.4921918305713247, - "grad_norm": 0.0013197311200201511, - "learning_rate": 0.00019999693687569822, - "loss": 46.0, - "step": 32596 - }, - { - "epoch": 2.4922682875547144, - "grad_norm": 0.0007887695683166385, - "learning_rate": 0.00019999693668769383, - "loss": 46.0, - "step": 32597 - }, - { - "epoch": 2.492344744538104, - "grad_norm": 0.0005454887286759913, - "learning_rate": 0.00019999693649968366, - "loss": 46.0, - "step": 32598 - }, - { - "epoch": 2.492421201521494, - "grad_norm": 0.0009504213812761009, - "learning_rate": 0.00019999693631166774, - "loss": 46.0, - "step": 32599 - }, - { - "epoch": 2.4924976585048837, - "grad_norm": 0.0014154567616060376, - "learning_rate": 0.000199996936123646, - "loss": 46.0, - "step": 32600 - }, - { - "epoch": 2.4925741154882735, - "grad_norm": 0.0009172839345410466, - "learning_rate": 0.00019999693593561856, - "loss": 46.0, - "step": 32601 - }, - { - "epoch": 2.492650572471663, - "grad_norm": 0.0005338436458259821, - "learning_rate": 0.0001999969357475853, - "loss": 46.0, - "step": 32602 - }, - { - "epoch": 2.492727029455053, - "grad_norm": 0.00151747465133667, - "learning_rate": 0.00019999693555954628, - "loss": 46.0, - "step": 32603 - }, - { - "epoch": 2.4928034864384427, - "grad_norm": 0.0008318493491970003, - "learning_rate": 0.00019999693537150147, - "loss": 46.0, - "step": 32604 - }, - { - "epoch": 2.4928799434218325, - "grad_norm": 0.0011670246021822095, - "learning_rate": 0.00019999693518345092, - "loss": 46.0, - "step": 32605 - }, - { - "epoch": 2.4929564004052223, - "grad_norm": 0.0017090964829549193, - "learning_rate": 0.0001999969349953946, - "loss": 46.0, - "step": 32606 - }, - { - "epoch": 2.4930328573886116, - "grad_norm": 0.0006046075141057372, - "learning_rate": 0.00019999693480733254, - "loss": 46.0, - "step": 32607 - }, - { - "epoch": 2.4931093143720013, - "grad_norm": 0.00048030464677140117, - "learning_rate": 0.00019999693461926469, - "loss": 46.0, - "step": 32608 - }, - { - "epoch": 2.493185771355391, - "grad_norm": 0.0007360557792708278, - "learning_rate": 0.00019999693443119104, - "loss": 46.0, - "step": 32609 - }, - { - "epoch": 2.493262228338781, - "grad_norm": 0.0014631547965109348, - "learning_rate": 0.00019999693424311167, - "loss": 46.0, - "step": 32610 - }, - { - "epoch": 2.4933386853221706, - "grad_norm": 0.0015953963156789541, - "learning_rate": 0.00019999693405502647, - "loss": 46.0, - "step": 32611 - }, - { - "epoch": 2.4934151423055604, - "grad_norm": 0.002187523990869522, - "learning_rate": 0.00019999693386693555, - "loss": 46.0, - "step": 32612 - }, - { - "epoch": 2.49349159928895, - "grad_norm": 0.002458705101162195, - "learning_rate": 0.00019999693367883883, - "loss": 46.0, - "step": 32613 - }, - { - "epoch": 2.49356805627234, - "grad_norm": 0.0013839320745319128, - "learning_rate": 0.00019999693349073634, - "loss": 46.0, - "step": 32614 - }, - { - "epoch": 2.4936445132557297, - "grad_norm": 0.0014073910424485803, - "learning_rate": 0.0001999969333026281, - "loss": 46.0, - "step": 32615 - }, - { - "epoch": 2.493720970239119, - "grad_norm": 0.0007907609106041491, - "learning_rate": 0.0001999969331145141, - "loss": 46.0, - "step": 32616 - }, - { - "epoch": 2.4937974272225087, - "grad_norm": 0.0008132322109304368, - "learning_rate": 0.0001999969329263943, - "loss": 46.0, - "step": 32617 - }, - { - "epoch": 2.4938738842058985, - "grad_norm": 0.0012784870341420174, - "learning_rate": 0.00019999693273826876, - "loss": 46.0, - "step": 32618 - }, - { - "epoch": 2.4939503411892883, - "grad_norm": 0.0011030759196728468, - "learning_rate": 0.00019999693255013745, - "loss": 46.0, - "step": 32619 - }, - { - "epoch": 2.494026798172678, - "grad_norm": 0.0020263739861547947, - "learning_rate": 0.00019999693236200035, - "loss": 46.0, - "step": 32620 - }, - { - "epoch": 2.494103255156068, - "grad_norm": 0.000965129816904664, - "learning_rate": 0.00019999693217385747, - "loss": 46.0, - "step": 32621 - }, - { - "epoch": 2.4941797121394575, - "grad_norm": 0.0012597328750416636, - "learning_rate": 0.00019999693198570885, - "loss": 46.0, - "step": 32622 - }, - { - "epoch": 2.4942561691228473, - "grad_norm": 0.0016392285469919443, - "learning_rate": 0.00019999693179755445, - "loss": 46.0, - "step": 32623 - }, - { - "epoch": 2.494332626106237, - "grad_norm": 0.001721691689454019, - "learning_rate": 0.00019999693160939428, - "loss": 46.0, - "step": 32624 - }, - { - "epoch": 2.494409083089627, - "grad_norm": 0.004060937557369471, - "learning_rate": 0.00019999693142122834, - "loss": 46.0, - "step": 32625 - }, - { - "epoch": 2.4944855400730166, - "grad_norm": 0.001772783463820815, - "learning_rate": 0.00019999693123305665, - "loss": 46.0, - "step": 32626 - }, - { - "epoch": 2.4945619970564064, - "grad_norm": 0.000491643266286701, - "learning_rate": 0.00019999693104487916, - "loss": 46.0, - "step": 32627 - }, - { - "epoch": 2.4946384540397957, - "grad_norm": 0.0010192712070420384, - "learning_rate": 0.00019999693085669592, - "loss": 46.0, - "step": 32628 - }, - { - "epoch": 2.4947149110231854, - "grad_norm": 0.0007798741571605206, - "learning_rate": 0.00019999693066850691, - "loss": 46.0, - "step": 32629 - }, - { - "epoch": 2.494791368006575, - "grad_norm": 0.0015332680195569992, - "learning_rate": 0.0001999969304803121, - "loss": 46.0, - "step": 32630 - }, - { - "epoch": 2.494867824989965, - "grad_norm": 0.0057282885536551476, - "learning_rate": 0.00019999693029211155, - "loss": 46.0, - "step": 32631 - }, - { - "epoch": 2.4949442819733547, - "grad_norm": 0.0012404846493154764, - "learning_rate": 0.00019999693010390525, - "loss": 46.0, - "step": 32632 - }, - { - "epoch": 2.4950207389567445, - "grad_norm": 0.0008654487901367247, - "learning_rate": 0.00019999692991569314, - "loss": 46.0, - "step": 32633 - }, - { - "epoch": 2.4950971959401342, - "grad_norm": 0.0031990830320864916, - "learning_rate": 0.0001999969297274753, - "loss": 46.0, - "step": 32634 - }, - { - "epoch": 2.495173652923524, - "grad_norm": 0.0012202790239825845, - "learning_rate": 0.00019999692953925164, - "loss": 46.0, - "step": 32635 - }, - { - "epoch": 2.4952501099069138, - "grad_norm": 0.0010098565835505724, - "learning_rate": 0.00019999692935102225, - "loss": 46.0, - "step": 32636 - }, - { - "epoch": 2.4953265668903035, - "grad_norm": 0.0014236140996217728, - "learning_rate": 0.0001999969291627871, - "loss": 46.0, - "step": 32637 - }, - { - "epoch": 2.495403023873693, - "grad_norm": 0.003221197286620736, - "learning_rate": 0.00019999692897454613, - "loss": 46.0, - "step": 32638 - }, - { - "epoch": 2.4954794808570826, - "grad_norm": 0.0005447784205898643, - "learning_rate": 0.00019999692878629944, - "loss": 46.0, - "step": 32639 - }, - { - "epoch": 2.4955559378404724, - "grad_norm": 0.004033575765788555, - "learning_rate": 0.00019999692859804698, - "loss": 46.0, - "step": 32640 - }, - { - "epoch": 2.495632394823862, - "grad_norm": 0.0007976160850375891, - "learning_rate": 0.00019999692840978872, - "loss": 46.0, - "step": 32641 - }, - { - "epoch": 2.495708851807252, - "grad_norm": 0.0008567469194531441, - "learning_rate": 0.00019999692822152468, - "loss": 46.0, - "step": 32642 - }, - { - "epoch": 2.4957853087906416, - "grad_norm": 0.0055723777040839195, - "learning_rate": 0.00019999692803325493, - "loss": 46.0, - "step": 32643 - }, - { - "epoch": 2.4958617657740314, - "grad_norm": 0.0006186652462929487, - "learning_rate": 0.00019999692784497937, - "loss": 46.0, - "step": 32644 - }, - { - "epoch": 2.495938222757421, - "grad_norm": 0.001370106590911746, - "learning_rate": 0.000199996927656698, - "loss": 46.0, - "step": 32645 - }, - { - "epoch": 2.496014679740811, - "grad_norm": 0.0014947515446692705, - "learning_rate": 0.00019999692746841094, - "loss": 46.0, - "step": 32646 - }, - { - "epoch": 2.4960911367242007, - "grad_norm": 0.0015226037940010428, - "learning_rate": 0.00019999692728011806, - "loss": 46.0, - "step": 32647 - }, - { - "epoch": 2.4961675937075904, - "grad_norm": 0.0005861383397132158, - "learning_rate": 0.00019999692709181944, - "loss": 46.0, - "step": 32648 - }, - { - "epoch": 2.49624405069098, - "grad_norm": 0.0015473462408408523, - "learning_rate": 0.00019999692690351504, - "loss": 46.0, - "step": 32649 - }, - { - "epoch": 2.4963205076743695, - "grad_norm": 0.0011263686465099454, - "learning_rate": 0.00019999692671520487, - "loss": 46.0, - "step": 32650 - }, - { - "epoch": 2.4963969646577593, - "grad_norm": 0.0026451812591403723, - "learning_rate": 0.00019999692652688893, - "loss": 46.0, - "step": 32651 - }, - { - "epoch": 2.496473421641149, - "grad_norm": 0.001112994970753789, - "learning_rate": 0.00019999692633856721, - "loss": 46.0, - "step": 32652 - }, - { - "epoch": 2.496549878624539, - "grad_norm": 0.0016106105176731944, - "learning_rate": 0.00019999692615023975, - "loss": 46.0, - "step": 32653 - }, - { - "epoch": 2.4966263356079286, - "grad_norm": 0.0025343927554786205, - "learning_rate": 0.00019999692596190652, - "loss": 46.0, - "step": 32654 - }, - { - "epoch": 2.4967027925913183, - "grad_norm": 0.0008390550501644611, - "learning_rate": 0.00019999692577356748, - "loss": 46.0, - "step": 32655 - }, - { - "epoch": 2.496779249574708, - "grad_norm": 0.0007332876557484269, - "learning_rate": 0.0001999969255852227, - "loss": 46.0, - "step": 32656 - }, - { - "epoch": 2.496855706558098, - "grad_norm": 0.0015012803487479687, - "learning_rate": 0.00019999692539687214, - "loss": 46.0, - "step": 32657 - }, - { - "epoch": 2.4969321635414876, - "grad_norm": 0.0027170462999492884, - "learning_rate": 0.00019999692520851584, - "loss": 46.0, - "step": 32658 - }, - { - "epoch": 2.4970086205248774, - "grad_norm": 0.0007027161191217601, - "learning_rate": 0.00019999692502015374, - "loss": 46.0, - "step": 32659 - }, - { - "epoch": 2.4970850775082667, - "grad_norm": 0.0015824147267267108, - "learning_rate": 0.00019999692483178586, - "loss": 46.0, - "step": 32660 - }, - { - "epoch": 2.4971615344916565, - "grad_norm": 0.0011959223775193095, - "learning_rate": 0.0001999969246434122, - "loss": 46.0, - "step": 32661 - }, - { - "epoch": 2.497237991475046, - "grad_norm": 0.002277828985825181, - "learning_rate": 0.00019999692445503282, - "loss": 46.0, - "step": 32662 - }, - { - "epoch": 2.497314448458436, - "grad_norm": 0.00031184719409793615, - "learning_rate": 0.00019999692426664765, - "loss": 46.0, - "step": 32663 - }, - { - "epoch": 2.4973909054418257, - "grad_norm": 0.0008964571752585471, - "learning_rate": 0.00019999692407825673, - "loss": 46.0, - "step": 32664 - }, - { - "epoch": 2.4974673624252155, - "grad_norm": 0.0010842683259397745, - "learning_rate": 0.00019999692388986002, - "loss": 46.0, - "step": 32665 - }, - { - "epoch": 2.4975438194086053, - "grad_norm": 0.0008160679135471582, - "learning_rate": 0.00019999692370145753, - "loss": 46.0, - "step": 32666 - }, - { - "epoch": 2.497620276391995, - "grad_norm": 0.0011588375782594085, - "learning_rate": 0.0001999969235130493, - "loss": 46.0, - "step": 32667 - }, - { - "epoch": 2.497696733375385, - "grad_norm": 0.002093835035338998, - "learning_rate": 0.00019999692332463526, - "loss": 46.0, - "step": 32668 - }, - { - "epoch": 2.4977731903587745, - "grad_norm": 0.0007744574686512351, - "learning_rate": 0.00019999692313621547, - "loss": 46.0, - "step": 32669 - }, - { - "epoch": 2.4978496473421643, - "grad_norm": 0.0005367117119021714, - "learning_rate": 0.00019999692294778992, - "loss": 46.0, - "step": 32670 - }, - { - "epoch": 2.497926104325554, - "grad_norm": 0.0008444421691820025, - "learning_rate": 0.0001999969227593586, - "loss": 46.0, - "step": 32671 - }, - { - "epoch": 2.4980025613089434, - "grad_norm": 0.0024097352288663387, - "learning_rate": 0.00019999692257092151, - "loss": 46.0, - "step": 32672 - }, - { - "epoch": 2.498079018292333, - "grad_norm": 0.0011364661622792482, - "learning_rate": 0.00019999692238247864, - "loss": 46.0, - "step": 32673 - }, - { - "epoch": 2.498155475275723, - "grad_norm": 0.000714063469786197, - "learning_rate": 0.00019999692219403002, - "loss": 46.0, - "step": 32674 - }, - { - "epoch": 2.4982319322591127, - "grad_norm": 0.0016102782683447003, - "learning_rate": 0.0001999969220055756, - "loss": 46.0, - "step": 32675 - }, - { - "epoch": 2.4983083892425024, - "grad_norm": 0.0009185677045024931, - "learning_rate": 0.00019999692181711543, - "loss": 46.0, - "step": 32676 - }, - { - "epoch": 2.498384846225892, - "grad_norm": 0.0014042760012671351, - "learning_rate": 0.00019999692162864948, - "loss": 46.0, - "step": 32677 - }, - { - "epoch": 2.498461303209282, - "grad_norm": 0.0019407031359151006, - "learning_rate": 0.0001999969214401778, - "loss": 46.0, - "step": 32678 - }, - { - "epoch": 2.4985377601926717, - "grad_norm": 0.0013697692193090916, - "learning_rate": 0.0001999969212517003, - "loss": 46.0, - "step": 32679 - }, - { - "epoch": 2.4986142171760615, - "grad_norm": 0.0006164151127450168, - "learning_rate": 0.00019999692106321707, - "loss": 46.0, - "step": 32680 - }, - { - "epoch": 2.4986906741594512, - "grad_norm": 0.000803289411123842, - "learning_rate": 0.00019999692087472806, - "loss": 46.0, - "step": 32681 - }, - { - "epoch": 2.4987671311428405, - "grad_norm": 0.004597480408847332, - "learning_rate": 0.00019999692068623325, - "loss": 46.0, - "step": 32682 - }, - { - "epoch": 2.4988435881262303, - "grad_norm": 0.000799823144916445, - "learning_rate": 0.0001999969204977327, - "loss": 46.0, - "step": 32683 - }, - { - "epoch": 2.49892004510962, - "grad_norm": 0.0007131488528102636, - "learning_rate": 0.00019999692030922637, - "loss": 46.0, - "step": 32684 - }, - { - "epoch": 2.49899650209301, - "grad_norm": 0.001024338649585843, - "learning_rate": 0.00019999692012071427, - "loss": 46.0, - "step": 32685 - }, - { - "epoch": 2.4990729590763996, - "grad_norm": 0.0010273678926751018, - "learning_rate": 0.0001999969199321964, - "loss": 46.0, - "step": 32686 - }, - { - "epoch": 2.4991494160597894, - "grad_norm": 0.0014682613546028733, - "learning_rate": 0.00019999691974367277, - "loss": 46.0, - "step": 32687 - }, - { - "epoch": 2.499225873043179, - "grad_norm": 0.002199523849412799, - "learning_rate": 0.0001999969195551434, - "loss": 46.0, - "step": 32688 - }, - { - "epoch": 2.499302330026569, - "grad_norm": 0.0007699707639403641, - "learning_rate": 0.0001999969193666082, - "loss": 46.0, - "step": 32689 - }, - { - "epoch": 2.4993787870099586, - "grad_norm": 0.0017689234809949994, - "learning_rate": 0.00019999691917806727, - "loss": 46.0, - "step": 32690 - }, - { - "epoch": 2.4994552439933484, - "grad_norm": 0.002967545296996832, - "learning_rate": 0.00019999691898952055, - "loss": 46.0, - "step": 32691 - }, - { - "epoch": 2.499531700976738, - "grad_norm": 0.005779649131000042, - "learning_rate": 0.00019999691880096806, - "loss": 46.0, - "step": 32692 - }, - { - "epoch": 2.499608157960128, - "grad_norm": 0.0022865410428494215, - "learning_rate": 0.0001999969186124098, - "loss": 46.0, - "step": 32693 - }, - { - "epoch": 2.4996846149435172, - "grad_norm": 0.00222966936416924, - "learning_rate": 0.0001999969184238458, - "loss": 46.0, - "step": 32694 - }, - { - "epoch": 2.499761071926907, - "grad_norm": 0.0006263068644329906, - "learning_rate": 0.000199996918235276, - "loss": 46.0, - "step": 32695 - }, - { - "epoch": 2.4998375289102968, - "grad_norm": 0.0014327605022117496, - "learning_rate": 0.00019999691804670046, - "loss": 46.0, - "step": 32696 - }, - { - "epoch": 2.4999139858936865, - "grad_norm": 0.00161940383259207, - "learning_rate": 0.00019999691785811913, - "loss": 46.0, - "step": 32697 - }, - { - "epoch": 2.4999904428770763, - "grad_norm": 0.0005888966261409223, - "learning_rate": 0.00019999691766953203, - "loss": 46.0, - "step": 32698 - }, - { - "epoch": 2.500066899860466, - "grad_norm": 0.0008472443441860378, - "learning_rate": 0.00019999691748093915, - "loss": 46.0, - "step": 32699 - }, - { - "epoch": 2.500143356843856, - "grad_norm": 0.0009773269994184375, - "learning_rate": 0.00019999691729234056, - "loss": 46.0, - "step": 32700 - }, - { - "epoch": 2.5002198138272456, - "grad_norm": 0.0011044393759220839, - "learning_rate": 0.00019999691710373613, - "loss": 46.0, - "step": 32701 - }, - { - "epoch": 2.5002962708106353, - "grad_norm": 0.007905993610620499, - "learning_rate": 0.00019999691691512597, - "loss": 46.0, - "step": 32702 - }, - { - "epoch": 2.5003727277940246, - "grad_norm": 0.0008292117854580283, - "learning_rate": 0.00019999691672651002, - "loss": 46.0, - "step": 32703 - }, - { - "epoch": 2.5004491847774144, - "grad_norm": 0.0011388702550902963, - "learning_rate": 0.0001999969165378883, - "loss": 46.0, - "step": 32704 - }, - { - "epoch": 2.500525641760804, - "grad_norm": 0.0009121305192820728, - "learning_rate": 0.00019999691634926082, - "loss": 46.0, - "step": 32705 - }, - { - "epoch": 2.500602098744194, - "grad_norm": 0.0013917204923927784, - "learning_rate": 0.00019999691616062756, - "loss": 46.0, - "step": 32706 - }, - { - "epoch": 2.5006785557275837, - "grad_norm": 0.0006432253867387772, - "learning_rate": 0.00019999691597198855, - "loss": 46.0, - "step": 32707 - }, - { - "epoch": 2.5007550127109734, - "grad_norm": 0.0015068117063492537, - "learning_rate": 0.00019999691578334377, - "loss": 46.0, - "step": 32708 - }, - { - "epoch": 2.500831469694363, - "grad_norm": 0.0011101285926997662, - "learning_rate": 0.00019999691559469322, - "loss": 46.0, - "step": 32709 - }, - { - "epoch": 2.500907926677753, - "grad_norm": 0.0012562782503664494, - "learning_rate": 0.0001999969154060369, - "loss": 46.0, - "step": 32710 - }, - { - "epoch": 2.5009843836611427, - "grad_norm": 0.0008389346767216921, - "learning_rate": 0.00019999691521737481, - "loss": 46.0, - "step": 32711 - }, - { - "epoch": 2.5010608406445325, - "grad_norm": 0.0009495936101302505, - "learning_rate": 0.0001999969150287069, - "loss": 46.0, - "step": 32712 - }, - { - "epoch": 2.5011372976279223, - "grad_norm": 0.0013499533524736762, - "learning_rate": 0.00019999691484003332, - "loss": 46.0, - "step": 32713 - }, - { - "epoch": 2.501213754611312, - "grad_norm": 0.0017085482832044363, - "learning_rate": 0.0001999969146513539, - "loss": 46.0, - "step": 32714 - }, - { - "epoch": 2.5012902115947018, - "grad_norm": 0.0028621528763324022, - "learning_rate": 0.00019999691446266873, - "loss": 46.0, - "step": 32715 - }, - { - "epoch": 2.501366668578091, - "grad_norm": 0.0010731735965237021, - "learning_rate": 0.0001999969142739778, - "loss": 46.0, - "step": 32716 - }, - { - "epoch": 2.501443125561481, - "grad_norm": 0.0013197783846408129, - "learning_rate": 0.00019999691408528105, - "loss": 46.0, - "step": 32717 - }, - { - "epoch": 2.5015195825448706, - "grad_norm": 0.004666207358241081, - "learning_rate": 0.00019999691389657859, - "loss": 46.0, - "step": 32718 - }, - { - "epoch": 2.5015960395282604, - "grad_norm": 0.00262489658780396, - "learning_rate": 0.00019999691370787032, - "loss": 46.0, - "step": 32719 - }, - { - "epoch": 2.50167249651165, - "grad_norm": 0.002234694315120578, - "learning_rate": 0.00019999691351915632, - "loss": 46.0, - "step": 32720 - }, - { - "epoch": 2.50174895349504, - "grad_norm": 0.0006830698112025857, - "learning_rate": 0.00019999691333043654, - "loss": 46.0, - "step": 32721 - }, - { - "epoch": 2.5018254104784297, - "grad_norm": 0.0013971508014947176, - "learning_rate": 0.00019999691314171098, - "loss": 46.0, - "step": 32722 - }, - { - "epoch": 2.5019018674618194, - "grad_norm": 0.0012611669953912497, - "learning_rate": 0.00019999691295297965, - "loss": 46.0, - "step": 32723 - }, - { - "epoch": 2.501978324445209, - "grad_norm": 0.0026633082889020443, - "learning_rate": 0.00019999691276424255, - "loss": 46.0, - "step": 32724 - }, - { - "epoch": 2.5020547814285985, - "grad_norm": 0.0008328151889145374, - "learning_rate": 0.00019999691257549968, - "loss": 46.0, - "step": 32725 - }, - { - "epoch": 2.5021312384119883, - "grad_norm": 0.000989908934570849, - "learning_rate": 0.00019999691238675106, - "loss": 46.0, - "step": 32726 - }, - { - "epoch": 2.502207695395378, - "grad_norm": 0.0012619729386642575, - "learning_rate": 0.00019999691219799664, - "loss": 46.0, - "step": 32727 - }, - { - "epoch": 2.502284152378768, - "grad_norm": 0.0007630998734384775, - "learning_rate": 0.00019999691200923647, - "loss": 46.0, - "step": 32728 - }, - { - "epoch": 2.5023606093621575, - "grad_norm": 0.0009400617564097047, - "learning_rate": 0.00019999691182047053, - "loss": 46.0, - "step": 32729 - }, - { - "epoch": 2.5024370663455473, - "grad_norm": 0.0006512033869512379, - "learning_rate": 0.00019999691163169881, - "loss": 46.0, - "step": 32730 - }, - { - "epoch": 2.502513523328937, - "grad_norm": 0.0071093435399234295, - "learning_rate": 0.00019999691144292135, - "loss": 46.0, - "step": 32731 - }, - { - "epoch": 2.502589980312327, - "grad_norm": 0.0006232858868315816, - "learning_rate": 0.0001999969112541381, - "loss": 46.0, - "step": 32732 - }, - { - "epoch": 2.5026664372957166, - "grad_norm": 0.0005956319510005414, - "learning_rate": 0.00019999691106534906, - "loss": 46.0, - "step": 32733 - }, - { - "epoch": 2.5027428942791063, - "grad_norm": 0.0031116027384996414, - "learning_rate": 0.00019999691087655425, - "loss": 46.0, - "step": 32734 - }, - { - "epoch": 2.502819351262496, - "grad_norm": 0.0009393964428454638, - "learning_rate": 0.00019999691068775372, - "loss": 46.0, - "step": 32735 - }, - { - "epoch": 2.502895808245886, - "grad_norm": 0.0032348413951694965, - "learning_rate": 0.00019999691049894737, - "loss": 46.0, - "step": 32736 - }, - { - "epoch": 2.5029722652292756, - "grad_norm": 0.0026711774989962578, - "learning_rate": 0.0001999969103101353, - "loss": 46.0, - "step": 32737 - }, - { - "epoch": 2.503048722212665, - "grad_norm": 0.0005530026974156499, - "learning_rate": 0.0001999969101213174, - "loss": 46.0, - "step": 32738 - }, - { - "epoch": 2.5031251791960547, - "grad_norm": 0.004038301762193441, - "learning_rate": 0.00019999690993249377, - "loss": 46.0, - "step": 32739 - }, - { - "epoch": 2.5032016361794445, - "grad_norm": 0.0007219588151201606, - "learning_rate": 0.00019999690974366438, - "loss": 46.0, - "step": 32740 - }, - { - "epoch": 2.5032780931628342, - "grad_norm": 0.0010287873446941376, - "learning_rate": 0.00019999690955482921, - "loss": 46.0, - "step": 32741 - }, - { - "epoch": 2.503354550146224, - "grad_norm": 0.0012785352300852537, - "learning_rate": 0.00019999690936598827, - "loss": 46.0, - "step": 32742 - }, - { - "epoch": 2.5034310071296138, - "grad_norm": 0.001279329531826079, - "learning_rate": 0.00019999690917714156, - "loss": 46.0, - "step": 32743 - }, - { - "epoch": 2.5035074641130035, - "grad_norm": 0.0006859968416392803, - "learning_rate": 0.00019999690898828907, - "loss": 46.0, - "step": 32744 - }, - { - "epoch": 2.5035839210963933, - "grad_norm": 0.0009535603458061814, - "learning_rate": 0.0001999969087994308, - "loss": 46.0, - "step": 32745 - }, - { - "epoch": 2.5036603780797826, - "grad_norm": 0.0008990428759716451, - "learning_rate": 0.00019999690861056678, - "loss": 46.0, - "step": 32746 - }, - { - "epoch": 2.5037368350631724, - "grad_norm": 0.0030643814243376255, - "learning_rate": 0.000199996908421697, - "loss": 46.0, - "step": 32747 - }, - { - "epoch": 2.503813292046562, - "grad_norm": 0.0015444988384842873, - "learning_rate": 0.00019999690823282144, - "loss": 46.0, - "step": 32748 - }, - { - "epoch": 2.503889749029952, - "grad_norm": 0.001566438120789826, - "learning_rate": 0.00019999690804394012, - "loss": 46.0, - "step": 32749 - }, - { - "epoch": 2.5039662060133416, - "grad_norm": 0.0034345362801104784, - "learning_rate": 0.00019999690785505302, - "loss": 46.0, - "step": 32750 - }, - { - "epoch": 2.5040426629967314, - "grad_norm": 0.001355501706711948, - "learning_rate": 0.00019999690766616014, - "loss": 46.0, - "step": 32751 - }, - { - "epoch": 2.504119119980121, - "grad_norm": 0.0006056506535969675, - "learning_rate": 0.00019999690747726152, - "loss": 46.0, - "step": 32752 - }, - { - "epoch": 2.504195576963511, - "grad_norm": 0.0035692111123353243, - "learning_rate": 0.0001999969072883571, - "loss": 46.0, - "step": 32753 - }, - { - "epoch": 2.5042720339469007, - "grad_norm": 0.00087547063594684, - "learning_rate": 0.00019999690709944694, - "loss": 46.0, - "step": 32754 - }, - { - "epoch": 2.5043484909302904, - "grad_norm": 0.0014224721817299724, - "learning_rate": 0.000199996906910531, - "loss": 46.0, - "step": 32755 - }, - { - "epoch": 2.50442494791368, - "grad_norm": 0.0005808738642372191, - "learning_rate": 0.00019999690672160928, - "loss": 46.0, - "step": 32756 - }, - { - "epoch": 2.50450140489707, - "grad_norm": 0.0012215047609061003, - "learning_rate": 0.0001999969065326818, - "loss": 46.0, - "step": 32757 - }, - { - "epoch": 2.5045778618804597, - "grad_norm": 0.00176725210621953, - "learning_rate": 0.00019999690634374853, - "loss": 46.0, - "step": 32758 - }, - { - "epoch": 2.5046543188638495, - "grad_norm": 0.0017734557623043656, - "learning_rate": 0.00019999690615480953, - "loss": 46.0, - "step": 32759 - }, - { - "epoch": 2.504730775847239, - "grad_norm": 0.0005930705810897052, - "learning_rate": 0.00019999690596586472, - "loss": 46.0, - "step": 32760 - }, - { - "epoch": 2.5048072328306286, - "grad_norm": 0.002758664544671774, - "learning_rate": 0.00019999690577691417, - "loss": 46.0, - "step": 32761 - }, - { - "epoch": 2.5048836898140183, - "grad_norm": 0.0008418118814006448, - "learning_rate": 0.00019999690558795784, - "loss": 46.0, - "step": 32762 - }, - { - "epoch": 2.504960146797408, - "grad_norm": 0.002822203328832984, - "learning_rate": 0.00019999690539899577, - "loss": 46.0, - "step": 32763 - }, - { - "epoch": 2.505036603780798, - "grad_norm": 0.0014254904817789793, - "learning_rate": 0.00019999690521002787, - "loss": 46.0, - "step": 32764 - }, - { - "epoch": 2.5051130607641876, - "grad_norm": 0.0014400569489225745, - "learning_rate": 0.00019999690502105425, - "loss": 46.0, - "step": 32765 - }, - { - "epoch": 2.5051895177475774, - "grad_norm": 0.0014554229564964771, - "learning_rate": 0.00019999690483207485, - "loss": 46.0, - "step": 32766 - }, - { - "epoch": 2.505265974730967, - "grad_norm": 0.0006737028597854078, - "learning_rate": 0.00019999690464308966, - "loss": 46.0, - "step": 32767 - }, - { - "epoch": 2.5053424317143564, - "grad_norm": 0.000716458132956177, - "learning_rate": 0.00019999690445409872, - "loss": 46.0, - "step": 32768 - }, - { - "epoch": 2.505418888697746, - "grad_norm": 0.000928579771425575, - "learning_rate": 0.000199996904265102, - "loss": 46.0, - "step": 32769 - }, - { - "epoch": 2.505495345681136, - "grad_norm": 0.0034551192075014114, - "learning_rate": 0.00019999690407609952, - "loss": 46.0, - "step": 32770 - }, - { - "epoch": 2.5055718026645257, - "grad_norm": 0.0009016767144203186, - "learning_rate": 0.00019999690388709126, - "loss": 46.0, - "step": 32771 - }, - { - "epoch": 2.5056482596479155, - "grad_norm": 0.0013303476152941585, - "learning_rate": 0.00019999690369807725, - "loss": 46.0, - "step": 32772 - }, - { - "epoch": 2.5057247166313052, - "grad_norm": 0.0006002801819704473, - "learning_rate": 0.00019999690350905747, - "loss": 46.0, - "step": 32773 - }, - { - "epoch": 2.505801173614695, - "grad_norm": 0.0036748128477483988, - "learning_rate": 0.0001999969033200319, - "loss": 46.0, - "step": 32774 - }, - { - "epoch": 2.5058776305980848, - "grad_norm": 0.0011790312128141522, - "learning_rate": 0.00019999690313100054, - "loss": 46.0, - "step": 32775 - }, - { - "epoch": 2.5059540875814745, - "grad_norm": 0.0011653387919068336, - "learning_rate": 0.00019999690294196344, - "loss": 46.0, - "step": 32776 - }, - { - "epoch": 2.5060305445648643, - "grad_norm": 0.0011330617126077414, - "learning_rate": 0.00019999690275292057, - "loss": 46.0, - "step": 32777 - }, - { - "epoch": 2.506107001548254, - "grad_norm": 0.0009041562443599105, - "learning_rate": 0.00019999690256387195, - "loss": 46.0, - "step": 32778 - }, - { - "epoch": 2.506183458531644, - "grad_norm": 0.0019856621511280537, - "learning_rate": 0.00019999690237481756, - "loss": 46.0, - "step": 32779 - }, - { - "epoch": 2.5062599155150336, - "grad_norm": 0.0006077262223698199, - "learning_rate": 0.00019999690218575736, - "loss": 46.0, - "step": 32780 - }, - { - "epoch": 2.5063363724984233, - "grad_norm": 0.0012735208729282022, - "learning_rate": 0.00019999690199669142, - "loss": 46.0, - "step": 32781 - }, - { - "epoch": 2.5064128294818127, - "grad_norm": 0.005194342229515314, - "learning_rate": 0.0001999969018076197, - "loss": 46.0, - "step": 32782 - }, - { - "epoch": 2.5064892864652024, - "grad_norm": 0.0016863117925822735, - "learning_rate": 0.00019999690161854222, - "loss": 46.0, - "step": 32783 - }, - { - "epoch": 2.506565743448592, - "grad_norm": 0.005246866028755903, - "learning_rate": 0.00019999690142945896, - "loss": 46.0, - "step": 32784 - }, - { - "epoch": 2.506642200431982, - "grad_norm": 0.0007456231978721917, - "learning_rate": 0.00019999690124036996, - "loss": 46.0, - "step": 32785 - }, - { - "epoch": 2.5067186574153717, - "grad_norm": 0.0008065645815804601, - "learning_rate": 0.00019999690105127518, - "loss": 46.0, - "step": 32786 - }, - { - "epoch": 2.5067951143987615, - "grad_norm": 0.0010334714315831661, - "learning_rate": 0.00019999690086217463, - "loss": 46.0, - "step": 32787 - }, - { - "epoch": 2.506871571382151, - "grad_norm": 0.0019579287618398666, - "learning_rate": 0.0001999969006730683, - "loss": 46.0, - "step": 32788 - }, - { - "epoch": 2.506948028365541, - "grad_norm": 0.001493968302384019, - "learning_rate": 0.00019999690048395617, - "loss": 46.0, - "step": 32789 - }, - { - "epoch": 2.5070244853489303, - "grad_norm": 0.003920863848179579, - "learning_rate": 0.0001999969002948383, - "loss": 46.0, - "step": 32790 - }, - { - "epoch": 2.50710094233232, - "grad_norm": 0.0006900986190885305, - "learning_rate": 0.00019999690010571465, - "loss": 46.0, - "step": 32791 - }, - { - "epoch": 2.50717739931571, - "grad_norm": 0.003912812098860741, - "learning_rate": 0.00019999689991658526, - "loss": 46.0, - "step": 32792 - }, - { - "epoch": 2.5072538562990996, - "grad_norm": 0.0009005999891087413, - "learning_rate": 0.00019999689972745007, - "loss": 46.0, - "step": 32793 - }, - { - "epoch": 2.5073303132824893, - "grad_norm": 0.0008407142595387995, - "learning_rate": 0.00019999689953830913, - "loss": 46.0, - "step": 32794 - }, - { - "epoch": 2.507406770265879, - "grad_norm": 0.0013869135873392224, - "learning_rate": 0.00019999689934916242, - "loss": 46.0, - "step": 32795 - }, - { - "epoch": 2.507483227249269, - "grad_norm": 0.0012822578428313136, - "learning_rate": 0.00019999689916000993, - "loss": 46.0, - "step": 32796 - }, - { - "epoch": 2.5075596842326586, - "grad_norm": 0.0007940813666209579, - "learning_rate": 0.0001999968989708517, - "loss": 46.0, - "step": 32797 - }, - { - "epoch": 2.5076361412160484, - "grad_norm": 0.001456239726394415, - "learning_rate": 0.00019999689878168767, - "loss": 46.0, - "step": 32798 - }, - { - "epoch": 2.507712598199438, - "grad_norm": 0.0014756184536963701, - "learning_rate": 0.00019999689859251786, - "loss": 46.0, - "step": 32799 - }, - { - "epoch": 2.507789055182828, - "grad_norm": 0.0007517407066188753, - "learning_rate": 0.0001999968984033423, - "loss": 46.0, - "step": 32800 - }, - { - "epoch": 2.5078655121662177, - "grad_norm": 0.0006073712138459086, - "learning_rate": 0.00019999689821416098, - "loss": 46.0, - "step": 32801 - }, - { - "epoch": 2.5079419691496074, - "grad_norm": 0.001129685202613473, - "learning_rate": 0.00019999689802497388, - "loss": 46.0, - "step": 32802 - }, - { - "epoch": 2.508018426132997, - "grad_norm": 0.0008915666840039194, - "learning_rate": 0.00019999689783578098, - "loss": 46.0, - "step": 32803 - }, - { - "epoch": 2.5080948831163865, - "grad_norm": 0.002187796402722597, - "learning_rate": 0.0001999968976465824, - "loss": 46.0, - "step": 32804 - }, - { - "epoch": 2.5081713400997763, - "grad_norm": 0.0031287139281630516, - "learning_rate": 0.00019999689745737795, - "loss": 46.0, - "step": 32805 - }, - { - "epoch": 2.508247797083166, - "grad_norm": 0.0019955255556851625, - "learning_rate": 0.0001999968972681678, - "loss": 46.0, - "step": 32806 - }, - { - "epoch": 2.508324254066556, - "grad_norm": 0.000831499753985554, - "learning_rate": 0.00019999689707895184, - "loss": 46.0, - "step": 32807 - }, - { - "epoch": 2.5084007110499456, - "grad_norm": 0.003255280200392008, - "learning_rate": 0.0001999968968897301, - "loss": 46.0, - "step": 32808 - }, - { - "epoch": 2.5084771680333353, - "grad_norm": 0.0008602631860412657, - "learning_rate": 0.00019999689670050264, - "loss": 46.0, - "step": 32809 - }, - { - "epoch": 2.508553625016725, - "grad_norm": 0.0027964110486209393, - "learning_rate": 0.00019999689651126938, - "loss": 46.0, - "step": 32810 - }, - { - "epoch": 2.508630082000115, - "grad_norm": 0.007330305874347687, - "learning_rate": 0.00019999689632203035, - "loss": 46.0, - "step": 32811 - }, - { - "epoch": 2.508706538983504, - "grad_norm": 0.000710338877979666, - "learning_rate": 0.00019999689613278557, - "loss": 46.0, - "step": 32812 - }, - { - "epoch": 2.508782995966894, - "grad_norm": 0.0018319184891879559, - "learning_rate": 0.000199996895943535, - "loss": 46.0, - "step": 32813 - }, - { - "epoch": 2.5088594529502837, - "grad_norm": 0.0028413746040314436, - "learning_rate": 0.00019999689575427867, - "loss": 46.0, - "step": 32814 - }, - { - "epoch": 2.5089359099336734, - "grad_norm": 0.0018154596909880638, - "learning_rate": 0.00019999689556501657, - "loss": 46.0, - "step": 32815 - }, - { - "epoch": 2.509012366917063, - "grad_norm": 0.0008049430907703936, - "learning_rate": 0.0001999968953757487, - "loss": 46.0, - "step": 32816 - }, - { - "epoch": 2.509088823900453, - "grad_norm": 0.0011200510198250413, - "learning_rate": 0.00019999689518647505, - "loss": 46.0, - "step": 32817 - }, - { - "epoch": 2.5091652808838427, - "grad_norm": 0.0011329521657899022, - "learning_rate": 0.00019999689499719566, - "loss": 46.0, - "step": 32818 - }, - { - "epoch": 2.5092417378672325, - "grad_norm": 0.0006639102357439697, - "learning_rate": 0.00019999689480791047, - "loss": 46.0, - "step": 32819 - }, - { - "epoch": 2.5093181948506222, - "grad_norm": 0.0013859100872650743, - "learning_rate": 0.00019999689461861953, - "loss": 46.0, - "step": 32820 - }, - { - "epoch": 2.509394651834012, - "grad_norm": 0.0009265526896342635, - "learning_rate": 0.00019999689442932282, - "loss": 46.0, - "step": 32821 - }, - { - "epoch": 2.5094711088174018, - "grad_norm": 0.0005473807686939836, - "learning_rate": 0.00019999689424002033, - "loss": 46.0, - "step": 32822 - }, - { - "epoch": 2.5095475658007915, - "grad_norm": 0.0008722553611733019, - "learning_rate": 0.00019999689405071207, - "loss": 46.0, - "step": 32823 - }, - { - "epoch": 2.5096240227841813, - "grad_norm": 0.00034943880746141076, - "learning_rate": 0.00019999689386139804, - "loss": 46.0, - "step": 32824 - }, - { - "epoch": 2.509700479767571, - "grad_norm": 0.0010079960338771343, - "learning_rate": 0.00019999689367207826, - "loss": 46.0, - "step": 32825 - }, - { - "epoch": 2.5097769367509604, - "grad_norm": 0.0007239811238832772, - "learning_rate": 0.0001999968934827527, - "loss": 46.0, - "step": 32826 - }, - { - "epoch": 2.50985339373435, - "grad_norm": 0.000974804803263396, - "learning_rate": 0.00019999689329342136, - "loss": 46.0, - "step": 32827 - }, - { - "epoch": 2.50992985071774, - "grad_norm": 0.00154615449719131, - "learning_rate": 0.00019999689310408426, - "loss": 46.0, - "step": 32828 - }, - { - "epoch": 2.5100063077011296, - "grad_norm": 0.0010489854030311108, - "learning_rate": 0.0001999968929147414, - "loss": 46.0, - "step": 32829 - }, - { - "epoch": 2.5100827646845194, - "grad_norm": 0.0009069915977306664, - "learning_rate": 0.00019999689272539274, - "loss": 46.0, - "step": 32830 - }, - { - "epoch": 2.510159221667909, - "grad_norm": 0.0017528278985992074, - "learning_rate": 0.00019999689253603835, - "loss": 46.0, - "step": 32831 - }, - { - "epoch": 2.510235678651299, - "grad_norm": 0.001177777536213398, - "learning_rate": 0.00019999689234667819, - "loss": 46.0, - "step": 32832 - }, - { - "epoch": 2.5103121356346887, - "grad_norm": 0.0024528945796191692, - "learning_rate": 0.00019999689215731222, - "loss": 46.0, - "step": 32833 - }, - { - "epoch": 2.510388592618078, - "grad_norm": 0.0008305267547257245, - "learning_rate": 0.00019999689196794048, - "loss": 46.0, - "step": 32834 - }, - { - "epoch": 2.5104650496014678, - "grad_norm": 0.001171586220152676, - "learning_rate": 0.00019999689177856302, - "loss": 46.0, - "step": 32835 - }, - { - "epoch": 2.5105415065848575, - "grad_norm": 0.0007078165654093027, - "learning_rate": 0.00019999689158917977, - "loss": 46.0, - "step": 32836 - }, - { - "epoch": 2.5106179635682473, - "grad_norm": 0.0019236498046666384, - "learning_rate": 0.00019999689139979073, - "loss": 46.0, - "step": 32837 - }, - { - "epoch": 2.510694420551637, - "grad_norm": 0.0007407772354781628, - "learning_rate": 0.00019999689121039593, - "loss": 46.0, - "step": 32838 - }, - { - "epoch": 2.510770877535027, - "grad_norm": 0.0016303022857755423, - "learning_rate": 0.00019999689102099538, - "loss": 46.0, - "step": 32839 - }, - { - "epoch": 2.5108473345184166, - "grad_norm": 0.0019795165862888098, - "learning_rate": 0.00019999689083158905, - "loss": 46.0, - "step": 32840 - }, - { - "epoch": 2.5109237915018063, - "grad_norm": 0.001191888004541397, - "learning_rate": 0.00019999689064217693, - "loss": 46.0, - "step": 32841 - }, - { - "epoch": 2.511000248485196, - "grad_norm": 0.0008496284135617316, - "learning_rate": 0.00019999689045275905, - "loss": 46.0, - "step": 32842 - }, - { - "epoch": 2.511076705468586, - "grad_norm": 0.0013293935917317867, - "learning_rate": 0.00019999689026333544, - "loss": 46.0, - "step": 32843 - }, - { - "epoch": 2.5111531624519756, - "grad_norm": 0.010424631647765636, - "learning_rate": 0.00019999689007390602, - "loss": 46.0, - "step": 32844 - }, - { - "epoch": 2.5112296194353654, - "grad_norm": 0.0011201835004612803, - "learning_rate": 0.00019999688988447085, - "loss": 46.0, - "step": 32845 - }, - { - "epoch": 2.511306076418755, - "grad_norm": 0.00044287124183028936, - "learning_rate": 0.0001999968896950299, - "loss": 46.0, - "step": 32846 - }, - { - "epoch": 2.5113825334021445, - "grad_norm": 0.0008032558253034949, - "learning_rate": 0.00019999688950558318, - "loss": 46.0, - "step": 32847 - }, - { - "epoch": 2.511458990385534, - "grad_norm": 0.000719800591468811, - "learning_rate": 0.00019999688931613067, - "loss": 46.0, - "step": 32848 - }, - { - "epoch": 2.511535447368924, - "grad_norm": 0.0009950101375579834, - "learning_rate": 0.00019999688912667243, - "loss": 46.0, - "step": 32849 - }, - { - "epoch": 2.5116119043523137, - "grad_norm": 0.0013402453623712063, - "learning_rate": 0.0001999968889372084, - "loss": 46.0, - "step": 32850 - }, - { - "epoch": 2.5116883613357035, - "grad_norm": 0.001264795777387917, - "learning_rate": 0.0001999968887477386, - "loss": 46.0, - "step": 32851 - }, - { - "epoch": 2.5117648183190933, - "grad_norm": 0.0008573863306082785, - "learning_rate": 0.00019999688855826305, - "loss": 46.0, - "step": 32852 - }, - { - "epoch": 2.511841275302483, - "grad_norm": 0.005295548588037491, - "learning_rate": 0.00019999688836878172, - "loss": 46.0, - "step": 32853 - }, - { - "epoch": 2.511917732285873, - "grad_norm": 0.0005234805284999311, - "learning_rate": 0.00019999688817929462, - "loss": 46.0, - "step": 32854 - }, - { - "epoch": 2.5119941892692625, - "grad_norm": 0.002573030535131693, - "learning_rate": 0.00019999688798980173, - "loss": 46.0, - "step": 32855 - }, - { - "epoch": 2.512070646252652, - "grad_norm": 0.0009861671132966876, - "learning_rate": 0.0001999968878003031, - "loss": 46.0, - "step": 32856 - }, - { - "epoch": 2.5121471032360416, - "grad_norm": 0.0071407221257686615, - "learning_rate": 0.0001999968876107987, - "loss": 46.0, - "step": 32857 - }, - { - "epoch": 2.5122235602194314, - "grad_norm": 0.0008289107936434448, - "learning_rate": 0.00019999688742128853, - "loss": 46.0, - "step": 32858 - }, - { - "epoch": 2.512300017202821, - "grad_norm": 0.0008245704229921103, - "learning_rate": 0.00019999688723177256, - "loss": 46.0, - "step": 32859 - }, - { - "epoch": 2.512376474186211, - "grad_norm": 0.001350258244201541, - "learning_rate": 0.00019999688704225082, - "loss": 46.0, - "step": 32860 - }, - { - "epoch": 2.5124529311696007, - "grad_norm": 0.0007459091139025986, - "learning_rate": 0.00019999688685272337, - "loss": 46.0, - "step": 32861 - }, - { - "epoch": 2.5125293881529904, - "grad_norm": 0.0016938502667471766, - "learning_rate": 0.0001999968866631901, - "loss": 46.0, - "step": 32862 - }, - { - "epoch": 2.51260584513638, - "grad_norm": 0.0015764908166602254, - "learning_rate": 0.00019999688647365108, - "loss": 46.0, - "step": 32863 - }, - { - "epoch": 2.51268230211977, - "grad_norm": 0.0005885313148610294, - "learning_rate": 0.00019999688628410627, - "loss": 46.0, - "step": 32864 - }, - { - "epoch": 2.5127587591031597, - "grad_norm": 0.0015417566755786538, - "learning_rate": 0.00019999688609455572, - "loss": 46.0, - "step": 32865 - }, - { - "epoch": 2.5128352160865495, - "grad_norm": 0.0005860993987880647, - "learning_rate": 0.0001999968859049994, - "loss": 46.0, - "step": 32866 - }, - { - "epoch": 2.5129116730699392, - "grad_norm": 0.0011281523620709777, - "learning_rate": 0.0001999968857154373, - "loss": 46.0, - "step": 32867 - }, - { - "epoch": 2.512988130053329, - "grad_norm": 0.0008245331700891256, - "learning_rate": 0.0001999968855258694, - "loss": 46.0, - "step": 32868 - }, - { - "epoch": 2.5130645870367183, - "grad_norm": 0.0006819698610343039, - "learning_rate": 0.00019999688533629576, - "loss": 46.0, - "step": 32869 - }, - { - "epoch": 2.513141044020108, - "grad_norm": 0.0015945348422974348, - "learning_rate": 0.00019999688514671634, - "loss": 46.0, - "step": 32870 - }, - { - "epoch": 2.513217501003498, - "grad_norm": 0.0012751793256029487, - "learning_rate": 0.00019999688495713115, - "loss": 46.0, - "step": 32871 - }, - { - "epoch": 2.5132939579868876, - "grad_norm": 0.003421960398554802, - "learning_rate": 0.00019999688476754024, - "loss": 46.0, - "step": 32872 - }, - { - "epoch": 2.5133704149702774, - "grad_norm": 0.0015391081105917692, - "learning_rate": 0.0001999968845779435, - "loss": 46.0, - "step": 32873 - }, - { - "epoch": 2.513446871953667, - "grad_norm": 0.0010220227995887399, - "learning_rate": 0.00019999688438834104, - "loss": 46.0, - "step": 32874 - }, - { - "epoch": 2.513523328937057, - "grad_norm": 0.0011553529184311628, - "learning_rate": 0.00019999688419873276, - "loss": 46.0, - "step": 32875 - }, - { - "epoch": 2.5135997859204466, - "grad_norm": 0.0015240157954394817, - "learning_rate": 0.00019999688400911873, - "loss": 46.0, - "step": 32876 - }, - { - "epoch": 2.513676242903836, - "grad_norm": 0.0022138566710054874, - "learning_rate": 0.00019999688381949895, - "loss": 46.0, - "step": 32877 - }, - { - "epoch": 2.5137526998872257, - "grad_norm": 0.0008458830998279154, - "learning_rate": 0.0001999968836298734, - "loss": 46.0, - "step": 32878 - }, - { - "epoch": 2.5138291568706155, - "grad_norm": 0.0003650127036962658, - "learning_rate": 0.00019999688344024205, - "loss": 46.0, - "step": 32879 - }, - { - "epoch": 2.5139056138540052, - "grad_norm": 0.0014310518745332956, - "learning_rate": 0.00019999688325060495, - "loss": 46.0, - "step": 32880 - }, - { - "epoch": 2.513982070837395, - "grad_norm": 0.0009572127601131797, - "learning_rate": 0.00019999688306096208, - "loss": 46.0, - "step": 32881 - }, - { - "epoch": 2.5140585278207848, - "grad_norm": 0.0021009778138250113, - "learning_rate": 0.0001999968828713134, - "loss": 46.0, - "step": 32882 - }, - { - "epoch": 2.5141349848041745, - "grad_norm": 0.0013248994946479797, - "learning_rate": 0.00019999688268165902, - "loss": 46.0, - "step": 32883 - }, - { - "epoch": 2.5142114417875643, - "grad_norm": 0.001694239559583366, - "learning_rate": 0.00019999688249199883, - "loss": 46.0, - "step": 32884 - }, - { - "epoch": 2.514287898770954, - "grad_norm": 0.00220459490083158, - "learning_rate": 0.00019999688230233287, - "loss": 46.0, - "step": 32885 - }, - { - "epoch": 2.514364355754344, - "grad_norm": 0.001286650775000453, - "learning_rate": 0.00019999688211266116, - "loss": 46.0, - "step": 32886 - }, - { - "epoch": 2.5144408127377336, - "grad_norm": 0.0016371086239814758, - "learning_rate": 0.00019999688192298367, - "loss": 46.0, - "step": 32887 - }, - { - "epoch": 2.5145172697211233, - "grad_norm": 0.0019026239169761539, - "learning_rate": 0.0001999968817333004, - "loss": 46.0, - "step": 32888 - }, - { - "epoch": 2.514593726704513, - "grad_norm": 0.0012701016385108232, - "learning_rate": 0.00019999688154361138, - "loss": 46.0, - "step": 32889 - }, - { - "epoch": 2.514670183687903, - "grad_norm": 0.004471160005778074, - "learning_rate": 0.00019999688135391658, - "loss": 46.0, - "step": 32890 - }, - { - "epoch": 2.514746640671292, - "grad_norm": 0.0031129554845392704, - "learning_rate": 0.00019999688116421603, - "loss": 46.0, - "step": 32891 - }, - { - "epoch": 2.514823097654682, - "grad_norm": 0.0012290870072320104, - "learning_rate": 0.00019999688097450968, - "loss": 46.0, - "step": 32892 - }, - { - "epoch": 2.5148995546380717, - "grad_norm": 0.0015509986551478505, - "learning_rate": 0.00019999688078479758, - "loss": 46.0, - "step": 32893 - }, - { - "epoch": 2.5149760116214614, - "grad_norm": 0.0010045382659882307, - "learning_rate": 0.00019999688059507974, - "loss": 46.0, - "step": 32894 - }, - { - "epoch": 2.515052468604851, - "grad_norm": 0.0011538172839209437, - "learning_rate": 0.00019999688040535607, - "loss": 46.0, - "step": 32895 - }, - { - "epoch": 2.515128925588241, - "grad_norm": 0.0009462969610467553, - "learning_rate": 0.00019999688021562665, - "loss": 46.0, - "step": 32896 - }, - { - "epoch": 2.5152053825716307, - "grad_norm": 0.0011038155062124133, - "learning_rate": 0.0001999968800258915, - "loss": 46.0, - "step": 32897 - }, - { - "epoch": 2.5152818395550205, - "grad_norm": 0.000499115907587111, - "learning_rate": 0.00019999687983615055, - "loss": 46.0, - "step": 32898 - }, - { - "epoch": 2.51535829653841, - "grad_norm": 0.001995911356061697, - "learning_rate": 0.00019999687964640381, - "loss": 46.0, - "step": 32899 - }, - { - "epoch": 2.5154347535217996, - "grad_norm": 0.0008694082498550415, - "learning_rate": 0.0001999968794566513, - "loss": 46.0, - "step": 32900 - }, - { - "epoch": 2.5155112105051893, - "grad_norm": 0.0004751631640829146, - "learning_rate": 0.00019999687926689305, - "loss": 46.0, - "step": 32901 - }, - { - "epoch": 2.515587667488579, - "grad_norm": 0.0011406440753489733, - "learning_rate": 0.00019999687907712902, - "loss": 46.0, - "step": 32902 - }, - { - "epoch": 2.515664124471969, - "grad_norm": 0.002283995971083641, - "learning_rate": 0.00019999687888735924, - "loss": 46.0, - "step": 32903 - }, - { - "epoch": 2.5157405814553586, - "grad_norm": 0.0010508904233574867, - "learning_rate": 0.00019999687869758366, - "loss": 46.0, - "step": 32904 - }, - { - "epoch": 2.5158170384387484, - "grad_norm": 0.0009684675605967641, - "learning_rate": 0.00019999687850780234, - "loss": 46.0, - "step": 32905 - }, - { - "epoch": 2.515893495422138, - "grad_norm": 0.0034374427050352097, - "learning_rate": 0.00019999687831801524, - "loss": 46.0, - "step": 32906 - }, - { - "epoch": 2.515969952405528, - "grad_norm": 0.0010148673318326473, - "learning_rate": 0.00019999687812822235, - "loss": 46.0, - "step": 32907 - }, - { - "epoch": 2.5160464093889177, - "grad_norm": 0.0016581009840592742, - "learning_rate": 0.00019999687793842373, - "loss": 46.0, - "step": 32908 - }, - { - "epoch": 2.5161228663723074, - "grad_norm": 0.0014887274010106921, - "learning_rate": 0.0001999968777486193, - "loss": 46.0, - "step": 32909 - }, - { - "epoch": 2.516199323355697, - "grad_norm": 0.0026363113429397345, - "learning_rate": 0.00019999687755880912, - "loss": 46.0, - "step": 32910 - }, - { - "epoch": 2.516275780339087, - "grad_norm": 0.0006123919156379998, - "learning_rate": 0.00019999687736899319, - "loss": 46.0, - "step": 32911 - }, - { - "epoch": 2.5163522373224767, - "grad_norm": 0.0008107998874038458, - "learning_rate": 0.00019999687717917145, - "loss": 46.0, - "step": 32912 - }, - { - "epoch": 2.516428694305866, - "grad_norm": 0.0015831629280000925, - "learning_rate": 0.00019999687698934397, - "loss": 46.0, - "step": 32913 - }, - { - "epoch": 2.516505151289256, - "grad_norm": 0.001789933885447681, - "learning_rate": 0.0001999968767995107, - "loss": 46.0, - "step": 32914 - }, - { - "epoch": 2.5165816082726455, - "grad_norm": 0.0007072822190821171, - "learning_rate": 0.00019999687660967168, - "loss": 46.0, - "step": 32915 - }, - { - "epoch": 2.5166580652560353, - "grad_norm": 0.0008345975656993687, - "learning_rate": 0.00019999687641982688, - "loss": 46.0, - "step": 32916 - }, - { - "epoch": 2.516734522239425, - "grad_norm": 0.0014782397774979472, - "learning_rate": 0.0001999968762299763, - "loss": 46.0, - "step": 32917 - }, - { - "epoch": 2.516810979222815, - "grad_norm": 0.0030025290325284004, - "learning_rate": 0.00019999687604011998, - "loss": 46.0, - "step": 32918 - }, - { - "epoch": 2.5168874362062046, - "grad_norm": 0.00047987012658268213, - "learning_rate": 0.00019999687585025785, - "loss": 46.0, - "step": 32919 - }, - { - "epoch": 2.5169638931895943, - "grad_norm": 0.0013627614825963974, - "learning_rate": 0.00019999687566039, - "loss": 46.0, - "step": 32920 - }, - { - "epoch": 2.5170403501729837, - "grad_norm": 0.0011875573545694351, - "learning_rate": 0.00019999687547051634, - "loss": 46.0, - "step": 32921 - }, - { - "epoch": 2.5171168071563734, - "grad_norm": 0.0007395835127681494, - "learning_rate": 0.00019999687528063692, - "loss": 46.0, - "step": 32922 - }, - { - "epoch": 2.517193264139763, - "grad_norm": 0.0012649290729314089, - "learning_rate": 0.00019999687509075176, - "loss": 46.0, - "step": 32923 - }, - { - "epoch": 2.517269721123153, - "grad_norm": 0.0015310108428820968, - "learning_rate": 0.0001999968749008608, - "loss": 46.0, - "step": 32924 - }, - { - "epoch": 2.5173461781065427, - "grad_norm": 0.001608656020835042, - "learning_rate": 0.0001999968747109641, - "loss": 46.0, - "step": 32925 - }, - { - "epoch": 2.5174226350899325, - "grad_norm": 0.0010443860664963722, - "learning_rate": 0.0001999968745210616, - "loss": 46.0, - "step": 32926 - }, - { - "epoch": 2.5174990920733222, - "grad_norm": 0.0007467984687536955, - "learning_rate": 0.00019999687433115332, - "loss": 46.0, - "step": 32927 - }, - { - "epoch": 2.517575549056712, - "grad_norm": 0.0017736062873154879, - "learning_rate": 0.0001999968741412393, - "loss": 46.0, - "step": 32928 - }, - { - "epoch": 2.5176520060401018, - "grad_norm": 0.0007338566356338561, - "learning_rate": 0.00019999687395131952, - "loss": 46.0, - "step": 32929 - }, - { - "epoch": 2.5177284630234915, - "grad_norm": 0.0023565846495330334, - "learning_rate": 0.00019999687376139394, - "loss": 46.0, - "step": 32930 - }, - { - "epoch": 2.5178049200068813, - "grad_norm": 0.0006290346500463784, - "learning_rate": 0.0001999968735714626, - "loss": 46.0, - "step": 32931 - }, - { - "epoch": 2.517881376990271, - "grad_norm": 0.0005563198938034475, - "learning_rate": 0.0001999968733815255, - "loss": 46.0, - "step": 32932 - }, - { - "epoch": 2.517957833973661, - "grad_norm": 0.0013433565618470311, - "learning_rate": 0.00019999687319158263, - "loss": 46.0, - "step": 32933 - }, - { - "epoch": 2.5180342909570506, - "grad_norm": 0.0016104771057143807, - "learning_rate": 0.00019999687300163398, - "loss": 46.0, - "step": 32934 - }, - { - "epoch": 2.51811074794044, - "grad_norm": 0.0004023423243779689, - "learning_rate": 0.00019999687281167957, - "loss": 46.0, - "step": 32935 - }, - { - "epoch": 2.5181872049238296, - "grad_norm": 0.0013931648572906852, - "learning_rate": 0.00019999687262171938, - "loss": 46.0, - "step": 32936 - }, - { - "epoch": 2.5182636619072194, - "grad_norm": 0.001002289354801178, - "learning_rate": 0.00019999687243175341, - "loss": 46.0, - "step": 32937 - }, - { - "epoch": 2.518340118890609, - "grad_norm": 0.0022146524861454964, - "learning_rate": 0.0001999968722417817, - "loss": 46.0, - "step": 32938 - }, - { - "epoch": 2.518416575873999, - "grad_norm": 0.0013069614069536328, - "learning_rate": 0.00019999687205180422, - "loss": 46.0, - "step": 32939 - }, - { - "epoch": 2.5184930328573887, - "grad_norm": 0.003414312144741416, - "learning_rate": 0.00019999687186182094, - "loss": 46.0, - "step": 32940 - }, - { - "epoch": 2.5185694898407784, - "grad_norm": 0.0006989172543399036, - "learning_rate": 0.0001999968716718319, - "loss": 46.0, - "step": 32941 - }, - { - "epoch": 2.518645946824168, - "grad_norm": 0.0010113092139363289, - "learning_rate": 0.00019999687148183713, - "loss": 46.0, - "step": 32942 - }, - { - "epoch": 2.5187224038075575, - "grad_norm": 0.0007022622157819569, - "learning_rate": 0.00019999687129183659, - "loss": 46.0, - "step": 32943 - }, - { - "epoch": 2.5187988607909473, - "grad_norm": 0.000595089397393167, - "learning_rate": 0.0001999968711018302, - "loss": 46.0, - "step": 32944 - }, - { - "epoch": 2.518875317774337, - "grad_norm": 0.0014249193482100964, - "learning_rate": 0.00019999687091181811, - "loss": 46.0, - "step": 32945 - }, - { - "epoch": 2.518951774757727, - "grad_norm": 0.0017278653103858232, - "learning_rate": 0.00019999687072180024, - "loss": 46.0, - "step": 32946 - }, - { - "epoch": 2.5190282317411166, - "grad_norm": 0.0006695809424854815, - "learning_rate": 0.0001999968705317766, - "loss": 46.0, - "step": 32947 - }, - { - "epoch": 2.5191046887245063, - "grad_norm": 0.003549568820744753, - "learning_rate": 0.0001999968703417472, - "loss": 46.0, - "step": 32948 - }, - { - "epoch": 2.519181145707896, - "grad_norm": 0.0015840554842725396, - "learning_rate": 0.000199996870151712, - "loss": 46.0, - "step": 32949 - }, - { - "epoch": 2.519257602691286, - "grad_norm": 0.0017992067150771618, - "learning_rate": 0.00019999686996167103, - "loss": 46.0, - "step": 32950 - }, - { - "epoch": 2.5193340596746756, - "grad_norm": 0.0005472741322591901, - "learning_rate": 0.00019999686977162433, - "loss": 46.0, - "step": 32951 - }, - { - "epoch": 2.5194105166580654, - "grad_norm": 0.0027882393915206194, - "learning_rate": 0.00019999686958157184, - "loss": 46.0, - "step": 32952 - }, - { - "epoch": 2.519486973641455, - "grad_norm": 0.003878476098179817, - "learning_rate": 0.00019999686939151356, - "loss": 46.0, - "step": 32953 - }, - { - "epoch": 2.519563430624845, - "grad_norm": 0.0015768310986459255, - "learning_rate": 0.00019999686920144956, - "loss": 46.0, - "step": 32954 - }, - { - "epoch": 2.5196398876082347, - "grad_norm": 0.0009607269312255085, - "learning_rate": 0.00019999686901137973, - "loss": 46.0, - "step": 32955 - }, - { - "epoch": 2.5197163445916244, - "grad_norm": 0.0030656075105071068, - "learning_rate": 0.00019999686882130418, - "loss": 46.0, - "step": 32956 - }, - { - "epoch": 2.5197928015750137, - "grad_norm": 0.0005757392500527203, - "learning_rate": 0.00019999686863122283, - "loss": 46.0, - "step": 32957 - }, - { - "epoch": 2.5198692585584035, - "grad_norm": 0.0006189522100612521, - "learning_rate": 0.00019999686844113574, - "loss": 46.0, - "step": 32958 - }, - { - "epoch": 2.5199457155417933, - "grad_norm": 0.001912234234623611, - "learning_rate": 0.00019999686825104287, - "loss": 46.0, - "step": 32959 - }, - { - "epoch": 2.520022172525183, - "grad_norm": 0.003201256040483713, - "learning_rate": 0.00019999686806094423, - "loss": 46.0, - "step": 32960 - }, - { - "epoch": 2.5200986295085728, - "grad_norm": 0.0025721059646457434, - "learning_rate": 0.0001999968678708398, - "loss": 46.0, - "step": 32961 - }, - { - "epoch": 2.5201750864919625, - "grad_norm": 0.0004677971010096371, - "learning_rate": 0.0001999968676807296, - "loss": 46.0, - "step": 32962 - }, - { - "epoch": 2.5202515434753523, - "grad_norm": 0.0006928106886334717, - "learning_rate": 0.00019999686749061366, - "loss": 46.0, - "step": 32963 - }, - { - "epoch": 2.520328000458742, - "grad_norm": 0.0036883829161524773, - "learning_rate": 0.00019999686730049195, - "loss": 46.0, - "step": 32964 - }, - { - "epoch": 2.5204044574421314, - "grad_norm": 0.0023962792474776506, - "learning_rate": 0.00019999686711036441, - "loss": 46.0, - "step": 32965 - }, - { - "epoch": 2.520480914425521, - "grad_norm": 0.0009388107573613524, - "learning_rate": 0.00019999686692023119, - "loss": 46.0, - "step": 32966 - }, - { - "epoch": 2.520557371408911, - "grad_norm": 0.0010167048312723637, - "learning_rate": 0.00019999686673009216, - "loss": 46.0, - "step": 32967 - }, - { - "epoch": 2.5206338283923007, - "grad_norm": 0.0011221676832064986, - "learning_rate": 0.00019999686653994733, - "loss": 46.0, - "step": 32968 - }, - { - "epoch": 2.5207102853756904, - "grad_norm": 0.00042411015601828694, - "learning_rate": 0.00019999686634979678, - "loss": 46.0, - "step": 32969 - }, - { - "epoch": 2.52078674235908, - "grad_norm": 0.0015562771586701274, - "learning_rate": 0.00019999686615964043, - "loss": 46.0, - "step": 32970 - }, - { - "epoch": 2.52086319934247, - "grad_norm": 0.0009822442661970854, - "learning_rate": 0.00019999686596947834, - "loss": 46.0, - "step": 32971 - }, - { - "epoch": 2.5209396563258597, - "grad_norm": 0.0010432639392092824, - "learning_rate": 0.00019999686577931044, - "loss": 46.0, - "step": 32972 - }, - { - "epoch": 2.5210161133092495, - "grad_norm": 0.002705514198169112, - "learning_rate": 0.00019999686558913683, - "loss": 46.0, - "step": 32973 - }, - { - "epoch": 2.5210925702926392, - "grad_norm": 0.0012569775572046638, - "learning_rate": 0.0001999968653989574, - "loss": 46.0, - "step": 32974 - }, - { - "epoch": 2.521169027276029, - "grad_norm": 0.004672076087445021, - "learning_rate": 0.00019999686520877222, - "loss": 46.0, - "step": 32975 - }, - { - "epoch": 2.5212454842594187, - "grad_norm": 0.0038774562999606133, - "learning_rate": 0.00019999686501858126, - "loss": 46.0, - "step": 32976 - }, - { - "epoch": 2.5213219412428085, - "grad_norm": 0.002517918823286891, - "learning_rate": 0.00019999686482838453, - "loss": 46.0, - "step": 32977 - }, - { - "epoch": 2.521398398226198, - "grad_norm": 0.0015174434520304203, - "learning_rate": 0.00019999686463818207, - "loss": 46.0, - "step": 32978 - }, - { - "epoch": 2.5214748552095876, - "grad_norm": 0.0007237580721266568, - "learning_rate": 0.0001999968644479738, - "loss": 46.0, - "step": 32979 - }, - { - "epoch": 2.5215513121929773, - "grad_norm": 0.0016403637127950788, - "learning_rate": 0.00019999686425775973, - "loss": 46.0, - "step": 32980 - }, - { - "epoch": 2.521627769176367, - "grad_norm": 0.0016672651981934905, - "learning_rate": 0.00019999686406753996, - "loss": 46.0, - "step": 32981 - }, - { - "epoch": 2.521704226159757, - "grad_norm": 0.0010922338115051389, - "learning_rate": 0.00019999686387731438, - "loss": 46.0, - "step": 32982 - }, - { - "epoch": 2.5217806831431466, - "grad_norm": 0.001425395836122334, - "learning_rate": 0.00019999686368708303, - "loss": 46.0, - "step": 32983 - }, - { - "epoch": 2.5218571401265364, - "grad_norm": 0.0012705454137176275, - "learning_rate": 0.0001999968634968459, - "loss": 46.0, - "step": 32984 - }, - { - "epoch": 2.521933597109926, - "grad_norm": 0.0016701509011909366, - "learning_rate": 0.00019999686330660305, - "loss": 46.0, - "step": 32985 - }, - { - "epoch": 2.522010054093316, - "grad_norm": 0.0021274802275002003, - "learning_rate": 0.0001999968631163544, - "loss": 46.0, - "step": 32986 - }, - { - "epoch": 2.5220865110767052, - "grad_norm": 0.0012934100814163685, - "learning_rate": 0.0001999968629261, - "loss": 46.0, - "step": 32987 - }, - { - "epoch": 2.522162968060095, - "grad_norm": 0.0013350382214412093, - "learning_rate": 0.00019999686273583983, - "loss": 46.0, - "step": 32988 - }, - { - "epoch": 2.5222394250434848, - "grad_norm": 0.004998221527785063, - "learning_rate": 0.00019999686254557387, - "loss": 46.0, - "step": 32989 - }, - { - "epoch": 2.5223158820268745, - "grad_norm": 0.0007904151570983231, - "learning_rate": 0.00019999686235530213, - "loss": 46.0, - "step": 32990 - }, - { - "epoch": 2.5223923390102643, - "grad_norm": 0.0021653117146342993, - "learning_rate": 0.00019999686216502465, - "loss": 46.0, - "step": 32991 - }, - { - "epoch": 2.522468795993654, - "grad_norm": 0.0019591893069446087, - "learning_rate": 0.0001999968619747414, - "loss": 46.0, - "step": 32992 - }, - { - "epoch": 2.522545252977044, - "grad_norm": 0.0014295496512204409, - "learning_rate": 0.00019999686178445237, - "loss": 46.0, - "step": 32993 - }, - { - "epoch": 2.5226217099604336, - "grad_norm": 0.003106633434072137, - "learning_rate": 0.00019999686159415757, - "loss": 46.0, - "step": 32994 - }, - { - "epoch": 2.5226981669438233, - "grad_norm": 0.0009059727308340371, - "learning_rate": 0.000199996861403857, - "loss": 46.0, - "step": 32995 - }, - { - "epoch": 2.522774623927213, - "grad_norm": 0.0010645522270351648, - "learning_rate": 0.00019999686121355064, - "loss": 46.0, - "step": 32996 - }, - { - "epoch": 2.522851080910603, - "grad_norm": 0.0027523839380592108, - "learning_rate": 0.00019999686102323855, - "loss": 46.0, - "step": 32997 - }, - { - "epoch": 2.5229275378939926, - "grad_norm": 0.002353076823055744, - "learning_rate": 0.00019999686083292065, - "loss": 46.0, - "step": 32998 - }, - { - "epoch": 2.5230039948773824, - "grad_norm": 0.0017124420264735818, - "learning_rate": 0.00019999686064259698, - "loss": 46.0, - "step": 32999 - }, - { - "epoch": 2.5230804518607717, - "grad_norm": 0.0005272567505016923, - "learning_rate": 0.0001999968604522676, - "loss": 46.0, - "step": 33000 - }, - { - "epoch": 2.5231569088441614, - "grad_norm": 0.001233465038239956, - "learning_rate": 0.0001999968602619324, - "loss": 46.0, - "step": 33001 - }, - { - "epoch": 2.523233365827551, - "grad_norm": 0.001403333037160337, - "learning_rate": 0.00019999686007159145, - "loss": 46.0, - "step": 33002 - }, - { - "epoch": 2.523309822810941, - "grad_norm": 0.0008262894698418677, - "learning_rate": 0.00019999685988124471, - "loss": 46.0, - "step": 33003 - }, - { - "epoch": 2.5233862797943307, - "grad_norm": 0.0021805153228342533, - "learning_rate": 0.00019999685969089223, - "loss": 46.0, - "step": 33004 - }, - { - "epoch": 2.5234627367777205, - "grad_norm": 0.000986619503237307, - "learning_rate": 0.00019999685950053398, - "loss": 46.0, - "step": 33005 - }, - { - "epoch": 2.5235391937611102, - "grad_norm": 0.00140160892624408, - "learning_rate": 0.00019999685931016995, - "loss": 46.0, - "step": 33006 - }, - { - "epoch": 2.5236156507445, - "grad_norm": 0.0011755131417885423, - "learning_rate": 0.00019999685911980012, - "loss": 46.0, - "step": 33007 - }, - { - "epoch": 2.5236921077278893, - "grad_norm": 0.0016934167360886931, - "learning_rate": 0.00019999685892942458, - "loss": 46.0, - "step": 33008 - }, - { - "epoch": 2.523768564711279, - "grad_norm": 0.0009053974063135684, - "learning_rate": 0.00019999685873904323, - "loss": 46.0, - "step": 33009 - }, - { - "epoch": 2.523845021694669, - "grad_norm": 0.002372286980971694, - "learning_rate": 0.0001999968585486561, - "loss": 46.0, - "step": 33010 - }, - { - "epoch": 2.5239214786780586, - "grad_norm": 0.0016073455335572362, - "learning_rate": 0.00019999685835826324, - "loss": 46.0, - "step": 33011 - }, - { - "epoch": 2.5239979356614484, - "grad_norm": 0.0011299309553578496, - "learning_rate": 0.00019999685816786457, - "loss": 46.0, - "step": 33012 - }, - { - "epoch": 2.524074392644838, - "grad_norm": 0.0006076949648559093, - "learning_rate": 0.00019999685797746018, - "loss": 46.0, - "step": 33013 - }, - { - "epoch": 2.524150849628228, - "grad_norm": 0.0010196286020800471, - "learning_rate": 0.00019999685778705, - "loss": 46.0, - "step": 33014 - }, - { - "epoch": 2.5242273066116176, - "grad_norm": 0.0008471967303194106, - "learning_rate": 0.00019999685759663403, - "loss": 46.0, - "step": 33015 - }, - { - "epoch": 2.5243037635950074, - "grad_norm": 0.0006085417116992176, - "learning_rate": 0.0001999968574062123, - "loss": 46.0, - "step": 33016 - }, - { - "epoch": 2.524380220578397, - "grad_norm": 0.0013407089281827211, - "learning_rate": 0.00019999685721578482, - "loss": 46.0, - "step": 33017 - }, - { - "epoch": 2.524456677561787, - "grad_norm": 0.0007719437126070261, - "learning_rate": 0.00019999685702535157, - "loss": 46.0, - "step": 33018 - }, - { - "epoch": 2.5245331345451767, - "grad_norm": 0.0005961884162388742, - "learning_rate": 0.0001999968568349125, - "loss": 46.0, - "step": 33019 - }, - { - "epoch": 2.5246095915285665, - "grad_norm": 0.003857127856463194, - "learning_rate": 0.0001999968566444677, - "loss": 46.0, - "step": 33020 - }, - { - "epoch": 2.524686048511956, - "grad_norm": 0.0009103150805458426, - "learning_rate": 0.00019999685645401716, - "loss": 46.0, - "step": 33021 - }, - { - "epoch": 2.5247625054953455, - "grad_norm": 0.001532554510049522, - "learning_rate": 0.0001999968562635608, - "loss": 46.0, - "step": 33022 - }, - { - "epoch": 2.5248389624787353, - "grad_norm": 0.004269981291145086, - "learning_rate": 0.0001999968560730987, - "loss": 46.0, - "step": 33023 - }, - { - "epoch": 2.524915419462125, - "grad_norm": 0.0023879276122897863, - "learning_rate": 0.00019999685588263083, - "loss": 46.0, - "step": 33024 - }, - { - "epoch": 2.524991876445515, - "grad_norm": 0.0017792272847145796, - "learning_rate": 0.00019999685569215716, - "loss": 46.0, - "step": 33025 - }, - { - "epoch": 2.5250683334289046, - "grad_norm": 0.0006454586982727051, - "learning_rate": 0.00019999685550167775, - "loss": 46.0, - "step": 33026 - }, - { - "epoch": 2.5251447904122943, - "grad_norm": 0.0007813402335159481, - "learning_rate": 0.00019999685531119256, - "loss": 46.0, - "step": 33027 - }, - { - "epoch": 2.525221247395684, - "grad_norm": 0.0016382804606109858, - "learning_rate": 0.0001999968551207016, - "loss": 46.0, - "step": 33028 - }, - { - "epoch": 2.525297704379074, - "grad_norm": 0.004850442986935377, - "learning_rate": 0.0001999968549302049, - "loss": 46.0, - "step": 33029 - }, - { - "epoch": 2.525374161362463, - "grad_norm": 0.0013013448333367705, - "learning_rate": 0.00019999685473970238, - "loss": 46.0, - "step": 33030 - }, - { - "epoch": 2.525450618345853, - "grad_norm": 0.0010715138632804155, - "learning_rate": 0.00019999685454919413, - "loss": 46.0, - "step": 33031 - }, - { - "epoch": 2.5255270753292427, - "grad_norm": 0.0010582986287772655, - "learning_rate": 0.0001999968543586801, - "loss": 46.0, - "step": 33032 - }, - { - "epoch": 2.5256035323126325, - "grad_norm": 0.0021574136335402727, - "learning_rate": 0.00019999685416816028, - "loss": 46.0, - "step": 33033 - }, - { - "epoch": 2.525679989296022, - "grad_norm": 0.0013527146074920893, - "learning_rate": 0.00019999685397763473, - "loss": 46.0, - "step": 33034 - }, - { - "epoch": 2.525756446279412, - "grad_norm": 0.0011443831026554108, - "learning_rate": 0.00019999685378710338, - "loss": 46.0, - "step": 33035 - }, - { - "epoch": 2.5258329032628017, - "grad_norm": 0.0012167183449491858, - "learning_rate": 0.00019999685359656626, - "loss": 46.0, - "step": 33036 - }, - { - "epoch": 2.5259093602461915, - "grad_norm": 0.0017361333593726158, - "learning_rate": 0.0001999968534060234, - "loss": 46.0, - "step": 33037 - }, - { - "epoch": 2.5259858172295813, - "grad_norm": 0.0005349047714844346, - "learning_rate": 0.00019999685321547475, - "loss": 46.0, - "step": 33038 - }, - { - "epoch": 2.526062274212971, - "grad_norm": 0.0010023682843893766, - "learning_rate": 0.00019999685302492034, - "loss": 46.0, - "step": 33039 - }, - { - "epoch": 2.526138731196361, - "grad_norm": 0.001555194379761815, - "learning_rate": 0.00019999685283436013, - "loss": 46.0, - "step": 33040 - }, - { - "epoch": 2.5262151881797505, - "grad_norm": 0.0010983027750626206, - "learning_rate": 0.00019999685264379417, - "loss": 46.0, - "step": 33041 - }, - { - "epoch": 2.5262916451631403, - "grad_norm": 0.004526956006884575, - "learning_rate": 0.00019999685245322243, - "loss": 46.0, - "step": 33042 - }, - { - "epoch": 2.52636810214653, - "grad_norm": 0.0010569618316367269, - "learning_rate": 0.00019999685226264495, - "loss": 46.0, - "step": 33043 - }, - { - "epoch": 2.5264445591299194, - "grad_norm": 0.001752821379341185, - "learning_rate": 0.0001999968520720617, - "loss": 46.0, - "step": 33044 - }, - { - "epoch": 2.526521016113309, - "grad_norm": 0.0019070666749030352, - "learning_rate": 0.00019999685188147264, - "loss": 46.0, - "step": 33045 - }, - { - "epoch": 2.526597473096699, - "grad_norm": 0.004493042826652527, - "learning_rate": 0.00019999685169087784, - "loss": 46.0, - "step": 33046 - }, - { - "epoch": 2.5266739300800887, - "grad_norm": 0.0007848155801184475, - "learning_rate": 0.0001999968515002773, - "loss": 46.0, - "step": 33047 - }, - { - "epoch": 2.5267503870634784, - "grad_norm": 0.00039175813435576856, - "learning_rate": 0.00019999685130967095, - "loss": 46.0, - "step": 33048 - }, - { - "epoch": 2.526826844046868, - "grad_norm": 0.0006324544665403664, - "learning_rate": 0.0001999968511190588, - "loss": 46.0, - "step": 33049 - }, - { - "epoch": 2.526903301030258, - "grad_norm": 0.0021999473683536053, - "learning_rate": 0.00019999685092844094, - "loss": 46.0, - "step": 33050 - }, - { - "epoch": 2.5269797580136477, - "grad_norm": 0.0011834505712613463, - "learning_rate": 0.0001999968507378173, - "loss": 46.0, - "step": 33051 - }, - { - "epoch": 2.527056214997037, - "grad_norm": 0.00431178929284215, - "learning_rate": 0.00019999685054718788, - "loss": 46.0, - "step": 33052 - }, - { - "epoch": 2.527132671980427, - "grad_norm": 0.002885617082938552, - "learning_rate": 0.0001999968503565527, - "loss": 46.0, - "step": 33053 - }, - { - "epoch": 2.5272091289638166, - "grad_norm": 0.001554681919515133, - "learning_rate": 0.00019999685016591174, - "loss": 46.0, - "step": 33054 - }, - { - "epoch": 2.5272855859472063, - "grad_norm": 0.011085913516581059, - "learning_rate": 0.000199996849975265, - "loss": 46.0, - "step": 33055 - }, - { - "epoch": 2.527362042930596, - "grad_norm": 0.0007977046770974994, - "learning_rate": 0.00019999684978461252, - "loss": 46.0, - "step": 33056 - }, - { - "epoch": 2.527438499913986, - "grad_norm": 0.00047069592983461916, - "learning_rate": 0.00019999684959395424, - "loss": 46.0, - "step": 33057 - }, - { - "epoch": 2.5275149568973756, - "grad_norm": 0.0007736576371826231, - "learning_rate": 0.00019999684940329022, - "loss": 46.0, - "step": 33058 - }, - { - "epoch": 2.5275914138807654, - "grad_norm": 0.004061607643961906, - "learning_rate": 0.00019999684921262042, - "loss": 46.0, - "step": 33059 - }, - { - "epoch": 2.527667870864155, - "grad_norm": 0.0016254741931334138, - "learning_rate": 0.00019999684902194484, - "loss": 46.0, - "step": 33060 - }, - { - "epoch": 2.527744327847545, - "grad_norm": 0.0014038145309314132, - "learning_rate": 0.00019999684883126347, - "loss": 46.0, - "step": 33061 - }, - { - "epoch": 2.5278207848309346, - "grad_norm": 0.002375418320298195, - "learning_rate": 0.00019999684864057638, - "loss": 46.0, - "step": 33062 - }, - { - "epoch": 2.5278972418143244, - "grad_norm": 0.0010355737758800387, - "learning_rate": 0.0001999968484498835, - "loss": 46.0, - "step": 33063 - }, - { - "epoch": 2.527973698797714, - "grad_norm": 0.0009131869301199913, - "learning_rate": 0.00019999684825918485, - "loss": 46.0, - "step": 33064 - }, - { - "epoch": 2.528050155781104, - "grad_norm": 0.0016305450117215514, - "learning_rate": 0.00019999684806848046, - "loss": 46.0, - "step": 33065 - }, - { - "epoch": 2.5281266127644932, - "grad_norm": 0.0015315579948946834, - "learning_rate": 0.00019999684787777025, - "loss": 46.0, - "step": 33066 - }, - { - "epoch": 2.528203069747883, - "grad_norm": 0.0011016825446859002, - "learning_rate": 0.00019999684768705429, - "loss": 46.0, - "step": 33067 - }, - { - "epoch": 2.5282795267312728, - "grad_norm": 0.0016450200928375125, - "learning_rate": 0.00019999684749633255, - "loss": 46.0, - "step": 33068 - }, - { - "epoch": 2.5283559837146625, - "grad_norm": 0.001494886470027268, - "learning_rate": 0.00019999684730560507, - "loss": 46.0, - "step": 33069 - }, - { - "epoch": 2.5284324406980523, - "grad_norm": 0.0009504807530902326, - "learning_rate": 0.0001999968471148718, - "loss": 46.0, - "step": 33070 - }, - { - "epoch": 2.528508897681442, - "grad_norm": 0.0010892717400565743, - "learning_rate": 0.00019999684692413277, - "loss": 46.0, - "step": 33071 - }, - { - "epoch": 2.528585354664832, - "grad_norm": 0.0015142413321882486, - "learning_rate": 0.00019999684673338797, - "loss": 46.0, - "step": 33072 - }, - { - "epoch": 2.5286618116482216, - "grad_norm": 0.0008207831415347755, - "learning_rate": 0.0001999968465426374, - "loss": 46.0, - "step": 33073 - }, - { - "epoch": 2.528738268631611, - "grad_norm": 0.0041599883697927, - "learning_rate": 0.00019999684635188105, - "loss": 46.0, - "step": 33074 - }, - { - "epoch": 2.5288147256150006, - "grad_norm": 0.0008629719377495348, - "learning_rate": 0.00019999684616111893, - "loss": 46.0, - "step": 33075 - }, - { - "epoch": 2.5288911825983904, - "grad_norm": 0.0015698411734774709, - "learning_rate": 0.00019999684597035104, - "loss": 46.0, - "step": 33076 - }, - { - "epoch": 2.52896763958178, - "grad_norm": 0.0048864311538636684, - "learning_rate": 0.0001999968457795774, - "loss": 46.0, - "step": 33077 - }, - { - "epoch": 2.52904409656517, - "grad_norm": 0.0007527322159148753, - "learning_rate": 0.00019999684558879798, - "loss": 46.0, - "step": 33078 - }, - { - "epoch": 2.5291205535485597, - "grad_norm": 0.0012936079874634743, - "learning_rate": 0.0001999968453980128, - "loss": 46.0, - "step": 33079 - }, - { - "epoch": 2.5291970105319495, - "grad_norm": 0.003917722962796688, - "learning_rate": 0.00019999684520722184, - "loss": 46.0, - "step": 33080 - }, - { - "epoch": 2.529273467515339, - "grad_norm": 0.00046918168663978577, - "learning_rate": 0.00019999684501642513, - "loss": 46.0, - "step": 33081 - }, - { - "epoch": 2.529349924498729, - "grad_norm": 0.0012198842596262693, - "learning_rate": 0.00019999684482562263, - "loss": 46.0, - "step": 33082 - }, - { - "epoch": 2.5294263814821187, - "grad_norm": 0.0036734065506607294, - "learning_rate": 0.00019999684463481435, - "loss": 46.0, - "step": 33083 - }, - { - "epoch": 2.5295028384655085, - "grad_norm": 0.0011560390703380108, - "learning_rate": 0.00019999684444400032, - "loss": 46.0, - "step": 33084 - }, - { - "epoch": 2.5295792954488983, - "grad_norm": 0.0005636009736917913, - "learning_rate": 0.0001999968442531805, - "loss": 46.0, - "step": 33085 - }, - { - "epoch": 2.529655752432288, - "grad_norm": 0.0014248858205974102, - "learning_rate": 0.00019999684406235492, - "loss": 46.0, - "step": 33086 - }, - { - "epoch": 2.529732209415678, - "grad_norm": 0.001160150277428329, - "learning_rate": 0.0001999968438715236, - "loss": 46.0, - "step": 33087 - }, - { - "epoch": 2.529808666399067, - "grad_norm": 0.006105609238147736, - "learning_rate": 0.00019999684368068648, - "loss": 46.0, - "step": 33088 - }, - { - "epoch": 2.529885123382457, - "grad_norm": 0.0021130649838596582, - "learning_rate": 0.0001999968434898436, - "loss": 46.0, - "step": 33089 - }, - { - "epoch": 2.5299615803658466, - "grad_norm": 0.002258037682622671, - "learning_rate": 0.00019999684329899495, - "loss": 46.0, - "step": 33090 - }, - { - "epoch": 2.5300380373492364, - "grad_norm": 0.0029479372315108776, - "learning_rate": 0.00019999684310814054, - "loss": 46.0, - "step": 33091 - }, - { - "epoch": 2.530114494332626, - "grad_norm": 0.0010797957656905055, - "learning_rate": 0.00019999684291728033, - "loss": 46.0, - "step": 33092 - }, - { - "epoch": 2.530190951316016, - "grad_norm": 0.0007504408131353557, - "learning_rate": 0.00019999684272641437, - "loss": 46.0, - "step": 33093 - }, - { - "epoch": 2.5302674082994057, - "grad_norm": 0.001633386593312025, - "learning_rate": 0.00019999684253554266, - "loss": 46.0, - "step": 33094 - }, - { - "epoch": 2.5303438652827954, - "grad_norm": 0.00040354696102440357, - "learning_rate": 0.00019999684234466516, - "loss": 46.0, - "step": 33095 - }, - { - "epoch": 2.5304203222661847, - "grad_norm": 0.0015485252952203155, - "learning_rate": 0.0001999968421537819, - "loss": 46.0, - "step": 33096 - }, - { - "epoch": 2.5304967792495745, - "grad_norm": 0.0013022536877542734, - "learning_rate": 0.00019999684196289285, - "loss": 46.0, - "step": 33097 - }, - { - "epoch": 2.5305732362329643, - "grad_norm": 0.0011216049315407872, - "learning_rate": 0.00019999684177199805, - "loss": 46.0, - "step": 33098 - }, - { - "epoch": 2.530649693216354, - "grad_norm": 0.0005394744803197682, - "learning_rate": 0.00019999684158109745, - "loss": 46.0, - "step": 33099 - }, - { - "epoch": 2.530726150199744, - "grad_norm": 0.0030475840903818607, - "learning_rate": 0.00019999684139019114, - "loss": 46.0, - "step": 33100 - }, - { - "epoch": 2.5308026071831335, - "grad_norm": 0.001269277185201645, - "learning_rate": 0.00019999684119927902, - "loss": 46.0, - "step": 33101 - }, - { - "epoch": 2.5308790641665233, - "grad_norm": 0.0021002446301281452, - "learning_rate": 0.00019999684100836112, - "loss": 46.0, - "step": 33102 - }, - { - "epoch": 2.530955521149913, - "grad_norm": 0.0009012917871586978, - "learning_rate": 0.00019999684081743749, - "loss": 46.0, - "step": 33103 - }, - { - "epoch": 2.531031978133303, - "grad_norm": 0.0014785972889512777, - "learning_rate": 0.00019999684062650807, - "loss": 46.0, - "step": 33104 - }, - { - "epoch": 2.5311084351166926, - "grad_norm": 0.0034333476796746254, - "learning_rate": 0.00019999684043557286, - "loss": 46.0, - "step": 33105 - }, - { - "epoch": 2.5311848921000824, - "grad_norm": 0.0017998154507949948, - "learning_rate": 0.0001999968402446319, - "loss": 46.0, - "step": 33106 - }, - { - "epoch": 2.531261349083472, - "grad_norm": 0.0005658913287334144, - "learning_rate": 0.00019999684005368517, - "loss": 46.0, - "step": 33107 - }, - { - "epoch": 2.531337806066862, - "grad_norm": 0.004063844680786133, - "learning_rate": 0.00019999683986273266, - "loss": 46.0, - "step": 33108 - }, - { - "epoch": 2.531414263050251, - "grad_norm": 0.011886722408235073, - "learning_rate": 0.0001999968396717744, - "loss": 46.0, - "step": 33109 - }, - { - "epoch": 2.531490720033641, - "grad_norm": 0.0013547228882089257, - "learning_rate": 0.0001999968394808104, - "loss": 46.0, - "step": 33110 - }, - { - "epoch": 2.5315671770170307, - "grad_norm": 0.0014104462461546063, - "learning_rate": 0.0001999968392898406, - "loss": 46.0, - "step": 33111 - }, - { - "epoch": 2.5316436340004205, - "grad_norm": 0.0008720263722352684, - "learning_rate": 0.000199996839098865, - "loss": 46.0, - "step": 33112 - }, - { - "epoch": 2.5317200909838102, - "grad_norm": 0.001358148641884327, - "learning_rate": 0.00019999683890788364, - "loss": 46.0, - "step": 33113 - }, - { - "epoch": 2.5317965479672, - "grad_norm": 0.0017387621337547898, - "learning_rate": 0.00019999683871689653, - "loss": 46.0, - "step": 33114 - }, - { - "epoch": 2.5318730049505898, - "grad_norm": 0.001016968977637589, - "learning_rate": 0.00019999683852590366, - "loss": 46.0, - "step": 33115 - }, - { - "epoch": 2.5319494619339795, - "grad_norm": 0.002298773266375065, - "learning_rate": 0.00019999683833490502, - "loss": 46.0, - "step": 33116 - }, - { - "epoch": 2.5320259189173693, - "grad_norm": 0.0013734218664467335, - "learning_rate": 0.00019999683814390058, - "loss": 46.0, - "step": 33117 - }, - { - "epoch": 2.5321023759007586, - "grad_norm": 0.002011583186686039, - "learning_rate": 0.0001999968379528904, - "loss": 46.0, - "step": 33118 - }, - { - "epoch": 2.5321788328841484, - "grad_norm": 0.001407851930707693, - "learning_rate": 0.00019999683776187444, - "loss": 46.0, - "step": 33119 - }, - { - "epoch": 2.532255289867538, - "grad_norm": 0.0011419655056670308, - "learning_rate": 0.0001999968375708527, - "loss": 46.0, - "step": 33120 - }, - { - "epoch": 2.532331746850928, - "grad_norm": 0.0007425840012729168, - "learning_rate": 0.0001999968373798252, - "loss": 46.0, - "step": 33121 - }, - { - "epoch": 2.5324082038343176, - "grad_norm": 0.008075503632426262, - "learning_rate": 0.00019999683718879195, - "loss": 46.0, - "step": 33122 - }, - { - "epoch": 2.5324846608177074, - "grad_norm": 0.0005412347964011133, - "learning_rate": 0.0001999968369977529, - "loss": 46.0, - "step": 33123 - }, - { - "epoch": 2.532561117801097, - "grad_norm": 0.0007657576934434474, - "learning_rate": 0.00019999683680670807, - "loss": 46.0, - "step": 33124 - }, - { - "epoch": 2.532637574784487, - "grad_norm": 0.001452778815291822, - "learning_rate": 0.00019999683661565753, - "loss": 46.0, - "step": 33125 - }, - { - "epoch": 2.5327140317678767, - "grad_norm": 0.0012236229376867414, - "learning_rate": 0.00019999683642460119, - "loss": 46.0, - "step": 33126 - }, - { - "epoch": 2.5327904887512664, - "grad_norm": 0.0012881732545793056, - "learning_rate": 0.00019999683623353907, - "loss": 46.0, - "step": 33127 - }, - { - "epoch": 2.532866945734656, - "grad_norm": 0.0012932823738083243, - "learning_rate": 0.0001999968360424712, - "loss": 46.0, - "step": 33128 - }, - { - "epoch": 2.532943402718046, - "grad_norm": 0.002646426670253277, - "learning_rate": 0.00019999683585139754, - "loss": 46.0, - "step": 33129 - }, - { - "epoch": 2.5330198597014357, - "grad_norm": 0.0012315618805587292, - "learning_rate": 0.00019999683566031813, - "loss": 46.0, - "step": 33130 - }, - { - "epoch": 2.533096316684825, - "grad_norm": 0.0010689053451642394, - "learning_rate": 0.00019999683546923294, - "loss": 46.0, - "step": 33131 - }, - { - "epoch": 2.533172773668215, - "grad_norm": 0.0018512161914259195, - "learning_rate": 0.00019999683527814198, - "loss": 46.0, - "step": 33132 - }, - { - "epoch": 2.5332492306516046, - "grad_norm": 0.003827217733487487, - "learning_rate": 0.00019999683508704522, - "loss": 46.0, - "step": 33133 - }, - { - "epoch": 2.5333256876349943, - "grad_norm": 0.004490390885621309, - "learning_rate": 0.00019999683489594272, - "loss": 46.0, - "step": 33134 - }, - { - "epoch": 2.533402144618384, - "grad_norm": 0.0033484799787402153, - "learning_rate": 0.00019999683470483447, - "loss": 46.0, - "step": 33135 - }, - { - "epoch": 2.533478601601774, - "grad_norm": 0.0030469768680632114, - "learning_rate": 0.00019999683451372042, - "loss": 46.0, - "step": 33136 - }, - { - "epoch": 2.5335550585851636, - "grad_norm": 0.0019159261137247086, - "learning_rate": 0.00019999683432260065, - "loss": 46.0, - "step": 33137 - }, - { - "epoch": 2.5336315155685534, - "grad_norm": 0.0017138720722869039, - "learning_rate": 0.00019999683413147502, - "loss": 46.0, - "step": 33138 - }, - { - "epoch": 2.5337079725519427, - "grad_norm": 0.00629083439707756, - "learning_rate": 0.0001999968339403437, - "loss": 46.0, - "step": 33139 - }, - { - "epoch": 2.5337844295353324, - "grad_norm": 0.0014840165385976434, - "learning_rate": 0.00019999683374920658, - "loss": 46.0, - "step": 33140 - }, - { - "epoch": 2.533860886518722, - "grad_norm": 0.001617331407032907, - "learning_rate": 0.00019999683355806372, - "loss": 46.0, - "step": 33141 - }, - { - "epoch": 2.533937343502112, - "grad_norm": 0.005992131307721138, - "learning_rate": 0.00019999683336691505, - "loss": 46.0, - "step": 33142 - }, - { - "epoch": 2.5340138004855017, - "grad_norm": 0.0008454010239802301, - "learning_rate": 0.00019999683317576064, - "loss": 46.0, - "step": 33143 - }, - { - "epoch": 2.5340902574688915, - "grad_norm": 0.002283011330291629, - "learning_rate": 0.00019999683298460043, - "loss": 46.0, - "step": 33144 - }, - { - "epoch": 2.5341667144522813, - "grad_norm": 0.002449593273922801, - "learning_rate": 0.00019999683279343447, - "loss": 46.0, - "step": 33145 - }, - { - "epoch": 2.534243171435671, - "grad_norm": 0.0007709492347203195, - "learning_rate": 0.00019999683260226274, - "loss": 46.0, - "step": 33146 - }, - { - "epoch": 2.5343196284190608, - "grad_norm": 0.0013838186860084534, - "learning_rate": 0.00019999683241108524, - "loss": 46.0, - "step": 33147 - }, - { - "epoch": 2.5343960854024505, - "grad_norm": 0.0021905521862208843, - "learning_rate": 0.000199996832219902, - "loss": 46.0, - "step": 33148 - }, - { - "epoch": 2.5344725423858403, - "grad_norm": 0.0016100776847451925, - "learning_rate": 0.00019999683202871294, - "loss": 46.0, - "step": 33149 - }, - { - "epoch": 2.53454899936923, - "grad_norm": 0.0025662125553935766, - "learning_rate": 0.00019999683183751814, - "loss": 46.0, - "step": 33150 - }, - { - "epoch": 2.53462545635262, - "grad_norm": 0.004624331835657358, - "learning_rate": 0.00019999683164631754, - "loss": 46.0, - "step": 33151 - }, - { - "epoch": 2.5347019133360096, - "grad_norm": 0.0040935128927230835, - "learning_rate": 0.00019999683145511122, - "loss": 46.0, - "step": 33152 - }, - { - "epoch": 2.534778370319399, - "grad_norm": 0.0006664943648502231, - "learning_rate": 0.00019999683126389908, - "loss": 46.0, - "step": 33153 - }, - { - "epoch": 2.5348548273027887, - "grad_norm": 0.0034209024161100388, - "learning_rate": 0.00019999683107268121, - "loss": 46.0, - "step": 33154 - }, - { - "epoch": 2.5349312842861784, - "grad_norm": 0.0006979820318520069, - "learning_rate": 0.00019999683088145755, - "loss": 46.0, - "step": 33155 - }, - { - "epoch": 2.535007741269568, - "grad_norm": 0.0009852969087660313, - "learning_rate": 0.00019999683069022814, - "loss": 46.0, - "step": 33156 - }, - { - "epoch": 2.535084198252958, - "grad_norm": 0.0010831133695319295, - "learning_rate": 0.00019999683049899293, - "loss": 46.0, - "step": 33157 - }, - { - "epoch": 2.5351606552363477, - "grad_norm": 0.001308755250647664, - "learning_rate": 0.00019999683030775197, - "loss": 46.0, - "step": 33158 - }, - { - "epoch": 2.5352371122197375, - "grad_norm": 0.0013171881437301636, - "learning_rate": 0.00019999683011650524, - "loss": 46.0, - "step": 33159 - }, - { - "epoch": 2.5353135692031272, - "grad_norm": 0.0004779967712238431, - "learning_rate": 0.00019999682992525276, - "loss": 46.0, - "step": 33160 - }, - { - "epoch": 2.5353900261865165, - "grad_norm": 0.0014062876580283046, - "learning_rate": 0.00019999682973399448, - "loss": 46.0, - "step": 33161 - }, - { - "epoch": 2.5354664831699063, - "grad_norm": 0.00204377225600183, - "learning_rate": 0.00019999682954273043, - "loss": 46.0, - "step": 33162 - }, - { - "epoch": 2.535542940153296, - "grad_norm": 0.0034058387391269207, - "learning_rate": 0.00019999682935146064, - "loss": 46.0, - "step": 33163 - }, - { - "epoch": 2.535619397136686, - "grad_norm": 0.0014958331594243646, - "learning_rate": 0.00019999682916018504, - "loss": 46.0, - "step": 33164 - }, - { - "epoch": 2.5356958541200756, - "grad_norm": 0.001771115930750966, - "learning_rate": 0.0001999968289689037, - "loss": 46.0, - "step": 33165 - }, - { - "epoch": 2.5357723111034653, - "grad_norm": 0.0006459006108343601, - "learning_rate": 0.0001999968287776166, - "loss": 46.0, - "step": 33166 - }, - { - "epoch": 2.535848768086855, - "grad_norm": 0.0008857916691340506, - "learning_rate": 0.0001999968285863237, - "loss": 46.0, - "step": 33167 - }, - { - "epoch": 2.535925225070245, - "grad_norm": 0.0011233666446059942, - "learning_rate": 0.00019999682839502505, - "loss": 46.0, - "step": 33168 - }, - { - "epoch": 2.5360016820536346, - "grad_norm": 0.0017704321071505547, - "learning_rate": 0.00019999682820372064, - "loss": 46.0, - "step": 33169 - }, - { - "epoch": 2.5360781390370244, - "grad_norm": 0.0009050744702108204, - "learning_rate": 0.00019999682801241045, - "loss": 46.0, - "step": 33170 - }, - { - "epoch": 2.536154596020414, - "grad_norm": 0.0032069936860352755, - "learning_rate": 0.00019999682782109447, - "loss": 46.0, - "step": 33171 - }, - { - "epoch": 2.536231053003804, - "grad_norm": 0.0012016250984743237, - "learning_rate": 0.00019999682762977274, - "loss": 46.0, - "step": 33172 - }, - { - "epoch": 2.5363075099871937, - "grad_norm": 0.0016288973856717348, - "learning_rate": 0.00019999682743844526, - "loss": 46.0, - "step": 33173 - }, - { - "epoch": 2.5363839669705834, - "grad_norm": 0.004902354441583157, - "learning_rate": 0.00019999682724711199, - "loss": 46.0, - "step": 33174 - }, - { - "epoch": 2.5364604239539728, - "grad_norm": 0.004460535012185574, - "learning_rate": 0.00019999682705577293, - "loss": 46.0, - "step": 33175 - }, - { - "epoch": 2.5365368809373625, - "grad_norm": 0.0013700368581339717, - "learning_rate": 0.00019999682686442814, - "loss": 46.0, - "step": 33176 - }, - { - "epoch": 2.5366133379207523, - "grad_norm": 0.0005593557725660503, - "learning_rate": 0.00019999682667307754, - "loss": 46.0, - "step": 33177 - }, - { - "epoch": 2.536689794904142, - "grad_norm": 0.0013128576101735234, - "learning_rate": 0.0001999968264817212, - "loss": 46.0, - "step": 33178 - }, - { - "epoch": 2.536766251887532, - "grad_norm": 0.0026080291718244553, - "learning_rate": 0.0001999968262903591, - "loss": 46.0, - "step": 33179 - }, - { - "epoch": 2.5368427088709216, - "grad_norm": 0.003099835244938731, - "learning_rate": 0.0001999968260989912, - "loss": 46.0, - "step": 33180 - }, - { - "epoch": 2.5369191658543113, - "grad_norm": 0.001067670644260943, - "learning_rate": 0.00019999682590761755, - "loss": 46.0, - "step": 33181 - }, - { - "epoch": 2.536995622837701, - "grad_norm": 0.0036657836753875017, - "learning_rate": 0.00019999682571623812, - "loss": 46.0, - "step": 33182 - }, - { - "epoch": 2.5370720798210904, - "grad_norm": 0.0008162794983945787, - "learning_rate": 0.00019999682552485293, - "loss": 46.0, - "step": 33183 - }, - { - "epoch": 2.53714853680448, - "grad_norm": 0.0023726962972432375, - "learning_rate": 0.00019999682533346195, - "loss": 46.0, - "step": 33184 - }, - { - "epoch": 2.53722499378787, - "grad_norm": 0.004566030576825142, - "learning_rate": 0.00019999682514206524, - "loss": 46.0, - "step": 33185 - }, - { - "epoch": 2.5373014507712597, - "grad_norm": 0.001993596088141203, - "learning_rate": 0.00019999682495066274, - "loss": 46.0, - "step": 33186 - }, - { - "epoch": 2.5373779077546494, - "grad_norm": 0.0012082206085324287, - "learning_rate": 0.00019999682475925446, - "loss": 46.0, - "step": 33187 - }, - { - "epoch": 2.537454364738039, - "grad_norm": 0.0015303262043744326, - "learning_rate": 0.00019999682456784044, - "loss": 46.0, - "step": 33188 - }, - { - "epoch": 2.537530821721429, - "grad_norm": 0.0009081342723220587, - "learning_rate": 0.00019999682437642062, - "loss": 46.0, - "step": 33189 - }, - { - "epoch": 2.5376072787048187, - "grad_norm": 0.0024489767383784056, - "learning_rate": 0.00019999682418499505, - "loss": 46.0, - "step": 33190 - }, - { - "epoch": 2.5376837356882085, - "grad_norm": 0.0011608997592702508, - "learning_rate": 0.0001999968239935637, - "loss": 46.0, - "step": 33191 - }, - { - "epoch": 2.5377601926715982, - "grad_norm": 0.00101863918825984, - "learning_rate": 0.00019999682380212659, - "loss": 46.0, - "step": 33192 - }, - { - "epoch": 2.537836649654988, - "grad_norm": 0.0020845935214310884, - "learning_rate": 0.00019999682361068367, - "loss": 46.0, - "step": 33193 - }, - { - "epoch": 2.5379131066383778, - "grad_norm": 0.0013141219969838858, - "learning_rate": 0.00019999682341923506, - "loss": 46.0, - "step": 33194 - }, - { - "epoch": 2.5379895636217675, - "grad_norm": 0.0008195218397304416, - "learning_rate": 0.00019999682322778062, - "loss": 46.0, - "step": 33195 - }, - { - "epoch": 2.5380660206051573, - "grad_norm": 0.0008450259338133037, - "learning_rate": 0.0001999968230363204, - "loss": 46.0, - "step": 33196 - }, - { - "epoch": 2.5381424775885466, - "grad_norm": 0.0016064007068052888, - "learning_rate": 0.00019999682284485446, - "loss": 46.0, - "step": 33197 - }, - { - "epoch": 2.5382189345719364, - "grad_norm": 0.0030375681817531586, - "learning_rate": 0.00019999682265338273, - "loss": 46.0, - "step": 33198 - }, - { - "epoch": 2.538295391555326, - "grad_norm": 0.00444938987493515, - "learning_rate": 0.00019999682246190522, - "loss": 46.0, - "step": 33199 - }, - { - "epoch": 2.538371848538716, - "grad_norm": 0.0016077859327197075, - "learning_rate": 0.00019999682227042195, - "loss": 46.0, - "step": 33200 - }, - { - "epoch": 2.5384483055221057, - "grad_norm": 0.0022703332360833883, - "learning_rate": 0.00019999682207893292, - "loss": 46.0, - "step": 33201 - }, - { - "epoch": 2.5385247625054954, - "grad_norm": 0.006222869269549847, - "learning_rate": 0.0001999968218874381, - "loss": 46.0, - "step": 33202 - }, - { - "epoch": 2.538601219488885, - "grad_norm": 0.001716992468573153, - "learning_rate": 0.00019999682169593753, - "loss": 46.0, - "step": 33203 - }, - { - "epoch": 2.538677676472275, - "grad_norm": 0.004201965872198343, - "learning_rate": 0.0001999968215044312, - "loss": 46.0, - "step": 33204 - }, - { - "epoch": 2.5387541334556643, - "grad_norm": 0.003738280152902007, - "learning_rate": 0.00019999682131291907, - "loss": 46.0, - "step": 33205 - }, - { - "epoch": 2.538830590439054, - "grad_norm": 0.001802522805519402, - "learning_rate": 0.00019999682112140118, - "loss": 46.0, - "step": 33206 - }, - { - "epoch": 2.5389070474224438, - "grad_norm": 0.0011579536367207766, - "learning_rate": 0.00019999682092987752, - "loss": 46.0, - "step": 33207 - }, - { - "epoch": 2.5389835044058335, - "grad_norm": 0.002159234369173646, - "learning_rate": 0.00019999682073834808, - "loss": 46.0, - "step": 33208 - }, - { - "epoch": 2.5390599613892233, - "grad_norm": 0.0009778555249795318, - "learning_rate": 0.0001999968205468129, - "loss": 46.0, - "step": 33209 - }, - { - "epoch": 2.539136418372613, - "grad_norm": 0.0015894186217337847, - "learning_rate": 0.00019999682035527191, - "loss": 46.0, - "step": 33210 - }, - { - "epoch": 2.539212875356003, - "grad_norm": 0.002017409075051546, - "learning_rate": 0.00019999682016372518, - "loss": 46.0, - "step": 33211 - }, - { - "epoch": 2.5392893323393926, - "grad_norm": 0.0012309554731473327, - "learning_rate": 0.0001999968199721727, - "loss": 46.0, - "step": 33212 - }, - { - "epoch": 2.5393657893227823, - "grad_norm": 0.0009768244344741106, - "learning_rate": 0.00019999681978061443, - "loss": 46.0, - "step": 33213 - }, - { - "epoch": 2.539442246306172, - "grad_norm": 0.001832338748499751, - "learning_rate": 0.00019999681958905038, - "loss": 46.0, - "step": 33214 - }, - { - "epoch": 2.539518703289562, - "grad_norm": 0.0016539524076506495, - "learning_rate": 0.00019999681939748056, - "loss": 46.0, - "step": 33215 - }, - { - "epoch": 2.5395951602729516, - "grad_norm": 0.005241594277322292, - "learning_rate": 0.00019999681920590502, - "loss": 46.0, - "step": 33216 - }, - { - "epoch": 2.5396716172563414, - "grad_norm": 0.0011519135441631079, - "learning_rate": 0.00019999681901432365, - "loss": 46.0, - "step": 33217 - }, - { - "epoch": 2.539748074239731, - "grad_norm": 0.00048249386600218713, - "learning_rate": 0.00019999681882273653, - "loss": 46.0, - "step": 33218 - }, - { - "epoch": 2.5398245312231205, - "grad_norm": 0.0012649719137698412, - "learning_rate": 0.00019999681863114367, - "loss": 46.0, - "step": 33219 - }, - { - "epoch": 2.5399009882065102, - "grad_norm": 0.0008692637202329934, - "learning_rate": 0.00019999681843954498, - "loss": 46.0, - "step": 33220 - }, - { - "epoch": 2.5399774451899, - "grad_norm": 0.0014745030784979463, - "learning_rate": 0.00019999681824794057, - "loss": 46.0, - "step": 33221 - }, - { - "epoch": 2.5400539021732897, - "grad_norm": 0.0013729128986597061, - "learning_rate": 0.00019999681805633036, - "loss": 46.0, - "step": 33222 - }, - { - "epoch": 2.5401303591566795, - "grad_norm": 0.0007544734980911016, - "learning_rate": 0.0001999968178647144, - "loss": 46.0, - "step": 33223 - }, - { - "epoch": 2.5402068161400693, - "grad_norm": 0.0025739565026015043, - "learning_rate": 0.00019999681767309267, - "loss": 46.0, - "step": 33224 - }, - { - "epoch": 2.540283273123459, - "grad_norm": 0.000791975820902735, - "learning_rate": 0.00019999681748146517, - "loss": 46.0, - "step": 33225 - }, - { - "epoch": 2.540359730106849, - "grad_norm": 0.00694398395717144, - "learning_rate": 0.0001999968172898319, - "loss": 46.0, - "step": 33226 - }, - { - "epoch": 2.540436187090238, - "grad_norm": 0.0014294743305072188, - "learning_rate": 0.00019999681709819287, - "loss": 46.0, - "step": 33227 - }, - { - "epoch": 2.540512644073628, - "grad_norm": 0.0010745887411758304, - "learning_rate": 0.00019999681690654802, - "loss": 46.0, - "step": 33228 - }, - { - "epoch": 2.5405891010570176, - "grad_norm": 0.009002290666103363, - "learning_rate": 0.00019999681671489748, - "loss": 46.0, - "step": 33229 - }, - { - "epoch": 2.5406655580404074, - "grad_norm": 0.0010493833106011152, - "learning_rate": 0.0001999968165232411, - "loss": 46.0, - "step": 33230 - }, - { - "epoch": 2.540742015023797, - "grad_norm": 0.0008426719577983022, - "learning_rate": 0.000199996816331579, - "loss": 46.0, - "step": 33231 - }, - { - "epoch": 2.540818472007187, - "grad_norm": 0.0007171841571107507, - "learning_rate": 0.0001999968161399111, - "loss": 46.0, - "step": 33232 - }, - { - "epoch": 2.5408949289905767, - "grad_norm": 0.002814331790432334, - "learning_rate": 0.00019999681594823744, - "loss": 46.0, - "step": 33233 - }, - { - "epoch": 2.5409713859739664, - "grad_norm": 0.00034515763400122523, - "learning_rate": 0.000199996815756558, - "loss": 46.0, - "step": 33234 - }, - { - "epoch": 2.541047842957356, - "grad_norm": 0.0007364174234680831, - "learning_rate": 0.00019999681556487283, - "loss": 46.0, - "step": 33235 - }, - { - "epoch": 2.541124299940746, - "grad_norm": 0.0016923134680837393, - "learning_rate": 0.00019999681537318187, - "loss": 46.0, - "step": 33236 - }, - { - "epoch": 2.5412007569241357, - "grad_norm": 0.001314935740083456, - "learning_rate": 0.00019999681518148514, - "loss": 46.0, - "step": 33237 - }, - { - "epoch": 2.5412772139075255, - "grad_norm": 0.0013194195926189423, - "learning_rate": 0.0001999968149897826, - "loss": 46.0, - "step": 33238 - }, - { - "epoch": 2.5413536708909152, - "grad_norm": 0.0010526138357818127, - "learning_rate": 0.00019999681479807436, - "loss": 46.0, - "step": 33239 - }, - { - "epoch": 2.5414301278743046, - "grad_norm": 0.0029739351011812687, - "learning_rate": 0.0001999968146063603, - "loss": 46.0, - "step": 33240 - }, - { - "epoch": 2.5415065848576943, - "grad_norm": 0.001488157780840993, - "learning_rate": 0.00019999681441464052, - "loss": 46.0, - "step": 33241 - }, - { - "epoch": 2.541583041841084, - "grad_norm": 0.005616537760943174, - "learning_rate": 0.00019999681422291495, - "loss": 46.0, - "step": 33242 - }, - { - "epoch": 2.541659498824474, - "grad_norm": 0.0008851112797856331, - "learning_rate": 0.00019999681403118358, - "loss": 46.0, - "step": 33243 - }, - { - "epoch": 2.5417359558078636, - "grad_norm": 0.0014336224412545562, - "learning_rate": 0.00019999681383944646, - "loss": 46.0, - "step": 33244 - }, - { - "epoch": 2.5418124127912534, - "grad_norm": 0.0009643150260671973, - "learning_rate": 0.00019999681364770358, - "loss": 46.0, - "step": 33245 - }, - { - "epoch": 2.541888869774643, - "grad_norm": 0.0013688254402950406, - "learning_rate": 0.0001999968134559549, - "loss": 46.0, - "step": 33246 - }, - { - "epoch": 2.541965326758033, - "grad_norm": 0.00523220282047987, - "learning_rate": 0.0001999968132642005, - "loss": 46.0, - "step": 33247 - }, - { - "epoch": 2.5420417837414226, - "grad_norm": 0.0007325896294787526, - "learning_rate": 0.0001999968130724403, - "loss": 46.0, - "step": 33248 - }, - { - "epoch": 2.542118240724812, - "grad_norm": 0.0009506585774943233, - "learning_rate": 0.00019999681288067434, - "loss": 46.0, - "step": 33249 - }, - { - "epoch": 2.5421946977082017, - "grad_norm": 0.0008434379706159234, - "learning_rate": 0.0001999968126889026, - "loss": 46.0, - "step": 33250 - }, - { - "epoch": 2.5422711546915915, - "grad_norm": 0.0008841908420436084, - "learning_rate": 0.0001999968124971251, - "loss": 46.0, - "step": 33251 - }, - { - "epoch": 2.5423476116749812, - "grad_norm": 0.0014496170915663242, - "learning_rate": 0.0001999968123053418, - "loss": 46.0, - "step": 33252 - }, - { - "epoch": 2.542424068658371, - "grad_norm": 0.0011465571587905288, - "learning_rate": 0.00019999681211355278, - "loss": 46.0, - "step": 33253 - }, - { - "epoch": 2.5425005256417608, - "grad_norm": 0.00113820587284863, - "learning_rate": 0.00019999681192175796, - "loss": 46.0, - "step": 33254 - }, - { - "epoch": 2.5425769826251505, - "grad_norm": 0.000366052845492959, - "learning_rate": 0.0001999968117299574, - "loss": 46.0, - "step": 33255 - }, - { - "epoch": 2.5426534396085403, - "grad_norm": 0.0012464741012081504, - "learning_rate": 0.00019999681153815103, - "loss": 46.0, - "step": 33256 - }, - { - "epoch": 2.54272989659193, - "grad_norm": 0.0011187423951923847, - "learning_rate": 0.0001999968113463389, - "loss": 46.0, - "step": 33257 - }, - { - "epoch": 2.54280635357532, - "grad_norm": 0.0029635403770953417, - "learning_rate": 0.00019999681115452102, - "loss": 46.0, - "step": 33258 - }, - { - "epoch": 2.5428828105587096, - "grad_norm": 0.0016510817222297192, - "learning_rate": 0.0001999968109626974, - "loss": 46.0, - "step": 33259 - }, - { - "epoch": 2.5429592675420993, - "grad_norm": 0.0010473427828401327, - "learning_rate": 0.00019999681077086792, - "loss": 46.0, - "step": 33260 - }, - { - "epoch": 2.543035724525489, - "grad_norm": 0.0004295198596082628, - "learning_rate": 0.00019999681057903274, - "loss": 46.0, - "step": 33261 - }, - { - "epoch": 2.5431121815088784, - "grad_norm": 0.0024259027559310198, - "learning_rate": 0.0001999968103871918, - "loss": 46.0, - "step": 33262 - }, - { - "epoch": 2.543188638492268, - "grad_norm": 0.0007955812616273761, - "learning_rate": 0.00019999681019534503, - "loss": 46.0, - "step": 33263 - }, - { - "epoch": 2.543265095475658, - "grad_norm": 0.0005930929910391569, - "learning_rate": 0.00019999681000349253, - "loss": 46.0, - "step": 33264 - }, - { - "epoch": 2.5433415524590477, - "grad_norm": 0.0008387153502553701, - "learning_rate": 0.00019999680981163426, - "loss": 46.0, - "step": 33265 - }, - { - "epoch": 2.5434180094424375, - "grad_norm": 0.0006535985157825053, - "learning_rate": 0.00019999680961977023, - "loss": 46.0, - "step": 33266 - }, - { - "epoch": 2.543494466425827, - "grad_norm": 0.0007571714813821018, - "learning_rate": 0.0001999968094279004, - "loss": 46.0, - "step": 33267 - }, - { - "epoch": 2.543570923409217, - "grad_norm": 0.000826736562885344, - "learning_rate": 0.00019999680923602484, - "loss": 46.0, - "step": 33268 - }, - { - "epoch": 2.5436473803926067, - "grad_norm": 0.010101170279085636, - "learning_rate": 0.00019999680904414347, - "loss": 46.0, - "step": 33269 - }, - { - "epoch": 2.543723837375996, - "grad_norm": 0.0013747261837124825, - "learning_rate": 0.00019999680885225636, - "loss": 46.0, - "step": 33270 - }, - { - "epoch": 2.543800294359386, - "grad_norm": 0.0010341558372601867, - "learning_rate": 0.00019999680866036347, - "loss": 46.0, - "step": 33271 - }, - { - "epoch": 2.5438767513427756, - "grad_norm": 0.0012722710380330682, - "learning_rate": 0.00019999680846846484, - "loss": 46.0, - "step": 33272 - }, - { - "epoch": 2.5439532083261653, - "grad_norm": 0.00043182953959330916, - "learning_rate": 0.00019999680827656037, - "loss": 46.0, - "step": 33273 - }, - { - "epoch": 2.544029665309555, - "grad_norm": 0.0030942584853619337, - "learning_rate": 0.0001999968080846502, - "loss": 46.0, - "step": 33274 - }, - { - "epoch": 2.544106122292945, - "grad_norm": 0.002040538005530834, - "learning_rate": 0.00019999680789273424, - "loss": 46.0, - "step": 33275 - }, - { - "epoch": 2.5441825792763346, - "grad_norm": 0.001795622636564076, - "learning_rate": 0.00019999680770081248, - "loss": 46.0, - "step": 33276 - }, - { - "epoch": 2.5442590362597244, - "grad_norm": 0.0005658180452883244, - "learning_rate": 0.00019999680750888498, - "loss": 46.0, - "step": 33277 - }, - { - "epoch": 2.544335493243114, - "grad_norm": 0.00115725037176162, - "learning_rate": 0.0001999968073169517, - "loss": 46.0, - "step": 33278 - }, - { - "epoch": 2.544411950226504, - "grad_norm": 0.0005812536110170186, - "learning_rate": 0.00019999680712501269, - "loss": 46.0, - "step": 33279 - }, - { - "epoch": 2.5444884072098937, - "grad_norm": 0.002618224360048771, - "learning_rate": 0.00019999680693306786, - "loss": 46.0, - "step": 33280 - }, - { - "epoch": 2.5445648641932834, - "grad_norm": 0.0012272645253688097, - "learning_rate": 0.00019999680674111727, - "loss": 46.0, - "step": 33281 - }, - { - "epoch": 2.544641321176673, - "grad_norm": 0.0010032330173999071, - "learning_rate": 0.0001999968065491609, - "loss": 46.0, - "step": 33282 - }, - { - "epoch": 2.544717778160063, - "grad_norm": 0.0012789410538971424, - "learning_rate": 0.00019999680635719881, - "loss": 46.0, - "step": 33283 - }, - { - "epoch": 2.5447942351434523, - "grad_norm": 0.0009242161177098751, - "learning_rate": 0.0001999968061652309, - "loss": 46.0, - "step": 33284 - }, - { - "epoch": 2.544870692126842, - "grad_norm": 0.0009049131767824292, - "learning_rate": 0.00019999680597325724, - "loss": 46.0, - "step": 33285 - }, - { - "epoch": 2.544947149110232, - "grad_norm": 0.0017617036355659366, - "learning_rate": 0.00019999680578127783, - "loss": 46.0, - "step": 33286 - }, - { - "epoch": 2.5450236060936215, - "grad_norm": 0.0009699770016595721, - "learning_rate": 0.00019999680558929262, - "loss": 46.0, - "step": 33287 - }, - { - "epoch": 2.5451000630770113, - "grad_norm": 0.0015616221353411674, - "learning_rate": 0.00019999680539730167, - "loss": 46.0, - "step": 33288 - }, - { - "epoch": 2.545176520060401, - "grad_norm": 0.0021542140748351812, - "learning_rate": 0.00019999680520530494, - "loss": 46.0, - "step": 33289 - }, - { - "epoch": 2.545252977043791, - "grad_norm": 0.000639402074739337, - "learning_rate": 0.0001999968050133024, - "loss": 46.0, - "step": 33290 - }, - { - "epoch": 2.5453294340271806, - "grad_norm": 0.0028817288111895323, - "learning_rate": 0.00019999680482129414, - "loss": 46.0, - "step": 33291 - }, - { - "epoch": 2.54540589101057, - "grad_norm": 0.002486302051693201, - "learning_rate": 0.00019999680462928012, - "loss": 46.0, - "step": 33292 - }, - { - "epoch": 2.5454823479939597, - "grad_norm": 0.0012124533532187343, - "learning_rate": 0.0001999968044372603, - "loss": 46.0, - "step": 33293 - }, - { - "epoch": 2.5455588049773494, - "grad_norm": 0.0017838348867371678, - "learning_rate": 0.0001999968042452347, - "loss": 46.0, - "step": 33294 - }, - { - "epoch": 2.545635261960739, - "grad_norm": 0.001625219825655222, - "learning_rate": 0.00019999680405320336, - "loss": 46.0, - "step": 33295 - }, - { - "epoch": 2.545711718944129, - "grad_norm": 0.0009912997484207153, - "learning_rate": 0.00019999680386116624, - "loss": 46.0, - "step": 33296 - }, - { - "epoch": 2.5457881759275187, - "grad_norm": 0.0007726296898908913, - "learning_rate": 0.00019999680366912336, - "loss": 46.0, - "step": 33297 - }, - { - "epoch": 2.5458646329109085, - "grad_norm": 0.0005682088667526841, - "learning_rate": 0.00019999680347707467, - "loss": 46.0, - "step": 33298 - }, - { - "epoch": 2.5459410898942982, - "grad_norm": 0.0009149821707978845, - "learning_rate": 0.00019999680328502026, - "loss": 46.0, - "step": 33299 - }, - { - "epoch": 2.546017546877688, - "grad_norm": 0.0012556836009025574, - "learning_rate": 0.00019999680309296008, - "loss": 46.0, - "step": 33300 - }, - { - "epoch": 2.5460940038610778, - "grad_norm": 0.0017113303765654564, - "learning_rate": 0.0001999968029008941, - "loss": 46.0, - "step": 33301 - }, - { - "epoch": 2.5461704608444675, - "grad_norm": 0.0011956775560975075, - "learning_rate": 0.00019999680270882237, - "loss": 46.0, - "step": 33302 - }, - { - "epoch": 2.5462469178278573, - "grad_norm": 0.005448531825095415, - "learning_rate": 0.00019999680251674487, - "loss": 46.0, - "step": 33303 - }, - { - "epoch": 2.546323374811247, - "grad_norm": 0.001171428244560957, - "learning_rate": 0.00019999680232466157, - "loss": 46.0, - "step": 33304 - }, - { - "epoch": 2.546399831794637, - "grad_norm": 0.0004494576423894614, - "learning_rate": 0.00019999680213257252, - "loss": 46.0, - "step": 33305 - }, - { - "epoch": 2.546476288778026, - "grad_norm": 0.0011833661701530218, - "learning_rate": 0.00019999680194047773, - "loss": 46.0, - "step": 33306 - }, - { - "epoch": 2.546552745761416, - "grad_norm": 0.0019324348540976644, - "learning_rate": 0.00019999680174837713, - "loss": 46.0, - "step": 33307 - }, - { - "epoch": 2.5466292027448056, - "grad_norm": 0.002247591968625784, - "learning_rate": 0.0001999968015562708, - "loss": 46.0, - "step": 33308 - }, - { - "epoch": 2.5467056597281954, - "grad_norm": 0.0015211805002763867, - "learning_rate": 0.00019999680136415865, - "loss": 46.0, - "step": 33309 - }, - { - "epoch": 2.546782116711585, - "grad_norm": 0.0016391740646213293, - "learning_rate": 0.0001999968011720408, - "loss": 46.0, - "step": 33310 - }, - { - "epoch": 2.546858573694975, - "grad_norm": 0.0014959746040403843, - "learning_rate": 0.00019999680097991713, - "loss": 46.0, - "step": 33311 - }, - { - "epoch": 2.5469350306783647, - "grad_norm": 0.0007031409186311066, - "learning_rate": 0.0001999968007877877, - "loss": 46.0, - "step": 33312 - }, - { - "epoch": 2.5470114876617544, - "grad_norm": 0.0019786416087299585, - "learning_rate": 0.0001999968005956525, - "loss": 46.0, - "step": 33313 - }, - { - "epoch": 2.5470879446451438, - "grad_norm": 0.0008429271983914077, - "learning_rate": 0.0001999968004035115, - "loss": 46.0, - "step": 33314 - }, - { - "epoch": 2.5471644016285335, - "grad_norm": 0.0015400771517306566, - "learning_rate": 0.0001999968002113648, - "loss": 46.0, - "step": 33315 - }, - { - "epoch": 2.5472408586119233, - "grad_norm": 0.0009245475521311164, - "learning_rate": 0.00019999680001921228, - "loss": 46.0, - "step": 33316 - }, - { - "epoch": 2.547317315595313, - "grad_norm": 0.0016756549011915922, - "learning_rate": 0.00019999679982705398, - "loss": 46.0, - "step": 33317 - }, - { - "epoch": 2.547393772578703, - "grad_norm": 0.0011326465755701065, - "learning_rate": 0.00019999679963489, - "loss": 46.0, - "step": 33318 - }, - { - "epoch": 2.5474702295620926, - "grad_norm": 0.0005454855272546411, - "learning_rate": 0.00019999679944272014, - "loss": 46.0, - "step": 33319 - }, - { - "epoch": 2.5475466865454823, - "grad_norm": 0.0008574057137593627, - "learning_rate": 0.00019999679925054457, - "loss": 46.0, - "step": 33320 - }, - { - "epoch": 2.547623143528872, - "grad_norm": 0.0014961649430915713, - "learning_rate": 0.0001999967990583632, - "loss": 46.0, - "step": 33321 - }, - { - "epoch": 2.547699600512262, - "grad_norm": 0.0017506371950730681, - "learning_rate": 0.0001999967988661761, - "loss": 46.0, - "step": 33322 - }, - { - "epoch": 2.5477760574956516, - "grad_norm": 0.0020219087600708008, - "learning_rate": 0.0001999967986739832, - "loss": 46.0, - "step": 33323 - }, - { - "epoch": 2.5478525144790414, - "grad_norm": 0.004890563897788525, - "learning_rate": 0.00019999679848178454, - "loss": 46.0, - "step": 33324 - }, - { - "epoch": 2.547928971462431, - "grad_norm": 0.0014269209932535887, - "learning_rate": 0.0001999967982895801, - "loss": 46.0, - "step": 33325 - }, - { - "epoch": 2.548005428445821, - "grad_norm": 0.0008529675542376935, - "learning_rate": 0.0001999967980973699, - "loss": 46.0, - "step": 33326 - }, - { - "epoch": 2.5480818854292107, - "grad_norm": 0.001352623919956386, - "learning_rate": 0.00019999679790515392, - "loss": 46.0, - "step": 33327 - }, - { - "epoch": 2.5481583424126, - "grad_norm": 0.0012432413641363382, - "learning_rate": 0.0001999967977129322, - "loss": 46.0, - "step": 33328 - }, - { - "epoch": 2.5482347993959897, - "grad_norm": 0.0006741942488588393, - "learning_rate": 0.0001999967975207047, - "loss": 46.0, - "step": 33329 - }, - { - "epoch": 2.5483112563793795, - "grad_norm": 0.0006733816699124873, - "learning_rate": 0.0001999967973284714, - "loss": 46.0, - "step": 33330 - }, - { - "epoch": 2.5483877133627693, - "grad_norm": 0.0005597376730293036, - "learning_rate": 0.00019999679713623238, - "loss": 46.0, - "step": 33331 - }, - { - "epoch": 2.548464170346159, - "grad_norm": 0.0027153403498232365, - "learning_rate": 0.00019999679694398756, - "loss": 46.0, - "step": 33332 - }, - { - "epoch": 2.548540627329549, - "grad_norm": 0.0020021172240376472, - "learning_rate": 0.00019999679675173696, - "loss": 46.0, - "step": 33333 - }, - { - "epoch": 2.5486170843129385, - "grad_norm": 0.0025056200101971626, - "learning_rate": 0.00019999679655948062, - "loss": 46.0, - "step": 33334 - }, - { - "epoch": 2.5486935412963283, - "grad_norm": 0.0010182602563872933, - "learning_rate": 0.0001999967963672185, - "loss": 46.0, - "step": 33335 - }, - { - "epoch": 2.5487699982797176, - "grad_norm": 0.0009912733221426606, - "learning_rate": 0.00019999679617495062, - "loss": 46.0, - "step": 33336 - }, - { - "epoch": 2.5488464552631074, - "grad_norm": 0.0009709055302664638, - "learning_rate": 0.00019999679598267696, - "loss": 46.0, - "step": 33337 - }, - { - "epoch": 2.548922912246497, - "grad_norm": 0.001121595036238432, - "learning_rate": 0.00019999679579039753, - "loss": 46.0, - "step": 33338 - }, - { - "epoch": 2.548999369229887, - "grad_norm": 0.0010202991543337703, - "learning_rate": 0.0001999967955981123, - "loss": 46.0, - "step": 33339 - }, - { - "epoch": 2.5490758262132767, - "grad_norm": 0.0019579462241381407, - "learning_rate": 0.00019999679540582137, - "loss": 46.0, - "step": 33340 - }, - { - "epoch": 2.5491522831966664, - "grad_norm": 0.0018069371581077576, - "learning_rate": 0.00019999679521352461, - "loss": 46.0, - "step": 33341 - }, - { - "epoch": 2.549228740180056, - "grad_norm": 0.00036687328247353435, - "learning_rate": 0.00019999679502122211, - "loss": 46.0, - "step": 33342 - }, - { - "epoch": 2.549305197163446, - "grad_norm": 0.0003666691482067108, - "learning_rate": 0.00019999679482891381, - "loss": 46.0, - "step": 33343 - }, - { - "epoch": 2.5493816541468357, - "grad_norm": 0.0014866949059069157, - "learning_rate": 0.0001999967946365998, - "loss": 46.0, - "step": 33344 - }, - { - "epoch": 2.5494581111302255, - "grad_norm": 0.000989401014521718, - "learning_rate": 0.00019999679444427997, - "loss": 46.0, - "step": 33345 - }, - { - "epoch": 2.5495345681136152, - "grad_norm": 0.0017552630743011832, - "learning_rate": 0.00019999679425195438, - "loss": 46.0, - "step": 33346 - }, - { - "epoch": 2.549611025097005, - "grad_norm": 0.0023222470190376043, - "learning_rate": 0.00019999679405962304, - "loss": 46.0, - "step": 33347 - }, - { - "epoch": 2.5496874820803948, - "grad_norm": 0.0011187938507646322, - "learning_rate": 0.0001999967938672859, - "loss": 46.0, - "step": 33348 - }, - { - "epoch": 2.5497639390637845, - "grad_norm": 0.0005822560051456094, - "learning_rate": 0.00019999679367494302, - "loss": 46.0, - "step": 33349 - }, - { - "epoch": 2.549840396047174, - "grad_norm": 0.0005749605479650199, - "learning_rate": 0.00019999679348259438, - "loss": 46.0, - "step": 33350 - }, - { - "epoch": 2.5499168530305636, - "grad_norm": 0.0005238081212155521, - "learning_rate": 0.00019999679329023992, - "loss": 46.0, - "step": 33351 - }, - { - "epoch": 2.5499933100139534, - "grad_norm": 0.0006781385745853186, - "learning_rate": 0.00019999679309787974, - "loss": 46.0, - "step": 33352 - }, - { - "epoch": 2.550069766997343, - "grad_norm": 0.0012105421628803015, - "learning_rate": 0.00019999679290551376, - "loss": 46.0, - "step": 33353 - }, - { - "epoch": 2.550146223980733, - "grad_norm": 0.002104924526065588, - "learning_rate": 0.000199996792713142, - "loss": 46.0, - "step": 33354 - }, - { - "epoch": 2.5502226809641226, - "grad_norm": 0.0015381406992673874, - "learning_rate": 0.0001999967925207645, - "loss": 46.0, - "step": 33355 - }, - { - "epoch": 2.5502991379475124, - "grad_norm": 0.0014545018784701824, - "learning_rate": 0.00019999679232838124, - "loss": 46.0, - "step": 33356 - }, - { - "epoch": 2.550375594930902, - "grad_norm": 0.0015850639902055264, - "learning_rate": 0.00019999679213599222, - "loss": 46.0, - "step": 33357 - }, - { - "epoch": 2.5504520519142915, - "grad_norm": 0.0022465726360678673, - "learning_rate": 0.00019999679194359737, - "loss": 46.0, - "step": 33358 - }, - { - "epoch": 2.5505285088976812, - "grad_norm": 0.0015107401413843036, - "learning_rate": 0.00019999679175119678, - "loss": 46.0, - "step": 33359 - }, - { - "epoch": 2.550604965881071, - "grad_norm": 0.0005488633178174496, - "learning_rate": 0.00019999679155879044, - "loss": 46.0, - "step": 33360 - }, - { - "epoch": 2.5506814228644608, - "grad_norm": 0.0007519600912928581, - "learning_rate": 0.00019999679136637833, - "loss": 46.0, - "step": 33361 - }, - { - "epoch": 2.5507578798478505, - "grad_norm": 0.0017860318766906857, - "learning_rate": 0.0001999967911739604, - "loss": 46.0, - "step": 33362 - }, - { - "epoch": 2.5508343368312403, - "grad_norm": 0.0011648437939584255, - "learning_rate": 0.00019999679098153675, - "loss": 46.0, - "step": 33363 - }, - { - "epoch": 2.55091079381463, - "grad_norm": 0.002433040412142873, - "learning_rate": 0.00019999679078910735, - "loss": 46.0, - "step": 33364 - }, - { - "epoch": 2.55098725079802, - "grad_norm": 0.0032220236025750637, - "learning_rate": 0.00019999679059667211, - "loss": 46.0, - "step": 33365 - }, - { - "epoch": 2.5510637077814096, - "grad_norm": 0.0004233763611409813, - "learning_rate": 0.00019999679040423116, - "loss": 46.0, - "step": 33366 - }, - { - "epoch": 2.5511401647647993, - "grad_norm": 0.0004299703286960721, - "learning_rate": 0.00019999679021178444, - "loss": 46.0, - "step": 33367 - }, - { - "epoch": 2.551216621748189, - "grad_norm": 0.001925118500366807, - "learning_rate": 0.0001999967900193319, - "loss": 46.0, - "step": 33368 - }, - { - "epoch": 2.551293078731579, - "grad_norm": 0.0017158965347334743, - "learning_rate": 0.0001999967898268736, - "loss": 46.0, - "step": 33369 - }, - { - "epoch": 2.5513695357149686, - "grad_norm": 0.00152087421156466, - "learning_rate": 0.00019999678963440956, - "loss": 46.0, - "step": 33370 - }, - { - "epoch": 2.551445992698358, - "grad_norm": 0.0013696595560759306, - "learning_rate": 0.00019999678944193977, - "loss": 46.0, - "step": 33371 - }, - { - "epoch": 2.5515224496817477, - "grad_norm": 0.001246079453267157, - "learning_rate": 0.00019999678924946418, - "loss": 46.0, - "step": 33372 - }, - { - "epoch": 2.5515989066651374, - "grad_norm": 0.0010100255021825433, - "learning_rate": 0.00019999678905698284, - "loss": 46.0, - "step": 33373 - }, - { - "epoch": 2.551675363648527, - "grad_norm": 0.0011573780793696642, - "learning_rate": 0.0001999967888644957, - "loss": 46.0, - "step": 33374 - }, - { - "epoch": 2.551751820631917, - "grad_norm": 0.0016226014122366905, - "learning_rate": 0.0001999967886720028, - "loss": 46.0, - "step": 33375 - }, - { - "epoch": 2.5518282776153067, - "grad_norm": 0.0026063635013997555, - "learning_rate": 0.00019999678847950416, - "loss": 46.0, - "step": 33376 - }, - { - "epoch": 2.5519047345986965, - "grad_norm": 0.0005384384421631694, - "learning_rate": 0.00019999678828699972, - "loss": 46.0, - "step": 33377 - }, - { - "epoch": 2.5519811915820863, - "grad_norm": 0.0006929469527676702, - "learning_rate": 0.00019999678809448952, - "loss": 46.0, - "step": 33378 - }, - { - "epoch": 2.552057648565476, - "grad_norm": 0.00575612485408783, - "learning_rate": 0.00019999678790197356, - "loss": 46.0, - "step": 33379 - }, - { - "epoch": 2.5521341055488653, - "grad_norm": 0.0017424135003238916, - "learning_rate": 0.00019999678770945184, - "loss": 46.0, - "step": 33380 - }, - { - "epoch": 2.552210562532255, - "grad_norm": 0.0011673882836475968, - "learning_rate": 0.0001999967875169243, - "loss": 46.0, - "step": 33381 - }, - { - "epoch": 2.552287019515645, - "grad_norm": 0.0013589125592261553, - "learning_rate": 0.000199996787324391, - "loss": 46.0, - "step": 33382 - }, - { - "epoch": 2.5523634764990346, - "grad_norm": 0.0007842128979973495, - "learning_rate": 0.000199996787131852, - "loss": 46.0, - "step": 33383 - }, - { - "epoch": 2.5524399334824244, - "grad_norm": 0.0025036949664354324, - "learning_rate": 0.00019999678693930718, - "loss": 46.0, - "step": 33384 - }, - { - "epoch": 2.552516390465814, - "grad_norm": 0.0018862350843846798, - "learning_rate": 0.00019999678674675658, - "loss": 46.0, - "step": 33385 - }, - { - "epoch": 2.552592847449204, - "grad_norm": 0.006162185687571764, - "learning_rate": 0.00019999678655420022, - "loss": 46.0, - "step": 33386 - }, - { - "epoch": 2.5526693044325937, - "grad_norm": 0.005736600141972303, - "learning_rate": 0.0001999967863616381, - "loss": 46.0, - "step": 33387 - }, - { - "epoch": 2.5527457614159834, - "grad_norm": 0.0017516075167804956, - "learning_rate": 0.0001999967861690702, - "loss": 46.0, - "step": 33388 - }, - { - "epoch": 2.552822218399373, - "grad_norm": 0.0022579121869057417, - "learning_rate": 0.00019999678597649653, - "loss": 46.0, - "step": 33389 - }, - { - "epoch": 2.552898675382763, - "grad_norm": 0.0016126515110954642, - "learning_rate": 0.00019999678578391713, - "loss": 46.0, - "step": 33390 - }, - { - "epoch": 2.5529751323661527, - "grad_norm": 0.001691891928203404, - "learning_rate": 0.0001999967855913319, - "loss": 46.0, - "step": 33391 - }, - { - "epoch": 2.5530515893495425, - "grad_norm": 0.0013627305161207914, - "learning_rate": 0.00019999678539874094, - "loss": 46.0, - "step": 33392 - }, - { - "epoch": 2.5531280463329318, - "grad_norm": 0.00231366278603673, - "learning_rate": 0.0001999967852061442, - "loss": 46.0, - "step": 33393 - }, - { - "epoch": 2.5532045033163215, - "grad_norm": 0.00300302030518651, - "learning_rate": 0.0001999967850135417, - "loss": 46.0, - "step": 33394 - }, - { - "epoch": 2.5532809602997113, - "grad_norm": 0.001603800104930997, - "learning_rate": 0.00019999678482093342, - "loss": 46.0, - "step": 33395 - }, - { - "epoch": 2.553357417283101, - "grad_norm": 0.0015295346966013312, - "learning_rate": 0.00019999678462831938, - "loss": 46.0, - "step": 33396 - }, - { - "epoch": 2.553433874266491, - "grad_norm": 0.001510647707618773, - "learning_rate": 0.00019999678443569953, - "loss": 46.0, - "step": 33397 - }, - { - "epoch": 2.5535103312498806, - "grad_norm": 0.0020446134731173515, - "learning_rate": 0.00019999678424307397, - "loss": 46.0, - "step": 33398 - }, - { - "epoch": 2.5535867882332703, - "grad_norm": 0.0016776991542428732, - "learning_rate": 0.0001999967840504426, - "loss": 46.0, - "step": 33399 - }, - { - "epoch": 2.55366324521666, - "grad_norm": 0.0006239119102247059, - "learning_rate": 0.0001999967838578055, - "loss": 46.0, - "step": 33400 - }, - { - "epoch": 2.5537397022000494, - "grad_norm": 0.002045438624918461, - "learning_rate": 0.0001999967836651626, - "loss": 46.0, - "step": 33401 - }, - { - "epoch": 2.553816159183439, - "grad_norm": 0.008139276877045631, - "learning_rate": 0.00019999678347251392, - "loss": 46.0, - "step": 33402 - }, - { - "epoch": 2.553892616166829, - "grad_norm": 0.002613227814435959, - "learning_rate": 0.00019999678327985949, - "loss": 46.0, - "step": 33403 - }, - { - "epoch": 2.5539690731502187, - "grad_norm": 0.001017614733427763, - "learning_rate": 0.00019999678308719928, - "loss": 46.0, - "step": 33404 - }, - { - "epoch": 2.5540455301336085, - "grad_norm": 0.005765401292592287, - "learning_rate": 0.00019999678289453333, - "loss": 46.0, - "step": 33405 - }, - { - "epoch": 2.5541219871169982, - "grad_norm": 0.0026276176795363426, - "learning_rate": 0.00019999678270186158, - "loss": 46.0, - "step": 33406 - }, - { - "epoch": 2.554198444100388, - "grad_norm": 0.0007947334670461714, - "learning_rate": 0.00019999678250918405, - "loss": 46.0, - "step": 33407 - }, - { - "epoch": 2.5542749010837777, - "grad_norm": 0.0008668103255331516, - "learning_rate": 0.00019999678231650078, - "loss": 46.0, - "step": 33408 - }, - { - "epoch": 2.5543513580671675, - "grad_norm": 0.0006482668686658144, - "learning_rate": 0.00019999678212381173, - "loss": 46.0, - "step": 33409 - }, - { - "epoch": 2.5544278150505573, - "grad_norm": 0.0008261186303570867, - "learning_rate": 0.00019999678193111692, - "loss": 46.0, - "step": 33410 - }, - { - "epoch": 2.554504272033947, - "grad_norm": 0.002974668750539422, - "learning_rate": 0.00019999678173841635, - "loss": 46.0, - "step": 33411 - }, - { - "epoch": 2.554580729017337, - "grad_norm": 0.0009730941383168101, - "learning_rate": 0.00019999678154570999, - "loss": 46.0, - "step": 33412 - }, - { - "epoch": 2.5546571860007266, - "grad_norm": 0.001791209913790226, - "learning_rate": 0.00019999678135299785, - "loss": 46.0, - "step": 33413 - }, - { - "epoch": 2.5547336429841163, - "grad_norm": 0.0011224858462810516, - "learning_rate": 0.00019999678116027996, - "loss": 46.0, - "step": 33414 - }, - { - "epoch": 2.5548100999675056, - "grad_norm": 0.0017287597293034196, - "learning_rate": 0.0001999967809675563, - "loss": 46.0, - "step": 33415 - }, - { - "epoch": 2.5548865569508954, - "grad_norm": 0.001097777159884572, - "learning_rate": 0.00019999678077482687, - "loss": 46.0, - "step": 33416 - }, - { - "epoch": 2.554963013934285, - "grad_norm": 0.001997461309656501, - "learning_rate": 0.00019999678058209167, - "loss": 46.0, - "step": 33417 - }, - { - "epoch": 2.555039470917675, - "grad_norm": 0.0033196588046848774, - "learning_rate": 0.0001999967803893507, - "loss": 46.0, - "step": 33418 - }, - { - "epoch": 2.5551159279010647, - "grad_norm": 0.003946193493902683, - "learning_rate": 0.00019999678019660394, - "loss": 46.0, - "step": 33419 - }, - { - "epoch": 2.5551923848844544, - "grad_norm": 0.0007623904966749251, - "learning_rate": 0.00019999678000385144, - "loss": 46.0, - "step": 33420 - }, - { - "epoch": 2.555268841867844, - "grad_norm": 0.002262078458443284, - "learning_rate": 0.00019999677981109314, - "loss": 46.0, - "step": 33421 - }, - { - "epoch": 2.555345298851234, - "grad_norm": 0.0016731873620301485, - "learning_rate": 0.00019999677961832912, - "loss": 46.0, - "step": 33422 - }, - { - "epoch": 2.5554217558346233, - "grad_norm": 0.0014001419767737389, - "learning_rate": 0.00019999677942555928, - "loss": 46.0, - "step": 33423 - }, - { - "epoch": 2.555498212818013, - "grad_norm": 0.0010974627221003175, - "learning_rate": 0.00019999677923278371, - "loss": 46.0, - "step": 33424 - }, - { - "epoch": 2.555574669801403, - "grad_norm": 0.0031964555382728577, - "learning_rate": 0.00019999677904000235, - "loss": 46.0, - "step": 33425 - }, - { - "epoch": 2.5556511267847926, - "grad_norm": 0.002898680279031396, - "learning_rate": 0.00019999677884721524, - "loss": 46.0, - "step": 33426 - }, - { - "epoch": 2.5557275837681823, - "grad_norm": 0.0013742714654654264, - "learning_rate": 0.00019999677865442235, - "loss": 46.0, - "step": 33427 - }, - { - "epoch": 2.555804040751572, - "grad_norm": 0.0030296859331429005, - "learning_rate": 0.00019999677846162367, - "loss": 46.0, - "step": 33428 - }, - { - "epoch": 2.555880497734962, - "grad_norm": 0.001304943929426372, - "learning_rate": 0.00019999677826881924, - "loss": 46.0, - "step": 33429 - }, - { - "epoch": 2.5559569547183516, - "grad_norm": 0.0012732669711112976, - "learning_rate": 0.00019999677807600903, - "loss": 46.0, - "step": 33430 - }, - { - "epoch": 2.5560334117017414, - "grad_norm": 0.005901511758565903, - "learning_rate": 0.00019999677788319305, - "loss": 46.0, - "step": 33431 - }, - { - "epoch": 2.556109868685131, - "grad_norm": 0.001071861363016069, - "learning_rate": 0.00019999677769037133, - "loss": 46.0, - "step": 33432 - }, - { - "epoch": 2.556186325668521, - "grad_norm": 0.014260247349739075, - "learning_rate": 0.0001999967774975438, - "loss": 46.0, - "step": 33433 - }, - { - "epoch": 2.5562627826519106, - "grad_norm": 0.0006738616502843797, - "learning_rate": 0.00019999677730471053, - "loss": 46.0, - "step": 33434 - }, - { - "epoch": 2.5563392396353004, - "grad_norm": 0.0005618222639895976, - "learning_rate": 0.0001999967771118715, - "loss": 46.0, - "step": 33435 - }, - { - "epoch": 2.55641569661869, - "grad_norm": 0.0009934313129633665, - "learning_rate": 0.00019999677691902664, - "loss": 46.0, - "step": 33436 - }, - { - "epoch": 2.5564921536020795, - "grad_norm": 0.001667368458583951, - "learning_rate": 0.00019999677672617608, - "loss": 46.0, - "step": 33437 - }, - { - "epoch": 2.5565686105854692, - "grad_norm": 0.0036817453801631927, - "learning_rate": 0.00019999677653331972, - "loss": 46.0, - "step": 33438 - }, - { - "epoch": 2.556645067568859, - "grad_norm": 0.001768269226886332, - "learning_rate": 0.0001999967763404576, - "loss": 46.0, - "step": 33439 - }, - { - "epoch": 2.5567215245522488, - "grad_norm": 0.0015852535143494606, - "learning_rate": 0.0001999967761475897, - "loss": 46.0, - "step": 33440 - }, - { - "epoch": 2.5567979815356385, - "grad_norm": 0.0037774841766804457, - "learning_rate": 0.000199996775954716, - "loss": 46.0, - "step": 33441 - }, - { - "epoch": 2.5568744385190283, - "grad_norm": 0.0019883010536432266, - "learning_rate": 0.0001999967757618366, - "loss": 46.0, - "step": 33442 - }, - { - "epoch": 2.556950895502418, - "grad_norm": 0.0014235106064006686, - "learning_rate": 0.0001999967755689514, - "loss": 46.0, - "step": 33443 - }, - { - "epoch": 2.557027352485808, - "grad_norm": 0.0007633913191966712, - "learning_rate": 0.0001999967753760604, - "loss": 46.0, - "step": 33444 - }, - { - "epoch": 2.557103809469197, - "grad_norm": 0.0004396808799356222, - "learning_rate": 0.0001999967751831637, - "loss": 46.0, - "step": 33445 - }, - { - "epoch": 2.557180266452587, - "grad_norm": 0.0007296592812053859, - "learning_rate": 0.00019999677499026118, - "loss": 46.0, - "step": 33446 - }, - { - "epoch": 2.5572567234359767, - "grad_norm": 0.0022788874339312315, - "learning_rate": 0.00019999677479735288, - "loss": 46.0, - "step": 33447 - }, - { - "epoch": 2.5573331804193664, - "grad_norm": 0.0008734318544156849, - "learning_rate": 0.00019999677460443883, - "loss": 46.0, - "step": 33448 - }, - { - "epoch": 2.557409637402756, - "grad_norm": 0.0006941691972315311, - "learning_rate": 0.00019999677441151902, - "loss": 46.0, - "step": 33449 - }, - { - "epoch": 2.557486094386146, - "grad_norm": 0.004113301634788513, - "learning_rate": 0.00019999677421859345, - "loss": 46.0, - "step": 33450 - }, - { - "epoch": 2.5575625513695357, - "grad_norm": 0.0012209225678816438, - "learning_rate": 0.0001999967740256621, - "loss": 46.0, - "step": 33451 - }, - { - "epoch": 2.5576390083529255, - "grad_norm": 0.0018497559940442443, - "learning_rate": 0.00019999677383272495, - "loss": 46.0, - "step": 33452 - }, - { - "epoch": 2.557715465336315, - "grad_norm": 0.0012195002054795623, - "learning_rate": 0.00019999677363978204, - "loss": 46.0, - "step": 33453 - }, - { - "epoch": 2.557791922319705, - "grad_norm": 0.0019482439383864403, - "learning_rate": 0.00019999677344683338, - "loss": 46.0, - "step": 33454 - }, - { - "epoch": 2.5578683793030947, - "grad_norm": 0.0013766209594905376, - "learning_rate": 0.00019999677325387895, - "loss": 46.0, - "step": 33455 - }, - { - "epoch": 2.5579448362864845, - "grad_norm": 0.0021581826731562614, - "learning_rate": 0.00019999677306091872, - "loss": 46.0, - "step": 33456 - }, - { - "epoch": 2.5580212932698743, - "grad_norm": 0.003526849439367652, - "learning_rate": 0.00019999677286795277, - "loss": 46.0, - "step": 33457 - }, - { - "epoch": 2.558097750253264, - "grad_norm": 0.0009136443841271102, - "learning_rate": 0.00019999677267498102, - "loss": 46.0, - "step": 33458 - }, - { - "epoch": 2.5581742072366533, - "grad_norm": 0.0012041646987199783, - "learning_rate": 0.0001999967724820035, - "loss": 46.0, - "step": 33459 - }, - { - "epoch": 2.558250664220043, - "grad_norm": 0.0011009437730535865, - "learning_rate": 0.00019999677228902022, - "loss": 46.0, - "step": 33460 - }, - { - "epoch": 2.558327121203433, - "grad_norm": 0.0035847376566380262, - "learning_rate": 0.00019999677209603118, - "loss": 46.0, - "step": 33461 - }, - { - "epoch": 2.5584035781868226, - "grad_norm": 0.003409388242289424, - "learning_rate": 0.00019999677190303636, - "loss": 46.0, - "step": 33462 - }, - { - "epoch": 2.5584800351702124, - "grad_norm": 0.000990078435279429, - "learning_rate": 0.00019999677171003577, - "loss": 46.0, - "step": 33463 - }, - { - "epoch": 2.558556492153602, - "grad_norm": 0.0005495616933330894, - "learning_rate": 0.00019999677151702944, - "loss": 46.0, - "step": 33464 - }, - { - "epoch": 2.558632949136992, - "grad_norm": 0.0006910604424774647, - "learning_rate": 0.0001999967713240173, - "loss": 46.0, - "step": 33465 - }, - { - "epoch": 2.5587094061203817, - "grad_norm": 0.0028846152126789093, - "learning_rate": 0.0001999967711309994, - "loss": 46.0, - "step": 33466 - }, - { - "epoch": 2.558785863103771, - "grad_norm": 0.0016454203287139535, - "learning_rate": 0.00019999677093797573, - "loss": 46.0, - "step": 33467 - }, - { - "epoch": 2.5588623200871607, - "grad_norm": 0.0008280647452920675, - "learning_rate": 0.0001999967707449463, - "loss": 46.0, - "step": 33468 - }, - { - "epoch": 2.5589387770705505, - "grad_norm": 0.0014637556159868836, - "learning_rate": 0.0001999967705519111, - "loss": 46.0, - "step": 33469 - }, - { - "epoch": 2.5590152340539403, - "grad_norm": 0.001353220664896071, - "learning_rate": 0.00019999677035887012, - "loss": 46.0, - "step": 33470 - }, - { - "epoch": 2.55909169103733, - "grad_norm": 0.001629892736673355, - "learning_rate": 0.00019999677016582337, - "loss": 46.0, - "step": 33471 - }, - { - "epoch": 2.55916814802072, - "grad_norm": 0.003064531832933426, - "learning_rate": 0.00019999676997277085, - "loss": 46.0, - "step": 33472 - }, - { - "epoch": 2.5592446050041096, - "grad_norm": 0.005234165582805872, - "learning_rate": 0.00019999676977971258, - "loss": 46.0, - "step": 33473 - }, - { - "epoch": 2.5593210619874993, - "grad_norm": 0.0006723539554513991, - "learning_rate": 0.00019999676958664853, - "loss": 46.0, - "step": 33474 - }, - { - "epoch": 2.559397518970889, - "grad_norm": 0.0031113573350012302, - "learning_rate": 0.00019999676939357872, - "loss": 46.0, - "step": 33475 - }, - { - "epoch": 2.559473975954279, - "grad_norm": 0.00047030122368596494, - "learning_rate": 0.00019999676920050315, - "loss": 46.0, - "step": 33476 - }, - { - "epoch": 2.5595504329376686, - "grad_norm": 0.0019747463520616293, - "learning_rate": 0.00019999676900742176, - "loss": 46.0, - "step": 33477 - }, - { - "epoch": 2.5596268899210584, - "grad_norm": 0.0023429852444678545, - "learning_rate": 0.00019999676881433465, - "loss": 46.0, - "step": 33478 - }, - { - "epoch": 2.559703346904448, - "grad_norm": 0.005712544545531273, - "learning_rate": 0.00019999676862124174, - "loss": 46.0, - "step": 33479 - }, - { - "epoch": 2.559779803887838, - "grad_norm": 0.001561876735650003, - "learning_rate": 0.00019999676842814308, - "loss": 46.0, - "step": 33480 - }, - { - "epoch": 2.559856260871227, - "grad_norm": 0.00130911183077842, - "learning_rate": 0.00019999676823503863, - "loss": 46.0, - "step": 33481 - }, - { - "epoch": 2.559932717854617, - "grad_norm": 0.0027656827587634325, - "learning_rate": 0.00019999676804192845, - "loss": 46.0, - "step": 33482 - }, - { - "epoch": 2.5600091748380067, - "grad_norm": 0.0010164499981328845, - "learning_rate": 0.00019999676784881247, - "loss": 46.0, - "step": 33483 - }, - { - "epoch": 2.5600856318213965, - "grad_norm": 0.0018488691421225667, - "learning_rate": 0.00019999676765569072, - "loss": 46.0, - "step": 33484 - }, - { - "epoch": 2.5601620888047862, - "grad_norm": 0.0022583925165235996, - "learning_rate": 0.00019999676746256323, - "loss": 46.0, - "step": 33485 - }, - { - "epoch": 2.560238545788176, - "grad_norm": 0.001279329531826079, - "learning_rate": 0.00019999676726942993, - "loss": 46.0, - "step": 33486 - }, - { - "epoch": 2.5603150027715658, - "grad_norm": 0.002781086368486285, - "learning_rate": 0.00019999676707629086, - "loss": 46.0, - "step": 33487 - }, - { - "epoch": 2.5603914597549555, - "grad_norm": 0.0018222532235085964, - "learning_rate": 0.00019999676688314607, - "loss": 46.0, - "step": 33488 - }, - { - "epoch": 2.560467916738345, - "grad_norm": 0.00404335418716073, - "learning_rate": 0.00019999676668999548, - "loss": 46.0, - "step": 33489 - }, - { - "epoch": 2.5605443737217346, - "grad_norm": 0.0010960038052871823, - "learning_rate": 0.0001999967664968391, - "loss": 46.0, - "step": 33490 - }, - { - "epoch": 2.5606208307051244, - "grad_norm": 0.0065406016074121, - "learning_rate": 0.00019999676630367698, - "loss": 46.0, - "step": 33491 - }, - { - "epoch": 2.560697287688514, - "grad_norm": 0.0009528365335427225, - "learning_rate": 0.00019999676611050907, - "loss": 46.0, - "step": 33492 - }, - { - "epoch": 2.560773744671904, - "grad_norm": 0.0016528575215488672, - "learning_rate": 0.00019999676591733541, - "loss": 46.0, - "step": 33493 - }, - { - "epoch": 2.5608502016552936, - "grad_norm": 0.0014290288090705872, - "learning_rate": 0.00019999676572415599, - "loss": 46.0, - "step": 33494 - }, - { - "epoch": 2.5609266586386834, - "grad_norm": 0.0037852406967431307, - "learning_rate": 0.00019999676553097078, - "loss": 46.0, - "step": 33495 - }, - { - "epoch": 2.561003115622073, - "grad_norm": 0.0017256696010008454, - "learning_rate": 0.0001999967653377798, - "loss": 46.0, - "step": 33496 - }, - { - "epoch": 2.561079572605463, - "grad_norm": 0.0007360866293311119, - "learning_rate": 0.00019999676514458306, - "loss": 46.0, - "step": 33497 - }, - { - "epoch": 2.5611560295888527, - "grad_norm": 0.0005949679180048406, - "learning_rate": 0.00019999676495138053, - "loss": 46.0, - "step": 33498 - }, - { - "epoch": 2.5612324865722425, - "grad_norm": 0.0014350616838783026, - "learning_rate": 0.00019999676475817226, - "loss": 46.0, - "step": 33499 - }, - { - "epoch": 2.561308943555632, - "grad_norm": 0.0010082963854074478, - "learning_rate": 0.00019999676456495822, - "loss": 46.0, - "step": 33500 - }, - { - "epoch": 2.561385400539022, - "grad_norm": 0.0010203431593254209, - "learning_rate": 0.00019999676437173838, - "loss": 46.0, - "step": 33501 - }, - { - "epoch": 2.5614618575224113, - "grad_norm": 0.001922943745739758, - "learning_rate": 0.00019999676417851281, - "loss": 46.0, - "step": 33502 - }, - { - "epoch": 2.561538314505801, - "grad_norm": 0.0004550886806100607, - "learning_rate": 0.00019999676398528145, - "loss": 46.0, - "step": 33503 - }, - { - "epoch": 2.561614771489191, - "grad_norm": 0.0019321753643453121, - "learning_rate": 0.00019999676379204432, - "loss": 46.0, - "step": 33504 - }, - { - "epoch": 2.5616912284725806, - "grad_norm": 0.0015047990018501878, - "learning_rate": 0.0001999967635988014, - "loss": 46.0, - "step": 33505 - }, - { - "epoch": 2.5617676854559703, - "grad_norm": 0.0038688669446855783, - "learning_rate": 0.00019999676340555275, - "loss": 46.0, - "step": 33506 - }, - { - "epoch": 2.56184414243936, - "grad_norm": 0.001031518098898232, - "learning_rate": 0.00019999676321229832, - "loss": 46.0, - "step": 33507 - }, - { - "epoch": 2.56192059942275, - "grad_norm": 0.001505220658145845, - "learning_rate": 0.00019999676301903812, - "loss": 46.0, - "step": 33508 - }, - { - "epoch": 2.5619970564061396, - "grad_norm": 0.0029162021819502115, - "learning_rate": 0.00019999676282577214, - "loss": 46.0, - "step": 33509 - }, - { - "epoch": 2.5620735133895294, - "grad_norm": 0.013235975056886673, - "learning_rate": 0.0001999967626325004, - "loss": 46.0, - "step": 33510 - }, - { - "epoch": 2.5621499703729187, - "grad_norm": 0.0014610809739679098, - "learning_rate": 0.00019999676243922287, - "loss": 46.0, - "step": 33511 - }, - { - "epoch": 2.5622264273563085, - "grad_norm": 0.0014320885529741645, - "learning_rate": 0.0001999967622459396, - "loss": 46.0, - "step": 33512 - }, - { - "epoch": 2.562302884339698, - "grad_norm": 0.0010532052256166935, - "learning_rate": 0.00019999676205265053, - "loss": 46.0, - "step": 33513 - }, - { - "epoch": 2.562379341323088, - "grad_norm": 0.0010041113710030913, - "learning_rate": 0.00019999676185935571, - "loss": 46.0, - "step": 33514 - }, - { - "epoch": 2.5624557983064777, - "grad_norm": 0.0019522263901308179, - "learning_rate": 0.00019999676166605512, - "loss": 46.0, - "step": 33515 - }, - { - "epoch": 2.5625322552898675, - "grad_norm": 0.001968939322978258, - "learning_rate": 0.00019999676147274876, - "loss": 46.0, - "step": 33516 - }, - { - "epoch": 2.5626087122732573, - "grad_norm": 0.0012359711108729243, - "learning_rate": 0.00019999676127943663, - "loss": 46.0, - "step": 33517 - }, - { - "epoch": 2.562685169256647, - "grad_norm": 0.0009915182599797845, - "learning_rate": 0.00019999676108611874, - "loss": 46.0, - "step": 33518 - }, - { - "epoch": 2.562761626240037, - "grad_norm": 0.005109227262437344, - "learning_rate": 0.00019999676089279506, - "loss": 46.0, - "step": 33519 - }, - { - "epoch": 2.5628380832234265, - "grad_norm": 0.0030386755242943764, - "learning_rate": 0.00019999676069946563, - "loss": 46.0, - "step": 33520 - }, - { - "epoch": 2.5629145402068163, - "grad_norm": 0.003647509263828397, - "learning_rate": 0.00019999676050613043, - "loss": 46.0, - "step": 33521 - }, - { - "epoch": 2.562990997190206, - "grad_norm": 0.001174344215542078, - "learning_rate": 0.00019999676031278945, - "loss": 46.0, - "step": 33522 - }, - { - "epoch": 2.563067454173596, - "grad_norm": 0.0018427810864523053, - "learning_rate": 0.0001999967601194427, - "loss": 46.0, - "step": 33523 - }, - { - "epoch": 2.563143911156985, - "grad_norm": 0.002110244706273079, - "learning_rate": 0.00019999675992609018, - "loss": 46.0, - "step": 33524 - }, - { - "epoch": 2.563220368140375, - "grad_norm": 0.002172976965084672, - "learning_rate": 0.0001999967597327319, - "loss": 46.0, - "step": 33525 - }, - { - "epoch": 2.5632968251237647, - "grad_norm": 0.002311317715793848, - "learning_rate": 0.00019999675953936787, - "loss": 46.0, - "step": 33526 - }, - { - "epoch": 2.5633732821071544, - "grad_norm": 0.0005780644714832306, - "learning_rate": 0.00019999675934599803, - "loss": 46.0, - "step": 33527 - }, - { - "epoch": 2.563449739090544, - "grad_norm": 0.002176650334149599, - "learning_rate": 0.00019999675915262244, - "loss": 46.0, - "step": 33528 - }, - { - "epoch": 2.563526196073934, - "grad_norm": 0.000832623743917793, - "learning_rate": 0.00019999675895924108, - "loss": 46.0, - "step": 33529 - }, - { - "epoch": 2.5636026530573237, - "grad_norm": 0.0008746240637265146, - "learning_rate": 0.00019999675876585394, - "loss": 46.0, - "step": 33530 - }, - { - "epoch": 2.5636791100407135, - "grad_norm": 0.0012328110169619322, - "learning_rate": 0.00019999675857246106, - "loss": 46.0, - "step": 33531 - }, - { - "epoch": 2.563755567024103, - "grad_norm": 0.0006522799958474934, - "learning_rate": 0.00019999675837906238, - "loss": 46.0, - "step": 33532 - }, - { - "epoch": 2.5638320240074925, - "grad_norm": 0.0017877287464216352, - "learning_rate": 0.00019999675818565795, - "loss": 46.0, - "step": 33533 - }, - { - "epoch": 2.5639084809908823, - "grad_norm": 0.0022729779593646526, - "learning_rate": 0.00019999675799224775, - "loss": 46.0, - "step": 33534 - }, - { - "epoch": 2.563984937974272, - "grad_norm": 0.0014852328458800912, - "learning_rate": 0.00019999675779883177, - "loss": 46.0, - "step": 33535 - }, - { - "epoch": 2.564061394957662, - "grad_norm": 0.0029160999692976475, - "learning_rate": 0.00019999675760541002, - "loss": 46.0, - "step": 33536 - }, - { - "epoch": 2.5641378519410516, - "grad_norm": 0.0014590134378522635, - "learning_rate": 0.0001999967574119825, - "loss": 46.0, - "step": 33537 - }, - { - "epoch": 2.5642143089244414, - "grad_norm": 0.0007296906551346183, - "learning_rate": 0.00019999675721854923, - "loss": 46.0, - "step": 33538 - }, - { - "epoch": 2.564290765907831, - "grad_norm": 0.008215557783842087, - "learning_rate": 0.0001999967570251102, - "loss": 46.0, - "step": 33539 - }, - { - "epoch": 2.564367222891221, - "grad_norm": 0.004027934279292822, - "learning_rate": 0.00019999675683166537, - "loss": 46.0, - "step": 33540 - }, - { - "epoch": 2.5644436798746106, - "grad_norm": 0.007147032767534256, - "learning_rate": 0.00019999675663821476, - "loss": 46.0, - "step": 33541 - }, - { - "epoch": 2.5645201368580004, - "grad_norm": 0.0016328798374161124, - "learning_rate": 0.00019999675644475842, - "loss": 46.0, - "step": 33542 - }, - { - "epoch": 2.56459659384139, - "grad_norm": 0.001315317116677761, - "learning_rate": 0.0001999967562512963, - "loss": 46.0, - "step": 33543 - }, - { - "epoch": 2.56467305082478, - "grad_norm": 0.004822699353098869, - "learning_rate": 0.00019999675605782838, - "loss": 46.0, - "step": 33544 - }, - { - "epoch": 2.5647495078081697, - "grad_norm": 0.0032904769759625196, - "learning_rate": 0.0001999967558643547, - "loss": 46.0, - "step": 33545 - }, - { - "epoch": 2.564825964791559, - "grad_norm": 0.0007563504041172564, - "learning_rate": 0.00019999675567087527, - "loss": 46.0, - "step": 33546 - }, - { - "epoch": 2.5649024217749488, - "grad_norm": 0.0038826102390885353, - "learning_rate": 0.00019999675547739004, - "loss": 46.0, - "step": 33547 - }, - { - "epoch": 2.5649788787583385, - "grad_norm": 0.0017097495729103684, - "learning_rate": 0.0001999967552838991, - "loss": 46.0, - "step": 33548 - }, - { - "epoch": 2.5650553357417283, - "grad_norm": 0.0016232189955189824, - "learning_rate": 0.00019999675509040231, - "loss": 46.0, - "step": 33549 - }, - { - "epoch": 2.565131792725118, - "grad_norm": 0.0006448444910347462, - "learning_rate": 0.00019999675489689982, - "loss": 46.0, - "step": 33550 - }, - { - "epoch": 2.565208249708508, - "grad_norm": 0.001872544176876545, - "learning_rate": 0.00019999675470339155, - "loss": 46.0, - "step": 33551 - }, - { - "epoch": 2.5652847066918976, - "grad_norm": 0.0015749622834846377, - "learning_rate": 0.00019999675450987748, - "loss": 46.0, - "step": 33552 - }, - { - "epoch": 2.5653611636752873, - "grad_norm": 0.0016301916912198067, - "learning_rate": 0.00019999675431635767, - "loss": 46.0, - "step": 33553 - }, - { - "epoch": 2.5654376206586766, - "grad_norm": 0.003610240062698722, - "learning_rate": 0.00019999675412283208, - "loss": 46.0, - "step": 33554 - }, - { - "epoch": 2.5655140776420664, - "grad_norm": 0.000785507436376065, - "learning_rate": 0.00019999675392930072, - "loss": 46.0, - "step": 33555 - }, - { - "epoch": 2.565590534625456, - "grad_norm": 0.0005011885077692568, - "learning_rate": 0.00019999675373576356, - "loss": 46.0, - "step": 33556 - }, - { - "epoch": 2.565666991608846, - "grad_norm": 0.0007357841241173446, - "learning_rate": 0.00019999675354222067, - "loss": 46.0, - "step": 33557 - }, - { - "epoch": 2.5657434485922357, - "grad_norm": 0.0011004769476130605, - "learning_rate": 0.000199996753348672, - "loss": 46.0, - "step": 33558 - }, - { - "epoch": 2.5658199055756254, - "grad_norm": 0.0021856778766959906, - "learning_rate": 0.00019999675315511756, - "loss": 46.0, - "step": 33559 - }, - { - "epoch": 2.565896362559015, - "grad_norm": 0.0007466661627404392, - "learning_rate": 0.00019999675296155736, - "loss": 46.0, - "step": 33560 - }, - { - "epoch": 2.565972819542405, - "grad_norm": 0.0012138142483308911, - "learning_rate": 0.0001999967527679914, - "loss": 46.0, - "step": 33561 - }, - { - "epoch": 2.5660492765257947, - "grad_norm": 0.0024261625949293375, - "learning_rate": 0.00019999675257441967, - "loss": 46.0, - "step": 33562 - }, - { - "epoch": 2.5661257335091845, - "grad_norm": 0.003449970157817006, - "learning_rate": 0.00019999675238084212, - "loss": 46.0, - "step": 33563 - }, - { - "epoch": 2.5662021904925743, - "grad_norm": 0.005892294924706221, - "learning_rate": 0.00019999675218725885, - "loss": 46.0, - "step": 33564 - }, - { - "epoch": 2.566278647475964, - "grad_norm": 0.004068222362548113, - "learning_rate": 0.00019999675199366978, - "loss": 46.0, - "step": 33565 - }, - { - "epoch": 2.5663551044593538, - "grad_norm": 0.003267345717176795, - "learning_rate": 0.00019999675180007497, - "loss": 46.0, - "step": 33566 - }, - { - "epoch": 2.5664315614427435, - "grad_norm": 0.0005713776336051524, - "learning_rate": 0.00019999675160647438, - "loss": 46.0, - "step": 33567 - }, - { - "epoch": 2.566508018426133, - "grad_norm": 0.000813572492916137, - "learning_rate": 0.000199996751412868, - "loss": 46.0, - "step": 33568 - }, - { - "epoch": 2.5665844754095226, - "grad_norm": 0.0006860420107841492, - "learning_rate": 0.00019999675121925588, - "loss": 46.0, - "step": 33569 - }, - { - "epoch": 2.5666609323929124, - "grad_norm": 0.0026067919097840786, - "learning_rate": 0.00019999675102563797, - "loss": 46.0, - "step": 33570 - }, - { - "epoch": 2.566737389376302, - "grad_norm": 0.0018609222024679184, - "learning_rate": 0.00019999675083201432, - "loss": 46.0, - "step": 33571 - }, - { - "epoch": 2.566813846359692, - "grad_norm": 0.0013453407445922494, - "learning_rate": 0.00019999675063838487, - "loss": 46.0, - "step": 33572 - }, - { - "epoch": 2.5668903033430817, - "grad_norm": 0.0034865285269916058, - "learning_rate": 0.00019999675044474966, - "loss": 46.0, - "step": 33573 - }, - { - "epoch": 2.5669667603264714, - "grad_norm": 0.0015838018152862787, - "learning_rate": 0.0001999967502511087, - "loss": 46.0, - "step": 33574 - }, - { - "epoch": 2.567043217309861, - "grad_norm": 0.000810201745480299, - "learning_rate": 0.00019999675005746194, - "loss": 46.0, - "step": 33575 - }, - { - "epoch": 2.5671196742932505, - "grad_norm": 0.0010030876146629453, - "learning_rate": 0.00019999674986380942, - "loss": 46.0, - "step": 33576 - }, - { - "epoch": 2.5671961312766403, - "grad_norm": 0.001643717521801591, - "learning_rate": 0.00019999674967015115, - "loss": 46.0, - "step": 33577 - }, - { - "epoch": 2.56727258826003, - "grad_norm": 0.0008654296980239451, - "learning_rate": 0.00019999674947648708, - "loss": 46.0, - "step": 33578 - }, - { - "epoch": 2.56734904524342, - "grad_norm": 0.005815157666802406, - "learning_rate": 0.00019999674928281727, - "loss": 46.0, - "step": 33579 - }, - { - "epoch": 2.5674255022268095, - "grad_norm": 0.0018154950812458992, - "learning_rate": 0.00019999674908914168, - "loss": 46.0, - "step": 33580 - }, - { - "epoch": 2.5675019592101993, - "grad_norm": 0.0009040693985298276, - "learning_rate": 0.0001999967488954603, - "loss": 46.0, - "step": 33581 - }, - { - "epoch": 2.567578416193589, - "grad_norm": 0.0006755805225111544, - "learning_rate": 0.0001999967487017732, - "loss": 46.0, - "step": 33582 - }, - { - "epoch": 2.567654873176979, - "grad_norm": 0.0009333838243037462, - "learning_rate": 0.00019999674850808025, - "loss": 46.0, - "step": 33583 - }, - { - "epoch": 2.5677313301603686, - "grad_norm": 0.003079622983932495, - "learning_rate": 0.0001999967483143816, - "loss": 46.0, - "step": 33584 - }, - { - "epoch": 2.5678077871437583, - "grad_norm": 0.00397895323112607, - "learning_rate": 0.00019999674812067717, - "loss": 46.0, - "step": 33585 - }, - { - "epoch": 2.567884244127148, - "grad_norm": 0.008154308423399925, - "learning_rate": 0.00019999674792696697, - "loss": 46.0, - "step": 33586 - }, - { - "epoch": 2.567960701110538, - "grad_norm": 0.006857165135443211, - "learning_rate": 0.000199996747733251, - "loss": 46.0, - "step": 33587 - }, - { - "epoch": 2.5680371580939276, - "grad_norm": 0.00257407920435071, - "learning_rate": 0.00019999674753952925, - "loss": 46.0, - "step": 33588 - }, - { - "epoch": 2.5681136150773174, - "grad_norm": 0.0014389710268005729, - "learning_rate": 0.00019999674734580173, - "loss": 46.0, - "step": 33589 - }, - { - "epoch": 2.5681900720607067, - "grad_norm": 0.0016827452927827835, - "learning_rate": 0.00019999674715206843, - "loss": 46.0, - "step": 33590 - }, - { - "epoch": 2.5682665290440965, - "grad_norm": 0.0016161476960405707, - "learning_rate": 0.00019999674695832936, - "loss": 46.0, - "step": 33591 - }, - { - "epoch": 2.5683429860274862, - "grad_norm": 0.003877685870975256, - "learning_rate": 0.00019999674676458455, - "loss": 46.0, - "step": 33592 - }, - { - "epoch": 2.568419443010876, - "grad_norm": 0.0011132400250062346, - "learning_rate": 0.00019999674657083396, - "loss": 46.0, - "step": 33593 - }, - { - "epoch": 2.5684958999942658, - "grad_norm": 0.0015398639952763915, - "learning_rate": 0.0001999967463770776, - "loss": 46.0, - "step": 33594 - }, - { - "epoch": 2.5685723569776555, - "grad_norm": 0.0035009966231882572, - "learning_rate": 0.00019999674618331547, - "loss": 46.0, - "step": 33595 - }, - { - "epoch": 2.5686488139610453, - "grad_norm": 0.0007522220839746296, - "learning_rate": 0.00019999674598954756, - "loss": 46.0, - "step": 33596 - }, - { - "epoch": 2.568725270944435, - "grad_norm": 0.0007269795169122517, - "learning_rate": 0.00019999674579577388, - "loss": 46.0, - "step": 33597 - }, - { - "epoch": 2.5688017279278244, - "grad_norm": 0.0006011362420395017, - "learning_rate": 0.00019999674560199445, - "loss": 46.0, - "step": 33598 - }, - { - "epoch": 2.568878184911214, - "grad_norm": 0.0009947501821443439, - "learning_rate": 0.00019999674540820922, - "loss": 46.0, - "step": 33599 - }, - { - "epoch": 2.568954641894604, - "grad_norm": 0.0006983648636378348, - "learning_rate": 0.00019999674521441828, - "loss": 46.0, - "step": 33600 - }, - { - "epoch": 2.5690310988779936, - "grad_norm": 0.0011247338261455297, - "learning_rate": 0.0001999967450206215, - "loss": 46.0, - "step": 33601 - }, - { - "epoch": 2.5691075558613834, - "grad_norm": 0.0011075049405917525, - "learning_rate": 0.000199996744826819, - "loss": 46.0, - "step": 33602 - }, - { - "epoch": 2.569184012844773, - "grad_norm": 0.0007380927563644946, - "learning_rate": 0.0001999967446330107, - "loss": 46.0, - "step": 33603 - }, - { - "epoch": 2.569260469828163, - "grad_norm": 0.0014994388911873102, - "learning_rate": 0.00019999674443919668, - "loss": 46.0, - "step": 33604 - }, - { - "epoch": 2.5693369268115527, - "grad_norm": 0.0019309587078168988, - "learning_rate": 0.00019999674424537683, - "loss": 46.0, - "step": 33605 - }, - { - "epoch": 2.5694133837949424, - "grad_norm": 0.00415783142670989, - "learning_rate": 0.00019999674405155125, - "loss": 46.0, - "step": 33606 - }, - { - "epoch": 2.569489840778332, - "grad_norm": 0.0017408544663339853, - "learning_rate": 0.0001999967438577199, - "loss": 46.0, - "step": 33607 - }, - { - "epoch": 2.569566297761722, - "grad_norm": 0.0021758307702839375, - "learning_rate": 0.00019999674366388275, - "loss": 46.0, - "step": 33608 - }, - { - "epoch": 2.5696427547451117, - "grad_norm": 0.0010166955180466175, - "learning_rate": 0.00019999674347003982, - "loss": 46.0, - "step": 33609 - }, - { - "epoch": 2.5697192117285015, - "grad_norm": 0.0029189339838922024, - "learning_rate": 0.00019999674327619117, - "loss": 46.0, - "step": 33610 - }, - { - "epoch": 2.5697956687118912, - "grad_norm": 0.0016484318766742945, - "learning_rate": 0.00019999674308233674, - "loss": 46.0, - "step": 33611 - }, - { - "epoch": 2.5698721256952806, - "grad_norm": 0.003079600166529417, - "learning_rate": 0.0001999967428884765, - "loss": 46.0, - "step": 33612 - }, - { - "epoch": 2.5699485826786703, - "grad_norm": 0.003807327477261424, - "learning_rate": 0.00019999674269461054, - "loss": 46.0, - "step": 33613 - }, - { - "epoch": 2.57002503966206, - "grad_norm": 0.001914694090373814, - "learning_rate": 0.0001999967425007388, - "loss": 46.0, - "step": 33614 - }, - { - "epoch": 2.57010149664545, - "grad_norm": 0.0032240008004009724, - "learning_rate": 0.00019999674230686127, - "loss": 46.0, - "step": 33615 - }, - { - "epoch": 2.5701779536288396, - "grad_norm": 0.0016452540876343846, - "learning_rate": 0.00019999674211297798, - "loss": 46.0, - "step": 33616 - }, - { - "epoch": 2.5702544106122294, - "grad_norm": 0.0035483958199620247, - "learning_rate": 0.00019999674191908894, - "loss": 46.0, - "step": 33617 - }, - { - "epoch": 2.570330867595619, - "grad_norm": 0.0014126044698059559, - "learning_rate": 0.00019999674172519412, - "loss": 46.0, - "step": 33618 - }, - { - "epoch": 2.570407324579009, - "grad_norm": 0.0008452071342617273, - "learning_rate": 0.0001999967415312935, - "loss": 46.0, - "step": 33619 - }, - { - "epoch": 2.570483781562398, - "grad_norm": 0.0029648886993527412, - "learning_rate": 0.00019999674133738715, - "loss": 46.0, - "step": 33620 - }, - { - "epoch": 2.570560238545788, - "grad_norm": 0.0016587642021477222, - "learning_rate": 0.00019999674114347502, - "loss": 46.0, - "step": 33621 - }, - { - "epoch": 2.5706366955291777, - "grad_norm": 0.000729656545445323, - "learning_rate": 0.0001999967409495571, - "loss": 46.0, - "step": 33622 - }, - { - "epoch": 2.5707131525125675, - "grad_norm": 0.001706088543869555, - "learning_rate": 0.00019999674075563343, - "loss": 46.0, - "step": 33623 - }, - { - "epoch": 2.5707896094959573, - "grad_norm": 0.0004884802037850022, - "learning_rate": 0.000199996740561704, - "loss": 46.0, - "step": 33624 - }, - { - "epoch": 2.570866066479347, - "grad_norm": 0.002172946697100997, - "learning_rate": 0.0001999967403677688, - "loss": 46.0, - "step": 33625 - }, - { - "epoch": 2.5709425234627368, - "grad_norm": 0.0008171516237780452, - "learning_rate": 0.0001999967401738278, - "loss": 46.0, - "step": 33626 - }, - { - "epoch": 2.5710189804461265, - "grad_norm": 0.006667159032076597, - "learning_rate": 0.00019999673997988105, - "loss": 46.0, - "step": 33627 - }, - { - "epoch": 2.5710954374295163, - "grad_norm": 0.006327591836452484, - "learning_rate": 0.00019999673978592853, - "loss": 46.0, - "step": 33628 - }, - { - "epoch": 2.571171894412906, - "grad_norm": 0.0012598424218595028, - "learning_rate": 0.00019999673959197027, - "loss": 46.0, - "step": 33629 - }, - { - "epoch": 2.571248351396296, - "grad_norm": 0.001248611486516893, - "learning_rate": 0.0001999967393980062, - "loss": 46.0, - "step": 33630 - }, - { - "epoch": 2.5713248083796856, - "grad_norm": 0.002740581287071109, - "learning_rate": 0.0001999967392040364, - "loss": 46.0, - "step": 33631 - }, - { - "epoch": 2.5714012653630753, - "grad_norm": 0.00342528335750103, - "learning_rate": 0.00019999673901006078, - "loss": 46.0, - "step": 33632 - }, - { - "epoch": 2.5714777223464647, - "grad_norm": 0.0024696860928088427, - "learning_rate": 0.00019999673881607942, - "loss": 46.0, - "step": 33633 - }, - { - "epoch": 2.5715541793298544, - "grad_norm": 0.0038649081252515316, - "learning_rate": 0.00019999673862209228, - "loss": 46.0, - "step": 33634 - }, - { - "epoch": 2.571630636313244, - "grad_norm": 0.0012987615773454309, - "learning_rate": 0.0001999967384280994, - "loss": 46.0, - "step": 33635 - }, - { - "epoch": 2.571707093296634, - "grad_norm": 0.003049397375434637, - "learning_rate": 0.00019999673823410072, - "loss": 46.0, - "step": 33636 - }, - { - "epoch": 2.5717835502800237, - "grad_norm": 0.0021371340844780207, - "learning_rate": 0.00019999673804009627, - "loss": 46.0, - "step": 33637 - }, - { - "epoch": 2.5718600072634135, - "grad_norm": 0.0009890544461086392, - "learning_rate": 0.00019999673784608607, - "loss": 46.0, - "step": 33638 - }, - { - "epoch": 2.5719364642468032, - "grad_norm": 0.0012890303041785955, - "learning_rate": 0.00019999673765207007, - "loss": 46.0, - "step": 33639 - }, - { - "epoch": 2.572012921230193, - "grad_norm": 0.0018265777034685016, - "learning_rate": 0.00019999673745804835, - "loss": 46.0, - "step": 33640 - }, - { - "epoch": 2.5720893782135827, - "grad_norm": 0.0012183840153738856, - "learning_rate": 0.00019999673726402083, - "loss": 46.0, - "step": 33641 - }, - { - "epoch": 2.572165835196972, - "grad_norm": 0.0014565408928319812, - "learning_rate": 0.0001999967370699875, - "loss": 46.0, - "step": 33642 - }, - { - "epoch": 2.572242292180362, - "grad_norm": 0.0016076304018497467, - "learning_rate": 0.00019999673687594847, - "loss": 46.0, - "step": 33643 - }, - { - "epoch": 2.5723187491637516, - "grad_norm": 0.003128182841464877, - "learning_rate": 0.00019999673668190363, - "loss": 46.0, - "step": 33644 - }, - { - "epoch": 2.5723952061471413, - "grad_norm": 0.0024909370113164186, - "learning_rate": 0.00019999673648785304, - "loss": 46.0, - "step": 33645 - }, - { - "epoch": 2.572471663130531, - "grad_norm": 0.003303632140159607, - "learning_rate": 0.00019999673629379669, - "loss": 46.0, - "step": 33646 - }, - { - "epoch": 2.572548120113921, - "grad_norm": 0.0010045719100162387, - "learning_rate": 0.00019999673609973458, - "loss": 46.0, - "step": 33647 - }, - { - "epoch": 2.5726245770973106, - "grad_norm": 0.0004578182997647673, - "learning_rate": 0.00019999673590566665, - "loss": 46.0, - "step": 33648 - }, - { - "epoch": 2.5727010340807004, - "grad_norm": 0.0010105711407959461, - "learning_rate": 0.000199996735711593, - "loss": 46.0, - "step": 33649 - }, - { - "epoch": 2.57277749106409, - "grad_norm": 0.0014336196472868323, - "learning_rate": 0.00019999673551751354, - "loss": 46.0, - "step": 33650 - }, - { - "epoch": 2.57285394804748, - "grad_norm": 0.003676900640130043, - "learning_rate": 0.00019999673532342834, - "loss": 46.0, - "step": 33651 - }, - { - "epoch": 2.5729304050308697, - "grad_norm": 0.0015876519028097391, - "learning_rate": 0.00019999673512933734, - "loss": 46.0, - "step": 33652 - }, - { - "epoch": 2.5730068620142594, - "grad_norm": 0.0017812944715842605, - "learning_rate": 0.0001999967349352406, - "loss": 46.0, - "step": 33653 - }, - { - "epoch": 2.573083318997649, - "grad_norm": 0.0015645832754671574, - "learning_rate": 0.0001999967347411381, - "loss": 46.0, - "step": 33654 - }, - { - "epoch": 2.5731597759810385, - "grad_norm": 0.0014201278099790215, - "learning_rate": 0.0001999967345470298, - "loss": 46.0, - "step": 33655 - }, - { - "epoch": 2.5732362329644283, - "grad_norm": 0.003005894599482417, - "learning_rate": 0.00019999673435291574, - "loss": 46.0, - "step": 33656 - }, - { - "epoch": 2.573312689947818, - "grad_norm": 0.0018718059873208404, - "learning_rate": 0.0001999967341587959, - "loss": 46.0, - "step": 33657 - }, - { - "epoch": 2.573389146931208, - "grad_norm": 0.001077447785064578, - "learning_rate": 0.00019999673396467032, - "loss": 46.0, - "step": 33658 - }, - { - "epoch": 2.5734656039145976, - "grad_norm": 0.0024721743538975716, - "learning_rate": 0.00019999673377053893, - "loss": 46.0, - "step": 33659 - }, - { - "epoch": 2.5735420608979873, - "grad_norm": 0.0018371124751865864, - "learning_rate": 0.00019999673357640183, - "loss": 46.0, - "step": 33660 - }, - { - "epoch": 2.573618517881377, - "grad_norm": 0.0035809038672596216, - "learning_rate": 0.0001999967333822589, - "loss": 46.0, - "step": 33661 - }, - { - "epoch": 2.573694974864767, - "grad_norm": 0.0020826654508709908, - "learning_rate": 0.00019999673318811024, - "loss": 46.0, - "step": 33662 - }, - { - "epoch": 2.573771431848156, - "grad_norm": 0.0010911946883425117, - "learning_rate": 0.0001999967329939558, - "loss": 46.0, - "step": 33663 - }, - { - "epoch": 2.573847888831546, - "grad_norm": 0.0034325115848332644, - "learning_rate": 0.0001999967327997956, - "loss": 46.0, - "step": 33664 - }, - { - "epoch": 2.5739243458149357, - "grad_norm": 0.0009272743482142687, - "learning_rate": 0.00019999673260562962, - "loss": 46.0, - "step": 33665 - }, - { - "epoch": 2.5740008027983254, - "grad_norm": 0.002528467681258917, - "learning_rate": 0.00019999673241145787, - "loss": 46.0, - "step": 33666 - }, - { - "epoch": 2.574077259781715, - "grad_norm": 0.003457816783338785, - "learning_rate": 0.00019999673221728035, - "loss": 46.0, - "step": 33667 - }, - { - "epoch": 2.574153716765105, - "grad_norm": 0.0015440174611285329, - "learning_rate": 0.00019999673202309706, - "loss": 46.0, - "step": 33668 - }, - { - "epoch": 2.5742301737484947, - "grad_norm": 0.0019255020888522267, - "learning_rate": 0.000199996731828908, - "loss": 46.0, - "step": 33669 - }, - { - "epoch": 2.5743066307318845, - "grad_norm": 0.002855937462300062, - "learning_rate": 0.00019999673163471316, - "loss": 46.0, - "step": 33670 - }, - { - "epoch": 2.5743830877152742, - "grad_norm": 0.0024788465816527605, - "learning_rate": 0.0001999967314405126, - "loss": 46.0, - "step": 33671 - }, - { - "epoch": 2.574459544698664, - "grad_norm": 0.0024461187422275543, - "learning_rate": 0.0001999967312463062, - "loss": 46.0, - "step": 33672 - }, - { - "epoch": 2.5745360016820538, - "grad_norm": 0.0008722626371309161, - "learning_rate": 0.00019999673105209408, - "loss": 46.0, - "step": 33673 - }, - { - "epoch": 2.5746124586654435, - "grad_norm": 0.001374467508867383, - "learning_rate": 0.00019999673085787615, - "loss": 46.0, - "step": 33674 - }, - { - "epoch": 2.5746889156488333, - "grad_norm": 0.0013584342086687684, - "learning_rate": 0.0001999967306636525, - "loss": 46.0, - "step": 33675 - }, - { - "epoch": 2.574765372632223, - "grad_norm": 0.0022337569389492273, - "learning_rate": 0.00019999673046942307, - "loss": 46.0, - "step": 33676 - }, - { - "epoch": 2.5748418296156124, - "grad_norm": 0.0015917246928438544, - "learning_rate": 0.00019999673027518784, - "loss": 46.0, - "step": 33677 - }, - { - "epoch": 2.574918286599002, - "grad_norm": 0.004520599730312824, - "learning_rate": 0.00019999673008094687, - "loss": 46.0, - "step": 33678 - }, - { - "epoch": 2.574994743582392, - "grad_norm": 0.003556224051862955, - "learning_rate": 0.0001999967298867001, - "loss": 46.0, - "step": 33679 - }, - { - "epoch": 2.5750712005657816, - "grad_norm": 0.002192916814237833, - "learning_rate": 0.0001999967296924476, - "loss": 46.0, - "step": 33680 - }, - { - "epoch": 2.5751476575491714, - "grad_norm": 0.0011125840246677399, - "learning_rate": 0.00019999672949818931, - "loss": 46.0, - "step": 33681 - }, - { - "epoch": 2.575224114532561, - "grad_norm": 0.0029193125665187836, - "learning_rate": 0.00019999672930392522, - "loss": 46.0, - "step": 33682 - }, - { - "epoch": 2.575300571515951, - "grad_norm": 0.0005085760494694114, - "learning_rate": 0.0001999967291096554, - "loss": 46.0, - "step": 33683 - }, - { - "epoch": 2.5753770284993407, - "grad_norm": 0.0013059613993391395, - "learning_rate": 0.00019999672891537982, - "loss": 46.0, - "step": 33684 - }, - { - "epoch": 2.57545348548273, - "grad_norm": 0.0014554500812664628, - "learning_rate": 0.00019999672872109844, - "loss": 46.0, - "step": 33685 - }, - { - "epoch": 2.5755299424661198, - "grad_norm": 0.0008898632368072867, - "learning_rate": 0.0001999967285268113, - "loss": 46.0, - "step": 33686 - }, - { - "epoch": 2.5756063994495095, - "grad_norm": 0.0008985575404949486, - "learning_rate": 0.0001999967283325184, - "loss": 46.0, - "step": 33687 - }, - { - "epoch": 2.5756828564328993, - "grad_norm": 0.0007783126784488559, - "learning_rate": 0.00019999672813821975, - "loss": 46.0, - "step": 33688 - }, - { - "epoch": 2.575759313416289, - "grad_norm": 0.00416223518550396, - "learning_rate": 0.0001999967279439153, - "loss": 46.0, - "step": 33689 - }, - { - "epoch": 2.575835770399679, - "grad_norm": 0.00104646070394665, - "learning_rate": 0.0001999967277496051, - "loss": 46.0, - "step": 33690 - }, - { - "epoch": 2.5759122273830686, - "grad_norm": 0.0002899757237173617, - "learning_rate": 0.0001999967275552891, - "loss": 46.0, - "step": 33691 - }, - { - "epoch": 2.5759886843664583, - "grad_norm": 0.0018683980451896787, - "learning_rate": 0.00019999672736096736, - "loss": 46.0, - "step": 33692 - }, - { - "epoch": 2.576065141349848, - "grad_norm": 0.003739645704627037, - "learning_rate": 0.0001999967271666398, - "loss": 46.0, - "step": 33693 - }, - { - "epoch": 2.576141598333238, - "grad_norm": 0.0026869659777730703, - "learning_rate": 0.00019999672697230655, - "loss": 46.0, - "step": 33694 - }, - { - "epoch": 2.5762180553166276, - "grad_norm": 0.0011040762765333056, - "learning_rate": 0.00019999672677796748, - "loss": 46.0, - "step": 33695 - }, - { - "epoch": 2.5762945123000174, - "grad_norm": 0.002665249863639474, - "learning_rate": 0.00019999672658362267, - "loss": 46.0, - "step": 33696 - }, - { - "epoch": 2.576370969283407, - "grad_norm": 0.0019124011741951108, - "learning_rate": 0.00019999672638927206, - "loss": 46.0, - "step": 33697 - }, - { - "epoch": 2.576447426266797, - "grad_norm": 0.00514562800526619, - "learning_rate": 0.0001999967261949157, - "loss": 46.0, - "step": 33698 - }, - { - "epoch": 2.576523883250186, - "grad_norm": 0.0018402902642264962, - "learning_rate": 0.00019999672600055357, - "loss": 46.0, - "step": 33699 - }, - { - "epoch": 2.576600340233576, - "grad_norm": 0.0008133978117257357, - "learning_rate": 0.00019999672580618566, - "loss": 46.0, - "step": 33700 - }, - { - "epoch": 2.5766767972169657, - "grad_norm": 0.0008695360156707466, - "learning_rate": 0.00019999672561181198, - "loss": 46.0, - "step": 33701 - }, - { - "epoch": 2.5767532542003555, - "grad_norm": 0.0003591017739381641, - "learning_rate": 0.00019999672541743256, - "loss": 46.0, - "step": 33702 - }, - { - "epoch": 2.5768297111837453, - "grad_norm": 0.0021886201575398445, - "learning_rate": 0.00019999672522304733, - "loss": 46.0, - "step": 33703 - }, - { - "epoch": 2.576906168167135, - "grad_norm": 0.0007825404172763228, - "learning_rate": 0.00019999672502865633, - "loss": 46.0, - "step": 33704 - }, - { - "epoch": 2.576982625150525, - "grad_norm": 0.0009387555182911456, - "learning_rate": 0.0001999967248342596, - "loss": 46.0, - "step": 33705 - }, - { - "epoch": 2.5770590821339145, - "grad_norm": 0.00335617084056139, - "learning_rate": 0.00019999672463985707, - "loss": 46.0, - "step": 33706 - }, - { - "epoch": 2.577135539117304, - "grad_norm": 0.0018673943122848868, - "learning_rate": 0.00019999672444544878, - "loss": 46.0, - "step": 33707 - }, - { - "epoch": 2.5772119961006936, - "grad_norm": 0.0021284364629536867, - "learning_rate": 0.00019999672425103472, - "loss": 46.0, - "step": 33708 - }, - { - "epoch": 2.5772884530840834, - "grad_norm": 0.0014881722163408995, - "learning_rate": 0.0001999967240566149, - "loss": 46.0, - "step": 33709 - }, - { - "epoch": 2.577364910067473, - "grad_norm": 0.0010011779377236962, - "learning_rate": 0.0001999967238621893, - "loss": 46.0, - "step": 33710 - }, - { - "epoch": 2.577441367050863, - "grad_norm": 0.0006519795279018581, - "learning_rate": 0.00019999672366775793, - "loss": 46.0, - "step": 33711 - }, - { - "epoch": 2.5775178240342527, - "grad_norm": 0.010698611848056316, - "learning_rate": 0.0001999967234733208, - "loss": 46.0, - "step": 33712 - }, - { - "epoch": 2.5775942810176424, - "grad_norm": 0.001863163779489696, - "learning_rate": 0.00019999672327887787, - "loss": 46.0, - "step": 33713 - }, - { - "epoch": 2.577670738001032, - "grad_norm": 0.0022500120103359222, - "learning_rate": 0.0001999967230844292, - "loss": 46.0, - "step": 33714 - }, - { - "epoch": 2.577747194984422, - "grad_norm": 0.0022092899307608604, - "learning_rate": 0.00019999672288997477, - "loss": 46.0, - "step": 33715 - }, - { - "epoch": 2.5778236519678117, - "grad_norm": 0.0008713945280760527, - "learning_rate": 0.00019999672269551457, - "loss": 46.0, - "step": 33716 - }, - { - "epoch": 2.5779001089512015, - "grad_norm": 0.0024856626987457275, - "learning_rate": 0.00019999672250104857, - "loss": 46.0, - "step": 33717 - }, - { - "epoch": 2.5779765659345912, - "grad_norm": 0.002778966911137104, - "learning_rate": 0.00019999672230657683, - "loss": 46.0, - "step": 33718 - }, - { - "epoch": 2.578053022917981, - "grad_norm": 0.0017632093513384461, - "learning_rate": 0.0001999967221120993, - "loss": 46.0, - "step": 33719 - }, - { - "epoch": 2.5781294799013708, - "grad_norm": 0.001748785492964089, - "learning_rate": 0.00019999672191761602, - "loss": 46.0, - "step": 33720 - }, - { - "epoch": 2.57820593688476, - "grad_norm": 0.0016892984276637435, - "learning_rate": 0.00019999672172312695, - "loss": 46.0, - "step": 33721 - }, - { - "epoch": 2.57828239386815, - "grad_norm": 0.0011058708187192678, - "learning_rate": 0.00019999672152863212, - "loss": 46.0, - "step": 33722 - }, - { - "epoch": 2.5783588508515396, - "grad_norm": 0.00383828766644001, - "learning_rate": 0.00019999672133413153, - "loss": 46.0, - "step": 33723 - }, - { - "epoch": 2.5784353078349294, - "grad_norm": 0.0030533461831510067, - "learning_rate": 0.00019999672113962515, - "loss": 46.0, - "step": 33724 - }, - { - "epoch": 2.578511764818319, - "grad_norm": 0.0014342984650284052, - "learning_rate": 0.00019999672094511302, - "loss": 46.0, - "step": 33725 - }, - { - "epoch": 2.578588221801709, - "grad_norm": 0.001511389040388167, - "learning_rate": 0.0001999967207505951, - "loss": 46.0, - "step": 33726 - }, - { - "epoch": 2.5786646787850986, - "grad_norm": 0.001929558697156608, - "learning_rate": 0.00019999672055607146, - "loss": 46.0, - "step": 33727 - }, - { - "epoch": 2.5787411357684884, - "grad_norm": 0.0009858261328190565, - "learning_rate": 0.000199996720361542, - "loss": 46.0, - "step": 33728 - }, - { - "epoch": 2.5788175927518777, - "grad_norm": 0.001606738893315196, - "learning_rate": 0.00019999672016700679, - "loss": 46.0, - "step": 33729 - }, - { - "epoch": 2.5788940497352675, - "grad_norm": 0.0007273387163877487, - "learning_rate": 0.00019999671997246582, - "loss": 46.0, - "step": 33730 - }, - { - "epoch": 2.5789705067186572, - "grad_norm": 0.0014322358183562756, - "learning_rate": 0.00019999671977791904, - "loss": 46.0, - "step": 33731 - }, - { - "epoch": 2.579046963702047, - "grad_norm": 0.0007518419879488647, - "learning_rate": 0.00019999671958336653, - "loss": 46.0, - "step": 33732 - }, - { - "epoch": 2.5791234206854368, - "grad_norm": 0.0021305095870047808, - "learning_rate": 0.00019999671938880824, - "loss": 46.0, - "step": 33733 - }, - { - "epoch": 2.5791998776688265, - "grad_norm": 0.0024050401989370584, - "learning_rate": 0.0001999967191942442, - "loss": 46.0, - "step": 33734 - }, - { - "epoch": 2.5792763346522163, - "grad_norm": 0.0021108253858983517, - "learning_rate": 0.00019999671899967436, - "loss": 46.0, - "step": 33735 - }, - { - "epoch": 2.579352791635606, - "grad_norm": 0.00048105738824233413, - "learning_rate": 0.00019999671880509875, - "loss": 46.0, - "step": 33736 - }, - { - "epoch": 2.579429248618996, - "grad_norm": 0.0007690681959502399, - "learning_rate": 0.0001999967186105174, - "loss": 46.0, - "step": 33737 - }, - { - "epoch": 2.5795057056023856, - "grad_norm": 0.00043410947546362877, - "learning_rate": 0.00019999671841593023, - "loss": 46.0, - "step": 33738 - }, - { - "epoch": 2.5795821625857753, - "grad_norm": 0.0018299379153177142, - "learning_rate": 0.00019999671822133733, - "loss": 46.0, - "step": 33739 - }, - { - "epoch": 2.579658619569165, - "grad_norm": 0.0031106076203286648, - "learning_rate": 0.00019999671802673868, - "loss": 46.0, - "step": 33740 - }, - { - "epoch": 2.579735076552555, - "grad_norm": 0.00333911064080894, - "learning_rate": 0.0001999967178321342, - "loss": 46.0, - "step": 33741 - }, - { - "epoch": 2.5798115335359446, - "grad_norm": 0.0020811359863728285, - "learning_rate": 0.000199996717637524, - "loss": 46.0, - "step": 33742 - }, - { - "epoch": 2.579887990519334, - "grad_norm": 0.0011897641234099865, - "learning_rate": 0.00019999671744290803, - "loss": 46.0, - "step": 33743 - }, - { - "epoch": 2.5799644475027237, - "grad_norm": 0.002339713741093874, - "learning_rate": 0.00019999671724828626, - "loss": 46.0, - "step": 33744 - }, - { - "epoch": 2.5800409044861135, - "grad_norm": 0.00048502953723073006, - "learning_rate": 0.00019999671705365875, - "loss": 46.0, - "step": 33745 - }, - { - "epoch": 2.580117361469503, - "grad_norm": 0.003254833398386836, - "learning_rate": 0.00019999671685902546, - "loss": 46.0, - "step": 33746 - }, - { - "epoch": 2.580193818452893, - "grad_norm": 0.0021795781794935465, - "learning_rate": 0.0001999967166643864, - "loss": 46.0, - "step": 33747 - }, - { - "epoch": 2.5802702754362827, - "grad_norm": 0.004946145229041576, - "learning_rate": 0.00019999671646974155, - "loss": 46.0, - "step": 33748 - }, - { - "epoch": 2.5803467324196725, - "grad_norm": 0.0015211660647764802, - "learning_rate": 0.00019999671627509094, - "loss": 46.0, - "step": 33749 - }, - { - "epoch": 2.5804231894030623, - "grad_norm": 0.0014608881901949644, - "learning_rate": 0.0001999967160804346, - "loss": 46.0, - "step": 33750 - }, - { - "epoch": 2.5804996463864516, - "grad_norm": 0.0010390982497483492, - "learning_rate": 0.00019999671588577246, - "loss": 46.0, - "step": 33751 - }, - { - "epoch": 2.5805761033698413, - "grad_norm": 0.0014662137255072594, - "learning_rate": 0.00019999671569110455, - "loss": 46.0, - "step": 33752 - }, - { - "epoch": 2.580652560353231, - "grad_norm": 0.0027485128957778215, - "learning_rate": 0.00019999671549643088, - "loss": 46.0, - "step": 33753 - }, - { - "epoch": 2.580729017336621, - "grad_norm": 0.002101237652823329, - "learning_rate": 0.00019999671530175143, - "loss": 46.0, - "step": 33754 - }, - { - "epoch": 2.5808054743200106, - "grad_norm": 0.0024111801758408546, - "learning_rate": 0.0001999967151070662, - "loss": 46.0, - "step": 33755 - }, - { - "epoch": 2.5808819313034004, - "grad_norm": 0.0012782351113855839, - "learning_rate": 0.00019999671491237523, - "loss": 46.0, - "step": 33756 - }, - { - "epoch": 2.58095838828679, - "grad_norm": 0.0021875794045627117, - "learning_rate": 0.0001999967147176785, - "loss": 46.0, - "step": 33757 - }, - { - "epoch": 2.58103484527018, - "grad_norm": 0.001406670780852437, - "learning_rate": 0.00019999671452297597, - "loss": 46.0, - "step": 33758 - }, - { - "epoch": 2.5811113022535697, - "grad_norm": 0.0009112459956668317, - "learning_rate": 0.00019999671432826768, - "loss": 46.0, - "step": 33759 - }, - { - "epoch": 2.5811877592369594, - "grad_norm": 0.0012908895732834935, - "learning_rate": 0.00019999671413355362, - "loss": 46.0, - "step": 33760 - }, - { - "epoch": 2.581264216220349, - "grad_norm": 0.0015117382863536477, - "learning_rate": 0.00019999671393883378, - "loss": 46.0, - "step": 33761 - }, - { - "epoch": 2.581340673203739, - "grad_norm": 0.0008893916965462267, - "learning_rate": 0.0001999967137441082, - "loss": 46.0, - "step": 33762 - }, - { - "epoch": 2.5814171301871287, - "grad_norm": 0.0013658194802701473, - "learning_rate": 0.0001999967135493768, - "loss": 46.0, - "step": 33763 - }, - { - "epoch": 2.581493587170518, - "grad_norm": 0.0013998374342918396, - "learning_rate": 0.00019999671335463968, - "loss": 46.0, - "step": 33764 - }, - { - "epoch": 2.581570044153908, - "grad_norm": 0.0026385600212961435, - "learning_rate": 0.00019999671315989675, - "loss": 46.0, - "step": 33765 - }, - { - "epoch": 2.5816465011372975, - "grad_norm": 0.0012058269931003451, - "learning_rate": 0.0001999967129651481, - "loss": 46.0, - "step": 33766 - }, - { - "epoch": 2.5817229581206873, - "grad_norm": 0.000891146541107446, - "learning_rate": 0.00019999671277039365, - "loss": 46.0, - "step": 33767 - }, - { - "epoch": 2.581799415104077, - "grad_norm": 0.002193536376580596, - "learning_rate": 0.00019999671257563343, - "loss": 46.0, - "step": 33768 - }, - { - "epoch": 2.581875872087467, - "grad_norm": 0.0008351202704943717, - "learning_rate": 0.00019999671238086746, - "loss": 46.0, - "step": 33769 - }, - { - "epoch": 2.5819523290708566, - "grad_norm": 0.0015749818412587047, - "learning_rate": 0.00019999671218609572, - "loss": 46.0, - "step": 33770 - }, - { - "epoch": 2.5820287860542463, - "grad_norm": 0.002329234965145588, - "learning_rate": 0.00019999671199131817, - "loss": 46.0, - "step": 33771 - }, - { - "epoch": 2.582105243037636, - "grad_norm": 0.003988371230661869, - "learning_rate": 0.0001999967117965349, - "loss": 46.0, - "step": 33772 - }, - { - "epoch": 2.5821817000210254, - "grad_norm": 0.0020972026977688074, - "learning_rate": 0.00019999671160174582, - "loss": 46.0, - "step": 33773 - }, - { - "epoch": 2.582258157004415, - "grad_norm": 0.0029230595100671053, - "learning_rate": 0.00019999671140695098, - "loss": 46.0, - "step": 33774 - }, - { - "epoch": 2.582334613987805, - "grad_norm": 0.0022066854871809483, - "learning_rate": 0.0001999967112121504, - "loss": 46.0, - "step": 33775 - }, - { - "epoch": 2.5824110709711947, - "grad_norm": 0.0011470660101622343, - "learning_rate": 0.00019999671101734404, - "loss": 46.0, - "step": 33776 - }, - { - "epoch": 2.5824875279545845, - "grad_norm": 0.002847378607839346, - "learning_rate": 0.00019999671082253189, - "loss": 46.0, - "step": 33777 - }, - { - "epoch": 2.5825639849379742, - "grad_norm": 0.00179480726365, - "learning_rate": 0.00019999671062771398, - "loss": 46.0, - "step": 33778 - }, - { - "epoch": 2.582640441921364, - "grad_norm": 0.00517666433006525, - "learning_rate": 0.0001999967104328903, - "loss": 46.0, - "step": 33779 - }, - { - "epoch": 2.5827168989047538, - "grad_norm": 0.0033719746861606836, - "learning_rate": 0.00019999671023806086, - "loss": 46.0, - "step": 33780 - }, - { - "epoch": 2.5827933558881435, - "grad_norm": 0.000728595710825175, - "learning_rate": 0.00019999671004322563, - "loss": 46.0, - "step": 33781 - }, - { - "epoch": 2.5828698128715333, - "grad_norm": 0.0019043530337512493, - "learning_rate": 0.00019999670984838466, - "loss": 46.0, - "step": 33782 - }, - { - "epoch": 2.582946269854923, - "grad_norm": 0.001992369070649147, - "learning_rate": 0.00019999670965353792, - "loss": 46.0, - "step": 33783 - }, - { - "epoch": 2.583022726838313, - "grad_norm": 0.002411763882264495, - "learning_rate": 0.00019999670945868538, - "loss": 46.0, - "step": 33784 - }, - { - "epoch": 2.5830991838217026, - "grad_norm": 0.0005923790158703923, - "learning_rate": 0.0001999967092638271, - "loss": 46.0, - "step": 33785 - }, - { - "epoch": 2.583175640805092, - "grad_norm": 0.0019952126313000917, - "learning_rate": 0.00019999670906896305, - "loss": 46.0, - "step": 33786 - }, - { - "epoch": 2.5832520977884816, - "grad_norm": 0.002692426787689328, - "learning_rate": 0.0001999967088740932, - "loss": 46.0, - "step": 33787 - }, - { - "epoch": 2.5833285547718714, - "grad_norm": 0.0026989937759935856, - "learning_rate": 0.0001999967086792176, - "loss": 46.0, - "step": 33788 - }, - { - "epoch": 2.583405011755261, - "grad_norm": 0.006905751768499613, - "learning_rate": 0.00019999670848433622, - "loss": 46.0, - "step": 33789 - }, - { - "epoch": 2.583481468738651, - "grad_norm": 0.0012205137172713876, - "learning_rate": 0.00019999670828944907, - "loss": 46.0, - "step": 33790 - }, - { - "epoch": 2.5835579257220407, - "grad_norm": 0.0014203093014657497, - "learning_rate": 0.00019999670809455616, - "loss": 46.0, - "step": 33791 - }, - { - "epoch": 2.5836343827054304, - "grad_norm": 0.0013054253067821264, - "learning_rate": 0.00019999670789965749, - "loss": 46.0, - "step": 33792 - }, - { - "epoch": 2.58371083968882, - "grad_norm": 0.0023591769859194756, - "learning_rate": 0.00019999670770475304, - "loss": 46.0, - "step": 33793 - }, - { - "epoch": 2.5837872966722095, - "grad_norm": 0.0023021188098937273, - "learning_rate": 0.00019999670750984284, - "loss": 46.0, - "step": 33794 - }, - { - "epoch": 2.5838637536555993, - "grad_norm": 0.0019056061282753944, - "learning_rate": 0.00019999670731492684, - "loss": 46.0, - "step": 33795 - }, - { - "epoch": 2.583940210638989, - "grad_norm": 0.0023336634039878845, - "learning_rate": 0.0001999967071200051, - "loss": 46.0, - "step": 33796 - }, - { - "epoch": 2.584016667622379, - "grad_norm": 0.0038977316580712795, - "learning_rate": 0.00019999670692507756, - "loss": 46.0, - "step": 33797 - }, - { - "epoch": 2.5840931246057686, - "grad_norm": 0.0013777086278423667, - "learning_rate": 0.0001999967067301443, - "loss": 46.0, - "step": 33798 - }, - { - "epoch": 2.5841695815891583, - "grad_norm": 0.002245998941361904, - "learning_rate": 0.0001999967065352052, - "loss": 46.0, - "step": 33799 - }, - { - "epoch": 2.584246038572548, - "grad_norm": 0.007543109357357025, - "learning_rate": 0.0001999967063402604, - "loss": 46.0, - "step": 33800 - }, - { - "epoch": 2.584322495555938, - "grad_norm": 0.001238432014361024, - "learning_rate": 0.0001999967061453098, - "loss": 46.0, - "step": 33801 - }, - { - "epoch": 2.5843989525393276, - "grad_norm": 0.0006391782080754638, - "learning_rate": 0.00019999670595035343, - "loss": 46.0, - "step": 33802 - }, - { - "epoch": 2.5844754095227174, - "grad_norm": 0.0020712323021143675, - "learning_rate": 0.00019999670575539128, - "loss": 46.0, - "step": 33803 - }, - { - "epoch": 2.584551866506107, - "grad_norm": 0.00156506453640759, - "learning_rate": 0.00019999670556042335, - "loss": 46.0, - "step": 33804 - }, - { - "epoch": 2.584628323489497, - "grad_norm": 0.0009177823667414486, - "learning_rate": 0.0001999967053654497, - "loss": 46.0, - "step": 33805 - }, - { - "epoch": 2.5847047804728867, - "grad_norm": 0.0037315429653972387, - "learning_rate": 0.00019999670517047028, - "loss": 46.0, - "step": 33806 - }, - { - "epoch": 2.5847812374562764, - "grad_norm": 0.0017961516277864575, - "learning_rate": 0.00019999670497548503, - "loss": 46.0, - "step": 33807 - }, - { - "epoch": 2.5848576944396657, - "grad_norm": 0.0024719026405364275, - "learning_rate": 0.00019999670478049403, - "loss": 46.0, - "step": 33808 - }, - { - "epoch": 2.5849341514230555, - "grad_norm": 0.0015128189697861671, - "learning_rate": 0.00019999670458549731, - "loss": 46.0, - "step": 33809 - }, - { - "epoch": 2.5850106084064453, - "grad_norm": 0.001197112025693059, - "learning_rate": 0.00019999670439049477, - "loss": 46.0, - "step": 33810 - }, - { - "epoch": 2.585087065389835, - "grad_norm": 0.001631994266062975, - "learning_rate": 0.00019999670419548648, - "loss": 46.0, - "step": 33811 - }, - { - "epoch": 2.5851635223732248, - "grad_norm": 0.0007233674987219274, - "learning_rate": 0.00019999670400047242, - "loss": 46.0, - "step": 33812 - }, - { - "epoch": 2.5852399793566145, - "grad_norm": 0.000873743963893503, - "learning_rate": 0.00019999670380545256, - "loss": 46.0, - "step": 33813 - }, - { - "epoch": 2.5853164363400043, - "grad_norm": 0.0009753777994774282, - "learning_rate": 0.00019999670361042698, - "loss": 46.0, - "step": 33814 - }, - { - "epoch": 2.585392893323394, - "grad_norm": 0.0008296352461911738, - "learning_rate": 0.0001999967034153956, - "loss": 46.0, - "step": 33815 - }, - { - "epoch": 2.5854693503067834, - "grad_norm": 0.002044152468442917, - "learning_rate": 0.00019999670322035846, - "loss": 46.0, - "step": 33816 - }, - { - "epoch": 2.585545807290173, - "grad_norm": 0.001382470945827663, - "learning_rate": 0.00019999670302531556, - "loss": 46.0, - "step": 33817 - }, - { - "epoch": 2.585622264273563, - "grad_norm": 0.0017183058662340045, - "learning_rate": 0.00019999670283026691, - "loss": 46.0, - "step": 33818 - }, - { - "epoch": 2.5856987212569527, - "grad_norm": 0.0010268158512189984, - "learning_rate": 0.00019999670263521244, - "loss": 46.0, - "step": 33819 - }, - { - "epoch": 2.5857751782403424, - "grad_norm": 0.002678610384464264, - "learning_rate": 0.00019999670244015222, - "loss": 46.0, - "step": 33820 - }, - { - "epoch": 2.585851635223732, - "grad_norm": 0.000850468291901052, - "learning_rate": 0.00019999670224508625, - "loss": 46.0, - "step": 33821 - }, - { - "epoch": 2.585928092207122, - "grad_norm": 0.001257752301171422, - "learning_rate": 0.00019999670205001448, - "loss": 46.0, - "step": 33822 - }, - { - "epoch": 2.5860045491905117, - "grad_norm": 0.0014282895717769861, - "learning_rate": 0.00019999670185493696, - "loss": 46.0, - "step": 33823 - }, - { - "epoch": 2.5860810061739015, - "grad_norm": 0.0011255107820034027, - "learning_rate": 0.00019999670165985367, - "loss": 46.0, - "step": 33824 - }, - { - "epoch": 2.5861574631572912, - "grad_norm": 0.0014628826174885035, - "learning_rate": 0.0001999967014647646, - "loss": 46.0, - "step": 33825 - }, - { - "epoch": 2.586233920140681, - "grad_norm": 0.0013565486297011375, - "learning_rate": 0.00019999670126966978, - "loss": 46.0, - "step": 33826 - }, - { - "epoch": 2.5863103771240707, - "grad_norm": 0.001358826644718647, - "learning_rate": 0.0001999967010745692, - "loss": 46.0, - "step": 33827 - }, - { - "epoch": 2.5863868341074605, - "grad_norm": 0.0026653208769857883, - "learning_rate": 0.0001999967008794628, - "loss": 46.0, - "step": 33828 - }, - { - "epoch": 2.5864632910908503, - "grad_norm": 0.0028843041509389877, - "learning_rate": 0.00019999670068435066, - "loss": 46.0, - "step": 33829 - }, - { - "epoch": 2.5865397480742396, - "grad_norm": 0.0011480532120913267, - "learning_rate": 0.00019999670048923273, - "loss": 46.0, - "step": 33830 - }, - { - "epoch": 2.5866162050576293, - "grad_norm": 0.0025333103258162737, - "learning_rate": 0.00019999670029410908, - "loss": 46.0, - "step": 33831 - }, - { - "epoch": 2.586692662041019, - "grad_norm": 0.001028066617436707, - "learning_rate": 0.0001999967000989796, - "loss": 46.0, - "step": 33832 - }, - { - "epoch": 2.586769119024409, - "grad_norm": 0.0008120053098537028, - "learning_rate": 0.0001999966999038444, - "loss": 46.0, - "step": 33833 - }, - { - "epoch": 2.5868455760077986, - "grad_norm": 0.005214180797338486, - "learning_rate": 0.0001999966997087034, - "loss": 46.0, - "step": 33834 - }, - { - "epoch": 2.5869220329911884, - "grad_norm": 0.0017119479598477483, - "learning_rate": 0.00019999669951355667, - "loss": 46.0, - "step": 33835 - }, - { - "epoch": 2.586998489974578, - "grad_norm": 0.0007648076862096786, - "learning_rate": 0.00019999669931840416, - "loss": 46.0, - "step": 33836 - }, - { - "epoch": 2.587074946957968, - "grad_norm": 0.0007501570507884026, - "learning_rate": 0.00019999669912324587, - "loss": 46.0, - "step": 33837 - }, - { - "epoch": 2.5871514039413572, - "grad_norm": 0.0037393071688711643, - "learning_rate": 0.0001999966989280818, - "loss": 46.0, - "step": 33838 - }, - { - "epoch": 2.587227860924747, - "grad_norm": 0.004476012662053108, - "learning_rate": 0.00019999669873291197, - "loss": 46.0, - "step": 33839 - }, - { - "epoch": 2.5873043179081368, - "grad_norm": 0.0015364629216492176, - "learning_rate": 0.00019999669853773634, - "loss": 46.0, - "step": 33840 - }, - { - "epoch": 2.5873807748915265, - "grad_norm": 0.0007751963566988707, - "learning_rate": 0.00019999669834255495, - "loss": 46.0, - "step": 33841 - }, - { - "epoch": 2.5874572318749163, - "grad_norm": 0.0038668352644890547, - "learning_rate": 0.00019999669814736783, - "loss": 46.0, - "step": 33842 - }, - { - "epoch": 2.587533688858306, - "grad_norm": 0.0022029713727533817, - "learning_rate": 0.00019999669795217492, - "loss": 46.0, - "step": 33843 - }, - { - "epoch": 2.587610145841696, - "grad_norm": 0.0037909240927547216, - "learning_rate": 0.00019999669775697625, - "loss": 46.0, - "step": 33844 - }, - { - "epoch": 2.5876866028250856, - "grad_norm": 0.002746768994256854, - "learning_rate": 0.0001999966975617718, - "loss": 46.0, - "step": 33845 - }, - { - "epoch": 2.5877630598084753, - "grad_norm": 0.0005620379815809429, - "learning_rate": 0.00019999669736656158, - "loss": 46.0, - "step": 33846 - }, - { - "epoch": 2.587839516791865, - "grad_norm": 0.00043551131966523826, - "learning_rate": 0.00019999669717134558, - "loss": 46.0, - "step": 33847 - }, - { - "epoch": 2.587915973775255, - "grad_norm": 0.0010091025615110993, - "learning_rate": 0.00019999669697612384, - "loss": 46.0, - "step": 33848 - }, - { - "epoch": 2.5879924307586446, - "grad_norm": 0.0017041726969182491, - "learning_rate": 0.00019999669678089633, - "loss": 46.0, - "step": 33849 - }, - { - "epoch": 2.5880688877420344, - "grad_norm": 0.001076510176062584, - "learning_rate": 0.000199996696585663, - "loss": 46.0, - "step": 33850 - }, - { - "epoch": 2.588145344725424, - "grad_norm": 0.0009676502668298781, - "learning_rate": 0.00019999669639042395, - "loss": 46.0, - "step": 33851 - }, - { - "epoch": 2.5882218017088134, - "grad_norm": 0.0008860592497512698, - "learning_rate": 0.00019999669619517914, - "loss": 46.0, - "step": 33852 - }, - { - "epoch": 2.588298258692203, - "grad_norm": 0.0005833989707753062, - "learning_rate": 0.0001999966959999285, - "loss": 46.0, - "step": 33853 - }, - { - "epoch": 2.588374715675593, - "grad_norm": 0.001482087536714971, - "learning_rate": 0.00019999669580467215, - "loss": 46.0, - "step": 33854 - }, - { - "epoch": 2.5884511726589827, - "grad_norm": 0.0020538996905088425, - "learning_rate": 0.00019999669560941, - "loss": 46.0, - "step": 33855 - }, - { - "epoch": 2.5885276296423725, - "grad_norm": 0.0028575328178703785, - "learning_rate": 0.0001999966954141421, - "loss": 46.0, - "step": 33856 - }, - { - "epoch": 2.5886040866257622, - "grad_norm": 0.0011522978311404586, - "learning_rate": 0.00019999669521886842, - "loss": 46.0, - "step": 33857 - }, - { - "epoch": 2.588680543609152, - "grad_norm": 0.0020093759521842003, - "learning_rate": 0.00019999669502358898, - "loss": 46.0, - "step": 33858 - }, - { - "epoch": 2.5887570005925418, - "grad_norm": 0.0018720257794484496, - "learning_rate": 0.00019999669482830375, - "loss": 46.0, - "step": 33859 - }, - { - "epoch": 2.588833457575931, - "grad_norm": 0.0005863170954398811, - "learning_rate": 0.00019999669463301279, - "loss": 46.0, - "step": 33860 - }, - { - "epoch": 2.588909914559321, - "grad_norm": 0.0005447547882795334, - "learning_rate": 0.00019999669443771602, - "loss": 46.0, - "step": 33861 - }, - { - "epoch": 2.5889863715427106, - "grad_norm": 0.0011578646954149008, - "learning_rate": 0.0001999966942424135, - "loss": 46.0, - "step": 33862 - }, - { - "epoch": 2.5890628285261004, - "grad_norm": 0.0027566482312977314, - "learning_rate": 0.0001999966940471052, - "loss": 46.0, - "step": 33863 - }, - { - "epoch": 2.58913928550949, - "grad_norm": 0.0008959327824413776, - "learning_rate": 0.00019999669385179113, - "loss": 46.0, - "step": 33864 - }, - { - "epoch": 2.58921574249288, - "grad_norm": 0.0005726159433834255, - "learning_rate": 0.0001999966936564713, - "loss": 46.0, - "step": 33865 - }, - { - "epoch": 2.5892921994762697, - "grad_norm": 0.0006823652074672282, - "learning_rate": 0.00019999669346114569, - "loss": 46.0, - "step": 33866 - }, - { - "epoch": 2.5893686564596594, - "grad_norm": 0.0037108648102730513, - "learning_rate": 0.0001999966932658143, - "loss": 46.0, - "step": 33867 - }, - { - "epoch": 2.589445113443049, - "grad_norm": 0.003020165953785181, - "learning_rate": 0.00019999669307047718, - "loss": 46.0, - "step": 33868 - }, - { - "epoch": 2.589521570426439, - "grad_norm": 0.005767005030065775, - "learning_rate": 0.00019999669287513428, - "loss": 46.0, - "step": 33869 - }, - { - "epoch": 2.5895980274098287, - "grad_norm": 0.0068554868921637535, - "learning_rate": 0.0001999966926797856, - "loss": 46.0, - "step": 33870 - }, - { - "epoch": 2.5896744843932185, - "grad_norm": 0.000507902936078608, - "learning_rate": 0.00019999669248443113, - "loss": 46.0, - "step": 33871 - }, - { - "epoch": 2.589750941376608, - "grad_norm": 0.000857727078255266, - "learning_rate": 0.00019999669228907093, - "loss": 46.0, - "step": 33872 - }, - { - "epoch": 2.589827398359998, - "grad_norm": 0.0010478412732481956, - "learning_rate": 0.00019999669209370494, - "loss": 46.0, - "step": 33873 - }, - { - "epoch": 2.5899038553433873, - "grad_norm": 0.0006254063337109983, - "learning_rate": 0.0001999966918983332, - "loss": 46.0, - "step": 33874 - }, - { - "epoch": 2.589980312326777, - "grad_norm": 0.0029933182522654533, - "learning_rate": 0.00019999669170295566, - "loss": 46.0, - "step": 33875 - }, - { - "epoch": 2.590056769310167, - "grad_norm": 0.0027603567577898502, - "learning_rate": 0.00019999669150757234, - "loss": 46.0, - "step": 33876 - }, - { - "epoch": 2.5901332262935566, - "grad_norm": 0.004642440937459469, - "learning_rate": 0.0001999966913121833, - "loss": 46.0, - "step": 33877 - }, - { - "epoch": 2.5902096832769463, - "grad_norm": 0.000852566328831017, - "learning_rate": 0.00019999669111678845, - "loss": 46.0, - "step": 33878 - }, - { - "epoch": 2.590286140260336, - "grad_norm": 0.002421814249828458, - "learning_rate": 0.00019999669092138787, - "loss": 46.0, - "step": 33879 - }, - { - "epoch": 2.590362597243726, - "grad_norm": 0.0021579849999397993, - "learning_rate": 0.00019999669072598148, - "loss": 46.0, - "step": 33880 - }, - { - "epoch": 2.5904390542271156, - "grad_norm": 0.001946039847098291, - "learning_rate": 0.00019999669053056933, - "loss": 46.0, - "step": 33881 - }, - { - "epoch": 2.590515511210505, - "grad_norm": 0.0004008824471384287, - "learning_rate": 0.00019999669033515143, - "loss": 46.0, - "step": 33882 - }, - { - "epoch": 2.5905919681938947, - "grad_norm": 0.0016343913739547133, - "learning_rate": 0.00019999669013972776, - "loss": 46.0, - "step": 33883 - }, - { - "epoch": 2.5906684251772845, - "grad_norm": 0.0013753061648458242, - "learning_rate": 0.0001999966899442983, - "loss": 46.0, - "step": 33884 - }, - { - "epoch": 2.5907448821606742, - "grad_norm": 0.0008443729602731764, - "learning_rate": 0.0001999966897488631, - "loss": 46.0, - "step": 33885 - }, - { - "epoch": 2.590821339144064, - "grad_norm": 0.0020147201139479876, - "learning_rate": 0.00019999668955342207, - "loss": 46.0, - "step": 33886 - }, - { - "epoch": 2.5908977961274537, - "grad_norm": 0.001294368994422257, - "learning_rate": 0.00019999668935797533, - "loss": 46.0, - "step": 33887 - }, - { - "epoch": 2.5909742531108435, - "grad_norm": 0.000877061509527266, - "learning_rate": 0.0001999966891625228, - "loss": 46.0, - "step": 33888 - }, - { - "epoch": 2.5910507100942333, - "grad_norm": 0.0011326199164614081, - "learning_rate": 0.00019999668896706453, - "loss": 46.0, - "step": 33889 - }, - { - "epoch": 2.591127167077623, - "grad_norm": 0.0012657109182327986, - "learning_rate": 0.00019999668877160044, - "loss": 46.0, - "step": 33890 - }, - { - "epoch": 2.591203624061013, - "grad_norm": 0.0007595903007313609, - "learning_rate": 0.0001999966885761306, - "loss": 46.0, - "step": 33891 - }, - { - "epoch": 2.5912800810444026, - "grad_norm": 0.0013786893105134368, - "learning_rate": 0.000199996688380655, - "loss": 46.0, - "step": 33892 - }, - { - "epoch": 2.5913565380277923, - "grad_norm": 0.0011777592590078712, - "learning_rate": 0.00019999668818517364, - "loss": 46.0, - "step": 33893 - }, - { - "epoch": 2.591432995011182, - "grad_norm": 0.0014845497207716107, - "learning_rate": 0.0001999966879896865, - "loss": 46.0, - "step": 33894 - }, - { - "epoch": 2.5915094519945714, - "grad_norm": 0.001108934753574431, - "learning_rate": 0.0001999966877941936, - "loss": 46.0, - "step": 33895 - }, - { - "epoch": 2.591585908977961, - "grad_norm": 0.0019122897647321224, - "learning_rate": 0.0001999966875986949, - "loss": 46.0, - "step": 33896 - }, - { - "epoch": 2.591662365961351, - "grad_norm": 0.0029160024132579565, - "learning_rate": 0.00019999668740319044, - "loss": 46.0, - "step": 33897 - }, - { - "epoch": 2.5917388229447407, - "grad_norm": 0.003632324282079935, - "learning_rate": 0.00019999668720768022, - "loss": 46.0, - "step": 33898 - }, - { - "epoch": 2.5918152799281304, - "grad_norm": 0.003607020480558276, - "learning_rate": 0.00019999668701216425, - "loss": 46.0, - "step": 33899 - }, - { - "epoch": 2.59189173691152, - "grad_norm": 0.00321124866604805, - "learning_rate": 0.0001999966868166425, - "loss": 46.0, - "step": 33900 - }, - { - "epoch": 2.59196819389491, - "grad_norm": 0.0006294617196545005, - "learning_rate": 0.00019999668662111497, - "loss": 46.0, - "step": 33901 - }, - { - "epoch": 2.5920446508782997, - "grad_norm": 0.0020169615745544434, - "learning_rate": 0.00019999668642558166, - "loss": 46.0, - "step": 33902 - }, - { - "epoch": 2.5921211078616895, - "grad_norm": 0.001106072450056672, - "learning_rate": 0.0001999966862300426, - "loss": 46.0, - "step": 33903 - }, - { - "epoch": 2.592197564845079, - "grad_norm": 0.0036720754578709602, - "learning_rate": 0.00019999668603449777, - "loss": 46.0, - "step": 33904 - }, - { - "epoch": 2.5922740218284686, - "grad_norm": 0.0009459693101234734, - "learning_rate": 0.00019999668583894719, - "loss": 46.0, - "step": 33905 - }, - { - "epoch": 2.5923504788118583, - "grad_norm": 0.0033843584824353456, - "learning_rate": 0.0001999966856433908, - "loss": 46.0, - "step": 33906 - }, - { - "epoch": 2.592426935795248, - "grad_norm": 0.0013822912005707622, - "learning_rate": 0.00019999668544782865, - "loss": 46.0, - "step": 33907 - }, - { - "epoch": 2.592503392778638, - "grad_norm": 0.0008619792060926557, - "learning_rate": 0.00019999668525226075, - "loss": 46.0, - "step": 33908 - }, - { - "epoch": 2.5925798497620276, - "grad_norm": 0.0005219600861892104, - "learning_rate": 0.00019999668505668705, - "loss": 46.0, - "step": 33909 - }, - { - "epoch": 2.5926563067454174, - "grad_norm": 0.0026846961118280888, - "learning_rate": 0.0001999966848611076, - "loss": 46.0, - "step": 33910 - }, - { - "epoch": 2.592732763728807, - "grad_norm": 0.0019448735984042287, - "learning_rate": 0.00019999668466552238, - "loss": 46.0, - "step": 33911 - }, - { - "epoch": 2.592809220712197, - "grad_norm": 0.005883508827537298, - "learning_rate": 0.0001999966844699314, - "loss": 46.0, - "step": 33912 - }, - { - "epoch": 2.5928856776955866, - "grad_norm": 0.0012319523375481367, - "learning_rate": 0.00019999668427433465, - "loss": 46.0, - "step": 33913 - }, - { - "epoch": 2.5929621346789764, - "grad_norm": 0.0009735599160194397, - "learning_rate": 0.0001999966840787321, - "loss": 46.0, - "step": 33914 - }, - { - "epoch": 2.593038591662366, - "grad_norm": 0.0007737409323453903, - "learning_rate": 0.0001999966838831238, - "loss": 46.0, - "step": 33915 - }, - { - "epoch": 2.593115048645756, - "grad_norm": 0.001422416535206139, - "learning_rate": 0.00019999668368750974, - "loss": 46.0, - "step": 33916 - }, - { - "epoch": 2.5931915056291452, - "grad_norm": 0.0009661955991759896, - "learning_rate": 0.0001999966834918899, - "loss": 46.0, - "step": 33917 - }, - { - "epoch": 2.593267962612535, - "grad_norm": 0.0022469093091785908, - "learning_rate": 0.0001999966832962643, - "loss": 46.0, - "step": 33918 - }, - { - "epoch": 2.5933444195959248, - "grad_norm": 0.0009600328048691154, - "learning_rate": 0.00019999668310063294, - "loss": 46.0, - "step": 33919 - }, - { - "epoch": 2.5934208765793145, - "grad_norm": 0.0019555389881134033, - "learning_rate": 0.0001999966829049958, - "loss": 46.0, - "step": 33920 - }, - { - "epoch": 2.5934973335627043, - "grad_norm": 0.0006724236882291734, - "learning_rate": 0.0001999966827093529, - "loss": 46.0, - "step": 33921 - }, - { - "epoch": 2.593573790546094, - "grad_norm": 0.0019694152288138866, - "learning_rate": 0.0001999966825137042, - "loss": 46.0, - "step": 33922 - }, - { - "epoch": 2.593650247529484, - "grad_norm": 0.002382457023486495, - "learning_rate": 0.00019999668231804974, - "loss": 46.0, - "step": 33923 - }, - { - "epoch": 2.5937267045128736, - "grad_norm": 0.0046404823660850525, - "learning_rate": 0.00019999668212238953, - "loss": 46.0, - "step": 33924 - }, - { - "epoch": 2.593803161496263, - "grad_norm": 0.001402257476001978, - "learning_rate": 0.00019999668192672353, - "loss": 46.0, - "step": 33925 - }, - { - "epoch": 2.5938796184796526, - "grad_norm": 0.0011917828815057874, - "learning_rate": 0.00019999668173105177, - "loss": 46.0, - "step": 33926 - }, - { - "epoch": 2.5939560754630424, - "grad_norm": 0.003565862076357007, - "learning_rate": 0.00019999668153537425, - "loss": 46.0, - "step": 33927 - }, - { - "epoch": 2.594032532446432, - "grad_norm": 0.0021697422489523888, - "learning_rate": 0.00019999668133969097, - "loss": 46.0, - "step": 33928 - }, - { - "epoch": 2.594108989429822, - "grad_norm": 0.002213774248957634, - "learning_rate": 0.0001999966811440019, - "loss": 46.0, - "step": 33929 - }, - { - "epoch": 2.5941854464132117, - "grad_norm": 0.0020207951311022043, - "learning_rate": 0.00019999668094830705, - "loss": 46.0, - "step": 33930 - }, - { - "epoch": 2.5942619033966015, - "grad_norm": 0.0025760820135474205, - "learning_rate": 0.00019999668075260644, - "loss": 46.0, - "step": 33931 - }, - { - "epoch": 2.594338360379991, - "grad_norm": 0.0029874928295612335, - "learning_rate": 0.00019999668055690006, - "loss": 46.0, - "step": 33932 - }, - { - "epoch": 2.594414817363381, - "grad_norm": 0.0019100755453109741, - "learning_rate": 0.00019999668036118796, - "loss": 46.0, - "step": 33933 - }, - { - "epoch": 2.5944912743467707, - "grad_norm": 0.0020764644723385572, - "learning_rate": 0.00019999668016547, - "loss": 46.0, - "step": 33934 - }, - { - "epoch": 2.5945677313301605, - "grad_norm": 0.0012909704819321632, - "learning_rate": 0.00019999667996974634, - "loss": 46.0, - "step": 33935 - }, - { - "epoch": 2.5946441883135503, - "grad_norm": 0.0033717325422912836, - "learning_rate": 0.0001999966797740169, - "loss": 46.0, - "step": 33936 - }, - { - "epoch": 2.59472064529694, - "grad_norm": 0.0011126762256026268, - "learning_rate": 0.00019999667957828167, - "loss": 46.0, - "step": 33937 - }, - { - "epoch": 2.59479710228033, - "grad_norm": 0.0026965197175741196, - "learning_rate": 0.00019999667938254068, - "loss": 46.0, - "step": 33938 - }, - { - "epoch": 2.594873559263719, - "grad_norm": 0.0013524913229048252, - "learning_rate": 0.0001999966791867939, - "loss": 46.0, - "step": 33939 - }, - { - "epoch": 2.594950016247109, - "grad_norm": 0.0035900238435715437, - "learning_rate": 0.00019999667899104137, - "loss": 46.0, - "step": 33940 - }, - { - "epoch": 2.5950264732304986, - "grad_norm": 0.0008633043034933507, - "learning_rate": 0.0001999966787952831, - "loss": 46.0, - "step": 33941 - }, - { - "epoch": 2.5951029302138884, - "grad_norm": 0.0011326251551508904, - "learning_rate": 0.00019999667859951903, - "loss": 46.0, - "step": 33942 - }, - { - "epoch": 2.595179387197278, - "grad_norm": 0.0024322012905031443, - "learning_rate": 0.0001999966784037492, - "loss": 46.0, - "step": 33943 - }, - { - "epoch": 2.595255844180668, - "grad_norm": 0.0012551966356113553, - "learning_rate": 0.0001999966782079736, - "loss": 46.0, - "step": 33944 - }, - { - "epoch": 2.5953323011640577, - "grad_norm": 0.0033246458042412996, - "learning_rate": 0.0001999966780121922, - "loss": 46.0, - "step": 33945 - }, - { - "epoch": 2.5954087581474474, - "grad_norm": 0.0011773412115871906, - "learning_rate": 0.00019999667781640508, - "loss": 46.0, - "step": 33946 - }, - { - "epoch": 2.5954852151308367, - "grad_norm": 0.0021568629890680313, - "learning_rate": 0.00019999667762061213, - "loss": 46.0, - "step": 33947 - }, - { - "epoch": 2.5955616721142265, - "grad_norm": 0.0020415030885487795, - "learning_rate": 0.00019999667742481348, - "loss": 46.0, - "step": 33948 - }, - { - "epoch": 2.5956381290976163, - "grad_norm": 0.0015277519123628736, - "learning_rate": 0.00019999667722900904, - "loss": 46.0, - "step": 33949 - }, - { - "epoch": 2.595714586081006, - "grad_norm": 0.0013656937517225742, - "learning_rate": 0.0001999966770331988, - "loss": 46.0, - "step": 33950 - }, - { - "epoch": 2.595791043064396, - "grad_norm": 0.001709373202174902, - "learning_rate": 0.0001999966768373828, - "loss": 46.0, - "step": 33951 - }, - { - "epoch": 2.5958675000477855, - "grad_norm": 0.0029922376852482557, - "learning_rate": 0.00019999667664156106, - "loss": 46.0, - "step": 33952 - }, - { - "epoch": 2.5959439570311753, - "grad_norm": 0.0014170288341119885, - "learning_rate": 0.00019999667644573352, - "loss": 46.0, - "step": 33953 - }, - { - "epoch": 2.596020414014565, - "grad_norm": 0.0010529591236263514, - "learning_rate": 0.00019999667624990024, - "loss": 46.0, - "step": 33954 - }, - { - "epoch": 2.596096870997955, - "grad_norm": 0.0014678220031782985, - "learning_rate": 0.00019999667605406115, - "loss": 46.0, - "step": 33955 - }, - { - "epoch": 2.5961733279813446, - "grad_norm": 0.0017287025693804026, - "learning_rate": 0.00019999667585821632, - "loss": 46.0, - "step": 33956 - }, - { - "epoch": 2.5962497849647344, - "grad_norm": 0.0024102828465402126, - "learning_rate": 0.00019999667566236574, - "loss": 46.0, - "step": 33957 - }, - { - "epoch": 2.596326241948124, - "grad_norm": 0.006789756938815117, - "learning_rate": 0.00019999667546650933, - "loss": 46.0, - "step": 33958 - }, - { - "epoch": 2.596402698931514, - "grad_norm": 0.0015074698021635413, - "learning_rate": 0.0001999966752706472, - "loss": 46.0, - "step": 33959 - }, - { - "epoch": 2.5964791559149036, - "grad_norm": 0.0037692601326853037, - "learning_rate": 0.00019999667507477928, - "loss": 46.0, - "step": 33960 - }, - { - "epoch": 2.596555612898293, - "grad_norm": 0.0010296533582732081, - "learning_rate": 0.00019999667487890558, - "loss": 46.0, - "step": 33961 - }, - { - "epoch": 2.5966320698816827, - "grad_norm": 0.0011312720598652959, - "learning_rate": 0.00019999667468302614, - "loss": 46.0, - "step": 33962 - }, - { - "epoch": 2.5967085268650725, - "grad_norm": 0.004012007731944323, - "learning_rate": 0.00019999667448714092, - "loss": 46.0, - "step": 33963 - }, - { - "epoch": 2.5967849838484622, - "grad_norm": 0.002886988688260317, - "learning_rate": 0.00019999667429124993, - "loss": 46.0, - "step": 33964 - }, - { - "epoch": 2.596861440831852, - "grad_norm": 0.0023925844579935074, - "learning_rate": 0.00019999667409535316, - "loss": 46.0, - "step": 33965 - }, - { - "epoch": 2.5969378978152418, - "grad_norm": 0.0028296641539782286, - "learning_rate": 0.00019999667389945065, - "loss": 46.0, - "step": 33966 - }, - { - "epoch": 2.5970143547986315, - "grad_norm": 0.0014572347281500697, - "learning_rate": 0.00019999667370354236, - "loss": 46.0, - "step": 33967 - }, - { - "epoch": 2.5970908117820213, - "grad_norm": 0.002130937995389104, - "learning_rate": 0.00019999667350762828, - "loss": 46.0, - "step": 33968 - }, - { - "epoch": 2.5971672687654106, - "grad_norm": 0.0013775993138551712, - "learning_rate": 0.00019999667331170847, - "loss": 46.0, - "step": 33969 - }, - { - "epoch": 2.5972437257488004, - "grad_norm": 0.002277441555634141, - "learning_rate": 0.00019999667311578284, - "loss": 46.0, - "step": 33970 - }, - { - "epoch": 2.59732018273219, - "grad_norm": 0.0018924273317679763, - "learning_rate": 0.00019999667291985146, - "loss": 46.0, - "step": 33971 - }, - { - "epoch": 2.59739663971558, - "grad_norm": 0.005080229137092829, - "learning_rate": 0.00019999667272391434, - "loss": 46.0, - "step": 33972 - }, - { - "epoch": 2.5974730966989696, - "grad_norm": 0.0018999003805220127, - "learning_rate": 0.0001999966725279714, - "loss": 46.0, - "step": 33973 - }, - { - "epoch": 2.5975495536823594, - "grad_norm": 0.0007585359853692353, - "learning_rate": 0.00019999667233202274, - "loss": 46.0, - "step": 33974 - }, - { - "epoch": 2.597626010665749, - "grad_norm": 0.0010942388325929642, - "learning_rate": 0.00019999667213606827, - "loss": 46.0, - "step": 33975 - }, - { - "epoch": 2.597702467649139, - "grad_norm": 0.0010093394666910172, - "learning_rate": 0.00019999667194010805, - "loss": 46.0, - "step": 33976 - }, - { - "epoch": 2.5977789246325287, - "grad_norm": 0.0016063363291323185, - "learning_rate": 0.00019999667174414208, - "loss": 46.0, - "step": 33977 - }, - { - "epoch": 2.5978553816159184, - "grad_norm": 0.004621224012225866, - "learning_rate": 0.0001999966715481703, - "loss": 46.0, - "step": 33978 - }, - { - "epoch": 2.597931838599308, - "grad_norm": 0.0021016260143369436, - "learning_rate": 0.00019999667135219275, - "loss": 46.0, - "step": 33979 - }, - { - "epoch": 2.598008295582698, - "grad_norm": 0.0015274283941835165, - "learning_rate": 0.0001999966711562095, - "loss": 46.0, - "step": 33980 - }, - { - "epoch": 2.5980847525660877, - "grad_norm": 0.0007747607305645943, - "learning_rate": 0.00019999667096022038, - "loss": 46.0, - "step": 33981 - }, - { - "epoch": 2.5981612095494775, - "grad_norm": 0.002963385544717312, - "learning_rate": 0.00019999667076422558, - "loss": 46.0, - "step": 33982 - }, - { - "epoch": 2.598237666532867, - "grad_norm": 0.0015990951796993613, - "learning_rate": 0.00019999667056822497, - "loss": 46.0, - "step": 33983 - }, - { - "epoch": 2.5983141235162566, - "grad_norm": 0.0021712330635637045, - "learning_rate": 0.0001999966703722186, - "loss": 46.0, - "step": 33984 - }, - { - "epoch": 2.5983905804996463, - "grad_norm": 0.0018673304002732038, - "learning_rate": 0.00019999667017620641, - "loss": 46.0, - "step": 33985 - }, - { - "epoch": 2.598467037483036, - "grad_norm": 0.003089440520852804, - "learning_rate": 0.00019999666998018852, - "loss": 46.0, - "step": 33986 - }, - { - "epoch": 2.598543494466426, - "grad_norm": 0.0027108315844088793, - "learning_rate": 0.00019999666978416484, - "loss": 46.0, - "step": 33987 - }, - { - "epoch": 2.5986199514498156, - "grad_norm": 0.0011818775674328208, - "learning_rate": 0.0001999966695881354, - "loss": 46.0, - "step": 33988 - }, - { - "epoch": 2.5986964084332054, - "grad_norm": 0.004194196779280901, - "learning_rate": 0.00019999666939210018, - "loss": 46.0, - "step": 33989 - }, - { - "epoch": 2.598772865416595, - "grad_norm": 0.0020329616963863373, - "learning_rate": 0.00019999666919605916, - "loss": 46.0, - "step": 33990 - }, - { - "epoch": 2.5988493223999845, - "grad_norm": 0.0012380784610286355, - "learning_rate": 0.00019999666900001243, - "loss": 46.0, - "step": 33991 - }, - { - "epoch": 2.598925779383374, - "grad_norm": 0.0014517760137096047, - "learning_rate": 0.0001999966688039599, - "loss": 46.0, - "step": 33992 - }, - { - "epoch": 2.599002236366764, - "grad_norm": 0.0012300026137381792, - "learning_rate": 0.00019999666860790158, - "loss": 46.0, - "step": 33993 - }, - { - "epoch": 2.5990786933501537, - "grad_norm": 0.0010018979664891958, - "learning_rate": 0.00019999666841183752, - "loss": 46.0, - "step": 33994 - }, - { - "epoch": 2.5991551503335435, - "grad_norm": 0.0036307142581790686, - "learning_rate": 0.00019999666821576769, - "loss": 46.0, - "step": 33995 - }, - { - "epoch": 2.5992316073169333, - "grad_norm": 0.0016579385846853256, - "learning_rate": 0.00019999666801969208, - "loss": 46.0, - "step": 33996 - }, - { - "epoch": 2.599308064300323, - "grad_norm": 0.0024304785765707493, - "learning_rate": 0.0001999966678236107, - "loss": 46.0, - "step": 33997 - }, - { - "epoch": 2.599384521283713, - "grad_norm": 0.0007768187206238508, - "learning_rate": 0.00019999666762752355, - "loss": 46.0, - "step": 33998 - }, - { - "epoch": 2.5994609782671025, - "grad_norm": 0.0021852378267794847, - "learning_rate": 0.00019999666743143063, - "loss": 46.0, - "step": 33999 - }, - { - "epoch": 2.5995374352504923, - "grad_norm": 0.0020817629992961884, - "learning_rate": 0.00019999666723533196, - "loss": 46.0, - "step": 34000 - }, - { - "epoch": 2.599613892233882, - "grad_norm": 0.0021746279671788216, - "learning_rate": 0.0001999966670392275, - "loss": 46.0, - "step": 34001 - }, - { - "epoch": 2.599690349217272, - "grad_norm": 0.003117659827694297, - "learning_rate": 0.0001999966668431173, - "loss": 46.0, - "step": 34002 - }, - { - "epoch": 2.5997668062006616, - "grad_norm": 0.004377887584269047, - "learning_rate": 0.00019999666664700127, - "loss": 46.0, - "step": 34003 - }, - { - "epoch": 2.5998432631840513, - "grad_norm": 0.0021769730374217033, - "learning_rate": 0.00019999666645087954, - "loss": 46.0, - "step": 34004 - }, - { - "epoch": 2.5999197201674407, - "grad_norm": 0.0030063041485846043, - "learning_rate": 0.000199996666254752, - "loss": 46.0, - "step": 34005 - }, - { - "epoch": 2.5999961771508304, - "grad_norm": 0.0011040999088436365, - "learning_rate": 0.0001999966660586187, - "loss": 46.0, - "step": 34006 - }, - { - "epoch": 2.60007263413422, - "grad_norm": 0.0009236817131750286, - "learning_rate": 0.00019999666586247963, - "loss": 46.0, - "step": 34007 - }, - { - "epoch": 2.60014909111761, - "grad_norm": 0.0016864055069163442, - "learning_rate": 0.0001999966656663348, - "loss": 46.0, - "step": 34008 - }, - { - "epoch": 2.6002255481009997, - "grad_norm": 0.0010945064714178443, - "learning_rate": 0.0001999966654701842, - "loss": 46.0, - "step": 34009 - }, - { - "epoch": 2.6003020050843895, - "grad_norm": 0.00214216741733253, - "learning_rate": 0.00019999666527402782, - "loss": 46.0, - "step": 34010 - }, - { - "epoch": 2.6003784620677792, - "grad_norm": 0.003336291527375579, - "learning_rate": 0.00019999666507786567, - "loss": 46.0, - "step": 34011 - }, - { - "epoch": 2.600454919051169, - "grad_norm": 0.0020719943568110466, - "learning_rate": 0.00019999666488169774, - "loss": 46.0, - "step": 34012 - }, - { - "epoch": 2.6005313760345583, - "grad_norm": 0.004970516078174114, - "learning_rate": 0.00019999666468552407, - "loss": 46.0, - "step": 34013 - }, - { - "epoch": 2.600607833017948, - "grad_norm": 0.006673033349215984, - "learning_rate": 0.0001999966644893446, - "loss": 46.0, - "step": 34014 - }, - { - "epoch": 2.600684290001338, - "grad_norm": 0.0027091046795248985, - "learning_rate": 0.0001999966642931594, - "loss": 46.0, - "step": 34015 - }, - { - "epoch": 2.6007607469847276, - "grad_norm": 0.0013261794811114669, - "learning_rate": 0.0001999966640969684, - "loss": 46.0, - "step": 34016 - }, - { - "epoch": 2.6008372039681174, - "grad_norm": 0.0004412404086906463, - "learning_rate": 0.00019999666390077163, - "loss": 46.0, - "step": 34017 - }, - { - "epoch": 2.600913660951507, - "grad_norm": 0.0014693073462694883, - "learning_rate": 0.00019999666370456911, - "loss": 46.0, - "step": 34018 - }, - { - "epoch": 2.600990117934897, - "grad_norm": 0.0025201099924743176, - "learning_rate": 0.0001999966635083608, - "loss": 46.0, - "step": 34019 - }, - { - "epoch": 2.6010665749182866, - "grad_norm": 0.002558163134381175, - "learning_rate": 0.00019999666331214675, - "loss": 46.0, - "step": 34020 - }, - { - "epoch": 2.6011430319016764, - "grad_norm": 0.0022936142049729824, - "learning_rate": 0.00019999666311592692, - "loss": 46.0, - "step": 34021 - }, - { - "epoch": 2.601219488885066, - "grad_norm": 0.0013978230999782681, - "learning_rate": 0.00019999666291970128, - "loss": 46.0, - "step": 34022 - }, - { - "epoch": 2.601295945868456, - "grad_norm": 0.0016516291070729494, - "learning_rate": 0.0001999966627234699, - "loss": 46.0, - "step": 34023 - }, - { - "epoch": 2.6013724028518457, - "grad_norm": 0.001629309612326324, - "learning_rate": 0.00019999666252723278, - "loss": 46.0, - "step": 34024 - }, - { - "epoch": 2.6014488598352354, - "grad_norm": 0.0021412211935967207, - "learning_rate": 0.00019999666233098986, - "loss": 46.0, - "step": 34025 - }, - { - "epoch": 2.6015253168186248, - "grad_norm": 0.0010138375218957663, - "learning_rate": 0.00019999666213474116, - "loss": 46.0, - "step": 34026 - }, - { - "epoch": 2.6016017738020145, - "grad_norm": 0.004015614278614521, - "learning_rate": 0.00019999666193848672, - "loss": 46.0, - "step": 34027 - }, - { - "epoch": 2.6016782307854043, - "grad_norm": 0.0007702329312451184, - "learning_rate": 0.0001999966617422265, - "loss": 46.0, - "step": 34028 - }, - { - "epoch": 2.601754687768794, - "grad_norm": 0.002063112799078226, - "learning_rate": 0.00019999666154596048, - "loss": 46.0, - "step": 34029 - }, - { - "epoch": 2.601831144752184, - "grad_norm": 0.001537511358037591, - "learning_rate": 0.00019999666134968872, - "loss": 46.0, - "step": 34030 - }, - { - "epoch": 2.6019076017355736, - "grad_norm": 0.001073982915841043, - "learning_rate": 0.0001999966611534112, - "loss": 46.0, - "step": 34031 - }, - { - "epoch": 2.6019840587189633, - "grad_norm": 0.0009681865922175348, - "learning_rate": 0.0001999966609571279, - "loss": 46.0, - "step": 34032 - }, - { - "epoch": 2.602060515702353, - "grad_norm": 0.001937026041559875, - "learning_rate": 0.00019999666076083884, - "loss": 46.0, - "step": 34033 - }, - { - "epoch": 2.602136972685743, - "grad_norm": 0.0020606957841664553, - "learning_rate": 0.00019999666056454398, - "loss": 46.0, - "step": 34034 - }, - { - "epoch": 2.602213429669132, - "grad_norm": 0.0016524831298738718, - "learning_rate": 0.00019999666036824338, - "loss": 46.0, - "step": 34035 - }, - { - "epoch": 2.602289886652522, - "grad_norm": 0.0014185130130499601, - "learning_rate": 0.000199996660171937, - "loss": 46.0, - "step": 34036 - }, - { - "epoch": 2.6023663436359117, - "grad_norm": 0.002785137854516506, - "learning_rate": 0.00019999665997562488, - "loss": 46.0, - "step": 34037 - }, - { - "epoch": 2.6024428006193014, - "grad_norm": 0.0029016488697379827, - "learning_rate": 0.00019999665977930695, - "loss": 46.0, - "step": 34038 - }, - { - "epoch": 2.602519257602691, - "grad_norm": 0.0011367335682734847, - "learning_rate": 0.00019999665958298326, - "loss": 46.0, - "step": 34039 - }, - { - "epoch": 2.602595714586081, - "grad_norm": 0.001681118505075574, - "learning_rate": 0.00019999665938665379, - "loss": 46.0, - "step": 34040 - }, - { - "epoch": 2.6026721715694707, - "grad_norm": 0.0014663425972685218, - "learning_rate": 0.00019999665919031857, - "loss": 46.0, - "step": 34041 - }, - { - "epoch": 2.6027486285528605, - "grad_norm": 0.0017261416651308537, - "learning_rate": 0.0001999966589939776, - "loss": 46.0, - "step": 34042 - }, - { - "epoch": 2.6028250855362502, - "grad_norm": 0.0016541347140446305, - "learning_rate": 0.00019999665879763084, - "loss": 46.0, - "step": 34043 - }, - { - "epoch": 2.60290154251964, - "grad_norm": 0.0010272046783939004, - "learning_rate": 0.0001999966586012783, - "loss": 46.0, - "step": 34044 - }, - { - "epoch": 2.6029779995030298, - "grad_norm": 0.0005189761868678033, - "learning_rate": 0.00019999665840492, - "loss": 46.0, - "step": 34045 - }, - { - "epoch": 2.6030544564864195, - "grad_norm": 0.0015348091255873442, - "learning_rate": 0.00019999665820855594, - "loss": 46.0, - "step": 34046 - }, - { - "epoch": 2.6031309134698093, - "grad_norm": 0.001924305222928524, - "learning_rate": 0.0001999966580121861, - "loss": 46.0, - "step": 34047 - }, - { - "epoch": 2.6032073704531986, - "grad_norm": 0.0012631723657250404, - "learning_rate": 0.00019999665781581048, - "loss": 46.0, - "step": 34048 - }, - { - "epoch": 2.6032838274365884, - "grad_norm": 0.0016728206537663937, - "learning_rate": 0.00019999665761942913, - "loss": 46.0, - "step": 34049 - }, - { - "epoch": 2.603360284419978, - "grad_norm": 0.00212831050157547, - "learning_rate": 0.00019999665742304195, - "loss": 46.0, - "step": 34050 - }, - { - "epoch": 2.603436741403368, - "grad_norm": 0.0016493863658979535, - "learning_rate": 0.00019999665722664905, - "loss": 46.0, - "step": 34051 - }, - { - "epoch": 2.6035131983867577, - "grad_norm": 0.0008678300655446947, - "learning_rate": 0.00019999665703025036, - "loss": 46.0, - "step": 34052 - }, - { - "epoch": 2.6035896553701474, - "grad_norm": 0.0013137904461473227, - "learning_rate": 0.00019999665683384589, - "loss": 46.0, - "step": 34053 - }, - { - "epoch": 2.603666112353537, - "grad_norm": 0.0011362849036231637, - "learning_rate": 0.00019999665663743567, - "loss": 46.0, - "step": 34054 - }, - { - "epoch": 2.603742569336927, - "grad_norm": 0.004044428933411837, - "learning_rate": 0.00019999665644101968, - "loss": 46.0, - "step": 34055 - }, - { - "epoch": 2.6038190263203163, - "grad_norm": 0.0015811502235010266, - "learning_rate": 0.00019999665624459794, - "loss": 46.0, - "step": 34056 - }, - { - "epoch": 2.603895483303706, - "grad_norm": 0.0016064064111560583, - "learning_rate": 0.0001999966560481704, - "loss": 46.0, - "step": 34057 - }, - { - "epoch": 2.6039719402870958, - "grad_norm": 0.0035136446822434664, - "learning_rate": 0.00019999665585173707, - "loss": 46.0, - "step": 34058 - }, - { - "epoch": 2.6040483972704855, - "grad_norm": 0.0018162113847211003, - "learning_rate": 0.000199996655655298, - "loss": 46.0, - "step": 34059 - }, - { - "epoch": 2.6041248542538753, - "grad_norm": 0.00223892112262547, - "learning_rate": 0.00019999665545885318, - "loss": 46.0, - "step": 34060 - }, - { - "epoch": 2.604201311237265, - "grad_norm": 0.0017682722536846995, - "learning_rate": 0.00019999665526240255, - "loss": 46.0, - "step": 34061 - }, - { - "epoch": 2.604277768220655, - "grad_norm": 0.0018040291033685207, - "learning_rate": 0.00019999665506594618, - "loss": 46.0, - "step": 34062 - }, - { - "epoch": 2.6043542252040446, - "grad_norm": 0.0020374213345348835, - "learning_rate": 0.00019999665486948405, - "loss": 46.0, - "step": 34063 - }, - { - "epoch": 2.6044306821874343, - "grad_norm": 0.007367412094026804, - "learning_rate": 0.00019999665467301613, - "loss": 46.0, - "step": 34064 - }, - { - "epoch": 2.604507139170824, - "grad_norm": 0.0012284088879823685, - "learning_rate": 0.00019999665447654246, - "loss": 46.0, - "step": 34065 - }, - { - "epoch": 2.604583596154214, - "grad_norm": 0.001375415246002376, - "learning_rate": 0.000199996654280063, - "loss": 46.0, - "step": 34066 - }, - { - "epoch": 2.6046600531376036, - "grad_norm": 0.0028562911320477724, - "learning_rate": 0.00019999665408357777, - "loss": 46.0, - "step": 34067 - }, - { - "epoch": 2.6047365101209934, - "grad_norm": 0.0009460800793021917, - "learning_rate": 0.00019999665388708678, - "loss": 46.0, - "step": 34068 - }, - { - "epoch": 2.604812967104383, - "grad_norm": 0.0012787526939064264, - "learning_rate": 0.00019999665369059, - "loss": 46.0, - "step": 34069 - }, - { - "epoch": 2.6048894240877725, - "grad_norm": 0.0011006894055753946, - "learning_rate": 0.00019999665349408748, - "loss": 46.0, - "step": 34070 - }, - { - "epoch": 2.6049658810711622, - "grad_norm": 0.0007063025841489434, - "learning_rate": 0.00019999665329757917, - "loss": 46.0, - "step": 34071 - }, - { - "epoch": 2.605042338054552, - "grad_norm": 0.0017444679979234934, - "learning_rate": 0.0001999966531010651, - "loss": 46.0, - "step": 34072 - }, - { - "epoch": 2.6051187950379417, - "grad_norm": 0.00123389414511621, - "learning_rate": 0.00019999665290454526, - "loss": 46.0, - "step": 34073 - }, - { - "epoch": 2.6051952520213315, - "grad_norm": 0.004127791617065668, - "learning_rate": 0.00019999665270801966, - "loss": 46.0, - "step": 34074 - }, - { - "epoch": 2.6052717090047213, - "grad_norm": 0.0009403133881278336, - "learning_rate": 0.00019999665251148826, - "loss": 46.0, - "step": 34075 - }, - { - "epoch": 2.605348165988111, - "grad_norm": 0.002468319144099951, - "learning_rate": 0.00019999665231495113, - "loss": 46.0, - "step": 34076 - }, - { - "epoch": 2.605424622971501, - "grad_norm": 0.001703559304587543, - "learning_rate": 0.0001999966521184082, - "loss": 46.0, - "step": 34077 - }, - { - "epoch": 2.60550107995489, - "grad_norm": 0.0014593893429264426, - "learning_rate": 0.00019999665192185951, - "loss": 46.0, - "step": 34078 - }, - { - "epoch": 2.60557753693828, - "grad_norm": 0.002438412746414542, - "learning_rate": 0.00019999665172530507, - "loss": 46.0, - "step": 34079 - }, - { - "epoch": 2.6056539939216696, - "grad_norm": 0.004381807986646891, - "learning_rate": 0.00019999665152874486, - "loss": 46.0, - "step": 34080 - }, - { - "epoch": 2.6057304509050594, - "grad_norm": 0.0005928900209255517, - "learning_rate": 0.00019999665133217884, - "loss": 46.0, - "step": 34081 - }, - { - "epoch": 2.605806907888449, - "grad_norm": 0.0010792040266096592, - "learning_rate": 0.00019999665113560708, - "loss": 46.0, - "step": 34082 - }, - { - "epoch": 2.605883364871839, - "grad_norm": 0.0020960774272680283, - "learning_rate": 0.00019999665093902954, - "loss": 46.0, - "step": 34083 - }, - { - "epoch": 2.6059598218552287, - "grad_norm": 0.0011029322631657124, - "learning_rate": 0.00019999665074244626, - "loss": 46.0, - "step": 34084 - }, - { - "epoch": 2.6060362788386184, - "grad_norm": 0.001569297513924539, - "learning_rate": 0.00019999665054585717, - "loss": 46.0, - "step": 34085 - }, - { - "epoch": 2.606112735822008, - "grad_norm": 0.002657995093613863, - "learning_rate": 0.00019999665034926235, - "loss": 46.0, - "step": 34086 - }, - { - "epoch": 2.606189192805398, - "grad_norm": 0.0014559200499206781, - "learning_rate": 0.00019999665015266174, - "loss": 46.0, - "step": 34087 - }, - { - "epoch": 2.6062656497887877, - "grad_norm": 0.0014072259655222297, - "learning_rate": 0.00019999664995605537, - "loss": 46.0, - "step": 34088 - }, - { - "epoch": 2.6063421067721775, - "grad_norm": 0.0015695304609835148, - "learning_rate": 0.00019999664975944322, - "loss": 46.0, - "step": 34089 - }, - { - "epoch": 2.6064185637555672, - "grad_norm": 0.0016932080034166574, - "learning_rate": 0.0001999966495628253, - "loss": 46.0, - "step": 34090 - }, - { - "epoch": 2.606495020738957, - "grad_norm": 0.0007436703890562057, - "learning_rate": 0.0001999966493662016, - "loss": 46.0, - "step": 34091 - }, - { - "epoch": 2.6065714777223463, - "grad_norm": 0.005224339663982391, - "learning_rate": 0.00019999664916957216, - "loss": 46.0, - "step": 34092 - }, - { - "epoch": 2.606647934705736, - "grad_norm": 0.0010078026680275798, - "learning_rate": 0.00019999664897293694, - "loss": 46.0, - "step": 34093 - }, - { - "epoch": 2.606724391689126, - "grad_norm": 0.0017173726810142398, - "learning_rate": 0.00019999664877629592, - "loss": 46.0, - "step": 34094 - }, - { - "epoch": 2.6068008486725156, - "grad_norm": 0.0013045882806181908, - "learning_rate": 0.00019999664857964916, - "loss": 46.0, - "step": 34095 - }, - { - "epoch": 2.6068773056559054, - "grad_norm": 0.004429432563483715, - "learning_rate": 0.00019999664838299663, - "loss": 46.0, - "step": 34096 - }, - { - "epoch": 2.606953762639295, - "grad_norm": 0.0021900886204093695, - "learning_rate": 0.00019999664818633832, - "loss": 46.0, - "step": 34097 - }, - { - "epoch": 2.607030219622685, - "grad_norm": 0.0018946188502013683, - "learning_rate": 0.00019999664798967426, - "loss": 46.0, - "step": 34098 - }, - { - "epoch": 2.6071066766060746, - "grad_norm": 0.0026240062434226274, - "learning_rate": 0.00019999664779300443, - "loss": 46.0, - "step": 34099 - }, - { - "epoch": 2.607183133589464, - "grad_norm": 0.002428285777568817, - "learning_rate": 0.0001999966475963288, - "loss": 46.0, - "step": 34100 - }, - { - "epoch": 2.6072595905728537, - "grad_norm": 0.0018335528438910842, - "learning_rate": 0.00019999664739964743, - "loss": 46.0, - "step": 34101 - }, - { - "epoch": 2.6073360475562435, - "grad_norm": 0.0008534009684808552, - "learning_rate": 0.00019999664720296028, - "loss": 46.0, - "step": 34102 - }, - { - "epoch": 2.6074125045396332, - "grad_norm": 0.0008628587238490582, - "learning_rate": 0.00019999664700626736, - "loss": 46.0, - "step": 34103 - }, - { - "epoch": 2.607488961523023, - "grad_norm": 0.001418377156369388, - "learning_rate": 0.00019999664680956866, - "loss": 46.0, - "step": 34104 - }, - { - "epoch": 2.6075654185064128, - "grad_norm": 0.004012479446828365, - "learning_rate": 0.00019999664661286422, - "loss": 46.0, - "step": 34105 - }, - { - "epoch": 2.6076418754898025, - "grad_norm": 0.001176545862108469, - "learning_rate": 0.00019999664641615398, - "loss": 46.0, - "step": 34106 - }, - { - "epoch": 2.6077183324731923, - "grad_norm": 0.0008842108072713017, - "learning_rate": 0.000199996646219438, - "loss": 46.0, - "step": 34107 - }, - { - "epoch": 2.607794789456582, - "grad_norm": 0.0022622086107730865, - "learning_rate": 0.00019999664602271623, - "loss": 46.0, - "step": 34108 - }, - { - "epoch": 2.607871246439972, - "grad_norm": 0.0013154157204553485, - "learning_rate": 0.0001999966458259887, - "loss": 46.0, - "step": 34109 - }, - { - "epoch": 2.6079477034233616, - "grad_norm": 0.002860425738617778, - "learning_rate": 0.0001999966456292554, - "loss": 46.0, - "step": 34110 - }, - { - "epoch": 2.6080241604067513, - "grad_norm": 0.0016381718451157212, - "learning_rate": 0.00019999664543251633, - "loss": 46.0, - "step": 34111 - }, - { - "epoch": 2.608100617390141, - "grad_norm": 0.0011745529482141137, - "learning_rate": 0.0001999966452357715, - "loss": 46.0, - "step": 34112 - }, - { - "epoch": 2.608177074373531, - "grad_norm": 0.001570095424540341, - "learning_rate": 0.00019999664503902087, - "loss": 46.0, - "step": 34113 - }, - { - "epoch": 2.60825353135692, - "grad_norm": 0.0018143949564546347, - "learning_rate": 0.0001999966448422645, - "loss": 46.0, - "step": 34114 - }, - { - "epoch": 2.60832998834031, - "grad_norm": 0.0024891297798603773, - "learning_rate": 0.00019999664464550234, - "loss": 46.0, - "step": 34115 - }, - { - "epoch": 2.6084064453236997, - "grad_norm": 0.002164674922823906, - "learning_rate": 0.00019999664444873442, - "loss": 46.0, - "step": 34116 - }, - { - "epoch": 2.6084829023070895, - "grad_norm": 0.0016388414660468698, - "learning_rate": 0.00019999664425196075, - "loss": 46.0, - "step": 34117 - }, - { - "epoch": 2.608559359290479, - "grad_norm": 0.001399126835167408, - "learning_rate": 0.00019999664405518129, - "loss": 46.0, - "step": 34118 - }, - { - "epoch": 2.608635816273869, - "grad_norm": 0.00407001469284296, - "learning_rate": 0.00019999664385839604, - "loss": 46.0, - "step": 34119 - }, - { - "epoch": 2.6087122732572587, - "grad_norm": 0.0023797585163265467, - "learning_rate": 0.00019999664366160505, - "loss": 46.0, - "step": 34120 - }, - { - "epoch": 2.6087887302406485, - "grad_norm": 0.002845124341547489, - "learning_rate": 0.00019999664346480832, - "loss": 46.0, - "step": 34121 - }, - { - "epoch": 2.608865187224038, - "grad_norm": 0.0032964125275611877, - "learning_rate": 0.00019999664326800576, - "loss": 46.0, - "step": 34122 - }, - { - "epoch": 2.6089416442074276, - "grad_norm": 0.0012169118272140622, - "learning_rate": 0.00019999664307119745, - "loss": 46.0, - "step": 34123 - }, - { - "epoch": 2.6090181011908173, - "grad_norm": 0.0029273962136358023, - "learning_rate": 0.0001999966428743834, - "loss": 46.0, - "step": 34124 - }, - { - "epoch": 2.609094558174207, - "grad_norm": 0.0015583352651447058, - "learning_rate": 0.00019999664267756354, - "loss": 46.0, - "step": 34125 - }, - { - "epoch": 2.609171015157597, - "grad_norm": 0.000859149091411382, - "learning_rate": 0.00019999664248073794, - "loss": 46.0, - "step": 34126 - }, - { - "epoch": 2.6092474721409866, - "grad_norm": 0.0019579019863158464, - "learning_rate": 0.00019999664228390656, - "loss": 46.0, - "step": 34127 - }, - { - "epoch": 2.6093239291243764, - "grad_norm": 0.002120393794029951, - "learning_rate": 0.00019999664208706941, - "loss": 46.0, - "step": 34128 - }, - { - "epoch": 2.609400386107766, - "grad_norm": 0.002389185829088092, - "learning_rate": 0.0001999966418902265, - "loss": 46.0, - "step": 34129 - }, - { - "epoch": 2.609476843091156, - "grad_norm": 0.003060850314795971, - "learning_rate": 0.0001999966416933778, - "loss": 46.0, - "step": 34130 - }, - { - "epoch": 2.6095533000745457, - "grad_norm": 0.000556423154193908, - "learning_rate": 0.00019999664149652336, - "loss": 46.0, - "step": 34131 - }, - { - "epoch": 2.6096297570579354, - "grad_norm": 0.001494173426181078, - "learning_rate": 0.00019999664129966311, - "loss": 46.0, - "step": 34132 - }, - { - "epoch": 2.609706214041325, - "grad_norm": 0.0013685955200344324, - "learning_rate": 0.00019999664110279713, - "loss": 46.0, - "step": 34133 - }, - { - "epoch": 2.609782671024715, - "grad_norm": 0.0028649617452174425, - "learning_rate": 0.00019999664090592536, - "loss": 46.0, - "step": 34134 - }, - { - "epoch": 2.6098591280081047, - "grad_norm": 0.0011249084491282701, - "learning_rate": 0.00019999664070904783, - "loss": 46.0, - "step": 34135 - }, - { - "epoch": 2.609935584991494, - "grad_norm": 0.0007842201739549637, - "learning_rate": 0.00019999664051216452, - "loss": 46.0, - "step": 34136 - }, - { - "epoch": 2.610012041974884, - "grad_norm": 0.01031413022428751, - "learning_rate": 0.00019999664031527544, - "loss": 46.0, - "step": 34137 - }, - { - "epoch": 2.6100884989582736, - "grad_norm": 0.001108855358324945, - "learning_rate": 0.00019999664011838058, - "loss": 46.0, - "step": 34138 - }, - { - "epoch": 2.6101649559416633, - "grad_norm": 0.0007446836098097265, - "learning_rate": 0.00019999663992147998, - "loss": 46.0, - "step": 34139 - }, - { - "epoch": 2.610241412925053, - "grad_norm": 0.001520541263744235, - "learning_rate": 0.0001999966397245736, - "loss": 46.0, - "step": 34140 - }, - { - "epoch": 2.610317869908443, - "grad_norm": 0.0014953704085201025, - "learning_rate": 0.0001999966395276615, - "loss": 46.0, - "step": 34141 - }, - { - "epoch": 2.6103943268918326, - "grad_norm": 0.0013079511700198054, - "learning_rate": 0.00019999663933074354, - "loss": 46.0, - "step": 34142 - }, - { - "epoch": 2.6104707838752224, - "grad_norm": 0.0025454566348344088, - "learning_rate": 0.00019999663913381984, - "loss": 46.0, - "step": 34143 - }, - { - "epoch": 2.6105472408586117, - "grad_norm": 0.003493966767564416, - "learning_rate": 0.0001999966389368904, - "loss": 46.0, - "step": 34144 - }, - { - "epoch": 2.6106236978420014, - "grad_norm": 0.007100183051079512, - "learning_rate": 0.0001999966387399552, - "loss": 46.0, - "step": 34145 - }, - { - "epoch": 2.610700154825391, - "grad_norm": 0.000790994381532073, - "learning_rate": 0.00019999663854301417, - "loss": 46.0, - "step": 34146 - }, - { - "epoch": 2.610776611808781, - "grad_norm": 0.0023144404403865337, - "learning_rate": 0.0001999966383460674, - "loss": 46.0, - "step": 34147 - }, - { - "epoch": 2.6108530687921707, - "grad_norm": 0.0013884566724300385, - "learning_rate": 0.00019999663814911488, - "loss": 46.0, - "step": 34148 - }, - { - "epoch": 2.6109295257755605, - "grad_norm": 0.001954339211806655, - "learning_rate": 0.00019999663795215657, - "loss": 46.0, - "step": 34149 - }, - { - "epoch": 2.6110059827589502, - "grad_norm": 0.0010637211380526423, - "learning_rate": 0.00019999663775519252, - "loss": 46.0, - "step": 34150 - }, - { - "epoch": 2.61108243974234, - "grad_norm": 0.0017372184665873647, - "learning_rate": 0.00019999663755822263, - "loss": 46.0, - "step": 34151 - }, - { - "epoch": 2.6111588967257298, - "grad_norm": 0.0013366000493988395, - "learning_rate": 0.00019999663736124706, - "loss": 46.0, - "step": 34152 - }, - { - "epoch": 2.6112353537091195, - "grad_norm": 0.0007023279322311282, - "learning_rate": 0.00019999663716426566, - "loss": 46.0, - "step": 34153 - }, - { - "epoch": 2.6113118106925093, - "grad_norm": 0.004130127374082804, - "learning_rate": 0.0001999966369672785, - "loss": 46.0, - "step": 34154 - }, - { - "epoch": 2.611388267675899, - "grad_norm": 0.000591421325225383, - "learning_rate": 0.0001999966367702856, - "loss": 46.0, - "step": 34155 - }, - { - "epoch": 2.611464724659289, - "grad_norm": 0.0014904154231771827, - "learning_rate": 0.00019999663657328692, - "loss": 46.0, - "step": 34156 - }, - { - "epoch": 2.611541181642678, - "grad_norm": 0.0037304868455976248, - "learning_rate": 0.00019999663637628245, - "loss": 46.0, - "step": 34157 - }, - { - "epoch": 2.611617638626068, - "grad_norm": 0.003107240190729499, - "learning_rate": 0.0001999966361792722, - "loss": 46.0, - "step": 34158 - }, - { - "epoch": 2.6116940956094576, - "grad_norm": 0.00446449825540185, - "learning_rate": 0.00019999663598225623, - "loss": 46.0, - "step": 34159 - }, - { - "epoch": 2.6117705525928474, - "grad_norm": 0.0037031034007668495, - "learning_rate": 0.00019999663578523446, - "loss": 46.0, - "step": 34160 - }, - { - "epoch": 2.611847009576237, - "grad_norm": 0.0011913626221939921, - "learning_rate": 0.00019999663558820693, - "loss": 46.0, - "step": 34161 - }, - { - "epoch": 2.611923466559627, - "grad_norm": 0.0018605723744258285, - "learning_rate": 0.00019999663539117362, - "loss": 46.0, - "step": 34162 - }, - { - "epoch": 2.6119999235430167, - "grad_norm": 0.00363340531475842, - "learning_rate": 0.00019999663519413457, - "loss": 46.0, - "step": 34163 - }, - { - "epoch": 2.6120763805264064, - "grad_norm": 0.0017527599120512605, - "learning_rate": 0.0001999966349970897, - "loss": 46.0, - "step": 34164 - }, - { - "epoch": 2.612152837509796, - "grad_norm": 0.00210291869007051, - "learning_rate": 0.0001999966348000391, - "loss": 46.0, - "step": 34165 - }, - { - "epoch": 2.6122292944931855, - "grad_norm": 0.002765426877886057, - "learning_rate": 0.0001999966346029827, - "loss": 46.0, - "step": 34166 - }, - { - "epoch": 2.6123057514765753, - "grad_norm": 0.001917311572469771, - "learning_rate": 0.00019999663440592057, - "loss": 46.0, - "step": 34167 - }, - { - "epoch": 2.612382208459965, - "grad_norm": 0.0011326767271384597, - "learning_rate": 0.00019999663420885264, - "loss": 46.0, - "step": 34168 - }, - { - "epoch": 2.612458665443355, - "grad_norm": 0.0016144639812409878, - "learning_rate": 0.00019999663401177895, - "loss": 46.0, - "step": 34169 - }, - { - "epoch": 2.6125351224267446, - "grad_norm": 0.0007132149185054004, - "learning_rate": 0.0001999966338146995, - "loss": 46.0, - "step": 34170 - }, - { - "epoch": 2.6126115794101343, - "grad_norm": 0.0034199159126728773, - "learning_rate": 0.00019999663361761427, - "loss": 46.0, - "step": 34171 - }, - { - "epoch": 2.612688036393524, - "grad_norm": 0.00129833840765059, - "learning_rate": 0.00019999663342052325, - "loss": 46.0, - "step": 34172 - }, - { - "epoch": 2.612764493376914, - "grad_norm": 0.0021075336262583733, - "learning_rate": 0.00019999663322342652, - "loss": 46.0, - "step": 34173 - }, - { - "epoch": 2.6128409503603036, - "grad_norm": 0.004253344144672155, - "learning_rate": 0.00019999663302632396, - "loss": 46.0, - "step": 34174 - }, - { - "epoch": 2.6129174073436934, - "grad_norm": 0.0020897623617202044, - "learning_rate": 0.00019999663282921568, - "loss": 46.0, - "step": 34175 - }, - { - "epoch": 2.612993864327083, - "grad_norm": 0.0017245084745809436, - "learning_rate": 0.00019999663263210157, - "loss": 46.0, - "step": 34176 - }, - { - "epoch": 2.613070321310473, - "grad_norm": 0.0014214001130312681, - "learning_rate": 0.00019999663243498175, - "loss": 46.0, - "step": 34177 - }, - { - "epoch": 2.6131467782938627, - "grad_norm": 0.001643747091293335, - "learning_rate": 0.00019999663223785614, - "loss": 46.0, - "step": 34178 - }, - { - "epoch": 2.613223235277252, - "grad_norm": 0.0024216067977249622, - "learning_rate": 0.00019999663204072477, - "loss": 46.0, - "step": 34179 - }, - { - "epoch": 2.6132996922606417, - "grad_norm": 0.0014718774473294616, - "learning_rate": 0.00019999663184358762, - "loss": 46.0, - "step": 34180 - }, - { - "epoch": 2.6133761492440315, - "grad_norm": 0.0007778847357258201, - "learning_rate": 0.00019999663164644468, - "loss": 46.0, - "step": 34181 - }, - { - "epoch": 2.6134526062274213, - "grad_norm": 0.0027990483213216066, - "learning_rate": 0.00019999663144929598, - "loss": 46.0, - "step": 34182 - }, - { - "epoch": 2.613529063210811, - "grad_norm": 0.0014099058462306857, - "learning_rate": 0.00019999663125214152, - "loss": 46.0, - "step": 34183 - }, - { - "epoch": 2.613605520194201, - "grad_norm": 0.0008423132821917534, - "learning_rate": 0.00019999663105498133, - "loss": 46.0, - "step": 34184 - }, - { - "epoch": 2.6136819771775905, - "grad_norm": 0.0017143214354291558, - "learning_rate": 0.00019999663085781531, - "loss": 46.0, - "step": 34185 - }, - { - "epoch": 2.6137584341609803, - "grad_norm": 0.001519199926406145, - "learning_rate": 0.00019999663066064355, - "loss": 46.0, - "step": 34186 - }, - { - "epoch": 2.61383489114437, - "grad_norm": 0.00189464190043509, - "learning_rate": 0.000199996630463466, - "loss": 46.0, - "step": 34187 - }, - { - "epoch": 2.6139113481277594, - "grad_norm": 0.0021376097574830055, - "learning_rate": 0.00019999663026628271, - "loss": 46.0, - "step": 34188 - }, - { - "epoch": 2.613987805111149, - "grad_norm": 0.0017821603687480092, - "learning_rate": 0.00019999663006909363, - "loss": 46.0, - "step": 34189 - }, - { - "epoch": 2.614064262094539, - "grad_norm": 0.005011957138776779, - "learning_rate": 0.0001999966298718988, - "loss": 46.0, - "step": 34190 - }, - { - "epoch": 2.6141407190779287, - "grad_norm": 0.0014494098722934723, - "learning_rate": 0.00019999662967469818, - "loss": 46.0, - "step": 34191 - }, - { - "epoch": 2.6142171760613184, - "grad_norm": 0.0032881658989936113, - "learning_rate": 0.0001999966294774918, - "loss": 46.0, - "step": 34192 - }, - { - "epoch": 2.614293633044708, - "grad_norm": 0.0013622696278616786, - "learning_rate": 0.00019999662928027963, - "loss": 46.0, - "step": 34193 - }, - { - "epoch": 2.614370090028098, - "grad_norm": 0.0026153349317610264, - "learning_rate": 0.0001999966290830617, - "loss": 46.0, - "step": 34194 - }, - { - "epoch": 2.6144465470114877, - "grad_norm": 0.001099519431591034, - "learning_rate": 0.00019999662888583805, - "loss": 46.0, - "step": 34195 - }, - { - "epoch": 2.6145230039948775, - "grad_norm": 0.002636526944115758, - "learning_rate": 0.00019999662868860858, - "loss": 46.0, - "step": 34196 - }, - { - "epoch": 2.6145994609782672, - "grad_norm": 0.0010001073824241757, - "learning_rate": 0.00019999662849137334, - "loss": 46.0, - "step": 34197 - }, - { - "epoch": 2.614675917961657, - "grad_norm": 0.0005777434562332928, - "learning_rate": 0.00019999662829413235, - "loss": 46.0, - "step": 34198 - }, - { - "epoch": 2.6147523749450468, - "grad_norm": 0.001196935772895813, - "learning_rate": 0.0001999966280968856, - "loss": 46.0, - "step": 34199 - }, - { - "epoch": 2.6148288319284365, - "grad_norm": 0.0016990507720038295, - "learning_rate": 0.00019999662789963306, - "loss": 46.0, - "step": 34200 - }, - { - "epoch": 2.614905288911826, - "grad_norm": 0.00142157101072371, - "learning_rate": 0.00019999662770237473, - "loss": 46.0, - "step": 34201 - }, - { - "epoch": 2.6149817458952156, - "grad_norm": 0.0005694128340110183, - "learning_rate": 0.00019999662750511065, - "loss": 46.0, - "step": 34202 - }, - { - "epoch": 2.6150582028786054, - "grad_norm": 0.0015122366603463888, - "learning_rate": 0.00019999662730784082, - "loss": 46.0, - "step": 34203 - }, - { - "epoch": 2.615134659861995, - "grad_norm": 0.0010198099771514535, - "learning_rate": 0.0001999966271105652, - "loss": 46.0, - "step": 34204 - }, - { - "epoch": 2.615211116845385, - "grad_norm": 0.0018776233773678541, - "learning_rate": 0.00019999662691328382, - "loss": 46.0, - "step": 34205 - }, - { - "epoch": 2.6152875738287746, - "grad_norm": 0.0016221227124333382, - "learning_rate": 0.00019999662671599667, - "loss": 46.0, - "step": 34206 - }, - { - "epoch": 2.6153640308121644, - "grad_norm": 0.0007513632299378514, - "learning_rate": 0.00019999662651870375, - "loss": 46.0, - "step": 34207 - }, - { - "epoch": 2.615440487795554, - "grad_norm": 0.004027173854410648, - "learning_rate": 0.00019999662632140509, - "loss": 46.0, - "step": 34208 - }, - { - "epoch": 2.6155169447789435, - "grad_norm": 0.0006107151857577264, - "learning_rate": 0.0001999966261241006, - "loss": 46.0, - "step": 34209 - }, - { - "epoch": 2.6155934017623332, - "grad_norm": 0.0035889095161110163, - "learning_rate": 0.00019999662592679038, - "loss": 46.0, - "step": 34210 - }, - { - "epoch": 2.615669858745723, - "grad_norm": 0.0011578392004594207, - "learning_rate": 0.00019999662572947437, - "loss": 46.0, - "step": 34211 - }, - { - "epoch": 2.6157463157291128, - "grad_norm": 0.0013914982555434108, - "learning_rate": 0.0001999966255321526, - "loss": 46.0, - "step": 34212 - }, - { - "epoch": 2.6158227727125025, - "grad_norm": 0.0019806597847491503, - "learning_rate": 0.00019999662533482507, - "loss": 46.0, - "step": 34213 - }, - { - "epoch": 2.6158992296958923, - "grad_norm": 0.0034138422925025225, - "learning_rate": 0.00019999662513749177, - "loss": 46.0, - "step": 34214 - }, - { - "epoch": 2.615975686679282, - "grad_norm": 0.003941693343222141, - "learning_rate": 0.0001999966249401527, - "loss": 46.0, - "step": 34215 - }, - { - "epoch": 2.616052143662672, - "grad_norm": 0.002259183442220092, - "learning_rate": 0.00019999662474280783, - "loss": 46.0, - "step": 34216 - }, - { - "epoch": 2.6161286006460616, - "grad_norm": 0.0019805864430963993, - "learning_rate": 0.00019999662454545724, - "loss": 46.0, - "step": 34217 - }, - { - "epoch": 2.6162050576294513, - "grad_norm": 0.0004943533567711711, - "learning_rate": 0.00019999662434810084, - "loss": 46.0, - "step": 34218 - }, - { - "epoch": 2.616281514612841, - "grad_norm": 0.0006937652360647917, - "learning_rate": 0.0001999966241507387, - "loss": 46.0, - "step": 34219 - }, - { - "epoch": 2.616357971596231, - "grad_norm": 0.002335480647161603, - "learning_rate": 0.00019999662395337077, - "loss": 46.0, - "step": 34220 - }, - { - "epoch": 2.6164344285796206, - "grad_norm": 0.0006180853815749288, - "learning_rate": 0.0001999966237559971, - "loss": 46.0, - "step": 34221 - }, - { - "epoch": 2.6165108855630104, - "grad_norm": 0.0008049941970966756, - "learning_rate": 0.00019999662355861764, - "loss": 46.0, - "step": 34222 - }, - { - "epoch": 2.6165873425463997, - "grad_norm": 0.003091156482696533, - "learning_rate": 0.0001999966233612324, - "loss": 46.0, - "step": 34223 - }, - { - "epoch": 2.6166637995297894, - "grad_norm": 0.008240773342549801, - "learning_rate": 0.0001999966231638414, - "loss": 46.0, - "step": 34224 - }, - { - "epoch": 2.616740256513179, - "grad_norm": 0.0018525755731388927, - "learning_rate": 0.00019999662296644463, - "loss": 46.0, - "step": 34225 - }, - { - "epoch": 2.616816713496569, - "grad_norm": 0.0033004239667207003, - "learning_rate": 0.0001999966227690421, - "loss": 46.0, - "step": 34226 - }, - { - "epoch": 2.6168931704799587, - "grad_norm": 0.0016091905999928713, - "learning_rate": 0.0001999966225716338, - "loss": 46.0, - "step": 34227 - }, - { - "epoch": 2.6169696274633485, - "grad_norm": 0.001077565597370267, - "learning_rate": 0.00019999662237421973, - "loss": 46.0, - "step": 34228 - }, - { - "epoch": 2.6170460844467383, - "grad_norm": 0.0013370043598115444, - "learning_rate": 0.00019999662217679988, - "loss": 46.0, - "step": 34229 - }, - { - "epoch": 2.617122541430128, - "grad_norm": 0.007584643084555864, - "learning_rate": 0.00019999662197937426, - "loss": 46.0, - "step": 34230 - }, - { - "epoch": 2.6171989984135173, - "grad_norm": 0.0016999304061755538, - "learning_rate": 0.00019999662178194288, - "loss": 46.0, - "step": 34231 - }, - { - "epoch": 2.617275455396907, - "grad_norm": 0.0012719600927084684, - "learning_rate": 0.0001999966215845057, - "loss": 46.0, - "step": 34232 - }, - { - "epoch": 2.617351912380297, - "grad_norm": 0.0020562242716550827, - "learning_rate": 0.00019999662138706282, - "loss": 46.0, - "step": 34233 - }, - { - "epoch": 2.6174283693636866, - "grad_norm": 0.004436023533344269, - "learning_rate": 0.0001999966211896141, - "loss": 46.0, - "step": 34234 - }, - { - "epoch": 2.6175048263470764, - "grad_norm": 0.0015971716493368149, - "learning_rate": 0.00019999662099215966, - "loss": 46.0, - "step": 34235 - }, - { - "epoch": 2.617581283330466, - "grad_norm": 0.0008590783108957112, - "learning_rate": 0.00019999662079469942, - "loss": 46.0, - "step": 34236 - }, - { - "epoch": 2.617657740313856, - "grad_norm": 0.0024100837763398886, - "learning_rate": 0.0001999966205972334, - "loss": 46.0, - "step": 34237 - }, - { - "epoch": 2.6177341972972457, - "grad_norm": 0.001463544089347124, - "learning_rate": 0.00019999662039976165, - "loss": 46.0, - "step": 34238 - }, - { - "epoch": 2.6178106542806354, - "grad_norm": 0.0020168256014585495, - "learning_rate": 0.00019999662020228414, - "loss": 46.0, - "step": 34239 - }, - { - "epoch": 2.617887111264025, - "grad_norm": 0.0016613135812804103, - "learning_rate": 0.0001999966200048008, - "loss": 46.0, - "step": 34240 - }, - { - "epoch": 2.617963568247415, - "grad_norm": 0.001150017138570547, - "learning_rate": 0.00019999661980731173, - "loss": 46.0, - "step": 34241 - }, - { - "epoch": 2.6180400252308047, - "grad_norm": 0.0012975874124094844, - "learning_rate": 0.0001999966196098169, - "loss": 46.0, - "step": 34242 - }, - { - "epoch": 2.6181164822141945, - "grad_norm": 0.0014641644665971398, - "learning_rate": 0.00019999661941231628, - "loss": 46.0, - "step": 34243 - }, - { - "epoch": 2.6181929391975842, - "grad_norm": 0.0006540699396282434, - "learning_rate": 0.0001999966192148099, - "loss": 46.0, - "step": 34244 - }, - { - "epoch": 2.6182693961809735, - "grad_norm": 0.0019124718382954597, - "learning_rate": 0.00019999661901729773, - "loss": 46.0, - "step": 34245 - }, - { - "epoch": 2.6183458531643633, - "grad_norm": 0.002147477585822344, - "learning_rate": 0.00019999661881977982, - "loss": 46.0, - "step": 34246 - }, - { - "epoch": 2.618422310147753, - "grad_norm": 0.0020005377009510994, - "learning_rate": 0.00019999661862225612, - "loss": 46.0, - "step": 34247 - }, - { - "epoch": 2.618498767131143, - "grad_norm": 0.001586235361173749, - "learning_rate": 0.00019999661842472666, - "loss": 46.0, - "step": 34248 - }, - { - "epoch": 2.6185752241145326, - "grad_norm": 0.0025970041751861572, - "learning_rate": 0.00019999661822719142, - "loss": 46.0, - "step": 34249 - }, - { - "epoch": 2.6186516810979223, - "grad_norm": 0.00046401366125792265, - "learning_rate": 0.00019999661802965043, - "loss": 46.0, - "step": 34250 - }, - { - "epoch": 2.618728138081312, - "grad_norm": 0.0013988347491249442, - "learning_rate": 0.00019999661783210368, - "loss": 46.0, - "step": 34251 - }, - { - "epoch": 2.618804595064702, - "grad_norm": 0.0010600073728710413, - "learning_rate": 0.00019999661763455114, - "loss": 46.0, - "step": 34252 - }, - { - "epoch": 2.618881052048091, - "grad_norm": 0.003847547108307481, - "learning_rate": 0.00019999661743699284, - "loss": 46.0, - "step": 34253 - }, - { - "epoch": 2.618957509031481, - "grad_norm": 0.0008074132492765784, - "learning_rate": 0.00019999661723942876, - "loss": 46.0, - "step": 34254 - }, - { - "epoch": 2.6190339660148707, - "grad_norm": 0.0013632638147100806, - "learning_rate": 0.0001999966170418589, - "loss": 46.0, - "step": 34255 - }, - { - "epoch": 2.6191104229982605, - "grad_norm": 0.0018430332420393825, - "learning_rate": 0.00019999661684428328, - "loss": 46.0, - "step": 34256 - }, - { - "epoch": 2.6191868799816502, - "grad_norm": 0.0004163621924817562, - "learning_rate": 0.0001999966166467019, - "loss": 46.0, - "step": 34257 - }, - { - "epoch": 2.61926333696504, - "grad_norm": 0.0022576674818992615, - "learning_rate": 0.00019999661644911476, - "loss": 46.0, - "step": 34258 - }, - { - "epoch": 2.6193397939484298, - "grad_norm": 0.002548081800341606, - "learning_rate": 0.00019999661625152185, - "loss": 46.0, - "step": 34259 - }, - { - "epoch": 2.6194162509318195, - "grad_norm": 0.0010557289933785796, - "learning_rate": 0.00019999661605392315, - "loss": 46.0, - "step": 34260 - }, - { - "epoch": 2.6194927079152093, - "grad_norm": 0.0024785371497273445, - "learning_rate": 0.0001999966158563187, - "loss": 46.0, - "step": 34261 - }, - { - "epoch": 2.619569164898599, - "grad_norm": 0.0010202809935435653, - "learning_rate": 0.00019999661565870845, - "loss": 46.0, - "step": 34262 - }, - { - "epoch": 2.619645621881989, - "grad_norm": 0.004763559903949499, - "learning_rate": 0.00019999661546109246, - "loss": 46.0, - "step": 34263 - }, - { - "epoch": 2.6197220788653786, - "grad_norm": 0.0013072541914880276, - "learning_rate": 0.00019999661526347068, - "loss": 46.0, - "step": 34264 - }, - { - "epoch": 2.6197985358487683, - "grad_norm": 0.0020412427838891745, - "learning_rate": 0.00019999661506584312, - "loss": 46.0, - "step": 34265 - }, - { - "epoch": 2.619874992832158, - "grad_norm": 0.0008841636590659618, - "learning_rate": 0.00019999661486820984, - "loss": 46.0, - "step": 34266 - }, - { - "epoch": 2.6199514498155474, - "grad_norm": 0.0012474969262257218, - "learning_rate": 0.00019999661467057076, - "loss": 46.0, - "step": 34267 - }, - { - "epoch": 2.620027906798937, - "grad_norm": 0.002453595632687211, - "learning_rate": 0.0001999966144729259, - "loss": 46.0, - "step": 34268 - }, - { - "epoch": 2.620104363782327, - "grad_norm": 0.0005820005317218602, - "learning_rate": 0.00019999661427527529, - "loss": 46.0, - "step": 34269 - }, - { - "epoch": 2.6201808207657167, - "grad_norm": 0.0011149175697937608, - "learning_rate": 0.00019999661407761891, - "loss": 46.0, - "step": 34270 - }, - { - "epoch": 2.6202572777491064, - "grad_norm": 0.0014274739660322666, - "learning_rate": 0.00019999661387995677, - "loss": 46.0, - "step": 34271 - }, - { - "epoch": 2.620333734732496, - "grad_norm": 0.00154223816934973, - "learning_rate": 0.00019999661368228882, - "loss": 46.0, - "step": 34272 - }, - { - "epoch": 2.620410191715886, - "grad_norm": 0.00814348366111517, - "learning_rate": 0.00019999661348461513, - "loss": 46.0, - "step": 34273 - }, - { - "epoch": 2.6204866486992757, - "grad_norm": 0.0024103494361042976, - "learning_rate": 0.0001999966132869357, - "loss": 46.0, - "step": 34274 - }, - { - "epoch": 2.620563105682665, - "grad_norm": 0.001020262949168682, - "learning_rate": 0.00019999661308925046, - "loss": 46.0, - "step": 34275 - }, - { - "epoch": 2.620639562666055, - "grad_norm": 0.0028688423335552216, - "learning_rate": 0.00019999661289155944, - "loss": 46.0, - "step": 34276 - }, - { - "epoch": 2.6207160196494446, - "grad_norm": 0.002358399098739028, - "learning_rate": 0.0001999966126938627, - "loss": 46.0, - "step": 34277 - }, - { - "epoch": 2.6207924766328343, - "grad_norm": 0.0029547843150794506, - "learning_rate": 0.00019999661249616016, - "loss": 46.0, - "step": 34278 - }, - { - "epoch": 2.620868933616224, - "grad_norm": 0.0013284212909638882, - "learning_rate": 0.00019999661229845182, - "loss": 46.0, - "step": 34279 - }, - { - "epoch": 2.620945390599614, - "grad_norm": 0.0023126585874706507, - "learning_rate": 0.00019999661210073777, - "loss": 46.0, - "step": 34280 - }, - { - "epoch": 2.6210218475830036, - "grad_norm": 0.0021834131330251694, - "learning_rate": 0.00019999661190301792, - "loss": 46.0, - "step": 34281 - }, - { - "epoch": 2.6210983045663934, - "grad_norm": 0.001145324669778347, - "learning_rate": 0.00019999661170529232, - "loss": 46.0, - "step": 34282 - }, - { - "epoch": 2.621174761549783, - "grad_norm": 0.0011937310919165611, - "learning_rate": 0.00019999661150756092, - "loss": 46.0, - "step": 34283 - }, - { - "epoch": 2.621251218533173, - "grad_norm": 0.0054331193678081036, - "learning_rate": 0.00019999661130982378, - "loss": 46.0, - "step": 34284 - }, - { - "epoch": 2.6213276755165626, - "grad_norm": 0.002767684403806925, - "learning_rate": 0.00019999661111208083, - "loss": 46.0, - "step": 34285 - }, - { - "epoch": 2.6214041324999524, - "grad_norm": 0.008863287046551704, - "learning_rate": 0.00019999661091433214, - "loss": 46.0, - "step": 34286 - }, - { - "epoch": 2.621480589483342, - "grad_norm": 0.001534686191007495, - "learning_rate": 0.0001999966107165777, - "loss": 46.0, - "step": 34287 - }, - { - "epoch": 2.6215570464667315, - "grad_norm": 0.000786025368142873, - "learning_rate": 0.00019999661051881747, - "loss": 46.0, - "step": 34288 - }, - { - "epoch": 2.6216335034501212, - "grad_norm": 0.0008636950515210629, - "learning_rate": 0.00019999661032105146, - "loss": 46.0, - "step": 34289 - }, - { - "epoch": 2.621709960433511, - "grad_norm": 0.0013416169676929712, - "learning_rate": 0.0001999966101232797, - "loss": 46.0, - "step": 34290 - }, - { - "epoch": 2.6217864174169008, - "grad_norm": 0.0021538599394261837, - "learning_rate": 0.00019999660992550217, - "loss": 46.0, - "step": 34291 - }, - { - "epoch": 2.6218628744002905, - "grad_norm": 0.001304124714806676, - "learning_rate": 0.00019999660972771884, - "loss": 46.0, - "step": 34292 - }, - { - "epoch": 2.6219393313836803, - "grad_norm": 0.0011879676021635532, - "learning_rate": 0.00019999660952992976, - "loss": 46.0, - "step": 34293 - }, - { - "epoch": 2.62201578836707, - "grad_norm": 0.0019534036982804537, - "learning_rate": 0.0001999966093321349, - "loss": 46.0, - "step": 34294 - }, - { - "epoch": 2.62209224535046, - "grad_norm": 0.003510749898850918, - "learning_rate": 0.00019999660913433428, - "loss": 46.0, - "step": 34295 - }, - { - "epoch": 2.6221687023338496, - "grad_norm": 0.005587299820035696, - "learning_rate": 0.0001999966089365279, - "loss": 46.0, - "step": 34296 - }, - { - "epoch": 2.622245159317239, - "grad_norm": 0.000609685608651489, - "learning_rate": 0.00019999660873871577, - "loss": 46.0, - "step": 34297 - }, - { - "epoch": 2.6223216163006287, - "grad_norm": 0.002166262362152338, - "learning_rate": 0.00019999660854089782, - "loss": 46.0, - "step": 34298 - }, - { - "epoch": 2.6223980732840184, - "grad_norm": 0.0036390682216733694, - "learning_rate": 0.00019999660834307416, - "loss": 46.0, - "step": 34299 - }, - { - "epoch": 2.622474530267408, - "grad_norm": 0.0044851237908005714, - "learning_rate": 0.00019999660814524467, - "loss": 46.0, - "step": 34300 - }, - { - "epoch": 2.622550987250798, - "grad_norm": 0.0018594893626868725, - "learning_rate": 0.00019999660794740946, - "loss": 46.0, - "step": 34301 - }, - { - "epoch": 2.6226274442341877, - "grad_norm": 0.0031945365481078625, - "learning_rate": 0.00019999660774956845, - "loss": 46.0, - "step": 34302 - }, - { - "epoch": 2.6227039012175775, - "grad_norm": 0.001575936796143651, - "learning_rate": 0.00019999660755172166, - "loss": 46.0, - "step": 34303 - }, - { - "epoch": 2.622780358200967, - "grad_norm": 0.0022734710946679115, - "learning_rate": 0.00019999660735386913, - "loss": 46.0, - "step": 34304 - }, - { - "epoch": 2.622856815184357, - "grad_norm": 0.0025252013001590967, - "learning_rate": 0.00019999660715601083, - "loss": 46.0, - "step": 34305 - }, - { - "epoch": 2.6229332721677467, - "grad_norm": 0.001038175425492227, - "learning_rate": 0.00019999660695814675, - "loss": 46.0, - "step": 34306 - }, - { - "epoch": 2.6230097291511365, - "grad_norm": 0.0008089214097708464, - "learning_rate": 0.00019999660676027692, - "loss": 46.0, - "step": 34307 - }, - { - "epoch": 2.6230861861345263, - "grad_norm": 0.004474613815546036, - "learning_rate": 0.0001999966065624013, - "loss": 46.0, - "step": 34308 - }, - { - "epoch": 2.623162643117916, - "grad_norm": 0.002928811125457287, - "learning_rate": 0.0001999966063645199, - "loss": 46.0, - "step": 34309 - }, - { - "epoch": 2.6232391001013053, - "grad_norm": 0.0070663499645888805, - "learning_rate": 0.00019999660616663276, - "loss": 46.0, - "step": 34310 - }, - { - "epoch": 2.623315557084695, - "grad_norm": 0.0021506063640117645, - "learning_rate": 0.00019999660596873984, - "loss": 46.0, - "step": 34311 - }, - { - "epoch": 2.623392014068085, - "grad_norm": 0.0022853894624859095, - "learning_rate": 0.00019999660577084115, - "loss": 46.0, - "step": 34312 - }, - { - "epoch": 2.6234684710514746, - "grad_norm": 0.000793284852989018, - "learning_rate": 0.00019999660557293666, - "loss": 46.0, - "step": 34313 - }, - { - "epoch": 2.6235449280348644, - "grad_norm": 0.0008688968373462558, - "learning_rate": 0.00019999660537502645, - "loss": 46.0, - "step": 34314 - }, - { - "epoch": 2.623621385018254, - "grad_norm": 0.001831280766054988, - "learning_rate": 0.00019999660517711047, - "loss": 46.0, - "step": 34315 - }, - { - "epoch": 2.623697842001644, - "grad_norm": 0.0012517948634922504, - "learning_rate": 0.00019999660497918868, - "loss": 46.0, - "step": 34316 - }, - { - "epoch": 2.6237742989850337, - "grad_norm": 0.0013124850811436772, - "learning_rate": 0.00019999660478126115, - "loss": 46.0, - "step": 34317 - }, - { - "epoch": 2.6238507559684234, - "grad_norm": 0.0013771505327895284, - "learning_rate": 0.00019999660458332782, - "loss": 46.0, - "step": 34318 - }, - { - "epoch": 2.6239272129518127, - "grad_norm": 0.004698462318629026, - "learning_rate": 0.00019999660438538874, - "loss": 46.0, - "step": 34319 - }, - { - "epoch": 2.6240036699352025, - "grad_norm": 0.0013371316017583013, - "learning_rate": 0.0001999966041874439, - "loss": 46.0, - "step": 34320 - }, - { - "epoch": 2.6240801269185923, - "grad_norm": 0.002094406634569168, - "learning_rate": 0.0001999966039894933, - "loss": 46.0, - "step": 34321 - }, - { - "epoch": 2.624156583901982, - "grad_norm": 0.0022246220614761114, - "learning_rate": 0.0001999966037915369, - "loss": 46.0, - "step": 34322 - }, - { - "epoch": 2.624233040885372, - "grad_norm": 0.0017451178282499313, - "learning_rate": 0.00019999660359357475, - "loss": 46.0, - "step": 34323 - }, - { - "epoch": 2.6243094978687616, - "grad_norm": 0.001889218227006495, - "learning_rate": 0.00019999660339560684, - "loss": 46.0, - "step": 34324 - }, - { - "epoch": 2.6243859548521513, - "grad_norm": 0.0023120564874261618, - "learning_rate": 0.00019999660319763315, - "loss": 46.0, - "step": 34325 - }, - { - "epoch": 2.624462411835541, - "grad_norm": 0.0010360432788729668, - "learning_rate": 0.00019999660299965366, - "loss": 46.0, - "step": 34326 - }, - { - "epoch": 2.624538868818931, - "grad_norm": 0.0015052318340167403, - "learning_rate": 0.00019999660280166842, - "loss": 46.0, - "step": 34327 - }, - { - "epoch": 2.6246153258023206, - "grad_norm": 0.0027069970965385437, - "learning_rate": 0.00019999660260367743, - "loss": 46.0, - "step": 34328 - }, - { - "epoch": 2.6246917827857104, - "grad_norm": 0.003471989184617996, - "learning_rate": 0.00019999660240568065, - "loss": 46.0, - "step": 34329 - }, - { - "epoch": 2.6247682397691, - "grad_norm": 0.002597326645627618, - "learning_rate": 0.00019999660220767812, - "loss": 46.0, - "step": 34330 - }, - { - "epoch": 2.62484469675249, - "grad_norm": 0.0014209537766873837, - "learning_rate": 0.00019999660200966984, - "loss": 46.0, - "step": 34331 - }, - { - "epoch": 2.624921153735879, - "grad_norm": 0.0005744562949985266, - "learning_rate": 0.00019999660181165574, - "loss": 46.0, - "step": 34332 - }, - { - "epoch": 2.624997610719269, - "grad_norm": 0.0027566375210881233, - "learning_rate": 0.0001999966016136359, - "loss": 46.0, - "step": 34333 - }, - { - "epoch": 2.6250740677026587, - "grad_norm": 0.002971071982756257, - "learning_rate": 0.00019999660141561027, - "loss": 46.0, - "step": 34334 - }, - { - "epoch": 2.6251505246860485, - "grad_norm": 0.0012065370101481676, - "learning_rate": 0.0001999966012175789, - "loss": 46.0, - "step": 34335 - }, - { - "epoch": 2.6252269816694382, - "grad_norm": 0.00252229324541986, - "learning_rate": 0.00019999660101954175, - "loss": 46.0, - "step": 34336 - }, - { - "epoch": 2.625303438652828, - "grad_norm": 0.0016391382087022066, - "learning_rate": 0.0001999966008214988, - "loss": 46.0, - "step": 34337 - }, - { - "epoch": 2.6253798956362178, - "grad_norm": 0.008321761153638363, - "learning_rate": 0.00019999660062345012, - "loss": 46.0, - "step": 34338 - }, - { - "epoch": 2.6254563526196075, - "grad_norm": 0.0021071250084787607, - "learning_rate": 0.00019999660042539566, - "loss": 46.0, - "step": 34339 - }, - { - "epoch": 2.625532809602997, - "grad_norm": 0.00257648597471416, - "learning_rate": 0.00019999660022733542, - "loss": 46.0, - "step": 34340 - }, - { - "epoch": 2.6256092665863866, - "grad_norm": 0.005129849538207054, - "learning_rate": 0.0001999966000292694, - "loss": 46.0, - "step": 34341 - }, - { - "epoch": 2.6256857235697764, - "grad_norm": 0.004966387525200844, - "learning_rate": 0.00019999659983119765, - "loss": 46.0, - "step": 34342 - }, - { - "epoch": 2.625762180553166, - "grad_norm": 0.0020260175224393606, - "learning_rate": 0.00019999659963312012, - "loss": 46.0, - "step": 34343 - }, - { - "epoch": 2.625838637536556, - "grad_norm": 0.0017710226820781827, - "learning_rate": 0.0001999965994350368, - "loss": 46.0, - "step": 34344 - }, - { - "epoch": 2.6259150945199456, - "grad_norm": 0.0019671388436108828, - "learning_rate": 0.00019999659923694774, - "loss": 46.0, - "step": 34345 - }, - { - "epoch": 2.6259915515033354, - "grad_norm": 0.0016586111159995198, - "learning_rate": 0.00019999659903885287, - "loss": 46.0, - "step": 34346 - }, - { - "epoch": 2.626068008486725, - "grad_norm": 0.0020401792135089636, - "learning_rate": 0.00019999659884075227, - "loss": 46.0, - "step": 34347 - }, - { - "epoch": 2.626144465470115, - "grad_norm": 0.0009087237995117903, - "learning_rate": 0.00019999659864264587, - "loss": 46.0, - "step": 34348 - }, - { - "epoch": 2.6262209224535047, - "grad_norm": 0.001577352057211101, - "learning_rate": 0.0001999965984445337, - "loss": 46.0, - "step": 34349 - }, - { - "epoch": 2.6262973794368945, - "grad_norm": 0.002824016846716404, - "learning_rate": 0.00019999659824641579, - "loss": 46.0, - "step": 34350 - }, - { - "epoch": 2.626373836420284, - "grad_norm": 0.001687657437287271, - "learning_rate": 0.0001999965980482921, - "loss": 46.0, - "step": 34351 - }, - { - "epoch": 2.626450293403674, - "grad_norm": 0.0012981118634343147, - "learning_rate": 0.00019999659785016266, - "loss": 46.0, - "step": 34352 - }, - { - "epoch": 2.6265267503870637, - "grad_norm": 0.0025908120442181826, - "learning_rate": 0.0001999965976520274, - "loss": 46.0, - "step": 34353 - }, - { - "epoch": 2.626603207370453, - "grad_norm": 0.001559897675178945, - "learning_rate": 0.0001999965974538864, - "loss": 46.0, - "step": 34354 - }, - { - "epoch": 2.626679664353843, - "grad_norm": 0.0015025973552837968, - "learning_rate": 0.00019999659725573963, - "loss": 46.0, - "step": 34355 - }, - { - "epoch": 2.6267561213372326, - "grad_norm": 0.0030372445471584797, - "learning_rate": 0.0001999965970575871, - "loss": 46.0, - "step": 34356 - }, - { - "epoch": 2.6268325783206223, - "grad_norm": 0.0015179236652329564, - "learning_rate": 0.00019999659685942877, - "loss": 46.0, - "step": 34357 - }, - { - "epoch": 2.626909035304012, - "grad_norm": 0.0029208441264927387, - "learning_rate": 0.0001999965966612647, - "loss": 46.0, - "step": 34358 - }, - { - "epoch": 2.626985492287402, - "grad_norm": 0.001754774246364832, - "learning_rate": 0.00019999659646309484, - "loss": 46.0, - "step": 34359 - }, - { - "epoch": 2.6270619492707916, - "grad_norm": 0.0019002397311851382, - "learning_rate": 0.00019999659626491925, - "loss": 46.0, - "step": 34360 - }, - { - "epoch": 2.6271384062541814, - "grad_norm": 0.0021670605055987835, - "learning_rate": 0.00019999659606673785, - "loss": 46.0, - "step": 34361 - }, - { - "epoch": 2.6272148632375707, - "grad_norm": 0.0015996188158169389, - "learning_rate": 0.0001999965958685507, - "loss": 46.0, - "step": 34362 - }, - { - "epoch": 2.6272913202209605, - "grad_norm": 0.0019499182235449553, - "learning_rate": 0.00019999659567035777, - "loss": 46.0, - "step": 34363 - }, - { - "epoch": 2.62736777720435, - "grad_norm": 0.002738367998972535, - "learning_rate": 0.00019999659547215908, - "loss": 46.0, - "step": 34364 - }, - { - "epoch": 2.62744423418774, - "grad_norm": 0.0008483403944410384, - "learning_rate": 0.00019999659527395461, - "loss": 46.0, - "step": 34365 - }, - { - "epoch": 2.6275206911711297, - "grad_norm": 0.0020275218412280083, - "learning_rate": 0.00019999659507574438, - "loss": 46.0, - "step": 34366 - }, - { - "epoch": 2.6275971481545195, - "grad_norm": 0.0024220733903348446, - "learning_rate": 0.00019999659487752837, - "loss": 46.0, - "step": 34367 - }, - { - "epoch": 2.6276736051379093, - "grad_norm": 0.0010920250788331032, - "learning_rate": 0.0001999965946793066, - "loss": 46.0, - "step": 34368 - }, - { - "epoch": 2.627750062121299, - "grad_norm": 0.0010820974130183458, - "learning_rate": 0.00019999659448107906, - "loss": 46.0, - "step": 34369 - }, - { - "epoch": 2.627826519104689, - "grad_norm": 0.0027158481534570456, - "learning_rate": 0.00019999659428284575, - "loss": 46.0, - "step": 34370 - }, - { - "epoch": 2.6279029760880785, - "grad_norm": 0.002140153432264924, - "learning_rate": 0.00019999659408460668, - "loss": 46.0, - "step": 34371 - }, - { - "epoch": 2.6279794330714683, - "grad_norm": 0.0016918446635827422, - "learning_rate": 0.00019999659388636183, - "loss": 46.0, - "step": 34372 - }, - { - "epoch": 2.628055890054858, - "grad_norm": 0.0025619249790906906, - "learning_rate": 0.00019999659368811118, - "loss": 46.0, - "step": 34373 - }, - { - "epoch": 2.628132347038248, - "grad_norm": 0.0029054523911327124, - "learning_rate": 0.0001999965934898548, - "loss": 46.0, - "step": 34374 - }, - { - "epoch": 2.6282088040216376, - "grad_norm": 0.001972772879526019, - "learning_rate": 0.00019999659329159267, - "loss": 46.0, - "step": 34375 - }, - { - "epoch": 2.628285261005027, - "grad_norm": 0.0021854841616004705, - "learning_rate": 0.00019999659309332475, - "loss": 46.0, - "step": 34376 - }, - { - "epoch": 2.6283617179884167, - "grad_norm": 0.000992395682260394, - "learning_rate": 0.00019999659289505104, - "loss": 46.0, - "step": 34377 - }, - { - "epoch": 2.6284381749718064, - "grad_norm": 0.0008169599459506571, - "learning_rate": 0.00019999659269677157, - "loss": 46.0, - "step": 34378 - }, - { - "epoch": 2.628514631955196, - "grad_norm": 0.0013462668284773827, - "learning_rate": 0.00019999659249848634, - "loss": 46.0, - "step": 34379 - }, - { - "epoch": 2.628591088938586, - "grad_norm": 0.0016212827758863568, - "learning_rate": 0.00019999659230019533, - "loss": 46.0, - "step": 34380 - }, - { - "epoch": 2.6286675459219757, - "grad_norm": 0.0017279828898608685, - "learning_rate": 0.00019999659210189857, - "loss": 46.0, - "step": 34381 - }, - { - "epoch": 2.6287440029053655, - "grad_norm": 0.0017663320759311318, - "learning_rate": 0.00019999659190359604, - "loss": 46.0, - "step": 34382 - }, - { - "epoch": 2.6288204598887552, - "grad_norm": 0.0015197661705315113, - "learning_rate": 0.00019999659170528774, - "loss": 46.0, - "step": 34383 - }, - { - "epoch": 2.6288969168721446, - "grad_norm": 0.00364927900955081, - "learning_rate": 0.00019999659150697367, - "loss": 46.0, - "step": 34384 - }, - { - "epoch": 2.6289733738555343, - "grad_norm": 0.004130763933062553, - "learning_rate": 0.0001999965913086538, - "loss": 46.0, - "step": 34385 - }, - { - "epoch": 2.629049830838924, - "grad_norm": 0.0013872553827241063, - "learning_rate": 0.00019999659111032817, - "loss": 46.0, - "step": 34386 - }, - { - "epoch": 2.629126287822314, - "grad_norm": 0.0010498985648155212, - "learning_rate": 0.00019999659091199677, - "loss": 46.0, - "step": 34387 - }, - { - "epoch": 2.6292027448057036, - "grad_norm": 0.0013104953104630113, - "learning_rate": 0.0001999965907136596, - "loss": 46.0, - "step": 34388 - }, - { - "epoch": 2.6292792017890934, - "grad_norm": 0.0033699474297463894, - "learning_rate": 0.0001999965905153167, - "loss": 46.0, - "step": 34389 - }, - { - "epoch": 2.629355658772483, - "grad_norm": 0.007030009757727385, - "learning_rate": 0.000199996590316968, - "loss": 46.0, - "step": 34390 - }, - { - "epoch": 2.629432115755873, - "grad_norm": 0.002016279147937894, - "learning_rate": 0.00019999659011861356, - "loss": 46.0, - "step": 34391 - }, - { - "epoch": 2.6295085727392626, - "grad_norm": 0.0018494086107239127, - "learning_rate": 0.0001999965899202533, - "loss": 46.0, - "step": 34392 - }, - { - "epoch": 2.6295850297226524, - "grad_norm": 0.0013845224166288972, - "learning_rate": 0.00019999658972188732, - "loss": 46.0, - "step": 34393 - }, - { - "epoch": 2.629661486706042, - "grad_norm": 0.0006367545574903488, - "learning_rate": 0.00019999658952351554, - "loss": 46.0, - "step": 34394 - }, - { - "epoch": 2.629737943689432, - "grad_norm": 0.0026914917398244143, - "learning_rate": 0.000199996589325138, - "loss": 46.0, - "step": 34395 - }, - { - "epoch": 2.6298144006728217, - "grad_norm": 0.000510679034050554, - "learning_rate": 0.0001999965891267547, - "loss": 46.0, - "step": 34396 - }, - { - "epoch": 2.6298908576562114, - "grad_norm": 0.002147011226043105, - "learning_rate": 0.0001999965889283656, - "loss": 46.0, - "step": 34397 - }, - { - "epoch": 2.6299673146396008, - "grad_norm": 0.0006715363706462085, - "learning_rate": 0.00019999658872997075, - "loss": 46.0, - "step": 34398 - }, - { - "epoch": 2.6300437716229905, - "grad_norm": 0.0020176381804049015, - "learning_rate": 0.00019999658853157013, - "loss": 46.0, - "step": 34399 - }, - { - "epoch": 2.6301202286063803, - "grad_norm": 0.0038959882222115993, - "learning_rate": 0.00019999658833316374, - "loss": 46.0, - "step": 34400 - }, - { - "epoch": 2.63019668558977, - "grad_norm": 0.0014475034549832344, - "learning_rate": 0.0001999965881347516, - "loss": 46.0, - "step": 34401 - }, - { - "epoch": 2.63027314257316, - "grad_norm": 0.0027651104610413313, - "learning_rate": 0.00019999658793633368, - "loss": 46.0, - "step": 34402 - }, - { - "epoch": 2.6303495995565496, - "grad_norm": 0.0041971090249717236, - "learning_rate": 0.00019999658773790996, - "loss": 46.0, - "step": 34403 - }, - { - "epoch": 2.6304260565399393, - "grad_norm": 0.0018077807035297155, - "learning_rate": 0.00019999658753948053, - "loss": 46.0, - "step": 34404 - }, - { - "epoch": 2.630502513523329, - "grad_norm": 0.00109379505738616, - "learning_rate": 0.00019999658734104527, - "loss": 46.0, - "step": 34405 - }, - { - "epoch": 2.6305789705067184, - "grad_norm": 0.003543097758665681, - "learning_rate": 0.00019999658714260426, - "loss": 46.0, - "step": 34406 - }, - { - "epoch": 2.630655427490108, - "grad_norm": 0.001951010199263692, - "learning_rate": 0.00019999658694415748, - "loss": 46.0, - "step": 34407 - }, - { - "epoch": 2.630731884473498, - "grad_norm": 0.0016046827659010887, - "learning_rate": 0.00019999658674570495, - "loss": 46.0, - "step": 34408 - }, - { - "epoch": 2.6308083414568877, - "grad_norm": 0.0026712275575846434, - "learning_rate": 0.00019999658654724665, - "loss": 46.0, - "step": 34409 - }, - { - "epoch": 2.6308847984402775, - "grad_norm": 0.0020372001454234123, - "learning_rate": 0.00019999658634878257, - "loss": 46.0, - "step": 34410 - }, - { - "epoch": 2.630961255423667, - "grad_norm": 0.007009337190538645, - "learning_rate": 0.00019999658615031272, - "loss": 46.0, - "step": 34411 - }, - { - "epoch": 2.631037712407057, - "grad_norm": 0.0014605749165639281, - "learning_rate": 0.00019999658595183707, - "loss": 46.0, - "step": 34412 - }, - { - "epoch": 2.6311141693904467, - "grad_norm": 0.0013487362302839756, - "learning_rate": 0.0001999965857533557, - "loss": 46.0, - "step": 34413 - }, - { - "epoch": 2.6311906263738365, - "grad_norm": 0.0034657728392630816, - "learning_rate": 0.00019999658555486856, - "loss": 46.0, - "step": 34414 - }, - { - "epoch": 2.6312670833572263, - "grad_norm": 0.004519772715866566, - "learning_rate": 0.00019999658535637562, - "loss": 46.0, - "step": 34415 - }, - { - "epoch": 2.631343540340616, - "grad_norm": 0.0028691054321825504, - "learning_rate": 0.00019999658515787693, - "loss": 46.0, - "step": 34416 - }, - { - "epoch": 2.6314199973240058, - "grad_norm": 0.004534099251031876, - "learning_rate": 0.00019999658495937245, - "loss": 46.0, - "step": 34417 - }, - { - "epoch": 2.6314964543073955, - "grad_norm": 0.0007312087691389024, - "learning_rate": 0.00019999658476086224, - "loss": 46.0, - "step": 34418 - }, - { - "epoch": 2.6315729112907853, - "grad_norm": 0.000681075151078403, - "learning_rate": 0.00019999658456234623, - "loss": 46.0, - "step": 34419 - }, - { - "epoch": 2.6316493682741746, - "grad_norm": 0.0007741820299997926, - "learning_rate": 0.00019999658436382445, - "loss": 46.0, - "step": 34420 - }, - { - "epoch": 2.6317258252575644, - "grad_norm": 0.0019792323000729084, - "learning_rate": 0.00019999658416529692, - "loss": 46.0, - "step": 34421 - }, - { - "epoch": 2.631802282240954, - "grad_norm": 0.001056221197359264, - "learning_rate": 0.0001999965839667636, - "loss": 46.0, - "step": 34422 - }, - { - "epoch": 2.631878739224344, - "grad_norm": 0.0012056275736540556, - "learning_rate": 0.00019999658376822452, - "loss": 46.0, - "step": 34423 - }, - { - "epoch": 2.6319551962077337, - "grad_norm": 0.002178872236981988, - "learning_rate": 0.00019999658356967967, - "loss": 46.0, - "step": 34424 - }, - { - "epoch": 2.6320316531911234, - "grad_norm": 0.004085775464773178, - "learning_rate": 0.00019999658337112902, - "loss": 46.0, - "step": 34425 - }, - { - "epoch": 2.632108110174513, - "grad_norm": 0.0010769017972052097, - "learning_rate": 0.00019999658317257265, - "loss": 46.0, - "step": 34426 - }, - { - "epoch": 2.632184567157903, - "grad_norm": 0.006025294773280621, - "learning_rate": 0.0001999965829740105, - "loss": 46.0, - "step": 34427 - }, - { - "epoch": 2.6322610241412923, - "grad_norm": 0.0007110938895493746, - "learning_rate": 0.00019999658277544257, - "loss": 46.0, - "step": 34428 - }, - { - "epoch": 2.632337481124682, - "grad_norm": 0.005359198898077011, - "learning_rate": 0.00019999658257686885, - "loss": 46.0, - "step": 34429 - }, - { - "epoch": 2.632413938108072, - "grad_norm": 0.001609212253242731, - "learning_rate": 0.0001999965823782894, - "loss": 46.0, - "step": 34430 - }, - { - "epoch": 2.6324903950914615, - "grad_norm": 0.0019025879446417093, - "learning_rate": 0.00019999658217970419, - "loss": 46.0, - "step": 34431 - }, - { - "epoch": 2.6325668520748513, - "grad_norm": 0.001044404343701899, - "learning_rate": 0.00019999658198111318, - "loss": 46.0, - "step": 34432 - }, - { - "epoch": 2.632643309058241, - "grad_norm": 0.002205275697633624, - "learning_rate": 0.00019999658178251642, - "loss": 46.0, - "step": 34433 - }, - { - "epoch": 2.632719766041631, - "grad_norm": 0.001529672066681087, - "learning_rate": 0.00019999658158391387, - "loss": 46.0, - "step": 34434 - }, - { - "epoch": 2.6327962230250206, - "grad_norm": 0.0007609836757183075, - "learning_rate": 0.00019999658138530554, - "loss": 46.0, - "step": 34435 - }, - { - "epoch": 2.6328726800084103, - "grad_norm": 0.01854216866195202, - "learning_rate": 0.00019999658118669147, - "loss": 46.0, - "step": 34436 - }, - { - "epoch": 2.6329491369918, - "grad_norm": 0.0010859571630135179, - "learning_rate": 0.00019999658098807162, - "loss": 46.0, - "step": 34437 - }, - { - "epoch": 2.63302559397519, - "grad_norm": 0.0020236559212207794, - "learning_rate": 0.000199996580789446, - "loss": 46.0, - "step": 34438 - }, - { - "epoch": 2.6331020509585796, - "grad_norm": 0.0007272697403095663, - "learning_rate": 0.0001999965805908146, - "loss": 46.0, - "step": 34439 - }, - { - "epoch": 2.6331785079419694, - "grad_norm": 0.0018953965045511723, - "learning_rate": 0.00019999658039217746, - "loss": 46.0, - "step": 34440 - }, - { - "epoch": 2.6332549649253587, - "grad_norm": 0.0022129833232611418, - "learning_rate": 0.00019999658019353452, - "loss": 46.0, - "step": 34441 - }, - { - "epoch": 2.6333314219087485, - "grad_norm": 0.0027869415935128927, - "learning_rate": 0.00019999657999488583, - "loss": 46.0, - "step": 34442 - }, - { - "epoch": 2.6334078788921382, - "grad_norm": 0.0012360612163320184, - "learning_rate": 0.00019999657979623137, - "loss": 46.0, - "step": 34443 - }, - { - "epoch": 2.633484335875528, - "grad_norm": 0.0013782597379758954, - "learning_rate": 0.0001999965795975711, - "loss": 46.0, - "step": 34444 - }, - { - "epoch": 2.6335607928589178, - "grad_norm": 0.0070426990278065205, - "learning_rate": 0.00019999657939890513, - "loss": 46.0, - "step": 34445 - }, - { - "epoch": 2.6336372498423075, - "grad_norm": 0.002031357027590275, - "learning_rate": 0.00019999657920023335, - "loss": 46.0, - "step": 34446 - }, - { - "epoch": 2.6337137068256973, - "grad_norm": 0.003125097369775176, - "learning_rate": 0.0001999965790015558, - "loss": 46.0, - "step": 34447 - }, - { - "epoch": 2.633790163809087, - "grad_norm": 0.002093460876494646, - "learning_rate": 0.0001999965788028725, - "loss": 46.0, - "step": 34448 - }, - { - "epoch": 2.633866620792477, - "grad_norm": 0.0041023981757462025, - "learning_rate": 0.00019999657860418342, - "loss": 46.0, - "step": 34449 - }, - { - "epoch": 2.633943077775866, - "grad_norm": 0.0030651772394776344, - "learning_rate": 0.00019999657840548854, - "loss": 46.0, - "step": 34450 - }, - { - "epoch": 2.634019534759256, - "grad_norm": 0.0016462085768580437, - "learning_rate": 0.00019999657820678795, - "loss": 46.0, - "step": 34451 - }, - { - "epoch": 2.6340959917426456, - "grad_norm": 0.003537294454872608, - "learning_rate": 0.00019999657800808156, - "loss": 46.0, - "step": 34452 - }, - { - "epoch": 2.6341724487260354, - "grad_norm": 0.0009102692711167037, - "learning_rate": 0.0001999965778093694, - "loss": 46.0, - "step": 34453 - }, - { - "epoch": 2.634248905709425, - "grad_norm": 0.0050512878224253654, - "learning_rate": 0.00019999657761065148, - "loss": 46.0, - "step": 34454 - }, - { - "epoch": 2.634325362692815, - "grad_norm": 0.0009945003548637033, - "learning_rate": 0.00019999657741192776, - "loss": 46.0, - "step": 34455 - }, - { - "epoch": 2.6344018196762047, - "grad_norm": 0.001974952407181263, - "learning_rate": 0.0001999965772131983, - "loss": 46.0, - "step": 34456 - }, - { - "epoch": 2.6344782766595944, - "grad_norm": 0.003743717446923256, - "learning_rate": 0.0001999965770144631, - "loss": 46.0, - "step": 34457 - }, - { - "epoch": 2.634554733642984, - "grad_norm": 0.004385035485029221, - "learning_rate": 0.00019999657681572206, - "loss": 46.0, - "step": 34458 - }, - { - "epoch": 2.634631190626374, - "grad_norm": 0.0026215077377855778, - "learning_rate": 0.0001999965766169753, - "loss": 46.0, - "step": 34459 - }, - { - "epoch": 2.6347076476097637, - "grad_norm": 0.0020132833160459995, - "learning_rate": 0.00019999657641822272, - "loss": 46.0, - "step": 34460 - }, - { - "epoch": 2.6347841045931535, - "grad_norm": 0.002285352209582925, - "learning_rate": 0.00019999657621946445, - "loss": 46.0, - "step": 34461 - }, - { - "epoch": 2.6348605615765432, - "grad_norm": 0.001081867259927094, - "learning_rate": 0.00019999657602070038, - "loss": 46.0, - "step": 34462 - }, - { - "epoch": 2.6349370185599326, - "grad_norm": 0.0041771335527300835, - "learning_rate": 0.0001999965758219305, - "loss": 46.0, - "step": 34463 - }, - { - "epoch": 2.6350134755433223, - "grad_norm": 0.00625573517754674, - "learning_rate": 0.00019999657562315488, - "loss": 46.0, - "step": 34464 - }, - { - "epoch": 2.635089932526712, - "grad_norm": 0.0029771230183541775, - "learning_rate": 0.0001999965754243735, - "loss": 46.0, - "step": 34465 - }, - { - "epoch": 2.635166389510102, - "grad_norm": 0.0036802447866648436, - "learning_rate": 0.00019999657522558635, - "loss": 46.0, - "step": 34466 - }, - { - "epoch": 2.6352428464934916, - "grad_norm": 0.002327551832422614, - "learning_rate": 0.0001999965750267934, - "loss": 46.0, - "step": 34467 - }, - { - "epoch": 2.6353193034768814, - "grad_norm": 0.002293108031153679, - "learning_rate": 0.0001999965748279947, - "loss": 46.0, - "step": 34468 - }, - { - "epoch": 2.635395760460271, - "grad_norm": 0.002166707767173648, - "learning_rate": 0.00019999657462919026, - "loss": 46.0, - "step": 34469 - }, - { - "epoch": 2.635472217443661, - "grad_norm": 0.0009519589366391301, - "learning_rate": 0.00019999657443038, - "loss": 46.0, - "step": 34470 - }, - { - "epoch": 2.63554867442705, - "grad_norm": 0.0011486202711239457, - "learning_rate": 0.00019999657423156402, - "loss": 46.0, - "step": 34471 - }, - { - "epoch": 2.63562513141044, - "grad_norm": 0.0026585632003843784, - "learning_rate": 0.00019999657403274224, - "loss": 46.0, - "step": 34472 - }, - { - "epoch": 2.6357015883938297, - "grad_norm": 0.0011468218872323632, - "learning_rate": 0.00019999657383391469, - "loss": 46.0, - "step": 34473 - }, - { - "epoch": 2.6357780453772195, - "grad_norm": 0.001684209331870079, - "learning_rate": 0.00019999657363508139, - "loss": 46.0, - "step": 34474 - }, - { - "epoch": 2.6358545023606093, - "grad_norm": 0.0006070074741728604, - "learning_rate": 0.00019999657343624228, - "loss": 46.0, - "step": 34475 - }, - { - "epoch": 2.635930959343999, - "grad_norm": 0.0072415233589708805, - "learning_rate": 0.00019999657323739744, - "loss": 46.0, - "step": 34476 - }, - { - "epoch": 2.6360074163273888, - "grad_norm": 0.0012715159682556987, - "learning_rate": 0.00019999657303854682, - "loss": 46.0, - "step": 34477 - }, - { - "epoch": 2.6360838733107785, - "grad_norm": 0.0023942769039422274, - "learning_rate": 0.00019999657283969042, - "loss": 46.0, - "step": 34478 - }, - { - "epoch": 2.6361603302941683, - "grad_norm": 0.0010972925228998065, - "learning_rate": 0.00019999657264082828, - "loss": 46.0, - "step": 34479 - }, - { - "epoch": 2.636236787277558, - "grad_norm": 0.003284666920080781, - "learning_rate": 0.00019999657244196034, - "loss": 46.0, - "step": 34480 - }, - { - "epoch": 2.636313244260948, - "grad_norm": 0.004400253761559725, - "learning_rate": 0.00019999657224308663, - "loss": 46.0, - "step": 34481 - }, - { - "epoch": 2.6363897012443376, - "grad_norm": 0.0010560192167758942, - "learning_rate": 0.0001999965720442072, - "loss": 46.0, - "step": 34482 - }, - { - "epoch": 2.6364661582277273, - "grad_norm": 0.0020533977076411247, - "learning_rate": 0.00019999657184532194, - "loss": 46.0, - "step": 34483 - }, - { - "epoch": 2.636542615211117, - "grad_norm": 0.007894976064562798, - "learning_rate": 0.00019999657164643093, - "loss": 46.0, - "step": 34484 - }, - { - "epoch": 2.6366190721945064, - "grad_norm": 0.0035504489205777645, - "learning_rate": 0.00019999657144753418, - "loss": 46.0, - "step": 34485 - }, - { - "epoch": 2.636695529177896, - "grad_norm": 0.0021106658969074488, - "learning_rate": 0.00019999657124863162, - "loss": 46.0, - "step": 34486 - }, - { - "epoch": 2.636771986161286, - "grad_norm": 0.0019726473838090897, - "learning_rate": 0.00019999657104972332, - "loss": 46.0, - "step": 34487 - }, - { - "epoch": 2.6368484431446757, - "grad_norm": 0.0006805500597693026, - "learning_rate": 0.00019999657085080922, - "loss": 46.0, - "step": 34488 - }, - { - "epoch": 2.6369249001280655, - "grad_norm": 0.001005430007353425, - "learning_rate": 0.00019999657065188938, - "loss": 46.0, - "step": 34489 - }, - { - "epoch": 2.6370013571114552, - "grad_norm": 0.0013657687231898308, - "learning_rate": 0.00019999657045296373, - "loss": 46.0, - "step": 34490 - }, - { - "epoch": 2.637077814094845, - "grad_norm": 0.0024143001064658165, - "learning_rate": 0.00019999657025403236, - "loss": 46.0, - "step": 34491 - }, - { - "epoch": 2.6371542710782347, - "grad_norm": 0.0012777001829817891, - "learning_rate": 0.0001999965700550952, - "loss": 46.0, - "step": 34492 - }, - { - "epoch": 2.637230728061624, - "grad_norm": 0.0016745832981541753, - "learning_rate": 0.00019999656985615228, - "loss": 46.0, - "step": 34493 - }, - { - "epoch": 2.637307185045014, - "grad_norm": 0.0018149771494790912, - "learning_rate": 0.00019999656965720357, - "loss": 46.0, - "step": 34494 - }, - { - "epoch": 2.6373836420284036, - "grad_norm": 0.0011310898698866367, - "learning_rate": 0.0001999965694582491, - "loss": 46.0, - "step": 34495 - }, - { - "epoch": 2.6374600990117933, - "grad_norm": 0.0013143346877768636, - "learning_rate": 0.00019999656925928888, - "loss": 46.0, - "step": 34496 - }, - { - "epoch": 2.637536555995183, - "grad_norm": 0.0025121895596385, - "learning_rate": 0.00019999656906032287, - "loss": 46.0, - "step": 34497 - }, - { - "epoch": 2.637613012978573, - "grad_norm": 0.0021099471487104893, - "learning_rate": 0.0001999965688613511, - "loss": 46.0, - "step": 34498 - }, - { - "epoch": 2.6376894699619626, - "grad_norm": 0.001771169132553041, - "learning_rate": 0.00019999656866237354, - "loss": 46.0, - "step": 34499 - }, - { - "epoch": 2.6377659269453524, - "grad_norm": 0.005528313107788563, - "learning_rate": 0.00019999656846339024, - "loss": 46.0, - "step": 34500 - }, - { - "epoch": 2.637842383928742, - "grad_norm": 0.0035174351651221514, - "learning_rate": 0.00019999656826440117, - "loss": 46.0, - "step": 34501 - }, - { - "epoch": 2.637918840912132, - "grad_norm": 0.001308982609771192, - "learning_rate": 0.0001999965680654063, - "loss": 46.0, - "step": 34502 - }, - { - "epoch": 2.6379952978955217, - "grad_norm": 0.00360392895527184, - "learning_rate": 0.00019999656786640567, - "loss": 46.0, - "step": 34503 - }, - { - "epoch": 2.6380717548789114, - "grad_norm": 0.001978496555238962, - "learning_rate": 0.00019999656766739928, - "loss": 46.0, - "step": 34504 - }, - { - "epoch": 2.638148211862301, - "grad_norm": 0.0020974003709852695, - "learning_rate": 0.00019999656746838714, - "loss": 46.0, - "step": 34505 - }, - { - "epoch": 2.638224668845691, - "grad_norm": 0.0007851683767512441, - "learning_rate": 0.0001999965672693692, - "loss": 46.0, - "step": 34506 - }, - { - "epoch": 2.6383011258290803, - "grad_norm": 0.0012631692225113511, - "learning_rate": 0.00019999656707034552, - "loss": 46.0, - "step": 34507 - }, - { - "epoch": 2.63837758281247, - "grad_norm": 0.0027504574973136187, - "learning_rate": 0.00019999656687131606, - "loss": 46.0, - "step": 34508 - }, - { - "epoch": 2.63845403979586, - "grad_norm": 0.0007870674598962069, - "learning_rate": 0.0001999965666722808, - "loss": 46.0, - "step": 34509 - }, - { - "epoch": 2.6385304967792496, - "grad_norm": 0.0020757061429321766, - "learning_rate": 0.0001999965664732398, - "loss": 46.0, - "step": 34510 - }, - { - "epoch": 2.6386069537626393, - "grad_norm": 0.0027561241295188665, - "learning_rate": 0.00019999656627419304, - "loss": 46.0, - "step": 34511 - }, - { - "epoch": 2.638683410746029, - "grad_norm": 0.007109851110726595, - "learning_rate": 0.00019999656607514048, - "loss": 46.0, - "step": 34512 - }, - { - "epoch": 2.638759867729419, - "grad_norm": 0.002134371316060424, - "learning_rate": 0.00019999656587608216, - "loss": 46.0, - "step": 34513 - }, - { - "epoch": 2.6388363247128086, - "grad_norm": 0.0007126507116481662, - "learning_rate": 0.00019999656567701809, - "loss": 46.0, - "step": 34514 - }, - { - "epoch": 2.638912781696198, - "grad_norm": 0.004940371494740248, - "learning_rate": 0.00019999656547794824, - "loss": 46.0, - "step": 34515 - }, - { - "epoch": 2.6389892386795877, - "grad_norm": 0.0021288772113621235, - "learning_rate": 0.0001999965652788726, - "loss": 46.0, - "step": 34516 - }, - { - "epoch": 2.6390656956629774, - "grad_norm": 0.0030443675350397825, - "learning_rate": 0.0001999965650797912, - "loss": 46.0, - "step": 34517 - }, - { - "epoch": 2.639142152646367, - "grad_norm": 0.00041789733222685754, - "learning_rate": 0.00019999656488070406, - "loss": 46.0, - "step": 34518 - }, - { - "epoch": 2.639218609629757, - "grad_norm": 0.0013990928418934345, - "learning_rate": 0.00019999656468161112, - "loss": 46.0, - "step": 34519 - }, - { - "epoch": 2.6392950666131467, - "grad_norm": 0.004180360585451126, - "learning_rate": 0.00019999656448251244, - "loss": 46.0, - "step": 34520 - }, - { - "epoch": 2.6393715235965365, - "grad_norm": 0.003556195180863142, - "learning_rate": 0.00019999656428340795, - "loss": 46.0, - "step": 34521 - }, - { - "epoch": 2.6394479805799262, - "grad_norm": 0.0025172897148877382, - "learning_rate": 0.00019999656408429772, - "loss": 46.0, - "step": 34522 - }, - { - "epoch": 2.639524437563316, - "grad_norm": 0.007746831979602575, - "learning_rate": 0.0001999965638851817, - "loss": 46.0, - "step": 34523 - }, - { - "epoch": 2.6396008945467058, - "grad_norm": 0.0015589401591569185, - "learning_rate": 0.00019999656368605993, - "loss": 46.0, - "step": 34524 - }, - { - "epoch": 2.6396773515300955, - "grad_norm": 0.0030775293707847595, - "learning_rate": 0.00019999656348693238, - "loss": 46.0, - "step": 34525 - }, - { - "epoch": 2.6397538085134853, - "grad_norm": 0.0007467251853086054, - "learning_rate": 0.00019999656328779908, - "loss": 46.0, - "step": 34526 - }, - { - "epoch": 2.639830265496875, - "grad_norm": 0.0019878584425896406, - "learning_rate": 0.00019999656308865998, - "loss": 46.0, - "step": 34527 - }, - { - "epoch": 2.639906722480265, - "grad_norm": 0.003020251402631402, - "learning_rate": 0.00019999656288951514, - "loss": 46.0, - "step": 34528 - }, - { - "epoch": 2.639983179463654, - "grad_norm": 0.0054749296978116035, - "learning_rate": 0.00019999656269036452, - "loss": 46.0, - "step": 34529 - }, - { - "epoch": 2.640059636447044, - "grad_norm": 0.0028721713460981846, - "learning_rate": 0.00019999656249120813, - "loss": 46.0, - "step": 34530 - }, - { - "epoch": 2.6401360934304337, - "grad_norm": 0.0021708596032112837, - "learning_rate": 0.000199996562292046, - "loss": 46.0, - "step": 34531 - }, - { - "epoch": 2.6402125504138234, - "grad_norm": 0.0022460289765149355, - "learning_rate": 0.00019999656209287805, - "loss": 46.0, - "step": 34532 - }, - { - "epoch": 2.640289007397213, - "grad_norm": 0.0009714094339869916, - "learning_rate": 0.00019999656189370434, - "loss": 46.0, - "step": 34533 - }, - { - "epoch": 2.640365464380603, - "grad_norm": 0.0015169760445132852, - "learning_rate": 0.00019999656169452488, - "loss": 46.0, - "step": 34534 - }, - { - "epoch": 2.6404419213639927, - "grad_norm": 0.0034266652073711157, - "learning_rate": 0.00019999656149533962, - "loss": 46.0, - "step": 34535 - }, - { - "epoch": 2.6405183783473825, - "grad_norm": 0.0005773219163529575, - "learning_rate": 0.00019999656129614864, - "loss": 46.0, - "step": 34536 - }, - { - "epoch": 2.6405948353307718, - "grad_norm": 0.0011426068376749754, - "learning_rate": 0.00019999656109695186, - "loss": 46.0, - "step": 34537 - }, - { - "epoch": 2.6406712923141615, - "grad_norm": 0.001394849386997521, - "learning_rate": 0.0001999965608977493, - "loss": 46.0, - "step": 34538 - }, - { - "epoch": 2.6407477492975513, - "grad_norm": 0.0030240879859775305, - "learning_rate": 0.000199996560698541, - "loss": 46.0, - "step": 34539 - }, - { - "epoch": 2.640824206280941, - "grad_norm": 0.0009540761238895357, - "learning_rate": 0.0001999965604993269, - "loss": 46.0, - "step": 34540 - }, - { - "epoch": 2.640900663264331, - "grad_norm": 0.0013019067700952291, - "learning_rate": 0.00019999656030010704, - "loss": 46.0, - "step": 34541 - }, - { - "epoch": 2.6409771202477206, - "grad_norm": 0.001934366999194026, - "learning_rate": 0.00019999656010088145, - "loss": 46.0, - "step": 34542 - }, - { - "epoch": 2.6410535772311103, - "grad_norm": 0.006734516471624374, - "learning_rate": 0.00019999655990165006, - "loss": 46.0, - "step": 34543 - }, - { - "epoch": 2.6411300342145, - "grad_norm": 0.0030596167780458927, - "learning_rate": 0.0001999965597024129, - "loss": 46.0, - "step": 34544 - }, - { - "epoch": 2.64120649119789, - "grad_norm": 0.0014932332560420036, - "learning_rate": 0.00019999655950316995, - "loss": 46.0, - "step": 34545 - }, - { - "epoch": 2.6412829481812796, - "grad_norm": 0.002858963096514344, - "learning_rate": 0.00019999655930392124, - "loss": 46.0, - "step": 34546 - }, - { - "epoch": 2.6413594051646694, - "grad_norm": 0.0037363243754953146, - "learning_rate": 0.00019999655910466678, - "loss": 46.0, - "step": 34547 - }, - { - "epoch": 2.641435862148059, - "grad_norm": 0.0030468739569187164, - "learning_rate": 0.00019999655890540655, - "loss": 46.0, - "step": 34548 - }, - { - "epoch": 2.641512319131449, - "grad_norm": 0.0016986342379823327, - "learning_rate": 0.00019999655870614054, - "loss": 46.0, - "step": 34549 - }, - { - "epoch": 2.6415887761148387, - "grad_norm": 0.002146833809092641, - "learning_rate": 0.0001999965585068688, - "loss": 46.0, - "step": 34550 - }, - { - "epoch": 2.641665233098228, - "grad_norm": 0.0015977276489138603, - "learning_rate": 0.0001999965583075912, - "loss": 46.0, - "step": 34551 - }, - { - "epoch": 2.6417416900816177, - "grad_norm": 0.0032917342614382505, - "learning_rate": 0.00019999655810830791, - "loss": 46.0, - "step": 34552 - }, - { - "epoch": 2.6418181470650075, - "grad_norm": 0.0008477296214550734, - "learning_rate": 0.00019999655790901884, - "loss": 46.0, - "step": 34553 - }, - { - "epoch": 2.6418946040483973, - "grad_norm": 0.0011758451582863927, - "learning_rate": 0.00019999655770972397, - "loss": 46.0, - "step": 34554 - }, - { - "epoch": 2.641971061031787, - "grad_norm": 0.008312544785439968, - "learning_rate": 0.00019999655751042335, - "loss": 46.0, - "step": 34555 - }, - { - "epoch": 2.642047518015177, - "grad_norm": 0.0012131272815167904, - "learning_rate": 0.00019999655731111696, - "loss": 46.0, - "step": 34556 - }, - { - "epoch": 2.6421239749985665, - "grad_norm": 0.0031499280594289303, - "learning_rate": 0.0001999965571118048, - "loss": 46.0, - "step": 34557 - }, - { - "epoch": 2.6422004319819563, - "grad_norm": 0.0006568707758560777, - "learning_rate": 0.00019999655691248686, - "loss": 46.0, - "step": 34558 - }, - { - "epoch": 2.6422768889653456, - "grad_norm": 0.0007174179772846401, - "learning_rate": 0.00019999655671316315, - "loss": 46.0, - "step": 34559 - }, - { - "epoch": 2.6423533459487354, - "grad_norm": 0.0008868909208104014, - "learning_rate": 0.00019999655651383371, - "loss": 46.0, - "step": 34560 - }, - { - "epoch": 2.642429802932125, - "grad_norm": 0.0007538720965385437, - "learning_rate": 0.00019999655631449846, - "loss": 46.0, - "step": 34561 - }, - { - "epoch": 2.642506259915515, - "grad_norm": 0.0009438938577659428, - "learning_rate": 0.00019999655611515745, - "loss": 46.0, - "step": 34562 - }, - { - "epoch": 2.6425827168989047, - "grad_norm": 0.0023029108997434378, - "learning_rate": 0.00019999655591581067, - "loss": 46.0, - "step": 34563 - }, - { - "epoch": 2.6426591738822944, - "grad_norm": 0.0010392619296908379, - "learning_rate": 0.00019999655571645812, - "loss": 46.0, - "step": 34564 - }, - { - "epoch": 2.642735630865684, - "grad_norm": 0.0024848429020494223, - "learning_rate": 0.00019999655551709982, - "loss": 46.0, - "step": 34565 - }, - { - "epoch": 2.642812087849074, - "grad_norm": 0.0009105200879275799, - "learning_rate": 0.00019999655531773573, - "loss": 46.0, - "step": 34566 - }, - { - "epoch": 2.6428885448324637, - "grad_norm": 0.0010038192849606276, - "learning_rate": 0.00019999655511836588, - "loss": 46.0, - "step": 34567 - }, - { - "epoch": 2.6429650018158535, - "grad_norm": 0.0014624958857893944, - "learning_rate": 0.00019999655491899026, - "loss": 46.0, - "step": 34568 - }, - { - "epoch": 2.6430414587992432, - "grad_norm": 0.00266293459571898, - "learning_rate": 0.00019999655471960884, - "loss": 46.0, - "step": 34569 - }, - { - "epoch": 2.643117915782633, - "grad_norm": 0.0011956710368394852, - "learning_rate": 0.0001999965545202217, - "loss": 46.0, - "step": 34570 - }, - { - "epoch": 2.6431943727660228, - "grad_norm": 0.001659775385633111, - "learning_rate": 0.00019999655432082877, - "loss": 46.0, - "step": 34571 - }, - { - "epoch": 2.643270829749412, - "grad_norm": 0.0020263863261789083, - "learning_rate": 0.00019999655412143006, - "loss": 46.0, - "step": 34572 - }, - { - "epoch": 2.643347286732802, - "grad_norm": 0.0014143850421532989, - "learning_rate": 0.0001999965539220256, - "loss": 46.0, - "step": 34573 - }, - { - "epoch": 2.6434237437161916, - "grad_norm": 0.000806483905762434, - "learning_rate": 0.00019999655372261537, - "loss": 46.0, - "step": 34574 - }, - { - "epoch": 2.6435002006995814, - "grad_norm": 0.002073373179882765, - "learning_rate": 0.00019999655352319934, - "loss": 46.0, - "step": 34575 - }, - { - "epoch": 2.643576657682971, - "grad_norm": 0.0022042703349143267, - "learning_rate": 0.00019999655332377759, - "loss": 46.0, - "step": 34576 - }, - { - "epoch": 2.643653114666361, - "grad_norm": 0.0010752958478406072, - "learning_rate": 0.00019999655312435003, - "loss": 46.0, - "step": 34577 - }, - { - "epoch": 2.6437295716497506, - "grad_norm": 0.0013884485233575106, - "learning_rate": 0.00019999655292491674, - "loss": 46.0, - "step": 34578 - }, - { - "epoch": 2.6438060286331404, - "grad_norm": 0.0021038968116045, - "learning_rate": 0.00019999655272547764, - "loss": 46.0, - "step": 34579 - }, - { - "epoch": 2.64388248561653, - "grad_norm": 0.004786514677107334, - "learning_rate": 0.00019999655252603277, - "loss": 46.0, - "step": 34580 - }, - { - "epoch": 2.6439589425999195, - "grad_norm": 0.002001231536269188, - "learning_rate": 0.00019999655232658215, - "loss": 46.0, - "step": 34581 - }, - { - "epoch": 2.6440353995833092, - "grad_norm": 0.007867136038839817, - "learning_rate": 0.00019999655212712576, - "loss": 46.0, - "step": 34582 - }, - { - "epoch": 2.644111856566699, - "grad_norm": 0.0010004050564020872, - "learning_rate": 0.0001999965519276636, - "loss": 46.0, - "step": 34583 - }, - { - "epoch": 2.6441883135500888, - "grad_norm": 0.003883182071149349, - "learning_rate": 0.00019999655172819566, - "loss": 46.0, - "step": 34584 - }, - { - "epoch": 2.6442647705334785, - "grad_norm": 0.0037523575592786074, - "learning_rate": 0.00019999655152872197, - "loss": 46.0, - "step": 34585 - }, - { - "epoch": 2.6443412275168683, - "grad_norm": 0.0014262836193665862, - "learning_rate": 0.00019999655132924251, - "loss": 46.0, - "step": 34586 - }, - { - "epoch": 2.644417684500258, - "grad_norm": 0.0019873965065926313, - "learning_rate": 0.00019999655112975726, - "loss": 46.0, - "step": 34587 - }, - { - "epoch": 2.644494141483648, - "grad_norm": 0.0050540766678750515, - "learning_rate": 0.00019999655093026625, - "loss": 46.0, - "step": 34588 - }, - { - "epoch": 2.6445705984670376, - "grad_norm": 0.0022524537052959204, - "learning_rate": 0.00019999655073076947, - "loss": 46.0, - "step": 34589 - }, - { - "epoch": 2.6446470554504273, - "grad_norm": 0.013481991365551949, - "learning_rate": 0.00019999655053126692, - "loss": 46.0, - "step": 34590 - }, - { - "epoch": 2.644723512433817, - "grad_norm": 0.0007073396118357778, - "learning_rate": 0.00019999655033175863, - "loss": 46.0, - "step": 34591 - }, - { - "epoch": 2.644799969417207, - "grad_norm": 0.0007290412322618067, - "learning_rate": 0.00019999655013224456, - "loss": 46.0, - "step": 34592 - }, - { - "epoch": 2.6448764264005966, - "grad_norm": 0.0034683244302868843, - "learning_rate": 0.00019999654993272468, - "loss": 46.0, - "step": 34593 - }, - { - "epoch": 2.644952883383986, - "grad_norm": 0.00046749861212447286, - "learning_rate": 0.00019999654973319907, - "loss": 46.0, - "step": 34594 - }, - { - "epoch": 2.6450293403673757, - "grad_norm": 0.0016555659240111709, - "learning_rate": 0.00019999654953366768, - "loss": 46.0, - "step": 34595 - }, - { - "epoch": 2.6451057973507655, - "grad_norm": 0.0027320387307554483, - "learning_rate": 0.0001999965493341305, - "loss": 46.0, - "step": 34596 - }, - { - "epoch": 2.645182254334155, - "grad_norm": 0.02012760005891323, - "learning_rate": 0.00019999654913458757, - "loss": 46.0, - "step": 34597 - }, - { - "epoch": 2.645258711317545, - "grad_norm": 0.001547559048049152, - "learning_rate": 0.00019999654893503886, - "loss": 46.0, - "step": 34598 - }, - { - "epoch": 2.6453351683009347, - "grad_norm": 0.001451008371077478, - "learning_rate": 0.0001999965487354844, - "loss": 46.0, - "step": 34599 - }, - { - "epoch": 2.6454116252843245, - "grad_norm": 0.0030293797608464956, - "learning_rate": 0.00019999654853592415, - "loss": 46.0, - "step": 34600 - }, - { - "epoch": 2.6454880822677143, - "grad_norm": 0.0010853284038603306, - "learning_rate": 0.00019999654833635815, - "loss": 46.0, - "step": 34601 - }, - { - "epoch": 2.6455645392511036, - "grad_norm": 0.0025759569834917784, - "learning_rate": 0.0001999965481367864, - "loss": 46.0, - "step": 34602 - }, - { - "epoch": 2.6456409962344933, - "grad_norm": 0.0020716486033052206, - "learning_rate": 0.00019999654793720884, - "loss": 46.0, - "step": 34603 - }, - { - "epoch": 2.645717453217883, - "grad_norm": 0.001761682447977364, - "learning_rate": 0.0001999965477376255, - "loss": 46.0, - "step": 34604 - }, - { - "epoch": 2.645793910201273, - "grad_norm": 0.002164250472560525, - "learning_rate": 0.00019999654753803642, - "loss": 46.0, - "step": 34605 - }, - { - "epoch": 2.6458703671846626, - "grad_norm": 0.0022773926611989737, - "learning_rate": 0.00019999654733844158, - "loss": 46.0, - "step": 34606 - }, - { - "epoch": 2.6459468241680524, - "grad_norm": 0.0038353088311851025, - "learning_rate": 0.00019999654713884096, - "loss": 46.0, - "step": 34607 - }, - { - "epoch": 2.646023281151442, - "grad_norm": 0.0009662198135629296, - "learning_rate": 0.00019999654693923454, - "loss": 46.0, - "step": 34608 - }, - { - "epoch": 2.646099738134832, - "grad_norm": 0.0008317287429235876, - "learning_rate": 0.0001999965467396224, - "loss": 46.0, - "step": 34609 - }, - { - "epoch": 2.6461761951182217, - "grad_norm": 0.0014157487312331796, - "learning_rate": 0.00019999654654000447, - "loss": 46.0, - "step": 34610 - }, - { - "epoch": 2.6462526521016114, - "grad_norm": 0.00263407826423645, - "learning_rate": 0.00019999654634038076, - "loss": 46.0, - "step": 34611 - }, - { - "epoch": 2.646329109085001, - "grad_norm": 0.001302023883908987, - "learning_rate": 0.0001999965461407513, - "loss": 46.0, - "step": 34612 - }, - { - "epoch": 2.646405566068391, - "grad_norm": 0.0025027331430464983, - "learning_rate": 0.00019999654594111605, - "loss": 46.0, - "step": 34613 - }, - { - "epoch": 2.6464820230517807, - "grad_norm": 0.0037913252599537373, - "learning_rate": 0.00019999654574147504, - "loss": 46.0, - "step": 34614 - }, - { - "epoch": 2.6465584800351705, - "grad_norm": 0.0040735891088843346, - "learning_rate": 0.00019999654554182827, - "loss": 46.0, - "step": 34615 - }, - { - "epoch": 2.64663493701856, - "grad_norm": 0.0013186606811359525, - "learning_rate": 0.00019999654534217572, - "loss": 46.0, - "step": 34616 - }, - { - "epoch": 2.6467113940019495, - "grad_norm": 0.005385016091167927, - "learning_rate": 0.0001999965451425174, - "loss": 46.0, - "step": 34617 - }, - { - "epoch": 2.6467878509853393, - "grad_norm": 0.001892649452202022, - "learning_rate": 0.00019999654494285332, - "loss": 46.0, - "step": 34618 - }, - { - "epoch": 2.646864307968729, - "grad_norm": 0.0024929847568273544, - "learning_rate": 0.00019999654474318348, - "loss": 46.0, - "step": 34619 - }, - { - "epoch": 2.646940764952119, - "grad_norm": 0.001314322929829359, - "learning_rate": 0.00019999654454350783, - "loss": 46.0, - "step": 34620 - }, - { - "epoch": 2.6470172219355086, - "grad_norm": 0.001464893575757742, - "learning_rate": 0.00019999654434382644, - "loss": 46.0, - "step": 34621 - }, - { - "epoch": 2.6470936789188984, - "grad_norm": 0.0019550009164959192, - "learning_rate": 0.00019999654414413928, - "loss": 46.0, - "step": 34622 - }, - { - "epoch": 2.647170135902288, - "grad_norm": 0.004785238299518824, - "learning_rate": 0.00019999654394444637, - "loss": 46.0, - "step": 34623 - }, - { - "epoch": 2.6472465928856774, - "grad_norm": 0.0021834736689925194, - "learning_rate": 0.00019999654374474766, - "loss": 46.0, - "step": 34624 - }, - { - "epoch": 2.647323049869067, - "grad_norm": 0.002081941580399871, - "learning_rate": 0.00019999654354504318, - "loss": 46.0, - "step": 34625 - }, - { - "epoch": 2.647399506852457, - "grad_norm": 0.0018951010424643755, - "learning_rate": 0.00019999654334533292, - "loss": 46.0, - "step": 34626 - }, - { - "epoch": 2.6474759638358467, - "grad_norm": 0.0009608858381398022, - "learning_rate": 0.00019999654314561694, - "loss": 46.0, - "step": 34627 - }, - { - "epoch": 2.6475524208192365, - "grad_norm": 0.0017959446413442492, - "learning_rate": 0.00019999654294589517, - "loss": 46.0, - "step": 34628 - }, - { - "epoch": 2.6476288778026262, - "grad_norm": 0.0013378075091168284, - "learning_rate": 0.0001999965427461676, - "loss": 46.0, - "step": 34629 - }, - { - "epoch": 2.647705334786016, - "grad_norm": 0.0006940686726011336, - "learning_rate": 0.0001999965425464343, - "loss": 46.0, - "step": 34630 - }, - { - "epoch": 2.6477817917694058, - "grad_norm": 0.0012696638004854321, - "learning_rate": 0.0001999965423466952, - "loss": 46.0, - "step": 34631 - }, - { - "epoch": 2.6478582487527955, - "grad_norm": 0.0016210098983719945, - "learning_rate": 0.00019999654214695033, - "loss": 46.0, - "step": 34632 - }, - { - "epoch": 2.6479347057361853, - "grad_norm": 0.003102767514064908, - "learning_rate": 0.0001999965419471997, - "loss": 46.0, - "step": 34633 - }, - { - "epoch": 2.648011162719575, - "grad_norm": 0.0026267266366630793, - "learning_rate": 0.00019999654174744332, - "loss": 46.0, - "step": 34634 - }, - { - "epoch": 2.648087619702965, - "grad_norm": 0.0010340509470552206, - "learning_rate": 0.00019999654154768118, - "loss": 46.0, - "step": 34635 - }, - { - "epoch": 2.6481640766863546, - "grad_norm": 0.0023651120718568563, - "learning_rate": 0.00019999654134791325, - "loss": 46.0, - "step": 34636 - }, - { - "epoch": 2.6482405336697443, - "grad_norm": 0.0007886143866926432, - "learning_rate": 0.00019999654114813954, - "loss": 46.0, - "step": 34637 - }, - { - "epoch": 2.6483169906531336, - "grad_norm": 0.004997018724679947, - "learning_rate": 0.00019999654094836005, - "loss": 46.0, - "step": 34638 - }, - { - "epoch": 2.6483934476365234, - "grad_norm": 0.002043304732069373, - "learning_rate": 0.00019999654074857482, - "loss": 46.0, - "step": 34639 - }, - { - "epoch": 2.648469904619913, - "grad_norm": 0.003641656832769513, - "learning_rate": 0.0001999965405487838, - "loss": 46.0, - "step": 34640 - }, - { - "epoch": 2.648546361603303, - "grad_norm": 0.00275347288697958, - "learning_rate": 0.00019999654034898705, - "loss": 46.0, - "step": 34641 - }, - { - "epoch": 2.6486228185866927, - "grad_norm": 0.0019136063056066632, - "learning_rate": 0.00019999654014918447, - "loss": 46.0, - "step": 34642 - }, - { - "epoch": 2.6486992755700824, - "grad_norm": 0.0008162970771081746, - "learning_rate": 0.00019999653994937617, - "loss": 46.0, - "step": 34643 - }, - { - "epoch": 2.648775732553472, - "grad_norm": 0.0010963745880872011, - "learning_rate": 0.0001999965397495621, - "loss": 46.0, - "step": 34644 - }, - { - "epoch": 2.648852189536862, - "grad_norm": 0.0033200655598193407, - "learning_rate": 0.00019999653954974223, - "loss": 46.0, - "step": 34645 - }, - { - "epoch": 2.6489286465202513, - "grad_norm": 0.0012055494589731097, - "learning_rate": 0.0001999965393499166, - "loss": 46.0, - "step": 34646 - }, - { - "epoch": 2.649005103503641, - "grad_norm": 0.0018360440153628588, - "learning_rate": 0.00019999653915008523, - "loss": 46.0, - "step": 34647 - }, - { - "epoch": 2.649081560487031, - "grad_norm": 0.005940403789281845, - "learning_rate": 0.00019999653895024806, - "loss": 46.0, - "step": 34648 - }, - { - "epoch": 2.6491580174704206, - "grad_norm": 0.001664777984842658, - "learning_rate": 0.00019999653875040513, - "loss": 46.0, - "step": 34649 - }, - { - "epoch": 2.6492344744538103, - "grad_norm": 0.00216398062184453, - "learning_rate": 0.0001999965385505564, - "loss": 46.0, - "step": 34650 - }, - { - "epoch": 2.6493109314372, - "grad_norm": 0.0035663945600390434, - "learning_rate": 0.00019999653835070196, - "loss": 46.0, - "step": 34651 - }, - { - "epoch": 2.64938738842059, - "grad_norm": 0.001355396001599729, - "learning_rate": 0.0001999965381508417, - "loss": 46.0, - "step": 34652 - }, - { - "epoch": 2.6494638454039796, - "grad_norm": 0.001657457323744893, - "learning_rate": 0.0001999965379509757, - "loss": 46.0, - "step": 34653 - }, - { - "epoch": 2.6495403023873694, - "grad_norm": 0.004674006253480911, - "learning_rate": 0.00019999653775110393, - "loss": 46.0, - "step": 34654 - }, - { - "epoch": 2.649616759370759, - "grad_norm": 0.0028455324936658144, - "learning_rate": 0.00019999653755122635, - "loss": 46.0, - "step": 34655 - }, - { - "epoch": 2.649693216354149, - "grad_norm": 0.0024973968975245953, - "learning_rate": 0.00019999653735134306, - "loss": 46.0, - "step": 34656 - }, - { - "epoch": 2.6497696733375387, - "grad_norm": 0.00198940047994256, - "learning_rate": 0.00019999653715145396, - "loss": 46.0, - "step": 34657 - }, - { - "epoch": 2.6498461303209284, - "grad_norm": 0.0021798640955239534, - "learning_rate": 0.00019999653695155912, - "loss": 46.0, - "step": 34658 - }, - { - "epoch": 2.649922587304318, - "grad_norm": 0.0021034269593656063, - "learning_rate": 0.00019999653675165847, - "loss": 46.0, - "step": 34659 - }, - { - "epoch": 2.6499990442877075, - "grad_norm": 0.004055732395499945, - "learning_rate": 0.00019999653655175209, - "loss": 46.0, - "step": 34660 - }, - { - "epoch": 2.6500755012710973, - "grad_norm": 0.0025905249640345573, - "learning_rate": 0.00019999653635183992, - "loss": 46.0, - "step": 34661 - }, - { - "epoch": 2.650151958254487, - "grad_norm": 0.0036613019183278084, - "learning_rate": 0.000199996536151922, - "loss": 46.0, - "step": 34662 - }, - { - "epoch": 2.6502284152378768, - "grad_norm": 0.0006996187730692327, - "learning_rate": 0.00019999653595199828, - "loss": 46.0, - "step": 34663 - }, - { - "epoch": 2.6503048722212665, - "grad_norm": 0.0028038721065968275, - "learning_rate": 0.00019999653575206882, - "loss": 46.0, - "step": 34664 - }, - { - "epoch": 2.6503813292046563, - "grad_norm": 0.0009371620253659785, - "learning_rate": 0.0001999965355521336, - "loss": 46.0, - "step": 34665 - }, - { - "epoch": 2.650457786188046, - "grad_norm": 0.0011505559086799622, - "learning_rate": 0.00019999653535219256, - "loss": 46.0, - "step": 34666 - }, - { - "epoch": 2.650534243171436, - "grad_norm": 0.0015809711767360568, - "learning_rate": 0.0001999965351522458, - "loss": 46.0, - "step": 34667 - }, - { - "epoch": 2.650610700154825, - "grad_norm": 0.0022850402165204287, - "learning_rate": 0.00019999653495229324, - "loss": 46.0, - "step": 34668 - }, - { - "epoch": 2.650687157138215, - "grad_norm": 0.004635780584067106, - "learning_rate": 0.00019999653475233494, - "loss": 46.0, - "step": 34669 - }, - { - "epoch": 2.6507636141216047, - "grad_norm": 0.0013846515212208033, - "learning_rate": 0.00019999653455237085, - "loss": 46.0, - "step": 34670 - }, - { - "epoch": 2.6508400711049944, - "grad_norm": 0.0009411721839569509, - "learning_rate": 0.00019999653435240098, - "loss": 46.0, - "step": 34671 - }, - { - "epoch": 2.650916528088384, - "grad_norm": 0.0021812478080391884, - "learning_rate": 0.00019999653415242534, - "loss": 46.0, - "step": 34672 - }, - { - "epoch": 2.650992985071774, - "grad_norm": 0.006250261329114437, - "learning_rate": 0.00019999653395244397, - "loss": 46.0, - "step": 34673 - }, - { - "epoch": 2.6510694420551637, - "grad_norm": 0.0016414192505180836, - "learning_rate": 0.00019999653375245678, - "loss": 46.0, - "step": 34674 - }, - { - "epoch": 2.6511458990385535, - "grad_norm": 0.0011620079167187214, - "learning_rate": 0.00019999653355246385, - "loss": 46.0, - "step": 34675 - }, - { - "epoch": 2.6512223560219432, - "grad_norm": 0.0025420573074370623, - "learning_rate": 0.00019999653335246517, - "loss": 46.0, - "step": 34676 - }, - { - "epoch": 2.651298813005333, - "grad_norm": 0.0022182592656463385, - "learning_rate": 0.00019999653315246068, - "loss": 46.0, - "step": 34677 - }, - { - "epoch": 2.6513752699887227, - "grad_norm": 0.0012899975990876555, - "learning_rate": 0.00019999653295245046, - "loss": 46.0, - "step": 34678 - }, - { - "epoch": 2.6514517269721125, - "grad_norm": 0.0014714538119733334, - "learning_rate": 0.00019999653275243443, - "loss": 46.0, - "step": 34679 - }, - { - "epoch": 2.6515281839555023, - "grad_norm": 0.0020265597850084305, - "learning_rate": 0.00019999653255241265, - "loss": 46.0, - "step": 34680 - }, - { - "epoch": 2.651604640938892, - "grad_norm": 0.0014766695676371455, - "learning_rate": 0.0001999965323523851, - "loss": 46.0, - "step": 34681 - }, - { - "epoch": 2.6516810979222813, - "grad_norm": 0.0008644903427921236, - "learning_rate": 0.0001999965321523518, - "loss": 46.0, - "step": 34682 - }, - { - "epoch": 2.651757554905671, - "grad_norm": 0.0012086700880900025, - "learning_rate": 0.0001999965319523127, - "loss": 46.0, - "step": 34683 - }, - { - "epoch": 2.651834011889061, - "grad_norm": 0.0030023485887795687, - "learning_rate": 0.00019999653175226784, - "loss": 46.0, - "step": 34684 - }, - { - "epoch": 2.6519104688724506, - "grad_norm": 0.004265329800546169, - "learning_rate": 0.00019999653155221723, - "loss": 46.0, - "step": 34685 - }, - { - "epoch": 2.6519869258558404, - "grad_norm": 0.0036564667243510485, - "learning_rate": 0.00019999653135216084, - "loss": 46.0, - "step": 34686 - }, - { - "epoch": 2.65206338283923, - "grad_norm": 0.0026469770818948746, - "learning_rate": 0.00019999653115209868, - "loss": 46.0, - "step": 34687 - }, - { - "epoch": 2.65213983982262, - "grad_norm": 0.0012685690307989717, - "learning_rate": 0.00019999653095203071, - "loss": 46.0, - "step": 34688 - }, - { - "epoch": 2.6522162968060097, - "grad_norm": 0.0019143827958032489, - "learning_rate": 0.00019999653075195703, - "loss": 46.0, - "step": 34689 - }, - { - "epoch": 2.652292753789399, - "grad_norm": 0.002410567831248045, - "learning_rate": 0.00019999653055187758, - "loss": 46.0, - "step": 34690 - }, - { - "epoch": 2.6523692107727888, - "grad_norm": 0.0011470012832432985, - "learning_rate": 0.00019999653035179232, - "loss": 46.0, - "step": 34691 - }, - { - "epoch": 2.6524456677561785, - "grad_norm": 0.0011017803335562348, - "learning_rate": 0.00019999653015170132, - "loss": 46.0, - "step": 34692 - }, - { - "epoch": 2.6525221247395683, - "grad_norm": 0.004031628370285034, - "learning_rate": 0.00019999652995160455, - "loss": 46.0, - "step": 34693 - }, - { - "epoch": 2.652598581722958, - "grad_norm": 0.004409824963659048, - "learning_rate": 0.00019999652975150197, - "loss": 46.0, - "step": 34694 - }, - { - "epoch": 2.652675038706348, - "grad_norm": 0.0014532955829054117, - "learning_rate": 0.00019999652955139365, - "loss": 46.0, - "step": 34695 - }, - { - "epoch": 2.6527514956897376, - "grad_norm": 0.0015394381480291486, - "learning_rate": 0.00019999652935127958, - "loss": 46.0, - "step": 34696 - }, - { - "epoch": 2.6528279526731273, - "grad_norm": 0.0015390662010759115, - "learning_rate": 0.0001999965291511597, - "loss": 46.0, - "step": 34697 - }, - { - "epoch": 2.652904409656517, - "grad_norm": 0.0018092122627422214, - "learning_rate": 0.0001999965289510341, - "loss": 46.0, - "step": 34698 - }, - { - "epoch": 2.652980866639907, - "grad_norm": 0.001475409371778369, - "learning_rate": 0.00019999652875090268, - "loss": 46.0, - "step": 34699 - }, - { - "epoch": 2.6530573236232966, - "grad_norm": 0.001401489251293242, - "learning_rate": 0.00019999652855076555, - "loss": 46.0, - "step": 34700 - }, - { - "epoch": 2.6531337806066864, - "grad_norm": 0.0014195594703778625, - "learning_rate": 0.0001999965283506226, - "loss": 46.0, - "step": 34701 - }, - { - "epoch": 2.653210237590076, - "grad_norm": 0.0015073702670633793, - "learning_rate": 0.0001999965281504739, - "loss": 46.0, - "step": 34702 - }, - { - "epoch": 2.6532866945734654, - "grad_norm": 0.015037347562611103, - "learning_rate": 0.00019999652795031942, - "loss": 46.0, - "step": 34703 - }, - { - "epoch": 2.653363151556855, - "grad_norm": 0.0013826737413182855, - "learning_rate": 0.00019999652775015917, - "loss": 46.0, - "step": 34704 - }, - { - "epoch": 2.653439608540245, - "grad_norm": 0.0038377426099032164, - "learning_rate": 0.0001999965275499932, - "loss": 46.0, - "step": 34705 - }, - { - "epoch": 2.6535160655236347, - "grad_norm": 0.0030768071301281452, - "learning_rate": 0.0001999965273498214, - "loss": 46.0, - "step": 34706 - }, - { - "epoch": 2.6535925225070245, - "grad_norm": 0.0010840905597433448, - "learning_rate": 0.00019999652714964384, - "loss": 46.0, - "step": 34707 - }, - { - "epoch": 2.6536689794904142, - "grad_norm": 0.0008362262160517275, - "learning_rate": 0.00019999652694946055, - "loss": 46.0, - "step": 34708 - }, - { - "epoch": 2.653745436473804, - "grad_norm": 0.0028353342786431313, - "learning_rate": 0.00019999652674927143, - "loss": 46.0, - "step": 34709 - }, - { - "epoch": 2.6538218934571938, - "grad_norm": 0.0010955880861729383, - "learning_rate": 0.00019999652654907659, - "loss": 46.0, - "step": 34710 - }, - { - "epoch": 2.6538983504405835, - "grad_norm": 0.002578646643087268, - "learning_rate": 0.00019999652634887594, - "loss": 46.0, - "step": 34711 - }, - { - "epoch": 2.653974807423973, - "grad_norm": 0.0019248604075983167, - "learning_rate": 0.00019999652614866956, - "loss": 46.0, - "step": 34712 - }, - { - "epoch": 2.6540512644073626, - "grad_norm": 0.0011777599574998021, - "learning_rate": 0.0001999965259484574, - "loss": 46.0, - "step": 34713 - }, - { - "epoch": 2.6541277213907524, - "grad_norm": 0.0010027263779193163, - "learning_rate": 0.00019999652574823946, - "loss": 46.0, - "step": 34714 - }, - { - "epoch": 2.654204178374142, - "grad_norm": 0.0019008881645277143, - "learning_rate": 0.00019999652554801575, - "loss": 46.0, - "step": 34715 - }, - { - "epoch": 2.654280635357532, - "grad_norm": 0.001758950762450695, - "learning_rate": 0.00019999652534778627, - "loss": 46.0, - "step": 34716 - }, - { - "epoch": 2.6543570923409217, - "grad_norm": 0.002618011087179184, - "learning_rate": 0.00019999652514755102, - "loss": 46.0, - "step": 34717 - }, - { - "epoch": 2.6544335493243114, - "grad_norm": 0.0020383228547871113, - "learning_rate": 0.00019999652494731004, - "loss": 46.0, - "step": 34718 - }, - { - "epoch": 2.654510006307701, - "grad_norm": 0.003801194950938225, - "learning_rate": 0.00019999652474706324, - "loss": 46.0, - "step": 34719 - }, - { - "epoch": 2.654586463291091, - "grad_norm": 0.00209213700145483, - "learning_rate": 0.0001999965245468107, - "loss": 46.0, - "step": 34720 - }, - { - "epoch": 2.6546629202744807, - "grad_norm": 0.0017593438969925046, - "learning_rate": 0.00019999652434655237, - "loss": 46.0, - "step": 34721 - }, - { - "epoch": 2.6547393772578705, - "grad_norm": 0.001032951520755887, - "learning_rate": 0.0001999965241462883, - "loss": 46.0, - "step": 34722 - }, - { - "epoch": 2.65481583424126, - "grad_norm": 0.005916541907936335, - "learning_rate": 0.00019999652394601844, - "loss": 46.0, - "step": 34723 - }, - { - "epoch": 2.65489229122465, - "grad_norm": 0.0013478639302775264, - "learning_rate": 0.0001999965237457428, - "loss": 46.0, - "step": 34724 - }, - { - "epoch": 2.6549687482080393, - "grad_norm": 0.002013017423450947, - "learning_rate": 0.0001999965235454614, - "loss": 46.0, - "step": 34725 - }, - { - "epoch": 2.655045205191429, - "grad_norm": 0.0032584015280008316, - "learning_rate": 0.00019999652334517425, - "loss": 46.0, - "step": 34726 - }, - { - "epoch": 2.655121662174819, - "grad_norm": 0.003934497945010662, - "learning_rate": 0.0001999965231448813, - "loss": 46.0, - "step": 34727 - }, - { - "epoch": 2.6551981191582086, - "grad_norm": 0.0013849419774487615, - "learning_rate": 0.0001999965229445826, - "loss": 46.0, - "step": 34728 - }, - { - "epoch": 2.6552745761415983, - "grad_norm": 0.0014424193650484085, - "learning_rate": 0.00019999652274427815, - "loss": 46.0, - "step": 34729 - }, - { - "epoch": 2.655351033124988, - "grad_norm": 0.0008990371134132147, - "learning_rate": 0.0001999965225439679, - "loss": 46.0, - "step": 34730 - }, - { - "epoch": 2.655427490108378, - "grad_norm": 0.0011558630503714085, - "learning_rate": 0.0001999965223436519, - "loss": 46.0, - "step": 34731 - }, - { - "epoch": 2.6555039470917676, - "grad_norm": 0.001537567120976746, - "learning_rate": 0.00019999652214333012, - "loss": 46.0, - "step": 34732 - }, - { - "epoch": 2.655580404075157, - "grad_norm": 0.000707258703187108, - "learning_rate": 0.00019999652194300255, - "loss": 46.0, - "step": 34733 - }, - { - "epoch": 2.6556568610585467, - "grad_norm": 0.005646726116538048, - "learning_rate": 0.00019999652174266922, - "loss": 46.0, - "step": 34734 - }, - { - "epoch": 2.6557333180419365, - "grad_norm": 0.003111784579232335, - "learning_rate": 0.00019999652154233016, - "loss": 46.0, - "step": 34735 - }, - { - "epoch": 2.6558097750253262, - "grad_norm": 0.0026187580078840256, - "learning_rate": 0.0001999965213419853, - "loss": 46.0, - "step": 34736 - }, - { - "epoch": 2.655886232008716, - "grad_norm": 0.0016155462944880128, - "learning_rate": 0.00019999652114163468, - "loss": 46.0, - "step": 34737 - }, - { - "epoch": 2.6559626889921057, - "grad_norm": 0.001129030715674162, - "learning_rate": 0.0001999965209412783, - "loss": 46.0, - "step": 34738 - }, - { - "epoch": 2.6560391459754955, - "grad_norm": 0.001172374584712088, - "learning_rate": 0.0001999965207409161, - "loss": 46.0, - "step": 34739 - }, - { - "epoch": 2.6561156029588853, - "grad_norm": 0.003757399506866932, - "learning_rate": 0.00019999652054054817, - "loss": 46.0, - "step": 34740 - }, - { - "epoch": 2.656192059942275, - "grad_norm": 0.005926027428358793, - "learning_rate": 0.00019999652034017446, - "loss": 46.0, - "step": 34741 - }, - { - "epoch": 2.656268516925665, - "grad_norm": 0.005261502228677273, - "learning_rate": 0.000199996520139795, - "loss": 46.0, - "step": 34742 - }, - { - "epoch": 2.6563449739090546, - "grad_norm": 0.0016953968442976475, - "learning_rate": 0.00019999651993940975, - "loss": 46.0, - "step": 34743 - }, - { - "epoch": 2.6564214308924443, - "grad_norm": 0.003016469534486532, - "learning_rate": 0.00019999651973901875, - "loss": 46.0, - "step": 34744 - }, - { - "epoch": 2.656497887875834, - "grad_norm": 0.0041351779364049435, - "learning_rate": 0.00019999651953862198, - "loss": 46.0, - "step": 34745 - }, - { - "epoch": 2.656574344859224, - "grad_norm": 0.001842982484959066, - "learning_rate": 0.0001999965193382194, - "loss": 46.0, - "step": 34746 - }, - { - "epoch": 2.656650801842613, - "grad_norm": 0.0019013939891010523, - "learning_rate": 0.00019999651913781108, - "loss": 46.0, - "step": 34747 - }, - { - "epoch": 2.656727258826003, - "grad_norm": 0.0015365495346486568, - "learning_rate": 0.000199996518937397, - "loss": 46.0, - "step": 34748 - }, - { - "epoch": 2.6568037158093927, - "grad_norm": 0.002518998458981514, - "learning_rate": 0.00019999651873697715, - "loss": 46.0, - "step": 34749 - }, - { - "epoch": 2.6568801727927824, - "grad_norm": 0.0017262675100937486, - "learning_rate": 0.0001999965185365515, - "loss": 46.0, - "step": 34750 - }, - { - "epoch": 2.656956629776172, - "grad_norm": 0.0008960299892351031, - "learning_rate": 0.00019999651833612012, - "loss": 46.0, - "step": 34751 - }, - { - "epoch": 2.657033086759562, - "grad_norm": 0.002901954110711813, - "learning_rate": 0.00019999651813568296, - "loss": 46.0, - "step": 34752 - }, - { - "epoch": 2.6571095437429517, - "grad_norm": 0.001309744082391262, - "learning_rate": 0.00019999651793524003, - "loss": 46.0, - "step": 34753 - }, - { - "epoch": 2.6571860007263415, - "grad_norm": 0.0017822058871388435, - "learning_rate": 0.00019999651773479135, - "loss": 46.0, - "step": 34754 - }, - { - "epoch": 2.657262457709731, - "grad_norm": 0.001950585749000311, - "learning_rate": 0.00019999651753433684, - "loss": 46.0, - "step": 34755 - }, - { - "epoch": 2.6573389146931206, - "grad_norm": 0.0018254952738061547, - "learning_rate": 0.0001999965173338766, - "loss": 46.0, - "step": 34756 - }, - { - "epoch": 2.6574153716765103, - "grad_norm": 0.0036601268220692873, - "learning_rate": 0.00019999651713341058, - "loss": 46.0, - "step": 34757 - }, - { - "epoch": 2.6574918286599, - "grad_norm": 0.0006742942496202886, - "learning_rate": 0.00019999651693293878, - "loss": 46.0, - "step": 34758 - }, - { - "epoch": 2.65756828564329, - "grad_norm": 0.004033296834677458, - "learning_rate": 0.00019999651673246124, - "loss": 46.0, - "step": 34759 - }, - { - "epoch": 2.6576447426266796, - "grad_norm": 0.003381472546607256, - "learning_rate": 0.00019999651653197792, - "loss": 46.0, - "step": 34760 - }, - { - "epoch": 2.6577211996100694, - "grad_norm": 0.0013182959519326687, - "learning_rate": 0.00019999651633148885, - "loss": 46.0, - "step": 34761 - }, - { - "epoch": 2.657797656593459, - "grad_norm": 0.001889455015771091, - "learning_rate": 0.00019999651613099398, - "loss": 46.0, - "step": 34762 - }, - { - "epoch": 2.657874113576849, - "grad_norm": 0.0028760316781699657, - "learning_rate": 0.00019999651593049337, - "loss": 46.0, - "step": 34763 - }, - { - "epoch": 2.6579505705602386, - "grad_norm": 0.001243208535015583, - "learning_rate": 0.00019999651572998696, - "loss": 46.0, - "step": 34764 - }, - { - "epoch": 2.6580270275436284, - "grad_norm": 0.0011337959440425038, - "learning_rate": 0.0001999965155294748, - "loss": 46.0, - "step": 34765 - }, - { - "epoch": 2.658103484527018, - "grad_norm": 0.001040091854520142, - "learning_rate": 0.00019999651532895686, - "loss": 46.0, - "step": 34766 - }, - { - "epoch": 2.658179941510408, - "grad_norm": 0.0016802186146378517, - "learning_rate": 0.00019999651512843316, - "loss": 46.0, - "step": 34767 - }, - { - "epoch": 2.6582563984937977, - "grad_norm": 0.002288917312398553, - "learning_rate": 0.00019999651492790368, - "loss": 46.0, - "step": 34768 - }, - { - "epoch": 2.658332855477187, - "grad_norm": 0.0032778973691165447, - "learning_rate": 0.00019999651472736845, - "loss": 46.0, - "step": 34769 - }, - { - "epoch": 2.6584093124605768, - "grad_norm": 0.00397077389061451, - "learning_rate": 0.00019999651452682745, - "loss": 46.0, - "step": 34770 - }, - { - "epoch": 2.6584857694439665, - "grad_norm": 0.0044480194337666035, - "learning_rate": 0.00019999651432628065, - "loss": 46.0, - "step": 34771 - }, - { - "epoch": 2.6585622264273563, - "grad_norm": 0.0025676286313682795, - "learning_rate": 0.0001999965141257281, - "loss": 46.0, - "step": 34772 - }, - { - "epoch": 2.658638683410746, - "grad_norm": 0.0006629474228248, - "learning_rate": 0.00019999651392516978, - "loss": 46.0, - "step": 34773 - }, - { - "epoch": 2.658715140394136, - "grad_norm": 0.0013394899433478713, - "learning_rate": 0.0001999965137246057, - "loss": 46.0, - "step": 34774 - }, - { - "epoch": 2.6587915973775256, - "grad_norm": 0.0027176544535905123, - "learning_rate": 0.00019999651352403582, - "loss": 46.0, - "step": 34775 - }, - { - "epoch": 2.6588680543609153, - "grad_norm": 0.0010863530915230513, - "learning_rate": 0.0001999965133234602, - "loss": 46.0, - "step": 34776 - }, - { - "epoch": 2.6589445113443047, - "grad_norm": 0.005446822382509708, - "learning_rate": 0.00019999651312287882, - "loss": 46.0, - "step": 34777 - }, - { - "epoch": 2.6590209683276944, - "grad_norm": 0.002435858128592372, - "learning_rate": 0.00019999651292229164, - "loss": 46.0, - "step": 34778 - }, - { - "epoch": 2.659097425311084, - "grad_norm": 0.008534245193004608, - "learning_rate": 0.0001999965127216987, - "loss": 46.0, - "step": 34779 - }, - { - "epoch": 2.659173882294474, - "grad_norm": 0.002253937069326639, - "learning_rate": 0.0001999965125211, - "loss": 46.0, - "step": 34780 - }, - { - "epoch": 2.6592503392778637, - "grad_norm": 0.0034409069921821356, - "learning_rate": 0.00019999651232049552, - "loss": 46.0, - "step": 34781 - }, - { - "epoch": 2.6593267962612535, - "grad_norm": 0.0028056346345692873, - "learning_rate": 0.0001999965121198853, - "loss": 46.0, - "step": 34782 - }, - { - "epoch": 2.659403253244643, - "grad_norm": 0.0018720593070611358, - "learning_rate": 0.00019999651191926926, - "loss": 46.0, - "step": 34783 - }, - { - "epoch": 2.659479710228033, - "grad_norm": 0.005867813713848591, - "learning_rate": 0.0001999965117186475, - "loss": 46.0, - "step": 34784 - }, - { - "epoch": 2.6595561672114227, - "grad_norm": 0.000925446511246264, - "learning_rate": 0.00019999651151801994, - "loss": 46.0, - "step": 34785 - }, - { - "epoch": 2.6596326241948125, - "grad_norm": 0.003714770544320345, - "learning_rate": 0.00019999651131738663, - "loss": 46.0, - "step": 34786 - }, - { - "epoch": 2.6597090811782023, - "grad_norm": 0.0031848398502916098, - "learning_rate": 0.00019999651111674753, - "loss": 46.0, - "step": 34787 - }, - { - "epoch": 2.659785538161592, - "grad_norm": 0.0030491426587104797, - "learning_rate": 0.00019999651091610267, - "loss": 46.0, - "step": 34788 - }, - { - "epoch": 2.659861995144982, - "grad_norm": 0.0005798854399472475, - "learning_rate": 0.00019999651071545205, - "loss": 46.0, - "step": 34789 - }, - { - "epoch": 2.6599384521283715, - "grad_norm": 0.0024995480198413134, - "learning_rate": 0.00019999651051479567, - "loss": 46.0, - "step": 34790 - }, - { - "epoch": 2.660014909111761, - "grad_norm": 0.001593344029970467, - "learning_rate": 0.00019999651031413348, - "loss": 46.0, - "step": 34791 - }, - { - "epoch": 2.6600913660951506, - "grad_norm": 0.0030092853121459484, - "learning_rate": 0.00019999651011346555, - "loss": 46.0, - "step": 34792 - }, - { - "epoch": 2.6601678230785404, - "grad_norm": 0.001418916042894125, - "learning_rate": 0.00019999650991279184, - "loss": 46.0, - "step": 34793 - }, - { - "epoch": 2.66024428006193, - "grad_norm": 0.0015205746749415994, - "learning_rate": 0.00019999650971211236, - "loss": 46.0, - "step": 34794 - }, - { - "epoch": 2.66032073704532, - "grad_norm": 0.0015802271664142609, - "learning_rate": 0.0001999965095114271, - "loss": 46.0, - "step": 34795 - }, - { - "epoch": 2.6603971940287097, - "grad_norm": 0.0026045767590403557, - "learning_rate": 0.0001999965093107361, - "loss": 46.0, - "step": 34796 - }, - { - "epoch": 2.6604736510120994, - "grad_norm": 0.0038425433449447155, - "learning_rate": 0.00019999650911003934, - "loss": 46.0, - "step": 34797 - }, - { - "epoch": 2.660550107995489, - "grad_norm": 0.005135990213602781, - "learning_rate": 0.0001999965089093368, - "loss": 46.0, - "step": 34798 - }, - { - "epoch": 2.6606265649788785, - "grad_norm": 0.0011006761342287064, - "learning_rate": 0.00019999650870862845, - "loss": 46.0, - "step": 34799 - }, - { - "epoch": 2.6607030219622683, - "grad_norm": 0.0017809598939493299, - "learning_rate": 0.00019999650850791438, - "loss": 46.0, - "step": 34800 - }, - { - "epoch": 2.660779478945658, - "grad_norm": 0.0019728532060980797, - "learning_rate": 0.00019999650830719449, - "loss": 46.0, - "step": 34801 - }, - { - "epoch": 2.660855935929048, - "grad_norm": 0.004906200338155031, - "learning_rate": 0.00019999650810646887, - "loss": 46.0, - "step": 34802 - }, - { - "epoch": 2.6609323929124375, - "grad_norm": 0.0007619601092301309, - "learning_rate": 0.0001999965079057375, - "loss": 46.0, - "step": 34803 - }, - { - "epoch": 2.6610088498958273, - "grad_norm": 0.0022770592477172613, - "learning_rate": 0.0001999965077050003, - "loss": 46.0, - "step": 34804 - }, - { - "epoch": 2.661085306879217, - "grad_norm": 0.0015122673939913511, - "learning_rate": 0.00019999650750425737, - "loss": 46.0, - "step": 34805 - }, - { - "epoch": 2.661161763862607, - "grad_norm": 0.0011271742405369878, - "learning_rate": 0.00019999650730350866, - "loss": 46.0, - "step": 34806 - }, - { - "epoch": 2.6612382208459966, - "grad_norm": 0.0006319869426079094, - "learning_rate": 0.0001999965071027542, - "loss": 46.0, - "step": 34807 - }, - { - "epoch": 2.6613146778293864, - "grad_norm": 0.0006759246462024748, - "learning_rate": 0.00019999650690199396, - "loss": 46.0, - "step": 34808 - }, - { - "epoch": 2.661391134812776, - "grad_norm": 0.0028826105408370495, - "learning_rate": 0.00019999650670122793, - "loss": 46.0, - "step": 34809 - }, - { - "epoch": 2.661467591796166, - "grad_norm": 0.002271621488034725, - "learning_rate": 0.00019999650650045616, - "loss": 46.0, - "step": 34810 - }, - { - "epoch": 2.6615440487795556, - "grad_norm": 0.003738553263247013, - "learning_rate": 0.00019999650629967862, - "loss": 46.0, - "step": 34811 - }, - { - "epoch": 2.6616205057629454, - "grad_norm": 0.0016260184347629547, - "learning_rate": 0.0001999965060988953, - "loss": 46.0, - "step": 34812 - }, - { - "epoch": 2.6616969627463347, - "grad_norm": 0.0011628199135884643, - "learning_rate": 0.00019999650589810618, - "loss": 46.0, - "step": 34813 - }, - { - "epoch": 2.6617734197297245, - "grad_norm": 0.0026137125678360462, - "learning_rate": 0.00019999650569731134, - "loss": 46.0, - "step": 34814 - }, - { - "epoch": 2.6618498767131142, - "grad_norm": 0.001854165457189083, - "learning_rate": 0.00019999650549651073, - "loss": 46.0, - "step": 34815 - }, - { - "epoch": 2.661926333696504, - "grad_norm": 0.006665118504315615, - "learning_rate": 0.00019999650529570434, - "loss": 46.0, - "step": 34816 - }, - { - "epoch": 2.6620027906798938, - "grad_norm": 0.00467985263094306, - "learning_rate": 0.00019999650509489215, - "loss": 46.0, - "step": 34817 - }, - { - "epoch": 2.6620792476632835, - "grad_norm": 0.0038444646634161472, - "learning_rate": 0.00019999650489407422, - "loss": 46.0, - "step": 34818 - }, - { - "epoch": 2.6621557046466733, - "grad_norm": 0.004331445321440697, - "learning_rate": 0.00019999650469325052, - "loss": 46.0, - "step": 34819 - }, - { - "epoch": 2.662232161630063, - "grad_norm": 0.0020280415192246437, - "learning_rate": 0.000199996504492421, - "loss": 46.0, - "step": 34820 - }, - { - "epoch": 2.6623086186134524, - "grad_norm": 0.0009757000952959061, - "learning_rate": 0.00019999650429158579, - "loss": 46.0, - "step": 34821 - }, - { - "epoch": 2.662385075596842, - "grad_norm": 0.0019797596614807844, - "learning_rate": 0.0001999965040907448, - "loss": 46.0, - "step": 34822 - }, - { - "epoch": 2.662461532580232, - "grad_norm": 0.004333363380283117, - "learning_rate": 0.000199996503889898, - "loss": 46.0, - "step": 34823 - }, - { - "epoch": 2.6625379895636216, - "grad_norm": 0.00207108655013144, - "learning_rate": 0.00019999650368904544, - "loss": 46.0, - "step": 34824 - }, - { - "epoch": 2.6626144465470114, - "grad_norm": 0.0006575423758476973, - "learning_rate": 0.00019999650348818712, - "loss": 46.0, - "step": 34825 - }, - { - "epoch": 2.662690903530401, - "grad_norm": 0.0009055683040060103, - "learning_rate": 0.00019999650328732303, - "loss": 46.0, - "step": 34826 - }, - { - "epoch": 2.662767360513791, - "grad_norm": 0.0012490932131186128, - "learning_rate": 0.00019999650308645317, - "loss": 46.0, - "step": 34827 - }, - { - "epoch": 2.6628438174971807, - "grad_norm": 0.00224423804320395, - "learning_rate": 0.00019999650288557753, - "loss": 46.0, - "step": 34828 - }, - { - "epoch": 2.6629202744805704, - "grad_norm": 0.0010030536213889718, - "learning_rate": 0.00019999650268469614, - "loss": 46.0, - "step": 34829 - }, - { - "epoch": 2.66299673146396, - "grad_norm": 0.0027507049962878227, - "learning_rate": 0.00019999650248380898, - "loss": 46.0, - "step": 34830 - }, - { - "epoch": 2.66307318844735, - "grad_norm": 0.003532292554154992, - "learning_rate": 0.00019999650228291605, - "loss": 46.0, - "step": 34831 - }, - { - "epoch": 2.6631496454307397, - "grad_norm": 0.0010683690197765827, - "learning_rate": 0.00019999650208201732, - "loss": 46.0, - "step": 34832 - }, - { - "epoch": 2.6632261024141295, - "grad_norm": 0.0010906958486884832, - "learning_rate": 0.00019999650188111287, - "loss": 46.0, - "step": 34833 - }, - { - "epoch": 2.663302559397519, - "grad_norm": 0.0026041092351078987, - "learning_rate": 0.00019999650168020262, - "loss": 46.0, - "step": 34834 - }, - { - "epoch": 2.6633790163809086, - "grad_norm": 0.0015963248442858458, - "learning_rate": 0.00019999650147928662, - "loss": 46.0, - "step": 34835 - }, - { - "epoch": 2.6634554733642983, - "grad_norm": 0.001647595432586968, - "learning_rate": 0.00019999650127836482, - "loss": 46.0, - "step": 34836 - }, - { - "epoch": 2.663531930347688, - "grad_norm": 0.0028493814170360565, - "learning_rate": 0.00019999650107743725, - "loss": 46.0, - "step": 34837 - }, - { - "epoch": 2.663608387331078, - "grad_norm": 0.0014009576989337802, - "learning_rate": 0.00019999650087650396, - "loss": 46.0, - "step": 34838 - }, - { - "epoch": 2.6636848443144676, - "grad_norm": 0.0016593976179137826, - "learning_rate": 0.00019999650067556486, - "loss": 46.0, - "step": 34839 - }, - { - "epoch": 2.6637613012978574, - "grad_norm": 0.0015545575879514217, - "learning_rate": 0.00019999650047462, - "loss": 46.0, - "step": 34840 - }, - { - "epoch": 2.663837758281247, - "grad_norm": 0.00259993108920753, - "learning_rate": 0.00019999650027366936, - "loss": 46.0, - "step": 34841 - }, - { - "epoch": 2.663914215264637, - "grad_norm": 0.0014673768309876323, - "learning_rate": 0.00019999650007271295, - "loss": 46.0, - "step": 34842 - }, - { - "epoch": 2.663990672248026, - "grad_norm": 0.0011018171207979321, - "learning_rate": 0.0001999964998717508, - "loss": 46.0, - "step": 34843 - }, - { - "epoch": 2.664067129231416, - "grad_norm": 0.0022359294816851616, - "learning_rate": 0.00019999649967078286, - "loss": 46.0, - "step": 34844 - }, - { - "epoch": 2.6641435862148057, - "grad_norm": 0.0011683318298310041, - "learning_rate": 0.00019999649946980915, - "loss": 46.0, - "step": 34845 - }, - { - "epoch": 2.6642200431981955, - "grad_norm": 0.0017478878144174814, - "learning_rate": 0.00019999649926882968, - "loss": 46.0, - "step": 34846 - }, - { - "epoch": 2.6642965001815853, - "grad_norm": 0.001801581820473075, - "learning_rate": 0.00019999649906784442, - "loss": 46.0, - "step": 34847 - }, - { - "epoch": 2.664372957164975, - "grad_norm": 0.0030484963208436966, - "learning_rate": 0.0001999964988668534, - "loss": 46.0, - "step": 34848 - }, - { - "epoch": 2.664449414148365, - "grad_norm": 0.0020317472517490387, - "learning_rate": 0.00019999649866585663, - "loss": 46.0, - "step": 34849 - }, - { - "epoch": 2.6645258711317545, - "grad_norm": 0.001673631020821631, - "learning_rate": 0.00019999649846485408, - "loss": 46.0, - "step": 34850 - }, - { - "epoch": 2.6646023281151443, - "grad_norm": 0.0023410418070852757, - "learning_rate": 0.00019999649826384577, - "loss": 46.0, - "step": 34851 - }, - { - "epoch": 2.664678785098534, - "grad_norm": 0.006933731026947498, - "learning_rate": 0.00019999649806283167, - "loss": 46.0, - "step": 34852 - }, - { - "epoch": 2.664755242081924, - "grad_norm": 0.0024782984983175993, - "learning_rate": 0.0001999964978618118, - "loss": 46.0, - "step": 34853 - }, - { - "epoch": 2.6648316990653136, - "grad_norm": 0.006291090976446867, - "learning_rate": 0.0001999964976607862, - "loss": 46.0, - "step": 34854 - }, - { - "epoch": 2.6649081560487033, - "grad_norm": 0.0015015807002782822, - "learning_rate": 0.00019999649745975479, - "loss": 46.0, - "step": 34855 - }, - { - "epoch": 2.6649846130320927, - "grad_norm": 0.0023913918994367123, - "learning_rate": 0.0001999964972587176, - "loss": 46.0, - "step": 34856 - }, - { - "epoch": 2.6650610700154824, - "grad_norm": 0.004275403451174498, - "learning_rate": 0.0001999964970576747, - "loss": 46.0, - "step": 34857 - }, - { - "epoch": 2.665137526998872, - "grad_norm": 0.0036315189208835363, - "learning_rate": 0.000199996496856626, - "loss": 46.0, - "step": 34858 - }, - { - "epoch": 2.665213983982262, - "grad_norm": 0.0021812699269503355, - "learning_rate": 0.0001999964966555715, - "loss": 46.0, - "step": 34859 - }, - { - "epoch": 2.6652904409656517, - "grad_norm": 0.0026718545705080032, - "learning_rate": 0.00019999649645451126, - "loss": 46.0, - "step": 34860 - }, - { - "epoch": 2.6653668979490415, - "grad_norm": 0.004206669516861439, - "learning_rate": 0.00019999649625344527, - "loss": 46.0, - "step": 34861 - }, - { - "epoch": 2.6654433549324312, - "grad_norm": 0.0021670302376151085, - "learning_rate": 0.00019999649605237347, - "loss": 46.0, - "step": 34862 - }, - { - "epoch": 2.665519811915821, - "grad_norm": 0.0020318003371357918, - "learning_rate": 0.00019999649585129592, - "loss": 46.0, - "step": 34863 - }, - { - "epoch": 2.6655962688992103, - "grad_norm": 0.0037286237347871065, - "learning_rate": 0.00019999649565021258, - "loss": 46.0, - "step": 34864 - }, - { - "epoch": 2.6656727258826, - "grad_norm": 0.0020174169912934303, - "learning_rate": 0.0001999964954491235, - "loss": 46.0, - "step": 34865 - }, - { - "epoch": 2.66574918286599, - "grad_norm": 0.001935688080266118, - "learning_rate": 0.00019999649524802865, - "loss": 46.0, - "step": 34866 - }, - { - "epoch": 2.6658256398493796, - "grad_norm": 0.0025332774966955185, - "learning_rate": 0.000199996495046928, - "loss": 46.0, - "step": 34867 - }, - { - "epoch": 2.6659020968327694, - "grad_norm": 0.0013528376584872603, - "learning_rate": 0.0001999964948458216, - "loss": 46.0, - "step": 34868 - }, - { - "epoch": 2.665978553816159, - "grad_norm": 0.001148971146903932, - "learning_rate": 0.00019999649464470944, - "loss": 46.0, - "step": 34869 - }, - { - "epoch": 2.666055010799549, - "grad_norm": 0.0028508210089057684, - "learning_rate": 0.00019999649444359149, - "loss": 46.0, - "step": 34870 - }, - { - "epoch": 2.6661314677829386, - "grad_norm": 0.002312930068001151, - "learning_rate": 0.00019999649424246778, - "loss": 46.0, - "step": 34871 - }, - { - "epoch": 2.6662079247663284, - "grad_norm": 0.003013030393049121, - "learning_rate": 0.0001999964940413383, - "loss": 46.0, - "step": 34872 - }, - { - "epoch": 2.666284381749718, - "grad_norm": 0.002281905384734273, - "learning_rate": 0.00019999649384020308, - "loss": 46.0, - "step": 34873 - }, - { - "epoch": 2.666360838733108, - "grad_norm": 0.0022345969919115305, - "learning_rate": 0.00019999649363906206, - "loss": 46.0, - "step": 34874 - }, - { - "epoch": 2.6664372957164977, - "grad_norm": 0.0032459436915814877, - "learning_rate": 0.00019999649343791526, - "loss": 46.0, - "step": 34875 - }, - { - "epoch": 2.6665137526998874, - "grad_norm": 0.0013581333914771676, - "learning_rate": 0.0001999964932367627, - "loss": 46.0, - "step": 34876 - }, - { - "epoch": 2.666590209683277, - "grad_norm": 0.0010926889954134822, - "learning_rate": 0.00019999649303560442, - "loss": 46.0, - "step": 34877 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.006870744749903679, - "learning_rate": 0.0001999964928344403, - "loss": 46.0, - "step": 34878 - }, - { - "epoch": 2.6667431236500563, - "grad_norm": 0.001762778265401721, - "learning_rate": 0.00019999649263327044, - "loss": 46.0, - "step": 34879 - }, - { - "epoch": 2.666819580633446, - "grad_norm": 0.0024501406587660313, - "learning_rate": 0.00019999649243209483, - "loss": 46.0, - "step": 34880 - }, - { - "epoch": 2.666896037616836, - "grad_norm": 0.0017349875997751951, - "learning_rate": 0.00019999649223091342, - "loss": 46.0, - "step": 34881 - }, - { - "epoch": 2.6669724946002256, - "grad_norm": 0.004817704204469919, - "learning_rate": 0.00019999649202972624, - "loss": 46.0, - "step": 34882 - }, - { - "epoch": 2.6670489515836153, - "grad_norm": 0.002187000121921301, - "learning_rate": 0.0001999964918285333, - "loss": 46.0, - "step": 34883 - }, - { - "epoch": 2.667125408567005, - "grad_norm": 0.0023201985750347376, - "learning_rate": 0.0001999964916273346, - "loss": 46.0, - "step": 34884 - }, - { - "epoch": 2.667201865550395, - "grad_norm": 0.0014903944684192538, - "learning_rate": 0.00019999649142613012, - "loss": 46.0, - "step": 34885 - }, - { - "epoch": 2.667278322533784, - "grad_norm": 0.0037571508437395096, - "learning_rate": 0.00019999649122491987, - "loss": 46.0, - "step": 34886 - }, - { - "epoch": 2.667354779517174, - "grad_norm": 0.0024626918602734804, - "learning_rate": 0.00019999649102370385, - "loss": 46.0, - "step": 34887 - }, - { - "epoch": 2.6674312365005637, - "grad_norm": 0.0016515154857188463, - "learning_rate": 0.00019999649082248208, - "loss": 46.0, - "step": 34888 - }, - { - "epoch": 2.6675076934839534, - "grad_norm": 0.0016520003555342555, - "learning_rate": 0.00019999649062125453, - "loss": 46.0, - "step": 34889 - }, - { - "epoch": 2.667584150467343, - "grad_norm": 0.0006765213911421597, - "learning_rate": 0.0001999964904200212, - "loss": 46.0, - "step": 34890 - }, - { - "epoch": 2.667660607450733, - "grad_norm": 0.001740854699164629, - "learning_rate": 0.00019999649021878213, - "loss": 46.0, - "step": 34891 - }, - { - "epoch": 2.6677370644341227, - "grad_norm": 0.0018777099903672934, - "learning_rate": 0.00019999649001753724, - "loss": 46.0, - "step": 34892 - }, - { - "epoch": 2.6678135214175125, - "grad_norm": 0.0012235044268891215, - "learning_rate": 0.00019999648981628663, - "loss": 46.0, - "step": 34893 - }, - { - "epoch": 2.6678899784009023, - "grad_norm": 0.0076610553078353405, - "learning_rate": 0.00019999648961503024, - "loss": 46.0, - "step": 34894 - }, - { - "epoch": 2.667966435384292, - "grad_norm": 0.002160225994884968, - "learning_rate": 0.00019999648941376809, - "loss": 46.0, - "step": 34895 - }, - { - "epoch": 2.6680428923676818, - "grad_norm": 0.004819528199732304, - "learning_rate": 0.00019999648921250013, - "loss": 46.0, - "step": 34896 - }, - { - "epoch": 2.6681193493510715, - "grad_norm": 0.0034319048281759024, - "learning_rate": 0.00019999648901122642, - "loss": 46.0, - "step": 34897 - }, - { - "epoch": 2.6681958063344613, - "grad_norm": 0.002473304281011224, - "learning_rate": 0.00019999648880994695, - "loss": 46.0, - "step": 34898 - }, - { - "epoch": 2.668272263317851, - "grad_norm": 0.0007150843739509583, - "learning_rate": 0.0001999964886086617, - "loss": 46.0, - "step": 34899 - }, - { - "epoch": 2.6683487203012404, - "grad_norm": 0.002251900965347886, - "learning_rate": 0.00019999648840737067, - "loss": 46.0, - "step": 34900 - }, - { - "epoch": 2.66842517728463, - "grad_norm": 0.0034669458400458097, - "learning_rate": 0.0001999964882060739, - "loss": 46.0, - "step": 34901 - }, - { - "epoch": 2.66850163426802, - "grad_norm": 0.0013607167638838291, - "learning_rate": 0.00019999648800477133, - "loss": 46.0, - "step": 34902 - }, - { - "epoch": 2.6685780912514097, - "grad_norm": 0.002192132640630007, - "learning_rate": 0.00019999648780346302, - "loss": 46.0, - "step": 34903 - }, - { - "epoch": 2.6686545482347994, - "grad_norm": 0.004075739067047834, - "learning_rate": 0.00019999648760214893, - "loss": 46.0, - "step": 34904 - }, - { - "epoch": 2.668731005218189, - "grad_norm": 0.0013467296957969666, - "learning_rate": 0.00019999648740082906, - "loss": 46.0, - "step": 34905 - }, - { - "epoch": 2.668807462201579, - "grad_norm": 0.0016778097487986088, - "learning_rate": 0.00019999648719950342, - "loss": 46.0, - "step": 34906 - }, - { - "epoch": 2.6688839191849687, - "grad_norm": 0.0029106067959219217, - "learning_rate": 0.00019999648699817204, - "loss": 46.0, - "step": 34907 - }, - { - "epoch": 2.668960376168358, - "grad_norm": 0.0023601623252034187, - "learning_rate": 0.00019999648679683486, - "loss": 46.0, - "step": 34908 - }, - { - "epoch": 2.669036833151748, - "grad_norm": 0.0019676717929542065, - "learning_rate": 0.00019999648659549193, - "loss": 46.0, - "step": 34909 - }, - { - "epoch": 2.6691132901351375, - "grad_norm": 0.0014156763209030032, - "learning_rate": 0.00019999648639414322, - "loss": 46.0, - "step": 34910 - }, - { - "epoch": 2.6691897471185273, - "grad_norm": 0.0007549666916020215, - "learning_rate": 0.00019999648619278875, - "loss": 46.0, - "step": 34911 - }, - { - "epoch": 2.669266204101917, - "grad_norm": 0.0026200178544968367, - "learning_rate": 0.0001999964859914285, - "loss": 46.0, - "step": 34912 - }, - { - "epoch": 2.669342661085307, - "grad_norm": 0.002217887667939067, - "learning_rate": 0.00019999648579006247, - "loss": 46.0, - "step": 34913 - }, - { - "epoch": 2.6694191180686966, - "grad_norm": 0.004524400923401117, - "learning_rate": 0.0001999964855886907, - "loss": 46.0, - "step": 34914 - }, - { - "epoch": 2.6694955750520863, - "grad_norm": 0.003973620478063822, - "learning_rate": 0.00019999648538731316, - "loss": 46.0, - "step": 34915 - }, - { - "epoch": 2.669572032035476, - "grad_norm": 0.001967710442841053, - "learning_rate": 0.00019999648518592982, - "loss": 46.0, - "step": 34916 - }, - { - "epoch": 2.669648489018866, - "grad_norm": 0.001152605633251369, - "learning_rate": 0.00019999648498454073, - "loss": 46.0, - "step": 34917 - }, - { - "epoch": 2.6697249460022556, - "grad_norm": 0.0014740233309566975, - "learning_rate": 0.00019999648478314586, - "loss": 46.0, - "step": 34918 - }, - { - "epoch": 2.6698014029856454, - "grad_norm": 0.0013740730937570333, - "learning_rate": 0.00019999648458174526, - "loss": 46.0, - "step": 34919 - }, - { - "epoch": 2.669877859969035, - "grad_norm": 0.0022710999473929405, - "learning_rate": 0.00019999648438033885, - "loss": 46.0, - "step": 34920 - }, - { - "epoch": 2.669954316952425, - "grad_norm": 0.0019967190455645323, - "learning_rate": 0.00019999648417892666, - "loss": 46.0, - "step": 34921 - }, - { - "epoch": 2.6700307739358142, - "grad_norm": 0.0017750104889273643, - "learning_rate": 0.00019999648397750873, - "loss": 46.0, - "step": 34922 - }, - { - "epoch": 2.670107230919204, - "grad_norm": 0.0023550225887447596, - "learning_rate": 0.00019999648377608503, - "loss": 46.0, - "step": 34923 - }, - { - "epoch": 2.6701836879025937, - "grad_norm": 0.0017071013571694493, - "learning_rate": 0.00019999648357465555, - "loss": 46.0, - "step": 34924 - }, - { - "epoch": 2.6702601448859835, - "grad_norm": 0.002447047270834446, - "learning_rate": 0.0001999964833732203, - "loss": 46.0, - "step": 34925 - }, - { - "epoch": 2.6703366018693733, - "grad_norm": 0.0017103665741160512, - "learning_rate": 0.00019999648317177928, - "loss": 46.0, - "step": 34926 - }, - { - "epoch": 2.670413058852763, - "grad_norm": 0.003538508666679263, - "learning_rate": 0.0001999964829703325, - "loss": 46.0, - "step": 34927 - }, - { - "epoch": 2.670489515836153, - "grad_norm": 0.0041619157418608665, - "learning_rate": 0.00019999648276887994, - "loss": 46.0, - "step": 34928 - }, - { - "epoch": 2.6705659728195426, - "grad_norm": 0.0019612619653344154, - "learning_rate": 0.0001999964825674216, - "loss": 46.0, - "step": 34929 - }, - { - "epoch": 2.670642429802932, - "grad_norm": 0.0009307538857683539, - "learning_rate": 0.0001999964823659575, - "loss": 46.0, - "step": 34930 - }, - { - "epoch": 2.6707188867863216, - "grad_norm": 0.0013061750214546919, - "learning_rate": 0.00019999648216448764, - "loss": 46.0, - "step": 34931 - }, - { - "epoch": 2.6707953437697114, - "grad_norm": 0.0037661294918507338, - "learning_rate": 0.00019999648196301204, - "loss": 46.0, - "step": 34932 - }, - { - "epoch": 2.670871800753101, - "grad_norm": 0.011401028372347355, - "learning_rate": 0.00019999648176153065, - "loss": 46.0, - "step": 34933 - }, - { - "epoch": 2.670948257736491, - "grad_norm": 0.0036667597014456987, - "learning_rate": 0.00019999648156004347, - "loss": 46.0, - "step": 34934 - }, - { - "epoch": 2.6710247147198807, - "grad_norm": 0.0012479517608880997, - "learning_rate": 0.0001999964813585505, - "loss": 46.0, - "step": 34935 - }, - { - "epoch": 2.6711011717032704, - "grad_norm": 0.00181959371548146, - "learning_rate": 0.00019999648115705184, - "loss": 46.0, - "step": 34936 - }, - { - "epoch": 2.67117762868666, - "grad_norm": 0.0012184573570266366, - "learning_rate": 0.00019999648095554733, - "loss": 46.0, - "step": 34937 - }, - { - "epoch": 2.67125408567005, - "grad_norm": 0.0032569922041147947, - "learning_rate": 0.00019999648075403709, - "loss": 46.0, - "step": 34938 - }, - { - "epoch": 2.6713305426534397, - "grad_norm": 0.0022691187914460897, - "learning_rate": 0.0001999964805525211, - "loss": 46.0, - "step": 34939 - }, - { - "epoch": 2.6714069996368295, - "grad_norm": 0.001322792493738234, - "learning_rate": 0.0001999964803509993, - "loss": 46.0, - "step": 34940 - }, - { - "epoch": 2.6714834566202192, - "grad_norm": 0.003794808639213443, - "learning_rate": 0.00019999648014947175, - "loss": 46.0, - "step": 34941 - }, - { - "epoch": 2.671559913603609, - "grad_norm": 0.002194920089095831, - "learning_rate": 0.0001999964799479384, - "loss": 46.0, - "step": 34942 - }, - { - "epoch": 2.6716363705869988, - "grad_norm": 0.002413443522527814, - "learning_rate": 0.00019999647974639932, - "loss": 46.0, - "step": 34943 - }, - { - "epoch": 2.671712827570388, - "grad_norm": 0.0017411314183846116, - "learning_rate": 0.00019999647954485448, - "loss": 46.0, - "step": 34944 - }, - { - "epoch": 2.671789284553778, - "grad_norm": 0.001352843944914639, - "learning_rate": 0.00019999647934330382, - "loss": 46.0, - "step": 34945 - }, - { - "epoch": 2.6718657415371676, - "grad_norm": 0.0018036641413345933, - "learning_rate": 0.0001999964791417474, - "loss": 46.0, - "step": 34946 - }, - { - "epoch": 2.6719421985205574, - "grad_norm": 0.0032898527570068836, - "learning_rate": 0.00019999647894018528, - "loss": 46.0, - "step": 34947 - }, - { - "epoch": 2.672018655503947, - "grad_norm": 0.005762800574302673, - "learning_rate": 0.0001999964787386173, - "loss": 46.0, - "step": 34948 - }, - { - "epoch": 2.672095112487337, - "grad_norm": 0.00338967377319932, - "learning_rate": 0.00019999647853704362, - "loss": 46.0, - "step": 34949 - }, - { - "epoch": 2.6721715694707266, - "grad_norm": 0.002865397837013006, - "learning_rate": 0.00019999647833546412, - "loss": 46.0, - "step": 34950 - }, - { - "epoch": 2.6722480264541164, - "grad_norm": 0.0030455742962658405, - "learning_rate": 0.0001999964781338789, - "loss": 46.0, - "step": 34951 - }, - { - "epoch": 2.6723244834375057, - "grad_norm": 0.0018135480349883437, - "learning_rate": 0.00019999647793228785, - "loss": 46.0, - "step": 34952 - }, - { - "epoch": 2.6724009404208955, - "grad_norm": 0.0018838674295693636, - "learning_rate": 0.00019999647773069108, - "loss": 46.0, - "step": 34953 - }, - { - "epoch": 2.6724773974042852, - "grad_norm": 0.009196566417813301, - "learning_rate": 0.0001999964775290885, - "loss": 46.0, - "step": 34954 - }, - { - "epoch": 2.672553854387675, - "grad_norm": 0.0010365053312852979, - "learning_rate": 0.00019999647732748022, - "loss": 46.0, - "step": 34955 - }, - { - "epoch": 2.6726303113710648, - "grad_norm": 0.0014754708390682936, - "learning_rate": 0.00019999647712586613, - "loss": 46.0, - "step": 34956 - }, - { - "epoch": 2.6727067683544545, - "grad_norm": 0.0023555499501526356, - "learning_rate": 0.00019999647692424624, - "loss": 46.0, - "step": 34957 - }, - { - "epoch": 2.6727832253378443, - "grad_norm": 0.004240604117512703, - "learning_rate": 0.00019999647672262064, - "loss": 46.0, - "step": 34958 - }, - { - "epoch": 2.672859682321234, - "grad_norm": 0.0013119049835950136, - "learning_rate": 0.00019999647652098923, - "loss": 46.0, - "step": 34959 - }, - { - "epoch": 2.672936139304624, - "grad_norm": 0.007284971885383129, - "learning_rate": 0.00019999647631935204, - "loss": 46.0, - "step": 34960 - }, - { - "epoch": 2.6730125962880136, - "grad_norm": 0.001796469441615045, - "learning_rate": 0.00019999647611770912, - "loss": 46.0, - "step": 34961 - }, - { - "epoch": 2.6730890532714033, - "grad_norm": 0.004304004367440939, - "learning_rate": 0.0001999964759160604, - "loss": 46.0, - "step": 34962 - }, - { - "epoch": 2.673165510254793, - "grad_norm": 0.0023026950657367706, - "learning_rate": 0.00019999647571440594, - "loss": 46.0, - "step": 34963 - }, - { - "epoch": 2.673241967238183, - "grad_norm": 0.001401193207129836, - "learning_rate": 0.0001999964755127457, - "loss": 46.0, - "step": 34964 - }, - { - "epoch": 2.673318424221572, - "grad_norm": 0.0014134080847725272, - "learning_rate": 0.00019999647531107967, - "loss": 46.0, - "step": 34965 - }, - { - "epoch": 2.673394881204962, - "grad_norm": 0.008156070485711098, - "learning_rate": 0.0001999964751094079, - "loss": 46.0, - "step": 34966 - }, - { - "epoch": 2.6734713381883517, - "grad_norm": 0.0025683222338557243, - "learning_rate": 0.00019999647490773033, - "loss": 46.0, - "step": 34967 - }, - { - "epoch": 2.6735477951717415, - "grad_norm": 0.004338476341217756, - "learning_rate": 0.000199996474706047, - "loss": 46.0, - "step": 34968 - }, - { - "epoch": 2.673624252155131, - "grad_norm": 0.0038774064742028713, - "learning_rate": 0.00019999647450435793, - "loss": 46.0, - "step": 34969 - }, - { - "epoch": 2.673700709138521, - "grad_norm": 0.0011908930027857423, - "learning_rate": 0.00019999647430266304, - "loss": 46.0, - "step": 34970 - }, - { - "epoch": 2.6737771661219107, - "grad_norm": 0.001355922082439065, - "learning_rate": 0.00019999647410096243, - "loss": 46.0, - "step": 34971 - }, - { - "epoch": 2.6738536231053005, - "grad_norm": 0.0017022473039105535, - "learning_rate": 0.000199996473899256, - "loss": 46.0, - "step": 34972 - }, - { - "epoch": 2.6739300800886903, - "grad_norm": 0.0013512865407392383, - "learning_rate": 0.00019999647369754384, - "loss": 46.0, - "step": 34973 - }, - { - "epoch": 2.6740065370720796, - "grad_norm": 0.0008856017957441509, - "learning_rate": 0.0001999964734958259, - "loss": 46.0, - "step": 34974 - }, - { - "epoch": 2.6740829940554693, - "grad_norm": 0.003687289310619235, - "learning_rate": 0.0001999964732941022, - "loss": 46.0, - "step": 34975 - }, - { - "epoch": 2.674159451038859, - "grad_norm": 0.0016867963131517172, - "learning_rate": 0.00019999647309237273, - "loss": 46.0, - "step": 34976 - }, - { - "epoch": 2.674235908022249, - "grad_norm": 0.0017421615775674582, - "learning_rate": 0.00019999647289063748, - "loss": 46.0, - "step": 34977 - }, - { - "epoch": 2.6743123650056386, - "grad_norm": 0.0016528844134882092, - "learning_rate": 0.00019999647268889646, - "loss": 46.0, - "step": 34978 - }, - { - "epoch": 2.6743888219890284, - "grad_norm": 0.0010495816823095083, - "learning_rate": 0.00019999647248714967, - "loss": 46.0, - "step": 34979 - }, - { - "epoch": 2.674465278972418, - "grad_norm": 0.005541035905480385, - "learning_rate": 0.0001999964722853971, - "loss": 46.0, - "step": 34980 - }, - { - "epoch": 2.674541735955808, - "grad_norm": 0.0022761805448681116, - "learning_rate": 0.00019999647208363878, - "loss": 46.0, - "step": 34981 - }, - { - "epoch": 2.6746181929391977, - "grad_norm": 0.0017221964662894607, - "learning_rate": 0.0001999964718818747, - "loss": 46.0, - "step": 34982 - }, - { - "epoch": 2.6746946499225874, - "grad_norm": 0.001439619343727827, - "learning_rate": 0.00019999647168010483, - "loss": 46.0, - "step": 34983 - }, - { - "epoch": 2.674771106905977, - "grad_norm": 0.0028904296923428774, - "learning_rate": 0.0001999964714783292, - "loss": 46.0, - "step": 34984 - }, - { - "epoch": 2.674847563889367, - "grad_norm": 0.001968087861314416, - "learning_rate": 0.00019999647127654782, - "loss": 46.0, - "step": 34985 - }, - { - "epoch": 2.6749240208727567, - "grad_norm": 0.0012570518301799893, - "learning_rate": 0.00019999647107476063, - "loss": 46.0, - "step": 34986 - }, - { - "epoch": 2.675000477856146, - "grad_norm": 0.0020639647264033556, - "learning_rate": 0.00019999647087296768, - "loss": 46.0, - "step": 34987 - }, - { - "epoch": 2.675076934839536, - "grad_norm": 0.002605196787044406, - "learning_rate": 0.00019999647067116898, - "loss": 46.0, - "step": 34988 - }, - { - "epoch": 2.6751533918229256, - "grad_norm": 0.000987744890153408, - "learning_rate": 0.0001999964704693645, - "loss": 46.0, - "step": 34989 - }, - { - "epoch": 2.6752298488063153, - "grad_norm": 0.00182677514385432, - "learning_rate": 0.00019999647026755426, - "loss": 46.0, - "step": 34990 - }, - { - "epoch": 2.675306305789705, - "grad_norm": 0.003008483210578561, - "learning_rate": 0.00019999647006573823, - "loss": 46.0, - "step": 34991 - }, - { - "epoch": 2.675382762773095, - "grad_norm": 0.0008762613288126886, - "learning_rate": 0.00019999646986391644, - "loss": 46.0, - "step": 34992 - }, - { - "epoch": 2.6754592197564846, - "grad_norm": 0.002440882846713066, - "learning_rate": 0.0001999964696620889, - "loss": 46.0, - "step": 34993 - }, - { - "epoch": 2.6755356767398744, - "grad_norm": 0.003429112955927849, - "learning_rate": 0.00019999646946025558, - "loss": 46.0, - "step": 34994 - }, - { - "epoch": 2.6756121337232637, - "grad_norm": 0.001544648315757513, - "learning_rate": 0.00019999646925841647, - "loss": 46.0, - "step": 34995 - }, - { - "epoch": 2.6756885907066534, - "grad_norm": 0.002543485490605235, - "learning_rate": 0.0001999964690565716, - "loss": 46.0, - "step": 34996 - }, - { - "epoch": 2.675765047690043, - "grad_norm": 0.0014046822907403111, - "learning_rate": 0.000199996468854721, - "loss": 46.0, - "step": 34997 - }, - { - "epoch": 2.675841504673433, - "grad_norm": 0.001448543625883758, - "learning_rate": 0.0001999964686528646, - "loss": 46.0, - "step": 34998 - }, - { - "epoch": 2.6759179616568227, - "grad_norm": 0.002085628919303417, - "learning_rate": 0.0001999964684510024, - "loss": 46.0, - "step": 34999 - }, - { - "epoch": 2.6759944186402125, - "grad_norm": 0.0050635188817977905, - "learning_rate": 0.00019999646824913448, - "loss": 46.0, - "step": 35000 - }, - { - "epoch": 2.6760708756236022, - "grad_norm": 0.00619416544213891, - "learning_rate": 0.00019999646804726075, - "loss": 46.0, - "step": 35001 - }, - { - "epoch": 2.676147332606992, - "grad_norm": 0.0022781158331781626, - "learning_rate": 0.00019999646784538128, - "loss": 46.0, - "step": 35002 - }, - { - "epoch": 2.6762237895903818, - "grad_norm": 0.0011711466358974576, - "learning_rate": 0.00019999646764349603, - "loss": 46.0, - "step": 35003 - }, - { - "epoch": 2.6763002465737715, - "grad_norm": 0.0022175174672156572, - "learning_rate": 0.000199996467441605, - "loss": 46.0, - "step": 35004 - }, - { - "epoch": 2.6763767035571613, - "grad_norm": 0.0016163773834705353, - "learning_rate": 0.00019999646723970824, - "loss": 46.0, - "step": 35005 - }, - { - "epoch": 2.676453160540551, - "grad_norm": 0.0019132839515805244, - "learning_rate": 0.00019999646703780567, - "loss": 46.0, - "step": 35006 - }, - { - "epoch": 2.676529617523941, - "grad_norm": 0.0010947369737550616, - "learning_rate": 0.00019999646683589733, - "loss": 46.0, - "step": 35007 - }, - { - "epoch": 2.6766060745073306, - "grad_norm": 0.0025650332681834698, - "learning_rate": 0.00019999646663398327, - "loss": 46.0, - "step": 35008 - }, - { - "epoch": 2.67668253149072, - "grad_norm": 0.0014202925376594067, - "learning_rate": 0.0001999964664320634, - "loss": 46.0, - "step": 35009 - }, - { - "epoch": 2.6767589884741096, - "grad_norm": 0.0020427873823791742, - "learning_rate": 0.00019999646623013778, - "loss": 46.0, - "step": 35010 - }, - { - "epoch": 2.6768354454574994, - "grad_norm": 0.008256982080638409, - "learning_rate": 0.00019999646602820637, - "loss": 46.0, - "step": 35011 - }, - { - "epoch": 2.676911902440889, - "grad_norm": 0.002671420807018876, - "learning_rate": 0.0001999964658262692, - "loss": 46.0, - "step": 35012 - }, - { - "epoch": 2.676988359424279, - "grad_norm": 0.001022005220875144, - "learning_rate": 0.00019999646562432623, - "loss": 46.0, - "step": 35013 - }, - { - "epoch": 2.6770648164076687, - "grad_norm": 0.0018111125100404024, - "learning_rate": 0.00019999646542237756, - "loss": 46.0, - "step": 35014 - }, - { - "epoch": 2.6771412733910585, - "grad_norm": 0.0015490794321522117, - "learning_rate": 0.00019999646522042306, - "loss": 46.0, - "step": 35015 - }, - { - "epoch": 2.677217730374448, - "grad_norm": 0.0018617082387208939, - "learning_rate": 0.0001999964650184628, - "loss": 46.0, - "step": 35016 - }, - { - "epoch": 2.6772941873578375, - "grad_norm": 0.0005698470631614327, - "learning_rate": 0.0001999964648164968, - "loss": 46.0, - "step": 35017 - }, - { - "epoch": 2.6773706443412273, - "grad_norm": 0.0016014582943171263, - "learning_rate": 0.00019999646461452502, - "loss": 46.0, - "step": 35018 - }, - { - "epoch": 2.677447101324617, - "grad_norm": 0.0014512023190036416, - "learning_rate": 0.00019999646441254748, - "loss": 46.0, - "step": 35019 - }, - { - "epoch": 2.677523558308007, - "grad_norm": 0.000892001437023282, - "learning_rate": 0.00019999646421056414, - "loss": 46.0, - "step": 35020 - }, - { - "epoch": 2.6776000152913966, - "grad_norm": 0.0026293147820979357, - "learning_rate": 0.00019999646400857503, - "loss": 46.0, - "step": 35021 - }, - { - "epoch": 2.6776764722747863, - "grad_norm": 0.0035947246942669153, - "learning_rate": 0.00019999646380658017, - "loss": 46.0, - "step": 35022 - }, - { - "epoch": 2.677752929258176, - "grad_norm": 0.0032561246771365404, - "learning_rate": 0.00019999646360457953, - "loss": 46.0, - "step": 35023 - }, - { - "epoch": 2.677829386241566, - "grad_norm": 0.005203768610954285, - "learning_rate": 0.00019999646340257315, - "loss": 46.0, - "step": 35024 - }, - { - "epoch": 2.6779058432249556, - "grad_norm": 0.001112635713070631, - "learning_rate": 0.00019999646320056097, - "loss": 46.0, - "step": 35025 - }, - { - "epoch": 2.6779823002083454, - "grad_norm": 0.0029776745941489935, - "learning_rate": 0.00019999646299854302, - "loss": 46.0, - "step": 35026 - }, - { - "epoch": 2.678058757191735, - "grad_norm": 0.0018760369857773185, - "learning_rate": 0.00019999646279651932, - "loss": 46.0, - "step": 35027 - }, - { - "epoch": 2.678135214175125, - "grad_norm": 0.005247129593044519, - "learning_rate": 0.00019999646259448984, - "loss": 46.0, - "step": 35028 - }, - { - "epoch": 2.6782116711585147, - "grad_norm": 0.0019117380725219846, - "learning_rate": 0.00019999646239245457, - "loss": 46.0, - "step": 35029 - }, - { - "epoch": 2.6782881281419044, - "grad_norm": 0.00510834576562047, - "learning_rate": 0.00019999646219041357, - "loss": 46.0, - "step": 35030 - }, - { - "epoch": 2.6783645851252937, - "grad_norm": 0.001243475009687245, - "learning_rate": 0.00019999646198836678, - "loss": 46.0, - "step": 35031 - }, - { - "epoch": 2.6784410421086835, - "grad_norm": 0.00366071704775095, - "learning_rate": 0.00019999646178631424, - "loss": 46.0, - "step": 35032 - }, - { - "epoch": 2.6785174990920733, - "grad_norm": 0.003371445694938302, - "learning_rate": 0.0001999964615842559, - "loss": 46.0, - "step": 35033 - }, - { - "epoch": 2.678593956075463, - "grad_norm": 0.0020993282087147236, - "learning_rate": 0.0001999964613821918, - "loss": 46.0, - "step": 35034 - }, - { - "epoch": 2.678670413058853, - "grad_norm": 0.0020369626581668854, - "learning_rate": 0.00019999646118012195, - "loss": 46.0, - "step": 35035 - }, - { - "epoch": 2.6787468700422425, - "grad_norm": 0.0016634553903713822, - "learning_rate": 0.0001999964609780463, - "loss": 46.0, - "step": 35036 - }, - { - "epoch": 2.6788233270256323, - "grad_norm": 0.0015325468266382813, - "learning_rate": 0.0001999964607759649, - "loss": 46.0, - "step": 35037 - }, - { - "epoch": 2.678899784009022, - "grad_norm": 0.005376342684030533, - "learning_rate": 0.00019999646057387773, - "loss": 46.0, - "step": 35038 - }, - { - "epoch": 2.6789762409924114, - "grad_norm": 0.001149569870904088, - "learning_rate": 0.0001999964603717848, - "loss": 46.0, - "step": 35039 - }, - { - "epoch": 2.679052697975801, - "grad_norm": 0.0020430092699825764, - "learning_rate": 0.00019999646016968608, - "loss": 46.0, - "step": 35040 - }, - { - "epoch": 2.679129154959191, - "grad_norm": 0.0006907646893523633, - "learning_rate": 0.0001999964599675816, - "loss": 46.0, - "step": 35041 - }, - { - "epoch": 2.6792056119425807, - "grad_norm": 0.003037497866898775, - "learning_rate": 0.00019999645976547136, - "loss": 46.0, - "step": 35042 - }, - { - "epoch": 2.6792820689259704, - "grad_norm": 0.0015470362268388271, - "learning_rate": 0.00019999645956335536, - "loss": 46.0, - "step": 35043 - }, - { - "epoch": 2.67935852590936, - "grad_norm": 0.0027011220809072256, - "learning_rate": 0.00019999645936123357, - "loss": 46.0, - "step": 35044 - }, - { - "epoch": 2.67943498289275, - "grad_norm": 0.005984518676996231, - "learning_rate": 0.000199996459159106, - "loss": 46.0, - "step": 35045 - }, - { - "epoch": 2.6795114398761397, - "grad_norm": 0.003937001805752516, - "learning_rate": 0.0001999964589569727, - "loss": 46.0, - "step": 35046 - }, - { - "epoch": 2.6795878968595295, - "grad_norm": 0.002172737615182996, - "learning_rate": 0.00019999645875483357, - "loss": 46.0, - "step": 35047 - }, - { - "epoch": 2.6796643538429192, - "grad_norm": 0.003583714831620455, - "learning_rate": 0.00019999645855268871, - "loss": 46.0, - "step": 35048 - }, - { - "epoch": 2.679740810826309, - "grad_norm": 0.002341628773137927, - "learning_rate": 0.00019999645835053808, - "loss": 46.0, - "step": 35049 - }, - { - "epoch": 2.6798172678096988, - "grad_norm": 0.0005359657225199044, - "learning_rate": 0.00019999645814838167, - "loss": 46.0, - "step": 35050 - }, - { - "epoch": 2.6798937247930885, - "grad_norm": 0.002184459939599037, - "learning_rate": 0.0001999964579462195, - "loss": 46.0, - "step": 35051 - }, - { - "epoch": 2.6799701817764783, - "grad_norm": 0.0036105432081967592, - "learning_rate": 0.00019999645774405157, - "loss": 46.0, - "step": 35052 - }, - { - "epoch": 2.6800466387598676, - "grad_norm": 0.002817615633830428, - "learning_rate": 0.00019999645754187787, - "loss": 46.0, - "step": 35053 - }, - { - "epoch": 2.6801230957432574, - "grad_norm": 0.0012330783065408468, - "learning_rate": 0.00019999645733969837, - "loss": 46.0, - "step": 35054 - }, - { - "epoch": 2.680199552726647, - "grad_norm": 0.0019340860890224576, - "learning_rate": 0.00019999645713751312, - "loss": 46.0, - "step": 35055 - }, - { - "epoch": 2.680276009710037, - "grad_norm": 0.009502885863184929, - "learning_rate": 0.0001999964569353221, - "loss": 46.0, - "step": 35056 - }, - { - "epoch": 2.6803524666934266, - "grad_norm": 0.0009414135129190981, - "learning_rate": 0.0001999964567331253, - "loss": 46.0, - "step": 35057 - }, - { - "epoch": 2.6804289236768164, - "grad_norm": 0.002125833183526993, - "learning_rate": 0.00019999645653092277, - "loss": 46.0, - "step": 35058 - }, - { - "epoch": 2.680505380660206, - "grad_norm": 0.003920309711247683, - "learning_rate": 0.00019999645632871445, - "loss": 46.0, - "step": 35059 - }, - { - "epoch": 2.680581837643596, - "grad_norm": 0.0010468428954482079, - "learning_rate": 0.00019999645612650034, - "loss": 46.0, - "step": 35060 - }, - { - "epoch": 2.6806582946269852, - "grad_norm": 0.0008408360299654305, - "learning_rate": 0.00019999645592428048, - "loss": 46.0, - "step": 35061 - }, - { - "epoch": 2.680734751610375, - "grad_norm": 0.001694794395007193, - "learning_rate": 0.00019999645572205482, - "loss": 46.0, - "step": 35062 - }, - { - "epoch": 2.6808112085937648, - "grad_norm": 0.004552492871880531, - "learning_rate": 0.00019999645551982344, - "loss": 46.0, - "step": 35063 - }, - { - "epoch": 2.6808876655771545, - "grad_norm": 0.0019799016881734133, - "learning_rate": 0.0001999964553175863, - "loss": 46.0, - "step": 35064 - }, - { - "epoch": 2.6809641225605443, - "grad_norm": 0.0021943531464785337, - "learning_rate": 0.00019999645511534333, - "loss": 46.0, - "step": 35065 - }, - { - "epoch": 2.681040579543934, - "grad_norm": 0.002820164430886507, - "learning_rate": 0.00019999645491309464, - "loss": 46.0, - "step": 35066 - }, - { - "epoch": 2.681117036527324, - "grad_norm": 0.00448134820908308, - "learning_rate": 0.00019999645471084014, - "loss": 46.0, - "step": 35067 - }, - { - "epoch": 2.6811934935107136, - "grad_norm": 0.0018064838368445635, - "learning_rate": 0.0001999964545085799, - "loss": 46.0, - "step": 35068 - }, - { - "epoch": 2.6812699504941033, - "grad_norm": 0.0037594132591038942, - "learning_rate": 0.0001999964543063139, - "loss": 46.0, - "step": 35069 - }, - { - "epoch": 2.681346407477493, - "grad_norm": 0.001437538187019527, - "learning_rate": 0.00019999645410404208, - "loss": 46.0, - "step": 35070 - }, - { - "epoch": 2.681422864460883, - "grad_norm": 0.0025133390445262194, - "learning_rate": 0.0001999964539017645, - "loss": 46.0, - "step": 35071 - }, - { - "epoch": 2.6814993214442726, - "grad_norm": 0.0035058173816651106, - "learning_rate": 0.00019999645369948122, - "loss": 46.0, - "step": 35072 - }, - { - "epoch": 2.6815757784276624, - "grad_norm": 0.0013061017962172627, - "learning_rate": 0.00019999645349719208, - "loss": 46.0, - "step": 35073 - }, - { - "epoch": 2.681652235411052, - "grad_norm": 0.0017626304179430008, - "learning_rate": 0.00019999645329489725, - "loss": 46.0, - "step": 35074 - }, - { - "epoch": 2.6817286923944414, - "grad_norm": 0.0030665218364447355, - "learning_rate": 0.0001999964530925966, - "loss": 46.0, - "step": 35075 - }, - { - "epoch": 2.681805149377831, - "grad_norm": 0.0013497495092451572, - "learning_rate": 0.0001999964528902902, - "loss": 46.0, - "step": 35076 - }, - { - "epoch": 2.681881606361221, - "grad_norm": 0.006613880395889282, - "learning_rate": 0.00019999645268797803, - "loss": 46.0, - "step": 35077 - }, - { - "epoch": 2.6819580633446107, - "grad_norm": 0.0018535639392212033, - "learning_rate": 0.00019999645248566008, - "loss": 46.0, - "step": 35078 - }, - { - "epoch": 2.6820345203280005, - "grad_norm": 0.0017786029493436217, - "learning_rate": 0.00019999645228333635, - "loss": 46.0, - "step": 35079 - }, - { - "epoch": 2.6821109773113903, - "grad_norm": 0.0045642368495464325, - "learning_rate": 0.0001999964520810069, - "loss": 46.0, - "step": 35080 - }, - { - "epoch": 2.68218743429478, - "grad_norm": 0.003160534193739295, - "learning_rate": 0.00019999645187867166, - "loss": 46.0, - "step": 35081 - }, - { - "epoch": 2.6822638912781698, - "grad_norm": 0.0013153674080967903, - "learning_rate": 0.00019999645167633064, - "loss": 46.0, - "step": 35082 - }, - { - "epoch": 2.682340348261559, - "grad_norm": 0.001571283326484263, - "learning_rate": 0.00019999645147398385, - "loss": 46.0, - "step": 35083 - }, - { - "epoch": 2.682416805244949, - "grad_norm": 0.002594613702967763, - "learning_rate": 0.00019999645127163128, - "loss": 46.0, - "step": 35084 - }, - { - "epoch": 2.6824932622283386, - "grad_norm": 0.0020451454911381006, - "learning_rate": 0.00019999645106927297, - "loss": 46.0, - "step": 35085 - }, - { - "epoch": 2.6825697192117284, - "grad_norm": 0.004231937695294619, - "learning_rate": 0.00019999645086690886, - "loss": 46.0, - "step": 35086 - }, - { - "epoch": 2.682646176195118, - "grad_norm": 0.003918634261935949, - "learning_rate": 0.000199996450664539, - "loss": 46.0, - "step": 35087 - }, - { - "epoch": 2.682722633178508, - "grad_norm": 0.001403136528097093, - "learning_rate": 0.00019999645046216337, - "loss": 46.0, - "step": 35088 - }, - { - "epoch": 2.6827990901618977, - "grad_norm": 0.002732867607846856, - "learning_rate": 0.000199996450259782, - "loss": 46.0, - "step": 35089 - }, - { - "epoch": 2.6828755471452874, - "grad_norm": 0.0022099416237324476, - "learning_rate": 0.0001999964500573948, - "loss": 46.0, - "step": 35090 - }, - { - "epoch": 2.682952004128677, - "grad_norm": 0.0009359550895169377, - "learning_rate": 0.00019999644985500186, - "loss": 46.0, - "step": 35091 - }, - { - "epoch": 2.683028461112067, - "grad_norm": 0.0019647094886749983, - "learning_rate": 0.00019999644965260313, - "loss": 46.0, - "step": 35092 - }, - { - "epoch": 2.6831049180954567, - "grad_norm": 0.0013681192649528384, - "learning_rate": 0.00019999644945019866, - "loss": 46.0, - "step": 35093 - }, - { - "epoch": 2.6831813750788465, - "grad_norm": 0.0019265684532001615, - "learning_rate": 0.0001999964492477884, - "loss": 46.0, - "step": 35094 - }, - { - "epoch": 2.6832578320622362, - "grad_norm": 0.0015732484171167016, - "learning_rate": 0.0001999964490453724, - "loss": 46.0, - "step": 35095 - }, - { - "epoch": 2.6833342890456255, - "grad_norm": 0.0016988591523841023, - "learning_rate": 0.0001999964488429506, - "loss": 46.0, - "step": 35096 - }, - { - "epoch": 2.6834107460290153, - "grad_norm": 0.0039989701472222805, - "learning_rate": 0.00019999644864052306, - "loss": 46.0, - "step": 35097 - }, - { - "epoch": 2.683487203012405, - "grad_norm": 0.001421229331754148, - "learning_rate": 0.00019999644843808972, - "loss": 46.0, - "step": 35098 - }, - { - "epoch": 2.683563659995795, - "grad_norm": 0.0016833083936944604, - "learning_rate": 0.00019999644823565064, - "loss": 46.0, - "step": 35099 - }, - { - "epoch": 2.6836401169791846, - "grad_norm": 0.0023268675431609154, - "learning_rate": 0.00019999644803320575, - "loss": 46.0, - "step": 35100 - }, - { - "epoch": 2.6837165739625743, - "grad_norm": 0.0021617510356009007, - "learning_rate": 0.00019999644783075512, - "loss": 46.0, - "step": 35101 - }, - { - "epoch": 2.683793030945964, - "grad_norm": 0.003536701900884509, - "learning_rate": 0.0001999964476282987, - "loss": 46.0, - "step": 35102 - }, - { - "epoch": 2.683869487929354, - "grad_norm": 0.0004385752836242318, - "learning_rate": 0.00019999644742583653, - "loss": 46.0, - "step": 35103 - }, - { - "epoch": 2.6839459449127436, - "grad_norm": 0.0013113029999658465, - "learning_rate": 0.0001999964472233686, - "loss": 46.0, - "step": 35104 - }, - { - "epoch": 2.684022401896133, - "grad_norm": 0.0024137503933161497, - "learning_rate": 0.0001999964470208949, - "loss": 46.0, - "step": 35105 - }, - { - "epoch": 2.6840988588795227, - "grad_norm": 0.0028730938211083412, - "learning_rate": 0.0001999964468184154, - "loss": 46.0, - "step": 35106 - }, - { - "epoch": 2.6841753158629125, - "grad_norm": 0.0022919427137821913, - "learning_rate": 0.00019999644661593017, - "loss": 46.0, - "step": 35107 - }, - { - "epoch": 2.6842517728463022, - "grad_norm": 0.0031354199163615704, - "learning_rate": 0.00019999644641343912, - "loss": 46.0, - "step": 35108 - }, - { - "epoch": 2.684328229829692, - "grad_norm": 0.0026893760077655315, - "learning_rate": 0.00019999644621094236, - "loss": 46.0, - "step": 35109 - }, - { - "epoch": 2.6844046868130818, - "grad_norm": 0.00113083200994879, - "learning_rate": 0.00019999644600843982, - "loss": 46.0, - "step": 35110 - }, - { - "epoch": 2.6844811437964715, - "grad_norm": 0.0008158150012604892, - "learning_rate": 0.00019999644580593148, - "loss": 46.0, - "step": 35111 - }, - { - "epoch": 2.6845576007798613, - "grad_norm": 0.0029527395963668823, - "learning_rate": 0.0001999964456034174, - "loss": 46.0, - "step": 35112 - }, - { - "epoch": 2.684634057763251, - "grad_norm": 0.004799459595233202, - "learning_rate": 0.00019999644540089753, - "loss": 46.0, - "step": 35113 - }, - { - "epoch": 2.684710514746641, - "grad_norm": 0.0021030528005212545, - "learning_rate": 0.00019999644519837188, - "loss": 46.0, - "step": 35114 - }, - { - "epoch": 2.6847869717300306, - "grad_norm": 0.004186443984508514, - "learning_rate": 0.00019999644499584047, - "loss": 46.0, - "step": 35115 - }, - { - "epoch": 2.6848634287134203, - "grad_norm": 0.003666236298158765, - "learning_rate": 0.00019999644479330332, - "loss": 46.0, - "step": 35116 - }, - { - "epoch": 2.68493988569681, - "grad_norm": 0.0019482123898342252, - "learning_rate": 0.00019999644459076037, - "loss": 46.0, - "step": 35117 - }, - { - "epoch": 2.6850163426801994, - "grad_norm": 0.010220256634056568, - "learning_rate": 0.00019999644438821164, - "loss": 46.0, - "step": 35118 - }, - { - "epoch": 2.685092799663589, - "grad_norm": 0.0038749543018639088, - "learning_rate": 0.00019999644418565717, - "loss": 46.0, - "step": 35119 - }, - { - "epoch": 2.685169256646979, - "grad_norm": 0.0007671939674764872, - "learning_rate": 0.00019999644398309692, - "loss": 46.0, - "step": 35120 - }, - { - "epoch": 2.6852457136303687, - "grad_norm": 0.005448718089610338, - "learning_rate": 0.00019999644378053088, - "loss": 46.0, - "step": 35121 - }, - { - "epoch": 2.6853221706137584, - "grad_norm": 0.0010346717899665236, - "learning_rate": 0.00019999644357795911, - "loss": 46.0, - "step": 35122 - }, - { - "epoch": 2.685398627597148, - "grad_norm": 0.001677076448686421, - "learning_rate": 0.00019999644337538155, - "loss": 46.0, - "step": 35123 - }, - { - "epoch": 2.685475084580538, - "grad_norm": 0.004414484370499849, - "learning_rate": 0.0001999964431727982, - "loss": 46.0, - "step": 35124 - }, - { - "epoch": 2.6855515415639277, - "grad_norm": 0.0010584424016997218, - "learning_rate": 0.00019999644297020913, - "loss": 46.0, - "step": 35125 - }, - { - "epoch": 2.685627998547317, - "grad_norm": 0.0024583705235272646, - "learning_rate": 0.00019999644276761427, - "loss": 46.0, - "step": 35126 - }, - { - "epoch": 2.685704455530707, - "grad_norm": 0.0018608884420245886, - "learning_rate": 0.00019999644256501364, - "loss": 46.0, - "step": 35127 - }, - { - "epoch": 2.6857809125140966, - "grad_norm": 0.004331204108893871, - "learning_rate": 0.00019999644236240723, - "loss": 46.0, - "step": 35128 - }, - { - "epoch": 2.6858573694974863, - "grad_norm": 0.0006679832586087286, - "learning_rate": 0.00019999644215979505, - "loss": 46.0, - "step": 35129 - }, - { - "epoch": 2.685933826480876, - "grad_norm": 0.003459856379777193, - "learning_rate": 0.0001999964419571771, - "loss": 46.0, - "step": 35130 - }, - { - "epoch": 2.686010283464266, - "grad_norm": 0.0014105868758633733, - "learning_rate": 0.0001999964417545534, - "loss": 46.0, - "step": 35131 - }, - { - "epoch": 2.6860867404476556, - "grad_norm": 0.0034913215786218643, - "learning_rate": 0.00019999644155192393, - "loss": 46.0, - "step": 35132 - }, - { - "epoch": 2.6861631974310454, - "grad_norm": 0.0031154786702245474, - "learning_rate": 0.00019999644134928866, - "loss": 46.0, - "step": 35133 - }, - { - "epoch": 2.686239654414435, - "grad_norm": 0.003329252591356635, - "learning_rate": 0.00019999644114664764, - "loss": 46.0, - "step": 35134 - }, - { - "epoch": 2.686316111397825, - "grad_norm": 0.0034857955761253834, - "learning_rate": 0.00019999644094400085, - "loss": 46.0, - "step": 35135 - }, - { - "epoch": 2.6863925683812147, - "grad_norm": 0.0012331334874033928, - "learning_rate": 0.00019999644074134828, - "loss": 46.0, - "step": 35136 - }, - { - "epoch": 2.6864690253646044, - "grad_norm": 0.0033851582556962967, - "learning_rate": 0.00019999644053868995, - "loss": 46.0, - "step": 35137 - }, - { - "epoch": 2.686545482347994, - "grad_norm": 0.0023544603027403355, - "learning_rate": 0.0001999964403360259, - "loss": 46.0, - "step": 35138 - }, - { - "epoch": 2.686621939331384, - "grad_norm": 0.002336224541068077, - "learning_rate": 0.000199996440133356, - "loss": 46.0, - "step": 35139 - }, - { - "epoch": 2.6866983963147733, - "grad_norm": 0.004990197252482176, - "learning_rate": 0.00019999643993068037, - "loss": 46.0, - "step": 35140 - }, - { - "epoch": 2.686774853298163, - "grad_norm": 0.0011775752063840628, - "learning_rate": 0.00019999643972799897, - "loss": 46.0, - "step": 35141 - }, - { - "epoch": 2.6868513102815528, - "grad_norm": 0.004172973334789276, - "learning_rate": 0.0001999964395253118, - "loss": 46.0, - "step": 35142 - }, - { - "epoch": 2.6869277672649425, - "grad_norm": 0.0011100547853857279, - "learning_rate": 0.00019999643932261884, - "loss": 46.0, - "step": 35143 - }, - { - "epoch": 2.6870042242483323, - "grad_norm": 0.003154816571623087, - "learning_rate": 0.00019999643911992014, - "loss": 46.0, - "step": 35144 - }, - { - "epoch": 2.687080681231722, - "grad_norm": 0.0008737738244235516, - "learning_rate": 0.00019999643891721567, - "loss": 46.0, - "step": 35145 - }, - { - "epoch": 2.687157138215112, - "grad_norm": 0.0017895514611154795, - "learning_rate": 0.00019999643871450543, - "loss": 46.0, - "step": 35146 - }, - { - "epoch": 2.6872335951985016, - "grad_norm": 0.0014592085499316454, - "learning_rate": 0.0001999964385117894, - "loss": 46.0, - "step": 35147 - }, - { - "epoch": 2.687310052181891, - "grad_norm": 0.003521059639751911, - "learning_rate": 0.0001999964383090676, - "loss": 46.0, - "step": 35148 - }, - { - "epoch": 2.6873865091652807, - "grad_norm": 0.0019142618402838707, - "learning_rate": 0.00019999643810634003, - "loss": 46.0, - "step": 35149 - }, - { - "epoch": 2.6874629661486704, - "grad_norm": 0.0029140296392142773, - "learning_rate": 0.00019999643790360671, - "loss": 46.0, - "step": 35150 - }, - { - "epoch": 2.68753942313206, - "grad_norm": 0.001297891023568809, - "learning_rate": 0.00019999643770086763, - "loss": 46.0, - "step": 35151 - }, - { - "epoch": 2.68761588011545, - "grad_norm": 0.0015747452853247523, - "learning_rate": 0.00019999643749812275, - "loss": 46.0, - "step": 35152 - }, - { - "epoch": 2.6876923370988397, - "grad_norm": 0.0018668327247723937, - "learning_rate": 0.00019999643729537211, - "loss": 46.0, - "step": 35153 - }, - { - "epoch": 2.6877687940822295, - "grad_norm": 0.0010680914856493473, - "learning_rate": 0.00019999643709261574, - "loss": 46.0, - "step": 35154 - }, - { - "epoch": 2.6878452510656192, - "grad_norm": 0.002542471047490835, - "learning_rate": 0.00019999643688985353, - "loss": 46.0, - "step": 35155 - }, - { - "epoch": 2.687921708049009, - "grad_norm": 0.0011306102387607098, - "learning_rate": 0.00019999643668708558, - "loss": 46.0, - "step": 35156 - }, - { - "epoch": 2.6879981650323987, - "grad_norm": 0.0032563714776188135, - "learning_rate": 0.00019999643648431188, - "loss": 46.0, - "step": 35157 - }, - { - "epoch": 2.6880746220157885, - "grad_norm": 0.002381636993959546, - "learning_rate": 0.00019999643628153239, - "loss": 46.0, - "step": 35158 - }, - { - "epoch": 2.6881510789991783, - "grad_norm": 0.0018264978425577283, - "learning_rate": 0.00019999643607874714, - "loss": 46.0, - "step": 35159 - }, - { - "epoch": 2.688227535982568, - "grad_norm": 0.0035214636009186506, - "learning_rate": 0.00019999643587595612, - "loss": 46.0, - "step": 35160 - }, - { - "epoch": 2.688303992965958, - "grad_norm": 0.0015457180561497808, - "learning_rate": 0.00019999643567315936, - "loss": 46.0, - "step": 35161 - }, - { - "epoch": 2.688380449949347, - "grad_norm": 0.0030938242562115192, - "learning_rate": 0.00019999643547035677, - "loss": 46.0, - "step": 35162 - }, - { - "epoch": 2.688456906932737, - "grad_norm": 0.0020048683509230614, - "learning_rate": 0.00019999643526754846, - "loss": 46.0, - "step": 35163 - }, - { - "epoch": 2.6885333639161266, - "grad_norm": 0.0010409500682726502, - "learning_rate": 0.00019999643506473435, - "loss": 46.0, - "step": 35164 - }, - { - "epoch": 2.6886098208995164, - "grad_norm": 0.0026394836604595184, - "learning_rate": 0.00019999643486191452, - "loss": 46.0, - "step": 35165 - }, - { - "epoch": 2.688686277882906, - "grad_norm": 0.0027438250835984945, - "learning_rate": 0.00019999643465908886, - "loss": 46.0, - "step": 35166 - }, - { - "epoch": 2.688762734866296, - "grad_norm": 0.0018162140622735023, - "learning_rate": 0.00019999643445625743, - "loss": 46.0, - "step": 35167 - }, - { - "epoch": 2.6888391918496857, - "grad_norm": 0.004773767665028572, - "learning_rate": 0.00019999643425342028, - "loss": 46.0, - "step": 35168 - }, - { - "epoch": 2.6889156488330754, - "grad_norm": 0.002019051695242524, - "learning_rate": 0.00019999643405057733, - "loss": 46.0, - "step": 35169 - }, - { - "epoch": 2.6889921058164648, - "grad_norm": 0.001027501537464559, - "learning_rate": 0.0001999964338477286, - "loss": 46.0, - "step": 35170 - }, - { - "epoch": 2.6890685627998545, - "grad_norm": 0.001205040025524795, - "learning_rate": 0.00019999643364487416, - "loss": 46.0, - "step": 35171 - }, - { - "epoch": 2.6891450197832443, - "grad_norm": 0.0021995757706463337, - "learning_rate": 0.00019999643344201392, - "loss": 46.0, - "step": 35172 - }, - { - "epoch": 2.689221476766634, - "grad_norm": 0.0010647772578522563, - "learning_rate": 0.00019999643323914787, - "loss": 46.0, - "step": 35173 - }, - { - "epoch": 2.689297933750024, - "grad_norm": 0.0008045045542530715, - "learning_rate": 0.00019999643303627608, - "loss": 46.0, - "step": 35174 - }, - { - "epoch": 2.6893743907334136, - "grad_norm": 0.0009922184981405735, - "learning_rate": 0.00019999643283339852, - "loss": 46.0, - "step": 35175 - }, - { - "epoch": 2.6894508477168033, - "grad_norm": 0.0014489333843812346, - "learning_rate": 0.0001999964326305152, - "loss": 46.0, - "step": 35176 - }, - { - "epoch": 2.689527304700193, - "grad_norm": 0.0020227523054927588, - "learning_rate": 0.0001999964324276261, - "loss": 46.0, - "step": 35177 - }, - { - "epoch": 2.689603761683583, - "grad_norm": 0.001468850881792605, - "learning_rate": 0.00019999643222473124, - "loss": 46.0, - "step": 35178 - }, - { - "epoch": 2.6896802186669726, - "grad_norm": 0.003612266853451729, - "learning_rate": 0.0001999964320218306, - "loss": 46.0, - "step": 35179 - }, - { - "epoch": 2.6897566756503624, - "grad_norm": 0.0022340486757457256, - "learning_rate": 0.0001999964318189242, - "loss": 46.0, - "step": 35180 - }, - { - "epoch": 2.689833132633752, - "grad_norm": 0.004980416037142277, - "learning_rate": 0.00019999643161601203, - "loss": 46.0, - "step": 35181 - }, - { - "epoch": 2.689909589617142, - "grad_norm": 0.003069869475439191, - "learning_rate": 0.00019999643141309408, - "loss": 46.0, - "step": 35182 - }, - { - "epoch": 2.6899860466005316, - "grad_norm": 0.0022921806667000055, - "learning_rate": 0.00019999643121017035, - "loss": 46.0, - "step": 35183 - }, - { - "epoch": 2.690062503583921, - "grad_norm": 0.0012576646404340863, - "learning_rate": 0.00019999643100724088, - "loss": 46.0, - "step": 35184 - }, - { - "epoch": 2.6901389605673107, - "grad_norm": 0.0012805479345843196, - "learning_rate": 0.00019999643080430564, - "loss": 46.0, - "step": 35185 - }, - { - "epoch": 2.6902154175507005, - "grad_norm": 0.004034394398331642, - "learning_rate": 0.00019999643060136462, - "loss": 46.0, - "step": 35186 - }, - { - "epoch": 2.6902918745340902, - "grad_norm": 0.0019110506400465965, - "learning_rate": 0.00019999643039841783, - "loss": 46.0, - "step": 35187 - }, - { - "epoch": 2.69036833151748, - "grad_norm": 0.005394344218075275, - "learning_rate": 0.00019999643019546524, - "loss": 46.0, - "step": 35188 - }, - { - "epoch": 2.6904447885008698, - "grad_norm": 0.003047197125852108, - "learning_rate": 0.0001999964299925069, - "loss": 46.0, - "step": 35189 - }, - { - "epoch": 2.6905212454842595, - "grad_norm": 0.001858064904808998, - "learning_rate": 0.00019999642978954282, - "loss": 46.0, - "step": 35190 - }, - { - "epoch": 2.6905977024676493, - "grad_norm": 0.002759219380095601, - "learning_rate": 0.00019999642958657297, - "loss": 46.0, - "step": 35191 - }, - { - "epoch": 2.6906741594510386, - "grad_norm": 0.0016702038701623678, - "learning_rate": 0.00019999642938359734, - "loss": 46.0, - "step": 35192 - }, - { - "epoch": 2.6907506164344284, - "grad_norm": 0.005024959798902273, - "learning_rate": 0.0001999964291806159, - "loss": 46.0, - "step": 35193 - }, - { - "epoch": 2.690827073417818, - "grad_norm": 0.0030106569174677134, - "learning_rate": 0.00019999642897762876, - "loss": 46.0, - "step": 35194 - }, - { - "epoch": 2.690903530401208, - "grad_norm": 0.0036216035950928926, - "learning_rate": 0.00019999642877463583, - "loss": 46.0, - "step": 35195 - }, - { - "epoch": 2.6909799873845976, - "grad_norm": 0.0009093974367715418, - "learning_rate": 0.00019999642857163708, - "loss": 46.0, - "step": 35196 - }, - { - "epoch": 2.6910564443679874, - "grad_norm": 0.0031241197139024734, - "learning_rate": 0.0001999964283686326, - "loss": 46.0, - "step": 35197 - }, - { - "epoch": 2.691132901351377, - "grad_norm": 0.0017396444454789162, - "learning_rate": 0.00019999642816562237, - "loss": 46.0, - "step": 35198 - }, - { - "epoch": 2.691209358334767, - "grad_norm": 0.0011495795333757997, - "learning_rate": 0.00019999642796260635, - "loss": 46.0, - "step": 35199 - }, - { - "epoch": 2.6912858153181567, - "grad_norm": 0.002570424461737275, - "learning_rate": 0.00019999642775958456, - "loss": 46.0, - "step": 35200 - }, - { - "epoch": 2.6913622723015465, - "grad_norm": 0.002257262822240591, - "learning_rate": 0.000199996427556557, - "loss": 46.0, - "step": 35201 - }, - { - "epoch": 2.691438729284936, - "grad_norm": 0.0009384382283315063, - "learning_rate": 0.00019999642735352366, - "loss": 46.0, - "step": 35202 - }, - { - "epoch": 2.691515186268326, - "grad_norm": 0.0033902712166309357, - "learning_rate": 0.00019999642715048455, - "loss": 46.0, - "step": 35203 - }, - { - "epoch": 2.6915916432517157, - "grad_norm": 0.0037153740413486958, - "learning_rate": 0.0001999964269474397, - "loss": 46.0, - "step": 35204 - }, - { - "epoch": 2.6916681002351055, - "grad_norm": 0.0019718408584594727, - "learning_rate": 0.00019999642674438907, - "loss": 46.0, - "step": 35205 - }, - { - "epoch": 2.691744557218495, - "grad_norm": 0.0008516894886270165, - "learning_rate": 0.00019999642654133266, - "loss": 46.0, - "step": 35206 - }, - { - "epoch": 2.6918210142018846, - "grad_norm": 0.0014777202159166336, - "learning_rate": 0.0001999964263382705, - "loss": 46.0, - "step": 35207 - }, - { - "epoch": 2.6918974711852743, - "grad_norm": 0.0006840305286459625, - "learning_rate": 0.00019999642613520254, - "loss": 46.0, - "step": 35208 - }, - { - "epoch": 2.691973928168664, - "grad_norm": 0.003831393551081419, - "learning_rate": 0.00019999642593212884, - "loss": 46.0, - "step": 35209 - }, - { - "epoch": 2.692050385152054, - "grad_norm": 0.003159074578434229, - "learning_rate": 0.00019999642572904935, - "loss": 46.0, - "step": 35210 - }, - { - "epoch": 2.6921268421354436, - "grad_norm": 0.0026791805867105722, - "learning_rate": 0.0001999964255259641, - "loss": 46.0, - "step": 35211 - }, - { - "epoch": 2.6922032991188334, - "grad_norm": 0.006213878747075796, - "learning_rate": 0.00019999642532287309, - "loss": 46.0, - "step": 35212 - }, - { - "epoch": 2.692279756102223, - "grad_norm": 0.001594706904143095, - "learning_rate": 0.0001999964251197763, - "loss": 46.0, - "step": 35213 - }, - { - "epoch": 2.6923562130856125, - "grad_norm": 0.0015687403501942754, - "learning_rate": 0.00019999642491667373, - "loss": 46.0, - "step": 35214 - }, - { - "epoch": 2.692432670069002, - "grad_norm": 0.0021522280294448137, - "learning_rate": 0.00019999642471356542, - "loss": 46.0, - "step": 35215 - }, - { - "epoch": 2.692509127052392, - "grad_norm": 0.0032415541354566813, - "learning_rate": 0.00019999642451045131, - "loss": 46.0, - "step": 35216 - }, - { - "epoch": 2.6925855840357817, - "grad_norm": 0.007459606509655714, - "learning_rate": 0.00019999642430733143, - "loss": 46.0, - "step": 35217 - }, - { - "epoch": 2.6926620410191715, - "grad_norm": 0.0024852720089256763, - "learning_rate": 0.00019999642410420577, - "loss": 46.0, - "step": 35218 - }, - { - "epoch": 2.6927384980025613, - "grad_norm": 0.0013977159978821874, - "learning_rate": 0.0001999964239010744, - "loss": 46.0, - "step": 35219 - }, - { - "epoch": 2.692814954985951, - "grad_norm": 0.0006656849291175604, - "learning_rate": 0.0001999964236979372, - "loss": 46.0, - "step": 35220 - }, - { - "epoch": 2.692891411969341, - "grad_norm": 0.0030040808487683535, - "learning_rate": 0.00019999642349479427, - "loss": 46.0, - "step": 35221 - }, - { - "epoch": 2.6929678689527305, - "grad_norm": 0.0021106444764882326, - "learning_rate": 0.00019999642329164555, - "loss": 46.0, - "step": 35222 - }, - { - "epoch": 2.6930443259361203, - "grad_norm": 0.0013050640700384974, - "learning_rate": 0.00019999642308849108, - "loss": 46.0, - "step": 35223 - }, - { - "epoch": 2.69312078291951, - "grad_norm": 0.001634501852095127, - "learning_rate": 0.0001999964228853308, - "loss": 46.0, - "step": 35224 - }, - { - "epoch": 2.6931972399029, - "grad_norm": 0.005037304479628801, - "learning_rate": 0.0001999964226821648, - "loss": 46.0, - "step": 35225 - }, - { - "epoch": 2.6932736968862896, - "grad_norm": 0.0028133774176239967, - "learning_rate": 0.000199996422478993, - "loss": 46.0, - "step": 35226 - }, - { - "epoch": 2.693350153869679, - "grad_norm": 0.0013189658056944609, - "learning_rate": 0.00019999642227581544, - "loss": 46.0, - "step": 35227 - }, - { - "epoch": 2.6934266108530687, - "grad_norm": 0.003289174987003207, - "learning_rate": 0.0001999964220726321, - "loss": 46.0, - "step": 35228 - }, - { - "epoch": 2.6935030678364584, - "grad_norm": 0.0027267406694591045, - "learning_rate": 0.00019999642186944303, - "loss": 46.0, - "step": 35229 - }, - { - "epoch": 2.693579524819848, - "grad_norm": 0.0015421024290844798, - "learning_rate": 0.00019999642166624817, - "loss": 46.0, - "step": 35230 - }, - { - "epoch": 2.693655981803238, - "grad_norm": 0.00500150490552187, - "learning_rate": 0.00019999642146304751, - "loss": 46.0, - "step": 35231 - }, - { - "epoch": 2.6937324387866277, - "grad_norm": 0.0019818164873868227, - "learning_rate": 0.00019999642125984114, - "loss": 46.0, - "step": 35232 - }, - { - "epoch": 2.6938088957700175, - "grad_norm": 0.0008503650315105915, - "learning_rate": 0.00019999642105662894, - "loss": 46.0, - "step": 35233 - }, - { - "epoch": 2.6938853527534072, - "grad_norm": 0.0012651070719584823, - "learning_rate": 0.000199996420853411, - "loss": 46.0, - "step": 35234 - }, - { - "epoch": 2.693961809736797, - "grad_norm": 0.0013718416448682547, - "learning_rate": 0.0001999964206501873, - "loss": 46.0, - "step": 35235 - }, - { - "epoch": 2.6940382667201863, - "grad_norm": 0.0012118752347305417, - "learning_rate": 0.00019999642044695782, - "loss": 46.0, - "step": 35236 - }, - { - "epoch": 2.694114723703576, - "grad_norm": 0.0019576693885028362, - "learning_rate": 0.00019999642024372255, - "loss": 46.0, - "step": 35237 - }, - { - "epoch": 2.694191180686966, - "grad_norm": 0.0013692042557522655, - "learning_rate": 0.00019999642004048154, - "loss": 46.0, - "step": 35238 - }, - { - "epoch": 2.6942676376703556, - "grad_norm": 0.001424482325091958, - "learning_rate": 0.00019999641983723475, - "loss": 46.0, - "step": 35239 - }, - { - "epoch": 2.6943440946537454, - "grad_norm": 0.0012743906117975712, - "learning_rate": 0.00019999641963398216, - "loss": 46.0, - "step": 35240 - }, - { - "epoch": 2.694420551637135, - "grad_norm": 0.001204194501042366, - "learning_rate": 0.00019999641943072385, - "loss": 46.0, - "step": 35241 - }, - { - "epoch": 2.694497008620525, - "grad_norm": 0.003842654637992382, - "learning_rate": 0.00019999641922745974, - "loss": 46.0, - "step": 35242 - }, - { - "epoch": 2.6945734656039146, - "grad_norm": 0.004496491048485041, - "learning_rate": 0.00019999641902418989, - "loss": 46.0, - "step": 35243 - }, - { - "epoch": 2.6946499225873044, - "grad_norm": 0.006841009948402643, - "learning_rate": 0.00019999641882091426, - "loss": 46.0, - "step": 35244 - }, - { - "epoch": 2.694726379570694, - "grad_norm": 0.0011372884036973119, - "learning_rate": 0.00019999641861763285, - "loss": 46.0, - "step": 35245 - }, - { - "epoch": 2.694802836554084, - "grad_norm": 0.003786845598369837, - "learning_rate": 0.00019999641841434565, - "loss": 46.0, - "step": 35246 - }, - { - "epoch": 2.6948792935374737, - "grad_norm": 0.0032982053235173225, - "learning_rate": 0.0001999964182110527, - "loss": 46.0, - "step": 35247 - }, - { - "epoch": 2.6949557505208634, - "grad_norm": 0.002931008581072092, - "learning_rate": 0.00019999641800775398, - "loss": 46.0, - "step": 35248 - }, - { - "epoch": 2.6950322075042528, - "grad_norm": 0.001842274097725749, - "learning_rate": 0.0001999964178044495, - "loss": 46.0, - "step": 35249 - }, - { - "epoch": 2.6951086644876425, - "grad_norm": 0.0025896658189594746, - "learning_rate": 0.00019999641760113927, - "loss": 46.0, - "step": 35250 - }, - { - "epoch": 2.6951851214710323, - "grad_norm": 0.002406393177807331, - "learning_rate": 0.00019999641739782326, - "loss": 46.0, - "step": 35251 - }, - { - "epoch": 2.695261578454422, - "grad_norm": 0.0017267242074012756, - "learning_rate": 0.00019999641719450147, - "loss": 46.0, - "step": 35252 - }, - { - "epoch": 2.695338035437812, - "grad_norm": 0.005018622614443302, - "learning_rate": 0.00019999641699117388, - "loss": 46.0, - "step": 35253 - }, - { - "epoch": 2.6954144924212016, - "grad_norm": 0.0018275509355589747, - "learning_rate": 0.00019999641678784057, - "loss": 46.0, - "step": 35254 - }, - { - "epoch": 2.6954909494045913, - "grad_norm": 0.002327537862583995, - "learning_rate": 0.00019999641658450146, - "loss": 46.0, - "step": 35255 - }, - { - "epoch": 2.695567406387981, - "grad_norm": 0.0013509526615962386, - "learning_rate": 0.0001999964163811566, - "loss": 46.0, - "step": 35256 - }, - { - "epoch": 2.6956438633713704, - "grad_norm": 0.0010866483207792044, - "learning_rate": 0.00019999641617780598, - "loss": 46.0, - "step": 35257 - }, - { - "epoch": 2.69572032035476, - "grad_norm": 0.0009713412146084011, - "learning_rate": 0.00019999641597444955, - "loss": 46.0, - "step": 35258 - }, - { - "epoch": 2.69579677733815, - "grad_norm": 0.001560694188810885, - "learning_rate": 0.0001999964157710874, - "loss": 46.0, - "step": 35259 - }, - { - "epoch": 2.6958732343215397, - "grad_norm": 0.001534345792606473, - "learning_rate": 0.00019999641556771945, - "loss": 46.0, - "step": 35260 - }, - { - "epoch": 2.6959496913049295, - "grad_norm": 0.0016061970964074135, - "learning_rate": 0.00019999641536434573, - "loss": 46.0, - "step": 35261 - }, - { - "epoch": 2.696026148288319, - "grad_norm": 0.0008186472696252167, - "learning_rate": 0.00019999641516096626, - "loss": 46.0, - "step": 35262 - }, - { - "epoch": 2.696102605271709, - "grad_norm": 0.0018672121223062277, - "learning_rate": 0.000199996414957581, - "loss": 46.0, - "step": 35263 - }, - { - "epoch": 2.6961790622550987, - "grad_norm": 0.005111853592097759, - "learning_rate": 0.00019999641475418995, - "loss": 46.0, - "step": 35264 - }, - { - "epoch": 2.6962555192384885, - "grad_norm": 0.0013540758518502116, - "learning_rate": 0.0001999964145507932, - "loss": 46.0, - "step": 35265 - }, - { - "epoch": 2.6963319762218783, - "grad_norm": 0.0012928255600854754, - "learning_rate": 0.0001999964143473906, - "loss": 46.0, - "step": 35266 - }, - { - "epoch": 2.696408433205268, - "grad_norm": 0.000994555070064962, - "learning_rate": 0.0001999964141439823, - "loss": 46.0, - "step": 35267 - }, - { - "epoch": 2.696484890188658, - "grad_norm": 0.004056311212480068, - "learning_rate": 0.0001999964139405682, - "loss": 46.0, - "step": 35268 - }, - { - "epoch": 2.6965613471720475, - "grad_norm": 0.0022508034016937017, - "learning_rate": 0.00019999641373714833, - "loss": 46.0, - "step": 35269 - }, - { - "epoch": 2.6966378041554373, - "grad_norm": 0.0005544980522245169, - "learning_rate": 0.00019999641353372267, - "loss": 46.0, - "step": 35270 - }, - { - "epoch": 2.6967142611388266, - "grad_norm": 0.002987751504406333, - "learning_rate": 0.00019999641333029127, - "loss": 46.0, - "step": 35271 - }, - { - "epoch": 2.6967907181222164, - "grad_norm": 0.0006851975922472775, - "learning_rate": 0.0001999964131268541, - "loss": 46.0, - "step": 35272 - }, - { - "epoch": 2.696867175105606, - "grad_norm": 0.0015056090196594596, - "learning_rate": 0.00019999641292341115, - "loss": 46.0, - "step": 35273 - }, - { - "epoch": 2.696943632088996, - "grad_norm": 0.0022397409193217754, - "learning_rate": 0.00019999641271996245, - "loss": 46.0, - "step": 35274 - }, - { - "epoch": 2.6970200890723857, - "grad_norm": 0.007908019237220287, - "learning_rate": 0.00019999641251650796, - "loss": 46.0, - "step": 35275 - }, - { - "epoch": 2.6970965460557754, - "grad_norm": 0.002086890395730734, - "learning_rate": 0.00019999641231304772, - "loss": 46.0, - "step": 35276 - }, - { - "epoch": 2.697173003039165, - "grad_norm": 0.002060619881376624, - "learning_rate": 0.00019999641210958168, - "loss": 46.0, - "step": 35277 - }, - { - "epoch": 2.697249460022555, - "grad_norm": 0.0007522001978941262, - "learning_rate": 0.0001999964119061099, - "loss": 46.0, - "step": 35278 - }, - { - "epoch": 2.6973259170059443, - "grad_norm": 0.0033598621375858784, - "learning_rate": 0.00019999641170263233, - "loss": 46.0, - "step": 35279 - }, - { - "epoch": 2.697402373989334, - "grad_norm": 0.001232060370966792, - "learning_rate": 0.000199996411499149, - "loss": 46.0, - "step": 35280 - }, - { - "epoch": 2.697478830972724, - "grad_norm": 0.002128458581864834, - "learning_rate": 0.0001999964112956599, - "loss": 46.0, - "step": 35281 - }, - { - "epoch": 2.6975552879561135, - "grad_norm": 0.003697927575558424, - "learning_rate": 0.00019999641109216506, - "loss": 46.0, - "step": 35282 - }, - { - "epoch": 2.6976317449395033, - "grad_norm": 0.0032263384200632572, - "learning_rate": 0.0001999964108886644, - "loss": 46.0, - "step": 35283 - }, - { - "epoch": 2.697708201922893, - "grad_norm": 0.0013719000853598118, - "learning_rate": 0.000199996410685158, - "loss": 46.0, - "step": 35284 - }, - { - "epoch": 2.697784658906283, - "grad_norm": 0.0016746128676459193, - "learning_rate": 0.0001999964104816458, - "loss": 46.0, - "step": 35285 - }, - { - "epoch": 2.6978611158896726, - "grad_norm": 0.002153475768864155, - "learning_rate": 0.00019999641027812788, - "loss": 46.0, - "step": 35286 - }, - { - "epoch": 2.6979375728730624, - "grad_norm": 0.0036397355142980814, - "learning_rate": 0.00019999641007460416, - "loss": 46.0, - "step": 35287 - }, - { - "epoch": 2.698014029856452, - "grad_norm": 0.0024249032139778137, - "learning_rate": 0.00019999640987107466, - "loss": 46.0, - "step": 35288 - }, - { - "epoch": 2.698090486839842, - "grad_norm": 0.002020876156166196, - "learning_rate": 0.00019999640966753942, - "loss": 46.0, - "step": 35289 - }, - { - "epoch": 2.6981669438232316, - "grad_norm": 0.002728401217609644, - "learning_rate": 0.0001999964094639984, - "loss": 46.0, - "step": 35290 - }, - { - "epoch": 2.6982434008066214, - "grad_norm": 0.002699471777305007, - "learning_rate": 0.0001999964092604516, - "loss": 46.0, - "step": 35291 - }, - { - "epoch": 2.698319857790011, - "grad_norm": 0.0021462140139192343, - "learning_rate": 0.00019999640905689906, - "loss": 46.0, - "step": 35292 - }, - { - "epoch": 2.6983963147734005, - "grad_norm": 0.0015100553864613175, - "learning_rate": 0.00019999640885334072, - "loss": 46.0, - "step": 35293 - }, - { - "epoch": 2.6984727717567902, - "grad_norm": 0.0021135956048965454, - "learning_rate": 0.00019999640864977662, - "loss": 46.0, - "step": 35294 - }, - { - "epoch": 2.69854922874018, - "grad_norm": 0.0013445416698232293, - "learning_rate": 0.00019999640844620676, - "loss": 46.0, - "step": 35295 - }, - { - "epoch": 2.6986256857235698, - "grad_norm": 0.0022414170671254396, - "learning_rate": 0.0001999964082426311, - "loss": 46.0, - "step": 35296 - }, - { - "epoch": 2.6987021427069595, - "grad_norm": 0.011541006155312061, - "learning_rate": 0.0001999964080390497, - "loss": 46.0, - "step": 35297 - }, - { - "epoch": 2.6987785996903493, - "grad_norm": 0.0015841620042920113, - "learning_rate": 0.00019999640783546253, - "loss": 46.0, - "step": 35298 - }, - { - "epoch": 2.698855056673739, - "grad_norm": 0.002681355457752943, - "learning_rate": 0.00019999640763186958, - "loss": 46.0, - "step": 35299 - }, - { - "epoch": 2.698931513657129, - "grad_norm": 0.0009408853948116302, - "learning_rate": 0.0001999964074282709, - "loss": 46.0, - "step": 35300 - }, - { - "epoch": 2.699007970640518, - "grad_norm": 0.0065516140311956406, - "learning_rate": 0.00019999640722466637, - "loss": 46.0, - "step": 35301 - }, - { - "epoch": 2.699084427623908, - "grad_norm": 0.0010787565261125565, - "learning_rate": 0.00019999640702105613, - "loss": 46.0, - "step": 35302 - }, - { - "epoch": 2.6991608846072976, - "grad_norm": 0.0023870819713920355, - "learning_rate": 0.00019999640681744011, - "loss": 46.0, - "step": 35303 - }, - { - "epoch": 2.6992373415906874, - "grad_norm": 0.002004152163863182, - "learning_rate": 0.00019999640661381833, - "loss": 46.0, - "step": 35304 - }, - { - "epoch": 2.699313798574077, - "grad_norm": 0.0015158592723309994, - "learning_rate": 0.00019999640641019077, - "loss": 46.0, - "step": 35305 - }, - { - "epoch": 2.699390255557467, - "grad_norm": 0.0020799008198082447, - "learning_rate": 0.00019999640620655743, - "loss": 46.0, - "step": 35306 - }, - { - "epoch": 2.6994667125408567, - "grad_norm": 0.0017356773605570197, - "learning_rate": 0.00019999640600291832, - "loss": 46.0, - "step": 35307 - }, - { - "epoch": 2.6995431695242464, - "grad_norm": 0.0011968230828642845, - "learning_rate": 0.00019999640579927344, - "loss": 46.0, - "step": 35308 - }, - { - "epoch": 2.699619626507636, - "grad_norm": 0.0025514685548841953, - "learning_rate": 0.00019999640559562282, - "loss": 46.0, - "step": 35309 - }, - { - "epoch": 2.699696083491026, - "grad_norm": 0.005680154077708721, - "learning_rate": 0.00019999640539196642, - "loss": 46.0, - "step": 35310 - }, - { - "epoch": 2.6997725404744157, - "grad_norm": 0.0023623218294233084, - "learning_rate": 0.00019999640518830422, - "loss": 46.0, - "step": 35311 - }, - { - "epoch": 2.6998489974578055, - "grad_norm": 0.0020611362997442484, - "learning_rate": 0.0001999964049846363, - "loss": 46.0, - "step": 35312 - }, - { - "epoch": 2.6999254544411952, - "grad_norm": 0.001051096012815833, - "learning_rate": 0.00019999640478096257, - "loss": 46.0, - "step": 35313 - }, - { - "epoch": 2.700001911424585, - "grad_norm": 0.0034514509607106447, - "learning_rate": 0.00019999640457728308, - "loss": 46.0, - "step": 35314 - }, - { - "epoch": 2.7000783684079743, - "grad_norm": 0.0023732339031994343, - "learning_rate": 0.00019999640437359784, - "loss": 46.0, - "step": 35315 - }, - { - "epoch": 2.700154825391364, - "grad_norm": 0.00437965476885438, - "learning_rate": 0.00019999640416990683, - "loss": 46.0, - "step": 35316 - }, - { - "epoch": 2.700231282374754, - "grad_norm": 0.002494441345334053, - "learning_rate": 0.00019999640396621, - "loss": 46.0, - "step": 35317 - }, - { - "epoch": 2.7003077393581436, - "grad_norm": 0.001108883647248149, - "learning_rate": 0.00019999640376250745, - "loss": 46.0, - "step": 35318 - }, - { - "epoch": 2.7003841963415334, - "grad_norm": 0.0007003109785728157, - "learning_rate": 0.00019999640355879914, - "loss": 46.0, - "step": 35319 - }, - { - "epoch": 2.700460653324923, - "grad_norm": 0.002885231049731374, - "learning_rate": 0.00019999640335508504, - "loss": 46.0, - "step": 35320 - }, - { - "epoch": 2.700537110308313, - "grad_norm": 0.006922852247953415, - "learning_rate": 0.00019999640315136516, - "loss": 46.0, - "step": 35321 - }, - { - "epoch": 2.7006135672917027, - "grad_norm": 0.003610148560255766, - "learning_rate": 0.00019999640294763953, - "loss": 46.0, - "step": 35322 - }, - { - "epoch": 2.700690024275092, - "grad_norm": 0.00418445048853755, - "learning_rate": 0.00019999640274390813, - "loss": 46.0, - "step": 35323 - }, - { - "epoch": 2.7007664812584817, - "grad_norm": 0.0013093260349705815, - "learning_rate": 0.00019999640254017096, - "loss": 46.0, - "step": 35324 - }, - { - "epoch": 2.7008429382418715, - "grad_norm": 0.0038081479724496603, - "learning_rate": 0.000199996402336428, - "loss": 46.0, - "step": 35325 - }, - { - "epoch": 2.7009193952252613, - "grad_norm": 0.001398626365698874, - "learning_rate": 0.00019999640213267926, - "loss": 46.0, - "step": 35326 - }, - { - "epoch": 2.700995852208651, - "grad_norm": 0.0038214665837585926, - "learning_rate": 0.0001999964019289248, - "loss": 46.0, - "step": 35327 - }, - { - "epoch": 2.7010723091920408, - "grad_norm": 0.00497637502849102, - "learning_rate": 0.00019999640172516453, - "loss": 46.0, - "step": 35328 - }, - { - "epoch": 2.7011487661754305, - "grad_norm": 0.00842752680182457, - "learning_rate": 0.00019999640152139851, - "loss": 46.0, - "step": 35329 - }, - { - "epoch": 2.7012252231588203, - "grad_norm": 0.001993365352973342, - "learning_rate": 0.00019999640131762673, - "loss": 46.0, - "step": 35330 - }, - { - "epoch": 2.70130168014221, - "grad_norm": 0.00212588207796216, - "learning_rate": 0.0001999964011138492, - "loss": 46.0, - "step": 35331 - }, - { - "epoch": 2.7013781371256, - "grad_norm": 0.0052423616871237755, - "learning_rate": 0.00019999640091006583, - "loss": 46.0, - "step": 35332 - }, - { - "epoch": 2.7014545941089896, - "grad_norm": 0.0010455293813720345, - "learning_rate": 0.00019999640070627675, - "loss": 46.0, - "step": 35333 - }, - { - "epoch": 2.7015310510923793, - "grad_norm": 0.002992102177813649, - "learning_rate": 0.00019999640050248187, - "loss": 46.0, - "step": 35334 - }, - { - "epoch": 2.701607508075769, - "grad_norm": 0.006704271305352449, - "learning_rate": 0.00019999640029868122, - "loss": 46.0, - "step": 35335 - }, - { - "epoch": 2.701683965059159, - "grad_norm": 0.0024180531036108732, - "learning_rate": 0.00019999640009487482, - "loss": 46.0, - "step": 35336 - }, - { - "epoch": 2.701760422042548, - "grad_norm": 0.002983035985380411, - "learning_rate": 0.00019999639989106265, - "loss": 46.0, - "step": 35337 - }, - { - "epoch": 2.701836879025938, - "grad_norm": 0.0051630097441375256, - "learning_rate": 0.0001999963996872447, - "loss": 46.0, - "step": 35338 - }, - { - "epoch": 2.7019133360093277, - "grad_norm": 0.0015458324924111366, - "learning_rate": 0.00019999639948342098, - "loss": 46.0, - "step": 35339 - }, - { - "epoch": 2.7019897929927175, - "grad_norm": 0.003122902475297451, - "learning_rate": 0.00019999639927959151, - "loss": 46.0, - "step": 35340 - }, - { - "epoch": 2.7020662499761072, - "grad_norm": 0.0019318468403071165, - "learning_rate": 0.00019999639907575625, - "loss": 46.0, - "step": 35341 - }, - { - "epoch": 2.702142706959497, - "grad_norm": 0.005658584646880627, - "learning_rate": 0.0001999963988719152, - "loss": 46.0, - "step": 35342 - }, - { - "epoch": 2.7022191639428867, - "grad_norm": 0.002577787032350898, - "learning_rate": 0.00019999639866806842, - "loss": 46.0, - "step": 35343 - }, - { - "epoch": 2.7022956209262765, - "grad_norm": 0.002360620303079486, - "learning_rate": 0.00019999639846421586, - "loss": 46.0, - "step": 35344 - }, - { - "epoch": 2.702372077909666, - "grad_norm": 0.0013628533342853189, - "learning_rate": 0.00019999639826035755, - "loss": 46.0, - "step": 35345 - }, - { - "epoch": 2.7024485348930556, - "grad_norm": 0.0034952175337821245, - "learning_rate": 0.00019999639805649345, - "loss": 46.0, - "step": 35346 - }, - { - "epoch": 2.7025249918764453, - "grad_norm": 0.0005553431692533195, - "learning_rate": 0.0001999963978526236, - "loss": 46.0, - "step": 35347 - }, - { - "epoch": 2.702601448859835, - "grad_norm": 0.0011918711243197322, - "learning_rate": 0.00019999639764874794, - "loss": 46.0, - "step": 35348 - }, - { - "epoch": 2.702677905843225, - "grad_norm": 0.003305414691567421, - "learning_rate": 0.00019999639744486652, - "loss": 46.0, - "step": 35349 - }, - { - "epoch": 2.7027543628266146, - "grad_norm": 0.0020327558740973473, - "learning_rate": 0.00019999639724097934, - "loss": 46.0, - "step": 35350 - }, - { - "epoch": 2.7028308198100044, - "grad_norm": 0.001078551053069532, - "learning_rate": 0.0001999963970370864, - "loss": 46.0, - "step": 35351 - }, - { - "epoch": 2.702907276793394, - "grad_norm": 0.0008975128876045346, - "learning_rate": 0.0001999963968331877, - "loss": 46.0, - "step": 35352 - }, - { - "epoch": 2.702983733776784, - "grad_norm": 0.0022663024719804525, - "learning_rate": 0.0001999963966292832, - "loss": 46.0, - "step": 35353 - }, - { - "epoch": 2.7030601907601737, - "grad_norm": 0.0071113938465714455, - "learning_rate": 0.00019999639642537294, - "loss": 46.0, - "step": 35354 - }, - { - "epoch": 2.7031366477435634, - "grad_norm": 0.0020904135890305042, - "learning_rate": 0.0001999963962214569, - "loss": 46.0, - "step": 35355 - }, - { - "epoch": 2.703213104726953, - "grad_norm": 0.0027654962614178658, - "learning_rate": 0.00019999639601753514, - "loss": 46.0, - "step": 35356 - }, - { - "epoch": 2.703289561710343, - "grad_norm": 0.0020273367408663034, - "learning_rate": 0.00019999639581360756, - "loss": 46.0, - "step": 35357 - }, - { - "epoch": 2.7033660186937323, - "grad_norm": 0.0011409459402784705, - "learning_rate": 0.00019999639560967425, - "loss": 46.0, - "step": 35358 - }, - { - "epoch": 2.703442475677122, - "grad_norm": 0.0032199742272496223, - "learning_rate": 0.00019999639540573515, - "loss": 46.0, - "step": 35359 - }, - { - "epoch": 2.703518932660512, - "grad_norm": 0.002354648895561695, - "learning_rate": 0.00019999639520179027, - "loss": 46.0, - "step": 35360 - }, - { - "epoch": 2.7035953896439016, - "grad_norm": 0.004205424804240465, - "learning_rate": 0.0001999963949978396, - "loss": 46.0, - "step": 35361 - }, - { - "epoch": 2.7036718466272913, - "grad_norm": 0.0016587713034823537, - "learning_rate": 0.00019999639479388321, - "loss": 46.0, - "step": 35362 - }, - { - "epoch": 2.703748303610681, - "grad_norm": 0.0016274135559797287, - "learning_rate": 0.00019999639458992104, - "loss": 46.0, - "step": 35363 - }, - { - "epoch": 2.703824760594071, - "grad_norm": 0.0011680605821311474, - "learning_rate": 0.0001999963943859531, - "loss": 46.0, - "step": 35364 - }, - { - "epoch": 2.7039012175774606, - "grad_norm": 0.0032782619819045067, - "learning_rate": 0.0001999963941819794, - "loss": 46.0, - "step": 35365 - }, - { - "epoch": 2.7039776745608504, - "grad_norm": 0.0015273921890184283, - "learning_rate": 0.00019999639397799988, - "loss": 46.0, - "step": 35366 - }, - { - "epoch": 2.7040541315442397, - "grad_norm": 0.0031500523909926414, - "learning_rate": 0.00019999639377401464, - "loss": 46.0, - "step": 35367 - }, - { - "epoch": 2.7041305885276294, - "grad_norm": 0.0013141479576006532, - "learning_rate": 0.00019999639357002363, - "loss": 46.0, - "step": 35368 - }, - { - "epoch": 2.704207045511019, - "grad_norm": 0.0020598049741238356, - "learning_rate": 0.00019999639336602685, - "loss": 46.0, - "step": 35369 - }, - { - "epoch": 2.704283502494409, - "grad_norm": 0.002656572964042425, - "learning_rate": 0.0001999963931620243, - "loss": 46.0, - "step": 35370 - }, - { - "epoch": 2.7043599594777987, - "grad_norm": 0.0013831154210492969, - "learning_rate": 0.00019999639295801593, - "loss": 46.0, - "step": 35371 - }, - { - "epoch": 2.7044364164611885, - "grad_norm": 0.0015021731378510594, - "learning_rate": 0.00019999639275400185, - "loss": 46.0, - "step": 35372 - }, - { - "epoch": 2.7045128734445782, - "grad_norm": 0.0015439930139109492, - "learning_rate": 0.00019999639254998197, - "loss": 46.0, - "step": 35373 - }, - { - "epoch": 2.704589330427968, - "grad_norm": 0.001346655422821641, - "learning_rate": 0.00019999639234595632, - "loss": 46.0, - "step": 35374 - }, - { - "epoch": 2.7046657874113578, - "grad_norm": 0.002888454357162118, - "learning_rate": 0.00019999639214192492, - "loss": 46.0, - "step": 35375 - }, - { - "epoch": 2.7047422443947475, - "grad_norm": 0.004119241610169411, - "learning_rate": 0.00019999639193788774, - "loss": 46.0, - "step": 35376 - }, - { - "epoch": 2.7048187013781373, - "grad_norm": 0.0015408092876896262, - "learning_rate": 0.0001999963917338448, - "loss": 46.0, - "step": 35377 - }, - { - "epoch": 2.704895158361527, - "grad_norm": 0.0027071263175457716, - "learning_rate": 0.00019999639152979608, - "loss": 46.0, - "step": 35378 - }, - { - "epoch": 2.704971615344917, - "grad_norm": 0.0008009267039597034, - "learning_rate": 0.0001999963913257416, - "loss": 46.0, - "step": 35379 - }, - { - "epoch": 2.705048072328306, - "grad_norm": 0.001985331764444709, - "learning_rate": 0.00019999639112168135, - "loss": 46.0, - "step": 35380 - }, - { - "epoch": 2.705124529311696, - "grad_norm": 0.0011344733648002148, - "learning_rate": 0.00019999639091761534, - "loss": 46.0, - "step": 35381 - }, - { - "epoch": 2.7052009862950857, - "grad_norm": 0.001126070972532034, - "learning_rate": 0.00019999639071354352, - "loss": 46.0, - "step": 35382 - }, - { - "epoch": 2.7052774432784754, - "grad_norm": 0.0031002238392829895, - "learning_rate": 0.00019999639050946596, - "loss": 46.0, - "step": 35383 - }, - { - "epoch": 2.705353900261865, - "grad_norm": 0.0029096296057105064, - "learning_rate": 0.00019999639030538263, - "loss": 46.0, - "step": 35384 - }, - { - "epoch": 2.705430357245255, - "grad_norm": 0.0006352112395688891, - "learning_rate": 0.00019999639010129353, - "loss": 46.0, - "step": 35385 - }, - { - "epoch": 2.7055068142286447, - "grad_norm": 0.002712411340326071, - "learning_rate": 0.00019999638989719868, - "loss": 46.0, - "step": 35386 - }, - { - "epoch": 2.7055832712120345, - "grad_norm": 0.0032077289652079344, - "learning_rate": 0.00019999638969309805, - "loss": 46.0, - "step": 35387 - }, - { - "epoch": 2.7056597281954238, - "grad_norm": 0.00955118890851736, - "learning_rate": 0.00019999638948899162, - "loss": 46.0, - "step": 35388 - }, - { - "epoch": 2.7057361851788135, - "grad_norm": 0.005012406036257744, - "learning_rate": 0.00019999638928487942, - "loss": 46.0, - "step": 35389 - }, - { - "epoch": 2.7058126421622033, - "grad_norm": 0.0012305768905207515, - "learning_rate": 0.0001999963890807615, - "loss": 46.0, - "step": 35390 - }, - { - "epoch": 2.705889099145593, - "grad_norm": 0.0027132295072078705, - "learning_rate": 0.00019999638887663776, - "loss": 46.0, - "step": 35391 - }, - { - "epoch": 2.705965556128983, - "grad_norm": 0.004948517307639122, - "learning_rate": 0.0001999963886725083, - "loss": 46.0, - "step": 35392 - }, - { - "epoch": 2.7060420131123726, - "grad_norm": 0.002816193038597703, - "learning_rate": 0.00019999638846837306, - "loss": 46.0, - "step": 35393 - }, - { - "epoch": 2.7061184700957623, - "grad_norm": 0.0007753594545647502, - "learning_rate": 0.00019999638826423202, - "loss": 46.0, - "step": 35394 - }, - { - "epoch": 2.706194927079152, - "grad_norm": 0.0018347802106291056, - "learning_rate": 0.0001999963880600852, - "loss": 46.0, - "step": 35395 - }, - { - "epoch": 2.706271384062542, - "grad_norm": 0.0029829605482518673, - "learning_rate": 0.00019999638785593265, - "loss": 46.0, - "step": 35396 - }, - { - "epoch": 2.7063478410459316, - "grad_norm": 0.002420371398329735, - "learning_rate": 0.00019999638765177434, - "loss": 46.0, - "step": 35397 - }, - { - "epoch": 2.7064242980293214, - "grad_norm": 0.002120414050295949, - "learning_rate": 0.0001999963874476102, - "loss": 46.0, - "step": 35398 - }, - { - "epoch": 2.706500755012711, - "grad_norm": 0.0017406987026333809, - "learning_rate": 0.00019999638724344036, - "loss": 46.0, - "step": 35399 - }, - { - "epoch": 2.706577211996101, - "grad_norm": 0.0028157096821814775, - "learning_rate": 0.0001999963870392647, - "loss": 46.0, - "step": 35400 - }, - { - "epoch": 2.7066536689794907, - "grad_norm": 0.002767870668321848, - "learning_rate": 0.0001999963868350833, - "loss": 46.0, - "step": 35401 - }, - { - "epoch": 2.70673012596288, - "grad_norm": 0.002995781134814024, - "learning_rate": 0.00019999638663089614, - "loss": 46.0, - "step": 35402 - }, - { - "epoch": 2.7068065829462697, - "grad_norm": 0.0030997698195278645, - "learning_rate": 0.0001999963864267032, - "loss": 46.0, - "step": 35403 - }, - { - "epoch": 2.7068830399296595, - "grad_norm": 0.0011447790311649442, - "learning_rate": 0.00019999638622250447, - "loss": 46.0, - "step": 35404 - }, - { - "epoch": 2.7069594969130493, - "grad_norm": 0.00422257836908102, - "learning_rate": 0.00019999638601829998, - "loss": 46.0, - "step": 35405 - }, - { - "epoch": 2.707035953896439, - "grad_norm": 0.0011334065347909927, - "learning_rate": 0.00019999638581408972, - "loss": 46.0, - "step": 35406 - }, - { - "epoch": 2.707112410879829, - "grad_norm": 0.0016981714870780706, - "learning_rate": 0.0001999963856098737, - "loss": 46.0, - "step": 35407 - }, - { - "epoch": 2.7071888678632186, - "grad_norm": 0.0013773051323369145, - "learning_rate": 0.00019999638540565192, - "loss": 46.0, - "step": 35408 - }, - { - "epoch": 2.7072653248466083, - "grad_norm": 0.0024255216121673584, - "learning_rate": 0.00019999638520142433, - "loss": 46.0, - "step": 35409 - }, - { - "epoch": 2.7073417818299976, - "grad_norm": 0.0020831571891903877, - "learning_rate": 0.000199996384997191, - "loss": 46.0, - "step": 35410 - }, - { - "epoch": 2.7074182388133874, - "grad_norm": 0.003036827314645052, - "learning_rate": 0.00019999638479295192, - "loss": 46.0, - "step": 35411 - }, - { - "epoch": 2.707494695796777, - "grad_norm": 0.0010015048319473863, - "learning_rate": 0.00019999638458870705, - "loss": 46.0, - "step": 35412 - }, - { - "epoch": 2.707571152780167, - "grad_norm": 0.0033317063935101032, - "learning_rate": 0.0001999963843844564, - "loss": 46.0, - "step": 35413 - }, - { - "epoch": 2.7076476097635567, - "grad_norm": 0.000883603235706687, - "learning_rate": 0.0001999963841802, - "loss": 46.0, - "step": 35414 - }, - { - "epoch": 2.7077240667469464, - "grad_norm": 0.0031761000864207745, - "learning_rate": 0.00019999638397593782, - "loss": 46.0, - "step": 35415 - }, - { - "epoch": 2.707800523730336, - "grad_norm": 0.0026982135605067015, - "learning_rate": 0.00019999638377166988, - "loss": 46.0, - "step": 35416 - }, - { - "epoch": 2.707876980713726, - "grad_norm": 0.0013232608325779438, - "learning_rate": 0.00019999638356739613, - "loss": 46.0, - "step": 35417 - }, - { - "epoch": 2.7079534376971157, - "grad_norm": 0.0022729686461389065, - "learning_rate": 0.00019999638336311667, - "loss": 46.0, - "step": 35418 - }, - { - "epoch": 2.7080298946805055, - "grad_norm": 0.0015212746802717447, - "learning_rate": 0.0001999963831588314, - "loss": 46.0, - "step": 35419 - }, - { - "epoch": 2.7081063516638952, - "grad_norm": 0.002108345041051507, - "learning_rate": 0.0001999963829545404, - "loss": 46.0, - "step": 35420 - }, - { - "epoch": 2.708182808647285, - "grad_norm": 0.0010278718546032906, - "learning_rate": 0.00019999638275024358, - "loss": 46.0, - "step": 35421 - }, - { - "epoch": 2.7082592656306748, - "grad_norm": 0.0030459852423518896, - "learning_rate": 0.00019999638254594105, - "loss": 46.0, - "step": 35422 - }, - { - "epoch": 2.7083357226140645, - "grad_norm": 0.0027643898501992226, - "learning_rate": 0.0001999963823416327, - "loss": 46.0, - "step": 35423 - }, - { - "epoch": 2.708412179597454, - "grad_norm": 0.0010355358244851232, - "learning_rate": 0.00019999638213731862, - "loss": 46.0, - "step": 35424 - }, - { - "epoch": 2.7084886365808436, - "grad_norm": 0.0035593723878264427, - "learning_rate": 0.0001999963819329987, - "loss": 46.0, - "step": 35425 - }, - { - "epoch": 2.7085650935642334, - "grad_norm": 0.0013980477815493941, - "learning_rate": 0.00019999638172867309, - "loss": 46.0, - "step": 35426 - }, - { - "epoch": 2.708641550547623, - "grad_norm": 0.001255303737707436, - "learning_rate": 0.0001999963815243417, - "loss": 46.0, - "step": 35427 - }, - { - "epoch": 2.708718007531013, - "grad_norm": 0.001013144152238965, - "learning_rate": 0.0001999963813200045, - "loss": 46.0, - "step": 35428 - }, - { - "epoch": 2.7087944645144026, - "grad_norm": 0.0032639699056744576, - "learning_rate": 0.00019999638111566155, - "loss": 46.0, - "step": 35429 - }, - { - "epoch": 2.7088709214977924, - "grad_norm": 0.0023879779037088156, - "learning_rate": 0.00019999638091131283, - "loss": 46.0, - "step": 35430 - }, - { - "epoch": 2.708947378481182, - "grad_norm": 0.0034250114113092422, - "learning_rate": 0.00019999638070695834, - "loss": 46.0, - "step": 35431 - }, - { - "epoch": 2.7090238354645715, - "grad_norm": 0.0020324087236076593, - "learning_rate": 0.0001999963805025981, - "loss": 46.0, - "step": 35432 - }, - { - "epoch": 2.7091002924479612, - "grad_norm": 0.0038056583143770695, - "learning_rate": 0.0001999963802982321, - "loss": 46.0, - "step": 35433 - }, - { - "epoch": 2.709176749431351, - "grad_norm": 0.004083080217242241, - "learning_rate": 0.00019999638009386028, - "loss": 46.0, - "step": 35434 - }, - { - "epoch": 2.7092532064147408, - "grad_norm": 0.004164343234151602, - "learning_rate": 0.00019999637988948272, - "loss": 46.0, - "step": 35435 - }, - { - "epoch": 2.7093296633981305, - "grad_norm": 0.002578981453552842, - "learning_rate": 0.0001999963796850994, - "loss": 46.0, - "step": 35436 - }, - { - "epoch": 2.7094061203815203, - "grad_norm": 0.004260354209691286, - "learning_rate": 0.00019999637948071028, - "loss": 46.0, - "step": 35437 - }, - { - "epoch": 2.70948257736491, - "grad_norm": 0.00152291648555547, - "learning_rate": 0.0001999963792763154, - "loss": 46.0, - "step": 35438 - }, - { - "epoch": 2.7095590343483, - "grad_norm": 0.0012238899944350123, - "learning_rate": 0.00019999637907191478, - "loss": 46.0, - "step": 35439 - }, - { - "epoch": 2.7096354913316896, - "grad_norm": 0.0012467509368434548, - "learning_rate": 0.00019999637886750838, - "loss": 46.0, - "step": 35440 - }, - { - "epoch": 2.7097119483150793, - "grad_norm": 0.00599660025909543, - "learning_rate": 0.0001999963786630962, - "loss": 46.0, - "step": 35441 - }, - { - "epoch": 2.709788405298469, - "grad_norm": 0.0012749666348099709, - "learning_rate": 0.00019999637845867824, - "loss": 46.0, - "step": 35442 - }, - { - "epoch": 2.709864862281859, - "grad_norm": 0.0025537002366036177, - "learning_rate": 0.00019999637825425453, - "loss": 46.0, - "step": 35443 - }, - { - "epoch": 2.7099413192652486, - "grad_norm": 0.010889332741498947, - "learning_rate": 0.00019999637804982503, - "loss": 46.0, - "step": 35444 - }, - { - "epoch": 2.7100177762486384, - "grad_norm": 0.0038424122612923384, - "learning_rate": 0.00019999637784538977, - "loss": 46.0, - "step": 35445 - }, - { - "epoch": 2.7100942332320277, - "grad_norm": 0.002768601756542921, - "learning_rate": 0.00019999637764094876, - "loss": 46.0, - "step": 35446 - }, - { - "epoch": 2.7101706902154175, - "grad_norm": 0.0013511487049981952, - "learning_rate": 0.00019999637743650198, - "loss": 46.0, - "step": 35447 - }, - { - "epoch": 2.710247147198807, - "grad_norm": 0.004036264028400183, - "learning_rate": 0.0001999963772320494, - "loss": 46.0, - "step": 35448 - }, - { - "epoch": 2.710323604182197, - "grad_norm": 0.0021058148704469204, - "learning_rate": 0.00019999637702759106, - "loss": 46.0, - "step": 35449 - }, - { - "epoch": 2.7104000611655867, - "grad_norm": 0.0024338255170732737, - "learning_rate": 0.00019999637682312696, - "loss": 46.0, - "step": 35450 - }, - { - "epoch": 2.7104765181489765, - "grad_norm": 0.0029360386542975903, - "learning_rate": 0.00019999637661865708, - "loss": 46.0, - "step": 35451 - }, - { - "epoch": 2.7105529751323663, - "grad_norm": 0.0011251167161390185, - "learning_rate": 0.00019999637641418145, - "loss": 46.0, - "step": 35452 - }, - { - "epoch": 2.710629432115756, - "grad_norm": 0.0020247255451977253, - "learning_rate": 0.00019999637620970003, - "loss": 46.0, - "step": 35453 - }, - { - "epoch": 2.7107058890991453, - "grad_norm": 0.0014465537387877703, - "learning_rate": 0.00019999637600521286, - "loss": 46.0, - "step": 35454 - }, - { - "epoch": 2.710782346082535, - "grad_norm": 0.0016894011059775949, - "learning_rate": 0.00019999637580071992, - "loss": 46.0, - "step": 35455 - }, - { - "epoch": 2.710858803065925, - "grad_norm": 0.0010414608987048268, - "learning_rate": 0.00019999637559622123, - "loss": 46.0, - "step": 35456 - }, - { - "epoch": 2.7109352600493146, - "grad_norm": 0.003288756823167205, - "learning_rate": 0.0001999963753917167, - "loss": 46.0, - "step": 35457 - }, - { - "epoch": 2.7110117170327044, - "grad_norm": 0.0006927825161255896, - "learning_rate": 0.00019999637518720647, - "loss": 46.0, - "step": 35458 - }, - { - "epoch": 2.711088174016094, - "grad_norm": 0.0015430875355377793, - "learning_rate": 0.00019999637498269043, - "loss": 46.0, - "step": 35459 - }, - { - "epoch": 2.711164630999484, - "grad_norm": 0.002221212489530444, - "learning_rate": 0.00019999637477816865, - "loss": 46.0, - "step": 35460 - }, - { - "epoch": 2.7112410879828737, - "grad_norm": 0.006392726209014654, - "learning_rate": 0.00019999637457364107, - "loss": 46.0, - "step": 35461 - }, - { - "epoch": 2.7113175449662634, - "grad_norm": 0.002017437480390072, - "learning_rate": 0.00019999637436910774, - "loss": 46.0, - "step": 35462 - }, - { - "epoch": 2.711394001949653, - "grad_norm": 0.0013278081314638257, - "learning_rate": 0.00019999637416456863, - "loss": 46.0, - "step": 35463 - }, - { - "epoch": 2.711470458933043, - "grad_norm": 0.0023082876577973366, - "learning_rate": 0.00019999637396002378, - "loss": 46.0, - "step": 35464 - }, - { - "epoch": 2.7115469159164327, - "grad_norm": 0.003115879837423563, - "learning_rate": 0.00019999637375547313, - "loss": 46.0, - "step": 35465 - }, - { - "epoch": 2.7116233728998225, - "grad_norm": 0.002698844764381647, - "learning_rate": 0.0001999963735509167, - "loss": 46.0, - "step": 35466 - }, - { - "epoch": 2.7116998298832122, - "grad_norm": 0.0011624912731349468, - "learning_rate": 0.00019999637334635454, - "loss": 46.0, - "step": 35467 - }, - { - "epoch": 2.7117762868666015, - "grad_norm": 0.002155412919819355, - "learning_rate": 0.00019999637314178662, - "loss": 46.0, - "step": 35468 - }, - { - "epoch": 2.7118527438499913, - "grad_norm": 0.000701119948644191, - "learning_rate": 0.00019999637293721288, - "loss": 46.0, - "step": 35469 - }, - { - "epoch": 2.711929200833381, - "grad_norm": 0.0009733604383654892, - "learning_rate": 0.00019999637273263341, - "loss": 46.0, - "step": 35470 - }, - { - "epoch": 2.712005657816771, - "grad_norm": 0.004210932180285454, - "learning_rate": 0.00019999637252804812, - "loss": 46.0, - "step": 35471 - }, - { - "epoch": 2.7120821148001606, - "grad_norm": 0.0015431327046826482, - "learning_rate": 0.00019999637232345711, - "loss": 46.0, - "step": 35472 - }, - { - "epoch": 2.7121585717835504, - "grad_norm": 0.0025987043045461178, - "learning_rate": 0.00019999637211886033, - "loss": 46.0, - "step": 35473 - }, - { - "epoch": 2.71223502876694, - "grad_norm": 0.0026992056518793106, - "learning_rate": 0.00019999637191425777, - "loss": 46.0, - "step": 35474 - }, - { - "epoch": 2.71231148575033, - "grad_norm": 0.0030761456582695246, - "learning_rate": 0.00019999637170964942, - "loss": 46.0, - "step": 35475 - }, - { - "epoch": 2.712387942733719, - "grad_norm": 0.0014795601600781083, - "learning_rate": 0.00019999637150503534, - "loss": 46.0, - "step": 35476 - }, - { - "epoch": 2.712464399717109, - "grad_norm": 0.0015331594040617347, - "learning_rate": 0.00019999637130041546, - "loss": 46.0, - "step": 35477 - }, - { - "epoch": 2.7125408567004987, - "grad_norm": 0.0017951850313693285, - "learning_rate": 0.0001999963710957898, - "loss": 46.0, - "step": 35478 - }, - { - "epoch": 2.7126173136838885, - "grad_norm": 0.0012449405621737242, - "learning_rate": 0.0001999963708911584, - "loss": 46.0, - "step": 35479 - }, - { - "epoch": 2.7126937706672782, - "grad_norm": 0.001645549084059894, - "learning_rate": 0.00019999637068652125, - "loss": 46.0, - "step": 35480 - }, - { - "epoch": 2.712770227650668, - "grad_norm": 0.001400281791575253, - "learning_rate": 0.00019999637048187828, - "loss": 46.0, - "step": 35481 - }, - { - "epoch": 2.7128466846340578, - "grad_norm": 0.004993239417672157, - "learning_rate": 0.00019999637027722956, - "loss": 46.0, - "step": 35482 - }, - { - "epoch": 2.7129231416174475, - "grad_norm": 0.001269301981665194, - "learning_rate": 0.00019999637007257507, - "loss": 46.0, - "step": 35483 - }, - { - "epoch": 2.7129995986008373, - "grad_norm": 0.0011594791430979967, - "learning_rate": 0.0001999963698679148, - "loss": 46.0, - "step": 35484 - }, - { - "epoch": 2.713076055584227, - "grad_norm": 0.00197885325178504, - "learning_rate": 0.0001999963696632488, - "loss": 46.0, - "step": 35485 - }, - { - "epoch": 2.713152512567617, - "grad_norm": 0.0017935370560735464, - "learning_rate": 0.00019999636945857701, - "loss": 46.0, - "step": 35486 - }, - { - "epoch": 2.7132289695510066, - "grad_norm": 0.0006822082796134055, - "learning_rate": 0.00019999636925389943, - "loss": 46.0, - "step": 35487 - }, - { - "epoch": 2.7133054265343963, - "grad_norm": 0.0006907025235705078, - "learning_rate": 0.0001999963690492161, - "loss": 46.0, - "step": 35488 - }, - { - "epoch": 2.7133818835177856, - "grad_norm": 0.0016619703965261579, - "learning_rate": 0.000199996368844527, - "loss": 46.0, - "step": 35489 - }, - { - "epoch": 2.7134583405011754, - "grad_norm": 0.003065954428166151, - "learning_rate": 0.00019999636863983212, - "loss": 46.0, - "step": 35490 - }, - { - "epoch": 2.713534797484565, - "grad_norm": 0.002040877938270569, - "learning_rate": 0.0001999963684351315, - "loss": 46.0, - "step": 35491 - }, - { - "epoch": 2.713611254467955, - "grad_norm": 0.00245712255127728, - "learning_rate": 0.00019999636823042508, - "loss": 46.0, - "step": 35492 - }, - { - "epoch": 2.7136877114513447, - "grad_norm": 0.005380833987146616, - "learning_rate": 0.0001999963680257129, - "loss": 46.0, - "step": 35493 - }, - { - "epoch": 2.7137641684347344, - "grad_norm": 0.007019604090601206, - "learning_rate": 0.00019999636782099494, - "loss": 46.0, - "step": 35494 - }, - { - "epoch": 2.713840625418124, - "grad_norm": 0.002466310281306505, - "learning_rate": 0.00019999636761627122, - "loss": 46.0, - "step": 35495 - }, - { - "epoch": 2.713917082401514, - "grad_norm": 0.0022517023608088493, - "learning_rate": 0.00019999636741154176, - "loss": 46.0, - "step": 35496 - }, - { - "epoch": 2.7139935393849037, - "grad_norm": 0.004487857688218355, - "learning_rate": 0.0001999963672068065, - "loss": 46.0, - "step": 35497 - }, - { - "epoch": 2.714069996368293, - "grad_norm": 0.002486432669684291, - "learning_rate": 0.00019999636700206548, - "loss": 46.0, - "step": 35498 - }, - { - "epoch": 2.714146453351683, - "grad_norm": 0.0017079716781154275, - "learning_rate": 0.00019999636679731867, - "loss": 46.0, - "step": 35499 - }, - { - "epoch": 2.7142229103350726, - "grad_norm": 0.0009632365545257926, - "learning_rate": 0.00019999636659256612, - "loss": 46.0, - "step": 35500 - }, - { - "epoch": 2.7142993673184623, - "grad_norm": 0.001022086013108492, - "learning_rate": 0.00019999636638780776, - "loss": 46.0, - "step": 35501 - }, - { - "epoch": 2.714375824301852, - "grad_norm": 0.0017497050575911999, - "learning_rate": 0.00019999636618304366, - "loss": 46.0, - "step": 35502 - }, - { - "epoch": 2.714452281285242, - "grad_norm": 0.0019058617763221264, - "learning_rate": 0.0001999963659782738, - "loss": 46.0, - "step": 35503 - }, - { - "epoch": 2.7145287382686316, - "grad_norm": 0.001119736349210143, - "learning_rate": 0.00019999636577349816, - "loss": 46.0, - "step": 35504 - }, - { - "epoch": 2.7146051952520214, - "grad_norm": 0.0005977515247650445, - "learning_rate": 0.00019999636556871674, - "loss": 46.0, - "step": 35505 - }, - { - "epoch": 2.714681652235411, - "grad_norm": 0.0005228149238973856, - "learning_rate": 0.0001999963653639296, - "loss": 46.0, - "step": 35506 - }, - { - "epoch": 2.714758109218801, - "grad_norm": 0.0025001182220876217, - "learning_rate": 0.00019999636515913663, - "loss": 46.0, - "step": 35507 - }, - { - "epoch": 2.7148345662021907, - "grad_norm": 0.0024831844493746758, - "learning_rate": 0.00019999636495433788, - "loss": 46.0, - "step": 35508 - }, - { - "epoch": 2.7149110231855804, - "grad_norm": 0.0020419922657310963, - "learning_rate": 0.0001999963647495334, - "loss": 46.0, - "step": 35509 - }, - { - "epoch": 2.71498748016897, - "grad_norm": 0.0034890954848378897, - "learning_rate": 0.00019999636454472316, - "loss": 46.0, - "step": 35510 - }, - { - "epoch": 2.7150639371523595, - "grad_norm": 0.0022106829565018415, - "learning_rate": 0.00019999636433990715, - "loss": 46.0, - "step": 35511 - }, - { - "epoch": 2.7151403941357493, - "grad_norm": 0.0013709880877286196, - "learning_rate": 0.00019999636413508537, - "loss": 46.0, - "step": 35512 - }, - { - "epoch": 2.715216851119139, - "grad_norm": 0.001358649111352861, - "learning_rate": 0.00019999636393025778, - "loss": 46.0, - "step": 35513 - }, - { - "epoch": 2.715293308102529, - "grad_norm": 0.004970632493495941, - "learning_rate": 0.00019999636372542443, - "loss": 46.0, - "step": 35514 - }, - { - "epoch": 2.7153697650859185, - "grad_norm": 0.0006761665572412312, - "learning_rate": 0.00019999636352058535, - "loss": 46.0, - "step": 35515 - }, - { - "epoch": 2.7154462220693083, - "grad_norm": 0.002757573500275612, - "learning_rate": 0.00019999636331574045, - "loss": 46.0, - "step": 35516 - }, - { - "epoch": 2.715522679052698, - "grad_norm": 0.001515775453299284, - "learning_rate": 0.00019999636311088983, - "loss": 46.0, - "step": 35517 - }, - { - "epoch": 2.715599136036088, - "grad_norm": 0.0017731004627421498, - "learning_rate": 0.00019999636290603343, - "loss": 46.0, - "step": 35518 - }, - { - "epoch": 2.715675593019477, - "grad_norm": 0.0014121552230790257, - "learning_rate": 0.00019999636270117124, - "loss": 46.0, - "step": 35519 - }, - { - "epoch": 2.715752050002867, - "grad_norm": 0.0013069083215668797, - "learning_rate": 0.0001999963624963033, - "loss": 46.0, - "step": 35520 - }, - { - "epoch": 2.7158285069862567, - "grad_norm": 0.0019986287225037813, - "learning_rate": 0.00019999636229142958, - "loss": 46.0, - "step": 35521 - }, - { - "epoch": 2.7159049639696464, - "grad_norm": 0.000965594663284719, - "learning_rate": 0.0001999963620865501, - "loss": 46.0, - "step": 35522 - }, - { - "epoch": 2.715981420953036, - "grad_norm": 0.0017062557162716985, - "learning_rate": 0.00019999636188166485, - "loss": 46.0, - "step": 35523 - }, - { - "epoch": 2.716057877936426, - "grad_norm": 0.001205394510179758, - "learning_rate": 0.0001999963616767738, - "loss": 46.0, - "step": 35524 - }, - { - "epoch": 2.7161343349198157, - "grad_norm": 0.0023399549536406994, - "learning_rate": 0.00019999636147187698, - "loss": 46.0, - "step": 35525 - }, - { - "epoch": 2.7162107919032055, - "grad_norm": 0.0017552935751155019, - "learning_rate": 0.00019999636126697445, - "loss": 46.0, - "step": 35526 - }, - { - "epoch": 2.7162872488865952, - "grad_norm": 0.0023944140411913395, - "learning_rate": 0.0001999963610620661, - "loss": 46.0, - "step": 35527 - }, - { - "epoch": 2.716363705869985, - "grad_norm": 0.002572909463196993, - "learning_rate": 0.00019999636085715202, - "loss": 46.0, - "step": 35528 - }, - { - "epoch": 2.7164401628533748, - "grad_norm": 0.0019933246076107025, - "learning_rate": 0.00019999636065223215, - "loss": 46.0, - "step": 35529 - }, - { - "epoch": 2.7165166198367645, - "grad_norm": 0.0023498532827943563, - "learning_rate": 0.0001999963604473065, - "loss": 46.0, - "step": 35530 - }, - { - "epoch": 2.7165930768201543, - "grad_norm": 0.0037701076362282038, - "learning_rate": 0.00019999636024237508, - "loss": 46.0, - "step": 35531 - }, - { - "epoch": 2.716669533803544, - "grad_norm": 0.0011174597311764956, - "learning_rate": 0.00019999636003743788, - "loss": 46.0, - "step": 35532 - }, - { - "epoch": 2.7167459907869334, - "grad_norm": 0.0029042684473097324, - "learning_rate": 0.00019999635983249496, - "loss": 46.0, - "step": 35533 - }, - { - "epoch": 2.716822447770323, - "grad_norm": 0.003120404202491045, - "learning_rate": 0.00019999635962754625, - "loss": 46.0, - "step": 35534 - }, - { - "epoch": 2.716898904753713, - "grad_norm": 0.0019677034579217434, - "learning_rate": 0.00019999635942259176, - "loss": 46.0, - "step": 35535 - }, - { - "epoch": 2.7169753617371026, - "grad_norm": 0.002027340931817889, - "learning_rate": 0.0001999963592176315, - "loss": 46.0, - "step": 35536 - }, - { - "epoch": 2.7170518187204924, - "grad_norm": 0.0013542985543608665, - "learning_rate": 0.00019999635901266546, - "loss": 46.0, - "step": 35537 - }, - { - "epoch": 2.717128275703882, - "grad_norm": 0.002734426874667406, - "learning_rate": 0.00019999635880769368, - "loss": 46.0, - "step": 35538 - }, - { - "epoch": 2.717204732687272, - "grad_norm": 0.0033826292492449284, - "learning_rate": 0.0001999963586027161, - "loss": 46.0, - "step": 35539 - }, - { - "epoch": 2.7172811896706617, - "grad_norm": 0.001405687304213643, - "learning_rate": 0.00019999635839773277, - "loss": 46.0, - "step": 35540 - }, - { - "epoch": 2.717357646654051, - "grad_norm": 0.003008811268955469, - "learning_rate": 0.00019999635819274367, - "loss": 46.0, - "step": 35541 - }, - { - "epoch": 2.7174341036374408, - "grad_norm": 0.0008095461525954306, - "learning_rate": 0.0001999963579877488, - "loss": 46.0, - "step": 35542 - }, - { - "epoch": 2.7175105606208305, - "grad_norm": 0.003856959054246545, - "learning_rate": 0.00019999635778274817, - "loss": 46.0, - "step": 35543 - }, - { - "epoch": 2.7175870176042203, - "grad_norm": 0.0023096930235624313, - "learning_rate": 0.00019999635757774175, - "loss": 46.0, - "step": 35544 - }, - { - "epoch": 2.71766347458761, - "grad_norm": 0.0013384120538830757, - "learning_rate": 0.00019999635737272958, - "loss": 46.0, - "step": 35545 - }, - { - "epoch": 2.717739931571, - "grad_norm": 0.001882214448414743, - "learning_rate": 0.0001999963571677116, - "loss": 46.0, - "step": 35546 - }, - { - "epoch": 2.7178163885543896, - "grad_norm": 0.0015160755719989538, - "learning_rate": 0.0001999963569626879, - "loss": 46.0, - "step": 35547 - }, - { - "epoch": 2.7178928455377793, - "grad_norm": 0.0011680960888043046, - "learning_rate": 0.0001999963567576584, - "loss": 46.0, - "step": 35548 - }, - { - "epoch": 2.717969302521169, - "grad_norm": 0.002470925450325012, - "learning_rate": 0.00019999635655262315, - "loss": 46.0, - "step": 35549 - }, - { - "epoch": 2.718045759504559, - "grad_norm": 0.0013076576869934797, - "learning_rate": 0.00019999635634758214, - "loss": 46.0, - "step": 35550 - }, - { - "epoch": 2.7181222164879486, - "grad_norm": 0.003878205083310604, - "learning_rate": 0.00019999635614253533, - "loss": 46.0, - "step": 35551 - }, - { - "epoch": 2.7181986734713384, - "grad_norm": 0.006067465990781784, - "learning_rate": 0.00019999635593748275, - "loss": 46.0, - "step": 35552 - }, - { - "epoch": 2.718275130454728, - "grad_norm": 0.0033899592235684395, - "learning_rate": 0.00019999635573242442, - "loss": 46.0, - "step": 35553 - }, - { - "epoch": 2.718351587438118, - "grad_norm": 0.0023160893470048904, - "learning_rate": 0.00019999635552736035, - "loss": 46.0, - "step": 35554 - }, - { - "epoch": 2.718428044421507, - "grad_norm": 0.0024210549890995026, - "learning_rate": 0.00019999635532229047, - "loss": 46.0, - "step": 35555 - }, - { - "epoch": 2.718504501404897, - "grad_norm": 0.0027642895001918077, - "learning_rate": 0.00019999635511721482, - "loss": 46.0, - "step": 35556 - }, - { - "epoch": 2.7185809583882867, - "grad_norm": 0.002420208416879177, - "learning_rate": 0.0001999963549121334, - "loss": 46.0, - "step": 35557 - }, - { - "epoch": 2.7186574153716765, - "grad_norm": 0.0031396544072777033, - "learning_rate": 0.00019999635470704623, - "loss": 46.0, - "step": 35558 - }, - { - "epoch": 2.7187338723550663, - "grad_norm": 0.0014422963140532374, - "learning_rate": 0.0001999963545019533, - "loss": 46.0, - "step": 35559 - }, - { - "epoch": 2.718810329338456, - "grad_norm": 0.0010809110244736075, - "learning_rate": 0.00019999635429685455, - "loss": 46.0, - "step": 35560 - }, - { - "epoch": 2.7188867863218458, - "grad_norm": 0.007205615285784006, - "learning_rate": 0.0001999963540917501, - "loss": 46.0, - "step": 35561 - }, - { - "epoch": 2.7189632433052355, - "grad_norm": 0.0031048774253576994, - "learning_rate": 0.00019999635388663983, - "loss": 46.0, - "step": 35562 - }, - { - "epoch": 2.719039700288625, - "grad_norm": 0.001426363829523325, - "learning_rate": 0.0001999963536815238, - "loss": 46.0, - "step": 35563 - }, - { - "epoch": 2.7191161572720146, - "grad_norm": 0.006565196439623833, - "learning_rate": 0.00019999635347640199, - "loss": 46.0, - "step": 35564 - }, - { - "epoch": 2.7191926142554044, - "grad_norm": 0.0015313653275370598, - "learning_rate": 0.00019999635327127443, - "loss": 46.0, - "step": 35565 - }, - { - "epoch": 2.719269071238794, - "grad_norm": 0.004134522285312414, - "learning_rate": 0.0001999963530661411, - "loss": 46.0, - "step": 35566 - }, - { - "epoch": 2.719345528222184, - "grad_norm": 0.001232789596542716, - "learning_rate": 0.000199996352861002, - "loss": 46.0, - "step": 35567 - }, - { - "epoch": 2.7194219852055737, - "grad_norm": 0.0021813849452883005, - "learning_rate": 0.00019999635265585713, - "loss": 46.0, - "step": 35568 - }, - { - "epoch": 2.7194984421889634, - "grad_norm": 0.0008139744168147445, - "learning_rate": 0.00019999635245070648, - "loss": 46.0, - "step": 35569 - }, - { - "epoch": 2.719574899172353, - "grad_norm": 0.005653867963701487, - "learning_rate": 0.00019999635224555008, - "loss": 46.0, - "step": 35570 - }, - { - "epoch": 2.719651356155743, - "grad_norm": 0.0007541353115811944, - "learning_rate": 0.0001999963520403879, - "loss": 46.0, - "step": 35571 - }, - { - "epoch": 2.7197278131391327, - "grad_norm": 0.0018454411765560508, - "learning_rate": 0.00019999635183521995, - "loss": 46.0, - "step": 35572 - }, - { - "epoch": 2.7198042701225225, - "grad_norm": 0.002281837398186326, - "learning_rate": 0.0001999963516300462, - "loss": 46.0, - "step": 35573 - }, - { - "epoch": 2.719880727105912, - "grad_norm": 0.0009177671163342893, - "learning_rate": 0.00019999635142486675, - "loss": 46.0, - "step": 35574 - }, - { - "epoch": 2.719957184089302, - "grad_norm": 0.003966895863413811, - "learning_rate": 0.00019999635121968149, - "loss": 46.0, - "step": 35575 - }, - { - "epoch": 2.7200336410726917, - "grad_norm": 0.0015515388222411275, - "learning_rate": 0.00019999635101449045, - "loss": 46.0, - "step": 35576 - }, - { - "epoch": 2.720110098056081, - "grad_norm": 0.002380692632868886, - "learning_rate": 0.00019999635080929364, - "loss": 46.0, - "step": 35577 - }, - { - "epoch": 2.720186555039471, - "grad_norm": 0.0020098290406167507, - "learning_rate": 0.0001999963506040911, - "loss": 46.0, - "step": 35578 - }, - { - "epoch": 2.7202630120228606, - "grad_norm": 0.0036084617022424936, - "learning_rate": 0.00019999635039888276, - "loss": 46.0, - "step": 35579 - }, - { - "epoch": 2.7203394690062503, - "grad_norm": 0.001635113381780684, - "learning_rate": 0.00019999635019366866, - "loss": 46.0, - "step": 35580 - }, - { - "epoch": 2.72041592598964, - "grad_norm": 0.0009914236143231392, - "learning_rate": 0.00019999634998844879, - "loss": 46.0, - "step": 35581 - }, - { - "epoch": 2.72049238297303, - "grad_norm": 0.0019363588653504848, - "learning_rate": 0.00019999634978322314, - "loss": 46.0, - "step": 35582 - }, - { - "epoch": 2.7205688399564196, - "grad_norm": 0.005446561146527529, - "learning_rate": 0.00019999634957799174, - "loss": 46.0, - "step": 35583 - }, - { - "epoch": 2.7206452969398094, - "grad_norm": 0.001541463891044259, - "learning_rate": 0.00019999634937275455, - "loss": 46.0, - "step": 35584 - }, - { - "epoch": 2.7207217539231987, - "grad_norm": 0.004012261051684618, - "learning_rate": 0.0001999963491675116, - "loss": 46.0, - "step": 35585 - }, - { - "epoch": 2.7207982109065885, - "grad_norm": 0.0019922463688999414, - "learning_rate": 0.0001999963489622629, - "loss": 46.0, - "step": 35586 - }, - { - "epoch": 2.7208746678899782, - "grad_norm": 0.003240313148126006, - "learning_rate": 0.00019999634875700838, - "loss": 46.0, - "step": 35587 - }, - { - "epoch": 2.720951124873368, - "grad_norm": 0.0028779287822544575, - "learning_rate": 0.00019999634855174815, - "loss": 46.0, - "step": 35588 - }, - { - "epoch": 2.7210275818567577, - "grad_norm": 0.00312438840046525, - "learning_rate": 0.0001999963483464821, - "loss": 46.0, - "step": 35589 - }, - { - "epoch": 2.7211040388401475, - "grad_norm": 0.0009869623463600874, - "learning_rate": 0.0001999963481412103, - "loss": 46.0, - "step": 35590 - }, - { - "epoch": 2.7211804958235373, - "grad_norm": 0.0036377054639160633, - "learning_rate": 0.00019999634793593275, - "loss": 46.0, - "step": 35591 - }, - { - "epoch": 2.721256952806927, - "grad_norm": 0.0024209481198340654, - "learning_rate": 0.00019999634773064942, - "loss": 46.0, - "step": 35592 - }, - { - "epoch": 2.721333409790317, - "grad_norm": 0.0050981780514121056, - "learning_rate": 0.00019999634752536032, - "loss": 46.0, - "step": 35593 - }, - { - "epoch": 2.7214098667737066, - "grad_norm": 0.0013969967840239406, - "learning_rate": 0.00019999634732006545, - "loss": 46.0, - "step": 35594 - }, - { - "epoch": 2.7214863237570963, - "grad_norm": 0.004011660348623991, - "learning_rate": 0.0001999963471147648, - "loss": 46.0, - "step": 35595 - }, - { - "epoch": 2.721562780740486, - "grad_norm": 0.002396687399595976, - "learning_rate": 0.00019999634690945838, - "loss": 46.0, - "step": 35596 - }, - { - "epoch": 2.721639237723876, - "grad_norm": 0.0010593049228191376, - "learning_rate": 0.00019999634670414621, - "loss": 46.0, - "step": 35597 - }, - { - "epoch": 2.7217156947072656, - "grad_norm": 0.0007295301184058189, - "learning_rate": 0.00019999634649882825, - "loss": 46.0, - "step": 35598 - }, - { - "epoch": 2.721792151690655, - "grad_norm": 0.0017183547606691718, - "learning_rate": 0.00019999634629350453, - "loss": 46.0, - "step": 35599 - }, - { - "epoch": 2.7218686086740447, - "grad_norm": 0.002091650851070881, - "learning_rate": 0.00019999634608817505, - "loss": 46.0, - "step": 35600 - }, - { - "epoch": 2.7219450656574344, - "grad_norm": 0.0021779905073344707, - "learning_rate": 0.00019999634588283979, - "loss": 46.0, - "step": 35601 - }, - { - "epoch": 2.722021522640824, - "grad_norm": 0.003952302038669586, - "learning_rate": 0.00019999634567749875, - "loss": 46.0, - "step": 35602 - }, - { - "epoch": 2.722097979624214, - "grad_norm": 0.0035736628342419863, - "learning_rate": 0.00019999634547215197, - "loss": 46.0, - "step": 35603 - }, - { - "epoch": 2.7221744366076037, - "grad_norm": 0.014075192622840405, - "learning_rate": 0.0001999963452667994, - "loss": 46.0, - "step": 35604 - }, - { - "epoch": 2.7222508935909935, - "grad_norm": 0.003875596448779106, - "learning_rate": 0.00019999634506144106, - "loss": 46.0, - "step": 35605 - }, - { - "epoch": 2.7223273505743832, - "grad_norm": 0.012529782019555569, - "learning_rate": 0.00019999634485607696, - "loss": 46.0, - "step": 35606 - }, - { - "epoch": 2.7224038075577726, - "grad_norm": 0.0012497693533077836, - "learning_rate": 0.0001999963446507071, - "loss": 46.0, - "step": 35607 - }, - { - "epoch": 2.7224802645411623, - "grad_norm": 0.0019326459150761366, - "learning_rate": 0.00019999634444533144, - "loss": 46.0, - "step": 35608 - }, - { - "epoch": 2.722556721524552, - "grad_norm": 0.0034439931623637676, - "learning_rate": 0.00019999634423995005, - "loss": 46.0, - "step": 35609 - }, - { - "epoch": 2.722633178507942, - "grad_norm": 0.0005621527088806033, - "learning_rate": 0.00019999634403456286, - "loss": 46.0, - "step": 35610 - }, - { - "epoch": 2.7227096354913316, - "grad_norm": 0.0011200627777725458, - "learning_rate": 0.00019999634382916992, - "loss": 46.0, - "step": 35611 - }, - { - "epoch": 2.7227860924747214, - "grad_norm": 0.0025317443069070578, - "learning_rate": 0.0001999963436237712, - "loss": 46.0, - "step": 35612 - }, - { - "epoch": 2.722862549458111, - "grad_norm": 0.008220239542424679, - "learning_rate": 0.00019999634341836672, - "loss": 46.0, - "step": 35613 - }, - { - "epoch": 2.722939006441501, - "grad_norm": 0.0037902274634689093, - "learning_rate": 0.00019999634321295643, - "loss": 46.0, - "step": 35614 - }, - { - "epoch": 2.7230154634248906, - "grad_norm": 0.0019833194091916084, - "learning_rate": 0.00019999634300754042, - "loss": 46.0, - "step": 35615 - }, - { - "epoch": 2.7230919204082804, - "grad_norm": 0.0006837555556558073, - "learning_rate": 0.00019999634280211864, - "loss": 46.0, - "step": 35616 - }, - { - "epoch": 2.72316837739167, - "grad_norm": 0.004103212617337704, - "learning_rate": 0.00019999634259669103, - "loss": 46.0, - "step": 35617 - }, - { - "epoch": 2.72324483437506, - "grad_norm": 0.0024128654040396214, - "learning_rate": 0.00019999634239125774, - "loss": 46.0, - "step": 35618 - }, - { - "epoch": 2.7233212913584497, - "grad_norm": 0.001528444467112422, - "learning_rate": 0.0001999963421858186, - "loss": 46.0, - "step": 35619 - }, - { - "epoch": 2.723397748341839, - "grad_norm": 0.0012990866089239717, - "learning_rate": 0.00019999634198037373, - "loss": 46.0, - "step": 35620 - }, - { - "epoch": 2.7234742053252288, - "grad_norm": 0.0012682609958574176, - "learning_rate": 0.00019999634177492312, - "loss": 46.0, - "step": 35621 - }, - { - "epoch": 2.7235506623086185, - "grad_norm": 0.004261441994458437, - "learning_rate": 0.0001999963415694667, - "loss": 46.0, - "step": 35622 - }, - { - "epoch": 2.7236271192920083, - "grad_norm": 0.0017196964472532272, - "learning_rate": 0.0001999963413640045, - "loss": 46.0, - "step": 35623 - }, - { - "epoch": 2.723703576275398, - "grad_norm": 0.0011589573696255684, - "learning_rate": 0.00019999634115853656, - "loss": 46.0, - "step": 35624 - }, - { - "epoch": 2.723780033258788, - "grad_norm": 0.001352710765786469, - "learning_rate": 0.00019999634095306282, - "loss": 46.0, - "step": 35625 - }, - { - "epoch": 2.7238564902421776, - "grad_norm": 0.0031290026381611824, - "learning_rate": 0.00019999634074758336, - "loss": 46.0, - "step": 35626 - }, - { - "epoch": 2.7239329472255673, - "grad_norm": 0.001416188315488398, - "learning_rate": 0.0001999963405420981, - "loss": 46.0, - "step": 35627 - }, - { - "epoch": 2.724009404208957, - "grad_norm": 0.002246958203613758, - "learning_rate": 0.00019999634033660707, - "loss": 46.0, - "step": 35628 - }, - { - "epoch": 2.7240858611923464, - "grad_norm": 0.0038429428823292255, - "learning_rate": 0.0001999963401311103, - "loss": 46.0, - "step": 35629 - }, - { - "epoch": 2.724162318175736, - "grad_norm": 0.005536423996090889, - "learning_rate": 0.0001999963399256077, - "loss": 46.0, - "step": 35630 - }, - { - "epoch": 2.724238775159126, - "grad_norm": 0.003033737652003765, - "learning_rate": 0.00019999633972009936, - "loss": 46.0, - "step": 35631 - }, - { - "epoch": 2.7243152321425157, - "grad_norm": 0.0022738086991012096, - "learning_rate": 0.00019999633951458528, - "loss": 46.0, - "step": 35632 - }, - { - "epoch": 2.7243916891259055, - "grad_norm": 0.0040948837995529175, - "learning_rate": 0.0001999963393090654, - "loss": 46.0, - "step": 35633 - }, - { - "epoch": 2.724468146109295, - "grad_norm": 0.0018557875882834196, - "learning_rate": 0.00019999633910353976, - "loss": 46.0, - "step": 35634 - }, - { - "epoch": 2.724544603092685, - "grad_norm": 0.002667769556865096, - "learning_rate": 0.00019999633889800834, - "loss": 46.0, - "step": 35635 - }, - { - "epoch": 2.7246210600760747, - "grad_norm": 0.001529740751720965, - "learning_rate": 0.00019999633869247115, - "loss": 46.0, - "step": 35636 - }, - { - "epoch": 2.7246975170594645, - "grad_norm": 0.0015548748197034001, - "learning_rate": 0.0001999963384869282, - "loss": 46.0, - "step": 35637 - }, - { - "epoch": 2.7247739740428543, - "grad_norm": 0.0031659023370593786, - "learning_rate": 0.0001999963382813795, - "loss": 46.0, - "step": 35638 - }, - { - "epoch": 2.724850431026244, - "grad_norm": 0.005182690918445587, - "learning_rate": 0.000199996338075825, - "loss": 46.0, - "step": 35639 - }, - { - "epoch": 2.724926888009634, - "grad_norm": 0.0036581619642674923, - "learning_rate": 0.00019999633787026475, - "loss": 46.0, - "step": 35640 - }, - { - "epoch": 2.7250033449930235, - "grad_norm": 0.00366486725397408, - "learning_rate": 0.00019999633766469872, - "loss": 46.0, - "step": 35641 - }, - { - "epoch": 2.725079801976413, - "grad_norm": 0.0017780952621251345, - "learning_rate": 0.00019999633745912694, - "loss": 46.0, - "step": 35642 - }, - { - "epoch": 2.7251562589598026, - "grad_norm": 0.0021709902212023735, - "learning_rate": 0.00019999633725354936, - "loss": 46.0, - "step": 35643 - }, - { - "epoch": 2.7252327159431924, - "grad_norm": 0.0026098215021193027, - "learning_rate": 0.000199996337047966, - "loss": 46.0, - "step": 35644 - }, - { - "epoch": 2.725309172926582, - "grad_norm": 0.003806529100984335, - "learning_rate": 0.0001999963368423769, - "loss": 46.0, - "step": 35645 - }, - { - "epoch": 2.725385629909972, - "grad_norm": 0.004472440108656883, - "learning_rate": 0.00019999633663678204, - "loss": 46.0, - "step": 35646 - }, - { - "epoch": 2.7254620868933617, - "grad_norm": 0.0022441651672124863, - "learning_rate": 0.0001999963364311814, - "loss": 46.0, - "step": 35647 - }, - { - "epoch": 2.7255385438767514, - "grad_norm": 0.0036506294272840023, - "learning_rate": 0.000199996336225575, - "loss": 46.0, - "step": 35648 - }, - { - "epoch": 2.725615000860141, - "grad_norm": 0.001735470024868846, - "learning_rate": 0.0001999963360199628, - "loss": 46.0, - "step": 35649 - }, - { - "epoch": 2.7256914578435305, - "grad_norm": 0.0015078134601935744, - "learning_rate": 0.00019999633581434484, - "loss": 46.0, - "step": 35650 - }, - { - "epoch": 2.7257679148269203, - "grad_norm": 0.0022744243033230305, - "learning_rate": 0.00019999633560872115, - "loss": 46.0, - "step": 35651 - }, - { - "epoch": 2.72584437181031, - "grad_norm": 0.0013780840672552586, - "learning_rate": 0.00019999633540309164, - "loss": 46.0, - "step": 35652 - }, - { - "epoch": 2.7259208287937, - "grad_norm": 0.0014011393068358302, - "learning_rate": 0.0001999963351974564, - "loss": 46.0, - "step": 35653 - }, - { - "epoch": 2.7259972857770896, - "grad_norm": 0.0027522617019712925, - "learning_rate": 0.00019999633499181538, - "loss": 46.0, - "step": 35654 - }, - { - "epoch": 2.7260737427604793, - "grad_norm": 0.003042238298803568, - "learning_rate": 0.00019999633478616857, - "loss": 46.0, - "step": 35655 - }, - { - "epoch": 2.726150199743869, - "grad_norm": 0.0026717178989201784, - "learning_rate": 0.000199996334580516, - "loss": 46.0, - "step": 35656 - }, - { - "epoch": 2.726226656727259, - "grad_norm": 0.001669479301199317, - "learning_rate": 0.00019999633437485766, - "loss": 46.0, - "step": 35657 - }, - { - "epoch": 2.7263031137106486, - "grad_norm": 0.000637056480627507, - "learning_rate": 0.00019999633416919357, - "loss": 46.0, - "step": 35658 - }, - { - "epoch": 2.7263795706940384, - "grad_norm": 0.0011748549295589328, - "learning_rate": 0.0001999963339635237, - "loss": 46.0, - "step": 35659 - }, - { - "epoch": 2.726456027677428, - "grad_norm": 0.0032335759606212378, - "learning_rate": 0.00019999633375784805, - "loss": 46.0, - "step": 35660 - }, - { - "epoch": 2.726532484660818, - "grad_norm": 0.004441841039806604, - "learning_rate": 0.00019999633355216663, - "loss": 46.0, - "step": 35661 - }, - { - "epoch": 2.7266089416442076, - "grad_norm": 0.0007603495614603162, - "learning_rate": 0.00019999633334647946, - "loss": 46.0, - "step": 35662 - }, - { - "epoch": 2.7266853986275974, - "grad_norm": 0.002256447449326515, - "learning_rate": 0.0001999963331407865, - "loss": 46.0, - "step": 35663 - }, - { - "epoch": 2.7267618556109867, - "grad_norm": 0.0007049803389236331, - "learning_rate": 0.00019999633293508778, - "loss": 46.0, - "step": 35664 - }, - { - "epoch": 2.7268383125943765, - "grad_norm": 0.0009467173367738724, - "learning_rate": 0.0001999963327293833, - "loss": 46.0, - "step": 35665 - }, - { - "epoch": 2.7269147695777662, - "grad_norm": 0.0037316635716706514, - "learning_rate": 0.000199996332523673, - "loss": 46.0, - "step": 35666 - }, - { - "epoch": 2.726991226561156, - "grad_norm": 0.0007724087918177247, - "learning_rate": 0.000199996332317957, - "loss": 46.0, - "step": 35667 - }, - { - "epoch": 2.7270676835445458, - "grad_norm": 0.0012444661697372794, - "learning_rate": 0.0001999963321122352, - "loss": 46.0, - "step": 35668 - }, - { - "epoch": 2.7271441405279355, - "grad_norm": 0.002533516613766551, - "learning_rate": 0.00019999633190650765, - "loss": 46.0, - "step": 35669 - }, - { - "epoch": 2.7272205975113253, - "grad_norm": 0.003680545138195157, - "learning_rate": 0.0001999963317007743, - "loss": 46.0, - "step": 35670 - }, - { - "epoch": 2.727297054494715, - "grad_norm": 0.003232403425499797, - "learning_rate": 0.0001999963314950352, - "loss": 46.0, - "step": 35671 - }, - { - "epoch": 2.7273735114781044, - "grad_norm": 0.0034296144731342793, - "learning_rate": 0.00019999633128929033, - "loss": 46.0, - "step": 35672 - }, - { - "epoch": 2.727449968461494, - "grad_norm": 0.003555634757503867, - "learning_rate": 0.00019999633108353968, - "loss": 46.0, - "step": 35673 - }, - { - "epoch": 2.727526425444884, - "grad_norm": 0.00254733394831419, - "learning_rate": 0.0001999963308777833, - "loss": 46.0, - "step": 35674 - }, - { - "epoch": 2.7276028824282736, - "grad_norm": 0.00363207352347672, - "learning_rate": 0.00019999633067202107, - "loss": 46.0, - "step": 35675 - }, - { - "epoch": 2.7276793394116634, - "grad_norm": 0.0008738340111449361, - "learning_rate": 0.00019999633046625316, - "loss": 46.0, - "step": 35676 - }, - { - "epoch": 2.727755796395053, - "grad_norm": 0.0018527911743149161, - "learning_rate": 0.00019999633026047945, - "loss": 46.0, - "step": 35677 - }, - { - "epoch": 2.727832253378443, - "grad_norm": 0.0008666341891512275, - "learning_rate": 0.00019999633005469996, - "loss": 46.0, - "step": 35678 - }, - { - "epoch": 2.7279087103618327, - "grad_norm": 0.0027163878548890352, - "learning_rate": 0.00019999632984891468, - "loss": 46.0, - "step": 35679 - }, - { - "epoch": 2.7279851673452225, - "grad_norm": 0.001168629969470203, - "learning_rate": 0.00019999632964312364, - "loss": 46.0, - "step": 35680 - }, - { - "epoch": 2.728061624328612, - "grad_norm": 0.0011977845570072532, - "learning_rate": 0.00019999632943732687, - "loss": 46.0, - "step": 35681 - }, - { - "epoch": 2.728138081312002, - "grad_norm": 0.0009905926417559385, - "learning_rate": 0.0001999963292315243, - "loss": 46.0, - "step": 35682 - }, - { - "epoch": 2.7282145382953917, - "grad_norm": 0.0015684437239542603, - "learning_rate": 0.00019999632902571593, - "loss": 46.0, - "step": 35683 - }, - { - "epoch": 2.7282909952787815, - "grad_norm": 0.0016992834862321615, - "learning_rate": 0.00019999632881990184, - "loss": 46.0, - "step": 35684 - }, - { - "epoch": 2.7283674522621713, - "grad_norm": 0.0010718251578509808, - "learning_rate": 0.00019999632861408196, - "loss": 46.0, - "step": 35685 - }, - { - "epoch": 2.7284439092455606, - "grad_norm": 0.002412281697615981, - "learning_rate": 0.00019999632840825632, - "loss": 46.0, - "step": 35686 - }, - { - "epoch": 2.7285203662289503, - "grad_norm": 0.0036779099609702826, - "learning_rate": 0.0001999963282024249, - "loss": 46.0, - "step": 35687 - }, - { - "epoch": 2.72859682321234, - "grad_norm": 0.0032590029295533895, - "learning_rate": 0.00019999632799658774, - "loss": 46.0, - "step": 35688 - }, - { - "epoch": 2.72867328019573, - "grad_norm": 0.00938719604164362, - "learning_rate": 0.0001999963277907448, - "loss": 46.0, - "step": 35689 - }, - { - "epoch": 2.7287497371791196, - "grad_norm": 0.0025378817226737738, - "learning_rate": 0.00019999632758489609, - "loss": 46.0, - "step": 35690 - }, - { - "epoch": 2.7288261941625094, - "grad_norm": 0.0009176959283649921, - "learning_rate": 0.0001999963273790416, - "loss": 46.0, - "step": 35691 - }, - { - "epoch": 2.728902651145899, - "grad_norm": 0.007104227319359779, - "learning_rate": 0.00019999632717318134, - "loss": 46.0, - "step": 35692 - }, - { - "epoch": 2.728979108129289, - "grad_norm": 0.0013569480506703258, - "learning_rate": 0.00019999632696731528, - "loss": 46.0, - "step": 35693 - }, - { - "epoch": 2.729055565112678, - "grad_norm": 0.004530667793005705, - "learning_rate": 0.00019999632676144348, - "loss": 46.0, - "step": 35694 - }, - { - "epoch": 2.729132022096068, - "grad_norm": 0.0025318230036646128, - "learning_rate": 0.00019999632655556593, - "loss": 46.0, - "step": 35695 - }, - { - "epoch": 2.7292084790794577, - "grad_norm": 0.001357459113933146, - "learning_rate": 0.0001999963263496826, - "loss": 46.0, - "step": 35696 - }, - { - "epoch": 2.7292849360628475, - "grad_norm": 0.007906139828264713, - "learning_rate": 0.00019999632614379348, - "loss": 46.0, - "step": 35697 - }, - { - "epoch": 2.7293613930462373, - "grad_norm": 0.0061918217688798904, - "learning_rate": 0.0001999963259378986, - "loss": 46.0, - "step": 35698 - }, - { - "epoch": 2.729437850029627, - "grad_norm": 0.0012706361012533307, - "learning_rate": 0.000199996325731998, - "loss": 46.0, - "step": 35699 - }, - { - "epoch": 2.729514307013017, - "grad_norm": 0.0025306467432528734, - "learning_rate": 0.00019999632552609157, - "loss": 46.0, - "step": 35700 - }, - { - "epoch": 2.7295907639964065, - "grad_norm": 0.0007252030191011727, - "learning_rate": 0.00019999632532017938, - "loss": 46.0, - "step": 35701 - }, - { - "epoch": 2.7296672209797963, - "grad_norm": 0.0014977443497627974, - "learning_rate": 0.00019999632511426144, - "loss": 46.0, - "step": 35702 - }, - { - "epoch": 2.729743677963186, - "grad_norm": 0.010900103487074375, - "learning_rate": 0.0001999963249083377, - "loss": 46.0, - "step": 35703 - }, - { - "epoch": 2.729820134946576, - "grad_norm": 0.0011279555037617683, - "learning_rate": 0.00019999632470240825, - "loss": 46.0, - "step": 35704 - }, - { - "epoch": 2.7298965919299656, - "grad_norm": 0.0035279574804008007, - "learning_rate": 0.00019999632449647296, - "loss": 46.0, - "step": 35705 - }, - { - "epoch": 2.7299730489133553, - "grad_norm": 0.004021984990686178, - "learning_rate": 0.00019999632429053193, - "loss": 46.0, - "step": 35706 - }, - { - "epoch": 2.730049505896745, - "grad_norm": 0.0011964804725721478, - "learning_rate": 0.00019999632408458515, - "loss": 46.0, - "step": 35707 - }, - { - "epoch": 2.7301259628801344, - "grad_norm": 0.0013586652930825949, - "learning_rate": 0.00019999632387863257, - "loss": 46.0, - "step": 35708 - }, - { - "epoch": 2.730202419863524, - "grad_norm": 0.001162472297437489, - "learning_rate": 0.00019999632367267425, - "loss": 46.0, - "step": 35709 - }, - { - "epoch": 2.730278876846914, - "grad_norm": 0.001532992348074913, - "learning_rate": 0.00019999632346671012, - "loss": 46.0, - "step": 35710 - }, - { - "epoch": 2.7303553338303037, - "grad_norm": 0.0024481674190610647, - "learning_rate": 0.00019999632326074025, - "loss": 46.0, - "step": 35711 - }, - { - "epoch": 2.7304317908136935, - "grad_norm": 0.0020043975673615932, - "learning_rate": 0.0001999963230547646, - "loss": 46.0, - "step": 35712 - }, - { - "epoch": 2.7305082477970832, - "grad_norm": 0.0006835907697677612, - "learning_rate": 0.0001999963228487832, - "loss": 46.0, - "step": 35713 - }, - { - "epoch": 2.730584704780473, - "grad_norm": 0.0015180815244093537, - "learning_rate": 0.00019999632264279603, - "loss": 46.0, - "step": 35714 - }, - { - "epoch": 2.7306611617638628, - "grad_norm": 0.0008863538387231529, - "learning_rate": 0.00019999632243680306, - "loss": 46.0, - "step": 35715 - }, - { - "epoch": 2.730737618747252, - "grad_norm": 0.001259046490304172, - "learning_rate": 0.00019999632223080432, - "loss": 46.0, - "step": 35716 - }, - { - "epoch": 2.730814075730642, - "grad_norm": 0.0015540440799668431, - "learning_rate": 0.00019999632202479987, - "loss": 46.0, - "step": 35717 - }, - { - "epoch": 2.7308905327140316, - "grad_norm": 0.002205285709351301, - "learning_rate": 0.0001999963218187896, - "loss": 46.0, - "step": 35718 - }, - { - "epoch": 2.7309669896974214, - "grad_norm": 0.002824450610205531, - "learning_rate": 0.00019999632161277358, - "loss": 46.0, - "step": 35719 - }, - { - "epoch": 2.731043446680811, - "grad_norm": 0.002018020721152425, - "learning_rate": 0.00019999632140675177, - "loss": 46.0, - "step": 35720 - }, - { - "epoch": 2.731119903664201, - "grad_norm": 0.0010712331859394908, - "learning_rate": 0.0001999963212007242, - "loss": 46.0, - "step": 35721 - }, - { - "epoch": 2.7311963606475906, - "grad_norm": 0.0025074591394513845, - "learning_rate": 0.00019999632099469084, - "loss": 46.0, - "step": 35722 - }, - { - "epoch": 2.7312728176309804, - "grad_norm": 0.0017509774770587683, - "learning_rate": 0.00019999632078865175, - "loss": 46.0, - "step": 35723 - }, - { - "epoch": 2.73134927461437, - "grad_norm": 0.003326605772599578, - "learning_rate": 0.00019999632058260688, - "loss": 46.0, - "step": 35724 - }, - { - "epoch": 2.73142573159776, - "grad_norm": 0.003630370367318392, - "learning_rate": 0.00019999632037655626, - "loss": 46.0, - "step": 35725 - }, - { - "epoch": 2.7315021885811497, - "grad_norm": 0.0009780654218047857, - "learning_rate": 0.00019999632017049984, - "loss": 46.0, - "step": 35726 - }, - { - "epoch": 2.7315786455645394, - "grad_norm": 0.0018377938540652394, - "learning_rate": 0.00019999631996443765, - "loss": 46.0, - "step": 35727 - }, - { - "epoch": 2.731655102547929, - "grad_norm": 0.0017985814483836293, - "learning_rate": 0.00019999631975836969, - "loss": 46.0, - "step": 35728 - }, - { - "epoch": 2.731731559531319, - "grad_norm": 0.001997133484110236, - "learning_rate": 0.00019999631955229597, - "loss": 46.0, - "step": 35729 - }, - { - "epoch": 2.7318080165147083, - "grad_norm": 0.0013265230227261782, - "learning_rate": 0.00019999631934621646, - "loss": 46.0, - "step": 35730 - }, - { - "epoch": 2.731884473498098, - "grad_norm": 0.002895316341891885, - "learning_rate": 0.00019999631914013123, - "loss": 46.0, - "step": 35731 - }, - { - "epoch": 2.731960930481488, - "grad_norm": 0.001926775206811726, - "learning_rate": 0.0001999963189340402, - "loss": 46.0, - "step": 35732 - }, - { - "epoch": 2.7320373874648776, - "grad_norm": 0.0025911573320627213, - "learning_rate": 0.0001999963187279434, - "loss": 46.0, - "step": 35733 - }, - { - "epoch": 2.7321138444482673, - "grad_norm": 0.002382179256528616, - "learning_rate": 0.00019999631852184085, - "loss": 46.0, - "step": 35734 - }, - { - "epoch": 2.732190301431657, - "grad_norm": 0.0013867170782759786, - "learning_rate": 0.00019999631831573252, - "loss": 46.0, - "step": 35735 - }, - { - "epoch": 2.732266758415047, - "grad_norm": 0.0007950286963023245, - "learning_rate": 0.0001999963181096184, - "loss": 46.0, - "step": 35736 - }, - { - "epoch": 2.7323432153984366, - "grad_norm": 0.002047120127826929, - "learning_rate": 0.00019999631790349853, - "loss": 46.0, - "step": 35737 - }, - { - "epoch": 2.732419672381826, - "grad_norm": 0.00254746968857944, - "learning_rate": 0.00019999631769737289, - "loss": 46.0, - "step": 35738 - }, - { - "epoch": 2.7324961293652157, - "grad_norm": 0.0014278758317232132, - "learning_rate": 0.00019999631749124147, - "loss": 46.0, - "step": 35739 - }, - { - "epoch": 2.7325725863486054, - "grad_norm": 0.00474910531193018, - "learning_rate": 0.00019999631728510428, - "loss": 46.0, - "step": 35740 - }, - { - "epoch": 2.732649043331995, - "grad_norm": 0.002569854026660323, - "learning_rate": 0.00019999631707896134, - "loss": 46.0, - "step": 35741 - }, - { - "epoch": 2.732725500315385, - "grad_norm": 0.0026957355439662933, - "learning_rate": 0.0001999963168728126, - "loss": 46.0, - "step": 35742 - }, - { - "epoch": 2.7328019572987747, - "grad_norm": 0.002226717071607709, - "learning_rate": 0.00019999631666665812, - "loss": 46.0, - "step": 35743 - }, - { - "epoch": 2.7328784142821645, - "grad_norm": 0.0013536263722926378, - "learning_rate": 0.0001999963164604979, - "loss": 46.0, - "step": 35744 - }, - { - "epoch": 2.7329548712655543, - "grad_norm": 0.0018991974648088217, - "learning_rate": 0.00019999631625433183, - "loss": 46.0, - "step": 35745 - }, - { - "epoch": 2.733031328248944, - "grad_norm": 0.0013735548127442598, - "learning_rate": 0.00019999631604816003, - "loss": 46.0, - "step": 35746 - }, - { - "epoch": 2.7331077852323338, - "grad_norm": 0.0017134378431364894, - "learning_rate": 0.00019999631584198245, - "loss": 46.0, - "step": 35747 - }, - { - "epoch": 2.7331842422157235, - "grad_norm": 0.0024166374932974577, - "learning_rate": 0.00019999631563579913, - "loss": 46.0, - "step": 35748 - }, - { - "epoch": 2.7332606991991133, - "grad_norm": 0.0044545698910951614, - "learning_rate": 0.00019999631542961003, - "loss": 46.0, - "step": 35749 - }, - { - "epoch": 2.733337156182503, - "grad_norm": 0.0028468146920204163, - "learning_rate": 0.00019999631522341516, - "loss": 46.0, - "step": 35750 - }, - { - "epoch": 2.7334136131658924, - "grad_norm": 0.0013901748461648822, - "learning_rate": 0.00019999631501721452, - "loss": 46.0, - "step": 35751 - }, - { - "epoch": 2.733490070149282, - "grad_norm": 0.0034736236557364464, - "learning_rate": 0.0001999963148110081, - "loss": 46.0, - "step": 35752 - }, - { - "epoch": 2.733566527132672, - "grad_norm": 0.0017775128362700343, - "learning_rate": 0.00019999631460479594, - "loss": 46.0, - "step": 35753 - }, - { - "epoch": 2.7336429841160617, - "grad_norm": 0.003621581243351102, - "learning_rate": 0.00019999631439857795, - "loss": 46.0, - "step": 35754 - }, - { - "epoch": 2.7337194410994514, - "grad_norm": 0.0021192056592553854, - "learning_rate": 0.00019999631419235426, - "loss": 46.0, - "step": 35755 - }, - { - "epoch": 2.733795898082841, - "grad_norm": 0.0020271912217140198, - "learning_rate": 0.00019999631398612478, - "loss": 46.0, - "step": 35756 - }, - { - "epoch": 2.733872355066231, - "grad_norm": 0.0035420232452452183, - "learning_rate": 0.0001999963137798895, - "loss": 46.0, - "step": 35757 - }, - { - "epoch": 2.7339488120496207, - "grad_norm": 0.0020351996645331383, - "learning_rate": 0.00019999631357364847, - "loss": 46.0, - "step": 35758 - }, - { - "epoch": 2.7340252690330105, - "grad_norm": 0.003690814832225442, - "learning_rate": 0.00019999631336740166, - "loss": 46.0, - "step": 35759 - }, - { - "epoch": 2.7341017260164, - "grad_norm": 0.0050978162325918674, - "learning_rate": 0.0001999963131611491, - "loss": 46.0, - "step": 35760 - }, - { - "epoch": 2.7341781829997895, - "grad_norm": 0.0019029497634619474, - "learning_rate": 0.00019999631295489076, - "loss": 46.0, - "step": 35761 - }, - { - "epoch": 2.7342546399831793, - "grad_norm": 0.005722524598240852, - "learning_rate": 0.00019999631274862667, - "loss": 46.0, - "step": 35762 - }, - { - "epoch": 2.734331096966569, - "grad_norm": 0.0033656407613307238, - "learning_rate": 0.00019999631254235677, - "loss": 46.0, - "step": 35763 - }, - { - "epoch": 2.734407553949959, - "grad_norm": 0.0016470981063321233, - "learning_rate": 0.00019999631233608115, - "loss": 46.0, - "step": 35764 - }, - { - "epoch": 2.7344840109333486, - "grad_norm": 0.001653734128922224, - "learning_rate": 0.00019999631212979974, - "loss": 46.0, - "step": 35765 - }, - { - "epoch": 2.7345604679167383, - "grad_norm": 0.0025281873531639576, - "learning_rate": 0.00019999631192351255, - "loss": 46.0, - "step": 35766 - }, - { - "epoch": 2.734636924900128, - "grad_norm": 0.0008028540760278702, - "learning_rate": 0.0001999963117172196, - "loss": 46.0, - "step": 35767 - }, - { - "epoch": 2.734713381883518, - "grad_norm": 0.0012554824352264404, - "learning_rate": 0.00019999631151092087, - "loss": 46.0, - "step": 35768 - }, - { - "epoch": 2.7347898388669076, - "grad_norm": 0.0021011321805417538, - "learning_rate": 0.0001999963113046164, - "loss": 46.0, - "step": 35769 - }, - { - "epoch": 2.7348662958502974, - "grad_norm": 0.0006718956283293664, - "learning_rate": 0.00019999631109830613, - "loss": 46.0, - "step": 35770 - }, - { - "epoch": 2.734942752833687, - "grad_norm": 0.0012235450558364391, - "learning_rate": 0.0001999963108919901, - "loss": 46.0, - "step": 35771 - }, - { - "epoch": 2.735019209817077, - "grad_norm": 0.005615275353193283, - "learning_rate": 0.00019999631068566827, - "loss": 46.0, - "step": 35772 - }, - { - "epoch": 2.7350956668004662, - "grad_norm": 0.0024202633649110794, - "learning_rate": 0.00019999631047934072, - "loss": 46.0, - "step": 35773 - }, - { - "epoch": 2.735172123783856, - "grad_norm": 0.0009801515843719244, - "learning_rate": 0.0001999963102730074, - "loss": 46.0, - "step": 35774 - }, - { - "epoch": 2.7352485807672458, - "grad_norm": 0.0037219219375401735, - "learning_rate": 0.00019999631006666828, - "loss": 46.0, - "step": 35775 - }, - { - "epoch": 2.7353250377506355, - "grad_norm": 0.002658379962667823, - "learning_rate": 0.00019999630986032344, - "loss": 46.0, - "step": 35776 - }, - { - "epoch": 2.7354014947340253, - "grad_norm": 0.0014870022423565388, - "learning_rate": 0.00019999630965397277, - "loss": 46.0, - "step": 35777 - }, - { - "epoch": 2.735477951717415, - "grad_norm": 0.0005365445977076888, - "learning_rate": 0.00019999630944761635, - "loss": 46.0, - "step": 35778 - }, - { - "epoch": 2.735554408700805, - "grad_norm": 0.0019919397309422493, - "learning_rate": 0.0001999963092412542, - "loss": 46.0, - "step": 35779 - }, - { - "epoch": 2.7356308656841946, - "grad_norm": 0.004397986456751823, - "learning_rate": 0.00019999630903488622, - "loss": 46.0, - "step": 35780 - }, - { - "epoch": 2.735707322667584, - "grad_norm": 0.001296763657592237, - "learning_rate": 0.00019999630882851251, - "loss": 46.0, - "step": 35781 - }, - { - "epoch": 2.7357837796509736, - "grad_norm": 0.0016664611175656319, - "learning_rate": 0.000199996308622133, - "loss": 46.0, - "step": 35782 - }, - { - "epoch": 2.7358602366343634, - "grad_norm": 0.0016566052800044417, - "learning_rate": 0.00019999630841574778, - "loss": 46.0, - "step": 35783 - }, - { - "epoch": 2.735936693617753, - "grad_norm": 0.008721900172531605, - "learning_rate": 0.00019999630820935672, - "loss": 46.0, - "step": 35784 - }, - { - "epoch": 2.736013150601143, - "grad_norm": 0.0013912980211898685, - "learning_rate": 0.00019999630800295992, - "loss": 46.0, - "step": 35785 - }, - { - "epoch": 2.7360896075845327, - "grad_norm": 0.0019085319945588708, - "learning_rate": 0.00019999630779655737, - "loss": 46.0, - "step": 35786 - }, - { - "epoch": 2.7361660645679224, - "grad_norm": 0.002847634954378009, - "learning_rate": 0.00019999630759014902, - "loss": 46.0, - "step": 35787 - }, - { - "epoch": 2.736242521551312, - "grad_norm": 0.0035724679473787546, - "learning_rate": 0.00019999630738373492, - "loss": 46.0, - "step": 35788 - }, - { - "epoch": 2.736318978534702, - "grad_norm": 0.0010674335062503815, - "learning_rate": 0.00019999630717731502, - "loss": 46.0, - "step": 35789 - }, - { - "epoch": 2.7363954355180917, - "grad_norm": 0.002224615076556802, - "learning_rate": 0.0001999963069708894, - "loss": 46.0, - "step": 35790 - }, - { - "epoch": 2.7364718925014815, - "grad_norm": 0.0008803022210486233, - "learning_rate": 0.000199996306764458, - "loss": 46.0, - "step": 35791 - }, - { - "epoch": 2.7365483494848712, - "grad_norm": 0.0015376565279439092, - "learning_rate": 0.00019999630655802083, - "loss": 46.0, - "step": 35792 - }, - { - "epoch": 2.736624806468261, - "grad_norm": 0.0029344032518565655, - "learning_rate": 0.00019999630635157787, - "loss": 46.0, - "step": 35793 - }, - { - "epoch": 2.7367012634516508, - "grad_norm": 0.0033081460278481245, - "learning_rate": 0.00019999630614512913, - "loss": 46.0, - "step": 35794 - }, - { - "epoch": 2.73677772043504, - "grad_norm": 0.0015810366021469235, - "learning_rate": 0.00019999630593867465, - "loss": 46.0, - "step": 35795 - }, - { - "epoch": 2.73685417741843, - "grad_norm": 0.0069196452386677265, - "learning_rate": 0.00019999630573221437, - "loss": 46.0, - "step": 35796 - }, - { - "epoch": 2.7369306344018196, - "grad_norm": 0.003984841983765364, - "learning_rate": 0.00019999630552574834, - "loss": 46.0, - "step": 35797 - }, - { - "epoch": 2.7370070913852094, - "grad_norm": 0.0014060544781386852, - "learning_rate": 0.00019999630531927656, - "loss": 46.0, - "step": 35798 - }, - { - "epoch": 2.737083548368599, - "grad_norm": 0.006560854613780975, - "learning_rate": 0.00019999630511279899, - "loss": 46.0, - "step": 35799 - }, - { - "epoch": 2.737160005351989, - "grad_norm": 0.0018308509606868029, - "learning_rate": 0.00019999630490631564, - "loss": 46.0, - "step": 35800 - }, - { - "epoch": 2.7372364623353787, - "grad_norm": 0.007635123562067747, - "learning_rate": 0.00019999630469982654, - "loss": 46.0, - "step": 35801 - }, - { - "epoch": 2.7373129193187684, - "grad_norm": 0.0012488069478422403, - "learning_rate": 0.00019999630449333167, - "loss": 46.0, - "step": 35802 - }, - { - "epoch": 2.7373893763021577, - "grad_norm": 0.003487673122435808, - "learning_rate": 0.00019999630428683103, - "loss": 46.0, - "step": 35803 - }, - { - "epoch": 2.7374658332855475, - "grad_norm": 0.0012271342566236854, - "learning_rate": 0.00019999630408032461, - "loss": 46.0, - "step": 35804 - }, - { - "epoch": 2.7375422902689373, - "grad_norm": 0.0009661121875979006, - "learning_rate": 0.00019999630387381243, - "loss": 46.0, - "step": 35805 - }, - { - "epoch": 2.737618747252327, - "grad_norm": 0.0016262778080999851, - "learning_rate": 0.00019999630366729446, - "loss": 46.0, - "step": 35806 - }, - { - "epoch": 2.7376952042357168, - "grad_norm": 0.0008614902035333216, - "learning_rate": 0.00019999630346077075, - "loss": 46.0, - "step": 35807 - }, - { - "epoch": 2.7377716612191065, - "grad_norm": 0.0013567577116191387, - "learning_rate": 0.00019999630325424125, - "loss": 46.0, - "step": 35808 - }, - { - "epoch": 2.7378481182024963, - "grad_norm": 0.0010432898998260498, - "learning_rate": 0.000199996303047706, - "loss": 46.0, - "step": 35809 - }, - { - "epoch": 2.737924575185886, - "grad_norm": 0.005283687729388475, - "learning_rate": 0.00019999630284116496, - "loss": 46.0, - "step": 35810 - }, - { - "epoch": 2.738001032169276, - "grad_norm": 0.007014344446361065, - "learning_rate": 0.00019999630263461819, - "loss": 46.0, - "step": 35811 - }, - { - "epoch": 2.7380774891526656, - "grad_norm": 0.0013796740677207708, - "learning_rate": 0.00019999630242806558, - "loss": 46.0, - "step": 35812 - }, - { - "epoch": 2.7381539461360553, - "grad_norm": 0.0013678011018782854, - "learning_rate": 0.00019999630222150726, - "loss": 46.0, - "step": 35813 - }, - { - "epoch": 2.738230403119445, - "grad_norm": 0.0013025131775066257, - "learning_rate": 0.00019999630201494317, - "loss": 46.0, - "step": 35814 - }, - { - "epoch": 2.738306860102835, - "grad_norm": 0.0008969233022071421, - "learning_rate": 0.0001999963018083733, - "loss": 46.0, - "step": 35815 - }, - { - "epoch": 2.7383833170862246, - "grad_norm": 0.002112897578626871, - "learning_rate": 0.00019999630160179766, - "loss": 46.0, - "step": 35816 - }, - { - "epoch": 2.738459774069614, - "grad_norm": 0.0024056946858763695, - "learning_rate": 0.00019999630139521624, - "loss": 46.0, - "step": 35817 - }, - { - "epoch": 2.7385362310530037, - "grad_norm": 0.0019872456323355436, - "learning_rate": 0.00019999630118862905, - "loss": 46.0, - "step": 35818 - }, - { - "epoch": 2.7386126880363935, - "grad_norm": 0.002187522826716304, - "learning_rate": 0.0001999963009820361, - "loss": 46.0, - "step": 35819 - }, - { - "epoch": 2.7386891450197832, - "grad_norm": 0.0013820001622661948, - "learning_rate": 0.00019999630077543738, - "loss": 46.0, - "step": 35820 - }, - { - "epoch": 2.738765602003173, - "grad_norm": 0.003323413198813796, - "learning_rate": 0.00019999630056883287, - "loss": 46.0, - "step": 35821 - }, - { - "epoch": 2.7388420589865627, - "grad_norm": 0.0035491979215294123, - "learning_rate": 0.00019999630036222264, - "loss": 46.0, - "step": 35822 - }, - { - "epoch": 2.7389185159699525, - "grad_norm": 0.0008238217560574412, - "learning_rate": 0.0001999963001556066, - "loss": 46.0, - "step": 35823 - }, - { - "epoch": 2.7389949729533423, - "grad_norm": 0.002367644337937236, - "learning_rate": 0.00019999629994898479, - "loss": 46.0, - "step": 35824 - }, - { - "epoch": 2.7390714299367316, - "grad_norm": 0.002570437965914607, - "learning_rate": 0.00019999629974235724, - "loss": 46.0, - "step": 35825 - }, - { - "epoch": 2.7391478869201213, - "grad_norm": 0.0036188121885061264, - "learning_rate": 0.0001999962995357239, - "loss": 46.0, - "step": 35826 - }, - { - "epoch": 2.739224343903511, - "grad_norm": 0.0014184325700625777, - "learning_rate": 0.0001999962993290848, - "loss": 46.0, - "step": 35827 - }, - { - "epoch": 2.739300800886901, - "grad_norm": 0.0021532007958739996, - "learning_rate": 0.0001999962991224399, - "loss": 46.0, - "step": 35828 - }, - { - "epoch": 2.7393772578702906, - "grad_norm": 0.002907169982790947, - "learning_rate": 0.00019999629891578926, - "loss": 46.0, - "step": 35829 - }, - { - "epoch": 2.7394537148536804, - "grad_norm": 0.0019722990691661835, - "learning_rate": 0.00019999629870913287, - "loss": 46.0, - "step": 35830 - }, - { - "epoch": 2.73953017183707, - "grad_norm": 0.0025312057696282864, - "learning_rate": 0.00019999629850247068, - "loss": 46.0, - "step": 35831 - }, - { - "epoch": 2.73960662882046, - "grad_norm": 0.0027756057679653168, - "learning_rate": 0.00019999629829580272, - "loss": 46.0, - "step": 35832 - }, - { - "epoch": 2.7396830858038497, - "grad_norm": 0.0014693514676764607, - "learning_rate": 0.000199996298089129, - "loss": 46.0, - "step": 35833 - }, - { - "epoch": 2.7397595427872394, - "grad_norm": 0.002156902104616165, - "learning_rate": 0.0001999962978824495, - "loss": 46.0, - "step": 35834 - }, - { - "epoch": 2.739835999770629, - "grad_norm": 0.000835475861094892, - "learning_rate": 0.00019999629767576425, - "loss": 46.0, - "step": 35835 - }, - { - "epoch": 2.739912456754019, - "grad_norm": 0.00239433441311121, - "learning_rate": 0.00019999629746907322, - "loss": 46.0, - "step": 35836 - }, - { - "epoch": 2.7399889137374087, - "grad_norm": 0.003271217690780759, - "learning_rate": 0.00019999629726237642, - "loss": 46.0, - "step": 35837 - }, - { - "epoch": 2.7400653707207985, - "grad_norm": 0.0021858541294932365, - "learning_rate": 0.00019999629705567387, - "loss": 46.0, - "step": 35838 - }, - { - "epoch": 2.740141827704188, - "grad_norm": 0.0009301493410021067, - "learning_rate": 0.00019999629684896552, - "loss": 46.0, - "step": 35839 - }, - { - "epoch": 2.7402182846875776, - "grad_norm": 0.003283920930698514, - "learning_rate": 0.0001999962966422514, - "loss": 46.0, - "step": 35840 - }, - { - "epoch": 2.7402947416709673, - "grad_norm": 0.0007741856388747692, - "learning_rate": 0.00019999629643553153, - "loss": 46.0, - "step": 35841 - }, - { - "epoch": 2.740371198654357, - "grad_norm": 0.003037592861801386, - "learning_rate": 0.0001999962962288059, - "loss": 46.0, - "step": 35842 - }, - { - "epoch": 2.740447655637747, - "grad_norm": 0.0016240758122876287, - "learning_rate": 0.00019999629602207448, - "loss": 46.0, - "step": 35843 - }, - { - "epoch": 2.7405241126211366, - "grad_norm": 0.0018706904957070947, - "learning_rate": 0.0001999962958153373, - "loss": 46.0, - "step": 35844 - }, - { - "epoch": 2.7406005696045264, - "grad_norm": 0.0018460426945239305, - "learning_rate": 0.00019999629560859435, - "loss": 46.0, - "step": 35845 - }, - { - "epoch": 2.740677026587916, - "grad_norm": 0.0030594519339501858, - "learning_rate": 0.00019999629540184565, - "loss": 46.0, - "step": 35846 - }, - { - "epoch": 2.7407534835713054, - "grad_norm": 0.0038096415810287, - "learning_rate": 0.00019999629519509114, - "loss": 46.0, - "step": 35847 - }, - { - "epoch": 2.740829940554695, - "grad_norm": 0.002642523031681776, - "learning_rate": 0.00019999629498833086, - "loss": 46.0, - "step": 35848 - }, - { - "epoch": 2.740906397538085, - "grad_norm": 0.0019593259785324335, - "learning_rate": 0.00019999629478156483, - "loss": 46.0, - "step": 35849 - }, - { - "epoch": 2.7409828545214747, - "grad_norm": 0.0007764084148220718, - "learning_rate": 0.00019999629457479303, - "loss": 46.0, - "step": 35850 - }, - { - "epoch": 2.7410593115048645, - "grad_norm": 0.002579642226919532, - "learning_rate": 0.00019999629436801545, - "loss": 46.0, - "step": 35851 - }, - { - "epoch": 2.7411357684882542, - "grad_norm": 0.0026985201984643936, - "learning_rate": 0.00019999629416123213, - "loss": 46.0, - "step": 35852 - }, - { - "epoch": 2.741212225471644, - "grad_norm": 0.0014592364896088839, - "learning_rate": 0.00019999629395444304, - "loss": 46.0, - "step": 35853 - }, - { - "epoch": 2.7412886824550338, - "grad_norm": 0.0005410446901805699, - "learning_rate": 0.00019999629374764814, - "loss": 46.0, - "step": 35854 - }, - { - "epoch": 2.7413651394384235, - "grad_norm": 0.0033891620114445686, - "learning_rate": 0.00019999629354084753, - "loss": 46.0, - "step": 35855 - }, - { - "epoch": 2.7414415964218133, - "grad_norm": 0.008296642452478409, - "learning_rate": 0.00019999629333404109, - "loss": 46.0, - "step": 35856 - }, - { - "epoch": 2.741518053405203, - "grad_norm": 0.004144382197409868, - "learning_rate": 0.00019999629312722892, - "loss": 46.0, - "step": 35857 - }, - { - "epoch": 2.741594510388593, - "grad_norm": 0.0008417487842962146, - "learning_rate": 0.00019999629292041096, - "loss": 46.0, - "step": 35858 - }, - { - "epoch": 2.7416709673719826, - "grad_norm": 0.0037346736062318087, - "learning_rate": 0.00019999629271358723, - "loss": 46.0, - "step": 35859 - }, - { - "epoch": 2.7417474243553723, - "grad_norm": 0.0009178059990517795, - "learning_rate": 0.00019999629250675772, - "loss": 46.0, - "step": 35860 - }, - { - "epoch": 2.7418238813387616, - "grad_norm": 0.005763344932347536, - "learning_rate": 0.00019999629229992247, - "loss": 46.0, - "step": 35861 - }, - { - "epoch": 2.7419003383221514, - "grad_norm": 0.0063377609476447105, - "learning_rate": 0.00019999629209308147, - "loss": 46.0, - "step": 35862 - }, - { - "epoch": 2.741976795305541, - "grad_norm": 0.0018648783443495631, - "learning_rate": 0.00019999629188623467, - "loss": 46.0, - "step": 35863 - }, - { - "epoch": 2.742053252288931, - "grad_norm": 0.001431356300599873, - "learning_rate": 0.0001999962916793821, - "loss": 46.0, - "step": 35864 - }, - { - "epoch": 2.7421297092723207, - "grad_norm": 0.005612307693809271, - "learning_rate": 0.00019999629147252374, - "loss": 46.0, - "step": 35865 - }, - { - "epoch": 2.7422061662557105, - "grad_norm": 0.0014139526756480336, - "learning_rate": 0.00019999629126565965, - "loss": 46.0, - "step": 35866 - }, - { - "epoch": 2.7422826232391, - "grad_norm": 0.0016323266318067908, - "learning_rate": 0.00019999629105878976, - "loss": 46.0, - "step": 35867 - }, - { - "epoch": 2.74235908022249, - "grad_norm": 0.003030815627425909, - "learning_rate": 0.00019999629085191414, - "loss": 46.0, - "step": 35868 - }, - { - "epoch": 2.7424355372058793, - "grad_norm": 0.004062585532665253, - "learning_rate": 0.0001999962906450327, - "loss": 46.0, - "step": 35869 - }, - { - "epoch": 2.742511994189269, - "grad_norm": 0.0009527135989628732, - "learning_rate": 0.0001999962904381455, - "loss": 46.0, - "step": 35870 - }, - { - "epoch": 2.742588451172659, - "grad_norm": 0.0015048084314912558, - "learning_rate": 0.00019999629023125258, - "loss": 46.0, - "step": 35871 - }, - { - "epoch": 2.7426649081560486, - "grad_norm": 0.0022276260424405336, - "learning_rate": 0.00019999629002435384, - "loss": 46.0, - "step": 35872 - }, - { - "epoch": 2.7427413651394383, - "grad_norm": 0.0015374044887721539, - "learning_rate": 0.00019999628981744936, - "loss": 46.0, - "step": 35873 - }, - { - "epoch": 2.742817822122828, - "grad_norm": 0.0025957589969038963, - "learning_rate": 0.00019999628961053908, - "loss": 46.0, - "step": 35874 - }, - { - "epoch": 2.742894279106218, - "grad_norm": 0.0025320476852357388, - "learning_rate": 0.00019999628940362308, - "loss": 46.0, - "step": 35875 - }, - { - "epoch": 2.7429707360896076, - "grad_norm": 0.003992842510342598, - "learning_rate": 0.00019999628919670128, - "loss": 46.0, - "step": 35876 - }, - { - "epoch": 2.7430471930729974, - "grad_norm": 0.0008204219047911465, - "learning_rate": 0.00019999628898977368, - "loss": 46.0, - "step": 35877 - }, - { - "epoch": 2.743123650056387, - "grad_norm": 0.0045212795957922935, - "learning_rate": 0.0001999962887828404, - "loss": 46.0, - "step": 35878 - }, - { - "epoch": 2.743200107039777, - "grad_norm": 0.0022172306198626757, - "learning_rate": 0.00019999628857590124, - "loss": 46.0, - "step": 35879 - }, - { - "epoch": 2.7432765640231667, - "grad_norm": 0.0025309273041784763, - "learning_rate": 0.00019999628836895637, - "loss": 46.0, - "step": 35880 - }, - { - "epoch": 2.7433530210065564, - "grad_norm": 0.0032449280843138695, - "learning_rate": 0.00019999628816200576, - "loss": 46.0, - "step": 35881 - }, - { - "epoch": 2.7434294779899457, - "grad_norm": 0.0018204592633992434, - "learning_rate": 0.00019999628795504932, - "loss": 46.0, - "step": 35882 - }, - { - "epoch": 2.7435059349733355, - "grad_norm": 0.0017473356565460563, - "learning_rate": 0.00019999628774808713, - "loss": 46.0, - "step": 35883 - }, - { - "epoch": 2.7435823919567253, - "grad_norm": 0.0010288238991051912, - "learning_rate": 0.0001999962875411192, - "loss": 46.0, - "step": 35884 - }, - { - "epoch": 2.743658848940115, - "grad_norm": 0.0011149861384183168, - "learning_rate": 0.0001999962873341455, - "loss": 46.0, - "step": 35885 - }, - { - "epoch": 2.743735305923505, - "grad_norm": 0.0027040743734687567, - "learning_rate": 0.00019999628712716598, - "loss": 46.0, - "step": 35886 - }, - { - "epoch": 2.7438117629068945, - "grad_norm": 0.0010989997535943985, - "learning_rate": 0.0001999962869201807, - "loss": 46.0, - "step": 35887 - }, - { - "epoch": 2.7438882198902843, - "grad_norm": 0.003336984198540449, - "learning_rate": 0.00019999628671318968, - "loss": 46.0, - "step": 35888 - }, - { - "epoch": 2.743964676873674, - "grad_norm": 0.0012599349720403552, - "learning_rate": 0.00019999628650619288, - "loss": 46.0, - "step": 35889 - }, - { - "epoch": 2.744041133857064, - "grad_norm": 0.0048316605389118195, - "learning_rate": 0.00019999628629919033, - "loss": 46.0, - "step": 35890 - }, - { - "epoch": 2.744117590840453, - "grad_norm": 0.00418563699349761, - "learning_rate": 0.00019999628609218198, - "loss": 46.0, - "step": 35891 - }, - { - "epoch": 2.744194047823843, - "grad_norm": 0.0033980535808950663, - "learning_rate": 0.0001999962858851679, - "loss": 46.0, - "step": 35892 - }, - { - "epoch": 2.7442705048072327, - "grad_norm": 0.0039049030747264624, - "learning_rate": 0.00019999628567814802, - "loss": 46.0, - "step": 35893 - }, - { - "epoch": 2.7443469617906224, - "grad_norm": 0.002149237785488367, - "learning_rate": 0.00019999628547112238, - "loss": 46.0, - "step": 35894 - }, - { - "epoch": 2.744423418774012, - "grad_norm": 0.0016097250627353787, - "learning_rate": 0.00019999628526409094, - "loss": 46.0, - "step": 35895 - }, - { - "epoch": 2.744499875757402, - "grad_norm": 0.004439320880919695, - "learning_rate": 0.00019999628505705375, - "loss": 46.0, - "step": 35896 - }, - { - "epoch": 2.7445763327407917, - "grad_norm": 0.004211506340652704, - "learning_rate": 0.0001999962848500108, - "loss": 46.0, - "step": 35897 - }, - { - "epoch": 2.7446527897241815, - "grad_norm": 0.002718964358791709, - "learning_rate": 0.00019999628464296206, - "loss": 46.0, - "step": 35898 - }, - { - "epoch": 2.7447292467075712, - "grad_norm": 0.0012025551404803991, - "learning_rate": 0.00019999628443590758, - "loss": 46.0, - "step": 35899 - }, - { - "epoch": 2.744805703690961, - "grad_norm": 0.0016557564958930016, - "learning_rate": 0.00019999628422884733, - "loss": 46.0, - "step": 35900 - }, - { - "epoch": 2.7448821606743508, - "grad_norm": 0.0010153529001399875, - "learning_rate": 0.0001999962840217813, - "loss": 46.0, - "step": 35901 - }, - { - "epoch": 2.7449586176577405, - "grad_norm": 0.002281966619193554, - "learning_rate": 0.0001999962838147095, - "loss": 46.0, - "step": 35902 - }, - { - "epoch": 2.7450350746411303, - "grad_norm": 0.0016252151690423489, - "learning_rate": 0.00019999628360763193, - "loss": 46.0, - "step": 35903 - }, - { - "epoch": 2.7451115316245196, - "grad_norm": 0.0010124027030542493, - "learning_rate": 0.00019999628340054858, - "loss": 46.0, - "step": 35904 - }, - { - "epoch": 2.7451879886079094, - "grad_norm": 0.0011132683139294386, - "learning_rate": 0.0001999962831934595, - "loss": 46.0, - "step": 35905 - }, - { - "epoch": 2.745264445591299, - "grad_norm": 0.0018804400460794568, - "learning_rate": 0.00019999628298636462, - "loss": 46.0, - "step": 35906 - }, - { - "epoch": 2.745340902574689, - "grad_norm": 0.002009012270718813, - "learning_rate": 0.00019999628277926395, - "loss": 46.0, - "step": 35907 - }, - { - "epoch": 2.7454173595580786, - "grad_norm": 0.002859940053895116, - "learning_rate": 0.00019999628257215757, - "loss": 46.0, - "step": 35908 - }, - { - "epoch": 2.7454938165414684, - "grad_norm": 0.0019179665250703692, - "learning_rate": 0.00019999628236504538, - "loss": 46.0, - "step": 35909 - }, - { - "epoch": 2.745570273524858, - "grad_norm": 0.0022360177244991064, - "learning_rate": 0.0001999962821579274, - "loss": 46.0, - "step": 35910 - }, - { - "epoch": 2.745646730508248, - "grad_norm": 0.002752751810476184, - "learning_rate": 0.0001999962819508037, - "loss": 46.0, - "step": 35911 - }, - { - "epoch": 2.7457231874916372, - "grad_norm": 0.0011977370595559478, - "learning_rate": 0.0001999962817436742, - "loss": 46.0, - "step": 35912 - }, - { - "epoch": 2.745799644475027, - "grad_norm": 0.0015832443023100495, - "learning_rate": 0.00019999628153653896, - "loss": 46.0, - "step": 35913 - }, - { - "epoch": 2.7458761014584168, - "grad_norm": 0.004873890429735184, - "learning_rate": 0.0001999962813293979, - "loss": 46.0, - "step": 35914 - }, - { - "epoch": 2.7459525584418065, - "grad_norm": 0.003168850438669324, - "learning_rate": 0.0001999962811222511, - "loss": 46.0, - "step": 35915 - }, - { - "epoch": 2.7460290154251963, - "grad_norm": 0.003808521432802081, - "learning_rate": 0.00019999628091509856, - "loss": 46.0, - "step": 35916 - }, - { - "epoch": 2.746105472408586, - "grad_norm": 0.0008133513038046658, - "learning_rate": 0.0001999962807079402, - "loss": 46.0, - "step": 35917 - }, - { - "epoch": 2.746181929391976, - "grad_norm": 0.0031596627086400986, - "learning_rate": 0.0001999962805007761, - "loss": 46.0, - "step": 35918 - }, - { - "epoch": 2.7462583863753656, - "grad_norm": 0.0028175306506454945, - "learning_rate": 0.00019999628029360622, - "loss": 46.0, - "step": 35919 - }, - { - "epoch": 2.7463348433587553, - "grad_norm": 0.0016469416441395879, - "learning_rate": 0.00019999628008643056, - "loss": 46.0, - "step": 35920 - }, - { - "epoch": 2.746411300342145, - "grad_norm": 0.00614013010635972, - "learning_rate": 0.00019999627987924917, - "loss": 46.0, - "step": 35921 - }, - { - "epoch": 2.746487757325535, - "grad_norm": 0.0020270897075533867, - "learning_rate": 0.00019999627967206199, - "loss": 46.0, - "step": 35922 - }, - { - "epoch": 2.7465642143089246, - "grad_norm": 0.014481998980045319, - "learning_rate": 0.00019999627946486903, - "loss": 46.0, - "step": 35923 - }, - { - "epoch": 2.7466406712923144, - "grad_norm": 0.0012304041301831603, - "learning_rate": 0.0001999962792576703, - "loss": 46.0, - "step": 35924 - }, - { - "epoch": 2.746717128275704, - "grad_norm": 0.003599070245400071, - "learning_rate": 0.00019999627905046581, - "loss": 46.0, - "step": 35925 - }, - { - "epoch": 2.7467935852590935, - "grad_norm": 0.0024315344635397196, - "learning_rate": 0.00019999627884325553, - "loss": 46.0, - "step": 35926 - }, - { - "epoch": 2.746870042242483, - "grad_norm": 0.0017161985160782933, - "learning_rate": 0.0001999962786360395, - "loss": 46.0, - "step": 35927 - }, - { - "epoch": 2.746946499225873, - "grad_norm": 0.0007929989951662719, - "learning_rate": 0.0001999962784288177, - "loss": 46.0, - "step": 35928 - }, - { - "epoch": 2.7470229562092627, - "grad_norm": 0.005236527882516384, - "learning_rate": 0.00019999627822159016, - "loss": 46.0, - "step": 35929 - }, - { - "epoch": 2.7470994131926525, - "grad_norm": 0.00308925355784595, - "learning_rate": 0.00019999627801435682, - "loss": 46.0, - "step": 35930 - }, - { - "epoch": 2.7471758701760423, - "grad_norm": 0.004056284204125404, - "learning_rate": 0.0001999962778071177, - "loss": 46.0, - "step": 35931 - }, - { - "epoch": 2.747252327159432, - "grad_norm": 0.004575693979859352, - "learning_rate": 0.00019999627759987283, - "loss": 46.0, - "step": 35932 - }, - { - "epoch": 2.747328784142822, - "grad_norm": 0.0008140190038830042, - "learning_rate": 0.0001999962773926222, - "loss": 46.0, - "step": 35933 - }, - { - "epoch": 2.747405241126211, - "grad_norm": 0.0013864230131730437, - "learning_rate": 0.00019999627718536575, - "loss": 46.0, - "step": 35934 - }, - { - "epoch": 2.747481698109601, - "grad_norm": 0.0034038848243653774, - "learning_rate": 0.0001999962769781036, - "loss": 46.0, - "step": 35935 - }, - { - "epoch": 2.7475581550929906, - "grad_norm": 0.003007151186466217, - "learning_rate": 0.00019999627677083563, - "loss": 46.0, - "step": 35936 - }, - { - "epoch": 2.7476346120763804, - "grad_norm": 0.0012666697148233652, - "learning_rate": 0.0001999962765635619, - "loss": 46.0, - "step": 35937 - }, - { - "epoch": 2.74771106905977, - "grad_norm": 0.005064800847321749, - "learning_rate": 0.00019999627635628242, - "loss": 46.0, - "step": 35938 - }, - { - "epoch": 2.74778752604316, - "grad_norm": 0.002488100901246071, - "learning_rate": 0.00019999627614899717, - "loss": 46.0, - "step": 35939 - }, - { - "epoch": 2.7478639830265497, - "grad_norm": 0.004570317454636097, - "learning_rate": 0.00019999627594170614, - "loss": 46.0, - "step": 35940 - }, - { - "epoch": 2.7479404400099394, - "grad_norm": 0.00286275171674788, - "learning_rate": 0.00019999627573440935, - "loss": 46.0, - "step": 35941 - }, - { - "epoch": 2.748016896993329, - "grad_norm": 0.0007824230706319213, - "learning_rate": 0.00019999627552710675, - "loss": 46.0, - "step": 35942 - }, - { - "epoch": 2.748093353976719, - "grad_norm": 0.004667325410991907, - "learning_rate": 0.0001999962753197984, - "loss": 46.0, - "step": 35943 - }, - { - "epoch": 2.7481698109601087, - "grad_norm": 0.003174850717186928, - "learning_rate": 0.0001999962751124843, - "loss": 46.0, - "step": 35944 - }, - { - "epoch": 2.7482462679434985, - "grad_norm": 0.002118821721524, - "learning_rate": 0.00019999627490516444, - "loss": 46.0, - "step": 35945 - }, - { - "epoch": 2.7483227249268882, - "grad_norm": 0.00480992067605257, - "learning_rate": 0.0001999962746978388, - "loss": 46.0, - "step": 35946 - }, - { - "epoch": 2.748399181910278, - "grad_norm": 0.0017848351271823049, - "learning_rate": 0.00019999627449050736, - "loss": 46.0, - "step": 35947 - }, - { - "epoch": 2.7484756388936673, - "grad_norm": 0.0010206529404968023, - "learning_rate": 0.0001999962742831702, - "loss": 46.0, - "step": 35948 - }, - { - "epoch": 2.748552095877057, - "grad_norm": 0.007254617754369974, - "learning_rate": 0.00019999627407582725, - "loss": 46.0, - "step": 35949 - }, - { - "epoch": 2.748628552860447, - "grad_norm": 0.0021318329963833094, - "learning_rate": 0.00019999627386847851, - "loss": 46.0, - "step": 35950 - }, - { - "epoch": 2.7487050098438366, - "grad_norm": 0.003249607514590025, - "learning_rate": 0.000199996273661124, - "loss": 46.0, - "step": 35951 - }, - { - "epoch": 2.7487814668272263, - "grad_norm": 0.0022035473957657814, - "learning_rate": 0.00019999627345376376, - "loss": 46.0, - "step": 35952 - }, - { - "epoch": 2.748857923810616, - "grad_norm": 0.0011220560409128666, - "learning_rate": 0.00019999627324639773, - "loss": 46.0, - "step": 35953 - }, - { - "epoch": 2.748934380794006, - "grad_norm": 0.005362468305975199, - "learning_rate": 0.00019999627303902596, - "loss": 46.0, - "step": 35954 - }, - { - "epoch": 2.7490108377773956, - "grad_norm": 0.00683090602979064, - "learning_rate": 0.00019999627283164836, - "loss": 46.0, - "step": 35955 - }, - { - "epoch": 2.749087294760785, - "grad_norm": 0.004931860137730837, - "learning_rate": 0.00019999627262426502, - "loss": 46.0, - "step": 35956 - }, - { - "epoch": 2.7491637517441747, - "grad_norm": 0.0010500292992219329, - "learning_rate": 0.00019999627241687592, - "loss": 46.0, - "step": 35957 - }, - { - "epoch": 2.7492402087275645, - "grad_norm": 0.002588866977021098, - "learning_rate": 0.00019999627220948106, - "loss": 46.0, - "step": 35958 - }, - { - "epoch": 2.7493166657109542, - "grad_norm": 0.0017054423224180937, - "learning_rate": 0.0001999962720020804, - "loss": 46.0, - "step": 35959 - }, - { - "epoch": 2.749393122694344, - "grad_norm": 0.0036954416427761316, - "learning_rate": 0.000199996271794674, - "loss": 46.0, - "step": 35960 - }, - { - "epoch": 2.7494695796777338, - "grad_norm": 0.0021198117174208164, - "learning_rate": 0.0001999962715872618, - "loss": 46.0, - "step": 35961 - }, - { - "epoch": 2.7495460366611235, - "grad_norm": 0.00528778275474906, - "learning_rate": 0.00019999627137984384, - "loss": 46.0, - "step": 35962 - }, - { - "epoch": 2.7496224936445133, - "grad_norm": 0.0011940563563257456, - "learning_rate": 0.00019999627117242013, - "loss": 46.0, - "step": 35963 - }, - { - "epoch": 2.749698950627903, - "grad_norm": 0.0019459824543446302, - "learning_rate": 0.00019999627096499063, - "loss": 46.0, - "step": 35964 - }, - { - "epoch": 2.749775407611293, - "grad_norm": 0.001858714036643505, - "learning_rate": 0.00019999627075755537, - "loss": 46.0, - "step": 35965 - }, - { - "epoch": 2.7498518645946826, - "grad_norm": 0.005933698266744614, - "learning_rate": 0.00019999627055011435, - "loss": 46.0, - "step": 35966 - }, - { - "epoch": 2.7499283215780723, - "grad_norm": 0.00395730696618557, - "learning_rate": 0.00019999627034266755, - "loss": 46.0, - "step": 35967 - }, - { - "epoch": 2.750004778561462, - "grad_norm": 0.003128431737422943, - "learning_rate": 0.00019999627013521498, - "loss": 46.0, - "step": 35968 - }, - { - "epoch": 2.750081235544852, - "grad_norm": 0.0015686870319768786, - "learning_rate": 0.0001999962699277566, - "loss": 46.0, - "step": 35969 - }, - { - "epoch": 2.750157692528241, - "grad_norm": 0.0016519950004294515, - "learning_rate": 0.00019999626972029252, - "loss": 46.0, - "step": 35970 - }, - { - "epoch": 2.750234149511631, - "grad_norm": 0.0013959029456600547, - "learning_rate": 0.00019999626951282268, - "loss": 46.0, - "step": 35971 - }, - { - "epoch": 2.7503106064950207, - "grad_norm": 0.0047317915596067905, - "learning_rate": 0.000199996269305347, - "loss": 46.0, - "step": 35972 - }, - { - "epoch": 2.7503870634784104, - "grad_norm": 0.004043021705001593, - "learning_rate": 0.00019999626909786557, - "loss": 46.0, - "step": 35973 - }, - { - "epoch": 2.7504635204618, - "grad_norm": 0.0009831939823925495, - "learning_rate": 0.0001999962688903784, - "loss": 46.0, - "step": 35974 - }, - { - "epoch": 2.75053997744519, - "grad_norm": 0.001072244718670845, - "learning_rate": 0.00019999626868288546, - "loss": 46.0, - "step": 35975 - }, - { - "epoch": 2.7506164344285797, - "grad_norm": 0.008005915209650993, - "learning_rate": 0.00019999626847538673, - "loss": 46.0, - "step": 35976 - }, - { - "epoch": 2.7506928914119695, - "grad_norm": 0.0027740371879190207, - "learning_rate": 0.00019999626826788225, - "loss": 46.0, - "step": 35977 - }, - { - "epoch": 2.750769348395359, - "grad_norm": 0.0008578238193877041, - "learning_rate": 0.00019999626806037197, - "loss": 46.0, - "step": 35978 - }, - { - "epoch": 2.7508458053787486, - "grad_norm": 0.002180932555347681, - "learning_rate": 0.00019999626785285594, - "loss": 46.0, - "step": 35979 - }, - { - "epoch": 2.7509222623621383, - "grad_norm": 0.005434263963252306, - "learning_rate": 0.00019999626764533415, - "loss": 46.0, - "step": 35980 - }, - { - "epoch": 2.750998719345528, - "grad_norm": 0.005442031193524599, - "learning_rate": 0.00019999626743780657, - "loss": 46.0, - "step": 35981 - }, - { - "epoch": 2.751075176328918, - "grad_norm": 0.0021151516120880842, - "learning_rate": 0.00019999626723027323, - "loss": 46.0, - "step": 35982 - }, - { - "epoch": 2.7511516333123076, - "grad_norm": 0.0027981321327388287, - "learning_rate": 0.00019999626702273414, - "loss": 46.0, - "step": 35983 - }, - { - "epoch": 2.7512280902956974, - "grad_norm": 0.0025466277729719877, - "learning_rate": 0.00019999626681518925, - "loss": 46.0, - "step": 35984 - }, - { - "epoch": 2.751304547279087, - "grad_norm": 0.009051463566720486, - "learning_rate": 0.0001999962666076386, - "loss": 46.0, - "step": 35985 - }, - { - "epoch": 2.751381004262477, - "grad_norm": 0.0024369198363274336, - "learning_rate": 0.0001999962664000822, - "loss": 46.0, - "step": 35986 - }, - { - "epoch": 2.7514574612458667, - "grad_norm": 0.0011120055569335818, - "learning_rate": 0.00019999626619252, - "loss": 46.0, - "step": 35987 - }, - { - "epoch": 2.7515339182292564, - "grad_norm": 0.0014921552501618862, - "learning_rate": 0.00019999626598495205, - "loss": 46.0, - "step": 35988 - }, - { - "epoch": 2.751610375212646, - "grad_norm": 0.003287061583250761, - "learning_rate": 0.00019999626577737832, - "loss": 46.0, - "step": 35989 - }, - { - "epoch": 2.751686832196036, - "grad_norm": 0.000821066030766815, - "learning_rate": 0.00019999626556979885, - "loss": 46.0, - "step": 35990 - }, - { - "epoch": 2.7517632891794257, - "grad_norm": 0.004554067738354206, - "learning_rate": 0.00019999626536221357, - "loss": 46.0, - "step": 35991 - }, - { - "epoch": 2.751839746162815, - "grad_norm": 0.005245990585535765, - "learning_rate": 0.00019999626515462254, - "loss": 46.0, - "step": 35992 - }, - { - "epoch": 2.7519162031462048, - "grad_norm": 0.002175557194277644, - "learning_rate": 0.00019999626494702575, - "loss": 46.0, - "step": 35993 - }, - { - "epoch": 2.7519926601295945, - "grad_norm": 0.0015694230096414685, - "learning_rate": 0.00019999626473942317, - "loss": 46.0, - "step": 35994 - }, - { - "epoch": 2.7520691171129843, - "grad_norm": 0.0018481186125427485, - "learning_rate": 0.00019999626453181483, - "loss": 46.0, - "step": 35995 - }, - { - "epoch": 2.752145574096374, - "grad_norm": 0.0029249568469822407, - "learning_rate": 0.00019999626432420074, - "loss": 46.0, - "step": 35996 - }, - { - "epoch": 2.752222031079764, - "grad_norm": 0.002087234053760767, - "learning_rate": 0.00019999626411658085, - "loss": 46.0, - "step": 35997 - }, - { - "epoch": 2.7522984880631536, - "grad_norm": 0.0006739587406627834, - "learning_rate": 0.0001999962639089552, - "loss": 46.0, - "step": 35998 - }, - { - "epoch": 2.7523749450465433, - "grad_norm": 0.002084338106215, - "learning_rate": 0.00019999626370132377, - "loss": 46.0, - "step": 35999 - }, - { - "epoch": 2.7524514020299327, - "grad_norm": 0.0034885734785348177, - "learning_rate": 0.00019999626349368661, - "loss": 46.0, - "step": 36000 - }, - { - "epoch": 2.7525278590133224, - "grad_norm": 0.004073447547852993, - "learning_rate": 0.00019999626328604366, - "loss": 46.0, - "step": 36001 - }, - { - "epoch": 2.752604315996712, - "grad_norm": 0.008138821460306644, - "learning_rate": 0.00019999626307839493, - "loss": 46.0, - "step": 36002 - }, - { - "epoch": 2.752680772980102, - "grad_norm": 0.00457029277458787, - "learning_rate": 0.00019999626287074045, - "loss": 46.0, - "step": 36003 - }, - { - "epoch": 2.7527572299634917, - "grad_norm": 0.0012259484501555562, - "learning_rate": 0.00019999626266308017, - "loss": 46.0, - "step": 36004 - }, - { - "epoch": 2.7528336869468815, - "grad_norm": 0.003478058846667409, - "learning_rate": 0.00019999626245541415, - "loss": 46.0, - "step": 36005 - }, - { - "epoch": 2.7529101439302712, - "grad_norm": 0.0032879735808819532, - "learning_rate": 0.00019999626224774235, - "loss": 46.0, - "step": 36006 - }, - { - "epoch": 2.752986600913661, - "grad_norm": 0.0019854367710649967, - "learning_rate": 0.00019999626204006478, - "loss": 46.0, - "step": 36007 - }, - { - "epoch": 2.7530630578970507, - "grad_norm": 0.001890413579531014, - "learning_rate": 0.00019999626183238146, - "loss": 46.0, - "step": 36008 - }, - { - "epoch": 2.7531395148804405, - "grad_norm": 0.001008575432933867, - "learning_rate": 0.00019999626162469234, - "loss": 46.0, - "step": 36009 - }, - { - "epoch": 2.7532159718638303, - "grad_norm": 0.0013948059640824795, - "learning_rate": 0.00019999626141699745, - "loss": 46.0, - "step": 36010 - }, - { - "epoch": 2.75329242884722, - "grad_norm": 0.0015301278326660395, - "learning_rate": 0.0001999962612092968, - "loss": 46.0, - "step": 36011 - }, - { - "epoch": 2.75336888583061, - "grad_norm": 0.0025844676420092583, - "learning_rate": 0.00019999626100159038, - "loss": 46.0, - "step": 36012 - }, - { - "epoch": 2.753445342813999, - "grad_norm": 0.0061693922616541386, - "learning_rate": 0.00019999626079387822, - "loss": 46.0, - "step": 36013 - }, - { - "epoch": 2.753521799797389, - "grad_norm": 0.0053797210566699505, - "learning_rate": 0.00019999626058616026, - "loss": 46.0, - "step": 36014 - }, - { - "epoch": 2.7535982567807786, - "grad_norm": 0.0013666145969182253, - "learning_rate": 0.00019999626037843653, - "loss": 46.0, - "step": 36015 - }, - { - "epoch": 2.7536747137641684, - "grad_norm": 0.004994364455342293, - "learning_rate": 0.00019999626017070705, - "loss": 46.0, - "step": 36016 - }, - { - "epoch": 2.753751170747558, - "grad_norm": 0.0019044834189116955, - "learning_rate": 0.00019999625996297178, - "loss": 46.0, - "step": 36017 - }, - { - "epoch": 2.753827627730948, - "grad_norm": 0.0020784682128578424, - "learning_rate": 0.00019999625975523075, - "loss": 46.0, - "step": 36018 - }, - { - "epoch": 2.7539040847143377, - "grad_norm": 0.0018723491812124848, - "learning_rate": 0.00019999625954748393, - "loss": 46.0, - "step": 36019 - }, - { - "epoch": 2.7539805416977274, - "grad_norm": 0.0008518744725733995, - "learning_rate": 0.00019999625933973139, - "loss": 46.0, - "step": 36020 - }, - { - "epoch": 2.754056998681117, - "grad_norm": 0.0081937275826931, - "learning_rate": 0.00019999625913197304, - "loss": 46.0, - "step": 36021 - }, - { - "epoch": 2.7541334556645065, - "grad_norm": 0.0035514591727405787, - "learning_rate": 0.00019999625892420895, - "loss": 46.0, - "step": 36022 - }, - { - "epoch": 2.7542099126478963, - "grad_norm": 0.002853967482224107, - "learning_rate": 0.00019999625871643906, - "loss": 46.0, - "step": 36023 - }, - { - "epoch": 2.754286369631286, - "grad_norm": 0.002367012668401003, - "learning_rate": 0.00019999625850866342, - "loss": 46.0, - "step": 36024 - }, - { - "epoch": 2.754362826614676, - "grad_norm": 0.0019829736556857824, - "learning_rate": 0.000199996258300882, - "loss": 46.0, - "step": 36025 - }, - { - "epoch": 2.7544392835980656, - "grad_norm": 0.0019415569258853793, - "learning_rate": 0.0001999962580930948, - "loss": 46.0, - "step": 36026 - }, - { - "epoch": 2.7545157405814553, - "grad_norm": 0.0017016794299706817, - "learning_rate": 0.00019999625788530185, - "loss": 46.0, - "step": 36027 - }, - { - "epoch": 2.754592197564845, - "grad_norm": 0.0019193514017388225, - "learning_rate": 0.00019999625767750314, - "loss": 46.0, - "step": 36028 - }, - { - "epoch": 2.754668654548235, - "grad_norm": 0.0009293374023400247, - "learning_rate": 0.00019999625746969867, - "loss": 46.0, - "step": 36029 - }, - { - "epoch": 2.7547451115316246, - "grad_norm": 0.0015002108411863446, - "learning_rate": 0.0001999962572618884, - "loss": 46.0, - "step": 36030 - }, - { - "epoch": 2.7548215685150144, - "grad_norm": 0.001663388917222619, - "learning_rate": 0.00019999625705407234, - "loss": 46.0, - "step": 36031 - }, - { - "epoch": 2.754898025498404, - "grad_norm": 0.001732103293761611, - "learning_rate": 0.00019999625684625057, - "loss": 46.0, - "step": 36032 - }, - { - "epoch": 2.754974482481794, - "grad_norm": 0.0009997776942327619, - "learning_rate": 0.000199996256638423, - "loss": 46.0, - "step": 36033 - }, - { - "epoch": 2.7550509394651836, - "grad_norm": 0.0010868809185922146, - "learning_rate": 0.00019999625643058963, - "loss": 46.0, - "step": 36034 - }, - { - "epoch": 2.755127396448573, - "grad_norm": 0.006627646274864674, - "learning_rate": 0.00019999625622275054, - "loss": 46.0, - "step": 36035 - }, - { - "epoch": 2.7552038534319627, - "grad_norm": 0.006199042312800884, - "learning_rate": 0.00019999625601490565, - "loss": 46.0, - "step": 36036 - }, - { - "epoch": 2.7552803104153525, - "grad_norm": 0.001618643756955862, - "learning_rate": 0.000199996255807055, - "loss": 46.0, - "step": 36037 - }, - { - "epoch": 2.7553567673987422, - "grad_norm": 0.006239763461053371, - "learning_rate": 0.0001999962555991986, - "loss": 46.0, - "step": 36038 - }, - { - "epoch": 2.755433224382132, - "grad_norm": 0.00206790491938591, - "learning_rate": 0.00019999625539133642, - "loss": 46.0, - "step": 36039 - }, - { - "epoch": 2.7555096813655218, - "grad_norm": 0.002926650457084179, - "learning_rate": 0.00019999625518346846, - "loss": 46.0, - "step": 36040 - }, - { - "epoch": 2.7555861383489115, - "grad_norm": 0.0022546935360878706, - "learning_rate": 0.00019999625497559473, - "loss": 46.0, - "step": 36041 - }, - { - "epoch": 2.7556625953323013, - "grad_norm": 0.0048040603287518024, - "learning_rate": 0.00019999625476771525, - "loss": 46.0, - "step": 36042 - }, - { - "epoch": 2.7557390523156906, - "grad_norm": 0.011813667602837086, - "learning_rate": 0.00019999625455982998, - "loss": 46.0, - "step": 36043 - }, - { - "epoch": 2.7558155092990804, - "grad_norm": 0.0035084239207208157, - "learning_rate": 0.00019999625435193895, - "loss": 46.0, - "step": 36044 - }, - { - "epoch": 2.75589196628247, - "grad_norm": 0.0012613576836884022, - "learning_rate": 0.00019999625414404216, - "loss": 46.0, - "step": 36045 - }, - { - "epoch": 2.75596842326586, - "grad_norm": 0.0015386604936793447, - "learning_rate": 0.00019999625393613956, - "loss": 46.0, - "step": 36046 - }, - { - "epoch": 2.7560448802492497, - "grad_norm": 0.0007678747642785311, - "learning_rate": 0.00019999625372823124, - "loss": 46.0, - "step": 36047 - }, - { - "epoch": 2.7561213372326394, - "grad_norm": 0.003417386207729578, - "learning_rate": 0.00019999625352031713, - "loss": 46.0, - "step": 36048 - }, - { - "epoch": 2.756197794216029, - "grad_norm": 0.0015921785961836576, - "learning_rate": 0.00019999625331239726, - "loss": 46.0, - "step": 36049 - }, - { - "epoch": 2.756274251199419, - "grad_norm": 0.006827584933489561, - "learning_rate": 0.0001999962531044716, - "loss": 46.0, - "step": 36050 - }, - { - "epoch": 2.7563507081828087, - "grad_norm": 0.002582926768809557, - "learning_rate": 0.0001999962528965402, - "loss": 46.0, - "step": 36051 - }, - { - "epoch": 2.7564271651661985, - "grad_norm": 0.0012997756712138653, - "learning_rate": 0.000199996252688603, - "loss": 46.0, - "step": 36052 - }, - { - "epoch": 2.756503622149588, - "grad_norm": 0.0009591053240001202, - "learning_rate": 0.00019999625248066005, - "loss": 46.0, - "step": 36053 - }, - { - "epoch": 2.756580079132978, - "grad_norm": 0.002003337722271681, - "learning_rate": 0.00019999625227271132, - "loss": 46.0, - "step": 36054 - }, - { - "epoch": 2.7566565361163677, - "grad_norm": 0.0012922347523272038, - "learning_rate": 0.00019999625206475682, - "loss": 46.0, - "step": 36055 - }, - { - "epoch": 2.7567329930997575, - "grad_norm": 0.0029477658681571484, - "learning_rate": 0.00019999625185679657, - "loss": 46.0, - "step": 36056 - }, - { - "epoch": 2.756809450083147, - "grad_norm": 0.0017977464012801647, - "learning_rate": 0.00019999625164883055, - "loss": 46.0, - "step": 36057 - }, - { - "epoch": 2.7568859070665366, - "grad_norm": 0.004344752058386803, - "learning_rate": 0.00019999625144085875, - "loss": 46.0, - "step": 36058 - }, - { - "epoch": 2.7569623640499263, - "grad_norm": 0.0014876354252919555, - "learning_rate": 0.00019999625123288118, - "loss": 46.0, - "step": 36059 - }, - { - "epoch": 2.757038821033316, - "grad_norm": 0.004128091968595982, - "learning_rate": 0.00019999625102489784, - "loss": 46.0, - "step": 36060 - }, - { - "epoch": 2.757115278016706, - "grad_norm": 0.0014715323923155665, - "learning_rate": 0.00019999625081690875, - "loss": 46.0, - "step": 36061 - }, - { - "epoch": 2.7571917350000956, - "grad_norm": 0.0011906761210411787, - "learning_rate": 0.00019999625060891386, - "loss": 46.0, - "step": 36062 - }, - { - "epoch": 2.7572681919834854, - "grad_norm": 0.0031480940524488688, - "learning_rate": 0.0001999962504009132, - "loss": 46.0, - "step": 36063 - }, - { - "epoch": 2.757344648966875, - "grad_norm": 0.0014148110058158636, - "learning_rate": 0.00019999625019290678, - "loss": 46.0, - "step": 36064 - }, - { - "epoch": 2.7574211059502645, - "grad_norm": 0.0014582074945792556, - "learning_rate": 0.0001999962499848946, - "loss": 46.0, - "step": 36065 - }, - { - "epoch": 2.7574975629336542, - "grad_norm": 0.0007444196962751448, - "learning_rate": 0.00019999624977687662, - "loss": 46.0, - "step": 36066 - }, - { - "epoch": 2.757574019917044, - "grad_norm": 0.0017299795290455222, - "learning_rate": 0.00019999624956885292, - "loss": 46.0, - "step": 36067 - }, - { - "epoch": 2.7576504769004337, - "grad_norm": 0.0023293776903301477, - "learning_rate": 0.00019999624936082344, - "loss": 46.0, - "step": 36068 - }, - { - "epoch": 2.7577269338838235, - "grad_norm": 0.0019161797827109694, - "learning_rate": 0.00019999624915278816, - "loss": 46.0, - "step": 36069 - }, - { - "epoch": 2.7578033908672133, - "grad_norm": 0.002756755333393812, - "learning_rate": 0.00019999624894474714, - "loss": 46.0, - "step": 36070 - }, - { - "epoch": 2.757879847850603, - "grad_norm": 0.0016580346273258328, - "learning_rate": 0.00019999624873670032, - "loss": 46.0, - "step": 36071 - }, - { - "epoch": 2.757956304833993, - "grad_norm": 0.0033509107306599617, - "learning_rate": 0.00019999624852864778, - "loss": 46.0, - "step": 36072 - }, - { - "epoch": 2.7580327618173825, - "grad_norm": 0.0018110659439116716, - "learning_rate": 0.00019999624832058943, - "loss": 46.0, - "step": 36073 - }, - { - "epoch": 2.7581092188007723, - "grad_norm": 0.00214362726546824, - "learning_rate": 0.00019999624811252532, - "loss": 46.0, - "step": 36074 - }, - { - "epoch": 2.758185675784162, - "grad_norm": 0.0007678482215851545, - "learning_rate": 0.00019999624790445543, - "loss": 46.0, - "step": 36075 - }, - { - "epoch": 2.758262132767552, - "grad_norm": 0.00136837107129395, - "learning_rate": 0.0001999962476963798, - "loss": 46.0, - "step": 36076 - }, - { - "epoch": 2.7583385897509416, - "grad_norm": 0.0011882303515449166, - "learning_rate": 0.00019999624748829838, - "loss": 46.0, - "step": 36077 - }, - { - "epoch": 2.7584150467343314, - "grad_norm": 0.000984542421065271, - "learning_rate": 0.00019999624728021117, - "loss": 46.0, - "step": 36078 - }, - { - "epoch": 2.7584915037177207, - "grad_norm": 0.0015301520470529795, - "learning_rate": 0.00019999624707211825, - "loss": 46.0, - "step": 36079 - }, - { - "epoch": 2.7585679607011104, - "grad_norm": 0.01070896815508604, - "learning_rate": 0.00019999624686401952, - "loss": 46.0, - "step": 36080 - }, - { - "epoch": 2.7586444176845, - "grad_norm": 0.0012187910033389926, - "learning_rate": 0.000199996246655915, - "loss": 46.0, - "step": 36081 - }, - { - "epoch": 2.75872087466789, - "grad_norm": 0.0012138438178226352, - "learning_rate": 0.00019999624644780476, - "loss": 46.0, - "step": 36082 - }, - { - "epoch": 2.7587973316512797, - "grad_norm": 0.004818329121917486, - "learning_rate": 0.00019999624623968874, - "loss": 46.0, - "step": 36083 - }, - { - "epoch": 2.7588737886346695, - "grad_norm": 0.005547572858631611, - "learning_rate": 0.00019999624603156692, - "loss": 46.0, - "step": 36084 - }, - { - "epoch": 2.7589502456180592, - "grad_norm": 0.003371372353285551, - "learning_rate": 0.00019999624582343938, - "loss": 46.0, - "step": 36085 - }, - { - "epoch": 2.759026702601449, - "grad_norm": 0.0012276427587494254, - "learning_rate": 0.00019999624561530604, - "loss": 46.0, - "step": 36086 - }, - { - "epoch": 2.7591031595848383, - "grad_norm": 0.0015174514846876264, - "learning_rate": 0.00019999624540716692, - "loss": 46.0, - "step": 36087 - }, - { - "epoch": 2.759179616568228, - "grad_norm": 0.0019222641130909324, - "learning_rate": 0.00019999624519902206, - "loss": 46.0, - "step": 36088 - }, - { - "epoch": 2.759256073551618, - "grad_norm": 0.001449376461096108, - "learning_rate": 0.0001999962449908714, - "loss": 46.0, - "step": 36089 - }, - { - "epoch": 2.7593325305350076, - "grad_norm": 0.003077549859881401, - "learning_rate": 0.00019999624478271496, - "loss": 46.0, - "step": 36090 - }, - { - "epoch": 2.7594089875183974, - "grad_norm": 0.0014875874621793628, - "learning_rate": 0.0001999962445745528, - "loss": 46.0, - "step": 36091 - }, - { - "epoch": 2.759485444501787, - "grad_norm": 0.002305681584402919, - "learning_rate": 0.00019999624436638485, - "loss": 46.0, - "step": 36092 - }, - { - "epoch": 2.759561901485177, - "grad_norm": 0.00429685739800334, - "learning_rate": 0.0001999962441582111, - "loss": 46.0, - "step": 36093 - }, - { - "epoch": 2.7596383584685666, - "grad_norm": 0.002804601565003395, - "learning_rate": 0.00019999624395003162, - "loss": 46.0, - "step": 36094 - }, - { - "epoch": 2.7597148154519564, - "grad_norm": 0.007309092208743095, - "learning_rate": 0.00019999624374184637, - "loss": 46.0, - "step": 36095 - }, - { - "epoch": 2.759791272435346, - "grad_norm": 0.002028807532042265, - "learning_rate": 0.00019999624353365532, - "loss": 46.0, - "step": 36096 - }, - { - "epoch": 2.759867729418736, - "grad_norm": 0.0016862526535987854, - "learning_rate": 0.00019999624332545852, - "loss": 46.0, - "step": 36097 - }, - { - "epoch": 2.7599441864021257, - "grad_norm": 0.001222234801389277, - "learning_rate": 0.00019999624311725596, - "loss": 46.0, - "step": 36098 - }, - { - "epoch": 2.7600206433855154, - "grad_norm": 0.0014830922009423375, - "learning_rate": 0.0001999962429090476, - "loss": 46.0, - "step": 36099 - }, - { - "epoch": 2.760097100368905, - "grad_norm": 0.0014992174692451954, - "learning_rate": 0.0001999962427008335, - "loss": 46.0, - "step": 36100 - }, - { - "epoch": 2.7601735573522945, - "grad_norm": 0.0015757655492052436, - "learning_rate": 0.0001999962424926136, - "loss": 46.0, - "step": 36101 - }, - { - "epoch": 2.7602500143356843, - "grad_norm": 0.0027269606944173574, - "learning_rate": 0.00019999624228438797, - "loss": 46.0, - "step": 36102 - }, - { - "epoch": 2.760326471319074, - "grad_norm": 0.0009732483886182308, - "learning_rate": 0.00019999624207615657, - "loss": 46.0, - "step": 36103 - }, - { - "epoch": 2.760402928302464, - "grad_norm": 0.00180434447247535, - "learning_rate": 0.00019999624186791936, - "loss": 46.0, - "step": 36104 - }, - { - "epoch": 2.7604793852858536, - "grad_norm": 0.0014392690500244498, - "learning_rate": 0.00019999624165967643, - "loss": 46.0, - "step": 36105 - }, - { - "epoch": 2.7605558422692433, - "grad_norm": 0.006552328821271658, - "learning_rate": 0.0001999962414514277, - "loss": 46.0, - "step": 36106 - }, - { - "epoch": 2.760632299252633, - "grad_norm": 0.005886552855372429, - "learning_rate": 0.0001999962412431732, - "loss": 46.0, - "step": 36107 - }, - { - "epoch": 2.760708756236023, - "grad_norm": 0.0030877916142344475, - "learning_rate": 0.00019999624103491295, - "loss": 46.0, - "step": 36108 - }, - { - "epoch": 2.760785213219412, - "grad_norm": 0.002319512888789177, - "learning_rate": 0.00019999624082664693, - "loss": 46.0, - "step": 36109 - }, - { - "epoch": 2.760861670202802, - "grad_norm": 0.0023680205922573805, - "learning_rate": 0.0001999962406183751, - "loss": 46.0, - "step": 36110 - }, - { - "epoch": 2.7609381271861917, - "grad_norm": 0.006912963464856148, - "learning_rate": 0.00019999624041009754, - "loss": 46.0, - "step": 36111 - }, - { - "epoch": 2.7610145841695815, - "grad_norm": 0.0069908746518194675, - "learning_rate": 0.0001999962402018142, - "loss": 46.0, - "step": 36112 - }, - { - "epoch": 2.761091041152971, - "grad_norm": 0.0026829326525330544, - "learning_rate": 0.00019999623999352508, - "loss": 46.0, - "step": 36113 - }, - { - "epoch": 2.761167498136361, - "grad_norm": 0.004067771602421999, - "learning_rate": 0.00019999623978523022, - "loss": 46.0, - "step": 36114 - }, - { - "epoch": 2.7612439551197507, - "grad_norm": 0.0021445206366479397, - "learning_rate": 0.00019999623957692956, - "loss": 46.0, - "step": 36115 - }, - { - "epoch": 2.7613204121031405, - "grad_norm": 0.001799249555915594, - "learning_rate": 0.00019999623936862318, - "loss": 46.0, - "step": 36116 - }, - { - "epoch": 2.7613968690865303, - "grad_norm": 0.0019214011263102293, - "learning_rate": 0.00019999623916031097, - "loss": 46.0, - "step": 36117 - }, - { - "epoch": 2.76147332606992, - "grad_norm": 0.0016728892223909497, - "learning_rate": 0.000199996238951993, - "loss": 46.0, - "step": 36118 - }, - { - "epoch": 2.76154978305331, - "grad_norm": 0.0009442161535844207, - "learning_rate": 0.0001999962387436693, - "loss": 46.0, - "step": 36119 - }, - { - "epoch": 2.7616262400366995, - "grad_norm": 0.004111644346266985, - "learning_rate": 0.0001999962385353398, - "loss": 46.0, - "step": 36120 - }, - { - "epoch": 2.7617026970200893, - "grad_norm": 0.0012342092813923955, - "learning_rate": 0.00019999623832700454, - "loss": 46.0, - "step": 36121 - }, - { - "epoch": 2.761779154003479, - "grad_norm": 0.006362533662468195, - "learning_rate": 0.00019999623811866351, - "loss": 46.0, - "step": 36122 - }, - { - "epoch": 2.7618556109868684, - "grad_norm": 0.001338394358754158, - "learning_rate": 0.00019999623791031672, - "loss": 46.0, - "step": 36123 - }, - { - "epoch": 2.761932067970258, - "grad_norm": 0.004385354463011026, - "learning_rate": 0.00019999623770196415, - "loss": 46.0, - "step": 36124 - }, - { - "epoch": 2.762008524953648, - "grad_norm": 0.0026679078582674265, - "learning_rate": 0.00019999623749360578, - "loss": 46.0, - "step": 36125 - }, - { - "epoch": 2.7620849819370377, - "grad_norm": 0.0011312422575429082, - "learning_rate": 0.0001999962372852417, - "loss": 46.0, - "step": 36126 - }, - { - "epoch": 2.7621614389204274, - "grad_norm": 0.0019035037839785218, - "learning_rate": 0.0001999962370768718, - "loss": 46.0, - "step": 36127 - }, - { - "epoch": 2.762237895903817, - "grad_norm": 0.001605572528205812, - "learning_rate": 0.00019999623686849617, - "loss": 46.0, - "step": 36128 - }, - { - "epoch": 2.762314352887207, - "grad_norm": 0.0028997021727263927, - "learning_rate": 0.00019999623666011474, - "loss": 46.0, - "step": 36129 - }, - { - "epoch": 2.7623908098705967, - "grad_norm": 0.004105194937437773, - "learning_rate": 0.00019999623645172756, - "loss": 46.0, - "step": 36130 - }, - { - "epoch": 2.762467266853986, - "grad_norm": 0.008957820944488049, - "learning_rate": 0.0001999962362433346, - "loss": 46.0, - "step": 36131 - }, - { - "epoch": 2.762543723837376, - "grad_norm": 0.004277107771486044, - "learning_rate": 0.0001999962360349359, - "loss": 46.0, - "step": 36132 - }, - { - "epoch": 2.7626201808207655, - "grad_norm": 0.005884347017854452, - "learning_rate": 0.0001999962358265314, - "loss": 46.0, - "step": 36133 - }, - { - "epoch": 2.7626966378041553, - "grad_norm": 0.003550578374415636, - "learning_rate": 0.00019999623561812112, - "loss": 46.0, - "step": 36134 - }, - { - "epoch": 2.762773094787545, - "grad_norm": 0.002957143122330308, - "learning_rate": 0.0001999962354097051, - "loss": 46.0, - "step": 36135 - }, - { - "epoch": 2.762849551770935, - "grad_norm": 0.0036615384742617607, - "learning_rate": 0.0001999962352012833, - "loss": 46.0, - "step": 36136 - }, - { - "epoch": 2.7629260087543246, - "grad_norm": 0.003823390230536461, - "learning_rate": 0.00019999623499285574, - "loss": 46.0, - "step": 36137 - }, - { - "epoch": 2.7630024657377144, - "grad_norm": 0.0018904708558693528, - "learning_rate": 0.00019999623478442237, - "loss": 46.0, - "step": 36138 - }, - { - "epoch": 2.763078922721104, - "grad_norm": 0.003042828291654587, - "learning_rate": 0.00019999623457598329, - "loss": 46.0, - "step": 36139 - }, - { - "epoch": 2.763155379704494, - "grad_norm": 0.0005860620294697583, - "learning_rate": 0.00019999623436753837, - "loss": 46.0, - "step": 36140 - }, - { - "epoch": 2.7632318366878836, - "grad_norm": 0.00408286415040493, - "learning_rate": 0.00019999623415908774, - "loss": 46.0, - "step": 36141 - }, - { - "epoch": 2.7633082936712734, - "grad_norm": 0.001012353110127151, - "learning_rate": 0.00019999623395063133, - "loss": 46.0, - "step": 36142 - }, - { - "epoch": 2.763384750654663, - "grad_norm": 0.004084533080458641, - "learning_rate": 0.00019999623374216915, - "loss": 46.0, - "step": 36143 - }, - { - "epoch": 2.7634612076380525, - "grad_norm": 0.0017718251328915358, - "learning_rate": 0.0001999962335337012, - "loss": 46.0, - "step": 36144 - }, - { - "epoch": 2.7635376646214422, - "grad_norm": 0.0029293482657521963, - "learning_rate": 0.00019999623332522747, - "loss": 46.0, - "step": 36145 - }, - { - "epoch": 2.763614121604832, - "grad_norm": 0.00159519596491009, - "learning_rate": 0.00019999623311674797, - "loss": 46.0, - "step": 36146 - }, - { - "epoch": 2.7636905785882218, - "grad_norm": 0.001751039526425302, - "learning_rate": 0.0001999962329082627, - "loss": 46.0, - "step": 36147 - }, - { - "epoch": 2.7637670355716115, - "grad_norm": 0.003855740884318948, - "learning_rate": 0.00019999623269977167, - "loss": 46.0, - "step": 36148 - }, - { - "epoch": 2.7638434925550013, - "grad_norm": 0.001822623424232006, - "learning_rate": 0.00019999623249127488, - "loss": 46.0, - "step": 36149 - }, - { - "epoch": 2.763919949538391, - "grad_norm": 0.002561405999585986, - "learning_rate": 0.0001999962322827723, - "loss": 46.0, - "step": 36150 - }, - { - "epoch": 2.763996406521781, - "grad_norm": 0.001635123509913683, - "learning_rate": 0.00019999623207426397, - "loss": 46.0, - "step": 36151 - }, - { - "epoch": 2.7640728635051706, - "grad_norm": 0.0016717080725356936, - "learning_rate": 0.00019999623186574988, - "loss": 46.0, - "step": 36152 - }, - { - "epoch": 2.76414932048856, - "grad_norm": 0.0024649505503475666, - "learning_rate": 0.00019999623165723, - "loss": 46.0, - "step": 36153 - }, - { - "epoch": 2.7642257774719496, - "grad_norm": 0.0011624830076470971, - "learning_rate": 0.00019999623144870433, - "loss": 46.0, - "step": 36154 - }, - { - "epoch": 2.7643022344553394, - "grad_norm": 0.003462236374616623, - "learning_rate": 0.00019999623124017293, - "loss": 46.0, - "step": 36155 - }, - { - "epoch": 2.764378691438729, - "grad_norm": 0.0019129704451188445, - "learning_rate": 0.00019999623103163574, - "loss": 46.0, - "step": 36156 - }, - { - "epoch": 2.764455148422119, - "grad_norm": 0.0025555144529789686, - "learning_rate": 0.0001999962308230928, - "loss": 46.0, - "step": 36157 - }, - { - "epoch": 2.7645316054055087, - "grad_norm": 0.006339132320135832, - "learning_rate": 0.00019999623061454406, - "loss": 46.0, - "step": 36158 - }, - { - "epoch": 2.7646080623888984, - "grad_norm": 0.0020024816039949656, - "learning_rate": 0.00019999623040598956, - "loss": 46.0, - "step": 36159 - }, - { - "epoch": 2.764684519372288, - "grad_norm": 0.00119693367742002, - "learning_rate": 0.0001999962301974293, - "loss": 46.0, - "step": 36160 - }, - { - "epoch": 2.764760976355678, - "grad_norm": 0.00252747954800725, - "learning_rate": 0.00019999622998886327, - "loss": 46.0, - "step": 36161 - }, - { - "epoch": 2.7648374333390677, - "grad_norm": 0.001226059626787901, - "learning_rate": 0.00019999622978029147, - "loss": 46.0, - "step": 36162 - }, - { - "epoch": 2.7649138903224575, - "grad_norm": 0.008969691582024097, - "learning_rate": 0.00019999622957171388, - "loss": 46.0, - "step": 36163 - }, - { - "epoch": 2.7649903473058473, - "grad_norm": 0.0032498487271368504, - "learning_rate": 0.00019999622936313057, - "loss": 46.0, - "step": 36164 - }, - { - "epoch": 2.765066804289237, - "grad_norm": 0.0022688761819154024, - "learning_rate": 0.00019999622915454145, - "loss": 46.0, - "step": 36165 - }, - { - "epoch": 2.7651432612726263, - "grad_norm": 0.0025000951718539, - "learning_rate": 0.0001999962289459466, - "loss": 46.0, - "step": 36166 - }, - { - "epoch": 2.765219718256016, - "grad_norm": 0.0007303148158825934, - "learning_rate": 0.00019999622873734593, - "loss": 46.0, - "step": 36167 - }, - { - "epoch": 2.765296175239406, - "grad_norm": 0.007277305703610182, - "learning_rate": 0.00019999622852873953, - "loss": 46.0, - "step": 36168 - }, - { - "epoch": 2.7653726322227956, - "grad_norm": 0.0010990467853844166, - "learning_rate": 0.00019999622832012732, - "loss": 46.0, - "step": 36169 - }, - { - "epoch": 2.7654490892061854, - "grad_norm": 0.0014773707371205091, - "learning_rate": 0.00019999622811150936, - "loss": 46.0, - "step": 36170 - }, - { - "epoch": 2.765525546189575, - "grad_norm": 0.003702755318954587, - "learning_rate": 0.00019999622790288566, - "loss": 46.0, - "step": 36171 - }, - { - "epoch": 2.765602003172965, - "grad_norm": 0.0007949652499519289, - "learning_rate": 0.00019999622769425614, - "loss": 46.0, - "step": 36172 - }, - { - "epoch": 2.7656784601563547, - "grad_norm": 0.001810187241062522, - "learning_rate": 0.0001999962274856209, - "loss": 46.0, - "step": 36173 - }, - { - "epoch": 2.7657549171397444, - "grad_norm": 0.0023992531932890415, - "learning_rate": 0.00019999622727697984, - "loss": 46.0, - "step": 36174 - }, - { - "epoch": 2.7658313741231337, - "grad_norm": 0.0015166165539994836, - "learning_rate": 0.00019999622706833305, - "loss": 46.0, - "step": 36175 - }, - { - "epoch": 2.7659078311065235, - "grad_norm": 0.002091736765578389, - "learning_rate": 0.00019999622685968048, - "loss": 46.0, - "step": 36176 - }, - { - "epoch": 2.7659842880899133, - "grad_norm": 0.0018164057983085513, - "learning_rate": 0.00019999622665102214, - "loss": 46.0, - "step": 36177 - }, - { - "epoch": 2.766060745073303, - "grad_norm": 0.0012643246445804834, - "learning_rate": 0.00019999622644235806, - "loss": 46.0, - "step": 36178 - }, - { - "epoch": 2.766137202056693, - "grad_norm": 0.0014303921489045024, - "learning_rate": 0.00019999622623368817, - "loss": 46.0, - "step": 36179 - }, - { - "epoch": 2.7662136590400825, - "grad_norm": 0.0027956566773355007, - "learning_rate": 0.0001999962260250125, - "loss": 46.0, - "step": 36180 - }, - { - "epoch": 2.7662901160234723, - "grad_norm": 0.0030597124714404345, - "learning_rate": 0.00019999622581633108, - "loss": 46.0, - "step": 36181 - }, - { - "epoch": 2.766366573006862, - "grad_norm": 0.0010386410867795348, - "learning_rate": 0.0001999962256076439, - "loss": 46.0, - "step": 36182 - }, - { - "epoch": 2.766443029990252, - "grad_norm": 0.005176815204322338, - "learning_rate": 0.00019999622539895094, - "loss": 46.0, - "step": 36183 - }, - { - "epoch": 2.7665194869736416, - "grad_norm": 0.002462107688188553, - "learning_rate": 0.00019999622519025222, - "loss": 46.0, - "step": 36184 - }, - { - "epoch": 2.7665959439570313, - "grad_norm": 0.005378553178161383, - "learning_rate": 0.00019999622498154774, - "loss": 46.0, - "step": 36185 - }, - { - "epoch": 2.766672400940421, - "grad_norm": 0.0006650412105955184, - "learning_rate": 0.00019999622477283747, - "loss": 46.0, - "step": 36186 - }, - { - "epoch": 2.766748857923811, - "grad_norm": 0.0008463840349577367, - "learning_rate": 0.00019999622456412145, - "loss": 46.0, - "step": 36187 - }, - { - "epoch": 2.7668253149072, - "grad_norm": 0.0014053566846996546, - "learning_rate": 0.00019999622435539963, - "loss": 46.0, - "step": 36188 - }, - { - "epoch": 2.76690177189059, - "grad_norm": 0.0012803409481421113, - "learning_rate": 0.00019999622414667206, - "loss": 46.0, - "step": 36189 - }, - { - "epoch": 2.7669782288739797, - "grad_norm": 0.0019033501157537103, - "learning_rate": 0.00019999622393793872, - "loss": 46.0, - "step": 36190 - }, - { - "epoch": 2.7670546858573695, - "grad_norm": 0.000809476594440639, - "learning_rate": 0.00019999622372919964, - "loss": 46.0, - "step": 36191 - }, - { - "epoch": 2.7671311428407592, - "grad_norm": 0.0021041552536189556, - "learning_rate": 0.00019999622352045475, - "loss": 46.0, - "step": 36192 - }, - { - "epoch": 2.767207599824149, - "grad_norm": 0.0007359951268881559, - "learning_rate": 0.0001999962233117041, - "loss": 46.0, - "step": 36193 - }, - { - "epoch": 2.7672840568075388, - "grad_norm": 0.0015482243616133928, - "learning_rate": 0.00019999622310294768, - "loss": 46.0, - "step": 36194 - }, - { - "epoch": 2.7673605137909285, - "grad_norm": 0.002026343485340476, - "learning_rate": 0.0001999962228941855, - "loss": 46.0, - "step": 36195 - }, - { - "epoch": 2.767436970774318, - "grad_norm": 0.0033258041366934776, - "learning_rate": 0.00019999622268541755, - "loss": 46.0, - "step": 36196 - }, - { - "epoch": 2.7675134277577076, - "grad_norm": 0.0007042661891318858, - "learning_rate": 0.0001999962224766438, - "loss": 46.0, - "step": 36197 - }, - { - "epoch": 2.7675898847410974, - "grad_norm": 0.0012359219836071134, - "learning_rate": 0.00019999622226786433, - "loss": 46.0, - "step": 36198 - }, - { - "epoch": 2.767666341724487, - "grad_norm": 0.001761345542035997, - "learning_rate": 0.00019999622205907905, - "loss": 46.0, - "step": 36199 - }, - { - "epoch": 2.767742798707877, - "grad_norm": 0.004379366058856249, - "learning_rate": 0.00019999622185028803, - "loss": 46.0, - "step": 36200 - }, - { - "epoch": 2.7678192556912666, - "grad_norm": 0.004129890818148851, - "learning_rate": 0.00019999622164149124, - "loss": 46.0, - "step": 36201 - }, - { - "epoch": 2.7678957126746564, - "grad_norm": 0.0027833811473101377, - "learning_rate": 0.00019999622143268865, - "loss": 46.0, - "step": 36202 - }, - { - "epoch": 2.767972169658046, - "grad_norm": 0.005369459744542837, - "learning_rate": 0.0001999962212238803, - "loss": 46.0, - "step": 36203 - }, - { - "epoch": 2.768048626641436, - "grad_norm": 0.005488489288836718, - "learning_rate": 0.00019999622101506622, - "loss": 46.0, - "step": 36204 - }, - { - "epoch": 2.7681250836248257, - "grad_norm": 0.0015503311296924949, - "learning_rate": 0.00019999622080624633, - "loss": 46.0, - "step": 36205 - }, - { - "epoch": 2.7682015406082154, - "grad_norm": 0.0011316556483507156, - "learning_rate": 0.00019999622059742068, - "loss": 46.0, - "step": 36206 - }, - { - "epoch": 2.768277997591605, - "grad_norm": 0.002888135612010956, - "learning_rate": 0.00019999622038858927, - "loss": 46.0, - "step": 36207 - }, - { - "epoch": 2.768354454574995, - "grad_norm": 0.0037156047765165567, - "learning_rate": 0.0001999962201797521, - "loss": 46.0, - "step": 36208 - }, - { - "epoch": 2.7684309115583847, - "grad_norm": 0.0015997167211025953, - "learning_rate": 0.0001999962199709091, - "loss": 46.0, - "step": 36209 - }, - { - "epoch": 2.768507368541774, - "grad_norm": 0.0008313936414197087, - "learning_rate": 0.0001999962197620604, - "loss": 46.0, - "step": 36210 - }, - { - "epoch": 2.768583825525164, - "grad_norm": 0.001955775311216712, - "learning_rate": 0.0001999962195532059, - "loss": 46.0, - "step": 36211 - }, - { - "epoch": 2.7686602825085536, - "grad_norm": 0.0029694768600165844, - "learning_rate": 0.00019999621934434564, - "loss": 46.0, - "step": 36212 - }, - { - "epoch": 2.7687367394919433, - "grad_norm": 0.0021423816215246916, - "learning_rate": 0.00019999621913547962, - "loss": 46.0, - "step": 36213 - }, - { - "epoch": 2.768813196475333, - "grad_norm": 0.0011938296956941485, - "learning_rate": 0.0001999962189266078, - "loss": 46.0, - "step": 36214 - }, - { - "epoch": 2.768889653458723, - "grad_norm": 0.0014556162059307098, - "learning_rate": 0.00019999621871773023, - "loss": 46.0, - "step": 36215 - }, - { - "epoch": 2.7689661104421126, - "grad_norm": 0.00334395794197917, - "learning_rate": 0.00019999621850884692, - "loss": 46.0, - "step": 36216 - }, - { - "epoch": 2.7690425674255024, - "grad_norm": 0.0022246367298066616, - "learning_rate": 0.00019999621829995778, - "loss": 46.0, - "step": 36217 - }, - { - "epoch": 2.7691190244088917, - "grad_norm": 0.002446898492053151, - "learning_rate": 0.00019999621809106292, - "loss": 46.0, - "step": 36218 - }, - { - "epoch": 2.7691954813922814, - "grad_norm": 0.002417887095361948, - "learning_rate": 0.00019999621788216226, - "loss": 46.0, - "step": 36219 - }, - { - "epoch": 2.769271938375671, - "grad_norm": 0.0012067490024492145, - "learning_rate": 0.00019999621767325586, - "loss": 46.0, - "step": 36220 - }, - { - "epoch": 2.769348395359061, - "grad_norm": 0.003124658018350601, - "learning_rate": 0.00019999621746434368, - "loss": 46.0, - "step": 36221 - }, - { - "epoch": 2.7694248523424507, - "grad_norm": 0.006745269522070885, - "learning_rate": 0.00019999621725542573, - "loss": 46.0, - "step": 36222 - }, - { - "epoch": 2.7695013093258405, - "grad_norm": 0.0035063677933067083, - "learning_rate": 0.000199996217046502, - "loss": 46.0, - "step": 36223 - }, - { - "epoch": 2.7695777663092302, - "grad_norm": 0.003929018508642912, - "learning_rate": 0.0001999962168375725, - "loss": 46.0, - "step": 36224 - }, - { - "epoch": 2.76965422329262, - "grad_norm": 0.0029506676364690065, - "learning_rate": 0.00019999621662863723, - "loss": 46.0, - "step": 36225 - }, - { - "epoch": 2.7697306802760098, - "grad_norm": 0.0023165347520262003, - "learning_rate": 0.00019999621641969618, - "loss": 46.0, - "step": 36226 - }, - { - "epoch": 2.7698071372593995, - "grad_norm": 0.002396000549197197, - "learning_rate": 0.00019999621621074936, - "loss": 46.0, - "step": 36227 - }, - { - "epoch": 2.7698835942427893, - "grad_norm": 0.001416377373971045, - "learning_rate": 0.00019999621600179683, - "loss": 46.0, - "step": 36228 - }, - { - "epoch": 2.769960051226179, - "grad_norm": 0.0013749051140621305, - "learning_rate": 0.00019999621579283846, - "loss": 46.0, - "step": 36229 - }, - { - "epoch": 2.770036508209569, - "grad_norm": 0.002663565333932638, - "learning_rate": 0.00019999621558387438, - "loss": 46.0, - "step": 36230 - }, - { - "epoch": 2.7701129651929586, - "grad_norm": 0.0009916143026202917, - "learning_rate": 0.0001999962153749045, - "loss": 46.0, - "step": 36231 - }, - { - "epoch": 2.770189422176348, - "grad_norm": 0.0006273167091421783, - "learning_rate": 0.00019999621516592883, - "loss": 46.0, - "step": 36232 - }, - { - "epoch": 2.7702658791597377, - "grad_norm": 0.007817982695996761, - "learning_rate": 0.00019999621495694743, - "loss": 46.0, - "step": 36233 - }, - { - "epoch": 2.7703423361431274, - "grad_norm": 0.0025522843934595585, - "learning_rate": 0.00019999621474796025, - "loss": 46.0, - "step": 36234 - }, - { - "epoch": 2.770418793126517, - "grad_norm": 0.0009370943298563361, - "learning_rate": 0.0001999962145389673, - "loss": 46.0, - "step": 36235 - }, - { - "epoch": 2.770495250109907, - "grad_norm": 0.002330845221877098, - "learning_rate": 0.00019999621432996857, - "loss": 46.0, - "step": 36236 - }, - { - "epoch": 2.7705717070932967, - "grad_norm": 0.004805863369256258, - "learning_rate": 0.00019999621412096407, - "loss": 46.0, - "step": 36237 - }, - { - "epoch": 2.7706481640766865, - "grad_norm": 0.0023467671126127243, - "learning_rate": 0.00019999621391195382, - "loss": 46.0, - "step": 36238 - }, - { - "epoch": 2.770724621060076, - "grad_norm": 0.0021957475692033768, - "learning_rate": 0.00019999621370293778, - "loss": 46.0, - "step": 36239 - }, - { - "epoch": 2.7708010780434655, - "grad_norm": 0.004642194602638483, - "learning_rate": 0.00019999621349391596, - "loss": 46.0, - "step": 36240 - }, - { - "epoch": 2.7708775350268553, - "grad_norm": 0.0004261403810232878, - "learning_rate": 0.0001999962132848884, - "loss": 46.0, - "step": 36241 - }, - { - "epoch": 2.770953992010245, - "grad_norm": 0.005633929278701544, - "learning_rate": 0.00019999621307585508, - "loss": 46.0, - "step": 36242 - }, - { - "epoch": 2.771030448993635, - "grad_norm": 0.0016725407913327217, - "learning_rate": 0.00019999621286681594, - "loss": 46.0, - "step": 36243 - }, - { - "epoch": 2.7711069059770246, - "grad_norm": 0.002157253213226795, - "learning_rate": 0.00019999621265777106, - "loss": 46.0, - "step": 36244 - }, - { - "epoch": 2.7711833629604143, - "grad_norm": 0.002128210850059986, - "learning_rate": 0.00019999621244872043, - "loss": 46.0, - "step": 36245 - }, - { - "epoch": 2.771259819943804, - "grad_norm": 0.0013361454475671053, - "learning_rate": 0.000199996212239664, - "loss": 46.0, - "step": 36246 - }, - { - "epoch": 2.771336276927194, - "grad_norm": 0.002243607770651579, - "learning_rate": 0.00019999621203060182, - "loss": 46.0, - "step": 36247 - }, - { - "epoch": 2.7714127339105836, - "grad_norm": 0.0027922396548092365, - "learning_rate": 0.00019999621182153387, - "loss": 46.0, - "step": 36248 - }, - { - "epoch": 2.7714891908939734, - "grad_norm": 0.003739304607734084, - "learning_rate": 0.00019999621161246017, - "loss": 46.0, - "step": 36249 - }, - { - "epoch": 2.771565647877363, - "grad_norm": 0.0011978268157690763, - "learning_rate": 0.00019999621140338064, - "loss": 46.0, - "step": 36250 - }, - { - "epoch": 2.771642104860753, - "grad_norm": 0.002482454990968108, - "learning_rate": 0.0001999962111942954, - "loss": 46.0, - "step": 36251 - }, - { - "epoch": 2.7717185618441427, - "grad_norm": 0.0015692224260419607, - "learning_rate": 0.00019999621098520435, - "loss": 46.0, - "step": 36252 - }, - { - "epoch": 2.7717950188275324, - "grad_norm": 0.002782933646813035, - "learning_rate": 0.00019999621077610756, - "loss": 46.0, - "step": 36253 - }, - { - "epoch": 2.7718714758109217, - "grad_norm": 0.001485670218244195, - "learning_rate": 0.00019999621056700497, - "loss": 46.0, - "step": 36254 - }, - { - "epoch": 2.7719479327943115, - "grad_norm": 0.0019724643789231777, - "learning_rate": 0.00019999621035789663, - "loss": 46.0, - "step": 36255 - }, - { - "epoch": 2.7720243897777013, - "grad_norm": 0.005401813890784979, - "learning_rate": 0.00019999621014878255, - "loss": 46.0, - "step": 36256 - }, - { - "epoch": 2.772100846761091, - "grad_norm": 0.0036992630921304226, - "learning_rate": 0.00019999620993966266, - "loss": 46.0, - "step": 36257 - }, - { - "epoch": 2.772177303744481, - "grad_norm": 0.0020485948771238327, - "learning_rate": 0.000199996209730537, - "loss": 46.0, - "step": 36258 - }, - { - "epoch": 2.7722537607278706, - "grad_norm": 0.0007092243176884949, - "learning_rate": 0.0001999962095214056, - "loss": 46.0, - "step": 36259 - }, - { - "epoch": 2.7723302177112603, - "grad_norm": 0.0016095779137685895, - "learning_rate": 0.0001999962093122684, - "loss": 46.0, - "step": 36260 - }, - { - "epoch": 2.77240667469465, - "grad_norm": 0.0028140642680227757, - "learning_rate": 0.00019999620910312547, - "loss": 46.0, - "step": 36261 - }, - { - "epoch": 2.7724831316780394, - "grad_norm": 0.001295390771701932, - "learning_rate": 0.00019999620889397672, - "loss": 46.0, - "step": 36262 - }, - { - "epoch": 2.772559588661429, - "grad_norm": 0.007932894863188267, - "learning_rate": 0.00019999620868482222, - "loss": 46.0, - "step": 36263 - }, - { - "epoch": 2.772636045644819, - "grad_norm": 0.0014076223596930504, - "learning_rate": 0.00019999620847566197, - "loss": 46.0, - "step": 36264 - }, - { - "epoch": 2.7727125026282087, - "grad_norm": 0.0055397008545696735, - "learning_rate": 0.00019999620826649596, - "loss": 46.0, - "step": 36265 - }, - { - "epoch": 2.7727889596115984, - "grad_norm": 0.003996656276285648, - "learning_rate": 0.00019999620805732414, - "loss": 46.0, - "step": 36266 - }, - { - "epoch": 2.772865416594988, - "grad_norm": 0.004375852644443512, - "learning_rate": 0.00019999620784814657, - "loss": 46.0, - "step": 36267 - }, - { - "epoch": 2.772941873578378, - "grad_norm": 0.0020823837257921696, - "learning_rate": 0.00019999620763896323, - "loss": 46.0, - "step": 36268 - }, - { - "epoch": 2.7730183305617677, - "grad_norm": 0.001259038457646966, - "learning_rate": 0.00019999620742977412, - "loss": 46.0, - "step": 36269 - }, - { - "epoch": 2.7730947875451575, - "grad_norm": 0.001441759755834937, - "learning_rate": 0.00019999620722057924, - "loss": 46.0, - "step": 36270 - }, - { - "epoch": 2.7731712445285472, - "grad_norm": 0.00216677226126194, - "learning_rate": 0.0001999962070113786, - "loss": 46.0, - "step": 36271 - }, - { - "epoch": 2.773247701511937, - "grad_norm": 0.0010187334846705198, - "learning_rate": 0.00019999620680217218, - "loss": 46.0, - "step": 36272 - }, - { - "epoch": 2.7733241584953268, - "grad_norm": 0.0012521959142759442, - "learning_rate": 0.00019999620659296, - "loss": 46.0, - "step": 36273 - }, - { - "epoch": 2.7734006154787165, - "grad_norm": 0.007458158303052187, - "learning_rate": 0.00019999620638374205, - "loss": 46.0, - "step": 36274 - }, - { - "epoch": 2.773477072462106, - "grad_norm": 0.0010355290723964572, - "learning_rate": 0.0001999962061745183, - "loss": 46.0, - "step": 36275 - }, - { - "epoch": 2.7735535294454956, - "grad_norm": 0.002079756697639823, - "learning_rate": 0.0001999962059652888, - "loss": 46.0, - "step": 36276 - }, - { - "epoch": 2.7736299864288854, - "grad_norm": 0.003104130271822214, - "learning_rate": 0.00019999620575605355, - "loss": 46.0, - "step": 36277 - }, - { - "epoch": 2.773706443412275, - "grad_norm": 0.0018078532302752137, - "learning_rate": 0.00019999620554681254, - "loss": 46.0, - "step": 36278 - }, - { - "epoch": 2.773782900395665, - "grad_norm": 0.0025641294196248055, - "learning_rate": 0.00019999620533756572, - "loss": 46.0, - "step": 36279 - }, - { - "epoch": 2.7738593573790546, - "grad_norm": 0.004777280148118734, - "learning_rate": 0.00019999620512831315, - "loss": 46.0, - "step": 36280 - }, - { - "epoch": 2.7739358143624444, - "grad_norm": 0.0016596839996054769, - "learning_rate": 0.00019999620491905482, - "loss": 46.0, - "step": 36281 - }, - { - "epoch": 2.774012271345834, - "grad_norm": 0.0017896078061312437, - "learning_rate": 0.0001999962047097907, - "loss": 46.0, - "step": 36282 - }, - { - "epoch": 2.774088728329224, - "grad_norm": 0.0012772009940817952, - "learning_rate": 0.00019999620450052082, - "loss": 46.0, - "step": 36283 - }, - { - "epoch": 2.7741651853126132, - "grad_norm": 0.002763568190857768, - "learning_rate": 0.00019999620429124516, - "loss": 46.0, - "step": 36284 - }, - { - "epoch": 2.774241642296003, - "grad_norm": 0.003044476266950369, - "learning_rate": 0.00019999620408196376, - "loss": 46.0, - "step": 36285 - }, - { - "epoch": 2.7743180992793928, - "grad_norm": 0.0017673844704404473, - "learning_rate": 0.00019999620387267658, - "loss": 46.0, - "step": 36286 - }, - { - "epoch": 2.7743945562627825, - "grad_norm": 0.0011492582270875573, - "learning_rate": 0.0001999962036633836, - "loss": 46.0, - "step": 36287 - }, - { - "epoch": 2.7744710132461723, - "grad_norm": 0.0019949048291891813, - "learning_rate": 0.0001999962034540849, - "loss": 46.0, - "step": 36288 - }, - { - "epoch": 2.774547470229562, - "grad_norm": 0.003000054508447647, - "learning_rate": 0.00019999620324478038, - "loss": 46.0, - "step": 36289 - }, - { - "epoch": 2.774623927212952, - "grad_norm": 0.0009036542032845318, - "learning_rate": 0.00019999620303547014, - "loss": 46.0, - "step": 36290 - }, - { - "epoch": 2.7747003841963416, - "grad_norm": 0.003439062973484397, - "learning_rate": 0.0001999962028261541, - "loss": 46.0, - "step": 36291 - }, - { - "epoch": 2.7747768411797313, - "grad_norm": 0.0027141759637743235, - "learning_rate": 0.0001999962026168323, - "loss": 46.0, - "step": 36292 - }, - { - "epoch": 2.774853298163121, - "grad_norm": 0.001287938212044537, - "learning_rate": 0.0001999962024075047, - "loss": 46.0, - "step": 36293 - }, - { - "epoch": 2.774929755146511, - "grad_norm": 0.003848046064376831, - "learning_rate": 0.0001999962021981714, - "loss": 46.0, - "step": 36294 - }, - { - "epoch": 2.7750062121299006, - "grad_norm": 0.0013844300992786884, - "learning_rate": 0.00019999620198883226, - "loss": 46.0, - "step": 36295 - }, - { - "epoch": 2.7750826691132904, - "grad_norm": 0.0021978113800287247, - "learning_rate": 0.00019999620177948738, - "loss": 46.0, - "step": 36296 - }, - { - "epoch": 2.7751591260966797, - "grad_norm": 0.0028412803076207638, - "learning_rate": 0.00019999620157013675, - "loss": 46.0, - "step": 36297 - }, - { - "epoch": 2.7752355830800695, - "grad_norm": 0.0024475762620568275, - "learning_rate": 0.00019999620136078035, - "loss": 46.0, - "step": 36298 - }, - { - "epoch": 2.775312040063459, - "grad_norm": 0.003025034675374627, - "learning_rate": 0.00019999620115141814, - "loss": 46.0, - "step": 36299 - }, - { - "epoch": 2.775388497046849, - "grad_norm": 0.002512165578082204, - "learning_rate": 0.0001999962009420502, - "loss": 46.0, - "step": 36300 - }, - { - "epoch": 2.7754649540302387, - "grad_norm": 0.002844077069312334, - "learning_rate": 0.00019999620073267647, - "loss": 46.0, - "step": 36301 - }, - { - "epoch": 2.7755414110136285, - "grad_norm": 0.0025078949984163046, - "learning_rate": 0.00019999620052329697, - "loss": 46.0, - "step": 36302 - }, - { - "epoch": 2.7756178679970183, - "grad_norm": 0.0013857072917744517, - "learning_rate": 0.0001999962003139117, - "loss": 46.0, - "step": 36303 - }, - { - "epoch": 2.775694324980408, - "grad_norm": 0.0011191281955689192, - "learning_rate": 0.00019999620010452068, - "loss": 46.0, - "step": 36304 - }, - { - "epoch": 2.775770781963798, - "grad_norm": 0.002449200488626957, - "learning_rate": 0.00019999619989512386, - "loss": 46.0, - "step": 36305 - }, - { - "epoch": 2.775847238947187, - "grad_norm": 0.006454708520323038, - "learning_rate": 0.0001999961996857213, - "loss": 46.0, - "step": 36306 - }, - { - "epoch": 2.775923695930577, - "grad_norm": 0.0035495825577527285, - "learning_rate": 0.00019999619947631296, - "loss": 46.0, - "step": 36307 - }, - { - "epoch": 2.7760001529139666, - "grad_norm": 0.003855351125821471, - "learning_rate": 0.00019999619926689885, - "loss": 46.0, - "step": 36308 - }, - { - "epoch": 2.7760766098973564, - "grad_norm": 0.006759051699191332, - "learning_rate": 0.00019999619905747897, - "loss": 46.0, - "step": 36309 - }, - { - "epoch": 2.776153066880746, - "grad_norm": 0.0027375759091228247, - "learning_rate": 0.00019999619884805334, - "loss": 46.0, - "step": 36310 - }, - { - "epoch": 2.776229523864136, - "grad_norm": 0.0016428364906460047, - "learning_rate": 0.0001999961986386219, - "loss": 46.0, - "step": 36311 - }, - { - "epoch": 2.7763059808475257, - "grad_norm": 0.003984153736382723, - "learning_rate": 0.0001999961984291847, - "loss": 46.0, - "step": 36312 - }, - { - "epoch": 2.7763824378309154, - "grad_norm": 0.0033783495891839266, - "learning_rate": 0.00019999619821974176, - "loss": 46.0, - "step": 36313 - }, - { - "epoch": 2.776458894814305, - "grad_norm": 0.0026289864908903837, - "learning_rate": 0.00019999619801029303, - "loss": 46.0, - "step": 36314 - }, - { - "epoch": 2.776535351797695, - "grad_norm": 0.002158515388146043, - "learning_rate": 0.00019999619780083853, - "loss": 46.0, - "step": 36315 - }, - { - "epoch": 2.7766118087810847, - "grad_norm": 0.0044413902796804905, - "learning_rate": 0.0001999961975913783, - "loss": 46.0, - "step": 36316 - }, - { - "epoch": 2.7766882657644745, - "grad_norm": 0.0013291488867253065, - "learning_rate": 0.00019999619738191227, - "loss": 46.0, - "step": 36317 - }, - { - "epoch": 2.7767647227478642, - "grad_norm": 0.0017508446471765637, - "learning_rate": 0.00019999619717244046, - "loss": 46.0, - "step": 36318 - }, - { - "epoch": 2.7768411797312536, - "grad_norm": 0.0025974586606025696, - "learning_rate": 0.00019999619696296287, - "loss": 46.0, - "step": 36319 - }, - { - "epoch": 2.7769176367146433, - "grad_norm": 0.0022220220416784286, - "learning_rate": 0.00019999619675347953, - "loss": 46.0, - "step": 36320 - }, - { - "epoch": 2.776994093698033, - "grad_norm": 0.0022402596659958363, - "learning_rate": 0.00019999619654399045, - "loss": 46.0, - "step": 36321 - }, - { - "epoch": 2.777070550681423, - "grad_norm": 0.00602367939427495, - "learning_rate": 0.00019999619633449554, - "loss": 46.0, - "step": 36322 - }, - { - "epoch": 2.7771470076648126, - "grad_norm": 0.0013270715717226267, - "learning_rate": 0.0001999961961249949, - "loss": 46.0, - "step": 36323 - }, - { - "epoch": 2.7772234646482024, - "grad_norm": 0.0033610500395298004, - "learning_rate": 0.00019999619591548848, - "loss": 46.0, - "step": 36324 - }, - { - "epoch": 2.777299921631592, - "grad_norm": 0.0031422311440110207, - "learning_rate": 0.0001999961957059763, - "loss": 46.0, - "step": 36325 - }, - { - "epoch": 2.777376378614982, - "grad_norm": 0.002408050699159503, - "learning_rate": 0.00019999619549645835, - "loss": 46.0, - "step": 36326 - }, - { - "epoch": 2.777452835598371, - "grad_norm": 0.0022337448317557573, - "learning_rate": 0.0001999961952869346, - "loss": 46.0, - "step": 36327 - }, - { - "epoch": 2.777529292581761, - "grad_norm": 0.0023750371765345335, - "learning_rate": 0.00019999619507740513, - "loss": 46.0, - "step": 36328 - }, - { - "epoch": 2.7776057495651507, - "grad_norm": 0.0017480048118159175, - "learning_rate": 0.00019999619486786986, - "loss": 46.0, - "step": 36329 - }, - { - "epoch": 2.7776822065485405, - "grad_norm": 0.00305254478007555, - "learning_rate": 0.00019999619465832882, - "loss": 46.0, - "step": 36330 - }, - { - "epoch": 2.7777586635319302, - "grad_norm": 0.0008896057843230665, - "learning_rate": 0.00019999619444878203, - "loss": 46.0, - "step": 36331 - }, - { - "epoch": 2.77783512051532, - "grad_norm": 0.0029970339965075254, - "learning_rate": 0.00019999619423922944, - "loss": 46.0, - "step": 36332 - }, - { - "epoch": 2.7779115774987098, - "grad_norm": 0.002943855244666338, - "learning_rate": 0.0001999961940296711, - "loss": 46.0, - "step": 36333 - }, - { - "epoch": 2.7779880344820995, - "grad_norm": 0.002170650754123926, - "learning_rate": 0.00019999619382010702, - "loss": 46.0, - "step": 36334 - }, - { - "epoch": 2.7780644914654893, - "grad_norm": 0.000411079527111724, - "learning_rate": 0.00019999619361053714, - "loss": 46.0, - "step": 36335 - }, - { - "epoch": 2.778140948448879, - "grad_norm": 0.003213879419490695, - "learning_rate": 0.00019999619340096148, - "loss": 46.0, - "step": 36336 - }, - { - "epoch": 2.778217405432269, - "grad_norm": 0.004092518240213394, - "learning_rate": 0.00019999619319138005, - "loss": 46.0, - "step": 36337 - }, - { - "epoch": 2.7782938624156586, - "grad_norm": 0.0019328441703692079, - "learning_rate": 0.00019999619298179287, - "loss": 46.0, - "step": 36338 - }, - { - "epoch": 2.7783703193990483, - "grad_norm": 0.0021066172048449516, - "learning_rate": 0.00019999619277219992, - "loss": 46.0, - "step": 36339 - }, - { - "epoch": 2.778446776382438, - "grad_norm": 0.0015207099495455623, - "learning_rate": 0.00019999619256260117, - "loss": 46.0, - "step": 36340 - }, - { - "epoch": 2.7785232333658274, - "grad_norm": 0.0011930682230740786, - "learning_rate": 0.0001999961923529967, - "loss": 46.0, - "step": 36341 - }, - { - "epoch": 2.778599690349217, - "grad_norm": 0.0009223195957019925, - "learning_rate": 0.00019999619214338643, - "loss": 46.0, - "step": 36342 - }, - { - "epoch": 2.778676147332607, - "grad_norm": 0.0011909353779628873, - "learning_rate": 0.00019999619193377042, - "loss": 46.0, - "step": 36343 - }, - { - "epoch": 2.7787526043159967, - "grad_norm": 0.0025372852105647326, - "learning_rate": 0.00019999619172414858, - "loss": 46.0, - "step": 36344 - }, - { - "epoch": 2.7788290612993864, - "grad_norm": 0.0016447481466457248, - "learning_rate": 0.000199996191514521, - "loss": 46.0, - "step": 36345 - }, - { - "epoch": 2.778905518282776, - "grad_norm": 0.0015572209376841784, - "learning_rate": 0.0001999961913048877, - "loss": 46.0, - "step": 36346 - }, - { - "epoch": 2.778981975266166, - "grad_norm": 0.0010300199501216412, - "learning_rate": 0.0001999961910952486, - "loss": 46.0, - "step": 36347 - }, - { - "epoch": 2.7790584322495557, - "grad_norm": 0.0009643419180065393, - "learning_rate": 0.0001999961908856037, - "loss": 46.0, - "step": 36348 - }, - { - "epoch": 2.779134889232945, - "grad_norm": 0.0009546820656396449, - "learning_rate": 0.00019999619067595305, - "loss": 46.0, - "step": 36349 - }, - { - "epoch": 2.779211346216335, - "grad_norm": 0.0013749372446909547, - "learning_rate": 0.00019999619046629663, - "loss": 46.0, - "step": 36350 - }, - { - "epoch": 2.7792878031997246, - "grad_norm": 0.004427598789334297, - "learning_rate": 0.00019999619025663445, - "loss": 46.0, - "step": 36351 - }, - { - "epoch": 2.7793642601831143, - "grad_norm": 0.0016899374313652515, - "learning_rate": 0.0001999961900469665, - "loss": 46.0, - "step": 36352 - }, - { - "epoch": 2.779440717166504, - "grad_norm": 0.0013406631769612432, - "learning_rate": 0.00019999618983729275, - "loss": 46.0, - "step": 36353 - }, - { - "epoch": 2.779517174149894, - "grad_norm": 0.0010044530499726534, - "learning_rate": 0.00019999618962761328, - "loss": 46.0, - "step": 36354 - }, - { - "epoch": 2.7795936311332836, - "grad_norm": 0.0012139335740357637, - "learning_rate": 0.000199996189417928, - "loss": 46.0, - "step": 36355 - }, - { - "epoch": 2.7796700881166734, - "grad_norm": 0.0035704125184565783, - "learning_rate": 0.00019999618920823697, - "loss": 46.0, - "step": 36356 - }, - { - "epoch": 2.779746545100063, - "grad_norm": 0.008283907547593117, - "learning_rate": 0.00019999618899854018, - "loss": 46.0, - "step": 36357 - }, - { - "epoch": 2.779823002083453, - "grad_norm": 0.0021186447702348232, - "learning_rate": 0.00019999618878883762, - "loss": 46.0, - "step": 36358 - }, - { - "epoch": 2.7798994590668427, - "grad_norm": 0.006470333784818649, - "learning_rate": 0.00019999618857912928, - "loss": 46.0, - "step": 36359 - }, - { - "epoch": 2.7799759160502324, - "grad_norm": 0.006633661687374115, - "learning_rate": 0.00019999618836941517, - "loss": 46.0, - "step": 36360 - }, - { - "epoch": 2.780052373033622, - "grad_norm": 0.0023193766828626394, - "learning_rate": 0.00019999618815969526, - "loss": 46.0, - "step": 36361 - }, - { - "epoch": 2.780128830017012, - "grad_norm": 0.0008143925806507468, - "learning_rate": 0.0001999961879499696, - "loss": 46.0, - "step": 36362 - }, - { - "epoch": 2.7802052870004013, - "grad_norm": 0.00410993630066514, - "learning_rate": 0.0001999961877402382, - "loss": 46.0, - "step": 36363 - }, - { - "epoch": 2.780281743983791, - "grad_norm": 0.00489655788987875, - "learning_rate": 0.00019999618753050103, - "loss": 46.0, - "step": 36364 - }, - { - "epoch": 2.780358200967181, - "grad_norm": 0.003732833778485656, - "learning_rate": 0.00019999618732075805, - "loss": 46.0, - "step": 36365 - }, - { - "epoch": 2.7804346579505705, - "grad_norm": 0.0021146514918655157, - "learning_rate": 0.00019999618711100933, - "loss": 46.0, - "step": 36366 - }, - { - "epoch": 2.7805111149339603, - "grad_norm": 0.0016313346568495035, - "learning_rate": 0.00019999618690125486, - "loss": 46.0, - "step": 36367 - }, - { - "epoch": 2.78058757191735, - "grad_norm": 0.0056966692209243774, - "learning_rate": 0.00019999618669149457, - "loss": 46.0, - "step": 36368 - }, - { - "epoch": 2.78066402890074, - "grad_norm": 0.002248887438327074, - "learning_rate": 0.00019999618648172855, - "loss": 46.0, - "step": 36369 - }, - { - "epoch": 2.7807404858841296, - "grad_norm": 0.0024897693656384945, - "learning_rate": 0.00019999618627195676, - "loss": 46.0, - "step": 36370 - }, - { - "epoch": 2.780816942867519, - "grad_norm": 0.001384552102535963, - "learning_rate": 0.00019999618606217917, - "loss": 46.0, - "step": 36371 - }, - { - "epoch": 2.7808933998509087, - "grad_norm": 0.0013797096908092499, - "learning_rate": 0.00019999618585239584, - "loss": 46.0, - "step": 36372 - }, - { - "epoch": 2.7809698568342984, - "grad_norm": 0.0019975188188254833, - "learning_rate": 0.00019999618564260673, - "loss": 46.0, - "step": 36373 - }, - { - "epoch": 2.781046313817688, - "grad_norm": 0.003750791074708104, - "learning_rate": 0.00019999618543281185, - "loss": 46.0, - "step": 36374 - }, - { - "epoch": 2.781122770801078, - "grad_norm": 0.0025771965738385916, - "learning_rate": 0.0001999961852230112, - "loss": 46.0, - "step": 36375 - }, - { - "epoch": 2.7811992277844677, - "grad_norm": 0.0018562711775302887, - "learning_rate": 0.0001999961850132048, - "loss": 46.0, - "step": 36376 - }, - { - "epoch": 2.7812756847678575, - "grad_norm": 0.0020029630977660418, - "learning_rate": 0.0001999961848033926, - "loss": 46.0, - "step": 36377 - }, - { - "epoch": 2.7813521417512472, - "grad_norm": 0.001481458661146462, - "learning_rate": 0.00019999618459357464, - "loss": 46.0, - "step": 36378 - }, - { - "epoch": 2.781428598734637, - "grad_norm": 0.0021441939752548933, - "learning_rate": 0.00019999618438375092, - "loss": 46.0, - "step": 36379 - }, - { - "epoch": 2.7815050557180268, - "grad_norm": 0.0022772380616515875, - "learning_rate": 0.00019999618417392142, - "loss": 46.0, - "step": 36380 - }, - { - "epoch": 2.7815815127014165, - "grad_norm": 0.003487536683678627, - "learning_rate": 0.00019999618396408618, - "loss": 46.0, - "step": 36381 - }, - { - "epoch": 2.7816579696848063, - "grad_norm": 0.0007609502063132823, - "learning_rate": 0.00019999618375424514, - "loss": 46.0, - "step": 36382 - }, - { - "epoch": 2.781734426668196, - "grad_norm": 0.002699241740629077, - "learning_rate": 0.00019999618354439832, - "loss": 46.0, - "step": 36383 - }, - { - "epoch": 2.781810883651586, - "grad_norm": 0.0015332651091739535, - "learning_rate": 0.00019999618333454576, - "loss": 46.0, - "step": 36384 - }, - { - "epoch": 2.781887340634975, - "grad_norm": 0.0027881485875695944, - "learning_rate": 0.0001999961831246874, - "loss": 46.0, - "step": 36385 - }, - { - "epoch": 2.781963797618365, - "grad_norm": 0.000976945273578167, - "learning_rate": 0.00019999618291482332, - "loss": 46.0, - "step": 36386 - }, - { - "epoch": 2.7820402546017546, - "grad_norm": 0.002280328655615449, - "learning_rate": 0.0001999961827049534, - "loss": 46.0, - "step": 36387 - }, - { - "epoch": 2.7821167115851444, - "grad_norm": 0.0009409145568497479, - "learning_rate": 0.00019999618249507778, - "loss": 46.0, - "step": 36388 - }, - { - "epoch": 2.782193168568534, - "grad_norm": 0.0022007119841873646, - "learning_rate": 0.00019999618228519635, - "loss": 46.0, - "step": 36389 - }, - { - "epoch": 2.782269625551924, - "grad_norm": 0.0029440466314554214, - "learning_rate": 0.00019999618207530918, - "loss": 46.0, - "step": 36390 - }, - { - "epoch": 2.7823460825353137, - "grad_norm": 0.0009036447736434639, - "learning_rate": 0.00019999618186541623, - "loss": 46.0, - "step": 36391 - }, - { - "epoch": 2.7824225395187034, - "grad_norm": 0.0019512152066454291, - "learning_rate": 0.0001999961816555175, - "loss": 46.0, - "step": 36392 - }, - { - "epoch": 2.7824989965020928, - "grad_norm": 0.0009316488867625594, - "learning_rate": 0.000199996181445613, - "loss": 46.0, - "step": 36393 - }, - { - "epoch": 2.7825754534854825, - "grad_norm": 0.0027817185036838055, - "learning_rate": 0.00019999618123570274, - "loss": 46.0, - "step": 36394 - }, - { - "epoch": 2.7826519104688723, - "grad_norm": 0.001116726896725595, - "learning_rate": 0.0001999961810257867, - "loss": 46.0, - "step": 36395 - }, - { - "epoch": 2.782728367452262, - "grad_norm": 0.003959340043365955, - "learning_rate": 0.00019999618081586489, - "loss": 46.0, - "step": 36396 - }, - { - "epoch": 2.782804824435652, - "grad_norm": 0.002118962351232767, - "learning_rate": 0.00019999618060593732, - "loss": 46.0, - "step": 36397 - }, - { - "epoch": 2.7828812814190416, - "grad_norm": 0.0013942746445536613, - "learning_rate": 0.000199996180396004, - "loss": 46.0, - "step": 36398 - }, - { - "epoch": 2.7829577384024313, - "grad_norm": 0.0030810667667537928, - "learning_rate": 0.00019999618018606488, - "loss": 46.0, - "step": 36399 - }, - { - "epoch": 2.783034195385821, - "grad_norm": 0.007912750355899334, - "learning_rate": 0.00019999617997612, - "loss": 46.0, - "step": 36400 - }, - { - "epoch": 2.783110652369211, - "grad_norm": 0.009619493968784809, - "learning_rate": 0.00019999617976616934, - "loss": 46.0, - "step": 36401 - }, - { - "epoch": 2.7831871093526006, - "grad_norm": 0.0014583384618163109, - "learning_rate": 0.00019999617955621294, - "loss": 46.0, - "step": 36402 - }, - { - "epoch": 2.7832635663359904, - "grad_norm": 0.0037126613315194845, - "learning_rate": 0.00019999617934625074, - "loss": 46.0, - "step": 36403 - }, - { - "epoch": 2.78334002331938, - "grad_norm": 0.0033482497092336416, - "learning_rate": 0.0001999961791362828, - "loss": 46.0, - "step": 36404 - }, - { - "epoch": 2.78341648030277, - "grad_norm": 0.0044174217619001865, - "learning_rate": 0.00019999617892630907, - "loss": 46.0, - "step": 36405 - }, - { - "epoch": 2.7834929372861597, - "grad_norm": 0.0028654313646256924, - "learning_rate": 0.00019999617871632958, - "loss": 46.0, - "step": 36406 - }, - { - "epoch": 2.783569394269549, - "grad_norm": 0.003379259957000613, - "learning_rate": 0.0001999961785063443, - "loss": 46.0, - "step": 36407 - }, - { - "epoch": 2.7836458512529387, - "grad_norm": 0.0035564000718295574, - "learning_rate": 0.00019999617829635327, - "loss": 46.0, - "step": 36408 - }, - { - "epoch": 2.7837223082363285, - "grad_norm": 0.0028411243110895157, - "learning_rate": 0.00019999617808635648, - "loss": 46.0, - "step": 36409 - }, - { - "epoch": 2.7837987652197183, - "grad_norm": 0.0036967976484447718, - "learning_rate": 0.0001999961778763539, - "loss": 46.0, - "step": 36410 - }, - { - "epoch": 2.783875222203108, - "grad_norm": 0.006522329058498144, - "learning_rate": 0.00019999617766634558, - "loss": 46.0, - "step": 36411 - }, - { - "epoch": 2.7839516791864978, - "grad_norm": 0.0036786424461752176, - "learning_rate": 0.00019999617745633142, - "loss": 46.0, - "step": 36412 - }, - { - "epoch": 2.7840281361698875, - "grad_norm": 0.0025744715239852667, - "learning_rate": 0.00019999617724631157, - "loss": 46.0, - "step": 36413 - }, - { - "epoch": 2.7841045931532773, - "grad_norm": 0.001146244234405458, - "learning_rate": 0.00019999617703628591, - "loss": 46.0, - "step": 36414 - }, - { - "epoch": 2.7841810501366666, - "grad_norm": 0.0016148522263392806, - "learning_rate": 0.0001999961768262545, - "loss": 46.0, - "step": 36415 - }, - { - "epoch": 2.7842575071200564, - "grad_norm": 0.005639056209474802, - "learning_rate": 0.0001999961766162173, - "loss": 46.0, - "step": 36416 - }, - { - "epoch": 2.784333964103446, - "grad_norm": 0.0010224903235211968, - "learning_rate": 0.00019999617640617436, - "loss": 46.0, - "step": 36417 - }, - { - "epoch": 2.784410421086836, - "grad_norm": 0.0015144539065659046, - "learning_rate": 0.00019999617619612562, - "loss": 46.0, - "step": 36418 - }, - { - "epoch": 2.7844868780702257, - "grad_norm": 0.0008064638241194189, - "learning_rate": 0.00019999617598607115, - "loss": 46.0, - "step": 36419 - }, - { - "epoch": 2.7845633350536154, - "grad_norm": 0.0031966788228601217, - "learning_rate": 0.00019999617577601088, - "loss": 46.0, - "step": 36420 - }, - { - "epoch": 2.784639792037005, - "grad_norm": 0.0011901513207703829, - "learning_rate": 0.00019999617556594484, - "loss": 46.0, - "step": 36421 - }, - { - "epoch": 2.784716249020395, - "grad_norm": 0.0008553629741072655, - "learning_rate": 0.00019999617535587305, - "loss": 46.0, - "step": 36422 - }, - { - "epoch": 2.7847927060037847, - "grad_norm": 0.0025794266257435083, - "learning_rate": 0.00019999617514579546, - "loss": 46.0, - "step": 36423 - }, - { - "epoch": 2.7848691629871745, - "grad_norm": 0.001204487169161439, - "learning_rate": 0.00019999617493571213, - "loss": 46.0, - "step": 36424 - }, - { - "epoch": 2.7849456199705642, - "grad_norm": 0.0024331745225936174, - "learning_rate": 0.00019999617472562302, - "loss": 46.0, - "step": 36425 - }, - { - "epoch": 2.785022076953954, - "grad_norm": 0.0017571276985108852, - "learning_rate": 0.00019999617451552814, - "loss": 46.0, - "step": 36426 - }, - { - "epoch": 2.7850985339373437, - "grad_norm": 0.0050315638072788715, - "learning_rate": 0.0001999961743054275, - "loss": 46.0, - "step": 36427 - }, - { - "epoch": 2.785174990920733, - "grad_norm": 0.0021945710759609938, - "learning_rate": 0.0001999961740953211, - "loss": 46.0, - "step": 36428 - }, - { - "epoch": 2.785251447904123, - "grad_norm": 0.0023964280262589455, - "learning_rate": 0.00019999617388520889, - "loss": 46.0, - "step": 36429 - }, - { - "epoch": 2.7853279048875126, - "grad_norm": 0.004022674169391394, - "learning_rate": 0.00019999617367509094, - "loss": 46.0, - "step": 36430 - }, - { - "epoch": 2.7854043618709023, - "grad_norm": 0.0017313695279881358, - "learning_rate": 0.00019999617346496722, - "loss": 46.0, - "step": 36431 - }, - { - "epoch": 2.785480818854292, - "grad_norm": 0.004610915202647448, - "learning_rate": 0.0001999961732548377, - "loss": 46.0, - "step": 36432 - }, - { - "epoch": 2.785557275837682, - "grad_norm": 0.0034621006343513727, - "learning_rate": 0.00019999617304470246, - "loss": 46.0, - "step": 36433 - }, - { - "epoch": 2.7856337328210716, - "grad_norm": 0.0037449817173182964, - "learning_rate": 0.00019999617283456142, - "loss": 46.0, - "step": 36434 - }, - { - "epoch": 2.7857101898044614, - "grad_norm": 0.0013526586117222905, - "learning_rate": 0.0001999961726244146, - "loss": 46.0, - "step": 36435 - }, - { - "epoch": 2.785786646787851, - "grad_norm": 0.002429828979074955, - "learning_rate": 0.00019999617241426204, - "loss": 46.0, - "step": 36436 - }, - { - "epoch": 2.7858631037712405, - "grad_norm": 0.0009661111980676651, - "learning_rate": 0.0001999961722041037, - "loss": 46.0, - "step": 36437 - }, - { - "epoch": 2.7859395607546302, - "grad_norm": 0.0012383927823975682, - "learning_rate": 0.0001999961719939396, - "loss": 46.0, - "step": 36438 - }, - { - "epoch": 2.78601601773802, - "grad_norm": 0.003857041010633111, - "learning_rate": 0.00019999617178376972, - "loss": 46.0, - "step": 36439 - }, - { - "epoch": 2.7860924747214098, - "grad_norm": 0.0017049004090949893, - "learning_rate": 0.00019999617157359406, - "loss": 46.0, - "step": 36440 - }, - { - "epoch": 2.7861689317047995, - "grad_norm": 0.0056803058832883835, - "learning_rate": 0.00019999617136341264, - "loss": 46.0, - "step": 36441 - }, - { - "epoch": 2.7862453886881893, - "grad_norm": 0.004347475245594978, - "learning_rate": 0.00019999617115322546, - "loss": 46.0, - "step": 36442 - }, - { - "epoch": 2.786321845671579, - "grad_norm": 0.0045688762329518795, - "learning_rate": 0.00019999617094303252, - "loss": 46.0, - "step": 36443 - }, - { - "epoch": 2.786398302654969, - "grad_norm": 0.0037312142085283995, - "learning_rate": 0.00019999617073283377, - "loss": 46.0, - "step": 36444 - }, - { - "epoch": 2.7864747596383586, - "grad_norm": 0.007425202056765556, - "learning_rate": 0.0001999961705226293, - "loss": 46.0, - "step": 36445 - }, - { - "epoch": 2.7865512166217483, - "grad_norm": 0.004221283830702305, - "learning_rate": 0.000199996170312419, - "loss": 46.0, - "step": 36446 - }, - { - "epoch": 2.786627673605138, - "grad_norm": 0.0018323225667700171, - "learning_rate": 0.00019999617010220297, - "loss": 46.0, - "step": 36447 - }, - { - "epoch": 2.786704130588528, - "grad_norm": 0.0020557355601340532, - "learning_rate": 0.00019999616989198118, - "loss": 46.0, - "step": 36448 - }, - { - "epoch": 2.7867805875719176, - "grad_norm": 0.006091445684432983, - "learning_rate": 0.00019999616968175362, - "loss": 46.0, - "step": 36449 - }, - { - "epoch": 2.786857044555307, - "grad_norm": 0.0021612439304590225, - "learning_rate": 0.00019999616947152026, - "loss": 46.0, - "step": 36450 - }, - { - "epoch": 2.7869335015386967, - "grad_norm": 0.002512024948373437, - "learning_rate": 0.00019999616926128115, - "loss": 46.0, - "step": 36451 - }, - { - "epoch": 2.7870099585220864, - "grad_norm": 0.002003186848014593, - "learning_rate": 0.0001999961690510363, - "loss": 46.0, - "step": 36452 - }, - { - "epoch": 2.787086415505476, - "grad_norm": 0.0011371636064723134, - "learning_rate": 0.00019999616884078565, - "loss": 46.0, - "step": 36453 - }, - { - "epoch": 2.787162872488866, - "grad_norm": 0.0038547248113900423, - "learning_rate": 0.00019999616863052922, - "loss": 46.0, - "step": 36454 - }, - { - "epoch": 2.7872393294722557, - "grad_norm": 0.0013719202252104878, - "learning_rate": 0.00019999616842026705, - "loss": 46.0, - "step": 36455 - }, - { - "epoch": 2.7873157864556455, - "grad_norm": 0.0020077896770089865, - "learning_rate": 0.00019999616820999907, - "loss": 46.0, - "step": 36456 - }, - { - "epoch": 2.7873922434390352, - "grad_norm": 0.00496314000338316, - "learning_rate": 0.00019999616799972535, - "loss": 46.0, - "step": 36457 - }, - { - "epoch": 2.7874687004224246, - "grad_norm": 0.0022020842880010605, - "learning_rate": 0.00019999616778944586, - "loss": 46.0, - "step": 36458 - }, - { - "epoch": 2.7875451574058143, - "grad_norm": 0.0027344324626028538, - "learning_rate": 0.0001999961675791606, - "loss": 46.0, - "step": 36459 - }, - { - "epoch": 2.787621614389204, - "grad_norm": 0.004561572335660458, - "learning_rate": 0.00019999616736886955, - "loss": 46.0, - "step": 36460 - }, - { - "epoch": 2.787698071372594, - "grad_norm": 0.004444970283657312, - "learning_rate": 0.00019999616715857274, - "loss": 46.0, - "step": 36461 - }, - { - "epoch": 2.7877745283559836, - "grad_norm": 0.0061172787100076675, - "learning_rate": 0.0001999961669482702, - "loss": 46.0, - "step": 36462 - }, - { - "epoch": 2.7878509853393734, - "grad_norm": 0.004543913062661886, - "learning_rate": 0.00019999616673796184, - "loss": 46.0, - "step": 36463 - }, - { - "epoch": 2.787927442322763, - "grad_norm": 0.0012532860273495317, - "learning_rate": 0.00019999616652764774, - "loss": 46.0, - "step": 36464 - }, - { - "epoch": 2.788003899306153, - "grad_norm": 0.0037034032866358757, - "learning_rate": 0.00019999616631732786, - "loss": 46.0, - "step": 36465 - }, - { - "epoch": 2.7880803562895426, - "grad_norm": 0.0006219047936610878, - "learning_rate": 0.00019999616610700223, - "loss": 46.0, - "step": 36466 - }, - { - "epoch": 2.7881568132729324, - "grad_norm": 0.0008324295631609857, - "learning_rate": 0.00019999616589667078, - "loss": 46.0, - "step": 36467 - }, - { - "epoch": 2.788233270256322, - "grad_norm": 0.0014626223128288984, - "learning_rate": 0.0001999961656863336, - "loss": 46.0, - "step": 36468 - }, - { - "epoch": 2.788309727239712, - "grad_norm": 0.003055607434362173, - "learning_rate": 0.00019999616547599066, - "loss": 46.0, - "step": 36469 - }, - { - "epoch": 2.7883861842231017, - "grad_norm": 0.0008964931475929916, - "learning_rate": 0.0001999961652656419, - "loss": 46.0, - "step": 36470 - }, - { - "epoch": 2.7884626412064915, - "grad_norm": 0.0030449212063103914, - "learning_rate": 0.00019999616505528742, - "loss": 46.0, - "step": 36471 - }, - { - "epoch": 2.7885390981898808, - "grad_norm": 0.003028854262083769, - "learning_rate": 0.00019999616484492715, - "loss": 46.0, - "step": 36472 - }, - { - "epoch": 2.7886155551732705, - "grad_norm": 0.002353883348405361, - "learning_rate": 0.0001999961646345611, - "loss": 46.0, - "step": 36473 - }, - { - "epoch": 2.7886920121566603, - "grad_norm": 0.0025278721004724503, - "learning_rate": 0.00019999616442418933, - "loss": 46.0, - "step": 36474 - }, - { - "epoch": 2.78876846914005, - "grad_norm": 0.002812107326462865, - "learning_rate": 0.00019999616421381177, - "loss": 46.0, - "step": 36475 - }, - { - "epoch": 2.78884492612344, - "grad_norm": 0.0036222331691533327, - "learning_rate": 0.00019999616400342843, - "loss": 46.0, - "step": 36476 - }, - { - "epoch": 2.7889213831068296, - "grad_norm": 0.0009932449320331216, - "learning_rate": 0.0001999961637930393, - "loss": 46.0, - "step": 36477 - }, - { - "epoch": 2.7889978400902193, - "grad_norm": 0.001032995874993503, - "learning_rate": 0.00019999616358264445, - "loss": 46.0, - "step": 36478 - }, - { - "epoch": 2.789074297073609, - "grad_norm": 0.001015535555779934, - "learning_rate": 0.0001999961633722438, - "loss": 46.0, - "step": 36479 - }, - { - "epoch": 2.7891507540569984, - "grad_norm": 0.004099308047443628, - "learning_rate": 0.00019999616316183737, - "loss": 46.0, - "step": 36480 - }, - { - "epoch": 2.789227211040388, - "grad_norm": 0.0024906625039875507, - "learning_rate": 0.00019999616295142517, - "loss": 46.0, - "step": 36481 - }, - { - "epoch": 2.789303668023778, - "grad_norm": 0.0019348154310137033, - "learning_rate": 0.00019999616274100722, - "loss": 46.0, - "step": 36482 - }, - { - "epoch": 2.7893801250071677, - "grad_norm": 0.0029479130171239376, - "learning_rate": 0.0001999961625305835, - "loss": 46.0, - "step": 36483 - }, - { - "epoch": 2.7894565819905575, - "grad_norm": 0.0029610886704176664, - "learning_rate": 0.000199996162320154, - "loss": 46.0, - "step": 36484 - }, - { - "epoch": 2.789533038973947, - "grad_norm": 0.002717749448493123, - "learning_rate": 0.00019999616210971874, - "loss": 46.0, - "step": 36485 - }, - { - "epoch": 2.789609495957337, - "grad_norm": 0.001117340987548232, - "learning_rate": 0.0001999961618992777, - "loss": 46.0, - "step": 36486 - }, - { - "epoch": 2.7896859529407267, - "grad_norm": 0.0037378885317593813, - "learning_rate": 0.0001999961616888309, - "loss": 46.0, - "step": 36487 - }, - { - "epoch": 2.7897624099241165, - "grad_norm": 0.002162329852581024, - "learning_rate": 0.00019999616147837833, - "loss": 46.0, - "step": 36488 - }, - { - "epoch": 2.7898388669075063, - "grad_norm": 0.005490526091307402, - "learning_rate": 0.00019999616126791997, - "loss": 46.0, - "step": 36489 - }, - { - "epoch": 2.789915323890896, - "grad_norm": 0.0013661356642842293, - "learning_rate": 0.0001999961610574559, - "loss": 46.0, - "step": 36490 - }, - { - "epoch": 2.789991780874286, - "grad_norm": 0.002923026215285063, - "learning_rate": 0.000199996160846986, - "loss": 46.0, - "step": 36491 - }, - { - "epoch": 2.7900682378576755, - "grad_norm": 0.001461016247048974, - "learning_rate": 0.00019999616063651033, - "loss": 46.0, - "step": 36492 - }, - { - "epoch": 2.7901446948410653, - "grad_norm": 0.0026465451810508966, - "learning_rate": 0.00019999616042602893, - "loss": 46.0, - "step": 36493 - }, - { - "epoch": 2.7902211518244546, - "grad_norm": 0.0008995048119686544, - "learning_rate": 0.00019999616021554176, - "loss": 46.0, - "step": 36494 - }, - { - "epoch": 2.7902976088078444, - "grad_norm": 0.0026205123867839575, - "learning_rate": 0.0001999961600050488, - "loss": 46.0, - "step": 36495 - }, - { - "epoch": 2.790374065791234, - "grad_norm": 0.004056366626173258, - "learning_rate": 0.00019999615979455007, - "loss": 46.0, - "step": 36496 - }, - { - "epoch": 2.790450522774624, - "grad_norm": 0.003872596425935626, - "learning_rate": 0.00019999615958404555, - "loss": 46.0, - "step": 36497 - }, - { - "epoch": 2.7905269797580137, - "grad_norm": 0.002656124532222748, - "learning_rate": 0.0001999961593735353, - "loss": 46.0, - "step": 36498 - }, - { - "epoch": 2.7906034367414034, - "grad_norm": 0.0016670010518282652, - "learning_rate": 0.00019999615916301927, - "loss": 46.0, - "step": 36499 - }, - { - "epoch": 2.790679893724793, - "grad_norm": 0.0013912769500166178, - "learning_rate": 0.00019999615895249746, - "loss": 46.0, - "step": 36500 - }, - { - "epoch": 2.790756350708183, - "grad_norm": 0.004013076424598694, - "learning_rate": 0.0001999961587419699, - "loss": 46.0, - "step": 36501 - }, - { - "epoch": 2.7908328076915723, - "grad_norm": 0.002834868850186467, - "learning_rate": 0.00019999615853143657, - "loss": 46.0, - "step": 36502 - }, - { - "epoch": 2.790909264674962, - "grad_norm": 0.0036473898217082024, - "learning_rate": 0.00019999615832089744, - "loss": 46.0, - "step": 36503 - }, - { - "epoch": 2.790985721658352, - "grad_norm": 0.005797350313514471, - "learning_rate": 0.00019999615811035258, - "loss": 46.0, - "step": 36504 - }, - { - "epoch": 2.7910621786417416, - "grad_norm": 0.0009111484978348017, - "learning_rate": 0.00019999615789980193, - "loss": 46.0, - "step": 36505 - }, - { - "epoch": 2.7911386356251313, - "grad_norm": 0.002440784592181444, - "learning_rate": 0.0001999961576892455, - "loss": 46.0, - "step": 36506 - }, - { - "epoch": 2.791215092608521, - "grad_norm": 0.0012514643603935838, - "learning_rate": 0.00019999615747868333, - "loss": 46.0, - "step": 36507 - }, - { - "epoch": 2.791291549591911, - "grad_norm": 0.001905411365441978, - "learning_rate": 0.00019999615726811536, - "loss": 46.0, - "step": 36508 - }, - { - "epoch": 2.7913680065753006, - "grad_norm": 0.002920496976003051, - "learning_rate": 0.00019999615705754161, - "loss": 46.0, - "step": 36509 - }, - { - "epoch": 2.7914444635586904, - "grad_norm": 0.0016895607113838196, - "learning_rate": 0.00019999615684696212, - "loss": 46.0, - "step": 36510 - }, - { - "epoch": 2.79152092054208, - "grad_norm": 0.0013283167500048876, - "learning_rate": 0.00019999615663637688, - "loss": 46.0, - "step": 36511 - }, - { - "epoch": 2.79159737752547, - "grad_norm": 0.0043432703241705894, - "learning_rate": 0.00019999615642578585, - "loss": 46.0, - "step": 36512 - }, - { - "epoch": 2.7916738345088596, - "grad_norm": 0.0035565346479415894, - "learning_rate": 0.00019999615621518903, - "loss": 46.0, - "step": 36513 - }, - { - "epoch": 2.7917502914922494, - "grad_norm": 0.004469890147447586, - "learning_rate": 0.00019999615600458647, - "loss": 46.0, - "step": 36514 - }, - { - "epoch": 2.791826748475639, - "grad_norm": 0.00395957101136446, - "learning_rate": 0.00019999615579397812, - "loss": 46.0, - "step": 36515 - }, - { - "epoch": 2.7919032054590285, - "grad_norm": 0.002598981373012066, - "learning_rate": 0.000199996155583364, - "loss": 46.0, - "step": 36516 - }, - { - "epoch": 2.7919796624424182, - "grad_norm": 0.0006251042359508574, - "learning_rate": 0.00019999615537274413, - "loss": 46.0, - "step": 36517 - }, - { - "epoch": 2.792056119425808, - "grad_norm": 0.007046767044812441, - "learning_rate": 0.0001999961551621185, - "loss": 46.0, - "step": 36518 - }, - { - "epoch": 2.7921325764091978, - "grad_norm": 0.003603205317631364, - "learning_rate": 0.00019999615495148708, - "loss": 46.0, - "step": 36519 - }, - { - "epoch": 2.7922090333925875, - "grad_norm": 0.0010431614937260747, - "learning_rate": 0.00019999615474084988, - "loss": 46.0, - "step": 36520 - }, - { - "epoch": 2.7922854903759773, - "grad_norm": 0.0029529815074056387, - "learning_rate": 0.00019999615453020694, - "loss": 46.0, - "step": 36521 - }, - { - "epoch": 2.792361947359367, - "grad_norm": 0.0012880582362413406, - "learning_rate": 0.0001999961543195582, - "loss": 46.0, - "step": 36522 - }, - { - "epoch": 2.792438404342757, - "grad_norm": 0.0016723256558179855, - "learning_rate": 0.0001999961541089037, - "loss": 46.0, - "step": 36523 - }, - { - "epoch": 2.792514861326146, - "grad_norm": 0.001146748778410256, - "learning_rate": 0.00019999615389824343, - "loss": 46.0, - "step": 36524 - }, - { - "epoch": 2.792591318309536, - "grad_norm": 0.0016539928037673235, - "learning_rate": 0.0001999961536875774, - "loss": 46.0, - "step": 36525 - }, - { - "epoch": 2.7926677752929256, - "grad_norm": 0.00864909216761589, - "learning_rate": 0.0001999961534769056, - "loss": 46.0, - "step": 36526 - }, - { - "epoch": 2.7927442322763154, - "grad_norm": 0.0016701811691746116, - "learning_rate": 0.00019999615326622805, - "loss": 46.0, - "step": 36527 - }, - { - "epoch": 2.792820689259705, - "grad_norm": 0.0006151789566501975, - "learning_rate": 0.00019999615305554472, - "loss": 46.0, - "step": 36528 - }, - { - "epoch": 2.792897146243095, - "grad_norm": 0.0015764220152050257, - "learning_rate": 0.0001999961528448556, - "loss": 46.0, - "step": 36529 - }, - { - "epoch": 2.7929736032264847, - "grad_norm": 0.002275099279358983, - "learning_rate": 0.0001999961526341607, - "loss": 46.0, - "step": 36530 - }, - { - "epoch": 2.7930500602098745, - "grad_norm": 0.0015367651358246803, - "learning_rate": 0.00019999615242346006, - "loss": 46.0, - "step": 36531 - }, - { - "epoch": 2.793126517193264, - "grad_norm": 0.0009864786406978965, - "learning_rate": 0.00019999615221275363, - "loss": 46.0, - "step": 36532 - }, - { - "epoch": 2.793202974176654, - "grad_norm": 0.00273247086443007, - "learning_rate": 0.00019999615200204146, - "loss": 46.0, - "step": 36533 - }, - { - "epoch": 2.7932794311600437, - "grad_norm": 0.0026058033108711243, - "learning_rate": 0.0001999961517913235, - "loss": 46.0, - "step": 36534 - }, - { - "epoch": 2.7933558881434335, - "grad_norm": 0.001718914951197803, - "learning_rate": 0.0001999961515805998, - "loss": 46.0, - "step": 36535 - }, - { - "epoch": 2.7934323451268233, - "grad_norm": 0.001381683861836791, - "learning_rate": 0.00019999615136987028, - "loss": 46.0, - "step": 36536 - }, - { - "epoch": 2.793508802110213, - "grad_norm": 0.003563657635822892, - "learning_rate": 0.00019999615115913501, - "loss": 46.0, - "step": 36537 - }, - { - "epoch": 2.7935852590936023, - "grad_norm": 0.0008521597483195364, - "learning_rate": 0.000199996150948394, - "loss": 46.0, - "step": 36538 - }, - { - "epoch": 2.793661716076992, - "grad_norm": 0.0009935145499184728, - "learning_rate": 0.0001999961507376472, - "loss": 46.0, - "step": 36539 - }, - { - "epoch": 2.793738173060382, - "grad_norm": 0.007693890016525984, - "learning_rate": 0.00019999615052689463, - "loss": 46.0, - "step": 36540 - }, - { - "epoch": 2.7938146300437716, - "grad_norm": 0.0028375021647661924, - "learning_rate": 0.00019999615031613628, - "loss": 46.0, - "step": 36541 - }, - { - "epoch": 2.7938910870271614, - "grad_norm": 0.0019172790925949812, - "learning_rate": 0.00019999615010537217, - "loss": 46.0, - "step": 36542 - }, - { - "epoch": 2.793967544010551, - "grad_norm": 0.0023453764151781797, - "learning_rate": 0.00019999614989460227, - "loss": 46.0, - "step": 36543 - }, - { - "epoch": 2.794044000993941, - "grad_norm": 0.0018810100154951215, - "learning_rate": 0.00019999614968382664, - "loss": 46.0, - "step": 36544 - }, - { - "epoch": 2.7941204579773307, - "grad_norm": 0.0016110677970573306, - "learning_rate": 0.00019999614947304522, - "loss": 46.0, - "step": 36545 - }, - { - "epoch": 2.79419691496072, - "grad_norm": 0.00260846852324903, - "learning_rate": 0.00019999614926225802, - "loss": 46.0, - "step": 36546 - }, - { - "epoch": 2.7942733719441097, - "grad_norm": 0.0018586397636681795, - "learning_rate": 0.00019999614905146507, - "loss": 46.0, - "step": 36547 - }, - { - "epoch": 2.7943498289274995, - "grad_norm": 0.0021287112031131983, - "learning_rate": 0.00019999614884066636, - "loss": 46.0, - "step": 36548 - }, - { - "epoch": 2.7944262859108893, - "grad_norm": 0.0032725015189498663, - "learning_rate": 0.00019999614862986184, - "loss": 46.0, - "step": 36549 - }, - { - "epoch": 2.794502742894279, - "grad_norm": 0.00107848783954978, - "learning_rate": 0.0001999961484190516, - "loss": 46.0, - "step": 36550 - }, - { - "epoch": 2.794579199877669, - "grad_norm": 0.0008354871533811092, - "learning_rate": 0.00019999614820823553, - "loss": 46.0, - "step": 36551 - }, - { - "epoch": 2.7946556568610585, - "grad_norm": 0.0033484341111034155, - "learning_rate": 0.00019999614799741375, - "loss": 46.0, - "step": 36552 - }, - { - "epoch": 2.7947321138444483, - "grad_norm": 0.0010308248456567526, - "learning_rate": 0.00019999614778658617, - "loss": 46.0, - "step": 36553 - }, - { - "epoch": 2.794808570827838, - "grad_norm": 0.0011451182654127479, - "learning_rate": 0.00019999614757575283, - "loss": 46.0, - "step": 36554 - }, - { - "epoch": 2.794885027811228, - "grad_norm": 0.007635314483195543, - "learning_rate": 0.00019999614736491373, - "loss": 46.0, - "step": 36555 - }, - { - "epoch": 2.7949614847946176, - "grad_norm": 0.0007045171223580837, - "learning_rate": 0.00019999614715406885, - "loss": 46.0, - "step": 36556 - }, - { - "epoch": 2.7950379417780074, - "grad_norm": 0.0026100799441337585, - "learning_rate": 0.0001999961469432182, - "loss": 46.0, - "step": 36557 - }, - { - "epoch": 2.795114398761397, - "grad_norm": 0.0018087979406118393, - "learning_rate": 0.00019999614673236178, - "loss": 46.0, - "step": 36558 - }, - { - "epoch": 2.7951908557447864, - "grad_norm": 0.002954366384074092, - "learning_rate": 0.00019999614652149958, - "loss": 46.0, - "step": 36559 - }, - { - "epoch": 2.795267312728176, - "grad_norm": 0.0015169483376666903, - "learning_rate": 0.00019999614631063163, - "loss": 46.0, - "step": 36560 - }, - { - "epoch": 2.795343769711566, - "grad_norm": 0.0022899971809238195, - "learning_rate": 0.00019999614609975792, - "loss": 46.0, - "step": 36561 - }, - { - "epoch": 2.7954202266949557, - "grad_norm": 0.001835056347772479, - "learning_rate": 0.00019999614588887843, - "loss": 46.0, - "step": 36562 - }, - { - "epoch": 2.7954966836783455, - "grad_norm": 0.0013374926056712866, - "learning_rate": 0.00019999614567799316, - "loss": 46.0, - "step": 36563 - }, - { - "epoch": 2.7955731406617352, - "grad_norm": 0.0013211757177487016, - "learning_rate": 0.00019999614546710212, - "loss": 46.0, - "step": 36564 - }, - { - "epoch": 2.795649597645125, - "grad_norm": 0.003265534294769168, - "learning_rate": 0.0001999961452562053, - "loss": 46.0, - "step": 36565 - }, - { - "epoch": 2.7957260546285148, - "grad_norm": 0.004683324601501226, - "learning_rate": 0.00019999614504530273, - "loss": 46.0, - "step": 36566 - }, - { - "epoch": 2.7958025116119045, - "grad_norm": 0.0015656973700970411, - "learning_rate": 0.00019999614483439442, - "loss": 46.0, - "step": 36567 - }, - { - "epoch": 2.795878968595294, - "grad_norm": 0.0036509211640805006, - "learning_rate": 0.0001999961446234803, - "loss": 46.0, - "step": 36568 - }, - { - "epoch": 2.7959554255786836, - "grad_norm": 0.0016124958638101816, - "learning_rate": 0.00019999614441256042, - "loss": 46.0, - "step": 36569 - }, - { - "epoch": 2.7960318825620734, - "grad_norm": 0.0064685288816690445, - "learning_rate": 0.00019999614420163474, - "loss": 46.0, - "step": 36570 - }, - { - "epoch": 2.796108339545463, - "grad_norm": 0.001625482807867229, - "learning_rate": 0.00019999614399070334, - "loss": 46.0, - "step": 36571 - }, - { - "epoch": 2.796184796528853, - "grad_norm": 0.0006707222200930119, - "learning_rate": 0.00019999614377976617, - "loss": 46.0, - "step": 36572 - }, - { - "epoch": 2.7962612535122426, - "grad_norm": 0.008109656162559986, - "learning_rate": 0.00019999614356882323, - "loss": 46.0, - "step": 36573 - }, - { - "epoch": 2.7963377104956324, - "grad_norm": 0.01435154490172863, - "learning_rate": 0.00019999614335787448, - "loss": 46.0, - "step": 36574 - }, - { - "epoch": 2.796414167479022, - "grad_norm": 0.0017522264970466495, - "learning_rate": 0.00019999614314692, - "loss": 46.0, - "step": 36575 - }, - { - "epoch": 2.796490624462412, - "grad_norm": 0.0024431291967630386, - "learning_rate": 0.00019999614293595973, - "loss": 46.0, - "step": 36576 - }, - { - "epoch": 2.7965670814458017, - "grad_norm": 0.0007457217434421182, - "learning_rate": 0.00019999614272499366, - "loss": 46.0, - "step": 36577 - }, - { - "epoch": 2.7966435384291914, - "grad_norm": 0.002585195703431964, - "learning_rate": 0.0001999961425140219, - "loss": 46.0, - "step": 36578 - }, - { - "epoch": 2.796719995412581, - "grad_norm": 0.0023897220380604267, - "learning_rate": 0.00019999614230304432, - "loss": 46.0, - "step": 36579 - }, - { - "epoch": 2.796796452395971, - "grad_norm": 0.003025853307917714, - "learning_rate": 0.00019999614209206096, - "loss": 46.0, - "step": 36580 - }, - { - "epoch": 2.7968729093793603, - "grad_norm": 0.000791246653534472, - "learning_rate": 0.00019999614188107186, - "loss": 46.0, - "step": 36581 - }, - { - "epoch": 2.79694936636275, - "grad_norm": 0.0030855664517730474, - "learning_rate": 0.00019999614167007698, - "loss": 46.0, - "step": 36582 - }, - { - "epoch": 2.79702582334614, - "grad_norm": 0.0031424977350980043, - "learning_rate": 0.00019999614145907636, - "loss": 46.0, - "step": 36583 - }, - { - "epoch": 2.7971022803295296, - "grad_norm": 0.0019661863334476948, - "learning_rate": 0.0001999961412480699, - "loss": 46.0, - "step": 36584 - }, - { - "epoch": 2.7971787373129193, - "grad_norm": 0.006525104399770498, - "learning_rate": 0.00019999614103705774, - "loss": 46.0, - "step": 36585 - }, - { - "epoch": 2.797255194296309, - "grad_norm": 0.004011107143014669, - "learning_rate": 0.00019999614082603977, - "loss": 46.0, - "step": 36586 - }, - { - "epoch": 2.797331651279699, - "grad_norm": 0.006423837039619684, - "learning_rate": 0.00019999614061501605, - "loss": 46.0, - "step": 36587 - }, - { - "epoch": 2.7974081082630886, - "grad_norm": 0.002542696427553892, - "learning_rate": 0.00019999614040398656, - "loss": 46.0, - "step": 36588 - }, - { - "epoch": 2.797484565246478, - "grad_norm": 0.0016533015295863152, - "learning_rate": 0.00019999614019295127, - "loss": 46.0, - "step": 36589 - }, - { - "epoch": 2.7975610222298677, - "grad_norm": 0.0015520985471084714, - "learning_rate": 0.00019999613998191026, - "loss": 46.0, - "step": 36590 - }, - { - "epoch": 2.7976374792132574, - "grad_norm": 0.0012625512899830937, - "learning_rate": 0.00019999613977086345, - "loss": 46.0, - "step": 36591 - }, - { - "epoch": 2.797713936196647, - "grad_norm": 0.0018682624213397503, - "learning_rate": 0.0001999961395598109, - "loss": 46.0, - "step": 36592 - }, - { - "epoch": 2.797790393180037, - "grad_norm": 0.0009864341700449586, - "learning_rate": 0.00019999613934875254, - "loss": 46.0, - "step": 36593 - }, - { - "epoch": 2.7978668501634267, - "grad_norm": 0.0020944890566170216, - "learning_rate": 0.00019999613913768843, - "loss": 46.0, - "step": 36594 - }, - { - "epoch": 2.7979433071468165, - "grad_norm": 0.00305737997405231, - "learning_rate": 0.00019999613892661856, - "loss": 46.0, - "step": 36595 - }, - { - "epoch": 2.7980197641302063, - "grad_norm": 0.0024078222922980785, - "learning_rate": 0.0001999961387155429, - "loss": 46.0, - "step": 36596 - }, - { - "epoch": 2.798096221113596, - "grad_norm": 0.0009995960863307118, - "learning_rate": 0.00019999613850446148, - "loss": 46.0, - "step": 36597 - }, - { - "epoch": 2.7981726780969858, - "grad_norm": 0.003978567663580179, - "learning_rate": 0.00019999613829337428, - "loss": 46.0, - "step": 36598 - }, - { - "epoch": 2.7982491350803755, - "grad_norm": 0.0009550178656354547, - "learning_rate": 0.00019999613808228131, - "loss": 46.0, - "step": 36599 - }, - { - "epoch": 2.7983255920637653, - "grad_norm": 0.0012933457037433982, - "learning_rate": 0.00019999613787118262, - "loss": 46.0, - "step": 36600 - }, - { - "epoch": 2.798402049047155, - "grad_norm": 0.0008998825214803219, - "learning_rate": 0.0001999961376600781, - "loss": 46.0, - "step": 36601 - }, - { - "epoch": 2.798478506030545, - "grad_norm": 0.0029246413614600897, - "learning_rate": 0.00019999613744896784, - "loss": 46.0, - "step": 36602 - }, - { - "epoch": 2.798554963013934, - "grad_norm": 0.010313055478036404, - "learning_rate": 0.00019999613723785183, - "loss": 46.0, - "step": 36603 - }, - { - "epoch": 2.798631419997324, - "grad_norm": 0.0011114992666989565, - "learning_rate": 0.00019999613702673002, - "loss": 46.0, - "step": 36604 - }, - { - "epoch": 2.7987078769807137, - "grad_norm": 0.001763931941241026, - "learning_rate": 0.00019999613681560244, - "loss": 46.0, - "step": 36605 - }, - { - "epoch": 2.7987843339641034, - "grad_norm": 0.004120492842048407, - "learning_rate": 0.0001999961366044691, - "loss": 46.0, - "step": 36606 - }, - { - "epoch": 2.798860790947493, - "grad_norm": 0.003552485490217805, - "learning_rate": 0.00019999613639332998, - "loss": 46.0, - "step": 36607 - }, - { - "epoch": 2.798937247930883, - "grad_norm": 0.0035627589095383883, - "learning_rate": 0.0001999961361821851, - "loss": 46.0, - "step": 36608 - }, - { - "epoch": 2.7990137049142727, - "grad_norm": 0.0026878025382757187, - "learning_rate": 0.00019999613597103445, - "loss": 46.0, - "step": 36609 - }, - { - "epoch": 2.7990901618976625, - "grad_norm": 0.0016114194877445698, - "learning_rate": 0.00019999613575987806, - "loss": 46.0, - "step": 36610 - }, - { - "epoch": 2.799166618881052, - "grad_norm": 0.004447620827704668, - "learning_rate": 0.0001999961355487159, - "loss": 46.0, - "step": 36611 - }, - { - "epoch": 2.7992430758644415, - "grad_norm": 0.0010776607086881995, - "learning_rate": 0.00019999613533754792, - "loss": 46.0, - "step": 36612 - }, - { - "epoch": 2.7993195328478313, - "grad_norm": 0.0018496030243113637, - "learning_rate": 0.00019999613512637418, - "loss": 46.0, - "step": 36613 - }, - { - "epoch": 2.799395989831221, - "grad_norm": 0.0019583420362323523, - "learning_rate": 0.00019999613491519469, - "loss": 46.0, - "step": 36614 - }, - { - "epoch": 2.799472446814611, - "grad_norm": 0.0009847491746768355, - "learning_rate": 0.00019999613470400945, - "loss": 46.0, - "step": 36615 - }, - { - "epoch": 2.7995489037980006, - "grad_norm": 0.005545987281948328, - "learning_rate": 0.00019999613449281839, - "loss": 46.0, - "step": 36616 - }, - { - "epoch": 2.7996253607813903, - "grad_norm": 0.001514257164672017, - "learning_rate": 0.00019999613428162158, - "loss": 46.0, - "step": 36617 - }, - { - "epoch": 2.79970181776478, - "grad_norm": 0.0012794131180271506, - "learning_rate": 0.00019999613407041902, - "loss": 46.0, - "step": 36618 - }, - { - "epoch": 2.79977827474817, - "grad_norm": 0.00804176740348339, - "learning_rate": 0.0001999961338592107, - "loss": 46.0, - "step": 36619 - }, - { - "epoch": 2.7998547317315596, - "grad_norm": 0.00441935146227479, - "learning_rate": 0.00019999613364799656, - "loss": 46.0, - "step": 36620 - }, - { - "epoch": 2.7999311887149494, - "grad_norm": 0.001385703100822866, - "learning_rate": 0.00019999613343677668, - "loss": 46.0, - "step": 36621 - }, - { - "epoch": 2.800007645698339, - "grad_norm": 0.005872361361980438, - "learning_rate": 0.00019999613322555103, - "loss": 46.0, - "step": 36622 - }, - { - "epoch": 2.800084102681729, - "grad_norm": 0.0010021342895925045, - "learning_rate": 0.00019999613301431964, - "loss": 46.0, - "step": 36623 - }, - { - "epoch": 2.8001605596651187, - "grad_norm": 0.0036808105651289225, - "learning_rate": 0.00019999613280308242, - "loss": 46.0, - "step": 36624 - }, - { - "epoch": 2.800237016648508, - "grad_norm": 0.002813017461448908, - "learning_rate": 0.0001999961325918395, - "loss": 46.0, - "step": 36625 - }, - { - "epoch": 2.8003134736318978, - "grad_norm": 0.002767500700429082, - "learning_rate": 0.00019999613238059073, - "loss": 46.0, - "step": 36626 - }, - { - "epoch": 2.8003899306152875, - "grad_norm": 0.0016132600139826536, - "learning_rate": 0.00019999613216933627, - "loss": 46.0, - "step": 36627 - }, - { - "epoch": 2.8004663875986773, - "grad_norm": 0.0026759873144328594, - "learning_rate": 0.000199996131958076, - "loss": 46.0, - "step": 36628 - }, - { - "epoch": 2.800542844582067, - "grad_norm": 0.001208735746331513, - "learning_rate": 0.00019999613174680997, - "loss": 46.0, - "step": 36629 - }, - { - "epoch": 2.800619301565457, - "grad_norm": 0.002023642882704735, - "learning_rate": 0.00019999613153553816, - "loss": 46.0, - "step": 36630 - }, - { - "epoch": 2.8006957585488466, - "grad_norm": 0.0018092935206368566, - "learning_rate": 0.00019999613132426058, - "loss": 46.0, - "step": 36631 - }, - { - "epoch": 2.8007722155322363, - "grad_norm": 0.004044120665639639, - "learning_rate": 0.00019999613111297725, - "loss": 46.0, - "step": 36632 - }, - { - "epoch": 2.8008486725156256, - "grad_norm": 0.0020952471531927586, - "learning_rate": 0.00019999613090168814, - "loss": 46.0, - "step": 36633 - }, - { - "epoch": 2.8009251294990154, - "grad_norm": 0.0015054637333378196, - "learning_rate": 0.00019999613069039324, - "loss": 46.0, - "step": 36634 - }, - { - "epoch": 2.801001586482405, - "grad_norm": 0.007036192808300257, - "learning_rate": 0.0001999961304790926, - "loss": 46.0, - "step": 36635 - }, - { - "epoch": 2.801078043465795, - "grad_norm": 0.002783105941489339, - "learning_rate": 0.0001999961302677862, - "loss": 46.0, - "step": 36636 - }, - { - "epoch": 2.8011545004491847, - "grad_norm": 0.0049445307813584805, - "learning_rate": 0.00019999613005647403, - "loss": 46.0, - "step": 36637 - }, - { - "epoch": 2.8012309574325744, - "grad_norm": 0.002596270525828004, - "learning_rate": 0.00019999612984515603, - "loss": 46.0, - "step": 36638 - }, - { - "epoch": 2.801307414415964, - "grad_norm": 0.001712812576442957, - "learning_rate": 0.00019999612963383232, - "loss": 46.0, - "step": 36639 - }, - { - "epoch": 2.801383871399354, - "grad_norm": 0.0025451479014009237, - "learning_rate": 0.00019999612942250283, - "loss": 46.0, - "step": 36640 - }, - { - "epoch": 2.8014603283827437, - "grad_norm": 0.006407268811017275, - "learning_rate": 0.00019999612921116756, - "loss": 46.0, - "step": 36641 - }, - { - "epoch": 2.8015367853661335, - "grad_norm": 0.0009229324059560895, - "learning_rate": 0.00019999612899982653, - "loss": 46.0, - "step": 36642 - }, - { - "epoch": 2.8016132423495232, - "grad_norm": 0.000678193406201899, - "learning_rate": 0.00019999612878847972, - "loss": 46.0, - "step": 36643 - }, - { - "epoch": 2.801689699332913, - "grad_norm": 0.0026372834108769894, - "learning_rate": 0.00019999612857712716, - "loss": 46.0, - "step": 36644 - }, - { - "epoch": 2.8017661563163028, - "grad_norm": 0.0019961786456406116, - "learning_rate": 0.0001999961283657688, - "loss": 46.0, - "step": 36645 - }, - { - "epoch": 2.8018426132996925, - "grad_norm": 0.0011185986222699285, - "learning_rate": 0.0001999961281544047, - "loss": 46.0, - "step": 36646 - }, - { - "epoch": 2.801919070283082, - "grad_norm": 0.0022907606326043606, - "learning_rate": 0.00019999612794303483, - "loss": 46.0, - "step": 36647 - }, - { - "epoch": 2.8019955272664716, - "grad_norm": 0.001931617734953761, - "learning_rate": 0.00019999612773165918, - "loss": 46.0, - "step": 36648 - }, - { - "epoch": 2.8020719842498614, - "grad_norm": 0.003951586317270994, - "learning_rate": 0.00019999612752027773, - "loss": 46.0, - "step": 36649 - }, - { - "epoch": 2.802148441233251, - "grad_norm": 0.0009364747093059123, - "learning_rate": 0.00019999612730889056, - "loss": 46.0, - "step": 36650 - }, - { - "epoch": 2.802224898216641, - "grad_norm": 0.002199733629822731, - "learning_rate": 0.00019999612709749762, - "loss": 46.0, - "step": 36651 - }, - { - "epoch": 2.8023013552000307, - "grad_norm": 0.0023284186609089375, - "learning_rate": 0.0001999961268860989, - "loss": 46.0, - "step": 36652 - }, - { - "epoch": 2.8023778121834204, - "grad_norm": 0.0029297014698386192, - "learning_rate": 0.00019999612667469442, - "loss": 46.0, - "step": 36653 - }, - { - "epoch": 2.80245426916681, - "grad_norm": 0.00334092671982944, - "learning_rate": 0.00019999612646328413, - "loss": 46.0, - "step": 36654 - }, - { - "epoch": 2.8025307261501995, - "grad_norm": 0.0008494119974784553, - "learning_rate": 0.0001999961262518681, - "loss": 46.0, - "step": 36655 - }, - { - "epoch": 2.8026071831335893, - "grad_norm": 0.001970664830878377, - "learning_rate": 0.00019999612604044628, - "loss": 46.0, - "step": 36656 - }, - { - "epoch": 2.802683640116979, - "grad_norm": 0.0009151042904704809, - "learning_rate": 0.00019999612582901873, - "loss": 46.0, - "step": 36657 - }, - { - "epoch": 2.8027600971003688, - "grad_norm": 0.006365691311657429, - "learning_rate": 0.00019999612561758537, - "loss": 46.0, - "step": 36658 - }, - { - "epoch": 2.8028365540837585, - "grad_norm": 0.0030670086853206158, - "learning_rate": 0.00019999612540614627, - "loss": 46.0, - "step": 36659 - }, - { - "epoch": 2.8029130110671483, - "grad_norm": 0.0006867140764370561, - "learning_rate": 0.0001999961251947014, - "loss": 46.0, - "step": 36660 - }, - { - "epoch": 2.802989468050538, - "grad_norm": 0.003196683246642351, - "learning_rate": 0.00019999612498325075, - "loss": 46.0, - "step": 36661 - }, - { - "epoch": 2.803065925033928, - "grad_norm": 0.0038683305028826, - "learning_rate": 0.00019999612477179432, - "loss": 46.0, - "step": 36662 - }, - { - "epoch": 2.8031423820173176, - "grad_norm": 0.0011197554413229227, - "learning_rate": 0.00019999612456033216, - "loss": 46.0, - "step": 36663 - }, - { - "epoch": 2.8032188390007073, - "grad_norm": 0.004056317266076803, - "learning_rate": 0.0001999961243488642, - "loss": 46.0, - "step": 36664 - }, - { - "epoch": 2.803295295984097, - "grad_norm": 0.004966258071362972, - "learning_rate": 0.00019999612413739047, - "loss": 46.0, - "step": 36665 - }, - { - "epoch": 2.803371752967487, - "grad_norm": 0.004595424514263868, - "learning_rate": 0.00019999612392591096, - "loss": 46.0, - "step": 36666 - }, - { - "epoch": 2.8034482099508766, - "grad_norm": 0.0008782196091488004, - "learning_rate": 0.00019999612371442572, - "loss": 46.0, - "step": 36667 - }, - { - "epoch": 2.8035246669342664, - "grad_norm": 0.01031880360096693, - "learning_rate": 0.00019999612350293466, - "loss": 46.0, - "step": 36668 - }, - { - "epoch": 2.8036011239176557, - "grad_norm": 0.007646709214895964, - "learning_rate": 0.00019999612329143788, - "loss": 46.0, - "step": 36669 - }, - { - "epoch": 2.8036775809010455, - "grad_norm": 0.001982444664463401, - "learning_rate": 0.0001999961230799353, - "loss": 46.0, - "step": 36670 - }, - { - "epoch": 2.8037540378844352, - "grad_norm": 0.007043974939733744, - "learning_rate": 0.00019999612286842694, - "loss": 46.0, - "step": 36671 - }, - { - "epoch": 2.803830494867825, - "grad_norm": 0.0013033469440415502, - "learning_rate": 0.00019999612265691284, - "loss": 46.0, - "step": 36672 - }, - { - "epoch": 2.8039069518512147, - "grad_norm": 0.0013943207450211048, - "learning_rate": 0.00019999612244539296, - "loss": 46.0, - "step": 36673 - }, - { - "epoch": 2.8039834088346045, - "grad_norm": 0.004663140047341585, - "learning_rate": 0.00019999612223386732, - "loss": 46.0, - "step": 36674 - }, - { - "epoch": 2.8040598658179943, - "grad_norm": 0.00047276122495532036, - "learning_rate": 0.00019999612202233592, - "loss": 46.0, - "step": 36675 - }, - { - "epoch": 2.804136322801384, - "grad_norm": 0.002373308641836047, - "learning_rate": 0.00019999612181079873, - "loss": 46.0, - "step": 36676 - }, - { - "epoch": 2.8042127797847733, - "grad_norm": 0.001830788911320269, - "learning_rate": 0.00019999612159925576, - "loss": 46.0, - "step": 36677 - }, - { - "epoch": 2.804289236768163, - "grad_norm": 0.0013828761875629425, - "learning_rate": 0.00019999612138770702, - "loss": 46.0, - "step": 36678 - }, - { - "epoch": 2.804365693751553, - "grad_norm": 0.0033276095055043697, - "learning_rate": 0.00019999612117615253, - "loss": 46.0, - "step": 36679 - }, - { - "epoch": 2.8044421507349426, - "grad_norm": 0.0018032494699582458, - "learning_rate": 0.0001999961209645923, - "loss": 46.0, - "step": 36680 - }, - { - "epoch": 2.8045186077183324, - "grad_norm": 0.0014347931137308478, - "learning_rate": 0.00019999612075302623, - "loss": 46.0, - "step": 36681 - }, - { - "epoch": 2.804595064701722, - "grad_norm": 0.0051933773793280125, - "learning_rate": 0.00019999612054145445, - "loss": 46.0, - "step": 36682 - }, - { - "epoch": 2.804671521685112, - "grad_norm": 0.002236979780718684, - "learning_rate": 0.00019999612032987684, - "loss": 46.0, - "step": 36683 - }, - { - "epoch": 2.8047479786685017, - "grad_norm": 0.00294465571641922, - "learning_rate": 0.0001999961201182935, - "loss": 46.0, - "step": 36684 - }, - { - "epoch": 2.8048244356518914, - "grad_norm": 0.0033904453739523888, - "learning_rate": 0.0001999961199067044, - "loss": 46.0, - "step": 36685 - }, - { - "epoch": 2.804900892635281, - "grad_norm": 0.0023571569472551346, - "learning_rate": 0.00019999611969510954, - "loss": 46.0, - "step": 36686 - }, - { - "epoch": 2.804977349618671, - "grad_norm": 0.003928797319531441, - "learning_rate": 0.0001999961194835089, - "loss": 46.0, - "step": 36687 - }, - { - "epoch": 2.8050538066020607, - "grad_norm": 0.0013346492778509855, - "learning_rate": 0.00019999611927190247, - "loss": 46.0, - "step": 36688 - }, - { - "epoch": 2.8051302635854505, - "grad_norm": 0.0025068700779229403, - "learning_rate": 0.00019999611906029027, - "loss": 46.0, - "step": 36689 - }, - { - "epoch": 2.80520672056884, - "grad_norm": 0.001671082922257483, - "learning_rate": 0.0001999961188486723, - "loss": 46.0, - "step": 36690 - }, - { - "epoch": 2.8052831775522296, - "grad_norm": 0.002068845322355628, - "learning_rate": 0.0001999961186370486, - "loss": 46.0, - "step": 36691 - }, - { - "epoch": 2.8053596345356193, - "grad_norm": 0.002072999021038413, - "learning_rate": 0.0001999961184254191, - "loss": 46.0, - "step": 36692 - }, - { - "epoch": 2.805436091519009, - "grad_norm": 0.002299954416230321, - "learning_rate": 0.00019999611821378384, - "loss": 46.0, - "step": 36693 - }, - { - "epoch": 2.805512548502399, - "grad_norm": 0.003928791731595993, - "learning_rate": 0.0001999961180021428, - "loss": 46.0, - "step": 36694 - }, - { - "epoch": 2.8055890054857886, - "grad_norm": 0.0034237774088978767, - "learning_rate": 0.000199996117790496, - "loss": 46.0, - "step": 36695 - }, - { - "epoch": 2.8056654624691784, - "grad_norm": 0.0029041548259556293, - "learning_rate": 0.00019999611757884345, - "loss": 46.0, - "step": 36696 - }, - { - "epoch": 2.805741919452568, - "grad_norm": 0.0033824958372861147, - "learning_rate": 0.0001999961173671851, - "loss": 46.0, - "step": 36697 - }, - { - "epoch": 2.805818376435958, - "grad_norm": 0.00648773368448019, - "learning_rate": 0.000199996117155521, - "loss": 46.0, - "step": 36698 - }, - { - "epoch": 2.805894833419347, - "grad_norm": 0.003248832654207945, - "learning_rate": 0.0001999961169438511, - "loss": 46.0, - "step": 36699 - }, - { - "epoch": 2.805971290402737, - "grad_norm": 0.0012092050164937973, - "learning_rate": 0.00019999611673217547, - "loss": 46.0, - "step": 36700 - }, - { - "epoch": 2.8060477473861267, - "grad_norm": 0.004391463007777929, - "learning_rate": 0.00019999611652049405, - "loss": 46.0, - "step": 36701 - }, - { - "epoch": 2.8061242043695165, - "grad_norm": 0.0015033665113151073, - "learning_rate": 0.00019999611630880682, - "loss": 46.0, - "step": 36702 - }, - { - "epoch": 2.8062006613529062, - "grad_norm": 0.0017571456264704466, - "learning_rate": 0.00019999611609711388, - "loss": 46.0, - "step": 36703 - }, - { - "epoch": 2.806277118336296, - "grad_norm": 0.001951251644641161, - "learning_rate": 0.00019999611588541517, - "loss": 46.0, - "step": 36704 - }, - { - "epoch": 2.8063535753196858, - "grad_norm": 0.0009492213139310479, - "learning_rate": 0.00019999611567371068, - "loss": 46.0, - "step": 36705 - }, - { - "epoch": 2.8064300323030755, - "grad_norm": 0.002249964280053973, - "learning_rate": 0.0001999961154620004, - "loss": 46.0, - "step": 36706 - }, - { - "epoch": 2.8065064892864653, - "grad_norm": 0.003665202995762229, - "learning_rate": 0.00019999611525028436, - "loss": 46.0, - "step": 36707 - }, - { - "epoch": 2.806582946269855, - "grad_norm": 0.005576695315539837, - "learning_rate": 0.00019999611503856258, - "loss": 46.0, - "step": 36708 - }, - { - "epoch": 2.806659403253245, - "grad_norm": 0.0066673350520431995, - "learning_rate": 0.000199996114826835, - "loss": 46.0, - "step": 36709 - }, - { - "epoch": 2.8067358602366346, - "grad_norm": 0.0008615495171397924, - "learning_rate": 0.00019999611461510167, - "loss": 46.0, - "step": 36710 - }, - { - "epoch": 2.8068123172200243, - "grad_norm": 0.00974003504961729, - "learning_rate": 0.00019999611440336254, - "loss": 46.0, - "step": 36711 - }, - { - "epoch": 2.8068887742034137, - "grad_norm": 0.0010575454216450453, - "learning_rate": 0.00019999611419161767, - "loss": 46.0, - "step": 36712 - }, - { - "epoch": 2.8069652311868034, - "grad_norm": 0.001377164269797504, - "learning_rate": 0.00019999611397986702, - "loss": 46.0, - "step": 36713 - }, - { - "epoch": 2.807041688170193, - "grad_norm": 0.009895585477352142, - "learning_rate": 0.0001999961137681106, - "loss": 46.0, - "step": 36714 - }, - { - "epoch": 2.807118145153583, - "grad_norm": 0.004479090683162212, - "learning_rate": 0.0001999961135563484, - "loss": 46.0, - "step": 36715 - }, - { - "epoch": 2.8071946021369727, - "grad_norm": 0.004253746941685677, - "learning_rate": 0.00019999611334458047, - "loss": 46.0, - "step": 36716 - }, - { - "epoch": 2.8072710591203625, - "grad_norm": 0.002376875374466181, - "learning_rate": 0.00019999611313280673, - "loss": 46.0, - "step": 36717 - }, - { - "epoch": 2.807347516103752, - "grad_norm": 0.003160391701385379, - "learning_rate": 0.0001999961129210272, - "loss": 46.0, - "step": 36718 - }, - { - "epoch": 2.807423973087142, - "grad_norm": 0.00340360589325428, - "learning_rate": 0.00019999611270924198, - "loss": 46.0, - "step": 36719 - }, - { - "epoch": 2.8075004300705313, - "grad_norm": 0.0024417827371507883, - "learning_rate": 0.00019999611249745094, - "loss": 46.0, - "step": 36720 - }, - { - "epoch": 2.807576887053921, - "grad_norm": 0.004683398175984621, - "learning_rate": 0.00019999611228565414, - "loss": 46.0, - "step": 36721 - }, - { - "epoch": 2.807653344037311, - "grad_norm": 0.0024101382587105036, - "learning_rate": 0.00019999611207385156, - "loss": 46.0, - "step": 36722 - }, - { - "epoch": 2.8077298010207006, - "grad_norm": 0.003924605902284384, - "learning_rate": 0.0001999961118620432, - "loss": 46.0, - "step": 36723 - }, - { - "epoch": 2.8078062580040903, - "grad_norm": 0.0025500264018774033, - "learning_rate": 0.0001999961116502291, - "loss": 46.0, - "step": 36724 - }, - { - "epoch": 2.80788271498748, - "grad_norm": 0.0017185703618451953, - "learning_rate": 0.00019999611143840923, - "loss": 46.0, - "step": 36725 - }, - { - "epoch": 2.80795917197087, - "grad_norm": 0.0017719519091770053, - "learning_rate": 0.00019999611122658358, - "loss": 46.0, - "step": 36726 - }, - { - "epoch": 2.8080356289542596, - "grad_norm": 0.0038139228709042072, - "learning_rate": 0.0001999961110147522, - "loss": 46.0, - "step": 36727 - }, - { - "epoch": 2.8081120859376494, - "grad_norm": 0.002001163549721241, - "learning_rate": 0.000199996110802915, - "loss": 46.0, - "step": 36728 - }, - { - "epoch": 2.808188542921039, - "grad_norm": 0.0032030779402703047, - "learning_rate": 0.00019999611059107203, - "loss": 46.0, - "step": 36729 - }, - { - "epoch": 2.808264999904429, - "grad_norm": 0.0012311318423599005, - "learning_rate": 0.0001999961103792233, - "loss": 46.0, - "step": 36730 - }, - { - "epoch": 2.8083414568878187, - "grad_norm": 0.0024967710487544537, - "learning_rate": 0.0001999961101673688, - "loss": 46.0, - "step": 36731 - }, - { - "epoch": 2.8084179138712084, - "grad_norm": 0.012091250158846378, - "learning_rate": 0.00019999610995550854, - "loss": 46.0, - "step": 36732 - }, - { - "epoch": 2.808494370854598, - "grad_norm": 0.0014718605671077967, - "learning_rate": 0.0001999961097436425, - "loss": 46.0, - "step": 36733 - }, - { - "epoch": 2.8085708278379875, - "grad_norm": 0.0026038612704724073, - "learning_rate": 0.0001999961095317707, - "loss": 46.0, - "step": 36734 - }, - { - "epoch": 2.8086472848213773, - "grad_norm": 0.0022275352384895086, - "learning_rate": 0.00019999610931989312, - "loss": 46.0, - "step": 36735 - }, - { - "epoch": 2.808723741804767, - "grad_norm": 0.002716431627050042, - "learning_rate": 0.00019999610910800982, - "loss": 46.0, - "step": 36736 - }, - { - "epoch": 2.808800198788157, - "grad_norm": 0.0014993941877037287, - "learning_rate": 0.0001999961088961207, - "loss": 46.0, - "step": 36737 - }, - { - "epoch": 2.8088766557715465, - "grad_norm": 0.001749646384268999, - "learning_rate": 0.0001999961086842258, - "loss": 46.0, - "step": 36738 - }, - { - "epoch": 2.8089531127549363, - "grad_norm": 0.003653186373412609, - "learning_rate": 0.00019999610847232514, - "loss": 46.0, - "step": 36739 - }, - { - "epoch": 2.809029569738326, - "grad_norm": 0.007551093120127916, - "learning_rate": 0.00019999610826041875, - "loss": 46.0, - "step": 36740 - }, - { - "epoch": 2.809106026721716, - "grad_norm": 0.0028746856842190027, - "learning_rate": 0.00019999610804850656, - "loss": 46.0, - "step": 36741 - }, - { - "epoch": 2.809182483705105, - "grad_norm": 0.0012204285012558103, - "learning_rate": 0.0001999961078365886, - "loss": 46.0, - "step": 36742 - }, - { - "epoch": 2.809258940688495, - "grad_norm": 0.0009773381752893329, - "learning_rate": 0.00019999610762466488, - "loss": 46.0, - "step": 36743 - }, - { - "epoch": 2.8093353976718847, - "grad_norm": 0.00681289890781045, - "learning_rate": 0.0001999961074127354, - "loss": 46.0, - "step": 36744 - }, - { - "epoch": 2.8094118546552744, - "grad_norm": 0.0008422063547186553, - "learning_rate": 0.0001999961072008001, - "loss": 46.0, - "step": 36745 - }, - { - "epoch": 2.809488311638664, - "grad_norm": 0.0015375600196421146, - "learning_rate": 0.0001999961069888591, - "loss": 46.0, - "step": 36746 - }, - { - "epoch": 2.809564768622054, - "grad_norm": 0.0026290591340512037, - "learning_rate": 0.00019999610677691226, - "loss": 46.0, - "step": 36747 - }, - { - "epoch": 2.8096412256054437, - "grad_norm": 0.0010255371453240514, - "learning_rate": 0.0001999961065649597, - "loss": 46.0, - "step": 36748 - }, - { - "epoch": 2.8097176825888335, - "grad_norm": 0.0021664327941834927, - "learning_rate": 0.00019999610635300136, - "loss": 46.0, - "step": 36749 - }, - { - "epoch": 2.8097941395722232, - "grad_norm": 0.0011113504879176617, - "learning_rate": 0.00019999610614103726, - "loss": 46.0, - "step": 36750 - }, - { - "epoch": 2.809870596555613, - "grad_norm": 0.003068802645429969, - "learning_rate": 0.00019999610592906736, - "loss": 46.0, - "step": 36751 - }, - { - "epoch": 2.8099470535390028, - "grad_norm": 0.004069286398589611, - "learning_rate": 0.00019999610571709174, - "loss": 46.0, - "step": 36752 - }, - { - "epoch": 2.8100235105223925, - "grad_norm": 0.0007427320233546197, - "learning_rate": 0.00019999610550511032, - "loss": 46.0, - "step": 36753 - }, - { - "epoch": 2.8100999675057823, - "grad_norm": 0.001602928969077766, - "learning_rate": 0.00019999610529312313, - "loss": 46.0, - "step": 36754 - }, - { - "epoch": 2.810176424489172, - "grad_norm": 0.0034579120110720396, - "learning_rate": 0.00019999610508113019, - "loss": 46.0, - "step": 36755 - }, - { - "epoch": 2.8102528814725614, - "grad_norm": 0.0034717454109340906, - "learning_rate": 0.00019999610486913145, - "loss": 46.0, - "step": 36756 - }, - { - "epoch": 2.810329338455951, - "grad_norm": 0.002721151802688837, - "learning_rate": 0.00019999610465712696, - "loss": 46.0, - "step": 36757 - }, - { - "epoch": 2.810405795439341, - "grad_norm": 0.004295787774026394, - "learning_rate": 0.00019999610444511667, - "loss": 46.0, - "step": 36758 - }, - { - "epoch": 2.8104822524227306, - "grad_norm": 0.0008510529878549278, - "learning_rate": 0.00019999610423310064, - "loss": 46.0, - "step": 36759 - }, - { - "epoch": 2.8105587094061204, - "grad_norm": 0.0027178653981536627, - "learning_rate": 0.00019999610402107883, - "loss": 46.0, - "step": 36760 - }, - { - "epoch": 2.81063516638951, - "grad_norm": 0.001633347012102604, - "learning_rate": 0.00019999610380905128, - "loss": 46.0, - "step": 36761 - }, - { - "epoch": 2.8107116233729, - "grad_norm": 0.0008918690728023648, - "learning_rate": 0.00019999610359701793, - "loss": 46.0, - "step": 36762 - }, - { - "epoch": 2.8107880803562897, - "grad_norm": 0.0025272017810493708, - "learning_rate": 0.00019999610338497883, - "loss": 46.0, - "step": 36763 - }, - { - "epoch": 2.810864537339679, - "grad_norm": 0.0019864998757839203, - "learning_rate": 0.00019999610317293396, - "loss": 46.0, - "step": 36764 - }, - { - "epoch": 2.8109409943230688, - "grad_norm": 0.0037984438240528107, - "learning_rate": 0.00019999610296088328, - "loss": 46.0, - "step": 36765 - }, - { - "epoch": 2.8110174513064585, - "grad_norm": 0.0013584971893578768, - "learning_rate": 0.00019999610274882686, - "loss": 46.0, - "step": 36766 - }, - { - "epoch": 2.8110939082898483, - "grad_norm": 0.0014875085325911641, - "learning_rate": 0.0001999961025367647, - "loss": 46.0, - "step": 36767 - }, - { - "epoch": 2.811170365273238, - "grad_norm": 0.0020454153418540955, - "learning_rate": 0.00019999610232469673, - "loss": 46.0, - "step": 36768 - }, - { - "epoch": 2.811246822256628, - "grad_norm": 0.002144475234672427, - "learning_rate": 0.00019999610211262302, - "loss": 46.0, - "step": 36769 - }, - { - "epoch": 2.8113232792400176, - "grad_norm": 0.0023602943401783705, - "learning_rate": 0.0001999961019005435, - "loss": 46.0, - "step": 36770 - }, - { - "epoch": 2.8113997362234073, - "grad_norm": 0.0009862781735137105, - "learning_rate": 0.00019999610168845825, - "loss": 46.0, - "step": 36771 - }, - { - "epoch": 2.811476193206797, - "grad_norm": 0.006907739210873842, - "learning_rate": 0.0001999961014763672, - "loss": 46.0, - "step": 36772 - }, - { - "epoch": 2.811552650190187, - "grad_norm": 0.002100649755448103, - "learning_rate": 0.0001999961012642704, - "loss": 46.0, - "step": 36773 - }, - { - "epoch": 2.8116291071735766, - "grad_norm": 0.001227144617587328, - "learning_rate": 0.00019999610105216783, - "loss": 46.0, - "step": 36774 - }, - { - "epoch": 2.8117055641569664, - "grad_norm": 0.0015517841093242168, - "learning_rate": 0.00019999610084005948, - "loss": 46.0, - "step": 36775 - }, - { - "epoch": 2.811782021140356, - "grad_norm": 0.0023132034111768007, - "learning_rate": 0.00019999610062794538, - "loss": 46.0, - "step": 36776 - }, - { - "epoch": 2.811858478123746, - "grad_norm": 0.0009892798261716962, - "learning_rate": 0.0001999961004158255, - "loss": 46.0, - "step": 36777 - }, - { - "epoch": 2.811934935107135, - "grad_norm": 0.0021904767490923405, - "learning_rate": 0.00019999610020369986, - "loss": 46.0, - "step": 36778 - }, - { - "epoch": 2.812011392090525, - "grad_norm": 0.002316446742042899, - "learning_rate": 0.00019999609999156844, - "loss": 46.0, - "step": 36779 - }, - { - "epoch": 2.8120878490739147, - "grad_norm": 0.00398360937833786, - "learning_rate": 0.00019999609977943125, - "loss": 46.0, - "step": 36780 - }, - { - "epoch": 2.8121643060573045, - "grad_norm": 0.0010359848383814096, - "learning_rate": 0.0001999960995672883, - "loss": 46.0, - "step": 36781 - }, - { - "epoch": 2.8122407630406943, - "grad_norm": 0.0034260349348187447, - "learning_rate": 0.00019999609935513957, - "loss": 46.0, - "step": 36782 - }, - { - "epoch": 2.812317220024084, - "grad_norm": 0.001337131136097014, - "learning_rate": 0.00019999609914298508, - "loss": 46.0, - "step": 36783 - }, - { - "epoch": 2.812393677007474, - "grad_norm": 0.0025051767006516457, - "learning_rate": 0.0001999960989308248, - "loss": 46.0, - "step": 36784 - }, - { - "epoch": 2.8124701339908635, - "grad_norm": 0.0019756334368139505, - "learning_rate": 0.0001999960987186588, - "loss": 46.0, - "step": 36785 - }, - { - "epoch": 2.812546590974253, - "grad_norm": 0.0014445362612605095, - "learning_rate": 0.00019999609850648699, - "loss": 46.0, - "step": 36786 - }, - { - "epoch": 2.8126230479576426, - "grad_norm": 0.0034651143942028284, - "learning_rate": 0.0001999960982943094, - "loss": 46.0, - "step": 36787 - }, - { - "epoch": 2.8126995049410324, - "grad_norm": 0.0013565976405516267, - "learning_rate": 0.00019999609808212608, - "loss": 46.0, - "step": 36788 - }, - { - "epoch": 2.812775961924422, - "grad_norm": 0.001865662052296102, - "learning_rate": 0.00019999609786993695, - "loss": 46.0, - "step": 36789 - }, - { - "epoch": 2.812852418907812, - "grad_norm": 0.0022742266301065683, - "learning_rate": 0.0001999960976577421, - "loss": 46.0, - "step": 36790 - }, - { - "epoch": 2.8129288758912017, - "grad_norm": 0.0012025375617668033, - "learning_rate": 0.00019999609744554144, - "loss": 46.0, - "step": 36791 - }, - { - "epoch": 2.8130053328745914, - "grad_norm": 0.0017907909350469708, - "learning_rate": 0.00019999609723333502, - "loss": 46.0, - "step": 36792 - }, - { - "epoch": 2.813081789857981, - "grad_norm": 0.004252856131643057, - "learning_rate": 0.00019999609702112283, - "loss": 46.0, - "step": 36793 - }, - { - "epoch": 2.813158246841371, - "grad_norm": 0.0007448701071552932, - "learning_rate": 0.0001999960968089049, - "loss": 46.0, - "step": 36794 - }, - { - "epoch": 2.8132347038247607, - "grad_norm": 0.002914773765951395, - "learning_rate": 0.00019999609659668115, - "loss": 46.0, - "step": 36795 - }, - { - "epoch": 2.8133111608081505, - "grad_norm": 0.0021829938050359488, - "learning_rate": 0.00019999609638445164, - "loss": 46.0, - "step": 36796 - }, - { - "epoch": 2.8133876177915402, - "grad_norm": 0.0011208775686100125, - "learning_rate": 0.0001999960961722164, - "loss": 46.0, - "step": 36797 - }, - { - "epoch": 2.81346407477493, - "grad_norm": 0.0008713062270544469, - "learning_rate": 0.00019999609595997537, - "loss": 46.0, - "step": 36798 - }, - { - "epoch": 2.8135405317583198, - "grad_norm": 0.004214846529066563, - "learning_rate": 0.00019999609574772854, - "loss": 46.0, - "step": 36799 - }, - { - "epoch": 2.813616988741709, - "grad_norm": 0.0017307811649516225, - "learning_rate": 0.000199996095535476, - "loss": 46.0, - "step": 36800 - }, - { - "epoch": 2.813693445725099, - "grad_norm": 0.0023016533814370632, - "learning_rate": 0.00019999609532321764, - "loss": 46.0, - "step": 36801 - }, - { - "epoch": 2.8137699027084886, - "grad_norm": 0.0015102032339200377, - "learning_rate": 0.00019999609511095354, - "loss": 46.0, - "step": 36802 - }, - { - "epoch": 2.8138463596918784, - "grad_norm": 0.003491014475002885, - "learning_rate": 0.00019999609489868364, - "loss": 46.0, - "step": 36803 - }, - { - "epoch": 2.813922816675268, - "grad_norm": 0.00448153680190444, - "learning_rate": 0.00019999609468640802, - "loss": 46.0, - "step": 36804 - }, - { - "epoch": 2.813999273658658, - "grad_norm": 0.00242490042001009, - "learning_rate": 0.0001999960944741266, - "loss": 46.0, - "step": 36805 - }, - { - "epoch": 2.8140757306420476, - "grad_norm": 0.002671569585800171, - "learning_rate": 0.00019999609426183938, - "loss": 46.0, - "step": 36806 - }, - { - "epoch": 2.8141521876254374, - "grad_norm": 0.004628873895853758, - "learning_rate": 0.00019999609404954644, - "loss": 46.0, - "step": 36807 - }, - { - "epoch": 2.8142286446088267, - "grad_norm": 0.0028726004529744387, - "learning_rate": 0.00019999609383724773, - "loss": 46.0, - "step": 36808 - }, - { - "epoch": 2.8143051015922165, - "grad_norm": 0.0015792903723195195, - "learning_rate": 0.00019999609362494322, - "loss": 46.0, - "step": 36809 - }, - { - "epoch": 2.8143815585756062, - "grad_norm": 0.0022798748686909676, - "learning_rate": 0.00019999609341263294, - "loss": 46.0, - "step": 36810 - }, - { - "epoch": 2.814458015558996, - "grad_norm": 0.002969057997688651, - "learning_rate": 0.0001999960932003169, - "loss": 46.0, - "step": 36811 - }, - { - "epoch": 2.8145344725423858, - "grad_norm": 0.0013731082435697317, - "learning_rate": 0.0001999960929879951, - "loss": 46.0, - "step": 36812 - }, - { - "epoch": 2.8146109295257755, - "grad_norm": 0.0013064262457191944, - "learning_rate": 0.00019999609277566755, - "loss": 46.0, - "step": 36813 - }, - { - "epoch": 2.8146873865091653, - "grad_norm": 0.000937616394367069, - "learning_rate": 0.00019999609256333422, - "loss": 46.0, - "step": 36814 - }, - { - "epoch": 2.814763843492555, - "grad_norm": 0.0009406793978996575, - "learning_rate": 0.0001999960923509951, - "loss": 46.0, - "step": 36815 - }, - { - "epoch": 2.814840300475945, - "grad_norm": 0.0018760168459266424, - "learning_rate": 0.00019999609213865023, - "loss": 46.0, - "step": 36816 - }, - { - "epoch": 2.8149167574593346, - "grad_norm": 0.002055566059425473, - "learning_rate": 0.00019999609192629958, - "loss": 46.0, - "step": 36817 - }, - { - "epoch": 2.8149932144427243, - "grad_norm": 0.00498898234218359, - "learning_rate": 0.00019999609171394316, - "loss": 46.0, - "step": 36818 - }, - { - "epoch": 2.815069671426114, - "grad_norm": 0.0012188117252662778, - "learning_rate": 0.00019999609150158097, - "loss": 46.0, - "step": 36819 - }, - { - "epoch": 2.815146128409504, - "grad_norm": 0.008990802802145481, - "learning_rate": 0.000199996091289213, - "loss": 46.0, - "step": 36820 - }, - { - "epoch": 2.815222585392893, - "grad_norm": 0.004068460781127214, - "learning_rate": 0.0001999960910768393, - "loss": 46.0, - "step": 36821 - }, - { - "epoch": 2.815299042376283, - "grad_norm": 0.0013540118234232068, - "learning_rate": 0.0001999960908644598, - "loss": 46.0, - "step": 36822 - }, - { - "epoch": 2.8153754993596727, - "grad_norm": 0.003944691736251116, - "learning_rate": 0.00019999609065207455, - "loss": 46.0, - "step": 36823 - }, - { - "epoch": 2.8154519563430624, - "grad_norm": 0.004211964551359415, - "learning_rate": 0.0001999960904396835, - "loss": 46.0, - "step": 36824 - }, - { - "epoch": 2.815528413326452, - "grad_norm": 0.0030869492329657078, - "learning_rate": 0.0001999960902272867, - "loss": 46.0, - "step": 36825 - }, - { - "epoch": 2.815604870309842, - "grad_norm": 0.003138667671009898, - "learning_rate": 0.0001999960900148841, - "loss": 46.0, - "step": 36826 - }, - { - "epoch": 2.8156813272932317, - "grad_norm": 0.0030309916473925114, - "learning_rate": 0.00019999608980247579, - "loss": 46.0, - "step": 36827 - }, - { - "epoch": 2.8157577842766215, - "grad_norm": 0.0012048023054376245, - "learning_rate": 0.00019999608959006166, - "loss": 46.0, - "step": 36828 - }, - { - "epoch": 2.8158342412600113, - "grad_norm": 0.0014024564297869802, - "learning_rate": 0.00019999608937764182, - "loss": 46.0, - "step": 36829 - }, - { - "epoch": 2.8159106982434006, - "grad_norm": 0.004322107881307602, - "learning_rate": 0.00019999608916521615, - "loss": 46.0, - "step": 36830 - }, - { - "epoch": 2.8159871552267903, - "grad_norm": 0.0007613147026859224, - "learning_rate": 0.00019999608895278473, - "loss": 46.0, - "step": 36831 - }, - { - "epoch": 2.81606361221018, - "grad_norm": 0.0016653668135404587, - "learning_rate": 0.00019999608874034754, - "loss": 46.0, - "step": 36832 - }, - { - "epoch": 2.81614006919357, - "grad_norm": 0.004325645510107279, - "learning_rate": 0.0001999960885279046, - "loss": 46.0, - "step": 36833 - }, - { - "epoch": 2.8162165261769596, - "grad_norm": 0.0007937981863506138, - "learning_rate": 0.00019999608831545586, - "loss": 46.0, - "step": 36834 - }, - { - "epoch": 2.8162929831603494, - "grad_norm": 0.0008365145185962319, - "learning_rate": 0.00019999608810300138, - "loss": 46.0, - "step": 36835 - }, - { - "epoch": 2.816369440143739, - "grad_norm": 0.0019224671414121985, - "learning_rate": 0.0001999960878905411, - "loss": 46.0, - "step": 36836 - }, - { - "epoch": 2.816445897127129, - "grad_norm": 0.004457344301044941, - "learning_rate": 0.00019999608767807506, - "loss": 46.0, - "step": 36837 - }, - { - "epoch": 2.8165223541105187, - "grad_norm": 0.0008685345528647304, - "learning_rate": 0.00019999608746560328, - "loss": 46.0, - "step": 36838 - }, - { - "epoch": 2.8165988110939084, - "grad_norm": 0.0008285320946015418, - "learning_rate": 0.00019999608725312568, - "loss": 46.0, - "step": 36839 - }, - { - "epoch": 2.816675268077298, - "grad_norm": 0.0061916024424135685, - "learning_rate": 0.00019999608704064235, - "loss": 46.0, - "step": 36840 - }, - { - "epoch": 2.816751725060688, - "grad_norm": 0.0028467231895774603, - "learning_rate": 0.00019999608682815323, - "loss": 46.0, - "step": 36841 - }, - { - "epoch": 2.8168281820440777, - "grad_norm": 0.0021147585939615965, - "learning_rate": 0.00019999608661565836, - "loss": 46.0, - "step": 36842 - }, - { - "epoch": 2.816904639027467, - "grad_norm": 0.003130111610516906, - "learning_rate": 0.00019999608640315771, - "loss": 46.0, - "step": 36843 - }, - { - "epoch": 2.8169810960108568, - "grad_norm": 0.0020714327692985535, - "learning_rate": 0.00019999608619065132, - "loss": 46.0, - "step": 36844 - }, - { - "epoch": 2.8170575529942465, - "grad_norm": 0.0026294696144759655, - "learning_rate": 0.00019999608597813913, - "loss": 46.0, - "step": 36845 - }, - { - "epoch": 2.8171340099776363, - "grad_norm": 0.0007815816788934171, - "learning_rate": 0.00019999608576562117, - "loss": 46.0, - "step": 36846 - }, - { - "epoch": 2.817210466961026, - "grad_norm": 0.0010007995879277587, - "learning_rate": 0.00019999608555309743, - "loss": 46.0, - "step": 36847 - }, - { - "epoch": 2.817286923944416, - "grad_norm": 0.002334407763555646, - "learning_rate": 0.00019999608534056792, - "loss": 46.0, - "step": 36848 - }, - { - "epoch": 2.8173633809278056, - "grad_norm": 0.0010643490822985768, - "learning_rate": 0.00019999608512803266, - "loss": 46.0, - "step": 36849 - }, - { - "epoch": 2.8174398379111953, - "grad_norm": 0.002337804762646556, - "learning_rate": 0.00019999608491549163, - "loss": 46.0, - "step": 36850 - }, - { - "epoch": 2.8175162948945847, - "grad_norm": 0.002812929917126894, - "learning_rate": 0.00019999608470294485, - "loss": 46.0, - "step": 36851 - }, - { - "epoch": 2.8175927518779744, - "grad_norm": 0.0028979452326893806, - "learning_rate": 0.00019999608449039228, - "loss": 46.0, - "step": 36852 - }, - { - "epoch": 2.817669208861364, - "grad_norm": 0.002232844242826104, - "learning_rate": 0.00019999608427783395, - "loss": 46.0, - "step": 36853 - }, - { - "epoch": 2.817745665844754, - "grad_norm": 0.004402905702590942, - "learning_rate": 0.00019999608406526983, - "loss": 46.0, - "step": 36854 - }, - { - "epoch": 2.8178221228281437, - "grad_norm": 0.001040091272443533, - "learning_rate": 0.00019999608385269993, - "loss": 46.0, - "step": 36855 - }, - { - "epoch": 2.8178985798115335, - "grad_norm": 0.0035972122568637133, - "learning_rate": 0.0001999960836401243, - "loss": 46.0, - "step": 36856 - }, - { - "epoch": 2.8179750367949232, - "grad_norm": 0.001221560756675899, - "learning_rate": 0.00019999608342754287, - "loss": 46.0, - "step": 36857 - }, - { - "epoch": 2.818051493778313, - "grad_norm": 0.005079397466033697, - "learning_rate": 0.00019999608321495568, - "loss": 46.0, - "step": 36858 - }, - { - "epoch": 2.8181279507617027, - "grad_norm": 0.0030243832152336836, - "learning_rate": 0.00019999608300236271, - "loss": 46.0, - "step": 36859 - }, - { - "epoch": 2.8182044077450925, - "grad_norm": 0.0016896920278668404, - "learning_rate": 0.000199996082789764, - "loss": 46.0, - "step": 36860 - }, - { - "epoch": 2.8182808647284823, - "grad_norm": 0.0021322863176465034, - "learning_rate": 0.0001999960825771595, - "loss": 46.0, - "step": 36861 - }, - { - "epoch": 2.818357321711872, - "grad_norm": 0.001678258995525539, - "learning_rate": 0.00019999608236454926, - "loss": 46.0, - "step": 36862 - }, - { - "epoch": 2.818433778695262, - "grad_norm": 0.0008996687829494476, - "learning_rate": 0.0001999960821519332, - "loss": 46.0, - "step": 36863 - }, - { - "epoch": 2.8185102356786516, - "grad_norm": 0.01777329295873642, - "learning_rate": 0.0001999960819393114, - "loss": 46.0, - "step": 36864 - }, - { - "epoch": 2.818586692662041, - "grad_norm": 0.004625699017196894, - "learning_rate": 0.00019999608172668385, - "loss": 46.0, - "step": 36865 - }, - { - "epoch": 2.8186631496454306, - "grad_norm": 0.0045891208574175835, - "learning_rate": 0.0001999960815140505, - "loss": 46.0, - "step": 36866 - }, - { - "epoch": 2.8187396066288204, - "grad_norm": 0.0015666260151192546, - "learning_rate": 0.00019999608130141138, - "loss": 46.0, - "step": 36867 - }, - { - "epoch": 2.81881606361221, - "grad_norm": 0.01152582187205553, - "learning_rate": 0.0001999960810887665, - "loss": 46.0, - "step": 36868 - }, - { - "epoch": 2.8188925205956, - "grad_norm": 0.001934061641804874, - "learning_rate": 0.00019999608087611586, - "loss": 46.0, - "step": 36869 - }, - { - "epoch": 2.8189689775789897, - "grad_norm": 0.003188719740137458, - "learning_rate": 0.00019999608066345945, - "loss": 46.0, - "step": 36870 - }, - { - "epoch": 2.8190454345623794, - "grad_norm": 0.0018322645919397473, - "learning_rate": 0.00019999608045079726, - "loss": 46.0, - "step": 36871 - }, - { - "epoch": 2.819121891545769, - "grad_norm": 0.00248602288775146, - "learning_rate": 0.0001999960802381293, - "loss": 46.0, - "step": 36872 - }, - { - "epoch": 2.8191983485291585, - "grad_norm": 0.005531106609851122, - "learning_rate": 0.00019999608002545558, - "loss": 46.0, - "step": 36873 - }, - { - "epoch": 2.8192748055125483, - "grad_norm": 0.005437852814793587, - "learning_rate": 0.00019999607981277607, - "loss": 46.0, - "step": 36874 - }, - { - "epoch": 2.819351262495938, - "grad_norm": 0.00439549470320344, - "learning_rate": 0.00019999607960009082, - "loss": 46.0, - "step": 36875 - }, - { - "epoch": 2.819427719479328, - "grad_norm": 0.0016947225667536259, - "learning_rate": 0.00019999607938739979, - "loss": 46.0, - "step": 36876 - }, - { - "epoch": 2.8195041764627176, - "grad_norm": 0.004166991449892521, - "learning_rate": 0.00019999607917470298, - "loss": 46.0, - "step": 36877 - }, - { - "epoch": 2.8195806334461073, - "grad_norm": 0.0014405949041247368, - "learning_rate": 0.0001999960789620004, - "loss": 46.0, - "step": 36878 - }, - { - "epoch": 2.819657090429497, - "grad_norm": 0.0013116549234837294, - "learning_rate": 0.00019999607874929208, - "loss": 46.0, - "step": 36879 - }, - { - "epoch": 2.819733547412887, - "grad_norm": 0.0038978285156190395, - "learning_rate": 0.00019999607853657796, - "loss": 46.0, - "step": 36880 - }, - { - "epoch": 2.8198100043962766, - "grad_norm": 0.0039982362650334835, - "learning_rate": 0.0001999960783238581, - "loss": 46.0, - "step": 36881 - }, - { - "epoch": 2.8198864613796664, - "grad_norm": 0.0016772805247455835, - "learning_rate": 0.00019999607811113244, - "loss": 46.0, - "step": 36882 - }, - { - "epoch": 2.819962918363056, - "grad_norm": 0.0008836925262585282, - "learning_rate": 0.000199996077898401, - "loss": 46.0, - "step": 36883 - }, - { - "epoch": 2.820039375346446, - "grad_norm": 0.0008074045181274414, - "learning_rate": 0.00019999607768566384, - "loss": 46.0, - "step": 36884 - }, - { - "epoch": 2.8201158323298356, - "grad_norm": 0.004422673024237156, - "learning_rate": 0.00019999607747292087, - "loss": 46.0, - "step": 36885 - }, - { - "epoch": 2.8201922893132254, - "grad_norm": 0.0016577758360654116, - "learning_rate": 0.00019999607726017216, - "loss": 46.0, - "step": 36886 - }, - { - "epoch": 2.8202687462966147, - "grad_norm": 0.004137158859521151, - "learning_rate": 0.00019999607704741765, - "loss": 46.0, - "step": 36887 - }, - { - "epoch": 2.8203452032800045, - "grad_norm": 0.0015736386412754655, - "learning_rate": 0.00019999607683465737, - "loss": 46.0, - "step": 36888 - }, - { - "epoch": 2.8204216602633942, - "grad_norm": 0.0013831130927428603, - "learning_rate": 0.00019999607662189134, - "loss": 46.0, - "step": 36889 - }, - { - "epoch": 2.820498117246784, - "grad_norm": 0.00231073796749115, - "learning_rate": 0.00019999607640911954, - "loss": 46.0, - "step": 36890 - }, - { - "epoch": 2.8205745742301738, - "grad_norm": 0.0033042451832443476, - "learning_rate": 0.00019999607619634196, - "loss": 46.0, - "step": 36891 - }, - { - "epoch": 2.8206510312135635, - "grad_norm": 0.003823449369519949, - "learning_rate": 0.00019999607598355864, - "loss": 46.0, - "step": 36892 - }, - { - "epoch": 2.8207274881969533, - "grad_norm": 0.002630861708894372, - "learning_rate": 0.00019999607577076954, - "loss": 46.0, - "step": 36893 - }, - { - "epoch": 2.820803945180343, - "grad_norm": 0.0011717822635546327, - "learning_rate": 0.00019999607555797467, - "loss": 46.0, - "step": 36894 - }, - { - "epoch": 2.8208804021637324, - "grad_norm": 0.0011562578147277236, - "learning_rate": 0.000199996075345174, - "loss": 46.0, - "step": 36895 - }, - { - "epoch": 2.820956859147122, - "grad_norm": 0.001471058581955731, - "learning_rate": 0.00019999607513236759, - "loss": 46.0, - "step": 36896 - }, - { - "epoch": 2.821033316130512, - "grad_norm": 0.004946605302393436, - "learning_rate": 0.0001999960749195554, - "loss": 46.0, - "step": 36897 - }, - { - "epoch": 2.8211097731139017, - "grad_norm": 0.002298481296747923, - "learning_rate": 0.00019999607470673743, - "loss": 46.0, - "step": 36898 - }, - { - "epoch": 2.8211862300972914, - "grad_norm": 0.0019355490803718567, - "learning_rate": 0.00019999607449391372, - "loss": 46.0, - "step": 36899 - }, - { - "epoch": 2.821262687080681, - "grad_norm": 0.0020304580684751272, - "learning_rate": 0.00019999607428108421, - "loss": 46.0, - "step": 36900 - }, - { - "epoch": 2.821339144064071, - "grad_norm": 0.003176880767568946, - "learning_rate": 0.00019999607406824896, - "loss": 46.0, - "step": 36901 - }, - { - "epoch": 2.8214156010474607, - "grad_norm": 0.003486047266051173, - "learning_rate": 0.00019999607385540793, - "loss": 46.0, - "step": 36902 - }, - { - "epoch": 2.8214920580308505, - "grad_norm": 0.0015230627031996846, - "learning_rate": 0.00019999607364256113, - "loss": 46.0, - "step": 36903 - }, - { - "epoch": 2.82156851501424, - "grad_norm": 0.0025447842199355364, - "learning_rate": 0.00019999607342970858, - "loss": 46.0, - "step": 36904 - }, - { - "epoch": 2.82164497199763, - "grad_norm": 0.0013891375856474042, - "learning_rate": 0.0001999960732168502, - "loss": 46.0, - "step": 36905 - }, - { - "epoch": 2.8217214289810197, - "grad_norm": 0.0007464977097697556, - "learning_rate": 0.0001999960730039861, - "loss": 46.0, - "step": 36906 - }, - { - "epoch": 2.8217978859644095, - "grad_norm": 0.00298857968300581, - "learning_rate": 0.00019999607279111623, - "loss": 46.0, - "step": 36907 - }, - { - "epoch": 2.8218743429477993, - "grad_norm": 0.0015343863051384687, - "learning_rate": 0.0001999960725782406, - "loss": 46.0, - "step": 36908 - }, - { - "epoch": 2.8219507999311886, - "grad_norm": 0.001058666268363595, - "learning_rate": 0.00019999607236535915, - "loss": 46.0, - "step": 36909 - }, - { - "epoch": 2.8220272569145783, - "grad_norm": 0.0016679898835718632, - "learning_rate": 0.00019999607215247199, - "loss": 46.0, - "step": 36910 - }, - { - "epoch": 2.822103713897968, - "grad_norm": 0.0016461880877614021, - "learning_rate": 0.00019999607193957902, - "loss": 46.0, - "step": 36911 - }, - { - "epoch": 2.822180170881358, - "grad_norm": 0.0026223361492156982, - "learning_rate": 0.0001999960717266803, - "loss": 46.0, - "step": 36912 - }, - { - "epoch": 2.8222566278647476, - "grad_norm": 0.0017350807320326567, - "learning_rate": 0.0001999960715137758, - "loss": 46.0, - "step": 36913 - }, - { - "epoch": 2.8223330848481374, - "grad_norm": 0.0015073895920068026, - "learning_rate": 0.00019999607130086555, - "loss": 46.0, - "step": 36914 - }, - { - "epoch": 2.822409541831527, - "grad_norm": 0.003138087224215269, - "learning_rate": 0.00019999607108794952, - "loss": 46.0, - "step": 36915 - }, - { - "epoch": 2.822485998814917, - "grad_norm": 0.002158522605895996, - "learning_rate": 0.0001999960708750277, - "loss": 46.0, - "step": 36916 - }, - { - "epoch": 2.8225624557983062, - "grad_norm": 0.009770886972546577, - "learning_rate": 0.00019999607066210014, - "loss": 46.0, - "step": 36917 - }, - { - "epoch": 2.822638912781696, - "grad_norm": 0.002159107243642211, - "learning_rate": 0.0001999960704491668, - "loss": 46.0, - "step": 36918 - }, - { - "epoch": 2.8227153697650857, - "grad_norm": 0.0008081173291429877, - "learning_rate": 0.0001999960702362277, - "loss": 46.0, - "step": 36919 - }, - { - "epoch": 2.8227918267484755, - "grad_norm": 0.005670949351042509, - "learning_rate": 0.0001999960700232828, - "loss": 46.0, - "step": 36920 - }, - { - "epoch": 2.8228682837318653, - "grad_norm": 0.0037465994246304035, - "learning_rate": 0.00019999606981033216, - "loss": 46.0, - "step": 36921 - }, - { - "epoch": 2.822944740715255, - "grad_norm": 0.0007225925219245255, - "learning_rate": 0.00019999606959737574, - "loss": 46.0, - "step": 36922 - }, - { - "epoch": 2.823021197698645, - "grad_norm": 0.00275416555814445, - "learning_rate": 0.00019999606938441355, - "loss": 46.0, - "step": 36923 - }, - { - "epoch": 2.8230976546820346, - "grad_norm": 0.001614834414795041, - "learning_rate": 0.0001999960691714456, - "loss": 46.0, - "step": 36924 - }, - { - "epoch": 2.8231741116654243, - "grad_norm": 0.00276213139295578, - "learning_rate": 0.00019999606895847188, - "loss": 46.0, - "step": 36925 - }, - { - "epoch": 2.823250568648814, - "grad_norm": 0.0009544197237119079, - "learning_rate": 0.00019999606874549237, - "loss": 46.0, - "step": 36926 - }, - { - "epoch": 2.823327025632204, - "grad_norm": 0.0032150084152817726, - "learning_rate": 0.00019999606853250712, - "loss": 46.0, - "step": 36927 - }, - { - "epoch": 2.8234034826155936, - "grad_norm": 0.0021593982819467783, - "learning_rate": 0.00019999606831951606, - "loss": 46.0, - "step": 36928 - }, - { - "epoch": 2.8234799395989834, - "grad_norm": 0.0020897244103252888, - "learning_rate": 0.00019999606810651929, - "loss": 46.0, - "step": 36929 - }, - { - "epoch": 2.823556396582373, - "grad_norm": 0.0010092228185385466, - "learning_rate": 0.0001999960678935167, - "loss": 46.0, - "step": 36930 - }, - { - "epoch": 2.8236328535657624, - "grad_norm": 0.0032900257501751184, - "learning_rate": 0.00019999606768050836, - "loss": 46.0, - "step": 36931 - }, - { - "epoch": 2.823709310549152, - "grad_norm": 0.004979650489985943, - "learning_rate": 0.00019999606746749424, - "loss": 46.0, - "step": 36932 - }, - { - "epoch": 2.823785767532542, - "grad_norm": 0.002661351813003421, - "learning_rate": 0.00019999606725447437, - "loss": 46.0, - "step": 36933 - }, - { - "epoch": 2.8238622245159317, - "grad_norm": 0.0019578225910663605, - "learning_rate": 0.00019999606704144873, - "loss": 46.0, - "step": 36934 - }, - { - "epoch": 2.8239386814993215, - "grad_norm": 0.0016328799538314342, - "learning_rate": 0.0001999960668284173, - "loss": 46.0, - "step": 36935 - }, - { - "epoch": 2.8240151384827112, - "grad_norm": 0.0033278141636401415, - "learning_rate": 0.00019999606661538012, - "loss": 46.0, - "step": 36936 - }, - { - "epoch": 2.824091595466101, - "grad_norm": 0.0024550892412662506, - "learning_rate": 0.00019999606640233716, - "loss": 46.0, - "step": 36937 - }, - { - "epoch": 2.8241680524494908, - "grad_norm": 0.0023666478227823973, - "learning_rate": 0.00019999606618928843, - "loss": 46.0, - "step": 36938 - }, - { - "epoch": 2.82424450943288, - "grad_norm": 0.0009375037625432014, - "learning_rate": 0.00019999606597623394, - "loss": 46.0, - "step": 36939 - }, - { - "epoch": 2.82432096641627, - "grad_norm": 0.0024318823125213385, - "learning_rate": 0.0001999960657631737, - "loss": 46.0, - "step": 36940 - }, - { - "epoch": 2.8243974233996596, - "grad_norm": 0.0037423213943839073, - "learning_rate": 0.00019999606555010763, - "loss": 46.0, - "step": 36941 - }, - { - "epoch": 2.8244738803830494, - "grad_norm": 0.001682156347669661, - "learning_rate": 0.00019999606533703586, - "loss": 46.0, - "step": 36942 - }, - { - "epoch": 2.824550337366439, - "grad_norm": 0.002599930390715599, - "learning_rate": 0.00019999606512395828, - "loss": 46.0, - "step": 36943 - }, - { - "epoch": 2.824626794349829, - "grad_norm": 0.002183648757636547, - "learning_rate": 0.00019999606491087493, - "loss": 46.0, - "step": 36944 - }, - { - "epoch": 2.8247032513332186, - "grad_norm": 0.0031466891523450613, - "learning_rate": 0.0001999960646977858, - "loss": 46.0, - "step": 36945 - }, - { - "epoch": 2.8247797083166084, - "grad_norm": 0.0038752886466681957, - "learning_rate": 0.00019999606448469094, - "loss": 46.0, - "step": 36946 - }, - { - "epoch": 2.824856165299998, - "grad_norm": 0.0023581795394420624, - "learning_rate": 0.0001999960642715903, - "loss": 46.0, - "step": 36947 - }, - { - "epoch": 2.824932622283388, - "grad_norm": 0.0068861329928040504, - "learning_rate": 0.0001999960640584839, - "loss": 46.0, - "step": 36948 - }, - { - "epoch": 2.8250090792667777, - "grad_norm": 0.0006696869386360049, - "learning_rate": 0.00019999606384537167, - "loss": 46.0, - "step": 36949 - }, - { - "epoch": 2.8250855362501675, - "grad_norm": 0.0013573196483775973, - "learning_rate": 0.00019999606363225374, - "loss": 46.0, - "step": 36950 - }, - { - "epoch": 2.825161993233557, - "grad_norm": 0.002307785674929619, - "learning_rate": 0.00019999606341913003, - "loss": 46.0, - "step": 36951 - }, - { - "epoch": 2.8252384502169465, - "grad_norm": 0.003991785924881697, - "learning_rate": 0.00019999606320600052, - "loss": 46.0, - "step": 36952 - }, - { - "epoch": 2.8253149072003363, - "grad_norm": 0.004686118569225073, - "learning_rate": 0.00019999606299286527, - "loss": 46.0, - "step": 36953 - }, - { - "epoch": 2.825391364183726, - "grad_norm": 0.0032807081006467342, - "learning_rate": 0.00019999606277972424, - "loss": 46.0, - "step": 36954 - }, - { - "epoch": 2.825467821167116, - "grad_norm": 0.0026232386007905006, - "learning_rate": 0.0001999960625665774, - "loss": 46.0, - "step": 36955 - }, - { - "epoch": 2.8255442781505056, - "grad_norm": 0.00046751167974434793, - "learning_rate": 0.00019999606235342486, - "loss": 46.0, - "step": 36956 - }, - { - "epoch": 2.8256207351338953, - "grad_norm": 0.0012950844829902053, - "learning_rate": 0.0001999960621402665, - "loss": 46.0, - "step": 36957 - }, - { - "epoch": 2.825697192117285, - "grad_norm": 0.001847826293669641, - "learning_rate": 0.00019999606192710242, - "loss": 46.0, - "step": 36958 - }, - { - "epoch": 2.825773649100675, - "grad_norm": 0.003466159338131547, - "learning_rate": 0.00019999606171393252, - "loss": 46.0, - "step": 36959 - }, - { - "epoch": 2.8258501060840646, - "grad_norm": 0.002881294582039118, - "learning_rate": 0.00019999606150075688, - "loss": 46.0, - "step": 36960 - }, - { - "epoch": 2.825926563067454, - "grad_norm": 0.002187170786783099, - "learning_rate": 0.00019999606128757547, - "loss": 46.0, - "step": 36961 - }, - { - "epoch": 2.8260030200508437, - "grad_norm": 0.003727750387042761, - "learning_rate": 0.00019999606107438828, - "loss": 46.0, - "step": 36962 - }, - { - "epoch": 2.8260794770342335, - "grad_norm": 0.0057577756233513355, - "learning_rate": 0.00019999606086119532, - "loss": 46.0, - "step": 36963 - }, - { - "epoch": 2.826155934017623, - "grad_norm": 0.002422747667878866, - "learning_rate": 0.0001999960606479966, - "loss": 46.0, - "step": 36964 - }, - { - "epoch": 2.826232391001013, - "grad_norm": 0.00204710615798831, - "learning_rate": 0.0001999960604347921, - "loss": 46.0, - "step": 36965 - }, - { - "epoch": 2.8263088479844027, - "grad_norm": 0.0007467864197678864, - "learning_rate": 0.00019999606022158185, - "loss": 46.0, - "step": 36966 - }, - { - "epoch": 2.8263853049677925, - "grad_norm": 0.0014958487590774894, - "learning_rate": 0.00019999606000836582, - "loss": 46.0, - "step": 36967 - }, - { - "epoch": 2.8264617619511823, - "grad_norm": 0.001446136855520308, - "learning_rate": 0.000199996059795144, - "loss": 46.0, - "step": 36968 - }, - { - "epoch": 2.826538218934572, - "grad_norm": 0.004083353094756603, - "learning_rate": 0.00019999605958191644, - "loss": 46.0, - "step": 36969 - }, - { - "epoch": 2.826614675917962, - "grad_norm": 0.0013256021775305271, - "learning_rate": 0.0001999960593686831, - "loss": 46.0, - "step": 36970 - }, - { - "epoch": 2.8266911329013515, - "grad_norm": 0.003115095430985093, - "learning_rate": 0.000199996059155444, - "loss": 46.0, - "step": 36971 - }, - { - "epoch": 2.8267675898847413, - "grad_norm": 0.002239291789010167, - "learning_rate": 0.0001999960589421991, - "loss": 46.0, - "step": 36972 - }, - { - "epoch": 2.826844046868131, - "grad_norm": 0.0033848669845610857, - "learning_rate": 0.00019999605872894844, - "loss": 46.0, - "step": 36973 - }, - { - "epoch": 2.8269205038515204, - "grad_norm": 0.0033135893754661083, - "learning_rate": 0.00019999605851569205, - "loss": 46.0, - "step": 36974 - }, - { - "epoch": 2.82699696083491, - "grad_norm": 0.0023286587093025446, - "learning_rate": 0.00019999605830242983, - "loss": 46.0, - "step": 36975 - }, - { - "epoch": 2.8270734178183, - "grad_norm": 0.003982891794294119, - "learning_rate": 0.0001999960580891619, - "loss": 46.0, - "step": 36976 - }, - { - "epoch": 2.8271498748016897, - "grad_norm": 0.002888122806325555, - "learning_rate": 0.00019999605787588816, - "loss": 46.0, - "step": 36977 - }, - { - "epoch": 2.8272263317850794, - "grad_norm": 0.0008295307052321732, - "learning_rate": 0.00019999605766260868, - "loss": 46.0, - "step": 36978 - }, - { - "epoch": 2.827302788768469, - "grad_norm": 0.0015853594522923231, - "learning_rate": 0.0001999960574493234, - "loss": 46.0, - "step": 36979 - }, - { - "epoch": 2.827379245751859, - "grad_norm": 0.002653133822605014, - "learning_rate": 0.00019999605723603237, - "loss": 46.0, - "step": 36980 - }, - { - "epoch": 2.8274557027352487, - "grad_norm": 0.0011382754892110825, - "learning_rate": 0.00019999605702273557, - "loss": 46.0, - "step": 36981 - }, - { - "epoch": 2.827532159718638, - "grad_norm": 0.0011233199620619416, - "learning_rate": 0.000199996056809433, - "loss": 46.0, - "step": 36982 - }, - { - "epoch": 2.827608616702028, - "grad_norm": 0.004417513031512499, - "learning_rate": 0.00019999605659612465, - "loss": 46.0, - "step": 36983 - }, - { - "epoch": 2.8276850736854175, - "grad_norm": 0.0008167651831172407, - "learning_rate": 0.00019999605638281056, - "loss": 46.0, - "step": 36984 - }, - { - "epoch": 2.8277615306688073, - "grad_norm": 0.001227772911079228, - "learning_rate": 0.0001999960561694907, - "loss": 46.0, - "step": 36985 - }, - { - "epoch": 2.827837987652197, - "grad_norm": 0.002584100002422929, - "learning_rate": 0.00019999605595616502, - "loss": 46.0, - "step": 36986 - }, - { - "epoch": 2.827914444635587, - "grad_norm": 0.0020683251786977053, - "learning_rate": 0.0001999960557428336, - "loss": 46.0, - "step": 36987 - }, - { - "epoch": 2.8279909016189766, - "grad_norm": 0.0029860222712159157, - "learning_rate": 0.00019999605552949642, - "loss": 46.0, - "step": 36988 - }, - { - "epoch": 2.8280673586023664, - "grad_norm": 0.0030127670615911484, - "learning_rate": 0.00019999605531615346, - "loss": 46.0, - "step": 36989 - }, - { - "epoch": 2.828143815585756, - "grad_norm": 0.004143049940466881, - "learning_rate": 0.00019999605510280475, - "loss": 46.0, - "step": 36990 - }, - { - "epoch": 2.828220272569146, - "grad_norm": 0.001784849795512855, - "learning_rate": 0.00019999605488945024, - "loss": 46.0, - "step": 36991 - }, - { - "epoch": 2.8282967295525356, - "grad_norm": 0.0032061522360891104, - "learning_rate": 0.00019999605467609, - "loss": 46.0, - "step": 36992 - }, - { - "epoch": 2.8283731865359254, - "grad_norm": 0.000825317285489291, - "learning_rate": 0.00019999605446272396, - "loss": 46.0, - "step": 36993 - }, - { - "epoch": 2.828449643519315, - "grad_norm": 0.0030062992591410875, - "learning_rate": 0.00019999605424935214, - "loss": 46.0, - "step": 36994 - }, - { - "epoch": 2.828526100502705, - "grad_norm": 0.003917657770216465, - "learning_rate": 0.0001999960540359746, - "loss": 46.0, - "step": 36995 - }, - { - "epoch": 2.8286025574860942, - "grad_norm": 0.001220331760123372, - "learning_rate": 0.00019999605382259127, - "loss": 46.0, - "step": 36996 - }, - { - "epoch": 2.828679014469484, - "grad_norm": 0.0008410522132180631, - "learning_rate": 0.00019999605360920212, - "loss": 46.0, - "step": 36997 - }, - { - "epoch": 2.8287554714528738, - "grad_norm": 0.0019293116638436913, - "learning_rate": 0.00019999605339580728, - "loss": 46.0, - "step": 36998 - }, - { - "epoch": 2.8288319284362635, - "grad_norm": 0.004157381597906351, - "learning_rate": 0.00019999605318240659, - "loss": 46.0, - "step": 36999 - }, - { - "epoch": 2.8289083854196533, - "grad_norm": 0.005885671824216843, - "learning_rate": 0.0001999960529690002, - "loss": 46.0, - "step": 37000 - }, - { - "epoch": 2.828984842403043, - "grad_norm": 0.002693883143365383, - "learning_rate": 0.000199996052755588, - "loss": 46.0, - "step": 37001 - }, - { - "epoch": 2.829061299386433, - "grad_norm": 0.0042720879428088665, - "learning_rate": 0.00019999605254217005, - "loss": 46.0, - "step": 37002 - }, - { - "epoch": 2.8291377563698226, - "grad_norm": 0.00464756740257144, - "learning_rate": 0.00019999605232874634, - "loss": 46.0, - "step": 37003 - }, - { - "epoch": 2.829214213353212, - "grad_norm": 0.00296876672655344, - "learning_rate": 0.00019999605211531684, - "loss": 46.0, - "step": 37004 - }, - { - "epoch": 2.8292906703366016, - "grad_norm": 0.0015118091832846403, - "learning_rate": 0.00019999605190188158, - "loss": 46.0, - "step": 37005 - }, - { - "epoch": 2.8293671273199914, - "grad_norm": 0.0016360600711777806, - "learning_rate": 0.00019999605168844053, - "loss": 46.0, - "step": 37006 - }, - { - "epoch": 2.829443584303381, - "grad_norm": 0.0012815962545573711, - "learning_rate": 0.00019999605147499373, - "loss": 46.0, - "step": 37007 - }, - { - "epoch": 2.829520041286771, - "grad_norm": 0.003865294624119997, - "learning_rate": 0.00019999605126154118, - "loss": 46.0, - "step": 37008 - }, - { - "epoch": 2.8295964982701607, - "grad_norm": 0.0035797294694930315, - "learning_rate": 0.0001999960510480828, - "loss": 46.0, - "step": 37009 - }, - { - "epoch": 2.8296729552535504, - "grad_norm": 0.0010599744273349643, - "learning_rate": 0.00019999605083461871, - "loss": 46.0, - "step": 37010 - }, - { - "epoch": 2.82974941223694, - "grad_norm": 0.003947384189814329, - "learning_rate": 0.00019999605062114882, - "loss": 46.0, - "step": 37011 - }, - { - "epoch": 2.82982586922033, - "grad_norm": 0.0008913521305657923, - "learning_rate": 0.00019999605040767318, - "loss": 46.0, - "step": 37012 - }, - { - "epoch": 2.8299023262037197, - "grad_norm": 0.0037171305157244205, - "learning_rate": 0.00019999605019419177, - "loss": 46.0, - "step": 37013 - }, - { - "epoch": 2.8299787831871095, - "grad_norm": 0.0021713310852646828, - "learning_rate": 0.00019999604998070458, - "loss": 46.0, - "step": 37014 - }, - { - "epoch": 2.8300552401704993, - "grad_norm": 0.0014609800418838859, - "learning_rate": 0.00019999604976721162, - "loss": 46.0, - "step": 37015 - }, - { - "epoch": 2.830131697153889, - "grad_norm": 0.0018056613625958562, - "learning_rate": 0.00019999604955371289, - "loss": 46.0, - "step": 37016 - }, - { - "epoch": 2.8302081541372788, - "grad_norm": 0.0014918268425390124, - "learning_rate": 0.0001999960493402084, - "loss": 46.0, - "step": 37017 - }, - { - "epoch": 2.830284611120668, - "grad_norm": 0.001848678570240736, - "learning_rate": 0.00019999604912669815, - "loss": 46.0, - "step": 37018 - }, - { - "epoch": 2.830361068104058, - "grad_norm": 0.001480182516388595, - "learning_rate": 0.0001999960489131821, - "loss": 46.0, - "step": 37019 - }, - { - "epoch": 2.8304375250874476, - "grad_norm": 0.003131404286250472, - "learning_rate": 0.0001999960486996603, - "loss": 46.0, - "step": 37020 - }, - { - "epoch": 2.8305139820708374, - "grad_norm": 0.00136624276638031, - "learning_rate": 0.00019999604848613273, - "loss": 46.0, - "step": 37021 - }, - { - "epoch": 2.830590439054227, - "grad_norm": 0.0027331511955708265, - "learning_rate": 0.0001999960482725994, - "loss": 46.0, - "step": 37022 - }, - { - "epoch": 2.830666896037617, - "grad_norm": 0.0009661935037001967, - "learning_rate": 0.00019999604805906029, - "loss": 46.0, - "step": 37023 - }, - { - "epoch": 2.8307433530210067, - "grad_norm": 0.003178333630785346, - "learning_rate": 0.0001999960478455154, - "loss": 46.0, - "step": 37024 - }, - { - "epoch": 2.8308198100043964, - "grad_norm": 0.0037964927032589912, - "learning_rate": 0.00019999604763196475, - "loss": 46.0, - "step": 37025 - }, - { - "epoch": 2.8308962669877857, - "grad_norm": 0.0023397982586175203, - "learning_rate": 0.00019999604741840834, - "loss": 46.0, - "step": 37026 - }, - { - "epoch": 2.8309727239711755, - "grad_norm": 0.0028361817821860313, - "learning_rate": 0.00019999604720484615, - "loss": 46.0, - "step": 37027 - }, - { - "epoch": 2.8310491809545653, - "grad_norm": 0.0020070101600140333, - "learning_rate": 0.00019999604699127822, - "loss": 46.0, - "step": 37028 - }, - { - "epoch": 2.831125637937955, - "grad_norm": 0.0044563873670995235, - "learning_rate": 0.00019999604677770449, - "loss": 46.0, - "step": 37029 - }, - { - "epoch": 2.831202094921345, - "grad_norm": 0.0019707109313458204, - "learning_rate": 0.00019999604656412498, - "loss": 46.0, - "step": 37030 - }, - { - "epoch": 2.8312785519047345, - "grad_norm": 0.002065874170511961, - "learning_rate": 0.00019999604635053973, - "loss": 46.0, - "step": 37031 - }, - { - "epoch": 2.8313550088881243, - "grad_norm": 0.0018800286343321204, - "learning_rate": 0.0001999960461369487, - "loss": 46.0, - "step": 37032 - }, - { - "epoch": 2.831431465871514, - "grad_norm": 0.003695941995829344, - "learning_rate": 0.0001999960459233519, - "loss": 46.0, - "step": 37033 - }, - { - "epoch": 2.831507922854904, - "grad_norm": 0.0024002997670322657, - "learning_rate": 0.00019999604570974933, - "loss": 46.0, - "step": 37034 - }, - { - "epoch": 2.8315843798382936, - "grad_norm": 0.0024831988848745823, - "learning_rate": 0.00019999604549614098, - "loss": 46.0, - "step": 37035 - }, - { - "epoch": 2.8316608368216833, - "grad_norm": 0.003218643134459853, - "learning_rate": 0.00019999604528252686, - "loss": 46.0, - "step": 37036 - }, - { - "epoch": 2.831737293805073, - "grad_norm": 0.0009372446220368147, - "learning_rate": 0.000199996045068907, - "loss": 46.0, - "step": 37037 - }, - { - "epoch": 2.831813750788463, - "grad_norm": 0.003029302228242159, - "learning_rate": 0.00019999604485528133, - "loss": 46.0, - "step": 37038 - }, - { - "epoch": 2.8318902077718526, - "grad_norm": 0.0028853006660938263, - "learning_rate": 0.00019999604464164992, - "loss": 46.0, - "step": 37039 - }, - { - "epoch": 2.831966664755242, - "grad_norm": 0.0019728646147996187, - "learning_rate": 0.00019999604442801273, - "loss": 46.0, - "step": 37040 - }, - { - "epoch": 2.8320431217386317, - "grad_norm": 0.0017252343241125345, - "learning_rate": 0.0001999960442143698, - "loss": 46.0, - "step": 37041 - }, - { - "epoch": 2.8321195787220215, - "grad_norm": 0.002286685397848487, - "learning_rate": 0.00019999604400072106, - "loss": 46.0, - "step": 37042 - }, - { - "epoch": 2.8321960357054112, - "grad_norm": 0.0010402693878859282, - "learning_rate": 0.00019999604378706658, - "loss": 46.0, - "step": 37043 - }, - { - "epoch": 2.832272492688801, - "grad_norm": 0.0015757293440401554, - "learning_rate": 0.0001999960435734063, - "loss": 46.0, - "step": 37044 - }, - { - "epoch": 2.8323489496721908, - "grad_norm": 0.0017045572167262435, - "learning_rate": 0.00019999604335974028, - "loss": 46.0, - "step": 37045 - }, - { - "epoch": 2.8324254066555805, - "grad_norm": 0.005007521249353886, - "learning_rate": 0.00019999604314606848, - "loss": 46.0, - "step": 37046 - }, - { - "epoch": 2.8325018636389703, - "grad_norm": 0.0048163775354623795, - "learning_rate": 0.0001999960429323909, - "loss": 46.0, - "step": 37047 - }, - { - "epoch": 2.8325783206223596, - "grad_norm": 0.004868373274803162, - "learning_rate": 0.0001999960427187076, - "loss": 46.0, - "step": 37048 - }, - { - "epoch": 2.8326547776057494, - "grad_norm": 0.004244635812938213, - "learning_rate": 0.00019999604250501847, - "loss": 46.0, - "step": 37049 - }, - { - "epoch": 2.832731234589139, - "grad_norm": 0.002112451707944274, - "learning_rate": 0.00019999604229132357, - "loss": 46.0, - "step": 37050 - }, - { - "epoch": 2.832807691572529, - "grad_norm": 0.002105921506881714, - "learning_rate": 0.00019999604207762294, - "loss": 46.0, - "step": 37051 - }, - { - "epoch": 2.8328841485559186, - "grad_norm": 0.002561325905844569, - "learning_rate": 0.0001999960418639165, - "loss": 46.0, - "step": 37052 - }, - { - "epoch": 2.8329606055393084, - "grad_norm": 0.0020687479991465807, - "learning_rate": 0.00019999604165020434, - "loss": 46.0, - "step": 37053 - }, - { - "epoch": 2.833037062522698, - "grad_norm": 0.0027319584041833878, - "learning_rate": 0.00019999604143648635, - "loss": 46.0, - "step": 37054 - }, - { - "epoch": 2.833113519506088, - "grad_norm": 0.0030791524332016706, - "learning_rate": 0.00019999604122276264, - "loss": 46.0, - "step": 37055 - }, - { - "epoch": 2.8331899764894777, - "grad_norm": 0.003188525792211294, - "learning_rate": 0.00019999604100903317, - "loss": 46.0, - "step": 37056 - }, - { - "epoch": 2.8332664334728674, - "grad_norm": 0.0026026912964880466, - "learning_rate": 0.00019999604079529789, - "loss": 46.0, - "step": 37057 - }, - { - "epoch": 2.833342890456257, - "grad_norm": 0.0026390100829303265, - "learning_rate": 0.00019999604058155686, - "loss": 46.0, - "step": 37058 - }, - { - "epoch": 2.833419347439647, - "grad_norm": 0.0020187459886074066, - "learning_rate": 0.00019999604036781006, - "loss": 46.0, - "step": 37059 - }, - { - "epoch": 2.8334958044230367, - "grad_norm": 0.0008578562992624938, - "learning_rate": 0.0001999960401540575, - "loss": 46.0, - "step": 37060 - }, - { - "epoch": 2.8335722614064265, - "grad_norm": 0.002300386782735586, - "learning_rate": 0.00019999603994029914, - "loss": 46.0, - "step": 37061 - }, - { - "epoch": 2.833648718389816, - "grad_norm": 0.007256791461259127, - "learning_rate": 0.00019999603972653502, - "loss": 46.0, - "step": 37062 - }, - { - "epoch": 2.8337251753732056, - "grad_norm": 0.002237992826849222, - "learning_rate": 0.00019999603951276516, - "loss": 46.0, - "step": 37063 - }, - { - "epoch": 2.8338016323565953, - "grad_norm": 0.005145910196006298, - "learning_rate": 0.00019999603929898952, - "loss": 46.0, - "step": 37064 - }, - { - "epoch": 2.833878089339985, - "grad_norm": 0.00281929736956954, - "learning_rate": 0.0001999960390852081, - "loss": 46.0, - "step": 37065 - }, - { - "epoch": 2.833954546323375, - "grad_norm": 0.008032320998609066, - "learning_rate": 0.00019999603887142092, - "loss": 46.0, - "step": 37066 - }, - { - "epoch": 2.8340310033067646, - "grad_norm": 0.0026360168121755123, - "learning_rate": 0.00019999603865762796, - "loss": 46.0, - "step": 37067 - }, - { - "epoch": 2.8341074602901544, - "grad_norm": 0.0012330182362347841, - "learning_rate": 0.0001999960384438292, - "loss": 46.0, - "step": 37068 - }, - { - "epoch": 2.834183917273544, - "grad_norm": 0.0017202867893502116, - "learning_rate": 0.00019999603823002472, - "loss": 46.0, - "step": 37069 - }, - { - "epoch": 2.8342603742569334, - "grad_norm": 0.0027398562524467707, - "learning_rate": 0.00019999603801621447, - "loss": 46.0, - "step": 37070 - }, - { - "epoch": 2.834336831240323, - "grad_norm": 0.0027491161599755287, - "learning_rate": 0.00019999603780239845, - "loss": 46.0, - "step": 37071 - }, - { - "epoch": 2.834413288223713, - "grad_norm": 0.0017183690797537565, - "learning_rate": 0.00019999603758857665, - "loss": 46.0, - "step": 37072 - }, - { - "epoch": 2.8344897452071027, - "grad_norm": 0.0034974408335983753, - "learning_rate": 0.00019999603737474905, - "loss": 46.0, - "step": 37073 - }, - { - "epoch": 2.8345662021904925, - "grad_norm": 0.0010642207926139235, - "learning_rate": 0.00019999603716091573, - "loss": 46.0, - "step": 37074 - }, - { - "epoch": 2.8346426591738823, - "grad_norm": 0.0025160901714116335, - "learning_rate": 0.00019999603694707664, - "loss": 46.0, - "step": 37075 - }, - { - "epoch": 2.834719116157272, - "grad_norm": 0.0024594564456492662, - "learning_rate": 0.00019999603673323172, - "loss": 46.0, - "step": 37076 - }, - { - "epoch": 2.8347955731406618, - "grad_norm": 0.0027378350496292114, - "learning_rate": 0.0001999960365193811, - "loss": 46.0, - "step": 37077 - }, - { - "epoch": 2.8348720301240515, - "grad_norm": 0.0033158729784190655, - "learning_rate": 0.00019999603630552467, - "loss": 46.0, - "step": 37078 - }, - { - "epoch": 2.8349484871074413, - "grad_norm": 0.0007978108478710055, - "learning_rate": 0.00019999603609166248, - "loss": 46.0, - "step": 37079 - }, - { - "epoch": 2.835024944090831, - "grad_norm": 0.003902283264324069, - "learning_rate": 0.00019999603587779455, - "loss": 46.0, - "step": 37080 - }, - { - "epoch": 2.835101401074221, - "grad_norm": 0.009267600253224373, - "learning_rate": 0.00019999603566392082, - "loss": 46.0, - "step": 37081 - }, - { - "epoch": 2.8351778580576106, - "grad_norm": 0.0013335481053218246, - "learning_rate": 0.00019999603545004134, - "loss": 46.0, - "step": 37082 - }, - { - "epoch": 2.835254315041, - "grad_norm": 0.008002828806638718, - "learning_rate": 0.00019999603523615606, - "loss": 46.0, - "step": 37083 - }, - { - "epoch": 2.8353307720243897, - "grad_norm": 0.0015505505725741386, - "learning_rate": 0.00019999603502226504, - "loss": 46.0, - "step": 37084 - }, - { - "epoch": 2.8354072290077794, - "grad_norm": 0.0014241415774449706, - "learning_rate": 0.0001999960348083682, - "loss": 46.0, - "step": 37085 - }, - { - "epoch": 2.835483685991169, - "grad_norm": 0.002636583987623453, - "learning_rate": 0.00019999603459446567, - "loss": 46.0, - "step": 37086 - }, - { - "epoch": 2.835560142974559, - "grad_norm": 0.001091033685952425, - "learning_rate": 0.00019999603438055732, - "loss": 46.0, - "step": 37087 - }, - { - "epoch": 2.8356365999579487, - "grad_norm": 0.001141828834079206, - "learning_rate": 0.00019999603416664318, - "loss": 46.0, - "step": 37088 - }, - { - "epoch": 2.8357130569413385, - "grad_norm": 0.005088814068585634, - "learning_rate": 0.00019999603395272334, - "loss": 46.0, - "step": 37089 - }, - { - "epoch": 2.8357895139247282, - "grad_norm": 0.0016998846549540758, - "learning_rate": 0.00019999603373879767, - "loss": 46.0, - "step": 37090 - }, - { - "epoch": 2.835865970908118, - "grad_norm": 0.0035497951321303844, - "learning_rate": 0.00019999603352486623, - "loss": 46.0, - "step": 37091 - }, - { - "epoch": 2.8359424278915073, - "grad_norm": 0.0030108769424259663, - "learning_rate": 0.00019999603331092908, - "loss": 46.0, - "step": 37092 - }, - { - "epoch": 2.836018884874897, - "grad_norm": 0.001296893460676074, - "learning_rate": 0.00019999603309698612, - "loss": 46.0, - "step": 37093 - }, - { - "epoch": 2.836095341858287, - "grad_norm": 0.001287568244151771, - "learning_rate": 0.00019999603288303739, - "loss": 46.0, - "step": 37094 - }, - { - "epoch": 2.8361717988416766, - "grad_norm": 0.0013096706243231893, - "learning_rate": 0.00019999603266908288, - "loss": 46.0, - "step": 37095 - }, - { - "epoch": 2.8362482558250663, - "grad_norm": 0.007514851167798042, - "learning_rate": 0.00019999603245512263, - "loss": 46.0, - "step": 37096 - }, - { - "epoch": 2.836324712808456, - "grad_norm": 0.001062970608472824, - "learning_rate": 0.0001999960322411566, - "loss": 46.0, - "step": 37097 - }, - { - "epoch": 2.836401169791846, - "grad_norm": 0.002191146370023489, - "learning_rate": 0.0001999960320271848, - "loss": 46.0, - "step": 37098 - }, - { - "epoch": 2.8364776267752356, - "grad_norm": 0.0019992170855402946, - "learning_rate": 0.00019999603181320723, - "loss": 46.0, - "step": 37099 - }, - { - "epoch": 2.8365540837586254, - "grad_norm": 0.002211317652836442, - "learning_rate": 0.0001999960315992239, - "loss": 46.0, - "step": 37100 - }, - { - "epoch": 2.836630540742015, - "grad_norm": 0.0007561781676486135, - "learning_rate": 0.0001999960313852348, - "loss": 46.0, - "step": 37101 - }, - { - "epoch": 2.836706997725405, - "grad_norm": 0.0015603979118168354, - "learning_rate": 0.0001999960311712399, - "loss": 46.0, - "step": 37102 - }, - { - "epoch": 2.8367834547087947, - "grad_norm": 0.0019219920504838228, - "learning_rate": 0.00019999603095723924, - "loss": 46.0, - "step": 37103 - }, - { - "epoch": 2.8368599116921844, - "grad_norm": 0.004766498226672411, - "learning_rate": 0.00019999603074323283, - "loss": 46.0, - "step": 37104 - }, - { - "epoch": 2.8369363686755737, - "grad_norm": 0.002429333748295903, - "learning_rate": 0.00019999603052922065, - "loss": 46.0, - "step": 37105 - }, - { - "epoch": 2.8370128256589635, - "grad_norm": 0.0024095450062304735, - "learning_rate": 0.0001999960303152027, - "loss": 46.0, - "step": 37106 - }, - { - "epoch": 2.8370892826423533, - "grad_norm": 0.0016871263505890965, - "learning_rate": 0.00019999603010117898, - "loss": 46.0, - "step": 37107 - }, - { - "epoch": 2.837165739625743, - "grad_norm": 0.0015730004524812102, - "learning_rate": 0.0001999960298871495, - "loss": 46.0, - "step": 37108 - }, - { - "epoch": 2.837242196609133, - "grad_norm": 0.0027134520933032036, - "learning_rate": 0.00019999602967311423, - "loss": 46.0, - "step": 37109 - }, - { - "epoch": 2.8373186535925226, - "grad_norm": 0.0009012341615743935, - "learning_rate": 0.00019999602945907318, - "loss": 46.0, - "step": 37110 - }, - { - "epoch": 2.8373951105759123, - "grad_norm": 0.0027486993931233883, - "learning_rate": 0.00019999602924502638, - "loss": 46.0, - "step": 37111 - }, - { - "epoch": 2.837471567559302, - "grad_norm": 0.003142373403534293, - "learning_rate": 0.00019999602903097383, - "loss": 46.0, - "step": 37112 - }, - { - "epoch": 2.8375480245426914, - "grad_norm": 0.005021336022764444, - "learning_rate": 0.0001999960288169155, - "loss": 46.0, - "step": 37113 - }, - { - "epoch": 2.837624481526081, - "grad_norm": 0.0010762499878183007, - "learning_rate": 0.00019999602860285137, - "loss": 46.0, - "step": 37114 - }, - { - "epoch": 2.837700938509471, - "grad_norm": 0.0008226162171922624, - "learning_rate": 0.00019999602838878148, - "loss": 46.0, - "step": 37115 - }, - { - "epoch": 2.8377773954928607, - "grad_norm": 0.003002900630235672, - "learning_rate": 0.00019999602817470584, - "loss": 46.0, - "step": 37116 - }, - { - "epoch": 2.8378538524762504, - "grad_norm": 0.002599109662696719, - "learning_rate": 0.00019999602796062443, - "loss": 46.0, - "step": 37117 - }, - { - "epoch": 2.83793030945964, - "grad_norm": 0.0011123527074232697, - "learning_rate": 0.00019999602774653722, - "loss": 46.0, - "step": 37118 - }, - { - "epoch": 2.83800676644303, - "grad_norm": 0.0028113347943872213, - "learning_rate": 0.0001999960275324443, - "loss": 46.0, - "step": 37119 - }, - { - "epoch": 2.8380832234264197, - "grad_norm": 0.002901636064052582, - "learning_rate": 0.00019999602731834556, - "loss": 46.0, - "step": 37120 - }, - { - "epoch": 2.8381596804098095, - "grad_norm": 0.0012139285681769252, - "learning_rate": 0.00019999602710424105, - "loss": 46.0, - "step": 37121 - }, - { - "epoch": 2.8382361373931992, - "grad_norm": 0.0026648107450455427, - "learning_rate": 0.00019999602689013083, - "loss": 46.0, - "step": 37122 - }, - { - "epoch": 2.838312594376589, - "grad_norm": 0.003036793554201722, - "learning_rate": 0.00019999602667601478, - "loss": 46.0, - "step": 37123 - }, - { - "epoch": 2.8383890513599788, - "grad_norm": 0.000876863079611212, - "learning_rate": 0.00019999602646189298, - "loss": 46.0, - "step": 37124 - }, - { - "epoch": 2.8384655083433685, - "grad_norm": 0.004673229996114969, - "learning_rate": 0.0001999960262477654, - "loss": 46.0, - "step": 37125 - }, - { - "epoch": 2.8385419653267583, - "grad_norm": 0.0013813417172059417, - "learning_rate": 0.00019999602603363207, - "loss": 46.0, - "step": 37126 - }, - { - "epoch": 2.8386184223101476, - "grad_norm": 0.0028213909827172756, - "learning_rate": 0.00019999602581949297, - "loss": 46.0, - "step": 37127 - }, - { - "epoch": 2.8386948792935374, - "grad_norm": 0.0025242804549634457, - "learning_rate": 0.00019999602560534808, - "loss": 46.0, - "step": 37128 - }, - { - "epoch": 2.838771336276927, - "grad_norm": 0.001962625654414296, - "learning_rate": 0.00019999602539119745, - "loss": 46.0, - "step": 37129 - }, - { - "epoch": 2.838847793260317, - "grad_norm": 0.0029655182734131813, - "learning_rate": 0.000199996025177041, - "loss": 46.0, - "step": 37130 - }, - { - "epoch": 2.8389242502437066, - "grad_norm": 0.0009559431346133351, - "learning_rate": 0.00019999602496287882, - "loss": 46.0, - "step": 37131 - }, - { - "epoch": 2.8390007072270964, - "grad_norm": 0.006765376776456833, - "learning_rate": 0.0001999960247487109, - "loss": 46.0, - "step": 37132 - }, - { - "epoch": 2.839077164210486, - "grad_norm": 0.006151263602077961, - "learning_rate": 0.00019999602453453714, - "loss": 46.0, - "step": 37133 - }, - { - "epoch": 2.839153621193876, - "grad_norm": 0.004104913212358952, - "learning_rate": 0.00019999602432035766, - "loss": 46.0, - "step": 37134 - }, - { - "epoch": 2.8392300781772652, - "grad_norm": 0.003158940002322197, - "learning_rate": 0.0001999960241061724, - "loss": 46.0, - "step": 37135 - }, - { - "epoch": 2.839306535160655, - "grad_norm": 0.0025407602079212666, - "learning_rate": 0.00019999602389198136, - "loss": 46.0, - "step": 37136 - }, - { - "epoch": 2.8393829921440448, - "grad_norm": 0.0013883167412132025, - "learning_rate": 0.00019999602367778456, - "loss": 46.0, - "step": 37137 - }, - { - "epoch": 2.8394594491274345, - "grad_norm": 0.0027060771826654673, - "learning_rate": 0.000199996023463582, - "loss": 46.0, - "step": 37138 - }, - { - "epoch": 2.8395359061108243, - "grad_norm": 0.004329566843807697, - "learning_rate": 0.00019999602324937365, - "loss": 46.0, - "step": 37139 - }, - { - "epoch": 2.839612363094214, - "grad_norm": 0.0006606465321965516, - "learning_rate": 0.00019999602303515953, - "loss": 46.0, - "step": 37140 - }, - { - "epoch": 2.839688820077604, - "grad_norm": 0.0012276185443624854, - "learning_rate": 0.00019999602282093966, - "loss": 46.0, - "step": 37141 - }, - { - "epoch": 2.8397652770609936, - "grad_norm": 0.0023852468002587557, - "learning_rate": 0.00019999602260671403, - "loss": 46.0, - "step": 37142 - }, - { - "epoch": 2.8398417340443833, - "grad_norm": 0.001401030458509922, - "learning_rate": 0.00019999602239248262, - "loss": 46.0, - "step": 37143 - }, - { - "epoch": 2.839918191027773, - "grad_norm": 0.009070102125406265, - "learning_rate": 0.0001999960221782454, - "loss": 46.0, - "step": 37144 - }, - { - "epoch": 2.839994648011163, - "grad_norm": 0.0009845560416579247, - "learning_rate": 0.00019999602196400248, - "loss": 46.0, - "step": 37145 - }, - { - "epoch": 2.8400711049945526, - "grad_norm": 0.0016333868261426687, - "learning_rate": 0.00019999602174975375, - "loss": 46.0, - "step": 37146 - }, - { - "epoch": 2.8401475619779424, - "grad_norm": 0.004019290674477816, - "learning_rate": 0.00019999602153549924, - "loss": 46.0, - "step": 37147 - }, - { - "epoch": 2.840224018961332, - "grad_norm": 0.0035774612333625555, - "learning_rate": 0.000199996021321239, - "loss": 46.0, - "step": 37148 - }, - { - "epoch": 2.8403004759447215, - "grad_norm": 0.004045620560646057, - "learning_rate": 0.00019999602110697294, - "loss": 46.0, - "step": 37149 - }, - { - "epoch": 2.840376932928111, - "grad_norm": 0.0011635760311037302, - "learning_rate": 0.00019999602089270114, - "loss": 46.0, - "step": 37150 - }, - { - "epoch": 2.840453389911501, - "grad_norm": 0.001537640462629497, - "learning_rate": 0.00019999602067842357, - "loss": 46.0, - "step": 37151 - }, - { - "epoch": 2.8405298468948907, - "grad_norm": 0.002714330330491066, - "learning_rate": 0.00019999602046414023, - "loss": 46.0, - "step": 37152 - }, - { - "epoch": 2.8406063038782805, - "grad_norm": 0.00142281677108258, - "learning_rate": 0.00019999602024985114, - "loss": 46.0, - "step": 37153 - }, - { - "epoch": 2.8406827608616703, - "grad_norm": 0.0015132240951061249, - "learning_rate": 0.00019999602003555628, - "loss": 46.0, - "step": 37154 - }, - { - "epoch": 2.84075921784506, - "grad_norm": 0.0015835468657314777, - "learning_rate": 0.00019999601982125559, - "loss": 46.0, - "step": 37155 - }, - { - "epoch": 2.84083567482845, - "grad_norm": 0.0035446430556476116, - "learning_rate": 0.0001999960196069492, - "loss": 46.0, - "step": 37156 - }, - { - "epoch": 2.840912131811839, - "grad_norm": 0.0011686112266033888, - "learning_rate": 0.000199996019392637, - "loss": 46.0, - "step": 37157 - }, - { - "epoch": 2.840988588795229, - "grad_norm": 0.0004988678847439587, - "learning_rate": 0.00019999601917831906, - "loss": 46.0, - "step": 37158 - }, - { - "epoch": 2.8410650457786186, - "grad_norm": 0.0019041512859985232, - "learning_rate": 0.00019999601896399533, - "loss": 46.0, - "step": 37159 - }, - { - "epoch": 2.8411415027620084, - "grad_norm": 0.0013399479212239385, - "learning_rate": 0.00019999601874966586, - "loss": 46.0, - "step": 37160 - }, - { - "epoch": 2.841217959745398, - "grad_norm": 0.0037399388384073973, - "learning_rate": 0.00019999601853533058, - "loss": 46.0, - "step": 37161 - }, - { - "epoch": 2.841294416728788, - "grad_norm": 0.0035389885306358337, - "learning_rate": 0.00019999601832098956, - "loss": 46.0, - "step": 37162 - }, - { - "epoch": 2.8413708737121777, - "grad_norm": 0.0029092596378177404, - "learning_rate": 0.00019999601810664276, - "loss": 46.0, - "step": 37163 - }, - { - "epoch": 2.8414473306955674, - "grad_norm": 0.0005028339219279587, - "learning_rate": 0.00019999601789229016, - "loss": 46.0, - "step": 37164 - }, - { - "epoch": 2.841523787678957, - "grad_norm": 0.004048381466418505, - "learning_rate": 0.00019999601767793182, - "loss": 46.0, - "step": 37165 - }, - { - "epoch": 2.841600244662347, - "grad_norm": 0.001473005278967321, - "learning_rate": 0.00019999601746356773, - "loss": 46.0, - "step": 37166 - }, - { - "epoch": 2.8416767016457367, - "grad_norm": 0.002869573188945651, - "learning_rate": 0.00019999601724919784, - "loss": 46.0, - "step": 37167 - }, - { - "epoch": 2.8417531586291265, - "grad_norm": 0.020751535892486572, - "learning_rate": 0.0001999960170348222, - "loss": 46.0, - "step": 37168 - }, - { - "epoch": 2.8418296156125162, - "grad_norm": 0.0018582474440336227, - "learning_rate": 0.00019999601682044077, - "loss": 46.0, - "step": 37169 - }, - { - "epoch": 2.841906072595906, - "grad_norm": 0.0012318510562181473, - "learning_rate": 0.00019999601660605358, - "loss": 46.0, - "step": 37170 - }, - { - "epoch": 2.8419825295792953, - "grad_norm": 0.001933880615979433, - "learning_rate": 0.00019999601639166063, - "loss": 46.0, - "step": 37171 - }, - { - "epoch": 2.842058986562685, - "grad_norm": 0.0018297359347343445, - "learning_rate": 0.00019999601617726192, - "loss": 46.0, - "step": 37172 - }, - { - "epoch": 2.842135443546075, - "grad_norm": 0.0018213269067928195, - "learning_rate": 0.00019999601596285742, - "loss": 46.0, - "step": 37173 - }, - { - "epoch": 2.8422119005294646, - "grad_norm": 0.003524311352521181, - "learning_rate": 0.00019999601574844714, - "loss": 46.0, - "step": 37174 - }, - { - "epoch": 2.8422883575128544, - "grad_norm": 0.003914892207831144, - "learning_rate": 0.00019999601553403112, - "loss": 46.0, - "step": 37175 - }, - { - "epoch": 2.842364814496244, - "grad_norm": 0.0015047680353745818, - "learning_rate": 0.00019999601531960935, - "loss": 46.0, - "step": 37176 - }, - { - "epoch": 2.842441271479634, - "grad_norm": 0.0028676274232566357, - "learning_rate": 0.00019999601510518175, - "loss": 46.0, - "step": 37177 - }, - { - "epoch": 2.8425177284630236, - "grad_norm": 0.0030861590057611465, - "learning_rate": 0.0001999960148907484, - "loss": 46.0, - "step": 37178 - }, - { - "epoch": 2.842594185446413, - "grad_norm": 0.0030827256850898266, - "learning_rate": 0.0001999960146763093, - "loss": 46.0, - "step": 37179 - }, - { - "epoch": 2.8426706424298027, - "grad_norm": 0.0021316458005458117, - "learning_rate": 0.00019999601446186443, - "loss": 46.0, - "step": 37180 - }, - { - "epoch": 2.8427470994131925, - "grad_norm": 0.0015677333576604724, - "learning_rate": 0.0001999960142474138, - "loss": 46.0, - "step": 37181 - }, - { - "epoch": 2.8428235563965822, - "grad_norm": 0.006090864073485136, - "learning_rate": 0.00019999601403295736, - "loss": 46.0, - "step": 37182 - }, - { - "epoch": 2.842900013379972, - "grad_norm": 0.000551430624909699, - "learning_rate": 0.00019999601381849518, - "loss": 46.0, - "step": 37183 - }, - { - "epoch": 2.8429764703633618, - "grad_norm": 0.0014679405139759183, - "learning_rate": 0.00019999601360402722, - "loss": 46.0, - "step": 37184 - }, - { - "epoch": 2.8430529273467515, - "grad_norm": 0.0036636656150221825, - "learning_rate": 0.0001999960133895535, - "loss": 46.0, - "step": 37185 - }, - { - "epoch": 2.8431293843301413, - "grad_norm": 0.0011902735568583012, - "learning_rate": 0.00019999601317507402, - "loss": 46.0, - "step": 37186 - }, - { - "epoch": 2.843205841313531, - "grad_norm": 0.001085460651665926, - "learning_rate": 0.00019999601296058877, - "loss": 46.0, - "step": 37187 - }, - { - "epoch": 2.843282298296921, - "grad_norm": 0.002682414371520281, - "learning_rate": 0.00019999601274609772, - "loss": 46.0, - "step": 37188 - }, - { - "epoch": 2.8433587552803106, - "grad_norm": 0.0024127059150487185, - "learning_rate": 0.00019999601253160092, - "loss": 46.0, - "step": 37189 - }, - { - "epoch": 2.8434352122637003, - "grad_norm": 0.007181702181696892, - "learning_rate": 0.00019999601231709835, - "loss": 46.0, - "step": 37190 - }, - { - "epoch": 2.84351166924709, - "grad_norm": 0.003293056273832917, - "learning_rate": 0.00019999601210259004, - "loss": 46.0, - "step": 37191 - }, - { - "epoch": 2.84358812623048, - "grad_norm": 0.0018092142418026924, - "learning_rate": 0.00019999601188807592, - "loss": 46.0, - "step": 37192 - }, - { - "epoch": 2.843664583213869, - "grad_norm": 0.0028401408344507217, - "learning_rate": 0.000199996011673556, - "loss": 46.0, - "step": 37193 - }, - { - "epoch": 2.843741040197259, - "grad_norm": 0.003255939343944192, - "learning_rate": 0.00019999601145903037, - "loss": 46.0, - "step": 37194 - }, - { - "epoch": 2.8438174971806487, - "grad_norm": 0.001096990774385631, - "learning_rate": 0.00019999601124449896, - "loss": 46.0, - "step": 37195 - }, - { - "epoch": 2.8438939541640385, - "grad_norm": 0.0016180739039555192, - "learning_rate": 0.00019999601102996178, - "loss": 46.0, - "step": 37196 - }, - { - "epoch": 2.843970411147428, - "grad_norm": 0.0027516987174749374, - "learning_rate": 0.00019999601081541882, - "loss": 46.0, - "step": 37197 - }, - { - "epoch": 2.844046868130818, - "grad_norm": 0.0020281956531107426, - "learning_rate": 0.0001999960106008701, - "loss": 46.0, - "step": 37198 - }, - { - "epoch": 2.8441233251142077, - "grad_norm": 0.0014528222382068634, - "learning_rate": 0.00019999601038631561, - "loss": 46.0, - "step": 37199 - }, - { - "epoch": 2.8441997820975975, - "grad_norm": 0.0015012851217761636, - "learning_rate": 0.00019999601017175534, - "loss": 46.0, - "step": 37200 - }, - { - "epoch": 2.844276239080987, - "grad_norm": 0.0037168373819440603, - "learning_rate": 0.00019999600995718932, - "loss": 46.0, - "step": 37201 - }, - { - "epoch": 2.8443526960643766, - "grad_norm": 0.0035074367187917233, - "learning_rate": 0.0001999960097426175, - "loss": 46.0, - "step": 37202 - }, - { - "epoch": 2.8444291530477663, - "grad_norm": 0.004065664950758219, - "learning_rate": 0.00019999600952803995, - "loss": 46.0, - "step": 37203 - }, - { - "epoch": 2.844505610031156, - "grad_norm": 0.002774972002953291, - "learning_rate": 0.0001999960093134566, - "loss": 46.0, - "step": 37204 - }, - { - "epoch": 2.844582067014546, - "grad_norm": 0.0014166026376187801, - "learning_rate": 0.00019999600909886752, - "loss": 46.0, - "step": 37205 - }, - { - "epoch": 2.8446585239979356, - "grad_norm": 0.004844150971621275, - "learning_rate": 0.0001999960088842726, - "loss": 46.0, - "step": 37206 - }, - { - "epoch": 2.8447349809813254, - "grad_norm": 0.0019219416426494718, - "learning_rate": 0.000199996008669672, - "loss": 46.0, - "step": 37207 - }, - { - "epoch": 2.844811437964715, - "grad_norm": 0.0026399383787065744, - "learning_rate": 0.00019999600845506556, - "loss": 46.0, - "step": 37208 - }, - { - "epoch": 2.844887894948105, - "grad_norm": 0.002299330662935972, - "learning_rate": 0.00019999600824045338, - "loss": 46.0, - "step": 37209 - }, - { - "epoch": 2.8449643519314947, - "grad_norm": 0.0034319909755140543, - "learning_rate": 0.0001999960080258354, - "loss": 46.0, - "step": 37210 - }, - { - "epoch": 2.8450408089148844, - "grad_norm": 0.003196443198248744, - "learning_rate": 0.0001999960078112117, - "loss": 46.0, - "step": 37211 - }, - { - "epoch": 2.845117265898274, - "grad_norm": 0.004450627602636814, - "learning_rate": 0.0001999960075965822, - "loss": 46.0, - "step": 37212 - }, - { - "epoch": 2.845193722881664, - "grad_norm": 0.0022163500543683767, - "learning_rate": 0.00019999600738194694, - "loss": 46.0, - "step": 37213 - }, - { - "epoch": 2.8452701798650533, - "grad_norm": 0.0016734949313104153, - "learning_rate": 0.00019999600716730592, - "loss": 46.0, - "step": 37214 - }, - { - "epoch": 2.845346636848443, - "grad_norm": 0.004838340915739536, - "learning_rate": 0.0001999960069526591, - "loss": 46.0, - "step": 37215 - }, - { - "epoch": 2.845423093831833, - "grad_norm": 0.0007701439317315817, - "learning_rate": 0.00019999600673800653, - "loss": 46.0, - "step": 37216 - }, - { - "epoch": 2.8454995508152225, - "grad_norm": 0.0017121906857937574, - "learning_rate": 0.0001999960065233482, - "loss": 46.0, - "step": 37217 - }, - { - "epoch": 2.8455760077986123, - "grad_norm": 0.005509126465767622, - "learning_rate": 0.0001999960063086841, - "loss": 46.0, - "step": 37218 - }, - { - "epoch": 2.845652464782002, - "grad_norm": 0.002026584465056658, - "learning_rate": 0.0001999960060940142, - "loss": 46.0, - "step": 37219 - }, - { - "epoch": 2.845728921765392, - "grad_norm": 0.0017668914515525103, - "learning_rate": 0.00019999600587933855, - "loss": 46.0, - "step": 37220 - }, - { - "epoch": 2.8458053787487816, - "grad_norm": 0.0027744630351662636, - "learning_rate": 0.00019999600566465714, - "loss": 46.0, - "step": 37221 - }, - { - "epoch": 2.8458818357321713, - "grad_norm": 0.00184998894110322, - "learning_rate": 0.00019999600544996996, - "loss": 46.0, - "step": 37222 - }, - { - "epoch": 2.8459582927155607, - "grad_norm": 0.0008259746828116477, - "learning_rate": 0.000199996005235277, - "loss": 46.0, - "step": 37223 - }, - { - "epoch": 2.8460347496989504, - "grad_norm": 0.0012356044026091695, - "learning_rate": 0.0001999960050205783, - "loss": 46.0, - "step": 37224 - }, - { - "epoch": 2.84611120668234, - "grad_norm": 0.0015845209127292037, - "learning_rate": 0.0001999960048058738, - "loss": 46.0, - "step": 37225 - }, - { - "epoch": 2.84618766366573, - "grad_norm": 0.0034666426945477724, - "learning_rate": 0.00019999600459116352, - "loss": 46.0, - "step": 37226 - }, - { - "epoch": 2.8462641206491197, - "grad_norm": 0.004044574219733477, - "learning_rate": 0.0001999960043764475, - "loss": 46.0, - "step": 37227 - }, - { - "epoch": 2.8463405776325095, - "grad_norm": 0.0041269417852163315, - "learning_rate": 0.0001999960041617257, - "loss": 46.0, - "step": 37228 - }, - { - "epoch": 2.8464170346158992, - "grad_norm": 0.0009241164079867303, - "learning_rate": 0.00019999600394699814, - "loss": 46.0, - "step": 37229 - }, - { - "epoch": 2.846493491599289, - "grad_norm": 0.0013869010144844651, - "learning_rate": 0.0001999960037322648, - "loss": 46.0, - "step": 37230 - }, - { - "epoch": 2.8465699485826788, - "grad_norm": 0.0037373602390289307, - "learning_rate": 0.00019999600351752568, - "loss": 46.0, - "step": 37231 - }, - { - "epoch": 2.8466464055660685, - "grad_norm": 0.0059655578806996346, - "learning_rate": 0.00019999600330278082, - "loss": 46.0, - "step": 37232 - }, - { - "epoch": 2.8467228625494583, - "grad_norm": 0.0006762645207345486, - "learning_rate": 0.00019999600308803016, - "loss": 46.0, - "step": 37233 - }, - { - "epoch": 2.846799319532848, - "grad_norm": 0.003408893244341016, - "learning_rate": 0.00019999600287327372, - "loss": 46.0, - "step": 37234 - }, - { - "epoch": 2.846875776516238, - "grad_norm": 0.00320029235444963, - "learning_rate": 0.00019999600265851157, - "loss": 46.0, - "step": 37235 - }, - { - "epoch": 2.846952233499627, - "grad_norm": 0.002168607898056507, - "learning_rate": 0.00019999600244374359, - "loss": 46.0, - "step": 37236 - }, - { - "epoch": 2.847028690483017, - "grad_norm": 0.002000172156840563, - "learning_rate": 0.00019999600222896988, - "loss": 46.0, - "step": 37237 - }, - { - "epoch": 2.8471051474664066, - "grad_norm": 0.0013277019606903195, - "learning_rate": 0.0001999960020141904, - "loss": 46.0, - "step": 37238 - }, - { - "epoch": 2.8471816044497964, - "grad_norm": 0.0014426482375711203, - "learning_rate": 0.00019999600179940514, - "loss": 46.0, - "step": 37239 - }, - { - "epoch": 2.847258061433186, - "grad_norm": 0.0017250728560611606, - "learning_rate": 0.00019999600158461411, - "loss": 46.0, - "step": 37240 - }, - { - "epoch": 2.847334518416576, - "grad_norm": 0.0025461020413786173, - "learning_rate": 0.00019999600136981732, - "loss": 46.0, - "step": 37241 - }, - { - "epoch": 2.8474109753999657, - "grad_norm": 0.0033455220982432365, - "learning_rate": 0.00019999600115501472, - "loss": 46.0, - "step": 37242 - }, - { - "epoch": 2.8474874323833554, - "grad_norm": 0.0007954634493216872, - "learning_rate": 0.00019999600094020638, - "loss": 46.0, - "step": 37243 - }, - { - "epoch": 2.8475638893667448, - "grad_norm": 0.0012235864996910095, - "learning_rate": 0.00019999600072539227, - "loss": 46.0, - "step": 37244 - }, - { - "epoch": 2.8476403463501345, - "grad_norm": 0.0025959573686122894, - "learning_rate": 0.0001999960005105724, - "loss": 46.0, - "step": 37245 - }, - { - "epoch": 2.8477168033335243, - "grad_norm": 0.0024312231689691544, - "learning_rate": 0.00019999600029574674, - "loss": 46.0, - "step": 37246 - }, - { - "epoch": 2.847793260316914, - "grad_norm": 0.0012624245136976242, - "learning_rate": 0.00019999600008091534, - "loss": 46.0, - "step": 37247 - }, - { - "epoch": 2.847869717300304, - "grad_norm": 0.0014503926504403353, - "learning_rate": 0.00019999599986607816, - "loss": 46.0, - "step": 37248 - }, - { - "epoch": 2.8479461742836936, - "grad_norm": 0.004004387650638819, - "learning_rate": 0.0001999959996512352, - "loss": 46.0, - "step": 37249 - }, - { - "epoch": 2.8480226312670833, - "grad_norm": 0.004282256588339806, - "learning_rate": 0.00019999599943638647, - "loss": 46.0, - "step": 37250 - }, - { - "epoch": 2.848099088250473, - "grad_norm": 0.001295424997806549, - "learning_rate": 0.00019999599922153197, - "loss": 46.0, - "step": 37251 - }, - { - "epoch": 2.848175545233863, - "grad_norm": 0.002017940627411008, - "learning_rate": 0.00019999599900667172, - "loss": 46.0, - "step": 37252 - }, - { - "epoch": 2.8482520022172526, - "grad_norm": 0.0015080752782523632, - "learning_rate": 0.00019999599879180568, - "loss": 46.0, - "step": 37253 - }, - { - "epoch": 2.8483284592006424, - "grad_norm": 0.0021924860775470734, - "learning_rate": 0.0001999959985769339, - "loss": 46.0, - "step": 37254 - }, - { - "epoch": 2.848404916184032, - "grad_norm": 0.004107275977730751, - "learning_rate": 0.00019999599836205634, - "loss": 46.0, - "step": 37255 - }, - { - "epoch": 2.848481373167422, - "grad_norm": 0.004498360678553581, - "learning_rate": 0.00019999599814717297, - "loss": 46.0, - "step": 37256 - }, - { - "epoch": 2.8485578301508117, - "grad_norm": 0.0022525659296661615, - "learning_rate": 0.00019999599793228389, - "loss": 46.0, - "step": 37257 - }, - { - "epoch": 2.848634287134201, - "grad_norm": 0.0020462311804294586, - "learning_rate": 0.000199995997717389, - "loss": 46.0, - "step": 37258 - }, - { - "epoch": 2.8487107441175907, - "grad_norm": 0.001385206007398665, - "learning_rate": 0.00019999599750248836, - "loss": 46.0, - "step": 37259 - }, - { - "epoch": 2.8487872011009805, - "grad_norm": 0.002075792523100972, - "learning_rate": 0.00019999599728758193, - "loss": 46.0, - "step": 37260 - }, - { - "epoch": 2.8488636580843703, - "grad_norm": 0.0026271117385476828, - "learning_rate": 0.00019999599707266975, - "loss": 46.0, - "step": 37261 - }, - { - "epoch": 2.84894011506776, - "grad_norm": 0.006578900385648012, - "learning_rate": 0.0001999959968577518, - "loss": 46.0, - "step": 37262 - }, - { - "epoch": 2.8490165720511498, - "grad_norm": 0.0020474980119615793, - "learning_rate": 0.00019999599664282807, - "loss": 46.0, - "step": 37263 - }, - { - "epoch": 2.8490930290345395, - "grad_norm": 0.0017278738087043166, - "learning_rate": 0.00019999599642789856, - "loss": 46.0, - "step": 37264 - }, - { - "epoch": 2.8491694860179293, - "grad_norm": 0.0011812258744612336, - "learning_rate": 0.00019999599621296332, - "loss": 46.0, - "step": 37265 - }, - { - "epoch": 2.8492459430013186, - "grad_norm": 0.0034139524213969707, - "learning_rate": 0.0001999959959980223, - "loss": 46.0, - "step": 37266 - }, - { - "epoch": 2.8493223999847084, - "grad_norm": 0.0014752881834283471, - "learning_rate": 0.00019999599578307548, - "loss": 46.0, - "step": 37267 - }, - { - "epoch": 2.849398856968098, - "grad_norm": 0.0027909576892852783, - "learning_rate": 0.0001999959955681229, - "loss": 46.0, - "step": 37268 - }, - { - "epoch": 2.849475313951488, - "grad_norm": 0.0035066893324255943, - "learning_rate": 0.00019999599535316457, - "loss": 46.0, - "step": 37269 - }, - { - "epoch": 2.8495517709348777, - "grad_norm": 0.0019999733194708824, - "learning_rate": 0.00019999599513820048, - "loss": 46.0, - "step": 37270 - }, - { - "epoch": 2.8496282279182674, - "grad_norm": 0.0021603750064969063, - "learning_rate": 0.0001999959949232306, - "loss": 46.0, - "step": 37271 - }, - { - "epoch": 2.849704684901657, - "grad_norm": 0.002759114373475313, - "learning_rate": 0.00019999599470825493, - "loss": 46.0, - "step": 37272 - }, - { - "epoch": 2.849781141885047, - "grad_norm": 0.0017168933991342783, - "learning_rate": 0.00019999599449327352, - "loss": 46.0, - "step": 37273 - }, - { - "epoch": 2.8498575988684367, - "grad_norm": 0.0017353284638375044, - "learning_rate": 0.00019999599427828634, - "loss": 46.0, - "step": 37274 - }, - { - "epoch": 2.8499340558518265, - "grad_norm": 0.0037807871121913195, - "learning_rate": 0.0001999959940632934, - "loss": 46.0, - "step": 37275 - }, - { - "epoch": 2.8500105128352162, - "grad_norm": 0.0015400650445371866, - "learning_rate": 0.00019999599384829466, - "loss": 46.0, - "step": 37276 - }, - { - "epoch": 2.850086969818606, - "grad_norm": 0.003298710333183408, - "learning_rate": 0.0001999959936332902, - "loss": 46.0, - "step": 37277 - }, - { - "epoch": 2.8501634268019957, - "grad_norm": 0.0029223680030554533, - "learning_rate": 0.00019999599341827991, - "loss": 46.0, - "step": 37278 - }, - { - "epoch": 2.8502398837853855, - "grad_norm": 0.001601759227924049, - "learning_rate": 0.00019999599320326387, - "loss": 46.0, - "step": 37279 - }, - { - "epoch": 2.850316340768775, - "grad_norm": 0.0014337064931169152, - "learning_rate": 0.0001999959929882421, - "loss": 46.0, - "step": 37280 - }, - { - "epoch": 2.8503927977521646, - "grad_norm": 0.0012704854598268867, - "learning_rate": 0.00019999599277321453, - "loss": 46.0, - "step": 37281 - }, - { - "epoch": 2.8504692547355543, - "grad_norm": 0.003259579185396433, - "learning_rate": 0.00019999599255818117, - "loss": 46.0, - "step": 37282 - }, - { - "epoch": 2.850545711718944, - "grad_norm": 0.003714054124429822, - "learning_rate": 0.00019999599234314208, - "loss": 46.0, - "step": 37283 - }, - { - "epoch": 2.850622168702334, - "grad_norm": 0.003584110178053379, - "learning_rate": 0.00019999599212809722, - "loss": 46.0, - "step": 37284 - }, - { - "epoch": 2.8506986256857236, - "grad_norm": 0.004490654915571213, - "learning_rate": 0.00019999599191304656, - "loss": 46.0, - "step": 37285 - }, - { - "epoch": 2.8507750826691134, - "grad_norm": 0.0010733489179983735, - "learning_rate": 0.00019999599169799015, - "loss": 46.0, - "step": 37286 - }, - { - "epoch": 2.850851539652503, - "grad_norm": 0.000965977436862886, - "learning_rate": 0.00019999599148292797, - "loss": 46.0, - "step": 37287 - }, - { - "epoch": 2.8509279966358925, - "grad_norm": 0.003879459109157324, - "learning_rate": 0.00019999599126786, - "loss": 46.0, - "step": 37288 - }, - { - "epoch": 2.8510044536192822, - "grad_norm": 0.001358768087811768, - "learning_rate": 0.0001999959910527863, - "loss": 46.0, - "step": 37289 - }, - { - "epoch": 2.851080910602672, - "grad_norm": 0.001473011914640665, - "learning_rate": 0.0001999959908377068, - "loss": 46.0, - "step": 37290 - }, - { - "epoch": 2.8511573675860618, - "grad_norm": 0.0009068356594070792, - "learning_rate": 0.00019999599062262152, - "loss": 46.0, - "step": 37291 - }, - { - "epoch": 2.8512338245694515, - "grad_norm": 0.004027722403407097, - "learning_rate": 0.0001999959904075305, - "loss": 46.0, - "step": 37292 - }, - { - "epoch": 2.8513102815528413, - "grad_norm": 0.0007677121320739388, - "learning_rate": 0.0001999959901924337, - "loss": 46.0, - "step": 37293 - }, - { - "epoch": 2.851386738536231, - "grad_norm": 0.0021411182824522257, - "learning_rate": 0.0001999959899773311, - "loss": 46.0, - "step": 37294 - }, - { - "epoch": 2.851463195519621, - "grad_norm": 0.0012926168274134398, - "learning_rate": 0.00019999598976222277, - "loss": 46.0, - "step": 37295 - }, - { - "epoch": 2.8515396525030106, - "grad_norm": 0.005833826027810574, - "learning_rate": 0.00019999598954710868, - "loss": 46.0, - "step": 37296 - }, - { - "epoch": 2.8516161094864003, - "grad_norm": 0.0018114744452759624, - "learning_rate": 0.0001999959893319888, - "loss": 46.0, - "step": 37297 - }, - { - "epoch": 2.85169256646979, - "grad_norm": 0.0027112492825835943, - "learning_rate": 0.00019999598911686313, - "loss": 46.0, - "step": 37298 - }, - { - "epoch": 2.85176902345318, - "grad_norm": 0.0009759074309840798, - "learning_rate": 0.00019999598890173173, - "loss": 46.0, - "step": 37299 - }, - { - "epoch": 2.8518454804365696, - "grad_norm": 0.0025301408022642136, - "learning_rate": 0.00019999598868659455, - "loss": 46.0, - "step": 37300 - }, - { - "epoch": 2.8519219374199594, - "grad_norm": 0.0020652299281209707, - "learning_rate": 0.0001999959884714516, - "loss": 46.0, - "step": 37301 - }, - { - "epoch": 2.8519983944033487, - "grad_norm": 0.0029327061492949724, - "learning_rate": 0.00019999598825630287, - "loss": 46.0, - "step": 37302 - }, - { - "epoch": 2.8520748513867384, - "grad_norm": 0.0019536330364644527, - "learning_rate": 0.0001999959880411484, - "loss": 46.0, - "step": 37303 - }, - { - "epoch": 2.852151308370128, - "grad_norm": 0.002208857797086239, - "learning_rate": 0.00019999598782598815, - "loss": 46.0, - "step": 37304 - }, - { - "epoch": 2.852227765353518, - "grad_norm": 0.0013169703306630254, - "learning_rate": 0.00019999598761082207, - "loss": 46.0, - "step": 37305 - }, - { - "epoch": 2.8523042223369077, - "grad_norm": 0.003879035357385874, - "learning_rate": 0.00019999598739565028, - "loss": 46.0, - "step": 37306 - }, - { - "epoch": 2.8523806793202975, - "grad_norm": 0.0029065320268273354, - "learning_rate": 0.00019999598718047271, - "loss": 46.0, - "step": 37307 - }, - { - "epoch": 2.8524571363036872, - "grad_norm": 0.0037449600640684366, - "learning_rate": 0.00019999598696528937, - "loss": 46.0, - "step": 37308 - }, - { - "epoch": 2.852533593287077, - "grad_norm": 0.0030812923796474934, - "learning_rate": 0.00019999598675010026, - "loss": 46.0, - "step": 37309 - }, - { - "epoch": 2.8526100502704663, - "grad_norm": 0.004042427055537701, - "learning_rate": 0.0001999959865349054, - "loss": 46.0, - "step": 37310 - }, - { - "epoch": 2.852686507253856, - "grad_norm": 0.002936464501544833, - "learning_rate": 0.00019999598631970474, - "loss": 46.0, - "step": 37311 - }, - { - "epoch": 2.852762964237246, - "grad_norm": 0.0017784219235181808, - "learning_rate": 0.00019999598610449834, - "loss": 46.0, - "step": 37312 - }, - { - "epoch": 2.8528394212206356, - "grad_norm": 0.002888568677008152, - "learning_rate": 0.00019999598588928613, - "loss": 46.0, - "step": 37313 - }, - { - "epoch": 2.8529158782040254, - "grad_norm": 0.0008542234427295625, - "learning_rate": 0.0001999959856740682, - "loss": 46.0, - "step": 37314 - }, - { - "epoch": 2.852992335187415, - "grad_norm": 0.0014698245795443654, - "learning_rate": 0.00019999598545884448, - "loss": 46.0, - "step": 37315 - }, - { - "epoch": 2.853068792170805, - "grad_norm": 0.0016620280221104622, - "learning_rate": 0.00019999598524361498, - "loss": 46.0, - "step": 37316 - }, - { - "epoch": 2.8531452491541947, - "grad_norm": 0.0007210882613435388, - "learning_rate": 0.00019999598502837973, - "loss": 46.0, - "step": 37317 - }, - { - "epoch": 2.8532217061375844, - "grad_norm": 0.0018330198945477605, - "learning_rate": 0.0001999959848131387, - "loss": 46.0, - "step": 37318 - }, - { - "epoch": 2.853298163120974, - "grad_norm": 0.00232741073705256, - "learning_rate": 0.00019999598459789192, - "loss": 46.0, - "step": 37319 - }, - { - "epoch": 2.853374620104364, - "grad_norm": 0.002274435944855213, - "learning_rate": 0.00019999598438263932, - "loss": 46.0, - "step": 37320 - }, - { - "epoch": 2.8534510770877537, - "grad_norm": 0.0034586244728416204, - "learning_rate": 0.00019999598416738099, - "loss": 46.0, - "step": 37321 - }, - { - "epoch": 2.8535275340711435, - "grad_norm": 0.0017230635276064277, - "learning_rate": 0.00019999598395211687, - "loss": 46.0, - "step": 37322 - }, - { - "epoch": 2.853603991054533, - "grad_norm": 0.0010296761756762862, - "learning_rate": 0.000199995983736847, - "loss": 46.0, - "step": 37323 - }, - { - "epoch": 2.8536804480379225, - "grad_norm": 0.0014594501117244363, - "learning_rate": 0.00019999598352157135, - "loss": 46.0, - "step": 37324 - }, - { - "epoch": 2.8537569050213123, - "grad_norm": 0.003524273168295622, - "learning_rate": 0.00019999598330628995, - "loss": 46.0, - "step": 37325 - }, - { - "epoch": 2.853833362004702, - "grad_norm": 0.005894568748772144, - "learning_rate": 0.00019999598309100277, - "loss": 46.0, - "step": 37326 - }, - { - "epoch": 2.853909818988092, - "grad_norm": 0.001328212209045887, - "learning_rate": 0.00019999598287570982, - "loss": 46.0, - "step": 37327 - }, - { - "epoch": 2.8539862759714816, - "grad_norm": 0.003787433495745063, - "learning_rate": 0.0001999959826604111, - "loss": 46.0, - "step": 37328 - }, - { - "epoch": 2.8540627329548713, - "grad_norm": 0.0021362300030887127, - "learning_rate": 0.0001999959824451066, - "loss": 46.0, - "step": 37329 - }, - { - "epoch": 2.854139189938261, - "grad_norm": 0.0023188029881566763, - "learning_rate": 0.00019999598222979632, - "loss": 46.0, - "step": 37330 - }, - { - "epoch": 2.854215646921651, - "grad_norm": 0.0011016883654519916, - "learning_rate": 0.0001999959820144803, - "loss": 46.0, - "step": 37331 - }, - { - "epoch": 2.85429210390504, - "grad_norm": 0.004367099609225988, - "learning_rate": 0.0001999959817991585, - "loss": 46.0, - "step": 37332 - }, - { - "epoch": 2.85436856088843, - "grad_norm": 0.004461745265871286, - "learning_rate": 0.00019999598158383094, - "loss": 46.0, - "step": 37333 - }, - { - "epoch": 2.8544450178718197, - "grad_norm": 0.004328207112848759, - "learning_rate": 0.0001999959813684976, - "loss": 46.0, - "step": 37334 - }, - { - "epoch": 2.8545214748552095, - "grad_norm": 0.0012120924657210708, - "learning_rate": 0.0001999959811531585, - "loss": 46.0, - "step": 37335 - }, - { - "epoch": 2.8545979318385992, - "grad_norm": 0.0009794754441827536, - "learning_rate": 0.00019999598093781363, - "loss": 46.0, - "step": 37336 - }, - { - "epoch": 2.854674388821989, - "grad_norm": 0.004440579097718, - "learning_rate": 0.00019999598072246294, - "loss": 46.0, - "step": 37337 - }, - { - "epoch": 2.8547508458053787, - "grad_norm": 0.001987901283428073, - "learning_rate": 0.00019999598050710656, - "loss": 46.0, - "step": 37338 - }, - { - "epoch": 2.8548273027887685, - "grad_norm": 0.001043384545482695, - "learning_rate": 0.00019999598029174438, - "loss": 46.0, - "step": 37339 - }, - { - "epoch": 2.8549037597721583, - "grad_norm": 0.002521959599107504, - "learning_rate": 0.0001999959800763764, - "loss": 46.0, - "step": 37340 - }, - { - "epoch": 2.854980216755548, - "grad_norm": 0.0010131163289770484, - "learning_rate": 0.00019999597986100268, - "loss": 46.0, - "step": 37341 - }, - { - "epoch": 2.855056673738938, - "grad_norm": 0.001875092275440693, - "learning_rate": 0.0001999959796456232, - "loss": 46.0, - "step": 37342 - }, - { - "epoch": 2.8551331307223276, - "grad_norm": 0.0026953311171382666, - "learning_rate": 0.00019999597943023793, - "loss": 46.0, - "step": 37343 - }, - { - "epoch": 2.8552095877057173, - "grad_norm": 0.006256473250687122, - "learning_rate": 0.00019999597921484692, - "loss": 46.0, - "step": 37344 - }, - { - "epoch": 2.8552860446891066, - "grad_norm": 0.0019740923307836056, - "learning_rate": 0.0001999959789994501, - "loss": 46.0, - "step": 37345 - }, - { - "epoch": 2.8553625016724964, - "grad_norm": 0.0013334328541532159, - "learning_rate": 0.00019999597878404756, - "loss": 46.0, - "step": 37346 - }, - { - "epoch": 2.855438958655886, - "grad_norm": 0.0008456657524220645, - "learning_rate": 0.00019999597856863922, - "loss": 46.0, - "step": 37347 - }, - { - "epoch": 2.855515415639276, - "grad_norm": 0.003338826121762395, - "learning_rate": 0.0001999959783532251, - "loss": 46.0, - "step": 37348 - }, - { - "epoch": 2.8555918726226657, - "grad_norm": 0.0028465476352721453, - "learning_rate": 0.00019999597813780522, - "loss": 46.0, - "step": 37349 - }, - { - "epoch": 2.8556683296060554, - "grad_norm": 0.002293667057529092, - "learning_rate": 0.0001999959779223796, - "loss": 46.0, - "step": 37350 - }, - { - "epoch": 2.855744786589445, - "grad_norm": 0.0033546965569257736, - "learning_rate": 0.00019999597770694816, - "loss": 46.0, - "step": 37351 - }, - { - "epoch": 2.855821243572835, - "grad_norm": 0.001735328696668148, - "learning_rate": 0.00019999597749151098, - "loss": 46.0, - "step": 37352 - }, - { - "epoch": 2.8558977005562247, - "grad_norm": 0.0040353089570999146, - "learning_rate": 0.00019999597727606803, - "loss": 46.0, - "step": 37353 - }, - { - "epoch": 2.855974157539614, - "grad_norm": 0.0043499041348695755, - "learning_rate": 0.0001999959770606193, - "loss": 46.0, - "step": 37354 - }, - { - "epoch": 2.856050614523004, - "grad_norm": 0.0015060887672007084, - "learning_rate": 0.00019999597684516483, - "loss": 46.0, - "step": 37355 - }, - { - "epoch": 2.8561270715063936, - "grad_norm": 0.0027334417682141066, - "learning_rate": 0.00019999597662970456, - "loss": 46.0, - "step": 37356 - }, - { - "epoch": 2.8562035284897833, - "grad_norm": 0.0019144502002745867, - "learning_rate": 0.0001999959764142385, - "loss": 46.0, - "step": 37357 - }, - { - "epoch": 2.856279985473173, - "grad_norm": 0.0006307953153736889, - "learning_rate": 0.00019999597619876675, - "loss": 46.0, - "step": 37358 - }, - { - "epoch": 2.856356442456563, - "grad_norm": 0.0009853419614955783, - "learning_rate": 0.00019999597598328915, - "loss": 46.0, - "step": 37359 - }, - { - "epoch": 2.8564328994399526, - "grad_norm": 0.006186658050864935, - "learning_rate": 0.00019999597576780581, - "loss": 46.0, - "step": 37360 - }, - { - "epoch": 2.8565093564233424, - "grad_norm": 0.011974647641181946, - "learning_rate": 0.0001999959755523167, - "loss": 46.0, - "step": 37361 - }, - { - "epoch": 2.856585813406732, - "grad_norm": 0.001817780314013362, - "learning_rate": 0.00019999597533682182, - "loss": 46.0, - "step": 37362 - }, - { - "epoch": 2.856662270390122, - "grad_norm": 0.0004092817544005811, - "learning_rate": 0.00019999597512132119, - "loss": 46.0, - "step": 37363 - }, - { - "epoch": 2.8567387273735116, - "grad_norm": 0.0012719588121399283, - "learning_rate": 0.00019999597490581475, - "loss": 46.0, - "step": 37364 - }, - { - "epoch": 2.8568151843569014, - "grad_norm": 0.002170251216739416, - "learning_rate": 0.00019999597469030257, - "loss": 46.0, - "step": 37365 - }, - { - "epoch": 2.856891641340291, - "grad_norm": 0.005010946653783321, - "learning_rate": 0.00019999597447478462, - "loss": 46.0, - "step": 37366 - }, - { - "epoch": 2.8569680983236805, - "grad_norm": 0.0014615219552069902, - "learning_rate": 0.0001999959742592609, - "loss": 46.0, - "step": 37367 - }, - { - "epoch": 2.8570445553070702, - "grad_norm": 0.001642643823288381, - "learning_rate": 0.00019999597404373143, - "loss": 46.0, - "step": 37368 - }, - { - "epoch": 2.85712101229046, - "grad_norm": 0.002189437858760357, - "learning_rate": 0.00019999597382819615, - "loss": 46.0, - "step": 37369 - }, - { - "epoch": 2.8571974692738498, - "grad_norm": 0.0028046458028256893, - "learning_rate": 0.00019999597361265514, - "loss": 46.0, - "step": 37370 - }, - { - "epoch": 2.8572739262572395, - "grad_norm": 0.0020366355311125517, - "learning_rate": 0.00019999597339710832, - "loss": 46.0, - "step": 37371 - }, - { - "epoch": 2.8573503832406293, - "grad_norm": 0.0034670080058276653, - "learning_rate": 0.00019999597318155575, - "loss": 46.0, - "step": 37372 - }, - { - "epoch": 2.857426840224019, - "grad_norm": 0.0026196478866040707, - "learning_rate": 0.00019999597296599744, - "loss": 46.0, - "step": 37373 - }, - { - "epoch": 2.857503297207409, - "grad_norm": 0.0023079689126461744, - "learning_rate": 0.0001999959727504333, - "loss": 46.0, - "step": 37374 - }, - { - "epoch": 2.857579754190798, - "grad_norm": 0.0027341623790562153, - "learning_rate": 0.00019999597253486344, - "loss": 46.0, - "step": 37375 - }, - { - "epoch": 2.857656211174188, - "grad_norm": 0.0016881534829735756, - "learning_rate": 0.00019999597231928779, - "loss": 46.0, - "step": 37376 - }, - { - "epoch": 2.8577326681575776, - "grad_norm": 0.002138593001291156, - "learning_rate": 0.00019999597210370638, - "loss": 46.0, - "step": 37377 - }, - { - "epoch": 2.8578091251409674, - "grad_norm": 0.001564140198752284, - "learning_rate": 0.0001999959718881192, - "loss": 46.0, - "step": 37378 - }, - { - "epoch": 2.857885582124357, - "grad_norm": 0.0022983925882726908, - "learning_rate": 0.00019999597167252625, - "loss": 46.0, - "step": 37379 - }, - { - "epoch": 2.857962039107747, - "grad_norm": 0.0012937862193211913, - "learning_rate": 0.00019999597145692753, - "loss": 46.0, - "step": 37380 - }, - { - "epoch": 2.8580384960911367, - "grad_norm": 0.0016106325201690197, - "learning_rate": 0.00019999597124132303, - "loss": 46.0, - "step": 37381 - }, - { - "epoch": 2.8581149530745265, - "grad_norm": 0.0027820770628750324, - "learning_rate": 0.00019999597102571276, - "loss": 46.0, - "step": 37382 - }, - { - "epoch": 2.858191410057916, - "grad_norm": 0.0036041252315044403, - "learning_rate": 0.00019999597081009674, - "loss": 46.0, - "step": 37383 - }, - { - "epoch": 2.858267867041306, - "grad_norm": 0.0023981030099093914, - "learning_rate": 0.00019999597059447495, - "loss": 46.0, - "step": 37384 - }, - { - "epoch": 2.8583443240246957, - "grad_norm": 0.0021879670675843954, - "learning_rate": 0.00019999597037884736, - "loss": 46.0, - "step": 37385 - }, - { - "epoch": 2.8584207810080855, - "grad_norm": 0.0028004725463688374, - "learning_rate": 0.00019999597016321402, - "loss": 46.0, - "step": 37386 - }, - { - "epoch": 2.8584972379914753, - "grad_norm": 0.00290484051220119, - "learning_rate": 0.00019999596994757493, - "loss": 46.0, - "step": 37387 - }, - { - "epoch": 2.858573694974865, - "grad_norm": 0.0031083980575203896, - "learning_rate": 0.00019999596973193005, - "loss": 46.0, - "step": 37388 - }, - { - "epoch": 2.8586501519582543, - "grad_norm": 0.0032578168902546167, - "learning_rate": 0.00019999596951627942, - "loss": 46.0, - "step": 37389 - }, - { - "epoch": 2.858726608941644, - "grad_norm": 0.0015673382440581918, - "learning_rate": 0.000199995969300623, - "loss": 46.0, - "step": 37390 - }, - { - "epoch": 2.858803065925034, - "grad_norm": 0.0029307957738637924, - "learning_rate": 0.0001999959690849608, - "loss": 46.0, - "step": 37391 - }, - { - "epoch": 2.8588795229084236, - "grad_norm": 0.0006896536215208471, - "learning_rate": 0.00019999596886929286, - "loss": 46.0, - "step": 37392 - }, - { - "epoch": 2.8589559798918134, - "grad_norm": 0.0037042410112917423, - "learning_rate": 0.0001999959686536191, - "loss": 46.0, - "step": 37393 - }, - { - "epoch": 2.859032436875203, - "grad_norm": 0.0011286704102531075, - "learning_rate": 0.00019999596843793964, - "loss": 46.0, - "step": 37394 - }, - { - "epoch": 2.859108893858593, - "grad_norm": 0.003778767306357622, - "learning_rate": 0.0001999959682222544, - "loss": 46.0, - "step": 37395 - }, - { - "epoch": 2.8591853508419827, - "grad_norm": 0.0010023026261478662, - "learning_rate": 0.00019999596800656335, - "loss": 46.0, - "step": 37396 - }, - { - "epoch": 2.859261807825372, - "grad_norm": 0.003019100520759821, - "learning_rate": 0.00019999596779086653, - "loss": 46.0, - "step": 37397 - }, - { - "epoch": 2.8593382648087617, - "grad_norm": 0.002212079707533121, - "learning_rate": 0.00019999596757516396, - "loss": 46.0, - "step": 37398 - }, - { - "epoch": 2.8594147217921515, - "grad_norm": 0.00805040542036295, - "learning_rate": 0.00019999596735945565, - "loss": 46.0, - "step": 37399 - }, - { - "epoch": 2.8594911787755413, - "grad_norm": 0.0030057288240641356, - "learning_rate": 0.00019999596714374154, - "loss": 46.0, - "step": 37400 - }, - { - "epoch": 2.859567635758931, - "grad_norm": 0.0024373324122279882, - "learning_rate": 0.00019999596692802169, - "loss": 46.0, - "step": 37401 - }, - { - "epoch": 2.859644092742321, - "grad_norm": 0.0017328665126115084, - "learning_rate": 0.000199995966712296, - "loss": 46.0, - "step": 37402 - }, - { - "epoch": 2.8597205497257105, - "grad_norm": 0.00064315419876948, - "learning_rate": 0.0001999959664965646, - "loss": 46.0, - "step": 37403 - }, - { - "epoch": 2.8597970067091003, - "grad_norm": 0.001285726553760469, - "learning_rate": 0.00019999596628082742, - "loss": 46.0, - "step": 37404 - }, - { - "epoch": 2.85987346369249, - "grad_norm": 0.0028490587137639523, - "learning_rate": 0.00019999596606508447, - "loss": 46.0, - "step": 37405 - }, - { - "epoch": 2.85994992067588, - "grad_norm": 0.004146496765315533, - "learning_rate": 0.00019999596584933574, - "loss": 46.0, - "step": 37406 - }, - { - "epoch": 2.8600263776592696, - "grad_norm": 0.002788916928693652, - "learning_rate": 0.00019999596563358125, - "loss": 46.0, - "step": 37407 - }, - { - "epoch": 2.8601028346426594, - "grad_norm": 0.003172248136252165, - "learning_rate": 0.000199995965417821, - "loss": 46.0, - "step": 37408 - }, - { - "epoch": 2.860179291626049, - "grad_norm": 0.001350397476926446, - "learning_rate": 0.00019999596520205496, - "loss": 46.0, - "step": 37409 - }, - { - "epoch": 2.860255748609439, - "grad_norm": 0.0021407592575997114, - "learning_rate": 0.00019999596498628317, - "loss": 46.0, - "step": 37410 - }, - { - "epoch": 2.860332205592828, - "grad_norm": 0.002006686059758067, - "learning_rate": 0.0001999959647705056, - "loss": 46.0, - "step": 37411 - }, - { - "epoch": 2.860408662576218, - "grad_norm": 0.0014944759896025062, - "learning_rate": 0.00019999596455472224, - "loss": 46.0, - "step": 37412 - }, - { - "epoch": 2.8604851195596077, - "grad_norm": 0.0020433547906577587, - "learning_rate": 0.00019999596433893316, - "loss": 46.0, - "step": 37413 - }, - { - "epoch": 2.8605615765429975, - "grad_norm": 0.0008167953928932548, - "learning_rate": 0.00019999596412313827, - "loss": 46.0, - "step": 37414 - }, - { - "epoch": 2.8606380335263872, - "grad_norm": 0.0013444479554891586, - "learning_rate": 0.00019999596390733764, - "loss": 46.0, - "step": 37415 - }, - { - "epoch": 2.860714490509777, - "grad_norm": 0.002432076493278146, - "learning_rate": 0.0001999959636915312, - "loss": 46.0, - "step": 37416 - }, - { - "epoch": 2.8607909474931668, - "grad_norm": 0.0043581328354775906, - "learning_rate": 0.00019999596347571903, - "loss": 46.0, - "step": 37417 - }, - { - "epoch": 2.8608674044765565, - "grad_norm": 0.000905010849237442, - "learning_rate": 0.00019999596325990105, - "loss": 46.0, - "step": 37418 - }, - { - "epoch": 2.860943861459946, - "grad_norm": 0.0022833244875073433, - "learning_rate": 0.00019999596304407736, - "loss": 46.0, - "step": 37419 - }, - { - "epoch": 2.8610203184433356, - "grad_norm": 0.00285896984860301, - "learning_rate": 0.00019999596282824786, - "loss": 46.0, - "step": 37420 - }, - { - "epoch": 2.8610967754267254, - "grad_norm": 0.003195906523615122, - "learning_rate": 0.0001999959626124126, - "loss": 46.0, - "step": 37421 - }, - { - "epoch": 2.861173232410115, - "grad_norm": 0.017030026763677597, - "learning_rate": 0.00019999596239657157, - "loss": 46.0, - "step": 37422 - }, - { - "epoch": 2.861249689393505, - "grad_norm": 0.000928624824155122, - "learning_rate": 0.00019999596218072478, - "loss": 46.0, - "step": 37423 - }, - { - "epoch": 2.8613261463768946, - "grad_norm": 0.0011592790251597762, - "learning_rate": 0.00019999596196487222, - "loss": 46.0, - "step": 37424 - }, - { - "epoch": 2.8614026033602844, - "grad_norm": 0.002315193647518754, - "learning_rate": 0.00019999596174901388, - "loss": 46.0, - "step": 37425 - }, - { - "epoch": 2.861479060343674, - "grad_norm": 0.0009720303351059556, - "learning_rate": 0.00019999596153314977, - "loss": 46.0, - "step": 37426 - }, - { - "epoch": 2.861555517327064, - "grad_norm": 0.003237604396417737, - "learning_rate": 0.0001999959613172799, - "loss": 46.0, - "step": 37427 - }, - { - "epoch": 2.8616319743104537, - "grad_norm": 0.001822356367483735, - "learning_rate": 0.00019999596110140426, - "loss": 46.0, - "step": 37428 - }, - { - "epoch": 2.8617084312938434, - "grad_norm": 0.0040573785081505775, - "learning_rate": 0.00019999596088552283, - "loss": 46.0, - "step": 37429 - }, - { - "epoch": 2.861784888277233, - "grad_norm": 0.0006334678619168699, - "learning_rate": 0.00019999596066963565, - "loss": 46.0, - "step": 37430 - }, - { - "epoch": 2.861861345260623, - "grad_norm": 0.0012000094866380095, - "learning_rate": 0.0001999959604537427, - "loss": 46.0, - "step": 37431 - }, - { - "epoch": 2.8619378022440127, - "grad_norm": 0.003927180077880621, - "learning_rate": 0.00019999596023784398, - "loss": 46.0, - "step": 37432 - }, - { - "epoch": 2.862014259227402, - "grad_norm": 0.0008652122342027724, - "learning_rate": 0.00019999596002193948, - "loss": 46.0, - "step": 37433 - }, - { - "epoch": 2.862090716210792, - "grad_norm": 0.00506264204159379, - "learning_rate": 0.0001999959598060292, - "loss": 46.0, - "step": 37434 - }, - { - "epoch": 2.8621671731941816, - "grad_norm": 0.001470347517170012, - "learning_rate": 0.0001999959595901132, - "loss": 46.0, - "step": 37435 - }, - { - "epoch": 2.8622436301775713, - "grad_norm": 0.0044128550216555595, - "learning_rate": 0.0001999959593741914, - "loss": 46.0, - "step": 37436 - }, - { - "epoch": 2.862320087160961, - "grad_norm": 0.0017818250926211476, - "learning_rate": 0.0001999959591582638, - "loss": 46.0, - "step": 37437 - }, - { - "epoch": 2.862396544144351, - "grad_norm": 0.0020803522784262896, - "learning_rate": 0.0001999959589423305, - "loss": 46.0, - "step": 37438 - }, - { - "epoch": 2.8624730011277406, - "grad_norm": 0.0012987942900508642, - "learning_rate": 0.0001999959587263914, - "loss": 46.0, - "step": 37439 - }, - { - "epoch": 2.8625494581111304, - "grad_norm": 0.002318762708455324, - "learning_rate": 0.0001999959585104465, - "loss": 46.0, - "step": 37440 - }, - { - "epoch": 2.8626259150945197, - "grad_norm": 0.0029196382965892553, - "learning_rate": 0.00019999595829449588, - "loss": 46.0, - "step": 37441 - }, - { - "epoch": 2.8627023720779095, - "grad_norm": 0.003180979983881116, - "learning_rate": 0.00019999595807853945, - "loss": 46.0, - "step": 37442 - }, - { - "epoch": 2.862778829061299, - "grad_norm": 0.0036485048476606607, - "learning_rate": 0.00019999595786257727, - "loss": 46.0, - "step": 37443 - }, - { - "epoch": 2.862855286044689, - "grad_norm": 0.0014625238254666328, - "learning_rate": 0.00019999595764660932, - "loss": 46.0, - "step": 37444 - }, - { - "epoch": 2.8629317430280787, - "grad_norm": 0.001224764622747898, - "learning_rate": 0.0001999959574306356, - "loss": 46.0, - "step": 37445 - }, - { - "epoch": 2.8630082000114685, - "grad_norm": 0.004123813472688198, - "learning_rate": 0.0001999959572146561, - "loss": 46.0, - "step": 37446 - }, - { - "epoch": 2.8630846569948583, - "grad_norm": 0.002723866840824485, - "learning_rate": 0.00019999595699867083, - "loss": 46.0, - "step": 37447 - }, - { - "epoch": 2.863161113978248, - "grad_norm": 0.0009698011563159525, - "learning_rate": 0.00019999595678267982, - "loss": 46.0, - "step": 37448 - }, - { - "epoch": 2.863237570961638, - "grad_norm": 0.0014715880388393998, - "learning_rate": 0.00019999595656668303, - "loss": 46.0, - "step": 37449 - }, - { - "epoch": 2.8633140279450275, - "grad_norm": 0.003595031565055251, - "learning_rate": 0.00019999595635068044, - "loss": 46.0, - "step": 37450 - }, - { - "epoch": 2.8633904849284173, - "grad_norm": 0.0008930552867241204, - "learning_rate": 0.00019999595613467213, - "loss": 46.0, - "step": 37451 - }, - { - "epoch": 2.863466941911807, - "grad_norm": 0.006320401560515165, - "learning_rate": 0.00019999595591865802, - "loss": 46.0, - "step": 37452 - }, - { - "epoch": 2.863543398895197, - "grad_norm": 0.0045289937406778336, - "learning_rate": 0.0001999959557026381, - "loss": 46.0, - "step": 37453 - }, - { - "epoch": 2.8636198558785866, - "grad_norm": 0.0014551301719620824, - "learning_rate": 0.00019999595548661248, - "loss": 46.0, - "step": 37454 - }, - { - "epoch": 2.863696312861976, - "grad_norm": 0.002317196223884821, - "learning_rate": 0.00019999595527058105, - "loss": 46.0, - "step": 37455 - }, - { - "epoch": 2.8637727698453657, - "grad_norm": 0.003183969995006919, - "learning_rate": 0.00019999595505454387, - "loss": 46.0, - "step": 37456 - }, - { - "epoch": 2.8638492268287554, - "grad_norm": 0.003286434104666114, - "learning_rate": 0.00019999595483850092, - "loss": 46.0, - "step": 37457 - }, - { - "epoch": 2.863925683812145, - "grad_norm": 0.0026142208371311426, - "learning_rate": 0.00019999595462245223, - "loss": 46.0, - "step": 37458 - }, - { - "epoch": 2.864002140795535, - "grad_norm": 0.0016015034634619951, - "learning_rate": 0.0001999959544063977, - "loss": 46.0, - "step": 37459 - }, - { - "epoch": 2.8640785977789247, - "grad_norm": 0.004334256052970886, - "learning_rate": 0.0001999959541903375, - "loss": 46.0, - "step": 37460 - }, - { - "epoch": 2.8641550547623145, - "grad_norm": 0.0013641779078170657, - "learning_rate": 0.00019999595397427142, - "loss": 46.0, - "step": 37461 - }, - { - "epoch": 2.8642315117457042, - "grad_norm": 0.0009866136824712157, - "learning_rate": 0.00019999595375819963, - "loss": 46.0, - "step": 37462 - }, - { - "epoch": 2.8643079687290935, - "grad_norm": 0.0010501127690076828, - "learning_rate": 0.0001999959535421221, - "loss": 46.0, - "step": 37463 - }, - { - "epoch": 2.8643844257124833, - "grad_norm": 0.0014722800115123391, - "learning_rate": 0.00019999595332603873, - "loss": 46.0, - "step": 37464 - }, - { - "epoch": 2.864460882695873, - "grad_norm": 0.0012393639190122485, - "learning_rate": 0.00019999595310994962, - "loss": 46.0, - "step": 37465 - }, - { - "epoch": 2.864537339679263, - "grad_norm": 0.0007497768383473158, - "learning_rate": 0.00019999595289385476, - "loss": 46.0, - "step": 37466 - }, - { - "epoch": 2.8646137966626526, - "grad_norm": 0.011151333339512348, - "learning_rate": 0.0001999959526777541, - "loss": 46.0, - "step": 37467 - }, - { - "epoch": 2.8646902536460424, - "grad_norm": 0.0011931046610698104, - "learning_rate": 0.0001999959524616477, - "loss": 46.0, - "step": 37468 - }, - { - "epoch": 2.864766710629432, - "grad_norm": 0.0012726098066195846, - "learning_rate": 0.0001999959522455355, - "loss": 46.0, - "step": 37469 - }, - { - "epoch": 2.864843167612822, - "grad_norm": 0.0025933312717825174, - "learning_rate": 0.00019999595202941755, - "loss": 46.0, - "step": 37470 - }, - { - "epoch": 2.8649196245962116, - "grad_norm": 0.00538911297917366, - "learning_rate": 0.00019999595181329386, - "loss": 46.0, - "step": 37471 - }, - { - "epoch": 2.8649960815796014, - "grad_norm": 0.0022859370801597834, - "learning_rate": 0.00019999595159716433, - "loss": 46.0, - "step": 37472 - }, - { - "epoch": 2.865072538562991, - "grad_norm": 0.0013085856335237622, - "learning_rate": 0.0001999959513810291, - "loss": 46.0, - "step": 37473 - }, - { - "epoch": 2.865148995546381, - "grad_norm": 0.005054724868386984, - "learning_rate": 0.00019999595116488805, - "loss": 46.0, - "step": 37474 - }, - { - "epoch": 2.8652254525297707, - "grad_norm": 0.006324580404907465, - "learning_rate": 0.00019999595094874126, - "loss": 46.0, - "step": 37475 - }, - { - "epoch": 2.86530190951316, - "grad_norm": 0.0008189025102183223, - "learning_rate": 0.00019999595073258867, - "loss": 46.0, - "step": 37476 - }, - { - "epoch": 2.8653783664965498, - "grad_norm": 0.0025469448883086443, - "learning_rate": 0.00019999595051643033, - "loss": 46.0, - "step": 37477 - }, - { - "epoch": 2.8654548234799395, - "grad_norm": 0.0017832368612289429, - "learning_rate": 0.00019999595030026622, - "loss": 46.0, - "step": 37478 - }, - { - "epoch": 2.8655312804633293, - "grad_norm": 0.002492888830602169, - "learning_rate": 0.00019999595008409637, - "loss": 46.0, - "step": 37479 - }, - { - "epoch": 2.865607737446719, - "grad_norm": 0.0023167075123637915, - "learning_rate": 0.00019999594986792074, - "loss": 46.0, - "step": 37480 - }, - { - "epoch": 2.865684194430109, - "grad_norm": 0.0023142294958233833, - "learning_rate": 0.0001999959496517393, - "loss": 46.0, - "step": 37481 - }, - { - "epoch": 2.8657606514134986, - "grad_norm": 0.0026507326401770115, - "learning_rate": 0.00019999594943555214, - "loss": 46.0, - "step": 37482 - }, - { - "epoch": 2.8658371083968883, - "grad_norm": 0.0011573369847610593, - "learning_rate": 0.0001999959492193592, - "loss": 46.0, - "step": 37483 - }, - { - "epoch": 2.865913565380278, - "grad_norm": 0.0017050030874088407, - "learning_rate": 0.00019999594900316046, - "loss": 46.0, - "step": 37484 - }, - { - "epoch": 2.8659900223636674, - "grad_norm": 0.0019060500198975205, - "learning_rate": 0.00019999594878695597, - "loss": 46.0, - "step": 37485 - }, - { - "epoch": 2.866066479347057, - "grad_norm": 0.0007430505938827991, - "learning_rate": 0.0001999959485707457, - "loss": 46.0, - "step": 37486 - }, - { - "epoch": 2.866142936330447, - "grad_norm": 0.0027034657541662455, - "learning_rate": 0.00019999594835452968, - "loss": 46.0, - "step": 37487 - }, - { - "epoch": 2.8662193933138367, - "grad_norm": 0.002144405385479331, - "learning_rate": 0.0001999959481383079, - "loss": 46.0, - "step": 37488 - }, - { - "epoch": 2.8662958502972264, - "grad_norm": 0.0008919090614654124, - "learning_rate": 0.0001999959479220803, - "loss": 46.0, - "step": 37489 - }, - { - "epoch": 2.866372307280616, - "grad_norm": 0.0020369570702314377, - "learning_rate": 0.00019999594770584697, - "loss": 46.0, - "step": 37490 - }, - { - "epoch": 2.866448764264006, - "grad_norm": 0.0007791294483467937, - "learning_rate": 0.00019999594748960786, - "loss": 46.0, - "step": 37491 - }, - { - "epoch": 2.8665252212473957, - "grad_norm": 0.004863109905272722, - "learning_rate": 0.000199995947273363, - "loss": 46.0, - "step": 37492 - }, - { - "epoch": 2.8666016782307855, - "grad_norm": 0.0008897147490642965, - "learning_rate": 0.00019999594705711235, - "loss": 46.0, - "step": 37493 - }, - { - "epoch": 2.8666781352141752, - "grad_norm": 0.001245802384801209, - "learning_rate": 0.00019999594684085595, - "loss": 46.0, - "step": 37494 - }, - { - "epoch": 2.866754592197565, - "grad_norm": 0.004159674979746342, - "learning_rate": 0.00019999594662459375, - "loss": 46.0, - "step": 37495 - }, - { - "epoch": 2.8668310491809548, - "grad_norm": 0.002161232987418771, - "learning_rate": 0.00019999594640832583, - "loss": 46.0, - "step": 37496 - }, - { - "epoch": 2.8669075061643445, - "grad_norm": 0.0019452959531918168, - "learning_rate": 0.00019999594619205208, - "loss": 46.0, - "step": 37497 - }, - { - "epoch": 2.866983963147734, - "grad_norm": 0.0022323147859424353, - "learning_rate": 0.00019999594597577258, - "loss": 46.0, - "step": 37498 - }, - { - "epoch": 2.8670604201311236, - "grad_norm": 0.0018292245222255588, - "learning_rate": 0.0001999959457594873, - "loss": 46.0, - "step": 37499 - }, - { - "epoch": 2.8671368771145134, - "grad_norm": 0.001836481736972928, - "learning_rate": 0.0001999959455431963, - "loss": 46.0, - "step": 37500 - }, - { - "epoch": 2.867213334097903, - "grad_norm": 0.0013462741626426578, - "learning_rate": 0.00019999594532689948, - "loss": 46.0, - "step": 37501 - }, - { - "epoch": 2.867289791081293, - "grad_norm": 0.0027386879082769156, - "learning_rate": 0.00019999594511059695, - "loss": 46.0, - "step": 37502 - }, - { - "epoch": 2.8673662480646827, - "grad_norm": 0.009533974342048168, - "learning_rate": 0.0001999959448942886, - "loss": 46.0, - "step": 37503 - }, - { - "epoch": 2.8674427050480724, - "grad_norm": 0.0008170533110387623, - "learning_rate": 0.0001999959446779745, - "loss": 46.0, - "step": 37504 - }, - { - "epoch": 2.867519162031462, - "grad_norm": 0.002299931598827243, - "learning_rate": 0.00019999594446165465, - "loss": 46.0, - "step": 37505 - }, - { - "epoch": 2.8675956190148515, - "grad_norm": 0.00147080491296947, - "learning_rate": 0.00019999594424532897, - "loss": 46.0, - "step": 37506 - }, - { - "epoch": 2.8676720759982413, - "grad_norm": 0.004154044669121504, - "learning_rate": 0.00019999594402899756, - "loss": 46.0, - "step": 37507 - }, - { - "epoch": 2.867748532981631, - "grad_norm": 0.0017092034686356783, - "learning_rate": 0.0001999959438126604, - "loss": 46.0, - "step": 37508 - }, - { - "epoch": 2.8678249899650208, - "grad_norm": 0.0021878632251173258, - "learning_rate": 0.00019999594359631744, - "loss": 46.0, - "step": 37509 - }, - { - "epoch": 2.8679014469484105, - "grad_norm": 0.005851882975548506, - "learning_rate": 0.0001999959433799687, - "loss": 46.0, - "step": 37510 - }, - { - "epoch": 2.8679779039318003, - "grad_norm": 0.004577010404318571, - "learning_rate": 0.0001999959431636142, - "loss": 46.0, - "step": 37511 - }, - { - "epoch": 2.86805436091519, - "grad_norm": 0.003406402887776494, - "learning_rate": 0.00019999594294725396, - "loss": 46.0, - "step": 37512 - }, - { - "epoch": 2.86813081789858, - "grad_norm": 0.0036590341478586197, - "learning_rate": 0.00019999594273088794, - "loss": 46.0, - "step": 37513 - }, - { - "epoch": 2.8682072748819696, - "grad_norm": 0.0017514858627691865, - "learning_rate": 0.00019999594251451615, - "loss": 46.0, - "step": 37514 - }, - { - "epoch": 2.8682837318653593, - "grad_norm": 0.002538986038416624, - "learning_rate": 0.00019999594229813856, - "loss": 46.0, - "step": 37515 - }, - { - "epoch": 2.868360188848749, - "grad_norm": 0.0051421415992081165, - "learning_rate": 0.00019999594208175523, - "loss": 46.0, - "step": 37516 - }, - { - "epoch": 2.868436645832139, - "grad_norm": 0.0027391118928790092, - "learning_rate": 0.00019999594186536612, - "loss": 46.0, - "step": 37517 - }, - { - "epoch": 2.8685131028155286, - "grad_norm": 0.001825224026106298, - "learning_rate": 0.00019999594164897127, - "loss": 46.0, - "step": 37518 - }, - { - "epoch": 2.8685895597989184, - "grad_norm": 0.0023483980912715197, - "learning_rate": 0.0001999959414325706, - "loss": 46.0, - "step": 37519 - }, - { - "epoch": 2.8686660167823077, - "grad_norm": 0.002353568794205785, - "learning_rate": 0.00019999594121616418, - "loss": 46.0, - "step": 37520 - }, - { - "epoch": 2.8687424737656975, - "grad_norm": 0.0016929901903495193, - "learning_rate": 0.000199995940999752, - "loss": 46.0, - "step": 37521 - }, - { - "epoch": 2.8688189307490872, - "grad_norm": 0.0012015769025310874, - "learning_rate": 0.00019999594078333406, - "loss": 46.0, - "step": 37522 - }, - { - "epoch": 2.868895387732477, - "grad_norm": 0.0029947562143206596, - "learning_rate": 0.00019999594056691034, - "loss": 46.0, - "step": 37523 - }, - { - "epoch": 2.8689718447158667, - "grad_norm": 0.006256711203604937, - "learning_rate": 0.00019999594035048084, - "loss": 46.0, - "step": 37524 - }, - { - "epoch": 2.8690483016992565, - "grad_norm": 0.0008310055709443986, - "learning_rate": 0.00019999594013404558, - "loss": 46.0, - "step": 37525 - }, - { - "epoch": 2.8691247586826463, - "grad_norm": 0.004027517046779394, - "learning_rate": 0.00019999593991760456, - "loss": 46.0, - "step": 37526 - }, - { - "epoch": 2.869201215666036, - "grad_norm": 0.0024037384428083897, - "learning_rate": 0.00019999593970115775, - "loss": 46.0, - "step": 37527 - }, - { - "epoch": 2.8692776726494253, - "grad_norm": 0.0016094769816845655, - "learning_rate": 0.0001999959394847052, - "loss": 46.0, - "step": 37528 - }, - { - "epoch": 2.869354129632815, - "grad_norm": 0.0014064470306038857, - "learning_rate": 0.00019999593926824685, - "loss": 46.0, - "step": 37529 - }, - { - "epoch": 2.869430586616205, - "grad_norm": 0.0019122699741274118, - "learning_rate": 0.00019999593905178274, - "loss": 46.0, - "step": 37530 - }, - { - "epoch": 2.8695070435995946, - "grad_norm": 0.0006940191378816962, - "learning_rate": 0.0001999959388353129, - "loss": 46.0, - "step": 37531 - }, - { - "epoch": 2.8695835005829844, - "grad_norm": 0.002844130387529731, - "learning_rate": 0.00019999593861883726, - "loss": 46.0, - "step": 37532 - }, - { - "epoch": 2.869659957566374, - "grad_norm": 0.0016199576202780008, - "learning_rate": 0.00019999593840235583, - "loss": 46.0, - "step": 37533 - }, - { - "epoch": 2.869736414549764, - "grad_norm": 0.003924049437046051, - "learning_rate": 0.00019999593818586866, - "loss": 46.0, - "step": 37534 - }, - { - "epoch": 2.8698128715331537, - "grad_norm": 0.0013053047005087137, - "learning_rate": 0.0001999959379693757, - "loss": 46.0, - "step": 37535 - }, - { - "epoch": 2.8698893285165434, - "grad_norm": 0.0024365128483623266, - "learning_rate": 0.00019999593775287696, - "loss": 46.0, - "step": 37536 - }, - { - "epoch": 2.869965785499933, - "grad_norm": 0.002858174964785576, - "learning_rate": 0.00019999593753637247, - "loss": 46.0, - "step": 37537 - }, - { - "epoch": 2.870042242483323, - "grad_norm": 0.007344653829932213, - "learning_rate": 0.00019999593731986223, - "loss": 46.0, - "step": 37538 - }, - { - "epoch": 2.8701186994667127, - "grad_norm": 0.0017986251041293144, - "learning_rate": 0.0001999959371033462, - "loss": 46.0, - "step": 37539 - }, - { - "epoch": 2.8701951564501025, - "grad_norm": 0.0016602270770817995, - "learning_rate": 0.0001999959368868244, - "loss": 46.0, - "step": 37540 - }, - { - "epoch": 2.8702716134334922, - "grad_norm": 0.00330542610026896, - "learning_rate": 0.0001999959366702968, - "loss": 46.0, - "step": 37541 - }, - { - "epoch": 2.8703480704168816, - "grad_norm": 0.001754896016791463, - "learning_rate": 0.00019999593645376348, - "loss": 46.0, - "step": 37542 - }, - { - "epoch": 2.8704245274002713, - "grad_norm": 0.001867889310233295, - "learning_rate": 0.0001999959362372244, - "loss": 46.0, - "step": 37543 - }, - { - "epoch": 2.870500984383661, - "grad_norm": 0.003189666895195842, - "learning_rate": 0.00019999593602067952, - "loss": 46.0, - "step": 37544 - }, - { - "epoch": 2.870577441367051, - "grad_norm": 0.0004502327064983547, - "learning_rate": 0.0001999959358041289, - "loss": 46.0, - "step": 37545 - }, - { - "epoch": 2.8706538983504406, - "grad_norm": 0.0014798900811001658, - "learning_rate": 0.00019999593558757246, - "loss": 46.0, - "step": 37546 - }, - { - "epoch": 2.8707303553338304, - "grad_norm": 0.0059404936619102955, - "learning_rate": 0.00019999593537101026, - "loss": 46.0, - "step": 37547 - }, - { - "epoch": 2.87080681231722, - "grad_norm": 0.0009865788742899895, - "learning_rate": 0.00019999593515444234, - "loss": 46.0, - "step": 37548 - }, - { - "epoch": 2.87088326930061, - "grad_norm": 0.0010516804177314043, - "learning_rate": 0.00019999593493786862, - "loss": 46.0, - "step": 37549 - }, - { - "epoch": 2.870959726283999, - "grad_norm": 0.001716958126053214, - "learning_rate": 0.0001999959347212891, - "loss": 46.0, - "step": 37550 - }, - { - "epoch": 2.871036183267389, - "grad_norm": 0.0035657398402690887, - "learning_rate": 0.00019999593450470386, - "loss": 46.0, - "step": 37551 - }, - { - "epoch": 2.8711126402507787, - "grad_norm": 0.0019648680463433266, - "learning_rate": 0.00019999593428811284, - "loss": 46.0, - "step": 37552 - }, - { - "epoch": 2.8711890972341685, - "grad_norm": 0.0027284380048513412, - "learning_rate": 0.00019999593407151603, - "loss": 46.0, - "step": 37553 - }, - { - "epoch": 2.8712655542175582, - "grad_norm": 0.0037620195653289557, - "learning_rate": 0.00019999593385491347, - "loss": 46.0, - "step": 37554 - }, - { - "epoch": 2.871342011200948, - "grad_norm": 0.0029824539087712765, - "learning_rate": 0.00019999593363830513, - "loss": 46.0, - "step": 37555 - }, - { - "epoch": 2.8714184681843378, - "grad_norm": 0.0011551117058843374, - "learning_rate": 0.00019999593342169103, - "loss": 46.0, - "step": 37556 - }, - { - "epoch": 2.8714949251677275, - "grad_norm": 0.0032276457641273737, - "learning_rate": 0.00019999593320507115, - "loss": 46.0, - "step": 37557 - }, - { - "epoch": 2.8715713821511173, - "grad_norm": 0.0028557691257447004, - "learning_rate": 0.0001999959329884455, - "loss": 46.0, - "step": 37558 - }, - { - "epoch": 2.871647839134507, - "grad_norm": 0.0033261561766266823, - "learning_rate": 0.00019999593277181406, - "loss": 46.0, - "step": 37559 - }, - { - "epoch": 2.871724296117897, - "grad_norm": 0.0022382389288395643, - "learning_rate": 0.00019999593255517692, - "loss": 46.0, - "step": 37560 - }, - { - "epoch": 2.8718007531012866, - "grad_norm": 0.004119453020393848, - "learning_rate": 0.00019999593233853394, - "loss": 46.0, - "step": 37561 - }, - { - "epoch": 2.8718772100846763, - "grad_norm": 0.0006247319979593158, - "learning_rate": 0.00019999593212188522, - "loss": 46.0, - "step": 37562 - }, - { - "epoch": 2.871953667068066, - "grad_norm": 0.0019964047241955996, - "learning_rate": 0.00019999593190523073, - "loss": 46.0, - "step": 37563 - }, - { - "epoch": 2.8720301240514554, - "grad_norm": 0.0011412703897804022, - "learning_rate": 0.0001999959316885705, - "loss": 46.0, - "step": 37564 - }, - { - "epoch": 2.872106581034845, - "grad_norm": 0.0009878354612737894, - "learning_rate": 0.00019999593147190448, - "loss": 46.0, - "step": 37565 - }, - { - "epoch": 2.872183038018235, - "grad_norm": 0.002664342289790511, - "learning_rate": 0.00019999593125523266, - "loss": 46.0, - "step": 37566 - }, - { - "epoch": 2.8722594950016247, - "grad_norm": 0.003253551432862878, - "learning_rate": 0.0001999959310385551, - "loss": 46.0, - "step": 37567 - }, - { - "epoch": 2.8723359519850145, - "grad_norm": 0.0011970893247053027, - "learning_rate": 0.00019999593082187177, - "loss": 46.0, - "step": 37568 - }, - { - "epoch": 2.872412408968404, - "grad_norm": 0.0017122890567407012, - "learning_rate": 0.00019999593060518266, - "loss": 46.0, - "step": 37569 - }, - { - "epoch": 2.872488865951794, - "grad_norm": 0.0026473926845937967, - "learning_rate": 0.00019999593038848778, - "loss": 46.0, - "step": 37570 - }, - { - "epoch": 2.8725653229351837, - "grad_norm": 0.0010518949711695313, - "learning_rate": 0.00019999593017178713, - "loss": 46.0, - "step": 37571 - }, - { - "epoch": 2.872641779918573, - "grad_norm": 0.00414118031039834, - "learning_rate": 0.00019999592995508073, - "loss": 46.0, - "step": 37572 - }, - { - "epoch": 2.872718236901963, - "grad_norm": 0.00721208518370986, - "learning_rate": 0.00019999592973836853, - "loss": 46.0, - "step": 37573 - }, - { - "epoch": 2.8727946938853526, - "grad_norm": 0.002168054925277829, - "learning_rate": 0.0001999959295216506, - "loss": 46.0, - "step": 37574 - }, - { - "epoch": 2.8728711508687423, - "grad_norm": 0.0034994606394320726, - "learning_rate": 0.0001999959293049269, - "loss": 46.0, - "step": 37575 - }, - { - "epoch": 2.872947607852132, - "grad_norm": 0.0022491789422929287, - "learning_rate": 0.00019999592908819737, - "loss": 46.0, - "step": 37576 - }, - { - "epoch": 2.873024064835522, - "grad_norm": 0.0009970130631700158, - "learning_rate": 0.00019999592887146213, - "loss": 46.0, - "step": 37577 - }, - { - "epoch": 2.8731005218189116, - "grad_norm": 0.0024155722931027412, - "learning_rate": 0.0001999959286547211, - "loss": 46.0, - "step": 37578 - }, - { - "epoch": 2.8731769788023014, - "grad_norm": 0.005563588347285986, - "learning_rate": 0.0001999959284379743, - "loss": 46.0, - "step": 37579 - }, - { - "epoch": 2.873253435785691, - "grad_norm": 0.002309634815901518, - "learning_rate": 0.00019999592822122174, - "loss": 46.0, - "step": 37580 - }, - { - "epoch": 2.873329892769081, - "grad_norm": 0.001668619690462947, - "learning_rate": 0.00019999592800446338, - "loss": 46.0, - "step": 37581 - }, - { - "epoch": 2.8734063497524707, - "grad_norm": 0.002494386164471507, - "learning_rate": 0.0001999959277876993, - "loss": 46.0, - "step": 37582 - }, - { - "epoch": 2.8734828067358604, - "grad_norm": 0.005363532807677984, - "learning_rate": 0.00019999592757092942, - "loss": 46.0, - "step": 37583 - }, - { - "epoch": 2.87355926371925, - "grad_norm": 0.0023722245823591948, - "learning_rate": 0.00019999592735415377, - "loss": 46.0, - "step": 37584 - }, - { - "epoch": 2.87363572070264, - "grad_norm": 0.002606066409498453, - "learning_rate": 0.00019999592713737237, - "loss": 46.0, - "step": 37585 - }, - { - "epoch": 2.8737121776860293, - "grad_norm": 0.0029873568564653397, - "learning_rate": 0.0001999959269205852, - "loss": 46.0, - "step": 37586 - }, - { - "epoch": 2.873788634669419, - "grad_norm": 0.0033291769213974476, - "learning_rate": 0.00019999592670379225, - "loss": 46.0, - "step": 37587 - }, - { - "epoch": 2.873865091652809, - "grad_norm": 0.004090075381100178, - "learning_rate": 0.0001999959264869935, - "loss": 46.0, - "step": 37588 - }, - { - "epoch": 2.8739415486361986, - "grad_norm": 0.0015167307574301958, - "learning_rate": 0.000199995926270189, - "loss": 46.0, - "step": 37589 - }, - { - "epoch": 2.8740180056195883, - "grad_norm": 0.001860748860053718, - "learning_rate": 0.00019999592605337877, - "loss": 46.0, - "step": 37590 - }, - { - "epoch": 2.874094462602978, - "grad_norm": 0.005001469980925322, - "learning_rate": 0.00019999592583656273, - "loss": 46.0, - "step": 37591 - }, - { - "epoch": 2.874170919586368, - "grad_norm": 0.003039294620975852, - "learning_rate": 0.00019999592561974094, - "loss": 46.0, - "step": 37592 - }, - { - "epoch": 2.8742473765697576, - "grad_norm": 0.005798115395009518, - "learning_rate": 0.00019999592540291338, - "loss": 46.0, - "step": 37593 - }, - { - "epoch": 2.874323833553147, - "grad_norm": 0.008996091783046722, - "learning_rate": 0.00019999592518608002, - "loss": 46.0, - "step": 37594 - }, - { - "epoch": 2.8744002905365367, - "grad_norm": 0.0012368884636089206, - "learning_rate": 0.00019999592496924094, - "loss": 46.0, - "step": 37595 - }, - { - "epoch": 2.8744767475199264, - "grad_norm": 0.0022297895047813654, - "learning_rate": 0.00019999592475239606, - "loss": 46.0, - "step": 37596 - }, - { - "epoch": 2.874553204503316, - "grad_norm": 0.0024840838741511106, - "learning_rate": 0.00019999592453554544, - "loss": 46.0, - "step": 37597 - }, - { - "epoch": 2.874629661486706, - "grad_norm": 0.004004363436251879, - "learning_rate": 0.00019999592431868898, - "loss": 46.0, - "step": 37598 - }, - { - "epoch": 2.8747061184700957, - "grad_norm": 0.0026861298829317093, - "learning_rate": 0.0001999959241018268, - "loss": 46.0, - "step": 37599 - }, - { - "epoch": 2.8747825754534855, - "grad_norm": 0.0005568024353124201, - "learning_rate": 0.00019999592388495886, - "loss": 46.0, - "step": 37600 - }, - { - "epoch": 2.8748590324368752, - "grad_norm": 0.0034252905752509832, - "learning_rate": 0.00019999592366808515, - "loss": 46.0, - "step": 37601 - }, - { - "epoch": 2.874935489420265, - "grad_norm": 0.001752939191646874, - "learning_rate": 0.00019999592345120565, - "loss": 46.0, - "step": 37602 - }, - { - "epoch": 2.8750119464036548, - "grad_norm": 0.003235015319660306, - "learning_rate": 0.00019999592323432039, - "loss": 46.0, - "step": 37603 - }, - { - "epoch": 2.8750884033870445, - "grad_norm": 0.005604472476989031, - "learning_rate": 0.00019999592301742937, - "loss": 46.0, - "step": 37604 - }, - { - "epoch": 2.8751648603704343, - "grad_norm": 0.0051450408063828945, - "learning_rate": 0.0001999959228005326, - "loss": 46.0, - "step": 37605 - }, - { - "epoch": 2.875241317353824, - "grad_norm": 0.003964126110076904, - "learning_rate": 0.00019999592258363, - "loss": 46.0, - "step": 37606 - }, - { - "epoch": 2.8753177743372134, - "grad_norm": 0.0005387893179431558, - "learning_rate": 0.0001999959223667217, - "loss": 46.0, - "step": 37607 - }, - { - "epoch": 2.875394231320603, - "grad_norm": 0.0009338142699562013, - "learning_rate": 0.00019999592214980756, - "loss": 46.0, - "step": 37608 - }, - { - "epoch": 2.875470688303993, - "grad_norm": 0.001832042122259736, - "learning_rate": 0.0001999959219328877, - "loss": 46.0, - "step": 37609 - }, - { - "epoch": 2.8755471452873826, - "grad_norm": 0.0022954875603318214, - "learning_rate": 0.00019999592171596206, - "loss": 46.0, - "step": 37610 - }, - { - "epoch": 2.8756236022707724, - "grad_norm": 0.0012057002168148756, - "learning_rate": 0.00019999592149903066, - "loss": 46.0, - "step": 37611 - }, - { - "epoch": 2.875700059254162, - "grad_norm": 0.006148792337626219, - "learning_rate": 0.00019999592128209349, - "loss": 46.0, - "step": 37612 - }, - { - "epoch": 2.875776516237552, - "grad_norm": 0.00475850747898221, - "learning_rate": 0.00019999592106515054, - "loss": 46.0, - "step": 37613 - }, - { - "epoch": 2.8758529732209417, - "grad_norm": 0.00256563862785697, - "learning_rate": 0.0001999959208482018, - "loss": 46.0, - "step": 37614 - }, - { - "epoch": 2.8759294302043314, - "grad_norm": 0.004690373782068491, - "learning_rate": 0.0001999959206312473, - "loss": 46.0, - "step": 37615 - }, - { - "epoch": 2.8760058871877208, - "grad_norm": 0.001298415125347674, - "learning_rate": 0.00019999592041428704, - "loss": 46.0, - "step": 37616 - }, - { - "epoch": 2.8760823441711105, - "grad_norm": 0.0023295036517083645, - "learning_rate": 0.00019999592019732102, - "loss": 46.0, - "step": 37617 - }, - { - "epoch": 2.8761588011545003, - "grad_norm": 0.0022516236640512943, - "learning_rate": 0.0001999959199803492, - "loss": 46.0, - "step": 37618 - }, - { - "epoch": 2.87623525813789, - "grad_norm": 0.0027028268668800592, - "learning_rate": 0.00019999591976337165, - "loss": 46.0, - "step": 37619 - }, - { - "epoch": 2.87631171512128, - "grad_norm": 0.003751756390556693, - "learning_rate": 0.00019999591954638832, - "loss": 46.0, - "step": 37620 - }, - { - "epoch": 2.8763881721046696, - "grad_norm": 0.0011190070072188973, - "learning_rate": 0.0001999959193293992, - "loss": 46.0, - "step": 37621 - }, - { - "epoch": 2.8764646290880593, - "grad_norm": 0.00637247646227479, - "learning_rate": 0.00019999591911240433, - "loss": 46.0, - "step": 37622 - }, - { - "epoch": 2.876541086071449, - "grad_norm": 0.0014003842370584607, - "learning_rate": 0.0001999959188954037, - "loss": 46.0, - "step": 37623 - }, - { - "epoch": 2.876617543054839, - "grad_norm": 0.0018502847524359822, - "learning_rate": 0.00019999591867839728, - "loss": 46.0, - "step": 37624 - }, - { - "epoch": 2.8766940000382286, - "grad_norm": 0.0023573304060846567, - "learning_rate": 0.0001999959184613851, - "loss": 46.0, - "step": 37625 - }, - { - "epoch": 2.8767704570216184, - "grad_norm": 0.001384947681799531, - "learning_rate": 0.00019999591824436714, - "loss": 46.0, - "step": 37626 - }, - { - "epoch": 2.876846914005008, - "grad_norm": 0.0009258214849978685, - "learning_rate": 0.00019999591802734342, - "loss": 46.0, - "step": 37627 - }, - { - "epoch": 2.876923370988398, - "grad_norm": 0.00131578603759408, - "learning_rate": 0.00019999591781031395, - "loss": 46.0, - "step": 37628 - }, - { - "epoch": 2.876999827971787, - "grad_norm": 0.0014559427509084344, - "learning_rate": 0.0001999959175932787, - "loss": 46.0, - "step": 37629 - }, - { - "epoch": 2.877076284955177, - "grad_norm": 0.0011839630315080285, - "learning_rate": 0.00019999591737623765, - "loss": 46.0, - "step": 37630 - }, - { - "epoch": 2.8771527419385667, - "grad_norm": 0.009083331562578678, - "learning_rate": 0.0001999959171591909, - "loss": 46.0, - "step": 37631 - }, - { - "epoch": 2.8772291989219565, - "grad_norm": 0.0016284589655697346, - "learning_rate": 0.0001999959169421383, - "loss": 46.0, - "step": 37632 - }, - { - "epoch": 2.8773056559053463, - "grad_norm": 0.0025250306352972984, - "learning_rate": 0.00019999591672507995, - "loss": 46.0, - "step": 37633 - }, - { - "epoch": 2.877382112888736, - "grad_norm": 0.0042189378291368484, - "learning_rate": 0.00019999591650801587, - "loss": 46.0, - "step": 37634 - }, - { - "epoch": 2.877458569872126, - "grad_norm": 0.006225135177373886, - "learning_rate": 0.000199995916290946, - "loss": 46.0, - "step": 37635 - }, - { - "epoch": 2.8775350268555155, - "grad_norm": 0.0019970820285379887, - "learning_rate": 0.00019999591607387034, - "loss": 46.0, - "step": 37636 - }, - { - "epoch": 2.877611483838905, - "grad_norm": 0.0010138554498553276, - "learning_rate": 0.0001999959158567889, - "loss": 46.0, - "step": 37637 - }, - { - "epoch": 2.8776879408222946, - "grad_norm": 0.001222631661221385, - "learning_rate": 0.00019999591563970174, - "loss": 46.0, - "step": 37638 - }, - { - "epoch": 2.8777643978056844, - "grad_norm": 0.006695645861327648, - "learning_rate": 0.0001999959154226088, - "loss": 46.0, - "step": 37639 - }, - { - "epoch": 2.877840854789074, - "grad_norm": 0.002268946496769786, - "learning_rate": 0.00019999591520551008, - "loss": 46.0, - "step": 37640 - }, - { - "epoch": 2.877917311772464, - "grad_norm": 0.003085621166974306, - "learning_rate": 0.00019999591498840558, - "loss": 46.0, - "step": 37641 - }, - { - "epoch": 2.8779937687558537, - "grad_norm": 0.0026596535462886095, - "learning_rate": 0.00019999591477129535, - "loss": 46.0, - "step": 37642 - }, - { - "epoch": 2.8780702257392434, - "grad_norm": 0.002113314112648368, - "learning_rate": 0.0001999959145541793, - "loss": 46.0, - "step": 37643 - }, - { - "epoch": 2.878146682722633, - "grad_norm": 0.0016181261744350195, - "learning_rate": 0.00019999591433705752, - "loss": 46.0, - "step": 37644 - }, - { - "epoch": 2.878223139706023, - "grad_norm": 0.0011771114077419043, - "learning_rate": 0.00019999591411992994, - "loss": 46.0, - "step": 37645 - }, - { - "epoch": 2.8782995966894127, - "grad_norm": 0.0017877859063446522, - "learning_rate": 0.0001999959139027966, - "loss": 46.0, - "step": 37646 - }, - { - "epoch": 2.8783760536728025, - "grad_norm": 0.0009797796374186873, - "learning_rate": 0.0001999959136856575, - "loss": 46.0, - "step": 37647 - }, - { - "epoch": 2.8784525106561922, - "grad_norm": 0.002931591821834445, - "learning_rate": 0.00019999591346851262, - "loss": 46.0, - "step": 37648 - }, - { - "epoch": 2.878528967639582, - "grad_norm": 0.0029796601738780737, - "learning_rate": 0.00019999591325136197, - "loss": 46.0, - "step": 37649 - }, - { - "epoch": 2.8786054246229718, - "grad_norm": 0.0015961222816258669, - "learning_rate": 0.0001999959130342056, - "loss": 46.0, - "step": 37650 - }, - { - "epoch": 2.878681881606361, - "grad_norm": 0.0021464508026838303, - "learning_rate": 0.0001999959128170434, - "loss": 46.0, - "step": 37651 - }, - { - "epoch": 2.878758338589751, - "grad_norm": 0.003437078557908535, - "learning_rate": 0.00019999591259987546, - "loss": 46.0, - "step": 37652 - }, - { - "epoch": 2.8788347955731406, - "grad_norm": 0.002468088176101446, - "learning_rate": 0.0001999959123827017, - "loss": 46.0, - "step": 37653 - }, - { - "epoch": 2.8789112525565304, - "grad_norm": 0.003069260623306036, - "learning_rate": 0.00019999591216552225, - "loss": 46.0, - "step": 37654 - }, - { - "epoch": 2.87898770953992, - "grad_norm": 0.0027792982291430235, - "learning_rate": 0.000199995911948337, - "loss": 46.0, - "step": 37655 - }, - { - "epoch": 2.87906416652331, - "grad_norm": 0.0026610700879245996, - "learning_rate": 0.00019999591173114597, - "loss": 46.0, - "step": 37656 - }, - { - "epoch": 2.8791406235066996, - "grad_norm": 0.0013047675602138042, - "learning_rate": 0.00019999591151394916, - "loss": 46.0, - "step": 37657 - }, - { - "epoch": 2.8792170804900894, - "grad_norm": 0.0017877020873129368, - "learning_rate": 0.0001999959112967466, - "loss": 46.0, - "step": 37658 - }, - { - "epoch": 2.8792935374734787, - "grad_norm": 0.0022516376338899136, - "learning_rate": 0.00019999591107953827, - "loss": 46.0, - "step": 37659 - }, - { - "epoch": 2.8793699944568685, - "grad_norm": 0.0035500021185725927, - "learning_rate": 0.00019999591086232416, - "loss": 46.0, - "step": 37660 - }, - { - "epoch": 2.8794464514402582, - "grad_norm": 0.005809157621115446, - "learning_rate": 0.00019999591064510429, - "loss": 46.0, - "step": 37661 - }, - { - "epoch": 2.879522908423648, - "grad_norm": 0.0010444847866892815, - "learning_rate": 0.00019999591042787863, - "loss": 46.0, - "step": 37662 - }, - { - "epoch": 2.8795993654070378, - "grad_norm": 0.0030564642511308193, - "learning_rate": 0.00019999591021064724, - "loss": 46.0, - "step": 37663 - }, - { - "epoch": 2.8796758223904275, - "grad_norm": 0.000956640811637044, - "learning_rate": 0.00019999590999341004, - "loss": 46.0, - "step": 37664 - }, - { - "epoch": 2.8797522793738173, - "grad_norm": 0.0012026693439111114, - "learning_rate": 0.0001999959097761671, - "loss": 46.0, - "step": 37665 - }, - { - "epoch": 2.879828736357207, - "grad_norm": 0.008212125860154629, - "learning_rate": 0.00019999590955891838, - "loss": 46.0, - "step": 37666 - }, - { - "epoch": 2.879905193340597, - "grad_norm": 0.002911114599555731, - "learning_rate": 0.0001999959093416639, - "loss": 46.0, - "step": 37667 - }, - { - "epoch": 2.8799816503239866, - "grad_norm": 0.005301766097545624, - "learning_rate": 0.00019999590912440365, - "loss": 46.0, - "step": 37668 - }, - { - "epoch": 2.8800581073073763, - "grad_norm": 0.0011371088912710547, - "learning_rate": 0.0001999959089071376, - "loss": 46.0, - "step": 37669 - }, - { - "epoch": 2.880134564290766, - "grad_norm": 0.0015648382250219584, - "learning_rate": 0.00019999590868986582, - "loss": 46.0, - "step": 37670 - }, - { - "epoch": 2.880211021274156, - "grad_norm": 0.001931838458403945, - "learning_rate": 0.00019999590847258824, - "loss": 46.0, - "step": 37671 - }, - { - "epoch": 2.8802874782575456, - "grad_norm": 0.004580653738230467, - "learning_rate": 0.0001999959082553049, - "loss": 46.0, - "step": 37672 - }, - { - "epoch": 2.880363935240935, - "grad_norm": 0.0030425051227211952, - "learning_rate": 0.00019999590803801583, - "loss": 46.0, - "step": 37673 - }, - { - "epoch": 2.8804403922243247, - "grad_norm": 0.0010746218031272292, - "learning_rate": 0.00019999590782072093, - "loss": 46.0, - "step": 37674 - }, - { - "epoch": 2.8805168492077144, - "grad_norm": 0.002546555595472455, - "learning_rate": 0.0001999959076034203, - "loss": 46.0, - "step": 37675 - }, - { - "epoch": 2.880593306191104, - "grad_norm": 0.0022606172133237123, - "learning_rate": 0.00019999590738611388, - "loss": 46.0, - "step": 37676 - }, - { - "epoch": 2.880669763174494, - "grad_norm": 0.002499643713235855, - "learning_rate": 0.0001999959071688017, - "loss": 46.0, - "step": 37677 - }, - { - "epoch": 2.8807462201578837, - "grad_norm": 0.00644226698204875, - "learning_rate": 0.00019999590695148376, - "loss": 46.0, - "step": 37678 - }, - { - "epoch": 2.8808226771412735, - "grad_norm": 0.0022670149337500334, - "learning_rate": 0.00019999590673416004, - "loss": 46.0, - "step": 37679 - }, - { - "epoch": 2.8808991341246633, - "grad_norm": 0.0010460034245625138, - "learning_rate": 0.00019999590651683055, - "loss": 46.0, - "step": 37680 - }, - { - "epoch": 2.8809755911080526, - "grad_norm": 0.004056965932250023, - "learning_rate": 0.0001999959062994953, - "loss": 46.0, - "step": 37681 - }, - { - "epoch": 2.8810520480914423, - "grad_norm": 0.0026718040462583303, - "learning_rate": 0.00019999590608215425, - "loss": 46.0, - "step": 37682 - }, - { - "epoch": 2.881128505074832, - "grad_norm": 0.0020678224973380566, - "learning_rate": 0.00019999590586480747, - "loss": 46.0, - "step": 37683 - }, - { - "epoch": 2.881204962058222, - "grad_norm": 0.0012035979889333248, - "learning_rate": 0.0001999959056474549, - "loss": 46.0, - "step": 37684 - }, - { - "epoch": 2.8812814190416116, - "grad_norm": 0.004759133793413639, - "learning_rate": 0.00019999590543009655, - "loss": 46.0, - "step": 37685 - }, - { - "epoch": 2.8813578760250014, - "grad_norm": 0.0024073247332125902, - "learning_rate": 0.00019999590521273245, - "loss": 46.0, - "step": 37686 - }, - { - "epoch": 2.881434333008391, - "grad_norm": 0.0007022698991931975, - "learning_rate": 0.00019999590499536257, - "loss": 46.0, - "step": 37687 - }, - { - "epoch": 2.881510789991781, - "grad_norm": 0.0011111075291410089, - "learning_rate": 0.00019999590477798695, - "loss": 46.0, - "step": 37688 - }, - { - "epoch": 2.8815872469751707, - "grad_norm": 0.002804168965667486, - "learning_rate": 0.00019999590456060555, - "loss": 46.0, - "step": 37689 - }, - { - "epoch": 2.8816637039585604, - "grad_norm": 0.0006622043438255787, - "learning_rate": 0.00019999590434321835, - "loss": 46.0, - "step": 37690 - }, - { - "epoch": 2.88174016094195, - "grad_norm": 0.0013546813279390335, - "learning_rate": 0.0001999959041258254, - "loss": 46.0, - "step": 37691 - }, - { - "epoch": 2.88181661792534, - "grad_norm": 0.004689261317253113, - "learning_rate": 0.0001999959039084267, - "loss": 46.0, - "step": 37692 - }, - { - "epoch": 2.8818930749087297, - "grad_norm": 0.00396009162068367, - "learning_rate": 0.0001999959036910222, - "loss": 46.0, - "step": 37693 - }, - { - "epoch": 2.8819695318921195, - "grad_norm": 0.0012875613756477833, - "learning_rate": 0.00019999590347361194, - "loss": 46.0, - "step": 37694 - }, - { - "epoch": 2.882045988875509, - "grad_norm": 0.0009763381676748395, - "learning_rate": 0.0001999959032561959, - "loss": 46.0, - "step": 37695 - }, - { - "epoch": 2.8821224458588985, - "grad_norm": 0.0036177264992147684, - "learning_rate": 0.00019999590303877414, - "loss": 46.0, - "step": 37696 - }, - { - "epoch": 2.8821989028422883, - "grad_norm": 0.002841199981048703, - "learning_rate": 0.00019999590282134656, - "loss": 46.0, - "step": 37697 - }, - { - "epoch": 2.882275359825678, - "grad_norm": 0.0015320699894800782, - "learning_rate": 0.00019999590260391323, - "loss": 46.0, - "step": 37698 - }, - { - "epoch": 2.882351816809068, - "grad_norm": 0.0018972893012687564, - "learning_rate": 0.00019999590238647412, - "loss": 46.0, - "step": 37699 - }, - { - "epoch": 2.8824282737924576, - "grad_norm": 0.005009973421692848, - "learning_rate": 0.00019999590216902925, - "loss": 46.0, - "step": 37700 - }, - { - "epoch": 2.8825047307758473, - "grad_norm": 0.0012373363133519888, - "learning_rate": 0.0001999959019515786, - "loss": 46.0, - "step": 37701 - }, - { - "epoch": 2.882581187759237, - "grad_norm": 0.001896055880934, - "learning_rate": 0.0001999959017341222, - "loss": 46.0, - "step": 37702 - }, - { - "epoch": 2.8826576447426264, - "grad_norm": 0.0026955078355968, - "learning_rate": 0.00019999590151666, - "loss": 46.0, - "step": 37703 - }, - { - "epoch": 2.882734101726016, - "grad_norm": 0.001902555231936276, - "learning_rate": 0.00019999590129919206, - "loss": 46.0, - "step": 37704 - }, - { - "epoch": 2.882810558709406, - "grad_norm": 0.0016124321846291423, - "learning_rate": 0.00019999590108171834, - "loss": 46.0, - "step": 37705 - }, - { - "epoch": 2.8828870156927957, - "grad_norm": 0.002984241582453251, - "learning_rate": 0.00019999590086423888, - "loss": 46.0, - "step": 37706 - }, - { - "epoch": 2.8829634726761855, - "grad_norm": 0.0027690676506608725, - "learning_rate": 0.00019999590064675359, - "loss": 46.0, - "step": 37707 - }, - { - "epoch": 2.8830399296595752, - "grad_norm": 0.008967806585133076, - "learning_rate": 0.00019999590042926258, - "loss": 46.0, - "step": 37708 - }, - { - "epoch": 2.883116386642965, - "grad_norm": 0.001452215714380145, - "learning_rate": 0.00019999590021176577, - "loss": 46.0, - "step": 37709 - }, - { - "epoch": 2.8831928436263548, - "grad_norm": 0.0017394665628671646, - "learning_rate": 0.00019999589999426324, - "loss": 46.0, - "step": 37710 - }, - { - "epoch": 2.8832693006097445, - "grad_norm": 0.00102862564381212, - "learning_rate": 0.00019999589977675488, - "loss": 46.0, - "step": 37711 - }, - { - "epoch": 2.8833457575931343, - "grad_norm": 0.0009718839428387582, - "learning_rate": 0.00019999589955924078, - "loss": 46.0, - "step": 37712 - }, - { - "epoch": 2.883422214576524, - "grad_norm": 0.0034405116457492113, - "learning_rate": 0.0001999958993417209, - "loss": 46.0, - "step": 37713 - }, - { - "epoch": 2.883498671559914, - "grad_norm": 0.0022408231161534786, - "learning_rate": 0.00019999589912419525, - "loss": 46.0, - "step": 37714 - }, - { - "epoch": 2.8835751285433036, - "grad_norm": 0.002714663976803422, - "learning_rate": 0.00019999589890666382, - "loss": 46.0, - "step": 37715 - }, - { - "epoch": 2.8836515855266933, - "grad_norm": 0.0031120474450290203, - "learning_rate": 0.00019999589868912668, - "loss": 46.0, - "step": 37716 - }, - { - "epoch": 2.8837280425100826, - "grad_norm": 0.0015535581624135375, - "learning_rate": 0.00019999589847158374, - "loss": 46.0, - "step": 37717 - }, - { - "epoch": 2.8838044994934724, - "grad_norm": 0.0007003198261372745, - "learning_rate": 0.000199995898254035, - "loss": 46.0, - "step": 37718 - }, - { - "epoch": 2.883880956476862, - "grad_norm": 0.0026158755645155907, - "learning_rate": 0.00019999589803648053, - "loss": 46.0, - "step": 37719 - }, - { - "epoch": 2.883957413460252, - "grad_norm": 0.0011512355413287878, - "learning_rate": 0.00019999589781892024, - "loss": 46.0, - "step": 37720 - }, - { - "epoch": 2.8840338704436417, - "grad_norm": 0.0008972790092229843, - "learning_rate": 0.00019999589760135423, - "loss": 46.0, - "step": 37721 - }, - { - "epoch": 2.8841103274270314, - "grad_norm": 0.0018919005524367094, - "learning_rate": 0.00019999589738378245, - "loss": 46.0, - "step": 37722 - }, - { - "epoch": 2.884186784410421, - "grad_norm": 0.0018430177588015795, - "learning_rate": 0.00019999589716620486, - "loss": 46.0, - "step": 37723 - }, - { - "epoch": 2.884263241393811, - "grad_norm": 0.006337456405162811, - "learning_rate": 0.00019999589694862154, - "loss": 46.0, - "step": 37724 - }, - { - "epoch": 2.8843396983772003, - "grad_norm": 0.007867420092225075, - "learning_rate": 0.00019999589673103243, - "loss": 46.0, - "step": 37725 - }, - { - "epoch": 2.88441615536059, - "grad_norm": 0.00452502490952611, - "learning_rate": 0.00019999589651343756, - "loss": 46.0, - "step": 37726 - }, - { - "epoch": 2.88449261234398, - "grad_norm": 0.002269575372338295, - "learning_rate": 0.00019999589629583693, - "loss": 46.0, - "step": 37727 - }, - { - "epoch": 2.8845690693273696, - "grad_norm": 0.0023959744721651077, - "learning_rate": 0.00019999589607823054, - "loss": 46.0, - "step": 37728 - }, - { - "epoch": 2.8846455263107593, - "grad_norm": 0.0013814035337418318, - "learning_rate": 0.00019999589586061834, - "loss": 46.0, - "step": 37729 - }, - { - "epoch": 2.884721983294149, - "grad_norm": 0.0007447094540111721, - "learning_rate": 0.0001999958956430004, - "loss": 46.0, - "step": 37730 - }, - { - "epoch": 2.884798440277539, - "grad_norm": 0.0011113070650026202, - "learning_rate": 0.00019999589542537668, - "loss": 46.0, - "step": 37731 - }, - { - "epoch": 2.8848748972609286, - "grad_norm": 0.0012835300294682384, - "learning_rate": 0.00019999589520774716, - "loss": 46.0, - "step": 37732 - }, - { - "epoch": 2.8849513542443184, - "grad_norm": 0.0033381865359842777, - "learning_rate": 0.0001999958949901119, - "loss": 46.0, - "step": 37733 - }, - { - "epoch": 2.885027811227708, - "grad_norm": 0.0037939741741865873, - "learning_rate": 0.0001999958947724709, - "loss": 46.0, - "step": 37734 - }, - { - "epoch": 2.885104268211098, - "grad_norm": 0.0017376294126734138, - "learning_rate": 0.0001999958945548241, - "loss": 46.0, - "step": 37735 - }, - { - "epoch": 2.8851807251944876, - "grad_norm": 0.003165578003972769, - "learning_rate": 0.00019999589433717155, - "loss": 46.0, - "step": 37736 - }, - { - "epoch": 2.8852571821778774, - "grad_norm": 0.006535191088914871, - "learning_rate": 0.0001999958941195132, - "loss": 46.0, - "step": 37737 - }, - { - "epoch": 2.8853336391612667, - "grad_norm": 0.001833610818721354, - "learning_rate": 0.0001999958939018491, - "loss": 46.0, - "step": 37738 - }, - { - "epoch": 2.8854100961446565, - "grad_norm": 0.0008515738882124424, - "learning_rate": 0.00019999589368417922, - "loss": 46.0, - "step": 37739 - }, - { - "epoch": 2.8854865531280462, - "grad_norm": 0.0022605929989367723, - "learning_rate": 0.0001999958934665036, - "loss": 46.0, - "step": 37740 - }, - { - "epoch": 2.885563010111436, - "grad_norm": 0.001866824459284544, - "learning_rate": 0.00019999589324882217, - "loss": 46.0, - "step": 37741 - }, - { - "epoch": 2.8856394670948258, - "grad_norm": 0.0014304748037829995, - "learning_rate": 0.000199995893031135, - "loss": 46.0, - "step": 37742 - }, - { - "epoch": 2.8857159240782155, - "grad_norm": 0.001404367620125413, - "learning_rate": 0.00019999589281344203, - "loss": 46.0, - "step": 37743 - }, - { - "epoch": 2.8857923810616053, - "grad_norm": 0.010288607329130173, - "learning_rate": 0.00019999589259574334, - "loss": 46.0, - "step": 37744 - }, - { - "epoch": 2.885868838044995, - "grad_norm": 0.0036197949666529894, - "learning_rate": 0.00019999589237803885, - "loss": 46.0, - "step": 37745 - }, - { - "epoch": 2.885945295028385, - "grad_norm": 0.0028814945835620165, - "learning_rate": 0.0001999958921603286, - "loss": 46.0, - "step": 37746 - }, - { - "epoch": 2.886021752011774, - "grad_norm": 0.001936594839207828, - "learning_rate": 0.00019999589194261255, - "loss": 46.0, - "step": 37747 - }, - { - "epoch": 2.886098208995164, - "grad_norm": 0.002066792454570532, - "learning_rate": 0.00019999589172489074, - "loss": 46.0, - "step": 37748 - }, - { - "epoch": 2.8861746659785537, - "grad_norm": 0.002139067742973566, - "learning_rate": 0.0001999958915071632, - "loss": 46.0, - "step": 37749 - }, - { - "epoch": 2.8862511229619434, - "grad_norm": 0.003388867946341634, - "learning_rate": 0.0001999958912894299, - "loss": 46.0, - "step": 37750 - }, - { - "epoch": 2.886327579945333, - "grad_norm": 0.004089138004928827, - "learning_rate": 0.00019999589107169076, - "loss": 46.0, - "step": 37751 - }, - { - "epoch": 2.886404036928723, - "grad_norm": 0.0010730591602623463, - "learning_rate": 0.00019999589085394588, - "loss": 46.0, - "step": 37752 - }, - { - "epoch": 2.8864804939121127, - "grad_norm": 0.003603886580094695, - "learning_rate": 0.00019999589063619526, - "loss": 46.0, - "step": 37753 - }, - { - "epoch": 2.8865569508955025, - "grad_norm": 0.0019759265705943108, - "learning_rate": 0.00019999589041843884, - "loss": 46.0, - "step": 37754 - }, - { - "epoch": 2.886633407878892, - "grad_norm": 0.002615033881738782, - "learning_rate": 0.00019999589020067667, - "loss": 46.0, - "step": 37755 - }, - { - "epoch": 2.886709864862282, - "grad_norm": 0.0016685740556567907, - "learning_rate": 0.00019999588998290872, - "loss": 46.0, - "step": 37756 - }, - { - "epoch": 2.8867863218456717, - "grad_norm": 0.002141603035852313, - "learning_rate": 0.00019999588976513498, - "loss": 46.0, - "step": 37757 - }, - { - "epoch": 2.8868627788290615, - "grad_norm": 0.00145918398629874, - "learning_rate": 0.00019999588954735552, - "loss": 46.0, - "step": 37758 - }, - { - "epoch": 2.8869392358124513, - "grad_norm": 0.0019381509628146887, - "learning_rate": 0.00019999588932957026, - "loss": 46.0, - "step": 37759 - }, - { - "epoch": 2.8870156927958406, - "grad_norm": 0.0014604333555325866, - "learning_rate": 0.00019999588911177922, - "loss": 46.0, - "step": 37760 - }, - { - "epoch": 2.8870921497792303, - "grad_norm": 0.001315958914346993, - "learning_rate": 0.00019999588889398244, - "loss": 46.0, - "step": 37761 - }, - { - "epoch": 2.88716860676262, - "grad_norm": 0.0018504865001887083, - "learning_rate": 0.00019999588867617986, - "loss": 46.0, - "step": 37762 - }, - { - "epoch": 2.88724506374601, - "grad_norm": 0.0075893704779446125, - "learning_rate": 0.00019999588845837153, - "loss": 46.0, - "step": 37763 - }, - { - "epoch": 2.8873215207293996, - "grad_norm": 0.002132125198841095, - "learning_rate": 0.00019999588824055743, - "loss": 46.0, - "step": 37764 - }, - { - "epoch": 2.8873979777127894, - "grad_norm": 0.003499071579426527, - "learning_rate": 0.00019999588802273758, - "loss": 46.0, - "step": 37765 - }, - { - "epoch": 2.887474434696179, - "grad_norm": 0.012519373558461666, - "learning_rate": 0.00019999588780491193, - "loss": 46.0, - "step": 37766 - }, - { - "epoch": 2.887550891679569, - "grad_norm": 0.0013570141745731235, - "learning_rate": 0.00019999588758708053, - "loss": 46.0, - "step": 37767 - }, - { - "epoch": 2.8876273486629582, - "grad_norm": 0.012234872207045555, - "learning_rate": 0.00019999588736924334, - "loss": 46.0, - "step": 37768 - }, - { - "epoch": 2.887703805646348, - "grad_norm": 0.0023281071335077286, - "learning_rate": 0.0001999958871514004, - "loss": 46.0, - "step": 37769 - }, - { - "epoch": 2.8877802626297377, - "grad_norm": 0.0019528227858245373, - "learning_rate": 0.00019999588693355168, - "loss": 46.0, - "step": 37770 - }, - { - "epoch": 2.8878567196131275, - "grad_norm": 0.0017250468954443932, - "learning_rate": 0.0001999958867156972, - "loss": 46.0, - "step": 37771 - }, - { - "epoch": 2.8879331765965173, - "grad_norm": 0.004837709944695234, - "learning_rate": 0.0001999958864978369, - "loss": 46.0, - "step": 37772 - }, - { - "epoch": 2.888009633579907, - "grad_norm": 0.0012939043808728456, - "learning_rate": 0.0001999958862799709, - "loss": 46.0, - "step": 37773 - }, - { - "epoch": 2.888086090563297, - "grad_norm": 0.001408212585374713, - "learning_rate": 0.00019999588606209908, - "loss": 46.0, - "step": 37774 - }, - { - "epoch": 2.8881625475466866, - "grad_norm": 0.002098650438711047, - "learning_rate": 0.00019999588584422153, - "loss": 46.0, - "step": 37775 - }, - { - "epoch": 2.8882390045300763, - "grad_norm": 0.0009064716869033873, - "learning_rate": 0.0001999958856263382, - "loss": 46.0, - "step": 37776 - }, - { - "epoch": 2.888315461513466, - "grad_norm": 0.0009372316999360919, - "learning_rate": 0.00019999588540844913, - "loss": 46.0, - "step": 37777 - }, - { - "epoch": 2.888391918496856, - "grad_norm": 0.007355047389864922, - "learning_rate": 0.00019999588519055422, - "loss": 46.0, - "step": 37778 - }, - { - "epoch": 2.8884683754802456, - "grad_norm": 0.002255040919408202, - "learning_rate": 0.00019999588497265357, - "loss": 46.0, - "step": 37779 - }, - { - "epoch": 2.8885448324636354, - "grad_norm": 0.0029307508375495672, - "learning_rate": 0.00019999588475474718, - "loss": 46.0, - "step": 37780 - }, - { - "epoch": 2.888621289447025, - "grad_norm": 0.0066835288889706135, - "learning_rate": 0.000199995884536835, - "loss": 46.0, - "step": 37781 - }, - { - "epoch": 2.8886977464304144, - "grad_norm": 0.004918511491268873, - "learning_rate": 0.00019999588431891704, - "loss": 46.0, - "step": 37782 - }, - { - "epoch": 2.888774203413804, - "grad_norm": 0.0014031730825081468, - "learning_rate": 0.00019999588410099333, - "loss": 46.0, - "step": 37783 - }, - { - "epoch": 2.888850660397194, - "grad_norm": 0.0015238840132951736, - "learning_rate": 0.00019999588388306384, - "loss": 46.0, - "step": 37784 - }, - { - "epoch": 2.8889271173805837, - "grad_norm": 0.0018328707665205002, - "learning_rate": 0.0001999958836651286, - "loss": 46.0, - "step": 37785 - }, - { - "epoch": 2.8890035743639735, - "grad_norm": 0.005886287894099951, - "learning_rate": 0.00019999588344718757, - "loss": 46.0, - "step": 37786 - }, - { - "epoch": 2.8890800313473632, - "grad_norm": 0.002663286868482828, - "learning_rate": 0.0001999958832292408, - "loss": 46.0, - "step": 37787 - }, - { - "epoch": 2.889156488330753, - "grad_norm": 0.006685193628072739, - "learning_rate": 0.0001999958830112882, - "loss": 46.0, - "step": 37788 - }, - { - "epoch": 2.8892329453141428, - "grad_norm": 0.003558224765583873, - "learning_rate": 0.00019999588279332988, - "loss": 46.0, - "step": 37789 - }, - { - "epoch": 2.889309402297532, - "grad_norm": 0.0012884949101135135, - "learning_rate": 0.0001999958825753658, - "loss": 46.0, - "step": 37790 - }, - { - "epoch": 2.889385859280922, - "grad_norm": 0.0031954103615134954, - "learning_rate": 0.0001999958823573959, - "loss": 46.0, - "step": 37791 - }, - { - "epoch": 2.8894623162643116, - "grad_norm": 0.0008383719832636416, - "learning_rate": 0.00019999588213942025, - "loss": 46.0, - "step": 37792 - }, - { - "epoch": 2.8895387732477014, - "grad_norm": 0.009652224369347095, - "learning_rate": 0.00019999588192143886, - "loss": 46.0, - "step": 37793 - }, - { - "epoch": 2.889615230231091, - "grad_norm": 0.0019310020143166184, - "learning_rate": 0.00019999588170345166, - "loss": 46.0, - "step": 37794 - }, - { - "epoch": 2.889691687214481, - "grad_norm": 0.0010015071602538228, - "learning_rate": 0.00019999588148545875, - "loss": 46.0, - "step": 37795 - }, - { - "epoch": 2.8897681441978706, - "grad_norm": 0.0035619321279227734, - "learning_rate": 0.00019999588126746, - "loss": 46.0, - "step": 37796 - }, - { - "epoch": 2.8898446011812604, - "grad_norm": 0.005210161209106445, - "learning_rate": 0.00019999588104945552, - "loss": 46.0, - "step": 37797 - }, - { - "epoch": 2.88992105816465, - "grad_norm": 0.0010614673374220729, - "learning_rate": 0.00019999588083144528, - "loss": 46.0, - "step": 37798 - }, - { - "epoch": 2.88999751514804, - "grad_norm": 0.0012027255725115538, - "learning_rate": 0.00019999588061342925, - "loss": 46.0, - "step": 37799 - }, - { - "epoch": 2.8900739721314297, - "grad_norm": 0.0021582175977528095, - "learning_rate": 0.00019999588039540744, - "loss": 46.0, - "step": 37800 - }, - { - "epoch": 2.8901504291148195, - "grad_norm": 0.0008120203274302185, - "learning_rate": 0.00019999588017737989, - "loss": 46.0, - "step": 37801 - }, - { - "epoch": 2.890226886098209, - "grad_norm": 0.007342230994254351, - "learning_rate": 0.00019999587995934653, - "loss": 46.0, - "step": 37802 - }, - { - "epoch": 2.890303343081599, - "grad_norm": 0.004231353290379047, - "learning_rate": 0.00019999587974130746, - "loss": 46.0, - "step": 37803 - }, - { - "epoch": 2.8903798000649883, - "grad_norm": 0.0017804204253479838, - "learning_rate": 0.00019999587952326258, - "loss": 46.0, - "step": 37804 - }, - { - "epoch": 2.890456257048378, - "grad_norm": 0.004549276549369097, - "learning_rate": 0.00019999587930521193, - "loss": 46.0, - "step": 37805 - }, - { - "epoch": 2.890532714031768, - "grad_norm": 0.0044477032497525215, - "learning_rate": 0.00019999587908715554, - "loss": 46.0, - "step": 37806 - }, - { - "epoch": 2.8906091710151576, - "grad_norm": 0.0025124498642981052, - "learning_rate": 0.00019999587886909334, - "loss": 46.0, - "step": 37807 - }, - { - "epoch": 2.8906856279985473, - "grad_norm": 0.002882106462493539, - "learning_rate": 0.0001999958786510254, - "loss": 46.0, - "step": 37808 - }, - { - "epoch": 2.890762084981937, - "grad_norm": 0.002985223662108183, - "learning_rate": 0.0001999958784329517, - "loss": 46.0, - "step": 37809 - }, - { - "epoch": 2.890838541965327, - "grad_norm": 0.0028906241059303284, - "learning_rate": 0.0001999958782148722, - "loss": 46.0, - "step": 37810 - }, - { - "epoch": 2.8909149989487166, - "grad_norm": 0.003020563628524542, - "learning_rate": 0.00019999587799678694, - "loss": 46.0, - "step": 37811 - }, - { - "epoch": 2.890991455932106, - "grad_norm": 0.0023545341100543737, - "learning_rate": 0.00019999587777869593, - "loss": 46.0, - "step": 37812 - }, - { - "epoch": 2.8910679129154957, - "grad_norm": 0.0009672415908426046, - "learning_rate": 0.00019999587756059912, - "loss": 46.0, - "step": 37813 - }, - { - "epoch": 2.8911443698988855, - "grad_norm": 0.0019381796009838581, - "learning_rate": 0.00019999587734249657, - "loss": 46.0, - "step": 37814 - }, - { - "epoch": 2.891220826882275, - "grad_norm": 0.003142933128401637, - "learning_rate": 0.00019999587712438822, - "loss": 46.0, - "step": 37815 - }, - { - "epoch": 2.891297283865665, - "grad_norm": 0.001372526166960597, - "learning_rate": 0.00019999587690627411, - "loss": 46.0, - "step": 37816 - }, - { - "epoch": 2.8913737408490547, - "grad_norm": 0.0015170314582064748, - "learning_rate": 0.00019999587668815424, - "loss": 46.0, - "step": 37817 - }, - { - "epoch": 2.8914501978324445, - "grad_norm": 0.000829146767500788, - "learning_rate": 0.00019999587647002862, - "loss": 46.0, - "step": 37818 - }, - { - "epoch": 2.8915266548158343, - "grad_norm": 0.0006197000038810074, - "learning_rate": 0.0001999958762518972, - "loss": 46.0, - "step": 37819 - }, - { - "epoch": 2.891603111799224, - "grad_norm": 0.003329264698550105, - "learning_rate": 0.00019999587603376003, - "loss": 46.0, - "step": 37820 - }, - { - "epoch": 2.891679568782614, - "grad_norm": 0.0018131479155272245, - "learning_rate": 0.00019999587581561706, - "loss": 46.0, - "step": 37821 - }, - { - "epoch": 2.8917560257660035, - "grad_norm": 0.003074378240853548, - "learning_rate": 0.00019999587559746838, - "loss": 46.0, - "step": 37822 - }, - { - "epoch": 2.8918324827493933, - "grad_norm": 0.004379425197839737, - "learning_rate": 0.00019999587537931386, - "loss": 46.0, - "step": 37823 - }, - { - "epoch": 2.891908939732783, - "grad_norm": 0.0022632989566773176, - "learning_rate": 0.0001999958751611536, - "loss": 46.0, - "step": 37824 - }, - { - "epoch": 2.891985396716173, - "grad_norm": 0.0033311517909169197, - "learning_rate": 0.0001999958749429876, - "loss": 46.0, - "step": 37825 - }, - { - "epoch": 2.892061853699562, - "grad_norm": 0.0018971188692376018, - "learning_rate": 0.0001999958747248158, - "loss": 46.0, - "step": 37826 - }, - { - "epoch": 2.892138310682952, - "grad_norm": 0.0021489844657480717, - "learning_rate": 0.00019999587450663823, - "loss": 46.0, - "step": 37827 - }, - { - "epoch": 2.8922147676663417, - "grad_norm": 0.0018439419800415635, - "learning_rate": 0.0001999958742884549, - "loss": 46.0, - "step": 37828 - }, - { - "epoch": 2.8922912246497314, - "grad_norm": 0.0034569138661026955, - "learning_rate": 0.00019999587407026578, - "loss": 46.0, - "step": 37829 - }, - { - "epoch": 2.892367681633121, - "grad_norm": 0.005139507353305817, - "learning_rate": 0.00019999587385207096, - "loss": 46.0, - "step": 37830 - }, - { - "epoch": 2.892444138616511, - "grad_norm": 0.0027904980815947056, - "learning_rate": 0.00019999587363387028, - "loss": 46.0, - "step": 37831 - }, - { - "epoch": 2.8925205955999007, - "grad_norm": 0.0033358149230480194, - "learning_rate": 0.0001999958734156639, - "loss": 46.0, - "step": 37832 - }, - { - "epoch": 2.8925970525832905, - "grad_norm": 0.0018645493546500802, - "learning_rate": 0.0001999958731974517, - "loss": 46.0, - "step": 37833 - }, - { - "epoch": 2.89267350956668, - "grad_norm": 0.0033399993553757668, - "learning_rate": 0.00019999587297923375, - "loss": 46.0, - "step": 37834 - }, - { - "epoch": 2.8927499665500696, - "grad_norm": 0.004222318064421415, - "learning_rate": 0.00019999587276101004, - "loss": 46.0, - "step": 37835 - }, - { - "epoch": 2.8928264235334593, - "grad_norm": 0.0015192920109257102, - "learning_rate": 0.00019999587254278053, - "loss": 46.0, - "step": 37836 - }, - { - "epoch": 2.892902880516849, - "grad_norm": 0.005047188140451908, - "learning_rate": 0.0001999958723245453, - "loss": 46.0, - "step": 37837 - }, - { - "epoch": 2.892979337500239, - "grad_norm": 0.0034721423871815205, - "learning_rate": 0.00019999587210630426, - "loss": 46.0, - "step": 37838 - }, - { - "epoch": 2.8930557944836286, - "grad_norm": 0.005029464140534401, - "learning_rate": 0.00019999587188805748, - "loss": 46.0, - "step": 37839 - }, - { - "epoch": 2.8931322514670184, - "grad_norm": 0.0020497071091085672, - "learning_rate": 0.00019999587166980492, - "loss": 46.0, - "step": 37840 - }, - { - "epoch": 2.893208708450408, - "grad_norm": 0.006000366061925888, - "learning_rate": 0.0001999958714515466, - "loss": 46.0, - "step": 37841 - }, - { - "epoch": 2.893285165433798, - "grad_norm": 0.0014674197882413864, - "learning_rate": 0.00019999587123328247, - "loss": 46.0, - "step": 37842 - }, - { - "epoch": 2.8933616224171876, - "grad_norm": 0.0005276089068502188, - "learning_rate": 0.00019999587101501262, - "loss": 46.0, - "step": 37843 - }, - { - "epoch": 2.8934380794005774, - "grad_norm": 0.0016725932946428657, - "learning_rate": 0.00019999587079673695, - "loss": 46.0, - "step": 37844 - }, - { - "epoch": 2.893514536383967, - "grad_norm": 0.0006369533366523683, - "learning_rate": 0.00019999587057845556, - "loss": 46.0, - "step": 37845 - }, - { - "epoch": 2.893590993367357, - "grad_norm": 0.0029901338275521994, - "learning_rate": 0.0001999958703601684, - "loss": 46.0, - "step": 37846 - }, - { - "epoch": 2.8936674503507467, - "grad_norm": 0.005535561591386795, - "learning_rate": 0.00019999587014187545, - "loss": 46.0, - "step": 37847 - }, - { - "epoch": 2.893743907334136, - "grad_norm": 0.001583735691383481, - "learning_rate": 0.0001999958699235767, - "loss": 46.0, - "step": 37848 - }, - { - "epoch": 2.8938203643175258, - "grad_norm": 0.005722708534449339, - "learning_rate": 0.00019999586970527222, - "loss": 46.0, - "step": 37849 - }, - { - "epoch": 2.8938968213009155, - "grad_norm": 0.0035355426371097565, - "learning_rate": 0.000199995869486962, - "loss": 46.0, - "step": 37850 - }, - { - "epoch": 2.8939732782843053, - "grad_norm": 0.013533324934542179, - "learning_rate": 0.00019999586926864595, - "loss": 46.0, - "step": 37851 - }, - { - "epoch": 2.894049735267695, - "grad_norm": 0.0013722516596317291, - "learning_rate": 0.00019999586905032417, - "loss": 46.0, - "step": 37852 - }, - { - "epoch": 2.894126192251085, - "grad_norm": 0.005028895568102598, - "learning_rate": 0.0001999958688319966, - "loss": 46.0, - "step": 37853 - }, - { - "epoch": 2.8942026492344746, - "grad_norm": 0.004545052070170641, - "learning_rate": 0.0001999958686136633, - "loss": 46.0, - "step": 37854 - }, - { - "epoch": 2.8942791062178643, - "grad_norm": 0.0012598909670487046, - "learning_rate": 0.00019999586839532417, - "loss": 46.0, - "step": 37855 - }, - { - "epoch": 2.8943555632012536, - "grad_norm": 0.0005189222865737975, - "learning_rate": 0.0001999958681769793, - "loss": 46.0, - "step": 37856 - }, - { - "epoch": 2.8944320201846434, - "grad_norm": 0.0014284258941188455, - "learning_rate": 0.00019999586795862865, - "loss": 46.0, - "step": 37857 - }, - { - "epoch": 2.894508477168033, - "grad_norm": 0.0020104290451854467, - "learning_rate": 0.00019999586774027223, - "loss": 46.0, - "step": 37858 - }, - { - "epoch": 2.894584934151423, - "grad_norm": 0.0006753055495209992, - "learning_rate": 0.00019999586752191006, - "loss": 46.0, - "step": 37859 - }, - { - "epoch": 2.8946613911348127, - "grad_norm": 0.0008774510934017599, - "learning_rate": 0.00019999586730354212, - "loss": 46.0, - "step": 37860 - }, - { - "epoch": 2.8947378481182025, - "grad_norm": 0.0029588660690933466, - "learning_rate": 0.0001999958670851684, - "loss": 46.0, - "step": 37861 - }, - { - "epoch": 2.894814305101592, - "grad_norm": 0.0017169112106785178, - "learning_rate": 0.0001999958668667889, - "loss": 46.0, - "step": 37862 - }, - { - "epoch": 2.894890762084982, - "grad_norm": 0.001216494245454669, - "learning_rate": 0.00019999586664840366, - "loss": 46.0, - "step": 37863 - }, - { - "epoch": 2.8949672190683717, - "grad_norm": 0.0003851375658996403, - "learning_rate": 0.00019999586643001265, - "loss": 46.0, - "step": 37864 - }, - { - "epoch": 2.8950436760517615, - "grad_norm": 0.0019412519177421927, - "learning_rate": 0.00019999586621161582, - "loss": 46.0, - "step": 37865 - }, - { - "epoch": 2.8951201330351513, - "grad_norm": 0.0020202603191137314, - "learning_rate": 0.0001999958659932133, - "loss": 46.0, - "step": 37866 - }, - { - "epoch": 2.895196590018541, - "grad_norm": 0.0009198561892844737, - "learning_rate": 0.00019999586577480497, - "loss": 46.0, - "step": 37867 - }, - { - "epoch": 2.8952730470019308, - "grad_norm": 0.0039591402746737, - "learning_rate": 0.00019999586555639084, - "loss": 46.0, - "step": 37868 - }, - { - "epoch": 2.89534950398532, - "grad_norm": 0.002076217206194997, - "learning_rate": 0.000199995865337971, - "loss": 46.0, - "step": 37869 - }, - { - "epoch": 2.89542596096871, - "grad_norm": 0.0032474645413458347, - "learning_rate": 0.00019999586511954535, - "loss": 46.0, - "step": 37870 - }, - { - "epoch": 2.8955024179520996, - "grad_norm": 0.0005519369151443243, - "learning_rate": 0.00019999586490111396, - "loss": 46.0, - "step": 37871 - }, - { - "epoch": 2.8955788749354894, - "grad_norm": 0.0035198063123971224, - "learning_rate": 0.00019999586468267676, - "loss": 46.0, - "step": 37872 - }, - { - "epoch": 2.895655331918879, - "grad_norm": 0.004100221209228039, - "learning_rate": 0.00019999586446423382, - "loss": 46.0, - "step": 37873 - }, - { - "epoch": 2.895731788902269, - "grad_norm": 0.0019092346774414182, - "learning_rate": 0.00019999586424578508, - "loss": 46.0, - "step": 37874 - }, - { - "epoch": 2.8958082458856587, - "grad_norm": 0.0016025708755478263, - "learning_rate": 0.00019999586402733062, - "loss": 46.0, - "step": 37875 - }, - { - "epoch": 2.8958847028690484, - "grad_norm": 0.0012079612351953983, - "learning_rate": 0.00019999586380887037, - "loss": 46.0, - "step": 37876 - }, - { - "epoch": 2.895961159852438, - "grad_norm": 0.0009891492081806064, - "learning_rate": 0.00019999586359040433, - "loss": 46.0, - "step": 37877 - }, - { - "epoch": 2.8960376168358275, - "grad_norm": 0.0038088688161224127, - "learning_rate": 0.00019999586337193255, - "loss": 46.0, - "step": 37878 - }, - { - "epoch": 2.8961140738192173, - "grad_norm": 0.0020113675855100155, - "learning_rate": 0.00019999586315345497, - "loss": 46.0, - "step": 37879 - }, - { - "epoch": 2.896190530802607, - "grad_norm": 0.0032350497785955667, - "learning_rate": 0.00019999586293497165, - "loss": 46.0, - "step": 37880 - }, - { - "epoch": 2.896266987785997, - "grad_norm": 0.0036726954858750105, - "learning_rate": 0.00019999586271648255, - "loss": 46.0, - "step": 37881 - }, - { - "epoch": 2.8963434447693865, - "grad_norm": 0.005590291228145361, - "learning_rate": 0.00019999586249798767, - "loss": 46.0, - "step": 37882 - }, - { - "epoch": 2.8964199017527763, - "grad_norm": 0.0034286403097212315, - "learning_rate": 0.00019999586227948703, - "loss": 46.0, - "step": 37883 - }, - { - "epoch": 2.896496358736166, - "grad_norm": 0.002362036146223545, - "learning_rate": 0.0001999958620609806, - "loss": 46.0, - "step": 37884 - }, - { - "epoch": 2.896572815719556, - "grad_norm": 0.0029984493739902973, - "learning_rate": 0.00019999586184246844, - "loss": 46.0, - "step": 37885 - }, - { - "epoch": 2.8966492727029456, - "grad_norm": 0.002623130800202489, - "learning_rate": 0.0001999958616239505, - "loss": 46.0, - "step": 37886 - }, - { - "epoch": 2.8967257296863353, - "grad_norm": 0.0006159718031994998, - "learning_rate": 0.0001999958614054268, - "loss": 46.0, - "step": 37887 - }, - { - "epoch": 2.896802186669725, - "grad_norm": 0.005858476273715496, - "learning_rate": 0.0001999958611868973, - "loss": 46.0, - "step": 37888 - }, - { - "epoch": 2.896878643653115, - "grad_norm": 0.003088385798037052, - "learning_rate": 0.00019999586096836205, - "loss": 46.0, - "step": 37889 - }, - { - "epoch": 2.8969551006365046, - "grad_norm": 0.0019597543869167566, - "learning_rate": 0.000199995860749821, - "loss": 46.0, - "step": 37890 - }, - { - "epoch": 2.897031557619894, - "grad_norm": 0.0020084744319319725, - "learning_rate": 0.0001999958605312742, - "loss": 46.0, - "step": 37891 - }, - { - "epoch": 2.8971080146032837, - "grad_norm": 0.0012871321523562074, - "learning_rate": 0.00019999586031272165, - "loss": 46.0, - "step": 37892 - }, - { - "epoch": 2.8971844715866735, - "grad_norm": 0.001935808570124209, - "learning_rate": 0.00019999586009416333, - "loss": 46.0, - "step": 37893 - }, - { - "epoch": 2.8972609285700632, - "grad_norm": 0.003490347880870104, - "learning_rate": 0.00019999585987559923, - "loss": 46.0, - "step": 37894 - }, - { - "epoch": 2.897337385553453, - "grad_norm": 0.0012122029438614845, - "learning_rate": 0.00019999585965702936, - "loss": 46.0, - "step": 37895 - }, - { - "epoch": 2.8974138425368428, - "grad_norm": 0.000973303453065455, - "learning_rate": 0.0001999958594384537, - "loss": 46.0, - "step": 37896 - }, - { - "epoch": 2.8974902995202325, - "grad_norm": 0.0017656109994277358, - "learning_rate": 0.00019999585921987232, - "loss": 46.0, - "step": 37897 - }, - { - "epoch": 2.8975667565036223, - "grad_norm": 0.004450140055269003, - "learning_rate": 0.00019999585900128515, - "loss": 46.0, - "step": 37898 - }, - { - "epoch": 2.8976432134870116, - "grad_norm": 0.004468852654099464, - "learning_rate": 0.0001999958587826922, - "loss": 46.0, - "step": 37899 - }, - { - "epoch": 2.8977196704704014, - "grad_norm": 0.0026200194843113422, - "learning_rate": 0.00019999585856409348, - "loss": 46.0, - "step": 37900 - }, - { - "epoch": 2.897796127453791, - "grad_norm": 0.0022780667059123516, - "learning_rate": 0.000199995858345489, - "loss": 46.0, - "step": 37901 - }, - { - "epoch": 2.897872584437181, - "grad_norm": 0.00370429758913815, - "learning_rate": 0.00019999585812687873, - "loss": 46.0, - "step": 37902 - }, - { - "epoch": 2.8979490414205706, - "grad_norm": 0.0027000715490430593, - "learning_rate": 0.00019999585790826273, - "loss": 46.0, - "step": 37903 - }, - { - "epoch": 2.8980254984039604, - "grad_norm": 0.0034705905709415674, - "learning_rate": 0.00019999585768964092, - "loss": 46.0, - "step": 37904 - }, - { - "epoch": 2.89810195538735, - "grad_norm": 0.0009451345540583134, - "learning_rate": 0.00019999585747101337, - "loss": 46.0, - "step": 37905 - }, - { - "epoch": 2.89817841237074, - "grad_norm": 0.0009521275642327964, - "learning_rate": 0.00019999585725238004, - "loss": 46.0, - "step": 37906 - }, - { - "epoch": 2.8982548693541297, - "grad_norm": 0.0012806823942810297, - "learning_rate": 0.00019999585703374092, - "loss": 46.0, - "step": 37907 - }, - { - "epoch": 2.8983313263375194, - "grad_norm": 0.0031860142480582, - "learning_rate": 0.00019999585681509604, - "loss": 46.0, - "step": 37908 - }, - { - "epoch": 2.898407783320909, - "grad_norm": 0.002116659888997674, - "learning_rate": 0.00019999585659644543, - "loss": 46.0, - "step": 37909 - }, - { - "epoch": 2.898484240304299, - "grad_norm": 0.004699204117059708, - "learning_rate": 0.000199995856377789, - "loss": 46.0, - "step": 37910 - }, - { - "epoch": 2.8985606972876887, - "grad_norm": 0.005540462210774422, - "learning_rate": 0.00019999585615912684, - "loss": 46.0, - "step": 37911 - }, - { - "epoch": 2.8986371542710785, - "grad_norm": 0.003695718478411436, - "learning_rate": 0.00019999585594045888, - "loss": 46.0, - "step": 37912 - }, - { - "epoch": 2.898713611254468, - "grad_norm": 0.0018116777064278722, - "learning_rate": 0.0001999958557217852, - "loss": 46.0, - "step": 37913 - }, - { - "epoch": 2.8987900682378576, - "grad_norm": 0.001851273002102971, - "learning_rate": 0.00019999585550310568, - "loss": 46.0, - "step": 37914 - }, - { - "epoch": 2.8988665252212473, - "grad_norm": 0.0021140240132808685, - "learning_rate": 0.00019999585528442042, - "loss": 46.0, - "step": 37915 - }, - { - "epoch": 2.898942982204637, - "grad_norm": 0.002079523866996169, - "learning_rate": 0.0001999958550657294, - "loss": 46.0, - "step": 37916 - }, - { - "epoch": 2.899019439188027, - "grad_norm": 0.0014937804080545902, - "learning_rate": 0.0001999958548470326, - "loss": 46.0, - "step": 37917 - }, - { - "epoch": 2.8990958961714166, - "grad_norm": 0.0020791683346033096, - "learning_rate": 0.00019999585462833006, - "loss": 46.0, - "step": 37918 - }, - { - "epoch": 2.8991723531548064, - "grad_norm": 0.0009234077297151089, - "learning_rate": 0.00019999585440962173, - "loss": 46.0, - "step": 37919 - }, - { - "epoch": 2.899248810138196, - "grad_norm": 0.002561927540227771, - "learning_rate": 0.00019999585419090763, - "loss": 46.0, - "step": 37920 - }, - { - "epoch": 2.8993252671215854, - "grad_norm": 0.0007290031062439084, - "learning_rate": 0.00019999585397218774, - "loss": 46.0, - "step": 37921 - }, - { - "epoch": 2.899401724104975, - "grad_norm": 0.002084975130856037, - "learning_rate": 0.00019999585375346212, - "loss": 46.0, - "step": 37922 - }, - { - "epoch": 2.899478181088365, - "grad_norm": 0.001207304303534329, - "learning_rate": 0.00019999585353473073, - "loss": 46.0, - "step": 37923 - }, - { - "epoch": 2.8995546380717547, - "grad_norm": 0.0008326134993694723, - "learning_rate": 0.00019999585331599353, - "loss": 46.0, - "step": 37924 - }, - { - "epoch": 2.8996310950551445, - "grad_norm": 0.0008646500064060092, - "learning_rate": 0.0001999958530972506, - "loss": 46.0, - "step": 37925 - }, - { - "epoch": 2.8997075520385343, - "grad_norm": 0.004952006042003632, - "learning_rate": 0.00019999585287850188, - "loss": 46.0, - "step": 37926 - }, - { - "epoch": 2.899784009021924, - "grad_norm": 0.0017778363544493914, - "learning_rate": 0.00019999585265974737, - "loss": 46.0, - "step": 37927 - }, - { - "epoch": 2.8998604660053138, - "grad_norm": 0.004038863815367222, - "learning_rate": 0.00019999585244098714, - "loss": 46.0, - "step": 37928 - }, - { - "epoch": 2.8999369229887035, - "grad_norm": 0.007513077463954687, - "learning_rate": 0.00019999585222222114, - "loss": 46.0, - "step": 37929 - }, - { - "epoch": 2.9000133799720933, - "grad_norm": 0.001991919009014964, - "learning_rate": 0.00019999585200344933, - "loss": 46.0, - "step": 37930 - }, - { - "epoch": 2.900089836955483, - "grad_norm": 0.00294917868450284, - "learning_rate": 0.00019999585178467175, - "loss": 46.0, - "step": 37931 - }, - { - "epoch": 2.900166293938873, - "grad_norm": 0.0005399100482463837, - "learning_rate": 0.00019999585156588843, - "loss": 46.0, - "step": 37932 - }, - { - "epoch": 2.9002427509222626, - "grad_norm": 0.0032314506825059652, - "learning_rate": 0.00019999585134709933, - "loss": 46.0, - "step": 37933 - }, - { - "epoch": 2.9003192079056523, - "grad_norm": 0.0019096112810075283, - "learning_rate": 0.00019999585112830446, - "loss": 46.0, - "step": 37934 - }, - { - "epoch": 2.9003956648890417, - "grad_norm": 0.0043441541492938995, - "learning_rate": 0.0001999958509095038, - "loss": 46.0, - "step": 37935 - }, - { - "epoch": 2.9004721218724314, - "grad_norm": 0.0035393706057220697, - "learning_rate": 0.00019999585069069742, - "loss": 46.0, - "step": 37936 - }, - { - "epoch": 2.900548578855821, - "grad_norm": 0.001900298404507339, - "learning_rate": 0.00019999585047188526, - "loss": 46.0, - "step": 37937 - }, - { - "epoch": 2.900625035839211, - "grad_norm": 0.002038979437202215, - "learning_rate": 0.0001999958502530673, - "loss": 46.0, - "step": 37938 - }, - { - "epoch": 2.9007014928226007, - "grad_norm": 0.0027250300627201796, - "learning_rate": 0.00019999585003424358, - "loss": 46.0, - "step": 37939 - }, - { - "epoch": 2.9007779498059905, - "grad_norm": 0.0017409016145393252, - "learning_rate": 0.0001999958498154141, - "loss": 46.0, - "step": 37940 - }, - { - "epoch": 2.9008544067893802, - "grad_norm": 0.004930721130222082, - "learning_rate": 0.00019999584959657884, - "loss": 46.0, - "step": 37941 - }, - { - "epoch": 2.90093086377277, - "grad_norm": 0.0018574399873614311, - "learning_rate": 0.0001999958493777378, - "loss": 46.0, - "step": 37942 - }, - { - "epoch": 2.9010073207561593, - "grad_norm": 0.005130739416927099, - "learning_rate": 0.00019999584915889103, - "loss": 46.0, - "step": 37943 - }, - { - "epoch": 2.901083777739549, - "grad_norm": 0.000941133766900748, - "learning_rate": 0.00019999584894003848, - "loss": 46.0, - "step": 37944 - }, - { - "epoch": 2.901160234722939, - "grad_norm": 0.0024534950498491526, - "learning_rate": 0.0001999958487211801, - "loss": 46.0, - "step": 37945 - }, - { - "epoch": 2.9012366917063286, - "grad_norm": 0.0018703043460845947, - "learning_rate": 0.00019999584850231603, - "loss": 46.0, - "step": 37946 - }, - { - "epoch": 2.9013131486897183, - "grad_norm": 0.0015180655755102634, - "learning_rate": 0.00019999584828344618, - "loss": 46.0, - "step": 37947 - }, - { - "epoch": 2.901389605673108, - "grad_norm": 0.002182352589443326, - "learning_rate": 0.0001999958480645705, - "loss": 46.0, - "step": 37948 - }, - { - "epoch": 2.901466062656498, - "grad_norm": 0.0022451074328273535, - "learning_rate": 0.0001999958478456891, - "loss": 46.0, - "step": 37949 - }, - { - "epoch": 2.9015425196398876, - "grad_norm": 0.0007296993862837553, - "learning_rate": 0.00019999584762680193, - "loss": 46.0, - "step": 37950 - }, - { - "epoch": 2.9016189766232774, - "grad_norm": 0.008540484122931957, - "learning_rate": 0.00019999584740790897, - "loss": 46.0, - "step": 37951 - }, - { - "epoch": 2.901695433606667, - "grad_norm": 0.001649975311011076, - "learning_rate": 0.00019999584718901028, - "loss": 46.0, - "step": 37952 - }, - { - "epoch": 2.901771890590057, - "grad_norm": 0.00655308086425066, - "learning_rate": 0.0001999958469701058, - "loss": 46.0, - "step": 37953 - }, - { - "epoch": 2.9018483475734467, - "grad_norm": 0.0014261931646615267, - "learning_rate": 0.00019999584675119554, - "loss": 46.0, - "step": 37954 - }, - { - "epoch": 2.9019248045568364, - "grad_norm": 0.0014747374225407839, - "learning_rate": 0.0001999958465322795, - "loss": 46.0, - "step": 37955 - }, - { - "epoch": 2.902001261540226, - "grad_norm": 0.0017352985450997949, - "learning_rate": 0.00019999584631335773, - "loss": 46.0, - "step": 37956 - }, - { - "epoch": 2.9020777185236155, - "grad_norm": 0.003741080639883876, - "learning_rate": 0.00019999584609443015, - "loss": 46.0, - "step": 37957 - }, - { - "epoch": 2.9021541755070053, - "grad_norm": 0.001909217913635075, - "learning_rate": 0.00019999584587549683, - "loss": 46.0, - "step": 37958 - }, - { - "epoch": 2.902230632490395, - "grad_norm": 0.0037101220805197954, - "learning_rate": 0.00019999584565655773, - "loss": 46.0, - "step": 37959 - }, - { - "epoch": 2.902307089473785, - "grad_norm": 0.002531482372432947, - "learning_rate": 0.00019999584543761286, - "loss": 46.0, - "step": 37960 - }, - { - "epoch": 2.9023835464571746, - "grad_norm": 0.002353631891310215, - "learning_rate": 0.00019999584521866224, - "loss": 46.0, - "step": 37961 - }, - { - "epoch": 2.9024600034405643, - "grad_norm": 0.0014853293541818857, - "learning_rate": 0.00019999584499970583, - "loss": 46.0, - "step": 37962 - }, - { - "epoch": 2.902536460423954, - "grad_norm": 0.0020416327752172947, - "learning_rate": 0.00019999584478074363, - "loss": 46.0, - "step": 37963 - }, - { - "epoch": 2.902612917407344, - "grad_norm": 0.000849250121973455, - "learning_rate": 0.0001999958445617757, - "loss": 46.0, - "step": 37964 - }, - { - "epoch": 2.902689374390733, - "grad_norm": 0.0035294487606734037, - "learning_rate": 0.000199995844342802, - "loss": 46.0, - "step": 37965 - }, - { - "epoch": 2.902765831374123, - "grad_norm": 0.004840458743274212, - "learning_rate": 0.0001999958441238225, - "loss": 46.0, - "step": 37966 - }, - { - "epoch": 2.9028422883575127, - "grad_norm": 0.0017963387072086334, - "learning_rate": 0.00019999584390483725, - "loss": 46.0, - "step": 37967 - }, - { - "epoch": 2.9029187453409024, - "grad_norm": 0.0016213165363296866, - "learning_rate": 0.00019999584368584624, - "loss": 46.0, - "step": 37968 - }, - { - "epoch": 2.902995202324292, - "grad_norm": 0.002751770429313183, - "learning_rate": 0.00019999584346684944, - "loss": 46.0, - "step": 37969 - }, - { - "epoch": 2.903071659307682, - "grad_norm": 0.0012061558663845062, - "learning_rate": 0.00019999584324784686, - "loss": 46.0, - "step": 37970 - }, - { - "epoch": 2.9031481162910717, - "grad_norm": 0.0018004565499722958, - "learning_rate": 0.00019999584302883854, - "loss": 46.0, - "step": 37971 - }, - { - "epoch": 2.9032245732744615, - "grad_norm": 0.0009978287853300571, - "learning_rate": 0.00019999584280982444, - "loss": 46.0, - "step": 37972 - }, - { - "epoch": 2.9033010302578512, - "grad_norm": 0.0011133557418361306, - "learning_rate": 0.00019999584259080457, - "loss": 46.0, - "step": 37973 - }, - { - "epoch": 2.903377487241241, - "grad_norm": 0.002503123367205262, - "learning_rate": 0.00019999584237177892, - "loss": 46.0, - "step": 37974 - }, - { - "epoch": 2.9034539442246308, - "grad_norm": 0.0025112552102655172, - "learning_rate": 0.0001999958421527475, - "loss": 46.0, - "step": 37975 - }, - { - "epoch": 2.9035304012080205, - "grad_norm": 0.0020459210500121117, - "learning_rate": 0.00019999584193371034, - "loss": 46.0, - "step": 37976 - }, - { - "epoch": 2.9036068581914103, - "grad_norm": 0.0020152805373072624, - "learning_rate": 0.0001999958417146674, - "loss": 46.0, - "step": 37977 - }, - { - "epoch": 2.9036833151748, - "grad_norm": 0.004488846752792597, - "learning_rate": 0.0001999958414956187, - "loss": 46.0, - "step": 37978 - }, - { - "epoch": 2.9037597721581894, - "grad_norm": 0.001990138553082943, - "learning_rate": 0.0001999958412765642, - "loss": 46.0, - "step": 37979 - }, - { - "epoch": 2.903836229141579, - "grad_norm": 0.0007626558071933687, - "learning_rate": 0.00019999584105750395, - "loss": 46.0, - "step": 37980 - }, - { - "epoch": 2.903912686124969, - "grad_norm": 0.0017354001756757498, - "learning_rate": 0.00019999584083843792, - "loss": 46.0, - "step": 37981 - }, - { - "epoch": 2.9039891431083587, - "grad_norm": 0.007851804606616497, - "learning_rate": 0.00019999584061936615, - "loss": 46.0, - "step": 37982 - }, - { - "epoch": 2.9040656000917484, - "grad_norm": 0.0033887727186083794, - "learning_rate": 0.00019999584040028857, - "loss": 46.0, - "step": 37983 - }, - { - "epoch": 2.904142057075138, - "grad_norm": 0.0023646436166018248, - "learning_rate": 0.00019999584018120525, - "loss": 46.0, - "step": 37984 - }, - { - "epoch": 2.904218514058528, - "grad_norm": 0.0046932329423725605, - "learning_rate": 0.00019999583996211615, - "loss": 46.0, - "step": 37985 - }, - { - "epoch": 2.9042949710419177, - "grad_norm": 0.001397062442265451, - "learning_rate": 0.00019999583974302128, - "loss": 46.0, - "step": 37986 - }, - { - "epoch": 2.904371428025307, - "grad_norm": 0.0029896232299506664, - "learning_rate": 0.00019999583952392064, - "loss": 46.0, - "step": 37987 - }, - { - "epoch": 2.9044478850086968, - "grad_norm": 0.0011912498157471418, - "learning_rate": 0.00019999583930481425, - "loss": 46.0, - "step": 37988 - }, - { - "epoch": 2.9045243419920865, - "grad_norm": 0.0009944017510861158, - "learning_rate": 0.00019999583908570206, - "loss": 46.0, - "step": 37989 - }, - { - "epoch": 2.9046007989754763, - "grad_norm": 0.0019806744530797005, - "learning_rate": 0.00019999583886658412, - "loss": 46.0, - "step": 37990 - }, - { - "epoch": 2.904677255958866, - "grad_norm": 0.0030906402971595526, - "learning_rate": 0.00019999583864746038, - "loss": 46.0, - "step": 37991 - }, - { - "epoch": 2.904753712942256, - "grad_norm": 0.003046264173462987, - "learning_rate": 0.00019999583842833093, - "loss": 46.0, - "step": 37992 - }, - { - "epoch": 2.9048301699256456, - "grad_norm": 0.0032885682303458452, - "learning_rate": 0.00019999583820919567, - "loss": 46.0, - "step": 37993 - }, - { - "epoch": 2.9049066269090353, - "grad_norm": 0.001067915465682745, - "learning_rate": 0.00019999583799005467, - "loss": 46.0, - "step": 37994 - }, - { - "epoch": 2.904983083892425, - "grad_norm": 0.0021763306576758623, - "learning_rate": 0.00019999583777090786, - "loss": 46.0, - "step": 37995 - }, - { - "epoch": 2.905059540875815, - "grad_norm": 0.0007947355625219643, - "learning_rate": 0.0001999958375517553, - "loss": 46.0, - "step": 37996 - }, - { - "epoch": 2.9051359978592046, - "grad_norm": 0.0019696077797561884, - "learning_rate": 0.00019999583733259696, - "loss": 46.0, - "step": 37997 - }, - { - "epoch": 2.9052124548425944, - "grad_norm": 0.0031082078348845243, - "learning_rate": 0.00019999583711343286, - "loss": 46.0, - "step": 37998 - }, - { - "epoch": 2.905288911825984, - "grad_norm": 0.000810633588116616, - "learning_rate": 0.000199995836894263, - "loss": 46.0, - "step": 37999 - }, - { - "epoch": 2.9053653688093735, - "grad_norm": 0.012532716616988182, - "learning_rate": 0.00019999583667508738, - "loss": 46.0, - "step": 38000 - }, - { - "epoch": 2.905441825792763, - "grad_norm": 0.0019997034687548876, - "learning_rate": 0.00019999583645590593, - "loss": 46.0, - "step": 38001 - }, - { - "epoch": 2.905518282776153, - "grad_norm": 0.0018338636728003621, - "learning_rate": 0.00019999583623671877, - "loss": 46.0, - "step": 38002 - }, - { - "epoch": 2.9055947397595427, - "grad_norm": 0.0016060309717431664, - "learning_rate": 0.00019999583601752584, - "loss": 46.0, - "step": 38003 - }, - { - "epoch": 2.9056711967429325, - "grad_norm": 0.0008651249227114022, - "learning_rate": 0.0001999958357983271, - "loss": 46.0, - "step": 38004 - }, - { - "epoch": 2.9057476537263223, - "grad_norm": 0.0024534137919545174, - "learning_rate": 0.00019999583557912262, - "loss": 46.0, - "step": 38005 - }, - { - "epoch": 2.905824110709712, - "grad_norm": 0.0030491920188069344, - "learning_rate": 0.00019999583535991239, - "loss": 46.0, - "step": 38006 - }, - { - "epoch": 2.905900567693102, - "grad_norm": 0.00151356915012002, - "learning_rate": 0.00019999583514069636, - "loss": 46.0, - "step": 38007 - }, - { - "epoch": 2.9059770246764915, - "grad_norm": 0.001505090855062008, - "learning_rate": 0.00019999583492147455, - "loss": 46.0, - "step": 38008 - }, - { - "epoch": 2.906053481659881, - "grad_norm": 0.001288287341594696, - "learning_rate": 0.000199995834702247, - "loss": 46.0, - "step": 38009 - }, - { - "epoch": 2.9061299386432706, - "grad_norm": 0.006425090599805117, - "learning_rate": 0.00019999583448301368, - "loss": 46.0, - "step": 38010 - }, - { - "epoch": 2.9062063956266604, - "grad_norm": 0.0036532345693558455, - "learning_rate": 0.00019999583426377458, - "loss": 46.0, - "step": 38011 - }, - { - "epoch": 2.90628285261005, - "grad_norm": 0.00118697015568614, - "learning_rate": 0.00019999583404452974, - "loss": 46.0, - "step": 38012 - }, - { - "epoch": 2.90635930959344, - "grad_norm": 0.0025789779610931873, - "learning_rate": 0.00019999583382527907, - "loss": 46.0, - "step": 38013 - }, - { - "epoch": 2.9064357665768297, - "grad_norm": 0.0036896681413054466, - "learning_rate": 0.00019999583360602268, - "loss": 46.0, - "step": 38014 - }, - { - "epoch": 2.9065122235602194, - "grad_norm": 0.00167533615604043, - "learning_rate": 0.0001999958333867605, - "loss": 46.0, - "step": 38015 - }, - { - "epoch": 2.906588680543609, - "grad_norm": 0.002036773134022951, - "learning_rate": 0.00019999583316749256, - "loss": 46.0, - "step": 38016 - }, - { - "epoch": 2.906665137526999, - "grad_norm": 0.0014422225067391992, - "learning_rate": 0.00019999583294821882, - "loss": 46.0, - "step": 38017 - }, - { - "epoch": 2.9067415945103887, - "grad_norm": 0.003712414065375924, - "learning_rate": 0.00019999583272893934, - "loss": 46.0, - "step": 38018 - }, - { - "epoch": 2.9068180514937785, - "grad_norm": 0.0014143196167424321, - "learning_rate": 0.0001999958325096541, - "loss": 46.0, - "step": 38019 - }, - { - "epoch": 2.9068945084771682, - "grad_norm": 0.0036908360198140144, - "learning_rate": 0.0001999958322903631, - "loss": 46.0, - "step": 38020 - }, - { - "epoch": 2.906970965460558, - "grad_norm": 0.001010382897220552, - "learning_rate": 0.0001999958320710663, - "loss": 46.0, - "step": 38021 - }, - { - "epoch": 2.9070474224439473, - "grad_norm": 0.001173604978248477, - "learning_rate": 0.00019999583185176373, - "loss": 46.0, - "step": 38022 - }, - { - "epoch": 2.907123879427337, - "grad_norm": 0.0017818254418671131, - "learning_rate": 0.0001999958316324554, - "loss": 46.0, - "step": 38023 - }, - { - "epoch": 2.907200336410727, - "grad_norm": 0.001810151501558721, - "learning_rate": 0.00019999583141314128, - "loss": 46.0, - "step": 38024 - }, - { - "epoch": 2.9072767933941166, - "grad_norm": 0.004480066243559122, - "learning_rate": 0.00019999583119382144, - "loss": 46.0, - "step": 38025 - }, - { - "epoch": 2.9073532503775064, - "grad_norm": 0.0014266769867390394, - "learning_rate": 0.00019999583097449582, - "loss": 46.0, - "step": 38026 - }, - { - "epoch": 2.907429707360896, - "grad_norm": 0.001249079592525959, - "learning_rate": 0.00019999583075516438, - "loss": 46.0, - "step": 38027 - }, - { - "epoch": 2.907506164344286, - "grad_norm": 0.00151823740452528, - "learning_rate": 0.0001999958305358272, - "loss": 46.0, - "step": 38028 - }, - { - "epoch": 2.9075826213276756, - "grad_norm": 0.0009738160879351199, - "learning_rate": 0.00019999583031648425, - "loss": 46.0, - "step": 38029 - }, - { - "epoch": 2.907659078311065, - "grad_norm": 0.0022975686006247997, - "learning_rate": 0.00019999583009713557, - "loss": 46.0, - "step": 38030 - }, - { - "epoch": 2.9077355352944547, - "grad_norm": 0.0008797014015726745, - "learning_rate": 0.00019999582987778106, - "loss": 46.0, - "step": 38031 - }, - { - "epoch": 2.9078119922778445, - "grad_norm": 0.0026479896623641253, - "learning_rate": 0.00019999582965842083, - "loss": 46.0, - "step": 38032 - }, - { - "epoch": 2.9078884492612342, - "grad_norm": 0.0012797890231013298, - "learning_rate": 0.0001999958294390548, - "loss": 46.0, - "step": 38033 - }, - { - "epoch": 2.907964906244624, - "grad_norm": 0.0012818325776606798, - "learning_rate": 0.000199995829219683, - "loss": 46.0, - "step": 38034 - }, - { - "epoch": 2.9080413632280138, - "grad_norm": 0.0016048140823841095, - "learning_rate": 0.00019999582900030545, - "loss": 46.0, - "step": 38035 - }, - { - "epoch": 2.9081178202114035, - "grad_norm": 0.003517944598570466, - "learning_rate": 0.00019999582878092213, - "loss": 46.0, - "step": 38036 - }, - { - "epoch": 2.9081942771947933, - "grad_norm": 0.0025899375323206186, - "learning_rate": 0.00019999582856153304, - "loss": 46.0, - "step": 38037 - }, - { - "epoch": 2.908270734178183, - "grad_norm": 0.0008070202893577516, - "learning_rate": 0.00019999582834213814, - "loss": 46.0, - "step": 38038 - }, - { - "epoch": 2.908347191161573, - "grad_norm": 0.0022034223657101393, - "learning_rate": 0.00019999582812273752, - "loss": 46.0, - "step": 38039 - }, - { - "epoch": 2.9084236481449626, - "grad_norm": 0.0010655556106939912, - "learning_rate": 0.0001999958279033311, - "loss": 46.0, - "step": 38040 - }, - { - "epoch": 2.9085001051283523, - "grad_norm": 0.001035425579175353, - "learning_rate": 0.00019999582768391895, - "loss": 46.0, - "step": 38041 - }, - { - "epoch": 2.908576562111742, - "grad_norm": 0.002119320211932063, - "learning_rate": 0.00019999582746450098, - "loss": 46.0, - "step": 38042 - }, - { - "epoch": 2.908653019095132, - "grad_norm": 0.002724515274167061, - "learning_rate": 0.00019999582724507727, - "loss": 46.0, - "step": 38043 - }, - { - "epoch": 2.908729476078521, - "grad_norm": 0.0031197164207696915, - "learning_rate": 0.00019999582702564782, - "loss": 46.0, - "step": 38044 - }, - { - "epoch": 2.908805933061911, - "grad_norm": 0.004094405099749565, - "learning_rate": 0.00019999582680621254, - "loss": 46.0, - "step": 38045 - }, - { - "epoch": 2.9088823900453007, - "grad_norm": 0.0030211550183594227, - "learning_rate": 0.00019999582658677154, - "loss": 46.0, - "step": 38046 - }, - { - "epoch": 2.9089588470286905, - "grad_norm": 0.0005786955589428544, - "learning_rate": 0.00019999582636732473, - "loss": 46.0, - "step": 38047 - }, - { - "epoch": 2.90903530401208, - "grad_norm": 0.0018357059452682734, - "learning_rate": 0.00019999582614787218, - "loss": 46.0, - "step": 38048 - }, - { - "epoch": 2.90911176099547, - "grad_norm": 0.0007620866526849568, - "learning_rate": 0.00019999582592841386, - "loss": 46.0, - "step": 38049 - }, - { - "epoch": 2.9091882179788597, - "grad_norm": 0.0023715051356703043, - "learning_rate": 0.00019999582570894974, - "loss": 46.0, - "step": 38050 - }, - { - "epoch": 2.9092646749622495, - "grad_norm": 0.0006669636932201684, - "learning_rate": 0.00019999582548947987, - "loss": 46.0, - "step": 38051 - }, - { - "epoch": 2.909341131945639, - "grad_norm": 0.001595982350409031, - "learning_rate": 0.00019999582527000426, - "loss": 46.0, - "step": 38052 - }, - { - "epoch": 2.9094175889290286, - "grad_norm": 0.0029678745195269585, - "learning_rate": 0.00019999582505052284, - "loss": 46.0, - "step": 38053 - }, - { - "epoch": 2.9094940459124183, - "grad_norm": 0.0037072370760142803, - "learning_rate": 0.00019999582483103568, - "loss": 46.0, - "step": 38054 - }, - { - "epoch": 2.909570502895808, - "grad_norm": 0.005485964938998222, - "learning_rate": 0.00019999582461154272, - "loss": 46.0, - "step": 38055 - }, - { - "epoch": 2.909646959879198, - "grad_norm": 0.002377541037276387, - "learning_rate": 0.000199995824392044, - "loss": 46.0, - "step": 38056 - }, - { - "epoch": 2.9097234168625876, - "grad_norm": 0.001125634997151792, - "learning_rate": 0.00019999582417253953, - "loss": 46.0, - "step": 38057 - }, - { - "epoch": 2.9097998738459774, - "grad_norm": 0.003063374664634466, - "learning_rate": 0.00019999582395302927, - "loss": 46.0, - "step": 38058 - }, - { - "epoch": 2.909876330829367, - "grad_norm": 0.0015070941299200058, - "learning_rate": 0.00019999582373351324, - "loss": 46.0, - "step": 38059 - }, - { - "epoch": 2.909952787812757, - "grad_norm": 0.0013611374888569117, - "learning_rate": 0.00019999582351399147, - "loss": 46.0, - "step": 38060 - }, - { - "epoch": 2.9100292447961467, - "grad_norm": 0.0016747881891205907, - "learning_rate": 0.00019999582329446392, - "loss": 46.0, - "step": 38061 - }, - { - "epoch": 2.9101057017795364, - "grad_norm": 0.004746443126350641, - "learning_rate": 0.0001999958230749306, - "loss": 46.0, - "step": 38062 - }, - { - "epoch": 2.910182158762926, - "grad_norm": 0.004720636177808046, - "learning_rate": 0.0001999958228553915, - "loss": 46.0, - "step": 38063 - }, - { - "epoch": 2.910258615746316, - "grad_norm": 0.0030097602866590023, - "learning_rate": 0.0001999958226358466, - "loss": 46.0, - "step": 38064 - }, - { - "epoch": 2.9103350727297057, - "grad_norm": 0.0029530173633247614, - "learning_rate": 0.00019999582241629597, - "loss": 46.0, - "step": 38065 - }, - { - "epoch": 2.910411529713095, - "grad_norm": 0.0013610816095024347, - "learning_rate": 0.00019999582219673955, - "loss": 46.0, - "step": 38066 - }, - { - "epoch": 2.910487986696485, - "grad_norm": 0.0011563777225092053, - "learning_rate": 0.0001999958219771774, - "loss": 46.0, - "step": 38067 - }, - { - "epoch": 2.9105644436798745, - "grad_norm": 0.0008149848436005414, - "learning_rate": 0.00019999582175760945, - "loss": 46.0, - "step": 38068 - }, - { - "epoch": 2.9106409006632643, - "grad_norm": 0.00287242466583848, - "learning_rate": 0.00019999582153803575, - "loss": 46.0, - "step": 38069 - }, - { - "epoch": 2.910717357646654, - "grad_norm": 0.0007763186004012823, - "learning_rate": 0.00019999582131845624, - "loss": 46.0, - "step": 38070 - }, - { - "epoch": 2.910793814630044, - "grad_norm": 0.0009206904796883464, - "learning_rate": 0.000199995821098871, - "loss": 46.0, - "step": 38071 - }, - { - "epoch": 2.9108702716134336, - "grad_norm": 0.0020329696126282215, - "learning_rate": 0.00019999582087927998, - "loss": 46.0, - "step": 38072 - }, - { - "epoch": 2.9109467285968234, - "grad_norm": 0.0007907075341790915, - "learning_rate": 0.00019999582065968318, - "loss": 46.0, - "step": 38073 - }, - { - "epoch": 2.9110231855802127, - "grad_norm": 0.0014534153742715716, - "learning_rate": 0.00019999582044008063, - "loss": 46.0, - "step": 38074 - }, - { - "epoch": 2.9110996425636024, - "grad_norm": 0.0013247976312413812, - "learning_rate": 0.0001999958202204723, - "loss": 46.0, - "step": 38075 - }, - { - "epoch": 2.911176099546992, - "grad_norm": 0.0012230559950694442, - "learning_rate": 0.00019999582000085822, - "loss": 46.0, - "step": 38076 - }, - { - "epoch": 2.911252556530382, - "grad_norm": 0.0016545574180781841, - "learning_rate": 0.00019999581978123835, - "loss": 46.0, - "step": 38077 - }, - { - "epoch": 2.9113290135137717, - "grad_norm": 0.0034378371201455593, - "learning_rate": 0.0001999958195616127, - "loss": 46.0, - "step": 38078 - }, - { - "epoch": 2.9114054704971615, - "grad_norm": 0.002517619403079152, - "learning_rate": 0.00019999581934198132, - "loss": 46.0, - "step": 38079 - }, - { - "epoch": 2.9114819274805512, - "grad_norm": 0.0010219543473795056, - "learning_rate": 0.0001999958191223441, - "loss": 46.0, - "step": 38080 - }, - { - "epoch": 2.911558384463941, - "grad_norm": 0.004021560773253441, - "learning_rate": 0.00019999581890270117, - "loss": 46.0, - "step": 38081 - }, - { - "epoch": 2.9116348414473308, - "grad_norm": 0.0006769971805624664, - "learning_rate": 0.0001999958186830525, - "loss": 46.0, - "step": 38082 - }, - { - "epoch": 2.9117112984307205, - "grad_norm": 0.004257587715983391, - "learning_rate": 0.00019999581846339798, - "loss": 46.0, - "step": 38083 - }, - { - "epoch": 2.9117877554141103, - "grad_norm": 0.001606352161616087, - "learning_rate": 0.00019999581824373772, - "loss": 46.0, - "step": 38084 - }, - { - "epoch": 2.9118642123975, - "grad_norm": 0.0012529935920611024, - "learning_rate": 0.0001999958180240717, - "loss": 46.0, - "step": 38085 - }, - { - "epoch": 2.91194066938089, - "grad_norm": 0.0011989392805844545, - "learning_rate": 0.00019999581780439992, - "loss": 46.0, - "step": 38086 - }, - { - "epoch": 2.9120171263642796, - "grad_norm": 0.004830826073884964, - "learning_rate": 0.00019999581758472237, - "loss": 46.0, - "step": 38087 - }, - { - "epoch": 2.912093583347669, - "grad_norm": 0.003272679867222905, - "learning_rate": 0.00019999581736503905, - "loss": 46.0, - "step": 38088 - }, - { - "epoch": 2.9121700403310586, - "grad_norm": 0.010868401266634464, - "learning_rate": 0.00019999581714534993, - "loss": 46.0, - "step": 38089 - }, - { - "epoch": 2.9122464973144484, - "grad_norm": 0.0019055166048929095, - "learning_rate": 0.00019999581692565506, - "loss": 46.0, - "step": 38090 - }, - { - "epoch": 2.912322954297838, - "grad_norm": 0.0030960282310843468, - "learning_rate": 0.00019999581670595445, - "loss": 46.0, - "step": 38091 - }, - { - "epoch": 2.912399411281228, - "grad_norm": 0.0014686177019029856, - "learning_rate": 0.00019999581648624804, - "loss": 46.0, - "step": 38092 - }, - { - "epoch": 2.9124758682646177, - "grad_norm": 0.003918887116014957, - "learning_rate": 0.00019999581626653585, - "loss": 46.0, - "step": 38093 - }, - { - "epoch": 2.9125523252480074, - "grad_norm": 0.0009708001161925495, - "learning_rate": 0.00019999581604681791, - "loss": 46.0, - "step": 38094 - }, - { - "epoch": 2.912628782231397, - "grad_norm": 0.005467913579195738, - "learning_rate": 0.0001999958158270942, - "loss": 46.0, - "step": 38095 - }, - { - "epoch": 2.9127052392147865, - "grad_norm": 0.0030989013612270355, - "learning_rate": 0.00019999581560736473, - "loss": 46.0, - "step": 38096 - }, - { - "epoch": 2.9127816961981763, - "grad_norm": 0.0029152007773518562, - "learning_rate": 0.00019999581538762947, - "loss": 46.0, - "step": 38097 - }, - { - "epoch": 2.912858153181566, - "grad_norm": 0.0010716785909608006, - "learning_rate": 0.00019999581516788844, - "loss": 46.0, - "step": 38098 - }, - { - "epoch": 2.912934610164956, - "grad_norm": 0.0016080865170806646, - "learning_rate": 0.00019999581494814167, - "loss": 46.0, - "step": 38099 - }, - { - "epoch": 2.9130110671483456, - "grad_norm": 0.0011618132703006268, - "learning_rate": 0.0001999958147283891, - "loss": 46.0, - "step": 38100 - }, - { - "epoch": 2.9130875241317353, - "grad_norm": 0.0025585044641047716, - "learning_rate": 0.00019999581450863077, - "loss": 46.0, - "step": 38101 - }, - { - "epoch": 2.913163981115125, - "grad_norm": 0.0016671492485329509, - "learning_rate": 0.00019999581428886668, - "loss": 46.0, - "step": 38102 - }, - { - "epoch": 2.913240438098515, - "grad_norm": 0.0006749452440999448, - "learning_rate": 0.0001999958140690968, - "loss": 46.0, - "step": 38103 - }, - { - "epoch": 2.9133168950819046, - "grad_norm": 0.004145908635109663, - "learning_rate": 0.00019999581384932117, - "loss": 46.0, - "step": 38104 - }, - { - "epoch": 2.9133933520652944, - "grad_norm": 0.004115253686904907, - "learning_rate": 0.00019999581362953976, - "loss": 46.0, - "step": 38105 - }, - { - "epoch": 2.913469809048684, - "grad_norm": 0.004197151865810156, - "learning_rate": 0.0001999958134097526, - "loss": 46.0, - "step": 38106 - }, - { - "epoch": 2.913546266032074, - "grad_norm": 0.0025055333971977234, - "learning_rate": 0.00019999581318995966, - "loss": 46.0, - "step": 38107 - }, - { - "epoch": 2.9136227230154637, - "grad_norm": 0.0015900548314675689, - "learning_rate": 0.00019999581297016096, - "loss": 46.0, - "step": 38108 - }, - { - "epoch": 2.9136991799988534, - "grad_norm": 0.005039710085839033, - "learning_rate": 0.00019999581275035645, - "loss": 46.0, - "step": 38109 - }, - { - "epoch": 2.9137756369822427, - "grad_norm": 0.0023497866932302713, - "learning_rate": 0.0001999958125305462, - "loss": 46.0, - "step": 38110 - }, - { - "epoch": 2.9138520939656325, - "grad_norm": 0.0014665876515209675, - "learning_rate": 0.0001999958123107302, - "loss": 46.0, - "step": 38111 - }, - { - "epoch": 2.9139285509490223, - "grad_norm": 0.004661711398512125, - "learning_rate": 0.00019999581209090842, - "loss": 46.0, - "step": 38112 - }, - { - "epoch": 2.914005007932412, - "grad_norm": 0.0030364585109055042, - "learning_rate": 0.00019999581187108082, - "loss": 46.0, - "step": 38113 - }, - { - "epoch": 2.9140814649158018, - "grad_norm": 0.0005936098168604076, - "learning_rate": 0.00019999581165124753, - "loss": 46.0, - "step": 38114 - }, - { - "epoch": 2.9141579218991915, - "grad_norm": 0.0026004870887845755, - "learning_rate": 0.00019999581143140843, - "loss": 46.0, - "step": 38115 - }, - { - "epoch": 2.9142343788825813, - "grad_norm": 0.002274326514452696, - "learning_rate": 0.00019999581121156354, - "loss": 46.0, - "step": 38116 - }, - { - "epoch": 2.914310835865971, - "grad_norm": 0.001378798857331276, - "learning_rate": 0.00019999581099171292, - "loss": 46.0, - "step": 38117 - }, - { - "epoch": 2.9143872928493604, - "grad_norm": 0.0027540642768144608, - "learning_rate": 0.0001999958107718565, - "loss": 46.0, - "step": 38118 - }, - { - "epoch": 2.91446374983275, - "grad_norm": 0.0010525084799155593, - "learning_rate": 0.00019999581055199435, - "loss": 46.0, - "step": 38119 - }, - { - "epoch": 2.91454020681614, - "grad_norm": 0.001021408592350781, - "learning_rate": 0.0001999958103321264, - "loss": 46.0, - "step": 38120 - }, - { - "epoch": 2.9146166637995297, - "grad_norm": 0.0026804169174283743, - "learning_rate": 0.00019999581011225268, - "loss": 46.0, - "step": 38121 - }, - { - "epoch": 2.9146931207829194, - "grad_norm": 0.005769689101725817, - "learning_rate": 0.00019999580989237318, - "loss": 46.0, - "step": 38122 - }, - { - "epoch": 2.914769577766309, - "grad_norm": 0.0022064405493438244, - "learning_rate": 0.00019999580967248798, - "loss": 46.0, - "step": 38123 - }, - { - "epoch": 2.914846034749699, - "grad_norm": 0.0006371671333909035, - "learning_rate": 0.00019999580945259692, - "loss": 46.0, - "step": 38124 - }, - { - "epoch": 2.9149224917330887, - "grad_norm": 0.003425972070544958, - "learning_rate": 0.00019999580923270015, - "loss": 46.0, - "step": 38125 - }, - { - "epoch": 2.9149989487164785, - "grad_norm": 0.0011123217409476638, - "learning_rate": 0.00019999580901279758, - "loss": 46.0, - "step": 38126 - }, - { - "epoch": 2.9150754056998682, - "grad_norm": 0.0024983014445751905, - "learning_rate": 0.00019999580879288926, - "loss": 46.0, - "step": 38127 - }, - { - "epoch": 2.915151862683258, - "grad_norm": 0.00330982799641788, - "learning_rate": 0.00019999580857297516, - "loss": 46.0, - "step": 38128 - }, - { - "epoch": 2.9152283196666477, - "grad_norm": 0.004173245280981064, - "learning_rate": 0.0001999958083530553, - "loss": 46.0, - "step": 38129 - }, - { - "epoch": 2.9153047766500375, - "grad_norm": 0.0008257174049504101, - "learning_rate": 0.00019999580813312965, - "loss": 46.0, - "step": 38130 - }, - { - "epoch": 2.915381233633427, - "grad_norm": 0.0032756465952843428, - "learning_rate": 0.00019999580791319824, - "loss": 46.0, - "step": 38131 - }, - { - "epoch": 2.9154576906168166, - "grad_norm": 0.003746794769540429, - "learning_rate": 0.00019999580769326108, - "loss": 46.0, - "step": 38132 - }, - { - "epoch": 2.9155341476002063, - "grad_norm": 0.00330060045234859, - "learning_rate": 0.00019999580747331815, - "loss": 46.0, - "step": 38133 - }, - { - "epoch": 2.915610604583596, - "grad_norm": 0.0029367574024945498, - "learning_rate": 0.00019999580725336941, - "loss": 46.0, - "step": 38134 - }, - { - "epoch": 2.915687061566986, - "grad_norm": 0.0032441681250929832, - "learning_rate": 0.00019999580703341496, - "loss": 46.0, - "step": 38135 - }, - { - "epoch": 2.9157635185503756, - "grad_norm": 0.0023535985965281725, - "learning_rate": 0.00019999580681345468, - "loss": 46.0, - "step": 38136 - }, - { - "epoch": 2.9158399755337654, - "grad_norm": 0.0027953076642006636, - "learning_rate": 0.00019999580659348868, - "loss": 46.0, - "step": 38137 - }, - { - "epoch": 2.915916432517155, - "grad_norm": 0.0006844722665846348, - "learning_rate": 0.00019999580637351688, - "loss": 46.0, - "step": 38138 - }, - { - "epoch": 2.915992889500545, - "grad_norm": 0.008718026801943779, - "learning_rate": 0.00019999580615353933, - "loss": 46.0, - "step": 38139 - }, - { - "epoch": 2.9160693464839342, - "grad_norm": 0.001414721948094666, - "learning_rate": 0.00019999580593355602, - "loss": 46.0, - "step": 38140 - }, - { - "epoch": 2.916145803467324, - "grad_norm": 0.0025321547873318195, - "learning_rate": 0.0001999958057135669, - "loss": 46.0, - "step": 38141 - }, - { - "epoch": 2.9162222604507138, - "grad_norm": 0.0033333799801766872, - "learning_rate": 0.00019999580549357203, - "loss": 46.0, - "step": 38142 - }, - { - "epoch": 2.9162987174341035, - "grad_norm": 0.006616013124585152, - "learning_rate": 0.0001999958052735714, - "loss": 46.0, - "step": 38143 - }, - { - "epoch": 2.9163751744174933, - "grad_norm": 0.00655062822625041, - "learning_rate": 0.00019999580505356498, - "loss": 46.0, - "step": 38144 - }, - { - "epoch": 2.916451631400883, - "grad_norm": 0.0021653359290212393, - "learning_rate": 0.00019999580483355282, - "loss": 46.0, - "step": 38145 - }, - { - "epoch": 2.916528088384273, - "grad_norm": 0.0022567014675587416, - "learning_rate": 0.00019999580461353488, - "loss": 46.0, - "step": 38146 - }, - { - "epoch": 2.9166045453676626, - "grad_norm": 0.001058167777955532, - "learning_rate": 0.00019999580439351118, - "loss": 46.0, - "step": 38147 - }, - { - "epoch": 2.9166810023510523, - "grad_norm": 0.008150795474648476, - "learning_rate": 0.0001999958041734817, - "loss": 46.0, - "step": 38148 - }, - { - "epoch": 2.916757459334442, - "grad_norm": 0.0031658122316002846, - "learning_rate": 0.00019999580395344647, - "loss": 46.0, - "step": 38149 - }, - { - "epoch": 2.916833916317832, - "grad_norm": 0.0015234394231811166, - "learning_rate": 0.00019999580373340542, - "loss": 46.0, - "step": 38150 - }, - { - "epoch": 2.9169103733012216, - "grad_norm": 0.0029059748630970716, - "learning_rate": 0.00019999580351335862, - "loss": 46.0, - "step": 38151 - }, - { - "epoch": 2.9169868302846114, - "grad_norm": 0.004571816883981228, - "learning_rate": 0.00019999580329330607, - "loss": 46.0, - "step": 38152 - }, - { - "epoch": 2.9170632872680007, - "grad_norm": 0.003188816364854574, - "learning_rate": 0.00019999580307324775, - "loss": 46.0, - "step": 38153 - }, - { - "epoch": 2.9171397442513904, - "grad_norm": 0.001972890691831708, - "learning_rate": 0.00019999580285318363, - "loss": 46.0, - "step": 38154 - }, - { - "epoch": 2.91721620123478, - "grad_norm": 0.0009233566815964878, - "learning_rate": 0.0001999958026331138, - "loss": 46.0, - "step": 38155 - }, - { - "epoch": 2.91729265821817, - "grad_norm": 0.003466230584308505, - "learning_rate": 0.00019999580241303818, - "loss": 46.0, - "step": 38156 - }, - { - "epoch": 2.9173691152015597, - "grad_norm": 0.003325595986098051, - "learning_rate": 0.00019999580219295674, - "loss": 46.0, - "step": 38157 - }, - { - "epoch": 2.9174455721849495, - "grad_norm": 0.002221320290118456, - "learning_rate": 0.00019999580197286958, - "loss": 46.0, - "step": 38158 - }, - { - "epoch": 2.9175220291683392, - "grad_norm": 0.0011871540918946266, - "learning_rate": 0.00019999580175277662, - "loss": 46.0, - "step": 38159 - }, - { - "epoch": 2.917598486151729, - "grad_norm": 0.0027356965001672506, - "learning_rate": 0.00019999580153267794, - "loss": 46.0, - "step": 38160 - }, - { - "epoch": 2.9176749431351188, - "grad_norm": 0.0008755429880693555, - "learning_rate": 0.00019999580131257344, - "loss": 46.0, - "step": 38161 - }, - { - "epoch": 2.917751400118508, - "grad_norm": 0.001745652174577117, - "learning_rate": 0.00019999580109246318, - "loss": 46.0, - "step": 38162 - }, - { - "epoch": 2.917827857101898, - "grad_norm": 0.0013858848251402378, - "learning_rate": 0.00019999580087234718, - "loss": 46.0, - "step": 38163 - }, - { - "epoch": 2.9179043140852876, - "grad_norm": 0.0022346859332174063, - "learning_rate": 0.00019999580065222539, - "loss": 46.0, - "step": 38164 - }, - { - "epoch": 2.9179807710686774, - "grad_norm": 0.002344232052564621, - "learning_rate": 0.0001999958004320978, - "loss": 46.0, - "step": 38165 - }, - { - "epoch": 2.918057228052067, - "grad_norm": 0.0008668802911415696, - "learning_rate": 0.0001999958002119645, - "loss": 46.0, - "step": 38166 - }, - { - "epoch": 2.918133685035457, - "grad_norm": 0.0022169763687998056, - "learning_rate": 0.0001999957999918254, - "loss": 46.0, - "step": 38167 - }, - { - "epoch": 2.9182101420188467, - "grad_norm": 0.007605826016515493, - "learning_rate": 0.00019999579977168053, - "loss": 46.0, - "step": 38168 - }, - { - "epoch": 2.9182865990022364, - "grad_norm": 0.0050665633752942085, - "learning_rate": 0.0001999957995515299, - "loss": 46.0, - "step": 38169 - }, - { - "epoch": 2.918363055985626, - "grad_norm": 0.004774228669703007, - "learning_rate": 0.0001999957993313735, - "loss": 46.0, - "step": 38170 - }, - { - "epoch": 2.918439512969016, - "grad_norm": 0.0035356131847947836, - "learning_rate": 0.00019999579911121132, - "loss": 46.0, - "step": 38171 - }, - { - "epoch": 2.9185159699524057, - "grad_norm": 0.0021626397501677275, - "learning_rate": 0.0001999957988910434, - "loss": 46.0, - "step": 38172 - }, - { - "epoch": 2.9185924269357955, - "grad_norm": 0.004352611489593983, - "learning_rate": 0.00019999579867086969, - "loss": 46.0, - "step": 38173 - }, - { - "epoch": 2.918668883919185, - "grad_norm": 0.0021969517692923546, - "learning_rate": 0.0001999957984506902, - "loss": 46.0, - "step": 38174 - }, - { - "epoch": 2.9187453409025745, - "grad_norm": 0.004011734388768673, - "learning_rate": 0.00019999579823050495, - "loss": 46.0, - "step": 38175 - }, - { - "epoch": 2.9188217978859643, - "grad_norm": 0.004317046608775854, - "learning_rate": 0.00019999579801031393, - "loss": 46.0, - "step": 38176 - }, - { - "epoch": 2.918898254869354, - "grad_norm": 0.0008651851676404476, - "learning_rate": 0.00019999579779011716, - "loss": 46.0, - "step": 38177 - }, - { - "epoch": 2.918974711852744, - "grad_norm": 0.0007842287886887789, - "learning_rate": 0.00019999579756991458, - "loss": 46.0, - "step": 38178 - }, - { - "epoch": 2.9190511688361336, - "grad_norm": 0.002279200591146946, - "learning_rate": 0.00019999579734970627, - "loss": 46.0, - "step": 38179 - }, - { - "epoch": 2.9191276258195233, - "grad_norm": 0.0016729203052818775, - "learning_rate": 0.00019999579712949217, - "loss": 46.0, - "step": 38180 - }, - { - "epoch": 2.919204082802913, - "grad_norm": 0.0012921706074848771, - "learning_rate": 0.00019999579690927228, - "loss": 46.0, - "step": 38181 - }, - { - "epoch": 2.919280539786303, - "grad_norm": 0.0014163557207211852, - "learning_rate": 0.00019999579668904667, - "loss": 46.0, - "step": 38182 - }, - { - "epoch": 2.919356996769692, - "grad_norm": 0.003808164270594716, - "learning_rate": 0.00019999579646881526, - "loss": 46.0, - "step": 38183 - }, - { - "epoch": 2.919433453753082, - "grad_norm": 0.0013044095830991864, - "learning_rate": 0.0001999957962485781, - "loss": 46.0, - "step": 38184 - }, - { - "epoch": 2.9195099107364717, - "grad_norm": 0.002274476457387209, - "learning_rate": 0.00019999579602833514, - "loss": 46.0, - "step": 38185 - }, - { - "epoch": 2.9195863677198615, - "grad_norm": 0.004797384608536959, - "learning_rate": 0.00019999579580808643, - "loss": 46.0, - "step": 38186 - }, - { - "epoch": 2.9196628247032512, - "grad_norm": 0.0006925598718225956, - "learning_rate": 0.00019999579558783195, - "loss": 46.0, - "step": 38187 - }, - { - "epoch": 2.919739281686641, - "grad_norm": 0.0007656869711354375, - "learning_rate": 0.0001999957953675717, - "loss": 46.0, - "step": 38188 - }, - { - "epoch": 2.9198157386700307, - "grad_norm": 0.0015074973925948143, - "learning_rate": 0.00019999579514730568, - "loss": 46.0, - "step": 38189 - }, - { - "epoch": 2.9198921956534205, - "grad_norm": 0.0014720710460096598, - "learning_rate": 0.0001999957949270339, - "loss": 46.0, - "step": 38190 - }, - { - "epoch": 2.9199686526368103, - "grad_norm": 0.0015082076424732804, - "learning_rate": 0.00019999579470675633, - "loss": 46.0, - "step": 38191 - }, - { - "epoch": 2.9200451096202, - "grad_norm": 0.0023782411590218544, - "learning_rate": 0.00019999579448647304, - "loss": 46.0, - "step": 38192 - }, - { - "epoch": 2.92012156660359, - "grad_norm": 0.0019532095175236464, - "learning_rate": 0.00019999579426618392, - "loss": 46.0, - "step": 38193 - }, - { - "epoch": 2.9201980235869796, - "grad_norm": 0.005382715258747339, - "learning_rate": 0.00019999579404588906, - "loss": 46.0, - "step": 38194 - }, - { - "epoch": 2.9202744805703693, - "grad_norm": 0.003931119572371244, - "learning_rate": 0.00019999579382558842, - "loss": 46.0, - "step": 38195 - }, - { - "epoch": 2.920350937553759, - "grad_norm": 0.0032313985284417868, - "learning_rate": 0.00019999579360528203, - "loss": 46.0, - "step": 38196 - }, - { - "epoch": 2.9204273945371484, - "grad_norm": 0.0013780139852315187, - "learning_rate": 0.00019999579338496982, - "loss": 46.0, - "step": 38197 - }, - { - "epoch": 2.920503851520538, - "grad_norm": 0.001275938586331904, - "learning_rate": 0.0001999957931646519, - "loss": 46.0, - "step": 38198 - }, - { - "epoch": 2.920580308503928, - "grad_norm": 0.0020545879378914833, - "learning_rate": 0.00019999579294432818, - "loss": 46.0, - "step": 38199 - }, - { - "epoch": 2.9206567654873177, - "grad_norm": 0.002381442114710808, - "learning_rate": 0.00019999579272399873, - "loss": 46.0, - "step": 38200 - }, - { - "epoch": 2.9207332224707074, - "grad_norm": 0.001282895216718316, - "learning_rate": 0.00019999579250366348, - "loss": 46.0, - "step": 38201 - }, - { - "epoch": 2.920809679454097, - "grad_norm": 0.0022798131685703993, - "learning_rate": 0.00019999579228332243, - "loss": 46.0, - "step": 38202 - }, - { - "epoch": 2.920886136437487, - "grad_norm": 0.0041446988470852375, - "learning_rate": 0.00019999579206297569, - "loss": 46.0, - "step": 38203 - }, - { - "epoch": 2.9209625934208767, - "grad_norm": 0.0007441206835210323, - "learning_rate": 0.00019999579184262309, - "loss": 46.0, - "step": 38204 - }, - { - "epoch": 2.921039050404266, - "grad_norm": 0.0034983546938747168, - "learning_rate": 0.0001999957916222648, - "loss": 46.0, - "step": 38205 - }, - { - "epoch": 2.921115507387656, - "grad_norm": 0.0018707457929849625, - "learning_rate": 0.00019999579140190068, - "loss": 46.0, - "step": 38206 - }, - { - "epoch": 2.9211919643710456, - "grad_norm": 0.0012298153014853597, - "learning_rate": 0.0001999957911815308, - "loss": 46.0, - "step": 38207 - }, - { - "epoch": 2.9212684213544353, - "grad_norm": 0.0008012565667741001, - "learning_rate": 0.00019999579096115517, - "loss": 46.0, - "step": 38208 - }, - { - "epoch": 2.921344878337825, - "grad_norm": 0.0008982726722024381, - "learning_rate": 0.00019999579074077376, - "loss": 46.0, - "step": 38209 - }, - { - "epoch": 2.921421335321215, - "grad_norm": 0.004667261149734259, - "learning_rate": 0.0001999957905203866, - "loss": 46.0, - "step": 38210 - }, - { - "epoch": 2.9214977923046046, - "grad_norm": 0.001954728504642844, - "learning_rate": 0.00019999579029999367, - "loss": 46.0, - "step": 38211 - }, - { - "epoch": 2.9215742492879944, - "grad_norm": 0.0027920694556087255, - "learning_rate": 0.00019999579007959497, - "loss": 46.0, - "step": 38212 - }, - { - "epoch": 2.921650706271384, - "grad_norm": 0.006857927888631821, - "learning_rate": 0.00019999578985919046, - "loss": 46.0, - "step": 38213 - }, - { - "epoch": 2.921727163254774, - "grad_norm": 0.0008770758286118507, - "learning_rate": 0.00019999578963878024, - "loss": 46.0, - "step": 38214 - }, - { - "epoch": 2.9218036202381636, - "grad_norm": 0.003990574274212122, - "learning_rate": 0.0001999957894183642, - "loss": 46.0, - "step": 38215 - }, - { - "epoch": 2.9218800772215534, - "grad_norm": 0.0028714665677398443, - "learning_rate": 0.00019999578919794244, - "loss": 46.0, - "step": 38216 - }, - { - "epoch": 2.921956534204943, - "grad_norm": 0.0032724132761359215, - "learning_rate": 0.00019999578897751484, - "loss": 46.0, - "step": 38217 - }, - { - "epoch": 2.922032991188333, - "grad_norm": 0.000939900754019618, - "learning_rate": 0.00019999578875708153, - "loss": 46.0, - "step": 38218 - }, - { - "epoch": 2.9221094481717222, - "grad_norm": 0.0038132353220134974, - "learning_rate": 0.00019999578853664246, - "loss": 46.0, - "step": 38219 - }, - { - "epoch": 2.922185905155112, - "grad_norm": 0.0015964631456881762, - "learning_rate": 0.0001999957883161976, - "loss": 46.0, - "step": 38220 - }, - { - "epoch": 2.9222623621385018, - "grad_norm": 0.0014915929641574621, - "learning_rate": 0.00019999578809574696, - "loss": 46.0, - "step": 38221 - }, - { - "epoch": 2.9223388191218915, - "grad_norm": 0.0050577991642057896, - "learning_rate": 0.00019999578787529055, - "loss": 46.0, - "step": 38222 - }, - { - "epoch": 2.9224152761052813, - "grad_norm": 0.003150764387100935, - "learning_rate": 0.0001999957876548284, - "loss": 46.0, - "step": 38223 - }, - { - "epoch": 2.922491733088671, - "grad_norm": 0.002196155721321702, - "learning_rate": 0.00019999578743436046, - "loss": 46.0, - "step": 38224 - }, - { - "epoch": 2.922568190072061, - "grad_norm": 0.002711218548938632, - "learning_rate": 0.00019999578721388673, - "loss": 46.0, - "step": 38225 - }, - { - "epoch": 2.9226446470554506, - "grad_norm": 0.0012895666295662522, - "learning_rate": 0.00019999578699340728, - "loss": 46.0, - "step": 38226 - }, - { - "epoch": 2.92272110403884, - "grad_norm": 0.003579151351004839, - "learning_rate": 0.000199995786772922, - "loss": 46.0, - "step": 38227 - }, - { - "epoch": 2.9227975610222297, - "grad_norm": 0.0019901602063328028, - "learning_rate": 0.00019999578655243097, - "loss": 46.0, - "step": 38228 - }, - { - "epoch": 2.9228740180056194, - "grad_norm": 0.0013206868898123503, - "learning_rate": 0.0001999957863319342, - "loss": 46.0, - "step": 38229 - }, - { - "epoch": 2.922950474989009, - "grad_norm": 0.0025013729464262724, - "learning_rate": 0.00019999578611143163, - "loss": 46.0, - "step": 38230 - }, - { - "epoch": 2.923026931972399, - "grad_norm": 0.004589189775288105, - "learning_rate": 0.00019999578589092331, - "loss": 46.0, - "step": 38231 - }, - { - "epoch": 2.9231033889557887, - "grad_norm": 0.004183898214250803, - "learning_rate": 0.00019999578567040922, - "loss": 46.0, - "step": 38232 - }, - { - "epoch": 2.9231798459391785, - "grad_norm": 0.00264453305862844, - "learning_rate": 0.00019999578544988936, - "loss": 46.0, - "step": 38233 - }, - { - "epoch": 2.923256302922568, - "grad_norm": 0.0007998436340130866, - "learning_rate": 0.00019999578522936372, - "loss": 46.0, - "step": 38234 - }, - { - "epoch": 2.923332759905958, - "grad_norm": 0.0016832256224006414, - "learning_rate": 0.0001999957850088323, - "loss": 46.0, - "step": 38235 - }, - { - "epoch": 2.9234092168893477, - "grad_norm": 0.0012537117581814528, - "learning_rate": 0.00019999578478829515, - "loss": 46.0, - "step": 38236 - }, - { - "epoch": 2.9234856738727375, - "grad_norm": 0.0024934527464210987, - "learning_rate": 0.00019999578456775222, - "loss": 46.0, - "step": 38237 - }, - { - "epoch": 2.9235621308561273, - "grad_norm": 0.011158591136336327, - "learning_rate": 0.0001999957843472035, - "loss": 46.0, - "step": 38238 - }, - { - "epoch": 2.923638587839517, - "grad_norm": 0.0014899068046361208, - "learning_rate": 0.000199995784126649, - "loss": 46.0, - "step": 38239 - }, - { - "epoch": 2.923715044822907, - "grad_norm": 0.0007623178535141051, - "learning_rate": 0.00019999578390608876, - "loss": 46.0, - "step": 38240 - }, - { - "epoch": 2.923791501806296, - "grad_norm": 0.0024979696609079838, - "learning_rate": 0.00019999578368552277, - "loss": 46.0, - "step": 38241 - }, - { - "epoch": 2.923867958789686, - "grad_norm": 0.006785134319216013, - "learning_rate": 0.00019999578346495097, - "loss": 46.0, - "step": 38242 - }, - { - "epoch": 2.9239444157730756, - "grad_norm": 0.0008517408859916031, - "learning_rate": 0.00019999578324437342, - "loss": 46.0, - "step": 38243 - }, - { - "epoch": 2.9240208727564654, - "grad_norm": 0.0028591016307473183, - "learning_rate": 0.0001999957830237901, - "loss": 46.0, - "step": 38244 - }, - { - "epoch": 2.924097329739855, - "grad_norm": 0.0007240738486871123, - "learning_rate": 0.000199995782803201, - "loss": 46.0, - "step": 38245 - }, - { - "epoch": 2.924173786723245, - "grad_norm": 0.0021234822925180197, - "learning_rate": 0.00019999578258260613, - "loss": 46.0, - "step": 38246 - }, - { - "epoch": 2.9242502437066347, - "grad_norm": 0.002773014595732093, - "learning_rate": 0.0001999957823620055, - "loss": 46.0, - "step": 38247 - }, - { - "epoch": 2.9243267006900244, - "grad_norm": 0.0006871841032989323, - "learning_rate": 0.0001999957821413991, - "loss": 46.0, - "step": 38248 - }, - { - "epoch": 2.9244031576734137, - "grad_norm": 0.0046259560622274876, - "learning_rate": 0.00019999578192078692, - "loss": 46.0, - "step": 38249 - }, - { - "epoch": 2.9244796146568035, - "grad_norm": 0.005346689838916063, - "learning_rate": 0.00019999578170016896, - "loss": 46.0, - "step": 38250 - }, - { - "epoch": 2.9245560716401933, - "grad_norm": 0.0007818485610187054, - "learning_rate": 0.00019999578147954529, - "loss": 46.0, - "step": 38251 - }, - { - "epoch": 2.924632528623583, - "grad_norm": 0.002096218289807439, - "learning_rate": 0.0001999957812589158, - "loss": 46.0, - "step": 38252 - }, - { - "epoch": 2.924708985606973, - "grad_norm": 0.0013558358186855912, - "learning_rate": 0.00019999578103828053, - "loss": 46.0, - "step": 38253 - }, - { - "epoch": 2.9247854425903625, - "grad_norm": 0.0022412913385778666, - "learning_rate": 0.00019999578081763953, - "loss": 46.0, - "step": 38254 - }, - { - "epoch": 2.9248618995737523, - "grad_norm": 0.002418191870674491, - "learning_rate": 0.00019999578059699276, - "loss": 46.0, - "step": 38255 - }, - { - "epoch": 2.924938356557142, - "grad_norm": 0.005712719634175301, - "learning_rate": 0.0001999957803763402, - "loss": 46.0, - "step": 38256 - }, - { - "epoch": 2.925014813540532, - "grad_norm": 0.0008087372407317162, - "learning_rate": 0.00019999578015568185, - "loss": 46.0, - "step": 38257 - }, - { - "epoch": 2.9250912705239216, - "grad_norm": 0.0019517212640494108, - "learning_rate": 0.00019999577993501776, - "loss": 46.0, - "step": 38258 - }, - { - "epoch": 2.9251677275073114, - "grad_norm": 0.0010789376683533192, - "learning_rate": 0.0001999957797143479, - "loss": 46.0, - "step": 38259 - }, - { - "epoch": 2.925244184490701, - "grad_norm": 0.003405132330954075, - "learning_rate": 0.00019999577949367226, - "loss": 46.0, - "step": 38260 - }, - { - "epoch": 2.925320641474091, - "grad_norm": 0.0007047481485642493, - "learning_rate": 0.00019999577927299087, - "loss": 46.0, - "step": 38261 - }, - { - "epoch": 2.92539709845748, - "grad_norm": 0.001475622644647956, - "learning_rate": 0.00019999577905230372, - "loss": 46.0, - "step": 38262 - }, - { - "epoch": 2.92547355544087, - "grad_norm": 0.00219365069642663, - "learning_rate": 0.00019999577883161076, - "loss": 46.0, - "step": 38263 - }, - { - "epoch": 2.9255500124242597, - "grad_norm": 0.002794957719743252, - "learning_rate": 0.00019999577861091203, - "loss": 46.0, - "step": 38264 - }, - { - "epoch": 2.9256264694076495, - "grad_norm": 0.0022276996169239283, - "learning_rate": 0.00019999577839020755, - "loss": 46.0, - "step": 38265 - }, - { - "epoch": 2.9257029263910392, - "grad_norm": 0.0016185338608920574, - "learning_rate": 0.0001999957781694973, - "loss": 46.0, - "step": 38266 - }, - { - "epoch": 2.925779383374429, - "grad_norm": 0.002163441153243184, - "learning_rate": 0.0001999957779487813, - "loss": 46.0, - "step": 38267 - }, - { - "epoch": 2.9258558403578188, - "grad_norm": 0.0004720076103694737, - "learning_rate": 0.00019999577772805954, - "loss": 46.0, - "step": 38268 - }, - { - "epoch": 2.9259322973412085, - "grad_norm": 0.002807009732350707, - "learning_rate": 0.00019999577750733197, - "loss": 46.0, - "step": 38269 - }, - { - "epoch": 2.9260087543245983, - "grad_norm": 0.004007470794022083, - "learning_rate": 0.00019999577728659865, - "loss": 46.0, - "step": 38270 - }, - { - "epoch": 2.9260852113079876, - "grad_norm": 0.00235898420214653, - "learning_rate": 0.00019999577706585953, - "loss": 46.0, - "step": 38271 - }, - { - "epoch": 2.9261616682913774, - "grad_norm": 0.0013184536946937442, - "learning_rate": 0.00019999577684511467, - "loss": 46.0, - "step": 38272 - }, - { - "epoch": 2.926238125274767, - "grad_norm": 0.0012576011940836906, - "learning_rate": 0.00019999577662436403, - "loss": 46.0, - "step": 38273 - }, - { - "epoch": 2.926314582258157, - "grad_norm": 0.002534965518862009, - "learning_rate": 0.00019999577640360765, - "loss": 46.0, - "step": 38274 - }, - { - "epoch": 2.9263910392415466, - "grad_norm": 0.0013629967579618096, - "learning_rate": 0.0001999957761828455, - "loss": 46.0, - "step": 38275 - }, - { - "epoch": 2.9264674962249364, - "grad_norm": 0.003923827316612005, - "learning_rate": 0.00019999577596207754, - "loss": 46.0, - "step": 38276 - }, - { - "epoch": 2.926543953208326, - "grad_norm": 0.002120506949722767, - "learning_rate": 0.00019999577574130383, - "loss": 46.0, - "step": 38277 - }, - { - "epoch": 2.926620410191716, - "grad_norm": 0.0011064867721870542, - "learning_rate": 0.00019999577552052436, - "loss": 46.0, - "step": 38278 - }, - { - "epoch": 2.9266968671751057, - "grad_norm": 0.0023922501131892204, - "learning_rate": 0.0001999957752997391, - "loss": 46.0, - "step": 38279 - }, - { - "epoch": 2.9267733241584954, - "grad_norm": 0.0006455389666371047, - "learning_rate": 0.00019999577507894808, - "loss": 46.0, - "step": 38280 - }, - { - "epoch": 2.926849781141885, - "grad_norm": 0.0068582636304199696, - "learning_rate": 0.00019999577485815131, - "loss": 46.0, - "step": 38281 - }, - { - "epoch": 2.926926238125275, - "grad_norm": 0.002149442443624139, - "learning_rate": 0.00019999577463734874, - "loss": 46.0, - "step": 38282 - }, - { - "epoch": 2.9270026951086647, - "grad_norm": 0.0010842869523912668, - "learning_rate": 0.00019999577441654043, - "loss": 46.0, - "step": 38283 - }, - { - "epoch": 2.927079152092054, - "grad_norm": 0.001289301086217165, - "learning_rate": 0.00019999577419572634, - "loss": 46.0, - "step": 38284 - }, - { - "epoch": 2.927155609075444, - "grad_norm": 0.0013966499827802181, - "learning_rate": 0.00019999577397490645, - "loss": 46.0, - "step": 38285 - }, - { - "epoch": 2.9272320660588336, - "grad_norm": 0.0028955533634871244, - "learning_rate": 0.00019999577375408084, - "loss": 46.0, - "step": 38286 - }, - { - "epoch": 2.9273085230422233, - "grad_norm": 0.0013165291165933013, - "learning_rate": 0.00019999577353324943, - "loss": 46.0, - "step": 38287 - }, - { - "epoch": 2.927384980025613, - "grad_norm": 0.0037420731969177723, - "learning_rate": 0.00019999577331241227, - "loss": 46.0, - "step": 38288 - }, - { - "epoch": 2.927461437009003, - "grad_norm": 0.001261544763110578, - "learning_rate": 0.00019999577309156932, - "loss": 46.0, - "step": 38289 - }, - { - "epoch": 2.9275378939923926, - "grad_norm": 0.0035089347511529922, - "learning_rate": 0.0001999957728707206, - "loss": 46.0, - "step": 38290 - }, - { - "epoch": 2.9276143509757824, - "grad_norm": 0.002187797799706459, - "learning_rate": 0.00019999577264986614, - "loss": 46.0, - "step": 38291 - }, - { - "epoch": 2.927690807959172, - "grad_norm": 0.001236019772477448, - "learning_rate": 0.0001999957724290059, - "loss": 46.0, - "step": 38292 - }, - { - "epoch": 2.9277672649425615, - "grad_norm": 0.0028971703723073006, - "learning_rate": 0.00019999577220813986, - "loss": 46.0, - "step": 38293 - }, - { - "epoch": 2.927843721925951, - "grad_norm": 0.0006433755625039339, - "learning_rate": 0.0001999957719872681, - "loss": 46.0, - "step": 38294 - }, - { - "epoch": 2.927920178909341, - "grad_norm": 0.0025660835672169924, - "learning_rate": 0.00019999577176639053, - "loss": 46.0, - "step": 38295 - }, - { - "epoch": 2.9279966358927307, - "grad_norm": 0.0071014766581356525, - "learning_rate": 0.00019999577154550718, - "loss": 46.0, - "step": 38296 - }, - { - "epoch": 2.9280730928761205, - "grad_norm": 0.0021674518939107656, - "learning_rate": 0.00019999577132461812, - "loss": 46.0, - "step": 38297 - }, - { - "epoch": 2.9281495498595103, - "grad_norm": 0.002416769741103053, - "learning_rate": 0.00019999577110372326, - "loss": 46.0, - "step": 38298 - }, - { - "epoch": 2.9282260068429, - "grad_norm": 0.0031663712579756975, - "learning_rate": 0.00019999577088282262, - "loss": 46.0, - "step": 38299 - }, - { - "epoch": 2.92830246382629, - "grad_norm": 0.003932499792426825, - "learning_rate": 0.0001999957706619162, - "loss": 46.0, - "step": 38300 - }, - { - "epoch": 2.9283789208096795, - "grad_norm": 0.0038557236548513174, - "learning_rate": 0.00019999577044100403, - "loss": 46.0, - "step": 38301 - }, - { - "epoch": 2.9284553777930693, - "grad_norm": 0.0029283526819199324, - "learning_rate": 0.0001999957702200861, - "loss": 46.0, - "step": 38302 - }, - { - "epoch": 2.928531834776459, - "grad_norm": 0.0018550771055743098, - "learning_rate": 0.0001999957699991624, - "loss": 46.0, - "step": 38303 - }, - { - "epoch": 2.928608291759849, - "grad_norm": 0.001281319884583354, - "learning_rate": 0.00019999576977823292, - "loss": 46.0, - "step": 38304 - }, - { - "epoch": 2.9286847487432386, - "grad_norm": 0.002361670834943652, - "learning_rate": 0.00019999576955729767, - "loss": 46.0, - "step": 38305 - }, - { - "epoch": 2.928761205726628, - "grad_norm": 0.0011752928839996457, - "learning_rate": 0.00019999576933635665, - "loss": 46.0, - "step": 38306 - }, - { - "epoch": 2.9288376627100177, - "grad_norm": 0.0019372672541067004, - "learning_rate": 0.00019999576911540988, - "loss": 46.0, - "step": 38307 - }, - { - "epoch": 2.9289141196934074, - "grad_norm": 0.004979288205504417, - "learning_rate": 0.0001999957688944573, - "loss": 46.0, - "step": 38308 - }, - { - "epoch": 2.928990576676797, - "grad_norm": 0.0023889269214123487, - "learning_rate": 0.000199995768673499, - "loss": 46.0, - "step": 38309 - }, - { - "epoch": 2.929067033660187, - "grad_norm": 0.0008140472928062081, - "learning_rate": 0.0001999957684525349, - "loss": 46.0, - "step": 38310 - }, - { - "epoch": 2.9291434906435767, - "grad_norm": 0.0005786191904917359, - "learning_rate": 0.00019999576823156504, - "loss": 46.0, - "step": 38311 - }, - { - "epoch": 2.9292199476269665, - "grad_norm": 0.0016067971009761095, - "learning_rate": 0.0001999957680105894, - "loss": 46.0, - "step": 38312 - }, - { - "epoch": 2.9292964046103562, - "grad_norm": 0.0009492552489973605, - "learning_rate": 0.000199995767789608, - "loss": 46.0, - "step": 38313 - }, - { - "epoch": 2.9293728615937455, - "grad_norm": 0.003731977194547653, - "learning_rate": 0.00019999576756862084, - "loss": 46.0, - "step": 38314 - }, - { - "epoch": 2.9294493185771353, - "grad_norm": 0.0018419703701511025, - "learning_rate": 0.00019999576734762792, - "loss": 46.0, - "step": 38315 - }, - { - "epoch": 2.929525775560525, - "grad_norm": 0.0025374721735715866, - "learning_rate": 0.00019999576712662919, - "loss": 46.0, - "step": 38316 - }, - { - "epoch": 2.929602232543915, - "grad_norm": 0.0015094116097316146, - "learning_rate": 0.0001999957669056247, - "loss": 46.0, - "step": 38317 - }, - { - "epoch": 2.9296786895273046, - "grad_norm": 0.0017964153084903955, - "learning_rate": 0.00019999576668461446, - "loss": 46.0, - "step": 38318 - }, - { - "epoch": 2.9297551465106944, - "grad_norm": 0.0022300092969089746, - "learning_rate": 0.00019999576646359844, - "loss": 46.0, - "step": 38319 - }, - { - "epoch": 2.929831603494084, - "grad_norm": 0.00252294703386724, - "learning_rate": 0.00019999576624257664, - "loss": 46.0, - "step": 38320 - }, - { - "epoch": 2.929908060477474, - "grad_norm": 0.005831143353134394, - "learning_rate": 0.0001999957660215491, - "loss": 46.0, - "step": 38321 - }, - { - "epoch": 2.9299845174608636, - "grad_norm": 0.001255890354514122, - "learning_rate": 0.00019999576580051579, - "loss": 46.0, - "step": 38322 - }, - { - "epoch": 2.9300609744442534, - "grad_norm": 0.0022746275644749403, - "learning_rate": 0.00019999576557947667, - "loss": 46.0, - "step": 38323 - }, - { - "epoch": 2.930137431427643, - "grad_norm": 0.002677883952856064, - "learning_rate": 0.00019999576535843184, - "loss": 46.0, - "step": 38324 - }, - { - "epoch": 2.930213888411033, - "grad_norm": 0.0027914121747016907, - "learning_rate": 0.0001999957651373812, - "loss": 46.0, - "step": 38325 - }, - { - "epoch": 2.9302903453944227, - "grad_norm": 0.004142760299146175, - "learning_rate": 0.0001999957649163248, - "loss": 46.0, - "step": 38326 - }, - { - "epoch": 2.9303668023778124, - "grad_norm": 0.001855485956184566, - "learning_rate": 0.00019999576469526264, - "loss": 46.0, - "step": 38327 - }, - { - "epoch": 2.9304432593612018, - "grad_norm": 0.002708352403715253, - "learning_rate": 0.00019999576447419468, - "loss": 46.0, - "step": 38328 - }, - { - "epoch": 2.9305197163445915, - "grad_norm": 0.0008962448337115347, - "learning_rate": 0.00019999576425312098, - "loss": 46.0, - "step": 38329 - }, - { - "epoch": 2.9305961733279813, - "grad_norm": 0.0022742683067917824, - "learning_rate": 0.00019999576403204153, - "loss": 46.0, - "step": 38330 - }, - { - "epoch": 2.930672630311371, - "grad_norm": 0.003403066424652934, - "learning_rate": 0.00019999576381095625, - "loss": 46.0, - "step": 38331 - }, - { - "epoch": 2.930749087294761, - "grad_norm": 0.001809767447412014, - "learning_rate": 0.00019999576358986526, - "loss": 46.0, - "step": 38332 - }, - { - "epoch": 2.9308255442781506, - "grad_norm": 0.005718843080103397, - "learning_rate": 0.00019999576336876846, - "loss": 46.0, - "step": 38333 - }, - { - "epoch": 2.9309020012615403, - "grad_norm": 0.0017335818847641349, - "learning_rate": 0.0001999957631476659, - "loss": 46.0, - "step": 38334 - }, - { - "epoch": 2.93097845824493, - "grad_norm": 0.0009207432740367949, - "learning_rate": 0.0001999957629265576, - "loss": 46.0, - "step": 38335 - }, - { - "epoch": 2.9310549152283194, - "grad_norm": 0.002403505379334092, - "learning_rate": 0.0001999957627054435, - "loss": 46.0, - "step": 38336 - }, - { - "epoch": 2.931131372211709, - "grad_norm": 0.0031551194842904806, - "learning_rate": 0.0001999957624843236, - "loss": 46.0, - "step": 38337 - }, - { - "epoch": 2.931207829195099, - "grad_norm": 0.0017367189284414053, - "learning_rate": 0.000199995762263198, - "loss": 46.0, - "step": 38338 - }, - { - "epoch": 2.9312842861784887, - "grad_norm": 0.0038173741195350885, - "learning_rate": 0.0001999957620420666, - "loss": 46.0, - "step": 38339 - }, - { - "epoch": 2.9313607431618784, - "grad_norm": 0.004370069596916437, - "learning_rate": 0.00019999576182092943, - "loss": 46.0, - "step": 38340 - }, - { - "epoch": 2.931437200145268, - "grad_norm": 0.00426573446020484, - "learning_rate": 0.0001999957615997865, - "loss": 46.0, - "step": 38341 - }, - { - "epoch": 2.931513657128658, - "grad_norm": 0.0016198699595406651, - "learning_rate": 0.00019999576137863777, - "loss": 46.0, - "step": 38342 - }, - { - "epoch": 2.9315901141120477, - "grad_norm": 0.002621751744300127, - "learning_rate": 0.0001999957611574833, - "loss": 46.0, - "step": 38343 - }, - { - "epoch": 2.9316665710954375, - "grad_norm": 0.0036541041918098927, - "learning_rate": 0.00019999576093632305, - "loss": 46.0, - "step": 38344 - }, - { - "epoch": 2.9317430280788273, - "grad_norm": 0.005183043889701366, - "learning_rate": 0.00019999576071515703, - "loss": 46.0, - "step": 38345 - }, - { - "epoch": 2.931819485062217, - "grad_norm": 0.0007401772891171277, - "learning_rate": 0.00019999576049398526, - "loss": 46.0, - "step": 38346 - }, - { - "epoch": 2.9318959420456068, - "grad_norm": 0.0012930460507050157, - "learning_rate": 0.00019999576027280772, - "loss": 46.0, - "step": 38347 - }, - { - "epoch": 2.9319723990289965, - "grad_norm": 0.0020210538059473038, - "learning_rate": 0.00019999576005162438, - "loss": 46.0, - "step": 38348 - }, - { - "epoch": 2.9320488560123863, - "grad_norm": 0.002794157015159726, - "learning_rate": 0.0001999957598304353, - "loss": 46.0, - "step": 38349 - }, - { - "epoch": 2.9321253129957756, - "grad_norm": 0.0052177696488797665, - "learning_rate": 0.00019999575960924043, - "loss": 46.0, - "step": 38350 - }, - { - "epoch": 2.9322017699791654, - "grad_norm": 0.003247017040848732, - "learning_rate": 0.0001999957593880398, - "loss": 46.0, - "step": 38351 - }, - { - "epoch": 2.932278226962555, - "grad_norm": 0.0011789138661697507, - "learning_rate": 0.00019999575916683339, - "loss": 46.0, - "step": 38352 - }, - { - "epoch": 2.932354683945945, - "grad_norm": 0.0018850021297112107, - "learning_rate": 0.0001999957589456212, - "loss": 46.0, - "step": 38353 - }, - { - "epoch": 2.9324311409293347, - "grad_norm": 0.005937132518738508, - "learning_rate": 0.00019999575872440328, - "loss": 46.0, - "step": 38354 - }, - { - "epoch": 2.9325075979127244, - "grad_norm": 0.004149425774812698, - "learning_rate": 0.00019999575850317958, - "loss": 46.0, - "step": 38355 - }, - { - "epoch": 2.932584054896114, - "grad_norm": 0.0015420602867379785, - "learning_rate": 0.0001999957582819501, - "loss": 46.0, - "step": 38356 - }, - { - "epoch": 2.932660511879504, - "grad_norm": 0.0037689516320824623, - "learning_rate": 0.00019999575806071485, - "loss": 46.0, - "step": 38357 - }, - { - "epoch": 2.9327369688628933, - "grad_norm": 0.006749901920557022, - "learning_rate": 0.00019999575783947383, - "loss": 46.0, - "step": 38358 - }, - { - "epoch": 2.932813425846283, - "grad_norm": 0.0022343432065099478, - "learning_rate": 0.00019999575761822706, - "loss": 46.0, - "step": 38359 - }, - { - "epoch": 2.932889882829673, - "grad_norm": 0.0012883387971669436, - "learning_rate": 0.0001999957573969745, - "loss": 46.0, - "step": 38360 - }, - { - "epoch": 2.9329663398130625, - "grad_norm": 0.002754267305135727, - "learning_rate": 0.00019999575717571618, - "loss": 46.0, - "step": 38361 - }, - { - "epoch": 2.9330427967964523, - "grad_norm": 0.0036985205952078104, - "learning_rate": 0.0001999957569544521, - "loss": 46.0, - "step": 38362 - }, - { - "epoch": 2.933119253779842, - "grad_norm": 0.0024605635553598404, - "learning_rate": 0.00019999575673318223, - "loss": 46.0, - "step": 38363 - }, - { - "epoch": 2.933195710763232, - "grad_norm": 0.004922663327306509, - "learning_rate": 0.0001999957565119066, - "loss": 46.0, - "step": 38364 - }, - { - "epoch": 2.9332721677466216, - "grad_norm": 0.0009371376363560557, - "learning_rate": 0.00019999575629062522, - "loss": 46.0, - "step": 38365 - }, - { - "epoch": 2.9333486247300113, - "grad_norm": 0.0012911951635032892, - "learning_rate": 0.00019999575606933804, - "loss": 46.0, - "step": 38366 - }, - { - "epoch": 2.933425081713401, - "grad_norm": 0.0031505294609814882, - "learning_rate": 0.0001999957558480451, - "loss": 46.0, - "step": 38367 - }, - { - "epoch": 2.933501538696791, - "grad_norm": 0.003799398662522435, - "learning_rate": 0.0001999957556267464, - "loss": 46.0, - "step": 38368 - }, - { - "epoch": 2.9335779956801806, - "grad_norm": 0.001875002752058208, - "learning_rate": 0.0001999957554054419, - "loss": 46.0, - "step": 38369 - }, - { - "epoch": 2.9336544526635704, - "grad_norm": 0.0013595259515568614, - "learning_rate": 0.00019999575518413166, - "loss": 46.0, - "step": 38370 - }, - { - "epoch": 2.93373090964696, - "grad_norm": 0.005625258199870586, - "learning_rate": 0.00019999575496281566, - "loss": 46.0, - "step": 38371 - }, - { - "epoch": 2.9338073666303495, - "grad_norm": 0.0014829987194389105, - "learning_rate": 0.00019999575474149387, - "loss": 46.0, - "step": 38372 - }, - { - "epoch": 2.9338838236137392, - "grad_norm": 0.0019627483561635017, - "learning_rate": 0.00019999575452016633, - "loss": 46.0, - "step": 38373 - }, - { - "epoch": 2.933960280597129, - "grad_norm": 0.0017464442644268274, - "learning_rate": 0.000199995754298833, - "loss": 46.0, - "step": 38374 - }, - { - "epoch": 2.9340367375805187, - "grad_norm": 0.001066903700120747, - "learning_rate": 0.0001999957540774939, - "loss": 46.0, - "step": 38375 - }, - { - "epoch": 2.9341131945639085, - "grad_norm": 0.0005869469023309648, - "learning_rate": 0.00019999575385614904, - "loss": 46.0, - "step": 38376 - }, - { - "epoch": 2.9341896515472983, - "grad_norm": 0.0016888286918401718, - "learning_rate": 0.0001999957536347984, - "loss": 46.0, - "step": 38377 - }, - { - "epoch": 2.934266108530688, - "grad_norm": 0.0027584037743508816, - "learning_rate": 0.000199995753413442, - "loss": 46.0, - "step": 38378 - }, - { - "epoch": 2.934342565514078, - "grad_norm": 0.0047539896331727505, - "learning_rate": 0.00019999575319207984, - "loss": 46.0, - "step": 38379 - }, - { - "epoch": 2.934419022497467, - "grad_norm": 0.0020775427110493183, - "learning_rate": 0.0001999957529707119, - "loss": 46.0, - "step": 38380 - }, - { - "epoch": 2.934495479480857, - "grad_norm": 0.0021214515436440706, - "learning_rate": 0.0001999957527493382, - "loss": 46.0, - "step": 38381 - }, - { - "epoch": 2.9345719364642466, - "grad_norm": 0.0025901715271174908, - "learning_rate": 0.00019999575252795871, - "loss": 46.0, - "step": 38382 - }, - { - "epoch": 2.9346483934476364, - "grad_norm": 0.0033463803119957447, - "learning_rate": 0.00019999575230657347, - "loss": 46.0, - "step": 38383 - }, - { - "epoch": 2.934724850431026, - "grad_norm": 0.003966361749917269, - "learning_rate": 0.00019999575208518247, - "loss": 46.0, - "step": 38384 - }, - { - "epoch": 2.934801307414416, - "grad_norm": 0.0012724873377010226, - "learning_rate": 0.00019999575186378568, - "loss": 46.0, - "step": 38385 - }, - { - "epoch": 2.9348777643978057, - "grad_norm": 0.0018246839754283428, - "learning_rate": 0.0001999957516423831, - "loss": 46.0, - "step": 38386 - }, - { - "epoch": 2.9349542213811954, - "grad_norm": 0.0021874033845961094, - "learning_rate": 0.0001999957514209748, - "loss": 46.0, - "step": 38387 - }, - { - "epoch": 2.935030678364585, - "grad_norm": 0.0012727330904453993, - "learning_rate": 0.0001999957511995607, - "loss": 46.0, - "step": 38388 - }, - { - "epoch": 2.935107135347975, - "grad_norm": 0.0038354459684342146, - "learning_rate": 0.00019999575097814085, - "loss": 46.0, - "step": 38389 - }, - { - "epoch": 2.9351835923313647, - "grad_norm": 0.004747078288346529, - "learning_rate": 0.00019999575075671522, - "loss": 46.0, - "step": 38390 - }, - { - "epoch": 2.9352600493147545, - "grad_norm": 0.0007668832549825311, - "learning_rate": 0.0001999957505352838, - "loss": 46.0, - "step": 38391 - }, - { - "epoch": 2.9353365062981442, - "grad_norm": 0.0034495287109166384, - "learning_rate": 0.00019999575031384666, - "loss": 46.0, - "step": 38392 - }, - { - "epoch": 2.935412963281534, - "grad_norm": 0.0013124882243573666, - "learning_rate": 0.00019999575009240373, - "loss": 46.0, - "step": 38393 - }, - { - "epoch": 2.9354894202649233, - "grad_norm": 0.0016738465055823326, - "learning_rate": 0.000199995749870955, - "loss": 46.0, - "step": 38394 - }, - { - "epoch": 2.935565877248313, - "grad_norm": 0.0022193174809217453, - "learning_rate": 0.00019999574964950053, - "loss": 46.0, - "step": 38395 - }, - { - "epoch": 2.935642334231703, - "grad_norm": 0.002999038202688098, - "learning_rate": 0.00019999574942804028, - "loss": 46.0, - "step": 38396 - }, - { - "epoch": 2.9357187912150926, - "grad_norm": 0.00505444873124361, - "learning_rate": 0.00019999574920657426, - "loss": 46.0, - "step": 38397 - }, - { - "epoch": 2.9357952481984824, - "grad_norm": 0.0033488075714558363, - "learning_rate": 0.0001999957489851025, - "loss": 46.0, - "step": 38398 - }, - { - "epoch": 2.935871705181872, - "grad_norm": 0.0030953786335885525, - "learning_rate": 0.00019999574876362495, - "loss": 46.0, - "step": 38399 - }, - { - "epoch": 2.935948162165262, - "grad_norm": 0.003579887095838785, - "learning_rate": 0.0001999957485421416, - "loss": 46.0, - "step": 38400 - }, - { - "epoch": 2.9360246191486516, - "grad_norm": 0.0011574856471270323, - "learning_rate": 0.00019999574832065253, - "loss": 46.0, - "step": 38401 - }, - { - "epoch": 2.936101076132041, - "grad_norm": 0.0011123744770884514, - "learning_rate": 0.00019999574809915767, - "loss": 46.0, - "step": 38402 - }, - { - "epoch": 2.9361775331154307, - "grad_norm": 0.0013532133307307959, - "learning_rate": 0.00019999574787765703, - "loss": 46.0, - "step": 38403 - }, - { - "epoch": 2.9362539900988205, - "grad_norm": 0.001378371729515493, - "learning_rate": 0.00019999574765615063, - "loss": 46.0, - "step": 38404 - }, - { - "epoch": 2.9363304470822102, - "grad_norm": 0.0020623046439141035, - "learning_rate": 0.00019999574743463847, - "loss": 46.0, - "step": 38405 - }, - { - "epoch": 2.9364069040656, - "grad_norm": 0.0021001333370804787, - "learning_rate": 0.00019999574721312055, - "loss": 46.0, - "step": 38406 - }, - { - "epoch": 2.9364833610489898, - "grad_norm": 0.003137333784252405, - "learning_rate": 0.00019999574699159682, - "loss": 46.0, - "step": 38407 - }, - { - "epoch": 2.9365598180323795, - "grad_norm": 0.005436848849058151, - "learning_rate": 0.00019999574677006737, - "loss": 46.0, - "step": 38408 - }, - { - "epoch": 2.9366362750157693, - "grad_norm": 0.0013779301661998034, - "learning_rate": 0.0001999957465485321, - "loss": 46.0, - "step": 38409 - }, - { - "epoch": 2.936712731999159, - "grad_norm": 0.006826538126915693, - "learning_rate": 0.00019999574632699108, - "loss": 46.0, - "step": 38410 - }, - { - "epoch": 2.936789188982549, - "grad_norm": 0.004575145896524191, - "learning_rate": 0.00019999574610544428, - "loss": 46.0, - "step": 38411 - }, - { - "epoch": 2.9368656459659386, - "grad_norm": 0.002724838675931096, - "learning_rate": 0.00019999574588389175, - "loss": 46.0, - "step": 38412 - }, - { - "epoch": 2.9369421029493283, - "grad_norm": 0.0011032322654500604, - "learning_rate": 0.00019999574566233343, - "loss": 46.0, - "step": 38413 - }, - { - "epoch": 2.937018559932718, - "grad_norm": 0.0007204872090369463, - "learning_rate": 0.00019999574544076935, - "loss": 46.0, - "step": 38414 - }, - { - "epoch": 2.9370950169161074, - "grad_norm": 0.0009414879605174065, - "learning_rate": 0.00019999574521919949, - "loss": 46.0, - "step": 38415 - }, - { - "epoch": 2.937171473899497, - "grad_norm": 0.0019028965616598725, - "learning_rate": 0.00019999574499762385, - "loss": 46.0, - "step": 38416 - }, - { - "epoch": 2.937247930882887, - "grad_norm": 0.0013468819670379162, - "learning_rate": 0.00019999574477604247, - "loss": 46.0, - "step": 38417 - }, - { - "epoch": 2.9373243878662767, - "grad_norm": 0.002717470284551382, - "learning_rate": 0.0001999957445544553, - "loss": 46.0, - "step": 38418 - }, - { - "epoch": 2.9374008448496665, - "grad_norm": 0.002555568004027009, - "learning_rate": 0.00019999574433286234, - "loss": 46.0, - "step": 38419 - }, - { - "epoch": 2.937477301833056, - "grad_norm": 0.0011127963662147522, - "learning_rate": 0.00019999574411126364, - "loss": 46.0, - "step": 38420 - }, - { - "epoch": 2.937553758816446, - "grad_norm": 0.0038804232608526945, - "learning_rate": 0.0001999957438896592, - "loss": 46.0, - "step": 38421 - }, - { - "epoch": 2.9376302157998357, - "grad_norm": 0.0018935680855065584, - "learning_rate": 0.00019999574366804895, - "loss": 46.0, - "step": 38422 - }, - { - "epoch": 2.9377066727832255, - "grad_norm": 0.00648722006008029, - "learning_rate": 0.00019999574344643293, - "loss": 46.0, - "step": 38423 - }, - { - "epoch": 2.937783129766615, - "grad_norm": 0.0017362262587994337, - "learning_rate": 0.00019999574322481113, - "loss": 46.0, - "step": 38424 - }, - { - "epoch": 2.9378595867500046, - "grad_norm": 0.0016211242182180285, - "learning_rate": 0.00019999574300318357, - "loss": 46.0, - "step": 38425 - }, - { - "epoch": 2.9379360437333943, - "grad_norm": 0.004038037732243538, - "learning_rate": 0.00019999574278155025, - "loss": 46.0, - "step": 38426 - }, - { - "epoch": 2.938012500716784, - "grad_norm": 0.0010735326213762164, - "learning_rate": 0.00019999574255991117, - "loss": 46.0, - "step": 38427 - }, - { - "epoch": 2.938088957700174, - "grad_norm": 0.0018506509950384498, - "learning_rate": 0.0001999957423382663, - "loss": 46.0, - "step": 38428 - }, - { - "epoch": 2.9381654146835636, - "grad_norm": 0.0011335625313222408, - "learning_rate": 0.00019999574211661568, - "loss": 46.0, - "step": 38429 - }, - { - "epoch": 2.9382418716669534, - "grad_norm": 0.009934425354003906, - "learning_rate": 0.00019999574189495927, - "loss": 46.0, - "step": 38430 - }, - { - "epoch": 2.938318328650343, - "grad_norm": 0.0014541858108714223, - "learning_rate": 0.00019999574167329712, - "loss": 46.0, - "step": 38431 - }, - { - "epoch": 2.938394785633733, - "grad_norm": 0.002659382065758109, - "learning_rate": 0.0001999957414516292, - "loss": 46.0, - "step": 38432 - }, - { - "epoch": 2.9384712426171227, - "grad_norm": 0.0027723510283976793, - "learning_rate": 0.0001999957412299555, - "loss": 46.0, - "step": 38433 - }, - { - "epoch": 2.9385476996005124, - "grad_norm": 0.001249409862793982, - "learning_rate": 0.000199995741008276, - "loss": 46.0, - "step": 38434 - }, - { - "epoch": 2.938624156583902, - "grad_norm": 0.0012369652977213264, - "learning_rate": 0.00019999574078659075, - "loss": 46.0, - "step": 38435 - }, - { - "epoch": 2.938700613567292, - "grad_norm": 0.0008058793027885258, - "learning_rate": 0.00019999574056489973, - "loss": 46.0, - "step": 38436 - }, - { - "epoch": 2.9387770705506813, - "grad_norm": 0.002899512182921171, - "learning_rate": 0.00019999574034320293, - "loss": 46.0, - "step": 38437 - }, - { - "epoch": 2.938853527534071, - "grad_norm": 0.0044359853491187096, - "learning_rate": 0.0001999957401215004, - "loss": 46.0, - "step": 38438 - }, - { - "epoch": 2.938929984517461, - "grad_norm": 0.003538022981956601, - "learning_rate": 0.00019999573989979208, - "loss": 46.0, - "step": 38439 - }, - { - "epoch": 2.9390064415008506, - "grad_norm": 0.0013276587706059217, - "learning_rate": 0.000199995739678078, - "loss": 46.0, - "step": 38440 - }, - { - "epoch": 2.9390828984842403, - "grad_norm": 0.004713753703981638, - "learning_rate": 0.0001999957394563581, - "loss": 46.0, - "step": 38441 - }, - { - "epoch": 2.93915935546763, - "grad_norm": 0.00706761097535491, - "learning_rate": 0.0001999957392346325, - "loss": 46.0, - "step": 38442 - }, - { - "epoch": 2.93923581245102, - "grad_norm": 0.0010085137328132987, - "learning_rate": 0.00019999573901290113, - "loss": 46.0, - "step": 38443 - }, - { - "epoch": 2.9393122694344096, - "grad_norm": 0.0024339633528143167, - "learning_rate": 0.00019999573879116395, - "loss": 46.0, - "step": 38444 - }, - { - "epoch": 2.939388726417799, - "grad_norm": 0.004985821433365345, - "learning_rate": 0.000199995738569421, - "loss": 46.0, - "step": 38445 - }, - { - "epoch": 2.9394651834011887, - "grad_norm": 0.001614739652723074, - "learning_rate": 0.0001999957383476723, - "loss": 46.0, - "step": 38446 - }, - { - "epoch": 2.9395416403845784, - "grad_norm": 0.0023264975752681494, - "learning_rate": 0.00019999573812591782, - "loss": 46.0, - "step": 38447 - }, - { - "epoch": 2.939618097367968, - "grad_norm": 0.003263306338340044, - "learning_rate": 0.00019999573790415758, - "loss": 46.0, - "step": 38448 - }, - { - "epoch": 2.939694554351358, - "grad_norm": 0.0012822872959077358, - "learning_rate": 0.00019999573768239156, - "loss": 46.0, - "step": 38449 - }, - { - "epoch": 2.9397710113347477, - "grad_norm": 0.003085100557655096, - "learning_rate": 0.0001999957374606198, - "loss": 46.0, - "step": 38450 - }, - { - "epoch": 2.9398474683181375, - "grad_norm": 0.0005631727399304509, - "learning_rate": 0.00019999573723884223, - "loss": 46.0, - "step": 38451 - }, - { - "epoch": 2.9399239253015272, - "grad_norm": 0.00225269328802824, - "learning_rate": 0.0001999957370170589, - "loss": 46.0, - "step": 38452 - }, - { - "epoch": 2.940000382284917, - "grad_norm": 0.006019046064466238, - "learning_rate": 0.00019999573679526983, - "loss": 46.0, - "step": 38453 - }, - { - "epoch": 2.9400768392683068, - "grad_norm": 0.0009961413452401757, - "learning_rate": 0.00019999573657347494, - "loss": 46.0, - "step": 38454 - }, - { - "epoch": 2.9401532962516965, - "grad_norm": 0.00282875495031476, - "learning_rate": 0.00019999573635167434, - "loss": 46.0, - "step": 38455 - }, - { - "epoch": 2.9402297532350863, - "grad_norm": 0.004325693007558584, - "learning_rate": 0.00019999573612986793, - "loss": 46.0, - "step": 38456 - }, - { - "epoch": 2.940306210218476, - "grad_norm": 0.0009420639835298061, - "learning_rate": 0.00019999573590805575, - "loss": 46.0, - "step": 38457 - }, - { - "epoch": 2.940382667201866, - "grad_norm": 0.0041830395348370075, - "learning_rate": 0.00019999573568623783, - "loss": 46.0, - "step": 38458 - }, - { - "epoch": 2.940459124185255, - "grad_norm": 0.0032734915148466825, - "learning_rate": 0.00019999573546441413, - "loss": 46.0, - "step": 38459 - }, - { - "epoch": 2.940535581168645, - "grad_norm": 0.002448160434141755, - "learning_rate": 0.00019999573524258466, - "loss": 46.0, - "step": 38460 - }, - { - "epoch": 2.9406120381520346, - "grad_norm": 0.0012298934161663055, - "learning_rate": 0.0001999957350207494, - "loss": 46.0, - "step": 38461 - }, - { - "epoch": 2.9406884951354244, - "grad_norm": 0.0036155146081000566, - "learning_rate": 0.00019999573479890842, - "loss": 46.0, - "step": 38462 - }, - { - "epoch": 2.940764952118814, - "grad_norm": 0.0026368515100330114, - "learning_rate": 0.0001999957345770616, - "loss": 46.0, - "step": 38463 - }, - { - "epoch": 2.940841409102204, - "grad_norm": 0.0007455009035766125, - "learning_rate": 0.00019999573435520906, - "loss": 46.0, - "step": 38464 - }, - { - "epoch": 2.9409178660855937, - "grad_norm": 0.004952927120029926, - "learning_rate": 0.00019999573413335075, - "loss": 46.0, - "step": 38465 - }, - { - "epoch": 2.9409943230689835, - "grad_norm": 0.002793573774397373, - "learning_rate": 0.00019999573391148664, - "loss": 46.0, - "step": 38466 - }, - { - "epoch": 2.9410707800523728, - "grad_norm": 0.002384437248110771, - "learning_rate": 0.0001999957336896168, - "loss": 46.0, - "step": 38467 - }, - { - "epoch": 2.9411472370357625, - "grad_norm": 0.0015376788796857, - "learning_rate": 0.00019999573346774117, - "loss": 46.0, - "step": 38468 - }, - { - "epoch": 2.9412236940191523, - "grad_norm": 0.0030045057646930218, - "learning_rate": 0.00019999573324585977, - "loss": 46.0, - "step": 38469 - }, - { - "epoch": 2.941300151002542, - "grad_norm": 0.002283738926053047, - "learning_rate": 0.00019999573302397262, - "loss": 46.0, - "step": 38470 - }, - { - "epoch": 2.941376607985932, - "grad_norm": 0.0025652190670371056, - "learning_rate": 0.00019999573280207966, - "loss": 46.0, - "step": 38471 - }, - { - "epoch": 2.9414530649693216, - "grad_norm": 0.0009685687837190926, - "learning_rate": 0.00019999573258018096, - "loss": 46.0, - "step": 38472 - }, - { - "epoch": 2.9415295219527113, - "grad_norm": 0.0029314816929399967, - "learning_rate": 0.0001999957323582765, - "loss": 46.0, - "step": 38473 - }, - { - "epoch": 2.941605978936101, - "grad_norm": 0.002371280686929822, - "learning_rate": 0.00019999573213636625, - "loss": 46.0, - "step": 38474 - }, - { - "epoch": 2.941682435919491, - "grad_norm": 0.0008238262962549925, - "learning_rate": 0.00019999573191445023, - "loss": 46.0, - "step": 38475 - }, - { - "epoch": 2.9417588929028806, - "grad_norm": 0.0011935480870306492, - "learning_rate": 0.00019999573169252846, - "loss": 46.0, - "step": 38476 - }, - { - "epoch": 2.9418353498862704, - "grad_norm": 0.0033864155411720276, - "learning_rate": 0.0001999957314706009, - "loss": 46.0, - "step": 38477 - }, - { - "epoch": 2.94191180686966, - "grad_norm": 0.0015145617071539164, - "learning_rate": 0.00019999573124866756, - "loss": 46.0, - "step": 38478 - }, - { - "epoch": 2.94198826385305, - "grad_norm": 0.0013933308655396104, - "learning_rate": 0.00019999573102672848, - "loss": 46.0, - "step": 38479 - }, - { - "epoch": 2.9420647208364397, - "grad_norm": 0.003730396507307887, - "learning_rate": 0.00019999573080478364, - "loss": 46.0, - "step": 38480 - }, - { - "epoch": 2.942141177819829, - "grad_norm": 0.008588864468038082, - "learning_rate": 0.000199995730582833, - "loss": 46.0, - "step": 38481 - }, - { - "epoch": 2.9422176348032187, - "grad_norm": 0.0013294616946950555, - "learning_rate": 0.0001999957303608766, - "loss": 46.0, - "step": 38482 - }, - { - "epoch": 2.9422940917866085, - "grad_norm": 0.001995525322854519, - "learning_rate": 0.00019999573013891443, - "loss": 46.0, - "step": 38483 - }, - { - "epoch": 2.9423705487699983, - "grad_norm": 0.0013809970114380121, - "learning_rate": 0.0001999957299169465, - "loss": 46.0, - "step": 38484 - }, - { - "epoch": 2.942447005753388, - "grad_norm": 0.0013118662172928452, - "learning_rate": 0.0001999957296949728, - "loss": 46.0, - "step": 38485 - }, - { - "epoch": 2.942523462736778, - "grad_norm": 0.0026405975222587585, - "learning_rate": 0.0001999957294729933, - "loss": 46.0, - "step": 38486 - }, - { - "epoch": 2.9425999197201675, - "grad_norm": 0.003563867649063468, - "learning_rate": 0.0001999957292510081, - "loss": 46.0, - "step": 38487 - }, - { - "epoch": 2.9426763767035573, - "grad_norm": 0.0019280824344605207, - "learning_rate": 0.00019999572902901707, - "loss": 46.0, - "step": 38488 - }, - { - "epoch": 2.9427528336869466, - "grad_norm": 0.005512777250260115, - "learning_rate": 0.00019999572880702028, - "loss": 46.0, - "step": 38489 - }, - { - "epoch": 2.9428292906703364, - "grad_norm": 0.000541763671208173, - "learning_rate": 0.00019999572858501774, - "loss": 46.0, - "step": 38490 - }, - { - "epoch": 2.942905747653726, - "grad_norm": 0.0011167492484673858, - "learning_rate": 0.0001999957283630094, - "loss": 46.0, - "step": 38491 - }, - { - "epoch": 2.942982204637116, - "grad_norm": 0.004400759469717741, - "learning_rate": 0.00019999572814099535, - "loss": 46.0, - "step": 38492 - }, - { - "epoch": 2.9430586616205057, - "grad_norm": 0.0036884199362248182, - "learning_rate": 0.00019999572791897546, - "loss": 46.0, - "step": 38493 - }, - { - "epoch": 2.9431351186038954, - "grad_norm": 0.003917299211025238, - "learning_rate": 0.00019999572769694983, - "loss": 46.0, - "step": 38494 - }, - { - "epoch": 2.943211575587285, - "grad_norm": 0.0018146766815334558, - "learning_rate": 0.00019999572747491845, - "loss": 46.0, - "step": 38495 - }, - { - "epoch": 2.943288032570675, - "grad_norm": 0.0054632979445159435, - "learning_rate": 0.00019999572725288127, - "loss": 46.0, - "step": 38496 - }, - { - "epoch": 2.9433644895540647, - "grad_norm": 0.0018193337600678205, - "learning_rate": 0.00019999572703083835, - "loss": 46.0, - "step": 38497 - }, - { - "epoch": 2.9434409465374545, - "grad_norm": 0.0013264549197629094, - "learning_rate": 0.00019999572680878965, - "loss": 46.0, - "step": 38498 - }, - { - "epoch": 2.9435174035208442, - "grad_norm": 0.0009816802339628339, - "learning_rate": 0.00019999572658673515, - "loss": 46.0, - "step": 38499 - }, - { - "epoch": 2.943593860504234, - "grad_norm": 0.0027608880773186684, - "learning_rate": 0.0001999957263646749, - "loss": 46.0, - "step": 38500 - }, - { - "epoch": 2.9436703174876238, - "grad_norm": 0.0017870917217805982, - "learning_rate": 0.00019999572614260892, - "loss": 46.0, - "step": 38501 - }, - { - "epoch": 2.9437467744710135, - "grad_norm": 0.0017494626808911562, - "learning_rate": 0.00019999572592053713, - "loss": 46.0, - "step": 38502 - }, - { - "epoch": 2.943823231454403, - "grad_norm": 0.0010061635402962565, - "learning_rate": 0.00019999572569845956, - "loss": 46.0, - "step": 38503 - }, - { - "epoch": 2.9438996884377926, - "grad_norm": 0.0007403005729429424, - "learning_rate": 0.00019999572547637625, - "loss": 46.0, - "step": 38504 - }, - { - "epoch": 2.9439761454211824, - "grad_norm": 0.0011278523597866297, - "learning_rate": 0.0001999957252542872, - "loss": 46.0, - "step": 38505 - }, - { - "epoch": 2.944052602404572, - "grad_norm": 0.0018596234731376171, - "learning_rate": 0.0001999957250321923, - "loss": 46.0, - "step": 38506 - }, - { - "epoch": 2.944129059387962, - "grad_norm": 0.0012644259259104729, - "learning_rate": 0.0001999957248100917, - "loss": 46.0, - "step": 38507 - }, - { - "epoch": 2.9442055163713516, - "grad_norm": 0.00401754816994071, - "learning_rate": 0.00019999572458798527, - "loss": 46.0, - "step": 38508 - }, - { - "epoch": 2.9442819733547414, - "grad_norm": 0.0028008895460516214, - "learning_rate": 0.00019999572436587312, - "loss": 46.0, - "step": 38509 - }, - { - "epoch": 2.944358430338131, - "grad_norm": 0.004756350070238113, - "learning_rate": 0.0001999957241437552, - "loss": 46.0, - "step": 38510 - }, - { - "epoch": 2.9444348873215205, - "grad_norm": 0.002242323709651828, - "learning_rate": 0.00019999572392163147, - "loss": 46.0, - "step": 38511 - }, - { - "epoch": 2.9445113443049102, - "grad_norm": 0.0017678787698969245, - "learning_rate": 0.000199995723699502, - "loss": 46.0, - "step": 38512 - }, - { - "epoch": 2.9445878012883, - "grad_norm": 0.0014639116125181317, - "learning_rate": 0.00019999572347736676, - "loss": 46.0, - "step": 38513 - }, - { - "epoch": 2.9446642582716898, - "grad_norm": 0.001688948250375688, - "learning_rate": 0.00019999572325522577, - "loss": 46.0, - "step": 38514 - }, - { - "epoch": 2.9447407152550795, - "grad_norm": 0.003764891065657139, - "learning_rate": 0.00019999572303307898, - "loss": 46.0, - "step": 38515 - }, - { - "epoch": 2.9448171722384693, - "grad_norm": 0.0014119968982413411, - "learning_rate": 0.0001999957228109264, - "loss": 46.0, - "step": 38516 - }, - { - "epoch": 2.944893629221859, - "grad_norm": 0.0012477160198614001, - "learning_rate": 0.0001999957225887681, - "loss": 46.0, - "step": 38517 - }, - { - "epoch": 2.944970086205249, - "grad_norm": 0.0018411694327369332, - "learning_rate": 0.00019999572236660402, - "loss": 46.0, - "step": 38518 - }, - { - "epoch": 2.9450465431886386, - "grad_norm": 0.001350630191154778, - "learning_rate": 0.00019999572214443416, - "loss": 46.0, - "step": 38519 - }, - { - "epoch": 2.9451230001720283, - "grad_norm": 0.0016800719313323498, - "learning_rate": 0.00019999572192225853, - "loss": 46.0, - "step": 38520 - }, - { - "epoch": 2.945199457155418, - "grad_norm": 0.0019967847038060427, - "learning_rate": 0.00019999572170007715, - "loss": 46.0, - "step": 38521 - }, - { - "epoch": 2.945275914138808, - "grad_norm": 0.0014051940524950624, - "learning_rate": 0.00019999572147788998, - "loss": 46.0, - "step": 38522 - }, - { - "epoch": 2.9453523711221976, - "grad_norm": 0.002312208293005824, - "learning_rate": 0.00019999572125569705, - "loss": 46.0, - "step": 38523 - }, - { - "epoch": 2.9454288281055874, - "grad_norm": 0.002284690272063017, - "learning_rate": 0.00019999572103349835, - "loss": 46.0, - "step": 38524 - }, - { - "epoch": 2.9455052850889767, - "grad_norm": 0.00094099051784724, - "learning_rate": 0.00019999572081129388, - "loss": 46.0, - "step": 38525 - }, - { - "epoch": 2.9455817420723664, - "grad_norm": 0.0034788523335009813, - "learning_rate": 0.0001999957205890836, - "loss": 46.0, - "step": 38526 - }, - { - "epoch": 2.945658199055756, - "grad_norm": 0.004224858712404966, - "learning_rate": 0.0001999957203668676, - "loss": 46.0, - "step": 38527 - }, - { - "epoch": 2.945734656039146, - "grad_norm": 0.0017389258136972785, - "learning_rate": 0.00019999572014464583, - "loss": 46.0, - "step": 38528 - }, - { - "epoch": 2.9458111130225357, - "grad_norm": 0.00484719080850482, - "learning_rate": 0.00019999571992241827, - "loss": 46.0, - "step": 38529 - }, - { - "epoch": 2.9458875700059255, - "grad_norm": 0.002473007421940565, - "learning_rate": 0.00019999571970018496, - "loss": 46.0, - "step": 38530 - }, - { - "epoch": 2.9459640269893153, - "grad_norm": 0.0019858009181916714, - "learning_rate": 0.00019999571947794587, - "loss": 46.0, - "step": 38531 - }, - { - "epoch": 2.946040483972705, - "grad_norm": 0.0008052597404457629, - "learning_rate": 0.00019999571925570102, - "loss": 46.0, - "step": 38532 - }, - { - "epoch": 2.9461169409560943, - "grad_norm": 0.0025038241874426603, - "learning_rate": 0.00019999571903345041, - "loss": 46.0, - "step": 38533 - }, - { - "epoch": 2.946193397939484, - "grad_norm": 0.002420000033453107, - "learning_rate": 0.000199995718811194, - "loss": 46.0, - "step": 38534 - }, - { - "epoch": 2.946269854922874, - "grad_norm": 0.001423690584488213, - "learning_rate": 0.00019999571858893183, - "loss": 46.0, - "step": 38535 - }, - { - "epoch": 2.9463463119062636, - "grad_norm": 0.0032743343617767096, - "learning_rate": 0.0001999957183666639, - "loss": 46.0, - "step": 38536 - }, - { - "epoch": 2.9464227688896534, - "grad_norm": 0.0007264400483109057, - "learning_rate": 0.00019999571814439019, - "loss": 46.0, - "step": 38537 - }, - { - "epoch": 2.946499225873043, - "grad_norm": 0.0017941446276381612, - "learning_rate": 0.00019999571792211072, - "loss": 46.0, - "step": 38538 - }, - { - "epoch": 2.946575682856433, - "grad_norm": 0.0021681897342205048, - "learning_rate": 0.00019999571769982547, - "loss": 46.0, - "step": 38539 - }, - { - "epoch": 2.9466521398398227, - "grad_norm": 0.00389724038541317, - "learning_rate": 0.00019999571747753448, - "loss": 46.0, - "step": 38540 - }, - { - "epoch": 2.9467285968232124, - "grad_norm": 0.004298449959605932, - "learning_rate": 0.0001999957172552377, - "loss": 46.0, - "step": 38541 - }, - { - "epoch": 2.946805053806602, - "grad_norm": 0.0025112037546932697, - "learning_rate": 0.00019999571703293513, - "loss": 46.0, - "step": 38542 - }, - { - "epoch": 2.946881510789992, - "grad_norm": 0.0011482355184853077, - "learning_rate": 0.00019999571681062684, - "loss": 46.0, - "step": 38543 - }, - { - "epoch": 2.9469579677733817, - "grad_norm": 0.0034956352319568396, - "learning_rate": 0.00019999571658831273, - "loss": 46.0, - "step": 38544 - }, - { - "epoch": 2.9470344247567715, - "grad_norm": 0.00196602800861001, - "learning_rate": 0.00019999571636599285, - "loss": 46.0, - "step": 38545 - }, - { - "epoch": 2.947110881740161, - "grad_norm": 0.0028144672978669405, - "learning_rate": 0.00019999571614366722, - "loss": 46.0, - "step": 38546 - }, - { - "epoch": 2.9471873387235505, - "grad_norm": 0.0031060539186000824, - "learning_rate": 0.00019999571592133587, - "loss": 46.0, - "step": 38547 - }, - { - "epoch": 2.9472637957069403, - "grad_norm": 0.0025489693507552147, - "learning_rate": 0.0001999957156989987, - "loss": 46.0, - "step": 38548 - }, - { - "epoch": 2.94734025269033, - "grad_norm": 0.0026762799825519323, - "learning_rate": 0.00019999571547665577, - "loss": 46.0, - "step": 38549 - }, - { - "epoch": 2.94741670967372, - "grad_norm": 0.002294783014804125, - "learning_rate": 0.00019999571525430705, - "loss": 46.0, - "step": 38550 - }, - { - "epoch": 2.9474931666571096, - "grad_norm": 0.0016876523150131106, - "learning_rate": 0.00019999571503195258, - "loss": 46.0, - "step": 38551 - }, - { - "epoch": 2.9475696236404993, - "grad_norm": 0.004489795304834843, - "learning_rate": 0.00019999571480959233, - "loss": 46.0, - "step": 38552 - }, - { - "epoch": 2.947646080623889, - "grad_norm": 0.0026544597931206226, - "learning_rate": 0.00019999571458722632, - "loss": 46.0, - "step": 38553 - }, - { - "epoch": 2.947722537607279, - "grad_norm": 0.0020726302172988653, - "learning_rate": 0.00019999571436485453, - "loss": 46.0, - "step": 38554 - }, - { - "epoch": 2.947798994590668, - "grad_norm": 0.0037922668270766735, - "learning_rate": 0.000199995714142477, - "loss": 46.0, - "step": 38555 - }, - { - "epoch": 2.947875451574058, - "grad_norm": 0.003436857834458351, - "learning_rate": 0.00019999571392009368, - "loss": 46.0, - "step": 38556 - }, - { - "epoch": 2.9479519085574477, - "grad_norm": 0.004491841420531273, - "learning_rate": 0.0001999957136977046, - "loss": 46.0, - "step": 38557 - }, - { - "epoch": 2.9480283655408375, - "grad_norm": 0.0032609403133392334, - "learning_rate": 0.00019999571347530974, - "loss": 46.0, - "step": 38558 - }, - { - "epoch": 2.9481048225242272, - "grad_norm": 0.005314024630934, - "learning_rate": 0.0001999957132529091, - "loss": 46.0, - "step": 38559 - }, - { - "epoch": 2.948181279507617, - "grad_norm": 0.0013576102210208774, - "learning_rate": 0.0001999957130305027, - "loss": 46.0, - "step": 38560 - }, - { - "epoch": 2.9482577364910068, - "grad_norm": 0.002082610735669732, - "learning_rate": 0.00019999571280809056, - "loss": 46.0, - "step": 38561 - }, - { - "epoch": 2.9483341934743965, - "grad_norm": 0.001264725113287568, - "learning_rate": 0.0001999957125856726, - "loss": 46.0, - "step": 38562 - }, - { - "epoch": 2.9484106504577863, - "grad_norm": 0.0018949240911751986, - "learning_rate": 0.0001999957123632489, - "loss": 46.0, - "step": 38563 - }, - { - "epoch": 2.948487107441176, - "grad_norm": 0.0033848145976662636, - "learning_rate": 0.00019999571214081944, - "loss": 46.0, - "step": 38564 - }, - { - "epoch": 2.948563564424566, - "grad_norm": 0.0008456098730675876, - "learning_rate": 0.0001999957119183842, - "loss": 46.0, - "step": 38565 - }, - { - "epoch": 2.9486400214079556, - "grad_norm": 0.003037538379430771, - "learning_rate": 0.00019999571169594318, - "loss": 46.0, - "step": 38566 - }, - { - "epoch": 2.9487164783913453, - "grad_norm": 0.0009184980881400406, - "learning_rate": 0.0001999957114734964, - "loss": 46.0, - "step": 38567 - }, - { - "epoch": 2.9487929353747346, - "grad_norm": 0.001417288207449019, - "learning_rate": 0.00019999571125104386, - "loss": 46.0, - "step": 38568 - }, - { - "epoch": 2.9488693923581244, - "grad_norm": 0.0021814866922795773, - "learning_rate": 0.00019999571102858555, - "loss": 46.0, - "step": 38569 - }, - { - "epoch": 2.948945849341514, - "grad_norm": 0.0022926959209144115, - "learning_rate": 0.00019999571080612144, - "loss": 46.0, - "step": 38570 - }, - { - "epoch": 2.949022306324904, - "grad_norm": 0.003184255911037326, - "learning_rate": 0.00019999571058365158, - "loss": 46.0, - "step": 38571 - }, - { - "epoch": 2.9490987633082937, - "grad_norm": 0.004917323123663664, - "learning_rate": 0.00019999571036117598, - "loss": 46.0, - "step": 38572 - }, - { - "epoch": 2.9491752202916834, - "grad_norm": 0.0021133313421159983, - "learning_rate": 0.00019999571013869458, - "loss": 46.0, - "step": 38573 - }, - { - "epoch": 2.949251677275073, - "grad_norm": 0.0008648965740576386, - "learning_rate": 0.00019999570991620743, - "loss": 46.0, - "step": 38574 - }, - { - "epoch": 2.949328134258463, - "grad_norm": 0.004948663990944624, - "learning_rate": 0.00019999570969371448, - "loss": 46.0, - "step": 38575 - }, - { - "epoch": 2.9494045912418523, - "grad_norm": 0.002718965755775571, - "learning_rate": 0.00019999570947121578, - "loss": 46.0, - "step": 38576 - }, - { - "epoch": 2.949481048225242, - "grad_norm": 0.0030901196878403425, - "learning_rate": 0.0001999957092487113, - "loss": 46.0, - "step": 38577 - }, - { - "epoch": 2.949557505208632, - "grad_norm": 0.0015326032880693674, - "learning_rate": 0.00019999570902620107, - "loss": 46.0, - "step": 38578 - }, - { - "epoch": 2.9496339621920216, - "grad_norm": 0.002147471997886896, - "learning_rate": 0.00019999570880368506, - "loss": 46.0, - "step": 38579 - }, - { - "epoch": 2.9497104191754113, - "grad_norm": 0.0009024632745422423, - "learning_rate": 0.00019999570858116327, - "loss": 46.0, - "step": 38580 - }, - { - "epoch": 2.949786876158801, - "grad_norm": 0.005958493798971176, - "learning_rate": 0.00019999570835863573, - "loss": 46.0, - "step": 38581 - }, - { - "epoch": 2.949863333142191, - "grad_norm": 0.002807225566357374, - "learning_rate": 0.0001999957081361024, - "loss": 46.0, - "step": 38582 - }, - { - "epoch": 2.9499397901255806, - "grad_norm": 0.004070540424436331, - "learning_rate": 0.0001999957079135633, - "loss": 46.0, - "step": 38583 - }, - { - "epoch": 2.9500162471089704, - "grad_norm": 0.0015553616685792804, - "learning_rate": 0.00019999570769101848, - "loss": 46.0, - "step": 38584 - }, - { - "epoch": 2.95009270409236, - "grad_norm": 0.0036861703265458345, - "learning_rate": 0.00019999570746846783, - "loss": 46.0, - "step": 38585 - }, - { - "epoch": 2.95016916107575, - "grad_norm": 0.002063591033220291, - "learning_rate": 0.00019999570724591145, - "loss": 46.0, - "step": 38586 - }, - { - "epoch": 2.9502456180591397, - "grad_norm": 0.0017155060777440667, - "learning_rate": 0.00019999570702334928, - "loss": 46.0, - "step": 38587 - }, - { - "epoch": 2.9503220750425294, - "grad_norm": 0.0027698769699782133, - "learning_rate": 0.00019999570680078135, - "loss": 46.0, - "step": 38588 - }, - { - "epoch": 2.950398532025919, - "grad_norm": 0.0014660131419077516, - "learning_rate": 0.00019999570657820766, - "loss": 46.0, - "step": 38589 - }, - { - "epoch": 2.9504749890093085, - "grad_norm": 0.0031355747487396, - "learning_rate": 0.0001999957063556282, - "loss": 46.0, - "step": 38590 - }, - { - "epoch": 2.9505514459926983, - "grad_norm": 0.0014126296155154705, - "learning_rate": 0.00019999570613304295, - "loss": 46.0, - "step": 38591 - }, - { - "epoch": 2.950627902976088, - "grad_norm": 0.007596690207719803, - "learning_rate": 0.00019999570591045193, - "loss": 46.0, - "step": 38592 - }, - { - "epoch": 2.9507043599594778, - "grad_norm": 0.001907376921735704, - "learning_rate": 0.00019999570568785514, - "loss": 46.0, - "step": 38593 - }, - { - "epoch": 2.9507808169428675, - "grad_norm": 0.0014813941670581698, - "learning_rate": 0.00019999570546525258, - "loss": 46.0, - "step": 38594 - }, - { - "epoch": 2.9508572739262573, - "grad_norm": 0.001786662032827735, - "learning_rate": 0.00019999570524264427, - "loss": 46.0, - "step": 38595 - }, - { - "epoch": 2.950933730909647, - "grad_norm": 0.004319278988987207, - "learning_rate": 0.0001999957050200302, - "loss": 46.0, - "step": 38596 - }, - { - "epoch": 2.951010187893037, - "grad_norm": 0.001897060894407332, - "learning_rate": 0.00019999570479741033, - "loss": 46.0, - "step": 38597 - }, - { - "epoch": 2.951086644876426, - "grad_norm": 0.0012420077109709382, - "learning_rate": 0.0001999957045747847, - "loss": 46.0, - "step": 38598 - }, - { - "epoch": 2.951163101859816, - "grad_norm": 0.0017158155096694827, - "learning_rate": 0.00019999570435215333, - "loss": 46.0, - "step": 38599 - }, - { - "epoch": 2.9512395588432057, - "grad_norm": 0.0032330851536244154, - "learning_rate": 0.00019999570412951615, - "loss": 46.0, - "step": 38600 - }, - { - "epoch": 2.9513160158265954, - "grad_norm": 0.0010303683811798692, - "learning_rate": 0.0001999957039068732, - "loss": 46.0, - "step": 38601 - }, - { - "epoch": 2.951392472809985, - "grad_norm": 0.004474753979593515, - "learning_rate": 0.0001999957036842245, - "loss": 46.0, - "step": 38602 - }, - { - "epoch": 2.951468929793375, - "grad_norm": 0.0023435058537870646, - "learning_rate": 0.00019999570346157004, - "loss": 46.0, - "step": 38603 - }, - { - "epoch": 2.9515453867767647, - "grad_norm": 0.003405747702345252, - "learning_rate": 0.00019999570323890983, - "loss": 46.0, - "step": 38604 - }, - { - "epoch": 2.9516218437601545, - "grad_norm": 0.0022501919884234667, - "learning_rate": 0.0001999957030162438, - "loss": 46.0, - "step": 38605 - }, - { - "epoch": 2.9516983007435442, - "grad_norm": 0.0011424679541960359, - "learning_rate": 0.00019999570279357202, - "loss": 46.0, - "step": 38606 - }, - { - "epoch": 2.951774757726934, - "grad_norm": 0.0010225395672023296, - "learning_rate": 0.00019999570257089449, - "loss": 46.0, - "step": 38607 - }, - { - "epoch": 2.9518512147103237, - "grad_norm": 0.0020619910210371017, - "learning_rate": 0.00019999570234821118, - "loss": 46.0, - "step": 38608 - }, - { - "epoch": 2.9519276716937135, - "grad_norm": 0.0071455081924796104, - "learning_rate": 0.00019999570212552207, - "loss": 46.0, - "step": 38609 - }, - { - "epoch": 2.9520041286771033, - "grad_norm": 0.0005961196729913354, - "learning_rate": 0.00019999570190282721, - "loss": 46.0, - "step": 38610 - }, - { - "epoch": 2.952080585660493, - "grad_norm": 0.0016357485437765718, - "learning_rate": 0.0001999957016801266, - "loss": 46.0, - "step": 38611 - }, - { - "epoch": 2.9521570426438823, - "grad_norm": 0.0024794924538582563, - "learning_rate": 0.0001999957014574202, - "loss": 46.0, - "step": 38612 - }, - { - "epoch": 2.952233499627272, - "grad_norm": 0.004225259646773338, - "learning_rate": 0.00019999570123470804, - "loss": 46.0, - "step": 38613 - }, - { - "epoch": 2.952309956610662, - "grad_norm": 0.0036041198763996363, - "learning_rate": 0.0001999957010119901, - "loss": 46.0, - "step": 38614 - }, - { - "epoch": 2.9523864135940516, - "grad_norm": 0.0029007666744291782, - "learning_rate": 0.00019999570078926642, - "loss": 46.0, - "step": 38615 - }, - { - "epoch": 2.9524628705774414, - "grad_norm": 0.000899720354937017, - "learning_rate": 0.00019999570056653692, - "loss": 46.0, - "step": 38616 - }, - { - "epoch": 2.952539327560831, - "grad_norm": 0.002860851353034377, - "learning_rate": 0.00019999570034380168, - "loss": 46.0, - "step": 38617 - }, - { - "epoch": 2.952615784544221, - "grad_norm": 0.004036292899399996, - "learning_rate": 0.0001999957001210607, - "loss": 46.0, - "step": 38618 - }, - { - "epoch": 2.9526922415276107, - "grad_norm": 0.0024592482950538397, - "learning_rate": 0.0001999956998983139, - "loss": 46.0, - "step": 38619 - }, - { - "epoch": 2.952768698511, - "grad_norm": 0.0029361334163695574, - "learning_rate": 0.00019999569967556134, - "loss": 46.0, - "step": 38620 - }, - { - "epoch": 2.9528451554943898, - "grad_norm": 0.0014306538505479693, - "learning_rate": 0.00019999569945280303, - "loss": 46.0, - "step": 38621 - }, - { - "epoch": 2.9529216124777795, - "grad_norm": 0.004527061711996794, - "learning_rate": 0.00019999569923003895, - "loss": 46.0, - "step": 38622 - }, - { - "epoch": 2.9529980694611693, - "grad_norm": 0.0012273305328562856, - "learning_rate": 0.0001999956990072691, - "loss": 46.0, - "step": 38623 - }, - { - "epoch": 2.953074526444559, - "grad_norm": 0.004408834036439657, - "learning_rate": 0.0001999956987844935, - "loss": 46.0, - "step": 38624 - }, - { - "epoch": 2.953150983427949, - "grad_norm": 0.002043339656665921, - "learning_rate": 0.0001999956985617121, - "loss": 46.0, - "step": 38625 - }, - { - "epoch": 2.9532274404113386, - "grad_norm": 0.002869096351787448, - "learning_rate": 0.00019999569833892492, - "loss": 46.0, - "step": 38626 - }, - { - "epoch": 2.9533038973947283, - "grad_norm": 0.0034318435937166214, - "learning_rate": 0.000199995698116132, - "loss": 46.0, - "step": 38627 - }, - { - "epoch": 2.953380354378118, - "grad_norm": 0.0022060545161366463, - "learning_rate": 0.00019999569789333328, - "loss": 46.0, - "step": 38628 - }, - { - "epoch": 2.953456811361508, - "grad_norm": 0.003766795387491584, - "learning_rate": 0.0001999956976705288, - "loss": 46.0, - "step": 38629 - }, - { - "epoch": 2.9535332683448976, - "grad_norm": 0.0034128769766539335, - "learning_rate": 0.00019999569744771857, - "loss": 46.0, - "step": 38630 - }, - { - "epoch": 2.9536097253282874, - "grad_norm": 0.0008646061760373414, - "learning_rate": 0.00019999569722490258, - "loss": 46.0, - "step": 38631 - }, - { - "epoch": 2.953686182311677, - "grad_norm": 0.0017679266165941954, - "learning_rate": 0.0001999956970020808, - "loss": 46.0, - "step": 38632 - }, - { - "epoch": 2.953762639295067, - "grad_norm": 0.006460228003561497, - "learning_rate": 0.00019999569677925326, - "loss": 46.0, - "step": 38633 - }, - { - "epoch": 2.953839096278456, - "grad_norm": 0.0033067085314542055, - "learning_rate": 0.00019999569655641995, - "loss": 46.0, - "step": 38634 - }, - { - "epoch": 2.953915553261846, - "grad_norm": 0.0007943941163830459, - "learning_rate": 0.00019999569633358084, - "loss": 46.0, - "step": 38635 - }, - { - "epoch": 2.9539920102452357, - "grad_norm": 0.005821706727147102, - "learning_rate": 0.000199995696110736, - "loss": 46.0, - "step": 38636 - }, - { - "epoch": 2.9540684672286255, - "grad_norm": 0.0017797807231545448, - "learning_rate": 0.00019999569588788536, - "loss": 46.0, - "step": 38637 - }, - { - "epoch": 2.9541449242120152, - "grad_norm": 0.001619631191715598, - "learning_rate": 0.00019999569566502899, - "loss": 46.0, - "step": 38638 - }, - { - "epoch": 2.954221381195405, - "grad_norm": 0.004914996214210987, - "learning_rate": 0.0001999956954421668, - "loss": 46.0, - "step": 38639 - }, - { - "epoch": 2.9542978381787948, - "grad_norm": 0.0014280570903792977, - "learning_rate": 0.00019999569521929886, - "loss": 46.0, - "step": 38640 - }, - { - "epoch": 2.9543742951621845, - "grad_norm": 0.004295347724109888, - "learning_rate": 0.0001999956949964252, - "loss": 46.0, - "step": 38641 - }, - { - "epoch": 2.954450752145574, - "grad_norm": 0.002865126123651862, - "learning_rate": 0.0001999956947735457, - "loss": 46.0, - "step": 38642 - }, - { - "epoch": 2.9545272091289636, - "grad_norm": 0.002506297081708908, - "learning_rate": 0.00019999569455066046, - "loss": 46.0, - "step": 38643 - }, - { - "epoch": 2.9546036661123534, - "grad_norm": 0.0028410139493644238, - "learning_rate": 0.00019999569432776945, - "loss": 46.0, - "step": 38644 - }, - { - "epoch": 2.954680123095743, - "grad_norm": 0.0013299144338816404, - "learning_rate": 0.00019999569410487266, - "loss": 46.0, - "step": 38645 - }, - { - "epoch": 2.954756580079133, - "grad_norm": 0.004807759542018175, - "learning_rate": 0.00019999569388197012, - "loss": 46.0, - "step": 38646 - }, - { - "epoch": 2.9548330370625226, - "grad_norm": 0.003323440905660391, - "learning_rate": 0.00019999569365906182, - "loss": 46.0, - "step": 38647 - }, - { - "epoch": 2.9549094940459124, - "grad_norm": 0.00833585113286972, - "learning_rate": 0.00019999569343614773, - "loss": 46.0, - "step": 38648 - }, - { - "epoch": 2.954985951029302, - "grad_norm": 0.0031237269286066294, - "learning_rate": 0.00019999569321322785, - "loss": 46.0, - "step": 38649 - }, - { - "epoch": 2.955062408012692, - "grad_norm": 0.001065508578903973, - "learning_rate": 0.00019999569299030225, - "loss": 46.0, - "step": 38650 - }, - { - "epoch": 2.9551388649960817, - "grad_norm": 0.004416194278746843, - "learning_rate": 0.00019999569276737088, - "loss": 46.0, - "step": 38651 - }, - { - "epoch": 2.9552153219794715, - "grad_norm": 0.0031110697891563177, - "learning_rate": 0.00019999569254443368, - "loss": 46.0, - "step": 38652 - }, - { - "epoch": 2.955291778962861, - "grad_norm": 0.0026335616130381823, - "learning_rate": 0.00019999569232149078, - "loss": 46.0, - "step": 38653 - }, - { - "epoch": 2.955368235946251, - "grad_norm": 0.0008569032652303576, - "learning_rate": 0.00019999569209854206, - "loss": 46.0, - "step": 38654 - }, - { - "epoch": 2.9554446929296407, - "grad_norm": 0.0017517261439934373, - "learning_rate": 0.0001999956918755876, - "loss": 46.0, - "step": 38655 - }, - { - "epoch": 2.95552114991303, - "grad_norm": 0.0053923544473946095, - "learning_rate": 0.00019999569165262738, - "loss": 46.0, - "step": 38656 - }, - { - "epoch": 2.95559760689642, - "grad_norm": 0.0012485375627875328, - "learning_rate": 0.00019999569142966134, - "loss": 46.0, - "step": 38657 - }, - { - "epoch": 2.9556740638798096, - "grad_norm": 0.003949600737541914, - "learning_rate": 0.00019999569120668958, - "loss": 46.0, - "step": 38658 - }, - { - "epoch": 2.9557505208631993, - "grad_norm": 0.0009021171717904508, - "learning_rate": 0.00019999569098371202, - "loss": 46.0, - "step": 38659 - }, - { - "epoch": 2.955826977846589, - "grad_norm": 0.0034618934150785208, - "learning_rate": 0.0001999956907607287, - "loss": 46.0, - "step": 38660 - }, - { - "epoch": 2.955903434829979, - "grad_norm": 0.002640408230945468, - "learning_rate": 0.00019999569053773963, - "loss": 46.0, - "step": 38661 - }, - { - "epoch": 2.9559798918133686, - "grad_norm": 0.0011499692918732762, - "learning_rate": 0.00019999569031474478, - "loss": 46.0, - "step": 38662 - }, - { - "epoch": 2.9560563487967584, - "grad_norm": 0.0034911585971713066, - "learning_rate": 0.00019999569009174415, - "loss": 46.0, - "step": 38663 - }, - { - "epoch": 2.9561328057801477, - "grad_norm": 0.001569836982525885, - "learning_rate": 0.00019999568986873775, - "loss": 46.0, - "step": 38664 - }, - { - "epoch": 2.9562092627635375, - "grad_norm": 0.0017723095370456576, - "learning_rate": 0.0001999956896457256, - "loss": 46.0, - "step": 38665 - }, - { - "epoch": 2.956285719746927, - "grad_norm": 0.0033342435490339994, - "learning_rate": 0.00019999568942270766, - "loss": 46.0, - "step": 38666 - }, - { - "epoch": 2.956362176730317, - "grad_norm": 0.0023418692871928215, - "learning_rate": 0.00019999568919968396, - "loss": 46.0, - "step": 38667 - }, - { - "epoch": 2.9564386337137067, - "grad_norm": 0.0022423886694014072, - "learning_rate": 0.0001999956889766545, - "loss": 46.0, - "step": 38668 - }, - { - "epoch": 2.9565150906970965, - "grad_norm": 0.002424341393634677, - "learning_rate": 0.00019999568875361925, - "loss": 46.0, - "step": 38669 - }, - { - "epoch": 2.9565915476804863, - "grad_norm": 0.00486412039026618, - "learning_rate": 0.00019999568853057824, - "loss": 46.0, - "step": 38670 - }, - { - "epoch": 2.956668004663876, - "grad_norm": 0.002259011147543788, - "learning_rate": 0.00019999568830753148, - "loss": 46.0, - "step": 38671 - }, - { - "epoch": 2.956744461647266, - "grad_norm": 0.0011656327405944467, - "learning_rate": 0.00019999568808447892, - "loss": 46.0, - "step": 38672 - }, - { - "epoch": 2.9568209186306555, - "grad_norm": 0.001422735396772623, - "learning_rate": 0.0001999956878614206, - "loss": 46.0, - "step": 38673 - }, - { - "epoch": 2.9568973756140453, - "grad_norm": 0.003281527431681752, - "learning_rate": 0.00019999568763835653, - "loss": 46.0, - "step": 38674 - }, - { - "epoch": 2.956973832597435, - "grad_norm": 0.005856127943843603, - "learning_rate": 0.00019999568741528668, - "loss": 46.0, - "step": 38675 - }, - { - "epoch": 2.957050289580825, - "grad_norm": 0.0014301514020189643, - "learning_rate": 0.00019999568719221105, - "loss": 46.0, - "step": 38676 - }, - { - "epoch": 2.957126746564214, - "grad_norm": 0.0009442302980460227, - "learning_rate": 0.00019999568696912965, - "loss": 46.0, - "step": 38677 - }, - { - "epoch": 2.957203203547604, - "grad_norm": 0.002449366496875882, - "learning_rate": 0.0001999956867460425, - "loss": 46.0, - "step": 38678 - }, - { - "epoch": 2.9572796605309937, - "grad_norm": 0.0007358395378105342, - "learning_rate": 0.00019999568652294959, - "loss": 46.0, - "step": 38679 - }, - { - "epoch": 2.9573561175143834, - "grad_norm": 0.0011241902830079198, - "learning_rate": 0.00019999568629985086, - "loss": 46.0, - "step": 38680 - }, - { - "epoch": 2.957432574497773, - "grad_norm": 0.0013472603168338537, - "learning_rate": 0.0001999956860767464, - "loss": 46.0, - "step": 38681 - }, - { - "epoch": 2.957509031481163, - "grad_norm": 0.0019767757039517164, - "learning_rate": 0.00019999568585363616, - "loss": 46.0, - "step": 38682 - }, - { - "epoch": 2.9575854884645527, - "grad_norm": 0.003985450603067875, - "learning_rate": 0.00019999568563052014, - "loss": 46.0, - "step": 38683 - }, - { - "epoch": 2.9576619454479425, - "grad_norm": 0.0027131284587085247, - "learning_rate": 0.00019999568540739836, - "loss": 46.0, - "step": 38684 - }, - { - "epoch": 2.9577384024313322, - "grad_norm": 0.0007529185386374593, - "learning_rate": 0.0001999956851842708, - "loss": 46.0, - "step": 38685 - }, - { - "epoch": 2.9578148594147216, - "grad_norm": 0.0015757158398628235, - "learning_rate": 0.00019999568496113752, - "loss": 46.0, - "step": 38686 - }, - { - "epoch": 2.9578913163981113, - "grad_norm": 0.0020403778180480003, - "learning_rate": 0.0001999956847379984, - "loss": 46.0, - "step": 38687 - }, - { - "epoch": 2.957967773381501, - "grad_norm": 0.001927541452459991, - "learning_rate": 0.00019999568451485358, - "loss": 46.0, - "step": 38688 - }, - { - "epoch": 2.958044230364891, - "grad_norm": 0.0016172969480976462, - "learning_rate": 0.00019999568429170296, - "loss": 46.0, - "step": 38689 - }, - { - "epoch": 2.9581206873482806, - "grad_norm": 0.0034206684213131666, - "learning_rate": 0.00019999568406854653, - "loss": 46.0, - "step": 38690 - }, - { - "epoch": 2.9581971443316704, - "grad_norm": 0.003295660950243473, - "learning_rate": 0.00019999568384538438, - "loss": 46.0, - "step": 38691 - }, - { - "epoch": 2.95827360131506, - "grad_norm": 0.0006767706945538521, - "learning_rate": 0.00019999568362221646, - "loss": 46.0, - "step": 38692 - }, - { - "epoch": 2.95835005829845, - "grad_norm": 0.00301443412899971, - "learning_rate": 0.00019999568339904274, - "loss": 46.0, - "step": 38693 - }, - { - "epoch": 2.9584265152818396, - "grad_norm": 0.0030937818810343742, - "learning_rate": 0.00019999568317586328, - "loss": 46.0, - "step": 38694 - }, - { - "epoch": 2.9585029722652294, - "grad_norm": 0.0016957969637587667, - "learning_rate": 0.00019999568295267807, - "loss": 46.0, - "step": 38695 - }, - { - "epoch": 2.958579429248619, - "grad_norm": 0.001401284127496183, - "learning_rate": 0.00019999568272948705, - "loss": 46.0, - "step": 38696 - }, - { - "epoch": 2.958655886232009, - "grad_norm": 0.0027776320930570364, - "learning_rate": 0.00019999568250629027, - "loss": 46.0, - "step": 38697 - }, - { - "epoch": 2.9587323432153987, - "grad_norm": 0.0032301959581673145, - "learning_rate": 0.00019999568228308773, - "loss": 46.0, - "step": 38698 - }, - { - "epoch": 2.958808800198788, - "grad_norm": 0.0037341329734772444, - "learning_rate": 0.00019999568205987943, - "loss": 46.0, - "step": 38699 - }, - { - "epoch": 2.9588852571821778, - "grad_norm": 0.0018835713854059577, - "learning_rate": 0.00019999568183666532, - "loss": 46.0, - "step": 38700 - }, - { - "epoch": 2.9589617141655675, - "grad_norm": 0.00538292434066534, - "learning_rate": 0.0001999956816134455, - "loss": 46.0, - "step": 38701 - }, - { - "epoch": 2.9590381711489573, - "grad_norm": 0.0025257535744458437, - "learning_rate": 0.00019999568139021987, - "loss": 46.0, - "step": 38702 - }, - { - "epoch": 2.959114628132347, - "grad_norm": 0.0016155935591086745, - "learning_rate": 0.00019999568116698847, - "loss": 46.0, - "step": 38703 - }, - { - "epoch": 2.959191085115737, - "grad_norm": 0.004132924135774374, - "learning_rate": 0.00019999568094375132, - "loss": 46.0, - "step": 38704 - }, - { - "epoch": 2.9592675420991266, - "grad_norm": 0.0028732081409543753, - "learning_rate": 0.00019999568072050838, - "loss": 46.0, - "step": 38705 - }, - { - "epoch": 2.9593439990825163, - "grad_norm": 0.0037886768113821745, - "learning_rate": 0.00019999568049725968, - "loss": 46.0, - "step": 38706 - }, - { - "epoch": 2.9594204560659056, - "grad_norm": 0.002697874093428254, - "learning_rate": 0.0001999956802740052, - "loss": 46.0, - "step": 38707 - }, - { - "epoch": 2.9594969130492954, - "grad_norm": 0.005122511647641659, - "learning_rate": 0.00019999568005074498, - "loss": 46.0, - "step": 38708 - }, - { - "epoch": 2.959573370032685, - "grad_norm": 0.006092989817261696, - "learning_rate": 0.00019999567982747896, - "loss": 46.0, - "step": 38709 - }, - { - "epoch": 2.959649827016075, - "grad_norm": 0.005526279099285603, - "learning_rate": 0.0001999956796042072, - "loss": 46.0, - "step": 38710 - }, - { - "epoch": 2.9597262839994647, - "grad_norm": 0.005327354650944471, - "learning_rate": 0.00019999567938092962, - "loss": 46.0, - "step": 38711 - }, - { - "epoch": 2.9598027409828545, - "grad_norm": 0.0024364099372178316, - "learning_rate": 0.0001999956791576463, - "loss": 46.0, - "step": 38712 - }, - { - "epoch": 2.959879197966244, - "grad_norm": 0.0010068166302517056, - "learning_rate": 0.00019999567893435723, - "loss": 46.0, - "step": 38713 - }, - { - "epoch": 2.959955654949634, - "grad_norm": 0.008561637252569199, - "learning_rate": 0.0001999956787110624, - "loss": 46.0, - "step": 38714 - }, - { - "epoch": 2.9600321119330237, - "grad_norm": 0.0012215106980875134, - "learning_rate": 0.00019999567848776178, - "loss": 46.0, - "step": 38715 - }, - { - "epoch": 2.9601085689164135, - "grad_norm": 0.001945184194482863, - "learning_rate": 0.00019999567826445538, - "loss": 46.0, - "step": 38716 - }, - { - "epoch": 2.9601850258998033, - "grad_norm": 0.0041485317051410675, - "learning_rate": 0.0001999956780411432, - "loss": 46.0, - "step": 38717 - }, - { - "epoch": 2.960261482883193, - "grad_norm": 0.0019014314748346806, - "learning_rate": 0.0001999956778178253, - "loss": 46.0, - "step": 38718 - }, - { - "epoch": 2.960337939866583, - "grad_norm": 0.0023423656821250916, - "learning_rate": 0.0001999956775945016, - "loss": 46.0, - "step": 38719 - }, - { - "epoch": 2.9604143968499725, - "grad_norm": 0.0026675541885197163, - "learning_rate": 0.0001999956773711721, - "loss": 46.0, - "step": 38720 - }, - { - "epoch": 2.960490853833362, - "grad_norm": 0.001858965028077364, - "learning_rate": 0.0001999956771478369, - "loss": 46.0, - "step": 38721 - }, - { - "epoch": 2.9605673108167516, - "grad_norm": 0.0016284739831462502, - "learning_rate": 0.00019999567692449588, - "loss": 46.0, - "step": 38722 - }, - { - "epoch": 2.9606437678001414, - "grad_norm": 0.0014827321283519268, - "learning_rate": 0.0001999956767011491, - "loss": 46.0, - "step": 38723 - }, - { - "epoch": 2.960720224783531, - "grad_norm": 0.0024489688221365213, - "learning_rate": 0.00019999567647779656, - "loss": 46.0, - "step": 38724 - }, - { - "epoch": 2.960796681766921, - "grad_norm": 0.003848556661978364, - "learning_rate": 0.00019999567625443826, - "loss": 46.0, - "step": 38725 - }, - { - "epoch": 2.9608731387503107, - "grad_norm": 0.0014507209416478872, - "learning_rate": 0.00019999567603107418, - "loss": 46.0, - "step": 38726 - }, - { - "epoch": 2.9609495957337004, - "grad_norm": 0.0029634302482008934, - "learning_rate": 0.0001999956758077043, - "loss": 46.0, - "step": 38727 - }, - { - "epoch": 2.96102605271709, - "grad_norm": 0.0006416336400434375, - "learning_rate": 0.00019999567558432867, - "loss": 46.0, - "step": 38728 - }, - { - "epoch": 2.9611025097004795, - "grad_norm": 0.0008342862129211426, - "learning_rate": 0.00019999567536094727, - "loss": 46.0, - "step": 38729 - }, - { - "epoch": 2.9611789666838693, - "grad_norm": 0.0015737359644845128, - "learning_rate": 0.00019999567513756015, - "loss": 46.0, - "step": 38730 - }, - { - "epoch": 2.961255423667259, - "grad_norm": 0.0036144196055829525, - "learning_rate": 0.0001999956749141672, - "loss": 46.0, - "step": 38731 - }, - { - "epoch": 2.961331880650649, - "grad_norm": 0.0028963545337319374, - "learning_rate": 0.00019999567469076852, - "loss": 46.0, - "step": 38732 - }, - { - "epoch": 2.9614083376340385, - "grad_norm": 0.002600293140858412, - "learning_rate": 0.00019999567446736402, - "loss": 46.0, - "step": 38733 - }, - { - "epoch": 2.9614847946174283, - "grad_norm": 0.0018781140679493546, - "learning_rate": 0.0001999956742439538, - "loss": 46.0, - "step": 38734 - }, - { - "epoch": 2.961561251600818, - "grad_norm": 0.003751951502636075, - "learning_rate": 0.0001999956740205378, - "loss": 46.0, - "step": 38735 - }, - { - "epoch": 2.961637708584208, - "grad_norm": 0.003273369511589408, - "learning_rate": 0.00019999567379711601, - "loss": 46.0, - "step": 38736 - }, - { - "epoch": 2.9617141655675976, - "grad_norm": 0.0011937698582187295, - "learning_rate": 0.00019999567357368846, - "loss": 46.0, - "step": 38737 - }, - { - "epoch": 2.9617906225509874, - "grad_norm": 0.005577050149440765, - "learning_rate": 0.00019999567335025518, - "loss": 46.0, - "step": 38738 - }, - { - "epoch": 2.961867079534377, - "grad_norm": 0.0025489041581749916, - "learning_rate": 0.0001999956731268161, - "loss": 46.0, - "step": 38739 - }, - { - "epoch": 2.961943536517767, - "grad_norm": 0.0004022567009087652, - "learning_rate": 0.00019999567290337122, - "loss": 46.0, - "step": 38740 - }, - { - "epoch": 2.9620199935011566, - "grad_norm": 0.0006471026572398841, - "learning_rate": 0.0001999956726799206, - "loss": 46.0, - "step": 38741 - }, - { - "epoch": 2.9620964504845464, - "grad_norm": 0.0005794462049379945, - "learning_rate": 0.00019999567245646422, - "loss": 46.0, - "step": 38742 - }, - { - "epoch": 2.9621729074679357, - "grad_norm": 0.003686587093397975, - "learning_rate": 0.00019999567223300205, - "loss": 46.0, - "step": 38743 - }, - { - "epoch": 2.9622493644513255, - "grad_norm": 0.0008808143902570009, - "learning_rate": 0.00019999567200953413, - "loss": 46.0, - "step": 38744 - }, - { - "epoch": 2.9623258214347152, - "grad_norm": 0.0007387800142168999, - "learning_rate": 0.0001999956717860604, - "loss": 46.0, - "step": 38745 - }, - { - "epoch": 2.962402278418105, - "grad_norm": 0.0026468741707503796, - "learning_rate": 0.00019999567156258095, - "loss": 46.0, - "step": 38746 - }, - { - "epoch": 2.9624787354014948, - "grad_norm": 0.003015848807990551, - "learning_rate": 0.0001999956713390957, - "loss": 46.0, - "step": 38747 - }, - { - "epoch": 2.9625551923848845, - "grad_norm": 0.0035268678329885006, - "learning_rate": 0.0001999956711156047, - "loss": 46.0, - "step": 38748 - }, - { - "epoch": 2.9626316493682743, - "grad_norm": 0.0023516786750406027, - "learning_rate": 0.00019999567089210794, - "loss": 46.0, - "step": 38749 - }, - { - "epoch": 2.962708106351664, - "grad_norm": 0.001929847989231348, - "learning_rate": 0.00019999567066860538, - "loss": 46.0, - "step": 38750 - }, - { - "epoch": 2.9627845633350534, - "grad_norm": 0.003065502969548106, - "learning_rate": 0.00019999567044509708, - "loss": 46.0, - "step": 38751 - }, - { - "epoch": 2.962861020318443, - "grad_norm": 0.0014847610145807266, - "learning_rate": 0.000199995670221583, - "loss": 46.0, - "step": 38752 - }, - { - "epoch": 2.962937477301833, - "grad_norm": 0.007171249948441982, - "learning_rate": 0.00019999566999806315, - "loss": 46.0, - "step": 38753 - }, - { - "epoch": 2.9630139342852226, - "grad_norm": 0.0041608791798353195, - "learning_rate": 0.00019999566977453755, - "loss": 46.0, - "step": 38754 - }, - { - "epoch": 2.9630903912686124, - "grad_norm": 0.0039712004363536835, - "learning_rate": 0.00019999566955100612, - "loss": 46.0, - "step": 38755 - }, - { - "epoch": 2.963166848252002, - "grad_norm": 0.0007582994876429439, - "learning_rate": 0.00019999566932746898, - "loss": 46.0, - "step": 38756 - }, - { - "epoch": 2.963243305235392, - "grad_norm": 0.0007962352246977389, - "learning_rate": 0.00019999566910392606, - "loss": 46.0, - "step": 38757 - }, - { - "epoch": 2.9633197622187817, - "grad_norm": 0.004281751345843077, - "learning_rate": 0.00019999566888037734, - "loss": 46.0, - "step": 38758 - }, - { - "epoch": 2.9633962192021714, - "grad_norm": 0.002316530328243971, - "learning_rate": 0.00019999566865682288, - "loss": 46.0, - "step": 38759 - }, - { - "epoch": 2.963472676185561, - "grad_norm": 0.001087358221411705, - "learning_rate": 0.00019999566843326264, - "loss": 46.0, - "step": 38760 - }, - { - "epoch": 2.963549133168951, - "grad_norm": 0.0012984341010451317, - "learning_rate": 0.00019999566820969665, - "loss": 46.0, - "step": 38761 - }, - { - "epoch": 2.9636255901523407, - "grad_norm": 0.004118119366466999, - "learning_rate": 0.00019999566798612487, - "loss": 46.0, - "step": 38762 - }, - { - "epoch": 2.9637020471357305, - "grad_norm": 0.003220656653866172, - "learning_rate": 0.00019999566776254734, - "loss": 46.0, - "step": 38763 - }, - { - "epoch": 2.9637785041191202, - "grad_norm": 0.0031028245575726032, - "learning_rate": 0.000199995667538964, - "loss": 46.0, - "step": 38764 - }, - { - "epoch": 2.9638549611025096, - "grad_norm": 0.0020329959224909544, - "learning_rate": 0.00019999566731537493, - "loss": 46.0, - "step": 38765 - }, - { - "epoch": 2.9639314180858993, - "grad_norm": 0.007521012332290411, - "learning_rate": 0.00019999566709178008, - "loss": 46.0, - "step": 38766 - }, - { - "epoch": 2.964007875069289, - "grad_norm": 0.001825822750106454, - "learning_rate": 0.00019999566686817945, - "loss": 46.0, - "step": 38767 - }, - { - "epoch": 2.964084332052679, - "grad_norm": 0.002598043065518141, - "learning_rate": 0.00019999566664457308, - "loss": 46.0, - "step": 38768 - }, - { - "epoch": 2.9641607890360686, - "grad_norm": 0.0035057917702943087, - "learning_rate": 0.0001999956664209609, - "loss": 46.0, - "step": 38769 - }, - { - "epoch": 2.9642372460194584, - "grad_norm": 0.0018140760948881507, - "learning_rate": 0.000199995666197343, - "loss": 46.0, - "step": 38770 - }, - { - "epoch": 2.964313703002848, - "grad_norm": 0.0031152910087257624, - "learning_rate": 0.00019999566597371927, - "loss": 46.0, - "step": 38771 - }, - { - "epoch": 2.964390159986238, - "grad_norm": 0.0035517015494406223, - "learning_rate": 0.0001999956657500898, - "loss": 46.0, - "step": 38772 - }, - { - "epoch": 2.964466616969627, - "grad_norm": 0.019760986790060997, - "learning_rate": 0.0001999956655264546, - "loss": 46.0, - "step": 38773 - }, - { - "epoch": 2.964543073953017, - "grad_norm": 0.004486551508307457, - "learning_rate": 0.00019999566530281359, - "loss": 46.0, - "step": 38774 - }, - { - "epoch": 2.9646195309364067, - "grad_norm": 0.004180152900516987, - "learning_rate": 0.0001999956650791668, - "loss": 46.0, - "step": 38775 - }, - { - "epoch": 2.9646959879197965, - "grad_norm": 0.000872608448844403, - "learning_rate": 0.00019999566485551427, - "loss": 46.0, - "step": 38776 - }, - { - "epoch": 2.9647724449031863, - "grad_norm": 0.001664087874814868, - "learning_rate": 0.00019999566463185594, - "loss": 46.0, - "step": 38777 - }, - { - "epoch": 2.964848901886576, - "grad_norm": 0.003988146316260099, - "learning_rate": 0.00019999566440819186, - "loss": 46.0, - "step": 38778 - }, - { - "epoch": 2.9649253588699658, - "grad_norm": 0.0021746321581304073, - "learning_rate": 0.00019999566418452204, - "loss": 46.0, - "step": 38779 - }, - { - "epoch": 2.9650018158533555, - "grad_norm": 0.0029714221600443125, - "learning_rate": 0.00019999566396084638, - "loss": 46.0, - "step": 38780 - }, - { - "epoch": 2.9650782728367453, - "grad_norm": 0.0009956000139936805, - "learning_rate": 0.000199995663737165, - "loss": 46.0, - "step": 38781 - }, - { - "epoch": 2.965154729820135, - "grad_norm": 0.006941694766283035, - "learning_rate": 0.00019999566351347782, - "loss": 46.0, - "step": 38782 - }, - { - "epoch": 2.965231186803525, - "grad_norm": 0.003042785217985511, - "learning_rate": 0.00019999566328978493, - "loss": 46.0, - "step": 38783 - }, - { - "epoch": 2.9653076437869146, - "grad_norm": 0.0036308171693235636, - "learning_rate": 0.0001999956630660862, - "loss": 46.0, - "step": 38784 - }, - { - "epoch": 2.9653841007703043, - "grad_norm": 0.002663103397935629, - "learning_rate": 0.00019999566284238177, - "loss": 46.0, - "step": 38785 - }, - { - "epoch": 2.965460557753694, - "grad_norm": 0.0012971964897587895, - "learning_rate": 0.0001999956626186715, - "loss": 46.0, - "step": 38786 - }, - { - "epoch": 2.9655370147370834, - "grad_norm": 0.003447584342211485, - "learning_rate": 0.0001999956623949555, - "loss": 46.0, - "step": 38787 - }, - { - "epoch": 2.965613471720473, - "grad_norm": 0.0016339856665581465, - "learning_rate": 0.00019999566217123374, - "loss": 46.0, - "step": 38788 - }, - { - "epoch": 2.965689928703863, - "grad_norm": 0.0029338186141103506, - "learning_rate": 0.0001999956619475062, - "loss": 46.0, - "step": 38789 - }, - { - "epoch": 2.9657663856872527, - "grad_norm": 0.0040512001141905785, - "learning_rate": 0.00019999566172377288, - "loss": 46.0, - "step": 38790 - }, - { - "epoch": 2.9658428426706425, - "grad_norm": 0.0013265768066048622, - "learning_rate": 0.0001999956615000338, - "loss": 46.0, - "step": 38791 - }, - { - "epoch": 2.9659192996540322, - "grad_norm": 0.0028855325654149055, - "learning_rate": 0.00019999566127628895, - "loss": 46.0, - "step": 38792 - }, - { - "epoch": 2.965995756637422, - "grad_norm": 0.000699925294611603, - "learning_rate": 0.00019999566105253832, - "loss": 46.0, - "step": 38793 - }, - { - "epoch": 2.9660722136208117, - "grad_norm": 0.0008901142864488065, - "learning_rate": 0.00019999566082878193, - "loss": 46.0, - "step": 38794 - }, - { - "epoch": 2.966148670604201, - "grad_norm": 0.003148681716993451, - "learning_rate": 0.00019999566060501978, - "loss": 46.0, - "step": 38795 - }, - { - "epoch": 2.966225127587591, - "grad_norm": 0.00447940593585372, - "learning_rate": 0.00019999566038125184, - "loss": 46.0, - "step": 38796 - }, - { - "epoch": 2.9663015845709806, - "grad_norm": 0.006700800731778145, - "learning_rate": 0.00019999566015747817, - "loss": 46.0, - "step": 38797 - }, - { - "epoch": 2.9663780415543703, - "grad_norm": 0.004799845162779093, - "learning_rate": 0.00019999565993369868, - "loss": 46.0, - "step": 38798 - }, - { - "epoch": 2.96645449853776, - "grad_norm": 0.001041700947098434, - "learning_rate": 0.00019999565970991345, - "loss": 46.0, - "step": 38799 - }, - { - "epoch": 2.96653095552115, - "grad_norm": 0.0014246506616473198, - "learning_rate": 0.00019999565948612244, - "loss": 46.0, - "step": 38800 - }, - { - "epoch": 2.9666074125045396, - "grad_norm": 0.001999033149331808, - "learning_rate": 0.00019999565926232568, - "loss": 46.0, - "step": 38801 - }, - { - "epoch": 2.9666838694879294, - "grad_norm": 0.008738222531974316, - "learning_rate": 0.00019999565903852312, - "loss": 46.0, - "step": 38802 - }, - { - "epoch": 2.966760326471319, - "grad_norm": 0.0029781197663396597, - "learning_rate": 0.00019999565881471482, - "loss": 46.0, - "step": 38803 - }, - { - "epoch": 2.966836783454709, - "grad_norm": 0.005128840915858746, - "learning_rate": 0.0001999956585909007, - "loss": 46.0, - "step": 38804 - }, - { - "epoch": 2.9669132404380987, - "grad_norm": 0.002178050111979246, - "learning_rate": 0.0001999956583670809, - "loss": 46.0, - "step": 38805 - }, - { - "epoch": 2.9669896974214884, - "grad_norm": 0.001666230265982449, - "learning_rate": 0.00019999565814325527, - "loss": 46.0, - "step": 38806 - }, - { - "epoch": 2.967066154404878, - "grad_norm": 0.002604331821203232, - "learning_rate": 0.0001999956579194239, - "loss": 46.0, - "step": 38807 - }, - { - "epoch": 2.9671426113882675, - "grad_norm": 0.0011654363479465246, - "learning_rate": 0.00019999565769558673, - "loss": 46.0, - "step": 38808 - }, - { - "epoch": 2.9672190683716573, - "grad_norm": 0.0025039773900061846, - "learning_rate": 0.0001999956574717438, - "loss": 46.0, - "step": 38809 - }, - { - "epoch": 2.967295525355047, - "grad_norm": 0.003030512947589159, - "learning_rate": 0.00019999565724789512, - "loss": 46.0, - "step": 38810 - }, - { - "epoch": 2.967371982338437, - "grad_norm": 0.0021453530061990023, - "learning_rate": 0.00019999565702404063, - "loss": 46.0, - "step": 38811 - }, - { - "epoch": 2.9674484393218266, - "grad_norm": 0.0027277146000415087, - "learning_rate": 0.0001999956568001804, - "loss": 46.0, - "step": 38812 - }, - { - "epoch": 2.9675248963052163, - "grad_norm": 0.0007995835039764643, - "learning_rate": 0.0001999956565763144, - "loss": 46.0, - "step": 38813 - }, - { - "epoch": 2.967601353288606, - "grad_norm": 0.002258928259834647, - "learning_rate": 0.00019999565635244265, - "loss": 46.0, - "step": 38814 - }, - { - "epoch": 2.967677810271996, - "grad_norm": 0.0027025151066482067, - "learning_rate": 0.00019999565612856507, - "loss": 46.0, - "step": 38815 - }, - { - "epoch": 2.9677542672553856, - "grad_norm": 0.0007156643550843, - "learning_rate": 0.00019999565590468176, - "loss": 46.0, - "step": 38816 - }, - { - "epoch": 2.967830724238775, - "grad_norm": 0.00210730847902596, - "learning_rate": 0.0001999956556807927, - "loss": 46.0, - "step": 38817 - }, - { - "epoch": 2.9679071812221647, - "grad_norm": 0.0020987922325730324, - "learning_rate": 0.00019999565545689784, - "loss": 46.0, - "step": 38818 - }, - { - "epoch": 2.9679836382055544, - "grad_norm": 0.0038130159955471754, - "learning_rate": 0.0001999956552329972, - "loss": 46.0, - "step": 38819 - }, - { - "epoch": 2.968060095188944, - "grad_norm": 0.002649113768711686, - "learning_rate": 0.00019999565500909084, - "loss": 46.0, - "step": 38820 - }, - { - "epoch": 2.968136552172334, - "grad_norm": 0.006833228282630444, - "learning_rate": 0.00019999565478517867, - "loss": 46.0, - "step": 38821 - }, - { - "epoch": 2.9682130091557237, - "grad_norm": 0.007193616591393948, - "learning_rate": 0.00019999565456126076, - "loss": 46.0, - "step": 38822 - }, - { - "epoch": 2.9682894661391135, - "grad_norm": 0.0016768421046435833, - "learning_rate": 0.00019999565433733704, - "loss": 46.0, - "step": 38823 - }, - { - "epoch": 2.9683659231225032, - "grad_norm": 0.0023161936551332474, - "learning_rate": 0.0001999956541134076, - "loss": 46.0, - "step": 38824 - }, - { - "epoch": 2.968442380105893, - "grad_norm": 0.002078343415632844, - "learning_rate": 0.00019999565388947234, - "loss": 46.0, - "step": 38825 - }, - { - "epoch": 2.9685188370892828, - "grad_norm": 0.011018405668437481, - "learning_rate": 0.00019999565366553136, - "loss": 46.0, - "step": 38826 - }, - { - "epoch": 2.9685952940726725, - "grad_norm": 0.0011039135279133916, - "learning_rate": 0.00019999565344158457, - "loss": 46.0, - "step": 38827 - }, - { - "epoch": 2.9686717510560623, - "grad_norm": 0.0019084344385191798, - "learning_rate": 0.00019999565321763202, - "loss": 46.0, - "step": 38828 - }, - { - "epoch": 2.968748208039452, - "grad_norm": 0.003199186408892274, - "learning_rate": 0.00019999565299367374, - "loss": 46.0, - "step": 38829 - }, - { - "epoch": 2.9688246650228414, - "grad_norm": 0.0023745347280055285, - "learning_rate": 0.00019999565276970964, - "loss": 46.0, - "step": 38830 - }, - { - "epoch": 2.968901122006231, - "grad_norm": 0.0014878212241455913, - "learning_rate": 0.00019999565254573979, - "loss": 46.0, - "step": 38831 - }, - { - "epoch": 2.968977578989621, - "grad_norm": 0.0013919976772740483, - "learning_rate": 0.0001999956523217642, - "loss": 46.0, - "step": 38832 - }, - { - "epoch": 2.9690540359730107, - "grad_norm": 0.0047453478910028934, - "learning_rate": 0.00019999565209778277, - "loss": 46.0, - "step": 38833 - }, - { - "epoch": 2.9691304929564004, - "grad_norm": 0.0026916400529444218, - "learning_rate": 0.00019999565187379562, - "loss": 46.0, - "step": 38834 - }, - { - "epoch": 2.96920694993979, - "grad_norm": 0.0006237321067601442, - "learning_rate": 0.0001999956516498027, - "loss": 46.0, - "step": 38835 - }, - { - "epoch": 2.96928340692318, - "grad_norm": 0.0015316014178097248, - "learning_rate": 0.000199995651425804, - "loss": 46.0, - "step": 38836 - }, - { - "epoch": 2.9693598639065697, - "grad_norm": 0.002091713948175311, - "learning_rate": 0.00019999565120179953, - "loss": 46.0, - "step": 38837 - }, - { - "epoch": 2.969436320889959, - "grad_norm": 0.0018464390886947513, - "learning_rate": 0.0001999956509777893, - "loss": 46.0, - "step": 38838 - }, - { - "epoch": 2.9695127778733488, - "grad_norm": 0.0012911810772493482, - "learning_rate": 0.00019999565075377328, - "loss": 46.0, - "step": 38839 - }, - { - "epoch": 2.9695892348567385, - "grad_norm": 0.002915911842137575, - "learning_rate": 0.00019999565052975153, - "loss": 46.0, - "step": 38840 - }, - { - "epoch": 2.9696656918401283, - "grad_norm": 0.0013452632119879127, - "learning_rate": 0.000199995650305724, - "loss": 46.0, - "step": 38841 - }, - { - "epoch": 2.969742148823518, - "grad_norm": 0.0016468253452330828, - "learning_rate": 0.00019999565008169067, - "loss": 46.0, - "step": 38842 - }, - { - "epoch": 2.969818605806908, - "grad_norm": 0.002326331799849868, - "learning_rate": 0.0001999956498576516, - "loss": 46.0, - "step": 38843 - }, - { - "epoch": 2.9698950627902976, - "grad_norm": 0.0012755782809108496, - "learning_rate": 0.00019999564963360674, - "loss": 46.0, - "step": 38844 - }, - { - "epoch": 2.9699715197736873, - "grad_norm": 0.0029674021061509848, - "learning_rate": 0.00019999564940955612, - "loss": 46.0, - "step": 38845 - }, - { - "epoch": 2.970047976757077, - "grad_norm": 0.000567444774787873, - "learning_rate": 0.00019999564918549972, - "loss": 46.0, - "step": 38846 - }, - { - "epoch": 2.970124433740467, - "grad_norm": 0.0013332696398720145, - "learning_rate": 0.00019999564896143758, - "loss": 46.0, - "step": 38847 - }, - { - "epoch": 2.9702008907238566, - "grad_norm": 0.0015207903925329447, - "learning_rate": 0.00019999564873736966, - "loss": 46.0, - "step": 38848 - }, - { - "epoch": 2.9702773477072464, - "grad_norm": 0.0021245379466563463, - "learning_rate": 0.00019999564851329597, - "loss": 46.0, - "step": 38849 - }, - { - "epoch": 2.970353804690636, - "grad_norm": 0.004931046161800623, - "learning_rate": 0.0001999956482892165, - "loss": 46.0, - "step": 38850 - }, - { - "epoch": 2.970430261674026, - "grad_norm": 0.0015642642974853516, - "learning_rate": 0.00019999564806513127, - "loss": 46.0, - "step": 38851 - }, - { - "epoch": 2.9705067186574152, - "grad_norm": 0.0014162542065605521, - "learning_rate": 0.00019999564784104027, - "loss": 46.0, - "step": 38852 - }, - { - "epoch": 2.970583175640805, - "grad_norm": 0.0017431830056011677, - "learning_rate": 0.00019999564761694348, - "loss": 46.0, - "step": 38853 - }, - { - "epoch": 2.9706596326241947, - "grad_norm": 0.003257500473409891, - "learning_rate": 0.00019999564739284095, - "loss": 46.0, - "step": 38854 - }, - { - "epoch": 2.9707360896075845, - "grad_norm": 0.0019210914615541697, - "learning_rate": 0.00019999564716873262, - "loss": 46.0, - "step": 38855 - }, - { - "epoch": 2.9708125465909743, - "grad_norm": 0.0072126430459320545, - "learning_rate": 0.00019999564694461858, - "loss": 46.0, - "step": 38856 - }, - { - "epoch": 2.970889003574364, - "grad_norm": 0.002768451115116477, - "learning_rate": 0.0001999956467204987, - "loss": 46.0, - "step": 38857 - }, - { - "epoch": 2.970965460557754, - "grad_norm": 0.0008489834726788104, - "learning_rate": 0.00019999564649637308, - "loss": 46.0, - "step": 38858 - }, - { - "epoch": 2.9710419175411436, - "grad_norm": 0.0013236310333013535, - "learning_rate": 0.0001999956462722417, - "loss": 46.0, - "step": 38859 - }, - { - "epoch": 2.971118374524533, - "grad_norm": 0.006933864671736956, - "learning_rate": 0.00019999564604810454, - "loss": 46.0, - "step": 38860 - }, - { - "epoch": 2.9711948315079226, - "grad_norm": 0.006330015603452921, - "learning_rate": 0.00019999564582396162, - "loss": 46.0, - "step": 38861 - }, - { - "epoch": 2.9712712884913124, - "grad_norm": 0.005118245258927345, - "learning_rate": 0.0001999956455998129, - "loss": 46.0, - "step": 38862 - }, - { - "epoch": 2.971347745474702, - "grad_norm": 0.003926456440240145, - "learning_rate": 0.00019999564537565844, - "loss": 46.0, - "step": 38863 - }, - { - "epoch": 2.971424202458092, - "grad_norm": 0.0034800253342837095, - "learning_rate": 0.00019999564515149824, - "loss": 46.0, - "step": 38864 - }, - { - "epoch": 2.9715006594414817, - "grad_norm": 0.0008141734870150685, - "learning_rate": 0.00019999564492733223, - "loss": 46.0, - "step": 38865 - }, - { - "epoch": 2.9715771164248714, - "grad_norm": 0.0009938642615452409, - "learning_rate": 0.00019999564470316044, - "loss": 46.0, - "step": 38866 - }, - { - "epoch": 2.971653573408261, - "grad_norm": 0.0009254764881916344, - "learning_rate": 0.00019999564447898292, - "loss": 46.0, - "step": 38867 - }, - { - "epoch": 2.971730030391651, - "grad_norm": 0.001480698585510254, - "learning_rate": 0.00019999564425479959, - "loss": 46.0, - "step": 38868 - }, - { - "epoch": 2.9718064873750407, - "grad_norm": 0.0027772849425673485, - "learning_rate": 0.0001999956440306105, - "loss": 46.0, - "step": 38869 - }, - { - "epoch": 2.9718829443584305, - "grad_norm": 0.003581203054636717, - "learning_rate": 0.00019999564380641566, - "loss": 46.0, - "step": 38870 - }, - { - "epoch": 2.9719594013418202, - "grad_norm": 0.0036480564158409834, - "learning_rate": 0.00019999564358221507, - "loss": 46.0, - "step": 38871 - }, - { - "epoch": 2.97203585832521, - "grad_norm": 0.001648459816351533, - "learning_rate": 0.00019999564335800867, - "loss": 46.0, - "step": 38872 - }, - { - "epoch": 2.9721123153085998, - "grad_norm": 0.002940130652859807, - "learning_rate": 0.0001999956431337965, - "loss": 46.0, - "step": 38873 - }, - { - "epoch": 2.972188772291989, - "grad_norm": 0.002545031951740384, - "learning_rate": 0.0001999956429095786, - "loss": 46.0, - "step": 38874 - }, - { - "epoch": 2.972265229275379, - "grad_norm": 0.001602409640327096, - "learning_rate": 0.0001999956426853549, - "loss": 46.0, - "step": 38875 - }, - { - "epoch": 2.9723416862587686, - "grad_norm": 0.001739976927638054, - "learning_rate": 0.00019999564246112544, - "loss": 46.0, - "step": 38876 - }, - { - "epoch": 2.9724181432421584, - "grad_norm": 0.005991366691887379, - "learning_rate": 0.00019999564223689017, - "loss": 46.0, - "step": 38877 - }, - { - "epoch": 2.972494600225548, - "grad_norm": 0.0025928064715117216, - "learning_rate": 0.0001999956420126492, - "loss": 46.0, - "step": 38878 - }, - { - "epoch": 2.972571057208938, - "grad_norm": 0.0015057475538924336, - "learning_rate": 0.0001999956417884024, - "loss": 46.0, - "step": 38879 - }, - { - "epoch": 2.9726475141923276, - "grad_norm": 0.002212582156062126, - "learning_rate": 0.00019999564156414988, - "loss": 46.0, - "step": 38880 - }, - { - "epoch": 2.9727239711757174, - "grad_norm": 0.008032852783799171, - "learning_rate": 0.00019999564133989158, - "loss": 46.0, - "step": 38881 - }, - { - "epoch": 2.9728004281591067, - "grad_norm": 0.002232054015621543, - "learning_rate": 0.0001999956411156275, - "loss": 46.0, - "step": 38882 - }, - { - "epoch": 2.9728768851424965, - "grad_norm": 0.003509450936689973, - "learning_rate": 0.00019999564089135766, - "loss": 46.0, - "step": 38883 - }, - { - "epoch": 2.9729533421258862, - "grad_norm": 0.0025061210617423058, - "learning_rate": 0.00019999564066708203, - "loss": 46.0, - "step": 38884 - }, - { - "epoch": 2.973029799109276, - "grad_norm": 0.017708707600831985, - "learning_rate": 0.00019999564044280064, - "loss": 46.0, - "step": 38885 - }, - { - "epoch": 2.9731062560926658, - "grad_norm": 0.00591287249699235, - "learning_rate": 0.00019999564021851347, - "loss": 46.0, - "step": 38886 - }, - { - "epoch": 2.9731827130760555, - "grad_norm": 0.0017712911358103156, - "learning_rate": 0.00019999563999422058, - "loss": 46.0, - "step": 38887 - }, - { - "epoch": 2.9732591700594453, - "grad_norm": 0.0023941416293382645, - "learning_rate": 0.00019999563976992187, - "loss": 46.0, - "step": 38888 - }, - { - "epoch": 2.973335627042835, - "grad_norm": 0.0014949578326195478, - "learning_rate": 0.0001999956395456174, - "loss": 46.0, - "step": 38889 - }, - { - "epoch": 2.973412084026225, - "grad_norm": 0.0029613198712468147, - "learning_rate": 0.00019999563932130717, - "loss": 46.0, - "step": 38890 - }, - { - "epoch": 2.9734885410096146, - "grad_norm": 0.0008647587965242565, - "learning_rate": 0.00019999563909699116, - "loss": 46.0, - "step": 38891 - }, - { - "epoch": 2.9735649979930043, - "grad_norm": 0.0031321656424552202, - "learning_rate": 0.0001999956388726694, - "loss": 46.0, - "step": 38892 - }, - { - "epoch": 2.973641454976394, - "grad_norm": 0.0014598460402339697, - "learning_rate": 0.00019999563864834185, - "loss": 46.0, - "step": 38893 - }, - { - "epoch": 2.973717911959784, - "grad_norm": 0.0012144835200160742, - "learning_rate": 0.00019999563842400855, - "loss": 46.0, - "step": 38894 - }, - { - "epoch": 2.9737943689431736, - "grad_norm": 0.004519733600318432, - "learning_rate": 0.00019999563819966947, - "loss": 46.0, - "step": 38895 - }, - { - "epoch": 2.973870825926563, - "grad_norm": 0.005155148450285196, - "learning_rate": 0.00019999563797532463, - "loss": 46.0, - "step": 38896 - }, - { - "epoch": 2.9739472829099527, - "grad_norm": 0.001507338136434555, - "learning_rate": 0.000199995637750974, - "loss": 46.0, - "step": 38897 - }, - { - "epoch": 2.9740237398933425, - "grad_norm": 0.001250439672730863, - "learning_rate": 0.0001999956375266176, - "loss": 46.0, - "step": 38898 - }, - { - "epoch": 2.974100196876732, - "grad_norm": 0.0022691816557198763, - "learning_rate": 0.00019999563730225547, - "loss": 46.0, - "step": 38899 - }, - { - "epoch": 2.974176653860122, - "grad_norm": 0.0016748163616284728, - "learning_rate": 0.00019999563707788753, - "loss": 46.0, - "step": 38900 - }, - { - "epoch": 2.9742531108435117, - "grad_norm": 0.0017410056898370385, - "learning_rate": 0.00019999563685351387, - "loss": 46.0, - "step": 38901 - }, - { - "epoch": 2.9743295678269015, - "grad_norm": 0.007299117278307676, - "learning_rate": 0.00019999563662913438, - "loss": 46.0, - "step": 38902 - }, - { - "epoch": 2.9744060248102913, - "grad_norm": 0.0008122262079268694, - "learning_rate": 0.00019999563640474914, - "loss": 46.0, - "step": 38903 - }, - { - "epoch": 2.9744824817936806, - "grad_norm": 0.0010236442321911454, - "learning_rate": 0.00019999563618035816, - "loss": 46.0, - "step": 38904 - }, - { - "epoch": 2.9745589387770703, - "grad_norm": 0.0025179386138916016, - "learning_rate": 0.00019999563595596138, - "loss": 46.0, - "step": 38905 - }, - { - "epoch": 2.97463539576046, - "grad_norm": 0.004896245896816254, - "learning_rate": 0.00019999563573155885, - "loss": 46.0, - "step": 38906 - }, - { - "epoch": 2.97471185274385, - "grad_norm": 0.0018146460643038154, - "learning_rate": 0.00019999563550715052, - "loss": 46.0, - "step": 38907 - }, - { - "epoch": 2.9747883097272396, - "grad_norm": 0.00169906928204, - "learning_rate": 0.00019999563528273645, - "loss": 46.0, - "step": 38908 - }, - { - "epoch": 2.9748647667106294, - "grad_norm": 0.001374110928736627, - "learning_rate": 0.0001999956350583166, - "loss": 46.0, - "step": 38909 - }, - { - "epoch": 2.974941223694019, - "grad_norm": 0.002236147178336978, - "learning_rate": 0.00019999563483389098, - "loss": 46.0, - "step": 38910 - }, - { - "epoch": 2.975017680677409, - "grad_norm": 0.0019094528397545218, - "learning_rate": 0.00019999563460945959, - "loss": 46.0, - "step": 38911 - }, - { - "epoch": 2.9750941376607987, - "grad_norm": 0.0012492660898715258, - "learning_rate": 0.00019999563438502244, - "loss": 46.0, - "step": 38912 - }, - { - "epoch": 2.9751705946441884, - "grad_norm": 0.0013261620188131928, - "learning_rate": 0.0001999956341605795, - "loss": 46.0, - "step": 38913 - }, - { - "epoch": 2.975247051627578, - "grad_norm": 0.0022912088315933943, - "learning_rate": 0.00019999563393613082, - "loss": 46.0, - "step": 38914 - }, - { - "epoch": 2.975323508610968, - "grad_norm": 0.003404107643291354, - "learning_rate": 0.00019999563371167635, - "loss": 46.0, - "step": 38915 - }, - { - "epoch": 2.9753999655943577, - "grad_norm": 0.0008852720493450761, - "learning_rate": 0.00019999563348721612, - "loss": 46.0, - "step": 38916 - }, - { - "epoch": 2.9754764225777475, - "grad_norm": 0.001354023814201355, - "learning_rate": 0.00019999563326275014, - "loss": 46.0, - "step": 38917 - }, - { - "epoch": 2.975552879561137, - "grad_norm": 0.0038675107061862946, - "learning_rate": 0.00019999563303827836, - "loss": 46.0, - "step": 38918 - }, - { - "epoch": 2.9756293365445265, - "grad_norm": 0.0012964140623807907, - "learning_rate": 0.0001999956328138008, - "loss": 46.0, - "step": 38919 - }, - { - "epoch": 2.9757057935279163, - "grad_norm": 0.0010123233078047633, - "learning_rate": 0.0001999956325893175, - "loss": 46.0, - "step": 38920 - }, - { - "epoch": 2.975782250511306, - "grad_norm": 0.0013981809606775641, - "learning_rate": 0.00019999563236482843, - "loss": 46.0, - "step": 38921 - }, - { - "epoch": 2.975858707494696, - "grad_norm": 0.002034079981967807, - "learning_rate": 0.00019999563214033355, - "loss": 46.0, - "step": 38922 - }, - { - "epoch": 2.9759351644780856, - "grad_norm": 0.003962273709475994, - "learning_rate": 0.00019999563191583296, - "loss": 46.0, - "step": 38923 - }, - { - "epoch": 2.9760116214614754, - "grad_norm": 0.0016956643667072058, - "learning_rate": 0.0001999956316913266, - "loss": 46.0, - "step": 38924 - }, - { - "epoch": 2.976088078444865, - "grad_norm": 0.003656771732494235, - "learning_rate": 0.00019999563146681442, - "loss": 46.0, - "step": 38925 - }, - { - "epoch": 2.9761645354282544, - "grad_norm": 0.002302858978509903, - "learning_rate": 0.0001999956312422965, - "loss": 46.0, - "step": 38926 - }, - { - "epoch": 2.976240992411644, - "grad_norm": 0.0006120320758782327, - "learning_rate": 0.00019999563101777282, - "loss": 46.0, - "step": 38927 - }, - { - "epoch": 2.976317449395034, - "grad_norm": 0.0040608961135149, - "learning_rate": 0.00019999563079324336, - "loss": 46.0, - "step": 38928 - }, - { - "epoch": 2.9763939063784237, - "grad_norm": 0.0036757125053554773, - "learning_rate": 0.00019999563056870813, - "loss": 46.0, - "step": 38929 - }, - { - "epoch": 2.9764703633618135, - "grad_norm": 0.0010158759541809559, - "learning_rate": 0.0001999956303441671, - "loss": 46.0, - "step": 38930 - }, - { - "epoch": 2.9765468203452032, - "grad_norm": 0.005072961561381817, - "learning_rate": 0.00019999563011962034, - "loss": 46.0, - "step": 38931 - }, - { - "epoch": 2.976623277328593, - "grad_norm": 0.0014291313709691167, - "learning_rate": 0.0001999956298950678, - "loss": 46.0, - "step": 38932 - }, - { - "epoch": 2.9766997343119828, - "grad_norm": 0.0014953481731936336, - "learning_rate": 0.00019999562967050948, - "loss": 46.0, - "step": 38933 - }, - { - "epoch": 2.9767761912953725, - "grad_norm": 0.0038099829107522964, - "learning_rate": 0.00019999562944594544, - "loss": 46.0, - "step": 38934 - }, - { - "epoch": 2.9768526482787623, - "grad_norm": 0.002259933389723301, - "learning_rate": 0.00019999562922137556, - "loss": 46.0, - "step": 38935 - }, - { - "epoch": 2.976929105262152, - "grad_norm": 0.0022849731612950563, - "learning_rate": 0.00019999562899679994, - "loss": 46.0, - "step": 38936 - }, - { - "epoch": 2.977005562245542, - "grad_norm": 0.001822706894017756, - "learning_rate": 0.00019999562877221858, - "loss": 46.0, - "step": 38937 - }, - { - "epoch": 2.9770820192289316, - "grad_norm": 0.0012723540421575308, - "learning_rate": 0.00019999562854763144, - "loss": 46.0, - "step": 38938 - }, - { - "epoch": 2.977158476212321, - "grad_norm": 0.0011593495728448033, - "learning_rate": 0.00019999562832303852, - "loss": 46.0, - "step": 38939 - }, - { - "epoch": 2.9772349331957106, - "grad_norm": 0.0023144090082496405, - "learning_rate": 0.0001999956280984398, - "loss": 46.0, - "step": 38940 - }, - { - "epoch": 2.9773113901791004, - "grad_norm": 0.0015373806236311793, - "learning_rate": 0.00019999562787383535, - "loss": 46.0, - "step": 38941 - }, - { - "epoch": 2.97738784716249, - "grad_norm": 0.0015771586913615465, - "learning_rate": 0.0001999956276492251, - "loss": 46.0, - "step": 38942 - }, - { - "epoch": 2.97746430414588, - "grad_norm": 0.0019857927691191435, - "learning_rate": 0.0001999956274246091, - "loss": 46.0, - "step": 38943 - }, - { - "epoch": 2.9775407611292697, - "grad_norm": 0.004098236560821533, - "learning_rate": 0.00019999562719998735, - "loss": 46.0, - "step": 38944 - }, - { - "epoch": 2.9776172181126594, - "grad_norm": 0.0029844462405890226, - "learning_rate": 0.00019999562697535977, - "loss": 46.0, - "step": 38945 - }, - { - "epoch": 2.977693675096049, - "grad_norm": 0.0023544856812804937, - "learning_rate": 0.0001999956267507265, - "loss": 46.0, - "step": 38946 - }, - { - "epoch": 2.977770132079439, - "grad_norm": 0.0018966731149703264, - "learning_rate": 0.0001999956265260874, - "loss": 46.0, - "step": 38947 - }, - { - "epoch": 2.9778465890628283, - "grad_norm": 0.008401605300605297, - "learning_rate": 0.00019999562630144255, - "loss": 46.0, - "step": 38948 - }, - { - "epoch": 2.977923046046218, - "grad_norm": 0.00455885985866189, - "learning_rate": 0.00019999562607679193, - "loss": 46.0, - "step": 38949 - }, - { - "epoch": 2.977999503029608, - "grad_norm": 0.003829419380053878, - "learning_rate": 0.00019999562585213556, - "loss": 46.0, - "step": 38950 - }, - { - "epoch": 2.9780759600129976, - "grad_norm": 0.0049112578853964806, - "learning_rate": 0.0001999956256274734, - "loss": 46.0, - "step": 38951 - }, - { - "epoch": 2.9781524169963873, - "grad_norm": 0.00432144571095705, - "learning_rate": 0.00019999562540280548, - "loss": 46.0, - "step": 38952 - }, - { - "epoch": 2.978228873979777, - "grad_norm": 0.0030643471982330084, - "learning_rate": 0.0001999956251781318, - "loss": 46.0, - "step": 38953 - }, - { - "epoch": 2.978305330963167, - "grad_norm": 0.002610486000776291, - "learning_rate": 0.00019999562495345234, - "loss": 46.0, - "step": 38954 - }, - { - "epoch": 2.9783817879465566, - "grad_norm": 0.001192101277410984, - "learning_rate": 0.0001999956247287671, - "loss": 46.0, - "step": 38955 - }, - { - "epoch": 2.9784582449299464, - "grad_norm": 0.0018854161025956273, - "learning_rate": 0.00019999562450407612, - "loss": 46.0, - "step": 38956 - }, - { - "epoch": 2.978534701913336, - "grad_norm": 0.001914657885208726, - "learning_rate": 0.00019999562427937932, - "loss": 46.0, - "step": 38957 - }, - { - "epoch": 2.978611158896726, - "grad_norm": 0.0024616485461592674, - "learning_rate": 0.0001999956240546768, - "loss": 46.0, - "step": 38958 - }, - { - "epoch": 2.9786876158801157, - "grad_norm": 0.0013303844025358558, - "learning_rate": 0.00019999562382996852, - "loss": 46.0, - "step": 38959 - }, - { - "epoch": 2.9787640728635054, - "grad_norm": 0.002290409291163087, - "learning_rate": 0.00019999562360525442, - "loss": 46.0, - "step": 38960 - }, - { - "epoch": 2.9788405298468947, - "grad_norm": 0.001430421369150281, - "learning_rate": 0.00019999562338053457, - "loss": 46.0, - "step": 38961 - }, - { - "epoch": 2.9789169868302845, - "grad_norm": 0.0009139743633568287, - "learning_rate": 0.00019999562315580895, - "loss": 46.0, - "step": 38962 - }, - { - "epoch": 2.9789934438136743, - "grad_norm": 0.001719501568004489, - "learning_rate": 0.00019999562293107756, - "loss": 46.0, - "step": 38963 - }, - { - "epoch": 2.979069900797064, - "grad_norm": 0.0027095619589090347, - "learning_rate": 0.00019999562270634042, - "loss": 46.0, - "step": 38964 - }, - { - "epoch": 2.979146357780454, - "grad_norm": 0.0020940606482326984, - "learning_rate": 0.00019999562248159748, - "loss": 46.0, - "step": 38965 - }, - { - "epoch": 2.9792228147638435, - "grad_norm": 0.002448778599500656, - "learning_rate": 0.0001999956222568488, - "loss": 46.0, - "step": 38966 - }, - { - "epoch": 2.9792992717472333, - "grad_norm": 0.0009370583575218916, - "learning_rate": 0.00019999562203209436, - "loss": 46.0, - "step": 38967 - }, - { - "epoch": 2.979375728730623, - "grad_norm": 0.004019725136458874, - "learning_rate": 0.00019999562180733412, - "loss": 46.0, - "step": 38968 - }, - { - "epoch": 2.9794521857140124, - "grad_norm": 0.003771245013922453, - "learning_rate": 0.00019999562158256812, - "loss": 46.0, - "step": 38969 - }, - { - "epoch": 2.979528642697402, - "grad_norm": 0.0008153815288096666, - "learning_rate": 0.00019999562135779634, - "loss": 46.0, - "step": 38970 - }, - { - "epoch": 2.979605099680792, - "grad_norm": 0.0019376752898097038, - "learning_rate": 0.00019999562113301878, - "loss": 46.0, - "step": 38971 - }, - { - "epoch": 2.9796815566641817, - "grad_norm": 0.0027577003929764032, - "learning_rate": 0.00019999562090823548, - "loss": 46.0, - "step": 38972 - }, - { - "epoch": 2.9797580136475714, - "grad_norm": 0.003916645888239145, - "learning_rate": 0.0001999956206834464, - "loss": 46.0, - "step": 38973 - }, - { - "epoch": 2.979834470630961, - "grad_norm": 0.002174672205001116, - "learning_rate": 0.00019999562045865156, - "loss": 46.0, - "step": 38974 - }, - { - "epoch": 2.979910927614351, - "grad_norm": 0.006645541172474623, - "learning_rate": 0.00019999562023385097, - "loss": 46.0, - "step": 38975 - }, - { - "epoch": 2.9799873845977407, - "grad_norm": 0.0017266924260184169, - "learning_rate": 0.00019999562000904458, - "loss": 46.0, - "step": 38976 - }, - { - "epoch": 2.9800638415811305, - "grad_norm": 0.002607109025120735, - "learning_rate": 0.0001999956197842324, - "loss": 46.0, - "step": 38977 - }, - { - "epoch": 2.9801402985645202, - "grad_norm": 0.0019106668187305331, - "learning_rate": 0.0001999956195594145, - "loss": 46.0, - "step": 38978 - }, - { - "epoch": 2.98021675554791, - "grad_norm": 0.004048858769237995, - "learning_rate": 0.00019999561933459079, - "loss": 46.0, - "step": 38979 - }, - { - "epoch": 2.9802932125312998, - "grad_norm": 0.0015311338938772678, - "learning_rate": 0.00019999561910976135, - "loss": 46.0, - "step": 38980 - }, - { - "epoch": 2.9803696695146895, - "grad_norm": 0.0006093823467381299, - "learning_rate": 0.00019999561888492612, - "loss": 46.0, - "step": 38981 - }, - { - "epoch": 2.9804461264980793, - "grad_norm": 0.0028479464817792177, - "learning_rate": 0.00019999561866008511, - "loss": 46.0, - "step": 38982 - }, - { - "epoch": 2.9805225834814686, - "grad_norm": 0.002826940966770053, - "learning_rate": 0.00019999561843523833, - "loss": 46.0, - "step": 38983 - }, - { - "epoch": 2.9805990404648584, - "grad_norm": 0.0030750504229217768, - "learning_rate": 0.00019999561821038578, - "loss": 46.0, - "step": 38984 - }, - { - "epoch": 2.980675497448248, - "grad_norm": 0.0016807789215818048, - "learning_rate": 0.0001999956179855275, - "loss": 46.0, - "step": 38985 - }, - { - "epoch": 2.980751954431638, - "grad_norm": 0.0015888669295236468, - "learning_rate": 0.0001999956177606634, - "loss": 46.0, - "step": 38986 - }, - { - "epoch": 2.9808284114150276, - "grad_norm": 0.0010277723195031285, - "learning_rate": 0.0001999956175357936, - "loss": 46.0, - "step": 38987 - }, - { - "epoch": 2.9809048683984174, - "grad_norm": 0.002044037217274308, - "learning_rate": 0.00019999561731091797, - "loss": 46.0, - "step": 38988 - }, - { - "epoch": 2.980981325381807, - "grad_norm": 0.0021695501636713743, - "learning_rate": 0.00019999561708603658, - "loss": 46.0, - "step": 38989 - }, - { - "epoch": 2.981057782365197, - "grad_norm": 0.0012121469480916858, - "learning_rate": 0.00019999561686114944, - "loss": 46.0, - "step": 38990 - }, - { - "epoch": 2.9811342393485862, - "grad_norm": 0.005473044700920582, - "learning_rate": 0.00019999561663625653, - "loss": 46.0, - "step": 38991 - }, - { - "epoch": 2.981210696331976, - "grad_norm": 0.001451272633858025, - "learning_rate": 0.0001999956164113578, - "loss": 46.0, - "step": 38992 - }, - { - "epoch": 2.9812871533153658, - "grad_norm": 0.006416152697056532, - "learning_rate": 0.00019999561618645335, - "loss": 46.0, - "step": 38993 - }, - { - "epoch": 2.9813636102987555, - "grad_norm": 0.0032489148434251547, - "learning_rate": 0.00019999561596154312, - "loss": 46.0, - "step": 38994 - }, - { - "epoch": 2.9814400672821453, - "grad_norm": 0.002617703750729561, - "learning_rate": 0.00019999561573662712, - "loss": 46.0, - "step": 38995 - }, - { - "epoch": 2.981516524265535, - "grad_norm": 0.0008853967301547527, - "learning_rate": 0.00019999561551170536, - "loss": 46.0, - "step": 38996 - }, - { - "epoch": 2.981592981248925, - "grad_norm": 0.003279887605458498, - "learning_rate": 0.0001999956152867778, - "loss": 46.0, - "step": 38997 - }, - { - "epoch": 2.9816694382323146, - "grad_norm": 0.0017831254517659545, - "learning_rate": 0.0001999956150618445, - "loss": 46.0, - "step": 38998 - }, - { - "epoch": 2.9817458952157043, - "grad_norm": 0.0017559000989422202, - "learning_rate": 0.00019999561483690547, - "loss": 46.0, - "step": 38999 - }, - { - "epoch": 2.981822352199094, - "grad_norm": 0.0021074169781059027, - "learning_rate": 0.0001999956146119606, - "loss": 46.0, - "step": 39000 - }, - { - "epoch": 2.981898809182484, - "grad_norm": 0.0014945173170417547, - "learning_rate": 0.00019999561438700998, - "loss": 46.0, - "step": 39001 - }, - { - "epoch": 2.9819752661658736, - "grad_norm": 0.000690289365593344, - "learning_rate": 0.00019999561416205358, - "loss": 46.0, - "step": 39002 - }, - { - "epoch": 2.9820517231492634, - "grad_norm": 0.0010788417421281338, - "learning_rate": 0.00019999561393709144, - "loss": 46.0, - "step": 39003 - }, - { - "epoch": 2.982128180132653, - "grad_norm": 0.001646575634367764, - "learning_rate": 0.00019999561371212353, - "loss": 46.0, - "step": 39004 - }, - { - "epoch": 2.9822046371160424, - "grad_norm": 0.0013893601717427373, - "learning_rate": 0.00019999561348714985, - "loss": 46.0, - "step": 39005 - }, - { - "epoch": 2.982281094099432, - "grad_norm": 0.0010865854565054178, - "learning_rate": 0.00019999561326217036, - "loss": 46.0, - "step": 39006 - }, - { - "epoch": 2.982357551082822, - "grad_norm": 0.003971052821725607, - "learning_rate": 0.00019999561303718513, - "loss": 46.0, - "step": 39007 - }, - { - "epoch": 2.9824340080662117, - "grad_norm": 0.0012208069674670696, - "learning_rate": 0.00019999561281219415, - "loss": 46.0, - "step": 39008 - }, - { - "epoch": 2.9825104650496015, - "grad_norm": 0.0006932918913662434, - "learning_rate": 0.0001999956125871974, - "loss": 46.0, - "step": 39009 - }, - { - "epoch": 2.9825869220329913, - "grad_norm": 0.0016632204642519355, - "learning_rate": 0.00019999561236219485, - "loss": 46.0, - "step": 39010 - }, - { - "epoch": 2.982663379016381, - "grad_norm": 0.0022916116286069155, - "learning_rate": 0.00019999561213718652, - "loss": 46.0, - "step": 39011 - }, - { - "epoch": 2.9827398359997708, - "grad_norm": 0.002489052014425397, - "learning_rate": 0.00019999561191217247, - "loss": 46.0, - "step": 39012 - }, - { - "epoch": 2.98281629298316, - "grad_norm": 0.003345897188410163, - "learning_rate": 0.00019999561168715263, - "loss": 46.0, - "step": 39013 - }, - { - "epoch": 2.98289274996655, - "grad_norm": 0.0013521400978788733, - "learning_rate": 0.000199995611462127, - "loss": 46.0, - "step": 39014 - }, - { - "epoch": 2.9829692069499396, - "grad_norm": 0.0028718295507133007, - "learning_rate": 0.00019999561123709562, - "loss": 46.0, - "step": 39015 - }, - { - "epoch": 2.9830456639333294, - "grad_norm": 0.0015655282186344266, - "learning_rate": 0.00019999561101205845, - "loss": 46.0, - "step": 39016 - }, - { - "epoch": 2.983122120916719, - "grad_norm": 0.005040094256401062, - "learning_rate": 0.00019999561078701554, - "loss": 46.0, - "step": 39017 - }, - { - "epoch": 2.983198577900109, - "grad_norm": 0.00428294250741601, - "learning_rate": 0.00019999561056196686, - "loss": 46.0, - "step": 39018 - }, - { - "epoch": 2.9832750348834987, - "grad_norm": 0.0013332576490938663, - "learning_rate": 0.0001999956103369124, - "loss": 46.0, - "step": 39019 - }, - { - "epoch": 2.9833514918668884, - "grad_norm": 0.002283480018377304, - "learning_rate": 0.00019999561011185216, - "loss": 46.0, - "step": 39020 - }, - { - "epoch": 2.983427948850278, - "grad_norm": 0.0005546375177800655, - "learning_rate": 0.00019999560988678616, - "loss": 46.0, - "step": 39021 - }, - { - "epoch": 2.983504405833668, - "grad_norm": 0.0013504207599908113, - "learning_rate": 0.0001999956096617144, - "loss": 46.0, - "step": 39022 - }, - { - "epoch": 2.9835808628170577, - "grad_norm": 0.0029268113430589437, - "learning_rate": 0.00019999560943663686, - "loss": 46.0, - "step": 39023 - }, - { - "epoch": 2.9836573198004475, - "grad_norm": 0.0067323134280741215, - "learning_rate": 0.00019999560921155356, - "loss": 46.0, - "step": 39024 - }, - { - "epoch": 2.983733776783837, - "grad_norm": 0.001047409139573574, - "learning_rate": 0.00019999560898646449, - "loss": 46.0, - "step": 39025 - }, - { - "epoch": 2.983810233767227, - "grad_norm": 0.0019587792921811342, - "learning_rate": 0.00019999560876136964, - "loss": 46.0, - "step": 39026 - }, - { - "epoch": 2.9838866907506163, - "grad_norm": 0.0014166788896545768, - "learning_rate": 0.00019999560853626902, - "loss": 46.0, - "step": 39027 - }, - { - "epoch": 2.983963147734006, - "grad_norm": 0.0016008763341233134, - "learning_rate": 0.00019999560831116263, - "loss": 46.0, - "step": 39028 - }, - { - "epoch": 2.984039604717396, - "grad_norm": 0.001029292936436832, - "learning_rate": 0.0001999956080860505, - "loss": 46.0, - "step": 39029 - }, - { - "epoch": 2.9841160617007856, - "grad_norm": 0.003147426061332226, - "learning_rate": 0.00019999560786093255, - "loss": 46.0, - "step": 39030 - }, - { - "epoch": 2.9841925186841753, - "grad_norm": 0.0026365185622125864, - "learning_rate": 0.0001999956076358089, - "loss": 46.0, - "step": 39031 - }, - { - "epoch": 2.984268975667565, - "grad_norm": 0.00127028813585639, - "learning_rate": 0.0001999956074106794, - "loss": 46.0, - "step": 39032 - }, - { - "epoch": 2.984345432650955, - "grad_norm": 0.005030323751270771, - "learning_rate": 0.00019999560718554418, - "loss": 46.0, - "step": 39033 - }, - { - "epoch": 2.9844218896343446, - "grad_norm": 0.00176139990799129, - "learning_rate": 0.0001999956069604032, - "loss": 46.0, - "step": 39034 - }, - { - "epoch": 2.984498346617734, - "grad_norm": 0.0024201814085245132, - "learning_rate": 0.00019999560673525642, - "loss": 46.0, - "step": 39035 - }, - { - "epoch": 2.9845748036011237, - "grad_norm": 0.004769555758684874, - "learning_rate": 0.0001999956065101039, - "loss": 46.0, - "step": 39036 - }, - { - "epoch": 2.9846512605845135, - "grad_norm": 0.0018084713956341147, - "learning_rate": 0.00019999560628494557, - "loss": 46.0, - "step": 39037 - }, - { - "epoch": 2.9847277175679032, - "grad_norm": 0.0020056227222085, - "learning_rate": 0.00019999560605978153, - "loss": 46.0, - "step": 39038 - }, - { - "epoch": 2.984804174551293, - "grad_norm": 0.0020144127774983644, - "learning_rate": 0.00019999560583461166, - "loss": 46.0, - "step": 39039 - }, - { - "epoch": 2.9848806315346827, - "grad_norm": 0.0012094862759113312, - "learning_rate": 0.00019999560560943604, - "loss": 46.0, - "step": 39040 - }, - { - "epoch": 2.9849570885180725, - "grad_norm": 0.0014625868061557412, - "learning_rate": 0.00019999560538425465, - "loss": 46.0, - "step": 39041 - }, - { - "epoch": 2.9850335455014623, - "grad_norm": 0.0036340290680527687, - "learning_rate": 0.00019999560515906754, - "loss": 46.0, - "step": 39042 - }, - { - "epoch": 2.985110002484852, - "grad_norm": 0.0032362372148782015, - "learning_rate": 0.00019999560493387462, - "loss": 46.0, - "step": 39043 - }, - { - "epoch": 2.985186459468242, - "grad_norm": 0.0012778709642589092, - "learning_rate": 0.0001999956047086759, - "loss": 46.0, - "step": 39044 - }, - { - "epoch": 2.9852629164516316, - "grad_norm": 0.0017344921361654997, - "learning_rate": 0.00019999560448347146, - "loss": 46.0, - "step": 39045 - }, - { - "epoch": 2.9853393734350213, - "grad_norm": 0.0011550289345905185, - "learning_rate": 0.00019999560425826122, - "loss": 46.0, - "step": 39046 - }, - { - "epoch": 2.985415830418411, - "grad_norm": 0.001096539432182908, - "learning_rate": 0.00019999560403304522, - "loss": 46.0, - "step": 39047 - }, - { - "epoch": 2.985492287401801, - "grad_norm": 0.002185060642659664, - "learning_rate": 0.00019999560380782344, - "loss": 46.0, - "step": 39048 - }, - { - "epoch": 2.98556874438519, - "grad_norm": 0.00213637319393456, - "learning_rate": 0.00019999560358259592, - "loss": 46.0, - "step": 39049 - }, - { - "epoch": 2.98564520136858, - "grad_norm": 0.004692373797297478, - "learning_rate": 0.00019999560335736262, - "loss": 46.0, - "step": 39050 - }, - { - "epoch": 2.9857216583519697, - "grad_norm": 0.0043512689881026745, - "learning_rate": 0.00019999560313212355, - "loss": 46.0, - "step": 39051 - }, - { - "epoch": 2.9857981153353594, - "grad_norm": 0.0006530076498165727, - "learning_rate": 0.0001999956029068787, - "loss": 46.0, - "step": 39052 - }, - { - "epoch": 2.985874572318749, - "grad_norm": 0.002633305499330163, - "learning_rate": 0.00019999560268162809, - "loss": 46.0, - "step": 39053 - }, - { - "epoch": 2.985951029302139, - "grad_norm": 0.004088290501385927, - "learning_rate": 0.0001999956024563717, - "loss": 46.0, - "step": 39054 - }, - { - "epoch": 2.9860274862855287, - "grad_norm": 0.0008660958847030997, - "learning_rate": 0.00019999560223110956, - "loss": 46.0, - "step": 39055 - }, - { - "epoch": 2.9861039432689185, - "grad_norm": 0.0036324551329016685, - "learning_rate": 0.00019999560200584165, - "loss": 46.0, - "step": 39056 - }, - { - "epoch": 2.986180400252308, - "grad_norm": 0.004912384785711765, - "learning_rate": 0.00019999560178056793, - "loss": 46.0, - "step": 39057 - }, - { - "epoch": 2.9862568572356976, - "grad_norm": 0.0004007123352494091, - "learning_rate": 0.0001999956015552885, - "loss": 46.0, - "step": 39058 - }, - { - "epoch": 2.9863333142190873, - "grad_norm": 0.000838922627735883, - "learning_rate": 0.00019999560133000325, - "loss": 46.0, - "step": 39059 - }, - { - "epoch": 2.986409771202477, - "grad_norm": 0.00048119647544808686, - "learning_rate": 0.00019999560110471224, - "loss": 46.0, - "step": 39060 - }, - { - "epoch": 2.986486228185867, - "grad_norm": 0.0006156764575280249, - "learning_rate": 0.0001999956008794155, - "loss": 46.0, - "step": 39061 - }, - { - "epoch": 2.9865626851692566, - "grad_norm": 0.0016523554222658277, - "learning_rate": 0.00019999560065411297, - "loss": 46.0, - "step": 39062 - }, - { - "epoch": 2.9866391421526464, - "grad_norm": 0.0032914048060774803, - "learning_rate": 0.00019999560042880464, - "loss": 46.0, - "step": 39063 - }, - { - "epoch": 2.986715599136036, - "grad_norm": 0.0030514118261635303, - "learning_rate": 0.00019999560020349057, - "loss": 46.0, - "step": 39064 - }, - { - "epoch": 2.986792056119426, - "grad_norm": 0.0010742873419076204, - "learning_rate": 0.00019999559997817075, - "loss": 46.0, - "step": 39065 - }, - { - "epoch": 2.9868685131028156, - "grad_norm": 0.00061345926951617, - "learning_rate": 0.00019999559975284514, - "loss": 46.0, - "step": 39066 - }, - { - "epoch": 2.9869449700862054, - "grad_norm": 0.002510753460228443, - "learning_rate": 0.00019999559952751375, - "loss": 46.0, - "step": 39067 - }, - { - "epoch": 2.987021427069595, - "grad_norm": 0.0032902169041335583, - "learning_rate": 0.00019999559930217658, - "loss": 46.0, - "step": 39068 - }, - { - "epoch": 2.987097884052985, - "grad_norm": 0.0007933829328976572, - "learning_rate": 0.00019999559907683367, - "loss": 46.0, - "step": 39069 - }, - { - "epoch": 2.9871743410363742, - "grad_norm": 0.0035547392908483744, - "learning_rate": 0.000199995598851485, - "loss": 46.0, - "step": 39070 - }, - { - "epoch": 2.987250798019764, - "grad_norm": 0.0028582008089870214, - "learning_rate": 0.00019999559862613053, - "loss": 46.0, - "step": 39071 - }, - { - "epoch": 2.9873272550031538, - "grad_norm": 0.001642982941120863, - "learning_rate": 0.0001999955984007703, - "loss": 46.0, - "step": 39072 - }, - { - "epoch": 2.9874037119865435, - "grad_norm": 0.0016685270238667727, - "learning_rate": 0.0001999955981754043, - "loss": 46.0, - "step": 39073 - }, - { - "epoch": 2.9874801689699333, - "grad_norm": 0.0021984437480568886, - "learning_rate": 0.00019999559795003255, - "loss": 46.0, - "step": 39074 - }, - { - "epoch": 2.987556625953323, - "grad_norm": 0.0008019302622415125, - "learning_rate": 0.000199995597724655, - "loss": 46.0, - "step": 39075 - }, - { - "epoch": 2.987633082936713, - "grad_norm": 0.004258256405591965, - "learning_rate": 0.0001999955974992717, - "loss": 46.0, - "step": 39076 - }, - { - "epoch": 2.9877095399201026, - "grad_norm": 0.00825955718755722, - "learning_rate": 0.00019999559727388263, - "loss": 46.0, - "step": 39077 - }, - { - "epoch": 2.9877859969034923, - "grad_norm": 0.0016950754215940833, - "learning_rate": 0.00019999559704848778, - "loss": 46.0, - "step": 39078 - }, - { - "epoch": 2.9878624538868817, - "grad_norm": 0.0013112345477566123, - "learning_rate": 0.00019999559682308717, - "loss": 46.0, - "step": 39079 - }, - { - "epoch": 2.9879389108702714, - "grad_norm": 0.003756336634978652, - "learning_rate": 0.0001999955965976808, - "loss": 46.0, - "step": 39080 - }, - { - "epoch": 2.988015367853661, - "grad_norm": 0.0021658686455339193, - "learning_rate": 0.00019999559637226867, - "loss": 46.0, - "step": 39081 - }, - { - "epoch": 2.988091824837051, - "grad_norm": 0.0024607155937701464, - "learning_rate": 0.0001999955961468507, - "loss": 46.0, - "step": 39082 - }, - { - "epoch": 2.9881682818204407, - "grad_norm": 0.0009024886530824006, - "learning_rate": 0.00019999559592142702, - "loss": 46.0, - "step": 39083 - }, - { - "epoch": 2.9882447388038305, - "grad_norm": 0.0008100476115942001, - "learning_rate": 0.00019999559569599756, - "loss": 46.0, - "step": 39084 - }, - { - "epoch": 2.98832119578722, - "grad_norm": 0.0016449899412691593, - "learning_rate": 0.00019999559547056236, - "loss": 46.0, - "step": 39085 - }, - { - "epoch": 2.98839765277061, - "grad_norm": 0.002241989132016897, - "learning_rate": 0.00019999559524512135, - "loss": 46.0, - "step": 39086 - }, - { - "epoch": 2.9884741097539997, - "grad_norm": 0.0010325298644602299, - "learning_rate": 0.00019999559501967458, - "loss": 46.0, - "step": 39087 - }, - { - "epoch": 2.9885505667373895, - "grad_norm": 0.0014504775172099471, - "learning_rate": 0.00019999559479422205, - "loss": 46.0, - "step": 39088 - }, - { - "epoch": 2.9886270237207793, - "grad_norm": 0.002304642926901579, - "learning_rate": 0.00019999559456876376, - "loss": 46.0, - "step": 39089 - }, - { - "epoch": 2.988703480704169, - "grad_norm": 0.003274294314906001, - "learning_rate": 0.00019999559434329966, - "loss": 46.0, - "step": 39090 - }, - { - "epoch": 2.988779937687559, - "grad_norm": 0.0023318196181207895, - "learning_rate": 0.00019999559411782984, - "loss": 46.0, - "step": 39091 - }, - { - "epoch": 2.988856394670948, - "grad_norm": 0.0016399105079472065, - "learning_rate": 0.00019999559389235423, - "loss": 46.0, - "step": 39092 - }, - { - "epoch": 2.988932851654338, - "grad_norm": 0.0008155826944857836, - "learning_rate": 0.00019999559366687283, - "loss": 46.0, - "step": 39093 - }, - { - "epoch": 2.9890093086377276, - "grad_norm": 0.0013628797605633736, - "learning_rate": 0.0001999955934413857, - "loss": 46.0, - "step": 39094 - }, - { - "epoch": 2.9890857656211174, - "grad_norm": 0.0020880845841020346, - "learning_rate": 0.0001999955932158928, - "loss": 46.0, - "step": 39095 - }, - { - "epoch": 2.989162222604507, - "grad_norm": 0.0018020865973085165, - "learning_rate": 0.0001999955929903941, - "loss": 46.0, - "step": 39096 - }, - { - "epoch": 2.989238679587897, - "grad_norm": 0.013506615534424782, - "learning_rate": 0.00019999559276488965, - "loss": 46.0, - "step": 39097 - }, - { - "epoch": 2.9893151365712867, - "grad_norm": 0.001906516612507403, - "learning_rate": 0.00019999559253937942, - "loss": 46.0, - "step": 39098 - }, - { - "epoch": 2.9893915935546764, - "grad_norm": 0.0038871723227202892, - "learning_rate": 0.0001999955923138634, - "loss": 46.0, - "step": 39099 - }, - { - "epoch": 2.9894680505380657, - "grad_norm": 0.00223262095823884, - "learning_rate": 0.00019999559208834164, - "loss": 46.0, - "step": 39100 - }, - { - "epoch": 2.9895445075214555, - "grad_norm": 0.003633759682998061, - "learning_rate": 0.00019999559186281411, - "loss": 46.0, - "step": 39101 - }, - { - "epoch": 2.9896209645048453, - "grad_norm": 0.001774475211277604, - "learning_rate": 0.0001999955916372808, - "loss": 46.0, - "step": 39102 - }, - { - "epoch": 2.989697421488235, - "grad_norm": 0.0011606712359935045, - "learning_rate": 0.00019999559141174172, - "loss": 46.0, - "step": 39103 - }, - { - "epoch": 2.989773878471625, - "grad_norm": 0.00431804871186614, - "learning_rate": 0.00019999559118619688, - "loss": 46.0, - "step": 39104 - }, - { - "epoch": 2.9898503354550146, - "grad_norm": 0.0031206090934574604, - "learning_rate": 0.00019999559096064626, - "loss": 46.0, - "step": 39105 - }, - { - "epoch": 2.9899267924384043, - "grad_norm": 0.0041432734578847885, - "learning_rate": 0.0001999955907350899, - "loss": 46.0, - "step": 39106 - }, - { - "epoch": 2.990003249421794, - "grad_norm": 0.001730704098008573, - "learning_rate": 0.00019999559050952774, - "loss": 46.0, - "step": 39107 - }, - { - "epoch": 2.990079706405184, - "grad_norm": 0.0036157353315502405, - "learning_rate": 0.00019999559028395983, - "loss": 46.0, - "step": 39108 - }, - { - "epoch": 2.9901561633885736, - "grad_norm": 0.003779682796448469, - "learning_rate": 0.00019999559005838612, - "loss": 46.0, - "step": 39109 - }, - { - "epoch": 2.9902326203719634, - "grad_norm": 0.005584088619798422, - "learning_rate": 0.0001999955898328067, - "loss": 46.0, - "step": 39110 - }, - { - "epoch": 2.990309077355353, - "grad_norm": 0.001936267246492207, - "learning_rate": 0.00019999558960722146, - "loss": 46.0, - "step": 39111 - }, - { - "epoch": 2.990385534338743, - "grad_norm": 0.0018537326250225306, - "learning_rate": 0.00019999558938163045, - "loss": 46.0, - "step": 39112 - }, - { - "epoch": 2.9904619913221326, - "grad_norm": 0.0011256610741838813, - "learning_rate": 0.0001999955891560337, - "loss": 46.0, - "step": 39113 - }, - { - "epoch": 2.990538448305522, - "grad_norm": 0.0022534318268299103, - "learning_rate": 0.00019999558893043115, - "loss": 46.0, - "step": 39114 - }, - { - "epoch": 2.9906149052889117, - "grad_norm": 0.0030199645552784204, - "learning_rate": 0.00019999558870482286, - "loss": 46.0, - "step": 39115 - }, - { - "epoch": 2.9906913622723015, - "grad_norm": 0.0017322596395388246, - "learning_rate": 0.00019999558847920882, - "loss": 46.0, - "step": 39116 - }, - { - "epoch": 2.9907678192556912, - "grad_norm": 0.0009440616704523563, - "learning_rate": 0.00019999558825358895, - "loss": 46.0, - "step": 39117 - }, - { - "epoch": 2.990844276239081, - "grad_norm": 0.001366412965580821, - "learning_rate": 0.00019999558802796336, - "loss": 46.0, - "step": 39118 - }, - { - "epoch": 2.9909207332224708, - "grad_norm": 0.002148016355931759, - "learning_rate": 0.00019999558780233197, - "loss": 46.0, - "step": 39119 - }, - { - "epoch": 2.9909971902058605, - "grad_norm": 0.0017213055398315191, - "learning_rate": 0.0001999955875766948, - "loss": 46.0, - "step": 39120 - }, - { - "epoch": 2.9910736471892503, - "grad_norm": 0.008067573420703411, - "learning_rate": 0.0001999955873510519, - "loss": 46.0, - "step": 39121 - }, - { - "epoch": 2.9911501041726396, - "grad_norm": 0.0031846011988818645, - "learning_rate": 0.0001999955871254032, - "loss": 46.0, - "step": 39122 - }, - { - "epoch": 2.9912265611560294, - "grad_norm": 0.002131731016561389, - "learning_rate": 0.00019999558689974873, - "loss": 46.0, - "step": 39123 - }, - { - "epoch": 2.991303018139419, - "grad_norm": 0.00308710103854537, - "learning_rate": 0.00019999558667408853, - "loss": 46.0, - "step": 39124 - }, - { - "epoch": 2.991379475122809, - "grad_norm": 0.0021525833290070295, - "learning_rate": 0.00019999558644842252, - "loss": 46.0, - "step": 39125 - }, - { - "epoch": 2.9914559321061986, - "grad_norm": 0.015429322607815266, - "learning_rate": 0.00019999558622275077, - "loss": 46.0, - "step": 39126 - }, - { - "epoch": 2.9915323890895884, - "grad_norm": 0.001156373182311654, - "learning_rate": 0.00019999558599707325, - "loss": 46.0, - "step": 39127 - }, - { - "epoch": 2.991608846072978, - "grad_norm": 0.0013423626078292727, - "learning_rate": 0.00019999558577138993, - "loss": 46.0, - "step": 39128 - }, - { - "epoch": 2.991685303056368, - "grad_norm": 0.0023643705062568188, - "learning_rate": 0.00019999558554570086, - "loss": 46.0, - "step": 39129 - }, - { - "epoch": 2.9917617600397577, - "grad_norm": 0.0026027371641248465, - "learning_rate": 0.00019999558532000602, - "loss": 46.0, - "step": 39130 - }, - { - "epoch": 2.9918382170231475, - "grad_norm": 0.001721615088172257, - "learning_rate": 0.0001999955850943054, - "loss": 46.0, - "step": 39131 - }, - { - "epoch": 2.991914674006537, - "grad_norm": 0.0029915894847363234, - "learning_rate": 0.00019999558486859904, - "loss": 46.0, - "step": 39132 - }, - { - "epoch": 2.991991130989927, - "grad_norm": 0.0025686193257570267, - "learning_rate": 0.00019999558464288688, - "loss": 46.0, - "step": 39133 - }, - { - "epoch": 2.9920675879733167, - "grad_norm": 0.004422489088028669, - "learning_rate": 0.00019999558441716894, - "loss": 46.0, - "step": 39134 - }, - { - "epoch": 2.9921440449567065, - "grad_norm": 0.0026294011622667313, - "learning_rate": 0.00019999558419144529, - "loss": 46.0, - "step": 39135 - }, - { - "epoch": 2.992220501940096, - "grad_norm": 0.0013138698413968086, - "learning_rate": 0.0001999955839657158, - "loss": 46.0, - "step": 39136 - }, - { - "epoch": 2.9922969589234856, - "grad_norm": 0.003294616937637329, - "learning_rate": 0.0001999955837399806, - "loss": 46.0, - "step": 39137 - }, - { - "epoch": 2.9923734159068753, - "grad_norm": 0.001353780273348093, - "learning_rate": 0.0001999955835142396, - "loss": 46.0, - "step": 39138 - }, - { - "epoch": 2.992449872890265, - "grad_norm": 0.0018757219659164548, - "learning_rate": 0.00019999558328849282, - "loss": 46.0, - "step": 39139 - }, - { - "epoch": 2.992526329873655, - "grad_norm": 0.0035926576238125563, - "learning_rate": 0.0001999955830627403, - "loss": 46.0, - "step": 39140 - }, - { - "epoch": 2.9926027868570446, - "grad_norm": 0.0007921658107079566, - "learning_rate": 0.00019999558283698198, - "loss": 46.0, - "step": 39141 - }, - { - "epoch": 2.9926792438404344, - "grad_norm": 0.0017835624748840928, - "learning_rate": 0.0001999955826112179, - "loss": 46.0, - "step": 39142 - }, - { - "epoch": 2.992755700823824, - "grad_norm": 0.0005739748594351113, - "learning_rate": 0.0001999955823854481, - "loss": 46.0, - "step": 39143 - }, - { - "epoch": 2.9928321578072135, - "grad_norm": 0.003235763171687722, - "learning_rate": 0.00019999558215967248, - "loss": 46.0, - "step": 39144 - }, - { - "epoch": 2.992908614790603, - "grad_norm": 0.0022382240276783705, - "learning_rate": 0.0001999955819338911, - "loss": 46.0, - "step": 39145 - }, - { - "epoch": 2.992985071773993, - "grad_norm": 0.0006428069318644702, - "learning_rate": 0.00019999558170810395, - "loss": 46.0, - "step": 39146 - }, - { - "epoch": 2.9930615287573827, - "grad_norm": 0.0015342015540227294, - "learning_rate": 0.00019999558148231104, - "loss": 46.0, - "step": 39147 - }, - { - "epoch": 2.9931379857407725, - "grad_norm": 0.0017962598940357566, - "learning_rate": 0.00019999558125651234, - "loss": 46.0, - "step": 39148 - }, - { - "epoch": 2.9932144427241623, - "grad_norm": 0.0007452882127836347, - "learning_rate": 0.00019999558103070788, - "loss": 46.0, - "step": 39149 - }, - { - "epoch": 2.993290899707552, - "grad_norm": 0.002946908352896571, - "learning_rate": 0.00019999558080489765, - "loss": 46.0, - "step": 39150 - }, - { - "epoch": 2.993367356690942, - "grad_norm": 0.0026203941088169813, - "learning_rate": 0.00019999558057908168, - "loss": 46.0, - "step": 39151 - }, - { - "epoch": 2.9934438136743315, - "grad_norm": 0.0013671743217855692, - "learning_rate": 0.00019999558035325993, - "loss": 46.0, - "step": 39152 - }, - { - "epoch": 2.9935202706577213, - "grad_norm": 0.0020278203301131725, - "learning_rate": 0.00019999558012743238, - "loss": 46.0, - "step": 39153 - }, - { - "epoch": 2.993596727641111, - "grad_norm": 0.0028306825552135706, - "learning_rate": 0.00019999557990159908, - "loss": 46.0, - "step": 39154 - }, - { - "epoch": 2.993673184624501, - "grad_norm": 0.0010586519492790103, - "learning_rate": 0.00019999557967576, - "loss": 46.0, - "step": 39155 - }, - { - "epoch": 2.9937496416078906, - "grad_norm": 0.0027841362170875072, - "learning_rate": 0.00019999557944991517, - "loss": 46.0, - "step": 39156 - }, - { - "epoch": 2.9938260985912803, - "grad_norm": 0.0012173576978966594, - "learning_rate": 0.00019999557922406456, - "loss": 46.0, - "step": 39157 - }, - { - "epoch": 2.9939025555746697, - "grad_norm": 0.0029892115853726864, - "learning_rate": 0.00019999557899820817, - "loss": 46.0, - "step": 39158 - }, - { - "epoch": 2.9939790125580594, - "grad_norm": 0.004242290742695332, - "learning_rate": 0.00019999557877234603, - "loss": 46.0, - "step": 39159 - }, - { - "epoch": 2.994055469541449, - "grad_norm": 0.00103765819221735, - "learning_rate": 0.00019999557854647813, - "loss": 46.0, - "step": 39160 - }, - { - "epoch": 2.994131926524839, - "grad_norm": 0.002326207933947444, - "learning_rate": 0.00019999557832060442, - "loss": 46.0, - "step": 39161 - }, - { - "epoch": 2.9942083835082287, - "grad_norm": 0.002947617322206497, - "learning_rate": 0.00019999557809472496, - "loss": 46.0, - "step": 39162 - }, - { - "epoch": 2.9942848404916185, - "grad_norm": 0.0012251833686605096, - "learning_rate": 0.00019999557786883973, - "loss": 46.0, - "step": 39163 - }, - { - "epoch": 2.9943612974750082, - "grad_norm": 0.005709645338356495, - "learning_rate": 0.00019999557764294873, - "loss": 46.0, - "step": 39164 - }, - { - "epoch": 2.994437754458398, - "grad_norm": 0.0016854925779625773, - "learning_rate": 0.00019999557741705198, - "loss": 46.0, - "step": 39165 - }, - { - "epoch": 2.9945142114417873, - "grad_norm": 0.002527038101106882, - "learning_rate": 0.00019999557719114946, - "loss": 46.0, - "step": 39166 - }, - { - "epoch": 2.994590668425177, - "grad_norm": 0.005868361797183752, - "learning_rate": 0.00019999557696524117, - "loss": 46.0, - "step": 39167 - }, - { - "epoch": 2.994667125408567, - "grad_norm": 0.0006125843501649797, - "learning_rate": 0.00019999557673932707, - "loss": 46.0, - "step": 39168 - }, - { - "epoch": 2.9947435823919566, - "grad_norm": 0.004382311832159758, - "learning_rate": 0.00019999557651340726, - "loss": 46.0, - "step": 39169 - }, - { - "epoch": 2.9948200393753464, - "grad_norm": 0.003015749854966998, - "learning_rate": 0.00019999557628748161, - "loss": 46.0, - "step": 39170 - }, - { - "epoch": 2.994896496358736, - "grad_norm": 0.0013347103958949447, - "learning_rate": 0.00019999557606155025, - "loss": 46.0, - "step": 39171 - }, - { - "epoch": 2.994972953342126, - "grad_norm": 0.0017075969371944666, - "learning_rate": 0.00019999557583561312, - "loss": 46.0, - "step": 39172 - }, - { - "epoch": 2.9950494103255156, - "grad_norm": 0.0017577135004103184, - "learning_rate": 0.00019999557560967018, - "loss": 46.0, - "step": 39173 - }, - { - "epoch": 2.9951258673089054, - "grad_norm": 0.0013778347056359053, - "learning_rate": 0.0001999955753837215, - "loss": 46.0, - "step": 39174 - }, - { - "epoch": 2.995202324292295, - "grad_norm": 0.0018427680479362607, - "learning_rate": 0.00019999557515776705, - "loss": 46.0, - "step": 39175 - }, - { - "epoch": 2.995278781275685, - "grad_norm": 0.0024936965201050043, - "learning_rate": 0.00019999557493180682, - "loss": 46.0, - "step": 39176 - }, - { - "epoch": 2.9953552382590747, - "grad_norm": 0.002008868847042322, - "learning_rate": 0.00019999557470584084, - "loss": 46.0, - "step": 39177 - }, - { - "epoch": 2.9954316952424644, - "grad_norm": 0.004113717004656792, - "learning_rate": 0.00019999557447986907, - "loss": 46.0, - "step": 39178 - }, - { - "epoch": 2.995508152225854, - "grad_norm": 0.0024763448163866997, - "learning_rate": 0.00019999557425389152, - "loss": 46.0, - "step": 39179 - }, - { - "epoch": 2.9955846092092435, - "grad_norm": 0.0014906786382198334, - "learning_rate": 0.00019999557402790825, - "loss": 46.0, - "step": 39180 - }, - { - "epoch": 2.9956610661926333, - "grad_norm": 0.0017268853262066841, - "learning_rate": 0.00019999557380191916, - "loss": 46.0, - "step": 39181 - }, - { - "epoch": 2.995737523176023, - "grad_norm": 0.00249011954292655, - "learning_rate": 0.00019999557357592434, - "loss": 46.0, - "step": 39182 - }, - { - "epoch": 2.995813980159413, - "grad_norm": 0.0019509891280904412, - "learning_rate": 0.00019999557334992373, - "loss": 46.0, - "step": 39183 - }, - { - "epoch": 2.9958904371428026, - "grad_norm": 0.005961879622191191, - "learning_rate": 0.00019999557312391734, - "loss": 46.0, - "step": 39184 - }, - { - "epoch": 2.9959668941261923, - "grad_norm": 0.0012329589808359742, - "learning_rate": 0.00019999557289790518, - "loss": 46.0, - "step": 39185 - }, - { - "epoch": 2.996043351109582, - "grad_norm": 0.00348090217448771, - "learning_rate": 0.00019999557267188727, - "loss": 46.0, - "step": 39186 - }, - { - "epoch": 2.996119808092972, - "grad_norm": 0.003842496545985341, - "learning_rate": 0.0001999955724458636, - "loss": 46.0, - "step": 39187 - }, - { - "epoch": 2.996196265076361, - "grad_norm": 0.002805515890941024, - "learning_rate": 0.00019999557221983414, - "loss": 46.0, - "step": 39188 - }, - { - "epoch": 2.996272722059751, - "grad_norm": 0.0012306882999837399, - "learning_rate": 0.00019999557199379894, - "loss": 46.0, - "step": 39189 - }, - { - "epoch": 2.9963491790431407, - "grad_norm": 0.0017449719598516822, - "learning_rate": 0.0001999955717677579, - "loss": 46.0, - "step": 39190 - }, - { - "epoch": 2.9964256360265304, - "grad_norm": 0.0019003982888534665, - "learning_rate": 0.00019999557154171116, - "loss": 46.0, - "step": 39191 - }, - { - "epoch": 2.99650209300992, - "grad_norm": 0.006370673421770334, - "learning_rate": 0.0001999955713156586, - "loss": 46.0, - "step": 39192 - }, - { - "epoch": 2.99657854999331, - "grad_norm": 0.0018238719785586, - "learning_rate": 0.00019999557108960035, - "loss": 46.0, - "step": 39193 - }, - { - "epoch": 2.9966550069766997, - "grad_norm": 0.0016050889389589429, - "learning_rate": 0.00019999557086353625, - "loss": 46.0, - "step": 39194 - }, - { - "epoch": 2.9967314639600895, - "grad_norm": 0.0018161804182454944, - "learning_rate": 0.0001999955706374664, - "loss": 46.0, - "step": 39195 - }, - { - "epoch": 2.9968079209434793, - "grad_norm": 0.00195804750546813, - "learning_rate": 0.00019999557041139082, - "loss": 46.0, - "step": 39196 - }, - { - "epoch": 2.996884377926869, - "grad_norm": 0.0018318075453862548, - "learning_rate": 0.00019999557018530943, - "loss": 46.0, - "step": 39197 - }, - { - "epoch": 2.9969608349102588, - "grad_norm": 0.01040571741759777, - "learning_rate": 0.00019999556995922227, - "loss": 46.0, - "step": 39198 - }, - { - "epoch": 2.9970372918936485, - "grad_norm": 0.008059567771852016, - "learning_rate": 0.00019999556973312937, - "loss": 46.0, - "step": 39199 - }, - { - "epoch": 2.9971137488770383, - "grad_norm": 0.0030449870973825455, - "learning_rate": 0.00019999556950703069, - "loss": 46.0, - "step": 39200 - }, - { - "epoch": 2.9971902058604276, - "grad_norm": 0.005059572868049145, - "learning_rate": 0.00019999556928092623, - "loss": 46.0, - "step": 39201 - }, - { - "epoch": 2.9972666628438174, - "grad_norm": 0.0017652895767241716, - "learning_rate": 0.000199995569054816, - "loss": 46.0, - "step": 39202 - }, - { - "epoch": 2.997343119827207, - "grad_norm": 0.0011652631219476461, - "learning_rate": 0.00019999556882870003, - "loss": 46.0, - "step": 39203 - }, - { - "epoch": 2.997419576810597, - "grad_norm": 0.002552255056798458, - "learning_rate": 0.00019999556860257826, - "loss": 46.0, - "step": 39204 - }, - { - "epoch": 2.9974960337939867, - "grad_norm": 0.0014065821887925267, - "learning_rate": 0.00019999556837645073, - "loss": 46.0, - "step": 39205 - }, - { - "epoch": 2.9975724907773764, - "grad_norm": 0.0036756526678800583, - "learning_rate": 0.00019999556815031744, - "loss": 46.0, - "step": 39206 - }, - { - "epoch": 2.997648947760766, - "grad_norm": 0.0010107505368068814, - "learning_rate": 0.00019999556792417837, - "loss": 46.0, - "step": 39207 - }, - { - "epoch": 2.997725404744156, - "grad_norm": 0.0017440892988815904, - "learning_rate": 0.0001999955676980335, - "loss": 46.0, - "step": 39208 - }, - { - "epoch": 2.9978018617275457, - "grad_norm": 0.0023775596637278795, - "learning_rate": 0.00019999556747188292, - "loss": 46.0, - "step": 39209 - }, - { - "epoch": 2.997878318710935, - "grad_norm": 0.0013232782948762178, - "learning_rate": 0.00019999556724572656, - "loss": 46.0, - "step": 39210 - }, - { - "epoch": 2.997954775694325, - "grad_norm": 0.0012101158499717712, - "learning_rate": 0.0001999955670195644, - "loss": 46.0, - "step": 39211 - }, - { - "epoch": 2.9980312326777145, - "grad_norm": 0.0005670886021107435, - "learning_rate": 0.00019999556679339646, - "loss": 46.0, - "step": 39212 - }, - { - "epoch": 2.9981076896611043, - "grad_norm": 0.0019193993648514152, - "learning_rate": 0.0001999955665672228, - "loss": 46.0, - "step": 39213 - }, - { - "epoch": 2.998184146644494, - "grad_norm": 0.0008746773819439113, - "learning_rate": 0.00019999556634104333, - "loss": 46.0, - "step": 39214 - }, - { - "epoch": 2.998260603627884, - "grad_norm": 0.0023979737889021635, - "learning_rate": 0.00019999556611485813, - "loss": 46.0, - "step": 39215 - }, - { - "epoch": 2.9983370606112736, - "grad_norm": 0.005400867201387882, - "learning_rate": 0.00019999556588866713, - "loss": 46.0, - "step": 39216 - }, - { - "epoch": 2.9984135175946633, - "grad_norm": 0.0028523243963718414, - "learning_rate": 0.00019999556566247035, - "loss": 46.0, - "step": 39217 - }, - { - "epoch": 2.998489974578053, - "grad_norm": 0.002949568908661604, - "learning_rate": 0.00019999556543626783, - "loss": 46.0, - "step": 39218 - }, - { - "epoch": 2.998566431561443, - "grad_norm": 0.0035884494427591562, - "learning_rate": 0.00019999556521005954, - "loss": 46.0, - "step": 39219 - }, - { - "epoch": 2.9986428885448326, - "grad_norm": 0.001347847981378436, - "learning_rate": 0.00019999556498384547, - "loss": 46.0, - "step": 39220 - }, - { - "epoch": 2.9987193455282224, - "grad_norm": 0.0012585886288434267, - "learning_rate": 0.00019999556475762563, - "loss": 46.0, - "step": 39221 - }, - { - "epoch": 2.998795802511612, - "grad_norm": 0.008716125972568989, - "learning_rate": 0.0001999955645314, - "loss": 46.0, - "step": 39222 - }, - { - "epoch": 2.9988722594950015, - "grad_norm": 0.0036288145929574966, - "learning_rate": 0.00019999556430516863, - "loss": 46.0, - "step": 39223 - }, - { - "epoch": 2.9989487164783912, - "grad_norm": 0.004326615482568741, - "learning_rate": 0.0001999955640789315, - "loss": 46.0, - "step": 39224 - }, - { - "epoch": 2.999025173461781, - "grad_norm": 0.004040957428514957, - "learning_rate": 0.0001999955638526886, - "loss": 46.0, - "step": 39225 - }, - { - "epoch": 2.9991016304451708, - "grad_norm": 0.0034429074730724096, - "learning_rate": 0.00019999556362643988, - "loss": 46.0, - "step": 39226 - }, - { - "epoch": 2.9991780874285605, - "grad_norm": 0.004061649087816477, - "learning_rate": 0.00019999556340018543, - "loss": 46.0, - "step": 39227 - }, - { - "epoch": 2.9992545444119503, - "grad_norm": 0.003879598341882229, - "learning_rate": 0.0001999955631739252, - "loss": 46.0, - "step": 39228 - }, - { - "epoch": 2.99933100139534, - "grad_norm": 0.0023432110901921988, - "learning_rate": 0.00019999556294765923, - "loss": 46.0, - "step": 39229 - }, - { - "epoch": 2.99940745837873, - "grad_norm": 0.0020076779183000326, - "learning_rate": 0.00019999556272138746, - "loss": 46.0, - "step": 39230 - }, - { - "epoch": 2.999483915362119, - "grad_norm": 0.0005539238918572664, - "learning_rate": 0.00019999556249510994, - "loss": 46.0, - "step": 39231 - }, - { - "epoch": 2.999560372345509, - "grad_norm": 0.0017795147141441703, - "learning_rate": 0.00019999556226882664, - "loss": 46.0, - "step": 39232 - }, - { - "epoch": 2.9996368293288986, - "grad_norm": 0.0019348624628037214, - "learning_rate": 0.00019999556204253755, - "loss": 46.0, - "step": 39233 - }, - { - "epoch": 2.9997132863122884, - "grad_norm": 0.003848652122542262, - "learning_rate": 0.00019999556181624274, - "loss": 46.0, - "step": 39234 - }, - { - "epoch": 2.999789743295678, - "grad_norm": 0.002938139485195279, - "learning_rate": 0.00019999556158994212, - "loss": 46.0, - "step": 39235 - }, - { - "epoch": 2.999866200279068, - "grad_norm": 0.0014270143583416939, - "learning_rate": 0.00019999556136363576, - "loss": 46.0, - "step": 39236 - }, - { - "epoch": 2.9999426572624577, - "grad_norm": 0.0008606792544014752, - "learning_rate": 0.0001999955611373236, - "loss": 46.0, - "step": 39237 - }, - { - "epoch": 2.9999426572624577, - "eval_loss": 11.5, - "eval_runtime": 32.5265, - "eval_samples_per_second": 169.339, - "eval_steps_per_second": 84.669, - "step": 39237 - }, - { - "epoch": 3.0000191142458474, - "grad_norm": 0.002427714178338647, - "learning_rate": 0.0001999955609110057, - "loss": 46.0, - "step": 39238 - }, - { - "epoch": 3.000095571229237, - "grad_norm": 0.004683328792452812, - "learning_rate": 0.000199995560684682, - "loss": 46.0, - "step": 39239 - }, - { - "epoch": 3.000172028212627, - "grad_norm": 0.0031059151515364647, - "learning_rate": 0.00019999556045835254, - "loss": 46.0, - "step": 39240 - }, - { - "epoch": 3.0002484851960167, - "grad_norm": 0.006568351294845343, - "learning_rate": 0.00019999556023201734, - "loss": 46.0, - "step": 39241 - }, - { - "epoch": 3.0003249421794065, - "grad_norm": 0.0021187514066696167, - "learning_rate": 0.00019999556000567634, - "loss": 46.0, - "step": 39242 - }, - { - "epoch": 3.0004013991627962, - "grad_norm": 0.001852111890912056, - "learning_rate": 0.00019999555977932956, - "loss": 46.0, - "step": 39243 - }, - { - "epoch": 3.0004778561461856, - "grad_norm": 0.001291001564823091, - "learning_rate": 0.00019999555955297704, - "loss": 46.0, - "step": 39244 - }, - { - "epoch": 3.0005543131295753, - "grad_norm": 0.0029865512624382973, - "learning_rate": 0.00019999555932661875, - "loss": 46.0, - "step": 39245 - }, - { - "epoch": 3.000630770112965, - "grad_norm": 0.0012287814170122147, - "learning_rate": 0.00019999555910025469, - "loss": 46.0, - "step": 39246 - }, - { - "epoch": 3.000707227096355, - "grad_norm": 0.0024188263341784477, - "learning_rate": 0.00019999555887388485, - "loss": 46.0, - "step": 39247 - }, - { - "epoch": 3.0007836840797446, - "grad_norm": 0.0008548871264792979, - "learning_rate": 0.00019999555864750926, - "loss": 46.0, - "step": 39248 - }, - { - "epoch": 3.0008601410631344, - "grad_norm": 0.006372688338160515, - "learning_rate": 0.00019999555842112785, - "loss": 46.0, - "step": 39249 - }, - { - "epoch": 3.000936598046524, - "grad_norm": 0.002814937150105834, - "learning_rate": 0.0001999955581947407, - "loss": 46.0, - "step": 39250 - }, - { - "epoch": 3.001013055029914, - "grad_norm": 0.0017765265656635165, - "learning_rate": 0.0001999955579683478, - "loss": 46.0, - "step": 39251 - }, - { - "epoch": 3.0010895120133037, - "grad_norm": 0.0013573955511674285, - "learning_rate": 0.00019999555774194913, - "loss": 46.0, - "step": 39252 - }, - { - "epoch": 3.0011659689966934, - "grad_norm": 0.003322416450828314, - "learning_rate": 0.00019999555751554467, - "loss": 46.0, - "step": 39253 - }, - { - "epoch": 3.001242425980083, - "grad_norm": 0.005314044654369354, - "learning_rate": 0.00019999555728913445, - "loss": 46.0, - "step": 39254 - }, - { - "epoch": 3.0013188829634725, - "grad_norm": 0.0013232966884970665, - "learning_rate": 0.00019999555706271848, - "loss": 46.0, - "step": 39255 - }, - { - "epoch": 3.0013953399468623, - "grad_norm": 0.0016556764021515846, - "learning_rate": 0.0001999955568362967, - "loss": 46.0, - "step": 39256 - }, - { - "epoch": 3.001471796930252, - "grad_norm": 0.003503865795210004, - "learning_rate": 0.00019999555660986918, - "loss": 46.0, - "step": 39257 - }, - { - "epoch": 3.0015482539136418, - "grad_norm": 0.0004094807372894138, - "learning_rate": 0.0001999955563834359, - "loss": 46.0, - "step": 39258 - }, - { - "epoch": 3.0016247108970315, - "grad_norm": 0.0018939865985885262, - "learning_rate": 0.0001999955561569968, - "loss": 46.0, - "step": 39259 - }, - { - "epoch": 3.0017011678804213, - "grad_norm": 0.002850540913641453, - "learning_rate": 0.00019999555593055196, - "loss": 46.0, - "step": 39260 - }, - { - "epoch": 3.001777624863811, - "grad_norm": 0.0006466483464464545, - "learning_rate": 0.00019999555570410135, - "loss": 46.0, - "step": 39261 - }, - { - "epoch": 3.001854081847201, - "grad_norm": 0.005661812145262957, - "learning_rate": 0.00019999555547764496, - "loss": 46.0, - "step": 39262 - }, - { - "epoch": 3.0019305388305906, - "grad_norm": 0.0012674578465521336, - "learning_rate": 0.00019999555525118283, - "loss": 46.0, - "step": 39263 - }, - { - "epoch": 3.0020069958139803, - "grad_norm": 0.002266967436298728, - "learning_rate": 0.00019999555502471492, - "loss": 46.0, - "step": 39264 - }, - { - "epoch": 3.00208345279737, - "grad_norm": 0.004274963401257992, - "learning_rate": 0.00019999555479824124, - "loss": 46.0, - "step": 39265 - }, - { - "epoch": 3.0021599097807594, - "grad_norm": 0.0046497248113155365, - "learning_rate": 0.0001999955545717618, - "loss": 46.0, - "step": 39266 - }, - { - "epoch": 3.002236366764149, - "grad_norm": 0.0037155766040086746, - "learning_rate": 0.00019999555434527656, - "loss": 46.0, - "step": 39267 - }, - { - "epoch": 3.002312823747539, - "grad_norm": 0.0013625187566503882, - "learning_rate": 0.0001999955541187856, - "loss": 46.0, - "step": 39268 - }, - { - "epoch": 3.0023892807309287, - "grad_norm": 0.0011886089341714978, - "learning_rate": 0.00019999555389228882, - "loss": 46.0, - "step": 39269 - }, - { - "epoch": 3.0024657377143185, - "grad_norm": 0.0012649070704355836, - "learning_rate": 0.0001999955536657863, - "loss": 46.0, - "step": 39270 - }, - { - "epoch": 3.0025421946977082, - "grad_norm": 0.0024706104304641485, - "learning_rate": 0.00019999555343927798, - "loss": 46.0, - "step": 39271 - }, - { - "epoch": 3.002618651681098, - "grad_norm": 0.0026065302081406116, - "learning_rate": 0.00019999555321276394, - "loss": 46.0, - "step": 39272 - }, - { - "epoch": 3.0026951086644877, - "grad_norm": 0.0014793617883697152, - "learning_rate": 0.0001999955529862441, - "loss": 46.0, - "step": 39273 - }, - { - "epoch": 3.0027715656478775, - "grad_norm": 0.0025190270971506834, - "learning_rate": 0.0001999955527597185, - "loss": 46.0, - "step": 39274 - }, - { - "epoch": 3.0028480226312673, - "grad_norm": 0.0011415166081860662, - "learning_rate": 0.0001999955525331871, - "loss": 46.0, - "step": 39275 - }, - { - "epoch": 3.0029244796146566, - "grad_norm": 0.0009597733151167631, - "learning_rate": 0.00019999555230664997, - "loss": 46.0, - "step": 39276 - }, - { - "epoch": 3.0030009365980463, - "grad_norm": 0.005575475748628378, - "learning_rate": 0.00019999555208010704, - "loss": 46.0, - "step": 39277 - }, - { - "epoch": 3.003077393581436, - "grad_norm": 0.002231983933597803, - "learning_rate": 0.0001999955518535584, - "loss": 46.0, - "step": 39278 - }, - { - "epoch": 3.003153850564826, - "grad_norm": 0.0014428955037146807, - "learning_rate": 0.0001999955516270039, - "loss": 46.0, - "step": 39279 - }, - { - "epoch": 3.0032303075482156, - "grad_norm": 0.001606437610462308, - "learning_rate": 0.0001999955514004437, - "loss": 46.0, - "step": 39280 - }, - { - "epoch": 3.0033067645316054, - "grad_norm": 0.0030549154616892338, - "learning_rate": 0.0001999955511738777, - "loss": 46.0, - "step": 39281 - }, - { - "epoch": 3.003383221514995, - "grad_norm": 0.003709004260599613, - "learning_rate": 0.00019999555094730596, - "loss": 46.0, - "step": 39282 - }, - { - "epoch": 3.003459678498385, - "grad_norm": 0.0029395415913313627, - "learning_rate": 0.00019999555072072842, - "loss": 46.0, - "step": 39283 - }, - { - "epoch": 3.0035361354817747, - "grad_norm": 0.001813619746826589, - "learning_rate": 0.00019999555049414513, - "loss": 46.0, - "step": 39284 - }, - { - "epoch": 3.0036125924651644, - "grad_norm": 0.0033683686051517725, - "learning_rate": 0.00019999555026755606, - "loss": 46.0, - "step": 39285 - }, - { - "epoch": 3.003689049448554, - "grad_norm": 0.00283208885230124, - "learning_rate": 0.00019999555004096122, - "loss": 46.0, - "step": 39286 - }, - { - "epoch": 3.0037655064319435, - "grad_norm": 0.0051369075663387775, - "learning_rate": 0.0001999955498143606, - "loss": 46.0, - "step": 39287 - }, - { - "epoch": 3.0038419634153333, - "grad_norm": 0.003842682344838977, - "learning_rate": 0.00019999554958775425, - "loss": 46.0, - "step": 39288 - }, - { - "epoch": 3.003918420398723, - "grad_norm": 0.0027507557533681393, - "learning_rate": 0.00019999554936114212, - "loss": 46.0, - "step": 39289 - }, - { - "epoch": 3.003994877382113, - "grad_norm": 0.002347825327888131, - "learning_rate": 0.0001999955491345242, - "loss": 46.0, - "step": 39290 - }, - { - "epoch": 3.0040713343655026, - "grad_norm": 0.003627563826739788, - "learning_rate": 0.00019999554890790048, - "loss": 46.0, - "step": 39291 - }, - { - "epoch": 3.0041477913488923, - "grad_norm": 0.003237220225855708, - "learning_rate": 0.00019999554868127106, - "loss": 46.0, - "step": 39292 - }, - { - "epoch": 3.004224248332282, - "grad_norm": 0.0012536810245364904, - "learning_rate": 0.0001999955484546358, - "loss": 46.0, - "step": 39293 - }, - { - "epoch": 3.004300705315672, - "grad_norm": 0.0024540743324905634, - "learning_rate": 0.00019999554822799483, - "loss": 46.0, - "step": 39294 - }, - { - "epoch": 3.0043771622990616, - "grad_norm": 0.0015452642692252994, - "learning_rate": 0.0001999955480013481, - "loss": 46.0, - "step": 39295 - }, - { - "epoch": 3.0044536192824514, - "grad_norm": 0.008288186974823475, - "learning_rate": 0.00019999554777469557, - "loss": 46.0, - "step": 39296 - }, - { - "epoch": 3.004530076265841, - "grad_norm": 0.0019129274878650904, - "learning_rate": 0.00019999554754803725, - "loss": 46.0, - "step": 39297 - }, - { - "epoch": 3.0046065332492304, - "grad_norm": 0.0022248008754104376, - "learning_rate": 0.00019999554732137319, - "loss": 46.0, - "step": 39298 - }, - { - "epoch": 3.00468299023262, - "grad_norm": 0.0038055742625147104, - "learning_rate": 0.00019999554709470337, - "loss": 46.0, - "step": 39299 - }, - { - "epoch": 3.00475944721601, - "grad_norm": 0.0024777159560471773, - "learning_rate": 0.00019999554686802776, - "loss": 46.0, - "step": 39300 - }, - { - "epoch": 3.0048359041993997, - "grad_norm": 0.0019054062431678176, - "learning_rate": 0.00019999554664134638, - "loss": 46.0, - "step": 39301 - }, - { - "epoch": 3.0049123611827895, - "grad_norm": 0.0003340501571074128, - "learning_rate": 0.00019999554641465922, - "loss": 46.0, - "step": 39302 - }, - { - "epoch": 3.0049888181661792, - "grad_norm": 0.002761523239314556, - "learning_rate": 0.0001999955461879663, - "loss": 46.0, - "step": 39303 - }, - { - "epoch": 3.005065275149569, - "grad_norm": 0.008612114936113358, - "learning_rate": 0.00019999554596126764, - "loss": 46.0, - "step": 39304 - }, - { - "epoch": 3.0051417321329588, - "grad_norm": 0.004222886171191931, - "learning_rate": 0.0001999955457345632, - "loss": 46.0, - "step": 39305 - }, - { - "epoch": 3.0052181891163485, - "grad_norm": 0.0017778804758563638, - "learning_rate": 0.00019999554550785299, - "loss": 46.0, - "step": 39306 - }, - { - "epoch": 3.0052946460997383, - "grad_norm": 0.002196765970438719, - "learning_rate": 0.000199995545281137, - "loss": 46.0, - "step": 39307 - }, - { - "epoch": 3.005371103083128, - "grad_norm": 0.003573029302060604, - "learning_rate": 0.00019999554505441522, - "loss": 46.0, - "step": 39308 - }, - { - "epoch": 3.0054475600665174, - "grad_norm": 0.001215667580254376, - "learning_rate": 0.0001999955448276877, - "loss": 46.0, - "step": 39309 - }, - { - "epoch": 3.005524017049907, - "grad_norm": 0.007345445454120636, - "learning_rate": 0.0001999955446009544, - "loss": 46.0, - "step": 39310 - }, - { - "epoch": 3.005600474033297, - "grad_norm": 0.0017702047480270267, - "learning_rate": 0.00019999554437421531, - "loss": 46.0, - "step": 39311 - }, - { - "epoch": 3.0056769310166866, - "grad_norm": 0.0019814420957118273, - "learning_rate": 0.0001999955441474705, - "loss": 46.0, - "step": 39312 - }, - { - "epoch": 3.0057533880000764, - "grad_norm": 0.003374501597136259, - "learning_rate": 0.00019999554392071992, - "loss": 46.0, - "step": 39313 - }, - { - "epoch": 3.005829844983466, - "grad_norm": 0.0017362427897751331, - "learning_rate": 0.0001999955436939635, - "loss": 46.0, - "step": 39314 - }, - { - "epoch": 3.005906301966856, - "grad_norm": 0.0016244228463619947, - "learning_rate": 0.00019999554346720138, - "loss": 46.0, - "step": 39315 - }, - { - "epoch": 3.0059827589502457, - "grad_norm": 0.0031385133042931557, - "learning_rate": 0.00019999554324043347, - "loss": 46.0, - "step": 39316 - }, - { - "epoch": 3.0060592159336355, - "grad_norm": 0.0008756855968385935, - "learning_rate": 0.0001999955430136598, - "loss": 46.0, - "step": 39317 - }, - { - "epoch": 3.006135672917025, - "grad_norm": 0.0024367005098611116, - "learning_rate": 0.00019999554278688034, - "loss": 46.0, - "step": 39318 - }, - { - "epoch": 3.006212129900415, - "grad_norm": 0.006825009826570749, - "learning_rate": 0.00019999554256009512, - "loss": 46.0, - "step": 39319 - }, - { - "epoch": 3.0062885868838043, - "grad_norm": 0.002445233054459095, - "learning_rate": 0.00019999554233330412, - "loss": 46.0, - "step": 39320 - }, - { - "epoch": 3.006365043867194, - "grad_norm": 0.0013995724730193615, - "learning_rate": 0.00019999554210650737, - "loss": 46.0, - "step": 39321 - }, - { - "epoch": 3.006441500850584, - "grad_norm": 0.002760851988568902, - "learning_rate": 0.00019999554187970483, - "loss": 46.0, - "step": 39322 - }, - { - "epoch": 3.0065179578339736, - "grad_norm": 0.0027504260651767254, - "learning_rate": 0.00019999554165289657, - "loss": 46.0, - "step": 39323 - }, - { - "epoch": 3.0065944148173633, - "grad_norm": 0.0025702493730932474, - "learning_rate": 0.00019999554142608247, - "loss": 46.0, - "step": 39324 - }, - { - "epoch": 3.006670871800753, - "grad_norm": 0.003154223784804344, - "learning_rate": 0.00019999554119926264, - "loss": 46.0, - "step": 39325 - }, - { - "epoch": 3.006747328784143, - "grad_norm": 0.0019131911685690284, - "learning_rate": 0.00019999554097243705, - "loss": 46.0, - "step": 39326 - }, - { - "epoch": 3.0068237857675326, - "grad_norm": 0.0013920033816248178, - "learning_rate": 0.00019999554074560567, - "loss": 46.0, - "step": 39327 - }, - { - "epoch": 3.0069002427509224, - "grad_norm": 0.0016234898939728737, - "learning_rate": 0.0001999955405187685, - "loss": 46.0, - "step": 39328 - }, - { - "epoch": 3.006976699734312, - "grad_norm": 0.004340468440204859, - "learning_rate": 0.00019999554029192563, - "loss": 46.0, - "step": 39329 - }, - { - "epoch": 3.007053156717702, - "grad_norm": 0.002443769248202443, - "learning_rate": 0.00019999554006507695, - "loss": 46.0, - "step": 39330 - }, - { - "epoch": 3.007129613701091, - "grad_norm": 0.0014609097270295024, - "learning_rate": 0.0001999955398382225, - "loss": 46.0, - "step": 39331 - }, - { - "epoch": 3.007206070684481, - "grad_norm": 0.0013730047503486276, - "learning_rate": 0.00019999553961136228, - "loss": 46.0, - "step": 39332 - }, - { - "epoch": 3.0072825276678707, - "grad_norm": 0.003794951131567359, - "learning_rate": 0.00019999553938449628, - "loss": 46.0, - "step": 39333 - }, - { - "epoch": 3.0073589846512605, - "grad_norm": 0.001801217906177044, - "learning_rate": 0.0001999955391576245, - "loss": 46.0, - "step": 39334 - }, - { - "epoch": 3.0074354416346503, - "grad_norm": 0.004198344424366951, - "learning_rate": 0.000199995538930747, - "loss": 46.0, - "step": 39335 - }, - { - "epoch": 3.00751189861804, - "grad_norm": 0.0020839693024754524, - "learning_rate": 0.00019999553870386373, - "loss": 46.0, - "step": 39336 - }, - { - "epoch": 3.00758835560143, - "grad_norm": 0.0015438891714438796, - "learning_rate": 0.00019999553847697464, - "loss": 46.0, - "step": 39337 - }, - { - "epoch": 3.0076648125848195, - "grad_norm": 0.0035184400621801615, - "learning_rate": 0.00019999553825007983, - "loss": 46.0, - "step": 39338 - }, - { - "epoch": 3.0077412695682093, - "grad_norm": 0.0009981831535696983, - "learning_rate": 0.0001999955380231792, - "loss": 46.0, - "step": 39339 - }, - { - "epoch": 3.007817726551599, - "grad_norm": 0.004358906764537096, - "learning_rate": 0.00019999553779627283, - "loss": 46.0, - "step": 39340 - }, - { - "epoch": 3.007894183534989, - "grad_norm": 0.0028269216418266296, - "learning_rate": 0.0001999955375693607, - "loss": 46.0, - "step": 39341 - }, - { - "epoch": 3.007970640518378, - "grad_norm": 0.000924261927139014, - "learning_rate": 0.0001999955373424428, - "loss": 46.0, - "step": 39342 - }, - { - "epoch": 3.008047097501768, - "grad_norm": 0.0013527566334232688, - "learning_rate": 0.0001999955371155191, - "loss": 46.0, - "step": 39343 - }, - { - "epoch": 3.0081235544851577, - "grad_norm": 0.0020822433289140463, - "learning_rate": 0.00019999553688858964, - "loss": 46.0, - "step": 39344 - }, - { - "epoch": 3.0082000114685474, - "grad_norm": 0.0022662978153675795, - "learning_rate": 0.00019999553666165442, - "loss": 46.0, - "step": 39345 - }, - { - "epoch": 3.008276468451937, - "grad_norm": 0.001475971657782793, - "learning_rate": 0.00019999553643471342, - "loss": 46.0, - "step": 39346 - }, - { - "epoch": 3.008352925435327, - "grad_norm": 0.0014536315575242043, - "learning_rate": 0.00019999553620776668, - "loss": 46.0, - "step": 39347 - }, - { - "epoch": 3.0084293824187167, - "grad_norm": 0.005424159578979015, - "learning_rate": 0.00019999553598081416, - "loss": 46.0, - "step": 39348 - }, - { - "epoch": 3.0085058394021065, - "grad_norm": 0.0013394515262916684, - "learning_rate": 0.00019999553575385587, - "loss": 46.0, - "step": 39349 - }, - { - "epoch": 3.0085822963854962, - "grad_norm": 0.0008809837745502591, - "learning_rate": 0.00019999553552689178, - "loss": 46.0, - "step": 39350 - }, - { - "epoch": 3.008658753368886, - "grad_norm": 0.0027789610903710127, - "learning_rate": 0.00019999553529992197, - "loss": 46.0, - "step": 39351 - }, - { - "epoch": 3.0087352103522758, - "grad_norm": 0.001662369933910668, - "learning_rate": 0.00019999553507294638, - "loss": 46.0, - "step": 39352 - }, - { - "epoch": 3.008811667335665, - "grad_norm": 0.0045648436062037945, - "learning_rate": 0.00019999553484596497, - "loss": 46.0, - "step": 39353 - }, - { - "epoch": 3.008888124319055, - "grad_norm": 0.0018527117790654302, - "learning_rate": 0.00019999553461897784, - "loss": 46.0, - "step": 39354 - }, - { - "epoch": 3.0089645813024446, - "grad_norm": 0.0024484908208251, - "learning_rate": 0.00019999553439198494, - "loss": 46.0, - "step": 39355 - }, - { - "epoch": 3.0090410382858344, - "grad_norm": 0.0022886963561177254, - "learning_rate": 0.00019999553416498626, - "loss": 46.0, - "step": 39356 - }, - { - "epoch": 3.009117495269224, - "grad_norm": 0.0014354987069964409, - "learning_rate": 0.0001999955339379818, - "loss": 46.0, - "step": 39357 - }, - { - "epoch": 3.009193952252614, - "grad_norm": 0.0016976002370938659, - "learning_rate": 0.00019999553371097156, - "loss": 46.0, - "step": 39358 - }, - { - "epoch": 3.0092704092360036, - "grad_norm": 0.0036428917665034533, - "learning_rate": 0.00019999553348395562, - "loss": 46.0, - "step": 39359 - }, - { - "epoch": 3.0093468662193934, - "grad_norm": 0.0027197967283427715, - "learning_rate": 0.00019999553325693382, - "loss": 46.0, - "step": 39360 - }, - { - "epoch": 3.009423323202783, - "grad_norm": 0.003048036713153124, - "learning_rate": 0.00019999553302990633, - "loss": 46.0, - "step": 39361 - }, - { - "epoch": 3.009499780186173, - "grad_norm": 0.0014959762338548899, - "learning_rate": 0.00019999553280287301, - "loss": 46.0, - "step": 39362 - }, - { - "epoch": 3.0095762371695627, - "grad_norm": 0.0015660420758649707, - "learning_rate": 0.00019999553257583395, - "loss": 46.0, - "step": 39363 - }, - { - "epoch": 3.009652694152952, - "grad_norm": 0.0033422966953366995, - "learning_rate": 0.0001999955323487891, - "loss": 46.0, - "step": 39364 - }, - { - "epoch": 3.0097291511363418, - "grad_norm": 0.0010475198505446315, - "learning_rate": 0.0001999955321217385, - "loss": 46.0, - "step": 39365 - }, - { - "epoch": 3.0098056081197315, - "grad_norm": 0.0036027971655130386, - "learning_rate": 0.00019999553189468215, - "loss": 46.0, - "step": 39366 - }, - { - "epoch": 3.0098820651031213, - "grad_norm": 0.002637310419231653, - "learning_rate": 0.00019999553166762, - "loss": 46.0, - "step": 39367 - }, - { - "epoch": 3.009958522086511, - "grad_norm": 0.0022255482617765665, - "learning_rate": 0.00019999553144055209, - "loss": 46.0, - "step": 39368 - }, - { - "epoch": 3.010034979069901, - "grad_norm": 0.002966152736917138, - "learning_rate": 0.00019999553121347844, - "loss": 46.0, - "step": 39369 - }, - { - "epoch": 3.0101114360532906, - "grad_norm": 0.002870200900360942, - "learning_rate": 0.00019999553098639896, - "loss": 46.0, - "step": 39370 - }, - { - "epoch": 3.0101878930366803, - "grad_norm": 0.006929353345185518, - "learning_rate": 0.00019999553075931376, - "loss": 46.0, - "step": 39371 - }, - { - "epoch": 3.01026435002007, - "grad_norm": 0.001328064245171845, - "learning_rate": 0.00019999553053222274, - "loss": 46.0, - "step": 39372 - }, - { - "epoch": 3.01034080700346, - "grad_norm": 0.0010474452283233404, - "learning_rate": 0.00019999553030512602, - "loss": 46.0, - "step": 39373 - }, - { - "epoch": 3.0104172639868496, - "grad_norm": 0.0030777212232351303, - "learning_rate": 0.0001999955300780235, - "loss": 46.0, - "step": 39374 - }, - { - "epoch": 3.010493720970239, - "grad_norm": 0.0041889892891049385, - "learning_rate": 0.00019999552985091522, - "loss": 46.0, - "step": 39375 - }, - { - "epoch": 3.0105701779536287, - "grad_norm": 0.003737753489986062, - "learning_rate": 0.00019999552962380113, - "loss": 46.0, - "step": 39376 - }, - { - "epoch": 3.0106466349370185, - "grad_norm": 0.005608769133687019, - "learning_rate": 0.0001999955293966813, - "loss": 46.0, - "step": 39377 - }, - { - "epoch": 3.010723091920408, - "grad_norm": 0.000600518542341888, - "learning_rate": 0.0001999955291695557, - "loss": 46.0, - "step": 39378 - }, - { - "epoch": 3.010799548903798, - "grad_norm": 0.0013439670437946916, - "learning_rate": 0.00019999552894242432, - "loss": 46.0, - "step": 39379 - }, - { - "epoch": 3.0108760058871877, - "grad_norm": 0.0013751742662861943, - "learning_rate": 0.00019999552871528717, - "loss": 46.0, - "step": 39380 - }, - { - "epoch": 3.0109524628705775, - "grad_norm": 0.0004651658528018743, - "learning_rate": 0.00019999552848814426, - "loss": 46.0, - "step": 39381 - }, - { - "epoch": 3.0110289198539673, - "grad_norm": 0.0012404487933963537, - "learning_rate": 0.0001999955282609956, - "loss": 46.0, - "step": 39382 - }, - { - "epoch": 3.011105376837357, - "grad_norm": 0.001523573650047183, - "learning_rate": 0.00019999552803384116, - "loss": 46.0, - "step": 39383 - }, - { - "epoch": 3.011181833820747, - "grad_norm": 0.0012302714167162776, - "learning_rate": 0.00019999552780668091, - "loss": 46.0, - "step": 39384 - }, - { - "epoch": 3.0112582908041365, - "grad_norm": 0.000768988044001162, - "learning_rate": 0.00019999552757951495, - "loss": 46.0, - "step": 39385 - }, - { - "epoch": 3.011334747787526, - "grad_norm": 0.0008446825086139143, - "learning_rate": 0.0001999955273523432, - "loss": 46.0, - "step": 39386 - }, - { - "epoch": 3.0114112047709156, - "grad_norm": 0.0037377565167844296, - "learning_rate": 0.00019999552712516566, - "loss": 46.0, - "step": 39387 - }, - { - "epoch": 3.0114876617543054, - "grad_norm": 0.004430252593010664, - "learning_rate": 0.00019999552689798237, - "loss": 46.0, - "step": 39388 - }, - { - "epoch": 3.011564118737695, - "grad_norm": 0.0023575969971716404, - "learning_rate": 0.0001999955266707933, - "loss": 46.0, - "step": 39389 - }, - { - "epoch": 3.011640575721085, - "grad_norm": 0.004492738284170628, - "learning_rate": 0.00019999552644359847, - "loss": 46.0, - "step": 39390 - }, - { - "epoch": 3.0117170327044747, - "grad_norm": 0.0021767497528344393, - "learning_rate": 0.0001999955262163979, - "loss": 46.0, - "step": 39391 - }, - { - "epoch": 3.0117934896878644, - "grad_norm": 0.0012283246032893658, - "learning_rate": 0.0001999955259891915, - "loss": 46.0, - "step": 39392 - }, - { - "epoch": 3.011869946671254, - "grad_norm": 0.0064230673015117645, - "learning_rate": 0.00019999552576197935, - "loss": 46.0, - "step": 39393 - }, - { - "epoch": 3.011946403654644, - "grad_norm": 0.0019803596660494804, - "learning_rate": 0.00019999552553476147, - "loss": 46.0, - "step": 39394 - }, - { - "epoch": 3.0120228606380337, - "grad_norm": 0.0008590491488575935, - "learning_rate": 0.00019999552530753777, - "loss": 46.0, - "step": 39395 - }, - { - "epoch": 3.0120993176214235, - "grad_norm": 0.0019970927387475967, - "learning_rate": 0.00019999552508030832, - "loss": 46.0, - "step": 39396 - }, - { - "epoch": 3.012175774604813, - "grad_norm": 0.0022963066585361958, - "learning_rate": 0.00019999552485307312, - "loss": 46.0, - "step": 39397 - }, - { - "epoch": 3.0122522315882025, - "grad_norm": 0.0018520416924729943, - "learning_rate": 0.00019999552462583213, - "loss": 46.0, - "step": 39398 - }, - { - "epoch": 3.0123286885715923, - "grad_norm": 0.0011761331697925925, - "learning_rate": 0.00019999552439858539, - "loss": 46.0, - "step": 39399 - }, - { - "epoch": 3.012405145554982, - "grad_norm": 0.0009542282205075026, - "learning_rate": 0.00019999552417133284, - "loss": 46.0, - "step": 39400 - }, - { - "epoch": 3.012481602538372, - "grad_norm": 0.005651876796036959, - "learning_rate": 0.00019999552394407455, - "loss": 46.0, - "step": 39401 - }, - { - "epoch": 3.0125580595217616, - "grad_norm": 0.0035883476957678795, - "learning_rate": 0.00019999552371681046, - "loss": 46.0, - "step": 39402 - }, - { - "epoch": 3.0126345165051513, - "grad_norm": 0.0006832416984252632, - "learning_rate": 0.00019999552348954068, - "loss": 46.0, - "step": 39403 - }, - { - "epoch": 3.012710973488541, - "grad_norm": 0.0028043249621987343, - "learning_rate": 0.00019999552326226505, - "loss": 46.0, - "step": 39404 - }, - { - "epoch": 3.012787430471931, - "grad_norm": 0.0006444815662689507, - "learning_rate": 0.0001999955230349837, - "loss": 46.0, - "step": 39405 - }, - { - "epoch": 3.0128638874553206, - "grad_norm": 0.004395497962832451, - "learning_rate": 0.00019999552280769653, - "loss": 46.0, - "step": 39406 - }, - { - "epoch": 3.01294034443871, - "grad_norm": 0.0009213870507664979, - "learning_rate": 0.00019999552258040363, - "loss": 46.0, - "step": 39407 - }, - { - "epoch": 3.0130168014220997, - "grad_norm": 0.0036612919066101313, - "learning_rate": 0.00019999552235310498, - "loss": 46.0, - "step": 39408 - }, - { - "epoch": 3.0130932584054895, - "grad_norm": 0.002255040453746915, - "learning_rate": 0.00019999552212580053, - "loss": 46.0, - "step": 39409 - }, - { - "epoch": 3.0131697153888792, - "grad_norm": 0.0026665974874049425, - "learning_rate": 0.00019999552189849028, - "loss": 46.0, - "step": 39410 - }, - { - "epoch": 3.013246172372269, - "grad_norm": 0.0030404115095734596, - "learning_rate": 0.00019999552167117432, - "loss": 46.0, - "step": 39411 - }, - { - "epoch": 3.0133226293556588, - "grad_norm": 0.003132016398012638, - "learning_rate": 0.00019999552144385257, - "loss": 46.0, - "step": 39412 - }, - { - "epoch": 3.0133990863390485, - "grad_norm": 0.004428382497280836, - "learning_rate": 0.00019999552121652503, - "loss": 46.0, - "step": 39413 - }, - { - "epoch": 3.0134755433224383, - "grad_norm": 0.00221726531162858, - "learning_rate": 0.00019999552098919174, - "loss": 46.0, - "step": 39414 - }, - { - "epoch": 3.013552000305828, - "grad_norm": 0.0034431880339980125, - "learning_rate": 0.00019999552076185268, - "loss": 46.0, - "step": 39415 - }, - { - "epoch": 3.013628457289218, - "grad_norm": 0.000692942354362458, - "learning_rate": 0.00019999552053450782, - "loss": 46.0, - "step": 39416 - }, - { - "epoch": 3.0137049142726076, - "grad_norm": 0.00745638320222497, - "learning_rate": 0.00019999552030715723, - "loss": 46.0, - "step": 39417 - }, - { - "epoch": 3.013781371255997, - "grad_norm": 0.00488311517983675, - "learning_rate": 0.00019999552007980088, - "loss": 46.0, - "step": 39418 - }, - { - "epoch": 3.0138578282393866, - "grad_norm": 0.004646709188818932, - "learning_rate": 0.00019999551985243875, - "loss": 46.0, - "step": 39419 - }, - { - "epoch": 3.0139342852227764, - "grad_norm": 0.002193636493757367, - "learning_rate": 0.00019999551962507082, - "loss": 46.0, - "step": 39420 - }, - { - "epoch": 3.014010742206166, - "grad_norm": 0.006063556298613548, - "learning_rate": 0.00019999551939769715, - "loss": 46.0, - "step": 39421 - }, - { - "epoch": 3.014087199189556, - "grad_norm": 0.003570585511624813, - "learning_rate": 0.0001999955191703177, - "loss": 46.0, - "step": 39422 - }, - { - "epoch": 3.0141636561729457, - "grad_norm": 0.0020032732281833887, - "learning_rate": 0.0001999955189429325, - "loss": 46.0, - "step": 39423 - }, - { - "epoch": 3.0142401131563354, - "grad_norm": 0.007903410121798515, - "learning_rate": 0.0001999955187155415, - "loss": 46.0, - "step": 39424 - }, - { - "epoch": 3.014316570139725, - "grad_norm": 0.0022396582644432783, - "learning_rate": 0.00019999551848814474, - "loss": 46.0, - "step": 39425 - }, - { - "epoch": 3.014393027123115, - "grad_norm": 0.001396796084009111, - "learning_rate": 0.00019999551826074222, - "loss": 46.0, - "step": 39426 - }, - { - "epoch": 3.0144694841065047, - "grad_norm": 0.005267503205686808, - "learning_rate": 0.00019999551803333393, - "loss": 46.0, - "step": 39427 - }, - { - "epoch": 3.0145459410898945, - "grad_norm": 0.0034614321775734425, - "learning_rate": 0.00019999551780591987, - "loss": 46.0, - "step": 39428 - }, - { - "epoch": 3.014622398073284, - "grad_norm": 0.0012525422498583794, - "learning_rate": 0.00019999551757850004, - "loss": 46.0, - "step": 39429 - }, - { - "epoch": 3.0146988550566736, - "grad_norm": 0.004195562098175287, - "learning_rate": 0.00019999551735107445, - "loss": 46.0, - "step": 39430 - }, - { - "epoch": 3.0147753120400633, - "grad_norm": 0.005847356282174587, - "learning_rate": 0.00019999551712364307, - "loss": 46.0, - "step": 39431 - }, - { - "epoch": 3.014851769023453, - "grad_norm": 0.0022426152136176825, - "learning_rate": 0.00019999551689620592, - "loss": 46.0, - "step": 39432 - }, - { - "epoch": 3.014928226006843, - "grad_norm": 0.0021602236665785313, - "learning_rate": 0.00019999551666876304, - "loss": 46.0, - "step": 39433 - }, - { - "epoch": 3.0150046829902326, - "grad_norm": 0.004899294581264257, - "learning_rate": 0.00019999551644131437, - "loss": 46.0, - "step": 39434 - }, - { - "epoch": 3.0150811399736224, - "grad_norm": 0.0026685649063438177, - "learning_rate": 0.0001999955162138599, - "loss": 46.0, - "step": 39435 - }, - { - "epoch": 3.015157596957012, - "grad_norm": 0.001997855259105563, - "learning_rate": 0.00019999551598639967, - "loss": 46.0, - "step": 39436 - }, - { - "epoch": 3.015234053940402, - "grad_norm": 0.005254756659269333, - "learning_rate": 0.00019999551575893373, - "loss": 46.0, - "step": 39437 - }, - { - "epoch": 3.0153105109237917, - "grad_norm": 0.0020204721949994564, - "learning_rate": 0.00019999551553146193, - "loss": 46.0, - "step": 39438 - }, - { - "epoch": 3.0153869679071814, - "grad_norm": 0.0009282830869778991, - "learning_rate": 0.00019999551530398445, - "loss": 46.0, - "step": 39439 - }, - { - "epoch": 3.0154634248905707, - "grad_norm": 0.004497554153203964, - "learning_rate": 0.00019999551507650113, - "loss": 46.0, - "step": 39440 - }, - { - "epoch": 3.0155398818739605, - "grad_norm": 0.001964461524039507, - "learning_rate": 0.00019999551484901207, - "loss": 46.0, - "step": 39441 - }, - { - "epoch": 3.0156163388573503, - "grad_norm": 0.0028947643004357815, - "learning_rate": 0.00019999551462151723, - "loss": 46.0, - "step": 39442 - }, - { - "epoch": 3.01569279584074, - "grad_norm": 0.003510626032948494, - "learning_rate": 0.00019999551439401665, - "loss": 46.0, - "step": 39443 - }, - { - "epoch": 3.0157692528241298, - "grad_norm": 0.0033016942907124758, - "learning_rate": 0.00019999551416651027, - "loss": 46.0, - "step": 39444 - }, - { - "epoch": 3.0158457098075195, - "grad_norm": 0.0012277968926355243, - "learning_rate": 0.00019999551393899814, - "loss": 46.0, - "step": 39445 - }, - { - "epoch": 3.0159221667909093, - "grad_norm": 0.0007238698308356106, - "learning_rate": 0.0001999955137114802, - "loss": 46.0, - "step": 39446 - }, - { - "epoch": 3.015998623774299, - "grad_norm": 0.001045160461217165, - "learning_rate": 0.00019999551348395657, - "loss": 46.0, - "step": 39447 - }, - { - "epoch": 3.016075080757689, - "grad_norm": 0.0019541247747838497, - "learning_rate": 0.00019999551325642712, - "loss": 46.0, - "step": 39448 - }, - { - "epoch": 3.0161515377410786, - "grad_norm": 0.0029180001001805067, - "learning_rate": 0.00019999551302889187, - "loss": 46.0, - "step": 39449 - }, - { - "epoch": 3.0162279947244683, - "grad_norm": 0.003668922232463956, - "learning_rate": 0.00019999551280135093, - "loss": 46.0, - "step": 39450 - }, - { - "epoch": 3.0163044517078577, - "grad_norm": 0.0013092189328745008, - "learning_rate": 0.00019999551257380416, - "loss": 46.0, - "step": 39451 - }, - { - "epoch": 3.0163809086912474, - "grad_norm": 0.0040798126719892025, - "learning_rate": 0.00019999551234625162, - "loss": 46.0, - "step": 39452 - }, - { - "epoch": 3.016457365674637, - "grad_norm": 0.0016058036126196384, - "learning_rate": 0.00019999551211869333, - "loss": 46.0, - "step": 39453 - }, - { - "epoch": 3.016533822658027, - "grad_norm": 0.0029545396100729704, - "learning_rate": 0.0001999955118911293, - "loss": 46.0, - "step": 39454 - }, - { - "epoch": 3.0166102796414167, - "grad_norm": 0.0014185799518600106, - "learning_rate": 0.00019999551166355946, - "loss": 46.0, - "step": 39455 - }, - { - "epoch": 3.0166867366248065, - "grad_norm": 0.001393378246575594, - "learning_rate": 0.00019999551143598385, - "loss": 46.0, - "step": 39456 - }, - { - "epoch": 3.0167631936081962, - "grad_norm": 0.002617392223328352, - "learning_rate": 0.00019999551120840247, - "loss": 46.0, - "step": 39457 - }, - { - "epoch": 3.016839650591586, - "grad_norm": 0.004419579170644283, - "learning_rate": 0.00019999551098081532, - "loss": 46.0, - "step": 39458 - }, - { - "epoch": 3.0169161075749757, - "grad_norm": 0.005027450621128082, - "learning_rate": 0.00019999551075322244, - "loss": 46.0, - "step": 39459 - }, - { - "epoch": 3.0169925645583655, - "grad_norm": 0.0012779480312019587, - "learning_rate": 0.00019999551052562374, - "loss": 46.0, - "step": 39460 - }, - { - "epoch": 3.0170690215417553, - "grad_norm": 0.0038401770871132612, - "learning_rate": 0.00019999551029801932, - "loss": 46.0, - "step": 39461 - }, - { - "epoch": 3.0171454785251446, - "grad_norm": 0.001983853057026863, - "learning_rate": 0.0001999955100704091, - "loss": 46.0, - "step": 39462 - }, - { - "epoch": 3.0172219355085343, - "grad_norm": 0.0035650290083140135, - "learning_rate": 0.0001999955098427931, - "loss": 46.0, - "step": 39463 - }, - { - "epoch": 3.017298392491924, - "grad_norm": 0.0003997962921857834, - "learning_rate": 0.00019999550961517136, - "loss": 46.0, - "step": 39464 - }, - { - "epoch": 3.017374849475314, - "grad_norm": 0.0020195217803120613, - "learning_rate": 0.00019999550938754382, - "loss": 46.0, - "step": 39465 - }, - { - "epoch": 3.0174513064587036, - "grad_norm": 0.0018501769518479705, - "learning_rate": 0.00019999550915991053, - "loss": 46.0, - "step": 39466 - }, - { - "epoch": 3.0175277634420934, - "grad_norm": 0.001178169739432633, - "learning_rate": 0.0001999955089322715, - "loss": 46.0, - "step": 39467 - }, - { - "epoch": 3.017604220425483, - "grad_norm": 0.0044974978081882, - "learning_rate": 0.00019999550870462667, - "loss": 46.0, - "step": 39468 - }, - { - "epoch": 3.017680677408873, - "grad_norm": 0.0026308188680559397, - "learning_rate": 0.00019999550847697606, - "loss": 46.0, - "step": 39469 - }, - { - "epoch": 3.0177571343922627, - "grad_norm": 0.0025967489928007126, - "learning_rate": 0.0001999955082493197, - "loss": 46.0, - "step": 39470 - }, - { - "epoch": 3.0178335913756524, - "grad_norm": 0.0033321240916848183, - "learning_rate": 0.00019999550802165755, - "loss": 46.0, - "step": 39471 - }, - { - "epoch": 3.017910048359042, - "grad_norm": 0.002086300402879715, - "learning_rate": 0.00019999550779398962, - "loss": 46.0, - "step": 39472 - }, - { - "epoch": 3.0179865053424315, - "grad_norm": 0.002381829544901848, - "learning_rate": 0.00019999550756631597, - "loss": 46.0, - "step": 39473 - }, - { - "epoch": 3.0180629623258213, - "grad_norm": 0.007461670786142349, - "learning_rate": 0.00019999550733863653, - "loss": 46.0, - "step": 39474 - }, - { - "epoch": 3.018139419309211, - "grad_norm": 0.0052920919843018055, - "learning_rate": 0.0001999955071109513, - "loss": 46.0, - "step": 39475 - }, - { - "epoch": 3.018215876292601, - "grad_norm": 0.004215862136334181, - "learning_rate": 0.0001999955068832603, - "loss": 46.0, - "step": 39476 - }, - { - "epoch": 3.0182923332759906, - "grad_norm": 0.003421841189265251, - "learning_rate": 0.00019999550665556357, - "loss": 46.0, - "step": 39477 - }, - { - "epoch": 3.0183687902593803, - "grad_norm": 0.0012295730412006378, - "learning_rate": 0.00019999550642786106, - "loss": 46.0, - "step": 39478 - }, - { - "epoch": 3.01844524724277, - "grad_norm": 0.0018388039898127317, - "learning_rate": 0.00019999550620015274, - "loss": 46.0, - "step": 39479 - }, - { - "epoch": 3.01852170422616, - "grad_norm": 0.003716369392350316, - "learning_rate": 0.0001999955059724387, - "loss": 46.0, - "step": 39480 - }, - { - "epoch": 3.0185981612095496, - "grad_norm": 0.0021993708796799183, - "learning_rate": 0.00019999550574471888, - "loss": 46.0, - "step": 39481 - }, - { - "epoch": 3.0186746181929394, - "grad_norm": 0.0016098986379802227, - "learning_rate": 0.00019999550551699327, - "loss": 46.0, - "step": 39482 - }, - { - "epoch": 3.018751075176329, - "grad_norm": 0.0026489822193980217, - "learning_rate": 0.00019999550528926191, - "loss": 46.0, - "step": 39483 - }, - { - "epoch": 3.0188275321597184, - "grad_norm": 0.0020469960290938616, - "learning_rate": 0.00019999550506152476, - "loss": 46.0, - "step": 39484 - }, - { - "epoch": 3.018903989143108, - "grad_norm": 0.00467684818431735, - "learning_rate": 0.00019999550483378186, - "loss": 46.0, - "step": 39485 - }, - { - "epoch": 3.018980446126498, - "grad_norm": 0.0051090954802930355, - "learning_rate": 0.0001999955046060332, - "loss": 46.0, - "step": 39486 - }, - { - "epoch": 3.0190569031098877, - "grad_norm": 0.005223751068115234, - "learning_rate": 0.00019999550437827877, - "loss": 46.0, - "step": 39487 - }, - { - "epoch": 3.0191333600932775, - "grad_norm": 0.0044022416695952415, - "learning_rate": 0.00019999550415051852, - "loss": 46.0, - "step": 39488 - }, - { - "epoch": 3.0192098170766672, - "grad_norm": 0.0015226774848997593, - "learning_rate": 0.00019999550392275255, - "loss": 46.0, - "step": 39489 - }, - { - "epoch": 3.019286274060057, - "grad_norm": 0.002391026122495532, - "learning_rate": 0.0001999955036949808, - "loss": 46.0, - "step": 39490 - }, - { - "epoch": 3.0193627310434468, - "grad_norm": 0.0032122882548719645, - "learning_rate": 0.00019999550346720327, - "loss": 46.0, - "step": 39491 - }, - { - "epoch": 3.0194391880268365, - "grad_norm": 0.002311436226591468, - "learning_rate": 0.00019999550323941998, - "loss": 46.0, - "step": 39492 - }, - { - "epoch": 3.0195156450102263, - "grad_norm": 0.0025622406974434853, - "learning_rate": 0.00019999550301163092, - "loss": 46.0, - "step": 39493 - }, - { - "epoch": 3.019592101993616, - "grad_norm": 0.0006701344391331077, - "learning_rate": 0.0001999955027838361, - "loss": 46.0, - "step": 39494 - }, - { - "epoch": 3.0196685589770054, - "grad_norm": 0.002483791671693325, - "learning_rate": 0.0001999955025560355, - "loss": 46.0, - "step": 39495 - }, - { - "epoch": 3.019745015960395, - "grad_norm": 0.0016375501872971654, - "learning_rate": 0.00019999550232822913, - "loss": 46.0, - "step": 39496 - }, - { - "epoch": 3.019821472943785, - "grad_norm": 0.004270276986062527, - "learning_rate": 0.000199995502100417, - "loss": 46.0, - "step": 39497 - }, - { - "epoch": 3.0198979299271747, - "grad_norm": 0.0016216675285249949, - "learning_rate": 0.0001999955018725991, - "loss": 46.0, - "step": 39498 - }, - { - "epoch": 3.0199743869105644, - "grad_norm": 0.002118132309988141, - "learning_rate": 0.00019999550164477543, - "loss": 46.0, - "step": 39499 - }, - { - "epoch": 3.020050843893954, - "grad_norm": 0.002751134568825364, - "learning_rate": 0.00019999550141694598, - "loss": 46.0, - "step": 39500 - }, - { - "epoch": 3.020127300877344, - "grad_norm": 0.0021930960938334465, - "learning_rate": 0.00019999550118911076, - "loss": 46.0, - "step": 39501 - }, - { - "epoch": 3.0202037578607337, - "grad_norm": 0.005091415718197823, - "learning_rate": 0.00019999550096126977, - "loss": 46.0, - "step": 39502 - }, - { - "epoch": 3.0202802148441235, - "grad_norm": 0.002106683561578393, - "learning_rate": 0.00019999550073342303, - "loss": 46.0, - "step": 39503 - }, - { - "epoch": 3.020356671827513, - "grad_norm": 0.001299706636928022, - "learning_rate": 0.0001999955005055705, - "loss": 46.0, - "step": 39504 - }, - { - "epoch": 3.020433128810903, - "grad_norm": 0.0009211815195158124, - "learning_rate": 0.0001999955002777122, - "loss": 46.0, - "step": 39505 - }, - { - "epoch": 3.0205095857942923, - "grad_norm": 0.002531653270125389, - "learning_rate": 0.00019999550004984817, - "loss": 46.0, - "step": 39506 - }, - { - "epoch": 3.020586042777682, - "grad_norm": 0.001453232835046947, - "learning_rate": 0.0001999954998219783, - "loss": 46.0, - "step": 39507 - }, - { - "epoch": 3.020662499761072, - "grad_norm": 0.006961993873119354, - "learning_rate": 0.00019999549959410273, - "loss": 46.0, - "step": 39508 - }, - { - "epoch": 3.0207389567444616, - "grad_norm": 0.0011820897925645113, - "learning_rate": 0.00019999549936622137, - "loss": 46.0, - "step": 39509 - }, - { - "epoch": 3.0208154137278513, - "grad_norm": 0.005325338337570429, - "learning_rate": 0.00019999549913833425, - "loss": 46.0, - "step": 39510 - }, - { - "epoch": 3.020891870711241, - "grad_norm": 0.004203235264867544, - "learning_rate": 0.00019999549891044135, - "loss": 46.0, - "step": 39511 - }, - { - "epoch": 3.020968327694631, - "grad_norm": 0.0012564307544380426, - "learning_rate": 0.00019999549868254265, - "loss": 46.0, - "step": 39512 - }, - { - "epoch": 3.0210447846780206, - "grad_norm": 0.002346331486478448, - "learning_rate": 0.0001999954984546382, - "loss": 46.0, - "step": 39513 - }, - { - "epoch": 3.0211212416614104, - "grad_norm": 0.0009770137257874012, - "learning_rate": 0.00019999549822672798, - "loss": 46.0, - "step": 39514 - }, - { - "epoch": 3.0211976986448, - "grad_norm": 0.0028245749417692423, - "learning_rate": 0.000199995497998812, - "loss": 46.0, - "step": 39515 - }, - { - "epoch": 3.02127415562819, - "grad_norm": 0.003160701598972082, - "learning_rate": 0.00019999549777089027, - "loss": 46.0, - "step": 39516 - }, - { - "epoch": 3.0213506126115792, - "grad_norm": 0.0017769790720194578, - "learning_rate": 0.00019999549754296273, - "loss": 46.0, - "step": 39517 - }, - { - "epoch": 3.021427069594969, - "grad_norm": 0.000977318617515266, - "learning_rate": 0.00019999549731502945, - "loss": 46.0, - "step": 39518 - }, - { - "epoch": 3.0215035265783587, - "grad_norm": 0.002621496096253395, - "learning_rate": 0.0001999954970870904, - "loss": 46.0, - "step": 39519 - }, - { - "epoch": 3.0215799835617485, - "grad_norm": 0.000990555388852954, - "learning_rate": 0.00019999549685914558, - "loss": 46.0, - "step": 39520 - }, - { - "epoch": 3.0216564405451383, - "grad_norm": 0.0032036621123552322, - "learning_rate": 0.00019999549663119498, - "loss": 46.0, - "step": 39521 - }, - { - "epoch": 3.021732897528528, - "grad_norm": 0.0014264700002968311, - "learning_rate": 0.0001999954964032386, - "loss": 46.0, - "step": 39522 - }, - { - "epoch": 3.021809354511918, - "grad_norm": 0.0023281339090317488, - "learning_rate": 0.00019999549617527647, - "loss": 46.0, - "step": 39523 - }, - { - "epoch": 3.0218858114953075, - "grad_norm": 0.001668137381784618, - "learning_rate": 0.00019999549594730857, - "loss": 46.0, - "step": 39524 - }, - { - "epoch": 3.0219622684786973, - "grad_norm": 0.0011162168812006712, - "learning_rate": 0.0001999954957193349, - "loss": 46.0, - "step": 39525 - }, - { - "epoch": 3.022038725462087, - "grad_norm": 0.0013556305784732103, - "learning_rate": 0.00019999549549135542, - "loss": 46.0, - "step": 39526 - }, - { - "epoch": 3.022115182445477, - "grad_norm": 0.004790290724486113, - "learning_rate": 0.00019999549526337026, - "loss": 46.0, - "step": 39527 - }, - { - "epoch": 3.022191639428866, - "grad_norm": 0.0011906020808964968, - "learning_rate": 0.00019999549503537924, - "loss": 46.0, - "step": 39528 - }, - { - "epoch": 3.022268096412256, - "grad_norm": 0.0033291589934378862, - "learning_rate": 0.0001999954948073825, - "loss": 46.0, - "step": 39529 - }, - { - "epoch": 3.0223445533956457, - "grad_norm": 0.0016306675970554352, - "learning_rate": 0.00019999549457937999, - "loss": 46.0, - "step": 39530 - }, - { - "epoch": 3.0224210103790354, - "grad_norm": 0.002272416604682803, - "learning_rate": 0.00019999549435137167, - "loss": 46.0, - "step": 39531 - }, - { - "epoch": 3.022497467362425, - "grad_norm": 0.0016264538280665874, - "learning_rate": 0.00019999549412335764, - "loss": 46.0, - "step": 39532 - }, - { - "epoch": 3.022573924345815, - "grad_norm": 0.0036463686265051365, - "learning_rate": 0.0001999954938953378, - "loss": 46.0, - "step": 39533 - }, - { - "epoch": 3.0226503813292047, - "grad_norm": 0.003005189588293433, - "learning_rate": 0.0001999954936673122, - "loss": 46.0, - "step": 39534 - }, - { - "epoch": 3.0227268383125945, - "grad_norm": 0.004093020688742399, - "learning_rate": 0.00019999549343928082, - "loss": 46.0, - "step": 39535 - }, - { - "epoch": 3.0228032952959842, - "grad_norm": 0.0026402596849948168, - "learning_rate": 0.00019999549321124372, - "loss": 46.0, - "step": 39536 - }, - { - "epoch": 3.022879752279374, - "grad_norm": 0.004336964339017868, - "learning_rate": 0.00019999549298320082, - "loss": 46.0, - "step": 39537 - }, - { - "epoch": 3.0229562092627633, - "grad_norm": 0.005225456319749355, - "learning_rate": 0.00019999549275515212, - "loss": 46.0, - "step": 39538 - }, - { - "epoch": 3.023032666246153, - "grad_norm": 0.0014755992451682687, - "learning_rate": 0.0001999954925270977, - "loss": 46.0, - "step": 39539 - }, - { - "epoch": 3.023109123229543, - "grad_norm": 0.0021839458495378494, - "learning_rate": 0.00019999549229903746, - "loss": 46.0, - "step": 39540 - }, - { - "epoch": 3.0231855802129326, - "grad_norm": 0.0014477333752438426, - "learning_rate": 0.0001999954920709715, - "loss": 46.0, - "step": 39541 - }, - { - "epoch": 3.0232620371963224, - "grad_norm": 0.004196503199636936, - "learning_rate": 0.00019999549184289975, - "loss": 46.0, - "step": 39542 - }, - { - "epoch": 3.023338494179712, - "grad_norm": 0.0010057806503027678, - "learning_rate": 0.00019999549161482224, - "loss": 46.0, - "step": 39543 - }, - { - "epoch": 3.023414951163102, - "grad_norm": 0.001979694701731205, - "learning_rate": 0.00019999549138673896, - "loss": 46.0, - "step": 39544 - }, - { - "epoch": 3.0234914081464916, - "grad_norm": 0.0018634955631569028, - "learning_rate": 0.0001999954911586499, - "loss": 46.0, - "step": 39545 - }, - { - "epoch": 3.0235678651298814, - "grad_norm": 0.0019349822541698813, - "learning_rate": 0.00019999549093055506, - "loss": 46.0, - "step": 39546 - }, - { - "epoch": 3.023644322113271, - "grad_norm": 0.000942290120292455, - "learning_rate": 0.00019999549070245446, - "loss": 46.0, - "step": 39547 - }, - { - "epoch": 3.023720779096661, - "grad_norm": 0.002466872101649642, - "learning_rate": 0.00019999549047434808, - "loss": 46.0, - "step": 39548 - }, - { - "epoch": 3.0237972360800502, - "grad_norm": 0.0007670424529351294, - "learning_rate": 0.00019999549024623595, - "loss": 46.0, - "step": 39549 - }, - { - "epoch": 3.02387369306344, - "grad_norm": 0.0030811980832368135, - "learning_rate": 0.00019999549001811805, - "loss": 46.0, - "step": 39550 - }, - { - "epoch": 3.0239501500468298, - "grad_norm": 0.005377268884330988, - "learning_rate": 0.00019999548978999438, - "loss": 46.0, - "step": 39551 - }, - { - "epoch": 3.0240266070302195, - "grad_norm": 0.0029614539816975594, - "learning_rate": 0.00019999548956186494, - "loss": 46.0, - "step": 39552 - }, - { - "epoch": 3.0241030640136093, - "grad_norm": 0.0015341442776843905, - "learning_rate": 0.00019999548933372974, - "loss": 46.0, - "step": 39553 - }, - { - "epoch": 3.024179520996999, - "grad_norm": 0.0021591614931821823, - "learning_rate": 0.00019999548910558875, - "loss": 46.0, - "step": 39554 - }, - { - "epoch": 3.024255977980389, - "grad_norm": 0.001307124039158225, - "learning_rate": 0.000199995488877442, - "loss": 46.0, - "step": 39555 - }, - { - "epoch": 3.0243324349637786, - "grad_norm": 0.0019438335439190269, - "learning_rate": 0.0001999954886492895, - "loss": 46.0, - "step": 39556 - }, - { - "epoch": 3.0244088919471683, - "grad_norm": 0.0012734817573800683, - "learning_rate": 0.00019999548842113122, - "loss": 46.0, - "step": 39557 - }, - { - "epoch": 3.024485348930558, - "grad_norm": 0.002688448177650571, - "learning_rate": 0.00019999548819296713, - "loss": 46.0, - "step": 39558 - }, - { - "epoch": 3.024561805913948, - "grad_norm": 0.009756259620189667, - "learning_rate": 0.0001999954879647973, - "loss": 46.0, - "step": 39559 - }, - { - "epoch": 3.024638262897337, - "grad_norm": 0.004535699728876352, - "learning_rate": 0.00019999548773662175, - "loss": 46.0, - "step": 39560 - }, - { - "epoch": 3.024714719880727, - "grad_norm": 0.0022196879144757986, - "learning_rate": 0.00019999548750844034, - "loss": 46.0, - "step": 39561 - }, - { - "epoch": 3.0247911768641167, - "grad_norm": 0.0037507337983697653, - "learning_rate": 0.00019999548728025324, - "loss": 46.0, - "step": 39562 - }, - { - "epoch": 3.0248676338475065, - "grad_norm": 0.02029794082045555, - "learning_rate": 0.00019999548705206032, - "loss": 46.0, - "step": 39563 - }, - { - "epoch": 3.024944090830896, - "grad_norm": 0.003327839309349656, - "learning_rate": 0.00019999548682386164, - "loss": 46.0, - "step": 39564 - }, - { - "epoch": 3.025020547814286, - "grad_norm": 0.0014364809030666947, - "learning_rate": 0.0001999954865956572, - "loss": 46.0, - "step": 39565 - }, - { - "epoch": 3.0250970047976757, - "grad_norm": 0.004175614099949598, - "learning_rate": 0.000199995486367447, - "loss": 46.0, - "step": 39566 - }, - { - "epoch": 3.0251734617810655, - "grad_norm": 0.0018812966300174594, - "learning_rate": 0.00019999548613923102, - "loss": 46.0, - "step": 39567 - }, - { - "epoch": 3.0252499187644553, - "grad_norm": 0.0026837356854230165, - "learning_rate": 0.00019999548591100928, - "loss": 46.0, - "step": 39568 - }, - { - "epoch": 3.025326375747845, - "grad_norm": 0.00445416709408164, - "learning_rate": 0.00019999548568278174, - "loss": 46.0, - "step": 39569 - }, - { - "epoch": 3.025402832731235, - "grad_norm": 0.005507659167051315, - "learning_rate": 0.00019999548545454845, - "loss": 46.0, - "step": 39570 - }, - { - "epoch": 3.025479289714624, - "grad_norm": 0.0023499075323343277, - "learning_rate": 0.0001999954852263094, - "loss": 46.0, - "step": 39571 - }, - { - "epoch": 3.025555746698014, - "grad_norm": 0.000948725501075387, - "learning_rate": 0.00019999548499806456, - "loss": 46.0, - "step": 39572 - }, - { - "epoch": 3.0256322036814036, - "grad_norm": 0.0011857252102345228, - "learning_rate": 0.00019999548476981398, - "loss": 46.0, - "step": 39573 - }, - { - "epoch": 3.0257086606647934, - "grad_norm": 0.0018004389712587, - "learning_rate": 0.0001999954845415576, - "loss": 46.0, - "step": 39574 - }, - { - "epoch": 3.025785117648183, - "grad_norm": 0.0024115771520882845, - "learning_rate": 0.00019999548431329548, - "loss": 46.0, - "step": 39575 - }, - { - "epoch": 3.025861574631573, - "grad_norm": 0.001869614003226161, - "learning_rate": 0.00019999548408502758, - "loss": 46.0, - "step": 39576 - }, - { - "epoch": 3.0259380316149627, - "grad_norm": 0.0022346852347254753, - "learning_rate": 0.0001999954838567539, - "loss": 46.0, - "step": 39577 - }, - { - "epoch": 3.0260144885983524, - "grad_norm": 0.0011376978363841772, - "learning_rate": 0.00019999548362847447, - "loss": 46.0, - "step": 39578 - }, - { - "epoch": 3.026090945581742, - "grad_norm": 0.0004650024638976902, - "learning_rate": 0.00019999548340018925, - "loss": 46.0, - "step": 39579 - }, - { - "epoch": 3.026167402565132, - "grad_norm": 0.002437163144350052, - "learning_rate": 0.00019999548317189828, - "loss": 46.0, - "step": 39580 - }, - { - "epoch": 3.0262438595485217, - "grad_norm": 0.0030671500135213137, - "learning_rate": 0.00019999548294360154, - "loss": 46.0, - "step": 39581 - }, - { - "epoch": 3.026320316531911, - "grad_norm": 0.0017258322332054377, - "learning_rate": 0.000199995482715299, - "loss": 46.0, - "step": 39582 - }, - { - "epoch": 3.026396773515301, - "grad_norm": 0.0029500273521989584, - "learning_rate": 0.00019999548248699075, - "loss": 46.0, - "step": 39583 - }, - { - "epoch": 3.0264732304986905, - "grad_norm": 0.0038172812201082706, - "learning_rate": 0.00019999548225867666, - "loss": 46.0, - "step": 39584 - }, - { - "epoch": 3.0265496874820803, - "grad_norm": 0.006628929637372494, - "learning_rate": 0.00019999548203035686, - "loss": 46.0, - "step": 39585 - }, - { - "epoch": 3.02662614446547, - "grad_norm": 0.0015419156989082694, - "learning_rate": 0.00019999548180203125, - "loss": 46.0, - "step": 39586 - }, - { - "epoch": 3.02670260144886, - "grad_norm": 0.0011260294122621417, - "learning_rate": 0.00019999548157369988, - "loss": 46.0, - "step": 39587 - }, - { - "epoch": 3.0267790584322496, - "grad_norm": 0.0009932902175933123, - "learning_rate": 0.00019999548134536275, - "loss": 46.0, - "step": 39588 - }, - { - "epoch": 3.0268555154156394, - "grad_norm": 0.003975550178438425, - "learning_rate": 0.00019999548111701985, - "loss": 46.0, - "step": 39589 - }, - { - "epoch": 3.026931972399029, - "grad_norm": 0.002477450994774699, - "learning_rate": 0.00019999548088867118, - "loss": 46.0, - "step": 39590 - }, - { - "epoch": 3.027008429382419, - "grad_norm": 0.0011117301182821393, - "learning_rate": 0.00019999548066031674, - "loss": 46.0, - "step": 39591 - }, - { - "epoch": 3.0270848863658086, - "grad_norm": 0.0017374836606904864, - "learning_rate": 0.00019999548043195652, - "loss": 46.0, - "step": 39592 - }, - { - "epoch": 3.027161343349198, - "grad_norm": 0.006389596965163946, - "learning_rate": 0.00019999548020359055, - "loss": 46.0, - "step": 39593 - }, - { - "epoch": 3.0272378003325877, - "grad_norm": 0.0019762907177209854, - "learning_rate": 0.0001999954799752188, - "loss": 46.0, - "step": 39594 - }, - { - "epoch": 3.0273142573159775, - "grad_norm": 0.0005828551365993917, - "learning_rate": 0.0001999954797468413, - "loss": 46.0, - "step": 39595 - }, - { - "epoch": 3.0273907142993672, - "grad_norm": 0.003115327563136816, - "learning_rate": 0.000199995479518458, - "loss": 46.0, - "step": 39596 - }, - { - "epoch": 3.027467171282757, - "grad_norm": 0.0023204556200653315, - "learning_rate": 0.00019999547929006894, - "loss": 46.0, - "step": 39597 - }, - { - "epoch": 3.0275436282661468, - "grad_norm": 0.004497657064348459, - "learning_rate": 0.00019999547906167413, - "loss": 46.0, - "step": 39598 - }, - { - "epoch": 3.0276200852495365, - "grad_norm": 0.0031204139813780785, - "learning_rate": 0.00019999547883327353, - "loss": 46.0, - "step": 39599 - }, - { - "epoch": 3.0276965422329263, - "grad_norm": 0.001052657957188785, - "learning_rate": 0.00019999547860486715, - "loss": 46.0, - "step": 39600 - }, - { - "epoch": 3.027772999216316, - "grad_norm": 0.0015188665129244328, - "learning_rate": 0.00019999547837645502, - "loss": 46.0, - "step": 39601 - }, - { - "epoch": 3.027849456199706, - "grad_norm": 0.001129683805629611, - "learning_rate": 0.00019999547814803715, - "loss": 46.0, - "step": 39602 - }, - { - "epoch": 3.0279259131830956, - "grad_norm": 0.0022491435520350933, - "learning_rate": 0.00019999547791961346, - "loss": 46.0, - "step": 39603 - }, - { - "epoch": 3.028002370166485, - "grad_norm": 0.0020019079092890024, - "learning_rate": 0.000199995477691184, - "loss": 46.0, - "step": 39604 - }, - { - "epoch": 3.0280788271498746, - "grad_norm": 0.001778197125531733, - "learning_rate": 0.0001999954774627488, - "loss": 46.0, - "step": 39605 - }, - { - "epoch": 3.0281552841332644, - "grad_norm": 0.0033746480476111174, - "learning_rate": 0.00019999547723430783, - "loss": 46.0, - "step": 39606 - }, - { - "epoch": 3.028231741116654, - "grad_norm": 0.0023577758111059666, - "learning_rate": 0.00019999547700586106, - "loss": 46.0, - "step": 39607 - }, - { - "epoch": 3.028308198100044, - "grad_norm": 0.0007763591129332781, - "learning_rate": 0.00019999547677740855, - "loss": 46.0, - "step": 39608 - }, - { - "epoch": 3.0283846550834337, - "grad_norm": 0.0023277439177036285, - "learning_rate": 0.00019999547654895027, - "loss": 46.0, - "step": 39609 - }, - { - "epoch": 3.0284611120668234, - "grad_norm": 0.0006218118942342699, - "learning_rate": 0.00019999547632048624, - "loss": 46.0, - "step": 39610 - }, - { - "epoch": 3.028537569050213, - "grad_norm": 0.0012510822853073478, - "learning_rate": 0.0001999954760920164, - "loss": 46.0, - "step": 39611 - }, - { - "epoch": 3.028614026033603, - "grad_norm": 0.0031409927178174257, - "learning_rate": 0.0001999954758635408, - "loss": 46.0, - "step": 39612 - }, - { - "epoch": 3.0286904830169927, - "grad_norm": 0.002179933711886406, - "learning_rate": 0.00019999547563505945, - "loss": 46.0, - "step": 39613 - }, - { - "epoch": 3.0287669400003825, - "grad_norm": 0.003693270729854703, - "learning_rate": 0.0001999954754065723, - "loss": 46.0, - "step": 39614 - }, - { - "epoch": 3.028843396983772, - "grad_norm": 0.0013576335040852427, - "learning_rate": 0.0001999954751780794, - "loss": 46.0, - "step": 39615 - }, - { - "epoch": 3.0289198539671616, - "grad_norm": 0.0025278846733272076, - "learning_rate": 0.00019999547494958076, - "loss": 46.0, - "step": 39616 - }, - { - "epoch": 3.0289963109505513, - "grad_norm": 0.003638332011178136, - "learning_rate": 0.0001999954747210763, - "loss": 46.0, - "step": 39617 - }, - { - "epoch": 3.029072767933941, - "grad_norm": 0.0022339161951094866, - "learning_rate": 0.0001999954744925661, - "loss": 46.0, - "step": 39618 - }, - { - "epoch": 3.029149224917331, - "grad_norm": 0.0008160865982063115, - "learning_rate": 0.0001999954742640501, - "loss": 46.0, - "step": 39619 - }, - { - "epoch": 3.0292256819007206, - "grad_norm": 0.001652189763262868, - "learning_rate": 0.00019999547403552837, - "loss": 46.0, - "step": 39620 - }, - { - "epoch": 3.0293021388841104, - "grad_norm": 0.002944201696664095, - "learning_rate": 0.00019999547380700086, - "loss": 46.0, - "step": 39621 - }, - { - "epoch": 3.0293785958675, - "grad_norm": 0.018335677683353424, - "learning_rate": 0.00019999547357846758, - "loss": 46.0, - "step": 39622 - }, - { - "epoch": 3.02945505285089, - "grad_norm": 0.002560319844633341, - "learning_rate": 0.00019999547334992852, - "loss": 46.0, - "step": 39623 - }, - { - "epoch": 3.0295315098342797, - "grad_norm": 0.004606395028531551, - "learning_rate": 0.0001999954731213837, - "loss": 46.0, - "step": 39624 - }, - { - "epoch": 3.0296079668176694, - "grad_norm": 0.0011316162999719381, - "learning_rate": 0.00019999547289283309, - "loss": 46.0, - "step": 39625 - }, - { - "epoch": 3.0296844238010587, - "grad_norm": 0.0024140742607414722, - "learning_rate": 0.0001999954726642767, - "loss": 46.0, - "step": 39626 - }, - { - "epoch": 3.0297608807844485, - "grad_norm": 0.001941421302035451, - "learning_rate": 0.0001999954724357146, - "loss": 46.0, - "step": 39627 - }, - { - "epoch": 3.0298373377678383, - "grad_norm": 0.0021032022777944803, - "learning_rate": 0.0001999954722071467, - "loss": 46.0, - "step": 39628 - }, - { - "epoch": 3.029913794751228, - "grad_norm": 0.0029945955611765385, - "learning_rate": 0.00019999547197857302, - "loss": 46.0, - "step": 39629 - }, - { - "epoch": 3.029990251734618, - "grad_norm": 0.0038915437180548906, - "learning_rate": 0.00019999547174999357, - "loss": 46.0, - "step": 39630 - }, - { - "epoch": 3.0300667087180075, - "grad_norm": 0.0014142083236947656, - "learning_rate": 0.00019999547152140838, - "loss": 46.0, - "step": 39631 - }, - { - "epoch": 3.0301431657013973, - "grad_norm": 0.0006382741266861558, - "learning_rate": 0.00019999547129281742, - "loss": 46.0, - "step": 39632 - }, - { - "epoch": 3.030219622684787, - "grad_norm": 0.0015237802872434258, - "learning_rate": 0.00019999547106422066, - "loss": 46.0, - "step": 39633 - }, - { - "epoch": 3.030296079668177, - "grad_norm": 0.002217212226241827, - "learning_rate": 0.00019999547083561815, - "loss": 46.0, - "step": 39634 - }, - { - "epoch": 3.0303725366515666, - "grad_norm": 0.0025363692548125982, - "learning_rate": 0.00019999547060700986, - "loss": 46.0, - "step": 39635 - }, - { - "epoch": 3.0304489936349563, - "grad_norm": 0.002185879973694682, - "learning_rate": 0.0001999954703783958, - "loss": 46.0, - "step": 39636 - }, - { - "epoch": 3.0305254506183457, - "grad_norm": 0.0030111183878034353, - "learning_rate": 0.00019999547014977598, - "loss": 46.0, - "step": 39637 - }, - { - "epoch": 3.0306019076017354, - "grad_norm": 0.0025228685699403286, - "learning_rate": 0.0001999954699211504, - "loss": 46.0, - "step": 39638 - }, - { - "epoch": 3.030678364585125, - "grad_norm": 0.0023971344344317913, - "learning_rate": 0.00019999546969251902, - "loss": 46.0, - "step": 39639 - }, - { - "epoch": 3.030754821568515, - "grad_norm": 0.0024575740098953247, - "learning_rate": 0.00019999546946388187, - "loss": 46.0, - "step": 39640 - }, - { - "epoch": 3.0308312785519047, - "grad_norm": 0.007665298413485289, - "learning_rate": 0.000199995469235239, - "loss": 46.0, - "step": 39641 - }, - { - "epoch": 3.0309077355352945, - "grad_norm": 0.0010867660166695714, - "learning_rate": 0.00019999546900659033, - "loss": 46.0, - "step": 39642 - }, - { - "epoch": 3.0309841925186842, - "grad_norm": 0.002488918136805296, - "learning_rate": 0.0001999954687779359, - "loss": 46.0, - "step": 39643 - }, - { - "epoch": 3.031060649502074, - "grad_norm": 0.00568863470107317, - "learning_rate": 0.00019999546854927567, - "loss": 46.0, - "step": 39644 - }, - { - "epoch": 3.0311371064854638, - "grad_norm": 0.010594950057566166, - "learning_rate": 0.00019999546832060968, - "loss": 46.0, - "step": 39645 - }, - { - "epoch": 3.0312135634688535, - "grad_norm": 0.0022774694953113794, - "learning_rate": 0.00019999546809193794, - "loss": 46.0, - "step": 39646 - }, - { - "epoch": 3.0312900204522433, - "grad_norm": 0.0015874780947342515, - "learning_rate": 0.00019999546786326043, - "loss": 46.0, - "step": 39647 - }, - { - "epoch": 3.0313664774356326, - "grad_norm": 0.0008436526986770332, - "learning_rate": 0.00019999546763457715, - "loss": 46.0, - "step": 39648 - }, - { - "epoch": 3.0314429344190224, - "grad_norm": 0.003040659474208951, - "learning_rate": 0.0001999954674058881, - "loss": 46.0, - "step": 39649 - }, - { - "epoch": 3.031519391402412, - "grad_norm": 0.0017943443963304162, - "learning_rate": 0.00019999546717719326, - "loss": 46.0, - "step": 39650 - }, - { - "epoch": 3.031595848385802, - "grad_norm": 0.0010763994650915265, - "learning_rate": 0.0001999954669484927, - "loss": 46.0, - "step": 39651 - }, - { - "epoch": 3.0316723053691916, - "grad_norm": 0.0018663015216588974, - "learning_rate": 0.00019999546671978634, - "loss": 46.0, - "step": 39652 - }, - { - "epoch": 3.0317487623525814, - "grad_norm": 0.003037545131519437, - "learning_rate": 0.0001999954664910742, - "loss": 46.0, - "step": 39653 - }, - { - "epoch": 3.031825219335971, - "grad_norm": 0.0008113468065857887, - "learning_rate": 0.0001999954662623563, - "loss": 46.0, - "step": 39654 - }, - { - "epoch": 3.031901676319361, - "grad_norm": 0.0016137253260239959, - "learning_rate": 0.00019999546603363265, - "loss": 46.0, - "step": 39655 - }, - { - "epoch": 3.0319781333027507, - "grad_norm": 0.0013214999344199896, - "learning_rate": 0.0001999954658049032, - "loss": 46.0, - "step": 39656 - }, - { - "epoch": 3.0320545902861404, - "grad_norm": 0.00482101459056139, - "learning_rate": 0.000199995465576168, - "loss": 46.0, - "step": 39657 - }, - { - "epoch": 3.03213104726953, - "grad_norm": 0.012571502476930618, - "learning_rate": 0.00019999546534742703, - "loss": 46.0, - "step": 39658 - }, - { - "epoch": 3.0322075042529195, - "grad_norm": 0.0036566995549947023, - "learning_rate": 0.00019999546511868026, - "loss": 46.0, - "step": 39659 - }, - { - "epoch": 3.0322839612363093, - "grad_norm": 0.0011799363419413567, - "learning_rate": 0.00019999546488992773, - "loss": 46.0, - "step": 39660 - }, - { - "epoch": 3.032360418219699, - "grad_norm": 0.0037186008412390947, - "learning_rate": 0.00019999546466116947, - "loss": 46.0, - "step": 39661 - }, - { - "epoch": 3.032436875203089, - "grad_norm": 0.001469515380449593, - "learning_rate": 0.0001999954644324054, - "loss": 46.0, - "step": 39662 - }, - { - "epoch": 3.0325133321864786, - "grad_norm": 0.008848035708069801, - "learning_rate": 0.00019999546420363559, - "loss": 46.0, - "step": 39663 - }, - { - "epoch": 3.0325897891698683, - "grad_norm": 0.008599605411291122, - "learning_rate": 0.00019999546397485998, - "loss": 46.0, - "step": 39664 - }, - { - "epoch": 3.032666246153258, - "grad_norm": 0.0025282329879701138, - "learning_rate": 0.00019999546374607863, - "loss": 46.0, - "step": 39665 - }, - { - "epoch": 3.032742703136648, - "grad_norm": 0.010897560976445675, - "learning_rate": 0.0001999954635172915, - "loss": 46.0, - "step": 39666 - }, - { - "epoch": 3.0328191601200376, - "grad_norm": 0.0013498463667929173, - "learning_rate": 0.00019999546328849861, - "loss": 46.0, - "step": 39667 - }, - { - "epoch": 3.0328956171034274, - "grad_norm": 0.0012233887100592256, - "learning_rate": 0.00019999546305969994, - "loss": 46.0, - "step": 39668 - }, - { - "epoch": 3.0329720740868167, - "grad_norm": 0.0016088435659185052, - "learning_rate": 0.0001999954628308955, - "loss": 46.0, - "step": 39669 - }, - { - "epoch": 3.0330485310702064, - "grad_norm": 0.002071057679131627, - "learning_rate": 0.00019999546260208529, - "loss": 46.0, - "step": 39670 - }, - { - "epoch": 3.033124988053596, - "grad_norm": 0.003840462537482381, - "learning_rate": 0.00019999546237326932, - "loss": 46.0, - "step": 39671 - }, - { - "epoch": 3.033201445036986, - "grad_norm": 0.00232309359125793, - "learning_rate": 0.0001999954621444476, - "loss": 46.0, - "step": 39672 - }, - { - "epoch": 3.0332779020203757, - "grad_norm": 0.005251055583357811, - "learning_rate": 0.00019999546191562005, - "loss": 46.0, - "step": 39673 - }, - { - "epoch": 3.0333543590037655, - "grad_norm": 0.0037175242323428392, - "learning_rate": 0.00019999546168678677, - "loss": 46.0, - "step": 39674 - }, - { - "epoch": 3.0334308159871552, - "grad_norm": 0.0022735216189175844, - "learning_rate": 0.0001999954614579477, - "loss": 46.0, - "step": 39675 - }, - { - "epoch": 3.033507272970545, - "grad_norm": 0.002139669144526124, - "learning_rate": 0.00019999546122910288, - "loss": 46.0, - "step": 39676 - }, - { - "epoch": 3.0335837299539348, - "grad_norm": 0.0012831700732931495, - "learning_rate": 0.0001999954610002523, - "loss": 46.0, - "step": 39677 - }, - { - "epoch": 3.0336601869373245, - "grad_norm": 0.004728326108306646, - "learning_rate": 0.00019999546077139596, - "loss": 46.0, - "step": 39678 - }, - { - "epoch": 3.0337366439207143, - "grad_norm": 0.001644921605475247, - "learning_rate": 0.0001999954605425338, - "loss": 46.0, - "step": 39679 - }, - { - "epoch": 3.0338131009041036, - "grad_norm": 0.011548982933163643, - "learning_rate": 0.0001999954603136659, - "loss": 46.0, - "step": 39680 - }, - { - "epoch": 3.0338895578874934, - "grad_norm": 0.0031128483824431896, - "learning_rate": 0.00019999546008479224, - "loss": 46.0, - "step": 39681 - }, - { - "epoch": 3.033966014870883, - "grad_norm": 0.001045048818923533, - "learning_rate": 0.0001999954598559128, - "loss": 46.0, - "step": 39682 - }, - { - "epoch": 3.034042471854273, - "grad_norm": 0.005120352376252413, - "learning_rate": 0.0001999954596270276, - "loss": 46.0, - "step": 39683 - }, - { - "epoch": 3.0341189288376627, - "grad_norm": 0.0011301080230623484, - "learning_rate": 0.00019999545939813662, - "loss": 46.0, - "step": 39684 - }, - { - "epoch": 3.0341953858210524, - "grad_norm": 0.0025274460203945637, - "learning_rate": 0.0001999954591692399, - "loss": 46.0, - "step": 39685 - }, - { - "epoch": 3.034271842804442, - "grad_norm": 0.0019689532928168774, - "learning_rate": 0.00019999545894033738, - "loss": 46.0, - "step": 39686 - }, - { - "epoch": 3.034348299787832, - "grad_norm": 0.002105043036863208, - "learning_rate": 0.0001999954587114291, - "loss": 46.0, - "step": 39687 - }, - { - "epoch": 3.0344247567712217, - "grad_norm": 0.0028234461788088083, - "learning_rate": 0.00019999545848251504, - "loss": 46.0, - "step": 39688 - }, - { - "epoch": 3.0345012137546115, - "grad_norm": 0.0026364759542047977, - "learning_rate": 0.0001999954582535952, - "loss": 46.0, - "step": 39689 - }, - { - "epoch": 3.034577670738001, - "grad_norm": 0.0034467161167412996, - "learning_rate": 0.00019999545802466964, - "loss": 46.0, - "step": 39690 - }, - { - "epoch": 3.0346541277213905, - "grad_norm": 0.002331661758944392, - "learning_rate": 0.00019999545779573826, - "loss": 46.0, - "step": 39691 - }, - { - "epoch": 3.0347305847047803, - "grad_norm": 0.004999334458261728, - "learning_rate": 0.00019999545756680114, - "loss": 46.0, - "step": 39692 - }, - { - "epoch": 3.03480704168817, - "grad_norm": 0.0008896341896615922, - "learning_rate": 0.00019999545733785822, - "loss": 46.0, - "step": 39693 - }, - { - "epoch": 3.03488349867156, - "grad_norm": 0.0012339059030637145, - "learning_rate": 0.00019999545710890957, - "loss": 46.0, - "step": 39694 - }, - { - "epoch": 3.0349599556549496, - "grad_norm": 0.0038829806726425886, - "learning_rate": 0.00019999545687995513, - "loss": 46.0, - "step": 39695 - }, - { - "epoch": 3.0350364126383393, - "grad_norm": 0.0049957334995269775, - "learning_rate": 0.00019999545665099492, - "loss": 46.0, - "step": 39696 - }, - { - "epoch": 3.035112869621729, - "grad_norm": 0.0028166561387479305, - "learning_rate": 0.00019999545642202895, - "loss": 46.0, - "step": 39697 - }, - { - "epoch": 3.035189326605119, - "grad_norm": 0.0021668223198503256, - "learning_rate": 0.00019999545619305722, - "loss": 46.0, - "step": 39698 - }, - { - "epoch": 3.0352657835885086, - "grad_norm": 0.0008365008980035782, - "learning_rate": 0.00019999545596407968, - "loss": 46.0, - "step": 39699 - }, - { - "epoch": 3.0353422405718984, - "grad_norm": 0.001663291361182928, - "learning_rate": 0.00019999545573509643, - "loss": 46.0, - "step": 39700 - }, - { - "epoch": 3.035418697555288, - "grad_norm": 0.0007723391172476113, - "learning_rate": 0.00019999545550610737, - "loss": 46.0, - "step": 39701 - }, - { - "epoch": 3.0354951545386775, - "grad_norm": 0.0010033791186288, - "learning_rate": 0.00019999545527711254, - "loss": 46.0, - "step": 39702 - }, - { - "epoch": 3.0355716115220672, - "grad_norm": 0.0015265808906406164, - "learning_rate": 0.00019999545504811194, - "loss": 46.0, - "step": 39703 - }, - { - "epoch": 3.035648068505457, - "grad_norm": 0.0014681227039545774, - "learning_rate": 0.0001999954548191056, - "loss": 46.0, - "step": 39704 - }, - { - "epoch": 3.0357245254888467, - "grad_norm": 0.00210682419128716, - "learning_rate": 0.00019999545459009345, - "loss": 46.0, - "step": 39705 - }, - { - "epoch": 3.0358009824722365, - "grad_norm": 0.005173744633793831, - "learning_rate": 0.00019999545436107555, - "loss": 46.0, - "step": 39706 - }, - { - "epoch": 3.0358774394556263, - "grad_norm": 0.0062578413635492325, - "learning_rate": 0.0001999954541320519, - "loss": 46.0, - "step": 39707 - }, - { - "epoch": 3.035953896439016, - "grad_norm": 0.0019370083464309573, - "learning_rate": 0.00019999545390302247, - "loss": 46.0, - "step": 39708 - }, - { - "epoch": 3.036030353422406, - "grad_norm": 0.004720513243228197, - "learning_rate": 0.00019999545367398725, - "loss": 46.0, - "step": 39709 - }, - { - "epoch": 3.0361068104057956, - "grad_norm": 0.0015936646377667785, - "learning_rate": 0.0001999954534449463, - "loss": 46.0, - "step": 39710 - }, - { - "epoch": 3.0361832673891853, - "grad_norm": 0.0012520968448370695, - "learning_rate": 0.00019999545321589955, - "loss": 46.0, - "step": 39711 - }, - { - "epoch": 3.036259724372575, - "grad_norm": 0.004156424663960934, - "learning_rate": 0.00019999545298684702, - "loss": 46.0, - "step": 39712 - }, - { - "epoch": 3.0363361813559644, - "grad_norm": 0.0021326541900634766, - "learning_rate": 0.00019999545275778874, - "loss": 46.0, - "step": 39713 - }, - { - "epoch": 3.036412638339354, - "grad_norm": 0.0015934498514980078, - "learning_rate": 0.00019999545252872468, - "loss": 46.0, - "step": 39714 - }, - { - "epoch": 3.036489095322744, - "grad_norm": 0.0014560369309037924, - "learning_rate": 0.00019999545229965485, - "loss": 46.0, - "step": 39715 - }, - { - "epoch": 3.0365655523061337, - "grad_norm": 0.0027113950345665216, - "learning_rate": 0.00019999545207057928, - "loss": 46.0, - "step": 39716 - }, - { - "epoch": 3.0366420092895234, - "grad_norm": 0.002184296492487192, - "learning_rate": 0.00019999545184149793, - "loss": 46.0, - "step": 39717 - }, - { - "epoch": 3.036718466272913, - "grad_norm": 0.0016092031728476286, - "learning_rate": 0.0001999954516124108, - "loss": 46.0, - "step": 39718 - }, - { - "epoch": 3.036794923256303, - "grad_norm": 0.0032755061984062195, - "learning_rate": 0.0001999954513833179, - "loss": 46.0, - "step": 39719 - }, - { - "epoch": 3.0368713802396927, - "grad_norm": 0.0016704684821888804, - "learning_rate": 0.00019999545115421925, - "loss": 46.0, - "step": 39720 - }, - { - "epoch": 3.0369478372230825, - "grad_norm": 0.0032511057797819376, - "learning_rate": 0.0001999954509251148, - "loss": 46.0, - "step": 39721 - }, - { - "epoch": 3.0370242942064722, - "grad_norm": 0.0009438850684091449, - "learning_rate": 0.0001999954506960046, - "loss": 46.0, - "step": 39722 - }, - { - "epoch": 3.037100751189862, - "grad_norm": 0.005450352095067501, - "learning_rate": 0.0001999954504668886, - "loss": 46.0, - "step": 39723 - }, - { - "epoch": 3.0371772081732513, - "grad_norm": 0.00190682930406183, - "learning_rate": 0.00019999545023776687, - "loss": 46.0, - "step": 39724 - }, - { - "epoch": 3.037253665156641, - "grad_norm": 0.0013446015072986484, - "learning_rate": 0.00019999545000863936, - "loss": 46.0, - "step": 39725 - }, - { - "epoch": 3.037330122140031, - "grad_norm": 0.002689749002456665, - "learning_rate": 0.00019999544977950608, - "loss": 46.0, - "step": 39726 - }, - { - "epoch": 3.0374065791234206, - "grad_norm": 0.002143718535080552, - "learning_rate": 0.00019999544955036705, - "loss": 46.0, - "step": 39727 - }, - { - "epoch": 3.0374830361068104, - "grad_norm": 0.0004537251952569932, - "learning_rate": 0.0001999954493212222, - "loss": 46.0, - "step": 39728 - }, - { - "epoch": 3.0375594930902, - "grad_norm": 0.002278085332363844, - "learning_rate": 0.00019999544909207162, - "loss": 46.0, - "step": 39729 - }, - { - "epoch": 3.03763595007359, - "grad_norm": 0.0013600974343717098, - "learning_rate": 0.00019999544886291525, - "loss": 46.0, - "step": 39730 - }, - { - "epoch": 3.0377124070569796, - "grad_norm": 0.006506752222776413, - "learning_rate": 0.00019999544863375315, - "loss": 46.0, - "step": 39731 - }, - { - "epoch": 3.0377888640403694, - "grad_norm": 0.0020644119940698147, - "learning_rate": 0.00019999544840458523, - "loss": 46.0, - "step": 39732 - }, - { - "epoch": 3.037865321023759, - "grad_norm": 0.0008373287855647504, - "learning_rate": 0.00019999544817541156, - "loss": 46.0, - "step": 39733 - }, - { - "epoch": 3.037941778007149, - "grad_norm": 0.0018869703635573387, - "learning_rate": 0.00019999544794623212, - "loss": 46.0, - "step": 39734 - }, - { - "epoch": 3.0380182349905382, - "grad_norm": 0.004040055442601442, - "learning_rate": 0.00019999544771704693, - "loss": 46.0, - "step": 39735 - }, - { - "epoch": 3.038094691973928, - "grad_norm": 0.004738978575915098, - "learning_rate": 0.00019999544748785595, - "loss": 46.0, - "step": 39736 - }, - { - "epoch": 3.0381711489573178, - "grad_norm": 0.002792940242215991, - "learning_rate": 0.0001999954472586592, - "loss": 46.0, - "step": 39737 - }, - { - "epoch": 3.0382476059407075, - "grad_norm": 0.0022057651076465845, - "learning_rate": 0.0001999954470294567, - "loss": 46.0, - "step": 39738 - }, - { - "epoch": 3.0383240629240973, - "grad_norm": 0.0037713306955993176, - "learning_rate": 0.00019999544680024842, - "loss": 46.0, - "step": 39739 - }, - { - "epoch": 3.038400519907487, - "grad_norm": 0.0030813212506473064, - "learning_rate": 0.00019999544657103437, - "loss": 46.0, - "step": 39740 - }, - { - "epoch": 3.038476976890877, - "grad_norm": 0.002665862673893571, - "learning_rate": 0.00019999544634181454, - "loss": 46.0, - "step": 39741 - }, - { - "epoch": 3.0385534338742666, - "grad_norm": 0.001664767973124981, - "learning_rate": 0.00019999544611258897, - "loss": 46.0, - "step": 39742 - }, - { - "epoch": 3.0386298908576563, - "grad_norm": 0.003835249226540327, - "learning_rate": 0.00019999544588335762, - "loss": 46.0, - "step": 39743 - }, - { - "epoch": 3.038706347841046, - "grad_norm": 0.0021365885622799397, - "learning_rate": 0.00019999544565412047, - "loss": 46.0, - "step": 39744 - }, - { - "epoch": 3.038782804824436, - "grad_norm": 0.0014744889922440052, - "learning_rate": 0.00019999544542487758, - "loss": 46.0, - "step": 39745 - }, - { - "epoch": 3.038859261807825, - "grad_norm": 0.008801175281405449, - "learning_rate": 0.0001999954451956289, - "loss": 46.0, - "step": 39746 - }, - { - "epoch": 3.038935718791215, - "grad_norm": 0.0016136305639520288, - "learning_rate": 0.00019999544496637447, - "loss": 46.0, - "step": 39747 - }, - { - "epoch": 3.0390121757746047, - "grad_norm": 0.0026654433459043503, - "learning_rate": 0.00019999544473711428, - "loss": 46.0, - "step": 39748 - }, - { - "epoch": 3.0390886327579945, - "grad_norm": 0.0010333481477573514, - "learning_rate": 0.0001999954445078483, - "loss": 46.0, - "step": 39749 - }, - { - "epoch": 3.039165089741384, - "grad_norm": 0.005302795208990574, - "learning_rate": 0.00019999544427857656, - "loss": 46.0, - "step": 39750 - }, - { - "epoch": 3.039241546724774, - "grad_norm": 0.0008502639248035848, - "learning_rate": 0.00019999544404929905, - "loss": 46.0, - "step": 39751 - }, - { - "epoch": 3.0393180037081637, - "grad_norm": 0.0027508113998919725, - "learning_rate": 0.00019999544382001577, - "loss": 46.0, - "step": 39752 - }, - { - "epoch": 3.0393944606915535, - "grad_norm": 0.0013168102595955133, - "learning_rate": 0.00019999544359072672, - "loss": 46.0, - "step": 39753 - }, - { - "epoch": 3.0394709176749433, - "grad_norm": 0.002552098361775279, - "learning_rate": 0.0001999954433614319, - "loss": 46.0, - "step": 39754 - }, - { - "epoch": 3.039547374658333, - "grad_norm": 0.0025400989688932896, - "learning_rate": 0.00019999544313213132, - "loss": 46.0, - "step": 39755 - }, - { - "epoch": 3.039623831641723, - "grad_norm": 0.0025748638436198235, - "learning_rate": 0.00019999544290282497, - "loss": 46.0, - "step": 39756 - }, - { - "epoch": 3.039700288625112, - "grad_norm": 0.0021021731663495302, - "learning_rate": 0.00019999544267351285, - "loss": 46.0, - "step": 39757 - }, - { - "epoch": 3.039776745608502, - "grad_norm": 0.002064772415906191, - "learning_rate": 0.00019999544244419495, - "loss": 46.0, - "step": 39758 - }, - { - "epoch": 3.0398532025918916, - "grad_norm": 0.003540665376931429, - "learning_rate": 0.0001999954422148713, - "loss": 46.0, - "step": 39759 - }, - { - "epoch": 3.0399296595752814, - "grad_norm": 0.0022401451133191586, - "learning_rate": 0.00019999544198554182, - "loss": 46.0, - "step": 39760 - }, - { - "epoch": 3.040006116558671, - "grad_norm": 0.003381523070856929, - "learning_rate": 0.00019999544175620663, - "loss": 46.0, - "step": 39761 - }, - { - "epoch": 3.040082573542061, - "grad_norm": 0.003989202901721001, - "learning_rate": 0.00019999544152686567, - "loss": 46.0, - "step": 39762 - }, - { - "epoch": 3.0401590305254507, - "grad_norm": 0.0036354924086481333, - "learning_rate": 0.00019999544129751894, - "loss": 46.0, - "step": 39763 - }, - { - "epoch": 3.0402354875088404, - "grad_norm": 0.0050549875013530254, - "learning_rate": 0.0001999954410681664, - "loss": 46.0, - "step": 39764 - }, - { - "epoch": 3.04031194449223, - "grad_norm": 0.0030578419100493193, - "learning_rate": 0.00019999544083880815, - "loss": 46.0, - "step": 39765 - }, - { - "epoch": 3.04038840147562, - "grad_norm": 0.004450567532330751, - "learning_rate": 0.0001999954406094441, - "loss": 46.0, - "step": 39766 - }, - { - "epoch": 3.0404648584590097, - "grad_norm": 0.003954468294978142, - "learning_rate": 0.00019999544038007424, - "loss": 46.0, - "step": 39767 - }, - { - "epoch": 3.040541315442399, - "grad_norm": 0.003068147925660014, - "learning_rate": 0.00019999544015069867, - "loss": 46.0, - "step": 39768 - }, - { - "epoch": 3.040617772425789, - "grad_norm": 0.003508657217025757, - "learning_rate": 0.0001999954399213173, - "loss": 46.0, - "step": 39769 - }, - { - "epoch": 3.0406942294091786, - "grad_norm": 0.001262909616343677, - "learning_rate": 0.0001999954396919302, - "loss": 46.0, - "step": 39770 - }, - { - "epoch": 3.0407706863925683, - "grad_norm": 0.0026834493037313223, - "learning_rate": 0.0001999954394625373, - "loss": 46.0, - "step": 39771 - }, - { - "epoch": 3.040847143375958, - "grad_norm": 0.004251353908330202, - "learning_rate": 0.00019999543923313864, - "loss": 46.0, - "step": 39772 - }, - { - "epoch": 3.040923600359348, - "grad_norm": 0.002146467100828886, - "learning_rate": 0.0001999954390037342, - "loss": 46.0, - "step": 39773 - }, - { - "epoch": 3.0410000573427376, - "grad_norm": 0.0026370815467089415, - "learning_rate": 0.00019999543877432402, - "loss": 46.0, - "step": 39774 - }, - { - "epoch": 3.0410765143261274, - "grad_norm": 0.0014033066108822823, - "learning_rate": 0.00019999543854490803, - "loss": 46.0, - "step": 39775 - }, - { - "epoch": 3.041152971309517, - "grad_norm": 0.0017862417735159397, - "learning_rate": 0.0001999954383154863, - "loss": 46.0, - "step": 39776 - }, - { - "epoch": 3.041229428292907, - "grad_norm": 0.0016954976599663496, - "learning_rate": 0.0001999954380860588, - "loss": 46.0, - "step": 39777 - }, - { - "epoch": 3.0413058852762966, - "grad_norm": 0.0009763639536686242, - "learning_rate": 0.0001999954378566255, - "loss": 46.0, - "step": 39778 - }, - { - "epoch": 3.041382342259686, - "grad_norm": 0.0030006549786776304, - "learning_rate": 0.00019999543762718645, - "loss": 46.0, - "step": 39779 - }, - { - "epoch": 3.0414587992430757, - "grad_norm": 0.0017674047267064452, - "learning_rate": 0.00019999543739774165, - "loss": 46.0, - "step": 39780 - }, - { - "epoch": 3.0415352562264655, - "grad_norm": 0.0012382587883621454, - "learning_rate": 0.00019999543716829105, - "loss": 46.0, - "step": 39781 - }, - { - "epoch": 3.0416117132098552, - "grad_norm": 0.0008127946057356894, - "learning_rate": 0.00019999543693883468, - "loss": 46.0, - "step": 39782 - }, - { - "epoch": 3.041688170193245, - "grad_norm": 0.00219341809861362, - "learning_rate": 0.00019999543670937256, - "loss": 46.0, - "step": 39783 - }, - { - "epoch": 3.0417646271766348, - "grad_norm": 0.0014569797785952687, - "learning_rate": 0.0001999954364799047, - "loss": 46.0, - "step": 39784 - }, - { - "epoch": 3.0418410841600245, - "grad_norm": 0.0006897144485265017, - "learning_rate": 0.000199995436250431, - "loss": 46.0, - "step": 39785 - }, - { - "epoch": 3.0419175411434143, - "grad_norm": 0.0019148170249536633, - "learning_rate": 0.0001999954360209516, - "loss": 46.0, - "step": 39786 - }, - { - "epoch": 3.041993998126804, - "grad_norm": 0.0007004592334851623, - "learning_rate": 0.00019999543579146635, - "loss": 46.0, - "step": 39787 - }, - { - "epoch": 3.042070455110194, - "grad_norm": 0.0010098963975906372, - "learning_rate": 0.0001999954355619754, - "loss": 46.0, - "step": 39788 - }, - { - "epoch": 3.0421469120935836, - "grad_norm": 0.0032262811437249184, - "learning_rate": 0.00019999543533247866, - "loss": 46.0, - "step": 39789 - }, - { - "epoch": 3.042223369076973, - "grad_norm": 0.001586111611686647, - "learning_rate": 0.00019999543510297615, - "loss": 46.0, - "step": 39790 - }, - { - "epoch": 3.0422998260603626, - "grad_norm": 0.0019963011145591736, - "learning_rate": 0.00019999543487346787, - "loss": 46.0, - "step": 39791 - }, - { - "epoch": 3.0423762830437524, - "grad_norm": 0.0032852094154804945, - "learning_rate": 0.00019999543464395382, - "loss": 46.0, - "step": 39792 - }, - { - "epoch": 3.042452740027142, - "grad_norm": 0.0011593063827604055, - "learning_rate": 0.000199995434414434, - "loss": 46.0, - "step": 39793 - }, - { - "epoch": 3.042529197010532, - "grad_norm": 0.0022101178765296936, - "learning_rate": 0.00019999543418490842, - "loss": 46.0, - "step": 39794 - }, - { - "epoch": 3.0426056539939217, - "grad_norm": 0.002550235018134117, - "learning_rate": 0.00019999543395537707, - "loss": 46.0, - "step": 39795 - }, - { - "epoch": 3.0426821109773114, - "grad_norm": 0.008202235214412212, - "learning_rate": 0.00019999543372583992, - "loss": 46.0, - "step": 39796 - }, - { - "epoch": 3.042758567960701, - "grad_norm": 0.002513185376301408, - "learning_rate": 0.00019999543349629706, - "loss": 46.0, - "step": 39797 - }, - { - "epoch": 3.042835024944091, - "grad_norm": 0.0015979657182469964, - "learning_rate": 0.00019999543326674836, - "loss": 46.0, - "step": 39798 - }, - { - "epoch": 3.0429114819274807, - "grad_norm": 0.0013994803884997964, - "learning_rate": 0.00019999543303719395, - "loss": 46.0, - "step": 39799 - }, - { - "epoch": 3.04298793891087, - "grad_norm": 0.0030840723775327206, - "learning_rate": 0.00019999543280763377, - "loss": 46.0, - "step": 39800 - }, - { - "epoch": 3.04306439589426, - "grad_norm": 0.0020172528456896544, - "learning_rate": 0.00019999543257806778, - "loss": 46.0, - "step": 39801 - }, - { - "epoch": 3.0431408528776496, - "grad_norm": 0.005486840847879648, - "learning_rate": 0.00019999543234849605, - "loss": 46.0, - "step": 39802 - }, - { - "epoch": 3.0432173098610393, - "grad_norm": 0.0026672601234167814, - "learning_rate": 0.00019999543211891854, - "loss": 46.0, - "step": 39803 - }, - { - "epoch": 3.043293766844429, - "grad_norm": 0.0010024961084127426, - "learning_rate": 0.00019999543188933526, - "loss": 46.0, - "step": 39804 - }, - { - "epoch": 3.043370223827819, - "grad_norm": 0.0014839605428278446, - "learning_rate": 0.00019999543165974618, - "loss": 46.0, - "step": 39805 - }, - { - "epoch": 3.0434466808112086, - "grad_norm": 0.0011847109999507666, - "learning_rate": 0.00019999543143015138, - "loss": 46.0, - "step": 39806 - }, - { - "epoch": 3.0435231377945984, - "grad_norm": 0.0035277872812002897, - "learning_rate": 0.0001999954312005508, - "loss": 46.0, - "step": 39807 - }, - { - "epoch": 3.043599594777988, - "grad_norm": 0.002852214267477393, - "learning_rate": 0.00019999543097094444, - "loss": 46.0, - "step": 39808 - }, - { - "epoch": 3.043676051761378, - "grad_norm": 0.0020340923219919205, - "learning_rate": 0.00019999543074133232, - "loss": 46.0, - "step": 39809 - }, - { - "epoch": 3.0437525087447677, - "grad_norm": 0.001955203479155898, - "learning_rate": 0.00019999543051171442, - "loss": 46.0, - "step": 39810 - }, - { - "epoch": 3.0438289657281574, - "grad_norm": 0.002027090871706605, - "learning_rate": 0.00019999543028209076, - "loss": 46.0, - "step": 39811 - }, - { - "epoch": 3.0439054227115467, - "grad_norm": 0.0023426534608006477, - "learning_rate": 0.00019999543005246132, - "loss": 46.0, - "step": 39812 - }, - { - "epoch": 3.0439818796949365, - "grad_norm": 0.0010036531602963805, - "learning_rate": 0.00019999542982282613, - "loss": 46.0, - "step": 39813 - }, - { - "epoch": 3.0440583366783263, - "grad_norm": 0.003153854515403509, - "learning_rate": 0.00019999542959318515, - "loss": 46.0, - "step": 39814 - }, - { - "epoch": 3.044134793661716, - "grad_norm": 0.0033195195719599724, - "learning_rate": 0.00019999542936353844, - "loss": 46.0, - "step": 39815 - }, - { - "epoch": 3.044211250645106, - "grad_norm": 0.004860568791627884, - "learning_rate": 0.0001999954291338859, - "loss": 46.0, - "step": 39816 - }, - { - "epoch": 3.0442877076284955, - "grad_norm": 0.0020198491401970387, - "learning_rate": 0.00019999542890422763, - "loss": 46.0, - "step": 39817 - }, - { - "epoch": 3.0443641646118853, - "grad_norm": 0.0028632120229303837, - "learning_rate": 0.0001999954286745636, - "loss": 46.0, - "step": 39818 - }, - { - "epoch": 3.044440621595275, - "grad_norm": 0.001592725282534957, - "learning_rate": 0.00019999542844489375, - "loss": 46.0, - "step": 39819 - }, - { - "epoch": 3.044517078578665, - "grad_norm": 0.004052300006151199, - "learning_rate": 0.0001999954282152182, - "loss": 46.0, - "step": 39820 - }, - { - "epoch": 3.0445935355620546, - "grad_norm": 0.0026906216517090797, - "learning_rate": 0.00019999542798553683, - "loss": 46.0, - "step": 39821 - }, - { - "epoch": 3.044669992545444, - "grad_norm": 0.0012166756205260754, - "learning_rate": 0.0001999954277558497, - "loss": 46.0, - "step": 39822 - }, - { - "epoch": 3.0447464495288337, - "grad_norm": 0.0011185735929757357, - "learning_rate": 0.0001999954275261568, - "loss": 46.0, - "step": 39823 - }, - { - "epoch": 3.0448229065122234, - "grad_norm": 0.001580333337187767, - "learning_rate": 0.00019999542729645816, - "loss": 46.0, - "step": 39824 - }, - { - "epoch": 3.044899363495613, - "grad_norm": 0.004871820565313101, - "learning_rate": 0.00019999542706675372, - "loss": 46.0, - "step": 39825 - }, - { - "epoch": 3.044975820479003, - "grad_norm": 0.0006713769398629665, - "learning_rate": 0.0001999954268370435, - "loss": 46.0, - "step": 39826 - }, - { - "epoch": 3.0450522774623927, - "grad_norm": 0.0010941119398921728, - "learning_rate": 0.00019999542660732755, - "loss": 46.0, - "step": 39827 - }, - { - "epoch": 3.0451287344457825, - "grad_norm": 0.004415113478899002, - "learning_rate": 0.00019999542637760581, - "loss": 46.0, - "step": 39828 - }, - { - "epoch": 3.0452051914291722, - "grad_norm": 0.0017356699099764228, - "learning_rate": 0.00019999542614787828, - "loss": 46.0, - "step": 39829 - }, - { - "epoch": 3.045281648412562, - "grad_norm": 0.0023002484813332558, - "learning_rate": 0.00019999542591814503, - "loss": 46.0, - "step": 39830 - }, - { - "epoch": 3.0453581053959518, - "grad_norm": 0.0011726251104846597, - "learning_rate": 0.00019999542568840598, - "loss": 46.0, - "step": 39831 - }, - { - "epoch": 3.0454345623793415, - "grad_norm": 0.0024728458374738693, - "learning_rate": 0.00019999542545866118, - "loss": 46.0, - "step": 39832 - }, - { - "epoch": 3.045511019362731, - "grad_norm": 0.0032813907600939274, - "learning_rate": 0.00019999542522891058, - "loss": 46.0, - "step": 39833 - }, - { - "epoch": 3.0455874763461206, - "grad_norm": 0.001836483716033399, - "learning_rate": 0.00019999542499915424, - "loss": 46.0, - "step": 39834 - }, - { - "epoch": 3.0456639333295104, - "grad_norm": 0.001598047441802919, - "learning_rate": 0.00019999542476939212, - "loss": 46.0, - "step": 39835 - }, - { - "epoch": 3.0457403903129, - "grad_norm": 0.0015207421965897083, - "learning_rate": 0.0001999954245396242, - "loss": 46.0, - "step": 39836 - }, - { - "epoch": 3.04581684729629, - "grad_norm": 0.004801145289093256, - "learning_rate": 0.00019999542430985053, - "loss": 46.0, - "step": 39837 - }, - { - "epoch": 3.0458933042796796, - "grad_norm": 0.0011858928482979536, - "learning_rate": 0.00019999542408007112, - "loss": 46.0, - "step": 39838 - }, - { - "epoch": 3.0459697612630694, - "grad_norm": 0.001516296761110425, - "learning_rate": 0.0001999954238502859, - "loss": 46.0, - "step": 39839 - }, - { - "epoch": 3.046046218246459, - "grad_norm": 0.0020369929261505604, - "learning_rate": 0.00019999542362049495, - "loss": 46.0, - "step": 39840 - }, - { - "epoch": 3.046122675229849, - "grad_norm": 0.0007960710790939629, - "learning_rate": 0.00019999542339069822, - "loss": 46.0, - "step": 39841 - }, - { - "epoch": 3.0461991322132387, - "grad_norm": 0.0007086318801157176, - "learning_rate": 0.0001999954231608957, - "loss": 46.0, - "step": 39842 - }, - { - "epoch": 3.0462755891966284, - "grad_norm": 0.00118496164213866, - "learning_rate": 0.0001999954229310874, - "loss": 46.0, - "step": 39843 - }, - { - "epoch": 3.0463520461800178, - "grad_norm": 0.00048624732880853117, - "learning_rate": 0.00019999542270127338, - "loss": 46.0, - "step": 39844 - }, - { - "epoch": 3.0464285031634075, - "grad_norm": 0.0015608640387654305, - "learning_rate": 0.00019999542247145358, - "loss": 46.0, - "step": 39845 - }, - { - "epoch": 3.0465049601467973, - "grad_norm": 0.0054060607217252254, - "learning_rate": 0.00019999542224162796, - "loss": 46.0, - "step": 39846 - }, - { - "epoch": 3.046581417130187, - "grad_norm": 0.0013889011461287737, - "learning_rate": 0.00019999542201179661, - "loss": 46.0, - "step": 39847 - }, - { - "epoch": 3.046657874113577, - "grad_norm": 0.0025718859396874905, - "learning_rate": 0.00019999542178195952, - "loss": 46.0, - "step": 39848 - }, - { - "epoch": 3.0467343310969666, - "grad_norm": 0.002158508636057377, - "learning_rate": 0.0001999954215521166, - "loss": 46.0, - "step": 39849 - }, - { - "epoch": 3.0468107880803563, - "grad_norm": 0.003430926240980625, - "learning_rate": 0.00019999542132226794, - "loss": 46.0, - "step": 39850 - }, - { - "epoch": 3.046887245063746, - "grad_norm": 0.003158948151394725, - "learning_rate": 0.00019999542109241353, - "loss": 46.0, - "step": 39851 - }, - { - "epoch": 3.046963702047136, - "grad_norm": 0.001974754733964801, - "learning_rate": 0.00019999542086255332, - "loss": 46.0, - "step": 39852 - }, - { - "epoch": 3.0470401590305256, - "grad_norm": 0.0017480666283518076, - "learning_rate": 0.00019999542063268736, - "loss": 46.0, - "step": 39853 - }, - { - "epoch": 3.0471166160139154, - "grad_norm": 0.0012514068512246013, - "learning_rate": 0.0001999954204028156, - "loss": 46.0, - "step": 39854 - }, - { - "epoch": 3.0471930729973047, - "grad_norm": 0.002120128832757473, - "learning_rate": 0.00019999542017293812, - "loss": 46.0, - "step": 39855 - }, - { - "epoch": 3.0472695299806944, - "grad_norm": 0.003946150653064251, - "learning_rate": 0.00019999541994305484, - "loss": 46.0, - "step": 39856 - }, - { - "epoch": 3.047345986964084, - "grad_norm": 0.0014776972820982337, - "learning_rate": 0.00019999541971316576, - "loss": 46.0, - "step": 39857 - }, - { - "epoch": 3.047422443947474, - "grad_norm": 0.001937778084538877, - "learning_rate": 0.00019999541948327097, - "loss": 46.0, - "step": 39858 - }, - { - "epoch": 3.0474989009308637, - "grad_norm": 0.001531258225440979, - "learning_rate": 0.0001999954192533704, - "loss": 46.0, - "step": 39859 - }, - { - "epoch": 3.0475753579142535, - "grad_norm": 0.0019490448758006096, - "learning_rate": 0.00019999541902346402, - "loss": 46.0, - "step": 39860 - }, - { - "epoch": 3.0476518148976433, - "grad_norm": 0.003566277679055929, - "learning_rate": 0.0001999954187935519, - "loss": 46.0, - "step": 39861 - }, - { - "epoch": 3.047728271881033, - "grad_norm": 0.0026820912025868893, - "learning_rate": 0.00019999541856363401, - "loss": 46.0, - "step": 39862 - }, - { - "epoch": 3.0478047288644228, - "grad_norm": 0.0031591744627803564, - "learning_rate": 0.00019999541833371035, - "loss": 46.0, - "step": 39863 - }, - { - "epoch": 3.0478811858478125, - "grad_norm": 0.002295204671099782, - "learning_rate": 0.00019999541810378094, - "loss": 46.0, - "step": 39864 - }, - { - "epoch": 3.0479576428312023, - "grad_norm": 0.002604244276881218, - "learning_rate": 0.00019999541787384573, - "loss": 46.0, - "step": 39865 - }, - { - "epoch": 3.0480340998145916, - "grad_norm": 0.0014944911235943437, - "learning_rate": 0.00019999541764390477, - "loss": 46.0, - "step": 39866 - }, - { - "epoch": 3.0481105567979814, - "grad_norm": 0.004636420402675867, - "learning_rate": 0.000199995417413958, - "loss": 46.0, - "step": 39867 - }, - { - "epoch": 3.048187013781371, - "grad_norm": 0.0014118136605247855, - "learning_rate": 0.0001999954171840055, - "loss": 46.0, - "step": 39868 - }, - { - "epoch": 3.048263470764761, - "grad_norm": 0.003935117274522781, - "learning_rate": 0.00019999541695404726, - "loss": 46.0, - "step": 39869 - }, - { - "epoch": 3.0483399277481507, - "grad_norm": 0.0033773519098758698, - "learning_rate": 0.0001999954167240832, - "loss": 46.0, - "step": 39870 - }, - { - "epoch": 3.0484163847315404, - "grad_norm": 0.0017816373147070408, - "learning_rate": 0.00019999541649411338, - "loss": 46.0, - "step": 39871 - }, - { - "epoch": 3.04849284171493, - "grad_norm": 0.0010630270699039102, - "learning_rate": 0.0001999954162641378, - "loss": 46.0, - "step": 39872 - }, - { - "epoch": 3.04856929869832, - "grad_norm": 0.002940055448561907, - "learning_rate": 0.00019999541603415644, - "loss": 46.0, - "step": 39873 - }, - { - "epoch": 3.0486457556817097, - "grad_norm": 0.000761278672143817, - "learning_rate": 0.00019999541580416932, - "loss": 46.0, - "step": 39874 - }, - { - "epoch": 3.0487222126650995, - "grad_norm": 0.0010927848052233458, - "learning_rate": 0.00019999541557417643, - "loss": 46.0, - "step": 39875 - }, - { - "epoch": 3.0487986696484892, - "grad_norm": 0.0034057158045470715, - "learning_rate": 0.0001999954153441778, - "loss": 46.0, - "step": 39876 - }, - { - "epoch": 3.0488751266318785, - "grad_norm": 0.002073639538139105, - "learning_rate": 0.00019999541511417335, - "loss": 46.0, - "step": 39877 - }, - { - "epoch": 3.0489515836152683, - "grad_norm": 0.0007386656361632049, - "learning_rate": 0.00019999541488416314, - "loss": 46.0, - "step": 39878 - }, - { - "epoch": 3.049028040598658, - "grad_norm": 0.002264376962557435, - "learning_rate": 0.00019999541465414719, - "loss": 46.0, - "step": 39879 - }, - { - "epoch": 3.049104497582048, - "grad_norm": 0.001007546205073595, - "learning_rate": 0.00019999541442412543, - "loss": 46.0, - "step": 39880 - }, - { - "epoch": 3.0491809545654376, - "grad_norm": 0.0024832834023982286, - "learning_rate": 0.00019999541419409792, - "loss": 46.0, - "step": 39881 - }, - { - "epoch": 3.0492574115488273, - "grad_norm": 0.0027159901801496744, - "learning_rate": 0.00019999541396406465, - "loss": 46.0, - "step": 39882 - }, - { - "epoch": 3.049333868532217, - "grad_norm": 0.0014653961407020688, - "learning_rate": 0.00019999541373402562, - "loss": 46.0, - "step": 39883 - }, - { - "epoch": 3.049410325515607, - "grad_norm": 0.0017313024727627635, - "learning_rate": 0.00019999541350398082, - "loss": 46.0, - "step": 39884 - }, - { - "epoch": 3.0494867824989966, - "grad_norm": 0.00138534524012357, - "learning_rate": 0.0001999954132739302, - "loss": 46.0, - "step": 39885 - }, - { - "epoch": 3.0495632394823864, - "grad_norm": 0.0031168884597718716, - "learning_rate": 0.00019999541304387388, - "loss": 46.0, - "step": 39886 - }, - { - "epoch": 3.049639696465776, - "grad_norm": 0.0014258017763495445, - "learning_rate": 0.00019999541281381174, - "loss": 46.0, - "step": 39887 - }, - { - "epoch": 3.0497161534491655, - "grad_norm": 0.002162324730306864, - "learning_rate": 0.00019999541258374385, - "loss": 46.0, - "step": 39888 - }, - { - "epoch": 3.0497926104325552, - "grad_norm": 0.00306203356012702, - "learning_rate": 0.0001999954123536702, - "loss": 46.0, - "step": 39889 - }, - { - "epoch": 3.049869067415945, - "grad_norm": 0.0028150237631052732, - "learning_rate": 0.00019999541212359077, - "loss": 46.0, - "step": 39890 - }, - { - "epoch": 3.0499455243993348, - "grad_norm": 0.0035250908695161343, - "learning_rate": 0.00019999541189350556, - "loss": 46.0, - "step": 39891 - }, - { - "epoch": 3.0500219813827245, - "grad_norm": 0.003052371321246028, - "learning_rate": 0.0001999954116634146, - "loss": 46.0, - "step": 39892 - }, - { - "epoch": 3.0500984383661143, - "grad_norm": 0.004716380499303341, - "learning_rate": 0.00019999541143331785, - "loss": 46.0, - "step": 39893 - }, - { - "epoch": 3.050174895349504, - "grad_norm": 0.001816091826185584, - "learning_rate": 0.00019999541120321537, - "loss": 46.0, - "step": 39894 - }, - { - "epoch": 3.050251352332894, - "grad_norm": 0.0018924175528809428, - "learning_rate": 0.00019999541097310707, - "loss": 46.0, - "step": 39895 - }, - { - "epoch": 3.0503278093162836, - "grad_norm": 0.0010075777536258101, - "learning_rate": 0.00019999541074299304, - "loss": 46.0, - "step": 39896 - }, - { - "epoch": 3.0504042662996733, - "grad_norm": 0.0030944314785301685, - "learning_rate": 0.00019999541051287322, - "loss": 46.0, - "step": 39897 - }, - { - "epoch": 3.050480723283063, - "grad_norm": 0.002769443206489086, - "learning_rate": 0.00019999541028274765, - "loss": 46.0, - "step": 39898 - }, - { - "epoch": 3.0505571802664524, - "grad_norm": 0.0020051966421306133, - "learning_rate": 0.00019999541005261628, - "loss": 46.0, - "step": 39899 - }, - { - "epoch": 3.050633637249842, - "grad_norm": 0.002914352575317025, - "learning_rate": 0.00019999540982247916, - "loss": 46.0, - "step": 39900 - }, - { - "epoch": 3.050710094233232, - "grad_norm": 0.0020059102680534124, - "learning_rate": 0.0001999954095923363, - "loss": 46.0, - "step": 39901 - }, - { - "epoch": 3.0507865512166217, - "grad_norm": 0.0023324305657297373, - "learning_rate": 0.00019999540936218763, - "loss": 46.0, - "step": 39902 - }, - { - "epoch": 3.0508630082000114, - "grad_norm": 0.00419639190658927, - "learning_rate": 0.0001999954091320332, - "loss": 46.0, - "step": 39903 - }, - { - "epoch": 3.050939465183401, - "grad_norm": 0.0030191445257514715, - "learning_rate": 0.000199995408901873, - "loss": 46.0, - "step": 39904 - }, - { - "epoch": 3.051015922166791, - "grad_norm": 0.003896491602063179, - "learning_rate": 0.00019999540867170703, - "loss": 46.0, - "step": 39905 - }, - { - "epoch": 3.0510923791501807, - "grad_norm": 0.001012215274386108, - "learning_rate": 0.00019999540844153527, - "loss": 46.0, - "step": 39906 - }, - { - "epoch": 3.0511688361335705, - "grad_norm": 0.002515647793188691, - "learning_rate": 0.0001999954082113578, - "loss": 46.0, - "step": 39907 - }, - { - "epoch": 3.0512452931169602, - "grad_norm": 0.0024647850077599287, - "learning_rate": 0.00019999540798117452, - "loss": 46.0, - "step": 39908 - }, - { - "epoch": 3.05132175010035, - "grad_norm": 0.0010217091767117381, - "learning_rate": 0.0001999954077509855, - "loss": 46.0, - "step": 39909 - }, - { - "epoch": 3.0513982070837393, - "grad_norm": 0.003492149757221341, - "learning_rate": 0.00019999540752079064, - "loss": 46.0, - "step": 39910 - }, - { - "epoch": 3.051474664067129, - "grad_norm": 0.0009389400365762413, - "learning_rate": 0.00019999540729059007, - "loss": 46.0, - "step": 39911 - }, - { - "epoch": 3.051551121050519, - "grad_norm": 0.0006082706968300045, - "learning_rate": 0.00019999540706038373, - "loss": 46.0, - "step": 39912 - }, - { - "epoch": 3.0516275780339086, - "grad_norm": 0.004620365798473358, - "learning_rate": 0.00019999540683017161, - "loss": 46.0, - "step": 39913 - }, - { - "epoch": 3.0517040350172984, - "grad_norm": 0.002860978012904525, - "learning_rate": 0.00019999540659995372, - "loss": 46.0, - "step": 39914 - }, - { - "epoch": 3.051780492000688, - "grad_norm": 0.006351559888571501, - "learning_rate": 0.00019999540636973006, - "loss": 46.0, - "step": 39915 - }, - { - "epoch": 3.051856948984078, - "grad_norm": 0.002003585919737816, - "learning_rate": 0.00019999540613950062, - "loss": 46.0, - "step": 39916 - }, - { - "epoch": 3.0519334059674676, - "grad_norm": 0.0023747654631733894, - "learning_rate": 0.0001999954059092654, - "loss": 46.0, - "step": 39917 - }, - { - "epoch": 3.0520098629508574, - "grad_norm": 0.004748103208839893, - "learning_rate": 0.00019999540567902448, - "loss": 46.0, - "step": 39918 - }, - { - "epoch": 3.052086319934247, - "grad_norm": 0.001683561597019434, - "learning_rate": 0.00019999540544877773, - "loss": 46.0, - "step": 39919 - }, - { - "epoch": 3.052162776917637, - "grad_norm": 0.0028488747775554657, - "learning_rate": 0.00019999540521852525, - "loss": 46.0, - "step": 39920 - }, - { - "epoch": 3.0522392339010262, - "grad_norm": 0.002458098577335477, - "learning_rate": 0.00019999540498826695, - "loss": 46.0, - "step": 39921 - }, - { - "epoch": 3.052315690884416, - "grad_norm": 0.00106534524820745, - "learning_rate": 0.00019999540475800292, - "loss": 46.0, - "step": 39922 - }, - { - "epoch": 3.0523921478678058, - "grad_norm": 0.0033299436327069998, - "learning_rate": 0.0001999954045277331, - "loss": 46.0, - "step": 39923 - }, - { - "epoch": 3.0524686048511955, - "grad_norm": 0.0021596713922917843, - "learning_rate": 0.00019999540429745753, - "loss": 46.0, - "step": 39924 - }, - { - "epoch": 3.0525450618345853, - "grad_norm": 0.0012651372235268354, - "learning_rate": 0.0001999954040671762, - "loss": 46.0, - "step": 39925 - }, - { - "epoch": 3.052621518817975, - "grad_norm": 0.0013762699672952294, - "learning_rate": 0.00019999540383688907, - "loss": 46.0, - "step": 39926 - }, - { - "epoch": 3.052697975801365, - "grad_norm": 0.0068327998742461205, - "learning_rate": 0.00019999540360659615, - "loss": 46.0, - "step": 39927 - }, - { - "epoch": 3.0527744327847546, - "grad_norm": 0.001442233449779451, - "learning_rate": 0.0001999954033762975, - "loss": 46.0, - "step": 39928 - }, - { - "epoch": 3.0528508897681443, - "grad_norm": 0.0017950354376807809, - "learning_rate": 0.00019999540314599308, - "loss": 46.0, - "step": 39929 - }, - { - "epoch": 3.052927346751534, - "grad_norm": 0.0008715394651517272, - "learning_rate": 0.00019999540291568287, - "loss": 46.0, - "step": 39930 - }, - { - "epoch": 3.0530038037349234, - "grad_norm": 0.00132539973128587, - "learning_rate": 0.00019999540268536692, - "loss": 46.0, - "step": 39931 - }, - { - "epoch": 3.053080260718313, - "grad_norm": 0.001679926528595388, - "learning_rate": 0.00019999540245504519, - "loss": 46.0, - "step": 39932 - }, - { - "epoch": 3.053156717701703, - "grad_norm": 0.0035040657967329025, - "learning_rate": 0.00019999540222471768, - "loss": 46.0, - "step": 39933 - }, - { - "epoch": 3.0532331746850927, - "grad_norm": 0.0014891623286530375, - "learning_rate": 0.0001999954019943844, - "loss": 46.0, - "step": 39934 - }, - { - "epoch": 3.0533096316684825, - "grad_norm": 0.001127067138440907, - "learning_rate": 0.00019999540176404536, - "loss": 46.0, - "step": 39935 - }, - { - "epoch": 3.053386088651872, - "grad_norm": 0.002575282007455826, - "learning_rate": 0.00019999540153370053, - "loss": 46.0, - "step": 39936 - }, - { - "epoch": 3.053462545635262, - "grad_norm": 0.0008896789513528347, - "learning_rate": 0.00019999540130334996, - "loss": 46.0, - "step": 39937 - }, - { - "epoch": 3.0535390026186517, - "grad_norm": 0.012337354943156242, - "learning_rate": 0.00019999540107299362, - "loss": 46.0, - "step": 39938 - }, - { - "epoch": 3.0536154596020415, - "grad_norm": 0.0019458942115306854, - "learning_rate": 0.0001999954008426315, - "loss": 46.0, - "step": 39939 - }, - { - "epoch": 3.0536919165854313, - "grad_norm": 0.00234235473908484, - "learning_rate": 0.00019999540061226362, - "loss": 46.0, - "step": 39940 - }, - { - "epoch": 3.053768373568821, - "grad_norm": 0.0019255520310252905, - "learning_rate": 0.00019999540038188995, - "loss": 46.0, - "step": 39941 - }, - { - "epoch": 3.053844830552211, - "grad_norm": 0.004226963501423597, - "learning_rate": 0.00019999540015151052, - "loss": 46.0, - "step": 39942 - }, - { - "epoch": 3.0539212875356, - "grad_norm": 0.001408649142831564, - "learning_rate": 0.00019999539992112533, - "loss": 46.0, - "step": 39943 - }, - { - "epoch": 3.05399774451899, - "grad_norm": 0.007395122200250626, - "learning_rate": 0.00019999539969073435, - "loss": 46.0, - "step": 39944 - }, - { - "epoch": 3.0540742015023796, - "grad_norm": 0.0016999570652842522, - "learning_rate": 0.00019999539946033762, - "loss": 46.0, - "step": 39945 - }, - { - "epoch": 3.0541506584857694, - "grad_norm": 0.003070377279073, - "learning_rate": 0.00019999539922993512, - "loss": 46.0, - "step": 39946 - }, - { - "epoch": 3.054227115469159, - "grad_norm": 0.000984929851256311, - "learning_rate": 0.00019999539899952687, - "loss": 46.0, - "step": 39947 - }, - { - "epoch": 3.054303572452549, - "grad_norm": 0.0010904729133471847, - "learning_rate": 0.00019999539876911282, - "loss": 46.0, - "step": 39948 - }, - { - "epoch": 3.0543800294359387, - "grad_norm": 0.0027253134176135063, - "learning_rate": 0.000199995398538693, - "loss": 46.0, - "step": 39949 - }, - { - "epoch": 3.0544564864193284, - "grad_norm": 0.0009720441303215921, - "learning_rate": 0.0001999953983082674, - "loss": 46.0, - "step": 39950 - }, - { - "epoch": 3.054532943402718, - "grad_norm": 0.0012783625861629844, - "learning_rate": 0.00019999539807783606, - "loss": 46.0, - "step": 39951 - }, - { - "epoch": 3.054609400386108, - "grad_norm": 0.002023769076913595, - "learning_rate": 0.00019999539784739894, - "loss": 46.0, - "step": 39952 - }, - { - "epoch": 3.0546858573694973, - "grad_norm": 0.002419615164399147, - "learning_rate": 0.00019999539761695608, - "loss": 46.0, - "step": 39953 - }, - { - "epoch": 3.054762314352887, - "grad_norm": 0.0015678760828450322, - "learning_rate": 0.00019999539738650742, - "loss": 46.0, - "step": 39954 - }, - { - "epoch": 3.054838771336277, - "grad_norm": 0.0031746365129947662, - "learning_rate": 0.000199995397156053, - "loss": 46.0, - "step": 39955 - }, - { - "epoch": 3.0549152283196666, - "grad_norm": 0.001625155913643539, - "learning_rate": 0.0001999953969255928, - "loss": 46.0, - "step": 39956 - }, - { - "epoch": 3.0549916853030563, - "grad_norm": 0.00221212743781507, - "learning_rate": 0.00019999539669512682, - "loss": 46.0, - "step": 39957 - }, - { - "epoch": 3.055068142286446, - "grad_norm": 0.01253463700413704, - "learning_rate": 0.0001999953964646551, - "loss": 46.0, - "step": 39958 - }, - { - "epoch": 3.055144599269836, - "grad_norm": 0.001577693154104054, - "learning_rate": 0.0001999953962341776, - "loss": 46.0, - "step": 39959 - }, - { - "epoch": 3.0552210562532256, - "grad_norm": 0.0026553766801953316, - "learning_rate": 0.0001999953960036943, - "loss": 46.0, - "step": 39960 - }, - { - "epoch": 3.0552975132366154, - "grad_norm": 0.0010875161970034242, - "learning_rate": 0.00019999539577320526, - "loss": 46.0, - "step": 39961 - }, - { - "epoch": 3.055373970220005, - "grad_norm": 0.0015368919121101499, - "learning_rate": 0.00019999539554271047, - "loss": 46.0, - "step": 39962 - }, - { - "epoch": 3.055450427203395, - "grad_norm": 0.0017528730677440763, - "learning_rate": 0.00019999539531220987, - "loss": 46.0, - "step": 39963 - }, - { - "epoch": 3.055526884186784, - "grad_norm": 0.0012605271767824888, - "learning_rate": 0.00019999539508170356, - "loss": 46.0, - "step": 39964 - }, - { - "epoch": 3.055603341170174, - "grad_norm": 0.00077737623360008, - "learning_rate": 0.00019999539485119141, - "loss": 46.0, - "step": 39965 - }, - { - "epoch": 3.0556797981535637, - "grad_norm": 0.0008446162100881338, - "learning_rate": 0.00019999539462067353, - "loss": 46.0, - "step": 39966 - }, - { - "epoch": 3.0557562551369535, - "grad_norm": 0.0023541422560811043, - "learning_rate": 0.0001999953943901499, - "loss": 46.0, - "step": 39967 - }, - { - "epoch": 3.0558327121203432, - "grad_norm": 0.0014828427229076624, - "learning_rate": 0.00019999539415962046, - "loss": 46.0, - "step": 39968 - }, - { - "epoch": 3.055909169103733, - "grad_norm": 0.0007023270009085536, - "learning_rate": 0.00019999539392908527, - "loss": 46.0, - "step": 39969 - }, - { - "epoch": 3.0559856260871228, - "grad_norm": 0.0011262464104220271, - "learning_rate": 0.0001999953936985443, - "loss": 46.0, - "step": 39970 - }, - { - "epoch": 3.0560620830705125, - "grad_norm": 0.0044969129376113415, - "learning_rate": 0.00019999539346799756, - "loss": 46.0, - "step": 39971 - }, - { - "epoch": 3.0561385400539023, - "grad_norm": 0.0014838998904451728, - "learning_rate": 0.00019999539323744506, - "loss": 46.0, - "step": 39972 - }, - { - "epoch": 3.056214997037292, - "grad_norm": 0.0024705128744244576, - "learning_rate": 0.00019999539300688678, - "loss": 46.0, - "step": 39973 - }, - { - "epoch": 3.056291454020682, - "grad_norm": 0.0029817065224051476, - "learning_rate": 0.00019999539277632274, - "loss": 46.0, - "step": 39974 - }, - { - "epoch": 3.056367911004071, - "grad_norm": 0.0027656417805701494, - "learning_rate": 0.00019999539254575294, - "loss": 46.0, - "step": 39975 - }, - { - "epoch": 3.056444367987461, - "grad_norm": 0.0027822358533740044, - "learning_rate": 0.00019999539231517735, - "loss": 46.0, - "step": 39976 - }, - { - "epoch": 3.0565208249708506, - "grad_norm": 0.0020388104021549225, - "learning_rate": 0.000199995392084596, - "loss": 46.0, - "step": 39977 - }, - { - "epoch": 3.0565972819542404, - "grad_norm": 0.005180031061172485, - "learning_rate": 0.00019999539185400892, - "loss": 46.0, - "step": 39978 - }, - { - "epoch": 3.05667373893763, - "grad_norm": 0.0030655774753540754, - "learning_rate": 0.000199995391623416, - "loss": 46.0, - "step": 39979 - }, - { - "epoch": 3.05675019592102, - "grad_norm": 0.001487218658439815, - "learning_rate": 0.00019999539139281734, - "loss": 46.0, - "step": 39980 - }, - { - "epoch": 3.0568266529044097, - "grad_norm": 0.0022295925300568342, - "learning_rate": 0.0001999953911622129, - "loss": 46.0, - "step": 39981 - }, - { - "epoch": 3.0569031098877995, - "grad_norm": 0.0013577919453382492, - "learning_rate": 0.00019999539093160272, - "loss": 46.0, - "step": 39982 - }, - { - "epoch": 3.056979566871189, - "grad_norm": 0.0019416635623201728, - "learning_rate": 0.00019999539070098677, - "loss": 46.0, - "step": 39983 - }, - { - "epoch": 3.057056023854579, - "grad_norm": 0.002988230437040329, - "learning_rate": 0.00019999539047036504, - "loss": 46.0, - "step": 39984 - }, - { - "epoch": 3.0571324808379687, - "grad_norm": 0.00440600560978055, - "learning_rate": 0.00019999539023973754, - "loss": 46.0, - "step": 39985 - }, - { - "epoch": 3.057208937821358, - "grad_norm": 0.0012431966606527567, - "learning_rate": 0.0001999953900091043, - "loss": 46.0, - "step": 39986 - }, - { - "epoch": 3.057285394804748, - "grad_norm": 0.0021161320619285107, - "learning_rate": 0.0001999953897784652, - "loss": 46.0, - "step": 39987 - }, - { - "epoch": 3.0573618517881376, - "grad_norm": 0.0011795427417382598, - "learning_rate": 0.00019999538954782042, - "loss": 46.0, - "step": 39988 - }, - { - "epoch": 3.0574383087715273, - "grad_norm": 0.0019480413757264614, - "learning_rate": 0.00019999538931716982, - "loss": 46.0, - "step": 39989 - }, - { - "epoch": 3.057514765754917, - "grad_norm": 0.001120949280448258, - "learning_rate": 0.00019999538908651348, - "loss": 46.0, - "step": 39990 - }, - { - "epoch": 3.057591222738307, - "grad_norm": 0.0014087992021813989, - "learning_rate": 0.00019999538885585137, - "loss": 46.0, - "step": 39991 - }, - { - "epoch": 3.0576676797216966, - "grad_norm": 0.003980633802711964, - "learning_rate": 0.00019999538862518348, - "loss": 46.0, - "step": 39992 - }, - { - "epoch": 3.0577441367050864, - "grad_norm": 0.0022805230692029, - "learning_rate": 0.00019999538839450982, - "loss": 46.0, - "step": 39993 - }, - { - "epoch": 3.057820593688476, - "grad_norm": 0.002794375643134117, - "learning_rate": 0.00019999538816383038, - "loss": 46.0, - "step": 39994 - }, - { - "epoch": 3.057897050671866, - "grad_norm": 0.0013495429884642363, - "learning_rate": 0.0001999953879331452, - "loss": 46.0, - "step": 39995 - }, - { - "epoch": 3.0579735076552557, - "grad_norm": 0.002690873108804226, - "learning_rate": 0.00019999538770245425, - "loss": 46.0, - "step": 39996 - }, - { - "epoch": 3.058049964638645, - "grad_norm": 0.000877495389431715, - "learning_rate": 0.0001999953874717575, - "loss": 46.0, - "step": 39997 - }, - { - "epoch": 3.0581264216220347, - "grad_norm": 0.0020809336565434933, - "learning_rate": 0.00019999538724105502, - "loss": 46.0, - "step": 39998 - }, - { - "epoch": 3.0582028786054245, - "grad_norm": 0.0022771693766117096, - "learning_rate": 0.00019999538701034674, - "loss": 46.0, - "step": 39999 - }, - { - "epoch": 3.0582793355888143, - "grad_norm": 0.0040611089207232, - "learning_rate": 0.0001999953867796327, - "loss": 46.0, - "step": 40000 - }, - { - "epoch": 3.058355792572204, - "grad_norm": 0.002339953323826194, - "learning_rate": 0.0001999953865489129, - "loss": 46.0, - "step": 40001 - }, - { - "epoch": 3.058432249555594, - "grad_norm": 0.00136351038236171, - "learning_rate": 0.00019999538631818733, - "loss": 46.0, - "step": 40002 - }, - { - "epoch": 3.0585087065389835, - "grad_norm": 0.0019463424105197191, - "learning_rate": 0.000199995386087456, - "loss": 46.0, - "step": 40003 - }, - { - "epoch": 3.0585851635223733, - "grad_norm": 0.0007321077864617109, - "learning_rate": 0.00019999538585671888, - "loss": 46.0, - "step": 40004 - }, - { - "epoch": 3.058661620505763, - "grad_norm": 0.0027424537111073732, - "learning_rate": 0.00019999538562597596, - "loss": 46.0, - "step": 40005 - }, - { - "epoch": 3.058738077489153, - "grad_norm": 0.004692576825618744, - "learning_rate": 0.0001999953853952273, - "loss": 46.0, - "step": 40006 - }, - { - "epoch": 3.0588145344725426, - "grad_norm": 0.0038249201606959105, - "learning_rate": 0.0001999953851644729, - "loss": 46.0, - "step": 40007 - }, - { - "epoch": 3.058890991455932, - "grad_norm": 0.0010336223058402538, - "learning_rate": 0.0001999953849337127, - "loss": 46.0, - "step": 40008 - }, - { - "epoch": 3.0589674484393217, - "grad_norm": 0.0016876417212188244, - "learning_rate": 0.00019999538470294673, - "loss": 46.0, - "step": 40009 - }, - { - "epoch": 3.0590439054227114, - "grad_norm": 0.000810078636277467, - "learning_rate": 0.000199995384472175, - "loss": 46.0, - "step": 40010 - }, - { - "epoch": 3.059120362406101, - "grad_norm": 0.002382120117545128, - "learning_rate": 0.0001999953842413975, - "loss": 46.0, - "step": 40011 - }, - { - "epoch": 3.059196819389491, - "grad_norm": 0.0017555918311700225, - "learning_rate": 0.00019999538401061423, - "loss": 46.0, - "step": 40012 - }, - { - "epoch": 3.0592732763728807, - "grad_norm": 0.00617721164599061, - "learning_rate": 0.00019999538377982518, - "loss": 46.0, - "step": 40013 - }, - { - "epoch": 3.0593497333562705, - "grad_norm": 0.0021797812078148127, - "learning_rate": 0.00019999538354903038, - "loss": 46.0, - "step": 40014 - }, - { - "epoch": 3.0594261903396602, - "grad_norm": 0.0028860955499112606, - "learning_rate": 0.0001999953833182298, - "loss": 46.0, - "step": 40015 - }, - { - "epoch": 3.05950264732305, - "grad_norm": 0.0033860933035612106, - "learning_rate": 0.00019999538308742345, - "loss": 46.0, - "step": 40016 - }, - { - "epoch": 3.0595791043064398, - "grad_norm": 0.00279794679954648, - "learning_rate": 0.00019999538285661134, - "loss": 46.0, - "step": 40017 - }, - { - "epoch": 3.0596555612898295, - "grad_norm": 0.0007723582093603909, - "learning_rate": 0.00019999538262579347, - "loss": 46.0, - "step": 40018 - }, - { - "epoch": 3.059732018273219, - "grad_norm": 0.0006307529401965439, - "learning_rate": 0.0001999953823949698, - "loss": 46.0, - "step": 40019 - }, - { - "epoch": 3.0598084752566086, - "grad_norm": 0.001034247106872499, - "learning_rate": 0.0001999953821641404, - "loss": 46.0, - "step": 40020 - }, - { - "epoch": 3.0598849322399984, - "grad_norm": 0.0015814361395314336, - "learning_rate": 0.00019999538193330517, - "loss": 46.0, - "step": 40021 - }, - { - "epoch": 3.059961389223388, - "grad_norm": 0.006440328434109688, - "learning_rate": 0.00019999538170246424, - "loss": 46.0, - "step": 40022 - }, - { - "epoch": 3.060037846206778, - "grad_norm": 0.0016617951914668083, - "learning_rate": 0.00019999538147161752, - "loss": 46.0, - "step": 40023 - }, - { - "epoch": 3.0601143031901676, - "grad_norm": 0.004704756662249565, - "learning_rate": 0.000199995381240765, - "loss": 46.0, - "step": 40024 - }, - { - "epoch": 3.0601907601735574, - "grad_norm": 0.0022259466350078583, - "learning_rate": 0.00019999538100990674, - "loss": 46.0, - "step": 40025 - }, - { - "epoch": 3.060267217156947, - "grad_norm": 0.0014169689966365695, - "learning_rate": 0.0001999953807790427, - "loss": 46.0, - "step": 40026 - }, - { - "epoch": 3.060343674140337, - "grad_norm": 0.0014136621030047536, - "learning_rate": 0.00019999538054817287, - "loss": 46.0, - "step": 40027 - }, - { - "epoch": 3.0604201311237267, - "grad_norm": 0.0006793364300392568, - "learning_rate": 0.00019999538031729733, - "loss": 46.0, - "step": 40028 - }, - { - "epoch": 3.0604965881071164, - "grad_norm": 0.001826431485824287, - "learning_rate": 0.00019999538008641597, - "loss": 46.0, - "step": 40029 - }, - { - "epoch": 3.0605730450905058, - "grad_norm": 0.010063394904136658, - "learning_rate": 0.00019999537985552883, - "loss": 46.0, - "step": 40030 - }, - { - "epoch": 3.0606495020738955, - "grad_norm": 0.0012650748249143362, - "learning_rate": 0.00019999537962463597, - "loss": 46.0, - "step": 40031 - }, - { - "epoch": 3.0607259590572853, - "grad_norm": 0.002444294048473239, - "learning_rate": 0.0001999953793937373, - "loss": 46.0, - "step": 40032 - }, - { - "epoch": 3.060802416040675, - "grad_norm": 0.005888687912374735, - "learning_rate": 0.0001999953791628329, - "loss": 46.0, - "step": 40033 - }, - { - "epoch": 3.060878873024065, - "grad_norm": 0.0026312689296901226, - "learning_rate": 0.00019999537893192272, - "loss": 46.0, - "step": 40034 - }, - { - "epoch": 3.0609553300074546, - "grad_norm": 0.0026897434145212173, - "learning_rate": 0.00019999537870100674, - "loss": 46.0, - "step": 40035 - }, - { - "epoch": 3.0610317869908443, - "grad_norm": 0.002142061712220311, - "learning_rate": 0.000199995378470085, - "loss": 46.0, - "step": 40036 - }, - { - "epoch": 3.061108243974234, - "grad_norm": 0.00276556727476418, - "learning_rate": 0.0001999953782391575, - "loss": 46.0, - "step": 40037 - }, - { - "epoch": 3.061184700957624, - "grad_norm": 0.0049687656573951244, - "learning_rate": 0.00019999537800822424, - "loss": 46.0, - "step": 40038 - }, - { - "epoch": 3.0612611579410136, - "grad_norm": 0.0012892611557617784, - "learning_rate": 0.00019999537777728522, - "loss": 46.0, - "step": 40039 - }, - { - "epoch": 3.0613376149244034, - "grad_norm": 0.0027382851112633944, - "learning_rate": 0.0001999953775463404, - "loss": 46.0, - "step": 40040 - }, - { - "epoch": 3.0614140719077927, - "grad_norm": 0.004092883318662643, - "learning_rate": 0.0001999953773153898, - "loss": 46.0, - "step": 40041 - }, - { - "epoch": 3.0614905288911824, - "grad_norm": 0.0012523240875452757, - "learning_rate": 0.0001999953770844335, - "loss": 46.0, - "step": 40042 - }, - { - "epoch": 3.061566985874572, - "grad_norm": 0.001444007270038128, - "learning_rate": 0.00019999537685347135, - "loss": 46.0, - "step": 40043 - }, - { - "epoch": 3.061643442857962, - "grad_norm": 0.0008491792832501233, - "learning_rate": 0.00019999537662250346, - "loss": 46.0, - "step": 40044 - }, - { - "epoch": 3.0617198998413517, - "grad_norm": 0.0012101087486371398, - "learning_rate": 0.00019999537639152983, - "loss": 46.0, - "step": 40045 - }, - { - "epoch": 3.0617963568247415, - "grad_norm": 0.0025914970319718122, - "learning_rate": 0.0001999953761605504, - "loss": 46.0, - "step": 40046 - }, - { - "epoch": 3.0618728138081313, - "grad_norm": 0.003165853675454855, - "learning_rate": 0.00019999537592956519, - "loss": 46.0, - "step": 40047 - }, - { - "epoch": 3.061949270791521, - "grad_norm": 0.0025450438261032104, - "learning_rate": 0.00019999537569857423, - "loss": 46.0, - "step": 40048 - }, - { - "epoch": 3.0620257277749108, - "grad_norm": 0.002609213814139366, - "learning_rate": 0.0001999953754675775, - "loss": 46.0, - "step": 40049 - }, - { - "epoch": 3.0621021847583005, - "grad_norm": 0.0018422119319438934, - "learning_rate": 0.000199995375236575, - "loss": 46.0, - "step": 40050 - }, - { - "epoch": 3.0621786417416903, - "grad_norm": 0.0020399370696395636, - "learning_rate": 0.00019999537500556676, - "loss": 46.0, - "step": 40051 - }, - { - "epoch": 3.0622550987250796, - "grad_norm": 0.0019451486878097057, - "learning_rate": 0.0001999953747745527, - "loss": 46.0, - "step": 40052 - }, - { - "epoch": 3.0623315557084694, - "grad_norm": 0.00195057422388345, - "learning_rate": 0.0001999953745435329, - "loss": 46.0, - "step": 40053 - }, - { - "epoch": 3.062408012691859, - "grad_norm": 0.0020503527484834194, - "learning_rate": 0.00019999537431250733, - "loss": 46.0, - "step": 40054 - }, - { - "epoch": 3.062484469675249, - "grad_norm": 0.0005428163567557931, - "learning_rate": 0.00019999537408147599, - "loss": 46.0, - "step": 40055 - }, - { - "epoch": 3.0625609266586387, - "grad_norm": 0.0028380868025124073, - "learning_rate": 0.00019999537385043887, - "loss": 46.0, - "step": 40056 - }, - { - "epoch": 3.0626373836420284, - "grad_norm": 0.002967200009152293, - "learning_rate": 0.00019999537361939599, - "loss": 46.0, - "step": 40057 - }, - { - "epoch": 3.062713840625418, - "grad_norm": 0.002673485316336155, - "learning_rate": 0.00019999537338834733, - "loss": 46.0, - "step": 40058 - }, - { - "epoch": 3.062790297608808, - "grad_norm": 0.0050707170739769936, - "learning_rate": 0.00019999537315729292, - "loss": 46.0, - "step": 40059 - }, - { - "epoch": 3.0628667545921977, - "grad_norm": 0.002262258669361472, - "learning_rate": 0.0001999953729262327, - "loss": 46.0, - "step": 40060 - }, - { - "epoch": 3.0629432115755875, - "grad_norm": 0.0028284159488976, - "learning_rate": 0.00019999537269516676, - "loss": 46.0, - "step": 40061 - }, - { - "epoch": 3.063019668558977, - "grad_norm": 0.001756039448082447, - "learning_rate": 0.00019999537246409503, - "loss": 46.0, - "step": 40062 - }, - { - "epoch": 3.0630961255423665, - "grad_norm": 0.0012712596217170358, - "learning_rate": 0.00019999537223301753, - "loss": 46.0, - "step": 40063 - }, - { - "epoch": 3.0631725825257563, - "grad_norm": 0.0033195726573467255, - "learning_rate": 0.00019999537200193426, - "loss": 46.0, - "step": 40064 - }, - { - "epoch": 3.063249039509146, - "grad_norm": 0.0031835127156227827, - "learning_rate": 0.00019999537177084524, - "loss": 46.0, - "step": 40065 - }, - { - "epoch": 3.063325496492536, - "grad_norm": 0.00181575829628855, - "learning_rate": 0.00019999537153975042, - "loss": 46.0, - "step": 40066 - }, - { - "epoch": 3.0634019534759256, - "grad_norm": 0.000695188413374126, - "learning_rate": 0.00019999537130864985, - "loss": 46.0, - "step": 40067 - }, - { - "epoch": 3.0634784104593153, - "grad_norm": 0.0021538808941841125, - "learning_rate": 0.00019999537107754352, - "loss": 46.0, - "step": 40068 - }, - { - "epoch": 3.063554867442705, - "grad_norm": 0.000692805799189955, - "learning_rate": 0.0001999953708464314, - "loss": 46.0, - "step": 40069 - }, - { - "epoch": 3.063631324426095, - "grad_norm": 0.006684989668428898, - "learning_rate": 0.00019999537061531352, - "loss": 46.0, - "step": 40070 - }, - { - "epoch": 3.0637077814094846, - "grad_norm": 0.0011334270238876343, - "learning_rate": 0.00019999537038418986, - "loss": 46.0, - "step": 40071 - }, - { - "epoch": 3.0637842383928744, - "grad_norm": 0.0024604403879493475, - "learning_rate": 0.00019999537015306045, - "loss": 46.0, - "step": 40072 - }, - { - "epoch": 3.063860695376264, - "grad_norm": 0.0010915531311184168, - "learning_rate": 0.00019999536992192524, - "loss": 46.0, - "step": 40073 - }, - { - "epoch": 3.0639371523596535, - "grad_norm": 0.00166521652135998, - "learning_rate": 0.00019999536969078426, - "loss": 46.0, - "step": 40074 - }, - { - "epoch": 3.0640136093430432, - "grad_norm": 0.005697852000594139, - "learning_rate": 0.00019999536945963756, - "loss": 46.0, - "step": 40075 - }, - { - "epoch": 3.064090066326433, - "grad_norm": 0.0012899364810436964, - "learning_rate": 0.00019999536922848507, - "loss": 46.0, - "step": 40076 - }, - { - "epoch": 3.0641665233098228, - "grad_norm": 0.002538541564717889, - "learning_rate": 0.0001999953689973268, - "loss": 46.0, - "step": 40077 - }, - { - "epoch": 3.0642429802932125, - "grad_norm": 0.001561658107675612, - "learning_rate": 0.00019999536876616277, - "loss": 46.0, - "step": 40078 - }, - { - "epoch": 3.0643194372766023, - "grad_norm": 0.006923548877239227, - "learning_rate": 0.00019999536853499295, - "loss": 46.0, - "step": 40079 - }, - { - "epoch": 3.064395894259992, - "grad_norm": 0.0018838904798030853, - "learning_rate": 0.0001999953683038174, - "loss": 46.0, - "step": 40080 - }, - { - "epoch": 3.064472351243382, - "grad_norm": 0.002636120654642582, - "learning_rate": 0.00019999536807263605, - "loss": 46.0, - "step": 40081 - }, - { - "epoch": 3.0645488082267716, - "grad_norm": 0.0010320631554350257, - "learning_rate": 0.0001999953678414489, - "loss": 46.0, - "step": 40082 - }, - { - "epoch": 3.0646252652101613, - "grad_norm": 0.003608887316659093, - "learning_rate": 0.00019999536761025602, - "loss": 46.0, - "step": 40083 - }, - { - "epoch": 3.0647017221935506, - "grad_norm": 0.00491699343547225, - "learning_rate": 0.00019999536737905736, - "loss": 46.0, - "step": 40084 - }, - { - "epoch": 3.0647781791769404, - "grad_norm": 0.0027365938294678926, - "learning_rate": 0.00019999536714785296, - "loss": 46.0, - "step": 40085 - }, - { - "epoch": 3.06485463616033, - "grad_norm": 0.0014830800937488675, - "learning_rate": 0.00019999536691664278, - "loss": 46.0, - "step": 40086 - }, - { - "epoch": 3.06493109314372, - "grad_norm": 0.0034375060349702835, - "learning_rate": 0.00019999536668542683, - "loss": 46.0, - "step": 40087 - }, - { - "epoch": 3.0650075501271097, - "grad_norm": 0.0035186083987355232, - "learning_rate": 0.0001999953664542051, - "loss": 46.0, - "step": 40088 - }, - { - "epoch": 3.0650840071104994, - "grad_norm": 0.0017330920090898871, - "learning_rate": 0.00019999536622297757, - "loss": 46.0, - "step": 40089 - }, - { - "epoch": 3.065160464093889, - "grad_norm": 0.0023464756086468697, - "learning_rate": 0.0001999953659917443, - "loss": 46.0, - "step": 40090 - }, - { - "epoch": 3.065236921077279, - "grad_norm": 0.002111198613420129, - "learning_rate": 0.00019999536576050528, - "loss": 46.0, - "step": 40091 - }, - { - "epoch": 3.0653133780606687, - "grad_norm": 0.002457548165693879, - "learning_rate": 0.0001999953655292605, - "loss": 46.0, - "step": 40092 - }, - { - "epoch": 3.0653898350440585, - "grad_norm": 0.002165521727874875, - "learning_rate": 0.0001999953652980099, - "loss": 46.0, - "step": 40093 - }, - { - "epoch": 3.0654662920274482, - "grad_norm": 0.0007987722638063133, - "learning_rate": 0.00019999536506675356, - "loss": 46.0, - "step": 40094 - }, - { - "epoch": 3.0655427490108376, - "grad_norm": 0.0018417808460071683, - "learning_rate": 0.00019999536483549147, - "loss": 46.0, - "step": 40095 - }, - { - "epoch": 3.0656192059942273, - "grad_norm": 0.0023169689811766148, - "learning_rate": 0.0001999953646042236, - "loss": 46.0, - "step": 40096 - }, - { - "epoch": 3.065695662977617, - "grad_norm": 0.0015528496587648988, - "learning_rate": 0.00019999536437294993, - "loss": 46.0, - "step": 40097 - }, - { - "epoch": 3.065772119961007, - "grad_norm": 0.0013787455391138792, - "learning_rate": 0.0001999953641416705, - "loss": 46.0, - "step": 40098 - }, - { - "epoch": 3.0658485769443966, - "grad_norm": 0.002037461381405592, - "learning_rate": 0.00019999536391038532, - "loss": 46.0, - "step": 40099 - }, - { - "epoch": 3.0659250339277864, - "grad_norm": 0.002598753198981285, - "learning_rate": 0.00019999536367909437, - "loss": 46.0, - "step": 40100 - }, - { - "epoch": 3.066001490911176, - "grad_norm": 0.001525825122371316, - "learning_rate": 0.00019999536344779764, - "loss": 46.0, - "step": 40101 - }, - { - "epoch": 3.066077947894566, - "grad_norm": 0.0033435372170060873, - "learning_rate": 0.00019999536321649511, - "loss": 46.0, - "step": 40102 - }, - { - "epoch": 3.0661544048779557, - "grad_norm": 0.002753153210505843, - "learning_rate": 0.00019999536298518687, - "loss": 46.0, - "step": 40103 - }, - { - "epoch": 3.0662308618613454, - "grad_norm": 0.0018918882124125957, - "learning_rate": 0.00019999536275387282, - "loss": 46.0, - "step": 40104 - }, - { - "epoch": 3.066307318844735, - "grad_norm": 0.0009196771425195038, - "learning_rate": 0.00019999536252255303, - "loss": 46.0, - "step": 40105 - }, - { - "epoch": 3.0663837758281245, - "grad_norm": 0.006751567590981722, - "learning_rate": 0.00019999536229122744, - "loss": 46.0, - "step": 40106 - }, - { - "epoch": 3.0664602328115143, - "grad_norm": 0.0015411663334816694, - "learning_rate": 0.0001999953620598961, - "loss": 46.0, - "step": 40107 - }, - { - "epoch": 3.066536689794904, - "grad_norm": 0.004163368605077267, - "learning_rate": 0.00019999536182855902, - "loss": 46.0, - "step": 40108 - }, - { - "epoch": 3.0666131467782938, - "grad_norm": 0.0010396692669019103, - "learning_rate": 0.00019999536159721613, - "loss": 46.0, - "step": 40109 - }, - { - "epoch": 3.0666896037616835, - "grad_norm": 0.0031032890547066927, - "learning_rate": 0.00019999536136586747, - "loss": 46.0, - "step": 40110 - }, - { - "epoch": 3.0667660607450733, - "grad_norm": 0.0028323382139205933, - "learning_rate": 0.00019999536113451304, - "loss": 46.0, - "step": 40111 - }, - { - "epoch": 3.066842517728463, - "grad_norm": 0.0006947419024072587, - "learning_rate": 0.00019999536090315286, - "loss": 46.0, - "step": 40112 - }, - { - "epoch": 3.066918974711853, - "grad_norm": 0.003625748213380575, - "learning_rate": 0.0001999953606717869, - "loss": 46.0, - "step": 40113 - }, - { - "epoch": 3.0669954316952426, - "grad_norm": 0.0006266285199671984, - "learning_rate": 0.00019999536044041518, - "loss": 46.0, - "step": 40114 - }, - { - "epoch": 3.0670718886786323, - "grad_norm": 0.0009936565766111016, - "learning_rate": 0.00019999536020903769, - "loss": 46.0, - "step": 40115 - }, - { - "epoch": 3.067148345662022, - "grad_norm": 0.00374209089204669, - "learning_rate": 0.00019999535997765441, - "loss": 46.0, - "step": 40116 - }, - { - "epoch": 3.0672248026454114, - "grad_norm": 0.005744465626776218, - "learning_rate": 0.00019999535974626537, - "loss": 46.0, - "step": 40117 - }, - { - "epoch": 3.067301259628801, - "grad_norm": 0.00117302720900625, - "learning_rate": 0.00019999535951487055, - "loss": 46.0, - "step": 40118 - }, - { - "epoch": 3.067377716612191, - "grad_norm": 0.0033250923734158278, - "learning_rate": 0.00019999535928347, - "loss": 46.0, - "step": 40119 - }, - { - "epoch": 3.0674541735955807, - "grad_norm": 0.004042233340442181, - "learning_rate": 0.00019999535905206365, - "loss": 46.0, - "step": 40120 - }, - { - "epoch": 3.0675306305789705, - "grad_norm": 0.001535071642138064, - "learning_rate": 0.00019999535882065156, - "loss": 46.0, - "step": 40121 - }, - { - "epoch": 3.0676070875623602, - "grad_norm": 0.0009647165425121784, - "learning_rate": 0.00019999535858923365, - "loss": 46.0, - "step": 40122 - }, - { - "epoch": 3.06768354454575, - "grad_norm": 0.0016198258381336927, - "learning_rate": 0.00019999535835781002, - "loss": 46.0, - "step": 40123 - }, - { - "epoch": 3.0677600015291397, - "grad_norm": 0.002851195400580764, - "learning_rate": 0.00019999535812638061, - "loss": 46.0, - "step": 40124 - }, - { - "epoch": 3.0678364585125295, - "grad_norm": 0.0016150474548339844, - "learning_rate": 0.0001999953578949454, - "loss": 46.0, - "step": 40125 - }, - { - "epoch": 3.0679129154959193, - "grad_norm": 0.002659546909853816, - "learning_rate": 0.00019999535766350446, - "loss": 46.0, - "step": 40126 - }, - { - "epoch": 3.067989372479309, - "grad_norm": 0.0016413042321801186, - "learning_rate": 0.00019999535743205773, - "loss": 46.0, - "step": 40127 - }, - { - "epoch": 3.0680658294626983, - "grad_norm": 0.0067235915921628475, - "learning_rate": 0.00019999535720060526, - "loss": 46.0, - "step": 40128 - }, - { - "epoch": 3.068142286446088, - "grad_norm": 0.0025130000431090593, - "learning_rate": 0.00019999535696914696, - "loss": 46.0, - "step": 40129 - }, - { - "epoch": 3.068218743429478, - "grad_norm": 0.000890807481482625, - "learning_rate": 0.00019999535673768292, - "loss": 46.0, - "step": 40130 - }, - { - "epoch": 3.0682952004128676, - "grad_norm": 0.002069278387352824, - "learning_rate": 0.00019999535650621315, - "loss": 46.0, - "step": 40131 - }, - { - "epoch": 3.0683716573962574, - "grad_norm": 0.0019009563839063048, - "learning_rate": 0.00019999535627473756, - "loss": 46.0, - "step": 40132 - }, - { - "epoch": 3.068448114379647, - "grad_norm": 0.00897801760584116, - "learning_rate": 0.00019999535604325622, - "loss": 46.0, - "step": 40133 - }, - { - "epoch": 3.068524571363037, - "grad_norm": 0.006295171566307545, - "learning_rate": 0.0001999953558117691, - "loss": 46.0, - "step": 40134 - }, - { - "epoch": 3.0686010283464267, - "grad_norm": 0.0012238348135724664, - "learning_rate": 0.00019999535558027623, - "loss": 46.0, - "step": 40135 - }, - { - "epoch": 3.0686774853298164, - "grad_norm": 0.0023444064427167177, - "learning_rate": 0.00019999535534877757, - "loss": 46.0, - "step": 40136 - }, - { - "epoch": 3.068753942313206, - "grad_norm": 0.0026438580825924873, - "learning_rate": 0.00019999535511727316, - "loss": 46.0, - "step": 40137 - }, - { - "epoch": 3.068830399296596, - "grad_norm": 0.0035757767036557198, - "learning_rate": 0.000199995354885763, - "loss": 46.0, - "step": 40138 - }, - { - "epoch": 3.0689068562799853, - "grad_norm": 0.002656287048012018, - "learning_rate": 0.000199995354654247, - "loss": 46.0, - "step": 40139 - }, - { - "epoch": 3.068983313263375, - "grad_norm": 0.009296913631260395, - "learning_rate": 0.00019999535442272528, - "loss": 46.0, - "step": 40140 - }, - { - "epoch": 3.069059770246765, - "grad_norm": 0.004610334988683462, - "learning_rate": 0.0001999953541911978, - "loss": 46.0, - "step": 40141 - }, - { - "epoch": 3.0691362272301546, - "grad_norm": 0.004539922345429659, - "learning_rate": 0.00019999535395966454, - "loss": 46.0, - "step": 40142 - }, - { - "epoch": 3.0692126842135443, - "grad_norm": 0.001351111801341176, - "learning_rate": 0.0001999953537281255, - "loss": 46.0, - "step": 40143 - }, - { - "epoch": 3.069289141196934, - "grad_norm": 0.001297913957387209, - "learning_rate": 0.0001999953534965807, - "loss": 46.0, - "step": 40144 - }, - { - "epoch": 3.069365598180324, - "grad_norm": 0.006673015654087067, - "learning_rate": 0.00019999535326503014, - "loss": 46.0, - "step": 40145 - }, - { - "epoch": 3.0694420551637136, - "grad_norm": 0.0012794877402484417, - "learning_rate": 0.00019999535303347378, - "loss": 46.0, - "step": 40146 - }, - { - "epoch": 3.0695185121471034, - "grad_norm": 0.0027004843577742577, - "learning_rate": 0.00019999535280191167, - "loss": 46.0, - "step": 40147 - }, - { - "epoch": 3.069594969130493, - "grad_norm": 0.0023454611655324697, - "learning_rate": 0.00019999535257034378, - "loss": 46.0, - "step": 40148 - }, - { - "epoch": 3.069671426113883, - "grad_norm": 0.0034696401562541723, - "learning_rate": 0.00019999535233877012, - "loss": 46.0, - "step": 40149 - }, - { - "epoch": 3.069747883097272, - "grad_norm": 0.0046706423163414, - "learning_rate": 0.00019999535210719075, - "loss": 46.0, - "step": 40150 - }, - { - "epoch": 3.069824340080662, - "grad_norm": 0.002217352157458663, - "learning_rate": 0.00019999535187560554, - "loss": 46.0, - "step": 40151 - }, - { - "epoch": 3.0699007970640517, - "grad_norm": 0.0018033245578408241, - "learning_rate": 0.0001999953516440146, - "loss": 46.0, - "step": 40152 - }, - { - "epoch": 3.0699772540474415, - "grad_norm": 0.0009308006265200675, - "learning_rate": 0.00019999535141241784, - "loss": 46.0, - "step": 40153 - }, - { - "epoch": 3.0700537110308312, - "grad_norm": 0.005178571213036776, - "learning_rate": 0.00019999535118081537, - "loss": 46.0, - "step": 40154 - }, - { - "epoch": 3.070130168014221, - "grad_norm": 0.003584745107218623, - "learning_rate": 0.0001999953509492071, - "loss": 46.0, - "step": 40155 - }, - { - "epoch": 3.0702066249976108, - "grad_norm": 0.0010845190845429897, - "learning_rate": 0.00019999535071759308, - "loss": 46.0, - "step": 40156 - }, - { - "epoch": 3.0702830819810005, - "grad_norm": 0.002099919132888317, - "learning_rate": 0.0001999953504859733, - "loss": 46.0, - "step": 40157 - }, - { - "epoch": 3.0703595389643903, - "grad_norm": 0.004751711152493954, - "learning_rate": 0.0001999953502543477, - "loss": 46.0, - "step": 40158 - }, - { - "epoch": 3.07043599594778, - "grad_norm": 0.003966817632317543, - "learning_rate": 0.00019999535002271636, - "loss": 46.0, - "step": 40159 - }, - { - "epoch": 3.07051245293117, - "grad_norm": 0.002709440654143691, - "learning_rate": 0.00019999534979107925, - "loss": 46.0, - "step": 40160 - }, - { - "epoch": 3.070588909914559, - "grad_norm": 0.004585724789649248, - "learning_rate": 0.00019999534955943637, - "loss": 46.0, - "step": 40161 - }, - { - "epoch": 3.070665366897949, - "grad_norm": 0.0016803338658064604, - "learning_rate": 0.00019999534932778774, - "loss": 46.0, - "step": 40162 - }, - { - "epoch": 3.0707418238813387, - "grad_norm": 0.001813422655686736, - "learning_rate": 0.0001999953490961333, - "loss": 46.0, - "step": 40163 - }, - { - "epoch": 3.0708182808647284, - "grad_norm": 0.0023779154289513826, - "learning_rate": 0.0001999953488644731, - "loss": 46.0, - "step": 40164 - }, - { - "epoch": 3.070894737848118, - "grad_norm": 0.0024741145316511393, - "learning_rate": 0.00019999534863280718, - "loss": 46.0, - "step": 40165 - }, - { - "epoch": 3.070971194831508, - "grad_norm": 0.003069663653150201, - "learning_rate": 0.00019999534840113543, - "loss": 46.0, - "step": 40166 - }, - { - "epoch": 3.0710476518148977, - "grad_norm": 0.0035713817924261093, - "learning_rate": 0.00019999534816945796, - "loss": 46.0, - "step": 40167 - }, - { - "epoch": 3.0711241087982875, - "grad_norm": 0.001247467938810587, - "learning_rate": 0.00019999534793777471, - "loss": 46.0, - "step": 40168 - }, - { - "epoch": 3.071200565781677, - "grad_norm": 0.0021556986030191183, - "learning_rate": 0.00019999534770608564, - "loss": 46.0, - "step": 40169 - }, - { - "epoch": 3.071277022765067, - "grad_norm": 0.0012641482753679156, - "learning_rate": 0.00019999534747439083, - "loss": 46.0, - "step": 40170 - }, - { - "epoch": 3.0713534797484567, - "grad_norm": 0.0012209730921313167, - "learning_rate": 0.00019999534724269026, - "loss": 46.0, - "step": 40171 - }, - { - "epoch": 3.071429936731846, - "grad_norm": 0.008485635742545128, - "learning_rate": 0.00019999534701098395, - "loss": 46.0, - "step": 40172 - }, - { - "epoch": 3.071506393715236, - "grad_norm": 0.007556588854640722, - "learning_rate": 0.00019999534677927184, - "loss": 46.0, - "step": 40173 - }, - { - "epoch": 3.0715828506986256, - "grad_norm": 0.0024074390530586243, - "learning_rate": 0.00019999534654755396, - "loss": 46.0, - "step": 40174 - }, - { - "epoch": 3.0716593076820153, - "grad_norm": 0.0018514370312914252, - "learning_rate": 0.0001999953463158303, - "loss": 46.0, - "step": 40175 - }, - { - "epoch": 3.071735764665405, - "grad_norm": 0.003574805799871683, - "learning_rate": 0.0001999953460841009, - "loss": 46.0, - "step": 40176 - }, - { - "epoch": 3.071812221648795, - "grad_norm": 0.0019145908299833536, - "learning_rate": 0.0001999953458523657, - "loss": 46.0, - "step": 40177 - }, - { - "epoch": 3.0718886786321846, - "grad_norm": 0.0030677402392029762, - "learning_rate": 0.00019999534562062477, - "loss": 46.0, - "step": 40178 - }, - { - "epoch": 3.0719651356155744, - "grad_norm": 0.002042026026174426, - "learning_rate": 0.00019999534538887802, - "loss": 46.0, - "step": 40179 - }, - { - "epoch": 3.072041592598964, - "grad_norm": 0.0029883794486522675, - "learning_rate": 0.00019999534515712552, - "loss": 46.0, - "step": 40180 - }, - { - "epoch": 3.072118049582354, - "grad_norm": 0.003546859370544553, - "learning_rate": 0.00019999534492536728, - "loss": 46.0, - "step": 40181 - }, - { - "epoch": 3.0721945065657437, - "grad_norm": 0.002618842525407672, - "learning_rate": 0.00019999534469360324, - "loss": 46.0, - "step": 40182 - }, - { - "epoch": 3.072270963549133, - "grad_norm": 0.0011484178248792887, - "learning_rate": 0.00019999534446183345, - "loss": 46.0, - "step": 40183 - }, - { - "epoch": 3.0723474205325227, - "grad_norm": 0.0018216263270005584, - "learning_rate": 0.00019999534423005785, - "loss": 46.0, - "step": 40184 - }, - { - "epoch": 3.0724238775159125, - "grad_norm": 0.0013706331374123693, - "learning_rate": 0.00019999534399827652, - "loss": 46.0, - "step": 40185 - }, - { - "epoch": 3.0725003344993023, - "grad_norm": 0.0013992574531584978, - "learning_rate": 0.00019999534376648943, - "loss": 46.0, - "step": 40186 - }, - { - "epoch": 3.072576791482692, - "grad_norm": 0.002979496493935585, - "learning_rate": 0.00019999534353469652, - "loss": 46.0, - "step": 40187 - }, - { - "epoch": 3.072653248466082, - "grad_norm": 0.0015877066180109978, - "learning_rate": 0.00019999534330289792, - "loss": 46.0, - "step": 40188 - }, - { - "epoch": 3.0727297054494715, - "grad_norm": 0.0013644949067384005, - "learning_rate": 0.0001999953430710935, - "loss": 46.0, - "step": 40189 - }, - { - "epoch": 3.0728061624328613, - "grad_norm": 0.0019357744604349136, - "learning_rate": 0.0001999953428392833, - "loss": 46.0, - "step": 40190 - }, - { - "epoch": 3.072882619416251, - "grad_norm": 0.0014490032335743308, - "learning_rate": 0.00019999534260746734, - "loss": 46.0, - "step": 40191 - }, - { - "epoch": 3.072959076399641, - "grad_norm": 0.00280370214022696, - "learning_rate": 0.00019999534237564564, - "loss": 46.0, - "step": 40192 - }, - { - "epoch": 3.07303553338303, - "grad_norm": 0.0015836763195693493, - "learning_rate": 0.00019999534214381815, - "loss": 46.0, - "step": 40193 - }, - { - "epoch": 3.07311199036642, - "grad_norm": 0.0015210001729428768, - "learning_rate": 0.00019999534191198488, - "loss": 46.0, - "step": 40194 - }, - { - "epoch": 3.0731884473498097, - "grad_norm": 0.002355832140892744, - "learning_rate": 0.00019999534168014583, - "loss": 46.0, - "step": 40195 - }, - { - "epoch": 3.0732649043331994, - "grad_norm": 0.0025815358385443687, - "learning_rate": 0.00019999534144830104, - "loss": 46.0, - "step": 40196 - }, - { - "epoch": 3.073341361316589, - "grad_norm": 0.0015076538547873497, - "learning_rate": 0.00019999534121645045, - "loss": 46.0, - "step": 40197 - }, - { - "epoch": 3.073417818299979, - "grad_norm": 0.0012456843396648765, - "learning_rate": 0.00019999534098459414, - "loss": 46.0, - "step": 40198 - }, - { - "epoch": 3.0734942752833687, - "grad_norm": 0.0026640426367521286, - "learning_rate": 0.000199995340752732, - "loss": 46.0, - "step": 40199 - }, - { - "epoch": 3.0735707322667585, - "grad_norm": 0.0021019652485847473, - "learning_rate": 0.00019999534052086415, - "loss": 46.0, - "step": 40200 - }, - { - "epoch": 3.0736471892501482, - "grad_norm": 0.0023133703507483006, - "learning_rate": 0.0001999953402889905, - "loss": 46.0, - "step": 40201 - }, - { - "epoch": 3.073723646233538, - "grad_norm": 0.002192039042711258, - "learning_rate": 0.00019999534005711106, - "loss": 46.0, - "step": 40202 - }, - { - "epoch": 3.0738001032169278, - "grad_norm": 0.0012913175160065293, - "learning_rate": 0.0001999953398252259, - "loss": 46.0, - "step": 40203 - }, - { - "epoch": 3.0738765602003175, - "grad_norm": 0.0011938527459278703, - "learning_rate": 0.00019999533959333494, - "loss": 46.0, - "step": 40204 - }, - { - "epoch": 3.073953017183707, - "grad_norm": 0.004477009642869234, - "learning_rate": 0.00019999533936143821, - "loss": 46.0, - "step": 40205 - }, - { - "epoch": 3.0740294741670966, - "grad_norm": 0.0006661259685643017, - "learning_rate": 0.00019999533912953572, - "loss": 46.0, - "step": 40206 - }, - { - "epoch": 3.0741059311504864, - "grad_norm": 0.0007705508614890277, - "learning_rate": 0.00019999533889762748, - "loss": 46.0, - "step": 40207 - }, - { - "epoch": 3.074182388133876, - "grad_norm": 0.0027248139958828688, - "learning_rate": 0.00019999533866571346, - "loss": 46.0, - "step": 40208 - }, - { - "epoch": 3.074258845117266, - "grad_norm": 0.002002173801884055, - "learning_rate": 0.00019999533843379364, - "loss": 46.0, - "step": 40209 - }, - { - "epoch": 3.0743353021006556, - "grad_norm": 0.001847300329245627, - "learning_rate": 0.00019999533820186805, - "loss": 46.0, - "step": 40210 - }, - { - "epoch": 3.0744117590840454, - "grad_norm": 0.0011133237276226282, - "learning_rate": 0.00019999533796993672, - "loss": 46.0, - "step": 40211 - }, - { - "epoch": 3.074488216067435, - "grad_norm": 0.008085299283266068, - "learning_rate": 0.00019999533773799963, - "loss": 46.0, - "step": 40212 - }, - { - "epoch": 3.074564673050825, - "grad_norm": 0.0023897693026810884, - "learning_rate": 0.00019999533750605675, - "loss": 46.0, - "step": 40213 - }, - { - "epoch": 3.0746411300342147, - "grad_norm": 0.0013743591262027621, - "learning_rate": 0.0001999953372741081, - "loss": 46.0, - "step": 40214 - }, - { - "epoch": 3.074717587017604, - "grad_norm": 0.0028470742981880903, - "learning_rate": 0.00019999533704215367, - "loss": 46.0, - "step": 40215 - }, - { - "epoch": 3.0747940440009938, - "grad_norm": 0.005823993589729071, - "learning_rate": 0.0001999953368101935, - "loss": 46.0, - "step": 40216 - }, - { - "epoch": 3.0748705009843835, - "grad_norm": 0.002277072286233306, - "learning_rate": 0.00019999533657822757, - "loss": 46.0, - "step": 40217 - }, - { - "epoch": 3.0749469579677733, - "grad_norm": 0.0028651715256273746, - "learning_rate": 0.00019999533634625582, - "loss": 46.0, - "step": 40218 - }, - { - "epoch": 3.075023414951163, - "grad_norm": 0.0027364131528884172, - "learning_rate": 0.00019999533611427835, - "loss": 46.0, - "step": 40219 - }, - { - "epoch": 3.075099871934553, - "grad_norm": 0.0020083829294890165, - "learning_rate": 0.00019999533588229505, - "loss": 46.0, - "step": 40220 - }, - { - "epoch": 3.0751763289179426, - "grad_norm": 0.0016049712430685759, - "learning_rate": 0.00019999533565030606, - "loss": 46.0, - "step": 40221 - }, - { - "epoch": 3.0752527859013323, - "grad_norm": 0.0029372749850153923, - "learning_rate": 0.00019999533541831125, - "loss": 46.0, - "step": 40222 - }, - { - "epoch": 3.075329242884722, - "grad_norm": 0.0033449844922870398, - "learning_rate": 0.00019999533518631069, - "loss": 46.0, - "step": 40223 - }, - { - "epoch": 3.075405699868112, - "grad_norm": 0.00678646843880415, - "learning_rate": 0.00019999533495430435, - "loss": 46.0, - "step": 40224 - }, - { - "epoch": 3.0754821568515016, - "grad_norm": 0.0009200061322189867, - "learning_rate": 0.00019999533472229224, - "loss": 46.0, - "step": 40225 - }, - { - "epoch": 3.075558613834891, - "grad_norm": 0.0027682597283273935, - "learning_rate": 0.00019999533449027436, - "loss": 46.0, - "step": 40226 - }, - { - "epoch": 3.0756350708182807, - "grad_norm": 0.0021547572687268257, - "learning_rate": 0.00019999533425825073, - "loss": 46.0, - "step": 40227 - }, - { - "epoch": 3.0757115278016705, - "grad_norm": 0.0034392361994832754, - "learning_rate": 0.0001999953340262213, - "loss": 46.0, - "step": 40228 - }, - { - "epoch": 3.07578798478506, - "grad_norm": 0.0007730411016382277, - "learning_rate": 0.00019999533379418612, - "loss": 46.0, - "step": 40229 - }, - { - "epoch": 3.07586444176845, - "grad_norm": 0.0023892747703939676, - "learning_rate": 0.00019999533356214518, - "loss": 46.0, - "step": 40230 - }, - { - "epoch": 3.0759408987518397, - "grad_norm": 0.0017106481827795506, - "learning_rate": 0.00019999533333009843, - "loss": 46.0, - "step": 40231 - }, - { - "epoch": 3.0760173557352295, - "grad_norm": 0.0008947617025114596, - "learning_rate": 0.00019999533309804596, - "loss": 46.0, - "step": 40232 - }, - { - "epoch": 3.0760938127186193, - "grad_norm": 0.001231977017596364, - "learning_rate": 0.0001999953328659877, - "loss": 46.0, - "step": 40233 - }, - { - "epoch": 3.076170269702009, - "grad_norm": 0.008011367172002792, - "learning_rate": 0.00019999533263392367, - "loss": 46.0, - "step": 40234 - }, - { - "epoch": 3.076246726685399, - "grad_norm": 0.001852184534072876, - "learning_rate": 0.00019999533240185386, - "loss": 46.0, - "step": 40235 - }, - { - "epoch": 3.0763231836687885, - "grad_norm": 0.00785853248089552, - "learning_rate": 0.0001999953321697783, - "loss": 46.0, - "step": 40236 - }, - { - "epoch": 3.076399640652178, - "grad_norm": 0.0012617373140528798, - "learning_rate": 0.00019999533193769696, - "loss": 46.0, - "step": 40237 - }, - { - "epoch": 3.0764760976355676, - "grad_norm": 0.004136977251619101, - "learning_rate": 0.00019999533170560985, - "loss": 46.0, - "step": 40238 - }, - { - "epoch": 3.0765525546189574, - "grad_norm": 0.0024272422306239605, - "learning_rate": 0.00019999533147351697, - "loss": 46.0, - "step": 40239 - }, - { - "epoch": 3.076629011602347, - "grad_norm": 0.003413500962778926, - "learning_rate": 0.00019999533124141834, - "loss": 46.0, - "step": 40240 - }, - { - "epoch": 3.076705468585737, - "grad_norm": 0.002280991291627288, - "learning_rate": 0.00019999533100931394, - "loss": 46.0, - "step": 40241 - }, - { - "epoch": 3.0767819255691267, - "grad_norm": 0.005484189372509718, - "learning_rate": 0.00019999533077720374, - "loss": 46.0, - "step": 40242 - }, - { - "epoch": 3.0768583825525164, - "grad_norm": 0.004708023741841316, - "learning_rate": 0.0001999953305450878, - "loss": 46.0, - "step": 40243 - }, - { - "epoch": 3.076934839535906, - "grad_norm": 0.0008094694931060076, - "learning_rate": 0.00019999533031296606, - "loss": 46.0, - "step": 40244 - }, - { - "epoch": 3.077011296519296, - "grad_norm": 0.0012199186021462083, - "learning_rate": 0.0001999953300808386, - "loss": 46.0, - "step": 40245 - }, - { - "epoch": 3.0770877535026857, - "grad_norm": 0.002402589190751314, - "learning_rate": 0.0001999953298487053, - "loss": 46.0, - "step": 40246 - }, - { - "epoch": 3.0771642104860755, - "grad_norm": 0.002540545305237174, - "learning_rate": 0.00019999532961656629, - "loss": 46.0, - "step": 40247 - }, - { - "epoch": 3.077240667469465, - "grad_norm": 0.003308674320578575, - "learning_rate": 0.0001999953293844215, - "loss": 46.0, - "step": 40248 - }, - { - "epoch": 3.0773171244528545, - "grad_norm": 0.006334108766168356, - "learning_rate": 0.00019999532915227094, - "loss": 46.0, - "step": 40249 - }, - { - "epoch": 3.0773935814362443, - "grad_norm": 0.001276075141504407, - "learning_rate": 0.0001999953289201146, - "loss": 46.0, - "step": 40250 - }, - { - "epoch": 3.077470038419634, - "grad_norm": 0.0024477725382894278, - "learning_rate": 0.00019999532868795246, - "loss": 46.0, - "step": 40251 - }, - { - "epoch": 3.077546495403024, - "grad_norm": 0.0038398776669055223, - "learning_rate": 0.0001999953284557846, - "loss": 46.0, - "step": 40252 - }, - { - "epoch": 3.0776229523864136, - "grad_norm": 0.0033194429706782103, - "learning_rate": 0.00019999532822361098, - "loss": 46.0, - "step": 40253 - }, - { - "epoch": 3.0776994093698034, - "grad_norm": 0.0021062400192022324, - "learning_rate": 0.00019999532799143155, - "loss": 46.0, - "step": 40254 - }, - { - "epoch": 3.077775866353193, - "grad_norm": 0.0019021752523258328, - "learning_rate": 0.00019999532775924638, - "loss": 46.0, - "step": 40255 - }, - { - "epoch": 3.077852323336583, - "grad_norm": 0.0021422221325337887, - "learning_rate": 0.0001999953275270554, - "loss": 46.0, - "step": 40256 - }, - { - "epoch": 3.0779287803199726, - "grad_norm": 0.007943300530314445, - "learning_rate": 0.0001999953272948587, - "loss": 46.0, - "step": 40257 - }, - { - "epoch": 3.0780052373033624, - "grad_norm": 0.002355459611862898, - "learning_rate": 0.0001999953270626562, - "loss": 46.0, - "step": 40258 - }, - { - "epoch": 3.0780816942867517, - "grad_norm": 0.0021378209348767996, - "learning_rate": 0.00019999532683044794, - "loss": 46.0, - "step": 40259 - }, - { - "epoch": 3.0781581512701415, - "grad_norm": 0.003569244872778654, - "learning_rate": 0.00019999532659823393, - "loss": 46.0, - "step": 40260 - }, - { - "epoch": 3.0782346082535312, - "grad_norm": 0.004313362762331963, - "learning_rate": 0.00019999532636601411, - "loss": 46.0, - "step": 40261 - }, - { - "epoch": 3.078311065236921, - "grad_norm": 0.0017759567126631737, - "learning_rate": 0.00019999532613378858, - "loss": 46.0, - "step": 40262 - }, - { - "epoch": 3.0783875222203108, - "grad_norm": 0.002193351509049535, - "learning_rate": 0.00019999532590155722, - "loss": 46.0, - "step": 40263 - }, - { - "epoch": 3.0784639792037005, - "grad_norm": 0.0042168269865214825, - "learning_rate": 0.0001999953256693201, - "loss": 46.0, - "step": 40264 - }, - { - "epoch": 3.0785404361870903, - "grad_norm": 0.0035198985133320093, - "learning_rate": 0.00019999532543707723, - "loss": 46.0, - "step": 40265 - }, - { - "epoch": 3.07861689317048, - "grad_norm": 0.0043728286400437355, - "learning_rate": 0.0001999953252048286, - "loss": 46.0, - "step": 40266 - }, - { - "epoch": 3.07869335015387, - "grad_norm": 0.006337030790746212, - "learning_rate": 0.0001999953249725742, - "loss": 46.0, - "step": 40267 - }, - { - "epoch": 3.0787698071372596, - "grad_norm": 0.006617938168346882, - "learning_rate": 0.000199995324740314, - "loss": 46.0, - "step": 40268 - }, - { - "epoch": 3.0788462641206493, - "grad_norm": 0.0021102922037243843, - "learning_rate": 0.00019999532450804805, - "loss": 46.0, - "step": 40269 - }, - { - "epoch": 3.0789227211040386, - "grad_norm": 0.003457592334598303, - "learning_rate": 0.00019999532427577633, - "loss": 46.0, - "step": 40270 - }, - { - "epoch": 3.0789991780874284, - "grad_norm": 0.004359242040663958, - "learning_rate": 0.00019999532404349886, - "loss": 46.0, - "step": 40271 - }, - { - "epoch": 3.079075635070818, - "grad_norm": 0.001231861999258399, - "learning_rate": 0.0001999953238112156, - "loss": 46.0, - "step": 40272 - }, - { - "epoch": 3.079152092054208, - "grad_norm": 0.002020505489781499, - "learning_rate": 0.00019999532357892655, - "loss": 46.0, - "step": 40273 - }, - { - "epoch": 3.0792285490375977, - "grad_norm": 0.0017457961803302169, - "learning_rate": 0.00019999532334663176, - "loss": 46.0, - "step": 40274 - }, - { - "epoch": 3.0793050060209874, - "grad_norm": 0.0009132236009463668, - "learning_rate": 0.0001999953231143312, - "loss": 46.0, - "step": 40275 - }, - { - "epoch": 3.079381463004377, - "grad_norm": 0.003237878903746605, - "learning_rate": 0.00019999532288202487, - "loss": 46.0, - "step": 40276 - }, - { - "epoch": 3.079457919987767, - "grad_norm": 0.0009749418823048472, - "learning_rate": 0.00019999532264971276, - "loss": 46.0, - "step": 40277 - }, - { - "epoch": 3.0795343769711567, - "grad_norm": 0.0009238472557626665, - "learning_rate": 0.0001999953224173949, - "loss": 46.0, - "step": 40278 - }, - { - "epoch": 3.0796108339545465, - "grad_norm": 0.0025606260169297457, - "learning_rate": 0.00019999532218507125, - "loss": 46.0, - "step": 40279 - }, - { - "epoch": 3.0796872909379363, - "grad_norm": 0.00390832731500268, - "learning_rate": 0.00019999532195274182, - "loss": 46.0, - "step": 40280 - }, - { - "epoch": 3.0797637479213256, - "grad_norm": 0.0019516630563884974, - "learning_rate": 0.00019999532172040665, - "loss": 46.0, - "step": 40281 - }, - { - "epoch": 3.0798402049047153, - "grad_norm": 0.003210377646610141, - "learning_rate": 0.0001999953214880657, - "loss": 46.0, - "step": 40282 - }, - { - "epoch": 3.079916661888105, - "grad_norm": 0.002234569750726223, - "learning_rate": 0.00019999532125571898, - "loss": 46.0, - "step": 40283 - }, - { - "epoch": 3.079993118871495, - "grad_norm": 0.0014541083946824074, - "learning_rate": 0.00019999532102336648, - "loss": 46.0, - "step": 40284 - }, - { - "epoch": 3.0800695758548846, - "grad_norm": 0.0013030937407165766, - "learning_rate": 0.00019999532079100824, - "loss": 46.0, - "step": 40285 - }, - { - "epoch": 3.0801460328382744, - "grad_norm": 0.0012608846882358193, - "learning_rate": 0.00019999532055864423, - "loss": 46.0, - "step": 40286 - }, - { - "epoch": 3.080222489821664, - "grad_norm": 0.0012387470342218876, - "learning_rate": 0.00019999532032627442, - "loss": 46.0, - "step": 40287 - }, - { - "epoch": 3.080298946805054, - "grad_norm": 0.0013427947415038943, - "learning_rate": 0.00019999532009389885, - "loss": 46.0, - "step": 40288 - }, - { - "epoch": 3.0803754037884437, - "grad_norm": 0.0024567232467234135, - "learning_rate": 0.00019999531986151752, - "loss": 46.0, - "step": 40289 - }, - { - "epoch": 3.0804518607718334, - "grad_norm": 0.0012409926857799292, - "learning_rate": 0.00019999531962913044, - "loss": 46.0, - "step": 40290 - }, - { - "epoch": 3.080528317755223, - "grad_norm": 0.0030446844175457954, - "learning_rate": 0.00019999531939673756, - "loss": 46.0, - "step": 40291 - }, - { - "epoch": 3.0806047747386125, - "grad_norm": 0.002904205583035946, - "learning_rate": 0.00019999531916433888, - "loss": 46.0, - "step": 40292 - }, - { - "epoch": 3.0806812317220023, - "grad_norm": 0.005621998570859432, - "learning_rate": 0.00019999531893193448, - "loss": 46.0, - "step": 40293 - }, - { - "epoch": 3.080757688705392, - "grad_norm": 0.006240567658096552, - "learning_rate": 0.0001999953186995243, - "loss": 46.0, - "step": 40294 - }, - { - "epoch": 3.0808341456887818, - "grad_norm": 0.003941263537853956, - "learning_rate": 0.00019999531846710836, - "loss": 46.0, - "step": 40295 - }, - { - "epoch": 3.0809106026721715, - "grad_norm": 0.0013673343928530812, - "learning_rate": 0.00019999531823468663, - "loss": 46.0, - "step": 40296 - }, - { - "epoch": 3.0809870596555613, - "grad_norm": 0.002029367024078965, - "learning_rate": 0.00019999531800225917, - "loss": 46.0, - "step": 40297 - }, - { - "epoch": 3.081063516638951, - "grad_norm": 0.0007272011134773493, - "learning_rate": 0.0001999953177698259, - "loss": 46.0, - "step": 40298 - }, - { - "epoch": 3.081139973622341, - "grad_norm": 0.0014875493943691254, - "learning_rate": 0.00019999531753738686, - "loss": 46.0, - "step": 40299 - }, - { - "epoch": 3.0812164306057306, - "grad_norm": 0.00400215107947588, - "learning_rate": 0.00019999531730494207, - "loss": 46.0, - "step": 40300 - }, - { - "epoch": 3.0812928875891203, - "grad_norm": 0.0044781663455069065, - "learning_rate": 0.0001999953170724915, - "loss": 46.0, - "step": 40301 - }, - { - "epoch": 3.08136934457251, - "grad_norm": 0.002711773617193103, - "learning_rate": 0.00019999531684003515, - "loss": 46.0, - "step": 40302 - }, - { - "epoch": 3.0814458015558994, - "grad_norm": 0.001435677520930767, - "learning_rate": 0.00019999531660757307, - "loss": 46.0, - "step": 40303 - }, - { - "epoch": 3.081522258539289, - "grad_norm": 0.003028220497071743, - "learning_rate": 0.0001999953163751052, - "loss": 46.0, - "step": 40304 - }, - { - "epoch": 3.081598715522679, - "grad_norm": 0.0011879857629537582, - "learning_rate": 0.00019999531614263156, - "loss": 46.0, - "step": 40305 - }, - { - "epoch": 3.0816751725060687, - "grad_norm": 0.0015074124094098806, - "learning_rate": 0.00019999531591015214, - "loss": 46.0, - "step": 40306 - }, - { - "epoch": 3.0817516294894585, - "grad_norm": 0.00365912402048707, - "learning_rate": 0.00019999531567766696, - "loss": 46.0, - "step": 40307 - }, - { - "epoch": 3.0818280864728482, - "grad_norm": 0.004783504642546177, - "learning_rate": 0.00019999531544517601, - "loss": 46.0, - "step": 40308 - }, - { - "epoch": 3.081904543456238, - "grad_norm": 0.004315034952014685, - "learning_rate": 0.00019999531521267932, - "loss": 46.0, - "step": 40309 - }, - { - "epoch": 3.0819810004396277, - "grad_norm": 0.0016449636314064264, - "learning_rate": 0.0001999953149801768, - "loss": 46.0, - "step": 40310 - }, - { - "epoch": 3.0820574574230175, - "grad_norm": 0.0028042870108038187, - "learning_rate": 0.00019999531474766856, - "loss": 46.0, - "step": 40311 - }, - { - "epoch": 3.0821339144064073, - "grad_norm": 0.0009609071421436965, - "learning_rate": 0.00019999531451515452, - "loss": 46.0, - "step": 40312 - }, - { - "epoch": 3.082210371389797, - "grad_norm": 0.004199567716568708, - "learning_rate": 0.00019999531428263473, - "loss": 46.0, - "step": 40313 - }, - { - "epoch": 3.0822868283731863, - "grad_norm": 0.0023863951209932566, - "learning_rate": 0.00019999531405010917, - "loss": 46.0, - "step": 40314 - }, - { - "epoch": 3.082363285356576, - "grad_norm": 0.0009719316149130464, - "learning_rate": 0.00019999531381757787, - "loss": 46.0, - "step": 40315 - }, - { - "epoch": 3.082439742339966, - "grad_norm": 0.0022970030549913645, - "learning_rate": 0.00019999531358504076, - "loss": 46.0, - "step": 40316 - }, - { - "epoch": 3.0825161993233556, - "grad_norm": 0.0022823235485702753, - "learning_rate": 0.00019999531335249788, - "loss": 46.0, - "step": 40317 - }, - { - "epoch": 3.0825926563067454, - "grad_norm": 0.0020702488254755735, - "learning_rate": 0.00019999531311994922, - "loss": 46.0, - "step": 40318 - }, - { - "epoch": 3.082669113290135, - "grad_norm": 0.0018968835938721895, - "learning_rate": 0.00019999531288739482, - "loss": 46.0, - "step": 40319 - }, - { - "epoch": 3.082745570273525, - "grad_norm": 0.0012051343219354749, - "learning_rate": 0.00019999531265483465, - "loss": 46.0, - "step": 40320 - }, - { - "epoch": 3.0828220272569147, - "grad_norm": 0.0035440793726593256, - "learning_rate": 0.0001999953124222687, - "loss": 46.0, - "step": 40321 - }, - { - "epoch": 3.0828984842403044, - "grad_norm": 0.0036816748324781656, - "learning_rate": 0.00019999531218969696, - "loss": 46.0, - "step": 40322 - }, - { - "epoch": 3.082974941223694, - "grad_norm": 0.0004241867281962186, - "learning_rate": 0.0001999953119571195, - "loss": 46.0, - "step": 40323 - }, - { - "epoch": 3.0830513982070835, - "grad_norm": 0.001081703114323318, - "learning_rate": 0.00019999531172453622, - "loss": 46.0, - "step": 40324 - }, - { - "epoch": 3.0831278551904733, - "grad_norm": 0.0017208452336490154, - "learning_rate": 0.0001999953114919472, - "loss": 46.0, - "step": 40325 - }, - { - "epoch": 3.083204312173863, - "grad_norm": 0.0015251614386215806, - "learning_rate": 0.0001999953112593524, - "loss": 46.0, - "step": 40326 - }, - { - "epoch": 3.083280769157253, - "grad_norm": 0.002002124907448888, - "learning_rate": 0.00019999531102675184, - "loss": 46.0, - "step": 40327 - }, - { - "epoch": 3.0833572261406426, - "grad_norm": 0.0016916863387450576, - "learning_rate": 0.0001999953107941455, - "loss": 46.0, - "step": 40328 - }, - { - "epoch": 3.0834336831240323, - "grad_norm": 0.0018178432947024703, - "learning_rate": 0.0001999953105615334, - "loss": 46.0, - "step": 40329 - }, - { - "epoch": 3.083510140107422, - "grad_norm": 0.0015511164674535394, - "learning_rate": 0.00019999531032891554, - "loss": 46.0, - "step": 40330 - }, - { - "epoch": 3.083586597090812, - "grad_norm": 0.002958128694444895, - "learning_rate": 0.00019999531009629192, - "loss": 46.0, - "step": 40331 - }, - { - "epoch": 3.0836630540742016, - "grad_norm": 0.0012487999629229307, - "learning_rate": 0.0001999953098636625, - "loss": 46.0, - "step": 40332 - }, - { - "epoch": 3.0837395110575914, - "grad_norm": 0.00450740335509181, - "learning_rate": 0.00019999530963102732, - "loss": 46.0, - "step": 40333 - }, - { - "epoch": 3.083815968040981, - "grad_norm": 0.0007050912827253342, - "learning_rate": 0.00019999530939838634, - "loss": 46.0, - "step": 40334 - }, - { - "epoch": 3.083892425024371, - "grad_norm": 0.0030002491548657417, - "learning_rate": 0.00019999530916573965, - "loss": 46.0, - "step": 40335 - }, - { - "epoch": 3.08396888200776, - "grad_norm": 0.003482536878436804, - "learning_rate": 0.00019999530893308719, - "loss": 46.0, - "step": 40336 - }, - { - "epoch": 3.08404533899115, - "grad_norm": 0.001370471902191639, - "learning_rate": 0.00019999530870042892, - "loss": 46.0, - "step": 40337 - }, - { - "epoch": 3.0841217959745397, - "grad_norm": 0.0022118911147117615, - "learning_rate": 0.00019999530846776488, - "loss": 46.0, - "step": 40338 - }, - { - "epoch": 3.0841982529579295, - "grad_norm": 0.0016165869310498238, - "learning_rate": 0.0001999953082350951, - "loss": 46.0, - "step": 40339 - }, - { - "epoch": 3.0842747099413192, - "grad_norm": 0.0027087293565273285, - "learning_rate": 0.00019999530800241953, - "loss": 46.0, - "step": 40340 - }, - { - "epoch": 3.084351166924709, - "grad_norm": 0.0005215605488047004, - "learning_rate": 0.0001999953077697382, - "loss": 46.0, - "step": 40341 - }, - { - "epoch": 3.0844276239080988, - "grad_norm": 0.002495115390047431, - "learning_rate": 0.0001999953075370511, - "loss": 46.0, - "step": 40342 - }, - { - "epoch": 3.0845040808914885, - "grad_norm": 0.0009297642973251641, - "learning_rate": 0.00019999530730435821, - "loss": 46.0, - "step": 40343 - }, - { - "epoch": 3.0845805378748783, - "grad_norm": 0.0008987291366793215, - "learning_rate": 0.0001999953070716596, - "loss": 46.0, - "step": 40344 - }, - { - "epoch": 3.084656994858268, - "grad_norm": 0.00212862784974277, - "learning_rate": 0.0001999953068389552, - "loss": 46.0, - "step": 40345 - }, - { - "epoch": 3.0847334518416574, - "grad_norm": 0.0017014248296618462, - "learning_rate": 0.000199995306606245, - "loss": 46.0, - "step": 40346 - }, - { - "epoch": 3.084809908825047, - "grad_norm": 0.0033167649526149035, - "learning_rate": 0.00019999530637352904, - "loss": 46.0, - "step": 40347 - }, - { - "epoch": 3.084886365808437, - "grad_norm": 0.004921542480587959, - "learning_rate": 0.00019999530614080735, - "loss": 46.0, - "step": 40348 - }, - { - "epoch": 3.0849628227918267, - "grad_norm": 0.0012904637260362506, - "learning_rate": 0.00019999530590807986, - "loss": 46.0, - "step": 40349 - }, - { - "epoch": 3.0850392797752164, - "grad_norm": 0.001202658866532147, - "learning_rate": 0.00019999530567534662, - "loss": 46.0, - "step": 40350 - }, - { - "epoch": 3.085115736758606, - "grad_norm": 0.002073245123028755, - "learning_rate": 0.00019999530544260758, - "loss": 46.0, - "step": 40351 - }, - { - "epoch": 3.085192193741996, - "grad_norm": 0.0009821204002946615, - "learning_rate": 0.0001999953052098628, - "loss": 46.0, - "step": 40352 - }, - { - "epoch": 3.0852686507253857, - "grad_norm": 0.0010281954891979694, - "learning_rate": 0.0001999953049771122, - "loss": 46.0, - "step": 40353 - }, - { - "epoch": 3.0853451077087755, - "grad_norm": 0.001977123087272048, - "learning_rate": 0.0001999953047443559, - "loss": 46.0, - "step": 40354 - }, - { - "epoch": 3.085421564692165, - "grad_norm": 0.002471043262630701, - "learning_rate": 0.0001999953045115938, - "loss": 46.0, - "step": 40355 - }, - { - "epoch": 3.085498021675555, - "grad_norm": 0.002482653595507145, - "learning_rate": 0.00019999530427882594, - "loss": 46.0, - "step": 40356 - }, - { - "epoch": 3.0855744786589443, - "grad_norm": 0.0036249703262001276, - "learning_rate": 0.00019999530404605226, - "loss": 46.0, - "step": 40357 - }, - { - "epoch": 3.085650935642334, - "grad_norm": 0.0012411470524966717, - "learning_rate": 0.00019999530381327286, - "loss": 46.0, - "step": 40358 - }, - { - "epoch": 3.085727392625724, - "grad_norm": 0.0018603705102577806, - "learning_rate": 0.0001999953035804877, - "loss": 46.0, - "step": 40359 - }, - { - "epoch": 3.0858038496091136, - "grad_norm": 0.003228815970942378, - "learning_rate": 0.00019999530334769677, - "loss": 46.0, - "step": 40360 - }, - { - "epoch": 3.0858803065925033, - "grad_norm": 0.0012322701513767242, - "learning_rate": 0.00019999530311490005, - "loss": 46.0, - "step": 40361 - }, - { - "epoch": 3.085956763575893, - "grad_norm": 0.0008629984804429114, - "learning_rate": 0.00019999530288209756, - "loss": 46.0, - "step": 40362 - }, - { - "epoch": 3.086033220559283, - "grad_norm": 0.0023042005486786366, - "learning_rate": 0.0001999953026492893, - "loss": 46.0, - "step": 40363 - }, - { - "epoch": 3.0861096775426726, - "grad_norm": 0.003802621504291892, - "learning_rate": 0.00019999530241647526, - "loss": 46.0, - "step": 40364 - }, - { - "epoch": 3.0861861345260624, - "grad_norm": 0.016155540943145752, - "learning_rate": 0.0001999953021836555, - "loss": 46.0, - "step": 40365 - }, - { - "epoch": 3.086262591509452, - "grad_norm": 0.003434145823121071, - "learning_rate": 0.0001999953019508299, - "loss": 46.0, - "step": 40366 - }, - { - "epoch": 3.086339048492842, - "grad_norm": 0.0023876624181866646, - "learning_rate": 0.00019999530171799858, - "loss": 46.0, - "step": 40367 - }, - { - "epoch": 3.0864155054762312, - "grad_norm": 0.0014396545011550188, - "learning_rate": 0.00019999530148516147, - "loss": 46.0, - "step": 40368 - }, - { - "epoch": 3.086491962459621, - "grad_norm": 0.0015674050664529204, - "learning_rate": 0.00019999530125231862, - "loss": 46.0, - "step": 40369 - }, - { - "epoch": 3.0865684194430107, - "grad_norm": 0.0022144687827676535, - "learning_rate": 0.00019999530101947, - "loss": 46.0, - "step": 40370 - }, - { - "epoch": 3.0866448764264005, - "grad_norm": 0.0014323288341984153, - "learning_rate": 0.0001999953007866156, - "loss": 46.0, - "step": 40371 - }, - { - "epoch": 3.0867213334097903, - "grad_norm": 0.005419464781880379, - "learning_rate": 0.0001999953005537554, - "loss": 46.0, - "step": 40372 - }, - { - "epoch": 3.08679779039318, - "grad_norm": 0.0011133495718240738, - "learning_rate": 0.00019999530032088948, - "loss": 46.0, - "step": 40373 - }, - { - "epoch": 3.08687424737657, - "grad_norm": 0.0015525479102507234, - "learning_rate": 0.00019999530008801773, - "loss": 46.0, - "step": 40374 - }, - { - "epoch": 3.0869507043599596, - "grad_norm": 0.001559942844323814, - "learning_rate": 0.00019999529985514024, - "loss": 46.0, - "step": 40375 - }, - { - "epoch": 3.0870271613433493, - "grad_norm": 0.0014576624380424619, - "learning_rate": 0.000199995299622257, - "loss": 46.0, - "step": 40376 - }, - { - "epoch": 3.087103618326739, - "grad_norm": 0.0007901376811787486, - "learning_rate": 0.000199995299389368, - "loss": 46.0, - "step": 40377 - }, - { - "epoch": 3.087180075310129, - "grad_norm": 0.004678662866353989, - "learning_rate": 0.00019999529915647318, - "loss": 46.0, - "step": 40378 - }, - { - "epoch": 3.087256532293518, - "grad_norm": 0.0023544635623693466, - "learning_rate": 0.00019999529892357262, - "loss": 46.0, - "step": 40379 - }, - { - "epoch": 3.087332989276908, - "grad_norm": 0.0038698078133165836, - "learning_rate": 0.00019999529869066632, - "loss": 46.0, - "step": 40380 - }, - { - "epoch": 3.0874094462602977, - "grad_norm": 0.0006970096728764474, - "learning_rate": 0.0001999952984577542, - "loss": 46.0, - "step": 40381 - }, - { - "epoch": 3.0874859032436874, - "grad_norm": 0.0023959896061569452, - "learning_rate": 0.0001999952982248363, - "loss": 46.0, - "step": 40382 - }, - { - "epoch": 3.087562360227077, - "grad_norm": 0.003075435757637024, - "learning_rate": 0.00019999529799191268, - "loss": 46.0, - "step": 40383 - }, - { - "epoch": 3.087638817210467, - "grad_norm": 0.006638146471232176, - "learning_rate": 0.00019999529775898328, - "loss": 46.0, - "step": 40384 - }, - { - "epoch": 3.0877152741938567, - "grad_norm": 0.005111514590680599, - "learning_rate": 0.0001999952975260481, - "loss": 46.0, - "step": 40385 - }, - { - "epoch": 3.0877917311772465, - "grad_norm": 0.0010137214558199048, - "learning_rate": 0.00019999529729310716, - "loss": 46.0, - "step": 40386 - }, - { - "epoch": 3.0878681881606362, - "grad_norm": 0.0022372484672814608, - "learning_rate": 0.00019999529706016045, - "loss": 46.0, - "step": 40387 - }, - { - "epoch": 3.087944645144026, - "grad_norm": 0.004143637605011463, - "learning_rate": 0.00019999529682720798, - "loss": 46.0, - "step": 40388 - }, - { - "epoch": 3.0880211021274158, - "grad_norm": 0.0009678576025180519, - "learning_rate": 0.00019999529659424971, - "loss": 46.0, - "step": 40389 - }, - { - "epoch": 3.088097559110805, - "grad_norm": 0.0010885283118113875, - "learning_rate": 0.00019999529636128568, - "loss": 46.0, - "step": 40390 - }, - { - "epoch": 3.088174016094195, - "grad_norm": 0.003515888936817646, - "learning_rate": 0.0001999952961283159, - "loss": 46.0, - "step": 40391 - }, - { - "epoch": 3.0882504730775846, - "grad_norm": 0.0020951409824192524, - "learning_rate": 0.00019999529589534033, - "loss": 46.0, - "step": 40392 - }, - { - "epoch": 3.0883269300609744, - "grad_norm": 0.000930809008423239, - "learning_rate": 0.00019999529566235903, - "loss": 46.0, - "step": 40393 - }, - { - "epoch": 3.088403387044364, - "grad_norm": 0.0016526360996067524, - "learning_rate": 0.00019999529542937192, - "loss": 46.0, - "step": 40394 - }, - { - "epoch": 3.088479844027754, - "grad_norm": 0.0026023746468126774, - "learning_rate": 0.00019999529519637904, - "loss": 46.0, - "step": 40395 - }, - { - "epoch": 3.0885563010111436, - "grad_norm": 0.0027975630946457386, - "learning_rate": 0.00019999529496338042, - "loss": 46.0, - "step": 40396 - }, - { - "epoch": 3.0886327579945334, - "grad_norm": 0.0013455899897962809, - "learning_rate": 0.000199995294730376, - "loss": 46.0, - "step": 40397 - }, - { - "epoch": 3.088709214977923, - "grad_norm": 0.0019090435234829783, - "learning_rate": 0.00019999529449736582, - "loss": 46.0, - "step": 40398 - }, - { - "epoch": 3.088785671961313, - "grad_norm": 0.0005486995214596391, - "learning_rate": 0.00019999529426434988, - "loss": 46.0, - "step": 40399 - }, - { - "epoch": 3.0888621289447027, - "grad_norm": 0.004117876756936312, - "learning_rate": 0.00019999529403132816, - "loss": 46.0, - "step": 40400 - }, - { - "epoch": 3.088938585928092, - "grad_norm": 0.0032686328049749136, - "learning_rate": 0.0001999952937983007, - "loss": 46.0, - "step": 40401 - }, - { - "epoch": 3.0890150429114818, - "grad_norm": 0.0035158058162778616, - "learning_rate": 0.00019999529356526743, - "loss": 46.0, - "step": 40402 - }, - { - "epoch": 3.0890914998948715, - "grad_norm": 0.004890000447630882, - "learning_rate": 0.00019999529333222842, - "loss": 46.0, - "step": 40403 - }, - { - "epoch": 3.0891679568782613, - "grad_norm": 0.004670220892876387, - "learning_rate": 0.0001999952930991836, - "loss": 46.0, - "step": 40404 - }, - { - "epoch": 3.089244413861651, - "grad_norm": 0.0013623174745589495, - "learning_rate": 0.00019999529286613305, - "loss": 46.0, - "step": 40405 - }, - { - "epoch": 3.089320870845041, - "grad_norm": 0.005032017361372709, - "learning_rate": 0.00019999529263307672, - "loss": 46.0, - "step": 40406 - }, - { - "epoch": 3.0893973278284306, - "grad_norm": 0.01621577888727188, - "learning_rate": 0.0001999952924000146, - "loss": 46.0, - "step": 40407 - }, - { - "epoch": 3.0894737848118203, - "grad_norm": 0.0034426331985741854, - "learning_rate": 0.00019999529216694676, - "loss": 46.0, - "step": 40408 - }, - { - "epoch": 3.08955024179521, - "grad_norm": 0.0029967075679451227, - "learning_rate": 0.00019999529193387314, - "loss": 46.0, - "step": 40409 - }, - { - "epoch": 3.0896266987786, - "grad_norm": 0.0023160045966506004, - "learning_rate": 0.0001999952917007937, - "loss": 46.0, - "step": 40410 - }, - { - "epoch": 3.0897031557619896, - "grad_norm": 0.0014055754290893674, - "learning_rate": 0.00019999529146770857, - "loss": 46.0, - "step": 40411 - }, - { - "epoch": 3.089779612745379, - "grad_norm": 0.0037570511922240257, - "learning_rate": 0.0001999952912346176, - "loss": 46.0, - "step": 40412 - }, - { - "epoch": 3.0898560697287687, - "grad_norm": 0.007220292929559946, - "learning_rate": 0.0001999952910015209, - "loss": 46.0, - "step": 40413 - }, - { - "epoch": 3.0899325267121585, - "grad_norm": 0.0010095554171130061, - "learning_rate": 0.00019999529076841842, - "loss": 46.0, - "step": 40414 - }, - { - "epoch": 3.090008983695548, - "grad_norm": 0.00925536174327135, - "learning_rate": 0.00019999529053531018, - "loss": 46.0, - "step": 40415 - }, - { - "epoch": 3.090085440678938, - "grad_norm": 0.004097710829228163, - "learning_rate": 0.0001999952903021961, - "loss": 46.0, - "step": 40416 - }, - { - "epoch": 3.0901618976623277, - "grad_norm": 0.003129548393189907, - "learning_rate": 0.00019999529006907633, - "loss": 46.0, - "step": 40417 - }, - { - "epoch": 3.0902383546457175, - "grad_norm": 0.0015607367968186736, - "learning_rate": 0.0001999952898359508, - "loss": 46.0, - "step": 40418 - }, - { - "epoch": 3.0903148116291073, - "grad_norm": 0.0013155147898942232, - "learning_rate": 0.00019999528960281947, - "loss": 46.0, - "step": 40419 - }, - { - "epoch": 3.090391268612497, - "grad_norm": 0.0009990620892494917, - "learning_rate": 0.00019999528936968236, - "loss": 46.0, - "step": 40420 - }, - { - "epoch": 3.090467725595887, - "grad_norm": 0.0015065952902659774, - "learning_rate": 0.00019999528913653948, - "loss": 46.0, - "step": 40421 - }, - { - "epoch": 3.0905441825792765, - "grad_norm": 0.0021842126734554768, - "learning_rate": 0.0001999952889033909, - "loss": 46.0, - "step": 40422 - }, - { - "epoch": 3.090620639562666, - "grad_norm": 0.0026607741601765156, - "learning_rate": 0.00019999528867023644, - "loss": 46.0, - "step": 40423 - }, - { - "epoch": 3.0906970965460556, - "grad_norm": 0.004283210728317499, - "learning_rate": 0.0001999952884370763, - "loss": 46.0, - "step": 40424 - }, - { - "epoch": 3.0907735535294454, - "grad_norm": 0.0035749301314353943, - "learning_rate": 0.00019999528820391035, - "loss": 46.0, - "step": 40425 - }, - { - "epoch": 3.090850010512835, - "grad_norm": 0.0013756649568676949, - "learning_rate": 0.00019999528797073863, - "loss": 46.0, - "step": 40426 - }, - { - "epoch": 3.090926467496225, - "grad_norm": 0.0015635115560144186, - "learning_rate": 0.00019999528773756114, - "loss": 46.0, - "step": 40427 - }, - { - "epoch": 3.0910029244796147, - "grad_norm": 0.0023453370667994022, - "learning_rate": 0.00019999528750437787, - "loss": 46.0, - "step": 40428 - }, - { - "epoch": 3.0910793814630044, - "grad_norm": 0.0028312159702181816, - "learning_rate": 0.0001999952872711889, - "loss": 46.0, - "step": 40429 - }, - { - "epoch": 3.091155838446394, - "grad_norm": 0.004746130667626858, - "learning_rate": 0.00019999528703799408, - "loss": 46.0, - "step": 40430 - }, - { - "epoch": 3.091232295429784, - "grad_norm": 0.0032440631184726954, - "learning_rate": 0.00019999528680479352, - "loss": 46.0, - "step": 40431 - }, - { - "epoch": 3.0913087524131737, - "grad_norm": 0.002936146454885602, - "learning_rate": 0.0001999952865715872, - "loss": 46.0, - "step": 40432 - }, - { - "epoch": 3.0913852093965635, - "grad_norm": 0.0032761215697973967, - "learning_rate": 0.00019999528633837511, - "loss": 46.0, - "step": 40433 - }, - { - "epoch": 3.091461666379953, - "grad_norm": 0.0014739279868081212, - "learning_rate": 0.00019999528610515724, - "loss": 46.0, - "step": 40434 - }, - { - "epoch": 3.0915381233633425, - "grad_norm": 0.003040981711819768, - "learning_rate": 0.0001999952858719336, - "loss": 46.0, - "step": 40435 - }, - { - "epoch": 3.0916145803467323, - "grad_norm": 0.0017576297977939248, - "learning_rate": 0.0001999952856387042, - "loss": 46.0, - "step": 40436 - }, - { - "epoch": 3.091691037330122, - "grad_norm": 0.003477106336504221, - "learning_rate": 0.00019999528540546902, - "loss": 46.0, - "step": 40437 - }, - { - "epoch": 3.091767494313512, - "grad_norm": 0.0009176989551633596, - "learning_rate": 0.00019999528517222807, - "loss": 46.0, - "step": 40438 - }, - { - "epoch": 3.0918439512969016, - "grad_norm": 0.0019836705178022385, - "learning_rate": 0.00019999528493898136, - "loss": 46.0, - "step": 40439 - }, - { - "epoch": 3.0919204082802914, - "grad_norm": 0.0017348227556794882, - "learning_rate": 0.00019999528470572887, - "loss": 46.0, - "step": 40440 - }, - { - "epoch": 3.091996865263681, - "grad_norm": 0.003862952347844839, - "learning_rate": 0.00019999528447247066, - "loss": 46.0, - "step": 40441 - }, - { - "epoch": 3.092073322247071, - "grad_norm": 0.0046743908897042274, - "learning_rate": 0.00019999528423920665, - "loss": 46.0, - "step": 40442 - }, - { - "epoch": 3.0921497792304606, - "grad_norm": 0.0022646833676844835, - "learning_rate": 0.00019999528400593684, - "loss": 46.0, - "step": 40443 - }, - { - "epoch": 3.0922262362138504, - "grad_norm": 0.0016290085623040795, - "learning_rate": 0.00019999528377266128, - "loss": 46.0, - "step": 40444 - }, - { - "epoch": 3.0923026931972397, - "grad_norm": 0.003084276570007205, - "learning_rate": 0.00019999528353937995, - "loss": 46.0, - "step": 40445 - }, - { - "epoch": 3.0923791501806295, - "grad_norm": 0.0016924918163567781, - "learning_rate": 0.00019999528330609284, - "loss": 46.0, - "step": 40446 - }, - { - "epoch": 3.0924556071640192, - "grad_norm": 0.005916125141084194, - "learning_rate": 0.00019999528307279997, - "loss": 46.0, - "step": 40447 - }, - { - "epoch": 3.092532064147409, - "grad_norm": 0.0018066663760691881, - "learning_rate": 0.00019999528283950134, - "loss": 46.0, - "step": 40448 - }, - { - "epoch": 3.0926085211307988, - "grad_norm": 0.0005061979172751307, - "learning_rate": 0.00019999528260619695, - "loss": 46.0, - "step": 40449 - }, - { - "epoch": 3.0926849781141885, - "grad_norm": 0.0031908699311316013, - "learning_rate": 0.00019999528237288678, - "loss": 46.0, - "step": 40450 - }, - { - "epoch": 3.0927614350975783, - "grad_norm": 0.0023815890308469534, - "learning_rate": 0.00019999528213957083, - "loss": 46.0, - "step": 40451 - }, - { - "epoch": 3.092837892080968, - "grad_norm": 0.004095599055290222, - "learning_rate": 0.00019999528190624912, - "loss": 46.0, - "step": 40452 - }, - { - "epoch": 3.092914349064358, - "grad_norm": 0.011234685778617859, - "learning_rate": 0.00019999528167292163, - "loss": 46.0, - "step": 40453 - }, - { - "epoch": 3.0929908060477476, - "grad_norm": 0.005430188961327076, - "learning_rate": 0.00019999528143958842, - "loss": 46.0, - "step": 40454 - }, - { - "epoch": 3.093067263031137, - "grad_norm": 0.003194117220118642, - "learning_rate": 0.00019999528120624938, - "loss": 46.0, - "step": 40455 - }, - { - "epoch": 3.0931437200145266, - "grad_norm": 0.0016319730784744024, - "learning_rate": 0.0001999952809729046, - "loss": 46.0, - "step": 40456 - }, - { - "epoch": 3.0932201769979164, - "grad_norm": 0.0015326309949159622, - "learning_rate": 0.000199995280739554, - "loss": 46.0, - "step": 40457 - }, - { - "epoch": 3.093296633981306, - "grad_norm": 0.0029314046259969473, - "learning_rate": 0.0001999952805061977, - "loss": 46.0, - "step": 40458 - }, - { - "epoch": 3.093373090964696, - "grad_norm": 0.003492429619655013, - "learning_rate": 0.0001999952802728356, - "loss": 46.0, - "step": 40459 - }, - { - "epoch": 3.0934495479480857, - "grad_norm": 0.004148657899349928, - "learning_rate": 0.00019999528003946776, - "loss": 46.0, - "step": 40460 - }, - { - "epoch": 3.0935260049314754, - "grad_norm": 0.0038597926031798124, - "learning_rate": 0.0001999952798060941, - "loss": 46.0, - "step": 40461 - }, - { - "epoch": 3.093602461914865, - "grad_norm": 0.0046270452439785, - "learning_rate": 0.0001999952795727147, - "loss": 46.0, - "step": 40462 - }, - { - "epoch": 3.093678918898255, - "grad_norm": 0.0013523029629141092, - "learning_rate": 0.00019999527933932954, - "loss": 46.0, - "step": 40463 - }, - { - "epoch": 3.0937553758816447, - "grad_norm": 0.003149408381432295, - "learning_rate": 0.0001999952791059386, - "loss": 46.0, - "step": 40464 - }, - { - "epoch": 3.0938318328650345, - "grad_norm": 0.0022788397036492825, - "learning_rate": 0.00019999527887254188, - "loss": 46.0, - "step": 40465 - }, - { - "epoch": 3.0939082898484243, - "grad_norm": 0.006475043948739767, - "learning_rate": 0.00019999527863913942, - "loss": 46.0, - "step": 40466 - }, - { - "epoch": 3.0939847468318136, - "grad_norm": 0.002389322267845273, - "learning_rate": 0.00019999527840573115, - "loss": 46.0, - "step": 40467 - }, - { - "epoch": 3.0940612038152033, - "grad_norm": 0.0016802245518192649, - "learning_rate": 0.00019999527817231714, - "loss": 46.0, - "step": 40468 - }, - { - "epoch": 3.094137660798593, - "grad_norm": 0.002319866558536887, - "learning_rate": 0.00019999527793889733, - "loss": 46.0, - "step": 40469 - }, - { - "epoch": 3.094214117781983, - "grad_norm": 0.004884062334895134, - "learning_rate": 0.0001999952777054718, - "loss": 46.0, - "step": 40470 - }, - { - "epoch": 3.0942905747653726, - "grad_norm": 0.0015341215766966343, - "learning_rate": 0.00019999527747204047, - "loss": 46.0, - "step": 40471 - }, - { - "epoch": 3.0943670317487624, - "grad_norm": 0.0030958065763115883, - "learning_rate": 0.0001999952772386034, - "loss": 46.0, - "step": 40472 - }, - { - "epoch": 3.094443488732152, - "grad_norm": 0.0034751540515571833, - "learning_rate": 0.00019999527700516052, - "loss": 46.0, - "step": 40473 - }, - { - "epoch": 3.094519945715542, - "grad_norm": 0.0017490205354988575, - "learning_rate": 0.00019999527677171187, - "loss": 46.0, - "step": 40474 - }, - { - "epoch": 3.0945964026989317, - "grad_norm": 0.0012396344682201743, - "learning_rate": 0.00019999527653825747, - "loss": 46.0, - "step": 40475 - }, - { - "epoch": 3.0946728596823214, - "grad_norm": 0.0016794325783848763, - "learning_rate": 0.0001999952763047973, - "loss": 46.0, - "step": 40476 - }, - { - "epoch": 3.0947493166657107, - "grad_norm": 0.0033042433205991983, - "learning_rate": 0.00019999527607133136, - "loss": 46.0, - "step": 40477 - }, - { - "epoch": 3.0948257736491005, - "grad_norm": 0.0012796861119568348, - "learning_rate": 0.00019999527583785965, - "loss": 46.0, - "step": 40478 - }, - { - "epoch": 3.0949022306324903, - "grad_norm": 0.0008166969055309892, - "learning_rate": 0.00019999527560438218, - "loss": 46.0, - "step": 40479 - }, - { - "epoch": 3.09497868761588, - "grad_norm": 0.001249453634954989, - "learning_rate": 0.00019999527537089892, - "loss": 46.0, - "step": 40480 - }, - { - "epoch": 3.09505514459927, - "grad_norm": 0.006361784413456917, - "learning_rate": 0.0001999952751374099, - "loss": 46.0, - "step": 40481 - }, - { - "epoch": 3.0951316015826595, - "grad_norm": 0.0005817991332150996, - "learning_rate": 0.00019999527490391513, - "loss": 46.0, - "step": 40482 - }, - { - "epoch": 3.0952080585660493, - "grad_norm": 0.0021973224356770515, - "learning_rate": 0.00019999527467041457, - "loss": 46.0, - "step": 40483 - }, - { - "epoch": 3.095284515549439, - "grad_norm": 0.001319780945777893, - "learning_rate": 0.00019999527443690824, - "loss": 46.0, - "step": 40484 - }, - { - "epoch": 3.095360972532829, - "grad_norm": 0.004355449695140123, - "learning_rate": 0.00019999527420339614, - "loss": 46.0, - "step": 40485 - }, - { - "epoch": 3.0954374295162186, - "grad_norm": 0.0012352505000308156, - "learning_rate": 0.00019999527396987826, - "loss": 46.0, - "step": 40486 - }, - { - "epoch": 3.0955138864996083, - "grad_norm": 0.005594301037490368, - "learning_rate": 0.00019999527373635464, - "loss": 46.0, - "step": 40487 - }, - { - "epoch": 3.0955903434829977, - "grad_norm": 0.002911853604018688, - "learning_rate": 0.00019999527350282524, - "loss": 46.0, - "step": 40488 - }, - { - "epoch": 3.0956668004663874, - "grad_norm": 0.0021229595877230167, - "learning_rate": 0.00019999527326929008, - "loss": 46.0, - "step": 40489 - }, - { - "epoch": 3.095743257449777, - "grad_norm": 0.0018272852757945657, - "learning_rate": 0.00019999527303574913, - "loss": 46.0, - "step": 40490 - }, - { - "epoch": 3.095819714433167, - "grad_norm": 0.008383817039430141, - "learning_rate": 0.00019999527280220242, - "loss": 46.0, - "step": 40491 - }, - { - "epoch": 3.0958961714165567, - "grad_norm": 0.008791478350758553, - "learning_rate": 0.00019999527256864993, - "loss": 46.0, - "step": 40492 - }, - { - "epoch": 3.0959726283999465, - "grad_norm": 0.0010263097938150167, - "learning_rate": 0.0001999952723350917, - "loss": 46.0, - "step": 40493 - }, - { - "epoch": 3.0960490853833362, - "grad_norm": 0.0017859783256426454, - "learning_rate": 0.00019999527210152768, - "loss": 46.0, - "step": 40494 - }, - { - "epoch": 3.096125542366726, - "grad_norm": 0.0024867348838597536, - "learning_rate": 0.0001999952718679579, - "loss": 46.0, - "step": 40495 - }, - { - "epoch": 3.0962019993501158, - "grad_norm": 0.0018216186435893178, - "learning_rate": 0.00019999527163438234, - "loss": 46.0, - "step": 40496 - }, - { - "epoch": 3.0962784563335055, - "grad_norm": 0.001453619566746056, - "learning_rate": 0.00019999527140080102, - "loss": 46.0, - "step": 40497 - }, - { - "epoch": 3.0963549133168953, - "grad_norm": 0.0053321560844779015, - "learning_rate": 0.00019999527116721394, - "loss": 46.0, - "step": 40498 - }, - { - "epoch": 3.0964313703002846, - "grad_norm": 0.004216715693473816, - "learning_rate": 0.00019999527093362106, - "loss": 46.0, - "step": 40499 - }, - { - "epoch": 3.0965078272836744, - "grad_norm": 0.0029483388643711805, - "learning_rate": 0.00019999527070002241, - "loss": 46.0, - "step": 40500 - }, - { - "epoch": 3.096584284267064, - "grad_norm": 0.0028659417293965816, - "learning_rate": 0.00019999527046641805, - "loss": 46.0, - "step": 40501 - }, - { - "epoch": 3.096660741250454, - "grad_norm": 0.0012636876199394464, - "learning_rate": 0.00019999527023280785, - "loss": 46.0, - "step": 40502 - }, - { - "epoch": 3.0967371982338436, - "grad_norm": 0.001527530374005437, - "learning_rate": 0.0001999952699991919, - "loss": 46.0, - "step": 40503 - }, - { - "epoch": 3.0968136552172334, - "grad_norm": 0.0014225122286006808, - "learning_rate": 0.00019999526976557022, - "loss": 46.0, - "step": 40504 - }, - { - "epoch": 3.096890112200623, - "grad_norm": 0.0010207727318629622, - "learning_rate": 0.00019999526953194273, - "loss": 46.0, - "step": 40505 - }, - { - "epoch": 3.096966569184013, - "grad_norm": 0.0009988220408558846, - "learning_rate": 0.0001999952692983095, - "loss": 46.0, - "step": 40506 - }, - { - "epoch": 3.0970430261674027, - "grad_norm": 0.0025540627539157867, - "learning_rate": 0.00019999526906467049, - "loss": 46.0, - "step": 40507 - }, - { - "epoch": 3.0971194831507924, - "grad_norm": 0.0008108865586109459, - "learning_rate": 0.00019999526883102568, - "loss": 46.0, - "step": 40508 - }, - { - "epoch": 3.097195940134182, - "grad_norm": 0.0016464785439893603, - "learning_rate": 0.00019999526859737512, - "loss": 46.0, - "step": 40509 - }, - { - "epoch": 3.0972723971175715, - "grad_norm": 0.004571177065372467, - "learning_rate": 0.00019999526836371882, - "loss": 46.0, - "step": 40510 - }, - { - "epoch": 3.0973488541009613, - "grad_norm": 0.0011320440098643303, - "learning_rate": 0.00019999526813005672, - "loss": 46.0, - "step": 40511 - }, - { - "epoch": 3.097425311084351, - "grad_norm": 0.0017163710435852408, - "learning_rate": 0.00019999526789638887, - "loss": 46.0, - "step": 40512 - }, - { - "epoch": 3.097501768067741, - "grad_norm": 0.0050961934030056, - "learning_rate": 0.00019999526766271522, - "loss": 46.0, - "step": 40513 - }, - { - "epoch": 3.0975782250511306, - "grad_norm": 0.0007164383423514664, - "learning_rate": 0.00019999526742903582, - "loss": 46.0, - "step": 40514 - }, - { - "epoch": 3.0976546820345203, - "grad_norm": 0.0020096651278436184, - "learning_rate": 0.00019999526719535066, - "loss": 46.0, - "step": 40515 - }, - { - "epoch": 3.09773113901791, - "grad_norm": 0.0020939901005476713, - "learning_rate": 0.0001999952669616597, - "loss": 46.0, - "step": 40516 - }, - { - "epoch": 3.0978075960013, - "grad_norm": 0.0014223206089809537, - "learning_rate": 0.000199995266727963, - "loss": 46.0, - "step": 40517 - }, - { - "epoch": 3.0978840529846896, - "grad_norm": 0.0016256141243502498, - "learning_rate": 0.00019999526649426054, - "loss": 46.0, - "step": 40518 - }, - { - "epoch": 3.0979605099680794, - "grad_norm": 0.0021733916364610195, - "learning_rate": 0.0001999952662605523, - "loss": 46.0, - "step": 40519 - }, - { - "epoch": 3.098036966951469, - "grad_norm": 0.0027708441484719515, - "learning_rate": 0.00019999526602683827, - "loss": 46.0, - "step": 40520 - }, - { - "epoch": 3.0981134239348584, - "grad_norm": 0.002322721527889371, - "learning_rate": 0.00019999526579311846, - "loss": 46.0, - "step": 40521 - }, - { - "epoch": 3.098189880918248, - "grad_norm": 0.0012915402185171843, - "learning_rate": 0.00019999526555939293, - "loss": 46.0, - "step": 40522 - }, - { - "epoch": 3.098266337901638, - "grad_norm": 0.0008527606260031462, - "learning_rate": 0.0001999952653256616, - "loss": 46.0, - "step": 40523 - }, - { - "epoch": 3.0983427948850277, - "grad_norm": 0.0011782728834077716, - "learning_rate": 0.00019999526509192453, - "loss": 46.0, - "step": 40524 - }, - { - "epoch": 3.0984192518684175, - "grad_norm": 0.0006229756399989128, - "learning_rate": 0.00019999526485818165, - "loss": 46.0, - "step": 40525 - }, - { - "epoch": 3.0984957088518073, - "grad_norm": 0.0014685136266052723, - "learning_rate": 0.000199995264624433, - "loss": 46.0, - "step": 40526 - }, - { - "epoch": 3.098572165835197, - "grad_norm": 0.005406105425208807, - "learning_rate": 0.0001999952643906786, - "loss": 46.0, - "step": 40527 - }, - { - "epoch": 3.0986486228185868, - "grad_norm": 0.002447728766128421, - "learning_rate": 0.00019999526415691844, - "loss": 46.0, - "step": 40528 - }, - { - "epoch": 3.0987250798019765, - "grad_norm": 0.00209886790253222, - "learning_rate": 0.0001999952639231525, - "loss": 46.0, - "step": 40529 - }, - { - "epoch": 3.0988015367853663, - "grad_norm": 0.001424482325091958, - "learning_rate": 0.0001999952636893808, - "loss": 46.0, - "step": 40530 - }, - { - "epoch": 3.098877993768756, - "grad_norm": 0.0012581193586811423, - "learning_rate": 0.00019999526345560332, - "loss": 46.0, - "step": 40531 - }, - { - "epoch": 3.0989544507521454, - "grad_norm": 0.00382399745285511, - "learning_rate": 0.00019999526322182009, - "loss": 46.0, - "step": 40532 - }, - { - "epoch": 3.099030907735535, - "grad_norm": 0.0009712508181110024, - "learning_rate": 0.00019999526298803105, - "loss": 46.0, - "step": 40533 - }, - { - "epoch": 3.099107364718925, - "grad_norm": 0.0017928526503965259, - "learning_rate": 0.0001999952627542363, - "loss": 46.0, - "step": 40534 - }, - { - "epoch": 3.0991838217023147, - "grad_norm": 0.006536139640957117, - "learning_rate": 0.0001999952625204357, - "loss": 46.0, - "step": 40535 - }, - { - "epoch": 3.0992602786857044, - "grad_norm": 0.002144876169040799, - "learning_rate": 0.0001999952622866294, - "loss": 46.0, - "step": 40536 - }, - { - "epoch": 3.099336735669094, - "grad_norm": 0.0006981620681472123, - "learning_rate": 0.00019999526205281734, - "loss": 46.0, - "step": 40537 - }, - { - "epoch": 3.099413192652484, - "grad_norm": 0.00507147703319788, - "learning_rate": 0.00019999526181899946, - "loss": 46.0, - "step": 40538 - }, - { - "epoch": 3.0994896496358737, - "grad_norm": 0.005660662893205881, - "learning_rate": 0.00019999526158517584, - "loss": 46.0, - "step": 40539 - }, - { - "epoch": 3.0995661066192635, - "grad_norm": 0.0018913400126621127, - "learning_rate": 0.00019999526135134642, - "loss": 46.0, - "step": 40540 - }, - { - "epoch": 3.0996425636026532, - "grad_norm": 0.000940806174185127, - "learning_rate": 0.00019999526111751128, - "loss": 46.0, - "step": 40541 - }, - { - "epoch": 3.099719020586043, - "grad_norm": 0.0008229814120568335, - "learning_rate": 0.00019999526088367034, - "loss": 46.0, - "step": 40542 - }, - { - "epoch": 3.0997954775694323, - "grad_norm": 0.006658149883151054, - "learning_rate": 0.00019999526064982362, - "loss": 46.0, - "step": 40543 - }, - { - "epoch": 3.099871934552822, - "grad_norm": 0.0036576061975210905, - "learning_rate": 0.00019999526041597113, - "loss": 46.0, - "step": 40544 - }, - { - "epoch": 3.099948391536212, - "grad_norm": 0.0005766819813288748, - "learning_rate": 0.0001999952601821129, - "loss": 46.0, - "step": 40545 - }, - { - "epoch": 3.1000248485196016, - "grad_norm": 0.0007805295172147453, - "learning_rate": 0.0001999952599482489, - "loss": 46.0, - "step": 40546 - }, - { - "epoch": 3.1001013055029913, - "grad_norm": 0.004046564921736717, - "learning_rate": 0.00019999525971437908, - "loss": 46.0, - "step": 40547 - }, - { - "epoch": 3.100177762486381, - "grad_norm": 0.0019012964330613613, - "learning_rate": 0.00019999525948050356, - "loss": 46.0, - "step": 40548 - }, - { - "epoch": 3.100254219469771, - "grad_norm": 0.002129658591002226, - "learning_rate": 0.00019999525924662223, - "loss": 46.0, - "step": 40549 - }, - { - "epoch": 3.1003306764531606, - "grad_norm": 0.0013813786208629608, - "learning_rate": 0.00019999525901273515, - "loss": 46.0, - "step": 40550 - }, - { - "epoch": 3.1004071334365504, - "grad_norm": 0.00272749038413167, - "learning_rate": 0.00019999525877884228, - "loss": 46.0, - "step": 40551 - }, - { - "epoch": 3.10048359041994, - "grad_norm": 0.0021689089480787516, - "learning_rate": 0.00019999525854494366, - "loss": 46.0, - "step": 40552 - }, - { - "epoch": 3.10056004740333, - "grad_norm": 0.003072606399655342, - "learning_rate": 0.00019999525831103924, - "loss": 46.0, - "step": 40553 - }, - { - "epoch": 3.1006365043867192, - "grad_norm": 0.0011073439382016659, - "learning_rate": 0.00019999525807712907, - "loss": 46.0, - "step": 40554 - }, - { - "epoch": 3.100712961370109, - "grad_norm": 0.001202999847009778, - "learning_rate": 0.00019999525784321316, - "loss": 46.0, - "step": 40555 - }, - { - "epoch": 3.1007894183534987, - "grad_norm": 0.001623639720492065, - "learning_rate": 0.00019999525760929144, - "loss": 46.0, - "step": 40556 - }, - { - "epoch": 3.1008658753368885, - "grad_norm": 0.0010263341246172786, - "learning_rate": 0.00019999525737536398, - "loss": 46.0, - "step": 40557 - }, - { - "epoch": 3.1009423323202783, - "grad_norm": 0.002977109281346202, - "learning_rate": 0.0001999952571414307, - "loss": 46.0, - "step": 40558 - }, - { - "epoch": 3.101018789303668, - "grad_norm": 0.002048192312940955, - "learning_rate": 0.0001999952569074917, - "loss": 46.0, - "step": 40559 - }, - { - "epoch": 3.101095246287058, - "grad_norm": 0.0014675704296678305, - "learning_rate": 0.00019999525667354693, - "loss": 46.0, - "step": 40560 - }, - { - "epoch": 3.1011717032704476, - "grad_norm": 0.002621354768052697, - "learning_rate": 0.00019999525643959638, - "loss": 46.0, - "step": 40561 - }, - { - "epoch": 3.1012481602538373, - "grad_norm": 0.0018998316954821348, - "learning_rate": 0.00019999525620564005, - "loss": 46.0, - "step": 40562 - }, - { - "epoch": 3.101324617237227, - "grad_norm": 0.0030819622334092855, - "learning_rate": 0.00019999525597167798, - "loss": 46.0, - "step": 40563 - }, - { - "epoch": 3.101401074220617, - "grad_norm": 0.0031770700588822365, - "learning_rate": 0.0001999952557377101, - "loss": 46.0, - "step": 40564 - }, - { - "epoch": 3.101477531204006, - "grad_norm": 0.002916044322773814, - "learning_rate": 0.00019999525550373645, - "loss": 46.0, - "step": 40565 - }, - { - "epoch": 3.101553988187396, - "grad_norm": 0.006006535608321428, - "learning_rate": 0.00019999525526975709, - "loss": 46.0, - "step": 40566 - }, - { - "epoch": 3.1016304451707857, - "grad_norm": 0.003432487603276968, - "learning_rate": 0.0001999952550357719, - "loss": 46.0, - "step": 40567 - }, - { - "epoch": 3.1017069021541754, - "grad_norm": 0.0015815269434824586, - "learning_rate": 0.00019999525480178098, - "loss": 46.0, - "step": 40568 - }, - { - "epoch": 3.101783359137565, - "grad_norm": 0.00358053226955235, - "learning_rate": 0.00019999525456778426, - "loss": 46.0, - "step": 40569 - }, - { - "epoch": 3.101859816120955, - "grad_norm": 0.0019013695418834686, - "learning_rate": 0.00019999525433378178, - "loss": 46.0, - "step": 40570 - }, - { - "epoch": 3.1019362731043447, - "grad_norm": 0.0020740365143865347, - "learning_rate": 0.00019999525409977354, - "loss": 46.0, - "step": 40571 - }, - { - "epoch": 3.1020127300877345, - "grad_norm": 0.004211660008877516, - "learning_rate": 0.00019999525386575954, - "loss": 46.0, - "step": 40572 - }, - { - "epoch": 3.1020891870711242, - "grad_norm": 0.0012021627044305205, - "learning_rate": 0.00019999525363173975, - "loss": 46.0, - "step": 40573 - }, - { - "epoch": 3.102165644054514, - "grad_norm": 0.0023867697454988956, - "learning_rate": 0.0001999952533977142, - "loss": 46.0, - "step": 40574 - }, - { - "epoch": 3.1022421010379038, - "grad_norm": 0.00216542836278677, - "learning_rate": 0.00019999525316368287, - "loss": 46.0, - "step": 40575 - }, - { - "epoch": 3.102318558021293, - "grad_norm": 0.003351530758664012, - "learning_rate": 0.00019999525292964577, - "loss": 46.0, - "step": 40576 - }, - { - "epoch": 3.102395015004683, - "grad_norm": 0.0019315776880830526, - "learning_rate": 0.00019999525269560293, - "loss": 46.0, - "step": 40577 - }, - { - "epoch": 3.1024714719880726, - "grad_norm": 0.001947465119883418, - "learning_rate": 0.0001999952524615543, - "loss": 46.0, - "step": 40578 - }, - { - "epoch": 3.1025479289714624, - "grad_norm": 0.004189606290310621, - "learning_rate": 0.0001999952522274999, - "loss": 46.0, - "step": 40579 - }, - { - "epoch": 3.102624385954852, - "grad_norm": 0.007601381279528141, - "learning_rate": 0.00019999525199343974, - "loss": 46.0, - "step": 40580 - }, - { - "epoch": 3.102700842938242, - "grad_norm": 0.003251432441174984, - "learning_rate": 0.0001999952517593738, - "loss": 46.0, - "step": 40581 - }, - { - "epoch": 3.1027772999216316, - "grad_norm": 0.003809809684753418, - "learning_rate": 0.0001999952515253021, - "loss": 46.0, - "step": 40582 - }, - { - "epoch": 3.1028537569050214, - "grad_norm": 0.0019049829570576549, - "learning_rate": 0.00019999525129122463, - "loss": 46.0, - "step": 40583 - }, - { - "epoch": 3.102930213888411, - "grad_norm": 0.0034392185043543577, - "learning_rate": 0.0001999952510571414, - "loss": 46.0, - "step": 40584 - }, - { - "epoch": 3.103006670871801, - "grad_norm": 0.005755885969847441, - "learning_rate": 0.00019999525082305236, - "loss": 46.0, - "step": 40585 - }, - { - "epoch": 3.1030831278551902, - "grad_norm": 0.003156736958771944, - "learning_rate": 0.0001999952505889576, - "loss": 46.0, - "step": 40586 - }, - { - "epoch": 3.10315958483858, - "grad_norm": 0.0035740917082875967, - "learning_rate": 0.00019999525035485706, - "loss": 46.0, - "step": 40587 - }, - { - "epoch": 3.1032360418219698, - "grad_norm": 0.0016834683483466506, - "learning_rate": 0.0001999952501207507, - "loss": 46.0, - "step": 40588 - }, - { - "epoch": 3.1033124988053595, - "grad_norm": 0.0015105652855709195, - "learning_rate": 0.00019999524988663866, - "loss": 46.0, - "step": 40589 - }, - { - "epoch": 3.1033889557887493, - "grad_norm": 0.0024993799161165953, - "learning_rate": 0.00019999524965252076, - "loss": 46.0, - "step": 40590 - }, - { - "epoch": 3.103465412772139, - "grad_norm": 0.0016338367713615298, - "learning_rate": 0.00019999524941839716, - "loss": 46.0, - "step": 40591 - }, - { - "epoch": 3.103541869755529, - "grad_norm": 0.002314062789082527, - "learning_rate": 0.00019999524918426774, - "loss": 46.0, - "step": 40592 - }, - { - "epoch": 3.1036183267389186, - "grad_norm": 0.001856429735198617, - "learning_rate": 0.00019999524895013257, - "loss": 46.0, - "step": 40593 - }, - { - "epoch": 3.1036947837223083, - "grad_norm": 0.004054618068039417, - "learning_rate": 0.00019999524871599163, - "loss": 46.0, - "step": 40594 - }, - { - "epoch": 3.103771240705698, - "grad_norm": 0.00089570990530774, - "learning_rate": 0.00019999524848184495, - "loss": 46.0, - "step": 40595 - }, - { - "epoch": 3.103847697689088, - "grad_norm": 0.0005263558705337346, - "learning_rate": 0.00019999524824769246, - "loss": 46.0, - "step": 40596 - }, - { - "epoch": 3.1039241546724776, - "grad_norm": 0.005276402924209833, - "learning_rate": 0.00019999524801353423, - "loss": 46.0, - "step": 40597 - }, - { - "epoch": 3.104000611655867, - "grad_norm": 0.0033934905659407377, - "learning_rate": 0.00019999524777937022, - "loss": 46.0, - "step": 40598 - }, - { - "epoch": 3.1040770686392567, - "grad_norm": 0.001445717178285122, - "learning_rate": 0.00019999524754520044, - "loss": 46.0, - "step": 40599 - }, - { - "epoch": 3.1041535256226465, - "grad_norm": 0.002149093197658658, - "learning_rate": 0.0001999952473110249, - "loss": 46.0, - "step": 40600 - }, - { - "epoch": 3.104229982606036, - "grad_norm": 0.002808294026181102, - "learning_rate": 0.0001999952470768436, - "loss": 46.0, - "step": 40601 - }, - { - "epoch": 3.104306439589426, - "grad_norm": 0.006283879745751619, - "learning_rate": 0.00019999524684265646, - "loss": 46.0, - "step": 40602 - }, - { - "epoch": 3.1043828965728157, - "grad_norm": 0.003297331975772977, - "learning_rate": 0.00019999524660846362, - "loss": 46.0, - "step": 40603 - }, - { - "epoch": 3.1044593535562055, - "grad_norm": 0.002959726145491004, - "learning_rate": 0.00019999524637426502, - "loss": 46.0, - "step": 40604 - }, - { - "epoch": 3.1045358105395953, - "grad_norm": 0.008936835452914238, - "learning_rate": 0.0001999952461400606, - "loss": 46.0, - "step": 40605 - }, - { - "epoch": 3.104612267522985, - "grad_norm": 0.0012401411077007651, - "learning_rate": 0.00019999524590585044, - "loss": 46.0, - "step": 40606 - }, - { - "epoch": 3.104688724506375, - "grad_norm": 0.0013057544128969312, - "learning_rate": 0.0001999952456716345, - "loss": 46.0, - "step": 40607 - }, - { - "epoch": 3.104765181489764, - "grad_norm": 0.0019892703276127577, - "learning_rate": 0.00019999524543741278, - "loss": 46.0, - "step": 40608 - }, - { - "epoch": 3.104841638473154, - "grad_norm": 0.0016813121037557721, - "learning_rate": 0.00019999524520318532, - "loss": 46.0, - "step": 40609 - }, - { - "epoch": 3.1049180954565436, - "grad_norm": 0.0012155581498518586, - "learning_rate": 0.0001999952449689521, - "loss": 46.0, - "step": 40610 - }, - { - "epoch": 3.1049945524399334, - "grad_norm": 0.00477614626288414, - "learning_rate": 0.00019999524473471308, - "loss": 46.0, - "step": 40611 - }, - { - "epoch": 3.105071009423323, - "grad_norm": 0.0007503448869101703, - "learning_rate": 0.0001999952445004683, - "loss": 46.0, - "step": 40612 - }, - { - "epoch": 3.105147466406713, - "grad_norm": 0.0016403916524723172, - "learning_rate": 0.00019999524426621775, - "loss": 46.0, - "step": 40613 - }, - { - "epoch": 3.1052239233901027, - "grad_norm": 0.0030978990253061056, - "learning_rate": 0.00019999524403196143, - "loss": 46.0, - "step": 40614 - }, - { - "epoch": 3.1053003803734924, - "grad_norm": 0.007497414946556091, - "learning_rate": 0.00019999524379769933, - "loss": 46.0, - "step": 40615 - }, - { - "epoch": 3.105376837356882, - "grad_norm": 0.005553377326577902, - "learning_rate": 0.00019999524356343148, - "loss": 46.0, - "step": 40616 - }, - { - "epoch": 3.105453294340272, - "grad_norm": 0.001269626198336482, - "learning_rate": 0.00019999524332915786, - "loss": 46.0, - "step": 40617 - }, - { - "epoch": 3.1055297513236617, - "grad_norm": 0.002937605371698737, - "learning_rate": 0.00019999524309487847, - "loss": 46.0, - "step": 40618 - }, - { - "epoch": 3.105606208307051, - "grad_norm": 0.0006505833589471877, - "learning_rate": 0.0001999952428605933, - "loss": 46.0, - "step": 40619 - }, - { - "epoch": 3.105682665290441, - "grad_norm": 0.0027430334594100714, - "learning_rate": 0.00019999524262630236, - "loss": 46.0, - "step": 40620 - }, - { - "epoch": 3.1057591222738306, - "grad_norm": 0.003604454919695854, - "learning_rate": 0.00019999524239200565, - "loss": 46.0, - "step": 40621 - }, - { - "epoch": 3.1058355792572203, - "grad_norm": 0.002649285364896059, - "learning_rate": 0.0001999952421577032, - "loss": 46.0, - "step": 40622 - }, - { - "epoch": 3.10591203624061, - "grad_norm": 0.002324703149497509, - "learning_rate": 0.00019999524192339493, - "loss": 46.0, - "step": 40623 - }, - { - "epoch": 3.105988493224, - "grad_norm": 0.002125852508470416, - "learning_rate": 0.00019999524168908095, - "loss": 46.0, - "step": 40624 - }, - { - "epoch": 3.1060649502073896, - "grad_norm": 0.0008553141378797591, - "learning_rate": 0.00019999524145476115, - "loss": 46.0, - "step": 40625 - }, - { - "epoch": 3.1061414071907794, - "grad_norm": 0.0005615248228423297, - "learning_rate": 0.0001999952412204356, - "loss": 46.0, - "step": 40626 - }, - { - "epoch": 3.106217864174169, - "grad_norm": 0.005933989305049181, - "learning_rate": 0.0001999952409861043, - "loss": 46.0, - "step": 40627 - }, - { - "epoch": 3.106294321157559, - "grad_norm": 0.002101338002830744, - "learning_rate": 0.00019999524075176722, - "loss": 46.0, - "step": 40628 - }, - { - "epoch": 3.1063707781409486, - "grad_norm": 0.003139087464660406, - "learning_rate": 0.00019999524051742435, - "loss": 46.0, - "step": 40629 - }, - { - "epoch": 3.106447235124338, - "grad_norm": 0.0021737574134021997, - "learning_rate": 0.00019999524028307573, - "loss": 46.0, - "step": 40630 - }, - { - "epoch": 3.1065236921077277, - "grad_norm": 0.0021124924533069134, - "learning_rate": 0.0001999952400487213, - "loss": 46.0, - "step": 40631 - }, - { - "epoch": 3.1066001490911175, - "grad_norm": 0.003720023902133107, - "learning_rate": 0.00019999523981436117, - "loss": 46.0, - "step": 40632 - }, - { - "epoch": 3.1066766060745072, - "grad_norm": 0.0028368530329316854, - "learning_rate": 0.00019999523957999523, - "loss": 46.0, - "step": 40633 - }, - { - "epoch": 3.106753063057897, - "grad_norm": 0.001612384687177837, - "learning_rate": 0.00019999523934562352, - "loss": 46.0, - "step": 40634 - }, - { - "epoch": 3.1068295200412868, - "grad_norm": 0.0019780707079917192, - "learning_rate": 0.00019999523911124606, - "loss": 46.0, - "step": 40635 - }, - { - "epoch": 3.1069059770246765, - "grad_norm": 0.0016828691586852074, - "learning_rate": 0.0001999952388768628, - "loss": 46.0, - "step": 40636 - }, - { - "epoch": 3.1069824340080663, - "grad_norm": 0.0014801541110500693, - "learning_rate": 0.0001999952386424738, - "loss": 46.0, - "step": 40637 - }, - { - "epoch": 3.107058890991456, - "grad_norm": 0.0014360109344124794, - "learning_rate": 0.00019999523840807904, - "loss": 46.0, - "step": 40638 - }, - { - "epoch": 3.107135347974846, - "grad_norm": 0.0019825368653982878, - "learning_rate": 0.0001999952381736785, - "loss": 46.0, - "step": 40639 - }, - { - "epoch": 3.1072118049582356, - "grad_norm": 0.0019437994342297316, - "learning_rate": 0.00019999523793927217, - "loss": 46.0, - "step": 40640 - }, - { - "epoch": 3.107288261941625, - "grad_norm": 0.002874209778383374, - "learning_rate": 0.0001999952377048601, - "loss": 46.0, - "step": 40641 - }, - { - "epoch": 3.1073647189250146, - "grad_norm": 0.0027536386623978615, - "learning_rate": 0.00019999523747044222, - "loss": 46.0, - "step": 40642 - }, - { - "epoch": 3.1074411759084044, - "grad_norm": 0.0038181981071829796, - "learning_rate": 0.00019999523723601858, - "loss": 46.0, - "step": 40643 - }, - { - "epoch": 3.107517632891794, - "grad_norm": 0.0011698815505951643, - "learning_rate": 0.0001999952370015892, - "loss": 46.0, - "step": 40644 - }, - { - "epoch": 3.107594089875184, - "grad_norm": 0.0035587139427661896, - "learning_rate": 0.00019999523676715404, - "loss": 46.0, - "step": 40645 - }, - { - "epoch": 3.1076705468585737, - "grad_norm": 0.002951499540358782, - "learning_rate": 0.0001999952365327131, - "loss": 46.0, - "step": 40646 - }, - { - "epoch": 3.1077470038419635, - "grad_norm": 0.0009942209580913186, - "learning_rate": 0.0001999952362982664, - "loss": 46.0, - "step": 40647 - }, - { - "epoch": 3.107823460825353, - "grad_norm": 0.0037157328333705664, - "learning_rate": 0.00019999523606381394, - "loss": 46.0, - "step": 40648 - }, - { - "epoch": 3.107899917808743, - "grad_norm": 0.0010292684892192483, - "learning_rate": 0.0001999952358293557, - "loss": 46.0, - "step": 40649 - }, - { - "epoch": 3.1079763747921327, - "grad_norm": 0.0030907404143363237, - "learning_rate": 0.00019999523559489167, - "loss": 46.0, - "step": 40650 - }, - { - "epoch": 3.1080528317755225, - "grad_norm": 0.002052318537607789, - "learning_rate": 0.00019999523536042192, - "loss": 46.0, - "step": 40651 - }, - { - "epoch": 3.108129288758912, - "grad_norm": 0.0037924996577203274, - "learning_rate": 0.00019999523512594637, - "loss": 46.0, - "step": 40652 - }, - { - "epoch": 3.1082057457423016, - "grad_norm": 0.0031955670565366745, - "learning_rate": 0.00019999523489146504, - "loss": 46.0, - "step": 40653 - }, - { - "epoch": 3.1082822027256913, - "grad_norm": 0.001090130303055048, - "learning_rate": 0.00019999523465697797, - "loss": 46.0, - "step": 40654 - }, - { - "epoch": 3.108358659709081, - "grad_norm": 0.0028760689310729504, - "learning_rate": 0.0001999952344224851, - "loss": 46.0, - "step": 40655 - }, - { - "epoch": 3.108435116692471, - "grad_norm": 0.0033978205174207687, - "learning_rate": 0.00019999523418798648, - "loss": 46.0, - "step": 40656 - }, - { - "epoch": 3.1085115736758606, - "grad_norm": 0.0033070696517825127, - "learning_rate": 0.0001999952339534821, - "loss": 46.0, - "step": 40657 - }, - { - "epoch": 3.1085880306592504, - "grad_norm": 0.006725492887198925, - "learning_rate": 0.00019999523371897192, - "loss": 46.0, - "step": 40658 - }, - { - "epoch": 3.10866448764264, - "grad_norm": 0.0012557096779346466, - "learning_rate": 0.00019999523348445598, - "loss": 46.0, - "step": 40659 - }, - { - "epoch": 3.10874094462603, - "grad_norm": 0.0029580688569694757, - "learning_rate": 0.00019999523324993427, - "loss": 46.0, - "step": 40660 - }, - { - "epoch": 3.1088174016094197, - "grad_norm": 0.0006876569823361933, - "learning_rate": 0.00019999523301540681, - "loss": 46.0, - "step": 40661 - }, - { - "epoch": 3.1088938585928094, - "grad_norm": 0.0005584590835496783, - "learning_rate": 0.00019999523278087358, - "loss": 46.0, - "step": 40662 - }, - { - "epoch": 3.1089703155761987, - "grad_norm": 0.002697932068258524, - "learning_rate": 0.00019999523254633458, - "loss": 46.0, - "step": 40663 - }, - { - "epoch": 3.1090467725595885, - "grad_norm": 0.0016806249041110277, - "learning_rate": 0.0001999952323117898, - "loss": 46.0, - "step": 40664 - }, - { - "epoch": 3.1091232295429783, - "grad_norm": 0.0006992561975494027, - "learning_rate": 0.00019999523207723925, - "loss": 46.0, - "step": 40665 - }, - { - "epoch": 3.109199686526368, - "grad_norm": 0.001335737993940711, - "learning_rate": 0.00019999523184268295, - "loss": 46.0, - "step": 40666 - }, - { - "epoch": 3.109276143509758, - "grad_norm": 0.0028802230954170227, - "learning_rate": 0.00019999523160812082, - "loss": 46.0, - "step": 40667 - }, - { - "epoch": 3.1093526004931475, - "grad_norm": 0.003597601316869259, - "learning_rate": 0.000199995231373553, - "loss": 46.0, - "step": 40668 - }, - { - "epoch": 3.1094290574765373, - "grad_norm": 0.0017050397582352161, - "learning_rate": 0.0001999952311389794, - "loss": 46.0, - "step": 40669 - }, - { - "epoch": 3.109505514459927, - "grad_norm": 0.0031466397922486067, - "learning_rate": 0.00019999523090439997, - "loss": 46.0, - "step": 40670 - }, - { - "epoch": 3.109581971443317, - "grad_norm": 0.0017802175134420395, - "learning_rate": 0.00019999523066981483, - "loss": 46.0, - "step": 40671 - }, - { - "epoch": 3.1096584284267066, - "grad_norm": 0.0017646894557401538, - "learning_rate": 0.0001999952304352239, - "loss": 46.0, - "step": 40672 - }, - { - "epoch": 3.1097348854100964, - "grad_norm": 0.002124791033565998, - "learning_rate": 0.0001999952302006272, - "loss": 46.0, - "step": 40673 - }, - { - "epoch": 3.1098113423934857, - "grad_norm": 0.001128863194026053, - "learning_rate": 0.00019999522996602472, - "loss": 46.0, - "step": 40674 - }, - { - "epoch": 3.1098877993768754, - "grad_norm": 0.002025673631578684, - "learning_rate": 0.0001999952297314165, - "loss": 46.0, - "step": 40675 - }, - { - "epoch": 3.109964256360265, - "grad_norm": 0.002854999853298068, - "learning_rate": 0.00019999522949680246, - "loss": 46.0, - "step": 40676 - }, - { - "epoch": 3.110040713343655, - "grad_norm": 0.0013628213200718164, - "learning_rate": 0.00019999522926218268, - "loss": 46.0, - "step": 40677 - }, - { - "epoch": 3.1101171703270447, - "grad_norm": 0.0012093354016542435, - "learning_rate": 0.00019999522902755716, - "loss": 46.0, - "step": 40678 - }, - { - "epoch": 3.1101936273104345, - "grad_norm": 0.005360200069844723, - "learning_rate": 0.00019999522879292583, - "loss": 46.0, - "step": 40679 - }, - { - "epoch": 3.1102700842938242, - "grad_norm": 0.0011868528090417385, - "learning_rate": 0.00019999522855828876, - "loss": 46.0, - "step": 40680 - }, - { - "epoch": 3.110346541277214, - "grad_norm": 0.0019066107925027609, - "learning_rate": 0.0001999952283236459, - "loss": 46.0, - "step": 40681 - }, - { - "epoch": 3.1104229982606038, - "grad_norm": 0.004260065965354443, - "learning_rate": 0.00019999522808899727, - "loss": 46.0, - "step": 40682 - }, - { - "epoch": 3.1104994552439935, - "grad_norm": 0.0013711127685382962, - "learning_rate": 0.00019999522785434288, - "loss": 46.0, - "step": 40683 - }, - { - "epoch": 3.1105759122273833, - "grad_norm": 0.0011432197643443942, - "learning_rate": 0.00019999522761968272, - "loss": 46.0, - "step": 40684 - }, - { - "epoch": 3.1106523692107726, - "grad_norm": 0.0011039115488529205, - "learning_rate": 0.00019999522738501678, - "loss": 46.0, - "step": 40685 - }, - { - "epoch": 3.1107288261941624, - "grad_norm": 0.005655019544064999, - "learning_rate": 0.0001999952271503451, - "loss": 46.0, - "step": 40686 - }, - { - "epoch": 3.110805283177552, - "grad_norm": 0.0069869086146354675, - "learning_rate": 0.0001999952269156676, - "loss": 46.0, - "step": 40687 - }, - { - "epoch": 3.110881740160942, - "grad_norm": 0.002772802719846368, - "learning_rate": 0.0001999952266809844, - "loss": 46.0, - "step": 40688 - }, - { - "epoch": 3.1109581971443316, - "grad_norm": 0.0025198734365403652, - "learning_rate": 0.00019999522644629538, - "loss": 46.0, - "step": 40689 - }, - { - "epoch": 3.1110346541277214, - "grad_norm": 0.0028384372126311064, - "learning_rate": 0.0001999952262116006, - "loss": 46.0, - "step": 40690 - }, - { - "epoch": 3.111111111111111, - "grad_norm": 0.0017654254334047437, - "learning_rate": 0.00019999522597690005, - "loss": 46.0, - "step": 40691 - }, - { - "epoch": 3.111187568094501, - "grad_norm": 0.0012556869769468904, - "learning_rate": 0.00019999522574219375, - "loss": 46.0, - "step": 40692 - }, - { - "epoch": 3.1112640250778907, - "grad_norm": 0.0017929719761013985, - "learning_rate": 0.00019999522550748168, - "loss": 46.0, - "step": 40693 - }, - { - "epoch": 3.1113404820612804, - "grad_norm": 0.0007313725654967129, - "learning_rate": 0.0001999952252727638, - "loss": 46.0, - "step": 40694 - }, - { - "epoch": 3.11141693904467, - "grad_norm": 0.007251699920743704, - "learning_rate": 0.0001999952250380402, - "loss": 46.0, - "step": 40695 - }, - { - "epoch": 3.1114933960280595, - "grad_norm": 0.003814192721620202, - "learning_rate": 0.0001999952248033108, - "loss": 46.0, - "step": 40696 - }, - { - "epoch": 3.1115698530114493, - "grad_norm": 0.003523855237290263, - "learning_rate": 0.00019999522456857563, - "loss": 46.0, - "step": 40697 - }, - { - "epoch": 3.111646309994839, - "grad_norm": 0.0033209652174264193, - "learning_rate": 0.0001999952243338347, - "loss": 46.0, - "step": 40698 - }, - { - "epoch": 3.111722766978229, - "grad_norm": 0.0007878456963226199, - "learning_rate": 0.000199995224099088, - "loss": 46.0, - "step": 40699 - }, - { - "epoch": 3.1117992239616186, - "grad_norm": 0.0023557180538773537, - "learning_rate": 0.00019999522386433553, - "loss": 46.0, - "step": 40700 - }, - { - "epoch": 3.1118756809450083, - "grad_norm": 0.0024816084187477827, - "learning_rate": 0.0001999952236295773, - "loss": 46.0, - "step": 40701 - }, - { - "epoch": 3.111952137928398, - "grad_norm": 0.006478719413280487, - "learning_rate": 0.0001999952233948133, - "loss": 46.0, - "step": 40702 - }, - { - "epoch": 3.112028594911788, - "grad_norm": 0.002110905945301056, - "learning_rate": 0.00019999522316004354, - "loss": 46.0, - "step": 40703 - }, - { - "epoch": 3.1121050518951776, - "grad_norm": 0.0008606757037341595, - "learning_rate": 0.00019999522292526797, - "loss": 46.0, - "step": 40704 - }, - { - "epoch": 3.1121815088785674, - "grad_norm": 0.00407033646479249, - "learning_rate": 0.00019999522269048664, - "loss": 46.0, - "step": 40705 - }, - { - "epoch": 3.112257965861957, - "grad_norm": 0.0019873501732945442, - "learning_rate": 0.00019999522245569957, - "loss": 46.0, - "step": 40706 - }, - { - "epoch": 3.1123344228453464, - "grad_norm": 0.0008969728951342404, - "learning_rate": 0.00019999522222090673, - "loss": 46.0, - "step": 40707 - }, - { - "epoch": 3.112410879828736, - "grad_norm": 0.002647097222507, - "learning_rate": 0.00019999522198610808, - "loss": 46.0, - "step": 40708 - }, - { - "epoch": 3.112487336812126, - "grad_norm": 0.00447895796969533, - "learning_rate": 0.00019999522175130372, - "loss": 46.0, - "step": 40709 - }, - { - "epoch": 3.1125637937955157, - "grad_norm": 0.0007037890027277172, - "learning_rate": 0.00019999522151649358, - "loss": 46.0, - "step": 40710 - }, - { - "epoch": 3.1126402507789055, - "grad_norm": 0.008124771527945995, - "learning_rate": 0.00019999522128167762, - "loss": 46.0, - "step": 40711 - }, - { - "epoch": 3.1127167077622953, - "grad_norm": 0.002086327178403735, - "learning_rate": 0.00019999522104685593, - "loss": 46.0, - "step": 40712 - }, - { - "epoch": 3.112793164745685, - "grad_norm": 0.0023307630326598883, - "learning_rate": 0.00019999522081202845, - "loss": 46.0, - "step": 40713 - }, - { - "epoch": 3.1128696217290748, - "grad_norm": 0.001804125146009028, - "learning_rate": 0.00019999522057719525, - "loss": 46.0, - "step": 40714 - }, - { - "epoch": 3.1129460787124645, - "grad_norm": 0.0064630708657205105, - "learning_rate": 0.00019999522034235622, - "loss": 46.0, - "step": 40715 - }, - { - "epoch": 3.1130225356958543, - "grad_norm": 0.002073155250400305, - "learning_rate": 0.00019999522010751144, - "loss": 46.0, - "step": 40716 - }, - { - "epoch": 3.1130989926792436, - "grad_norm": 0.0022630509920418262, - "learning_rate": 0.0001999952198726609, - "loss": 46.0, - "step": 40717 - }, - { - "epoch": 3.1131754496626334, - "grad_norm": 0.0018022538861259818, - "learning_rate": 0.0001999952196378046, - "loss": 46.0, - "step": 40718 - }, - { - "epoch": 3.113251906646023, - "grad_norm": 0.0019590011797845364, - "learning_rate": 0.0001999952194029425, - "loss": 46.0, - "step": 40719 - }, - { - "epoch": 3.113328363629413, - "grad_norm": 0.0020214931573718786, - "learning_rate": 0.00019999521916807465, - "loss": 46.0, - "step": 40720 - }, - { - "epoch": 3.1134048206128027, - "grad_norm": 0.004375392571091652, - "learning_rate": 0.00019999521893320103, - "loss": 46.0, - "step": 40721 - }, - { - "epoch": 3.1134812775961924, - "grad_norm": 0.0005054162465967238, - "learning_rate": 0.00019999521869832164, - "loss": 46.0, - "step": 40722 - }, - { - "epoch": 3.113557734579582, - "grad_norm": 0.0036657429300248623, - "learning_rate": 0.00019999521846343648, - "loss": 46.0, - "step": 40723 - }, - { - "epoch": 3.113634191562972, - "grad_norm": 0.002865151735022664, - "learning_rate": 0.00019999521822854557, - "loss": 46.0, - "step": 40724 - }, - { - "epoch": 3.1137106485463617, - "grad_norm": 0.0017647017957642674, - "learning_rate": 0.00019999521799364886, - "loss": 46.0, - "step": 40725 - }, - { - "epoch": 3.1137871055297515, - "grad_norm": 0.007046884391456842, - "learning_rate": 0.00019999521775874638, - "loss": 46.0, - "step": 40726 - }, - { - "epoch": 3.1138635625131412, - "grad_norm": 0.0011433225590735674, - "learning_rate": 0.00019999521752383817, - "loss": 46.0, - "step": 40727 - }, - { - "epoch": 3.113940019496531, - "grad_norm": 0.0022675630170851946, - "learning_rate": 0.00019999521728892417, - "loss": 46.0, - "step": 40728 - }, - { - "epoch": 3.1140164764799203, - "grad_norm": 0.0009418230038136244, - "learning_rate": 0.0001999952170540044, - "loss": 46.0, - "step": 40729 - }, - { - "epoch": 3.11409293346331, - "grad_norm": 0.0017789386911317706, - "learning_rate": 0.00019999521681907884, - "loss": 46.0, - "step": 40730 - }, - { - "epoch": 3.1141693904467, - "grad_norm": 0.0031167122069746256, - "learning_rate": 0.00019999521658414752, - "loss": 46.0, - "step": 40731 - }, - { - "epoch": 3.1142458474300896, - "grad_norm": 0.0010709302732720971, - "learning_rate": 0.00019999521634921042, - "loss": 46.0, - "step": 40732 - }, - { - "epoch": 3.1143223044134793, - "grad_norm": 0.002567109651863575, - "learning_rate": 0.00019999521611426758, - "loss": 46.0, - "step": 40733 - }, - { - "epoch": 3.114398761396869, - "grad_norm": 0.0034765906166285276, - "learning_rate": 0.00019999521587931896, - "loss": 46.0, - "step": 40734 - }, - { - "epoch": 3.114475218380259, - "grad_norm": 0.001815799972973764, - "learning_rate": 0.00019999521564436457, - "loss": 46.0, - "step": 40735 - }, - { - "epoch": 3.1145516753636486, - "grad_norm": 0.002282122615724802, - "learning_rate": 0.00019999521540940444, - "loss": 46.0, - "step": 40736 - }, - { - "epoch": 3.1146281323470384, - "grad_norm": 0.0017280846368521452, - "learning_rate": 0.0001999952151744385, - "loss": 46.0, - "step": 40737 - }, - { - "epoch": 3.114704589330428, - "grad_norm": 0.004034288693219423, - "learning_rate": 0.0001999952149394668, - "loss": 46.0, - "step": 40738 - }, - { - "epoch": 3.1147810463138175, - "grad_norm": 0.0018747354624792933, - "learning_rate": 0.0001999952147044893, - "loss": 46.0, - "step": 40739 - }, - { - "epoch": 3.1148575032972072, - "grad_norm": 0.005246356129646301, - "learning_rate": 0.00019999521446950608, - "loss": 46.0, - "step": 40740 - }, - { - "epoch": 3.114933960280597, - "grad_norm": 0.001763629843480885, - "learning_rate": 0.00019999521423451707, - "loss": 46.0, - "step": 40741 - }, - { - "epoch": 3.1150104172639868, - "grad_norm": 0.0014435260090976954, - "learning_rate": 0.00019999521399952232, - "loss": 46.0, - "step": 40742 - }, - { - "epoch": 3.1150868742473765, - "grad_norm": 0.0017951888730749488, - "learning_rate": 0.0001999952137645218, - "loss": 46.0, - "step": 40743 - }, - { - "epoch": 3.1151633312307663, - "grad_norm": 0.0013720939168706536, - "learning_rate": 0.00019999521352951548, - "loss": 46.0, - "step": 40744 - }, - { - "epoch": 3.115239788214156, - "grad_norm": 0.0026262635365128517, - "learning_rate": 0.00019999521329450338, - "loss": 46.0, - "step": 40745 - }, - { - "epoch": 3.115316245197546, - "grad_norm": 0.008479884825646877, - "learning_rate": 0.00019999521305948554, - "loss": 46.0, - "step": 40746 - }, - { - "epoch": 3.1153927021809356, - "grad_norm": 0.0025631135795265436, - "learning_rate": 0.00019999521282446192, - "loss": 46.0, - "step": 40747 - }, - { - "epoch": 3.1154691591643253, - "grad_norm": 0.00232143048197031, - "learning_rate": 0.00019999521258943253, - "loss": 46.0, - "step": 40748 - }, - { - "epoch": 3.115545616147715, - "grad_norm": 0.0007593531627207994, - "learning_rate": 0.00019999521235439737, - "loss": 46.0, - "step": 40749 - }, - { - "epoch": 3.115622073131105, - "grad_norm": 0.0025446987710893154, - "learning_rate": 0.00019999521211935646, - "loss": 46.0, - "step": 40750 - }, - { - "epoch": 3.115698530114494, - "grad_norm": 0.0024156621657311916, - "learning_rate": 0.00019999521188430975, - "loss": 46.0, - "step": 40751 - }, - { - "epoch": 3.115774987097884, - "grad_norm": 0.0049401200376451015, - "learning_rate": 0.0001999952116492573, - "loss": 46.0, - "step": 40752 - }, - { - "epoch": 3.1158514440812737, - "grad_norm": 0.0026664440520107746, - "learning_rate": 0.00019999521141419907, - "loss": 46.0, - "step": 40753 - }, - { - "epoch": 3.1159279010646634, - "grad_norm": 0.0008086326415650547, - "learning_rate": 0.00019999521117913504, - "loss": 46.0, - "step": 40754 - }, - { - "epoch": 3.116004358048053, - "grad_norm": 0.0016082170186564326, - "learning_rate": 0.00019999521094406526, - "loss": 46.0, - "step": 40755 - }, - { - "epoch": 3.116080815031443, - "grad_norm": 0.008245814591646194, - "learning_rate": 0.00019999521070898974, - "loss": 46.0, - "step": 40756 - }, - { - "epoch": 3.1161572720148327, - "grad_norm": 0.0016008797101676464, - "learning_rate": 0.00019999521047390841, - "loss": 46.0, - "step": 40757 - }, - { - "epoch": 3.1162337289982225, - "grad_norm": 0.0031871472019702196, - "learning_rate": 0.00019999521023882135, - "loss": 46.0, - "step": 40758 - }, - { - "epoch": 3.1163101859816122, - "grad_norm": 0.003929438069462776, - "learning_rate": 0.0001999952100037285, - "loss": 46.0, - "step": 40759 - }, - { - "epoch": 3.116386642965002, - "grad_norm": 0.0037591878790408373, - "learning_rate": 0.00019999520976862986, - "loss": 46.0, - "step": 40760 - }, - { - "epoch": 3.1164630999483913, - "grad_norm": 0.0014754737494513392, - "learning_rate": 0.0001999952095335255, - "loss": 46.0, - "step": 40761 - }, - { - "epoch": 3.116539556931781, - "grad_norm": 0.0016094903694465756, - "learning_rate": 0.00019999520929841534, - "loss": 46.0, - "step": 40762 - }, - { - "epoch": 3.116616013915171, - "grad_norm": 0.00698675774037838, - "learning_rate": 0.0001999952090632994, - "loss": 46.0, - "step": 40763 - }, - { - "epoch": 3.1166924708985606, - "grad_norm": 0.0006853318773210049, - "learning_rate": 0.00019999520882817772, - "loss": 46.0, - "step": 40764 - }, - { - "epoch": 3.1167689278819504, - "grad_norm": 0.001623383373953402, - "learning_rate": 0.00019999520859305023, - "loss": 46.0, - "step": 40765 - }, - { - "epoch": 3.11684538486534, - "grad_norm": 0.002321155508980155, - "learning_rate": 0.000199995208357917, - "loss": 46.0, - "step": 40766 - }, - { - "epoch": 3.11692184184873, - "grad_norm": 0.0050252932123839855, - "learning_rate": 0.000199995208122778, - "loss": 46.0, - "step": 40767 - }, - { - "epoch": 3.1169982988321197, - "grad_norm": 0.0022112191654741764, - "learning_rate": 0.00019999520788763323, - "loss": 46.0, - "step": 40768 - }, - { - "epoch": 3.1170747558155094, - "grad_norm": 0.0007991396123543382, - "learning_rate": 0.0001999952076524827, - "loss": 46.0, - "step": 40769 - }, - { - "epoch": 3.117151212798899, - "grad_norm": 0.0017596872057765722, - "learning_rate": 0.00019999520741732638, - "loss": 46.0, - "step": 40770 - }, - { - "epoch": 3.117227669782289, - "grad_norm": 0.0005868226289749146, - "learning_rate": 0.00019999520718216429, - "loss": 46.0, - "step": 40771 - }, - { - "epoch": 3.1173041267656783, - "grad_norm": 0.0016553538152948022, - "learning_rate": 0.00019999520694699644, - "loss": 46.0, - "step": 40772 - }, - { - "epoch": 3.117380583749068, - "grad_norm": 0.001902634627185762, - "learning_rate": 0.00019999520671182283, - "loss": 46.0, - "step": 40773 - }, - { - "epoch": 3.1174570407324578, - "grad_norm": 0.002324906410649419, - "learning_rate": 0.00019999520647664344, - "loss": 46.0, - "step": 40774 - }, - { - "epoch": 3.1175334977158475, - "grad_norm": 0.004304792732000351, - "learning_rate": 0.00019999520624145828, - "loss": 46.0, - "step": 40775 - }, - { - "epoch": 3.1176099546992373, - "grad_norm": 0.0024911880027502775, - "learning_rate": 0.00019999520600626737, - "loss": 46.0, - "step": 40776 - }, - { - "epoch": 3.117686411682627, - "grad_norm": 0.002355963457375765, - "learning_rate": 0.00019999520577107066, - "loss": 46.0, - "step": 40777 - }, - { - "epoch": 3.117762868666017, - "grad_norm": 0.004058195743709803, - "learning_rate": 0.0001999952055358682, - "loss": 46.0, - "step": 40778 - }, - { - "epoch": 3.1178393256494066, - "grad_norm": 0.001629286096431315, - "learning_rate": 0.00019999520530065998, - "loss": 46.0, - "step": 40779 - }, - { - "epoch": 3.1179157826327963, - "grad_norm": 0.003608867060393095, - "learning_rate": 0.00019999520506544597, - "loss": 46.0, - "step": 40780 - }, - { - "epoch": 3.117992239616186, - "grad_norm": 0.006915109232068062, - "learning_rate": 0.0001999952048302262, - "loss": 46.0, - "step": 40781 - }, - { - "epoch": 3.118068696599576, - "grad_norm": 0.0018111106473952532, - "learning_rate": 0.00019999520459500065, - "loss": 46.0, - "step": 40782 - }, - { - "epoch": 3.118145153582965, - "grad_norm": 0.0013533896999433637, - "learning_rate": 0.00019999520435976935, - "loss": 46.0, - "step": 40783 - }, - { - "epoch": 3.118221610566355, - "grad_norm": 0.001836710493080318, - "learning_rate": 0.00019999520412453228, - "loss": 46.0, - "step": 40784 - }, - { - "epoch": 3.1182980675497447, - "grad_norm": 0.000987344072200358, - "learning_rate": 0.00019999520388928942, - "loss": 46.0, - "step": 40785 - }, - { - "epoch": 3.1183745245331345, - "grad_norm": 0.005836552008986473, - "learning_rate": 0.0001999952036540408, - "loss": 46.0, - "step": 40786 - }, - { - "epoch": 3.1184509815165242, - "grad_norm": 0.0006390998023562133, - "learning_rate": 0.0001999952034187864, - "loss": 46.0, - "step": 40787 - }, - { - "epoch": 3.118527438499914, - "grad_norm": 0.007171757984906435, - "learning_rate": 0.00019999520318352628, - "loss": 46.0, - "step": 40788 - }, - { - "epoch": 3.1186038954833037, - "grad_norm": 0.0007172286277636886, - "learning_rate": 0.00019999520294826034, - "loss": 46.0, - "step": 40789 - }, - { - "epoch": 3.1186803524666935, - "grad_norm": 0.002585702110081911, - "learning_rate": 0.00019999520271298863, - "loss": 46.0, - "step": 40790 - }, - { - "epoch": 3.1187568094500833, - "grad_norm": 0.0023589550983160734, - "learning_rate": 0.0001999952024777112, - "loss": 46.0, - "step": 40791 - }, - { - "epoch": 3.118833266433473, - "grad_norm": 0.002266060560941696, - "learning_rate": 0.00019999520224242795, - "loss": 46.0, - "step": 40792 - }, - { - "epoch": 3.118909723416863, - "grad_norm": 0.0011684981873258948, - "learning_rate": 0.00019999520200713895, - "loss": 46.0, - "step": 40793 - }, - { - "epoch": 3.118986180400252, - "grad_norm": 0.0021452929358929396, - "learning_rate": 0.0001999952017718442, - "loss": 46.0, - "step": 40794 - }, - { - "epoch": 3.119062637383642, - "grad_norm": 0.0008237732108682394, - "learning_rate": 0.00019999520153654365, - "loss": 46.0, - "step": 40795 - }, - { - "epoch": 3.1191390943670316, - "grad_norm": 0.0013906615786254406, - "learning_rate": 0.00019999520130123736, - "loss": 46.0, - "step": 40796 - }, - { - "epoch": 3.1192155513504214, - "grad_norm": 0.001321022748015821, - "learning_rate": 0.00019999520106592526, - "loss": 46.0, - "step": 40797 - }, - { - "epoch": 3.119292008333811, - "grad_norm": 0.002199542010203004, - "learning_rate": 0.00019999520083060742, - "loss": 46.0, - "step": 40798 - }, - { - "epoch": 3.119368465317201, - "grad_norm": 0.001273911795578897, - "learning_rate": 0.00019999520059528378, - "loss": 46.0, - "step": 40799 - }, - { - "epoch": 3.1194449223005907, - "grad_norm": 0.001626364653930068, - "learning_rate": 0.00019999520035995442, - "loss": 46.0, - "step": 40800 - }, - { - "epoch": 3.1195213792839804, - "grad_norm": 0.0016366582131013274, - "learning_rate": 0.00019999520012461925, - "loss": 46.0, - "step": 40801 - }, - { - "epoch": 3.11959783626737, - "grad_norm": 0.0027048932388424873, - "learning_rate": 0.00019999519988927832, - "loss": 46.0, - "step": 40802 - }, - { - "epoch": 3.11967429325076, - "grad_norm": 0.0011805471731349826, - "learning_rate": 0.00019999519965393164, - "loss": 46.0, - "step": 40803 - }, - { - "epoch": 3.1197507502341497, - "grad_norm": 0.0026091975159943104, - "learning_rate": 0.00019999519941857918, - "loss": 46.0, - "step": 40804 - }, - { - "epoch": 3.119827207217539, - "grad_norm": 0.0029221978038549423, - "learning_rate": 0.00019999519918322093, - "loss": 46.0, - "step": 40805 - }, - { - "epoch": 3.119903664200929, - "grad_norm": 0.001681748777627945, - "learning_rate": 0.00019999519894785695, - "loss": 46.0, - "step": 40806 - }, - { - "epoch": 3.1199801211843186, - "grad_norm": 0.003822098020464182, - "learning_rate": 0.00019999519871248718, - "loss": 46.0, - "step": 40807 - }, - { - "epoch": 3.1200565781677083, - "grad_norm": 0.003669238882139325, - "learning_rate": 0.00019999519847711163, - "loss": 46.0, - "step": 40808 - }, - { - "epoch": 3.120133035151098, - "grad_norm": 0.002753482200205326, - "learning_rate": 0.0001999951982417303, - "loss": 46.0, - "step": 40809 - }, - { - "epoch": 3.120209492134488, - "grad_norm": 0.002198306145146489, - "learning_rate": 0.00019999519800634327, - "loss": 46.0, - "step": 40810 - }, - { - "epoch": 3.1202859491178776, - "grad_norm": 0.00133318523876369, - "learning_rate": 0.0001999951977709504, - "loss": 46.0, - "step": 40811 - }, - { - "epoch": 3.1203624061012674, - "grad_norm": 0.002639633836224675, - "learning_rate": 0.00019999519753555178, - "loss": 46.0, - "step": 40812 - }, - { - "epoch": 3.120438863084657, - "grad_norm": 0.0007897822069935501, - "learning_rate": 0.0001999951973001474, - "loss": 46.0, - "step": 40813 - }, - { - "epoch": 3.120515320068047, - "grad_norm": 0.009138131514191628, - "learning_rate": 0.00019999519706473726, - "loss": 46.0, - "step": 40814 - }, - { - "epoch": 3.1205917770514366, - "grad_norm": 0.001905362936668098, - "learning_rate": 0.0001999951968293213, - "loss": 46.0, - "step": 40815 - }, - { - "epoch": 3.120668234034826, - "grad_norm": 0.0012523388722911477, - "learning_rate": 0.00019999519659389962, - "loss": 46.0, - "step": 40816 - }, - { - "epoch": 3.1207446910182157, - "grad_norm": 0.0010521799558773637, - "learning_rate": 0.00019999519635847216, - "loss": 46.0, - "step": 40817 - }, - { - "epoch": 3.1208211480016055, - "grad_norm": 0.0035618990659713745, - "learning_rate": 0.00019999519612303894, - "loss": 46.0, - "step": 40818 - }, - { - "epoch": 3.1208976049849952, - "grad_norm": 0.0010985324624925852, - "learning_rate": 0.00019999519588759994, - "loss": 46.0, - "step": 40819 - }, - { - "epoch": 3.120974061968385, - "grad_norm": 0.0011366363614797592, - "learning_rate": 0.00019999519565215516, - "loss": 46.0, - "step": 40820 - }, - { - "epoch": 3.1210505189517748, - "grad_norm": 0.004237969405949116, - "learning_rate": 0.0001999951954167046, - "loss": 46.0, - "step": 40821 - }, - { - "epoch": 3.1211269759351645, - "grad_norm": 0.0010721695143729448, - "learning_rate": 0.00019999519518124832, - "loss": 46.0, - "step": 40822 - }, - { - "epoch": 3.1212034329185543, - "grad_norm": 0.0016065388917922974, - "learning_rate": 0.00019999519494578625, - "loss": 46.0, - "step": 40823 - }, - { - "epoch": 3.121279889901944, - "grad_norm": 0.001234628725796938, - "learning_rate": 0.0001999951947103184, - "loss": 46.0, - "step": 40824 - }, - { - "epoch": 3.121356346885334, - "grad_norm": 0.0027569569647312164, - "learning_rate": 0.0001999951944748448, - "loss": 46.0, - "step": 40825 - }, - { - "epoch": 3.1214328038687236, - "grad_norm": 0.005221212282776833, - "learning_rate": 0.0001999951942393654, - "loss": 46.0, - "step": 40826 - }, - { - "epoch": 3.121509260852113, - "grad_norm": 0.0014302755007520318, - "learning_rate": 0.00019999519400388025, - "loss": 46.0, - "step": 40827 - }, - { - "epoch": 3.1215857178355026, - "grad_norm": 0.010070730932056904, - "learning_rate": 0.0001999951937683893, - "loss": 46.0, - "step": 40828 - }, - { - "epoch": 3.1216621748188924, - "grad_norm": 0.002062923740595579, - "learning_rate": 0.00019999519353289263, - "loss": 46.0, - "step": 40829 - }, - { - "epoch": 3.121738631802282, - "grad_norm": 0.0014156471006572247, - "learning_rate": 0.00019999519329739015, - "loss": 46.0, - "step": 40830 - }, - { - "epoch": 3.121815088785672, - "grad_norm": 0.0049782805144786835, - "learning_rate": 0.00019999519306188192, - "loss": 46.0, - "step": 40831 - }, - { - "epoch": 3.1218915457690617, - "grad_norm": 0.0005933191860094666, - "learning_rate": 0.00019999519282636795, - "loss": 46.0, - "step": 40832 - }, - { - "epoch": 3.1219680027524515, - "grad_norm": 0.001802325714379549, - "learning_rate": 0.00019999519259084818, - "loss": 46.0, - "step": 40833 - }, - { - "epoch": 3.122044459735841, - "grad_norm": 0.0011777272447943687, - "learning_rate": 0.00019999519235532263, - "loss": 46.0, - "step": 40834 - }, - { - "epoch": 3.122120916719231, - "grad_norm": 0.0014607589691877365, - "learning_rate": 0.00019999519211979133, - "loss": 46.0, - "step": 40835 - }, - { - "epoch": 3.1221973737026207, - "grad_norm": 0.0022352810483425856, - "learning_rate": 0.00019999519188425424, - "loss": 46.0, - "step": 40836 - }, - { - "epoch": 3.1222738306860105, - "grad_norm": 0.0024258787743747234, - "learning_rate": 0.0001999951916487114, - "loss": 46.0, - "step": 40837 - }, - { - "epoch": 3.1223502876694, - "grad_norm": 0.0013899493496865034, - "learning_rate": 0.0001999951914131628, - "loss": 46.0, - "step": 40838 - }, - { - "epoch": 3.1224267446527896, - "grad_norm": 0.0034314440563321114, - "learning_rate": 0.0001999951911776084, - "loss": 46.0, - "step": 40839 - }, - { - "epoch": 3.1225032016361793, - "grad_norm": 0.0017810367280617356, - "learning_rate": 0.00019999519094204826, - "loss": 46.0, - "step": 40840 - }, - { - "epoch": 3.122579658619569, - "grad_norm": 0.0011429385049268603, - "learning_rate": 0.00019999519070648233, - "loss": 46.0, - "step": 40841 - }, - { - "epoch": 3.122656115602959, - "grad_norm": 0.00325586274266243, - "learning_rate": 0.00019999519047091065, - "loss": 46.0, - "step": 40842 - }, - { - "epoch": 3.1227325725863486, - "grad_norm": 0.008938724175095558, - "learning_rate": 0.0001999951902353332, - "loss": 46.0, - "step": 40843 - }, - { - "epoch": 3.1228090295697384, - "grad_norm": 0.0015570831019431353, - "learning_rate": 0.00019999518999974997, - "loss": 46.0, - "step": 40844 - }, - { - "epoch": 3.122885486553128, - "grad_norm": 0.004200282972306013, - "learning_rate": 0.00019999518976416094, - "loss": 46.0, - "step": 40845 - }, - { - "epoch": 3.122961943536518, - "grad_norm": 0.00270732119679451, - "learning_rate": 0.0001999951895285662, - "loss": 46.0, - "step": 40846 - }, - { - "epoch": 3.1230384005199077, - "grad_norm": 0.0020568890031427145, - "learning_rate": 0.00019999518929296565, - "loss": 46.0, - "step": 40847 - }, - { - "epoch": 3.123114857503297, - "grad_norm": 0.0012429124908521771, - "learning_rate": 0.00019999518905735935, - "loss": 46.0, - "step": 40848 - }, - { - "epoch": 3.1231913144866867, - "grad_norm": 0.0013417612062767148, - "learning_rate": 0.00019999518882174729, - "loss": 46.0, - "step": 40849 - }, - { - "epoch": 3.1232677714700765, - "grad_norm": 0.005240780767053366, - "learning_rate": 0.00019999518858612942, - "loss": 46.0, - "step": 40850 - }, - { - "epoch": 3.1233442284534663, - "grad_norm": 0.002773192012682557, - "learning_rate": 0.0001999951883505058, - "loss": 46.0, - "step": 40851 - }, - { - "epoch": 3.123420685436856, - "grad_norm": 0.001967603573575616, - "learning_rate": 0.00019999518811487642, - "loss": 46.0, - "step": 40852 - }, - { - "epoch": 3.123497142420246, - "grad_norm": 0.0004903913359157741, - "learning_rate": 0.00019999518787924128, - "loss": 46.0, - "step": 40853 - }, - { - "epoch": 3.1235735994036355, - "grad_norm": 0.00274214381352067, - "learning_rate": 0.00019999518764360038, - "loss": 46.0, - "step": 40854 - }, - { - "epoch": 3.1236500563870253, - "grad_norm": 0.0007771796081215143, - "learning_rate": 0.00019999518740795367, - "loss": 46.0, - "step": 40855 - }, - { - "epoch": 3.123726513370415, - "grad_norm": 0.004473615903407335, - "learning_rate": 0.0001999951871723012, - "loss": 46.0, - "step": 40856 - }, - { - "epoch": 3.123802970353805, - "grad_norm": 0.0031921842601150274, - "learning_rate": 0.000199995186936643, - "loss": 46.0, - "step": 40857 - }, - { - "epoch": 3.1238794273371946, - "grad_norm": 0.003890521125867963, - "learning_rate": 0.00019999518670097896, - "loss": 46.0, - "step": 40858 - }, - { - "epoch": 3.1239558843205844, - "grad_norm": 0.0012546718353405595, - "learning_rate": 0.00019999518646530922, - "loss": 46.0, - "step": 40859 - }, - { - "epoch": 3.1240323413039737, - "grad_norm": 0.0017951064510270953, - "learning_rate": 0.0001999951862296337, - "loss": 46.0, - "step": 40860 - }, - { - "epoch": 3.1241087982873634, - "grad_norm": 0.0006624902016483247, - "learning_rate": 0.00019999518599395238, - "loss": 46.0, - "step": 40861 - }, - { - "epoch": 3.124185255270753, - "grad_norm": 0.0021080849692225456, - "learning_rate": 0.00019999518575826528, - "loss": 46.0, - "step": 40862 - }, - { - "epoch": 3.124261712254143, - "grad_norm": 0.0017199330031871796, - "learning_rate": 0.00019999518552257244, - "loss": 46.0, - "step": 40863 - }, - { - "epoch": 3.1243381692375327, - "grad_norm": 0.0022995760664343834, - "learning_rate": 0.00019999518528687386, - "loss": 46.0, - "step": 40864 - }, - { - "epoch": 3.1244146262209225, - "grad_norm": 0.008426333777606487, - "learning_rate": 0.00019999518505116944, - "loss": 46.0, - "step": 40865 - }, - { - "epoch": 3.1244910832043122, - "grad_norm": 0.0014875649940222502, - "learning_rate": 0.0001999951848154593, - "loss": 46.0, - "step": 40866 - }, - { - "epoch": 3.124567540187702, - "grad_norm": 0.000963767000939697, - "learning_rate": 0.00019999518457974337, - "loss": 46.0, - "step": 40867 - }, - { - "epoch": 3.1246439971710918, - "grad_norm": 0.0027432607021182775, - "learning_rate": 0.0001999951843440217, - "loss": 46.0, - "step": 40868 - }, - { - "epoch": 3.1247204541544815, - "grad_norm": 0.0024546433705836535, - "learning_rate": 0.00019999518410829424, - "loss": 46.0, - "step": 40869 - }, - { - "epoch": 3.124796911137871, - "grad_norm": 0.0009702792740426958, - "learning_rate": 0.00019999518387256102, - "loss": 46.0, - "step": 40870 - }, - { - "epoch": 3.1248733681212606, - "grad_norm": 0.0008586355252191424, - "learning_rate": 0.00019999518363682202, - "loss": 46.0, - "step": 40871 - }, - { - "epoch": 3.1249498251046504, - "grad_norm": 0.002300516003742814, - "learning_rate": 0.00019999518340107724, - "loss": 46.0, - "step": 40872 - }, - { - "epoch": 3.12502628208804, - "grad_norm": 0.0014963094145059586, - "learning_rate": 0.0001999951831653267, - "loss": 46.0, - "step": 40873 - }, - { - "epoch": 3.12510273907143, - "grad_norm": 0.0008829200523905456, - "learning_rate": 0.0001999951829295704, - "loss": 46.0, - "step": 40874 - }, - { - "epoch": 3.1251791960548196, - "grad_norm": 0.001190814538858831, - "learning_rate": 0.00019999518269380834, - "loss": 46.0, - "step": 40875 - }, - { - "epoch": 3.1252556530382094, - "grad_norm": 0.004890835378319025, - "learning_rate": 0.00019999518245804047, - "loss": 46.0, - "step": 40876 - }, - { - "epoch": 3.125332110021599, - "grad_norm": 0.002724532736465335, - "learning_rate": 0.00019999518222226688, - "loss": 46.0, - "step": 40877 - }, - { - "epoch": 3.125408567004989, - "grad_norm": 0.0015181906055659056, - "learning_rate": 0.0001999951819864875, - "loss": 46.0, - "step": 40878 - }, - { - "epoch": 3.1254850239883787, - "grad_norm": 0.0008061067201197147, - "learning_rate": 0.00019999518175070234, - "loss": 46.0, - "step": 40879 - }, - { - "epoch": 3.1255614809717684, - "grad_norm": 0.006594307720661163, - "learning_rate": 0.00019999518151491143, - "loss": 46.0, - "step": 40880 - }, - { - "epoch": 3.125637937955158, - "grad_norm": 0.0028224645648151636, - "learning_rate": 0.00019999518127911475, - "loss": 46.0, - "step": 40881 - }, - { - "epoch": 3.1257143949385475, - "grad_norm": 0.0022636328358203173, - "learning_rate": 0.00019999518104331227, - "loss": 46.0, - "step": 40882 - }, - { - "epoch": 3.1257908519219373, - "grad_norm": 0.004256364889442921, - "learning_rate": 0.00019999518080750405, - "loss": 46.0, - "step": 40883 - }, - { - "epoch": 3.125867308905327, - "grad_norm": 0.0014648041687905788, - "learning_rate": 0.00019999518057169007, - "loss": 46.0, - "step": 40884 - }, - { - "epoch": 3.125943765888717, - "grad_norm": 0.002286911476403475, - "learning_rate": 0.0001999951803358703, - "loss": 46.0, - "step": 40885 - }, - { - "epoch": 3.1260202228721066, - "grad_norm": 0.003924392629414797, - "learning_rate": 0.00019999518010004475, - "loss": 46.0, - "step": 40886 - }, - { - "epoch": 3.1260966798554963, - "grad_norm": 0.00516595970839262, - "learning_rate": 0.00019999517986421346, - "loss": 46.0, - "step": 40887 - }, - { - "epoch": 3.126173136838886, - "grad_norm": 0.0040839132852852345, - "learning_rate": 0.00019999517962837637, - "loss": 46.0, - "step": 40888 - }, - { - "epoch": 3.126249593822276, - "grad_norm": 0.0007673455402255058, - "learning_rate": 0.00019999517939253353, - "loss": 46.0, - "step": 40889 - }, - { - "epoch": 3.1263260508056656, - "grad_norm": 0.0015977041330188513, - "learning_rate": 0.00019999517915668494, - "loss": 46.0, - "step": 40890 - }, - { - "epoch": 3.1264025077890554, - "grad_norm": 0.002916666679084301, - "learning_rate": 0.00019999517892083053, - "loss": 46.0, - "step": 40891 - }, - { - "epoch": 3.1264789647724447, - "grad_norm": 0.004950730595737696, - "learning_rate": 0.0001999951786849704, - "loss": 46.0, - "step": 40892 - }, - { - "epoch": 3.1265554217558345, - "grad_norm": 0.0007169688469730318, - "learning_rate": 0.00019999517844910446, - "loss": 46.0, - "step": 40893 - }, - { - "epoch": 3.126631878739224, - "grad_norm": 0.001416036975570023, - "learning_rate": 0.00019999517821323276, - "loss": 46.0, - "step": 40894 - }, - { - "epoch": 3.126708335722614, - "grad_norm": 0.0038079284131526947, - "learning_rate": 0.00019999517797735533, - "loss": 46.0, - "step": 40895 - }, - { - "epoch": 3.1267847927060037, - "grad_norm": 0.0025651552714407444, - "learning_rate": 0.0001999951777414721, - "loss": 46.0, - "step": 40896 - }, - { - "epoch": 3.1268612496893935, - "grad_norm": 0.0006050064112059772, - "learning_rate": 0.0001999951775055831, - "loss": 46.0, - "step": 40897 - }, - { - "epoch": 3.1269377066727833, - "grad_norm": 0.00445164181292057, - "learning_rate": 0.00019999517726968833, - "loss": 46.0, - "step": 40898 - }, - { - "epoch": 3.127014163656173, - "grad_norm": 0.002387454267591238, - "learning_rate": 0.00019999517703378782, - "loss": 46.0, - "step": 40899 - }, - { - "epoch": 3.127090620639563, - "grad_norm": 0.000696027185767889, - "learning_rate": 0.00019999517679788152, - "loss": 46.0, - "step": 40900 - }, - { - "epoch": 3.1271670776229525, - "grad_norm": 0.004150791559368372, - "learning_rate": 0.00019999517656196943, - "loss": 46.0, - "step": 40901 - }, - { - "epoch": 3.1272435346063423, - "grad_norm": 0.000997581286355853, - "learning_rate": 0.0001999951763260516, - "loss": 46.0, - "step": 40902 - }, - { - "epoch": 3.127319991589732, - "grad_norm": 0.002457044553011656, - "learning_rate": 0.00019999517609012798, - "loss": 46.0, - "step": 40903 - }, - { - "epoch": 3.1273964485731214, - "grad_norm": 0.0027247476391494274, - "learning_rate": 0.0001999951758541986, - "loss": 46.0, - "step": 40904 - }, - { - "epoch": 3.127472905556511, - "grad_norm": 0.0008025128045119345, - "learning_rate": 0.00019999517561826346, - "loss": 46.0, - "step": 40905 - }, - { - "epoch": 3.127549362539901, - "grad_norm": 0.002650618087500334, - "learning_rate": 0.00019999517538232253, - "loss": 46.0, - "step": 40906 - }, - { - "epoch": 3.1276258195232907, - "grad_norm": 0.0015751130413264036, - "learning_rate": 0.00019999517514637585, - "loss": 46.0, - "step": 40907 - }, - { - "epoch": 3.1277022765066804, - "grad_norm": 0.0021378311794251204, - "learning_rate": 0.0001999951749104234, - "loss": 46.0, - "step": 40908 - }, - { - "epoch": 3.12777873349007, - "grad_norm": 0.004254310857504606, - "learning_rate": 0.00019999517467446517, - "loss": 46.0, - "step": 40909 - }, - { - "epoch": 3.12785519047346, - "grad_norm": 0.0014484584098681808, - "learning_rate": 0.00019999517443850117, - "loss": 46.0, - "step": 40910 - }, - { - "epoch": 3.1279316474568497, - "grad_norm": 0.002101573860272765, - "learning_rate": 0.0001999951742025314, - "loss": 46.0, - "step": 40911 - }, - { - "epoch": 3.1280081044402395, - "grad_norm": 0.001584112411364913, - "learning_rate": 0.00019999517396655588, - "loss": 46.0, - "step": 40912 - }, - { - "epoch": 3.1280845614236292, - "grad_norm": 0.003157875034958124, - "learning_rate": 0.00019999517373057456, - "loss": 46.0, - "step": 40913 - }, - { - "epoch": 3.1281610184070185, - "grad_norm": 0.002584375673905015, - "learning_rate": 0.0001999951734945875, - "loss": 46.0, - "step": 40914 - }, - { - "epoch": 3.1282374753904083, - "grad_norm": 0.0015856287209317088, - "learning_rate": 0.00019999517325859466, - "loss": 46.0, - "step": 40915 - }, - { - "epoch": 3.128313932373798, - "grad_norm": 0.001126607065089047, - "learning_rate": 0.00019999517302259605, - "loss": 46.0, - "step": 40916 - }, - { - "epoch": 3.128390389357188, - "grad_norm": 0.0015187772223725915, - "learning_rate": 0.0001999951727865917, - "loss": 46.0, - "step": 40917 - }, - { - "epoch": 3.1284668463405776, - "grad_norm": 0.0017307294765487313, - "learning_rate": 0.0001999951725505815, - "loss": 46.0, - "step": 40918 - }, - { - "epoch": 3.1285433033239674, - "grad_norm": 0.0010536620393395424, - "learning_rate": 0.0001999951723145656, - "loss": 46.0, - "step": 40919 - }, - { - "epoch": 3.128619760307357, - "grad_norm": 0.0030114694964140654, - "learning_rate": 0.00019999517207854392, - "loss": 46.0, - "step": 40920 - }, - { - "epoch": 3.128696217290747, - "grad_norm": 0.0014078689273446798, - "learning_rate": 0.00019999517184251647, - "loss": 46.0, - "step": 40921 - }, - { - "epoch": 3.1287726742741366, - "grad_norm": 0.002471883548423648, - "learning_rate": 0.0001999951716064832, - "loss": 46.0, - "step": 40922 - }, - { - "epoch": 3.1288491312575264, - "grad_norm": 0.004802376497536898, - "learning_rate": 0.00019999517137044424, - "loss": 46.0, - "step": 40923 - }, - { - "epoch": 3.128925588240916, - "grad_norm": 0.004977057222276926, - "learning_rate": 0.00019999517113439947, - "loss": 46.0, - "step": 40924 - }, - { - "epoch": 3.1290020452243055, - "grad_norm": 0.008620427921414375, - "learning_rate": 0.00019999517089834895, - "loss": 46.0, - "step": 40925 - }, - { - "epoch": 3.1290785022076952, - "grad_norm": 0.001090528559871018, - "learning_rate": 0.00019999517066229263, - "loss": 46.0, - "step": 40926 - }, - { - "epoch": 3.129154959191085, - "grad_norm": 0.0009361812844872475, - "learning_rate": 0.00019999517042623057, - "loss": 46.0, - "step": 40927 - }, - { - "epoch": 3.1292314161744748, - "grad_norm": 0.0031330594792962074, - "learning_rate": 0.00019999517019016273, - "loss": 46.0, - "step": 40928 - }, - { - "epoch": 3.1293078731578645, - "grad_norm": 0.0030115721747279167, - "learning_rate": 0.00019999516995408912, - "loss": 46.0, - "step": 40929 - }, - { - "epoch": 3.1293843301412543, - "grad_norm": 0.0014377201441675425, - "learning_rate": 0.00019999516971800973, - "loss": 46.0, - "step": 40930 - }, - { - "epoch": 3.129460787124644, - "grad_norm": 0.004471286665648222, - "learning_rate": 0.0001999951694819246, - "loss": 46.0, - "step": 40931 - }, - { - "epoch": 3.129537244108034, - "grad_norm": 0.000843784655444324, - "learning_rate": 0.00019999516924583367, - "loss": 46.0, - "step": 40932 - }, - { - "epoch": 3.1296137010914236, - "grad_norm": 0.005641388241201639, - "learning_rate": 0.000199995169009737, - "loss": 46.0, - "step": 40933 - }, - { - "epoch": 3.1296901580748133, - "grad_norm": 0.0013651661574840546, - "learning_rate": 0.00019999516877363454, - "loss": 46.0, - "step": 40934 - }, - { - "epoch": 3.129766615058203, - "grad_norm": 0.0029702791944146156, - "learning_rate": 0.00019999516853752631, - "loss": 46.0, - "step": 40935 - }, - { - "epoch": 3.1298430720415924, - "grad_norm": 0.0022475728765130043, - "learning_rate": 0.00019999516830141232, - "loss": 46.0, - "step": 40936 - }, - { - "epoch": 3.129919529024982, - "grad_norm": 0.004736334551125765, - "learning_rate": 0.00019999516806529254, - "loss": 46.0, - "step": 40937 - }, - { - "epoch": 3.129995986008372, - "grad_norm": 0.0020905614364892244, - "learning_rate": 0.00019999516782916703, - "loss": 46.0, - "step": 40938 - }, - { - "epoch": 3.1300724429917617, - "grad_norm": 0.006155779119580984, - "learning_rate": 0.0001999951675930357, - "loss": 46.0, - "step": 40939 - }, - { - "epoch": 3.1301488999751514, - "grad_norm": 0.0017561701824888587, - "learning_rate": 0.00019999516735689864, - "loss": 46.0, - "step": 40940 - }, - { - "epoch": 3.130225356958541, - "grad_norm": 0.0017041712999343872, - "learning_rate": 0.0001999951671207558, - "loss": 46.0, - "step": 40941 - }, - { - "epoch": 3.130301813941931, - "grad_norm": 0.0007449098629876971, - "learning_rate": 0.0001999951668846072, - "loss": 46.0, - "step": 40942 - }, - { - "epoch": 3.1303782709253207, - "grad_norm": 0.0015767990844324231, - "learning_rate": 0.00019999516664845283, - "loss": 46.0, - "step": 40943 - }, - { - "epoch": 3.1304547279087105, - "grad_norm": 0.003999115899205208, - "learning_rate": 0.00019999516641229268, - "loss": 46.0, - "step": 40944 - }, - { - "epoch": 3.1305311848921002, - "grad_norm": 0.0011869925074279308, - "learning_rate": 0.00019999516617612674, - "loss": 46.0, - "step": 40945 - }, - { - "epoch": 3.13060764187549, - "grad_norm": 0.0017360768979415298, - "learning_rate": 0.0001999951659399551, - "loss": 46.0, - "step": 40946 - }, - { - "epoch": 3.1306840988588793, - "grad_norm": 0.0018099526641890407, - "learning_rate": 0.00019999516570377762, - "loss": 46.0, - "step": 40947 - }, - { - "epoch": 3.130760555842269, - "grad_norm": 0.0008087501628324389, - "learning_rate": 0.0001999951654675944, - "loss": 46.0, - "step": 40948 - }, - { - "epoch": 3.130837012825659, - "grad_norm": 0.004666229244321585, - "learning_rate": 0.0001999951652314054, - "loss": 46.0, - "step": 40949 - }, - { - "epoch": 3.1309134698090486, - "grad_norm": 0.002313598059117794, - "learning_rate": 0.00019999516499521062, - "loss": 46.0, - "step": 40950 - }, - { - "epoch": 3.1309899267924384, - "grad_norm": 0.005280129611492157, - "learning_rate": 0.0001999951647590101, - "loss": 46.0, - "step": 40951 - }, - { - "epoch": 3.131066383775828, - "grad_norm": 0.0037140948697924614, - "learning_rate": 0.0001999951645228038, - "loss": 46.0, - "step": 40952 - }, - { - "epoch": 3.131142840759218, - "grad_norm": 0.0010154227493330836, - "learning_rate": 0.00019999516428659172, - "loss": 46.0, - "step": 40953 - }, - { - "epoch": 3.1312192977426077, - "grad_norm": 0.002763459226116538, - "learning_rate": 0.00019999516405037388, - "loss": 46.0, - "step": 40954 - }, - { - "epoch": 3.1312957547259974, - "grad_norm": 0.0035257311537861824, - "learning_rate": 0.0001999951638141503, - "loss": 46.0, - "step": 40955 - }, - { - "epoch": 3.131372211709387, - "grad_norm": 0.007086979225277901, - "learning_rate": 0.00019999516357792091, - "loss": 46.0, - "step": 40956 - }, - { - "epoch": 3.1314486686927765, - "grad_norm": 0.0026205633766949177, - "learning_rate": 0.00019999516334168576, - "loss": 46.0, - "step": 40957 - }, - { - "epoch": 3.1315251256761663, - "grad_norm": 0.000860468833707273, - "learning_rate": 0.00019999516310544485, - "loss": 46.0, - "step": 40958 - }, - { - "epoch": 3.131601582659556, - "grad_norm": 0.002395564690232277, - "learning_rate": 0.00019999516286919818, - "loss": 46.0, - "step": 40959 - }, - { - "epoch": 3.1316780396429458, - "grad_norm": 0.0009179324260912836, - "learning_rate": 0.0001999951626329457, - "loss": 46.0, - "step": 40960 - }, - { - "epoch": 3.1317544966263355, - "grad_norm": 0.0032772414851933718, - "learning_rate": 0.0001999951623966875, - "loss": 46.0, - "step": 40961 - }, - { - "epoch": 3.1318309536097253, - "grad_norm": 0.0012345022987574339, - "learning_rate": 0.0001999951621604235, - "loss": 46.0, - "step": 40962 - }, - { - "epoch": 3.131907410593115, - "grad_norm": 0.0016557055059820414, - "learning_rate": 0.00019999516192415376, - "loss": 46.0, - "step": 40963 - }, - { - "epoch": 3.131983867576505, - "grad_norm": 0.005347746890038252, - "learning_rate": 0.0001999951616878782, - "loss": 46.0, - "step": 40964 - }, - { - "epoch": 3.1320603245598946, - "grad_norm": 0.002023885026574135, - "learning_rate": 0.00019999516145159692, - "loss": 46.0, - "step": 40965 - }, - { - "epoch": 3.1321367815432843, - "grad_norm": 0.0017879637889564037, - "learning_rate": 0.00019999516121530983, - "loss": 46.0, - "step": 40966 - }, - { - "epoch": 3.132213238526674, - "grad_norm": 0.004732067231088877, - "learning_rate": 0.000199995160979017, - "loss": 46.0, - "step": 40967 - }, - { - "epoch": 3.132289695510064, - "grad_norm": 0.0028899898752570152, - "learning_rate": 0.0001999951607427184, - "loss": 46.0, - "step": 40968 - }, - { - "epoch": 3.132366152493453, - "grad_norm": 0.002965432358905673, - "learning_rate": 0.000199995160506414, - "loss": 46.0, - "step": 40969 - }, - { - "epoch": 3.132442609476843, - "grad_norm": 0.0026841112412512302, - "learning_rate": 0.00019999516027010387, - "loss": 46.0, - "step": 40970 - }, - { - "epoch": 3.1325190664602327, - "grad_norm": 0.003204314736649394, - "learning_rate": 0.00019999516003378796, - "loss": 46.0, - "step": 40971 - }, - { - "epoch": 3.1325955234436225, - "grad_norm": 0.0020025165285915136, - "learning_rate": 0.00019999515979746626, - "loss": 46.0, - "step": 40972 - }, - { - "epoch": 3.1326719804270122, - "grad_norm": 0.0028681980911642313, - "learning_rate": 0.0001999951595611388, - "loss": 46.0, - "step": 40973 - }, - { - "epoch": 3.132748437410402, - "grad_norm": 0.0038651989307254553, - "learning_rate": 0.00019999515932480559, - "loss": 46.0, - "step": 40974 - }, - { - "epoch": 3.1328248943937917, - "grad_norm": 0.004710248671472073, - "learning_rate": 0.0001999951590884666, - "loss": 46.0, - "step": 40975 - }, - { - "epoch": 3.1329013513771815, - "grad_norm": 0.0015670906286686659, - "learning_rate": 0.00019999515885212184, - "loss": 46.0, - "step": 40976 - }, - { - "epoch": 3.1329778083605713, - "grad_norm": 0.005499074701219797, - "learning_rate": 0.00019999515861577133, - "loss": 46.0, - "step": 40977 - }, - { - "epoch": 3.133054265343961, - "grad_norm": 0.004043113440275192, - "learning_rate": 0.00019999515837941504, - "loss": 46.0, - "step": 40978 - }, - { - "epoch": 3.1331307223273503, - "grad_norm": 0.004674827679991722, - "learning_rate": 0.00019999515814305295, - "loss": 46.0, - "step": 40979 - }, - { - "epoch": 3.13320717931074, - "grad_norm": 0.0018757217330858111, - "learning_rate": 0.00019999515790668514, - "loss": 46.0, - "step": 40980 - }, - { - "epoch": 3.13328363629413, - "grad_norm": 0.005505661014467478, - "learning_rate": 0.0001999951576703115, - "loss": 46.0, - "step": 40981 - }, - { - "epoch": 3.1333600932775196, - "grad_norm": 0.00399810541421175, - "learning_rate": 0.00019999515743393214, - "loss": 46.0, - "step": 40982 - }, - { - "epoch": 3.1334365502609094, - "grad_norm": 0.0053443508222699165, - "learning_rate": 0.00019999515719754698, - "loss": 46.0, - "step": 40983 - }, - { - "epoch": 3.133513007244299, - "grad_norm": 0.013109898194670677, - "learning_rate": 0.00019999515696115608, - "loss": 46.0, - "step": 40984 - }, - { - "epoch": 3.133589464227689, - "grad_norm": 0.0013333922252058983, - "learning_rate": 0.0001999951567247594, - "loss": 46.0, - "step": 40985 - }, - { - "epoch": 3.1336659212110787, - "grad_norm": 0.0017973404610529542, - "learning_rate": 0.00019999515648835693, - "loss": 46.0, - "step": 40986 - }, - { - "epoch": 3.1337423781944684, - "grad_norm": 0.0013133450411260128, - "learning_rate": 0.0001999951562519487, - "loss": 46.0, - "step": 40987 - }, - { - "epoch": 3.133818835177858, - "grad_norm": 0.0064160265028476715, - "learning_rate": 0.00019999515601553473, - "loss": 46.0, - "step": 40988 - }, - { - "epoch": 3.133895292161248, - "grad_norm": 0.002278312109410763, - "learning_rate": 0.00019999515577911496, - "loss": 46.0, - "step": 40989 - }, - { - "epoch": 3.1339717491446377, - "grad_norm": 0.004158418159931898, - "learning_rate": 0.00019999515554268942, - "loss": 46.0, - "step": 40990 - }, - { - "epoch": 3.134048206128027, - "grad_norm": 0.004669709596782923, - "learning_rate": 0.00019999515530625815, - "loss": 46.0, - "step": 40991 - }, - { - "epoch": 3.134124663111417, - "grad_norm": 0.0017834221944212914, - "learning_rate": 0.00019999515506982106, - "loss": 46.0, - "step": 40992 - }, - { - "epoch": 3.1342011200948066, - "grad_norm": 0.0005970390047878027, - "learning_rate": 0.00019999515483337823, - "loss": 46.0, - "step": 40993 - }, - { - "epoch": 3.1342775770781963, - "grad_norm": 0.0050776260904967785, - "learning_rate": 0.00019999515459692964, - "loss": 46.0, - "step": 40994 - }, - { - "epoch": 3.134354034061586, - "grad_norm": 0.002877766266465187, - "learning_rate": 0.00019999515436047523, - "loss": 46.0, - "step": 40995 - }, - { - "epoch": 3.134430491044976, - "grad_norm": 0.0024728337302803993, - "learning_rate": 0.0001999951541240151, - "loss": 46.0, - "step": 40996 - }, - { - "epoch": 3.1345069480283656, - "grad_norm": 0.0013357122661545873, - "learning_rate": 0.0001999951538875492, - "loss": 46.0, - "step": 40997 - }, - { - "epoch": 3.1345834050117554, - "grad_norm": 0.0009288405999541283, - "learning_rate": 0.0001999951536510775, - "loss": 46.0, - "step": 40998 - }, - { - "epoch": 3.134659861995145, - "grad_norm": 0.0009051059023477137, - "learning_rate": 0.00019999515341460007, - "loss": 46.0, - "step": 40999 - }, - { - "epoch": 3.134736318978535, - "grad_norm": 0.0024559807498008013, - "learning_rate": 0.00019999515317811682, - "loss": 46.0, - "step": 41000 - }, - { - "epoch": 3.134812775961924, - "grad_norm": 0.004311062395572662, - "learning_rate": 0.00019999515294162785, - "loss": 46.0, - "step": 41001 - }, - { - "epoch": 3.134889232945314, - "grad_norm": 0.002960910787805915, - "learning_rate": 0.00019999515270513308, - "loss": 46.0, - "step": 41002 - }, - { - "epoch": 3.1349656899287037, - "grad_norm": 0.0013237728271633387, - "learning_rate": 0.00019999515246863257, - "loss": 46.0, - "step": 41003 - }, - { - "epoch": 3.1350421469120935, - "grad_norm": 0.0033009194303303957, - "learning_rate": 0.00019999515223212625, - "loss": 46.0, - "step": 41004 - }, - { - "epoch": 3.1351186038954832, - "grad_norm": 0.0016189126763492823, - "learning_rate": 0.00019999515199561421, - "loss": 46.0, - "step": 41005 - }, - { - "epoch": 3.135195060878873, - "grad_norm": 0.001902610994875431, - "learning_rate": 0.00019999515175909635, - "loss": 46.0, - "step": 41006 - }, - { - "epoch": 3.1352715178622628, - "grad_norm": 0.0012386628659442067, - "learning_rate": 0.00019999515152257277, - "loss": 46.0, - "step": 41007 - }, - { - "epoch": 3.1353479748456525, - "grad_norm": 0.0005118967383168638, - "learning_rate": 0.00019999515128604338, - "loss": 46.0, - "step": 41008 - }, - { - "epoch": 3.1354244318290423, - "grad_norm": 0.002692008623853326, - "learning_rate": 0.00019999515104950823, - "loss": 46.0, - "step": 41009 - }, - { - "epoch": 3.135500888812432, - "grad_norm": 0.003653703723102808, - "learning_rate": 0.00019999515081296732, - "loss": 46.0, - "step": 41010 - }, - { - "epoch": 3.135577345795822, - "grad_norm": 0.005738893058151007, - "learning_rate": 0.00019999515057642065, - "loss": 46.0, - "step": 41011 - }, - { - "epoch": 3.1356538027792116, - "grad_norm": 0.004237464163452387, - "learning_rate": 0.00019999515033986817, - "loss": 46.0, - "step": 41012 - }, - { - "epoch": 3.135730259762601, - "grad_norm": 0.003687287215143442, - "learning_rate": 0.00019999515010330997, - "loss": 46.0, - "step": 41013 - }, - { - "epoch": 3.1358067167459907, - "grad_norm": 0.0021057743579149246, - "learning_rate": 0.00019999514986674598, - "loss": 46.0, - "step": 41014 - }, - { - "epoch": 3.1358831737293804, - "grad_norm": 0.0025039201136678457, - "learning_rate": 0.00019999514963017624, - "loss": 46.0, - "step": 41015 - }, - { - "epoch": 3.13595963071277, - "grad_norm": 0.0032272557727992535, - "learning_rate": 0.00019999514939360072, - "loss": 46.0, - "step": 41016 - }, - { - "epoch": 3.13603608769616, - "grad_norm": 0.004893191158771515, - "learning_rate": 0.0001999951491570194, - "loss": 46.0, - "step": 41017 - }, - { - "epoch": 3.1361125446795497, - "grad_norm": 0.0007426108932122588, - "learning_rate": 0.0001999951489204323, - "loss": 46.0, - "step": 41018 - }, - { - "epoch": 3.1361890016629395, - "grad_norm": 0.0015161726623773575, - "learning_rate": 0.00019999514868383948, - "loss": 46.0, - "step": 41019 - }, - { - "epoch": 3.136265458646329, - "grad_norm": 0.002347435336560011, - "learning_rate": 0.0001999951484472409, - "loss": 46.0, - "step": 41020 - }, - { - "epoch": 3.136341915629719, - "grad_norm": 0.006625575479120016, - "learning_rate": 0.0001999951482106365, - "loss": 46.0, - "step": 41021 - }, - { - "epoch": 3.1364183726131087, - "grad_norm": 0.0017947033047676086, - "learning_rate": 0.00019999514797402638, - "loss": 46.0, - "step": 41022 - }, - { - "epoch": 3.136494829596498, - "grad_norm": 0.002447431907057762, - "learning_rate": 0.00019999514773741045, - "loss": 46.0, - "step": 41023 - }, - { - "epoch": 3.136571286579888, - "grad_norm": 0.0013351301895454526, - "learning_rate": 0.00019999514750078878, - "loss": 46.0, - "step": 41024 - }, - { - "epoch": 3.1366477435632776, - "grad_norm": 0.0013199971290305257, - "learning_rate": 0.00019999514726416133, - "loss": 46.0, - "step": 41025 - }, - { - "epoch": 3.1367242005466673, - "grad_norm": 0.002232053317129612, - "learning_rate": 0.00019999514702752813, - "loss": 46.0, - "step": 41026 - }, - { - "epoch": 3.136800657530057, - "grad_norm": 0.0015525601338595152, - "learning_rate": 0.00019999514679088913, - "loss": 46.0, - "step": 41027 - }, - { - "epoch": 3.136877114513447, - "grad_norm": 0.0038366890512406826, - "learning_rate": 0.00019999514655424437, - "loss": 46.0, - "step": 41028 - }, - { - "epoch": 3.1369535714968366, - "grad_norm": 0.0017662885366007686, - "learning_rate": 0.00019999514631759385, - "loss": 46.0, - "step": 41029 - }, - { - "epoch": 3.1370300284802264, - "grad_norm": 0.0016639424720779061, - "learning_rate": 0.00019999514608093753, - "loss": 46.0, - "step": 41030 - }, - { - "epoch": 3.137106485463616, - "grad_norm": 0.0012837244430556893, - "learning_rate": 0.00019999514584427547, - "loss": 46.0, - "step": 41031 - }, - { - "epoch": 3.137182942447006, - "grad_norm": 0.0009590934496372938, - "learning_rate": 0.00019999514560760763, - "loss": 46.0, - "step": 41032 - }, - { - "epoch": 3.1372593994303957, - "grad_norm": 0.001776376971974969, - "learning_rate": 0.00019999514537093405, - "loss": 46.0, - "step": 41033 - }, - { - "epoch": 3.1373358564137854, - "grad_norm": 0.0022420771420001984, - "learning_rate": 0.00019999514513425467, - "loss": 46.0, - "step": 41034 - }, - { - "epoch": 3.1374123133971747, - "grad_norm": 0.0024783953558653593, - "learning_rate": 0.00019999514489756951, - "loss": 46.0, - "step": 41035 - }, - { - "epoch": 3.1374887703805645, - "grad_norm": 0.0023346985690295696, - "learning_rate": 0.00019999514466087864, - "loss": 46.0, - "step": 41036 - }, - { - "epoch": 3.1375652273639543, - "grad_norm": 0.0019331456860527396, - "learning_rate": 0.00019999514442418194, - "loss": 46.0, - "step": 41037 - }, - { - "epoch": 3.137641684347344, - "grad_norm": 0.00319826602935791, - "learning_rate": 0.0001999951441874795, - "loss": 46.0, - "step": 41038 - }, - { - "epoch": 3.137718141330734, - "grad_norm": 0.0022392943501472473, - "learning_rate": 0.0001999951439507713, - "loss": 46.0, - "step": 41039 - }, - { - "epoch": 3.1377945983141236, - "grad_norm": 0.004793962929397821, - "learning_rate": 0.0001999951437140573, - "loss": 46.0, - "step": 41040 - }, - { - "epoch": 3.1378710552975133, - "grad_norm": 0.0022390270605683327, - "learning_rate": 0.00019999514347733753, - "loss": 46.0, - "step": 41041 - }, - { - "epoch": 3.137947512280903, - "grad_norm": 0.0005360444774851203, - "learning_rate": 0.000199995143240612, - "loss": 46.0, - "step": 41042 - }, - { - "epoch": 3.138023969264293, - "grad_norm": 0.001332034938968718, - "learning_rate": 0.00019999514300388072, - "loss": 46.0, - "step": 41043 - }, - { - "epoch": 3.1381004262476826, - "grad_norm": 0.002849350683391094, - "learning_rate": 0.00019999514276714363, - "loss": 46.0, - "step": 41044 - }, - { - "epoch": 3.138176883231072, - "grad_norm": 0.0033385315909981728, - "learning_rate": 0.0001999951425304008, - "loss": 46.0, - "step": 41045 - }, - { - "epoch": 3.1382533402144617, - "grad_norm": 0.004104383755475283, - "learning_rate": 0.00019999514229365222, - "loss": 46.0, - "step": 41046 - }, - { - "epoch": 3.1383297971978514, - "grad_norm": 0.0036129062063992023, - "learning_rate": 0.00019999514205689783, - "loss": 46.0, - "step": 41047 - }, - { - "epoch": 3.138406254181241, - "grad_norm": 0.001940494286827743, - "learning_rate": 0.0001999951418201377, - "loss": 46.0, - "step": 41048 - }, - { - "epoch": 3.138482711164631, - "grad_norm": 0.000988120213150978, - "learning_rate": 0.00019999514158337177, - "loss": 46.0, - "step": 41049 - }, - { - "epoch": 3.1385591681480207, - "grad_norm": 0.0008883534464985132, - "learning_rate": 0.0001999951413466001, - "loss": 46.0, - "step": 41050 - }, - { - "epoch": 3.1386356251314105, - "grad_norm": 0.0050128973089158535, - "learning_rate": 0.00019999514110982265, - "loss": 46.0, - "step": 41051 - }, - { - "epoch": 3.1387120821148002, - "grad_norm": 0.0031030410900712013, - "learning_rate": 0.00019999514087303943, - "loss": 46.0, - "step": 41052 - }, - { - "epoch": 3.13878853909819, - "grad_norm": 0.002500659553334117, - "learning_rate": 0.00019999514063625043, - "loss": 46.0, - "step": 41053 - }, - { - "epoch": 3.1388649960815798, - "grad_norm": 0.0019398679723963141, - "learning_rate": 0.0001999951403994557, - "loss": 46.0, - "step": 41054 - }, - { - "epoch": 3.1389414530649695, - "grad_norm": 0.0017691837856546044, - "learning_rate": 0.00019999514016265518, - "loss": 46.0, - "step": 41055 - }, - { - "epoch": 3.139017910048359, - "grad_norm": 0.00732882646843791, - "learning_rate": 0.0001999951399258489, - "loss": 46.0, - "step": 41056 - }, - { - "epoch": 3.1390943670317486, - "grad_norm": 0.0009454029495827854, - "learning_rate": 0.0001999951396890368, - "loss": 46.0, - "step": 41057 - }, - { - "epoch": 3.1391708240151384, - "grad_norm": 0.0028136447072029114, - "learning_rate": 0.00019999513945221896, - "loss": 46.0, - "step": 41058 - }, - { - "epoch": 3.139247280998528, - "grad_norm": 0.003275540191680193, - "learning_rate": 0.00019999513921539535, - "loss": 46.0, - "step": 41059 - }, - { - "epoch": 3.139323737981918, - "grad_norm": 0.007772588171064854, - "learning_rate": 0.000199995138978566, - "loss": 46.0, - "step": 41060 - }, - { - "epoch": 3.1394001949653076, - "grad_norm": 0.0006942058098502457, - "learning_rate": 0.00019999513874173087, - "loss": 46.0, - "step": 41061 - }, - { - "epoch": 3.1394766519486974, - "grad_norm": 0.004055796191096306, - "learning_rate": 0.00019999513850488994, - "loss": 46.0, - "step": 41062 - }, - { - "epoch": 3.139553108932087, - "grad_norm": 0.0016616667853668332, - "learning_rate": 0.00019999513826804327, - "loss": 46.0, - "step": 41063 - }, - { - "epoch": 3.139629565915477, - "grad_norm": 0.0011594309471547604, - "learning_rate": 0.00019999513803119082, - "loss": 46.0, - "step": 41064 - }, - { - "epoch": 3.1397060228988667, - "grad_norm": 0.004760695621371269, - "learning_rate": 0.0001999951377943326, - "loss": 46.0, - "step": 41065 - }, - { - "epoch": 3.1397824798822564, - "grad_norm": 0.005824300926178694, - "learning_rate": 0.0001999951375574686, - "loss": 46.0, - "step": 41066 - }, - { - "epoch": 3.1398589368656458, - "grad_norm": 0.0018791139591485262, - "learning_rate": 0.00019999513732059886, - "loss": 46.0, - "step": 41067 - }, - { - "epoch": 3.1399353938490355, - "grad_norm": 0.00265587423928082, - "learning_rate": 0.00019999513708372332, - "loss": 46.0, - "step": 41068 - }, - { - "epoch": 3.1400118508324253, - "grad_norm": 0.001935538137331605, - "learning_rate": 0.00019999513684684206, - "loss": 46.0, - "step": 41069 - }, - { - "epoch": 3.140088307815815, - "grad_norm": 0.0015491947997361422, - "learning_rate": 0.00019999513660995497, - "loss": 46.0, - "step": 41070 - }, - { - "epoch": 3.140164764799205, - "grad_norm": 0.001547043095342815, - "learning_rate": 0.00019999513637306216, - "loss": 46.0, - "step": 41071 - }, - { - "epoch": 3.1402412217825946, - "grad_norm": 0.0039647649973630905, - "learning_rate": 0.00019999513613616355, - "loss": 46.0, - "step": 41072 - }, - { - "epoch": 3.1403176787659843, - "grad_norm": 0.0009321686229668558, - "learning_rate": 0.00019999513589925917, - "loss": 46.0, - "step": 41073 - }, - { - "epoch": 3.140394135749374, - "grad_norm": 0.003700418397784233, - "learning_rate": 0.00019999513566234904, - "loss": 46.0, - "step": 41074 - }, - { - "epoch": 3.140470592732764, - "grad_norm": 0.0027197208255529404, - "learning_rate": 0.00019999513542543314, - "loss": 46.0, - "step": 41075 - }, - { - "epoch": 3.1405470497161536, - "grad_norm": 0.003884189995005727, - "learning_rate": 0.00019999513518851144, - "loss": 46.0, - "step": 41076 - }, - { - "epoch": 3.1406235066995434, - "grad_norm": 0.0023868188727647066, - "learning_rate": 0.00019999513495158402, - "loss": 46.0, - "step": 41077 - }, - { - "epoch": 3.1406999636829327, - "grad_norm": 0.00528392568230629, - "learning_rate": 0.0001999951347146508, - "loss": 46.0, - "step": 41078 - }, - { - "epoch": 3.1407764206663225, - "grad_norm": 0.004509489517658949, - "learning_rate": 0.00019999513447771177, - "loss": 46.0, - "step": 41079 - }, - { - "epoch": 3.140852877649712, - "grad_norm": 0.0011563922744244337, - "learning_rate": 0.00019999513424076706, - "loss": 46.0, - "step": 41080 - }, - { - "epoch": 3.140929334633102, - "grad_norm": 0.0015961665194481611, - "learning_rate": 0.00019999513400381652, - "loss": 46.0, - "step": 41081 - }, - { - "epoch": 3.1410057916164917, - "grad_norm": 0.0025906581431627274, - "learning_rate": 0.00019999513376686023, - "loss": 46.0, - "step": 41082 - }, - { - "epoch": 3.1410822485998815, - "grad_norm": 0.0007558197830803692, - "learning_rate": 0.00019999513352989817, - "loss": 46.0, - "step": 41083 - }, - { - "epoch": 3.1411587055832713, - "grad_norm": 0.0025289105251431465, - "learning_rate": 0.00019999513329293034, - "loss": 46.0, - "step": 41084 - }, - { - "epoch": 3.141235162566661, - "grad_norm": 0.0030896200332790613, - "learning_rate": 0.00019999513305595673, - "loss": 46.0, - "step": 41085 - }, - { - "epoch": 3.141311619550051, - "grad_norm": 0.0013852017000317574, - "learning_rate": 0.00019999513281897737, - "loss": 46.0, - "step": 41086 - }, - { - "epoch": 3.1413880765334405, - "grad_norm": 0.0028509381227195263, - "learning_rate": 0.00019999513258199222, - "loss": 46.0, - "step": 41087 - }, - { - "epoch": 3.14146453351683, - "grad_norm": 0.0009741692338138819, - "learning_rate": 0.00019999513234500134, - "loss": 46.0, - "step": 41088 - }, - { - "epoch": 3.1415409905002196, - "grad_norm": 0.002254907041788101, - "learning_rate": 0.00019999513210800464, - "loss": 46.0, - "step": 41089 - }, - { - "epoch": 3.1416174474836094, - "grad_norm": 0.0005364711978472769, - "learning_rate": 0.0001999951318710022, - "loss": 46.0, - "step": 41090 - }, - { - "epoch": 3.141693904466999, - "grad_norm": 0.0021251628641039133, - "learning_rate": 0.000199995131633994, - "loss": 46.0, - "step": 41091 - }, - { - "epoch": 3.141770361450389, - "grad_norm": 0.0028942017816007137, - "learning_rate": 0.00019999513139697998, - "loss": 46.0, - "step": 41092 - }, - { - "epoch": 3.1418468184337787, - "grad_norm": 0.005589932203292847, - "learning_rate": 0.00019999513115996024, - "loss": 46.0, - "step": 41093 - }, - { - "epoch": 3.1419232754171684, - "grad_norm": 0.0059248716570436954, - "learning_rate": 0.00019999513092293472, - "loss": 46.0, - "step": 41094 - }, - { - "epoch": 3.141999732400558, - "grad_norm": 0.003885081270709634, - "learning_rate": 0.00019999513068590344, - "loss": 46.0, - "step": 41095 - }, - { - "epoch": 3.142076189383948, - "grad_norm": 0.0005443011177703738, - "learning_rate": 0.00019999513044886635, - "loss": 46.0, - "step": 41096 - }, - { - "epoch": 3.1421526463673377, - "grad_norm": 0.0015941468300297856, - "learning_rate": 0.0001999951302118235, - "loss": 46.0, - "step": 41097 - }, - { - "epoch": 3.1422291033507275, - "grad_norm": 0.005403109826147556, - "learning_rate": 0.00019999512997477493, - "loss": 46.0, - "step": 41098 - }, - { - "epoch": 3.1423055603341172, - "grad_norm": 0.005095756612718105, - "learning_rate": 0.00019999512973772058, - "loss": 46.0, - "step": 41099 - }, - { - "epoch": 3.1423820173175065, - "grad_norm": 0.0008521788404323161, - "learning_rate": 0.00019999512950066045, - "loss": 46.0, - "step": 41100 - }, - { - "epoch": 3.1424584743008963, - "grad_norm": 0.001544449245557189, - "learning_rate": 0.00019999512926359452, - "loss": 46.0, - "step": 41101 - }, - { - "epoch": 3.142534931284286, - "grad_norm": 0.0031818135175853968, - "learning_rate": 0.00019999512902652285, - "loss": 46.0, - "step": 41102 - }, - { - "epoch": 3.142611388267676, - "grad_norm": 0.0026227463968098164, - "learning_rate": 0.0001999951287894454, - "loss": 46.0, - "step": 41103 - }, - { - "epoch": 3.1426878452510656, - "grad_norm": 0.002946611726656556, - "learning_rate": 0.0001999951285523622, - "loss": 46.0, - "step": 41104 - }, - { - "epoch": 3.1427643022344554, - "grad_norm": 0.0023916803766041994, - "learning_rate": 0.0001999951283152732, - "loss": 46.0, - "step": 41105 - }, - { - "epoch": 3.142840759217845, - "grad_norm": 0.0011243271874263883, - "learning_rate": 0.00019999512807817845, - "loss": 46.0, - "step": 41106 - }, - { - "epoch": 3.142917216201235, - "grad_norm": 0.001296074828132987, - "learning_rate": 0.00019999512784107793, - "loss": 46.0, - "step": 41107 - }, - { - "epoch": 3.1429936731846246, - "grad_norm": 0.004083739127963781, - "learning_rate": 0.00019999512760397165, - "loss": 46.0, - "step": 41108 - }, - { - "epoch": 3.1430701301680144, - "grad_norm": 0.0022604160476475954, - "learning_rate": 0.00019999512736685956, - "loss": 46.0, - "step": 41109 - }, - { - "epoch": 3.1431465871514037, - "grad_norm": 0.0016389890806749463, - "learning_rate": 0.00019999512712974172, - "loss": 46.0, - "step": 41110 - }, - { - "epoch": 3.1432230441347935, - "grad_norm": 0.004369684029370546, - "learning_rate": 0.00019999512689261814, - "loss": 46.0, - "step": 41111 - }, - { - "epoch": 3.1432995011181832, - "grad_norm": 0.0015991195105016232, - "learning_rate": 0.0001999951266554888, - "loss": 46.0, - "step": 41112 - }, - { - "epoch": 3.143375958101573, - "grad_norm": 0.0012750810710713267, - "learning_rate": 0.00019999512641835364, - "loss": 46.0, - "step": 41113 - }, - { - "epoch": 3.1434524150849628, - "grad_norm": 0.0019751156214624643, - "learning_rate": 0.00019999512618121273, - "loss": 46.0, - "step": 41114 - }, - { - "epoch": 3.1435288720683525, - "grad_norm": 0.00230652024038136, - "learning_rate": 0.00019999512594406603, - "loss": 46.0, - "step": 41115 - }, - { - "epoch": 3.1436053290517423, - "grad_norm": 0.00527595542371273, - "learning_rate": 0.0001999951257069136, - "loss": 46.0, - "step": 41116 - }, - { - "epoch": 3.143681786035132, - "grad_norm": 0.00411352189257741, - "learning_rate": 0.00019999512546975542, - "loss": 46.0, - "step": 41117 - }, - { - "epoch": 3.143758243018522, - "grad_norm": 0.0019018498715013266, - "learning_rate": 0.0001999951252325914, - "loss": 46.0, - "step": 41118 - }, - { - "epoch": 3.1438347000019116, - "grad_norm": 0.004346233326941729, - "learning_rate": 0.00019999512499542166, - "loss": 46.0, - "step": 41119 - }, - { - "epoch": 3.1439111569853013, - "grad_norm": 0.0029389483388513327, - "learning_rate": 0.00019999512475824615, - "loss": 46.0, - "step": 41120 - }, - { - "epoch": 3.143987613968691, - "grad_norm": 0.0029115909710526466, - "learning_rate": 0.00019999512452106486, - "loss": 46.0, - "step": 41121 - }, - { - "epoch": 3.1440640709520804, - "grad_norm": 0.002581561915576458, - "learning_rate": 0.0001999951242838778, - "loss": 46.0, - "step": 41122 - }, - { - "epoch": 3.14414052793547, - "grad_norm": 0.0009024101891554892, - "learning_rate": 0.00019999512404668494, - "loss": 46.0, - "step": 41123 - }, - { - "epoch": 3.14421698491886, - "grad_norm": 0.0016963593661785126, - "learning_rate": 0.00019999512380948638, - "loss": 46.0, - "step": 41124 - }, - { - "epoch": 3.1442934419022497, - "grad_norm": 0.001535413903184235, - "learning_rate": 0.00019999512357228198, - "loss": 46.0, - "step": 41125 - }, - { - "epoch": 3.1443698988856394, - "grad_norm": 0.0018477400299161673, - "learning_rate": 0.00019999512333507188, - "loss": 46.0, - "step": 41126 - }, - { - "epoch": 3.144446355869029, - "grad_norm": 0.0021108605433255434, - "learning_rate": 0.00019999512309785595, - "loss": 46.0, - "step": 41127 - }, - { - "epoch": 3.144522812852419, - "grad_norm": 0.001212080242112279, - "learning_rate": 0.00019999512286063428, - "loss": 46.0, - "step": 41128 - }, - { - "epoch": 3.1445992698358087, - "grad_norm": 0.002008714247494936, - "learning_rate": 0.00019999512262340683, - "loss": 46.0, - "step": 41129 - }, - { - "epoch": 3.1446757268191985, - "grad_norm": 0.003351228078827262, - "learning_rate": 0.0001999951223861736, - "loss": 46.0, - "step": 41130 - }, - { - "epoch": 3.1447521838025883, - "grad_norm": 0.0007999236695468426, - "learning_rate": 0.00019999512214893464, - "loss": 46.0, - "step": 41131 - }, - { - "epoch": 3.1448286407859776, - "grad_norm": 0.0031075961887836456, - "learning_rate": 0.00019999512191168988, - "loss": 46.0, - "step": 41132 - }, - { - "epoch": 3.1449050977693673, - "grad_norm": 0.0018060181755572557, - "learning_rate": 0.00019999512167443933, - "loss": 46.0, - "step": 41133 - }, - { - "epoch": 3.144981554752757, - "grad_norm": 0.0009503645705990493, - "learning_rate": 0.00019999512143718308, - "loss": 46.0, - "step": 41134 - }, - { - "epoch": 3.145058011736147, - "grad_norm": 0.002328176749870181, - "learning_rate": 0.00019999512119992102, - "loss": 46.0, - "step": 41135 - }, - { - "epoch": 3.1451344687195366, - "grad_norm": 0.0014271060936152935, - "learning_rate": 0.00019999512096265318, - "loss": 46.0, - "step": 41136 - }, - { - "epoch": 3.1452109257029264, - "grad_norm": 0.0014304692158475518, - "learning_rate": 0.00019999512072537958, - "loss": 46.0, - "step": 41137 - }, - { - "epoch": 3.145287382686316, - "grad_norm": 0.0035953198093920946, - "learning_rate": 0.00019999512048810022, - "loss": 46.0, - "step": 41138 - }, - { - "epoch": 3.145363839669706, - "grad_norm": 0.0022449269890785217, - "learning_rate": 0.0001999951202508151, - "loss": 46.0, - "step": 41139 - }, - { - "epoch": 3.1454402966530957, - "grad_norm": 0.0017370255663990974, - "learning_rate": 0.0001999951200135242, - "loss": 46.0, - "step": 41140 - }, - { - "epoch": 3.1455167536364854, - "grad_norm": 0.0034271995536983013, - "learning_rate": 0.0001999951197762275, - "loss": 46.0, - "step": 41141 - }, - { - "epoch": 3.145593210619875, - "grad_norm": 0.0015784569550305605, - "learning_rate": 0.00019999511953892505, - "loss": 46.0, - "step": 41142 - }, - { - "epoch": 3.145669667603265, - "grad_norm": 0.0010758456774055958, - "learning_rate": 0.00019999511930161686, - "loss": 46.0, - "step": 41143 - }, - { - "epoch": 3.1457461245866543, - "grad_norm": 0.0014098573010414839, - "learning_rate": 0.0001999951190643029, - "loss": 46.0, - "step": 41144 - }, - { - "epoch": 3.145822581570044, - "grad_norm": 0.00203301222063601, - "learning_rate": 0.00019999511882698312, - "loss": 46.0, - "step": 41145 - }, - { - "epoch": 3.145899038553434, - "grad_norm": 0.002582469955086708, - "learning_rate": 0.00019999511858965758, - "loss": 46.0, - "step": 41146 - }, - { - "epoch": 3.1459754955368235, - "grad_norm": 0.0018384659197181463, - "learning_rate": 0.00019999511835232632, - "loss": 46.0, - "step": 41147 - }, - { - "epoch": 3.1460519525202133, - "grad_norm": 0.0006012328085489571, - "learning_rate": 0.00019999511811498924, - "loss": 46.0, - "step": 41148 - }, - { - "epoch": 3.146128409503603, - "grad_norm": 0.001859766081906855, - "learning_rate": 0.00019999511787764643, - "loss": 46.0, - "step": 41149 - }, - { - "epoch": 3.146204866486993, - "grad_norm": 0.0029311501421034336, - "learning_rate": 0.00019999511764029782, - "loss": 46.0, - "step": 41150 - }, - { - "epoch": 3.1462813234703826, - "grad_norm": 0.0023800318595021963, - "learning_rate": 0.00019999511740294344, - "loss": 46.0, - "step": 41151 - }, - { - "epoch": 3.1463577804537723, - "grad_norm": 0.0013326131738722324, - "learning_rate": 0.00019999511716558332, - "loss": 46.0, - "step": 41152 - }, - { - "epoch": 3.146434237437162, - "grad_norm": 0.003274459857493639, - "learning_rate": 0.00019999511692821742, - "loss": 46.0, - "step": 41153 - }, - { - "epoch": 3.1465106944205514, - "grad_norm": 0.0009514266275800765, - "learning_rate": 0.00019999511669084575, - "loss": 46.0, - "step": 41154 - }, - { - "epoch": 3.146587151403941, - "grad_norm": 0.007105960976332426, - "learning_rate": 0.0001999951164534683, - "loss": 46.0, - "step": 41155 - }, - { - "epoch": 3.146663608387331, - "grad_norm": 0.008935816586017609, - "learning_rate": 0.00019999511621608508, - "loss": 46.0, - "step": 41156 - }, - { - "epoch": 3.1467400653707207, - "grad_norm": 0.004221817012876272, - "learning_rate": 0.00019999511597869611, - "loss": 46.0, - "step": 41157 - }, - { - "epoch": 3.1468165223541105, - "grad_norm": 0.0012877248227596283, - "learning_rate": 0.00019999511574130138, - "loss": 46.0, - "step": 41158 - }, - { - "epoch": 3.1468929793375002, - "grad_norm": 0.0021311098244041204, - "learning_rate": 0.00019999511550390084, - "loss": 46.0, - "step": 41159 - }, - { - "epoch": 3.14696943632089, - "grad_norm": 0.0009686232078820467, - "learning_rate": 0.00019999511526649455, - "loss": 46.0, - "step": 41160 - }, - { - "epoch": 3.1470458933042798, - "grad_norm": 0.0016935671446844935, - "learning_rate": 0.0001999951150290825, - "loss": 46.0, - "step": 41161 - }, - { - "epoch": 3.1471223502876695, - "grad_norm": 0.0019789987709373236, - "learning_rate": 0.00019999511479166466, - "loss": 46.0, - "step": 41162 - }, - { - "epoch": 3.1471988072710593, - "grad_norm": 0.00203468045219779, - "learning_rate": 0.00019999511455424108, - "loss": 46.0, - "step": 41163 - }, - { - "epoch": 3.147275264254449, - "grad_norm": 0.000643292733002454, - "learning_rate": 0.0001999951143168117, - "loss": 46.0, - "step": 41164 - }, - { - "epoch": 3.147351721237839, - "grad_norm": 0.003530516754835844, - "learning_rate": 0.00019999511407937657, - "loss": 46.0, - "step": 41165 - }, - { - "epoch": 3.147428178221228, - "grad_norm": 0.0019623476546257734, - "learning_rate": 0.00019999511384193567, - "loss": 46.0, - "step": 41166 - }, - { - "epoch": 3.147504635204618, - "grad_norm": 0.00207833806052804, - "learning_rate": 0.000199995113604489, - "loss": 46.0, - "step": 41167 - }, - { - "epoch": 3.1475810921880076, - "grad_norm": 0.005475122947245836, - "learning_rate": 0.00019999511336703653, - "loss": 46.0, - "step": 41168 - }, - { - "epoch": 3.1476575491713974, - "grad_norm": 0.0017534289509057999, - "learning_rate": 0.00019999511312957834, - "loss": 46.0, - "step": 41169 - }, - { - "epoch": 3.147734006154787, - "grad_norm": 0.001810555113479495, - "learning_rate": 0.00019999511289211434, - "loss": 46.0, - "step": 41170 - }, - { - "epoch": 3.147810463138177, - "grad_norm": 0.0010964962420985103, - "learning_rate": 0.0001999951126546446, - "loss": 46.0, - "step": 41171 - }, - { - "epoch": 3.1478869201215667, - "grad_norm": 0.0033140238374471664, - "learning_rate": 0.00019999511241716912, - "loss": 46.0, - "step": 41172 - }, - { - "epoch": 3.1479633771049564, - "grad_norm": 0.0026049206499010324, - "learning_rate": 0.0001999951121796878, - "loss": 46.0, - "step": 41173 - }, - { - "epoch": 3.148039834088346, - "grad_norm": 0.003027221653610468, - "learning_rate": 0.00019999511194220075, - "loss": 46.0, - "step": 41174 - }, - { - "epoch": 3.148116291071736, - "grad_norm": 0.0013067773543298244, - "learning_rate": 0.00019999511170470791, - "loss": 46.0, - "step": 41175 - }, - { - "epoch": 3.1481927480551253, - "grad_norm": 0.0020483662374317646, - "learning_rate": 0.00019999511146720934, - "loss": 46.0, - "step": 41176 - }, - { - "epoch": 3.148269205038515, - "grad_norm": 0.0029803512152284384, - "learning_rate": 0.00019999511122970496, - "loss": 46.0, - "step": 41177 - }, - { - "epoch": 3.148345662021905, - "grad_norm": 0.001941710477694869, - "learning_rate": 0.00019999511099219483, - "loss": 46.0, - "step": 41178 - }, - { - "epoch": 3.1484221190052946, - "grad_norm": 0.00592011259868741, - "learning_rate": 0.0001999951107546789, - "loss": 46.0, - "step": 41179 - }, - { - "epoch": 3.1484985759886843, - "grad_norm": 0.0028123133815824986, - "learning_rate": 0.00019999511051715726, - "loss": 46.0, - "step": 41180 - }, - { - "epoch": 3.148575032972074, - "grad_norm": 0.0011448734439909458, - "learning_rate": 0.00019999511027962981, - "loss": 46.0, - "step": 41181 - }, - { - "epoch": 3.148651489955464, - "grad_norm": 0.002779376693069935, - "learning_rate": 0.00019999511004209662, - "loss": 46.0, - "step": 41182 - }, - { - "epoch": 3.1487279469388536, - "grad_norm": 0.0026455887127667665, - "learning_rate": 0.00019999510980455763, - "loss": 46.0, - "step": 41183 - }, - { - "epoch": 3.1488044039222434, - "grad_norm": 0.002642741659656167, - "learning_rate": 0.00019999510956701286, - "loss": 46.0, - "step": 41184 - }, - { - "epoch": 3.148880860905633, - "grad_norm": 0.001011073705740273, - "learning_rate": 0.00019999510932946235, - "loss": 46.0, - "step": 41185 - }, - { - "epoch": 3.148957317889023, - "grad_norm": 0.0019699865952134132, - "learning_rate": 0.00019999510909190607, - "loss": 46.0, - "step": 41186 - }, - { - "epoch": 3.149033774872412, - "grad_norm": 0.0011443350231274962, - "learning_rate": 0.000199995108854344, - "loss": 46.0, - "step": 41187 - }, - { - "epoch": 3.149110231855802, - "grad_norm": 0.0020341400522738695, - "learning_rate": 0.00019999510861677618, - "loss": 46.0, - "step": 41188 - }, - { - "epoch": 3.1491866888391917, - "grad_norm": 0.0029956973157823086, - "learning_rate": 0.0001999951083792026, - "loss": 46.0, - "step": 41189 - }, - { - "epoch": 3.1492631458225815, - "grad_norm": 0.001325927209109068, - "learning_rate": 0.00019999510814162325, - "loss": 46.0, - "step": 41190 - }, - { - "epoch": 3.1493396028059712, - "grad_norm": 0.0018731832969933748, - "learning_rate": 0.0001999951079040381, - "loss": 46.0, - "step": 41191 - }, - { - "epoch": 3.149416059789361, - "grad_norm": 0.001182476058602333, - "learning_rate": 0.0001999951076664472, - "loss": 46.0, - "step": 41192 - }, - { - "epoch": 3.1494925167727508, - "grad_norm": 0.003146679373458028, - "learning_rate": 0.00019999510742885052, - "loss": 46.0, - "step": 41193 - }, - { - "epoch": 3.1495689737561405, - "grad_norm": 0.0011848483700305223, - "learning_rate": 0.00019999510719124808, - "loss": 46.0, - "step": 41194 - }, - { - "epoch": 3.1496454307395303, - "grad_norm": 0.002009914955124259, - "learning_rate": 0.00019999510695363989, - "loss": 46.0, - "step": 41195 - }, - { - "epoch": 3.14972188772292, - "grad_norm": 0.0014789466513320804, - "learning_rate": 0.0001999951067160259, - "loss": 46.0, - "step": 41196 - }, - { - "epoch": 3.14979834470631, - "grad_norm": 0.0009771896293386817, - "learning_rate": 0.00019999510647840616, - "loss": 46.0, - "step": 41197 - }, - { - "epoch": 3.149874801689699, - "grad_norm": 0.0030318363569676876, - "learning_rate": 0.00019999510624078064, - "loss": 46.0, - "step": 41198 - }, - { - "epoch": 3.149951258673089, - "grad_norm": 0.0025786079932004213, - "learning_rate": 0.00019999510600314936, - "loss": 46.0, - "step": 41199 - }, - { - "epoch": 3.1500277156564787, - "grad_norm": 0.0021388635504990816, - "learning_rate": 0.00019999510576551227, - "loss": 46.0, - "step": 41200 - }, - { - "epoch": 3.1501041726398684, - "grad_norm": 0.0012837856775149703, - "learning_rate": 0.00019999510552786947, - "loss": 46.0, - "step": 41201 - }, - { - "epoch": 3.150180629623258, - "grad_norm": 0.0022854323033243418, - "learning_rate": 0.00019999510529022086, - "loss": 46.0, - "step": 41202 - }, - { - "epoch": 3.150257086606648, - "grad_norm": 0.003339572111144662, - "learning_rate": 0.00019999510505256648, - "loss": 46.0, - "step": 41203 - }, - { - "epoch": 3.1503335435900377, - "grad_norm": 0.0017678368603810668, - "learning_rate": 0.0001999951048149064, - "loss": 46.0, - "step": 41204 - }, - { - "epoch": 3.1504100005734275, - "grad_norm": 0.002714118454605341, - "learning_rate": 0.0001999951045772405, - "loss": 46.0, - "step": 41205 - }, - { - "epoch": 3.150486457556817, - "grad_norm": 0.0020404579117894173, - "learning_rate": 0.00019999510433956882, - "loss": 46.0, - "step": 41206 - }, - { - "epoch": 3.150562914540207, - "grad_norm": 0.0014239432057365775, - "learning_rate": 0.00019999510410189134, - "loss": 46.0, - "step": 41207 - }, - { - "epoch": 3.1506393715235967, - "grad_norm": 0.001548121334053576, - "learning_rate": 0.00019999510386420815, - "loss": 46.0, - "step": 41208 - }, - { - "epoch": 3.150715828506986, - "grad_norm": 0.0017139689298346639, - "learning_rate": 0.00019999510362651916, - "loss": 46.0, - "step": 41209 - }, - { - "epoch": 3.150792285490376, - "grad_norm": 0.002566085895523429, - "learning_rate": 0.00019999510338882442, - "loss": 46.0, - "step": 41210 - }, - { - "epoch": 3.1508687424737656, - "grad_norm": 0.0025460158940404654, - "learning_rate": 0.0001999951031511239, - "loss": 46.0, - "step": 41211 - }, - { - "epoch": 3.1509451994571553, - "grad_norm": 0.006416479591280222, - "learning_rate": 0.00019999510291341763, - "loss": 46.0, - "step": 41212 - }, - { - "epoch": 3.151021656440545, - "grad_norm": 0.007463107351213694, - "learning_rate": 0.00019999510267570557, - "loss": 46.0, - "step": 41213 - }, - { - "epoch": 3.151098113423935, - "grad_norm": 0.0008747376268729568, - "learning_rate": 0.00019999510243798776, - "loss": 46.0, - "step": 41214 - }, - { - "epoch": 3.1511745704073246, - "grad_norm": 0.002939442405477166, - "learning_rate": 0.00019999510220026413, - "loss": 46.0, - "step": 41215 - }, - { - "epoch": 3.1512510273907144, - "grad_norm": 0.001430977019481361, - "learning_rate": 0.00019999510196253478, - "loss": 46.0, - "step": 41216 - }, - { - "epoch": 3.151327484374104, - "grad_norm": 0.0006753754569217563, - "learning_rate": 0.00019999510172479968, - "loss": 46.0, - "step": 41217 - }, - { - "epoch": 3.151403941357494, - "grad_norm": 0.0031671279575675726, - "learning_rate": 0.00019999510148705876, - "loss": 46.0, - "step": 41218 - }, - { - "epoch": 3.1514803983408832, - "grad_norm": 0.0008521328563801944, - "learning_rate": 0.0001999951012493121, - "loss": 46.0, - "step": 41219 - }, - { - "epoch": 3.151556855324273, - "grad_norm": 0.0023200339637696743, - "learning_rate": 0.00019999510101155964, - "loss": 46.0, - "step": 41220 - }, - { - "epoch": 3.1516333123076627, - "grad_norm": 0.0019352763192728162, - "learning_rate": 0.00019999510077380145, - "loss": 46.0, - "step": 41221 - }, - { - "epoch": 3.1517097692910525, - "grad_norm": 0.0011031129397451878, - "learning_rate": 0.00019999510053603746, - "loss": 46.0, - "step": 41222 - }, - { - "epoch": 3.1517862262744423, - "grad_norm": 0.0011672734981402755, - "learning_rate": 0.00019999510029826772, - "loss": 46.0, - "step": 41223 - }, - { - "epoch": 3.151862683257832, - "grad_norm": 0.0012146474327892065, - "learning_rate": 0.0001999951000604922, - "loss": 46.0, - "step": 41224 - }, - { - "epoch": 3.151939140241222, - "grad_norm": 0.0011745108058676124, - "learning_rate": 0.00019999509982271093, - "loss": 46.0, - "step": 41225 - }, - { - "epoch": 3.1520155972246116, - "grad_norm": 0.0013356372946873307, - "learning_rate": 0.00019999509958492387, - "loss": 46.0, - "step": 41226 - }, - { - "epoch": 3.1520920542080013, - "grad_norm": 0.005240879021584988, - "learning_rate": 0.00019999509934713104, - "loss": 46.0, - "step": 41227 - }, - { - "epoch": 3.152168511191391, - "grad_norm": 0.0012554834829643369, - "learning_rate": 0.00019999509910933246, - "loss": 46.0, - "step": 41228 - }, - { - "epoch": 3.152244968174781, - "grad_norm": 0.0016742480220273137, - "learning_rate": 0.0001999950988715281, - "loss": 46.0, - "step": 41229 - }, - { - "epoch": 3.1523214251581706, - "grad_norm": 0.001541724312119186, - "learning_rate": 0.00019999509863371796, - "loss": 46.0, - "step": 41230 - }, - { - "epoch": 3.15239788214156, - "grad_norm": 0.002407388063147664, - "learning_rate": 0.00019999509839590206, - "loss": 46.0, - "step": 41231 - }, - { - "epoch": 3.1524743391249497, - "grad_norm": 0.0048097348771989346, - "learning_rate": 0.0001999950981580804, - "loss": 46.0, - "step": 41232 - }, - { - "epoch": 3.1525507961083394, - "grad_norm": 0.0010083505185320973, - "learning_rate": 0.00019999509792025295, - "loss": 46.0, - "step": 41233 - }, - { - "epoch": 3.152627253091729, - "grad_norm": 0.0017390126595273614, - "learning_rate": 0.00019999509768241973, - "loss": 46.0, - "step": 41234 - }, - { - "epoch": 3.152703710075119, - "grad_norm": 0.00044358987361192703, - "learning_rate": 0.0001999950974445808, - "loss": 46.0, - "step": 41235 - }, - { - "epoch": 3.1527801670585087, - "grad_norm": 0.00310742505826056, - "learning_rate": 0.00019999509720673603, - "loss": 46.0, - "step": 41236 - }, - { - "epoch": 3.1528566240418985, - "grad_norm": 0.0019872100092470646, - "learning_rate": 0.00019999509696888552, - "loss": 46.0, - "step": 41237 - }, - { - "epoch": 3.1529330810252882, - "grad_norm": 0.0009636054164730012, - "learning_rate": 0.0001999950967310292, - "loss": 46.0, - "step": 41238 - }, - { - "epoch": 3.153009538008678, - "grad_norm": 0.0019592433236539364, - "learning_rate": 0.00019999509649316717, - "loss": 46.0, - "step": 41239 - }, - { - "epoch": 3.1530859949920678, - "grad_norm": 0.0014720330946147442, - "learning_rate": 0.00019999509625529934, - "loss": 46.0, - "step": 41240 - }, - { - "epoch": 3.153162451975457, - "grad_norm": 0.006337042897939682, - "learning_rate": 0.00019999509601742577, - "loss": 46.0, - "step": 41241 - }, - { - "epoch": 3.153238908958847, - "grad_norm": 0.0022629695013165474, - "learning_rate": 0.00019999509577954642, - "loss": 46.0, - "step": 41242 - }, - { - "epoch": 3.1533153659422366, - "grad_norm": 0.0011800449574366212, - "learning_rate": 0.00019999509554166126, - "loss": 46.0, - "step": 41243 - }, - { - "epoch": 3.1533918229256264, - "grad_norm": 0.0021426088642328978, - "learning_rate": 0.00019999509530377037, - "loss": 46.0, - "step": 41244 - }, - { - "epoch": 3.153468279909016, - "grad_norm": 0.002159228315576911, - "learning_rate": 0.0001999950950658737, - "loss": 46.0, - "step": 41245 - }, - { - "epoch": 3.153544736892406, - "grad_norm": 0.0019955765455961227, - "learning_rate": 0.00019999509482797128, - "loss": 46.0, - "step": 41246 - }, - { - "epoch": 3.1536211938757956, - "grad_norm": 0.0027953071985393763, - "learning_rate": 0.00019999509459006304, - "loss": 46.0, - "step": 41247 - }, - { - "epoch": 3.1536976508591854, - "grad_norm": 0.002594755031168461, - "learning_rate": 0.0001999950943521491, - "loss": 46.0, - "step": 41248 - }, - { - "epoch": 3.153774107842575, - "grad_norm": 0.0017028666334226727, - "learning_rate": 0.0001999950941142293, - "loss": 46.0, - "step": 41249 - }, - { - "epoch": 3.153850564825965, - "grad_norm": 0.0015072169480845332, - "learning_rate": 0.00019999509387630382, - "loss": 46.0, - "step": 41250 - }, - { - "epoch": 3.1539270218093547, - "grad_norm": 0.0035794342402368784, - "learning_rate": 0.00019999509363837254, - "loss": 46.0, - "step": 41251 - }, - { - "epoch": 3.1540034787927445, - "grad_norm": 0.000742486969102174, - "learning_rate": 0.00019999509340043548, - "loss": 46.0, - "step": 41252 - }, - { - "epoch": 3.1540799357761338, - "grad_norm": 0.003688682336360216, - "learning_rate": 0.00019999509316249268, - "loss": 46.0, - "step": 41253 - }, - { - "epoch": 3.1541563927595235, - "grad_norm": 0.01042461208999157, - "learning_rate": 0.00019999509292454408, - "loss": 46.0, - "step": 41254 - }, - { - "epoch": 3.1542328497429133, - "grad_norm": 0.008628328330814838, - "learning_rate": 0.00019999509268658973, - "loss": 46.0, - "step": 41255 - }, - { - "epoch": 3.154309306726303, - "grad_norm": 0.001481702085584402, - "learning_rate": 0.00019999509244862957, - "loss": 46.0, - "step": 41256 - }, - { - "epoch": 3.154385763709693, - "grad_norm": 0.004218224436044693, - "learning_rate": 0.00019999509221066368, - "loss": 46.0, - "step": 41257 - }, - { - "epoch": 3.1544622206930826, - "grad_norm": 0.0014666811330243945, - "learning_rate": 0.000199995091972692, - "loss": 46.0, - "step": 41258 - }, - { - "epoch": 3.1545386776764723, - "grad_norm": 0.0034756611566990614, - "learning_rate": 0.0001999950917347146, - "loss": 46.0, - "step": 41259 - }, - { - "epoch": 3.154615134659862, - "grad_norm": 0.0010898802429437637, - "learning_rate": 0.00019999509149673137, - "loss": 46.0, - "step": 41260 - }, - { - "epoch": 3.154691591643252, - "grad_norm": 0.0044989800080657005, - "learning_rate": 0.0001999950912587424, - "loss": 46.0, - "step": 41261 - }, - { - "epoch": 3.1547680486266416, - "grad_norm": 0.0016769011272117496, - "learning_rate": 0.00019999509102074767, - "loss": 46.0, - "step": 41262 - }, - { - "epoch": 3.154844505610031, - "grad_norm": 0.0018666343530640006, - "learning_rate": 0.00019999509078274714, - "loss": 46.0, - "step": 41263 - }, - { - "epoch": 3.1549209625934207, - "grad_norm": 0.0027444446459412575, - "learning_rate": 0.00019999509054474085, - "loss": 46.0, - "step": 41264 - }, - { - "epoch": 3.1549974195768105, - "grad_norm": 0.00113389128819108, - "learning_rate": 0.00019999509030672882, - "loss": 46.0, - "step": 41265 - }, - { - "epoch": 3.1550738765602, - "grad_norm": 0.0017235310515388846, - "learning_rate": 0.000199995090068711, - "loss": 46.0, - "step": 41266 - }, - { - "epoch": 3.15515033354359, - "grad_norm": 0.001434971927665174, - "learning_rate": 0.0001999950898306874, - "loss": 46.0, - "step": 41267 - }, - { - "epoch": 3.1552267905269797, - "grad_norm": 0.002379936631768942, - "learning_rate": 0.000199995089592658, - "loss": 46.0, - "step": 41268 - }, - { - "epoch": 3.1553032475103695, - "grad_norm": 0.0027696508914232254, - "learning_rate": 0.00019999508935462292, - "loss": 46.0, - "step": 41269 - }, - { - "epoch": 3.1553797044937593, - "grad_norm": 0.0005561787984333932, - "learning_rate": 0.000199995089116582, - "loss": 46.0, - "step": 41270 - }, - { - "epoch": 3.155456161477149, - "grad_norm": 0.0037807845510542393, - "learning_rate": 0.00019999508887853535, - "loss": 46.0, - "step": 41271 - }, - { - "epoch": 3.155532618460539, - "grad_norm": 0.006651694420725107, - "learning_rate": 0.0001999950886404829, - "loss": 46.0, - "step": 41272 - }, - { - "epoch": 3.1556090754439285, - "grad_norm": 0.004104775842279196, - "learning_rate": 0.00019999508840242472, - "loss": 46.0, - "step": 41273 - }, - { - "epoch": 3.1556855324273183, - "grad_norm": 0.0015994838904589415, - "learning_rate": 0.00019999508816436073, - "loss": 46.0, - "step": 41274 - }, - { - "epoch": 3.1557619894107076, - "grad_norm": 0.0012926486087962985, - "learning_rate": 0.000199995087926291, - "loss": 46.0, - "step": 41275 - }, - { - "epoch": 3.1558384463940974, - "grad_norm": 0.003736570244655013, - "learning_rate": 0.00019999508768821545, - "loss": 46.0, - "step": 41276 - }, - { - "epoch": 3.155914903377487, - "grad_norm": 0.001920853741466999, - "learning_rate": 0.0001999950874501342, - "loss": 46.0, - "step": 41277 - }, - { - "epoch": 3.155991360360877, - "grad_norm": 0.0011635113041847944, - "learning_rate": 0.00019999508721204717, - "loss": 46.0, - "step": 41278 - }, - { - "epoch": 3.1560678173442667, - "grad_norm": 0.003280497156083584, - "learning_rate": 0.00019999508697395434, - "loss": 46.0, - "step": 41279 - }, - { - "epoch": 3.1561442743276564, - "grad_norm": 0.004014330450445414, - "learning_rate": 0.00019999508673585573, - "loss": 46.0, - "step": 41280 - }, - { - "epoch": 3.156220731311046, - "grad_norm": 0.0026616144459694624, - "learning_rate": 0.00019999508649775138, - "loss": 46.0, - "step": 41281 - }, - { - "epoch": 3.156297188294436, - "grad_norm": 0.005476745311170816, - "learning_rate": 0.00019999508625964126, - "loss": 46.0, - "step": 41282 - }, - { - "epoch": 3.1563736452778257, - "grad_norm": 0.0017588009359315038, - "learning_rate": 0.00019999508602152534, - "loss": 46.0, - "step": 41283 - }, - { - "epoch": 3.1564501022612155, - "grad_norm": 0.0006979173049330711, - "learning_rate": 0.0001999950857834037, - "loss": 46.0, - "step": 41284 - }, - { - "epoch": 3.156526559244605, - "grad_norm": 0.0018476679688319564, - "learning_rate": 0.00019999508554527625, - "loss": 46.0, - "step": 41285 - }, - { - "epoch": 3.1566030162279946, - "grad_norm": 0.004086425062268972, - "learning_rate": 0.00019999508530714306, - "loss": 46.0, - "step": 41286 - }, - { - "epoch": 3.1566794732113843, - "grad_norm": 0.0023363095242530107, - "learning_rate": 0.00019999508506900407, - "loss": 46.0, - "step": 41287 - }, - { - "epoch": 3.156755930194774, - "grad_norm": 0.005558275617659092, - "learning_rate": 0.00019999508483085934, - "loss": 46.0, - "step": 41288 - }, - { - "epoch": 3.156832387178164, - "grad_norm": 0.004917372018098831, - "learning_rate": 0.00019999508459270883, - "loss": 46.0, - "step": 41289 - }, - { - "epoch": 3.1569088441615536, - "grad_norm": 0.0015837918035686016, - "learning_rate": 0.00019999508435455254, - "loss": 46.0, - "step": 41290 - }, - { - "epoch": 3.1569853011449434, - "grad_norm": 0.003155802609398961, - "learning_rate": 0.0001999950841163905, - "loss": 46.0, - "step": 41291 - }, - { - "epoch": 3.157061758128333, - "grad_norm": 0.0019145618425682187, - "learning_rate": 0.00019999508387822266, - "loss": 46.0, - "step": 41292 - }, - { - "epoch": 3.157138215111723, - "grad_norm": 0.0023620694410055876, - "learning_rate": 0.00019999508364004908, - "loss": 46.0, - "step": 41293 - }, - { - "epoch": 3.1572146720951126, - "grad_norm": 0.005915533285588026, - "learning_rate": 0.00019999508340186973, - "loss": 46.0, - "step": 41294 - }, - { - "epoch": 3.1572911290785024, - "grad_norm": 0.0026202548760920763, - "learning_rate": 0.0001999950831636846, - "loss": 46.0, - "step": 41295 - }, - { - "epoch": 3.157367586061892, - "grad_norm": 0.003385018790140748, - "learning_rate": 0.0001999950829254937, - "loss": 46.0, - "step": 41296 - }, - { - "epoch": 3.1574440430452815, - "grad_norm": 0.0010866944212466478, - "learning_rate": 0.00019999508268729702, - "loss": 46.0, - "step": 41297 - }, - { - "epoch": 3.1575205000286712, - "grad_norm": 0.0026685951743274927, - "learning_rate": 0.0001999950824490946, - "loss": 46.0, - "step": 41298 - }, - { - "epoch": 3.157596957012061, - "grad_norm": 0.0032722107134759426, - "learning_rate": 0.00019999508221088639, - "loss": 46.0, - "step": 41299 - }, - { - "epoch": 3.1576734139954508, - "grad_norm": 0.0006499799783341587, - "learning_rate": 0.00019999508197267242, - "loss": 46.0, - "step": 41300 - }, - { - "epoch": 3.1577498709788405, - "grad_norm": 0.00191175052896142, - "learning_rate": 0.0001999950817344527, - "loss": 46.0, - "step": 41301 - }, - { - "epoch": 3.1578263279622303, - "grad_norm": 0.0019544349052011967, - "learning_rate": 0.00019999508149622718, - "loss": 46.0, - "step": 41302 - }, - { - "epoch": 3.15790278494562, - "grad_norm": 0.0037779954727739096, - "learning_rate": 0.0001999950812579959, - "loss": 46.0, - "step": 41303 - }, - { - "epoch": 3.15797924192901, - "grad_norm": 0.001928371493704617, - "learning_rate": 0.00019999508101975884, - "loss": 46.0, - "step": 41304 - }, - { - "epoch": 3.1580556989123996, - "grad_norm": 0.0013177570654079318, - "learning_rate": 0.00019999508078151604, - "loss": 46.0, - "step": 41305 - }, - { - "epoch": 3.1581321558957893, - "grad_norm": 0.0021362965926527977, - "learning_rate": 0.00019999508054326746, - "loss": 46.0, - "step": 41306 - }, - { - "epoch": 3.1582086128791786, - "grad_norm": 0.0010499494383111596, - "learning_rate": 0.00019999508030501309, - "loss": 46.0, - "step": 41307 - }, - { - "epoch": 3.1582850698625684, - "grad_norm": 0.0016806864878162742, - "learning_rate": 0.00019999508006675296, - "loss": 46.0, - "step": 41308 - }, - { - "epoch": 3.158361526845958, - "grad_norm": 0.0014857127098366618, - "learning_rate": 0.00019999507982848707, - "loss": 46.0, - "step": 41309 - }, - { - "epoch": 3.158437983829348, - "grad_norm": 0.007579101249575615, - "learning_rate": 0.0001999950795902154, - "loss": 46.0, - "step": 41310 - }, - { - "epoch": 3.1585144408127377, - "grad_norm": 0.004320272710174322, - "learning_rate": 0.00019999507935193798, - "loss": 46.0, - "step": 41311 - }, - { - "epoch": 3.1585908977961275, - "grad_norm": 0.0030708054546266794, - "learning_rate": 0.00019999507911365477, - "loss": 46.0, - "step": 41312 - }, - { - "epoch": 3.158667354779517, - "grad_norm": 0.002287672832608223, - "learning_rate": 0.0001999950788753658, - "loss": 46.0, - "step": 41313 - }, - { - "epoch": 3.158743811762907, - "grad_norm": 0.0019517750479280949, - "learning_rate": 0.00019999507863707104, - "loss": 46.0, - "step": 41314 - }, - { - "epoch": 3.1588202687462967, - "grad_norm": 0.004568958654999733, - "learning_rate": 0.00019999507839877053, - "loss": 46.0, - "step": 41315 - }, - { - "epoch": 3.1588967257296865, - "grad_norm": 0.0017045927233994007, - "learning_rate": 0.00019999507816046425, - "loss": 46.0, - "step": 41316 - }, - { - "epoch": 3.1589731827130763, - "grad_norm": 0.0017688111402094364, - "learning_rate": 0.00019999507792215222, - "loss": 46.0, - "step": 41317 - }, - { - "epoch": 3.1590496396964656, - "grad_norm": 0.003358157118782401, - "learning_rate": 0.00019999507768383442, - "loss": 46.0, - "step": 41318 - }, - { - "epoch": 3.1591260966798553, - "grad_norm": 0.0024869979824870825, - "learning_rate": 0.0001999950774455108, - "loss": 46.0, - "step": 41319 - }, - { - "epoch": 3.159202553663245, - "grad_norm": 0.000876505917403847, - "learning_rate": 0.00019999507720718144, - "loss": 46.0, - "step": 41320 - }, - { - "epoch": 3.159279010646635, - "grad_norm": 0.0007721567526459694, - "learning_rate": 0.00019999507696884635, - "loss": 46.0, - "step": 41321 - }, - { - "epoch": 3.1593554676300246, - "grad_norm": 0.002563255140557885, - "learning_rate": 0.00019999507673050543, - "loss": 46.0, - "step": 41322 - }, - { - "epoch": 3.1594319246134144, - "grad_norm": 0.0014723507920280099, - "learning_rate": 0.00019999507649215876, - "loss": 46.0, - "step": 41323 - }, - { - "epoch": 3.159508381596804, - "grad_norm": 0.0035391172859817743, - "learning_rate": 0.00019999507625380634, - "loss": 46.0, - "step": 41324 - }, - { - "epoch": 3.159584838580194, - "grad_norm": 0.0015904412139207125, - "learning_rate": 0.00019999507601544813, - "loss": 46.0, - "step": 41325 - }, - { - "epoch": 3.1596612955635837, - "grad_norm": 0.003802722552791238, - "learning_rate": 0.00019999507577708416, - "loss": 46.0, - "step": 41326 - }, - { - "epoch": 3.1597377525469734, - "grad_norm": 0.00244453689083457, - "learning_rate": 0.00019999507553871443, - "loss": 46.0, - "step": 41327 - }, - { - "epoch": 3.159814209530363, - "grad_norm": 0.003056307090446353, - "learning_rate": 0.00019999507530033892, - "loss": 46.0, - "step": 41328 - }, - { - "epoch": 3.1598906665137525, - "grad_norm": 0.0023832679726183414, - "learning_rate": 0.00019999507506195764, - "loss": 46.0, - "step": 41329 - }, - { - "epoch": 3.1599671234971423, - "grad_norm": 0.0028164638206362724, - "learning_rate": 0.00019999507482357058, - "loss": 46.0, - "step": 41330 - }, - { - "epoch": 3.160043580480532, - "grad_norm": 0.001658973516896367, - "learning_rate": 0.00019999507458517775, - "loss": 46.0, - "step": 41331 - }, - { - "epoch": 3.160120037463922, - "grad_norm": 0.00508987857028842, - "learning_rate": 0.0001999950743467792, - "loss": 46.0, - "step": 41332 - }, - { - "epoch": 3.1601964944473115, - "grad_norm": 0.0021771444007754326, - "learning_rate": 0.00019999507410837483, - "loss": 46.0, - "step": 41333 - }, - { - "epoch": 3.1602729514307013, - "grad_norm": 0.0013983589597046375, - "learning_rate": 0.0001999950738699647, - "loss": 46.0, - "step": 41334 - }, - { - "epoch": 3.160349408414091, - "grad_norm": 0.004197974689304829, - "learning_rate": 0.00019999507363154881, - "loss": 46.0, - "step": 41335 - }, - { - "epoch": 3.160425865397481, - "grad_norm": 0.0011154047679156065, - "learning_rate": 0.00019999507339312715, - "loss": 46.0, - "step": 41336 - }, - { - "epoch": 3.1605023223808706, - "grad_norm": 0.0008261289331130683, - "learning_rate": 0.00019999507315469973, - "loss": 46.0, - "step": 41337 - }, - { - "epoch": 3.1605787793642603, - "grad_norm": 0.0026026489213109016, - "learning_rate": 0.00019999507291626652, - "loss": 46.0, - "step": 41338 - }, - { - "epoch": 3.16065523634765, - "grad_norm": 0.0018387408927083015, - "learning_rate": 0.00019999507267782755, - "loss": 46.0, - "step": 41339 - }, - { - "epoch": 3.1607316933310394, - "grad_norm": 0.003892270615324378, - "learning_rate": 0.0001999950724393828, - "loss": 46.0, - "step": 41340 - }, - { - "epoch": 3.160808150314429, - "grad_norm": 0.0007451699930243194, - "learning_rate": 0.0001999950722009323, - "loss": 46.0, - "step": 41341 - }, - { - "epoch": 3.160884607297819, - "grad_norm": 0.004547202028334141, - "learning_rate": 0.00019999507196247603, - "loss": 46.0, - "step": 41342 - }, - { - "epoch": 3.1609610642812087, - "grad_norm": 0.0013254316290840507, - "learning_rate": 0.00019999507172401397, - "loss": 46.0, - "step": 41343 - }, - { - "epoch": 3.1610375212645985, - "grad_norm": 0.003969667479395866, - "learning_rate": 0.00019999507148554617, - "loss": 46.0, - "step": 41344 - }, - { - "epoch": 3.1611139782479882, - "grad_norm": 0.0007787776412442327, - "learning_rate": 0.00019999507124707257, - "loss": 46.0, - "step": 41345 - }, - { - "epoch": 3.161190435231378, - "grad_norm": 0.004932778421789408, - "learning_rate": 0.0001999950710085932, - "loss": 46.0, - "step": 41346 - }, - { - "epoch": 3.1612668922147678, - "grad_norm": 0.0039217849262058735, - "learning_rate": 0.0001999950707701081, - "loss": 46.0, - "step": 41347 - }, - { - "epoch": 3.1613433491981575, - "grad_norm": 0.003376128152012825, - "learning_rate": 0.00019999507053161718, - "loss": 46.0, - "step": 41348 - }, - { - "epoch": 3.1614198061815473, - "grad_norm": 0.002181009389460087, - "learning_rate": 0.00019999507029312054, - "loss": 46.0, - "step": 41349 - }, - { - "epoch": 3.1614962631649366, - "grad_norm": 0.00240871193818748, - "learning_rate": 0.0001999950700546181, - "loss": 46.0, - "step": 41350 - }, - { - "epoch": 3.1615727201483264, - "grad_norm": 0.0015356396324932575, - "learning_rate": 0.00019999506981610988, - "loss": 46.0, - "step": 41351 - }, - { - "epoch": 3.161649177131716, - "grad_norm": 0.0029594209045171738, - "learning_rate": 0.00019999506957759592, - "loss": 46.0, - "step": 41352 - }, - { - "epoch": 3.161725634115106, - "grad_norm": 0.00423911539837718, - "learning_rate": 0.0001999950693390762, - "loss": 46.0, - "step": 41353 - }, - { - "epoch": 3.1618020910984956, - "grad_norm": 0.0026481598615646362, - "learning_rate": 0.00019999506910055068, - "loss": 46.0, - "step": 41354 - }, - { - "epoch": 3.1618785480818854, - "grad_norm": 0.0006250111036933959, - "learning_rate": 0.00019999506886201942, - "loss": 46.0, - "step": 41355 - }, - { - "epoch": 3.161955005065275, - "grad_norm": 0.006145654246211052, - "learning_rate": 0.0001999950686234824, - "loss": 46.0, - "step": 41356 - }, - { - "epoch": 3.162031462048665, - "grad_norm": 0.007828936912119389, - "learning_rate": 0.00019999506838493954, - "loss": 46.0, - "step": 41357 - }, - { - "epoch": 3.1621079190320547, - "grad_norm": 0.0010668664472177625, - "learning_rate": 0.00019999506814639097, - "loss": 46.0, - "step": 41358 - }, - { - "epoch": 3.1621843760154444, - "grad_norm": 0.0029236541595309973, - "learning_rate": 0.00019999506790783662, - "loss": 46.0, - "step": 41359 - }, - { - "epoch": 3.162260832998834, - "grad_norm": 0.003693469800055027, - "learning_rate": 0.0001999950676692765, - "loss": 46.0, - "step": 41360 - }, - { - "epoch": 3.162337289982224, - "grad_norm": 0.004355990327894688, - "learning_rate": 0.0001999950674307106, - "loss": 46.0, - "step": 41361 - }, - { - "epoch": 3.1624137469656133, - "grad_norm": 0.0013126952107995749, - "learning_rate": 0.00019999506719213894, - "loss": 46.0, - "step": 41362 - }, - { - "epoch": 3.162490203949003, - "grad_norm": 0.002799743553623557, - "learning_rate": 0.00019999506695356152, - "loss": 46.0, - "step": 41363 - }, - { - "epoch": 3.162566660932393, - "grad_norm": 0.003541214158758521, - "learning_rate": 0.0001999950667149783, - "loss": 46.0, - "step": 41364 - }, - { - "epoch": 3.1626431179157826, - "grad_norm": 0.0009232715819962323, - "learning_rate": 0.00019999506647638935, - "loss": 46.0, - "step": 41365 - }, - { - "epoch": 3.1627195748991723, - "grad_norm": 0.0021728789433836937, - "learning_rate": 0.0001999950662377946, - "loss": 46.0, - "step": 41366 - }, - { - "epoch": 3.162796031882562, - "grad_norm": 0.0012628649128600955, - "learning_rate": 0.00019999506599919408, - "loss": 46.0, - "step": 41367 - }, - { - "epoch": 3.162872488865952, - "grad_norm": 0.002501907991245389, - "learning_rate": 0.00019999506576058782, - "loss": 46.0, - "step": 41368 - }, - { - "epoch": 3.1629489458493416, - "grad_norm": 0.0017043884145095944, - "learning_rate": 0.00019999506552197577, - "loss": 46.0, - "step": 41369 - }, - { - "epoch": 3.1630254028327314, - "grad_norm": 0.001165761612355709, - "learning_rate": 0.00019999506528335797, - "loss": 46.0, - "step": 41370 - }, - { - "epoch": 3.163101859816121, - "grad_norm": 0.005266531836241484, - "learning_rate": 0.00019999506504473437, - "loss": 46.0, - "step": 41371 - }, - { - "epoch": 3.1631783167995104, - "grad_norm": 0.0012408840702846646, - "learning_rate": 0.00019999506480610505, - "loss": 46.0, - "step": 41372 - }, - { - "epoch": 3.1632547737829, - "grad_norm": 0.0014161203289404511, - "learning_rate": 0.0001999950645674699, - "loss": 46.0, - "step": 41373 - }, - { - "epoch": 3.16333123076629, - "grad_norm": 0.001424388843588531, - "learning_rate": 0.000199995064328829, - "loss": 46.0, - "step": 41374 - }, - { - "epoch": 3.1634076877496797, - "grad_norm": 0.0028290506452322006, - "learning_rate": 0.00019999506409018234, - "loss": 46.0, - "step": 41375 - }, - { - "epoch": 3.1634841447330695, - "grad_norm": 0.0021013321820646524, - "learning_rate": 0.0001999950638515299, - "loss": 46.0, - "step": 41376 - }, - { - "epoch": 3.1635606017164593, - "grad_norm": 0.00865001417696476, - "learning_rate": 0.00019999506361287174, - "loss": 46.0, - "step": 41377 - }, - { - "epoch": 3.163637058699849, - "grad_norm": 0.0044823638163506985, - "learning_rate": 0.00019999506337420778, - "loss": 46.0, - "step": 41378 - }, - { - "epoch": 3.1637135156832388, - "grad_norm": 0.0018760509556159377, - "learning_rate": 0.00019999506313553802, - "loss": 46.0, - "step": 41379 - }, - { - "epoch": 3.1637899726666285, - "grad_norm": 0.00493275374174118, - "learning_rate": 0.0001999950628968625, - "loss": 46.0, - "step": 41380 - }, - { - "epoch": 3.1638664296500183, - "grad_norm": 0.0025269535835832357, - "learning_rate": 0.00019999506265818125, - "loss": 46.0, - "step": 41381 - }, - { - "epoch": 3.163942886633408, - "grad_norm": 0.005213672295212746, - "learning_rate": 0.0001999950624194942, - "loss": 46.0, - "step": 41382 - }, - { - "epoch": 3.164019343616798, - "grad_norm": 0.002192362444475293, - "learning_rate": 0.00019999506218080137, - "loss": 46.0, - "step": 41383 - }, - { - "epoch": 3.164095800600187, - "grad_norm": 0.002362956292927265, - "learning_rate": 0.0001999950619421028, - "loss": 46.0, - "step": 41384 - }, - { - "epoch": 3.164172257583577, - "grad_norm": 0.0019419651944190264, - "learning_rate": 0.00019999506170339845, - "loss": 46.0, - "step": 41385 - }, - { - "epoch": 3.1642487145669667, - "grad_norm": 0.003133219201117754, - "learning_rate": 0.00019999506146468833, - "loss": 46.0, - "step": 41386 - }, - { - "epoch": 3.1643251715503564, - "grad_norm": 0.0018855045782402158, - "learning_rate": 0.00019999506122597244, - "loss": 46.0, - "step": 41387 - }, - { - "epoch": 3.164401628533746, - "grad_norm": 0.002275438979268074, - "learning_rate": 0.00019999506098725077, - "loss": 46.0, - "step": 41388 - }, - { - "epoch": 3.164478085517136, - "grad_norm": 0.004472393076866865, - "learning_rate": 0.00019999506074852336, - "loss": 46.0, - "step": 41389 - }, - { - "epoch": 3.1645545425005257, - "grad_norm": 0.0024108458310365677, - "learning_rate": 0.00019999506050979014, - "loss": 46.0, - "step": 41390 - }, - { - "epoch": 3.1646309994839155, - "grad_norm": 0.0010790076339617372, - "learning_rate": 0.00019999506027105118, - "loss": 46.0, - "step": 41391 - }, - { - "epoch": 3.1647074564673052, - "grad_norm": 0.004766308702528477, - "learning_rate": 0.00019999506003230645, - "loss": 46.0, - "step": 41392 - }, - { - "epoch": 3.164783913450695, - "grad_norm": 0.0024100765585899353, - "learning_rate": 0.00019999505979355594, - "loss": 46.0, - "step": 41393 - }, - { - "epoch": 3.1648603704340843, - "grad_norm": 0.0015630577690899372, - "learning_rate": 0.00019999505955479966, - "loss": 46.0, - "step": 41394 - }, - { - "epoch": 3.164936827417474, - "grad_norm": 0.0023312268313020468, - "learning_rate": 0.0001999950593160376, - "loss": 46.0, - "step": 41395 - }, - { - "epoch": 3.165013284400864, - "grad_norm": 0.0014532121131196618, - "learning_rate": 0.0001999950590772698, - "loss": 46.0, - "step": 41396 - }, - { - "epoch": 3.1650897413842536, - "grad_norm": 0.002739020623266697, - "learning_rate": 0.00019999505883849623, - "loss": 46.0, - "step": 41397 - }, - { - "epoch": 3.1651661983676433, - "grad_norm": 0.0027691854629665613, - "learning_rate": 0.0001999950585997169, - "loss": 46.0, - "step": 41398 - }, - { - "epoch": 3.165242655351033, - "grad_norm": 0.0033416608348488808, - "learning_rate": 0.00019999505836093174, - "loss": 46.0, - "step": 41399 - }, - { - "epoch": 3.165319112334423, - "grad_norm": 0.0025658742524683475, - "learning_rate": 0.00019999505812214085, - "loss": 46.0, - "step": 41400 - }, - { - "epoch": 3.1653955693178126, - "grad_norm": 0.005955113098025322, - "learning_rate": 0.0001999950578833442, - "loss": 46.0, - "step": 41401 - }, - { - "epoch": 3.1654720263012024, - "grad_norm": 0.002519210334867239, - "learning_rate": 0.0001999950576445418, - "loss": 46.0, - "step": 41402 - }, - { - "epoch": 3.165548483284592, - "grad_norm": 0.0014996922109276056, - "learning_rate": 0.00019999505740573355, - "loss": 46.0, - "step": 41403 - }, - { - "epoch": 3.165624940267982, - "grad_norm": 0.0008262404007837176, - "learning_rate": 0.00019999505716691962, - "loss": 46.0, - "step": 41404 - }, - { - "epoch": 3.1657013972513717, - "grad_norm": 0.001112659927457571, - "learning_rate": 0.0001999950569280999, - "loss": 46.0, - "step": 41405 - }, - { - "epoch": 3.165777854234761, - "grad_norm": 0.0016313678352162242, - "learning_rate": 0.00019999505668927435, - "loss": 46.0, - "step": 41406 - }, - { - "epoch": 3.1658543112181508, - "grad_norm": 0.0013768535573035479, - "learning_rate": 0.0001999950564504431, - "loss": 46.0, - "step": 41407 - }, - { - "epoch": 3.1659307682015405, - "grad_norm": 0.002947288565337658, - "learning_rate": 0.00019999505621160605, - "loss": 46.0, - "step": 41408 - }, - { - "epoch": 3.1660072251849303, - "grad_norm": 0.006324646528810263, - "learning_rate": 0.00019999505597276325, - "loss": 46.0, - "step": 41409 - }, - { - "epoch": 3.16608368216832, - "grad_norm": 0.0023180863354355097, - "learning_rate": 0.00019999505573391465, - "loss": 46.0, - "step": 41410 - }, - { - "epoch": 3.16616013915171, - "grad_norm": 0.001528040855191648, - "learning_rate": 0.0001999950554950603, - "loss": 46.0, - "step": 41411 - }, - { - "epoch": 3.1662365961350996, - "grad_norm": 0.0011482478585094213, - "learning_rate": 0.00019999505525620018, - "loss": 46.0, - "step": 41412 - }, - { - "epoch": 3.1663130531184893, - "grad_norm": 0.0023069106973707676, - "learning_rate": 0.00019999505501733431, - "loss": 46.0, - "step": 41413 - }, - { - "epoch": 3.166389510101879, - "grad_norm": 0.0033784867264330387, - "learning_rate": 0.00019999505477846265, - "loss": 46.0, - "step": 41414 - }, - { - "epoch": 3.166465967085269, - "grad_norm": 0.002776743844151497, - "learning_rate": 0.00019999505453958524, - "loss": 46.0, - "step": 41415 - }, - { - "epoch": 3.166542424068658, - "grad_norm": 0.0018270849250257015, - "learning_rate": 0.00019999505430070202, - "loss": 46.0, - "step": 41416 - }, - { - "epoch": 3.166618881052048, - "grad_norm": 0.0011862947139889002, - "learning_rate": 0.00019999505406181304, - "loss": 46.0, - "step": 41417 - }, - { - "epoch": 3.1666953380354377, - "grad_norm": 0.0038134099449962378, - "learning_rate": 0.00019999505382291833, - "loss": 46.0, - "step": 41418 - }, - { - "epoch": 3.1667717950188274, - "grad_norm": 0.0010683911386877298, - "learning_rate": 0.00019999505358401782, - "loss": 46.0, - "step": 41419 - }, - { - "epoch": 3.166848252002217, - "grad_norm": 0.000719505304004997, - "learning_rate": 0.00019999505334511154, - "loss": 46.0, - "step": 41420 - }, - { - "epoch": 3.166924708985607, - "grad_norm": 0.008577807806432247, - "learning_rate": 0.0001999950531061995, - "loss": 46.0, - "step": 41421 - }, - { - "epoch": 3.1670011659689967, - "grad_norm": 0.003264984581619501, - "learning_rate": 0.0001999950528672817, - "loss": 46.0, - "step": 41422 - }, - { - "epoch": 3.1670776229523865, - "grad_norm": 0.0026449931319803, - "learning_rate": 0.00019999505262835812, - "loss": 46.0, - "step": 41423 - }, - { - "epoch": 3.1671540799357762, - "grad_norm": 0.0037897902075201273, - "learning_rate": 0.00019999505238942877, - "loss": 46.0, - "step": 41424 - }, - { - "epoch": 3.167230536919166, - "grad_norm": 0.004215792752802372, - "learning_rate": 0.00019999505215049363, - "loss": 46.0, - "step": 41425 - }, - { - "epoch": 3.1673069939025558, - "grad_norm": 0.0018445485038682818, - "learning_rate": 0.00019999505191155276, - "loss": 46.0, - "step": 41426 - }, - { - "epoch": 3.1673834508859455, - "grad_norm": 0.007202946115285158, - "learning_rate": 0.0001999950516726061, - "loss": 46.0, - "step": 41427 - }, - { - "epoch": 3.167459907869335, - "grad_norm": 0.002640940947458148, - "learning_rate": 0.00019999505143365368, - "loss": 46.0, - "step": 41428 - }, - { - "epoch": 3.1675363648527246, - "grad_norm": 0.006939531769603491, - "learning_rate": 0.0001999950511946955, - "loss": 46.0, - "step": 41429 - }, - { - "epoch": 3.1676128218361144, - "grad_norm": 0.0034718550741672516, - "learning_rate": 0.00019999505095573154, - "loss": 46.0, - "step": 41430 - }, - { - "epoch": 3.167689278819504, - "grad_norm": 0.0015440002316609025, - "learning_rate": 0.00019999505071676178, - "loss": 46.0, - "step": 41431 - }, - { - "epoch": 3.167765735802894, - "grad_norm": 0.0021042367443442345, - "learning_rate": 0.00019999505047778627, - "loss": 46.0, - "step": 41432 - }, - { - "epoch": 3.1678421927862837, - "grad_norm": 0.00500674219802022, - "learning_rate": 0.00019999505023880502, - "loss": 46.0, - "step": 41433 - }, - { - "epoch": 3.1679186497696734, - "grad_norm": 0.0016279031988233328, - "learning_rate": 0.00019999504999981796, - "loss": 46.0, - "step": 41434 - }, - { - "epoch": 3.167995106753063, - "grad_norm": 0.0011564954183995724, - "learning_rate": 0.00019999504976082517, - "loss": 46.0, - "step": 41435 - }, - { - "epoch": 3.168071563736453, - "grad_norm": 0.003401833586394787, - "learning_rate": 0.00019999504952182657, - "loss": 46.0, - "step": 41436 - }, - { - "epoch": 3.1681480207198427, - "grad_norm": 0.0034036061260849237, - "learning_rate": 0.00019999504928282222, - "loss": 46.0, - "step": 41437 - }, - { - "epoch": 3.168224477703232, - "grad_norm": 0.001845944207161665, - "learning_rate": 0.0001999950490438121, - "loss": 46.0, - "step": 41438 - }, - { - "epoch": 3.1683009346866218, - "grad_norm": 0.001065343152731657, - "learning_rate": 0.00019999504880479624, - "loss": 46.0, - "step": 41439 - }, - { - "epoch": 3.1683773916700115, - "grad_norm": 0.0008206756901927292, - "learning_rate": 0.00019999504856577457, - "loss": 46.0, - "step": 41440 - }, - { - "epoch": 3.1684538486534013, - "grad_norm": 0.000572787772398442, - "learning_rate": 0.00019999504832674713, - "loss": 46.0, - "step": 41441 - }, - { - "epoch": 3.168530305636791, - "grad_norm": 0.0031394294928759336, - "learning_rate": 0.00019999504808771394, - "loss": 46.0, - "step": 41442 - }, - { - "epoch": 3.168606762620181, - "grad_norm": 0.006262943148612976, - "learning_rate": 0.00019999504784867499, - "loss": 46.0, - "step": 41443 - }, - { - "epoch": 3.1686832196035706, - "grad_norm": 0.0011333419242873788, - "learning_rate": 0.00019999504760963025, - "loss": 46.0, - "step": 41444 - }, - { - "epoch": 3.1687596765869603, - "grad_norm": 0.0010566285345703363, - "learning_rate": 0.00019999504737057975, - "loss": 46.0, - "step": 41445 - }, - { - "epoch": 3.16883613357035, - "grad_norm": 0.001251725247129798, - "learning_rate": 0.00019999504713152347, - "loss": 46.0, - "step": 41446 - }, - { - "epoch": 3.16891259055374, - "grad_norm": 0.003519281279295683, - "learning_rate": 0.00019999504689246144, - "loss": 46.0, - "step": 41447 - }, - { - "epoch": 3.1689890475371296, - "grad_norm": 0.0006476265261881053, - "learning_rate": 0.00019999504665339362, - "loss": 46.0, - "step": 41448 - }, - { - "epoch": 3.169065504520519, - "grad_norm": 0.003076705848798156, - "learning_rate": 0.00019999504641432004, - "loss": 46.0, - "step": 41449 - }, - { - "epoch": 3.1691419615039087, - "grad_norm": 0.0026646617334336042, - "learning_rate": 0.0001999950461752407, - "loss": 46.0, - "step": 41450 - }, - { - "epoch": 3.1692184184872985, - "grad_norm": 0.002413432579487562, - "learning_rate": 0.00019999504593615558, - "loss": 46.0, - "step": 41451 - }, - { - "epoch": 3.169294875470688, - "grad_norm": 0.003823963925242424, - "learning_rate": 0.0001999950456970647, - "loss": 46.0, - "step": 41452 - }, - { - "epoch": 3.169371332454078, - "grad_norm": 0.0011404749238863587, - "learning_rate": 0.00019999504545796802, - "loss": 46.0, - "step": 41453 - }, - { - "epoch": 3.1694477894374677, - "grad_norm": 0.002357630990445614, - "learning_rate": 0.0001999950452188656, - "loss": 46.0, - "step": 41454 - }, - { - "epoch": 3.1695242464208575, - "grad_norm": 0.0025385448243469, - "learning_rate": 0.0001999950449797574, - "loss": 46.0, - "step": 41455 - }, - { - "epoch": 3.1696007034042473, - "grad_norm": 0.00247181230224669, - "learning_rate": 0.00019999504474064347, - "loss": 46.0, - "step": 41456 - }, - { - "epoch": 3.169677160387637, - "grad_norm": 0.0029713709373027086, - "learning_rate": 0.00019999504450152373, - "loss": 46.0, - "step": 41457 - }, - { - "epoch": 3.169753617371027, - "grad_norm": 0.0036932379007339478, - "learning_rate": 0.0001999950442623982, - "loss": 46.0, - "step": 41458 - }, - { - "epoch": 3.1698300743544165, - "grad_norm": 0.008026341907680035, - "learning_rate": 0.00019999504402326698, - "loss": 46.0, - "step": 41459 - }, - { - "epoch": 3.169906531337806, - "grad_norm": 0.003109171986579895, - "learning_rate": 0.0001999950437841299, - "loss": 46.0, - "step": 41460 - }, - { - "epoch": 3.1699829883211956, - "grad_norm": 0.003735044738277793, - "learning_rate": 0.0001999950435449871, - "loss": 46.0, - "step": 41461 - }, - { - "epoch": 3.1700594453045854, - "grad_norm": 0.001759059727191925, - "learning_rate": 0.00019999504330583853, - "loss": 46.0, - "step": 41462 - }, - { - "epoch": 3.170135902287975, - "grad_norm": 0.0013326314510777593, - "learning_rate": 0.00019999504306668418, - "loss": 46.0, - "step": 41463 - }, - { - "epoch": 3.170212359271365, - "grad_norm": 0.007648392580449581, - "learning_rate": 0.00019999504282752406, - "loss": 46.0, - "step": 41464 - }, - { - "epoch": 3.1702888162547547, - "grad_norm": 0.0004672433133237064, - "learning_rate": 0.0001999950425883582, - "loss": 46.0, - "step": 41465 - }, - { - "epoch": 3.1703652732381444, - "grad_norm": 0.002496375236660242, - "learning_rate": 0.00019999504234918653, - "loss": 46.0, - "step": 41466 - }, - { - "epoch": 3.170441730221534, - "grad_norm": 0.0010404987260699272, - "learning_rate": 0.0001999950421100091, - "loss": 46.0, - "step": 41467 - }, - { - "epoch": 3.170518187204924, - "grad_norm": 0.006087156943976879, - "learning_rate": 0.00019999504187082588, - "loss": 46.0, - "step": 41468 - }, - { - "epoch": 3.1705946441883137, - "grad_norm": 0.0019763356540352106, - "learning_rate": 0.00019999504163163695, - "loss": 46.0, - "step": 41469 - }, - { - "epoch": 3.1706711011717035, - "grad_norm": 0.002362157916650176, - "learning_rate": 0.0001999950413924422, - "loss": 46.0, - "step": 41470 - }, - { - "epoch": 3.170747558155093, - "grad_norm": 0.0029203672893345356, - "learning_rate": 0.00019999504115324172, - "loss": 46.0, - "step": 41471 - }, - { - "epoch": 3.1708240151384826, - "grad_norm": 0.002817925764247775, - "learning_rate": 0.00019999504091403544, - "loss": 46.0, - "step": 41472 - }, - { - "epoch": 3.1709004721218723, - "grad_norm": 0.0020470796152949333, - "learning_rate": 0.00019999504067482338, - "loss": 46.0, - "step": 41473 - }, - { - "epoch": 3.170976929105262, - "grad_norm": 0.0025700980331748724, - "learning_rate": 0.0001999950404356056, - "loss": 46.0, - "step": 41474 - }, - { - "epoch": 3.171053386088652, - "grad_norm": 0.0017173243686556816, - "learning_rate": 0.00019999504019638202, - "loss": 46.0, - "step": 41475 - }, - { - "epoch": 3.1711298430720416, - "grad_norm": 0.0016228409949690104, - "learning_rate": 0.00019999503995715267, - "loss": 46.0, - "step": 41476 - }, - { - "epoch": 3.1712063000554314, - "grad_norm": 0.0010417434386909008, - "learning_rate": 0.00019999503971791755, - "loss": 46.0, - "step": 41477 - }, - { - "epoch": 3.171282757038821, - "grad_norm": 0.0006923602195456624, - "learning_rate": 0.00019999503947867666, - "loss": 46.0, - "step": 41478 - }, - { - "epoch": 3.171359214022211, - "grad_norm": 0.0022682345006614923, - "learning_rate": 0.00019999503923943002, - "loss": 46.0, - "step": 41479 - }, - { - "epoch": 3.1714356710056006, - "grad_norm": 0.0013542587403208017, - "learning_rate": 0.00019999503900017758, - "loss": 46.0, - "step": 41480 - }, - { - "epoch": 3.17151212798899, - "grad_norm": 0.0011866593267768621, - "learning_rate": 0.0001999950387609194, - "loss": 46.0, - "step": 41481 - }, - { - "epoch": 3.1715885849723797, - "grad_norm": 0.0028488964308053255, - "learning_rate": 0.00019999503852165544, - "loss": 46.0, - "step": 41482 - }, - { - "epoch": 3.1716650419557695, - "grad_norm": 0.0019846842624247074, - "learning_rate": 0.00019999503828238574, - "loss": 46.0, - "step": 41483 - }, - { - "epoch": 3.1717414989391592, - "grad_norm": 0.0017689914675429463, - "learning_rate": 0.0001999950380431102, - "loss": 46.0, - "step": 41484 - }, - { - "epoch": 3.171817955922549, - "grad_norm": 0.0028321673162281513, - "learning_rate": 0.00019999503780382893, - "loss": 46.0, - "step": 41485 - }, - { - "epoch": 3.1718944129059388, - "grad_norm": 0.002153906738385558, - "learning_rate": 0.0001999950375645419, - "loss": 46.0, - "step": 41486 - }, - { - "epoch": 3.1719708698893285, - "grad_norm": 0.0023367053363472223, - "learning_rate": 0.00019999503732524908, - "loss": 46.0, - "step": 41487 - }, - { - "epoch": 3.1720473268727183, - "grad_norm": 0.001522139529697597, - "learning_rate": 0.0001999950370859505, - "loss": 46.0, - "step": 41488 - }, - { - "epoch": 3.172123783856108, - "grad_norm": 0.002241625217720866, - "learning_rate": 0.0001999950368466462, - "loss": 46.0, - "step": 41489 - }, - { - "epoch": 3.172200240839498, - "grad_norm": 0.003878000658005476, - "learning_rate": 0.00019999503660733607, - "loss": 46.0, - "step": 41490 - }, - { - "epoch": 3.1722766978228876, - "grad_norm": 0.0013456863816827536, - "learning_rate": 0.00019999503636802018, - "loss": 46.0, - "step": 41491 - }, - { - "epoch": 3.1723531548062773, - "grad_norm": 0.002906520152464509, - "learning_rate": 0.00019999503612869852, - "loss": 46.0, - "step": 41492 - }, - { - "epoch": 3.1724296117896666, - "grad_norm": 0.0011602099984884262, - "learning_rate": 0.0001999950358893711, - "loss": 46.0, - "step": 41493 - }, - { - "epoch": 3.1725060687730564, - "grad_norm": 0.004237143322825432, - "learning_rate": 0.00019999503565003792, - "loss": 46.0, - "step": 41494 - }, - { - "epoch": 3.172582525756446, - "grad_norm": 0.003364263102412224, - "learning_rate": 0.00019999503541069896, - "loss": 46.0, - "step": 41495 - }, - { - "epoch": 3.172658982739836, - "grad_norm": 0.0010974074248224497, - "learning_rate": 0.0001999950351713542, - "loss": 46.0, - "step": 41496 - }, - { - "epoch": 3.1727354397232257, - "grad_norm": 0.001306042424403131, - "learning_rate": 0.0001999950349320037, - "loss": 46.0, - "step": 41497 - }, - { - "epoch": 3.1728118967066155, - "grad_norm": 0.0017872753087431192, - "learning_rate": 0.00019999503469264745, - "loss": 46.0, - "step": 41498 - }, - { - "epoch": 3.172888353690005, - "grad_norm": 0.0027761689852923155, - "learning_rate": 0.0001999950344532854, - "loss": 46.0, - "step": 41499 - }, - { - "epoch": 3.172964810673395, - "grad_norm": 0.0014261918840929866, - "learning_rate": 0.0001999950342139176, - "loss": 46.0, - "step": 41500 - }, - { - "epoch": 3.1730412676567847, - "grad_norm": 0.0022536099422723055, - "learning_rate": 0.00019999503397454406, - "loss": 46.0, - "step": 41501 - }, - { - "epoch": 3.1731177246401745, - "grad_norm": 0.0035957032814621925, - "learning_rate": 0.00019999503373516468, - "loss": 46.0, - "step": 41502 - }, - { - "epoch": 3.173194181623564, - "grad_norm": 0.002542843110859394, - "learning_rate": 0.0001999950334957796, - "loss": 46.0, - "step": 41503 - }, - { - "epoch": 3.1732706386069536, - "grad_norm": 0.002371058566495776, - "learning_rate": 0.00019999503325638868, - "loss": 46.0, - "step": 41504 - }, - { - "epoch": 3.1733470955903433, - "grad_norm": 0.0021500475704669952, - "learning_rate": 0.00019999503301699204, - "loss": 46.0, - "step": 41505 - }, - { - "epoch": 3.173423552573733, - "grad_norm": 0.002679188270121813, - "learning_rate": 0.00019999503277758963, - "loss": 46.0, - "step": 41506 - }, - { - "epoch": 3.173500009557123, - "grad_norm": 0.004183996468782425, - "learning_rate": 0.00019999503253818142, - "loss": 46.0, - "step": 41507 - }, - { - "epoch": 3.1735764665405126, - "grad_norm": 0.004615056794136763, - "learning_rate": 0.00019999503229876746, - "loss": 46.0, - "step": 41508 - }, - { - "epoch": 3.1736529235239024, - "grad_norm": 0.0043732537887990475, - "learning_rate": 0.00019999503205934773, - "loss": 46.0, - "step": 41509 - }, - { - "epoch": 3.173729380507292, - "grad_norm": 0.0026550525799393654, - "learning_rate": 0.00019999503181992222, - "loss": 46.0, - "step": 41510 - }, - { - "epoch": 3.173805837490682, - "grad_norm": 0.0021852480713278055, - "learning_rate": 0.00019999503158049097, - "loss": 46.0, - "step": 41511 - }, - { - "epoch": 3.1738822944740717, - "grad_norm": 0.00213302462361753, - "learning_rate": 0.00019999503134105395, - "loss": 46.0, - "step": 41512 - }, - { - "epoch": 3.1739587514574614, - "grad_norm": 0.004383992403745651, - "learning_rate": 0.00019999503110161113, - "loss": 46.0, - "step": 41513 - }, - { - "epoch": 3.174035208440851, - "grad_norm": 0.0014782102080062032, - "learning_rate": 0.00019999503086216256, - "loss": 46.0, - "step": 41514 - }, - { - "epoch": 3.1741116654242405, - "grad_norm": 0.026076970621943474, - "learning_rate": 0.0001999950306227082, - "loss": 46.0, - "step": 41515 - }, - { - "epoch": 3.1741881224076303, - "grad_norm": 0.0024677226319909096, - "learning_rate": 0.00019999503038324812, - "loss": 46.0, - "step": 41516 - }, - { - "epoch": 3.17426457939102, - "grad_norm": 0.0016292547807097435, - "learning_rate": 0.00019999503014378223, - "loss": 46.0, - "step": 41517 - }, - { - "epoch": 3.17434103637441, - "grad_norm": 0.0010048781987279654, - "learning_rate": 0.00019999502990431057, - "loss": 46.0, - "step": 41518 - }, - { - "epoch": 3.1744174933577995, - "grad_norm": 0.0029262187890708447, - "learning_rate": 0.00019999502966483316, - "loss": 46.0, - "step": 41519 - }, - { - "epoch": 3.1744939503411893, - "grad_norm": 0.0017416657647117972, - "learning_rate": 0.00019999502942534995, - "loss": 46.0, - "step": 41520 - }, - { - "epoch": 3.174570407324579, - "grad_norm": 0.0008085901499725878, - "learning_rate": 0.000199995029185861, - "loss": 46.0, - "step": 41521 - }, - { - "epoch": 3.174646864307969, - "grad_norm": 0.002365314634516835, - "learning_rate": 0.00019999502894636626, - "loss": 46.0, - "step": 41522 - }, - { - "epoch": 3.1747233212913586, - "grad_norm": 0.0009196935570798814, - "learning_rate": 0.00019999502870686578, - "loss": 46.0, - "step": 41523 - }, - { - "epoch": 3.1747997782747484, - "grad_norm": 0.0016587093705311418, - "learning_rate": 0.0001999950284673595, - "loss": 46.0, - "step": 41524 - }, - { - "epoch": 3.1748762352581377, - "grad_norm": 0.0020995191298425198, - "learning_rate": 0.00019999502822784745, - "loss": 46.0, - "step": 41525 - }, - { - "epoch": 3.1749526922415274, - "grad_norm": 0.0010391664691269398, - "learning_rate": 0.00019999502798832968, - "loss": 46.0, - "step": 41526 - }, - { - "epoch": 3.175029149224917, - "grad_norm": 0.0033912716899067163, - "learning_rate": 0.00019999502774880609, - "loss": 46.0, - "step": 41527 - }, - { - "epoch": 3.175105606208307, - "grad_norm": 0.0022105860989540815, - "learning_rate": 0.00019999502750927674, - "loss": 46.0, - "step": 41528 - }, - { - "epoch": 3.1751820631916967, - "grad_norm": 0.004614948760718107, - "learning_rate": 0.00019999502726974165, - "loss": 46.0, - "step": 41529 - }, - { - "epoch": 3.1752585201750865, - "grad_norm": 0.0019378411816433072, - "learning_rate": 0.00019999502703020076, - "loss": 46.0, - "step": 41530 - }, - { - "epoch": 3.1753349771584762, - "grad_norm": 0.0008935146615840495, - "learning_rate": 0.00019999502679065413, - "loss": 46.0, - "step": 41531 - }, - { - "epoch": 3.175411434141866, - "grad_norm": 0.000457369489595294, - "learning_rate": 0.0001999950265511017, - "loss": 46.0, - "step": 41532 - }, - { - "epoch": 3.1754878911252558, - "grad_norm": 0.0016567959683015943, - "learning_rate": 0.0001999950263115435, - "loss": 46.0, - "step": 41533 - }, - { - "epoch": 3.1755643481086455, - "grad_norm": 0.001107292715460062, - "learning_rate": 0.00019999502607197952, - "loss": 46.0, - "step": 41534 - }, - { - "epoch": 3.1756408050920353, - "grad_norm": 0.0027877024840563536, - "learning_rate": 0.00019999502583240982, - "loss": 46.0, - "step": 41535 - }, - { - "epoch": 3.175717262075425, - "grad_norm": 0.0028962024953216314, - "learning_rate": 0.00019999502559283431, - "loss": 46.0, - "step": 41536 - }, - { - "epoch": 3.1757937190588144, - "grad_norm": 0.0011165491305291653, - "learning_rate": 0.00019999502535325306, - "loss": 46.0, - "step": 41537 - }, - { - "epoch": 3.175870176042204, - "grad_norm": 0.0009530842071399093, - "learning_rate": 0.00019999502511366601, - "loss": 46.0, - "step": 41538 - }, - { - "epoch": 3.175946633025594, - "grad_norm": 0.001333654043264687, - "learning_rate": 0.00019999502487407322, - "loss": 46.0, - "step": 41539 - }, - { - "epoch": 3.1760230900089836, - "grad_norm": 0.005875668954104185, - "learning_rate": 0.00019999502463447465, - "loss": 46.0, - "step": 41540 - }, - { - "epoch": 3.1760995469923734, - "grad_norm": 0.0024102074094116688, - "learning_rate": 0.0001999950243948703, - "loss": 46.0, - "step": 41541 - }, - { - "epoch": 3.176176003975763, - "grad_norm": 0.0007566119311377406, - "learning_rate": 0.0001999950241552602, - "loss": 46.0, - "step": 41542 - }, - { - "epoch": 3.176252460959153, - "grad_norm": 0.002351798117160797, - "learning_rate": 0.0001999950239156443, - "loss": 46.0, - "step": 41543 - }, - { - "epoch": 3.1763289179425427, - "grad_norm": 0.0049277860671281815, - "learning_rate": 0.00019999502367602266, - "loss": 46.0, - "step": 41544 - }, - { - "epoch": 3.1764053749259324, - "grad_norm": 0.0021302916575223207, - "learning_rate": 0.00019999502343639525, - "loss": 46.0, - "step": 41545 - }, - { - "epoch": 3.176481831909322, - "grad_norm": 0.0034527345560491085, - "learning_rate": 0.00019999502319676204, - "loss": 46.0, - "step": 41546 - }, - { - "epoch": 3.1765582888927115, - "grad_norm": 0.005665971431881189, - "learning_rate": 0.00019999502295712309, - "loss": 46.0, - "step": 41547 - }, - { - "epoch": 3.1766347458761013, - "grad_norm": 0.005099464673548937, - "learning_rate": 0.00019999502271747836, - "loss": 46.0, - "step": 41548 - }, - { - "epoch": 3.176711202859491, - "grad_norm": 0.0012947771465405822, - "learning_rate": 0.00019999502247782785, - "loss": 46.0, - "step": 41549 - }, - { - "epoch": 3.176787659842881, - "grad_norm": 0.0018413482466712594, - "learning_rate": 0.0001999950222381716, - "loss": 46.0, - "step": 41550 - }, - { - "epoch": 3.1768641168262706, - "grad_norm": 0.005373688414692879, - "learning_rate": 0.00019999502199850958, - "loss": 46.0, - "step": 41551 - }, - { - "epoch": 3.1769405738096603, - "grad_norm": 0.0012525643687695265, - "learning_rate": 0.00019999502175884178, - "loss": 46.0, - "step": 41552 - }, - { - "epoch": 3.17701703079305, - "grad_norm": 0.0017059410456568003, - "learning_rate": 0.00019999502151916821, - "loss": 46.0, - "step": 41553 - }, - { - "epoch": 3.17709348777644, - "grad_norm": 0.004187475424259901, - "learning_rate": 0.00019999502127948887, - "loss": 46.0, - "step": 41554 - }, - { - "epoch": 3.1771699447598296, - "grad_norm": 0.0016918386099860072, - "learning_rate": 0.00019999502103980375, - "loss": 46.0, - "step": 41555 - }, - { - "epoch": 3.1772464017432194, - "grad_norm": 0.001578646362759173, - "learning_rate": 0.00019999502080011286, - "loss": 46.0, - "step": 41556 - }, - { - "epoch": 3.177322858726609, - "grad_norm": 0.0018953564576804638, - "learning_rate": 0.0001999950205604162, - "loss": 46.0, - "step": 41557 - }, - { - "epoch": 3.177399315709999, - "grad_norm": 0.0020169634371995926, - "learning_rate": 0.0001999950203207138, - "loss": 46.0, - "step": 41558 - }, - { - "epoch": 3.177475772693388, - "grad_norm": 0.003477503778412938, - "learning_rate": 0.0001999950200810056, - "loss": 46.0, - "step": 41559 - }, - { - "epoch": 3.177552229676778, - "grad_norm": 0.0009138755267485976, - "learning_rate": 0.00019999501984129165, - "loss": 46.0, - "step": 41560 - }, - { - "epoch": 3.1776286866601677, - "grad_norm": 0.0018043399322777987, - "learning_rate": 0.00019999501960157195, - "loss": 46.0, - "step": 41561 - }, - { - "epoch": 3.1777051436435575, - "grad_norm": 0.0024193490389734507, - "learning_rate": 0.00019999501936184645, - "loss": 46.0, - "step": 41562 - }, - { - "epoch": 3.1777816006269473, - "grad_norm": 0.0018770884489640594, - "learning_rate": 0.00019999501912211514, - "loss": 46.0, - "step": 41563 - }, - { - "epoch": 3.177858057610337, - "grad_norm": 0.0030878782272338867, - "learning_rate": 0.00019999501888237812, - "loss": 46.0, - "step": 41564 - }, - { - "epoch": 3.177934514593727, - "grad_norm": 0.0067941430024802685, - "learning_rate": 0.00019999501864263533, - "loss": 46.0, - "step": 41565 - }, - { - "epoch": 3.1780109715771165, - "grad_norm": 0.0018375436775386333, - "learning_rate": 0.00019999501840288676, - "loss": 46.0, - "step": 41566 - }, - { - "epoch": 3.1780874285605063, - "grad_norm": 0.004575012717396021, - "learning_rate": 0.00019999501816313241, - "loss": 46.0, - "step": 41567 - }, - { - "epoch": 3.178163885543896, - "grad_norm": 0.002006369410082698, - "learning_rate": 0.00019999501792337233, - "loss": 46.0, - "step": 41568 - }, - { - "epoch": 3.1782403425272854, - "grad_norm": 0.0024700758513063192, - "learning_rate": 0.00019999501768360644, - "loss": 46.0, - "step": 41569 - }, - { - "epoch": 3.178316799510675, - "grad_norm": 0.002289470285177231, - "learning_rate": 0.0001999950174438348, - "loss": 46.0, - "step": 41570 - }, - { - "epoch": 3.178393256494065, - "grad_norm": 0.0011970787309110165, - "learning_rate": 0.00019999501720405736, - "loss": 46.0, - "step": 41571 - }, - { - "epoch": 3.1784697134774547, - "grad_norm": 0.0008146300679072738, - "learning_rate": 0.00019999501696427418, - "loss": 46.0, - "step": 41572 - }, - { - "epoch": 3.1785461704608444, - "grad_norm": 0.0033185905776917934, - "learning_rate": 0.00019999501672448525, - "loss": 46.0, - "step": 41573 - }, - { - "epoch": 3.178622627444234, - "grad_norm": 0.0015095893759280443, - "learning_rate": 0.0001999950164846905, - "loss": 46.0, - "step": 41574 - }, - { - "epoch": 3.178699084427624, - "grad_norm": 0.00117490254342556, - "learning_rate": 0.00019999501624489002, - "loss": 46.0, - "step": 41575 - }, - { - "epoch": 3.1787755414110137, - "grad_norm": 0.002301177242770791, - "learning_rate": 0.00019999501600508375, - "loss": 46.0, - "step": 41576 - }, - { - "epoch": 3.1788519983944035, - "grad_norm": 0.001515236683189869, - "learning_rate": 0.00019999501576527172, - "loss": 46.0, - "step": 41577 - }, - { - "epoch": 3.1789284553777932, - "grad_norm": 0.0013602422550320625, - "learning_rate": 0.00019999501552545393, - "loss": 46.0, - "step": 41578 - }, - { - "epoch": 3.179004912361183, - "grad_norm": 0.002288083080202341, - "learning_rate": 0.00019999501528563033, - "loss": 46.0, - "step": 41579 - }, - { - "epoch": 3.1790813693445723, - "grad_norm": 0.001860869931988418, - "learning_rate": 0.00019999501504580102, - "loss": 46.0, - "step": 41580 - }, - { - "epoch": 3.179157826327962, - "grad_norm": 0.0034210917074233294, - "learning_rate": 0.0001999950148059659, - "loss": 46.0, - "step": 41581 - }, - { - "epoch": 3.179234283311352, - "grad_norm": 0.0013878109166398644, - "learning_rate": 0.000199995014566125, - "loss": 46.0, - "step": 41582 - }, - { - "epoch": 3.1793107402947416, - "grad_norm": 0.0029540008399635553, - "learning_rate": 0.00019999501432627838, - "loss": 46.0, - "step": 41583 - }, - { - "epoch": 3.1793871972781313, - "grad_norm": 0.0035117545630782843, - "learning_rate": 0.00019999501408642597, - "loss": 46.0, - "step": 41584 - }, - { - "epoch": 3.179463654261521, - "grad_norm": 0.0033393804915249348, - "learning_rate": 0.00019999501384656778, - "loss": 46.0, - "step": 41585 - }, - { - "epoch": 3.179540111244911, - "grad_norm": 0.004739480093121529, - "learning_rate": 0.0001999950136067038, - "loss": 46.0, - "step": 41586 - }, - { - "epoch": 3.1796165682283006, - "grad_norm": 0.0010944517562165856, - "learning_rate": 0.00019999501336683407, - "loss": 46.0, - "step": 41587 - }, - { - "epoch": 3.1796930252116904, - "grad_norm": 0.0009988929377868772, - "learning_rate": 0.0001999950131269586, - "loss": 46.0, - "step": 41588 - }, - { - "epoch": 3.17976948219508, - "grad_norm": 0.00634508952498436, - "learning_rate": 0.00019999501288707735, - "loss": 46.0, - "step": 41589 - }, - { - "epoch": 3.17984593917847, - "grad_norm": 0.01088044699281454, - "learning_rate": 0.0001999950126471903, - "loss": 46.0, - "step": 41590 - }, - { - "epoch": 3.1799223961618592, - "grad_norm": 0.001451428746804595, - "learning_rate": 0.0001999950124072975, - "loss": 46.0, - "step": 41591 - }, - { - "epoch": 3.179998853145249, - "grad_norm": 0.0037342708092182875, - "learning_rate": 0.00019999501216739894, - "loss": 46.0, - "step": 41592 - }, - { - "epoch": 3.1800753101286388, - "grad_norm": 0.001866180100478232, - "learning_rate": 0.0001999950119274946, - "loss": 46.0, - "step": 41593 - }, - { - "epoch": 3.1801517671120285, - "grad_norm": 0.0014850023435428739, - "learning_rate": 0.00019999501168758448, - "loss": 46.0, - "step": 41594 - }, - { - "epoch": 3.1802282240954183, - "grad_norm": 0.0019063232466578484, - "learning_rate": 0.00019999501144766862, - "loss": 46.0, - "step": 41595 - }, - { - "epoch": 3.180304681078808, - "grad_norm": 0.000784132513217628, - "learning_rate": 0.00019999501120774698, - "loss": 46.0, - "step": 41596 - }, - { - "epoch": 3.180381138062198, - "grad_norm": 0.0049018170684576035, - "learning_rate": 0.00019999501096781955, - "loss": 46.0, - "step": 41597 - }, - { - "epoch": 3.1804575950455876, - "grad_norm": 0.0017000646330416203, - "learning_rate": 0.00019999501072788636, - "loss": 46.0, - "step": 41598 - }, - { - "epoch": 3.1805340520289773, - "grad_norm": 0.0048112571239471436, - "learning_rate": 0.0001999950104879474, - "loss": 46.0, - "step": 41599 - }, - { - "epoch": 3.180610509012367, - "grad_norm": 0.000671104877255857, - "learning_rate": 0.00019999501024800268, - "loss": 46.0, - "step": 41600 - }, - { - "epoch": 3.180686965995757, - "grad_norm": 0.002536576008424163, - "learning_rate": 0.00019999501000805218, - "loss": 46.0, - "step": 41601 - }, - { - "epoch": 3.180763422979146, - "grad_norm": 0.0018588016973808408, - "learning_rate": 0.00019999500976809593, - "loss": 46.0, - "step": 41602 - }, - { - "epoch": 3.180839879962536, - "grad_norm": 0.002467375248670578, - "learning_rate": 0.0001999950095281339, - "loss": 46.0, - "step": 41603 - }, - { - "epoch": 3.1809163369459257, - "grad_norm": 0.002192617394030094, - "learning_rate": 0.00019999500928816611, - "loss": 46.0, - "step": 41604 - }, - { - "epoch": 3.1809927939293154, - "grad_norm": 0.0008830138831399381, - "learning_rate": 0.00019999500904819252, - "loss": 46.0, - "step": 41605 - }, - { - "epoch": 3.181069250912705, - "grad_norm": 0.0038703896570950747, - "learning_rate": 0.0001999950088082132, - "loss": 46.0, - "step": 41606 - }, - { - "epoch": 3.181145707896095, - "grad_norm": 0.0009447685442864895, - "learning_rate": 0.0001999950085682281, - "loss": 46.0, - "step": 41607 - }, - { - "epoch": 3.1812221648794847, - "grad_norm": 0.001665694871917367, - "learning_rate": 0.0001999950083282372, - "loss": 46.0, - "step": 41608 - }, - { - "epoch": 3.1812986218628745, - "grad_norm": 0.0013467780081555247, - "learning_rate": 0.00019999500808824057, - "loss": 46.0, - "step": 41609 - }, - { - "epoch": 3.1813750788462642, - "grad_norm": 0.0018159612081944942, - "learning_rate": 0.00019999500784823816, - "loss": 46.0, - "step": 41610 - }, - { - "epoch": 3.181451535829654, - "grad_norm": 0.0005733192665502429, - "learning_rate": 0.00019999500760822998, - "loss": 46.0, - "step": 41611 - }, - { - "epoch": 3.1815279928130433, - "grad_norm": 0.0007503158994950354, - "learning_rate": 0.00019999500736821602, - "loss": 46.0, - "step": 41612 - }, - { - "epoch": 3.181604449796433, - "grad_norm": 0.0014418438076972961, - "learning_rate": 0.00019999500712819627, - "loss": 46.0, - "step": 41613 - }, - { - "epoch": 3.181680906779823, - "grad_norm": 0.006869006436318159, - "learning_rate": 0.00019999500688817082, - "loss": 46.0, - "step": 41614 - }, - { - "epoch": 3.1817573637632126, - "grad_norm": 0.002136533847078681, - "learning_rate": 0.00019999500664813952, - "loss": 46.0, - "step": 41615 - }, - { - "epoch": 3.1818338207466024, - "grad_norm": 0.00460156612098217, - "learning_rate": 0.00019999500640810252, - "loss": 46.0, - "step": 41616 - }, - { - "epoch": 3.181910277729992, - "grad_norm": 0.002954870695248246, - "learning_rate": 0.0001999950061680597, - "loss": 46.0, - "step": 41617 - }, - { - "epoch": 3.181986734713382, - "grad_norm": 0.0031767820473760366, - "learning_rate": 0.00019999500592801113, - "loss": 46.0, - "step": 41618 - }, - { - "epoch": 3.1820631916967717, - "grad_norm": 0.0008709346293471754, - "learning_rate": 0.0001999950056879568, - "loss": 46.0, - "step": 41619 - }, - { - "epoch": 3.1821396486801614, - "grad_norm": 0.0018425191519781947, - "learning_rate": 0.00019999500544789668, - "loss": 46.0, - "step": 41620 - }, - { - "epoch": 3.182216105663551, - "grad_norm": 0.001838630880229175, - "learning_rate": 0.00019999500520783082, - "loss": 46.0, - "step": 41621 - }, - { - "epoch": 3.182292562646941, - "grad_norm": 0.0015733184991404414, - "learning_rate": 0.00019999500496775915, - "loss": 46.0, - "step": 41622 - }, - { - "epoch": 3.1823690196303307, - "grad_norm": 0.002236704109236598, - "learning_rate": 0.00019999500472768175, - "loss": 46.0, - "step": 41623 - }, - { - "epoch": 3.18244547661372, - "grad_norm": 0.001440307474695146, - "learning_rate": 0.0001999950044875986, - "loss": 46.0, - "step": 41624 - }, - { - "epoch": 3.1825219335971098, - "grad_norm": 0.0014734945725649595, - "learning_rate": 0.0001999950042475096, - "loss": 46.0, - "step": 41625 - }, - { - "epoch": 3.1825983905804995, - "grad_norm": 0.0010877603199332952, - "learning_rate": 0.0001999950040074149, - "loss": 46.0, - "step": 41626 - }, - { - "epoch": 3.1826748475638893, - "grad_norm": 0.001741341664455831, - "learning_rate": 0.0001999950037673144, - "loss": 46.0, - "step": 41627 - }, - { - "epoch": 3.182751304547279, - "grad_norm": 0.0006204545497894287, - "learning_rate": 0.00019999500352720816, - "loss": 46.0, - "step": 41628 - }, - { - "epoch": 3.182827761530669, - "grad_norm": 0.002920563332736492, - "learning_rate": 0.00019999500328709614, - "loss": 46.0, - "step": 41629 - }, - { - "epoch": 3.1829042185140586, - "grad_norm": 0.002461136318743229, - "learning_rate": 0.00019999500304697834, - "loss": 46.0, - "step": 41630 - }, - { - "epoch": 3.1829806754974483, - "grad_norm": 0.0011827474227175117, - "learning_rate": 0.00019999500280685478, - "loss": 46.0, - "step": 41631 - }, - { - "epoch": 3.183057132480838, - "grad_norm": 0.0031452528201043606, - "learning_rate": 0.00019999500256672544, - "loss": 46.0, - "step": 41632 - }, - { - "epoch": 3.183133589464228, - "grad_norm": 0.004967565648257732, - "learning_rate": 0.00019999500232659032, - "loss": 46.0, - "step": 41633 - }, - { - "epoch": 3.183210046447617, - "grad_norm": 0.002345235552638769, - "learning_rate": 0.00019999500208644943, - "loss": 46.0, - "step": 41634 - }, - { - "epoch": 3.183286503431007, - "grad_norm": 0.0013114488683640957, - "learning_rate": 0.0001999950018463028, - "loss": 46.0, - "step": 41635 - }, - { - "epoch": 3.1833629604143967, - "grad_norm": 0.0029320684261620045, - "learning_rate": 0.0001999950016061504, - "loss": 46.0, - "step": 41636 - }, - { - "epoch": 3.1834394173977865, - "grad_norm": 0.002438370371237397, - "learning_rate": 0.0001999950013659922, - "loss": 46.0, - "step": 41637 - }, - { - "epoch": 3.1835158743811762, - "grad_norm": 0.004720065742731094, - "learning_rate": 0.00019999500112582826, - "loss": 46.0, - "step": 41638 - }, - { - "epoch": 3.183592331364566, - "grad_norm": 0.0036966942716389894, - "learning_rate": 0.00019999500088565856, - "loss": 46.0, - "step": 41639 - }, - { - "epoch": 3.1836687883479557, - "grad_norm": 0.0013784399488940835, - "learning_rate": 0.00019999500064548306, - "loss": 46.0, - "step": 41640 - }, - { - "epoch": 3.1837452453313455, - "grad_norm": 0.0032542948611080647, - "learning_rate": 0.00019999500040530178, - "loss": 46.0, - "step": 41641 - }, - { - "epoch": 3.1838217023147353, - "grad_norm": 0.0023591159842908382, - "learning_rate": 0.00019999500016511473, - "loss": 46.0, - "step": 41642 - }, - { - "epoch": 3.183898159298125, - "grad_norm": 0.0045031337067484856, - "learning_rate": 0.00019999499992492194, - "loss": 46.0, - "step": 41643 - }, - { - "epoch": 3.183974616281515, - "grad_norm": 0.002368827583268285, - "learning_rate": 0.0001999949996847234, - "loss": 46.0, - "step": 41644 - }, - { - "epoch": 3.1840510732649046, - "grad_norm": 0.0009813635842874646, - "learning_rate": 0.00019999499944451906, - "loss": 46.0, - "step": 41645 - }, - { - "epoch": 3.184127530248294, - "grad_norm": 0.0036691150162369013, - "learning_rate": 0.00019999499920430894, - "loss": 46.0, - "step": 41646 - }, - { - "epoch": 3.1842039872316836, - "grad_norm": 0.0031920333858579397, - "learning_rate": 0.00019999499896409308, - "loss": 46.0, - "step": 41647 - }, - { - "epoch": 3.1842804442150734, - "grad_norm": 0.002809201367199421, - "learning_rate": 0.00019999499872387145, - "loss": 46.0, - "step": 41648 - }, - { - "epoch": 3.184356901198463, - "grad_norm": 0.0028415147680789232, - "learning_rate": 0.00019999499848364402, - "loss": 46.0, - "step": 41649 - }, - { - "epoch": 3.184433358181853, - "grad_norm": 0.00521051324903965, - "learning_rate": 0.00019999499824341083, - "loss": 46.0, - "step": 41650 - }, - { - "epoch": 3.1845098151652427, - "grad_norm": 0.002271543722599745, - "learning_rate": 0.0001999949980031719, - "loss": 46.0, - "step": 41651 - }, - { - "epoch": 3.1845862721486324, - "grad_norm": 0.0008131064823828638, - "learning_rate": 0.00019999499776292718, - "loss": 46.0, - "step": 41652 - }, - { - "epoch": 3.184662729132022, - "grad_norm": 0.0027534926775842905, - "learning_rate": 0.0001999949975226767, - "loss": 46.0, - "step": 41653 - }, - { - "epoch": 3.184739186115412, - "grad_norm": 0.0020710667595267296, - "learning_rate": 0.00019999499728242043, - "loss": 46.0, - "step": 41654 - }, - { - "epoch": 3.1848156430988017, - "grad_norm": 0.0020446348935365677, - "learning_rate": 0.0001999949970421584, - "loss": 46.0, - "step": 41655 - }, - { - "epoch": 3.184892100082191, - "grad_norm": 0.0019081425853073597, - "learning_rate": 0.0001999949968018906, - "loss": 46.0, - "step": 41656 - }, - { - "epoch": 3.184968557065581, - "grad_norm": 0.001043798285536468, - "learning_rate": 0.00019999499656161702, - "loss": 46.0, - "step": 41657 - }, - { - "epoch": 3.1850450140489706, - "grad_norm": 0.0037501410115510225, - "learning_rate": 0.0001999949963213377, - "loss": 46.0, - "step": 41658 - }, - { - "epoch": 3.1851214710323603, - "grad_norm": 0.006452765315771103, - "learning_rate": 0.0001999949960810526, - "loss": 46.0, - "step": 41659 - }, - { - "epoch": 3.18519792801575, - "grad_norm": 0.0012687244452536106, - "learning_rate": 0.00019999499584076174, - "loss": 46.0, - "step": 41660 - }, - { - "epoch": 3.18527438499914, - "grad_norm": 0.0029455493204295635, - "learning_rate": 0.00019999499560046508, - "loss": 46.0, - "step": 41661 - }, - { - "epoch": 3.1853508419825296, - "grad_norm": 0.0026105111464858055, - "learning_rate": 0.00019999499536016267, - "loss": 46.0, - "step": 41662 - }, - { - "epoch": 3.1854272989659194, - "grad_norm": 0.0007706858450546861, - "learning_rate": 0.0001999949951198545, - "loss": 46.0, - "step": 41663 - }, - { - "epoch": 3.185503755949309, - "grad_norm": 0.003465610556304455, - "learning_rate": 0.00019999499487954054, - "loss": 46.0, - "step": 41664 - }, - { - "epoch": 3.185580212932699, - "grad_norm": 0.0025231502950191498, - "learning_rate": 0.00019999499463922084, - "loss": 46.0, - "step": 41665 - }, - { - "epoch": 3.1856566699160886, - "grad_norm": 0.0031832822132855654, - "learning_rate": 0.00019999499439889534, - "loss": 46.0, - "step": 41666 - }, - { - "epoch": 3.1857331268994784, - "grad_norm": 0.0018971977988258004, - "learning_rate": 0.00019999499415856406, - "loss": 46.0, - "step": 41667 - }, - { - "epoch": 3.1858095838828677, - "grad_norm": 0.0026606128085404634, - "learning_rate": 0.00019999499391822704, - "loss": 46.0, - "step": 41668 - }, - { - "epoch": 3.1858860408662575, - "grad_norm": 0.0011698466259986162, - "learning_rate": 0.00019999499367788425, - "loss": 46.0, - "step": 41669 - }, - { - "epoch": 3.1859624978496472, - "grad_norm": 0.0017910320311784744, - "learning_rate": 0.00019999499343753568, - "loss": 46.0, - "step": 41670 - }, - { - "epoch": 3.186038954833037, - "grad_norm": 0.002160444622859359, - "learning_rate": 0.00019999499319718137, - "loss": 46.0, - "step": 41671 - }, - { - "epoch": 3.1861154118164268, - "grad_norm": 0.002982755657285452, - "learning_rate": 0.00019999499295682125, - "loss": 46.0, - "step": 41672 - }, - { - "epoch": 3.1861918687998165, - "grad_norm": 0.00077407545177266, - "learning_rate": 0.00019999499271645537, - "loss": 46.0, - "step": 41673 - }, - { - "epoch": 3.1862683257832063, - "grad_norm": 0.0038116704672574997, - "learning_rate": 0.00019999499247608373, - "loss": 46.0, - "step": 41674 - }, - { - "epoch": 3.186344782766596, - "grad_norm": 0.0015645333332940936, - "learning_rate": 0.00019999499223570633, - "loss": 46.0, - "step": 41675 - }, - { - "epoch": 3.186421239749986, - "grad_norm": 0.0028651622124016285, - "learning_rate": 0.00019999499199532315, - "loss": 46.0, - "step": 41676 - }, - { - "epoch": 3.1864976967333756, - "grad_norm": 0.0011966757010668516, - "learning_rate": 0.0001999949917549342, - "loss": 46.0, - "step": 41677 - }, - { - "epoch": 3.186574153716765, - "grad_norm": 0.004996367730200291, - "learning_rate": 0.0001999949915145395, - "loss": 46.0, - "step": 41678 - }, - { - "epoch": 3.1866506107001547, - "grad_norm": 0.002172978362068534, - "learning_rate": 0.00019999499127413902, - "loss": 46.0, - "step": 41679 - }, - { - "epoch": 3.1867270676835444, - "grad_norm": 0.002170744352042675, - "learning_rate": 0.00019999499103373275, - "loss": 46.0, - "step": 41680 - }, - { - "epoch": 3.186803524666934, - "grad_norm": 0.001734220772050321, - "learning_rate": 0.0001999949907933207, - "loss": 46.0, - "step": 41681 - }, - { - "epoch": 3.186879981650324, - "grad_norm": 0.0030573236290365458, - "learning_rate": 0.0001999949905529029, - "loss": 46.0, - "step": 41682 - }, - { - "epoch": 3.1869564386337137, - "grad_norm": 0.0018551649991422892, - "learning_rate": 0.00019999499031247937, - "loss": 46.0, - "step": 41683 - }, - { - "epoch": 3.1870328956171035, - "grad_norm": 0.0015566645888611674, - "learning_rate": 0.00019999499007205003, - "loss": 46.0, - "step": 41684 - }, - { - "epoch": 3.187109352600493, - "grad_norm": 0.0017837334889918566, - "learning_rate": 0.00019999498983161492, - "loss": 46.0, - "step": 41685 - }, - { - "epoch": 3.187185809583883, - "grad_norm": 0.0020725317299365997, - "learning_rate": 0.00019999498959117403, - "loss": 46.0, - "step": 41686 - }, - { - "epoch": 3.1872622665672727, - "grad_norm": 0.004108430817723274, - "learning_rate": 0.0001999949893507274, - "loss": 46.0, - "step": 41687 - }, - { - "epoch": 3.1873387235506625, - "grad_norm": 0.0026767654344439507, - "learning_rate": 0.000199994989110275, - "loss": 46.0, - "step": 41688 - }, - { - "epoch": 3.1874151805340523, - "grad_norm": 0.0008698187884874642, - "learning_rate": 0.0001999949888698168, - "loss": 46.0, - "step": 41689 - }, - { - "epoch": 3.1874916375174416, - "grad_norm": 0.0022780196741223335, - "learning_rate": 0.00019999498862935289, - "loss": 46.0, - "step": 41690 - }, - { - "epoch": 3.1875680945008313, - "grad_norm": 0.0007800338789820671, - "learning_rate": 0.00019999498838888316, - "loss": 46.0, - "step": 41691 - }, - { - "epoch": 3.187644551484221, - "grad_norm": 0.008167578838765621, - "learning_rate": 0.00019999498814840766, - "loss": 46.0, - "step": 41692 - }, - { - "epoch": 3.187721008467611, - "grad_norm": 0.0023614573292434216, - "learning_rate": 0.0001999949879079264, - "loss": 46.0, - "step": 41693 - }, - { - "epoch": 3.1877974654510006, - "grad_norm": 0.006071664392948151, - "learning_rate": 0.0001999949876674394, - "loss": 46.0, - "step": 41694 - }, - { - "epoch": 3.1878739224343904, - "grad_norm": 0.00268552009947598, - "learning_rate": 0.00019999498742694657, - "loss": 46.0, - "step": 41695 - }, - { - "epoch": 3.18795037941778, - "grad_norm": 0.0022890588734298944, - "learning_rate": 0.00019999498718644803, - "loss": 46.0, - "step": 41696 - }, - { - "epoch": 3.18802683640117, - "grad_norm": 0.001993396319448948, - "learning_rate": 0.0001999949869459437, - "loss": 46.0, - "step": 41697 - }, - { - "epoch": 3.1881032933845597, - "grad_norm": 0.005362294614315033, - "learning_rate": 0.00019999498670543358, - "loss": 46.0, - "step": 41698 - }, - { - "epoch": 3.1881797503679494, - "grad_norm": 0.006055880803614855, - "learning_rate": 0.00019999498646491772, - "loss": 46.0, - "step": 41699 - }, - { - "epoch": 3.1882562073513387, - "grad_norm": 0.00259780534543097, - "learning_rate": 0.00019999498622439606, - "loss": 46.0, - "step": 41700 - }, - { - "epoch": 3.1883326643347285, - "grad_norm": 0.0012675109319388866, - "learning_rate": 0.00019999498598386868, - "loss": 46.0, - "step": 41701 - }, - { - "epoch": 3.1884091213181183, - "grad_norm": 0.0005796006880700588, - "learning_rate": 0.0001999949857433355, - "loss": 46.0, - "step": 41702 - }, - { - "epoch": 3.188485578301508, - "grad_norm": 0.0007524911779910326, - "learning_rate": 0.00019999498550279653, - "loss": 46.0, - "step": 41703 - }, - { - "epoch": 3.188562035284898, - "grad_norm": 0.0021390062756836414, - "learning_rate": 0.00019999498526225185, - "loss": 46.0, - "step": 41704 - }, - { - "epoch": 3.1886384922682875, - "grad_norm": 0.005990055855363607, - "learning_rate": 0.00019999498502170135, - "loss": 46.0, - "step": 41705 - }, - { - "epoch": 3.1887149492516773, - "grad_norm": 0.001378029352054, - "learning_rate": 0.00019999498478114508, - "loss": 46.0, - "step": 41706 - }, - { - "epoch": 3.188791406235067, - "grad_norm": 0.0012867676559835672, - "learning_rate": 0.00019999498454058306, - "loss": 46.0, - "step": 41707 - }, - { - "epoch": 3.188867863218457, - "grad_norm": 0.005057553760707378, - "learning_rate": 0.0001999949843000153, - "loss": 46.0, - "step": 41708 - }, - { - "epoch": 3.1889443202018466, - "grad_norm": 0.0020237984135746956, - "learning_rate": 0.00019999498405944173, - "loss": 46.0, - "step": 41709 - }, - { - "epoch": 3.1890207771852364, - "grad_norm": 0.00036293535958975554, - "learning_rate": 0.0001999949838188624, - "loss": 46.0, - "step": 41710 - }, - { - "epoch": 3.1890972341686257, - "grad_norm": 0.0012332603801041842, - "learning_rate": 0.00019999498357827728, - "loss": 46.0, - "step": 41711 - }, - { - "epoch": 3.1891736911520154, - "grad_norm": 0.0020509285386651754, - "learning_rate": 0.0001999949833376864, - "loss": 46.0, - "step": 41712 - }, - { - "epoch": 3.189250148135405, - "grad_norm": 0.001647367374971509, - "learning_rate": 0.0001999949830970898, - "loss": 46.0, - "step": 41713 - }, - { - "epoch": 3.189326605118795, - "grad_norm": 0.0006007991614751518, - "learning_rate": 0.00019999498285648736, - "loss": 46.0, - "step": 41714 - }, - { - "epoch": 3.1894030621021847, - "grad_norm": 0.004553616978228092, - "learning_rate": 0.00019999498261587918, - "loss": 46.0, - "step": 41715 - }, - { - "epoch": 3.1894795190855745, - "grad_norm": 0.0007731916848570108, - "learning_rate": 0.00019999498237526525, - "loss": 46.0, - "step": 41716 - }, - { - "epoch": 3.1895559760689642, - "grad_norm": 0.004818055313080549, - "learning_rate": 0.00019999498213464553, - "loss": 46.0, - "step": 41717 - }, - { - "epoch": 3.189632433052354, - "grad_norm": 0.0006430381909012794, - "learning_rate": 0.00019999498189402005, - "loss": 46.0, - "step": 41718 - }, - { - "epoch": 3.1897088900357438, - "grad_norm": 0.0023323295172303915, - "learning_rate": 0.00019999498165338878, - "loss": 46.0, - "step": 41719 - }, - { - "epoch": 3.1897853470191335, - "grad_norm": 0.0017640216974541545, - "learning_rate": 0.00019999498141275176, - "loss": 46.0, - "step": 41720 - }, - { - "epoch": 3.1898618040025233, - "grad_norm": 0.0009280572412535548, - "learning_rate": 0.00019999498117210897, - "loss": 46.0, - "step": 41721 - }, - { - "epoch": 3.1899382609859126, - "grad_norm": 0.002290521515533328, - "learning_rate": 0.00019999498093146043, - "loss": 46.0, - "step": 41722 - }, - { - "epoch": 3.1900147179693024, - "grad_norm": 0.005821548867970705, - "learning_rate": 0.0001999949806908061, - "loss": 46.0, - "step": 41723 - }, - { - "epoch": 3.190091174952692, - "grad_norm": 0.0012983180349692702, - "learning_rate": 0.00019999498045014598, - "loss": 46.0, - "step": 41724 - }, - { - "epoch": 3.190167631936082, - "grad_norm": 0.002774711698293686, - "learning_rate": 0.00019999498020948012, - "loss": 46.0, - "step": 41725 - }, - { - "epoch": 3.1902440889194716, - "grad_norm": 0.0018534691771492362, - "learning_rate": 0.00019999497996880846, - "loss": 46.0, - "step": 41726 - }, - { - "epoch": 3.1903205459028614, - "grad_norm": 0.002928379224613309, - "learning_rate": 0.00019999497972813108, - "loss": 46.0, - "step": 41727 - }, - { - "epoch": 3.190397002886251, - "grad_norm": 0.0022499614860862494, - "learning_rate": 0.0001999949794874479, - "loss": 46.0, - "step": 41728 - }, - { - "epoch": 3.190473459869641, - "grad_norm": 0.010072726756334305, - "learning_rate": 0.00019999497924675893, - "loss": 46.0, - "step": 41729 - }, - { - "epoch": 3.1905499168530307, - "grad_norm": 0.002090676221996546, - "learning_rate": 0.00019999497900606423, - "loss": 46.0, - "step": 41730 - }, - { - "epoch": 3.1906263738364204, - "grad_norm": 0.0016220242250710726, - "learning_rate": 0.00019999497876536376, - "loss": 46.0, - "step": 41731 - }, - { - "epoch": 3.19070283081981, - "grad_norm": 0.002303495304659009, - "learning_rate": 0.00019999497852465748, - "loss": 46.0, - "step": 41732 - }, - { - "epoch": 3.1907792878031995, - "grad_norm": 0.0017076447838917375, - "learning_rate": 0.00019999497828394547, - "loss": 46.0, - "step": 41733 - }, - { - "epoch": 3.1908557447865893, - "grad_norm": 0.004774115979671478, - "learning_rate": 0.00019999497804322767, - "loss": 46.0, - "step": 41734 - }, - { - "epoch": 3.190932201769979, - "grad_norm": 0.0011328072287142277, - "learning_rate": 0.00019999497780250414, - "loss": 46.0, - "step": 41735 - }, - { - "epoch": 3.191008658753369, - "grad_norm": 0.002215541899204254, - "learning_rate": 0.0001999949775617748, - "loss": 46.0, - "step": 41736 - }, - { - "epoch": 3.1910851157367586, - "grad_norm": 0.002423564437776804, - "learning_rate": 0.0001999949773210397, - "loss": 46.0, - "step": 41737 - }, - { - "epoch": 3.1911615727201483, - "grad_norm": 0.001683376613073051, - "learning_rate": 0.00019999497708029883, - "loss": 46.0, - "step": 41738 - }, - { - "epoch": 3.191238029703538, - "grad_norm": 0.00179462437517941, - "learning_rate": 0.0001999949768395522, - "loss": 46.0, - "step": 41739 - }, - { - "epoch": 3.191314486686928, - "grad_norm": 0.0014585162280127406, - "learning_rate": 0.0001999949765987998, - "loss": 46.0, - "step": 41740 - }, - { - "epoch": 3.1913909436703176, - "grad_norm": 0.0015233204467222095, - "learning_rate": 0.0001999949763580416, - "loss": 46.0, - "step": 41741 - }, - { - "epoch": 3.1914674006537074, - "grad_norm": 0.001559414085932076, - "learning_rate": 0.00019999497611727766, - "loss": 46.0, - "step": 41742 - }, - { - "epoch": 3.1915438576370967, - "grad_norm": 0.004434848204255104, - "learning_rate": 0.00019999497587650796, - "loss": 46.0, - "step": 41743 - }, - { - "epoch": 3.1916203146204865, - "grad_norm": 0.0034638717770576477, - "learning_rate": 0.0001999949756357325, - "loss": 46.0, - "step": 41744 - }, - { - "epoch": 3.191696771603876, - "grad_norm": 0.0016908141551539302, - "learning_rate": 0.00019999497539495122, - "loss": 46.0, - "step": 41745 - }, - { - "epoch": 3.191773228587266, - "grad_norm": 0.0016036538872867823, - "learning_rate": 0.0001999949751541642, - "loss": 46.0, - "step": 41746 - }, - { - "epoch": 3.1918496855706557, - "grad_norm": 0.0023750835098326206, - "learning_rate": 0.00019999497491337144, - "loss": 46.0, - "step": 41747 - }, - { - "epoch": 3.1919261425540455, - "grad_norm": 0.0010884370421990752, - "learning_rate": 0.00019999497467257284, - "loss": 46.0, - "step": 41748 - }, - { - "epoch": 3.1920025995374353, - "grad_norm": 0.00804507452994585, - "learning_rate": 0.00019999497443176853, - "loss": 46.0, - "step": 41749 - }, - { - "epoch": 3.192079056520825, - "grad_norm": 0.0024654394946992397, - "learning_rate": 0.0001999949741909584, - "loss": 46.0, - "step": 41750 - }, - { - "epoch": 3.192155513504215, - "grad_norm": 0.0024962807074189186, - "learning_rate": 0.00019999497395014254, - "loss": 46.0, - "step": 41751 - }, - { - "epoch": 3.1922319704876045, - "grad_norm": 0.0034115491434931755, - "learning_rate": 0.00019999497370932093, - "loss": 46.0, - "step": 41752 - }, - { - "epoch": 3.1923084274709943, - "grad_norm": 0.002311406657099724, - "learning_rate": 0.00019999497346849353, - "loss": 46.0, - "step": 41753 - }, - { - "epoch": 3.192384884454384, - "grad_norm": 0.0033624779898673296, - "learning_rate": 0.00019999497322766035, - "loss": 46.0, - "step": 41754 - }, - { - "epoch": 3.1924613414377734, - "grad_norm": 0.0037651751190423965, - "learning_rate": 0.0001999949729868214, - "loss": 46.0, - "step": 41755 - }, - { - "epoch": 3.192537798421163, - "grad_norm": 0.005287521984428167, - "learning_rate": 0.00019999497274597667, - "loss": 46.0, - "step": 41756 - }, - { - "epoch": 3.192614255404553, - "grad_norm": 0.0037060093600302935, - "learning_rate": 0.0001999949725051262, - "loss": 46.0, - "step": 41757 - }, - { - "epoch": 3.1926907123879427, - "grad_norm": 0.0049739438109099865, - "learning_rate": 0.00019999497226426996, - "loss": 46.0, - "step": 41758 - }, - { - "epoch": 3.1927671693713324, - "grad_norm": 0.002128767780959606, - "learning_rate": 0.00019999497202340794, - "loss": 46.0, - "step": 41759 - }, - { - "epoch": 3.192843626354722, - "grad_norm": 0.0023529413156211376, - "learning_rate": 0.00019999497178254015, - "loss": 46.0, - "step": 41760 - }, - { - "epoch": 3.192920083338112, - "grad_norm": 0.001607896643690765, - "learning_rate": 0.00019999497154166658, - "loss": 46.0, - "step": 41761 - }, - { - "epoch": 3.1929965403215017, - "grad_norm": 0.0025055850856006145, - "learning_rate": 0.00019999497130078725, - "loss": 46.0, - "step": 41762 - }, - { - "epoch": 3.1930729973048915, - "grad_norm": 0.0030576728750020266, - "learning_rate": 0.00019999497105990214, - "loss": 46.0, - "step": 41763 - }, - { - "epoch": 3.1931494542882812, - "grad_norm": 0.0011251089163124561, - "learning_rate": 0.00019999497081901128, - "loss": 46.0, - "step": 41764 - }, - { - "epoch": 3.1932259112716705, - "grad_norm": 0.002464407356455922, - "learning_rate": 0.00019999497057811465, - "loss": 46.0, - "step": 41765 - }, - { - "epoch": 3.1933023682550603, - "grad_norm": 0.0006720838719047606, - "learning_rate": 0.00019999497033721224, - "loss": 46.0, - "step": 41766 - }, - { - "epoch": 3.19337882523845, - "grad_norm": 0.001942755887284875, - "learning_rate": 0.0001999949700963041, - "loss": 46.0, - "step": 41767 - }, - { - "epoch": 3.19345528222184, - "grad_norm": 0.0017107600579038262, - "learning_rate": 0.00019999496985539014, - "loss": 46.0, - "step": 41768 - }, - { - "epoch": 3.1935317392052296, - "grad_norm": 0.0026615303941071033, - "learning_rate": 0.00019999496961447042, - "loss": 46.0, - "step": 41769 - }, - { - "epoch": 3.1936081961886194, - "grad_norm": 0.002414393937215209, - "learning_rate": 0.00019999496937354495, - "loss": 46.0, - "step": 41770 - }, - { - "epoch": 3.193684653172009, - "grad_norm": 0.0010056046303361654, - "learning_rate": 0.00019999496913261367, - "loss": 46.0, - "step": 41771 - }, - { - "epoch": 3.193761110155399, - "grad_norm": 0.0019405658822506666, - "learning_rate": 0.00019999496889167668, - "loss": 46.0, - "step": 41772 - }, - { - "epoch": 3.1938375671387886, - "grad_norm": 0.0018267863197252154, - "learning_rate": 0.0001999949686507339, - "loss": 46.0, - "step": 41773 - }, - { - "epoch": 3.1939140241221784, - "grad_norm": 0.0012507934588938951, - "learning_rate": 0.0001999949684097853, - "loss": 46.0, - "step": 41774 - }, - { - "epoch": 3.193990481105568, - "grad_norm": 0.002086616586893797, - "learning_rate": 0.000199994968168831, - "loss": 46.0, - "step": 41775 - }, - { - "epoch": 3.194066938088958, - "grad_norm": 0.00244074035435915, - "learning_rate": 0.00019999496792787088, - "loss": 46.0, - "step": 41776 - }, - { - "epoch": 3.1941433950723472, - "grad_norm": 0.0034217399079352617, - "learning_rate": 0.00019999496768690502, - "loss": 46.0, - "step": 41777 - }, - { - "epoch": 3.194219852055737, - "grad_norm": 0.0022705956362187862, - "learning_rate": 0.0001999949674459334, - "loss": 46.0, - "step": 41778 - }, - { - "epoch": 3.1942963090391268, - "grad_norm": 0.0023646594490855932, - "learning_rate": 0.00019999496720495602, - "loss": 46.0, - "step": 41779 - }, - { - "epoch": 3.1943727660225165, - "grad_norm": 0.0008587867487221956, - "learning_rate": 0.0001999949669639728, - "loss": 46.0, - "step": 41780 - }, - { - "epoch": 3.1944492230059063, - "grad_norm": 0.0033211889676749706, - "learning_rate": 0.0001999949667229839, - "loss": 46.0, - "step": 41781 - }, - { - "epoch": 3.194525679989296, - "grad_norm": 0.0016654988285154104, - "learning_rate": 0.00019999496648198916, - "loss": 46.0, - "step": 41782 - }, - { - "epoch": 3.194602136972686, - "grad_norm": 0.0007194314384832978, - "learning_rate": 0.0001999949662409887, - "loss": 46.0, - "step": 41783 - }, - { - "epoch": 3.1946785939560756, - "grad_norm": 0.001223646104335785, - "learning_rate": 0.00019999496599998245, - "loss": 46.0, - "step": 41784 - }, - { - "epoch": 3.1947550509394653, - "grad_norm": 0.0029901759698987007, - "learning_rate": 0.0001999949657589704, - "loss": 46.0, - "step": 41785 - }, - { - "epoch": 3.194831507922855, - "grad_norm": 0.0014829349238425493, - "learning_rate": 0.0001999949655179526, - "loss": 46.0, - "step": 41786 - }, - { - "epoch": 3.1949079649062444, - "grad_norm": 0.007151391822844744, - "learning_rate": 0.00019999496527692908, - "loss": 46.0, - "step": 41787 - }, - { - "epoch": 3.194984421889634, - "grad_norm": 0.0025312264915555716, - "learning_rate": 0.00019999496503589974, - "loss": 46.0, - "step": 41788 - }, - { - "epoch": 3.195060878873024, - "grad_norm": 0.0014617975102737546, - "learning_rate": 0.00019999496479486466, - "loss": 46.0, - "step": 41789 - }, - { - "epoch": 3.1951373358564137, - "grad_norm": 0.0018159488681703806, - "learning_rate": 0.0001999949645538238, - "loss": 46.0, - "step": 41790 - }, - { - "epoch": 3.1952137928398034, - "grad_norm": 0.0018441840074956417, - "learning_rate": 0.00019999496431277717, - "loss": 46.0, - "step": 41791 - }, - { - "epoch": 3.195290249823193, - "grad_norm": 0.0013451763661578298, - "learning_rate": 0.00019999496407172476, - "loss": 46.0, - "step": 41792 - }, - { - "epoch": 3.195366706806583, - "grad_norm": 0.00121031585149467, - "learning_rate": 0.0001999949638306666, - "loss": 46.0, - "step": 41793 - }, - { - "epoch": 3.1954431637899727, - "grad_norm": 0.0065407538786530495, - "learning_rate": 0.00019999496358960266, - "loss": 46.0, - "step": 41794 - }, - { - "epoch": 3.1955196207733625, - "grad_norm": 0.002348933834582567, - "learning_rate": 0.00019999496334853294, - "loss": 46.0, - "step": 41795 - }, - { - "epoch": 3.1955960777567523, - "grad_norm": 0.003194976132363081, - "learning_rate": 0.00019999496310745744, - "loss": 46.0, - "step": 41796 - }, - { - "epoch": 3.195672534740142, - "grad_norm": 0.005215474870055914, - "learning_rate": 0.0001999949628663762, - "loss": 46.0, - "step": 41797 - }, - { - "epoch": 3.1957489917235318, - "grad_norm": 0.002075603697448969, - "learning_rate": 0.00019999496262528918, - "loss": 46.0, - "step": 41798 - }, - { - "epoch": 3.195825448706921, - "grad_norm": 0.0029091532342135906, - "learning_rate": 0.0001999949623841964, - "loss": 46.0, - "step": 41799 - }, - { - "epoch": 3.195901905690311, - "grad_norm": 0.007599889300763607, - "learning_rate": 0.00019999496214309783, - "loss": 46.0, - "step": 41800 - }, - { - "epoch": 3.1959783626737006, - "grad_norm": 0.0034401395823806524, - "learning_rate": 0.00019999496190199352, - "loss": 46.0, - "step": 41801 - }, - { - "epoch": 3.1960548196570904, - "grad_norm": 0.0019381269812583923, - "learning_rate": 0.00019999496166088344, - "loss": 46.0, - "step": 41802 - }, - { - "epoch": 3.19613127664048, - "grad_norm": 0.002752471948042512, - "learning_rate": 0.00019999496141976758, - "loss": 46.0, - "step": 41803 - }, - { - "epoch": 3.19620773362387, - "grad_norm": 0.003460479434579611, - "learning_rate": 0.00019999496117864592, - "loss": 46.0, - "step": 41804 - }, - { - "epoch": 3.1962841906072597, - "grad_norm": 0.006550750695168972, - "learning_rate": 0.00019999496093751852, - "loss": 46.0, - "step": 41805 - }, - { - "epoch": 3.1963606475906494, - "grad_norm": 0.002637798897922039, - "learning_rate": 0.00019999496069638534, - "loss": 46.0, - "step": 41806 - }, - { - "epoch": 3.196437104574039, - "grad_norm": 0.003196987323462963, - "learning_rate": 0.00019999496045524642, - "loss": 46.0, - "step": 41807 - }, - { - "epoch": 3.196513561557429, - "grad_norm": 0.0011728891404345632, - "learning_rate": 0.00019999496021410172, - "loss": 46.0, - "step": 41808 - }, - { - "epoch": 3.1965900185408183, - "grad_norm": 0.0022812115494161844, - "learning_rate": 0.00019999495997295123, - "loss": 46.0, - "step": 41809 - }, - { - "epoch": 3.196666475524208, - "grad_norm": 0.004441450349986553, - "learning_rate": 0.00019999495973179498, - "loss": 46.0, - "step": 41810 - }, - { - "epoch": 3.196742932507598, - "grad_norm": 0.001541881007142365, - "learning_rate": 0.00019999495949063297, - "loss": 46.0, - "step": 41811 - }, - { - "epoch": 3.1968193894909875, - "grad_norm": 0.005147553980350494, - "learning_rate": 0.00019999495924946518, - "loss": 46.0, - "step": 41812 - }, - { - "epoch": 3.1968958464743773, - "grad_norm": 0.0023873900063335896, - "learning_rate": 0.00019999495900829164, - "loss": 46.0, - "step": 41813 - }, - { - "epoch": 3.196972303457767, - "grad_norm": 0.0005892972694709897, - "learning_rate": 0.0001999949587671123, - "loss": 46.0, - "step": 41814 - }, - { - "epoch": 3.197048760441157, - "grad_norm": 0.00452373456209898, - "learning_rate": 0.0001999949585259272, - "loss": 46.0, - "step": 41815 - }, - { - "epoch": 3.1971252174245466, - "grad_norm": 0.007730280980467796, - "learning_rate": 0.00019999495828473634, - "loss": 46.0, - "step": 41816 - }, - { - "epoch": 3.1972016744079363, - "grad_norm": 0.005197593942284584, - "learning_rate": 0.0001999949580435397, - "loss": 46.0, - "step": 41817 - }, - { - "epoch": 3.197278131391326, - "grad_norm": 0.0023051961325109005, - "learning_rate": 0.00019999495780233733, - "loss": 46.0, - "step": 41818 - }, - { - "epoch": 3.197354588374716, - "grad_norm": 0.0032664304599165916, - "learning_rate": 0.00019999495756112913, - "loss": 46.0, - "step": 41819 - }, - { - "epoch": 3.1974310453581056, - "grad_norm": 0.0015942684840410948, - "learning_rate": 0.0001999949573199152, - "loss": 46.0, - "step": 41820 - }, - { - "epoch": 3.197507502341495, - "grad_norm": 0.0016448040260002017, - "learning_rate": 0.00019999495707869548, - "loss": 46.0, - "step": 41821 - }, - { - "epoch": 3.1975839593248847, - "grad_norm": 0.0010918325278908014, - "learning_rate": 0.00019999495683747004, - "loss": 46.0, - "step": 41822 - }, - { - "epoch": 3.1976604163082745, - "grad_norm": 0.0012299743248149753, - "learning_rate": 0.00019999495659623877, - "loss": 46.0, - "step": 41823 - }, - { - "epoch": 3.1977368732916642, - "grad_norm": 0.0028943598736077547, - "learning_rate": 0.00019999495635500175, - "loss": 46.0, - "step": 41824 - }, - { - "epoch": 3.197813330275054, - "grad_norm": 0.0018121174070984125, - "learning_rate": 0.000199994956113759, - "loss": 46.0, - "step": 41825 - }, - { - "epoch": 3.1978897872584438, - "grad_norm": 0.0013667203020304441, - "learning_rate": 0.0001999949558725104, - "loss": 46.0, - "step": 41826 - }, - { - "epoch": 3.1979662442418335, - "grad_norm": 0.0018197777681052685, - "learning_rate": 0.0001999949556312561, - "loss": 46.0, - "step": 41827 - }, - { - "epoch": 3.1980427012252233, - "grad_norm": 0.0009614763548597693, - "learning_rate": 0.000199994955389996, - "loss": 46.0, - "step": 41828 - }, - { - "epoch": 3.198119158208613, - "grad_norm": 0.0006617567269131541, - "learning_rate": 0.00019999495514873012, - "loss": 46.0, - "step": 41829 - }, - { - "epoch": 3.198195615192003, - "grad_norm": 0.0038372681010514498, - "learning_rate": 0.0001999949549074585, - "loss": 46.0, - "step": 41830 - }, - { - "epoch": 3.198272072175392, - "grad_norm": 0.003438947955146432, - "learning_rate": 0.0001999949546661811, - "loss": 46.0, - "step": 41831 - }, - { - "epoch": 3.198348529158782, - "grad_norm": 0.0050773415714502335, - "learning_rate": 0.00019999495442489792, - "loss": 46.0, - "step": 41832 - }, - { - "epoch": 3.1984249861421716, - "grad_norm": 0.0039446172304451466, - "learning_rate": 0.000199994954183609, - "loss": 46.0, - "step": 41833 - }, - { - "epoch": 3.1985014431255614, - "grad_norm": 0.001261175493709743, - "learning_rate": 0.0001999949539423143, - "loss": 46.0, - "step": 41834 - }, - { - "epoch": 3.198577900108951, - "grad_norm": 0.006266809534281492, - "learning_rate": 0.00019999495370101383, - "loss": 46.0, - "step": 41835 - }, - { - "epoch": 3.198654357092341, - "grad_norm": 0.00084760005120188, - "learning_rate": 0.00019999495345970756, - "loss": 46.0, - "step": 41836 - }, - { - "epoch": 3.1987308140757307, - "grad_norm": 0.002254942199215293, - "learning_rate": 0.00019999495321839557, - "loss": 46.0, - "step": 41837 - }, - { - "epoch": 3.1988072710591204, - "grad_norm": 0.0026623986195772886, - "learning_rate": 0.00019999495297707775, - "loss": 46.0, - "step": 41838 - }, - { - "epoch": 3.19888372804251, - "grad_norm": 0.0006011240184307098, - "learning_rate": 0.00019999495273575422, - "loss": 46.0, - "step": 41839 - }, - { - "epoch": 3.1989601850259, - "grad_norm": 0.002485383301973343, - "learning_rate": 0.0001999949524944249, - "loss": 46.0, - "step": 41840 - }, - { - "epoch": 3.1990366420092897, - "grad_norm": 0.0037892533000558615, - "learning_rate": 0.0001999949522530898, - "loss": 46.0, - "step": 41841 - }, - { - "epoch": 3.199113098992679, - "grad_norm": 0.0013631790643557906, - "learning_rate": 0.00019999495201174894, - "loss": 46.0, - "step": 41842 - }, - { - "epoch": 3.199189555976069, - "grad_norm": 0.0029840683564543724, - "learning_rate": 0.00019999495177040231, - "loss": 46.0, - "step": 41843 - }, - { - "epoch": 3.1992660129594586, - "grad_norm": 0.00284794089384377, - "learning_rate": 0.0001999949515290499, - "loss": 46.0, - "step": 41844 - }, - { - "epoch": 3.1993424699428483, - "grad_norm": 0.005058239679783583, - "learning_rate": 0.00019999495128769174, - "loss": 46.0, - "step": 41845 - }, - { - "epoch": 3.199418926926238, - "grad_norm": 0.005141830071806908, - "learning_rate": 0.0001999949510463278, - "loss": 46.0, - "step": 41846 - }, - { - "epoch": 3.199495383909628, - "grad_norm": 0.001782839186489582, - "learning_rate": 0.0001999949508049581, - "loss": 46.0, - "step": 41847 - }, - { - "epoch": 3.1995718408930176, - "grad_norm": 0.0022425889037549496, - "learning_rate": 0.00019999495056358262, - "loss": 46.0, - "step": 41848 - }, - { - "epoch": 3.1996482978764074, - "grad_norm": 0.002025061519816518, - "learning_rate": 0.00019999495032220138, - "loss": 46.0, - "step": 41849 - }, - { - "epoch": 3.199724754859797, - "grad_norm": 0.0022437439765781164, - "learning_rate": 0.00019999495008081434, - "loss": 46.0, - "step": 41850 - }, - { - "epoch": 3.199801211843187, - "grad_norm": 0.0021829060278832912, - "learning_rate": 0.00019999494983942158, - "loss": 46.0, - "step": 41851 - }, - { - "epoch": 3.1998776688265766, - "grad_norm": 0.0014959339750930667, - "learning_rate": 0.00019999494959802304, - "loss": 46.0, - "step": 41852 - }, - { - "epoch": 3.199954125809966, - "grad_norm": 0.0015004135202616453, - "learning_rate": 0.0001999949493566187, - "loss": 46.0, - "step": 41853 - }, - { - "epoch": 3.2000305827933557, - "grad_norm": 0.0006117904558777809, - "learning_rate": 0.00019999494911520862, - "loss": 46.0, - "step": 41854 - }, - { - "epoch": 3.2001070397767455, - "grad_norm": 0.004115217365324497, - "learning_rate": 0.00019999494887379274, - "loss": 46.0, - "step": 41855 - }, - { - "epoch": 3.2001834967601352, - "grad_norm": 0.0031721347477287054, - "learning_rate": 0.0001999949486323711, - "loss": 46.0, - "step": 41856 - }, - { - "epoch": 3.200259953743525, - "grad_norm": 0.0028074639849364758, - "learning_rate": 0.00019999494839094374, - "loss": 46.0, - "step": 41857 - }, - { - "epoch": 3.2003364107269148, - "grad_norm": 0.0021092924289405346, - "learning_rate": 0.00019999494814951056, - "loss": 46.0, - "step": 41858 - }, - { - "epoch": 3.2004128677103045, - "grad_norm": 0.006816784385591745, - "learning_rate": 0.0001999949479080716, - "loss": 46.0, - "step": 41859 - }, - { - "epoch": 3.2004893246936943, - "grad_norm": 0.004569733515381813, - "learning_rate": 0.00019999494766662692, - "loss": 46.0, - "step": 41860 - }, - { - "epoch": 3.200565781677084, - "grad_norm": 0.0033006465528160334, - "learning_rate": 0.00019999494742517645, - "loss": 46.0, - "step": 41861 - }, - { - "epoch": 3.200642238660474, - "grad_norm": 0.004284622147679329, - "learning_rate": 0.00019999494718372018, - "loss": 46.0, - "step": 41862 - }, - { - "epoch": 3.2007186956438636, - "grad_norm": 0.001976188039407134, - "learning_rate": 0.00019999494694225816, - "loss": 46.0, - "step": 41863 - }, - { - "epoch": 3.200795152627253, - "grad_norm": 0.001034342683851719, - "learning_rate": 0.0001999949467007904, - "loss": 46.0, - "step": 41864 - }, - { - "epoch": 3.2008716096106427, - "grad_norm": 0.0008406956330873072, - "learning_rate": 0.00019999494645931684, - "loss": 46.0, - "step": 41865 - }, - { - "epoch": 3.2009480665940324, - "grad_norm": 0.003191269002854824, - "learning_rate": 0.00019999494621783753, - "loss": 46.0, - "step": 41866 - }, - { - "epoch": 3.201024523577422, - "grad_norm": 0.0020116691011935472, - "learning_rate": 0.00019999494597635242, - "loss": 46.0, - "step": 41867 - }, - { - "epoch": 3.201100980560812, - "grad_norm": 0.004101854749023914, - "learning_rate": 0.0001999949457348616, - "loss": 46.0, - "step": 41868 - }, - { - "epoch": 3.2011774375442017, - "grad_norm": 0.0007809695671312511, - "learning_rate": 0.00019999494549336494, - "loss": 46.0, - "step": 41869 - }, - { - "epoch": 3.2012538945275915, - "grad_norm": 0.001870789099484682, - "learning_rate": 0.00019999494525186257, - "loss": 46.0, - "step": 41870 - }, - { - "epoch": 3.201330351510981, - "grad_norm": 0.005278183147311211, - "learning_rate": 0.0001999949450103544, - "loss": 46.0, - "step": 41871 - }, - { - "epoch": 3.201406808494371, - "grad_norm": 0.0022934924345463514, - "learning_rate": 0.00019999494476884047, - "loss": 46.0, - "step": 41872 - }, - { - "epoch": 3.2014832654777607, - "grad_norm": 0.001164207118563354, - "learning_rate": 0.00019999494452732075, - "loss": 46.0, - "step": 41873 - }, - { - "epoch": 3.2015597224611505, - "grad_norm": 0.00329647958278656, - "learning_rate": 0.00019999494428579525, - "loss": 46.0, - "step": 41874 - }, - { - "epoch": 3.20163617944454, - "grad_norm": 0.0018871875945478678, - "learning_rate": 0.000199994944044264, - "loss": 46.0, - "step": 41875 - }, - { - "epoch": 3.2017126364279296, - "grad_norm": 0.0013439093017950654, - "learning_rate": 0.00019999494380272702, - "loss": 46.0, - "step": 41876 - }, - { - "epoch": 3.2017890934113193, - "grad_norm": 0.003036444541066885, - "learning_rate": 0.00019999494356118423, - "loss": 46.0, - "step": 41877 - }, - { - "epoch": 3.201865550394709, - "grad_norm": 0.002536752261221409, - "learning_rate": 0.0001999949433196357, - "loss": 46.0, - "step": 41878 - }, - { - "epoch": 3.201942007378099, - "grad_norm": 0.0018414033111184835, - "learning_rate": 0.00019999494307808136, - "loss": 46.0, - "step": 41879 - }, - { - "epoch": 3.2020184643614886, - "grad_norm": 0.0028950015548616648, - "learning_rate": 0.00019999494283652128, - "loss": 46.0, - "step": 41880 - }, - { - "epoch": 3.2020949213448784, - "grad_norm": 0.0033415048383176327, - "learning_rate": 0.00019999494259495543, - "loss": 46.0, - "step": 41881 - }, - { - "epoch": 3.202171378328268, - "grad_norm": 0.000592630822211504, - "learning_rate": 0.00019999494235338377, - "loss": 46.0, - "step": 41882 - }, - { - "epoch": 3.202247835311658, - "grad_norm": 0.0023894303012639284, - "learning_rate": 0.0001999949421118064, - "loss": 46.0, - "step": 41883 - }, - { - "epoch": 3.2023242922950477, - "grad_norm": 0.0018253163434565067, - "learning_rate": 0.00019999494187022322, - "loss": 46.0, - "step": 41884 - }, - { - "epoch": 3.2024007492784374, - "grad_norm": 0.0028539958875626326, - "learning_rate": 0.00019999494162863427, - "loss": 46.0, - "step": 41885 - }, - { - "epoch": 3.2024772062618267, - "grad_norm": 0.0026212134398519993, - "learning_rate": 0.00019999494138703958, - "loss": 46.0, - "step": 41886 - }, - { - "epoch": 3.2025536632452165, - "grad_norm": 0.002570651937276125, - "learning_rate": 0.0001999949411454391, - "loss": 46.0, - "step": 41887 - }, - { - "epoch": 3.2026301202286063, - "grad_norm": 0.0013720061397179961, - "learning_rate": 0.00019999494090383287, - "loss": 46.0, - "step": 41888 - }, - { - "epoch": 3.202706577211996, - "grad_norm": 0.0021675988100469112, - "learning_rate": 0.00019999494066222086, - "loss": 46.0, - "step": 41889 - }, - { - "epoch": 3.202783034195386, - "grad_norm": 0.0011974771041423082, - "learning_rate": 0.0001999949404206031, - "loss": 46.0, - "step": 41890 - }, - { - "epoch": 3.2028594911787756, - "grad_norm": 0.0035049342550337315, - "learning_rate": 0.00019999494017897953, - "loss": 46.0, - "step": 41891 - }, - { - "epoch": 3.2029359481621653, - "grad_norm": 0.0011456075590103865, - "learning_rate": 0.0001999949399373502, - "loss": 46.0, - "step": 41892 - }, - { - "epoch": 3.203012405145555, - "grad_norm": 0.003045186400413513, - "learning_rate": 0.00019999493969571512, - "loss": 46.0, - "step": 41893 - }, - { - "epoch": 3.203088862128945, - "grad_norm": 0.0013950093416497111, - "learning_rate": 0.0001999949394540743, - "loss": 46.0, - "step": 41894 - }, - { - "epoch": 3.2031653191123346, - "grad_norm": 0.0026571836788207293, - "learning_rate": 0.00019999493921242764, - "loss": 46.0, - "step": 41895 - }, - { - "epoch": 3.203241776095724, - "grad_norm": 0.0037363844458013773, - "learning_rate": 0.00019999493897077523, - "loss": 46.0, - "step": 41896 - }, - { - "epoch": 3.2033182330791137, - "grad_norm": 0.0033596395514905453, - "learning_rate": 0.0001999949387291171, - "loss": 46.0, - "step": 41897 - }, - { - "epoch": 3.2033946900625034, - "grad_norm": 0.003285108832642436, - "learning_rate": 0.00019999493848745314, - "loss": 46.0, - "step": 41898 - }, - { - "epoch": 3.203471147045893, - "grad_norm": 0.0010430342517793179, - "learning_rate": 0.00019999493824578344, - "loss": 46.0, - "step": 41899 - }, - { - "epoch": 3.203547604029283, - "grad_norm": 0.0010957475751638412, - "learning_rate": 0.00019999493800410795, - "loss": 46.0, - "step": 41900 - }, - { - "epoch": 3.2036240610126727, - "grad_norm": 0.0017878767102956772, - "learning_rate": 0.00019999493776242674, - "loss": 46.0, - "step": 41901 - }, - { - "epoch": 3.2037005179960625, - "grad_norm": 0.0009199641062878072, - "learning_rate": 0.00019999493752073972, - "loss": 46.0, - "step": 41902 - }, - { - "epoch": 3.2037769749794522, - "grad_norm": 0.0007861303747631609, - "learning_rate": 0.00019999493727904693, - "loss": 46.0, - "step": 41903 - }, - { - "epoch": 3.203853431962842, - "grad_norm": 0.004879381973296404, - "learning_rate": 0.0001999949370373484, - "loss": 46.0, - "step": 41904 - }, - { - "epoch": 3.2039298889462318, - "grad_norm": 0.000710874970536679, - "learning_rate": 0.00019999493679564407, - "loss": 46.0, - "step": 41905 - }, - { - "epoch": 3.2040063459296215, - "grad_norm": 0.004271247889846563, - "learning_rate": 0.00019999493655393398, - "loss": 46.0, - "step": 41906 - }, - { - "epoch": 3.2040828029130113, - "grad_norm": 0.002667555818334222, - "learning_rate": 0.00019999493631221813, - "loss": 46.0, - "step": 41907 - }, - { - "epoch": 3.2041592598964006, - "grad_norm": 0.002643396146595478, - "learning_rate": 0.0001999949360704965, - "loss": 46.0, - "step": 41908 - }, - { - "epoch": 3.2042357168797904, - "grad_norm": 0.011871416121721268, - "learning_rate": 0.0001999949358287691, - "loss": 46.0, - "step": 41909 - }, - { - "epoch": 3.20431217386318, - "grad_norm": 0.0012360658729448915, - "learning_rate": 0.00019999493558703595, - "loss": 46.0, - "step": 41910 - }, - { - "epoch": 3.20438863084657, - "grad_norm": 0.0016530659049749374, - "learning_rate": 0.000199994935345297, - "loss": 46.0, - "step": 41911 - }, - { - "epoch": 3.2044650878299596, - "grad_norm": 0.003976988606154919, - "learning_rate": 0.00019999493510355229, - "loss": 46.0, - "step": 41912 - }, - { - "epoch": 3.2045415448133494, - "grad_norm": 0.0030321250669658184, - "learning_rate": 0.00019999493486180182, - "loss": 46.0, - "step": 41913 - }, - { - "epoch": 3.204618001796739, - "grad_norm": 0.004816477186977863, - "learning_rate": 0.0001999949346200456, - "loss": 46.0, - "step": 41914 - }, - { - "epoch": 3.204694458780129, - "grad_norm": 0.0024308855645358562, - "learning_rate": 0.0001999949343782836, - "loss": 46.0, - "step": 41915 - }, - { - "epoch": 3.2047709157635187, - "grad_norm": 0.0048491195775568485, - "learning_rate": 0.0001999949341365158, - "loss": 46.0, - "step": 41916 - }, - { - "epoch": 3.2048473727469085, - "grad_norm": 0.007922302931547165, - "learning_rate": 0.00019999493389474227, - "loss": 46.0, - "step": 41917 - }, - { - "epoch": 3.2049238297302978, - "grad_norm": 0.008576560765504837, - "learning_rate": 0.00019999493365296294, - "loss": 46.0, - "step": 41918 - }, - { - "epoch": 3.2050002867136875, - "grad_norm": 0.0024287390988320112, - "learning_rate": 0.00019999493341117786, - "loss": 46.0, - "step": 41919 - }, - { - "epoch": 3.2050767436970773, - "grad_norm": 0.0032086092978715897, - "learning_rate": 0.000199994933169387, - "loss": 46.0, - "step": 41920 - }, - { - "epoch": 3.205153200680467, - "grad_norm": 0.0018151280237361789, - "learning_rate": 0.00019999493292759037, - "loss": 46.0, - "step": 41921 - }, - { - "epoch": 3.205229657663857, - "grad_norm": 0.00288069318048656, - "learning_rate": 0.000199994932685788, - "loss": 46.0, - "step": 41922 - }, - { - "epoch": 3.2053061146472466, - "grad_norm": 0.0018049966311082244, - "learning_rate": 0.00019999493244397983, - "loss": 46.0, - "step": 41923 - }, - { - "epoch": 3.2053825716306363, - "grad_norm": 0.00171754602342844, - "learning_rate": 0.00019999493220216588, - "loss": 46.0, - "step": 41924 - }, - { - "epoch": 3.205459028614026, - "grad_norm": 0.0014012206811457872, - "learning_rate": 0.00019999493196034619, - "loss": 46.0, - "step": 41925 - }, - { - "epoch": 3.205535485597416, - "grad_norm": 0.0010059995111078024, - "learning_rate": 0.00019999493171852072, - "loss": 46.0, - "step": 41926 - }, - { - "epoch": 3.2056119425808056, - "grad_norm": 0.0017957398667931557, - "learning_rate": 0.00019999493147668948, - "loss": 46.0, - "step": 41927 - }, - { - "epoch": 3.2056883995641954, - "grad_norm": 0.0013694277731701732, - "learning_rate": 0.00019999493123485246, - "loss": 46.0, - "step": 41928 - }, - { - "epoch": 3.205764856547585, - "grad_norm": 0.0008041377877816558, - "learning_rate": 0.00019999493099300968, - "loss": 46.0, - "step": 41929 - }, - { - "epoch": 3.2058413135309745, - "grad_norm": 0.003393413731828332, - "learning_rate": 0.00019999493075116114, - "loss": 46.0, - "step": 41930 - }, - { - "epoch": 3.205917770514364, - "grad_norm": 0.003929989878088236, - "learning_rate": 0.0001999949305093068, - "loss": 46.0, - "step": 41931 - }, - { - "epoch": 3.205994227497754, - "grad_norm": 0.001302413991652429, - "learning_rate": 0.00019999493026744673, - "loss": 46.0, - "step": 41932 - }, - { - "epoch": 3.2060706844811437, - "grad_norm": 0.002498259535059333, - "learning_rate": 0.00019999493002558088, - "loss": 46.0, - "step": 41933 - }, - { - "epoch": 3.2061471414645335, - "grad_norm": 0.006291416939347982, - "learning_rate": 0.00019999492978370925, - "loss": 46.0, - "step": 41934 - }, - { - "epoch": 3.2062235984479233, - "grad_norm": 0.003000830067321658, - "learning_rate": 0.00019999492954183188, - "loss": 46.0, - "step": 41935 - }, - { - "epoch": 3.206300055431313, - "grad_norm": 0.0031903102062642574, - "learning_rate": 0.0001999949292999487, - "loss": 46.0, - "step": 41936 - }, - { - "epoch": 3.206376512414703, - "grad_norm": 0.0016998659120872617, - "learning_rate": 0.00019999492905805978, - "loss": 46.0, - "step": 41937 - }, - { - "epoch": 3.2064529693980925, - "grad_norm": 0.0021885468158870935, - "learning_rate": 0.00019999492881616506, - "loss": 46.0, - "step": 41938 - }, - { - "epoch": 3.2065294263814823, - "grad_norm": 0.00403962517157197, - "learning_rate": 0.0001999949285742646, - "loss": 46.0, - "step": 41939 - }, - { - "epoch": 3.2066058833648716, - "grad_norm": 0.002945851068943739, - "learning_rate": 0.00019999492833235838, - "loss": 46.0, - "step": 41940 - }, - { - "epoch": 3.2066823403482614, - "grad_norm": 0.0033174597192555666, - "learning_rate": 0.00019999492809044634, - "loss": 46.0, - "step": 41941 - }, - { - "epoch": 3.206758797331651, - "grad_norm": 0.0009510880336165428, - "learning_rate": 0.00019999492784852858, - "loss": 46.0, - "step": 41942 - }, - { - "epoch": 3.206835254315041, - "grad_norm": 0.002162659540772438, - "learning_rate": 0.00019999492760660502, - "loss": 46.0, - "step": 41943 - }, - { - "epoch": 3.2069117112984307, - "grad_norm": 0.001432416494935751, - "learning_rate": 0.00019999492736467574, - "loss": 46.0, - "step": 41944 - }, - { - "epoch": 3.2069881682818204, - "grad_norm": 0.001979827880859375, - "learning_rate": 0.00019999492712274064, - "loss": 46.0, - "step": 41945 - }, - { - "epoch": 3.20706462526521, - "grad_norm": 0.0029415723402053118, - "learning_rate": 0.00019999492688079978, - "loss": 46.0, - "step": 41946 - }, - { - "epoch": 3.2071410822486, - "grad_norm": 0.0038416923489421606, - "learning_rate": 0.00019999492663885316, - "loss": 46.0, - "step": 41947 - }, - { - "epoch": 3.2072175392319897, - "grad_norm": 0.004409688524901867, - "learning_rate": 0.00019999492639690076, - "loss": 46.0, - "step": 41948 - }, - { - "epoch": 3.2072939962153795, - "grad_norm": 0.0007211290649138391, - "learning_rate": 0.00019999492615494258, - "loss": 46.0, - "step": 41949 - }, - { - "epoch": 3.2073704531987692, - "grad_norm": 0.0020159073173999786, - "learning_rate": 0.00019999492591297866, - "loss": 46.0, - "step": 41950 - }, - { - "epoch": 3.207446910182159, - "grad_norm": 0.0025301771238446236, - "learning_rate": 0.00019999492567100897, - "loss": 46.0, - "step": 41951 - }, - { - "epoch": 3.2075233671655483, - "grad_norm": 0.002478532027453184, - "learning_rate": 0.0001999949254290335, - "loss": 46.0, - "step": 41952 - }, - { - "epoch": 3.207599824148938, - "grad_norm": 0.0013402156764641404, - "learning_rate": 0.00019999492518705226, - "loss": 46.0, - "step": 41953 - }, - { - "epoch": 3.207676281132328, - "grad_norm": 0.0005637327558360994, - "learning_rate": 0.00019999492494506528, - "loss": 46.0, - "step": 41954 - }, - { - "epoch": 3.2077527381157176, - "grad_norm": 0.0008347879629582167, - "learning_rate": 0.0001999949247030725, - "loss": 46.0, - "step": 41955 - }, - { - "epoch": 3.2078291950991074, - "grad_norm": 0.003693429986014962, - "learning_rate": 0.00019999492446107393, - "loss": 46.0, - "step": 41956 - }, - { - "epoch": 3.207905652082497, - "grad_norm": 0.0019132073502987623, - "learning_rate": 0.00019999492421906963, - "loss": 46.0, - "step": 41957 - }, - { - "epoch": 3.207982109065887, - "grad_norm": 0.004490647930651903, - "learning_rate": 0.00019999492397705955, - "loss": 46.0, - "step": 41958 - }, - { - "epoch": 3.2080585660492766, - "grad_norm": 0.0010885039810091257, - "learning_rate": 0.00019999492373504367, - "loss": 46.0, - "step": 41959 - }, - { - "epoch": 3.2081350230326664, - "grad_norm": 0.00437463354319334, - "learning_rate": 0.00019999492349302207, - "loss": 46.0, - "step": 41960 - }, - { - "epoch": 3.208211480016056, - "grad_norm": 0.0026589494664222, - "learning_rate": 0.00019999492325099467, - "loss": 46.0, - "step": 41961 - }, - { - "epoch": 3.2082879369994455, - "grad_norm": 0.0025310181081295013, - "learning_rate": 0.0001999949230089615, - "loss": 46.0, - "step": 41962 - }, - { - "epoch": 3.2083643939828352, - "grad_norm": 0.004438692703843117, - "learning_rate": 0.00019999492276692258, - "loss": 46.0, - "step": 41963 - }, - { - "epoch": 3.208440850966225, - "grad_norm": 0.0012899021385237575, - "learning_rate": 0.00019999492252487788, - "loss": 46.0, - "step": 41964 - }, - { - "epoch": 3.2085173079496148, - "grad_norm": 0.002076528500765562, - "learning_rate": 0.00019999492228282742, - "loss": 46.0, - "step": 41965 - }, - { - "epoch": 3.2085937649330045, - "grad_norm": 0.0013169677695259452, - "learning_rate": 0.00019999492204077118, - "loss": 46.0, - "step": 41966 - }, - { - "epoch": 3.2086702219163943, - "grad_norm": 0.001325442804954946, - "learning_rate": 0.00019999492179870916, - "loss": 46.0, - "step": 41967 - }, - { - "epoch": 3.208746678899784, - "grad_norm": 0.0036222960334271193, - "learning_rate": 0.0001999949215566414, - "loss": 46.0, - "step": 41968 - }, - { - "epoch": 3.208823135883174, - "grad_norm": 0.005300433840602636, - "learning_rate": 0.00019999492131456785, - "loss": 46.0, - "step": 41969 - }, - { - "epoch": 3.2088995928665636, - "grad_norm": 0.0016628399025648832, - "learning_rate": 0.00019999492107248854, - "loss": 46.0, - "step": 41970 - }, - { - "epoch": 3.2089760498499533, - "grad_norm": 0.002210634760558605, - "learning_rate": 0.00019999492083040346, - "loss": 46.0, - "step": 41971 - }, - { - "epoch": 3.209052506833343, - "grad_norm": 0.000432817789260298, - "learning_rate": 0.0001999949205883126, - "loss": 46.0, - "step": 41972 - }, - { - "epoch": 3.2091289638167324, - "grad_norm": 0.0031472027767449617, - "learning_rate": 0.00019999492034621598, - "loss": 46.0, - "step": 41973 - }, - { - "epoch": 3.209205420800122, - "grad_norm": 0.004076885990798473, - "learning_rate": 0.00019999492010411358, - "loss": 46.0, - "step": 41974 - }, - { - "epoch": 3.209281877783512, - "grad_norm": 0.003162642475217581, - "learning_rate": 0.00019999491986200544, - "loss": 46.0, - "step": 41975 - }, - { - "epoch": 3.2093583347669017, - "grad_norm": 0.0028325412422418594, - "learning_rate": 0.0001999949196198915, - "loss": 46.0, - "step": 41976 - }, - { - "epoch": 3.2094347917502914, - "grad_norm": 0.0015966629143804312, - "learning_rate": 0.0001999949193777718, - "loss": 46.0, - "step": 41977 - }, - { - "epoch": 3.209511248733681, - "grad_norm": 0.0024066753685474396, - "learning_rate": 0.00019999491913564633, - "loss": 46.0, - "step": 41978 - }, - { - "epoch": 3.209587705717071, - "grad_norm": 0.00470546493306756, - "learning_rate": 0.00019999491889351512, - "loss": 46.0, - "step": 41979 - }, - { - "epoch": 3.2096641627004607, - "grad_norm": 0.002852489473298192, - "learning_rate": 0.00019999491865137808, - "loss": 46.0, - "step": 41980 - }, - { - "epoch": 3.2097406196838505, - "grad_norm": 0.00869984831660986, - "learning_rate": 0.00019999491840923532, - "loss": 46.0, - "step": 41981 - }, - { - "epoch": 3.2098170766672403, - "grad_norm": 0.002468184567987919, - "learning_rate": 0.00019999491816708676, - "loss": 46.0, - "step": 41982 - }, - { - "epoch": 3.20989353365063, - "grad_norm": 0.00302159134298563, - "learning_rate": 0.00019999491792493249, - "loss": 46.0, - "step": 41983 - }, - { - "epoch": 3.2099699906340193, - "grad_norm": 0.002297553699463606, - "learning_rate": 0.00019999491768277238, - "loss": 46.0, - "step": 41984 - }, - { - "epoch": 3.210046447617409, - "grad_norm": 0.0016759030986577272, - "learning_rate": 0.00019999491744060653, - "loss": 46.0, - "step": 41985 - }, - { - "epoch": 3.210122904600799, - "grad_norm": 0.0018621556228026748, - "learning_rate": 0.0001999949171984349, - "loss": 46.0, - "step": 41986 - }, - { - "epoch": 3.2101993615841886, - "grad_norm": 0.002524434356018901, - "learning_rate": 0.00019999491695625753, - "loss": 46.0, - "step": 41987 - }, - { - "epoch": 3.2102758185675784, - "grad_norm": 0.006962368730455637, - "learning_rate": 0.00019999491671407436, - "loss": 46.0, - "step": 41988 - }, - { - "epoch": 3.210352275550968, - "grad_norm": 0.0016402521869167686, - "learning_rate": 0.00019999491647188544, - "loss": 46.0, - "step": 41989 - }, - { - "epoch": 3.210428732534358, - "grad_norm": 0.0011914786882698536, - "learning_rate": 0.00019999491622969075, - "loss": 46.0, - "step": 41990 - }, - { - "epoch": 3.2105051895177477, - "grad_norm": 0.001277404255233705, - "learning_rate": 0.00019999491598749028, - "loss": 46.0, - "step": 41991 - }, - { - "epoch": 3.2105816465011374, - "grad_norm": 0.007648926228284836, - "learning_rate": 0.00019999491574528404, - "loss": 46.0, - "step": 41992 - }, - { - "epoch": 3.210658103484527, - "grad_norm": 0.003468940267339349, - "learning_rate": 0.00019999491550307203, - "loss": 46.0, - "step": 41993 - }, - { - "epoch": 3.210734560467917, - "grad_norm": 0.002637440338730812, - "learning_rate": 0.00019999491526085427, - "loss": 46.0, - "step": 41994 - }, - { - "epoch": 3.2108110174513063, - "grad_norm": 0.006943566258996725, - "learning_rate": 0.0001999949150186307, - "loss": 46.0, - "step": 41995 - }, - { - "epoch": 3.210887474434696, - "grad_norm": 0.0016487700631842017, - "learning_rate": 0.0001999949147764014, - "loss": 46.0, - "step": 41996 - }, - { - "epoch": 3.210963931418086, - "grad_norm": 0.0009402178111486137, - "learning_rate": 0.00019999491453416633, - "loss": 46.0, - "step": 41997 - }, - { - "epoch": 3.2110403884014755, - "grad_norm": 0.0019277287647128105, - "learning_rate": 0.00019999491429192548, - "loss": 46.0, - "step": 41998 - }, - { - "epoch": 3.2111168453848653, - "grad_norm": 0.0005230432725511491, - "learning_rate": 0.00019999491404967885, - "loss": 46.0, - "step": 41999 - }, - { - "epoch": 3.211193302368255, - "grad_norm": 0.004211823921650648, - "learning_rate": 0.00019999491380742645, - "loss": 46.0, - "step": 42000 - }, - { - "epoch": 3.211269759351645, - "grad_norm": 0.0016098804771900177, - "learning_rate": 0.00019999491356516834, - "loss": 46.0, - "step": 42001 - }, - { - "epoch": 3.2113462163350346, - "grad_norm": 0.001238106982782483, - "learning_rate": 0.0001999949133229044, - "loss": 46.0, - "step": 42002 - }, - { - "epoch": 3.2114226733184243, - "grad_norm": 0.0031190484296530485, - "learning_rate": 0.0001999949130806347, - "loss": 46.0, - "step": 42003 - }, - { - "epoch": 3.211499130301814, - "grad_norm": 0.0017370606074109674, - "learning_rate": 0.00019999491283835923, - "loss": 46.0, - "step": 42004 - }, - { - "epoch": 3.211575587285204, - "grad_norm": 0.003475408535450697, - "learning_rate": 0.000199994912596078, - "loss": 46.0, - "step": 42005 - }, - { - "epoch": 3.211652044268593, - "grad_norm": 0.0019139007199555635, - "learning_rate": 0.00019999491235379098, - "loss": 46.0, - "step": 42006 - }, - { - "epoch": 3.211728501251983, - "grad_norm": 0.0021670942660421133, - "learning_rate": 0.00019999491211149822, - "loss": 46.0, - "step": 42007 - }, - { - "epoch": 3.2118049582353727, - "grad_norm": 0.002825238974764943, - "learning_rate": 0.00019999491186919967, - "loss": 46.0, - "step": 42008 - }, - { - "epoch": 3.2118814152187625, - "grad_norm": 0.003197198733687401, - "learning_rate": 0.00019999491162689536, - "loss": 46.0, - "step": 42009 - }, - { - "epoch": 3.2119578722021522, - "grad_norm": 0.00154455890879035, - "learning_rate": 0.00019999491138458528, - "loss": 46.0, - "step": 42010 - }, - { - "epoch": 3.212034329185542, - "grad_norm": 0.001795478630810976, - "learning_rate": 0.00019999491114226943, - "loss": 46.0, - "step": 42011 - }, - { - "epoch": 3.2121107861689318, - "grad_norm": 0.0012015431420877576, - "learning_rate": 0.0001999949108999478, - "loss": 46.0, - "step": 42012 - }, - { - "epoch": 3.2121872431523215, - "grad_norm": 0.0034283860586583614, - "learning_rate": 0.00019999491065762043, - "loss": 46.0, - "step": 42013 - }, - { - "epoch": 3.2122637001357113, - "grad_norm": 0.0019157460192218423, - "learning_rate": 0.00019999491041528726, - "loss": 46.0, - "step": 42014 - }, - { - "epoch": 3.212340157119101, - "grad_norm": 0.0028663913253694773, - "learning_rate": 0.00019999491017294835, - "loss": 46.0, - "step": 42015 - }, - { - "epoch": 3.212416614102491, - "grad_norm": 0.0010831519030034542, - "learning_rate": 0.00019999490993060363, - "loss": 46.0, - "step": 42016 - }, - { - "epoch": 3.21249307108588, - "grad_norm": 0.0030344510450959206, - "learning_rate": 0.0001999949096882532, - "loss": 46.0, - "step": 42017 - }, - { - "epoch": 3.21256952806927, - "grad_norm": 0.001425539841875434, - "learning_rate": 0.00019999490944589695, - "loss": 46.0, - "step": 42018 - }, - { - "epoch": 3.2126459850526596, - "grad_norm": 0.0021982023026794195, - "learning_rate": 0.00019999490920353497, - "loss": 46.0, - "step": 42019 - }, - { - "epoch": 3.2127224420360494, - "grad_norm": 0.004483222961425781, - "learning_rate": 0.00019999490896116718, - "loss": 46.0, - "step": 42020 - }, - { - "epoch": 3.212798899019439, - "grad_norm": 0.0022205663844943047, - "learning_rate": 0.00019999490871879365, - "loss": 46.0, - "step": 42021 - }, - { - "epoch": 3.212875356002829, - "grad_norm": 0.0014647021889686584, - "learning_rate": 0.00019999490847641435, - "loss": 46.0, - "step": 42022 - }, - { - "epoch": 3.2129518129862187, - "grad_norm": 0.0020561611745506525, - "learning_rate": 0.00019999490823402924, - "loss": 46.0, - "step": 42023 - }, - { - "epoch": 3.2130282699696084, - "grad_norm": 0.003461114363744855, - "learning_rate": 0.0001999949079916384, - "loss": 46.0, - "step": 42024 - }, - { - "epoch": 3.213104726952998, - "grad_norm": 0.0027100222650915384, - "learning_rate": 0.0001999949077492418, - "loss": 46.0, - "step": 42025 - }, - { - "epoch": 3.213181183936388, - "grad_norm": 0.0026494513731449842, - "learning_rate": 0.00019999490750683937, - "loss": 46.0, - "step": 42026 - }, - { - "epoch": 3.2132576409197773, - "grad_norm": 0.0020694297272711992, - "learning_rate": 0.00019999490726443125, - "loss": 46.0, - "step": 42027 - }, - { - "epoch": 3.213334097903167, - "grad_norm": 0.0016173539916053414, - "learning_rate": 0.00019999490702201733, - "loss": 46.0, - "step": 42028 - }, - { - "epoch": 3.213410554886557, - "grad_norm": 0.0017572048818692565, - "learning_rate": 0.0001999949067795976, - "loss": 46.0, - "step": 42029 - }, - { - "epoch": 3.2134870118699466, - "grad_norm": 0.0018634614534676075, - "learning_rate": 0.00019999490653717215, - "loss": 46.0, - "step": 42030 - }, - { - "epoch": 3.2135634688533363, - "grad_norm": 0.001311220694333315, - "learning_rate": 0.0001999949062947409, - "loss": 46.0, - "step": 42031 - }, - { - "epoch": 3.213639925836726, - "grad_norm": 0.0012478322023525834, - "learning_rate": 0.0001999949060523039, - "loss": 46.0, - "step": 42032 - }, - { - "epoch": 3.213716382820116, - "grad_norm": 0.0024957098066806793, - "learning_rate": 0.00019999490580986114, - "loss": 46.0, - "step": 42033 - }, - { - "epoch": 3.2137928398035056, - "grad_norm": 0.0017277593724429607, - "learning_rate": 0.00019999490556741264, - "loss": 46.0, - "step": 42034 - }, - { - "epoch": 3.2138692967868954, - "grad_norm": 0.0016810082597658038, - "learning_rate": 0.0001999949053249583, - "loss": 46.0, - "step": 42035 - }, - { - "epoch": 3.213945753770285, - "grad_norm": 0.004137719515711069, - "learning_rate": 0.00019999490508249823, - "loss": 46.0, - "step": 42036 - }, - { - "epoch": 3.214022210753675, - "grad_norm": 0.0041664703749120235, - "learning_rate": 0.00019999490484003238, - "loss": 46.0, - "step": 42037 - }, - { - "epoch": 3.2140986677370647, - "grad_norm": 0.003994942642748356, - "learning_rate": 0.00019999490459756075, - "loss": 46.0, - "step": 42038 - }, - { - "epoch": 3.214175124720454, - "grad_norm": 0.003370902733877301, - "learning_rate": 0.00019999490435508336, - "loss": 46.0, - "step": 42039 - }, - { - "epoch": 3.2142515817038437, - "grad_norm": 0.0010797909926623106, - "learning_rate": 0.0001999949041126002, - "loss": 46.0, - "step": 42040 - }, - { - "epoch": 3.2143280386872335, - "grad_norm": 0.0027798833325505257, - "learning_rate": 0.0001999949038701113, - "loss": 46.0, - "step": 42041 - }, - { - "epoch": 3.2144044956706233, - "grad_norm": 0.0007769897929392755, - "learning_rate": 0.0001999949036276166, - "loss": 46.0, - "step": 42042 - }, - { - "epoch": 3.214480952654013, - "grad_norm": 0.0022532104048877954, - "learning_rate": 0.00019999490338511614, - "loss": 46.0, - "step": 42043 - }, - { - "epoch": 3.2145574096374028, - "grad_norm": 0.0012627977412194014, - "learning_rate": 0.0001999949031426099, - "loss": 46.0, - "step": 42044 - }, - { - "epoch": 3.2146338666207925, - "grad_norm": 0.006925684399902821, - "learning_rate": 0.0001999949029000979, - "loss": 46.0, - "step": 42045 - }, - { - "epoch": 3.2147103236041823, - "grad_norm": 0.0019622284453362226, - "learning_rate": 0.00019999490265758013, - "loss": 46.0, - "step": 42046 - }, - { - "epoch": 3.214786780587572, - "grad_norm": 0.003863739548251033, - "learning_rate": 0.0001999949024150566, - "loss": 46.0, - "step": 42047 - }, - { - "epoch": 3.214863237570962, - "grad_norm": 0.002014464931562543, - "learning_rate": 0.00019999490217252727, - "loss": 46.0, - "step": 42048 - }, - { - "epoch": 3.214939694554351, - "grad_norm": 0.002009489806368947, - "learning_rate": 0.00019999490192999222, - "loss": 46.0, - "step": 42049 - }, - { - "epoch": 3.215016151537741, - "grad_norm": 0.0006463376339524984, - "learning_rate": 0.00019999490168745137, - "loss": 46.0, - "step": 42050 - }, - { - "epoch": 3.2150926085211307, - "grad_norm": 0.003279470605775714, - "learning_rate": 0.00019999490144490475, - "loss": 46.0, - "step": 42051 - }, - { - "epoch": 3.2151690655045204, - "grad_norm": 0.0037217820063233376, - "learning_rate": 0.00019999490120235235, - "loss": 46.0, - "step": 42052 - }, - { - "epoch": 3.21524552248791, - "grad_norm": 0.0033905671443790197, - "learning_rate": 0.0001999949009597942, - "loss": 46.0, - "step": 42053 - }, - { - "epoch": 3.2153219794713, - "grad_norm": 0.0011346264509484172, - "learning_rate": 0.0001999949007172303, - "loss": 46.0, - "step": 42054 - }, - { - "epoch": 3.2153984364546897, - "grad_norm": 0.003642667317762971, - "learning_rate": 0.00019999490047466057, - "loss": 46.0, - "step": 42055 - }, - { - "epoch": 3.2154748934380795, - "grad_norm": 0.0014360713539645076, - "learning_rate": 0.00019999490023208514, - "loss": 46.0, - "step": 42056 - }, - { - "epoch": 3.2155513504214692, - "grad_norm": 0.0076574720442295074, - "learning_rate": 0.00019999489998950393, - "loss": 46.0, - "step": 42057 - }, - { - "epoch": 3.215627807404859, - "grad_norm": 0.0028948960825800896, - "learning_rate": 0.0001999948997469169, - "loss": 46.0, - "step": 42058 - }, - { - "epoch": 3.2157042643882487, - "grad_norm": 0.0017489342717453837, - "learning_rate": 0.00019999489950432416, - "loss": 46.0, - "step": 42059 - }, - { - "epoch": 3.2157807213716385, - "grad_norm": 0.010017432272434235, - "learning_rate": 0.0001999948992617256, - "loss": 46.0, - "step": 42060 - }, - { - "epoch": 3.215857178355028, - "grad_norm": 0.0032760468311607838, - "learning_rate": 0.0001999948990191213, - "loss": 46.0, - "step": 42061 - }, - { - "epoch": 3.2159336353384176, - "grad_norm": 0.002923940308392048, - "learning_rate": 0.00019999489877651122, - "loss": 46.0, - "step": 42062 - }, - { - "epoch": 3.2160100923218073, - "grad_norm": 0.0027671444695442915, - "learning_rate": 0.00019999489853389537, - "loss": 46.0, - "step": 42063 - }, - { - "epoch": 3.216086549305197, - "grad_norm": 0.0015004008309915662, - "learning_rate": 0.00019999489829127375, - "loss": 46.0, - "step": 42064 - }, - { - "epoch": 3.216163006288587, - "grad_norm": 0.005195599514991045, - "learning_rate": 0.00019999489804864638, - "loss": 46.0, - "step": 42065 - }, - { - "epoch": 3.2162394632719766, - "grad_norm": 0.0031378748826682568, - "learning_rate": 0.0001999948978060132, - "loss": 46.0, - "step": 42066 - }, - { - "epoch": 3.2163159202553664, - "grad_norm": 0.0006268033175729215, - "learning_rate": 0.00019999489756337432, - "loss": 46.0, - "step": 42067 - }, - { - "epoch": 3.216392377238756, - "grad_norm": 0.0023044419940561056, - "learning_rate": 0.0001999948973207296, - "loss": 46.0, - "step": 42068 - }, - { - "epoch": 3.216468834222146, - "grad_norm": 0.0016772350063547492, - "learning_rate": 0.00019999489707807913, - "loss": 46.0, - "step": 42069 - }, - { - "epoch": 3.2165452912055357, - "grad_norm": 0.0022749791387468576, - "learning_rate": 0.00019999489683542292, - "loss": 46.0, - "step": 42070 - }, - { - "epoch": 3.216621748188925, - "grad_norm": 0.0014040415408089757, - "learning_rate": 0.00019999489659276091, - "loss": 46.0, - "step": 42071 - }, - { - "epoch": 3.2166982051723148, - "grad_norm": 0.0021414763759821653, - "learning_rate": 0.00019999489635009313, - "loss": 46.0, - "step": 42072 - }, - { - "epoch": 3.2167746621557045, - "grad_norm": 0.0015099617885425687, - "learning_rate": 0.00019999489610741963, - "loss": 46.0, - "step": 42073 - }, - { - "epoch": 3.2168511191390943, - "grad_norm": 0.004244043957442045, - "learning_rate": 0.00019999489586474032, - "loss": 46.0, - "step": 42074 - }, - { - "epoch": 3.216927576122484, - "grad_norm": 0.0015712818130850792, - "learning_rate": 0.00019999489562205525, - "loss": 46.0, - "step": 42075 - }, - { - "epoch": 3.217004033105874, - "grad_norm": 0.0007181677501648664, - "learning_rate": 0.0001999948953793644, - "loss": 46.0, - "step": 42076 - }, - { - "epoch": 3.2170804900892636, - "grad_norm": 0.0011897190706804395, - "learning_rate": 0.00019999489513666777, - "loss": 46.0, - "step": 42077 - }, - { - "epoch": 3.2171569470726533, - "grad_norm": 0.0009469385840930045, - "learning_rate": 0.00019999489489396538, - "loss": 46.0, - "step": 42078 - }, - { - "epoch": 3.217233404056043, - "grad_norm": 0.0007228367030620575, - "learning_rate": 0.00019999489465125723, - "loss": 46.0, - "step": 42079 - }, - { - "epoch": 3.217309861039433, - "grad_norm": 0.004239616449922323, - "learning_rate": 0.00019999489440854332, - "loss": 46.0, - "step": 42080 - }, - { - "epoch": 3.2173863180228226, - "grad_norm": 0.010959330946207047, - "learning_rate": 0.00019999489416582363, - "loss": 46.0, - "step": 42081 - }, - { - "epoch": 3.2174627750062124, - "grad_norm": 0.004945164080709219, - "learning_rate": 0.00019999489392309817, - "loss": 46.0, - "step": 42082 - }, - { - "epoch": 3.2175392319896017, - "grad_norm": 0.002401743782684207, - "learning_rate": 0.00019999489368036693, - "loss": 46.0, - "step": 42083 - }, - { - "epoch": 3.2176156889729914, - "grad_norm": 0.0013879043981432915, - "learning_rate": 0.00019999489343762995, - "loss": 46.0, - "step": 42084 - }, - { - "epoch": 3.217692145956381, - "grad_norm": 0.00033975523547269404, - "learning_rate": 0.0001999948931948872, - "loss": 46.0, - "step": 42085 - }, - { - "epoch": 3.217768602939771, - "grad_norm": 0.0031126264948397875, - "learning_rate": 0.00019999489295213863, - "loss": 46.0, - "step": 42086 - }, - { - "epoch": 3.2178450599231607, - "grad_norm": 0.0019977290648967028, - "learning_rate": 0.00019999489270938433, - "loss": 46.0, - "step": 42087 - }, - { - "epoch": 3.2179215169065505, - "grad_norm": 0.004049096256494522, - "learning_rate": 0.00019999489246662425, - "loss": 46.0, - "step": 42088 - }, - { - "epoch": 3.2179979738899402, - "grad_norm": 0.005429015029221773, - "learning_rate": 0.0001999948922238584, - "loss": 46.0, - "step": 42089 - }, - { - "epoch": 3.21807443087333, - "grad_norm": 0.0015906282933428884, - "learning_rate": 0.00019999489198108678, - "loss": 46.0, - "step": 42090 - }, - { - "epoch": 3.2181508878567198, - "grad_norm": 0.004229491110891104, - "learning_rate": 0.00019999489173830944, - "loss": 46.0, - "step": 42091 - }, - { - "epoch": 3.2182273448401095, - "grad_norm": 0.0015959986485540867, - "learning_rate": 0.00019999489149552627, - "loss": 46.0, - "step": 42092 - }, - { - "epoch": 3.218303801823499, - "grad_norm": 0.0024031796492636204, - "learning_rate": 0.00019999489125273735, - "loss": 46.0, - "step": 42093 - }, - { - "epoch": 3.2183802588068886, - "grad_norm": 0.0017796535976231098, - "learning_rate": 0.00019999489100994264, - "loss": 46.0, - "step": 42094 - }, - { - "epoch": 3.2184567157902784, - "grad_norm": 0.002239165361970663, - "learning_rate": 0.0001999948907671422, - "loss": 46.0, - "step": 42095 - }, - { - "epoch": 3.218533172773668, - "grad_norm": 0.004304191097617149, - "learning_rate": 0.00019999489052433596, - "loss": 46.0, - "step": 42096 - }, - { - "epoch": 3.218609629757058, - "grad_norm": 0.00257976446300745, - "learning_rate": 0.00019999489028152398, - "loss": 46.0, - "step": 42097 - }, - { - "epoch": 3.2186860867404476, - "grad_norm": 0.0016116040060296655, - "learning_rate": 0.00019999489003870623, - "loss": 46.0, - "step": 42098 - }, - { - "epoch": 3.2187625437238374, - "grad_norm": 0.004151143599301577, - "learning_rate": 0.00019999488979588267, - "loss": 46.0, - "step": 42099 - }, - { - "epoch": 3.218839000707227, - "grad_norm": 0.001729447627440095, - "learning_rate": 0.00019999488955305337, - "loss": 46.0, - "step": 42100 - }, - { - "epoch": 3.218915457690617, - "grad_norm": 0.0008196085109375417, - "learning_rate": 0.0001999948893102183, - "loss": 46.0, - "step": 42101 - }, - { - "epoch": 3.2189919146740067, - "grad_norm": 0.001459177932702005, - "learning_rate": 0.00019999488906737744, - "loss": 46.0, - "step": 42102 - }, - { - "epoch": 3.2190683716573965, - "grad_norm": 0.005648829508572817, - "learning_rate": 0.00019999488882453085, - "loss": 46.0, - "step": 42103 - }, - { - "epoch": 3.2191448286407858, - "grad_norm": 0.0017308726673945785, - "learning_rate": 0.00019999488858167845, - "loss": 46.0, - "step": 42104 - }, - { - "epoch": 3.2192212856241755, - "grad_norm": 0.002907112240791321, - "learning_rate": 0.0001999948883388203, - "loss": 46.0, - "step": 42105 - }, - { - "epoch": 3.2192977426075653, - "grad_norm": 0.001044327742420137, - "learning_rate": 0.0001999948880959564, - "loss": 46.0, - "step": 42106 - }, - { - "epoch": 3.219374199590955, - "grad_norm": 0.0035064637195318937, - "learning_rate": 0.0001999948878530867, - "loss": 46.0, - "step": 42107 - }, - { - "epoch": 3.219450656574345, - "grad_norm": 0.004620145540684462, - "learning_rate": 0.00019999488761021124, - "loss": 46.0, - "step": 42108 - }, - { - "epoch": 3.2195271135577346, - "grad_norm": 0.001909983460791409, - "learning_rate": 0.00019999488736733, - "loss": 46.0, - "step": 42109 - }, - { - "epoch": 3.2196035705411243, - "grad_norm": 0.0024475122336298227, - "learning_rate": 0.00019999488712444302, - "loss": 46.0, - "step": 42110 - }, - { - "epoch": 3.219680027524514, - "grad_norm": 0.006110894028097391, - "learning_rate": 0.00019999488688155027, - "loss": 46.0, - "step": 42111 - }, - { - "epoch": 3.219756484507904, - "grad_norm": 0.0010518016060814261, - "learning_rate": 0.0001999948866386517, - "loss": 46.0, - "step": 42112 - }, - { - "epoch": 3.2198329414912936, - "grad_norm": 0.006876754574477673, - "learning_rate": 0.0001999948863957474, - "loss": 46.0, - "step": 42113 - }, - { - "epoch": 3.2199093984746834, - "grad_norm": 0.0023621413856744766, - "learning_rate": 0.00019999488615283736, - "loss": 46.0, - "step": 42114 - }, - { - "epoch": 3.2199858554580727, - "grad_norm": 0.0012257179478183389, - "learning_rate": 0.0001999948859099215, - "loss": 46.0, - "step": 42115 - }, - { - "epoch": 3.2200623124414625, - "grad_norm": 0.001808013767004013, - "learning_rate": 0.00019999488566699989, - "loss": 46.0, - "step": 42116 - }, - { - "epoch": 3.220138769424852, - "grad_norm": 0.0014598038978874683, - "learning_rate": 0.00019999488542407252, - "loss": 46.0, - "step": 42117 - }, - { - "epoch": 3.220215226408242, - "grad_norm": 0.006018663756549358, - "learning_rate": 0.00019999488518113935, - "loss": 46.0, - "step": 42118 - }, - { - "epoch": 3.2202916833916317, - "grad_norm": 0.006880896631628275, - "learning_rate": 0.00019999488493820043, - "loss": 46.0, - "step": 42119 - }, - { - "epoch": 3.2203681403750215, - "grad_norm": 0.002467251615598798, - "learning_rate": 0.00019999488469525577, - "loss": 46.0, - "step": 42120 - }, - { - "epoch": 3.2204445973584113, - "grad_norm": 0.002496525412425399, - "learning_rate": 0.0001999948844523053, - "loss": 46.0, - "step": 42121 - }, - { - "epoch": 3.220521054341801, - "grad_norm": 0.0009444904862903059, - "learning_rate": 0.00019999488420934907, - "loss": 46.0, - "step": 42122 - }, - { - "epoch": 3.220597511325191, - "grad_norm": 0.0016867815284058452, - "learning_rate": 0.0001999948839663871, - "loss": 46.0, - "step": 42123 - }, - { - "epoch": 3.2206739683085805, - "grad_norm": 0.0012590765254572034, - "learning_rate": 0.0001999948837234193, - "loss": 46.0, - "step": 42124 - }, - { - "epoch": 3.2207504252919703, - "grad_norm": 0.0014494657516479492, - "learning_rate": 0.00019999488348044578, - "loss": 46.0, - "step": 42125 - }, - { - "epoch": 3.2208268822753596, - "grad_norm": 0.001622830401174724, - "learning_rate": 0.00019999488323746648, - "loss": 46.0, - "step": 42126 - }, - { - "epoch": 3.2209033392587494, - "grad_norm": 0.0021084214095026255, - "learning_rate": 0.0001999948829944814, - "loss": 46.0, - "step": 42127 - }, - { - "epoch": 3.220979796242139, - "grad_norm": 0.001371025457046926, - "learning_rate": 0.00019999488275149055, - "loss": 46.0, - "step": 42128 - }, - { - "epoch": 3.221056253225529, - "grad_norm": 0.002170608611777425, - "learning_rate": 0.00019999488250849393, - "loss": 46.0, - "step": 42129 - }, - { - "epoch": 3.2211327102089187, - "grad_norm": 0.0008423345279879868, - "learning_rate": 0.00019999488226549156, - "loss": 46.0, - "step": 42130 - }, - { - "epoch": 3.2212091671923084, - "grad_norm": 0.0011878538643941283, - "learning_rate": 0.00019999488202248342, - "loss": 46.0, - "step": 42131 - }, - { - "epoch": 3.221285624175698, - "grad_norm": 0.002165592275559902, - "learning_rate": 0.0001999948817794695, - "loss": 46.0, - "step": 42132 - }, - { - "epoch": 3.221362081159088, - "grad_norm": 0.0011837874772027135, - "learning_rate": 0.00019999488153644982, - "loss": 46.0, - "step": 42133 - }, - { - "epoch": 3.2214385381424777, - "grad_norm": 0.002083104569464922, - "learning_rate": 0.00019999488129342436, - "loss": 46.0, - "step": 42134 - }, - { - "epoch": 3.2215149951258675, - "grad_norm": 0.00433348910883069, - "learning_rate": 0.00019999488105039315, - "loss": 46.0, - "step": 42135 - }, - { - "epoch": 3.2215914521092572, - "grad_norm": 0.001558369374834001, - "learning_rate": 0.00019999488080735614, - "loss": 46.0, - "step": 42136 - }, - { - "epoch": 3.2216679090926466, - "grad_norm": 0.0026920493692159653, - "learning_rate": 0.00019999488056431338, - "loss": 46.0, - "step": 42137 - }, - { - "epoch": 3.2217443660760363, - "grad_norm": 0.0011284475913271308, - "learning_rate": 0.00019999488032126483, - "loss": 46.0, - "step": 42138 - }, - { - "epoch": 3.221820823059426, - "grad_norm": 0.004418435040861368, - "learning_rate": 0.00019999488007821055, - "loss": 46.0, - "step": 42139 - }, - { - "epoch": 3.221897280042816, - "grad_norm": 0.004002754110842943, - "learning_rate": 0.00019999487983515048, - "loss": 46.0, - "step": 42140 - }, - { - "epoch": 3.2219737370262056, - "grad_norm": 0.007967714220285416, - "learning_rate": 0.00019999487959208466, - "loss": 46.0, - "step": 42141 - }, - { - "epoch": 3.2220501940095954, - "grad_norm": 0.0019889555405825377, - "learning_rate": 0.00019999487934901304, - "loss": 46.0, - "step": 42142 - }, - { - "epoch": 3.222126650992985, - "grad_norm": 0.0027892852667719126, - "learning_rate": 0.00019999487910593567, - "loss": 46.0, - "step": 42143 - }, - { - "epoch": 3.222203107976375, - "grad_norm": 0.004028509370982647, - "learning_rate": 0.0001999948788628525, - "loss": 46.0, - "step": 42144 - }, - { - "epoch": 3.2222795649597646, - "grad_norm": 0.0024777331855148077, - "learning_rate": 0.00019999487861976358, - "loss": 46.0, - "step": 42145 - }, - { - "epoch": 3.2223560219431544, - "grad_norm": 0.0018562150653451681, - "learning_rate": 0.0001999948783766689, - "loss": 46.0, - "step": 42146 - }, - { - "epoch": 3.222432478926544, - "grad_norm": 0.0015134194400161505, - "learning_rate": 0.00019999487813356846, - "loss": 46.0, - "step": 42147 - }, - { - "epoch": 3.2225089359099335, - "grad_norm": 0.002831601770594716, - "learning_rate": 0.0001999948778904622, - "loss": 46.0, - "step": 42148 - }, - { - "epoch": 3.2225853928933232, - "grad_norm": 0.003527930937707424, - "learning_rate": 0.00019999487764735024, - "loss": 46.0, - "step": 42149 - }, - { - "epoch": 3.222661849876713, - "grad_norm": 0.004277333617210388, - "learning_rate": 0.00019999487740423246, - "loss": 46.0, - "step": 42150 - }, - { - "epoch": 3.2227383068601028, - "grad_norm": 0.001274807145819068, - "learning_rate": 0.0001999948771611089, - "loss": 46.0, - "step": 42151 - }, - { - "epoch": 3.2228147638434925, - "grad_norm": 0.00401354068890214, - "learning_rate": 0.00019999487691797963, - "loss": 46.0, - "step": 42152 - }, - { - "epoch": 3.2228912208268823, - "grad_norm": 0.0032939906232059, - "learning_rate": 0.00019999487667484456, - "loss": 46.0, - "step": 42153 - }, - { - "epoch": 3.222967677810272, - "grad_norm": 0.0016236893134191632, - "learning_rate": 0.0001999948764317037, - "loss": 46.0, - "step": 42154 - }, - { - "epoch": 3.223044134793662, - "grad_norm": 0.0009794343495741487, - "learning_rate": 0.0001999948761885571, - "loss": 46.0, - "step": 42155 - }, - { - "epoch": 3.2231205917770516, - "grad_norm": 0.0020132572390139103, - "learning_rate": 0.00019999487594540475, - "loss": 46.0, - "step": 42156 - }, - { - "epoch": 3.2231970487604413, - "grad_norm": 0.0038490155711770058, - "learning_rate": 0.0001999948757022466, - "loss": 46.0, - "step": 42157 - }, - { - "epoch": 3.2232735057438306, - "grad_norm": 0.002637245459482074, - "learning_rate": 0.00019999487545908267, - "loss": 46.0, - "step": 42158 - }, - { - "epoch": 3.2233499627272204, - "grad_norm": 0.004638268146663904, - "learning_rate": 0.000199994875215913, - "loss": 46.0, - "step": 42159 - }, - { - "epoch": 3.22342641971061, - "grad_norm": 0.00341234402731061, - "learning_rate": 0.00019999487497273752, - "loss": 46.0, - "step": 42160 - }, - { - "epoch": 3.223502876694, - "grad_norm": 0.0012906150659546256, - "learning_rate": 0.0001999948747295563, - "loss": 46.0, - "step": 42161 - }, - { - "epoch": 3.2235793336773897, - "grad_norm": 0.008898580446839333, - "learning_rate": 0.00019999487448636933, - "loss": 46.0, - "step": 42162 - }, - { - "epoch": 3.2236557906607795, - "grad_norm": 0.003250592155382037, - "learning_rate": 0.00019999487424317655, - "loss": 46.0, - "step": 42163 - }, - { - "epoch": 3.223732247644169, - "grad_norm": 0.0007815755670890212, - "learning_rate": 0.00019999487399997802, - "loss": 46.0, - "step": 42164 - }, - { - "epoch": 3.223808704627559, - "grad_norm": 0.0019414139678701758, - "learning_rate": 0.00019999487375677372, - "loss": 46.0, - "step": 42165 - }, - { - "epoch": 3.2238851616109487, - "grad_norm": 0.0028887789230793715, - "learning_rate": 0.00019999487351356367, - "loss": 46.0, - "step": 42166 - }, - { - "epoch": 3.2239616185943385, - "grad_norm": 0.0013588375877588987, - "learning_rate": 0.00019999487327034782, - "loss": 46.0, - "step": 42167 - }, - { - "epoch": 3.2240380755777283, - "grad_norm": 0.0006988157401792705, - "learning_rate": 0.0001999948730271262, - "loss": 46.0, - "step": 42168 - }, - { - "epoch": 3.224114532561118, - "grad_norm": 0.0029352956917136908, - "learning_rate": 0.00019999487278389883, - "loss": 46.0, - "step": 42169 - }, - { - "epoch": 3.2241909895445073, - "grad_norm": 0.0019444705685600638, - "learning_rate": 0.0001999948725406657, - "loss": 46.0, - "step": 42170 - }, - { - "epoch": 3.224267446527897, - "grad_norm": 0.002626130823045969, - "learning_rate": 0.0001999948722974268, - "loss": 46.0, - "step": 42171 - }, - { - "epoch": 3.224343903511287, - "grad_norm": 0.002297281753271818, - "learning_rate": 0.0001999948720541821, - "loss": 46.0, - "step": 42172 - }, - { - "epoch": 3.2244203604946766, - "grad_norm": 0.006431537214666605, - "learning_rate": 0.00019999487181093163, - "loss": 46.0, - "step": 42173 - }, - { - "epoch": 3.2244968174780664, - "grad_norm": 0.0007628339226357639, - "learning_rate": 0.00019999487156767542, - "loss": 46.0, - "step": 42174 - }, - { - "epoch": 3.224573274461456, - "grad_norm": 0.0028832328971475363, - "learning_rate": 0.00019999487132441344, - "loss": 46.0, - "step": 42175 - }, - { - "epoch": 3.224649731444846, - "grad_norm": 0.0021674379240721464, - "learning_rate": 0.00019999487108114566, - "loss": 46.0, - "step": 42176 - }, - { - "epoch": 3.2247261884282357, - "grad_norm": 0.00283754407428205, - "learning_rate": 0.00019999487083787213, - "loss": 46.0, - "step": 42177 - }, - { - "epoch": 3.2248026454116254, - "grad_norm": 0.00285433535464108, - "learning_rate": 0.00019999487059459283, - "loss": 46.0, - "step": 42178 - }, - { - "epoch": 3.224879102395015, - "grad_norm": 0.0012359402608126402, - "learning_rate": 0.00019999487035130779, - "loss": 46.0, - "step": 42179 - }, - { - "epoch": 3.2249555593784045, - "grad_norm": 0.0007884434890002012, - "learning_rate": 0.00019999487010801694, - "loss": 46.0, - "step": 42180 - }, - { - "epoch": 3.2250320163617943, - "grad_norm": 0.0018642704235389829, - "learning_rate": 0.00019999486986472032, - "loss": 46.0, - "step": 42181 - }, - { - "epoch": 3.225108473345184, - "grad_norm": 0.0035679785069078207, - "learning_rate": 0.00019999486962141795, - "loss": 46.0, - "step": 42182 - }, - { - "epoch": 3.225184930328574, - "grad_norm": 0.002079169498756528, - "learning_rate": 0.0001999948693781098, - "loss": 46.0, - "step": 42183 - }, - { - "epoch": 3.2252613873119635, - "grad_norm": 0.0013938754564151168, - "learning_rate": 0.00019999486913479587, - "loss": 46.0, - "step": 42184 - }, - { - "epoch": 3.2253378442953533, - "grad_norm": 0.0016578654758632183, - "learning_rate": 0.0001999948688914762, - "loss": 46.0, - "step": 42185 - }, - { - "epoch": 3.225414301278743, - "grad_norm": 0.001905063516460359, - "learning_rate": 0.00019999486864815075, - "loss": 46.0, - "step": 42186 - }, - { - "epoch": 3.225490758262133, - "grad_norm": 0.0017836643382906914, - "learning_rate": 0.00019999486840481954, - "loss": 46.0, - "step": 42187 - }, - { - "epoch": 3.2255672152455226, - "grad_norm": 0.004136717412620783, - "learning_rate": 0.00019999486816148253, - "loss": 46.0, - "step": 42188 - }, - { - "epoch": 3.2256436722289124, - "grad_norm": 0.0014836541377007961, - "learning_rate": 0.00019999486791813978, - "loss": 46.0, - "step": 42189 - }, - { - "epoch": 3.225720129212302, - "grad_norm": 0.003220832906663418, - "learning_rate": 0.00019999486767479125, - "loss": 46.0, - "step": 42190 - }, - { - "epoch": 3.225796586195692, - "grad_norm": 0.002322359010577202, - "learning_rate": 0.00019999486743143695, - "loss": 46.0, - "step": 42191 - }, - { - "epoch": 3.225873043179081, - "grad_norm": 0.0027075516991317272, - "learning_rate": 0.0001999948671880769, - "loss": 46.0, - "step": 42192 - }, - { - "epoch": 3.225949500162471, - "grad_norm": 0.001571964705362916, - "learning_rate": 0.00019999486694471106, - "loss": 46.0, - "step": 42193 - }, - { - "epoch": 3.2260259571458607, - "grad_norm": 0.002495530992746353, - "learning_rate": 0.00019999486670133946, - "loss": 46.0, - "step": 42194 - }, - { - "epoch": 3.2261024141292505, - "grad_norm": 0.002889525843784213, - "learning_rate": 0.00019999486645796207, - "loss": 46.0, - "step": 42195 - }, - { - "epoch": 3.2261788711126402, - "grad_norm": 0.0024617034941911697, - "learning_rate": 0.00019999486621457893, - "loss": 46.0, - "step": 42196 - }, - { - "epoch": 3.22625532809603, - "grad_norm": 0.0069183907471597195, - "learning_rate": 0.00019999486597119002, - "loss": 46.0, - "step": 42197 - }, - { - "epoch": 3.2263317850794198, - "grad_norm": 0.0038808458484709263, - "learning_rate": 0.00019999486572779533, - "loss": 46.0, - "step": 42198 - }, - { - "epoch": 3.2264082420628095, - "grad_norm": 0.0013269230257719755, - "learning_rate": 0.00019999486548439487, - "loss": 46.0, - "step": 42199 - }, - { - "epoch": 3.2264846990461993, - "grad_norm": 0.0010129095753654838, - "learning_rate": 0.00019999486524098866, - "loss": 46.0, - "step": 42200 - }, - { - "epoch": 3.226561156029589, - "grad_norm": 0.0033050503116101027, - "learning_rate": 0.00019999486499757668, - "loss": 46.0, - "step": 42201 - }, - { - "epoch": 3.2266376130129784, - "grad_norm": 0.0014685180503875017, - "learning_rate": 0.00019999486475415893, - "loss": 46.0, - "step": 42202 - }, - { - "epoch": 3.226714069996368, - "grad_norm": 0.0012390861520543694, - "learning_rate": 0.0001999948645107354, - "loss": 46.0, - "step": 42203 - }, - { - "epoch": 3.226790526979758, - "grad_norm": 0.002818741835653782, - "learning_rate": 0.0001999948642673061, - "loss": 46.0, - "step": 42204 - }, - { - "epoch": 3.2268669839631476, - "grad_norm": 0.005971470382064581, - "learning_rate": 0.00019999486402387106, - "loss": 46.0, - "step": 42205 - }, - { - "epoch": 3.2269434409465374, - "grad_norm": 0.0012805100996047258, - "learning_rate": 0.0001999948637804302, - "loss": 46.0, - "step": 42206 - }, - { - "epoch": 3.227019897929927, - "grad_norm": 0.0034617858473211527, - "learning_rate": 0.0001999948635369836, - "loss": 46.0, - "step": 42207 - }, - { - "epoch": 3.227096354913317, - "grad_norm": 0.0018793948693200946, - "learning_rate": 0.00019999486329353122, - "loss": 46.0, - "step": 42208 - }, - { - "epoch": 3.2271728118967067, - "grad_norm": 0.004829760175198317, - "learning_rate": 0.00019999486305007305, - "loss": 46.0, - "step": 42209 - }, - { - "epoch": 3.2272492688800964, - "grad_norm": 0.004632372409105301, - "learning_rate": 0.00019999486280660914, - "loss": 46.0, - "step": 42210 - }, - { - "epoch": 3.227325725863486, - "grad_norm": 0.005238438956439495, - "learning_rate": 0.00019999486256313945, - "loss": 46.0, - "step": 42211 - }, - { - "epoch": 3.227402182846876, - "grad_norm": 0.0007987130084075034, - "learning_rate": 0.00019999486231966405, - "loss": 46.0, - "step": 42212 - }, - { - "epoch": 3.2274786398302657, - "grad_norm": 0.001328629325143993, - "learning_rate": 0.00019999486207618282, - "loss": 46.0, - "step": 42213 - }, - { - "epoch": 3.227555096813655, - "grad_norm": 0.001736931735649705, - "learning_rate": 0.00019999486183269584, - "loss": 46.0, - "step": 42214 - }, - { - "epoch": 3.227631553797045, - "grad_norm": 0.013588810339570045, - "learning_rate": 0.00019999486158920306, - "loss": 46.0, - "step": 42215 - }, - { - "epoch": 3.2277080107804346, - "grad_norm": 0.005290184635668993, - "learning_rate": 0.00019999486134570453, - "loss": 46.0, - "step": 42216 - }, - { - "epoch": 3.2277844677638243, - "grad_norm": 0.0028071911074221134, - "learning_rate": 0.00019999486110220023, - "loss": 46.0, - "step": 42217 - }, - { - "epoch": 3.227860924747214, - "grad_norm": 0.0028559057973325253, - "learning_rate": 0.00019999486085869018, - "loss": 46.0, - "step": 42218 - }, - { - "epoch": 3.227937381730604, - "grad_norm": 0.0024556356947869062, - "learning_rate": 0.00019999486061517434, - "loss": 46.0, - "step": 42219 - }, - { - "epoch": 3.2280138387139936, - "grad_norm": 0.004364097956568003, - "learning_rate": 0.00019999486037165274, - "loss": 46.0, - "step": 42220 - }, - { - "epoch": 3.2280902956973834, - "grad_norm": 0.003711540484800935, - "learning_rate": 0.00019999486012812535, - "loss": 46.0, - "step": 42221 - }, - { - "epoch": 3.228166752680773, - "grad_norm": 0.0024851011112332344, - "learning_rate": 0.0001999948598845922, - "loss": 46.0, - "step": 42222 - }, - { - "epoch": 3.228243209664163, - "grad_norm": 0.003588085062801838, - "learning_rate": 0.00019999485964105332, - "loss": 46.0, - "step": 42223 - }, - { - "epoch": 3.228319666647552, - "grad_norm": 0.0034592694137245417, - "learning_rate": 0.00019999485939750864, - "loss": 46.0, - "step": 42224 - }, - { - "epoch": 3.228396123630942, - "grad_norm": 0.0009275093325413764, - "learning_rate": 0.00019999485915395818, - "loss": 46.0, - "step": 42225 - }, - { - "epoch": 3.2284725806143317, - "grad_norm": 0.002045780187472701, - "learning_rate": 0.00019999485891040197, - "loss": 46.0, - "step": 42226 - }, - { - "epoch": 3.2285490375977215, - "grad_norm": 0.0020252184476703405, - "learning_rate": 0.00019999485866683997, - "loss": 46.0, - "step": 42227 - }, - { - "epoch": 3.2286254945811113, - "grad_norm": 0.001736140577122569, - "learning_rate": 0.0001999948584232722, - "loss": 46.0, - "step": 42228 - }, - { - "epoch": 3.228701951564501, - "grad_norm": 0.007463186979293823, - "learning_rate": 0.0001999948581796987, - "loss": 46.0, - "step": 42229 - }, - { - "epoch": 3.2287784085478908, - "grad_norm": 0.003951418679207563, - "learning_rate": 0.00019999485793611941, - "loss": 46.0, - "step": 42230 - }, - { - "epoch": 3.2288548655312805, - "grad_norm": 0.0013803723268210888, - "learning_rate": 0.00019999485769253434, - "loss": 46.0, - "step": 42231 - }, - { - "epoch": 3.2289313225146703, - "grad_norm": 0.007881506346166134, - "learning_rate": 0.0001999948574489435, - "loss": 46.0, - "step": 42232 - }, - { - "epoch": 3.22900777949806, - "grad_norm": 0.002667024964466691, - "learning_rate": 0.0001999948572053469, - "loss": 46.0, - "step": 42233 - }, - { - "epoch": 3.22908423648145, - "grad_norm": 0.0018889637431129813, - "learning_rate": 0.0001999948569617445, - "loss": 46.0, - "step": 42234 - }, - { - "epoch": 3.229160693464839, - "grad_norm": 0.0023049158044159412, - "learning_rate": 0.00019999485671813637, - "loss": 46.0, - "step": 42235 - }, - { - "epoch": 3.229237150448229, - "grad_norm": 0.0022834099363535643, - "learning_rate": 0.00019999485647452249, - "loss": 46.0, - "step": 42236 - }, - { - "epoch": 3.2293136074316187, - "grad_norm": 0.000989936525002122, - "learning_rate": 0.0001999948562309028, - "loss": 46.0, - "step": 42237 - }, - { - "epoch": 3.2293900644150084, - "grad_norm": 0.005748219322413206, - "learning_rate": 0.00019999485598727734, - "loss": 46.0, - "step": 42238 - }, - { - "epoch": 3.229466521398398, - "grad_norm": 0.0011627700878307223, - "learning_rate": 0.00019999485574364613, - "loss": 46.0, - "step": 42239 - }, - { - "epoch": 3.229542978381788, - "grad_norm": 0.005285852123051882, - "learning_rate": 0.00019999485550000915, - "loss": 46.0, - "step": 42240 - }, - { - "epoch": 3.2296194353651777, - "grad_norm": 0.002274313708767295, - "learning_rate": 0.00019999485525636638, - "loss": 46.0, - "step": 42241 - }, - { - "epoch": 3.2296958923485675, - "grad_norm": 0.001398901455104351, - "learning_rate": 0.00019999485501271788, - "loss": 46.0, - "step": 42242 - }, - { - "epoch": 3.2297723493319572, - "grad_norm": 0.0008692287956364453, - "learning_rate": 0.00019999485476906358, - "loss": 46.0, - "step": 42243 - }, - { - "epoch": 3.229848806315347, - "grad_norm": 0.0049063218757510185, - "learning_rate": 0.00019999485452540353, - "loss": 46.0, - "step": 42244 - }, - { - "epoch": 3.2299252632987367, - "grad_norm": 0.0018107844516634941, - "learning_rate": 0.00019999485428173769, - "loss": 46.0, - "step": 42245 - }, - { - "epoch": 3.230001720282126, - "grad_norm": 0.002421931829303503, - "learning_rate": 0.00019999485403806607, - "loss": 46.0, - "step": 42246 - }, - { - "epoch": 3.230078177265516, - "grad_norm": 0.002113932278007269, - "learning_rate": 0.00019999485379438873, - "loss": 46.0, - "step": 42247 - }, - { - "epoch": 3.2301546342489056, - "grad_norm": 0.004734083078801632, - "learning_rate": 0.00019999485355070556, - "loss": 46.0, - "step": 42248 - }, - { - "epoch": 3.2302310912322953, - "grad_norm": 0.001262399833649397, - "learning_rate": 0.00019999485330701665, - "loss": 46.0, - "step": 42249 - }, - { - "epoch": 3.230307548215685, - "grad_norm": 0.004366319160908461, - "learning_rate": 0.00019999485306332196, - "loss": 46.0, - "step": 42250 - }, - { - "epoch": 3.230384005199075, - "grad_norm": 0.0025451062247157097, - "learning_rate": 0.0001999948528196215, - "loss": 46.0, - "step": 42251 - }, - { - "epoch": 3.2304604621824646, - "grad_norm": 0.0019479583716019988, - "learning_rate": 0.0001999948525759153, - "loss": 46.0, - "step": 42252 - }, - { - "epoch": 3.2305369191658544, - "grad_norm": 0.0019829554948955774, - "learning_rate": 0.00019999485233220332, - "loss": 46.0, - "step": 42253 - }, - { - "epoch": 3.230613376149244, - "grad_norm": 0.0038967791479080915, - "learning_rate": 0.00019999485208848557, - "loss": 46.0, - "step": 42254 - }, - { - "epoch": 3.230689833132634, - "grad_norm": 0.001703653484582901, - "learning_rate": 0.00019999485184476204, - "loss": 46.0, - "step": 42255 - }, - { - "epoch": 3.2307662901160237, - "grad_norm": 0.0012313113547861576, - "learning_rate": 0.00019999485160103274, - "loss": 46.0, - "step": 42256 - }, - { - "epoch": 3.230842747099413, - "grad_norm": 0.0022776711266487837, - "learning_rate": 0.0001999948513572977, - "loss": 46.0, - "step": 42257 - }, - { - "epoch": 3.2309192040828028, - "grad_norm": 0.0036595072597265244, - "learning_rate": 0.00019999485111355685, - "loss": 46.0, - "step": 42258 - }, - { - "epoch": 3.2309956610661925, - "grad_norm": 0.002467468148097396, - "learning_rate": 0.00019999485086981026, - "loss": 46.0, - "step": 42259 - }, - { - "epoch": 3.2310721180495823, - "grad_norm": 0.003750624367967248, - "learning_rate": 0.00019999485062605787, - "loss": 46.0, - "step": 42260 - }, - { - "epoch": 3.231148575032972, - "grad_norm": 0.00116166437510401, - "learning_rate": 0.00019999485038229976, - "loss": 46.0, - "step": 42261 - }, - { - "epoch": 3.231225032016362, - "grad_norm": 0.0011302041821181774, - "learning_rate": 0.00019999485013853585, - "loss": 46.0, - "step": 42262 - }, - { - "epoch": 3.2313014889997516, - "grad_norm": 0.0030286142136901617, - "learning_rate": 0.0001999948498947662, - "loss": 46.0, - "step": 42263 - }, - { - "epoch": 3.2313779459831413, - "grad_norm": 0.003529604757204652, - "learning_rate": 0.0001999948496509907, - "loss": 46.0, - "step": 42264 - }, - { - "epoch": 3.231454402966531, - "grad_norm": 0.001110009616240859, - "learning_rate": 0.00019999484940720952, - "loss": 46.0, - "step": 42265 - }, - { - "epoch": 3.231530859949921, - "grad_norm": 0.011706104502081871, - "learning_rate": 0.00019999484916342252, - "loss": 46.0, - "step": 42266 - }, - { - "epoch": 3.2316073169333106, - "grad_norm": 0.002430995460599661, - "learning_rate": 0.00019999484891962977, - "loss": 46.0, - "step": 42267 - }, - { - "epoch": 3.2316837739167, - "grad_norm": 0.0028907330706715584, - "learning_rate": 0.0001999948486758312, - "loss": 46.0, - "step": 42268 - }, - { - "epoch": 3.2317602309000897, - "grad_norm": 0.0011207496281713247, - "learning_rate": 0.00019999484843202697, - "loss": 46.0, - "step": 42269 - }, - { - "epoch": 3.2318366878834794, - "grad_norm": 0.0014401569496840239, - "learning_rate": 0.00019999484818821687, - "loss": 46.0, - "step": 42270 - }, - { - "epoch": 3.231913144866869, - "grad_norm": 0.0014576904941350222, - "learning_rate": 0.00019999484794440105, - "loss": 46.0, - "step": 42271 - }, - { - "epoch": 3.231989601850259, - "grad_norm": 0.0020687333308160305, - "learning_rate": 0.00019999484770057946, - "loss": 46.0, - "step": 42272 - }, - { - "epoch": 3.2320660588336487, - "grad_norm": 0.0014395163161680102, - "learning_rate": 0.00019999484745675207, - "loss": 46.0, - "step": 42273 - }, - { - "epoch": 3.2321425158170385, - "grad_norm": 0.0031149149872362614, - "learning_rate": 0.00019999484721291896, - "loss": 46.0, - "step": 42274 - }, - { - "epoch": 3.2322189728004282, - "grad_norm": 0.001901002717204392, - "learning_rate": 0.00019999484696908004, - "loss": 46.0, - "step": 42275 - }, - { - "epoch": 3.232295429783818, - "grad_norm": 0.0023491359315812588, - "learning_rate": 0.00019999484672523536, - "loss": 46.0, - "step": 42276 - }, - { - "epoch": 3.2323718867672078, - "grad_norm": 0.001346286735497415, - "learning_rate": 0.0001999948464813849, - "loss": 46.0, - "step": 42277 - }, - { - "epoch": 3.2324483437505975, - "grad_norm": 0.0024885758757591248, - "learning_rate": 0.00019999484623752872, - "loss": 46.0, - "step": 42278 - }, - { - "epoch": 3.232524800733987, - "grad_norm": 0.0045378971844911575, - "learning_rate": 0.00019999484599366675, - "loss": 46.0, - "step": 42279 - }, - { - "epoch": 3.2326012577173766, - "grad_norm": 0.001267250976525247, - "learning_rate": 0.00019999484574979897, - "loss": 46.0, - "step": 42280 - }, - { - "epoch": 3.2326777147007664, - "grad_norm": 0.0010063250083476305, - "learning_rate": 0.00019999484550592544, - "loss": 46.0, - "step": 42281 - }, - { - "epoch": 3.232754171684156, - "grad_norm": 0.005992132239043713, - "learning_rate": 0.00019999484526204614, - "loss": 46.0, - "step": 42282 - }, - { - "epoch": 3.232830628667546, - "grad_norm": 0.00469317426905036, - "learning_rate": 0.0001999948450181611, - "loss": 46.0, - "step": 42283 - }, - { - "epoch": 3.2329070856509357, - "grad_norm": 0.0036912125069648027, - "learning_rate": 0.00019999484477427025, - "loss": 46.0, - "step": 42284 - }, - { - "epoch": 3.2329835426343254, - "grad_norm": 0.0022965981625020504, - "learning_rate": 0.00019999484453037366, - "loss": 46.0, - "step": 42285 - }, - { - "epoch": 3.233059999617715, - "grad_norm": 0.0011684517376124859, - "learning_rate": 0.00019999484428647127, - "loss": 46.0, - "step": 42286 - }, - { - "epoch": 3.233136456601105, - "grad_norm": 0.00434738676995039, - "learning_rate": 0.00019999484404256316, - "loss": 46.0, - "step": 42287 - }, - { - "epoch": 3.2332129135844947, - "grad_norm": 0.0029134086798876524, - "learning_rate": 0.00019999484379864925, - "loss": 46.0, - "step": 42288 - }, - { - "epoch": 3.233289370567884, - "grad_norm": 0.0018011295469477773, - "learning_rate": 0.0001999948435547296, - "loss": 46.0, - "step": 42289 - }, - { - "epoch": 3.2333658275512738, - "grad_norm": 0.002436230657622218, - "learning_rate": 0.00019999484331080413, - "loss": 46.0, - "step": 42290 - }, - { - "epoch": 3.2334422845346635, - "grad_norm": 0.0015517380088567734, - "learning_rate": 0.00019999484306687293, - "loss": 46.0, - "step": 42291 - }, - { - "epoch": 3.2335187415180533, - "grad_norm": 0.0016480621416121721, - "learning_rate": 0.00019999484282293595, - "loss": 46.0, - "step": 42292 - }, - { - "epoch": 3.233595198501443, - "grad_norm": 0.0010706520406529307, - "learning_rate": 0.0001999948425789932, - "loss": 46.0, - "step": 42293 - }, - { - "epoch": 3.233671655484833, - "grad_norm": 0.003256821306422353, - "learning_rate": 0.00019999484233504467, - "loss": 46.0, - "step": 42294 - }, - { - "epoch": 3.2337481124682226, - "grad_norm": 0.003731667762622237, - "learning_rate": 0.00019999484209109038, - "loss": 46.0, - "step": 42295 - }, - { - "epoch": 3.2338245694516123, - "grad_norm": 0.0026174504309892654, - "learning_rate": 0.00019999484184713033, - "loss": 46.0, - "step": 42296 - }, - { - "epoch": 3.233901026435002, - "grad_norm": 0.000942641228903085, - "learning_rate": 0.0001999948416031645, - "loss": 46.0, - "step": 42297 - }, - { - "epoch": 3.233977483418392, - "grad_norm": 0.0028174982871860266, - "learning_rate": 0.0001999948413591929, - "loss": 46.0, - "step": 42298 - }, - { - "epoch": 3.2340539404017816, - "grad_norm": 0.004107377491891384, - "learning_rate": 0.00019999484111521553, - "loss": 46.0, - "step": 42299 - }, - { - "epoch": 3.2341303973851714, - "grad_norm": 0.002227388322353363, - "learning_rate": 0.0001999948408712324, - "loss": 46.0, - "step": 42300 - }, - { - "epoch": 3.2342068543685607, - "grad_norm": 0.0031873807311058044, - "learning_rate": 0.00019999484062724348, - "loss": 46.0, - "step": 42301 - }, - { - "epoch": 3.2342833113519505, - "grad_norm": 0.0014232448302209377, - "learning_rate": 0.0001999948403832488, - "loss": 46.0, - "step": 42302 - }, - { - "epoch": 3.2343597683353402, - "grad_norm": 0.005269214510917664, - "learning_rate": 0.00019999484013924837, - "loss": 46.0, - "step": 42303 - }, - { - "epoch": 3.23443622531873, - "grad_norm": 0.0007990125450305641, - "learning_rate": 0.00019999483989524214, - "loss": 46.0, - "step": 42304 - }, - { - "epoch": 3.2345126823021197, - "grad_norm": 0.005071209277957678, - "learning_rate": 0.00019999483965123016, - "loss": 46.0, - "step": 42305 - }, - { - "epoch": 3.2345891392855095, - "grad_norm": 0.0007935185567475855, - "learning_rate": 0.0001999948394072124, - "loss": 46.0, - "step": 42306 - }, - { - "epoch": 3.2346655962688993, - "grad_norm": 0.0021422707941383123, - "learning_rate": 0.0001999948391631889, - "loss": 46.0, - "step": 42307 - }, - { - "epoch": 3.234742053252289, - "grad_norm": 0.0008184100151993334, - "learning_rate": 0.00019999483891915961, - "loss": 46.0, - "step": 42308 - }, - { - "epoch": 3.234818510235679, - "grad_norm": 0.0026160688139498234, - "learning_rate": 0.00019999483867512457, - "loss": 46.0, - "step": 42309 - }, - { - "epoch": 3.2348949672190686, - "grad_norm": 0.0016637092921882868, - "learning_rate": 0.00019999483843108373, - "loss": 46.0, - "step": 42310 - }, - { - "epoch": 3.234971424202458, - "grad_norm": 0.0014725946821272373, - "learning_rate": 0.00019999483818703714, - "loss": 46.0, - "step": 42311 - }, - { - "epoch": 3.2350478811858476, - "grad_norm": 0.003380876500159502, - "learning_rate": 0.00019999483794298474, - "loss": 46.0, - "step": 42312 - }, - { - "epoch": 3.2351243381692374, - "grad_norm": 0.0012476126430556178, - "learning_rate": 0.00019999483769892663, - "loss": 46.0, - "step": 42313 - }, - { - "epoch": 3.235200795152627, - "grad_norm": 0.0018260172801092267, - "learning_rate": 0.00019999483745486272, - "loss": 46.0, - "step": 42314 - }, - { - "epoch": 3.235277252136017, - "grad_norm": 0.0021838087122887373, - "learning_rate": 0.00019999483721079307, - "loss": 46.0, - "step": 42315 - }, - { - "epoch": 3.2353537091194067, - "grad_norm": 0.002953731920570135, - "learning_rate": 0.00019999483696671758, - "loss": 46.0, - "step": 42316 - }, - { - "epoch": 3.2354301661027964, - "grad_norm": 0.015879008919000626, - "learning_rate": 0.0001999948367226364, - "loss": 46.0, - "step": 42317 - }, - { - "epoch": 3.235506623086186, - "grad_norm": 0.004051321651786566, - "learning_rate": 0.00019999483647854943, - "loss": 46.0, - "step": 42318 - }, - { - "epoch": 3.235583080069576, - "grad_norm": 0.001257934607565403, - "learning_rate": 0.00019999483623445668, - "loss": 46.0, - "step": 42319 - }, - { - "epoch": 3.2356595370529657, - "grad_norm": 0.0026651183143258095, - "learning_rate": 0.00019999483599035815, - "loss": 46.0, - "step": 42320 - }, - { - "epoch": 3.2357359940363555, - "grad_norm": 0.003077635308727622, - "learning_rate": 0.00019999483574625386, - "loss": 46.0, - "step": 42321 - }, - { - "epoch": 3.2358124510197452, - "grad_norm": 0.0015535916900262237, - "learning_rate": 0.00019999483550214378, - "loss": 46.0, - "step": 42322 - }, - { - "epoch": 3.2358889080031346, - "grad_norm": 0.0013542391825467348, - "learning_rate": 0.00019999483525802797, - "loss": 46.0, - "step": 42323 - }, - { - "epoch": 3.2359653649865243, - "grad_norm": 0.0022651380859315395, - "learning_rate": 0.00019999483501390638, - "loss": 46.0, - "step": 42324 - }, - { - "epoch": 3.236041821969914, - "grad_norm": 0.0038192470092326403, - "learning_rate": 0.00019999483476977901, - "loss": 46.0, - "step": 42325 - }, - { - "epoch": 3.236118278953304, - "grad_norm": 0.0011043907143175602, - "learning_rate": 0.0001999948345256459, - "loss": 46.0, - "step": 42326 - }, - { - "epoch": 3.2361947359366936, - "grad_norm": 0.003787618363276124, - "learning_rate": 0.00019999483428150697, - "loss": 46.0, - "step": 42327 - }, - { - "epoch": 3.2362711929200834, - "grad_norm": 0.0033555596601217985, - "learning_rate": 0.0001999948340373623, - "loss": 46.0, - "step": 42328 - }, - { - "epoch": 3.236347649903473, - "grad_norm": 0.0008274125284515321, - "learning_rate": 0.00019999483379321188, - "loss": 46.0, - "step": 42329 - }, - { - "epoch": 3.236424106886863, - "grad_norm": 0.0022448317613452673, - "learning_rate": 0.00019999483354905565, - "loss": 46.0, - "step": 42330 - }, - { - "epoch": 3.2365005638702526, - "grad_norm": 0.002428629668429494, - "learning_rate": 0.00019999483330489367, - "loss": 46.0, - "step": 42331 - }, - { - "epoch": 3.2365770208536424, - "grad_norm": 0.0016676083905622363, - "learning_rate": 0.00019999483306072592, - "loss": 46.0, - "step": 42332 - }, - { - "epoch": 3.2366534778370317, - "grad_norm": 0.0008506293524987996, - "learning_rate": 0.0001999948328165524, - "loss": 46.0, - "step": 42333 - }, - { - "epoch": 3.2367299348204215, - "grad_norm": 0.0022451188415288925, - "learning_rate": 0.0001999948325723731, - "loss": 46.0, - "step": 42334 - }, - { - "epoch": 3.2368063918038112, - "grad_norm": 0.0033403458073735237, - "learning_rate": 0.00019999483232818806, - "loss": 46.0, - "step": 42335 - }, - { - "epoch": 3.236882848787201, - "grad_norm": 0.0023838765919208527, - "learning_rate": 0.00019999483208399724, - "loss": 46.0, - "step": 42336 - }, - { - "epoch": 3.2369593057705908, - "grad_norm": 0.0033212322741746902, - "learning_rate": 0.00019999483183980062, - "loss": 46.0, - "step": 42337 - }, - { - "epoch": 3.2370357627539805, - "grad_norm": 0.002907288260757923, - "learning_rate": 0.00019999483159559826, - "loss": 46.0, - "step": 42338 - }, - { - "epoch": 3.2371122197373703, - "grad_norm": 0.004235134460031986, - "learning_rate": 0.00019999483135139015, - "loss": 46.0, - "step": 42339 - }, - { - "epoch": 3.23718867672076, - "grad_norm": 0.0015518086729571223, - "learning_rate": 0.00019999483110717624, - "loss": 46.0, - "step": 42340 - }, - { - "epoch": 3.23726513370415, - "grad_norm": 0.002651684219017625, - "learning_rate": 0.00019999483086295658, - "loss": 46.0, - "step": 42341 - }, - { - "epoch": 3.2373415906875396, - "grad_norm": 0.0051034241914749146, - "learning_rate": 0.00019999483061873113, - "loss": 46.0, - "step": 42342 - }, - { - "epoch": 3.2374180476709293, - "grad_norm": 0.00301416520960629, - "learning_rate": 0.00019999483037449992, - "loss": 46.0, - "step": 42343 - }, - { - "epoch": 3.237494504654319, - "grad_norm": 0.0019627090077847242, - "learning_rate": 0.00019999483013026295, - "loss": 46.0, - "step": 42344 - }, - { - "epoch": 3.2375709616377084, - "grad_norm": 0.003421587636694312, - "learning_rate": 0.0001999948298860202, - "loss": 46.0, - "step": 42345 - }, - { - "epoch": 3.237647418621098, - "grad_norm": 0.002241461304947734, - "learning_rate": 0.00019999482964177167, - "loss": 46.0, - "step": 42346 - }, - { - "epoch": 3.237723875604488, - "grad_norm": 0.005922417622059584, - "learning_rate": 0.0001999948293975174, - "loss": 46.0, - "step": 42347 - }, - { - "epoch": 3.2378003325878777, - "grad_norm": 0.0024471248034387827, - "learning_rate": 0.00019999482915325733, - "loss": 46.0, - "step": 42348 - }, - { - "epoch": 3.2378767895712675, - "grad_norm": 0.0012023499002680182, - "learning_rate": 0.00019999482890899152, - "loss": 46.0, - "step": 42349 - }, - { - "epoch": 3.237953246554657, - "grad_norm": 0.001696709543466568, - "learning_rate": 0.00019999482866471993, - "loss": 46.0, - "step": 42350 - }, - { - "epoch": 3.238029703538047, - "grad_norm": 0.003926475532352924, - "learning_rate": 0.00019999482842044254, - "loss": 46.0, - "step": 42351 - }, - { - "epoch": 3.2381061605214367, - "grad_norm": 0.0016383574111387134, - "learning_rate": 0.00019999482817615943, - "loss": 46.0, - "step": 42352 - }, - { - "epoch": 3.2381826175048265, - "grad_norm": 0.0006443076417781413, - "learning_rate": 0.00019999482793187052, - "loss": 46.0, - "step": 42353 - }, - { - "epoch": 3.2382590744882163, - "grad_norm": 0.002059386810287833, - "learning_rate": 0.00019999482768757584, - "loss": 46.0, - "step": 42354 - }, - { - "epoch": 3.2383355314716056, - "grad_norm": 0.003143174108117819, - "learning_rate": 0.0001999948274432754, - "loss": 46.0, - "step": 42355 - }, - { - "epoch": 3.2384119884549953, - "grad_norm": 0.004605344031006098, - "learning_rate": 0.0001999948271989692, - "loss": 46.0, - "step": 42356 - }, - { - "epoch": 3.238488445438385, - "grad_norm": 0.001875328947789967, - "learning_rate": 0.0001999948269546572, - "loss": 46.0, - "step": 42357 - }, - { - "epoch": 3.238564902421775, - "grad_norm": 0.002174764173105359, - "learning_rate": 0.00019999482671033948, - "loss": 46.0, - "step": 42358 - }, - { - "epoch": 3.2386413594051646, - "grad_norm": 0.001651904545724392, - "learning_rate": 0.00019999482646601595, - "loss": 46.0, - "step": 42359 - }, - { - "epoch": 3.2387178163885544, - "grad_norm": 0.0035280934534966946, - "learning_rate": 0.00019999482622168669, - "loss": 46.0, - "step": 42360 - }, - { - "epoch": 3.238794273371944, - "grad_norm": 0.002604069886729121, - "learning_rate": 0.00019999482597735162, - "loss": 46.0, - "step": 42361 - }, - { - "epoch": 3.238870730355334, - "grad_norm": 0.0025517635513097048, - "learning_rate": 0.00019999482573301077, - "loss": 46.0, - "step": 42362 - }, - { - "epoch": 3.2389471873387237, - "grad_norm": 0.0016855899011716247, - "learning_rate": 0.00019999482548866418, - "loss": 46.0, - "step": 42363 - }, - { - "epoch": 3.2390236443221134, - "grad_norm": 0.0035991498734802008, - "learning_rate": 0.00019999482524431185, - "loss": 46.0, - "step": 42364 - }, - { - "epoch": 3.239100101305503, - "grad_norm": 0.0021438421681523323, - "learning_rate": 0.0001999948249999537, - "loss": 46.0, - "step": 42365 - }, - { - "epoch": 3.2391765582888925, - "grad_norm": 0.002080722479149699, - "learning_rate": 0.0001999948247555898, - "loss": 46.0, - "step": 42366 - }, - { - "epoch": 3.2392530152722823, - "grad_norm": 0.004347725305706263, - "learning_rate": 0.00019999482451122015, - "loss": 46.0, - "step": 42367 - }, - { - "epoch": 3.239329472255672, - "grad_norm": 0.002128925174474716, - "learning_rate": 0.00019999482426684472, - "loss": 46.0, - "step": 42368 - }, - { - "epoch": 3.239405929239062, - "grad_norm": 0.0020087251905351877, - "learning_rate": 0.0001999948240224635, - "loss": 46.0, - "step": 42369 - }, - { - "epoch": 3.2394823862224515, - "grad_norm": 0.0029420426581054926, - "learning_rate": 0.0001999948237780765, - "loss": 46.0, - "step": 42370 - }, - { - "epoch": 3.2395588432058413, - "grad_norm": 0.0013687177561223507, - "learning_rate": 0.00019999482353368376, - "loss": 46.0, - "step": 42371 - }, - { - "epoch": 3.239635300189231, - "grad_norm": 0.0014295891160145402, - "learning_rate": 0.00019999482328928524, - "loss": 46.0, - "step": 42372 - }, - { - "epoch": 3.239711757172621, - "grad_norm": 0.003431841731071472, - "learning_rate": 0.00019999482304488097, - "loss": 46.0, - "step": 42373 - }, - { - "epoch": 3.2397882141560106, - "grad_norm": 0.002378212520852685, - "learning_rate": 0.00019999482280047093, - "loss": 46.0, - "step": 42374 - }, - { - "epoch": 3.2398646711394004, - "grad_norm": 0.0014651797246187925, - "learning_rate": 0.0001999948225560551, - "loss": 46.0, - "step": 42375 - }, - { - "epoch": 3.23994112812279, - "grad_norm": 0.0009595670271664858, - "learning_rate": 0.00019999482231163352, - "loss": 46.0, - "step": 42376 - }, - { - "epoch": 3.2400175851061794, - "grad_norm": 0.0014238699804991484, - "learning_rate": 0.00019999482206720613, - "loss": 46.0, - "step": 42377 - }, - { - "epoch": 3.240094042089569, - "grad_norm": 0.002643199637532234, - "learning_rate": 0.000199994821822773, - "loss": 46.0, - "step": 42378 - }, - { - "epoch": 3.240170499072959, - "grad_norm": 0.0022409625817090273, - "learning_rate": 0.00019999482157833412, - "loss": 46.0, - "step": 42379 - }, - { - "epoch": 3.2402469560563487, - "grad_norm": 0.0016407917719334364, - "learning_rate": 0.00019999482133388946, - "loss": 46.0, - "step": 42380 - }, - { - "epoch": 3.2403234130397385, - "grad_norm": 0.003386077703908086, - "learning_rate": 0.00019999482108943898, - "loss": 46.0, - "step": 42381 - }, - { - "epoch": 3.2403998700231282, - "grad_norm": 0.0028493229765444994, - "learning_rate": 0.0001999948208449828, - "loss": 46.0, - "step": 42382 - }, - { - "epoch": 3.240476327006518, - "grad_norm": 0.0013239012332633138, - "learning_rate": 0.00019999482060052083, - "loss": 46.0, - "step": 42383 - }, - { - "epoch": 3.2405527839899078, - "grad_norm": 0.005308200605213642, - "learning_rate": 0.00019999482035605305, - "loss": 46.0, - "step": 42384 - }, - { - "epoch": 3.2406292409732975, - "grad_norm": 0.0030443661380559206, - "learning_rate": 0.00019999482011157955, - "loss": 46.0, - "step": 42385 - }, - { - "epoch": 3.2407056979566873, - "grad_norm": 0.001260890276171267, - "learning_rate": 0.00019999481986710026, - "loss": 46.0, - "step": 42386 - }, - { - "epoch": 3.240782154940077, - "grad_norm": 0.0007613828638568521, - "learning_rate": 0.00019999481962261522, - "loss": 46.0, - "step": 42387 - }, - { - "epoch": 3.2408586119234664, - "grad_norm": 0.000516761327162385, - "learning_rate": 0.00019999481937812437, - "loss": 46.0, - "step": 42388 - }, - { - "epoch": 3.240935068906856, - "grad_norm": 0.0027428600005805492, - "learning_rate": 0.0001999948191336278, - "loss": 46.0, - "step": 42389 - }, - { - "epoch": 3.241011525890246, - "grad_norm": 0.0010224863654002547, - "learning_rate": 0.00019999481888912545, - "loss": 46.0, - "step": 42390 - }, - { - "epoch": 3.2410879828736356, - "grad_norm": 0.00244688312523067, - "learning_rate": 0.00019999481864461732, - "loss": 46.0, - "step": 42391 - }, - { - "epoch": 3.2411644398570254, - "grad_norm": 0.0025835614651441574, - "learning_rate": 0.00019999481840010343, - "loss": 46.0, - "step": 42392 - }, - { - "epoch": 3.241240896840415, - "grad_norm": 0.002661807229742408, - "learning_rate": 0.00019999481815558375, - "loss": 46.0, - "step": 42393 - }, - { - "epoch": 3.241317353823805, - "grad_norm": 0.003028519218787551, - "learning_rate": 0.00019999481791105832, - "loss": 46.0, - "step": 42394 - }, - { - "epoch": 3.2413938108071947, - "grad_norm": 0.003145348047837615, - "learning_rate": 0.00019999481766652712, - "loss": 46.0, - "step": 42395 - }, - { - "epoch": 3.2414702677905844, - "grad_norm": 0.002282362896949053, - "learning_rate": 0.00019999481742199015, - "loss": 46.0, - "step": 42396 - }, - { - "epoch": 3.241546724773974, - "grad_norm": 0.004383843857795, - "learning_rate": 0.0001999948171774474, - "loss": 46.0, - "step": 42397 - }, - { - "epoch": 3.241623181757364, - "grad_norm": 0.003248919965699315, - "learning_rate": 0.00019999481693289887, - "loss": 46.0, - "step": 42398 - }, - { - "epoch": 3.2416996387407533, - "grad_norm": 0.005298751872032881, - "learning_rate": 0.0001999948166883446, - "loss": 46.0, - "step": 42399 - }, - { - "epoch": 3.241776095724143, - "grad_norm": 0.0025044975336641073, - "learning_rate": 0.00019999481644378454, - "loss": 46.0, - "step": 42400 - }, - { - "epoch": 3.241852552707533, - "grad_norm": 0.0033499347046017647, - "learning_rate": 0.00019999481619921872, - "loss": 46.0, - "step": 42401 - }, - { - "epoch": 3.2419290096909226, - "grad_norm": 0.003150665434077382, - "learning_rate": 0.00019999481595464713, - "loss": 46.0, - "step": 42402 - }, - { - "epoch": 3.2420054666743123, - "grad_norm": 0.0018043940654024482, - "learning_rate": 0.00019999481571006977, - "loss": 46.0, - "step": 42403 - }, - { - "epoch": 3.242081923657702, - "grad_norm": 0.005504921544343233, - "learning_rate": 0.00019999481546548666, - "loss": 46.0, - "step": 42404 - }, - { - "epoch": 3.242158380641092, - "grad_norm": 0.0024685857351869345, - "learning_rate": 0.00019999481522089773, - "loss": 46.0, - "step": 42405 - }, - { - "epoch": 3.2422348376244816, - "grad_norm": 0.008920547552406788, - "learning_rate": 0.00019999481497630307, - "loss": 46.0, - "step": 42406 - }, - { - "epoch": 3.2423112946078714, - "grad_norm": 0.004442697856575251, - "learning_rate": 0.00019999481473170265, - "loss": 46.0, - "step": 42407 - }, - { - "epoch": 3.242387751591261, - "grad_norm": 0.0037557941395789385, - "learning_rate": 0.00019999481448709644, - "loss": 46.0, - "step": 42408 - }, - { - "epoch": 3.242464208574651, - "grad_norm": 0.003162234090268612, - "learning_rate": 0.00019999481424248447, - "loss": 46.0, - "step": 42409 - }, - { - "epoch": 3.24254066555804, - "grad_norm": 0.004171868786215782, - "learning_rate": 0.00019999481399786672, - "loss": 46.0, - "step": 42410 - }, - { - "epoch": 3.24261712254143, - "grad_norm": 0.003868746105581522, - "learning_rate": 0.0001999948137532432, - "loss": 46.0, - "step": 42411 - }, - { - "epoch": 3.2426935795248197, - "grad_norm": 0.005612892098724842, - "learning_rate": 0.00019999481350861393, - "loss": 46.0, - "step": 42412 - }, - { - "epoch": 3.2427700365082095, - "grad_norm": 0.00219744723290205, - "learning_rate": 0.0001999948132639789, - "loss": 46.0, - "step": 42413 - }, - { - "epoch": 3.2428464934915993, - "grad_norm": 0.0018065678887069225, - "learning_rate": 0.00019999481301933805, - "loss": 46.0, - "step": 42414 - }, - { - "epoch": 3.242922950474989, - "grad_norm": 0.0027141955215483904, - "learning_rate": 0.00019999481277469149, - "loss": 46.0, - "step": 42415 - }, - { - "epoch": 3.242999407458379, - "grad_norm": 0.0018828174797818065, - "learning_rate": 0.0001999948125300391, - "loss": 46.0, - "step": 42416 - }, - { - "epoch": 3.2430758644417685, - "grad_norm": 0.000530295423232019, - "learning_rate": 0.00019999481228538096, - "loss": 46.0, - "step": 42417 - }, - { - "epoch": 3.2431523214251583, - "grad_norm": 0.0018832373898476362, - "learning_rate": 0.00019999481204071708, - "loss": 46.0, - "step": 42418 - }, - { - "epoch": 3.243228778408548, - "grad_norm": 0.0017762802308425307, - "learning_rate": 0.00019999481179604743, - "loss": 46.0, - "step": 42419 - }, - { - "epoch": 3.2433052353919374, - "grad_norm": 0.0012160860933363438, - "learning_rate": 0.000199994811551372, - "loss": 46.0, - "step": 42420 - }, - { - "epoch": 3.243381692375327, - "grad_norm": 0.0028269952163100243, - "learning_rate": 0.00019999481130669077, - "loss": 46.0, - "step": 42421 - }, - { - "epoch": 3.243458149358717, - "grad_norm": 0.002148570492863655, - "learning_rate": 0.00019999481106200382, - "loss": 46.0, - "step": 42422 - }, - { - "epoch": 3.2435346063421067, - "grad_norm": 0.0036894066724926233, - "learning_rate": 0.00019999481081731108, - "loss": 46.0, - "step": 42423 - }, - { - "epoch": 3.2436110633254964, - "grad_norm": 0.002990249078720808, - "learning_rate": 0.00019999481057261255, - "loss": 46.0, - "step": 42424 - }, - { - "epoch": 3.243687520308886, - "grad_norm": 0.008820493705570698, - "learning_rate": 0.00019999481032790829, - "loss": 46.0, - "step": 42425 - }, - { - "epoch": 3.243763977292276, - "grad_norm": 0.0011494450736790895, - "learning_rate": 0.00019999481008319822, - "loss": 46.0, - "step": 42426 - }, - { - "epoch": 3.2438404342756657, - "grad_norm": 0.002933912444859743, - "learning_rate": 0.00019999480983848243, - "loss": 46.0, - "step": 42427 - }, - { - "epoch": 3.2439168912590555, - "grad_norm": 0.006502246949821711, - "learning_rate": 0.00019999480959376082, - "loss": 46.0, - "step": 42428 - }, - { - "epoch": 3.2439933482424452, - "grad_norm": 0.0014507254818454385, - "learning_rate": 0.00019999480934903345, - "loss": 46.0, - "step": 42429 - }, - { - "epoch": 3.244069805225835, - "grad_norm": 0.02041405625641346, - "learning_rate": 0.00019999480910430032, - "loss": 46.0, - "step": 42430 - }, - { - "epoch": 3.2441462622092248, - "grad_norm": 0.0008442448452115059, - "learning_rate": 0.00019999480885956144, - "loss": 46.0, - "step": 42431 - }, - { - "epoch": 3.244222719192614, - "grad_norm": 0.0008688230882398784, - "learning_rate": 0.00019999480861481676, - "loss": 46.0, - "step": 42432 - }, - { - "epoch": 3.244299176176004, - "grad_norm": 0.0015755766071379185, - "learning_rate": 0.00019999480837006636, - "loss": 46.0, - "step": 42433 - }, - { - "epoch": 3.2443756331593936, - "grad_norm": 0.003014215035364032, - "learning_rate": 0.00019999480812531016, - "loss": 46.0, - "step": 42434 - }, - { - "epoch": 3.2444520901427834, - "grad_norm": 0.0018318889196962118, - "learning_rate": 0.00019999480788054815, - "loss": 46.0, - "step": 42435 - }, - { - "epoch": 3.244528547126173, - "grad_norm": 0.002004543086513877, - "learning_rate": 0.00019999480763578043, - "loss": 46.0, - "step": 42436 - }, - { - "epoch": 3.244605004109563, - "grad_norm": 0.00123366410844028, - "learning_rate": 0.0001999948073910069, - "loss": 46.0, - "step": 42437 - }, - { - "epoch": 3.2446814610929526, - "grad_norm": 0.0038025574758648872, - "learning_rate": 0.00019999480714622765, - "loss": 46.0, - "step": 42438 - }, - { - "epoch": 3.2447579180763424, - "grad_norm": 0.001203357009217143, - "learning_rate": 0.00019999480690144258, - "loss": 46.0, - "step": 42439 - }, - { - "epoch": 3.244834375059732, - "grad_norm": 0.0022955937311053276, - "learning_rate": 0.0001999948066566518, - "loss": 46.0, - "step": 42440 - }, - { - "epoch": 3.244910832043122, - "grad_norm": 0.0013015575241297483, - "learning_rate": 0.00019999480641185518, - "loss": 46.0, - "step": 42441 - }, - { - "epoch": 3.2449872890265112, - "grad_norm": 0.0011194307589903474, - "learning_rate": 0.00019999480616705281, - "loss": 46.0, - "step": 42442 - }, - { - "epoch": 3.245063746009901, - "grad_norm": 0.003715003840625286, - "learning_rate": 0.0001999948059222447, - "loss": 46.0, - "step": 42443 - }, - { - "epoch": 3.2451402029932908, - "grad_norm": 0.0019537240732461214, - "learning_rate": 0.0001999948056774308, - "loss": 46.0, - "step": 42444 - }, - { - "epoch": 3.2452166599766805, - "grad_norm": 0.0036568893119692802, - "learning_rate": 0.00019999480543261115, - "loss": 46.0, - "step": 42445 - }, - { - "epoch": 3.2452931169600703, - "grad_norm": 0.0040557486936450005, - "learning_rate": 0.00019999480518778572, - "loss": 46.0, - "step": 42446 - }, - { - "epoch": 3.24536957394346, - "grad_norm": 0.001760962069965899, - "learning_rate": 0.00019999480494295452, - "loss": 46.0, - "step": 42447 - }, - { - "epoch": 3.24544603092685, - "grad_norm": 0.01030740700662136, - "learning_rate": 0.00019999480469811754, - "loss": 46.0, - "step": 42448 - }, - { - "epoch": 3.2455224879102396, - "grad_norm": 0.0029453302267938852, - "learning_rate": 0.0001999948044532748, - "loss": 46.0, - "step": 42449 - }, - { - "epoch": 3.2455989448936293, - "grad_norm": 0.00572332926094532, - "learning_rate": 0.0001999948042084263, - "loss": 46.0, - "step": 42450 - }, - { - "epoch": 3.245675401877019, - "grad_norm": 0.010032108053565025, - "learning_rate": 0.000199994803963572, - "loss": 46.0, - "step": 42451 - }, - { - "epoch": 3.245751858860409, - "grad_norm": 0.002460707677528262, - "learning_rate": 0.00019999480371871194, - "loss": 46.0, - "step": 42452 - }, - { - "epoch": 3.2458283158437986, - "grad_norm": 0.0012489031068980694, - "learning_rate": 0.00019999480347384613, - "loss": 46.0, - "step": 42453 - }, - { - "epoch": 3.245904772827188, - "grad_norm": 0.00368308718316257, - "learning_rate": 0.00019999480322897454, - "loss": 46.0, - "step": 42454 - }, - { - "epoch": 3.2459812298105777, - "grad_norm": 0.0031295011285692453, - "learning_rate": 0.0001999948029840972, - "loss": 46.0, - "step": 42455 - }, - { - "epoch": 3.2460576867939674, - "grad_norm": 0.003631161293014884, - "learning_rate": 0.0001999948027392141, - "loss": 46.0, - "step": 42456 - }, - { - "epoch": 3.246134143777357, - "grad_norm": 0.001051891827955842, - "learning_rate": 0.0001999948024943252, - "loss": 46.0, - "step": 42457 - }, - { - "epoch": 3.246210600760747, - "grad_norm": 0.0013903718208894134, - "learning_rate": 0.0001999948022494305, - "loss": 46.0, - "step": 42458 - }, - { - "epoch": 3.2462870577441367, - "grad_norm": 0.0019744872115552425, - "learning_rate": 0.00019999480200453009, - "loss": 46.0, - "step": 42459 - }, - { - "epoch": 3.2463635147275265, - "grad_norm": 0.0035972667392343283, - "learning_rate": 0.0001999948017596239, - "loss": 46.0, - "step": 42460 - }, - { - "epoch": 3.2464399717109163, - "grad_norm": 0.0015593518037348986, - "learning_rate": 0.00019999480151471194, - "loss": 46.0, - "step": 42461 - }, - { - "epoch": 3.246516428694306, - "grad_norm": 0.001166791538707912, - "learning_rate": 0.00019999480126979416, - "loss": 46.0, - "step": 42462 - }, - { - "epoch": 3.2465928856776958, - "grad_norm": 0.001599739189259708, - "learning_rate": 0.00019999480102487067, - "loss": 46.0, - "step": 42463 - }, - { - "epoch": 3.246669342661085, - "grad_norm": 0.0015420893905684352, - "learning_rate": 0.0001999948007799414, - "loss": 46.0, - "step": 42464 - }, - { - "epoch": 3.246745799644475, - "grad_norm": 0.0004134236369282007, - "learning_rate": 0.00019999480053500634, - "loss": 46.0, - "step": 42465 - }, - { - "epoch": 3.2468222566278646, - "grad_norm": 0.0013970095897093415, - "learning_rate": 0.00019999480029006552, - "loss": 46.0, - "step": 42466 - }, - { - "epoch": 3.2468987136112544, - "grad_norm": 0.004596116952598095, - "learning_rate": 0.00019999480004511896, - "loss": 46.0, - "step": 42467 - }, - { - "epoch": 3.246975170594644, - "grad_norm": 0.007578646298497915, - "learning_rate": 0.0001999947998001666, - "loss": 46.0, - "step": 42468 - }, - { - "epoch": 3.247051627578034, - "grad_norm": 0.001443088287487626, - "learning_rate": 0.00019999479955520847, - "loss": 46.0, - "step": 42469 - }, - { - "epoch": 3.2471280845614237, - "grad_norm": 0.0016861308831721544, - "learning_rate": 0.0001999947993102446, - "loss": 46.0, - "step": 42470 - }, - { - "epoch": 3.2472045415448134, - "grad_norm": 0.004286458250135183, - "learning_rate": 0.0001999947990652749, - "loss": 46.0, - "step": 42471 - }, - { - "epoch": 3.247280998528203, - "grad_norm": 0.0011865037959069014, - "learning_rate": 0.00019999479882029948, - "loss": 46.0, - "step": 42472 - }, - { - "epoch": 3.247357455511593, - "grad_norm": 0.005233015399426222, - "learning_rate": 0.00019999479857531828, - "loss": 46.0, - "step": 42473 - }, - { - "epoch": 3.2474339124949827, - "grad_norm": 0.004462135024368763, - "learning_rate": 0.00019999479833033134, - "loss": 46.0, - "step": 42474 - }, - { - "epoch": 3.2475103694783725, - "grad_norm": 0.0011538659455254674, - "learning_rate": 0.0001999947980853386, - "loss": 46.0, - "step": 42475 - }, - { - "epoch": 3.2475868264617618, - "grad_norm": 0.0010149668669328094, - "learning_rate": 0.00019999479784034007, - "loss": 46.0, - "step": 42476 - }, - { - "epoch": 3.2476632834451515, - "grad_norm": 0.0013371151871979237, - "learning_rate": 0.0001999947975953358, - "loss": 46.0, - "step": 42477 - }, - { - "epoch": 3.2477397404285413, - "grad_norm": 0.003627282800152898, - "learning_rate": 0.00019999479735032576, - "loss": 46.0, - "step": 42478 - }, - { - "epoch": 3.247816197411931, - "grad_norm": 0.0013804096961393952, - "learning_rate": 0.00019999479710530995, - "loss": 46.0, - "step": 42479 - }, - { - "epoch": 3.247892654395321, - "grad_norm": 0.00144345557782799, - "learning_rate": 0.00019999479686028834, - "loss": 46.0, - "step": 42480 - }, - { - "epoch": 3.2479691113787106, - "grad_norm": 0.0019860013853758574, - "learning_rate": 0.000199994796615261, - "loss": 46.0, - "step": 42481 - }, - { - "epoch": 3.2480455683621003, - "grad_norm": 0.0014417103957384825, - "learning_rate": 0.00019999479637022787, - "loss": 46.0, - "step": 42482 - }, - { - "epoch": 3.24812202534549, - "grad_norm": 0.0022198727820068598, - "learning_rate": 0.000199994796125189, - "loss": 46.0, - "step": 42483 - }, - { - "epoch": 3.24819848232888, - "grad_norm": 0.0009327406296506524, - "learning_rate": 0.00019999479588014434, - "loss": 46.0, - "step": 42484 - }, - { - "epoch": 3.2482749393122696, - "grad_norm": 0.0033086647745221853, - "learning_rate": 0.0001999947956350939, - "loss": 46.0, - "step": 42485 - }, - { - "epoch": 3.248351396295659, - "grad_norm": 0.0014830627478659153, - "learning_rate": 0.00019999479539003768, - "loss": 46.0, - "step": 42486 - }, - { - "epoch": 3.2484278532790487, - "grad_norm": 0.003838838078081608, - "learning_rate": 0.00019999479514497574, - "loss": 46.0, - "step": 42487 - }, - { - "epoch": 3.2485043102624385, - "grad_norm": 0.0052585904486477375, - "learning_rate": 0.000199994794899908, - "loss": 46.0, - "step": 42488 - }, - { - "epoch": 3.2485807672458282, - "grad_norm": 0.001985125942155719, - "learning_rate": 0.00019999479465483447, - "loss": 46.0, - "step": 42489 - }, - { - "epoch": 3.248657224229218, - "grad_norm": 0.0038302717730402946, - "learning_rate": 0.0001999947944097552, - "loss": 46.0, - "step": 42490 - }, - { - "epoch": 3.2487336812126077, - "grad_norm": 0.002133285393938422, - "learning_rate": 0.00019999479416467017, - "loss": 46.0, - "step": 42491 - }, - { - "epoch": 3.2488101381959975, - "grad_norm": 0.0007181174005381763, - "learning_rate": 0.00019999479391957933, - "loss": 46.0, - "step": 42492 - }, - { - "epoch": 3.2488865951793873, - "grad_norm": 0.002585875103250146, - "learning_rate": 0.00019999479367448277, - "loss": 46.0, - "step": 42493 - }, - { - "epoch": 3.248963052162777, - "grad_norm": 0.00368873099796474, - "learning_rate": 0.0001999947934293804, - "loss": 46.0, - "step": 42494 - }, - { - "epoch": 3.249039509146167, - "grad_norm": 0.0016099958447739482, - "learning_rate": 0.0001999947931842723, - "loss": 46.0, - "step": 42495 - }, - { - "epoch": 3.2491159661295566, - "grad_norm": 0.0022658456582576036, - "learning_rate": 0.0001999947929391584, - "loss": 46.0, - "step": 42496 - }, - { - "epoch": 3.249192423112946, - "grad_norm": 0.0033016069792211056, - "learning_rate": 0.00019999479269403874, - "loss": 46.0, - "step": 42497 - }, - { - "epoch": 3.2492688800963356, - "grad_norm": 0.0019370769150555134, - "learning_rate": 0.00019999479244891332, - "loss": 46.0, - "step": 42498 - }, - { - "epoch": 3.2493453370797254, - "grad_norm": 0.0026677935384213924, - "learning_rate": 0.00019999479220378212, - "loss": 46.0, - "step": 42499 - }, - { - "epoch": 3.249421794063115, - "grad_norm": 0.0012142519699409604, - "learning_rate": 0.00019999479195864517, - "loss": 46.0, - "step": 42500 - }, - { - "epoch": 3.249498251046505, - "grad_norm": 0.005798469763249159, - "learning_rate": 0.00019999479171350242, - "loss": 46.0, - "step": 42501 - }, - { - "epoch": 3.2495747080298947, - "grad_norm": 0.0012317787623032928, - "learning_rate": 0.0001999947914683539, - "loss": 46.0, - "step": 42502 - }, - { - "epoch": 3.2496511650132844, - "grad_norm": 0.003324667690321803, - "learning_rate": 0.00019999479122319964, - "loss": 46.0, - "step": 42503 - }, - { - "epoch": 3.249727621996674, - "grad_norm": 0.003560878336429596, - "learning_rate": 0.0001999947909780396, - "loss": 46.0, - "step": 42504 - }, - { - "epoch": 3.249804078980064, - "grad_norm": 0.007173549849539995, - "learning_rate": 0.00019999479073287376, - "loss": 46.0, - "step": 42505 - }, - { - "epoch": 3.2498805359634537, - "grad_norm": 0.0010374464327469468, - "learning_rate": 0.0001999947904877022, - "loss": 46.0, - "step": 42506 - }, - { - "epoch": 3.2499569929468435, - "grad_norm": 0.002254461869597435, - "learning_rate": 0.00019999479024252484, - "loss": 46.0, - "step": 42507 - }, - { - "epoch": 3.250033449930233, - "grad_norm": 0.002280781976878643, - "learning_rate": 0.00019999478999734174, - "loss": 46.0, - "step": 42508 - }, - { - "epoch": 3.2501099069136226, - "grad_norm": 0.0025865158531814814, - "learning_rate": 0.00019999478975215286, - "loss": 46.0, - "step": 42509 - }, - { - "epoch": 3.2501863638970123, - "grad_norm": 0.0015839614206925035, - "learning_rate": 0.00019999478950695818, - "loss": 46.0, - "step": 42510 - }, - { - "epoch": 3.250262820880402, - "grad_norm": 0.002307702787220478, - "learning_rate": 0.00019999478926175775, - "loss": 46.0, - "step": 42511 - }, - { - "epoch": 3.250339277863792, - "grad_norm": 0.003494716715067625, - "learning_rate": 0.00019999478901655155, - "loss": 46.0, - "step": 42512 - }, - { - "epoch": 3.2504157348471816, - "grad_norm": 0.0016066872049123049, - "learning_rate": 0.00019999478877133958, - "loss": 46.0, - "step": 42513 - }, - { - "epoch": 3.2504921918305714, - "grad_norm": 0.0013584350235760212, - "learning_rate": 0.00019999478852612186, - "loss": 46.0, - "step": 42514 - }, - { - "epoch": 3.250568648813961, - "grad_norm": 0.0032942506950348616, - "learning_rate": 0.00019999478828089834, - "loss": 46.0, - "step": 42515 - }, - { - "epoch": 3.250645105797351, - "grad_norm": 0.003127477364614606, - "learning_rate": 0.00019999478803566908, - "loss": 46.0, - "step": 42516 - }, - { - "epoch": 3.2507215627807406, - "grad_norm": 0.0016304231248795986, - "learning_rate": 0.00019999478779043404, - "loss": 46.0, - "step": 42517 - }, - { - "epoch": 3.2507980197641304, - "grad_norm": 0.001485159038566053, - "learning_rate": 0.0001999947875451932, - "loss": 46.0, - "step": 42518 - }, - { - "epoch": 3.25087447674752, - "grad_norm": 0.0013502154033631086, - "learning_rate": 0.00019999478729994664, - "loss": 46.0, - "step": 42519 - }, - { - "epoch": 3.2509509337309095, - "grad_norm": 0.0006993597489781678, - "learning_rate": 0.00019999478705469428, - "loss": 46.0, - "step": 42520 - }, - { - "epoch": 3.2510273907142992, - "grad_norm": 0.002000403357669711, - "learning_rate": 0.00019999478680943618, - "loss": 46.0, - "step": 42521 - }, - { - "epoch": 3.251103847697689, - "grad_norm": 0.001074827741831541, - "learning_rate": 0.00019999478656417227, - "loss": 46.0, - "step": 42522 - }, - { - "epoch": 3.2511803046810788, - "grad_norm": 0.0026615653187036514, - "learning_rate": 0.00019999478631890262, - "loss": 46.0, - "step": 42523 - }, - { - "epoch": 3.2512567616644685, - "grad_norm": 0.0017869886942207813, - "learning_rate": 0.0001999947860736272, - "loss": 46.0, - "step": 42524 - }, - { - "epoch": 3.2513332186478583, - "grad_norm": 0.0012223739176988602, - "learning_rate": 0.00019999478582834602, - "loss": 46.0, - "step": 42525 - }, - { - "epoch": 3.251409675631248, - "grad_norm": 0.006573184859007597, - "learning_rate": 0.00019999478558305902, - "loss": 46.0, - "step": 42526 - }, - { - "epoch": 3.251486132614638, - "grad_norm": 0.0009233249584212899, - "learning_rate": 0.0001999947853377663, - "loss": 46.0, - "step": 42527 - }, - { - "epoch": 3.2515625895980276, - "grad_norm": 0.0022954700980335474, - "learning_rate": 0.0001999947850924678, - "loss": 46.0, - "step": 42528 - }, - { - "epoch": 3.251639046581417, - "grad_norm": 0.0026257624849677086, - "learning_rate": 0.00019999478484716355, - "loss": 46.0, - "step": 42529 - }, - { - "epoch": 3.2517155035648067, - "grad_norm": 0.002981224562972784, - "learning_rate": 0.00019999478460185348, - "loss": 46.0, - "step": 42530 - }, - { - "epoch": 3.2517919605481964, - "grad_norm": 0.0029388112016022205, - "learning_rate": 0.00019999478435653767, - "loss": 46.0, - "step": 42531 - }, - { - "epoch": 3.251868417531586, - "grad_norm": 0.0011481440160423517, - "learning_rate": 0.0001999947841112161, - "loss": 46.0, - "step": 42532 - }, - { - "epoch": 3.251944874514976, - "grad_norm": 0.002720416756346822, - "learning_rate": 0.00019999478386588875, - "loss": 46.0, - "step": 42533 - }, - { - "epoch": 3.2520213314983657, - "grad_norm": 0.0008004575502127409, - "learning_rate": 0.00019999478362055562, - "loss": 46.0, - "step": 42534 - }, - { - "epoch": 3.2520977884817555, - "grad_norm": 0.0019154442707076669, - "learning_rate": 0.00019999478337521674, - "loss": 46.0, - "step": 42535 - }, - { - "epoch": 3.252174245465145, - "grad_norm": 0.0028819534927606583, - "learning_rate": 0.00019999478312987206, - "loss": 46.0, - "step": 42536 - }, - { - "epoch": 3.252250702448535, - "grad_norm": 0.0006618181359954178, - "learning_rate": 0.00019999478288452167, - "loss": 46.0, - "step": 42537 - }, - { - "epoch": 3.2523271594319247, - "grad_norm": 0.003203276079148054, - "learning_rate": 0.00019999478263916544, - "loss": 46.0, - "step": 42538 - }, - { - "epoch": 3.2524036164153145, - "grad_norm": 0.005383047740906477, - "learning_rate": 0.0001999947823938035, - "loss": 46.0, - "step": 42539 - }, - { - "epoch": 3.2524800733987043, - "grad_norm": 0.0071266950108110905, - "learning_rate": 0.00019999478214843578, - "loss": 46.0, - "step": 42540 - }, - { - "epoch": 3.2525565303820936, - "grad_norm": 0.004323940724134445, - "learning_rate": 0.00019999478190306226, - "loss": 46.0, - "step": 42541 - }, - { - "epoch": 3.2526329873654833, - "grad_norm": 0.0020127473399043083, - "learning_rate": 0.000199994781657683, - "loss": 46.0, - "step": 42542 - }, - { - "epoch": 3.252709444348873, - "grad_norm": 0.0025062926579266787, - "learning_rate": 0.00019999478141229796, - "loss": 46.0, - "step": 42543 - }, - { - "epoch": 3.252785901332263, - "grad_norm": 0.0013228552415966988, - "learning_rate": 0.00019999478116690712, - "loss": 46.0, - "step": 42544 - }, - { - "epoch": 3.2528623583156526, - "grad_norm": 0.0021390609908849, - "learning_rate": 0.00019999478092151056, - "loss": 46.0, - "step": 42545 - }, - { - "epoch": 3.2529388152990424, - "grad_norm": 0.0009808988543227315, - "learning_rate": 0.00019999478067610823, - "loss": 46.0, - "step": 42546 - }, - { - "epoch": 3.253015272282432, - "grad_norm": 0.0014587537152692676, - "learning_rate": 0.00019999478043070007, - "loss": 46.0, - "step": 42547 - }, - { - "epoch": 3.253091729265822, - "grad_norm": 0.0009863952873274684, - "learning_rate": 0.00019999478018528622, - "loss": 46.0, - "step": 42548 - }, - { - "epoch": 3.2531681862492117, - "grad_norm": 0.002077799988910556, - "learning_rate": 0.00019999477993986654, - "loss": 46.0, - "step": 42549 - }, - { - "epoch": 3.2532446432326014, - "grad_norm": 0.0030031197238713503, - "learning_rate": 0.00019999477969444111, - "loss": 46.0, - "step": 42550 - }, - { - "epoch": 3.2533211002159907, - "grad_norm": 0.0014947708696126938, - "learning_rate": 0.00019999477944900992, - "loss": 46.0, - "step": 42551 - }, - { - "epoch": 3.2533975571993805, - "grad_norm": 0.001467434922233224, - "learning_rate": 0.00019999477920357297, - "loss": 46.0, - "step": 42552 - }, - { - "epoch": 3.2534740141827703, - "grad_norm": 0.0013639378594234586, - "learning_rate": 0.00019999477895813023, - "loss": 46.0, - "step": 42553 - }, - { - "epoch": 3.25355047116616, - "grad_norm": 0.003380757989361882, - "learning_rate": 0.00019999477871268174, - "loss": 46.0, - "step": 42554 - }, - { - "epoch": 3.25362692814955, - "grad_norm": 0.0009612800204195082, - "learning_rate": 0.00019999477846722744, - "loss": 46.0, - "step": 42555 - }, - { - "epoch": 3.2537033851329396, - "grad_norm": 0.0024307905696332455, - "learning_rate": 0.00019999477822176743, - "loss": 46.0, - "step": 42556 - }, - { - "epoch": 3.2537798421163293, - "grad_norm": 0.001676167012192309, - "learning_rate": 0.00019999477797630162, - "loss": 46.0, - "step": 42557 - }, - { - "epoch": 3.253856299099719, - "grad_norm": 0.0020378839690238237, - "learning_rate": 0.00019999477773083004, - "loss": 46.0, - "step": 42558 - }, - { - "epoch": 3.253932756083109, - "grad_norm": 0.004175650887191296, - "learning_rate": 0.00019999477748535268, - "loss": 46.0, - "step": 42559 - }, - { - "epoch": 3.2540092130664986, - "grad_norm": 0.0022654470521956682, - "learning_rate": 0.0001999947772398696, - "loss": 46.0, - "step": 42560 - }, - { - "epoch": 3.2540856700498884, - "grad_norm": 0.001592738670296967, - "learning_rate": 0.0001999947769943807, - "loss": 46.0, - "step": 42561 - }, - { - "epoch": 3.254162127033278, - "grad_norm": 0.001594061148352921, - "learning_rate": 0.00019999477674888602, - "loss": 46.0, - "step": 42562 - }, - { - "epoch": 3.2542385840166674, - "grad_norm": 0.0023689360823482275, - "learning_rate": 0.0001999947765033856, - "loss": 46.0, - "step": 42563 - }, - { - "epoch": 3.254315041000057, - "grad_norm": 0.0020583071745932102, - "learning_rate": 0.00019999477625787942, - "loss": 46.0, - "step": 42564 - }, - { - "epoch": 3.254391497983447, - "grad_norm": 0.008926953189074993, - "learning_rate": 0.00019999477601236745, - "loss": 46.0, - "step": 42565 - }, - { - "epoch": 3.2544679549668367, - "grad_norm": 0.0018196330638602376, - "learning_rate": 0.00019999477576684974, - "loss": 46.0, - "step": 42566 - }, - { - "epoch": 3.2545444119502265, - "grad_norm": 0.0008109429036267102, - "learning_rate": 0.00019999477552132624, - "loss": 46.0, - "step": 42567 - }, - { - "epoch": 3.2546208689336162, - "grad_norm": 0.0037324721924960613, - "learning_rate": 0.00019999477527579695, - "loss": 46.0, - "step": 42568 - }, - { - "epoch": 3.254697325917006, - "grad_norm": 0.006557142361998558, - "learning_rate": 0.00019999477503026192, - "loss": 46.0, - "step": 42569 - }, - { - "epoch": 3.2547737829003958, - "grad_norm": 0.003369351848959923, - "learning_rate": 0.0001999947747847211, - "loss": 46.0, - "step": 42570 - }, - { - "epoch": 3.2548502398837855, - "grad_norm": 0.004526851233094931, - "learning_rate": 0.00019999477453917455, - "loss": 46.0, - "step": 42571 - }, - { - "epoch": 3.2549266968671753, - "grad_norm": 0.0025899349711835384, - "learning_rate": 0.0001999947742936222, - "loss": 46.0, - "step": 42572 - }, - { - "epoch": 3.2550031538505646, - "grad_norm": 0.004239336121827364, - "learning_rate": 0.00019999477404806406, - "loss": 46.0, - "step": 42573 - }, - { - "epoch": 3.2550796108339544, - "grad_norm": 0.001957200700417161, - "learning_rate": 0.00019999477380250018, - "loss": 46.0, - "step": 42574 - }, - { - "epoch": 3.255156067817344, - "grad_norm": 0.0036439464893192053, - "learning_rate": 0.00019999477355693053, - "loss": 46.0, - "step": 42575 - }, - { - "epoch": 3.255232524800734, - "grad_norm": 0.0010007243836298585, - "learning_rate": 0.0001999947733113551, - "loss": 46.0, - "step": 42576 - }, - { - "epoch": 3.2553089817841236, - "grad_norm": 0.004350172821432352, - "learning_rate": 0.0001999947730657739, - "loss": 46.0, - "step": 42577 - }, - { - "epoch": 3.2553854387675134, - "grad_norm": 0.0032392379362136126, - "learning_rate": 0.00019999477282018696, - "loss": 46.0, - "step": 42578 - }, - { - "epoch": 3.255461895750903, - "grad_norm": 0.001384774805046618, - "learning_rate": 0.00019999477257459422, - "loss": 46.0, - "step": 42579 - }, - { - "epoch": 3.255538352734293, - "grad_norm": 0.00437150476500392, - "learning_rate": 0.00019999477232899573, - "loss": 46.0, - "step": 42580 - }, - { - "epoch": 3.2556148097176827, - "grad_norm": 0.0033775190822780132, - "learning_rate": 0.00019999477208339147, - "loss": 46.0, - "step": 42581 - }, - { - "epoch": 3.2556912667010725, - "grad_norm": 0.0045274593867361546, - "learning_rate": 0.00019999477183778143, - "loss": 46.0, - "step": 42582 - }, - { - "epoch": 3.255767723684462, - "grad_norm": 0.0024706183467060328, - "learning_rate": 0.00019999477159216562, - "loss": 46.0, - "step": 42583 - }, - { - "epoch": 3.255844180667852, - "grad_norm": 0.0016953485319390893, - "learning_rate": 0.00019999477134654403, - "loss": 46.0, - "step": 42584 - }, - { - "epoch": 3.2559206376512413, - "grad_norm": 0.002129747299477458, - "learning_rate": 0.00019999477110091668, - "loss": 46.0, - "step": 42585 - }, - { - "epoch": 3.255997094634631, - "grad_norm": 0.003871861146762967, - "learning_rate": 0.00019999477085528357, - "loss": 46.0, - "step": 42586 - }, - { - "epoch": 3.256073551618021, - "grad_norm": 0.003804868320003152, - "learning_rate": 0.0001999947706096447, - "loss": 46.0, - "step": 42587 - }, - { - "epoch": 3.2561500086014106, - "grad_norm": 0.0009133187704719603, - "learning_rate": 0.00019999477036400005, - "loss": 46.0, - "step": 42588 - }, - { - "epoch": 3.2562264655848003, - "grad_norm": 0.002121273661032319, - "learning_rate": 0.00019999477011834965, - "loss": 46.0, - "step": 42589 - }, - { - "epoch": 3.25630292256819, - "grad_norm": 0.0006553675048053265, - "learning_rate": 0.00019999476987269342, - "loss": 46.0, - "step": 42590 - }, - { - "epoch": 3.25637937955158, - "grad_norm": 0.0021904862951487303, - "learning_rate": 0.00019999476962703148, - "loss": 46.0, - "step": 42591 - }, - { - "epoch": 3.2564558365349696, - "grad_norm": 0.009102953597903252, - "learning_rate": 0.00019999476938136374, - "loss": 46.0, - "step": 42592 - }, - { - "epoch": 3.2565322935183594, - "grad_norm": 0.00148000568151474, - "learning_rate": 0.00019999476913569025, - "loss": 46.0, - "step": 42593 - }, - { - "epoch": 3.256608750501749, - "grad_norm": 0.001783793093636632, - "learning_rate": 0.00019999476889001096, - "loss": 46.0, - "step": 42594 - }, - { - "epoch": 3.2566852074851385, - "grad_norm": 0.0015115505084395409, - "learning_rate": 0.00019999476864432595, - "loss": 46.0, - "step": 42595 - }, - { - "epoch": 3.256761664468528, - "grad_norm": 0.002230108482763171, - "learning_rate": 0.00019999476839863514, - "loss": 46.0, - "step": 42596 - }, - { - "epoch": 3.256838121451918, - "grad_norm": 0.0026826856192201376, - "learning_rate": 0.00019999476815293855, - "loss": 46.0, - "step": 42597 - }, - { - "epoch": 3.2569145784353077, - "grad_norm": 0.003217132994905114, - "learning_rate": 0.00019999476790723622, - "loss": 46.0, - "step": 42598 - }, - { - "epoch": 3.2569910354186975, - "grad_norm": 0.0050801499746739864, - "learning_rate": 0.0001999947676615281, - "loss": 46.0, - "step": 42599 - }, - { - "epoch": 3.2570674924020873, - "grad_norm": 0.001110272016376257, - "learning_rate": 0.00019999476741581421, - "loss": 46.0, - "step": 42600 - }, - { - "epoch": 3.257143949385477, - "grad_norm": 0.00299544888548553, - "learning_rate": 0.00019999476717009456, - "loss": 46.0, - "step": 42601 - }, - { - "epoch": 3.257220406368867, - "grad_norm": 0.0018276463961228728, - "learning_rate": 0.00019999476692436914, - "loss": 46.0, - "step": 42602 - }, - { - "epoch": 3.2572968633522565, - "grad_norm": 0.002299498300999403, - "learning_rate": 0.00019999476667863797, - "loss": 46.0, - "step": 42603 - }, - { - "epoch": 3.2573733203356463, - "grad_norm": 0.005792070645838976, - "learning_rate": 0.000199994766432901, - "loss": 46.0, - "step": 42604 - }, - { - "epoch": 3.257449777319036, - "grad_norm": 0.005416215397417545, - "learning_rate": 0.00019999476618715826, - "loss": 46.0, - "step": 42605 - }, - { - "epoch": 3.257526234302426, - "grad_norm": 0.0034660720266401768, - "learning_rate": 0.00019999476594140977, - "loss": 46.0, - "step": 42606 - }, - { - "epoch": 3.257602691285815, - "grad_norm": 0.0030668703839182854, - "learning_rate": 0.0001999947656956555, - "loss": 46.0, - "step": 42607 - }, - { - "epoch": 3.257679148269205, - "grad_norm": 0.0012086612405255437, - "learning_rate": 0.0001999947654498955, - "loss": 46.0, - "step": 42608 - }, - { - "epoch": 3.2577556052525947, - "grad_norm": 0.0025078016333281994, - "learning_rate": 0.00019999476520412968, - "loss": 46.0, - "step": 42609 - }, - { - "epoch": 3.2578320622359844, - "grad_norm": 0.0011279458412900567, - "learning_rate": 0.0001999947649583581, - "loss": 46.0, - "step": 42610 - }, - { - "epoch": 3.257908519219374, - "grad_norm": 0.0018524121260270476, - "learning_rate": 0.00019999476471258074, - "loss": 46.0, - "step": 42611 - }, - { - "epoch": 3.257984976202764, - "grad_norm": 0.0046928441151976585, - "learning_rate": 0.00019999476446679764, - "loss": 46.0, - "step": 42612 - }, - { - "epoch": 3.2580614331861537, - "grad_norm": 0.003628105390816927, - "learning_rate": 0.00019999476422100876, - "loss": 46.0, - "step": 42613 - }, - { - "epoch": 3.2581378901695435, - "grad_norm": 0.0006739333621226251, - "learning_rate": 0.00019999476397521411, - "loss": 46.0, - "step": 42614 - }, - { - "epoch": 3.2582143471529332, - "grad_norm": 0.0037230895832180977, - "learning_rate": 0.0001999947637294137, - "loss": 46.0, - "step": 42615 - }, - { - "epoch": 3.258290804136323, - "grad_norm": 0.0009315396309830248, - "learning_rate": 0.0001999947634836075, - "loss": 46.0, - "step": 42616 - }, - { - "epoch": 3.2583672611197123, - "grad_norm": 0.00419262982904911, - "learning_rate": 0.00019999476323779555, - "loss": 46.0, - "step": 42617 - }, - { - "epoch": 3.258443718103102, - "grad_norm": 0.0019057404715567827, - "learning_rate": 0.0001999947629919778, - "loss": 46.0, - "step": 42618 - }, - { - "epoch": 3.258520175086492, - "grad_norm": 0.0011036384385079145, - "learning_rate": 0.00019999476274615432, - "loss": 46.0, - "step": 42619 - }, - { - "epoch": 3.2585966320698816, - "grad_norm": 0.0020766984671354294, - "learning_rate": 0.00019999476250032506, - "loss": 46.0, - "step": 42620 - }, - { - "epoch": 3.2586730890532714, - "grad_norm": 0.0029008088167756796, - "learning_rate": 0.00019999476225449002, - "loss": 46.0, - "step": 42621 - }, - { - "epoch": 3.258749546036661, - "grad_norm": 0.001717729028314352, - "learning_rate": 0.0001999947620086492, - "loss": 46.0, - "step": 42622 - }, - { - "epoch": 3.258826003020051, - "grad_norm": 0.0026180907152593136, - "learning_rate": 0.00019999476176280263, - "loss": 46.0, - "step": 42623 - }, - { - "epoch": 3.2589024600034406, - "grad_norm": 0.004629471804946661, - "learning_rate": 0.00019999476151695032, - "loss": 46.0, - "step": 42624 - }, - { - "epoch": 3.2589789169868304, - "grad_norm": 0.003538233693689108, - "learning_rate": 0.00019999476127109217, - "loss": 46.0, - "step": 42625 - }, - { - "epoch": 3.25905537397022, - "grad_norm": 0.0014037989312782884, - "learning_rate": 0.00019999476102522832, - "loss": 46.0, - "step": 42626 - }, - { - "epoch": 3.25913183095361, - "grad_norm": 0.000849241332616657, - "learning_rate": 0.00019999476077935867, - "loss": 46.0, - "step": 42627 - }, - { - "epoch": 3.2592082879369997, - "grad_norm": 0.0042520547285676, - "learning_rate": 0.00019999476053348322, - "loss": 46.0, - "step": 42628 - }, - { - "epoch": 3.259284744920389, - "grad_norm": 0.0011316750897094607, - "learning_rate": 0.00019999476028760205, - "loss": 46.0, - "step": 42629 - }, - { - "epoch": 3.2593612019037788, - "grad_norm": 0.002351074945181608, - "learning_rate": 0.00019999476004171508, - "loss": 46.0, - "step": 42630 - }, - { - "epoch": 3.2594376588871685, - "grad_norm": 0.0014585272874683142, - "learning_rate": 0.00019999475979582236, - "loss": 46.0, - "step": 42631 - }, - { - "epoch": 3.2595141158705583, - "grad_norm": 0.002162576885893941, - "learning_rate": 0.00019999475954992385, - "loss": 46.0, - "step": 42632 - }, - { - "epoch": 3.259590572853948, - "grad_norm": 0.0021104079205542803, - "learning_rate": 0.0001999947593040196, - "loss": 46.0, - "step": 42633 - }, - { - "epoch": 3.259667029837338, - "grad_norm": 0.003532793838530779, - "learning_rate": 0.00019999475905810958, - "loss": 46.0, - "step": 42634 - }, - { - "epoch": 3.2597434868207276, - "grad_norm": 0.0009372130152769387, - "learning_rate": 0.00019999475881219377, - "loss": 46.0, - "step": 42635 - }, - { - "epoch": 3.2598199438041173, - "grad_norm": 0.001357743632979691, - "learning_rate": 0.0001999947585662722, - "loss": 46.0, - "step": 42636 - }, - { - "epoch": 3.259896400787507, - "grad_norm": 0.0016610942548140883, - "learning_rate": 0.00019999475832034486, - "loss": 46.0, - "step": 42637 - }, - { - "epoch": 3.2599728577708964, - "grad_norm": 0.0007794471457600594, - "learning_rate": 0.00019999475807441175, - "loss": 46.0, - "step": 42638 - }, - { - "epoch": 3.260049314754286, - "grad_norm": 0.0041829184629023075, - "learning_rate": 0.00019999475782847288, - "loss": 46.0, - "step": 42639 - }, - { - "epoch": 3.260125771737676, - "grad_norm": 0.0029143677093088627, - "learning_rate": 0.00019999475758252823, - "loss": 46.0, - "step": 42640 - }, - { - "epoch": 3.2602022287210657, - "grad_norm": 0.001310272840783, - "learning_rate": 0.00019999475733657783, - "loss": 46.0, - "step": 42641 - }, - { - "epoch": 3.2602786857044554, - "grad_norm": 0.0018135359277948737, - "learning_rate": 0.00019999475709062164, - "loss": 46.0, - "step": 42642 - }, - { - "epoch": 3.260355142687845, - "grad_norm": 0.003862922079861164, - "learning_rate": 0.00019999475684465967, - "loss": 46.0, - "step": 42643 - }, - { - "epoch": 3.260431599671235, - "grad_norm": 0.0009692379971966147, - "learning_rate": 0.00019999475659869192, - "loss": 46.0, - "step": 42644 - }, - { - "epoch": 3.2605080566546247, - "grad_norm": 0.0015959467273205519, - "learning_rate": 0.00019999475635271844, - "loss": 46.0, - "step": 42645 - }, - { - "epoch": 3.2605845136380145, - "grad_norm": 0.0031691587064415216, - "learning_rate": 0.0001999947561067392, - "loss": 46.0, - "step": 42646 - }, - { - "epoch": 3.2606609706214043, - "grad_norm": 0.004590482451021671, - "learning_rate": 0.00019999475586075414, - "loss": 46.0, - "step": 42647 - }, - { - "epoch": 3.260737427604794, - "grad_norm": 0.0035140204709023237, - "learning_rate": 0.00019999475561476336, - "loss": 46.0, - "step": 42648 - }, - { - "epoch": 3.2608138845881838, - "grad_norm": 0.002837889827787876, - "learning_rate": 0.00019999475536876677, - "loss": 46.0, - "step": 42649 - }, - { - "epoch": 3.2608903415715735, - "grad_norm": 0.010890967212617397, - "learning_rate": 0.00019999475512276442, - "loss": 46.0, - "step": 42650 - }, - { - "epoch": 3.260966798554963, - "grad_norm": 0.002530425088480115, - "learning_rate": 0.00019999475487675631, - "loss": 46.0, - "step": 42651 - }, - { - "epoch": 3.2610432555383526, - "grad_norm": 0.001641754643060267, - "learning_rate": 0.00019999475463074244, - "loss": 46.0, - "step": 42652 - }, - { - "epoch": 3.2611197125217424, - "grad_norm": 0.0018559409072622657, - "learning_rate": 0.0001999947543847228, - "loss": 46.0, - "step": 42653 - }, - { - "epoch": 3.261196169505132, - "grad_norm": 0.0010790321975946426, - "learning_rate": 0.00019999475413869737, - "loss": 46.0, - "step": 42654 - }, - { - "epoch": 3.261272626488522, - "grad_norm": 0.001097700442187488, - "learning_rate": 0.0001999947538926662, - "loss": 46.0, - "step": 42655 - }, - { - "epoch": 3.2613490834719117, - "grad_norm": 0.0020291018299758434, - "learning_rate": 0.00019999475364662926, - "loss": 46.0, - "step": 42656 - }, - { - "epoch": 3.2614255404553014, - "grad_norm": 0.002114082220941782, - "learning_rate": 0.00019999475340058652, - "loss": 46.0, - "step": 42657 - }, - { - "epoch": 3.261501997438691, - "grad_norm": 0.0020981209818273783, - "learning_rate": 0.00019999475315453803, - "loss": 46.0, - "step": 42658 - }, - { - "epoch": 3.261578454422081, - "grad_norm": 0.008384820073843002, - "learning_rate": 0.00019999475290848374, - "loss": 46.0, - "step": 42659 - }, - { - "epoch": 3.2616549114054703, - "grad_norm": 0.0018982775509357452, - "learning_rate": 0.00019999475266242373, - "loss": 46.0, - "step": 42660 - }, - { - "epoch": 3.26173136838886, - "grad_norm": 0.0015684940153732896, - "learning_rate": 0.00019999475241635795, - "loss": 46.0, - "step": 42661 - }, - { - "epoch": 3.26180782537225, - "grad_norm": 0.0008714738069102168, - "learning_rate": 0.00019999475217028637, - "loss": 46.0, - "step": 42662 - }, - { - "epoch": 3.2618842823556395, - "grad_norm": 0.0014482878614217043, - "learning_rate": 0.000199994751924209, - "loss": 46.0, - "step": 42663 - }, - { - "epoch": 3.2619607393390293, - "grad_norm": 0.0006008433410897851, - "learning_rate": 0.0001999947516781259, - "loss": 46.0, - "step": 42664 - }, - { - "epoch": 3.262037196322419, - "grad_norm": 0.001276198890991509, - "learning_rate": 0.00019999475143203703, - "loss": 46.0, - "step": 42665 - }, - { - "epoch": 3.262113653305809, - "grad_norm": 0.0033647785894572735, - "learning_rate": 0.00019999475118594238, - "loss": 46.0, - "step": 42666 - }, - { - "epoch": 3.2621901102891986, - "grad_norm": 0.00391007773578167, - "learning_rate": 0.00019999475093984196, - "loss": 46.0, - "step": 42667 - }, - { - "epoch": 3.2622665672725883, - "grad_norm": 0.0030895096715539694, - "learning_rate": 0.0001999947506937358, - "loss": 46.0, - "step": 42668 - }, - { - "epoch": 3.262343024255978, - "grad_norm": 0.0021707257255911827, - "learning_rate": 0.00019999475044762383, - "loss": 46.0, - "step": 42669 - }, - { - "epoch": 3.262419481239368, - "grad_norm": 0.0015484520699828863, - "learning_rate": 0.0001999947502015061, - "loss": 46.0, - "step": 42670 - }, - { - "epoch": 3.2624959382227576, - "grad_norm": 0.0010763694299384952, - "learning_rate": 0.00019999474995538262, - "loss": 46.0, - "step": 42671 - }, - { - "epoch": 3.2625723952061474, - "grad_norm": 0.0012754467315971851, - "learning_rate": 0.00019999474970925336, - "loss": 46.0, - "step": 42672 - }, - { - "epoch": 3.2626488521895367, - "grad_norm": 0.0011959876865148544, - "learning_rate": 0.00019999474946311833, - "loss": 46.0, - "step": 42673 - }, - { - "epoch": 3.2627253091729265, - "grad_norm": 0.002192129846662283, - "learning_rate": 0.00019999474921697754, - "loss": 46.0, - "step": 42674 - }, - { - "epoch": 3.2628017661563162, - "grad_norm": 0.001396198756992817, - "learning_rate": 0.00019999474897083096, - "loss": 46.0, - "step": 42675 - }, - { - "epoch": 3.262878223139706, - "grad_norm": 0.0011704524513334036, - "learning_rate": 0.00019999474872467863, - "loss": 46.0, - "step": 42676 - }, - { - "epoch": 3.2629546801230958, - "grad_norm": 0.0010957523481920362, - "learning_rate": 0.0001999947484785205, - "loss": 46.0, - "step": 42677 - }, - { - "epoch": 3.2630311371064855, - "grad_norm": 0.0011550318449735641, - "learning_rate": 0.00019999474823235663, - "loss": 46.0, - "step": 42678 - }, - { - "epoch": 3.2631075940898753, - "grad_norm": 0.004079408943653107, - "learning_rate": 0.000199994747986187, - "loss": 46.0, - "step": 42679 - }, - { - "epoch": 3.263184051073265, - "grad_norm": 0.009575429372489452, - "learning_rate": 0.00019999474774001156, - "loss": 46.0, - "step": 42680 - }, - { - "epoch": 3.263260508056655, - "grad_norm": 0.003662359667941928, - "learning_rate": 0.0001999947474938304, - "loss": 46.0, - "step": 42681 - }, - { - "epoch": 3.263336965040044, - "grad_norm": 0.0032211500220000744, - "learning_rate": 0.00019999474724764345, - "loss": 46.0, - "step": 42682 - }, - { - "epoch": 3.263413422023434, - "grad_norm": 0.0005939346738159657, - "learning_rate": 0.0001999947470014507, - "loss": 46.0, - "step": 42683 - }, - { - "epoch": 3.2634898790068236, - "grad_norm": 0.0034532295539975166, - "learning_rate": 0.00019999474675525222, - "loss": 46.0, - "step": 42684 - }, - { - "epoch": 3.2635663359902134, - "grad_norm": 0.0016830003587529063, - "learning_rate": 0.00019999474650904796, - "loss": 46.0, - "step": 42685 - }, - { - "epoch": 3.263642792973603, - "grad_norm": 0.0014946593437343836, - "learning_rate": 0.00019999474626283795, - "loss": 46.0, - "step": 42686 - }, - { - "epoch": 3.263719249956993, - "grad_norm": 0.001080077257938683, - "learning_rate": 0.00019999474601662214, - "loss": 46.0, - "step": 42687 - }, - { - "epoch": 3.2637957069403827, - "grad_norm": 0.0014632137026637793, - "learning_rate": 0.00019999474577040056, - "loss": 46.0, - "step": 42688 - }, - { - "epoch": 3.2638721639237724, - "grad_norm": 0.0026226257905364037, - "learning_rate": 0.00019999474552417324, - "loss": 46.0, - "step": 42689 - }, - { - "epoch": 3.263948620907162, - "grad_norm": 0.0017646170454099774, - "learning_rate": 0.00019999474527794013, - "loss": 46.0, - "step": 42690 - }, - { - "epoch": 3.264025077890552, - "grad_norm": 0.008362078107893467, - "learning_rate": 0.00019999474503170126, - "loss": 46.0, - "step": 42691 - }, - { - "epoch": 3.2641015348739417, - "grad_norm": 0.0021672456059604883, - "learning_rate": 0.0001999947447854566, - "loss": 46.0, - "step": 42692 - }, - { - "epoch": 3.2641779918573315, - "grad_norm": 0.0009663395467214286, - "learning_rate": 0.00019999474453920622, - "loss": 46.0, - "step": 42693 - }, - { - "epoch": 3.264254448840721, - "grad_norm": 0.0022803430911153555, - "learning_rate": 0.00019999474429295002, - "loss": 46.0, - "step": 42694 - }, - { - "epoch": 3.2643309058241106, - "grad_norm": 0.0022487312089651823, - "learning_rate": 0.00019999474404668805, - "loss": 46.0, - "step": 42695 - }, - { - "epoch": 3.2644073628075003, - "grad_norm": 0.0052771600894629955, - "learning_rate": 0.00019999474380042034, - "loss": 46.0, - "step": 42696 - }, - { - "epoch": 3.26448381979089, - "grad_norm": 0.0023920375388115644, - "learning_rate": 0.00019999474355414685, - "loss": 46.0, - "step": 42697 - }, - { - "epoch": 3.26456027677428, - "grad_norm": 0.0010859811445698142, - "learning_rate": 0.0001999947433078676, - "loss": 46.0, - "step": 42698 - }, - { - "epoch": 3.2646367337576696, - "grad_norm": 0.0018826827872544527, - "learning_rate": 0.00019999474306158256, - "loss": 46.0, - "step": 42699 - }, - { - "epoch": 3.2647131907410594, - "grad_norm": 0.0019581117667257786, - "learning_rate": 0.00019999474281529178, - "loss": 46.0, - "step": 42700 - }, - { - "epoch": 3.264789647724449, - "grad_norm": 0.0016367704374715686, - "learning_rate": 0.0001999947425689952, - "loss": 46.0, - "step": 42701 - }, - { - "epoch": 3.264866104707839, - "grad_norm": 0.002849506912752986, - "learning_rate": 0.00019999474232269287, - "loss": 46.0, - "step": 42702 - }, - { - "epoch": 3.2649425616912287, - "grad_norm": 0.004920949228107929, - "learning_rate": 0.00019999474207638477, - "loss": 46.0, - "step": 42703 - }, - { - "epoch": 3.265019018674618, - "grad_norm": 0.0016969614662230015, - "learning_rate": 0.0001999947418300709, - "loss": 46.0, - "step": 42704 - }, - { - "epoch": 3.2650954756580077, - "grad_norm": 0.003259900491684675, - "learning_rate": 0.00019999474158375124, - "loss": 46.0, - "step": 42705 - }, - { - "epoch": 3.2651719326413975, - "grad_norm": 0.003804669948294759, - "learning_rate": 0.00019999474133742582, - "loss": 46.0, - "step": 42706 - }, - { - "epoch": 3.2652483896247873, - "grad_norm": 0.0024107638746500015, - "learning_rate": 0.00019999474109109463, - "loss": 46.0, - "step": 42707 - }, - { - "epoch": 3.265324846608177, - "grad_norm": 0.00541957002133131, - "learning_rate": 0.0001999947408447577, - "loss": 46.0, - "step": 42708 - }, - { - "epoch": 3.2654013035915668, - "grad_norm": 0.004622437059879303, - "learning_rate": 0.00019999474059841497, - "loss": 46.0, - "step": 42709 - }, - { - "epoch": 3.2654777605749565, - "grad_norm": 0.002149523701518774, - "learning_rate": 0.0001999947403520665, - "loss": 46.0, - "step": 42710 - }, - { - "epoch": 3.2655542175583463, - "grad_norm": 0.0023876260966062546, - "learning_rate": 0.00019999474010571225, - "loss": 46.0, - "step": 42711 - }, - { - "epoch": 3.265630674541736, - "grad_norm": 0.001868357416242361, - "learning_rate": 0.0001999947398593522, - "loss": 46.0, - "step": 42712 - }, - { - "epoch": 3.265707131525126, - "grad_norm": 0.0023764839861541986, - "learning_rate": 0.0001999947396129864, - "loss": 46.0, - "step": 42713 - }, - { - "epoch": 3.2657835885085156, - "grad_norm": 0.007495502475649118, - "learning_rate": 0.00019999473936661483, - "loss": 46.0, - "step": 42714 - }, - { - "epoch": 3.2658600454919053, - "grad_norm": 0.0009260806255042553, - "learning_rate": 0.00019999473912023753, - "loss": 46.0, - "step": 42715 - }, - { - "epoch": 3.2659365024752947, - "grad_norm": 0.002886514412239194, - "learning_rate": 0.0001999947388738544, - "loss": 46.0, - "step": 42716 - }, - { - "epoch": 3.2660129594586844, - "grad_norm": 0.003133345628157258, - "learning_rate": 0.00019999473862746553, - "loss": 46.0, - "step": 42717 - }, - { - "epoch": 3.266089416442074, - "grad_norm": 0.0025711958296597004, - "learning_rate": 0.00019999473838107088, - "loss": 46.0, - "step": 42718 - }, - { - "epoch": 3.266165873425464, - "grad_norm": 0.00218483479693532, - "learning_rate": 0.00019999473813467046, - "loss": 46.0, - "step": 42719 - }, - { - "epoch": 3.2662423304088537, - "grad_norm": 0.0030936570838093758, - "learning_rate": 0.0001999947378882643, - "loss": 46.0, - "step": 42720 - }, - { - "epoch": 3.2663187873922435, - "grad_norm": 0.001858058269135654, - "learning_rate": 0.00019999473764185235, - "loss": 46.0, - "step": 42721 - }, - { - "epoch": 3.2663952443756332, - "grad_norm": 0.002769799204543233, - "learning_rate": 0.00019999473739543464, - "loss": 46.0, - "step": 42722 - }, - { - "epoch": 3.266471701359023, - "grad_norm": 0.0027657856699079275, - "learning_rate": 0.00019999473714901113, - "loss": 46.0, - "step": 42723 - }, - { - "epoch": 3.2665481583424127, - "grad_norm": 0.0022426487412303686, - "learning_rate": 0.0001999947369025819, - "loss": 46.0, - "step": 42724 - }, - { - "epoch": 3.2666246153258025, - "grad_norm": 0.0014258448500186205, - "learning_rate": 0.00019999473665614686, - "loss": 46.0, - "step": 42725 - }, - { - "epoch": 3.266701072309192, - "grad_norm": 0.0010731976944953203, - "learning_rate": 0.00019999473640970608, - "loss": 46.0, - "step": 42726 - }, - { - "epoch": 3.2667775292925816, - "grad_norm": 0.0023060431703925133, - "learning_rate": 0.0001999947361632595, - "loss": 46.0, - "step": 42727 - }, - { - "epoch": 3.2668539862759713, - "grad_norm": 0.0006291406461969018, - "learning_rate": 0.00019999473591680714, - "loss": 46.0, - "step": 42728 - }, - { - "epoch": 3.266930443259361, - "grad_norm": 0.002645375905558467, - "learning_rate": 0.00019999473567034904, - "loss": 46.0, - "step": 42729 - }, - { - "epoch": 3.267006900242751, - "grad_norm": 0.0017835797043517232, - "learning_rate": 0.00019999473542388517, - "loss": 46.0, - "step": 42730 - }, - { - "epoch": 3.2670833572261406, - "grad_norm": 0.001698076375760138, - "learning_rate": 0.00019999473517741555, - "loss": 46.0, - "step": 42731 - }, - { - "epoch": 3.2671598142095304, - "grad_norm": 0.0013392699183896184, - "learning_rate": 0.00019999473493094013, - "loss": 46.0, - "step": 42732 - }, - { - "epoch": 3.26723627119292, - "grad_norm": 0.002247408265247941, - "learning_rate": 0.00019999473468445896, - "loss": 46.0, - "step": 42733 - }, - { - "epoch": 3.26731272817631, - "grad_norm": 0.0038942538667470217, - "learning_rate": 0.000199994734437972, - "loss": 46.0, - "step": 42734 - }, - { - "epoch": 3.2673891851596997, - "grad_norm": 0.0029028940480202436, - "learning_rate": 0.00019999473419147928, - "loss": 46.0, - "step": 42735 - }, - { - "epoch": 3.2674656421430894, - "grad_norm": 0.0030094278044998646, - "learning_rate": 0.0001999947339449808, - "loss": 46.0, - "step": 42736 - }, - { - "epoch": 3.267542099126479, - "grad_norm": 0.0015174256404861808, - "learning_rate": 0.00019999473369847654, - "loss": 46.0, - "step": 42737 - }, - { - "epoch": 3.2676185561098685, - "grad_norm": 0.001444027409888804, - "learning_rate": 0.0001999947334519665, - "loss": 46.0, - "step": 42738 - }, - { - "epoch": 3.2676950130932583, - "grad_norm": 0.00046260232920758426, - "learning_rate": 0.00019999473320545072, - "loss": 46.0, - "step": 42739 - }, - { - "epoch": 3.267771470076648, - "grad_norm": 0.001998119056224823, - "learning_rate": 0.00019999473295892917, - "loss": 46.0, - "step": 42740 - }, - { - "epoch": 3.267847927060038, - "grad_norm": 0.0013875464210286736, - "learning_rate": 0.00019999473271240184, - "loss": 46.0, - "step": 42741 - }, - { - "epoch": 3.2679243840434276, - "grad_norm": 0.0012464553583413363, - "learning_rate": 0.00019999473246586872, - "loss": 46.0, - "step": 42742 - }, - { - "epoch": 3.2680008410268173, - "grad_norm": 0.0023628019262105227, - "learning_rate": 0.00019999473221932984, - "loss": 46.0, - "step": 42743 - }, - { - "epoch": 3.268077298010207, - "grad_norm": 0.007078080903738737, - "learning_rate": 0.00019999473197278522, - "loss": 46.0, - "step": 42744 - }, - { - "epoch": 3.268153754993597, - "grad_norm": 0.0017808827105909586, - "learning_rate": 0.00019999473172623483, - "loss": 46.0, - "step": 42745 - }, - { - "epoch": 3.2682302119769866, - "grad_norm": 0.0028013356495648623, - "learning_rate": 0.0001999947314796786, - "loss": 46.0, - "step": 42746 - }, - { - "epoch": 3.2683066689603764, - "grad_norm": 0.002043115207925439, - "learning_rate": 0.0001999947312331167, - "loss": 46.0, - "step": 42747 - }, - { - "epoch": 3.2683831259437657, - "grad_norm": 0.0028173988685011864, - "learning_rate": 0.00019999473098654896, - "loss": 46.0, - "step": 42748 - }, - { - "epoch": 3.2684595829271554, - "grad_norm": 0.005193495657294989, - "learning_rate": 0.00019999473073997547, - "loss": 46.0, - "step": 42749 - }, - { - "epoch": 3.268536039910545, - "grad_norm": 0.0007028752588666975, - "learning_rate": 0.00019999473049339624, - "loss": 46.0, - "step": 42750 - }, - { - "epoch": 3.268612496893935, - "grad_norm": 0.0017139507690444589, - "learning_rate": 0.0001999947302468112, - "loss": 46.0, - "step": 42751 - }, - { - "epoch": 3.2686889538773247, - "grad_norm": 0.0011305786902084947, - "learning_rate": 0.00019999473000022043, - "loss": 46.0, - "step": 42752 - }, - { - "epoch": 3.2687654108607145, - "grad_norm": 0.005871754139661789, - "learning_rate": 0.00019999472975362385, - "loss": 46.0, - "step": 42753 - }, - { - "epoch": 3.2688418678441042, - "grad_norm": 0.0008038533851504326, - "learning_rate": 0.00019999472950702152, - "loss": 46.0, - "step": 42754 - }, - { - "epoch": 3.268918324827494, - "grad_norm": 0.0012331573525443673, - "learning_rate": 0.0001999947292604134, - "loss": 46.0, - "step": 42755 - }, - { - "epoch": 3.2689947818108838, - "grad_norm": 0.0021019268315285444, - "learning_rate": 0.00019999472901379955, - "loss": 46.0, - "step": 42756 - }, - { - "epoch": 3.2690712387942735, - "grad_norm": 0.0007939172792248428, - "learning_rate": 0.0001999947287671799, - "loss": 46.0, - "step": 42757 - }, - { - "epoch": 3.2691476957776633, - "grad_norm": 0.001801857608370483, - "learning_rate": 0.0001999947285205545, - "loss": 46.0, - "step": 42758 - }, - { - "epoch": 3.269224152761053, - "grad_norm": 0.0017656749114394188, - "learning_rate": 0.00019999472827392331, - "loss": 46.0, - "step": 42759 - }, - { - "epoch": 3.2693006097444424, - "grad_norm": 0.0013188837328925729, - "learning_rate": 0.00019999472802728637, - "loss": 46.0, - "step": 42760 - }, - { - "epoch": 3.269377066727832, - "grad_norm": 0.0014837241033092141, - "learning_rate": 0.00019999472778064366, - "loss": 46.0, - "step": 42761 - }, - { - "epoch": 3.269453523711222, - "grad_norm": 0.001947373035363853, - "learning_rate": 0.00019999472753399518, - "loss": 46.0, - "step": 42762 - }, - { - "epoch": 3.2695299806946116, - "grad_norm": 0.0019918896723538637, - "learning_rate": 0.00019999472728734092, - "loss": 46.0, - "step": 42763 - }, - { - "epoch": 3.2696064376780014, - "grad_norm": 0.002146610291674733, - "learning_rate": 0.00019999472704068088, - "loss": 46.0, - "step": 42764 - }, - { - "epoch": 3.269682894661391, - "grad_norm": 0.0020708097144961357, - "learning_rate": 0.0001999947267940151, - "loss": 46.0, - "step": 42765 - }, - { - "epoch": 3.269759351644781, - "grad_norm": 0.00236993795260787, - "learning_rate": 0.00019999472654734355, - "loss": 46.0, - "step": 42766 - }, - { - "epoch": 3.2698358086281707, - "grad_norm": 0.0014317125314846635, - "learning_rate": 0.0001999947263006662, - "loss": 46.0, - "step": 42767 - }, - { - "epoch": 3.2699122656115605, - "grad_norm": 0.011325960047543049, - "learning_rate": 0.0001999947260539831, - "loss": 46.0, - "step": 42768 - }, - { - "epoch": 3.2699887225949498, - "grad_norm": 0.001926047378219664, - "learning_rate": 0.00019999472580729423, - "loss": 46.0, - "step": 42769 - }, - { - "epoch": 3.2700651795783395, - "grad_norm": 0.0015121811302378774, - "learning_rate": 0.0001999947255605996, - "loss": 46.0, - "step": 42770 - }, - { - "epoch": 3.2701416365617293, - "grad_norm": 0.0011549246264621615, - "learning_rate": 0.0001999947253138992, - "loss": 46.0, - "step": 42771 - }, - { - "epoch": 3.270218093545119, - "grad_norm": 0.0034781082067638636, - "learning_rate": 0.00019999472506719302, - "loss": 46.0, - "step": 42772 - }, - { - "epoch": 3.270294550528509, - "grad_norm": 0.004090859554708004, - "learning_rate": 0.00019999472482048108, - "loss": 46.0, - "step": 42773 - }, - { - "epoch": 3.2703710075118986, - "grad_norm": 0.0016107696574181318, - "learning_rate": 0.00019999472457376337, - "loss": 46.0, - "step": 42774 - }, - { - "epoch": 3.2704474644952883, - "grad_norm": 0.0051518771797418594, - "learning_rate": 0.00019999472432703989, - "loss": 46.0, - "step": 42775 - }, - { - "epoch": 3.270523921478678, - "grad_norm": 0.003394344123080373, - "learning_rate": 0.00019999472408031063, - "loss": 46.0, - "step": 42776 - }, - { - "epoch": 3.270600378462068, - "grad_norm": 0.002992049092426896, - "learning_rate": 0.0001999947238335756, - "loss": 46.0, - "step": 42777 - }, - { - "epoch": 3.2706768354454576, - "grad_norm": 0.0029316695872694254, - "learning_rate": 0.00019999472358683481, - "loss": 46.0, - "step": 42778 - }, - { - "epoch": 3.2707532924288474, - "grad_norm": 0.0021703671663999557, - "learning_rate": 0.00019999472334008824, - "loss": 46.0, - "step": 42779 - }, - { - "epoch": 3.270829749412237, - "grad_norm": 0.003651489969342947, - "learning_rate": 0.00019999472309333594, - "loss": 46.0, - "step": 42780 - }, - { - "epoch": 3.270906206395627, - "grad_norm": 0.0027810169849544764, - "learning_rate": 0.0001999947228465778, - "loss": 46.0, - "step": 42781 - }, - { - "epoch": 3.270982663379016, - "grad_norm": 0.0030302575323730707, - "learning_rate": 0.00019999472259981397, - "loss": 46.0, - "step": 42782 - }, - { - "epoch": 3.271059120362406, - "grad_norm": 0.001487372675910592, - "learning_rate": 0.00019999472235304432, - "loss": 46.0, - "step": 42783 - }, - { - "epoch": 3.2711355773457957, - "grad_norm": 0.0013298442354425788, - "learning_rate": 0.0001999947221062689, - "loss": 46.0, - "step": 42784 - }, - { - "epoch": 3.2712120343291855, - "grad_norm": 0.0004484287928789854, - "learning_rate": 0.00019999472185948774, - "loss": 46.0, - "step": 42785 - }, - { - "epoch": 3.2712884913125753, - "grad_norm": 0.002169565763324499, - "learning_rate": 0.0001999947216127008, - "loss": 46.0, - "step": 42786 - }, - { - "epoch": 3.271364948295965, - "grad_norm": 0.0016888410318642855, - "learning_rate": 0.00019999472136590808, - "loss": 46.0, - "step": 42787 - }, - { - "epoch": 3.271441405279355, - "grad_norm": 0.0008050664910115302, - "learning_rate": 0.0001999947211191096, - "loss": 46.0, - "step": 42788 - }, - { - "epoch": 3.2715178622627445, - "grad_norm": 0.005234782118350267, - "learning_rate": 0.00019999472087230534, - "loss": 46.0, - "step": 42789 - }, - { - "epoch": 3.2715943192461343, - "grad_norm": 0.003743865294381976, - "learning_rate": 0.00019999472062549533, - "loss": 46.0, - "step": 42790 - }, - { - "epoch": 3.2716707762295236, - "grad_norm": 0.0015106243081390858, - "learning_rate": 0.00019999472037867953, - "loss": 46.0, - "step": 42791 - }, - { - "epoch": 3.2717472332129134, - "grad_norm": 0.0017595445970073342, - "learning_rate": 0.00019999472013185798, - "loss": 46.0, - "step": 42792 - }, - { - "epoch": 3.271823690196303, - "grad_norm": 0.001041025621816516, - "learning_rate": 0.00019999471988503065, - "loss": 46.0, - "step": 42793 - }, - { - "epoch": 3.271900147179693, - "grad_norm": 0.0046062287874519825, - "learning_rate": 0.00019999471963819753, - "loss": 46.0, - "step": 42794 - }, - { - "epoch": 3.2719766041630827, - "grad_norm": 0.0010785041376948357, - "learning_rate": 0.00019999471939135868, - "loss": 46.0, - "step": 42795 - }, - { - "epoch": 3.2720530611464724, - "grad_norm": 0.0014233095571398735, - "learning_rate": 0.00019999471914451404, - "loss": 46.0, - "step": 42796 - }, - { - "epoch": 3.272129518129862, - "grad_norm": 0.0016641061520203948, - "learning_rate": 0.00019999471889766364, - "loss": 46.0, - "step": 42797 - }, - { - "epoch": 3.272205975113252, - "grad_norm": 0.004447747487574816, - "learning_rate": 0.00019999471865080745, - "loss": 46.0, - "step": 42798 - }, - { - "epoch": 3.2722824320966417, - "grad_norm": 0.0005159921711310744, - "learning_rate": 0.00019999471840394551, - "loss": 46.0, - "step": 42799 - }, - { - "epoch": 3.2723588890800315, - "grad_norm": 0.0025304376613348722, - "learning_rate": 0.00019999471815707783, - "loss": 46.0, - "step": 42800 - }, - { - "epoch": 3.2724353460634212, - "grad_norm": 0.001097582164220512, - "learning_rate": 0.00019999471791020432, - "loss": 46.0, - "step": 42801 - }, - { - "epoch": 3.272511803046811, - "grad_norm": 0.001467219553887844, - "learning_rate": 0.00019999471766332506, - "loss": 46.0, - "step": 42802 - }, - { - "epoch": 3.2725882600302008, - "grad_norm": 0.0024594292044639587, - "learning_rate": 0.00019999471741644005, - "loss": 46.0, - "step": 42803 - }, - { - "epoch": 3.27266471701359, - "grad_norm": 0.0033360703382641077, - "learning_rate": 0.00019999471716954928, - "loss": 46.0, - "step": 42804 - }, - { - "epoch": 3.27274117399698, - "grad_norm": 0.0020716842263936996, - "learning_rate": 0.0001999947169226527, - "loss": 46.0, - "step": 42805 - }, - { - "epoch": 3.2728176309803696, - "grad_norm": 0.003088155761361122, - "learning_rate": 0.00019999471667575037, - "loss": 46.0, - "step": 42806 - }, - { - "epoch": 3.2728940879637594, - "grad_norm": 0.0012038134736940265, - "learning_rate": 0.00019999471642884227, - "loss": 46.0, - "step": 42807 - }, - { - "epoch": 3.272970544947149, - "grad_norm": 0.0037069725804030895, - "learning_rate": 0.00019999471618192843, - "loss": 46.0, - "step": 42808 - }, - { - "epoch": 3.273047001930539, - "grad_norm": 0.001407202915288508, - "learning_rate": 0.00019999471593500878, - "loss": 46.0, - "step": 42809 - }, - { - "epoch": 3.2731234589139286, - "grad_norm": 0.006372368428856134, - "learning_rate": 0.0001999947156880834, - "loss": 46.0, - "step": 42810 - }, - { - "epoch": 3.2731999158973184, - "grad_norm": 0.002292105695232749, - "learning_rate": 0.0001999947154411522, - "loss": 46.0, - "step": 42811 - }, - { - "epoch": 3.273276372880708, - "grad_norm": 0.002656138502061367, - "learning_rate": 0.00019999471519421526, - "loss": 46.0, - "step": 42812 - }, - { - "epoch": 3.2733528298640975, - "grad_norm": 0.004129037726670504, - "learning_rate": 0.00019999471494727255, - "loss": 46.0, - "step": 42813 - }, - { - "epoch": 3.2734292868474872, - "grad_norm": 0.0010024503571912646, - "learning_rate": 0.00019999471470032407, - "loss": 46.0, - "step": 42814 - }, - { - "epoch": 3.273505743830877, - "grad_norm": 0.0014185491017997265, - "learning_rate": 0.0001999947144533698, - "loss": 46.0, - "step": 42815 - }, - { - "epoch": 3.2735822008142668, - "grad_norm": 0.0015643941005691886, - "learning_rate": 0.0001999947142064098, - "loss": 46.0, - "step": 42816 - }, - { - "epoch": 3.2736586577976565, - "grad_norm": 0.002996225608512759, - "learning_rate": 0.00019999471395944402, - "loss": 46.0, - "step": 42817 - }, - { - "epoch": 3.2737351147810463, - "grad_norm": 0.001086618285626173, - "learning_rate": 0.00019999471371247247, - "loss": 46.0, - "step": 42818 - }, - { - "epoch": 3.273811571764436, - "grad_norm": 0.0010692798532545567, - "learning_rate": 0.00019999471346549515, - "loss": 46.0, - "step": 42819 - }, - { - "epoch": 3.273888028747826, - "grad_norm": 0.004911097232252359, - "learning_rate": 0.00019999471321851205, - "loss": 46.0, - "step": 42820 - }, - { - "epoch": 3.2739644857312156, - "grad_norm": 0.001528770080767572, - "learning_rate": 0.00019999471297152318, - "loss": 46.0, - "step": 42821 - }, - { - "epoch": 3.2740409427146053, - "grad_norm": 0.0030985355842858553, - "learning_rate": 0.00019999471272452853, - "loss": 46.0, - "step": 42822 - }, - { - "epoch": 3.274117399697995, - "grad_norm": 0.0020407792180776596, - "learning_rate": 0.00019999471247752814, - "loss": 46.0, - "step": 42823 - }, - { - "epoch": 3.274193856681385, - "grad_norm": 0.0012985669309273362, - "learning_rate": 0.00019999471223052195, - "loss": 46.0, - "step": 42824 - }, - { - "epoch": 3.274270313664774, - "grad_norm": 0.0006892292876727879, - "learning_rate": 0.00019999471198351004, - "loss": 46.0, - "step": 42825 - }, - { - "epoch": 3.274346770648164, - "grad_norm": 0.003025229088962078, - "learning_rate": 0.0001999947117364923, - "loss": 46.0, - "step": 42826 - }, - { - "epoch": 3.2744232276315537, - "grad_norm": 0.0018115270650014281, - "learning_rate": 0.00019999471148946882, - "loss": 46.0, - "step": 42827 - }, - { - "epoch": 3.2744996846149435, - "grad_norm": 0.00709062023088336, - "learning_rate": 0.0001999947112424396, - "loss": 46.0, - "step": 42828 - }, - { - "epoch": 3.274576141598333, - "grad_norm": 0.0016407400835305452, - "learning_rate": 0.00019999471099540456, - "loss": 46.0, - "step": 42829 - }, - { - "epoch": 3.274652598581723, - "grad_norm": 0.006136324256658554, - "learning_rate": 0.00019999471074836378, - "loss": 46.0, - "step": 42830 - }, - { - "epoch": 3.2747290555651127, - "grad_norm": 0.002394117182120681, - "learning_rate": 0.00019999471050131723, - "loss": 46.0, - "step": 42831 - }, - { - "epoch": 3.2748055125485025, - "grad_norm": 0.003427214687690139, - "learning_rate": 0.00019999471025426488, - "loss": 46.0, - "step": 42832 - }, - { - "epoch": 3.2748819695318923, - "grad_norm": 0.0014828969724476337, - "learning_rate": 0.0001999947100072068, - "loss": 46.0, - "step": 42833 - }, - { - "epoch": 3.274958426515282, - "grad_norm": 0.003951503429561853, - "learning_rate": 0.00019999470976014293, - "loss": 46.0, - "step": 42834 - }, - { - "epoch": 3.2750348834986713, - "grad_norm": 0.0011830911971628666, - "learning_rate": 0.00019999470951307332, - "loss": 46.0, - "step": 42835 - }, - { - "epoch": 3.275111340482061, - "grad_norm": 0.0014231910463422537, - "learning_rate": 0.0001999947092659979, - "loss": 46.0, - "step": 42836 - }, - { - "epoch": 3.275187797465451, - "grad_norm": 0.003736478043720126, - "learning_rate": 0.00019999470901891674, - "loss": 46.0, - "step": 42837 - }, - { - "epoch": 3.2752642544488406, - "grad_norm": 0.005280000157654285, - "learning_rate": 0.00019999470877182977, - "loss": 46.0, - "step": 42838 - }, - { - "epoch": 3.2753407114322304, - "grad_norm": 0.004943507723510265, - "learning_rate": 0.00019999470852473706, - "loss": 46.0, - "step": 42839 - }, - { - "epoch": 3.27541716841562, - "grad_norm": 0.0012493549147620797, - "learning_rate": 0.0001999947082776386, - "loss": 46.0, - "step": 42840 - }, - { - "epoch": 3.27549362539901, - "grad_norm": 0.0021634967997670174, - "learning_rate": 0.00019999470803053435, - "loss": 46.0, - "step": 42841 - }, - { - "epoch": 3.2755700823823997, - "grad_norm": 0.0015853960067033768, - "learning_rate": 0.00019999470778342434, - "loss": 46.0, - "step": 42842 - }, - { - "epoch": 3.2756465393657894, - "grad_norm": 0.0019422424957156181, - "learning_rate": 0.00019999470753630856, - "loss": 46.0, - "step": 42843 - }, - { - "epoch": 3.275722996349179, - "grad_norm": 0.0022043425124138594, - "learning_rate": 0.000199994707289187, - "loss": 46.0, - "step": 42844 - }, - { - "epoch": 3.275799453332569, - "grad_norm": 0.006030660588294268, - "learning_rate": 0.00019999470704205966, - "loss": 46.0, - "step": 42845 - }, - { - "epoch": 3.2758759103159587, - "grad_norm": 0.0009263867978006601, - "learning_rate": 0.0001999947067949266, - "loss": 46.0, - "step": 42846 - }, - { - "epoch": 3.275952367299348, - "grad_norm": 0.002589545212686062, - "learning_rate": 0.00019999470654778772, - "loss": 46.0, - "step": 42847 - }, - { - "epoch": 3.276028824282738, - "grad_norm": 0.0035401920322328806, - "learning_rate": 0.0001999947063006431, - "loss": 46.0, - "step": 42848 - }, - { - "epoch": 3.2761052812661275, - "grad_norm": 0.0013544629327952862, - "learning_rate": 0.00019999470605349269, - "loss": 46.0, - "step": 42849 - }, - { - "epoch": 3.2761817382495173, - "grad_norm": 0.003758753417059779, - "learning_rate": 0.0001999947058063365, - "loss": 46.0, - "step": 42850 - }, - { - "epoch": 3.276258195232907, - "grad_norm": 0.0010497358161956072, - "learning_rate": 0.00019999470555917456, - "loss": 46.0, - "step": 42851 - }, - { - "epoch": 3.276334652216297, - "grad_norm": 0.0034277932718396187, - "learning_rate": 0.00019999470531200685, - "loss": 46.0, - "step": 42852 - }, - { - "epoch": 3.2764111091996866, - "grad_norm": 0.005429920740425587, - "learning_rate": 0.00019999470506483336, - "loss": 46.0, - "step": 42853 - }, - { - "epoch": 3.2764875661830763, - "grad_norm": 0.003476017387583852, - "learning_rate": 0.00019999470481765413, - "loss": 46.0, - "step": 42854 - }, - { - "epoch": 3.276564023166466, - "grad_norm": 0.0038299558218568563, - "learning_rate": 0.00019999470457046913, - "loss": 46.0, - "step": 42855 - }, - { - "epoch": 3.276640480149856, - "grad_norm": 0.004274279344826937, - "learning_rate": 0.00019999470432327833, - "loss": 46.0, - "step": 42856 - }, - { - "epoch": 3.276716937133245, - "grad_norm": 0.0011316942982375622, - "learning_rate": 0.00019999470407608178, - "loss": 46.0, - "step": 42857 - }, - { - "epoch": 3.276793394116635, - "grad_norm": 0.001200154540129006, - "learning_rate": 0.00019999470382887945, - "loss": 46.0, - "step": 42858 - }, - { - "epoch": 3.2768698511000247, - "grad_norm": 0.0007008964894339442, - "learning_rate": 0.00019999470358167138, - "loss": 46.0, - "step": 42859 - }, - { - "epoch": 3.2769463080834145, - "grad_norm": 0.0013684829464182258, - "learning_rate": 0.0001999947033344575, - "loss": 46.0, - "step": 42860 - }, - { - "epoch": 3.2770227650668042, - "grad_norm": 0.00510263629257679, - "learning_rate": 0.00019999470308723787, - "loss": 46.0, - "step": 42861 - }, - { - "epoch": 3.277099222050194, - "grad_norm": 0.00334531394764781, - "learning_rate": 0.00019999470284001248, - "loss": 46.0, - "step": 42862 - }, - { - "epoch": 3.2771756790335838, - "grad_norm": 0.0015518802683800459, - "learning_rate": 0.00019999470259278131, - "loss": 46.0, - "step": 42863 - }, - { - "epoch": 3.2772521360169735, - "grad_norm": 0.0011197567218914628, - "learning_rate": 0.00019999470234554435, - "loss": 46.0, - "step": 42864 - }, - { - "epoch": 3.2773285930003633, - "grad_norm": 0.003087131306529045, - "learning_rate": 0.00019999470209830167, - "loss": 46.0, - "step": 42865 - }, - { - "epoch": 3.277405049983753, - "grad_norm": 0.0024691878352314234, - "learning_rate": 0.0001999947018510532, - "loss": 46.0, - "step": 42866 - }, - { - "epoch": 3.277481506967143, - "grad_norm": 0.002816775580868125, - "learning_rate": 0.00019999470160379893, - "loss": 46.0, - "step": 42867 - }, - { - "epoch": 3.2775579639505326, - "grad_norm": 0.001735563506372273, - "learning_rate": 0.00019999470135653892, - "loss": 46.0, - "step": 42868 - }, - { - "epoch": 3.277634420933922, - "grad_norm": 0.0023912061005830765, - "learning_rate": 0.00019999470110927315, - "loss": 46.0, - "step": 42869 - }, - { - "epoch": 3.2777108779173116, - "grad_norm": 0.002525065094232559, - "learning_rate": 0.0001999947008620016, - "loss": 46.0, - "step": 42870 - }, - { - "epoch": 3.2777873349007014, - "grad_norm": 0.0013745236210525036, - "learning_rate": 0.00019999470061472427, - "loss": 46.0, - "step": 42871 - }, - { - "epoch": 3.277863791884091, - "grad_norm": 0.0037984021473675966, - "learning_rate": 0.00019999470036744118, - "loss": 46.0, - "step": 42872 - }, - { - "epoch": 3.277940248867481, - "grad_norm": 0.0017467513680458069, - "learning_rate": 0.0001999947001201523, - "loss": 46.0, - "step": 42873 - }, - { - "epoch": 3.2780167058508707, - "grad_norm": 0.00536099448800087, - "learning_rate": 0.0001999946998728577, - "loss": 46.0, - "step": 42874 - }, - { - "epoch": 3.2780931628342604, - "grad_norm": 0.0040005166083574295, - "learning_rate": 0.00019999469962555727, - "loss": 46.0, - "step": 42875 - }, - { - "epoch": 3.27816961981765, - "grad_norm": 0.00365635147318244, - "learning_rate": 0.00019999469937825114, - "loss": 46.0, - "step": 42876 - }, - { - "epoch": 3.27824607680104, - "grad_norm": 0.0006848968914709985, - "learning_rate": 0.0001999946991309392, - "loss": 46.0, - "step": 42877 - }, - { - "epoch": 3.2783225337844297, - "grad_norm": 0.006137051619589329, - "learning_rate": 0.0001999946988836215, - "loss": 46.0, - "step": 42878 - }, - { - "epoch": 3.278398990767819, - "grad_norm": 0.0027530849911272526, - "learning_rate": 0.000199994698636298, - "loss": 46.0, - "step": 42879 - }, - { - "epoch": 3.278475447751209, - "grad_norm": 0.006251274608075619, - "learning_rate": 0.00019999469838896875, - "loss": 46.0, - "step": 42880 - }, - { - "epoch": 3.2785519047345986, - "grad_norm": 0.0039061664137989283, - "learning_rate": 0.00019999469814163375, - "loss": 46.0, - "step": 42881 - }, - { - "epoch": 3.2786283617179883, - "grad_norm": 0.0020890890154987574, - "learning_rate": 0.00019999469789429294, - "loss": 46.0, - "step": 42882 - }, - { - "epoch": 3.278704818701378, - "grad_norm": 0.0018786712316796184, - "learning_rate": 0.0001999946976469464, - "loss": 46.0, - "step": 42883 - }, - { - "epoch": 3.278781275684768, - "grad_norm": 0.002287887968122959, - "learning_rate": 0.0001999946973995941, - "loss": 46.0, - "step": 42884 - }, - { - "epoch": 3.2788577326681576, - "grad_norm": 0.001223283470608294, - "learning_rate": 0.000199994697152236, - "loss": 46.0, - "step": 42885 - }, - { - "epoch": 3.2789341896515474, - "grad_norm": 0.0014291051775217056, - "learning_rate": 0.00019999469690487213, - "loss": 46.0, - "step": 42886 - }, - { - "epoch": 3.279010646634937, - "grad_norm": 0.004400795791298151, - "learning_rate": 0.0001999946966575025, - "loss": 46.0, - "step": 42887 - }, - { - "epoch": 3.279087103618327, - "grad_norm": 0.004692132119089365, - "learning_rate": 0.00019999469641012713, - "loss": 46.0, - "step": 42888 - }, - { - "epoch": 3.2791635606017167, - "grad_norm": 0.0005094028310850263, - "learning_rate": 0.00019999469616274596, - "loss": 46.0, - "step": 42889 - }, - { - "epoch": 3.2792400175851064, - "grad_norm": 0.0022600230295211077, - "learning_rate": 0.000199994695915359, - "loss": 46.0, - "step": 42890 - }, - { - "epoch": 3.2793164745684957, - "grad_norm": 0.0023688492365181446, - "learning_rate": 0.00019999469566796632, - "loss": 46.0, - "step": 42891 - }, - { - "epoch": 3.2793929315518855, - "grad_norm": 0.0026851186994463205, - "learning_rate": 0.00019999469542056783, - "loss": 46.0, - "step": 42892 - }, - { - "epoch": 3.2794693885352753, - "grad_norm": 0.0016948075499385595, - "learning_rate": 0.00019999469517316358, - "loss": 46.0, - "step": 42893 - }, - { - "epoch": 3.279545845518665, - "grad_norm": 0.0008688248344697058, - "learning_rate": 0.00019999469492575358, - "loss": 46.0, - "step": 42894 - }, - { - "epoch": 3.2796223025020548, - "grad_norm": 0.004782327450811863, - "learning_rate": 0.0001999946946783378, - "loss": 46.0, - "step": 42895 - }, - { - "epoch": 3.2796987594854445, - "grad_norm": 0.00345169217325747, - "learning_rate": 0.00019999469443091625, - "loss": 46.0, - "step": 42896 - }, - { - "epoch": 3.2797752164688343, - "grad_norm": 0.0027687230613082647, - "learning_rate": 0.00019999469418348893, - "loss": 46.0, - "step": 42897 - }, - { - "epoch": 3.279851673452224, - "grad_norm": 0.005968145560473204, - "learning_rate": 0.00019999469393605583, - "loss": 46.0, - "step": 42898 - }, - { - "epoch": 3.279928130435614, - "grad_norm": 0.004532267339527607, - "learning_rate": 0.00019999469368861696, - "loss": 46.0, - "step": 42899 - }, - { - "epoch": 3.280004587419003, - "grad_norm": 0.002928745234385133, - "learning_rate": 0.00019999469344117237, - "loss": 46.0, - "step": 42900 - }, - { - "epoch": 3.280081044402393, - "grad_norm": 0.003344410564750433, - "learning_rate": 0.00019999469319372196, - "loss": 46.0, - "step": 42901 - }, - { - "epoch": 3.2801575013857827, - "grad_norm": 0.002041700528934598, - "learning_rate": 0.0001999946929462658, - "loss": 46.0, - "step": 42902 - }, - { - "epoch": 3.2802339583691724, - "grad_norm": 0.0010236079106107354, - "learning_rate": 0.00019999469269880383, - "loss": 46.0, - "step": 42903 - }, - { - "epoch": 3.280310415352562, - "grad_norm": 0.0013015087461099029, - "learning_rate": 0.00019999469245133615, - "loss": 46.0, - "step": 42904 - }, - { - "epoch": 3.280386872335952, - "grad_norm": 0.0018849613843485713, - "learning_rate": 0.0001999946922038627, - "loss": 46.0, - "step": 42905 - }, - { - "epoch": 3.2804633293193417, - "grad_norm": 0.0018751038005575538, - "learning_rate": 0.00019999469195638344, - "loss": 46.0, - "step": 42906 - }, - { - "epoch": 3.2805397863027315, - "grad_norm": 0.002517502522096038, - "learning_rate": 0.0001999946917088984, - "loss": 46.0, - "step": 42907 - }, - { - "epoch": 3.2806162432861212, - "grad_norm": 0.002895353129133582, - "learning_rate": 0.00019999469146140766, - "loss": 46.0, - "step": 42908 - }, - { - "epoch": 3.280692700269511, - "grad_norm": 0.0019689404871314764, - "learning_rate": 0.0001999946912139111, - "loss": 46.0, - "step": 42909 - }, - { - "epoch": 3.2807691572529007, - "grad_norm": 0.0013360092416405678, - "learning_rate": 0.00019999469096640876, - "loss": 46.0, - "step": 42910 - }, - { - "epoch": 3.2808456142362905, - "grad_norm": 0.002089686458930373, - "learning_rate": 0.00019999469071890066, - "loss": 46.0, - "step": 42911 - }, - { - "epoch": 3.2809220712196803, - "grad_norm": 0.0035643037408590317, - "learning_rate": 0.00019999469047138685, - "loss": 46.0, - "step": 42912 - }, - { - "epoch": 3.2809985282030696, - "grad_norm": 0.004932517185807228, - "learning_rate": 0.00019999469022386718, - "loss": 46.0, - "step": 42913 - }, - { - "epoch": 3.2810749851864593, - "grad_norm": 0.003204831387847662, - "learning_rate": 0.00019999468997634182, - "loss": 46.0, - "step": 42914 - }, - { - "epoch": 3.281151442169849, - "grad_norm": 0.00418286956846714, - "learning_rate": 0.00019999468972881063, - "loss": 46.0, - "step": 42915 - }, - { - "epoch": 3.281227899153239, - "grad_norm": 0.0016691904747858644, - "learning_rate": 0.00019999468948127372, - "loss": 46.0, - "step": 42916 - }, - { - "epoch": 3.2813043561366286, - "grad_norm": 0.002681855810806155, - "learning_rate": 0.00019999468923373098, - "loss": 46.0, - "step": 42917 - }, - { - "epoch": 3.2813808131200184, - "grad_norm": 0.0023132087662816048, - "learning_rate": 0.00019999468898618253, - "loss": 46.0, - "step": 42918 - }, - { - "epoch": 3.281457270103408, - "grad_norm": 0.0029189232736825943, - "learning_rate": 0.00019999468873862827, - "loss": 46.0, - "step": 42919 - }, - { - "epoch": 3.281533727086798, - "grad_norm": 0.0006030069198459387, - "learning_rate": 0.00019999468849106827, - "loss": 46.0, - "step": 42920 - }, - { - "epoch": 3.2816101840701877, - "grad_norm": 0.002316718455404043, - "learning_rate": 0.00019999468824350252, - "loss": 46.0, - "step": 42921 - }, - { - "epoch": 3.281686641053577, - "grad_norm": 0.0012835246743634343, - "learning_rate": 0.00019999468799593095, - "loss": 46.0, - "step": 42922 - }, - { - "epoch": 3.2817630980369668, - "grad_norm": 0.002023693872615695, - "learning_rate": 0.00019999468774835362, - "loss": 46.0, - "step": 42923 - }, - { - "epoch": 3.2818395550203565, - "grad_norm": 0.011228973045945168, - "learning_rate": 0.00019999468750077056, - "loss": 46.0, - "step": 42924 - }, - { - "epoch": 3.2819160120037463, - "grad_norm": 0.003020940348505974, - "learning_rate": 0.0001999946872531817, - "loss": 46.0, - "step": 42925 - }, - { - "epoch": 3.281992468987136, - "grad_norm": 0.0020151061471551657, - "learning_rate": 0.00019999468700558705, - "loss": 46.0, - "step": 42926 - }, - { - "epoch": 3.282068925970526, - "grad_norm": 0.005133095663040876, - "learning_rate": 0.00019999468675798668, - "loss": 46.0, - "step": 42927 - }, - { - "epoch": 3.2821453829539156, - "grad_norm": 0.0049695041961967945, - "learning_rate": 0.00019999468651038052, - "loss": 46.0, - "step": 42928 - }, - { - "epoch": 3.2822218399373053, - "grad_norm": 0.0011481387773528695, - "learning_rate": 0.0001999946862627686, - "loss": 46.0, - "step": 42929 - }, - { - "epoch": 3.282298296920695, - "grad_norm": 0.0010705229360610247, - "learning_rate": 0.00019999468601515088, - "loss": 46.0, - "step": 42930 - }, - { - "epoch": 3.282374753904085, - "grad_norm": 0.0011284570209681988, - "learning_rate": 0.0001999946857675274, - "loss": 46.0, - "step": 42931 - }, - { - "epoch": 3.2824512108874746, - "grad_norm": 0.0024002110585570335, - "learning_rate": 0.00019999468551989814, - "loss": 46.0, - "step": 42932 - }, - { - "epoch": 3.2825276678708644, - "grad_norm": 0.0017078750533983111, - "learning_rate": 0.00019999468527226314, - "loss": 46.0, - "step": 42933 - }, - { - "epoch": 3.282604124854254, - "grad_norm": 0.0032187087927013636, - "learning_rate": 0.00019999468502462236, - "loss": 46.0, - "step": 42934 - }, - { - "epoch": 3.2826805818376434, - "grad_norm": 0.0019785864278674126, - "learning_rate": 0.00019999468477697582, - "loss": 46.0, - "step": 42935 - }, - { - "epoch": 3.282757038821033, - "grad_norm": 0.005571179557591677, - "learning_rate": 0.0001999946845293235, - "loss": 46.0, - "step": 42936 - }, - { - "epoch": 3.282833495804423, - "grad_norm": 0.002684182720258832, - "learning_rate": 0.00019999468428166543, - "loss": 46.0, - "step": 42937 - }, - { - "epoch": 3.2829099527878127, - "grad_norm": 0.002035718411207199, - "learning_rate": 0.00019999468403400156, - "loss": 46.0, - "step": 42938 - }, - { - "epoch": 3.2829864097712025, - "grad_norm": 0.0018606202211230993, - "learning_rate": 0.00019999468378633194, - "loss": 46.0, - "step": 42939 - }, - { - "epoch": 3.2830628667545922, - "grad_norm": 0.0024310534354299307, - "learning_rate": 0.00019999468353865653, - "loss": 46.0, - "step": 42940 - }, - { - "epoch": 3.283139323737982, - "grad_norm": 0.004504888318479061, - "learning_rate": 0.0001999946832909754, - "loss": 46.0, - "step": 42941 - }, - { - "epoch": 3.2832157807213718, - "grad_norm": 0.003242659382522106, - "learning_rate": 0.00019999468304328846, - "loss": 46.0, - "step": 42942 - }, - { - "epoch": 3.2832922377047615, - "grad_norm": 0.0030412704218178988, - "learning_rate": 0.00019999468279559578, - "loss": 46.0, - "step": 42943 - }, - { - "epoch": 3.283368694688151, - "grad_norm": 0.0030782849062234163, - "learning_rate": 0.0001999946825478973, - "loss": 46.0, - "step": 42944 - }, - { - "epoch": 3.2834451516715406, - "grad_norm": 0.0017058249795809388, - "learning_rate": 0.00019999468230019304, - "loss": 46.0, - "step": 42945 - }, - { - "epoch": 3.2835216086549304, - "grad_norm": 0.0014666904462501407, - "learning_rate": 0.00019999468205248304, - "loss": 46.0, - "step": 42946 - }, - { - "epoch": 3.28359806563832, - "grad_norm": 0.001175355981104076, - "learning_rate": 0.00019999468180476726, - "loss": 46.0, - "step": 42947 - }, - { - "epoch": 3.28367452262171, - "grad_norm": 0.003088381141424179, - "learning_rate": 0.00019999468155704571, - "loss": 46.0, - "step": 42948 - }, - { - "epoch": 3.2837509796050997, - "grad_norm": 0.0030383828561753035, - "learning_rate": 0.0001999946813093184, - "loss": 46.0, - "step": 42949 - }, - { - "epoch": 3.2838274365884894, - "grad_norm": 0.001755392411723733, - "learning_rate": 0.00019999468106158533, - "loss": 46.0, - "step": 42950 - }, - { - "epoch": 3.283903893571879, - "grad_norm": 0.001585042686201632, - "learning_rate": 0.00019999468081384646, - "loss": 46.0, - "step": 42951 - }, - { - "epoch": 3.283980350555269, - "grad_norm": 0.005337897222489119, - "learning_rate": 0.00019999468056610184, - "loss": 46.0, - "step": 42952 - }, - { - "epoch": 3.2840568075386587, - "grad_norm": 0.0006694708717986941, - "learning_rate": 0.00019999468031835145, - "loss": 46.0, - "step": 42953 - }, - { - "epoch": 3.2841332645220485, - "grad_norm": 0.001618560403585434, - "learning_rate": 0.00019999468007059527, - "loss": 46.0, - "step": 42954 - }, - { - "epoch": 3.284209721505438, - "grad_norm": 0.002989501226693392, - "learning_rate": 0.00019999467982283336, - "loss": 46.0, - "step": 42955 - }, - { - "epoch": 3.2842861784888275, - "grad_norm": 0.006691244896501303, - "learning_rate": 0.00019999467957506565, - "loss": 46.0, - "step": 42956 - }, - { - "epoch": 3.2843626354722173, - "grad_norm": 0.0016134664183482528, - "learning_rate": 0.0001999946793272922, - "loss": 46.0, - "step": 42957 - }, - { - "epoch": 3.284439092455607, - "grad_norm": 0.0011493315687403083, - "learning_rate": 0.00019999467907951294, - "loss": 46.0, - "step": 42958 - }, - { - "epoch": 3.284515549438997, - "grad_norm": 0.0014053423656150699, - "learning_rate": 0.00019999467883172797, - "loss": 46.0, - "step": 42959 - }, - { - "epoch": 3.2845920064223866, - "grad_norm": 0.0020455457270145416, - "learning_rate": 0.00019999467858393717, - "loss": 46.0, - "step": 42960 - }, - { - "epoch": 3.2846684634057763, - "grad_norm": 0.004131302237510681, - "learning_rate": 0.00019999467833614062, - "loss": 46.0, - "step": 42961 - }, - { - "epoch": 3.284744920389166, - "grad_norm": 0.0031100099440664053, - "learning_rate": 0.0001999946780883383, - "loss": 46.0, - "step": 42962 - }, - { - "epoch": 3.284821377372556, - "grad_norm": 0.0022978046908974648, - "learning_rate": 0.0001999946778405302, - "loss": 46.0, - "step": 42963 - }, - { - "epoch": 3.2848978343559456, - "grad_norm": 0.0024684544187039137, - "learning_rate": 0.00019999467759271636, - "loss": 46.0, - "step": 42964 - }, - { - "epoch": 3.2849742913393354, - "grad_norm": 0.0068963258527219296, - "learning_rate": 0.00019999467734489675, - "loss": 46.0, - "step": 42965 - }, - { - "epoch": 3.2850507483227247, - "grad_norm": 0.0011998926056548953, - "learning_rate": 0.00019999467709707136, - "loss": 46.0, - "step": 42966 - }, - { - "epoch": 3.2851272053061145, - "grad_norm": 0.003674176288768649, - "learning_rate": 0.00019999467684924017, - "loss": 46.0, - "step": 42967 - }, - { - "epoch": 3.2852036622895042, - "grad_norm": 0.0021530287340283394, - "learning_rate": 0.00019999467660140326, - "loss": 46.0, - "step": 42968 - }, - { - "epoch": 3.285280119272894, - "grad_norm": 0.004675593227148056, - "learning_rate": 0.00019999467635356056, - "loss": 46.0, - "step": 42969 - }, - { - "epoch": 3.2853565762562837, - "grad_norm": 0.0016175052151083946, - "learning_rate": 0.0001999946761057121, - "loss": 46.0, - "step": 42970 - }, - { - "epoch": 3.2854330332396735, - "grad_norm": 0.0014991143252700567, - "learning_rate": 0.00019999467585785788, - "loss": 46.0, - "step": 42971 - }, - { - "epoch": 3.2855094902230633, - "grad_norm": 0.0006681408267468214, - "learning_rate": 0.00019999467560999785, - "loss": 46.0, - "step": 42972 - }, - { - "epoch": 3.285585947206453, - "grad_norm": 0.0006874622777104378, - "learning_rate": 0.00019999467536213207, - "loss": 46.0, - "step": 42973 - }, - { - "epoch": 3.285662404189843, - "grad_norm": 0.0009981076000258327, - "learning_rate": 0.00019999467511426053, - "loss": 46.0, - "step": 42974 - }, - { - "epoch": 3.2857388611732326, - "grad_norm": 0.002620001556351781, - "learning_rate": 0.00019999467486638323, - "loss": 46.0, - "step": 42975 - }, - { - "epoch": 3.2858153181566223, - "grad_norm": 0.0022467144299298525, - "learning_rate": 0.00019999467461850014, - "loss": 46.0, - "step": 42976 - }, - { - "epoch": 3.285891775140012, - "grad_norm": 0.0023586770985275507, - "learning_rate": 0.00019999467437061127, - "loss": 46.0, - "step": 42977 - }, - { - "epoch": 3.2859682321234014, - "grad_norm": 0.0022540080826729536, - "learning_rate": 0.00019999467412271666, - "loss": 46.0, - "step": 42978 - }, - { - "epoch": 3.286044689106791, - "grad_norm": 0.0036533037200570107, - "learning_rate": 0.00019999467387481627, - "loss": 46.0, - "step": 42979 - }, - { - "epoch": 3.286121146090181, - "grad_norm": 0.0020512158516794443, - "learning_rate": 0.00019999467362691014, - "loss": 46.0, - "step": 42980 - }, - { - "epoch": 3.2861976030735707, - "grad_norm": 0.0007933160522952676, - "learning_rate": 0.00019999467337899817, - "loss": 46.0, - "step": 42981 - }, - { - "epoch": 3.2862740600569604, - "grad_norm": 0.004828393924981356, - "learning_rate": 0.0001999946731310805, - "loss": 46.0, - "step": 42982 - }, - { - "epoch": 3.28635051704035, - "grad_norm": 0.0024523879401385784, - "learning_rate": 0.00019999467288315704, - "loss": 46.0, - "step": 42983 - }, - { - "epoch": 3.28642697402374, - "grad_norm": 0.0038373444695025682, - "learning_rate": 0.00019999467263522779, - "loss": 46.0, - "step": 42984 - }, - { - "epoch": 3.2865034310071297, - "grad_norm": 0.0010139079531654716, - "learning_rate": 0.00019999467238729279, - "loss": 46.0, - "step": 42985 - }, - { - "epoch": 3.2865798879905195, - "grad_norm": 0.0021915417164564133, - "learning_rate": 0.000199994672139352, - "loss": 46.0, - "step": 42986 - }, - { - "epoch": 3.2866563449739092, - "grad_norm": 0.0013777019921690226, - "learning_rate": 0.00019999467189140546, - "loss": 46.0, - "step": 42987 - }, - { - "epoch": 3.2867328019572986, - "grad_norm": 0.0024014387745410204, - "learning_rate": 0.00019999467164345314, - "loss": 46.0, - "step": 42988 - }, - { - "epoch": 3.2868092589406883, - "grad_norm": 0.0014577845577150583, - "learning_rate": 0.00019999467139549508, - "loss": 46.0, - "step": 42989 - }, - { - "epoch": 3.286885715924078, - "grad_norm": 0.0010902235517278314, - "learning_rate": 0.0001999946711475312, - "loss": 46.0, - "step": 42990 - }, - { - "epoch": 3.286962172907468, - "grad_norm": 0.016242684796452522, - "learning_rate": 0.0001999946708995616, - "loss": 46.0, - "step": 42991 - }, - { - "epoch": 3.2870386298908576, - "grad_norm": 0.006492890417575836, - "learning_rate": 0.0001999946706515862, - "loss": 46.0, - "step": 42992 - }, - { - "epoch": 3.2871150868742474, - "grad_norm": 0.0013405898353084922, - "learning_rate": 0.00019999467040360505, - "loss": 46.0, - "step": 42993 - }, - { - "epoch": 3.287191543857637, - "grad_norm": 0.00208151969127357, - "learning_rate": 0.00019999467015561812, - "loss": 46.0, - "step": 42994 - }, - { - "epoch": 3.287268000841027, - "grad_norm": 0.0007782382308505476, - "learning_rate": 0.00019999466990762544, - "loss": 46.0, - "step": 42995 - }, - { - "epoch": 3.2873444578244166, - "grad_norm": 0.00380100985057652, - "learning_rate": 0.00019999466965962695, - "loss": 46.0, - "step": 42996 - }, - { - "epoch": 3.2874209148078064, - "grad_norm": 0.0020033137407153845, - "learning_rate": 0.00019999466941162273, - "loss": 46.0, - "step": 42997 - }, - { - "epoch": 3.287497371791196, - "grad_norm": 0.0029079923406243324, - "learning_rate": 0.0001999946691636127, - "loss": 46.0, - "step": 42998 - }, - { - "epoch": 3.287573828774586, - "grad_norm": 0.002057442208752036, - "learning_rate": 0.00019999466891559695, - "loss": 46.0, - "step": 42999 - }, - { - "epoch": 3.2876502857579752, - "grad_norm": 0.0053220042027533054, - "learning_rate": 0.0001999946686675754, - "loss": 46.0, - "step": 43000 - }, - { - "epoch": 3.287726742741365, - "grad_norm": 0.0024718421045690775, - "learning_rate": 0.00019999466841954811, - "loss": 46.0, - "step": 43001 - }, - { - "epoch": 3.2878031997247548, - "grad_norm": 0.0020546005107462406, - "learning_rate": 0.00019999466817151502, - "loss": 46.0, - "step": 43002 - }, - { - "epoch": 3.2878796567081445, - "grad_norm": 0.0019832036923617125, - "learning_rate": 0.00019999466792347618, - "loss": 46.0, - "step": 43003 - }, - { - "epoch": 3.2879561136915343, - "grad_norm": 0.0011489823227748275, - "learning_rate": 0.00019999466767543157, - "loss": 46.0, - "step": 43004 - }, - { - "epoch": 3.288032570674924, - "grad_norm": 0.00465560844168067, - "learning_rate": 0.00019999466742738115, - "loss": 46.0, - "step": 43005 - }, - { - "epoch": 3.288109027658314, - "grad_norm": 0.0016144949477165937, - "learning_rate": 0.000199994667179325, - "loss": 46.0, - "step": 43006 - }, - { - "epoch": 3.2881854846417036, - "grad_norm": 0.000845888105686754, - "learning_rate": 0.0001999946669312631, - "loss": 46.0, - "step": 43007 - }, - { - "epoch": 3.2882619416250933, - "grad_norm": 0.001992309233173728, - "learning_rate": 0.00019999466668319538, - "loss": 46.0, - "step": 43008 - }, - { - "epoch": 3.288338398608483, - "grad_norm": 0.0037813507951796055, - "learning_rate": 0.00019999466643512193, - "loss": 46.0, - "step": 43009 - }, - { - "epoch": 3.2884148555918724, - "grad_norm": 0.002345156157389283, - "learning_rate": 0.0001999946661870427, - "loss": 46.0, - "step": 43010 - }, - { - "epoch": 3.288491312575262, - "grad_norm": 0.0035937039647251368, - "learning_rate": 0.00019999466593895767, - "loss": 46.0, - "step": 43011 - }, - { - "epoch": 3.288567769558652, - "grad_norm": 0.0035123745910823345, - "learning_rate": 0.0001999946656908669, - "loss": 46.0, - "step": 43012 - }, - { - "epoch": 3.2886442265420417, - "grad_norm": 0.0006592206191271544, - "learning_rate": 0.00019999466544277035, - "loss": 46.0, - "step": 43013 - }, - { - "epoch": 3.2887206835254315, - "grad_norm": 0.001796973985619843, - "learning_rate": 0.00019999466519466806, - "loss": 46.0, - "step": 43014 - }, - { - "epoch": 3.288797140508821, - "grad_norm": 0.0012087718350812793, - "learning_rate": 0.00019999466494656, - "loss": 46.0, - "step": 43015 - }, - { - "epoch": 3.288873597492211, - "grad_norm": 0.0029219805728644133, - "learning_rate": 0.00019999466469844613, - "loss": 46.0, - "step": 43016 - }, - { - "epoch": 3.2889500544756007, - "grad_norm": 0.0066736540757119656, - "learning_rate": 0.00019999466445032652, - "loss": 46.0, - "step": 43017 - }, - { - "epoch": 3.2890265114589905, - "grad_norm": 0.0018083583563566208, - "learning_rate": 0.00019999466420220113, - "loss": 46.0, - "step": 43018 - }, - { - "epoch": 3.2891029684423803, - "grad_norm": 0.0044660489074885845, - "learning_rate": 0.00019999466395406997, - "loss": 46.0, - "step": 43019 - }, - { - "epoch": 3.28917942542577, - "grad_norm": 0.0007229259354062378, - "learning_rate": 0.00019999466370593304, - "loss": 46.0, - "step": 43020 - }, - { - "epoch": 3.28925588240916, - "grad_norm": 0.0012154581490904093, - "learning_rate": 0.00019999466345779036, - "loss": 46.0, - "step": 43021 - }, - { - "epoch": 3.289332339392549, - "grad_norm": 0.0018211901187896729, - "learning_rate": 0.00019999466320964188, - "loss": 46.0, - "step": 43022 - }, - { - "epoch": 3.289408796375939, - "grad_norm": 0.0024893921799957752, - "learning_rate": 0.00019999466296148765, - "loss": 46.0, - "step": 43023 - }, - { - "epoch": 3.2894852533593286, - "grad_norm": 0.0020806242246180773, - "learning_rate": 0.00019999466271332765, - "loss": 46.0, - "step": 43024 - }, - { - "epoch": 3.2895617103427184, - "grad_norm": 0.002393683884292841, - "learning_rate": 0.0001999946624651619, - "loss": 46.0, - "step": 43025 - }, - { - "epoch": 3.289638167326108, - "grad_norm": 0.0032088824082165956, - "learning_rate": 0.00019999466221699036, - "loss": 46.0, - "step": 43026 - }, - { - "epoch": 3.289714624309498, - "grad_norm": 0.0015087004285305738, - "learning_rate": 0.000199994661968813, - "loss": 46.0, - "step": 43027 - }, - { - "epoch": 3.2897910812928877, - "grad_norm": 0.0026160881388932467, - "learning_rate": 0.00019999466172062995, - "loss": 46.0, - "step": 43028 - }, - { - "epoch": 3.2898675382762774, - "grad_norm": 0.0017488918965682387, - "learning_rate": 0.0001999946614724411, - "loss": 46.0, - "step": 43029 - }, - { - "epoch": 3.289943995259667, - "grad_norm": 0.0025165732949972153, - "learning_rate": 0.0001999946612242465, - "loss": 46.0, - "step": 43030 - }, - { - "epoch": 3.2900204522430565, - "grad_norm": 0.002355508739128709, - "learning_rate": 0.0001999946609760461, - "loss": 46.0, - "step": 43031 - }, - { - "epoch": 3.2900969092264463, - "grad_norm": 0.0003302995173726231, - "learning_rate": 0.00019999466072783998, - "loss": 46.0, - "step": 43032 - }, - { - "epoch": 3.290173366209836, - "grad_norm": 0.010817056521773338, - "learning_rate": 0.00019999466047962802, - "loss": 46.0, - "step": 43033 - }, - { - "epoch": 3.290249823193226, - "grad_norm": 0.0010188055457547307, - "learning_rate": 0.00019999466023141034, - "loss": 46.0, - "step": 43034 - }, - { - "epoch": 3.2903262801766155, - "grad_norm": 0.00414435938000679, - "learning_rate": 0.00019999465998318686, - "loss": 46.0, - "step": 43035 - }, - { - "epoch": 3.2904027371600053, - "grad_norm": 0.0035603779833763838, - "learning_rate": 0.00019999465973495766, - "loss": 46.0, - "step": 43036 - }, - { - "epoch": 3.290479194143395, - "grad_norm": 0.0033431260380893946, - "learning_rate": 0.00019999465948672263, - "loss": 46.0, - "step": 43037 - }, - { - "epoch": 3.290555651126785, - "grad_norm": 0.0009001412545330822, - "learning_rate": 0.00019999465923848186, - "loss": 46.0, - "step": 43038 - }, - { - "epoch": 3.2906321081101746, - "grad_norm": 0.0007501213112846017, - "learning_rate": 0.00019999465899023532, - "loss": 46.0, - "step": 43039 - }, - { - "epoch": 3.2907085650935644, - "grad_norm": 0.0036481693387031555, - "learning_rate": 0.00019999465874198302, - "loss": 46.0, - "step": 43040 - }, - { - "epoch": 3.290785022076954, - "grad_norm": 0.0014177331468090415, - "learning_rate": 0.00019999465849372493, - "loss": 46.0, - "step": 43041 - }, - { - "epoch": 3.290861479060344, - "grad_norm": 0.003811736823990941, - "learning_rate": 0.00019999465824546112, - "loss": 46.0, - "step": 43042 - }, - { - "epoch": 3.2909379360437336, - "grad_norm": 0.0017111067427322268, - "learning_rate": 0.0001999946579971915, - "loss": 46.0, - "step": 43043 - }, - { - "epoch": 3.291014393027123, - "grad_norm": 0.0007644662400707603, - "learning_rate": 0.0001999946577489161, - "loss": 46.0, - "step": 43044 - }, - { - "epoch": 3.2910908500105127, - "grad_norm": 0.005183683708310127, - "learning_rate": 0.00019999465750063496, - "loss": 46.0, - "step": 43045 - }, - { - "epoch": 3.2911673069939025, - "grad_norm": 0.0016056194435805082, - "learning_rate": 0.00019999465725234803, - "loss": 46.0, - "step": 43046 - }, - { - "epoch": 3.2912437639772922, - "grad_norm": 0.0008942779386416078, - "learning_rate": 0.00019999465700405532, - "loss": 46.0, - "step": 43047 - }, - { - "epoch": 3.291320220960682, - "grad_norm": 0.002565128030255437, - "learning_rate": 0.00019999465675575687, - "loss": 46.0, - "step": 43048 - }, - { - "epoch": 3.2913966779440718, - "grad_norm": 0.0014598025009036064, - "learning_rate": 0.00019999465650745265, - "loss": 46.0, - "step": 43049 - }, - { - "epoch": 3.2914731349274615, - "grad_norm": 0.0007604403654113412, - "learning_rate": 0.00019999465625914265, - "loss": 46.0, - "step": 43050 - }, - { - "epoch": 3.2915495919108513, - "grad_norm": 0.0021494082175195217, - "learning_rate": 0.00019999465601082687, - "loss": 46.0, - "step": 43051 - }, - { - "epoch": 3.291626048894241, - "grad_norm": 0.0008570743957534432, - "learning_rate": 0.00019999465576250533, - "loss": 46.0, - "step": 43052 - }, - { - "epoch": 3.2917025058776304, - "grad_norm": 0.0012927136849611998, - "learning_rate": 0.000199994655514178, - "loss": 46.0, - "step": 43053 - }, - { - "epoch": 3.29177896286102, - "grad_norm": 0.0027778660878539085, - "learning_rate": 0.00019999465526584494, - "loss": 46.0, - "step": 43054 - }, - { - "epoch": 3.29185541984441, - "grad_norm": 0.0015423414297401905, - "learning_rate": 0.0001999946550175061, - "loss": 46.0, - "step": 43055 - }, - { - "epoch": 3.2919318768277996, - "grad_norm": 0.0027668089605867863, - "learning_rate": 0.0001999946547691615, - "loss": 46.0, - "step": 43056 - }, - { - "epoch": 3.2920083338111894, - "grad_norm": 0.001075509237125516, - "learning_rate": 0.0001999946545208111, - "loss": 46.0, - "step": 43057 - }, - { - "epoch": 3.292084790794579, - "grad_norm": 0.0024258026387542486, - "learning_rate": 0.00019999465427245495, - "loss": 46.0, - "step": 43058 - }, - { - "epoch": 3.292161247777969, - "grad_norm": 0.0016378405271098018, - "learning_rate": 0.00019999465402409304, - "loss": 46.0, - "step": 43059 - }, - { - "epoch": 3.2922377047613587, - "grad_norm": 0.001945990021340549, - "learning_rate": 0.00019999465377572534, - "loss": 46.0, - "step": 43060 - }, - { - "epoch": 3.2923141617447484, - "grad_norm": 0.0009554408024996519, - "learning_rate": 0.0001999946535273519, - "loss": 46.0, - "step": 43061 - }, - { - "epoch": 3.292390618728138, - "grad_norm": 0.0030981251038610935, - "learning_rate": 0.00019999465327897266, - "loss": 46.0, - "step": 43062 - }, - { - "epoch": 3.292467075711528, - "grad_norm": 0.0012822715798392892, - "learning_rate": 0.00019999465303058766, - "loss": 46.0, - "step": 43063 - }, - { - "epoch": 3.2925435326949177, - "grad_norm": 0.0017161233117803931, - "learning_rate": 0.0001999946527821969, - "loss": 46.0, - "step": 43064 - }, - { - "epoch": 3.2926199896783075, - "grad_norm": 0.0021098225843161345, - "learning_rate": 0.00019999465253380035, - "loss": 46.0, - "step": 43065 - }, - { - "epoch": 3.292696446661697, - "grad_norm": 0.00217106263153255, - "learning_rate": 0.00019999465228539805, - "loss": 46.0, - "step": 43066 - }, - { - "epoch": 3.2927729036450866, - "grad_norm": 0.0012985338689759374, - "learning_rate": 0.00019999465203698996, - "loss": 46.0, - "step": 43067 - }, - { - "epoch": 3.2928493606284763, - "grad_norm": 0.0030153896659612656, - "learning_rate": 0.00019999465178857612, - "loss": 46.0, - "step": 43068 - }, - { - "epoch": 3.292925817611866, - "grad_norm": 0.0032734698615968227, - "learning_rate": 0.0001999946515401565, - "loss": 46.0, - "step": 43069 - }, - { - "epoch": 3.293002274595256, - "grad_norm": 0.0009464027243666351, - "learning_rate": 0.00019999465129173113, - "loss": 46.0, - "step": 43070 - }, - { - "epoch": 3.2930787315786456, - "grad_norm": 0.0007659874972887337, - "learning_rate": 0.0001999946510433, - "loss": 46.0, - "step": 43071 - }, - { - "epoch": 3.2931551885620354, - "grad_norm": 0.0020798335317522287, - "learning_rate": 0.00019999465079486306, - "loss": 46.0, - "step": 43072 - }, - { - "epoch": 3.293231645545425, - "grad_norm": 0.0021051503717899323, - "learning_rate": 0.00019999465054642036, - "loss": 46.0, - "step": 43073 - }, - { - "epoch": 3.293308102528815, - "grad_norm": 0.001819591037929058, - "learning_rate": 0.00019999465029797193, - "loss": 46.0, - "step": 43074 - }, - { - "epoch": 3.293384559512204, - "grad_norm": 0.0013232157798483968, - "learning_rate": 0.00019999465004951768, - "loss": 46.0, - "step": 43075 - }, - { - "epoch": 3.293461016495594, - "grad_norm": 0.0029392847791314125, - "learning_rate": 0.00019999464980105768, - "loss": 46.0, - "step": 43076 - }, - { - "epoch": 3.2935374734789837, - "grad_norm": 0.002965545281767845, - "learning_rate": 0.0001999946495525919, - "loss": 46.0, - "step": 43077 - }, - { - "epoch": 3.2936139304623735, - "grad_norm": 0.0026470404118299484, - "learning_rate": 0.0001999946493041204, - "loss": 46.0, - "step": 43078 - }, - { - "epoch": 3.2936903874457633, - "grad_norm": 0.0034660804085433483, - "learning_rate": 0.00019999464905564308, - "loss": 46.0, - "step": 43079 - }, - { - "epoch": 3.293766844429153, - "grad_norm": 0.004432412795722485, - "learning_rate": 0.00019999464880716, - "loss": 46.0, - "step": 43080 - }, - { - "epoch": 3.293843301412543, - "grad_norm": 0.0048272740095853806, - "learning_rate": 0.00019999464855867115, - "loss": 46.0, - "step": 43081 - }, - { - "epoch": 3.2939197583959325, - "grad_norm": 0.0006613717414438725, - "learning_rate": 0.00019999464831017656, - "loss": 46.0, - "step": 43082 - }, - { - "epoch": 3.2939962153793223, - "grad_norm": 0.0035553157795220613, - "learning_rate": 0.00019999464806167615, - "loss": 46.0, - "step": 43083 - }, - { - "epoch": 3.294072672362712, - "grad_norm": 0.001576737966388464, - "learning_rate": 0.00019999464781317002, - "loss": 46.0, - "step": 43084 - }, - { - "epoch": 3.294149129346102, - "grad_norm": 0.001536518451757729, - "learning_rate": 0.0001999946475646581, - "loss": 46.0, - "step": 43085 - }, - { - "epoch": 3.2942255863294916, - "grad_norm": 0.0013201546389609575, - "learning_rate": 0.0001999946473161404, - "loss": 46.0, - "step": 43086 - }, - { - "epoch": 3.294302043312881, - "grad_norm": 0.0020425517577677965, - "learning_rate": 0.00019999464706761696, - "loss": 46.0, - "step": 43087 - }, - { - "epoch": 3.2943785002962707, - "grad_norm": 0.0040287296287715435, - "learning_rate": 0.00019999464681908773, - "loss": 46.0, - "step": 43088 - }, - { - "epoch": 3.2944549572796604, - "grad_norm": 0.0027974038384854794, - "learning_rate": 0.00019999464657055274, - "loss": 46.0, - "step": 43089 - }, - { - "epoch": 3.29453141426305, - "grad_norm": 0.0019716296810656786, - "learning_rate": 0.00019999464632201197, - "loss": 46.0, - "step": 43090 - }, - { - "epoch": 3.29460787124644, - "grad_norm": 0.0012575959553942084, - "learning_rate": 0.00019999464607346542, - "loss": 46.0, - "step": 43091 - }, - { - "epoch": 3.2946843282298297, - "grad_norm": 0.0026781733613461256, - "learning_rate": 0.00019999464582491313, - "loss": 46.0, - "step": 43092 - }, - { - "epoch": 3.2947607852132195, - "grad_norm": 0.0016945755342021585, - "learning_rate": 0.00019999464557635507, - "loss": 46.0, - "step": 43093 - }, - { - "epoch": 3.2948372421966092, - "grad_norm": 0.0014913951745256782, - "learning_rate": 0.00019999464532779123, - "loss": 46.0, - "step": 43094 - }, - { - "epoch": 3.294913699179999, - "grad_norm": 0.001143215806223452, - "learning_rate": 0.00019999464507922162, - "loss": 46.0, - "step": 43095 - }, - { - "epoch": 3.2949901561633888, - "grad_norm": 0.005004389211535454, - "learning_rate": 0.00019999464483064623, - "loss": 46.0, - "step": 43096 - }, - { - "epoch": 3.295066613146778, - "grad_norm": 0.0038042457308620214, - "learning_rate": 0.00019999464458206507, - "loss": 46.0, - "step": 43097 - }, - { - "epoch": 3.295143070130168, - "grad_norm": 0.0010423966450616717, - "learning_rate": 0.00019999464433347814, - "loss": 46.0, - "step": 43098 - }, - { - "epoch": 3.2952195271135576, - "grad_norm": 0.0007346981437876821, - "learning_rate": 0.00019999464408488547, - "loss": 46.0, - "step": 43099 - }, - { - "epoch": 3.2952959840969474, - "grad_norm": 0.0009377679089084268, - "learning_rate": 0.00019999464383628702, - "loss": 46.0, - "step": 43100 - }, - { - "epoch": 3.295372441080337, - "grad_norm": 0.0013458920875564218, - "learning_rate": 0.0001999946435876828, - "loss": 46.0, - "step": 43101 - }, - { - "epoch": 3.295448898063727, - "grad_norm": 0.000986292609013617, - "learning_rate": 0.0001999946433390728, - "loss": 46.0, - "step": 43102 - }, - { - "epoch": 3.2955253550471166, - "grad_norm": 0.011193119920790195, - "learning_rate": 0.00019999464309045702, - "loss": 46.0, - "step": 43103 - }, - { - "epoch": 3.2956018120305064, - "grad_norm": 0.002467630198225379, - "learning_rate": 0.00019999464284183548, - "loss": 46.0, - "step": 43104 - }, - { - "epoch": 3.295678269013896, - "grad_norm": 0.00205299467779696, - "learning_rate": 0.0001999946425932082, - "loss": 46.0, - "step": 43105 - }, - { - "epoch": 3.295754725997286, - "grad_norm": 0.0013569977600127459, - "learning_rate": 0.00019999464234457512, - "loss": 46.0, - "step": 43106 - }, - { - "epoch": 3.2958311829806757, - "grad_norm": 0.005249109584838152, - "learning_rate": 0.00019999464209593629, - "loss": 46.0, - "step": 43107 - }, - { - "epoch": 3.2959076399640654, - "grad_norm": 0.0003961015318054706, - "learning_rate": 0.00019999464184729168, - "loss": 46.0, - "step": 43108 - }, - { - "epoch": 3.2959840969474548, - "grad_norm": 0.0028856215067207813, - "learning_rate": 0.0001999946415986413, - "loss": 46.0, - "step": 43109 - }, - { - "epoch": 3.2960605539308445, - "grad_norm": 0.005846831016242504, - "learning_rate": 0.00019999464134998513, - "loss": 46.0, - "step": 43110 - }, - { - "epoch": 3.2961370109142343, - "grad_norm": 0.0016929770354181528, - "learning_rate": 0.00019999464110132323, - "loss": 46.0, - "step": 43111 - }, - { - "epoch": 3.296213467897624, - "grad_norm": 0.0036313924938440323, - "learning_rate": 0.00019999464085265553, - "loss": 46.0, - "step": 43112 - }, - { - "epoch": 3.296289924881014, - "grad_norm": 0.0017907816218212247, - "learning_rate": 0.0001999946406039821, - "loss": 46.0, - "step": 43113 - }, - { - "epoch": 3.2963663818644036, - "grad_norm": 0.0031940562184900045, - "learning_rate": 0.00019999464035530285, - "loss": 46.0, - "step": 43114 - }, - { - "epoch": 3.2964428388477933, - "grad_norm": 0.0033530748914927244, - "learning_rate": 0.00019999464010661785, - "loss": 46.0, - "step": 43115 - }, - { - "epoch": 3.296519295831183, - "grad_norm": 0.0071699912659823895, - "learning_rate": 0.0001999946398579271, - "loss": 46.0, - "step": 43116 - }, - { - "epoch": 3.296595752814573, - "grad_norm": 0.0024224447552114725, - "learning_rate": 0.00019999463960923057, - "loss": 46.0, - "step": 43117 - }, - { - "epoch": 3.2966722097979626, - "grad_norm": 0.001077526481822133, - "learning_rate": 0.00019999463936052828, - "loss": 46.0, - "step": 43118 - }, - { - "epoch": 3.296748666781352, - "grad_norm": 0.0010505522368475795, - "learning_rate": 0.00019999463911182018, - "loss": 46.0, - "step": 43119 - }, - { - "epoch": 3.2968251237647417, - "grad_norm": 0.0019745826721191406, - "learning_rate": 0.00019999463886310635, - "loss": 46.0, - "step": 43120 - }, - { - "epoch": 3.2969015807481314, - "grad_norm": 0.0010027430253103375, - "learning_rate": 0.00019999463861438674, - "loss": 46.0, - "step": 43121 - }, - { - "epoch": 3.296978037731521, - "grad_norm": 0.00284214923158288, - "learning_rate": 0.00019999463836566138, - "loss": 46.0, - "step": 43122 - }, - { - "epoch": 3.297054494714911, - "grad_norm": 0.003681445261463523, - "learning_rate": 0.00019999463811693022, - "loss": 46.0, - "step": 43123 - }, - { - "epoch": 3.2971309516983007, - "grad_norm": 0.002956235548481345, - "learning_rate": 0.0001999946378681933, - "loss": 46.0, - "step": 43124 - }, - { - "epoch": 3.2972074086816905, - "grad_norm": 0.0025147164706140757, - "learning_rate": 0.00019999463761945064, - "loss": 46.0, - "step": 43125 - }, - { - "epoch": 3.2972838656650802, - "grad_norm": 0.002099211560562253, - "learning_rate": 0.00019999463737070217, - "loss": 46.0, - "step": 43126 - }, - { - "epoch": 3.29736032264847, - "grad_norm": 0.0037914717104285955, - "learning_rate": 0.00019999463712194794, - "loss": 46.0, - "step": 43127 - }, - { - "epoch": 3.2974367796318598, - "grad_norm": 0.001852756249718368, - "learning_rate": 0.00019999463687318795, - "loss": 46.0, - "step": 43128 - }, - { - "epoch": 3.2975132366152495, - "grad_norm": 0.0023945618886500597, - "learning_rate": 0.00019999463662442217, - "loss": 46.0, - "step": 43129 - }, - { - "epoch": 3.2975896935986393, - "grad_norm": 0.002128422725945711, - "learning_rate": 0.00019999463637565066, - "loss": 46.0, - "step": 43130 - }, - { - "epoch": 3.2976661505820286, - "grad_norm": 0.002548876451328397, - "learning_rate": 0.00019999463612687337, - "loss": 46.0, - "step": 43131 - }, - { - "epoch": 3.2977426075654184, - "grad_norm": 0.0016226563602685928, - "learning_rate": 0.00019999463587809028, - "loss": 46.0, - "step": 43132 - }, - { - "epoch": 3.297819064548808, - "grad_norm": 0.0031711033079773188, - "learning_rate": 0.00019999463562930144, - "loss": 46.0, - "step": 43133 - }, - { - "epoch": 3.297895521532198, - "grad_norm": 0.001658200053498149, - "learning_rate": 0.00019999463538050686, - "loss": 46.0, - "step": 43134 - }, - { - "epoch": 3.2979719785155877, - "grad_norm": 0.001705559203401208, - "learning_rate": 0.00019999463513170647, - "loss": 46.0, - "step": 43135 - }, - { - "epoch": 3.2980484354989774, - "grad_norm": 0.00634820805862546, - "learning_rate": 0.00019999463488290032, - "loss": 46.0, - "step": 43136 - }, - { - "epoch": 3.298124892482367, - "grad_norm": 0.0017934966599568725, - "learning_rate": 0.00019999463463408841, - "loss": 46.0, - "step": 43137 - }, - { - "epoch": 3.298201349465757, - "grad_norm": 0.002681695856153965, - "learning_rate": 0.00019999463438527074, - "loss": 46.0, - "step": 43138 - }, - { - "epoch": 3.2982778064491467, - "grad_norm": 0.007503107190132141, - "learning_rate": 0.0001999946341364473, - "loss": 46.0, - "step": 43139 - }, - { - "epoch": 3.2983542634325365, - "grad_norm": 0.0019769046921283007, - "learning_rate": 0.00019999463388761806, - "loss": 46.0, - "step": 43140 - }, - { - "epoch": 3.2984307204159258, - "grad_norm": 0.0032831698190420866, - "learning_rate": 0.00019999463363878307, - "loss": 46.0, - "step": 43141 - }, - { - "epoch": 3.2985071773993155, - "grad_norm": 0.0008680423488840461, - "learning_rate": 0.0001999946333899423, - "loss": 46.0, - "step": 43142 - }, - { - "epoch": 3.2985836343827053, - "grad_norm": 0.0013498598709702492, - "learning_rate": 0.00019999463314109575, - "loss": 46.0, - "step": 43143 - }, - { - "epoch": 3.298660091366095, - "grad_norm": 0.010320496745407581, - "learning_rate": 0.00019999463289224346, - "loss": 46.0, - "step": 43144 - }, - { - "epoch": 3.298736548349485, - "grad_norm": 0.00266675534658134, - "learning_rate": 0.00019999463264338537, - "loss": 46.0, - "step": 43145 - }, - { - "epoch": 3.2988130053328746, - "grad_norm": 0.0016131686279550195, - "learning_rate": 0.00019999463239452157, - "loss": 46.0, - "step": 43146 - }, - { - "epoch": 3.2988894623162643, - "grad_norm": 0.0020049726590514183, - "learning_rate": 0.00019999463214565196, - "loss": 46.0, - "step": 43147 - }, - { - "epoch": 3.298965919299654, - "grad_norm": 0.0020847790874540806, - "learning_rate": 0.00019999463189677657, - "loss": 46.0, - "step": 43148 - }, - { - "epoch": 3.299042376283044, - "grad_norm": 0.0042830053716897964, - "learning_rate": 0.00019999463164789542, - "loss": 46.0, - "step": 43149 - }, - { - "epoch": 3.2991188332664336, - "grad_norm": 0.0032707746140658855, - "learning_rate": 0.0001999946313990085, - "loss": 46.0, - "step": 43150 - }, - { - "epoch": 3.2991952902498234, - "grad_norm": 0.002887357259169221, - "learning_rate": 0.00019999463115011584, - "loss": 46.0, - "step": 43151 - }, - { - "epoch": 3.299271747233213, - "grad_norm": 0.0020889570005238056, - "learning_rate": 0.00019999463090121736, - "loss": 46.0, - "step": 43152 - }, - { - "epoch": 3.2993482042166025, - "grad_norm": 0.0037425702903419733, - "learning_rate": 0.00019999463065231314, - "loss": 46.0, - "step": 43153 - }, - { - "epoch": 3.2994246611999922, - "grad_norm": 0.0010507851839065552, - "learning_rate": 0.0001999946304034032, - "loss": 46.0, - "step": 43154 - }, - { - "epoch": 3.299501118183382, - "grad_norm": 0.0012318973895162344, - "learning_rate": 0.0001999946301544874, - "loss": 46.0, - "step": 43155 - }, - { - "epoch": 3.2995775751667717, - "grad_norm": 0.0014983902219682932, - "learning_rate": 0.00019999462990556588, - "loss": 46.0, - "step": 43156 - }, - { - "epoch": 3.2996540321501615, - "grad_norm": 0.0015422574942931533, - "learning_rate": 0.00019999462965663857, - "loss": 46.0, - "step": 43157 - }, - { - "epoch": 3.2997304891335513, - "grad_norm": 0.007413915358483791, - "learning_rate": 0.00019999462940770553, - "loss": 46.0, - "step": 43158 - }, - { - "epoch": 3.299806946116941, - "grad_norm": 0.002296923426911235, - "learning_rate": 0.00019999462915876667, - "loss": 46.0, - "step": 43159 - }, - { - "epoch": 3.299883403100331, - "grad_norm": 0.0020284513011574745, - "learning_rate": 0.00019999462890982206, - "loss": 46.0, - "step": 43160 - }, - { - "epoch": 3.2999598600837206, - "grad_norm": 0.0022211328614503145, - "learning_rate": 0.00019999462866087167, - "loss": 46.0, - "step": 43161 - }, - { - "epoch": 3.30003631706711, - "grad_norm": 0.0027674459852278233, - "learning_rate": 0.00019999462841191555, - "loss": 46.0, - "step": 43162 - }, - { - "epoch": 3.3001127740504996, - "grad_norm": 0.0029754112474620342, - "learning_rate": 0.00019999462816295364, - "loss": 46.0, - "step": 43163 - }, - { - "epoch": 3.3001892310338894, - "grad_norm": 0.0010440535843372345, - "learning_rate": 0.00019999462791398597, - "loss": 46.0, - "step": 43164 - }, - { - "epoch": 3.300265688017279, - "grad_norm": 0.0016595243941992521, - "learning_rate": 0.00019999462766501252, - "loss": 46.0, - "step": 43165 - }, - { - "epoch": 3.300342145000669, - "grad_norm": 0.0031372876837849617, - "learning_rate": 0.00019999462741603327, - "loss": 46.0, - "step": 43166 - }, - { - "epoch": 3.3004186019840587, - "grad_norm": 0.0038617700338363647, - "learning_rate": 0.0001999946271670483, - "loss": 46.0, - "step": 43167 - }, - { - "epoch": 3.3004950589674484, - "grad_norm": 0.0014321524649858475, - "learning_rate": 0.00019999462691805753, - "loss": 46.0, - "step": 43168 - }, - { - "epoch": 3.300571515950838, - "grad_norm": 0.0016711888602003455, - "learning_rate": 0.00019999462666906101, - "loss": 46.0, - "step": 43169 - }, - { - "epoch": 3.300647972934228, - "grad_norm": 0.0033880462870001793, - "learning_rate": 0.00019999462642005872, - "loss": 46.0, - "step": 43170 - }, - { - "epoch": 3.3007244299176177, - "grad_norm": 0.0015615088632330298, - "learning_rate": 0.00019999462617105064, - "loss": 46.0, - "step": 43171 - }, - { - "epoch": 3.3008008869010075, - "grad_norm": 0.0018420040141791105, - "learning_rate": 0.00019999462592203683, - "loss": 46.0, - "step": 43172 - }, - { - "epoch": 3.3008773438843972, - "grad_norm": 0.003144825343042612, - "learning_rate": 0.00019999462567301722, - "loss": 46.0, - "step": 43173 - }, - { - "epoch": 3.300953800867787, - "grad_norm": 0.0026488881558179855, - "learning_rate": 0.00019999462542399183, - "loss": 46.0, - "step": 43174 - }, - { - "epoch": 3.3010302578511763, - "grad_norm": 0.002214920474216342, - "learning_rate": 0.0001999946251749607, - "loss": 46.0, - "step": 43175 - }, - { - "epoch": 3.301106714834566, - "grad_norm": 0.0024944681208580732, - "learning_rate": 0.00019999462492592378, - "loss": 46.0, - "step": 43176 - }, - { - "epoch": 3.301183171817956, - "grad_norm": 0.001967791933566332, - "learning_rate": 0.0001999946246768811, - "loss": 46.0, - "step": 43177 - }, - { - "epoch": 3.3012596288013456, - "grad_norm": 0.005799522157758474, - "learning_rate": 0.00019999462442783265, - "loss": 46.0, - "step": 43178 - }, - { - "epoch": 3.3013360857847354, - "grad_norm": 0.0009032622328959405, - "learning_rate": 0.00019999462417877843, - "loss": 46.0, - "step": 43179 - }, - { - "epoch": 3.301412542768125, - "grad_norm": 0.0009598106262274086, - "learning_rate": 0.00019999462392971843, - "loss": 46.0, - "step": 43180 - }, - { - "epoch": 3.301488999751515, - "grad_norm": 0.0018510113004595041, - "learning_rate": 0.0001999946236806527, - "loss": 46.0, - "step": 43181 - }, - { - "epoch": 3.3015654567349046, - "grad_norm": 0.0017959250835701823, - "learning_rate": 0.00019999462343158118, - "loss": 46.0, - "step": 43182 - }, - { - "epoch": 3.3016419137182944, - "grad_norm": 0.001839536358602345, - "learning_rate": 0.00019999462318250386, - "loss": 46.0, - "step": 43183 - }, - { - "epoch": 3.3017183707016837, - "grad_norm": 0.004612884018570185, - "learning_rate": 0.0001999946229334208, - "loss": 46.0, - "step": 43184 - }, - { - "epoch": 3.3017948276850735, - "grad_norm": 0.00887236837297678, - "learning_rate": 0.000199994622684332, - "loss": 46.0, - "step": 43185 - }, - { - "epoch": 3.3018712846684632, - "grad_norm": 0.002551883226260543, - "learning_rate": 0.00019999462243523735, - "loss": 46.0, - "step": 43186 - }, - { - "epoch": 3.301947741651853, - "grad_norm": 0.0018193320138379931, - "learning_rate": 0.00019999462218613697, - "loss": 46.0, - "step": 43187 - }, - { - "epoch": 3.3020241986352428, - "grad_norm": 0.0031338778790086508, - "learning_rate": 0.00019999462193703084, - "loss": 46.0, - "step": 43188 - }, - { - "epoch": 3.3021006556186325, - "grad_norm": 0.0014880941016599536, - "learning_rate": 0.00019999462168791894, - "loss": 46.0, - "step": 43189 - }, - { - "epoch": 3.3021771126020223, - "grad_norm": 0.0023926033172756433, - "learning_rate": 0.00019999462143880127, - "loss": 46.0, - "step": 43190 - }, - { - "epoch": 3.302253569585412, - "grad_norm": 0.002128173364326358, - "learning_rate": 0.0001999946211896778, - "loss": 46.0, - "step": 43191 - }, - { - "epoch": 3.302330026568802, - "grad_norm": 0.0018917858833447099, - "learning_rate": 0.0001999946209405486, - "loss": 46.0, - "step": 43192 - }, - { - "epoch": 3.3024064835521916, - "grad_norm": 0.001458160812035203, - "learning_rate": 0.00019999462069141363, - "loss": 46.0, - "step": 43193 - }, - { - "epoch": 3.3024829405355813, - "grad_norm": 0.0014849331928417087, - "learning_rate": 0.00019999462044227286, - "loss": 46.0, - "step": 43194 - }, - { - "epoch": 3.302559397518971, - "grad_norm": 0.0024203744251281023, - "learning_rate": 0.00019999462019312634, - "loss": 46.0, - "step": 43195 - }, - { - "epoch": 3.302635854502361, - "grad_norm": 0.005318007431924343, - "learning_rate": 0.00019999461994397403, - "loss": 46.0, - "step": 43196 - }, - { - "epoch": 3.30271231148575, - "grad_norm": 0.004124005790799856, - "learning_rate": 0.00019999461969481597, - "loss": 46.0, - "step": 43197 - }, - { - "epoch": 3.30278876846914, - "grad_norm": 0.002730536973103881, - "learning_rate": 0.00019999461944565213, - "loss": 46.0, - "step": 43198 - }, - { - "epoch": 3.3028652254525297, - "grad_norm": 0.004512130282819271, - "learning_rate": 0.00019999461919648252, - "loss": 46.0, - "step": 43199 - }, - { - "epoch": 3.3029416824359195, - "grad_norm": 0.0030428001191467047, - "learning_rate": 0.00019999461894730717, - "loss": 46.0, - "step": 43200 - }, - { - "epoch": 3.303018139419309, - "grad_norm": 0.004517611116170883, - "learning_rate": 0.000199994618698126, - "loss": 46.0, - "step": 43201 - }, - { - "epoch": 3.303094596402699, - "grad_norm": 0.0021113597322255373, - "learning_rate": 0.0001999946184489391, - "loss": 46.0, - "step": 43202 - }, - { - "epoch": 3.3031710533860887, - "grad_norm": 0.002403896301984787, - "learning_rate": 0.00019999461819974644, - "loss": 46.0, - "step": 43203 - }, - { - "epoch": 3.3032475103694785, - "grad_norm": 0.0007715957472100854, - "learning_rate": 0.000199994617950548, - "loss": 46.0, - "step": 43204 - }, - { - "epoch": 3.3033239673528683, - "grad_norm": 0.0010853923158720136, - "learning_rate": 0.00019999461770134377, - "loss": 46.0, - "step": 43205 - }, - { - "epoch": 3.3034004243362576, - "grad_norm": 0.0007718742126598954, - "learning_rate": 0.0001999946174521338, - "loss": 46.0, - "step": 43206 - }, - { - "epoch": 3.3034768813196473, - "grad_norm": 0.0019535336177796125, - "learning_rate": 0.00019999461720291803, - "loss": 46.0, - "step": 43207 - }, - { - "epoch": 3.303553338303037, - "grad_norm": 0.0034988231491297483, - "learning_rate": 0.0001999946169536965, - "loss": 46.0, - "step": 43208 - }, - { - "epoch": 3.303629795286427, - "grad_norm": 0.0010888231918215752, - "learning_rate": 0.0001999946167044692, - "loss": 46.0, - "step": 43209 - }, - { - "epoch": 3.3037062522698166, - "grad_norm": 0.0024672902654856443, - "learning_rate": 0.00019999461645523614, - "loss": 46.0, - "step": 43210 - }, - { - "epoch": 3.3037827092532064, - "grad_norm": 0.0013750992948189378, - "learning_rate": 0.0001999946162059973, - "loss": 46.0, - "step": 43211 - }, - { - "epoch": 3.303859166236596, - "grad_norm": 0.001331614563241601, - "learning_rate": 0.0001999946159567527, - "loss": 46.0, - "step": 43212 - }, - { - "epoch": 3.303935623219986, - "grad_norm": 0.002048181602731347, - "learning_rate": 0.00019999461570750234, - "loss": 46.0, - "step": 43213 - }, - { - "epoch": 3.3040120802033757, - "grad_norm": 0.0011536324163898826, - "learning_rate": 0.0001999946154582462, - "loss": 46.0, - "step": 43214 - }, - { - "epoch": 3.3040885371867654, - "grad_norm": 0.0008246597717516124, - "learning_rate": 0.0001999946152089843, - "loss": 46.0, - "step": 43215 - }, - { - "epoch": 3.304164994170155, - "grad_norm": 0.007458702195435762, - "learning_rate": 0.00019999461495971664, - "loss": 46.0, - "step": 43216 - }, - { - "epoch": 3.304241451153545, - "grad_norm": 0.0008868675213307142, - "learning_rate": 0.0001999946147104432, - "loss": 46.0, - "step": 43217 - }, - { - "epoch": 3.3043179081369343, - "grad_norm": 0.0031157336197793484, - "learning_rate": 0.00019999461446116397, - "loss": 46.0, - "step": 43218 - }, - { - "epoch": 3.304394365120324, - "grad_norm": 0.0027397051453590393, - "learning_rate": 0.00019999461421187897, - "loss": 46.0, - "step": 43219 - }, - { - "epoch": 3.304470822103714, - "grad_norm": 0.004035792779177427, - "learning_rate": 0.0001999946139625882, - "loss": 46.0, - "step": 43220 - }, - { - "epoch": 3.3045472790871036, - "grad_norm": 0.001293553039431572, - "learning_rate": 0.0001999946137132917, - "loss": 46.0, - "step": 43221 - }, - { - "epoch": 3.3046237360704933, - "grad_norm": 0.001137599116191268, - "learning_rate": 0.0001999946134639894, - "loss": 46.0, - "step": 43222 - }, - { - "epoch": 3.304700193053883, - "grad_norm": 0.002191295148804784, - "learning_rate": 0.00019999461321468134, - "loss": 46.0, - "step": 43223 - }, - { - "epoch": 3.304776650037273, - "grad_norm": 0.0029363189823925495, - "learning_rate": 0.0001999946129653675, - "loss": 46.0, - "step": 43224 - }, - { - "epoch": 3.3048531070206626, - "grad_norm": 0.002210573758929968, - "learning_rate": 0.00019999461271604793, - "loss": 46.0, - "step": 43225 - }, - { - "epoch": 3.3049295640040524, - "grad_norm": 0.0021649464033544064, - "learning_rate": 0.00019999461246672254, - "loss": 46.0, - "step": 43226 - }, - { - "epoch": 3.305006020987442, - "grad_norm": 0.0019673234783113003, - "learning_rate": 0.0001999946122173914, - "loss": 46.0, - "step": 43227 - }, - { - "epoch": 3.3050824779708314, - "grad_norm": 0.0006071816314943135, - "learning_rate": 0.0001999946119680545, - "loss": 46.0, - "step": 43228 - }, - { - "epoch": 3.305158934954221, - "grad_norm": 0.0019875140860676765, - "learning_rate": 0.00019999461171871184, - "loss": 46.0, - "step": 43229 - }, - { - "epoch": 3.305235391937611, - "grad_norm": 0.0029642588924616575, - "learning_rate": 0.00019999461146936337, - "loss": 46.0, - "step": 43230 - }, - { - "epoch": 3.3053118489210007, - "grad_norm": 0.002159376861527562, - "learning_rate": 0.00019999461122000915, - "loss": 46.0, - "step": 43231 - }, - { - "epoch": 3.3053883059043905, - "grad_norm": 0.0046060094609856606, - "learning_rate": 0.00019999461097064918, - "loss": 46.0, - "step": 43232 - }, - { - "epoch": 3.3054647628877802, - "grad_norm": 0.0026856299955397844, - "learning_rate": 0.00019999461072128344, - "loss": 46.0, - "step": 43233 - }, - { - "epoch": 3.30554121987117, - "grad_norm": 0.0013417989248409867, - "learning_rate": 0.0001999946104719119, - "loss": 46.0, - "step": 43234 - }, - { - "epoch": 3.3056176768545598, - "grad_norm": 0.0019791172817349434, - "learning_rate": 0.0001999946102225346, - "loss": 46.0, - "step": 43235 - }, - { - "epoch": 3.3056941338379495, - "grad_norm": 0.0023111761547625065, - "learning_rate": 0.00019999460997315155, - "loss": 46.0, - "step": 43236 - }, - { - "epoch": 3.3057705908213393, - "grad_norm": 0.0019377714488655329, - "learning_rate": 0.0001999946097237627, - "loss": 46.0, - "step": 43237 - }, - { - "epoch": 3.305847047804729, - "grad_norm": 0.0011661823373287916, - "learning_rate": 0.00019999460947436813, - "loss": 46.0, - "step": 43238 - }, - { - "epoch": 3.305923504788119, - "grad_norm": 0.0007760869339108467, - "learning_rate": 0.00019999460922496775, - "loss": 46.0, - "step": 43239 - }, - { - "epoch": 3.305999961771508, - "grad_norm": 0.001574092311784625, - "learning_rate": 0.00019999460897556162, - "loss": 46.0, - "step": 43240 - }, - { - "epoch": 3.306076418754898, - "grad_norm": 0.0029979865066707134, - "learning_rate": 0.00019999460872614972, - "loss": 46.0, - "step": 43241 - }, - { - "epoch": 3.3061528757382876, - "grad_norm": 0.002518233610317111, - "learning_rate": 0.00019999460847673205, - "loss": 46.0, - "step": 43242 - }, - { - "epoch": 3.3062293327216774, - "grad_norm": 0.0012703253887593746, - "learning_rate": 0.00019999460822730858, - "loss": 46.0, - "step": 43243 - }, - { - "epoch": 3.306305789705067, - "grad_norm": 0.002061791019514203, - "learning_rate": 0.00019999460797787938, - "loss": 46.0, - "step": 43244 - }, - { - "epoch": 3.306382246688457, - "grad_norm": 0.0023671623785048723, - "learning_rate": 0.0001999946077284444, - "loss": 46.0, - "step": 43245 - }, - { - "epoch": 3.3064587036718467, - "grad_norm": 0.0017499792156741023, - "learning_rate": 0.00019999460747900365, - "loss": 46.0, - "step": 43246 - }, - { - "epoch": 3.3065351606552364, - "grad_norm": 0.0012367863673716784, - "learning_rate": 0.0001999946072295571, - "loss": 46.0, - "step": 43247 - }, - { - "epoch": 3.306611617638626, - "grad_norm": 0.002476779278367758, - "learning_rate": 0.00019999460698010482, - "loss": 46.0, - "step": 43248 - }, - { - "epoch": 3.306688074622016, - "grad_norm": 0.002475731773301959, - "learning_rate": 0.00019999460673064676, - "loss": 46.0, - "step": 43249 - }, - { - "epoch": 3.3067645316054053, - "grad_norm": 0.0014733021380379796, - "learning_rate": 0.00019999460648118292, - "loss": 46.0, - "step": 43250 - }, - { - "epoch": 3.306840988588795, - "grad_norm": 0.0016036968445405364, - "learning_rate": 0.00019999460623171334, - "loss": 46.0, - "step": 43251 - }, - { - "epoch": 3.306917445572185, - "grad_norm": 0.001718223444186151, - "learning_rate": 0.000199994605982238, - "loss": 46.0, - "step": 43252 - }, - { - "epoch": 3.3069939025555746, - "grad_norm": 0.002081759739667177, - "learning_rate": 0.00019999460573275686, - "loss": 46.0, - "step": 43253 - }, - { - "epoch": 3.3070703595389643, - "grad_norm": 0.0013873249990865588, - "learning_rate": 0.00019999460548326994, - "loss": 46.0, - "step": 43254 - }, - { - "epoch": 3.307146816522354, - "grad_norm": 0.0026570456102490425, - "learning_rate": 0.00019999460523377726, - "loss": 46.0, - "step": 43255 - }, - { - "epoch": 3.307223273505744, - "grad_norm": 0.0016605970449745655, - "learning_rate": 0.00019999460498427882, - "loss": 46.0, - "step": 43256 - }, - { - "epoch": 3.3072997304891336, - "grad_norm": 0.0028671384789049625, - "learning_rate": 0.0001999946047347746, - "loss": 46.0, - "step": 43257 - }, - { - "epoch": 3.3073761874725234, - "grad_norm": 0.0018654812593013048, - "learning_rate": 0.0001999946044852646, - "loss": 46.0, - "step": 43258 - }, - { - "epoch": 3.307452644455913, - "grad_norm": 0.0019044623477384448, - "learning_rate": 0.00019999460423574886, - "loss": 46.0, - "step": 43259 - }, - { - "epoch": 3.307529101439303, - "grad_norm": 0.0024207562673836946, - "learning_rate": 0.00019999460398622735, - "loss": 46.0, - "step": 43260 - }, - { - "epoch": 3.3076055584226927, - "grad_norm": 0.0017198467394337058, - "learning_rate": 0.00019999460373670006, - "loss": 46.0, - "step": 43261 - }, - { - "epoch": 3.307682015406082, - "grad_norm": 0.0008224956691265106, - "learning_rate": 0.000199994603487167, - "loss": 46.0, - "step": 43262 - }, - { - "epoch": 3.3077584723894717, - "grad_norm": 0.0036526774056255817, - "learning_rate": 0.00019999460323762817, - "loss": 46.0, - "step": 43263 - }, - { - "epoch": 3.3078349293728615, - "grad_norm": 0.0012848360929638147, - "learning_rate": 0.00019999460298808356, - "loss": 46.0, - "step": 43264 - }, - { - "epoch": 3.3079113863562513, - "grad_norm": 0.0009869858622550964, - "learning_rate": 0.00019999460273853323, - "loss": 46.0, - "step": 43265 - }, - { - "epoch": 3.307987843339641, - "grad_norm": 0.0009321004617959261, - "learning_rate": 0.00019999460248897708, - "loss": 46.0, - "step": 43266 - }, - { - "epoch": 3.308064300323031, - "grad_norm": 0.001969235483556986, - "learning_rate": 0.0001999946022394152, - "loss": 46.0, - "step": 43267 - }, - { - "epoch": 3.3081407573064205, - "grad_norm": 0.00263637350872159, - "learning_rate": 0.0001999946019898475, - "loss": 46.0, - "step": 43268 - }, - { - "epoch": 3.3082172142898103, - "grad_norm": 0.002038033213466406, - "learning_rate": 0.00019999460174027406, - "loss": 46.0, - "step": 43269 - }, - { - "epoch": 3.3082936712732, - "grad_norm": 0.005171559285372496, - "learning_rate": 0.00019999460149069487, - "loss": 46.0, - "step": 43270 - }, - { - "epoch": 3.30837012825659, - "grad_norm": 0.003046031342819333, - "learning_rate": 0.00019999460124110987, - "loss": 46.0, - "step": 43271 - }, - { - "epoch": 3.308446585239979, - "grad_norm": 0.0011525237932801247, - "learning_rate": 0.00019999460099151913, - "loss": 46.0, - "step": 43272 - }, - { - "epoch": 3.308523042223369, - "grad_norm": 0.0006031316588632762, - "learning_rate": 0.00019999460074192262, - "loss": 46.0, - "step": 43273 - }, - { - "epoch": 3.3085994992067587, - "grad_norm": 0.009976615197956562, - "learning_rate": 0.00019999460049232033, - "loss": 46.0, - "step": 43274 - }, - { - "epoch": 3.3086759561901484, - "grad_norm": 0.0021152389235794544, - "learning_rate": 0.00019999460024271225, - "loss": 46.0, - "step": 43275 - }, - { - "epoch": 3.308752413173538, - "grad_norm": 0.0017483679112046957, - "learning_rate": 0.00019999459999309841, - "loss": 46.0, - "step": 43276 - }, - { - "epoch": 3.308828870156928, - "grad_norm": 0.00484770443290472, - "learning_rate": 0.00019999459974347886, - "loss": 46.0, - "step": 43277 - }, - { - "epoch": 3.3089053271403177, - "grad_norm": 0.0010415568249300122, - "learning_rate": 0.00019999459949385345, - "loss": 46.0, - "step": 43278 - }, - { - "epoch": 3.3089817841237075, - "grad_norm": 0.002279620384797454, - "learning_rate": 0.00019999459924422236, - "loss": 46.0, - "step": 43279 - }, - { - "epoch": 3.3090582411070972, - "grad_norm": 0.002856149571016431, - "learning_rate": 0.00019999459899458543, - "loss": 46.0, - "step": 43280 - }, - { - "epoch": 3.309134698090487, - "grad_norm": 0.0012341402471065521, - "learning_rate": 0.00019999459874494276, - "loss": 46.0, - "step": 43281 - }, - { - "epoch": 3.3092111550738768, - "grad_norm": 0.004400936421006918, - "learning_rate": 0.0001999945984952943, - "loss": 46.0, - "step": 43282 - }, - { - "epoch": 3.3092876120572665, - "grad_norm": 0.0015747930156067014, - "learning_rate": 0.00019999459824564012, - "loss": 46.0, - "step": 43283 - }, - { - "epoch": 3.309364069040656, - "grad_norm": 0.004910276737064123, - "learning_rate": 0.00019999459799598012, - "loss": 46.0, - "step": 43284 - }, - { - "epoch": 3.3094405260240456, - "grad_norm": 0.001290509128011763, - "learning_rate": 0.00019999459774631438, - "loss": 46.0, - "step": 43285 - }, - { - "epoch": 3.3095169830074354, - "grad_norm": 0.004152401350438595, - "learning_rate": 0.0001999945974966429, - "loss": 46.0, - "step": 43286 - }, - { - "epoch": 3.309593439990825, - "grad_norm": 0.0035686946939677, - "learning_rate": 0.00019999459724696558, - "loss": 46.0, - "step": 43287 - }, - { - "epoch": 3.309669896974215, - "grad_norm": 0.00228068302385509, - "learning_rate": 0.00019999459699728252, - "loss": 46.0, - "step": 43288 - }, - { - "epoch": 3.3097463539576046, - "grad_norm": 0.0010730500798672438, - "learning_rate": 0.00019999459674759372, - "loss": 46.0, - "step": 43289 - }, - { - "epoch": 3.3098228109409944, - "grad_norm": 0.004837107378989458, - "learning_rate": 0.0001999945964978991, - "loss": 46.0, - "step": 43290 - }, - { - "epoch": 3.309899267924384, - "grad_norm": 0.0012564780190587044, - "learning_rate": 0.00019999459624819873, - "loss": 46.0, - "step": 43291 - }, - { - "epoch": 3.309975724907774, - "grad_norm": 0.00089595268946141, - "learning_rate": 0.0001999945959984926, - "loss": 46.0, - "step": 43292 - }, - { - "epoch": 3.3100521818911632, - "grad_norm": 0.0011313320137560368, - "learning_rate": 0.00019999459574878073, - "loss": 46.0, - "step": 43293 - }, - { - "epoch": 3.310128638874553, - "grad_norm": 0.0031332937069237232, - "learning_rate": 0.00019999459549906306, - "loss": 46.0, - "step": 43294 - }, - { - "epoch": 3.3102050958579428, - "grad_norm": 0.0017274945275858045, - "learning_rate": 0.00019999459524933962, - "loss": 46.0, - "step": 43295 - }, - { - "epoch": 3.3102815528413325, - "grad_norm": 0.004312107805162668, - "learning_rate": 0.0001999945949996104, - "loss": 46.0, - "step": 43296 - }, - { - "epoch": 3.3103580098247223, - "grad_norm": 0.0017059976235032082, - "learning_rate": 0.0001999945947498754, - "loss": 46.0, - "step": 43297 - }, - { - "epoch": 3.310434466808112, - "grad_norm": 0.0015957513824105263, - "learning_rate": 0.00019999459450013466, - "loss": 46.0, - "step": 43298 - }, - { - "epoch": 3.310510923791502, - "grad_norm": 0.00199304916895926, - "learning_rate": 0.00019999459425038815, - "loss": 46.0, - "step": 43299 - }, - { - "epoch": 3.3105873807748916, - "grad_norm": 0.0012294353218749166, - "learning_rate": 0.00019999459400063587, - "loss": 46.0, - "step": 43300 - }, - { - "epoch": 3.3106638377582813, - "grad_norm": 0.002053462667390704, - "learning_rate": 0.0001999945937508778, - "loss": 46.0, - "step": 43301 - }, - { - "epoch": 3.310740294741671, - "grad_norm": 0.0006750842439942062, - "learning_rate": 0.000199994593501114, - "loss": 46.0, - "step": 43302 - }, - { - "epoch": 3.310816751725061, - "grad_norm": 0.0015960498712956905, - "learning_rate": 0.0001999945932513444, - "loss": 46.0, - "step": 43303 - }, - { - "epoch": 3.3108932087084506, - "grad_norm": 0.001992043573409319, - "learning_rate": 0.00019999459300156902, - "loss": 46.0, - "step": 43304 - }, - { - "epoch": 3.3109696656918404, - "grad_norm": 0.0009937369031831622, - "learning_rate": 0.0001999945927517879, - "loss": 46.0, - "step": 43305 - }, - { - "epoch": 3.3110461226752297, - "grad_norm": 0.0007268227636814117, - "learning_rate": 0.000199994592502001, - "loss": 46.0, - "step": 43306 - }, - { - "epoch": 3.3111225796586194, - "grad_norm": 0.002949135610833764, - "learning_rate": 0.00019999459225220835, - "loss": 46.0, - "step": 43307 - }, - { - "epoch": 3.311199036642009, - "grad_norm": 0.0010290266945958138, - "learning_rate": 0.0001999945920024099, - "loss": 46.0, - "step": 43308 - }, - { - "epoch": 3.311275493625399, - "grad_norm": 0.010142185725271702, - "learning_rate": 0.00019999459175260568, - "loss": 46.0, - "step": 43309 - }, - { - "epoch": 3.3113519506087887, - "grad_norm": 0.0007305838516913354, - "learning_rate": 0.00019999459150279571, - "loss": 46.0, - "step": 43310 - }, - { - "epoch": 3.3114284075921785, - "grad_norm": 0.00721488194540143, - "learning_rate": 0.00019999459125297998, - "loss": 46.0, - "step": 43311 - }, - { - "epoch": 3.3115048645755683, - "grad_norm": 0.0011465094285085797, - "learning_rate": 0.00019999459100315844, - "loss": 46.0, - "step": 43312 - }, - { - "epoch": 3.311581321558958, - "grad_norm": 0.0020851767621934414, - "learning_rate": 0.00019999459075333115, - "loss": 46.0, - "step": 43313 - }, - { - "epoch": 3.3116577785423478, - "grad_norm": 0.0022980738431215286, - "learning_rate": 0.00019999459050349812, - "loss": 46.0, - "step": 43314 - }, - { - "epoch": 3.311734235525737, - "grad_norm": 0.0013065822422504425, - "learning_rate": 0.0001999945902536593, - "loss": 46.0, - "step": 43315 - }, - { - "epoch": 3.311810692509127, - "grad_norm": 0.001578980009071529, - "learning_rate": 0.0001999945900038147, - "loss": 46.0, - "step": 43316 - }, - { - "epoch": 3.3118871494925166, - "grad_norm": 0.0016919063637033105, - "learning_rate": 0.00019999458975396436, - "loss": 46.0, - "step": 43317 - }, - { - "epoch": 3.3119636064759064, - "grad_norm": 0.0006471959641203284, - "learning_rate": 0.0001999945895041082, - "loss": 46.0, - "step": 43318 - }, - { - "epoch": 3.312040063459296, - "grad_norm": 0.002068018075078726, - "learning_rate": 0.0001999945892542463, - "loss": 46.0, - "step": 43319 - }, - { - "epoch": 3.312116520442686, - "grad_norm": 0.0021072272211313248, - "learning_rate": 0.00019999458900437863, - "loss": 46.0, - "step": 43320 - }, - { - "epoch": 3.3121929774260757, - "grad_norm": 0.0017072216141968966, - "learning_rate": 0.00019999458875450521, - "loss": 46.0, - "step": 43321 - }, - { - "epoch": 3.3122694344094654, - "grad_norm": 0.003044598735868931, - "learning_rate": 0.00019999458850462597, - "loss": 46.0, - "step": 43322 - }, - { - "epoch": 3.312345891392855, - "grad_norm": 0.0005900660180486739, - "learning_rate": 0.00019999458825474103, - "loss": 46.0, - "step": 43323 - }, - { - "epoch": 3.312422348376245, - "grad_norm": 0.002818033564835787, - "learning_rate": 0.00019999458800485026, - "loss": 46.0, - "step": 43324 - }, - { - "epoch": 3.3124988053596347, - "grad_norm": 0.0017469549784436822, - "learning_rate": 0.00019999458775495375, - "loss": 46.0, - "step": 43325 - }, - { - "epoch": 3.3125752623430245, - "grad_norm": 0.000833536556456238, - "learning_rate": 0.00019999458750505147, - "loss": 46.0, - "step": 43326 - }, - { - "epoch": 3.3126517193264142, - "grad_norm": 0.0017317094607278705, - "learning_rate": 0.0001999945872551434, - "loss": 46.0, - "step": 43327 - }, - { - "epoch": 3.3127281763098035, - "grad_norm": 0.004800363909453154, - "learning_rate": 0.0001999945870052296, - "loss": 46.0, - "step": 43328 - }, - { - "epoch": 3.3128046332931933, - "grad_norm": 0.0011750849662348628, - "learning_rate": 0.00019999458675531, - "loss": 46.0, - "step": 43329 - }, - { - "epoch": 3.312881090276583, - "grad_norm": 0.010479213669896126, - "learning_rate": 0.00019999458650538465, - "loss": 46.0, - "step": 43330 - }, - { - "epoch": 3.312957547259973, - "grad_norm": 0.0020926264114677906, - "learning_rate": 0.00019999458625545352, - "loss": 46.0, - "step": 43331 - }, - { - "epoch": 3.3130340042433626, - "grad_norm": 0.0014867031713947654, - "learning_rate": 0.00019999458600551662, - "loss": 46.0, - "step": 43332 - }, - { - "epoch": 3.3131104612267523, - "grad_norm": 0.0039025195874273777, - "learning_rate": 0.00019999458575557398, - "loss": 46.0, - "step": 43333 - }, - { - "epoch": 3.313186918210142, - "grad_norm": 0.0014592184452340007, - "learning_rate": 0.0001999945855056255, - "loss": 46.0, - "step": 43334 - }, - { - "epoch": 3.313263375193532, - "grad_norm": 0.004011046141386032, - "learning_rate": 0.00019999458525567131, - "loss": 46.0, - "step": 43335 - }, - { - "epoch": 3.3133398321769216, - "grad_norm": 0.0019452678970992565, - "learning_rate": 0.00019999458500571135, - "loss": 46.0, - "step": 43336 - }, - { - "epoch": 3.313416289160311, - "grad_norm": 0.001178641919977963, - "learning_rate": 0.00019999458475574558, - "loss": 46.0, - "step": 43337 - }, - { - "epoch": 3.3134927461437007, - "grad_norm": 0.0022635338827967644, - "learning_rate": 0.00019999458450577407, - "loss": 46.0, - "step": 43338 - }, - { - "epoch": 3.3135692031270905, - "grad_norm": 0.002230064244940877, - "learning_rate": 0.00019999458425579682, - "loss": 46.0, - "step": 43339 - }, - { - "epoch": 3.3136456601104802, - "grad_norm": 0.003249509958550334, - "learning_rate": 0.00019999458400581376, - "loss": 46.0, - "step": 43340 - }, - { - "epoch": 3.31372211709387, - "grad_norm": 0.0023734376300126314, - "learning_rate": 0.00019999458375582493, - "loss": 46.0, - "step": 43341 - }, - { - "epoch": 3.3137985740772598, - "grad_norm": 0.0012363401474431157, - "learning_rate": 0.00019999458350583035, - "loss": 46.0, - "step": 43342 - }, - { - "epoch": 3.3138750310606495, - "grad_norm": 0.001281177275814116, - "learning_rate": 0.00019999458325582997, - "loss": 46.0, - "step": 43343 - }, - { - "epoch": 3.3139514880440393, - "grad_norm": 0.004189579281955957, - "learning_rate": 0.00019999458300582384, - "loss": 46.0, - "step": 43344 - }, - { - "epoch": 3.314027945027429, - "grad_norm": 0.0024599258322268724, - "learning_rate": 0.00019999458275581195, - "loss": 46.0, - "step": 43345 - }, - { - "epoch": 3.314104402010819, - "grad_norm": 0.0006552319973707199, - "learning_rate": 0.00019999458250579427, - "loss": 46.0, - "step": 43346 - }, - { - "epoch": 3.3141808589942086, - "grad_norm": 0.0029775877483189106, - "learning_rate": 0.00019999458225577086, - "loss": 46.0, - "step": 43347 - }, - { - "epoch": 3.3142573159775983, - "grad_norm": 0.003050489816814661, - "learning_rate": 0.00019999458200574167, - "loss": 46.0, - "step": 43348 - }, - { - "epoch": 3.3143337729609876, - "grad_norm": 0.001027034013532102, - "learning_rate": 0.0001999945817557067, - "loss": 46.0, - "step": 43349 - }, - { - "epoch": 3.3144102299443774, - "grad_norm": 0.0031818593852221966, - "learning_rate": 0.00019999458150566594, - "loss": 46.0, - "step": 43350 - }, - { - "epoch": 3.314486686927767, - "grad_norm": 0.0013632456539198756, - "learning_rate": 0.00019999458125561942, - "loss": 46.0, - "step": 43351 - }, - { - "epoch": 3.314563143911157, - "grad_norm": 0.0012772209011018276, - "learning_rate": 0.00019999458100556714, - "loss": 46.0, - "step": 43352 - }, - { - "epoch": 3.3146396008945467, - "grad_norm": 0.0028334425296634436, - "learning_rate": 0.0001999945807555091, - "loss": 46.0, - "step": 43353 - }, - { - "epoch": 3.3147160578779364, - "grad_norm": 0.001498374156653881, - "learning_rate": 0.00019999458050544528, - "loss": 46.0, - "step": 43354 - }, - { - "epoch": 3.314792514861326, - "grad_norm": 0.005111016798764467, - "learning_rate": 0.00019999458025537567, - "loss": 46.0, - "step": 43355 - }, - { - "epoch": 3.314868971844716, - "grad_norm": 0.001594903995282948, - "learning_rate": 0.00019999458000530035, - "loss": 46.0, - "step": 43356 - }, - { - "epoch": 3.3149454288281057, - "grad_norm": 0.0009064633050002158, - "learning_rate": 0.0001999945797552192, - "loss": 46.0, - "step": 43357 - }, - { - "epoch": 3.3150218858114955, - "grad_norm": 0.0015657697804272175, - "learning_rate": 0.0001999945795051323, - "loss": 46.0, - "step": 43358 - }, - { - "epoch": 3.315098342794885, - "grad_norm": 0.004220201633870602, - "learning_rate": 0.00019999457925503963, - "loss": 46.0, - "step": 43359 - }, - { - "epoch": 3.3151747997782746, - "grad_norm": 0.001176894991658628, - "learning_rate": 0.0001999945790049412, - "loss": 46.0, - "step": 43360 - }, - { - "epoch": 3.3152512567616643, - "grad_norm": 0.0029704258777201176, - "learning_rate": 0.000199994578754837, - "loss": 46.0, - "step": 43361 - }, - { - "epoch": 3.315327713745054, - "grad_norm": 0.0016683146823197603, - "learning_rate": 0.00019999457850472703, - "loss": 46.0, - "step": 43362 - }, - { - "epoch": 3.315404170728444, - "grad_norm": 0.002009845105931163, - "learning_rate": 0.0001999945782546113, - "loss": 46.0, - "step": 43363 - }, - { - "epoch": 3.3154806277118336, - "grad_norm": 0.0011125633027404547, - "learning_rate": 0.0001999945780044898, - "loss": 46.0, - "step": 43364 - }, - { - "epoch": 3.3155570846952234, - "grad_norm": 0.0015573266427963972, - "learning_rate": 0.0001999945777543625, - "loss": 46.0, - "step": 43365 - }, - { - "epoch": 3.315633541678613, - "grad_norm": 0.0017093715723603964, - "learning_rate": 0.00019999457750422947, - "loss": 46.0, - "step": 43366 - }, - { - "epoch": 3.315709998662003, - "grad_norm": 0.004980104044079781, - "learning_rate": 0.00019999457725409063, - "loss": 46.0, - "step": 43367 - }, - { - "epoch": 3.3157864556453926, - "grad_norm": 0.002460876014083624, - "learning_rate": 0.00019999457700394606, - "loss": 46.0, - "step": 43368 - }, - { - "epoch": 3.3158629126287824, - "grad_norm": 0.0014098930405452847, - "learning_rate": 0.00019999457675379568, - "loss": 46.0, - "step": 43369 - }, - { - "epoch": 3.315939369612172, - "grad_norm": 0.006988766603171825, - "learning_rate": 0.00019999457650363956, - "loss": 46.0, - "step": 43370 - }, - { - "epoch": 3.3160158265955615, - "grad_norm": 0.0018488438799977303, - "learning_rate": 0.00019999457625347766, - "loss": 46.0, - "step": 43371 - }, - { - "epoch": 3.3160922835789512, - "grad_norm": 0.000904468004591763, - "learning_rate": 0.00019999457600331, - "loss": 46.0, - "step": 43372 - }, - { - "epoch": 3.316168740562341, - "grad_norm": 0.0014299922622740269, - "learning_rate": 0.00019999457575313657, - "loss": 46.0, - "step": 43373 - }, - { - "epoch": 3.3162451975457308, - "grad_norm": 0.004879137966781855, - "learning_rate": 0.00019999457550295738, - "loss": 46.0, - "step": 43374 - }, - { - "epoch": 3.3163216545291205, - "grad_norm": 0.000905583321582526, - "learning_rate": 0.00019999457525277242, - "loss": 46.0, - "step": 43375 - }, - { - "epoch": 3.3163981115125103, - "grad_norm": 0.0012610937701538205, - "learning_rate": 0.00019999457500258165, - "loss": 46.0, - "step": 43376 - }, - { - "epoch": 3.3164745684959, - "grad_norm": 0.0036018602550029755, - "learning_rate": 0.00019999457475238517, - "loss": 46.0, - "step": 43377 - }, - { - "epoch": 3.31655102547929, - "grad_norm": 0.0010846088407561183, - "learning_rate": 0.00019999457450218286, - "loss": 46.0, - "step": 43378 - }, - { - "epoch": 3.3166274824626796, - "grad_norm": 0.0012248934945091605, - "learning_rate": 0.00019999457425197483, - "loss": 46.0, - "step": 43379 - }, - { - "epoch": 3.3167039394460693, - "grad_norm": 0.0010553468018770218, - "learning_rate": 0.00019999457400176102, - "loss": 46.0, - "step": 43380 - }, - { - "epoch": 3.3167803964294587, - "grad_norm": 0.002031085779890418, - "learning_rate": 0.00019999457375154142, - "loss": 46.0, - "step": 43381 - }, - { - "epoch": 3.3168568534128484, - "grad_norm": 0.002805246040225029, - "learning_rate": 0.00019999457350131607, - "loss": 46.0, - "step": 43382 - }, - { - "epoch": 3.316933310396238, - "grad_norm": 0.0017665355699136853, - "learning_rate": 0.00019999457325108494, - "loss": 46.0, - "step": 43383 - }, - { - "epoch": 3.317009767379628, - "grad_norm": 0.0013133903266862035, - "learning_rate": 0.00019999457300084805, - "loss": 46.0, - "step": 43384 - }, - { - "epoch": 3.3170862243630177, - "grad_norm": 0.00139354367274791, - "learning_rate": 0.0001999945727506054, - "loss": 46.0, - "step": 43385 - }, - { - "epoch": 3.3171626813464075, - "grad_norm": 0.002866447437554598, - "learning_rate": 0.00019999457250035696, - "loss": 46.0, - "step": 43386 - }, - { - "epoch": 3.317239138329797, - "grad_norm": 0.0027093978133052588, - "learning_rate": 0.00019999457225010274, - "loss": 46.0, - "step": 43387 - }, - { - "epoch": 3.317315595313187, - "grad_norm": 0.0018428182229399681, - "learning_rate": 0.0001999945719998428, - "loss": 46.0, - "step": 43388 - }, - { - "epoch": 3.3173920522965767, - "grad_norm": 0.0027941951993852854, - "learning_rate": 0.00019999457174957704, - "loss": 46.0, - "step": 43389 - }, - { - "epoch": 3.3174685092799665, - "grad_norm": 0.0016378681175410748, - "learning_rate": 0.00019999457149930553, - "loss": 46.0, - "step": 43390 - }, - { - "epoch": 3.3175449662633563, - "grad_norm": 0.003005365375429392, - "learning_rate": 0.00019999457124902827, - "loss": 46.0, - "step": 43391 - }, - { - "epoch": 3.317621423246746, - "grad_norm": 0.0010542947566136718, - "learning_rate": 0.00019999457099874522, - "loss": 46.0, - "step": 43392 - }, - { - "epoch": 3.3176978802301353, - "grad_norm": 0.0013423888012766838, - "learning_rate": 0.0001999945707484564, - "loss": 46.0, - "step": 43393 - }, - { - "epoch": 3.317774337213525, - "grad_norm": 0.0013756750850006938, - "learning_rate": 0.0001999945704981618, - "loss": 46.0, - "step": 43394 - }, - { - "epoch": 3.317850794196915, - "grad_norm": 0.0016079377382993698, - "learning_rate": 0.00019999457024786146, - "loss": 46.0, - "step": 43395 - }, - { - "epoch": 3.3179272511803046, - "grad_norm": 0.0007803951157256961, - "learning_rate": 0.00019999456999755534, - "loss": 46.0, - "step": 43396 - }, - { - "epoch": 3.3180037081636944, - "grad_norm": 0.003149746684357524, - "learning_rate": 0.00019999456974724344, - "loss": 46.0, - "step": 43397 - }, - { - "epoch": 3.318080165147084, - "grad_norm": 0.005063454154878855, - "learning_rate": 0.0001999945694969258, - "loss": 46.0, - "step": 43398 - }, - { - "epoch": 3.318156622130474, - "grad_norm": 0.0076774712651968, - "learning_rate": 0.00019999456924660235, - "loss": 46.0, - "step": 43399 - }, - { - "epoch": 3.3182330791138637, - "grad_norm": 0.001998207299038768, - "learning_rate": 0.00019999456899627316, - "loss": 46.0, - "step": 43400 - }, - { - "epoch": 3.3183095360972534, - "grad_norm": 0.001289700623601675, - "learning_rate": 0.00019999456874593817, - "loss": 46.0, - "step": 43401 - }, - { - "epoch": 3.318385993080643, - "grad_norm": 0.0027638860046863556, - "learning_rate": 0.00019999456849559746, - "loss": 46.0, - "step": 43402 - }, - { - "epoch": 3.3184624500640325, - "grad_norm": 0.0012664938112720847, - "learning_rate": 0.00019999456824525093, - "loss": 46.0, - "step": 43403 - }, - { - "epoch": 3.3185389070474223, - "grad_norm": 0.0010289938654750586, - "learning_rate": 0.00019999456799489867, - "loss": 46.0, - "step": 43404 - }, - { - "epoch": 3.318615364030812, - "grad_norm": 0.0012244055978953838, - "learning_rate": 0.0001999945677445406, - "loss": 46.0, - "step": 43405 - }, - { - "epoch": 3.318691821014202, - "grad_norm": 0.0036576027050614357, - "learning_rate": 0.0001999945674941768, - "loss": 46.0, - "step": 43406 - }, - { - "epoch": 3.3187682779975916, - "grad_norm": 0.0018814962822943926, - "learning_rate": 0.00019999456724380723, - "loss": 46.0, - "step": 43407 - }, - { - "epoch": 3.3188447349809813, - "grad_norm": 0.007668083067983389, - "learning_rate": 0.00019999456699343186, - "loss": 46.0, - "step": 43408 - }, - { - "epoch": 3.318921191964371, - "grad_norm": 0.0017301782499998808, - "learning_rate": 0.00019999456674305076, - "loss": 46.0, - "step": 43409 - }, - { - "epoch": 3.318997648947761, - "grad_norm": 0.0010808518854901195, - "learning_rate": 0.00019999456649266386, - "loss": 46.0, - "step": 43410 - }, - { - "epoch": 3.3190741059311506, - "grad_norm": 0.0018614014843478799, - "learning_rate": 0.0001999945662422712, - "loss": 46.0, - "step": 43411 - }, - { - "epoch": 3.3191505629145404, - "grad_norm": 0.0018076942069455981, - "learning_rate": 0.00019999456599187275, - "loss": 46.0, - "step": 43412 - }, - { - "epoch": 3.31922701989793, - "grad_norm": 0.0014691805699840188, - "learning_rate": 0.0001999945657414686, - "loss": 46.0, - "step": 43413 - }, - { - "epoch": 3.31930347688132, - "grad_norm": 0.006432017311453819, - "learning_rate": 0.0001999945654910586, - "loss": 46.0, - "step": 43414 - }, - { - "epoch": 3.319379933864709, - "grad_norm": 0.000985596445389092, - "learning_rate": 0.00019999456524064286, - "loss": 46.0, - "step": 43415 - }, - { - "epoch": 3.319456390848099, - "grad_norm": 0.0007833700510673225, - "learning_rate": 0.00019999456499022135, - "loss": 46.0, - "step": 43416 - }, - { - "epoch": 3.3195328478314887, - "grad_norm": 0.001098829903639853, - "learning_rate": 0.00019999456473979407, - "loss": 46.0, - "step": 43417 - }, - { - "epoch": 3.3196093048148785, - "grad_norm": 0.0013725137105211616, - "learning_rate": 0.00019999456448936104, - "loss": 46.0, - "step": 43418 - }, - { - "epoch": 3.3196857617982682, - "grad_norm": 0.0014277450973168015, - "learning_rate": 0.0001999945642389222, - "loss": 46.0, - "step": 43419 - }, - { - "epoch": 3.319762218781658, - "grad_norm": 0.0020582827273756266, - "learning_rate": 0.00019999456398847763, - "loss": 46.0, - "step": 43420 - }, - { - "epoch": 3.3198386757650478, - "grad_norm": 0.0021607165690511465, - "learning_rate": 0.00019999456373802728, - "loss": 46.0, - "step": 43421 - }, - { - "epoch": 3.3199151327484375, - "grad_norm": 0.0016946293180808425, - "learning_rate": 0.00019999456348757116, - "loss": 46.0, - "step": 43422 - }, - { - "epoch": 3.3199915897318273, - "grad_norm": 0.0027404495049268007, - "learning_rate": 0.00019999456323710927, - "loss": 46.0, - "step": 43423 - }, - { - "epoch": 3.3200680467152166, - "grad_norm": 0.004383963067084551, - "learning_rate": 0.00019999456298664162, - "loss": 46.0, - "step": 43424 - }, - { - "epoch": 3.3201445036986064, - "grad_norm": 0.0015503901522606611, - "learning_rate": 0.00019999456273616818, - "loss": 46.0, - "step": 43425 - }, - { - "epoch": 3.320220960681996, - "grad_norm": 0.0013477016473188996, - "learning_rate": 0.000199994562485689, - "loss": 46.0, - "step": 43426 - }, - { - "epoch": 3.320297417665386, - "grad_norm": 0.0021067361813038588, - "learning_rate": 0.00019999456223520403, - "loss": 46.0, - "step": 43427 - }, - { - "epoch": 3.3203738746487756, - "grad_norm": 0.002632143208757043, - "learning_rate": 0.0001999945619847133, - "loss": 46.0, - "step": 43428 - }, - { - "epoch": 3.3204503316321654, - "grad_norm": 0.004429586697369814, - "learning_rate": 0.0001999945617342168, - "loss": 46.0, - "step": 43429 - }, - { - "epoch": 3.320526788615555, - "grad_norm": 0.001210974995046854, - "learning_rate": 0.00019999456148371453, - "loss": 46.0, - "step": 43430 - }, - { - "epoch": 3.320603245598945, - "grad_norm": 0.0032355301082134247, - "learning_rate": 0.00019999456123320644, - "loss": 46.0, - "step": 43431 - }, - { - "epoch": 3.3206797025823347, - "grad_norm": 0.0020707454532384872, - "learning_rate": 0.00019999456098269267, - "loss": 46.0, - "step": 43432 - }, - { - "epoch": 3.3207561595657245, - "grad_norm": 0.0006517020519822836, - "learning_rate": 0.00019999456073217307, - "loss": 46.0, - "step": 43433 - }, - { - "epoch": 3.320832616549114, - "grad_norm": 0.001286494778469205, - "learning_rate": 0.00019999456048164774, - "loss": 46.0, - "step": 43434 - }, - { - "epoch": 3.320909073532504, - "grad_norm": 0.001964277122169733, - "learning_rate": 0.00019999456023111662, - "loss": 46.0, - "step": 43435 - }, - { - "epoch": 3.3209855305158937, - "grad_norm": 0.0018471956718713045, - "learning_rate": 0.00019999455998057972, - "loss": 46.0, - "step": 43436 - }, - { - "epoch": 3.321061987499283, - "grad_norm": 0.0009719212539494038, - "learning_rate": 0.00019999455973003706, - "loss": 46.0, - "step": 43437 - }, - { - "epoch": 3.321138444482673, - "grad_norm": 0.005236941855400801, - "learning_rate": 0.0001999945594794886, - "loss": 46.0, - "step": 43438 - }, - { - "epoch": 3.3212149014660626, - "grad_norm": 0.0023489014711230993, - "learning_rate": 0.00019999455922893445, - "loss": 46.0, - "step": 43439 - }, - { - "epoch": 3.3212913584494523, - "grad_norm": 0.002056208672001958, - "learning_rate": 0.00019999455897837446, - "loss": 46.0, - "step": 43440 - }, - { - "epoch": 3.321367815432842, - "grad_norm": 0.001000122050754726, - "learning_rate": 0.00019999455872780875, - "loss": 46.0, - "step": 43441 - }, - { - "epoch": 3.321444272416232, - "grad_norm": 0.002598110120743513, - "learning_rate": 0.00019999455847723722, - "loss": 46.0, - "step": 43442 - }, - { - "epoch": 3.3215207293996216, - "grad_norm": 0.001369926962070167, - "learning_rate": 0.00019999455822666, - "loss": 46.0, - "step": 43443 - }, - { - "epoch": 3.3215971863830114, - "grad_norm": 0.0024704348761588335, - "learning_rate": 0.0001999945579760769, - "loss": 46.0, - "step": 43444 - }, - { - "epoch": 3.321673643366401, - "grad_norm": 0.000950091693084687, - "learning_rate": 0.00019999455772548813, - "loss": 46.0, - "step": 43445 - }, - { - "epoch": 3.3217501003497905, - "grad_norm": 0.002488666446879506, - "learning_rate": 0.00019999455747489353, - "loss": 46.0, - "step": 43446 - }, - { - "epoch": 3.32182655733318, - "grad_norm": 0.001453764969483018, - "learning_rate": 0.00019999455722429318, - "loss": 46.0, - "step": 43447 - }, - { - "epoch": 3.32190301431657, - "grad_norm": 0.007059496361762285, - "learning_rate": 0.00019999455697368706, - "loss": 46.0, - "step": 43448 - }, - { - "epoch": 3.3219794712999597, - "grad_norm": 0.0013031256385147572, - "learning_rate": 0.00019999455672307516, - "loss": 46.0, - "step": 43449 - }, - { - "epoch": 3.3220559282833495, - "grad_norm": 0.001736507867462933, - "learning_rate": 0.0001999945564724575, - "loss": 46.0, - "step": 43450 - }, - { - "epoch": 3.3221323852667393, - "grad_norm": 0.002132937777787447, - "learning_rate": 0.0001999945562218341, - "loss": 46.0, - "step": 43451 - }, - { - "epoch": 3.322208842250129, - "grad_norm": 0.002812823746353388, - "learning_rate": 0.00019999455597120492, - "loss": 46.0, - "step": 43452 - }, - { - "epoch": 3.322285299233519, - "grad_norm": 0.005177715793251991, - "learning_rate": 0.00019999455572056993, - "loss": 46.0, - "step": 43453 - }, - { - "epoch": 3.3223617562169085, - "grad_norm": 0.002652070950716734, - "learning_rate": 0.0001999945554699292, - "loss": 46.0, - "step": 43454 - }, - { - "epoch": 3.3224382132002983, - "grad_norm": 0.0009143145871348679, - "learning_rate": 0.0001999945552192827, - "loss": 46.0, - "step": 43455 - }, - { - "epoch": 3.322514670183688, - "grad_norm": 0.0012752431211993098, - "learning_rate": 0.00019999455496863045, - "loss": 46.0, - "step": 43456 - }, - { - "epoch": 3.322591127167078, - "grad_norm": 0.0007557413773611188, - "learning_rate": 0.00019999455471797237, - "loss": 46.0, - "step": 43457 - }, - { - "epoch": 3.3226675841504676, - "grad_norm": 0.0033525999169796705, - "learning_rate": 0.00019999455446730857, - "loss": 46.0, - "step": 43458 - }, - { - "epoch": 3.322744041133857, - "grad_norm": 0.0038206998724490404, - "learning_rate": 0.000199994554216639, - "loss": 46.0, - "step": 43459 - }, - { - "epoch": 3.3228204981172467, - "grad_norm": 0.0028049773536622524, - "learning_rate": 0.00019999455396596365, - "loss": 46.0, - "step": 43460 - }, - { - "epoch": 3.3228969551006364, - "grad_norm": 0.002209616359323263, - "learning_rate": 0.00019999455371528252, - "loss": 46.0, - "step": 43461 - }, - { - "epoch": 3.322973412084026, - "grad_norm": 0.004158904310315847, - "learning_rate": 0.00019999455346459563, - "loss": 46.0, - "step": 43462 - }, - { - "epoch": 3.323049869067416, - "grad_norm": 0.0022466732189059258, - "learning_rate": 0.000199994553213903, - "loss": 46.0, - "step": 43463 - }, - { - "epoch": 3.3231263260508057, - "grad_norm": 0.006654097232967615, - "learning_rate": 0.00019999455296320457, - "loss": 46.0, - "step": 43464 - }, - { - "epoch": 3.3232027830341955, - "grad_norm": 0.0010583935072645545, - "learning_rate": 0.00019999455271250036, - "loss": 46.0, - "step": 43465 - }, - { - "epoch": 3.3232792400175852, - "grad_norm": 0.000667455664370209, - "learning_rate": 0.00019999455246179042, - "loss": 46.0, - "step": 43466 - }, - { - "epoch": 3.323355697000975, - "grad_norm": 0.0027885313611477613, - "learning_rate": 0.0001999945522110747, - "loss": 46.0, - "step": 43467 - }, - { - "epoch": 3.3234321539843643, - "grad_norm": 0.0023187026381492615, - "learning_rate": 0.00019999455196035318, - "loss": 46.0, - "step": 43468 - }, - { - "epoch": 3.323508610967754, - "grad_norm": 0.0014646988129243255, - "learning_rate": 0.0001999945517096259, - "loss": 46.0, - "step": 43469 - }, - { - "epoch": 3.323585067951144, - "grad_norm": 0.002519245259463787, - "learning_rate": 0.00019999455145889287, - "loss": 46.0, - "step": 43470 - }, - { - "epoch": 3.3236615249345336, - "grad_norm": 0.0015073021641001105, - "learning_rate": 0.00019999455120815407, - "loss": 46.0, - "step": 43471 - }, - { - "epoch": 3.3237379819179234, - "grad_norm": 0.0026442264206707478, - "learning_rate": 0.0001999945509574095, - "loss": 46.0, - "step": 43472 - }, - { - "epoch": 3.323814438901313, - "grad_norm": 0.0029849254060536623, - "learning_rate": 0.00019999455070665914, - "loss": 46.0, - "step": 43473 - }, - { - "epoch": 3.323890895884703, - "grad_norm": 0.0021172654815018177, - "learning_rate": 0.000199994550455903, - "loss": 46.0, - "step": 43474 - }, - { - "epoch": 3.3239673528680926, - "grad_norm": 0.001788344350643456, - "learning_rate": 0.00019999455020514113, - "loss": 46.0, - "step": 43475 - }, - { - "epoch": 3.3240438098514824, - "grad_norm": 0.0015312808100134134, - "learning_rate": 0.0001999945499543735, - "loss": 46.0, - "step": 43476 - }, - { - "epoch": 3.324120266834872, - "grad_norm": 0.0009136635926552117, - "learning_rate": 0.00019999454970360005, - "loss": 46.0, - "step": 43477 - }, - { - "epoch": 3.324196723818262, - "grad_norm": 0.0013022496132180095, - "learning_rate": 0.00019999454945282086, - "loss": 46.0, - "step": 43478 - }, - { - "epoch": 3.3242731808016517, - "grad_norm": 0.0030702543444931507, - "learning_rate": 0.0001999945492020359, - "loss": 46.0, - "step": 43479 - }, - { - "epoch": 3.324349637785041, - "grad_norm": 0.0025544846430420876, - "learning_rate": 0.00019999454895124516, - "loss": 46.0, - "step": 43480 - }, - { - "epoch": 3.3244260947684308, - "grad_norm": 0.0015385471051558852, - "learning_rate": 0.00019999454870044868, - "loss": 46.0, - "step": 43481 - }, - { - "epoch": 3.3245025517518205, - "grad_norm": 0.002480618190020323, - "learning_rate": 0.0001999945484496464, - "loss": 46.0, - "step": 43482 - }, - { - "epoch": 3.3245790087352103, - "grad_norm": 0.004908351693302393, - "learning_rate": 0.00019999454819883837, - "loss": 46.0, - "step": 43483 - }, - { - "epoch": 3.3246554657186, - "grad_norm": 0.0006085407803766429, - "learning_rate": 0.00019999454794802457, - "loss": 46.0, - "step": 43484 - }, - { - "epoch": 3.32473192270199, - "grad_norm": 0.0011776385363191366, - "learning_rate": 0.00019999454769720497, - "loss": 46.0, - "step": 43485 - }, - { - "epoch": 3.3248083796853796, - "grad_norm": 0.001425719354301691, - "learning_rate": 0.00019999454744637962, - "loss": 46.0, - "step": 43486 - }, - { - "epoch": 3.3248848366687693, - "grad_norm": 0.00261881691403687, - "learning_rate": 0.00019999454719554853, - "loss": 46.0, - "step": 43487 - }, - { - "epoch": 3.324961293652159, - "grad_norm": 0.0017293972196057439, - "learning_rate": 0.00019999454694471163, - "loss": 46.0, - "step": 43488 - }, - { - "epoch": 3.325037750635549, - "grad_norm": 0.00608735391870141, - "learning_rate": 0.000199994546693869, - "loss": 46.0, - "step": 43489 - }, - { - "epoch": 3.325114207618938, - "grad_norm": 0.0010004539508372545, - "learning_rate": 0.00019999454644302055, - "loss": 46.0, - "step": 43490 - }, - { - "epoch": 3.325190664602328, - "grad_norm": 0.007039217744022608, - "learning_rate": 0.00019999454619216636, - "loss": 46.0, - "step": 43491 - }, - { - "epoch": 3.3252671215857177, - "grad_norm": 0.0023110767360776663, - "learning_rate": 0.0001999945459413064, - "loss": 46.0, - "step": 43492 - }, - { - "epoch": 3.3253435785691074, - "grad_norm": 0.002817175816744566, - "learning_rate": 0.00019999454569044067, - "loss": 46.0, - "step": 43493 - }, - { - "epoch": 3.325420035552497, - "grad_norm": 0.0013660419499501586, - "learning_rate": 0.00019999454543956916, - "loss": 46.0, - "step": 43494 - }, - { - "epoch": 3.325496492535887, - "grad_norm": 0.0023869769647717476, - "learning_rate": 0.0001999945451886919, - "loss": 46.0, - "step": 43495 - }, - { - "epoch": 3.3255729495192767, - "grad_norm": 0.001902631833218038, - "learning_rate": 0.00019999454493780888, - "loss": 46.0, - "step": 43496 - }, - { - "epoch": 3.3256494065026665, - "grad_norm": 0.0009875389514490962, - "learning_rate": 0.00019999454468692008, - "loss": 46.0, - "step": 43497 - }, - { - "epoch": 3.3257258634860563, - "grad_norm": 0.001548259286209941, - "learning_rate": 0.00019999454443602547, - "loss": 46.0, - "step": 43498 - }, - { - "epoch": 3.325802320469446, - "grad_norm": 0.005543024279177189, - "learning_rate": 0.00019999454418512515, - "loss": 46.0, - "step": 43499 - }, - { - "epoch": 3.3258787774528358, - "grad_norm": 0.003943361807614565, - "learning_rate": 0.00019999454393421903, - "loss": 46.0, - "step": 43500 - }, - { - "epoch": 3.3259552344362255, - "grad_norm": 0.017705580219626427, - "learning_rate": 0.00019999454368330714, - "loss": 46.0, - "step": 43501 - }, - { - "epoch": 3.326031691419615, - "grad_norm": 0.0012057377025485039, - "learning_rate": 0.0001999945434323895, - "loss": 46.0, - "step": 43502 - }, - { - "epoch": 3.3261081484030046, - "grad_norm": 0.001260724849998951, - "learning_rate": 0.00019999454318146608, - "loss": 46.0, - "step": 43503 - }, - { - "epoch": 3.3261846053863944, - "grad_norm": 0.0038510174490511417, - "learning_rate": 0.0001999945429305369, - "loss": 46.0, - "step": 43504 - }, - { - "epoch": 3.326261062369784, - "grad_norm": 0.001594186876900494, - "learning_rate": 0.00019999454267960194, - "loss": 46.0, - "step": 43505 - }, - { - "epoch": 3.326337519353174, - "grad_norm": 0.0012539097806438804, - "learning_rate": 0.0001999945424286612, - "loss": 46.0, - "step": 43506 - }, - { - "epoch": 3.3264139763365637, - "grad_norm": 0.0032835793681442738, - "learning_rate": 0.0001999945421777147, - "loss": 46.0, - "step": 43507 - }, - { - "epoch": 3.3264904333199534, - "grad_norm": 0.0013495655730366707, - "learning_rate": 0.00019999454192676244, - "loss": 46.0, - "step": 43508 - }, - { - "epoch": 3.326566890303343, - "grad_norm": 0.0013800951419398189, - "learning_rate": 0.0001999945416758044, - "loss": 46.0, - "step": 43509 - }, - { - "epoch": 3.326643347286733, - "grad_norm": 0.0017246020724996924, - "learning_rate": 0.00019999454142484059, - "loss": 46.0, - "step": 43510 - }, - { - "epoch": 3.3267198042701227, - "grad_norm": 0.00484325410798192, - "learning_rate": 0.000199994541173871, - "loss": 46.0, - "step": 43511 - }, - { - "epoch": 3.326796261253512, - "grad_norm": 0.004459098447114229, - "learning_rate": 0.0001999945409228957, - "loss": 46.0, - "step": 43512 - }, - { - "epoch": 3.326872718236902, - "grad_norm": 0.0030825668945908546, - "learning_rate": 0.00019999454067191457, - "loss": 46.0, - "step": 43513 - }, - { - "epoch": 3.3269491752202915, - "grad_norm": 0.0016127012204378843, - "learning_rate": 0.00019999454042092768, - "loss": 46.0, - "step": 43514 - }, - { - "epoch": 3.3270256322036813, - "grad_norm": 0.005958862602710724, - "learning_rate": 0.000199994540169935, - "loss": 46.0, - "step": 43515 - }, - { - "epoch": 3.327102089187071, - "grad_norm": 0.007319292984902859, - "learning_rate": 0.0001999945399189366, - "loss": 46.0, - "step": 43516 - }, - { - "epoch": 3.327178546170461, - "grad_norm": 0.0021405229344964027, - "learning_rate": 0.00019999453966793244, - "loss": 46.0, - "step": 43517 - }, - { - "epoch": 3.3272550031538506, - "grad_norm": 0.0013300413265824318, - "learning_rate": 0.00019999453941692248, - "loss": 46.0, - "step": 43518 - }, - { - "epoch": 3.3273314601372403, - "grad_norm": 0.0036079774145036936, - "learning_rate": 0.00019999453916590674, - "loss": 46.0, - "step": 43519 - }, - { - "epoch": 3.32740791712063, - "grad_norm": 0.01782381907105446, - "learning_rate": 0.00019999453891488523, - "loss": 46.0, - "step": 43520 - }, - { - "epoch": 3.32748437410402, - "grad_norm": 0.0008638953440822661, - "learning_rate": 0.00019999453866385798, - "loss": 46.0, - "step": 43521 - }, - { - "epoch": 3.3275608310874096, - "grad_norm": 0.0013219933025538921, - "learning_rate": 0.00019999453841282493, - "loss": 46.0, - "step": 43522 - }, - { - "epoch": 3.3276372880707994, - "grad_norm": 0.0019455340225249529, - "learning_rate": 0.00019999453816178613, - "loss": 46.0, - "step": 43523 - }, - { - "epoch": 3.3277137450541887, - "grad_norm": 0.0022216681391000748, - "learning_rate": 0.00019999453791074155, - "loss": 46.0, - "step": 43524 - }, - { - "epoch": 3.3277902020375785, - "grad_norm": 0.0011177087435498834, - "learning_rate": 0.00019999453765969123, - "loss": 46.0, - "step": 43525 - }, - { - "epoch": 3.3278666590209682, - "grad_norm": 0.0022819102741777897, - "learning_rate": 0.0001999945374086351, - "loss": 46.0, - "step": 43526 - }, - { - "epoch": 3.327943116004358, - "grad_norm": 0.0015371522167697549, - "learning_rate": 0.00019999453715757325, - "loss": 46.0, - "step": 43527 - }, - { - "epoch": 3.3280195729877478, - "grad_norm": 0.006041317712515593, - "learning_rate": 0.00019999453690650558, - "loss": 46.0, - "step": 43528 - }, - { - "epoch": 3.3280960299711375, - "grad_norm": 0.0029390607960522175, - "learning_rate": 0.00019999453665543214, - "loss": 46.0, - "step": 43529 - }, - { - "epoch": 3.3281724869545273, - "grad_norm": 0.0015405311714857817, - "learning_rate": 0.00019999453640435298, - "loss": 46.0, - "step": 43530 - }, - { - "epoch": 3.328248943937917, - "grad_norm": 0.002567627001553774, - "learning_rate": 0.00019999453615326802, - "loss": 46.0, - "step": 43531 - }, - { - "epoch": 3.328325400921307, - "grad_norm": 0.0012853796361014247, - "learning_rate": 0.00019999453590217729, - "loss": 46.0, - "step": 43532 - }, - { - "epoch": 3.3284018579046966, - "grad_norm": 0.003097273176535964, - "learning_rate": 0.0001999945356510808, - "loss": 46.0, - "step": 43533 - }, - { - "epoch": 3.328478314888086, - "grad_norm": 0.0008199003059417009, - "learning_rate": 0.00019999453539997855, - "loss": 46.0, - "step": 43534 - }, - { - "epoch": 3.3285547718714756, - "grad_norm": 0.0030198912136256695, - "learning_rate": 0.0001999945351488705, - "loss": 46.0, - "step": 43535 - }, - { - "epoch": 3.3286312288548654, - "grad_norm": 0.004851036239415407, - "learning_rate": 0.00019999453489775667, - "loss": 46.0, - "step": 43536 - }, - { - "epoch": 3.328707685838255, - "grad_norm": 0.0011911002220585942, - "learning_rate": 0.0001999945346466371, - "loss": 46.0, - "step": 43537 - }, - { - "epoch": 3.328784142821645, - "grad_norm": 0.0018139132298529148, - "learning_rate": 0.00019999453439551178, - "loss": 46.0, - "step": 43538 - }, - { - "epoch": 3.3288605998050347, - "grad_norm": 0.0007225653971545398, - "learning_rate": 0.0001999945341443807, - "loss": 46.0, - "step": 43539 - }, - { - "epoch": 3.3289370567884244, - "grad_norm": 0.0017102648271247745, - "learning_rate": 0.00019999453389324382, - "loss": 46.0, - "step": 43540 - }, - { - "epoch": 3.329013513771814, - "grad_norm": 0.0032987946178764105, - "learning_rate": 0.00019999453364210115, - "loss": 46.0, - "step": 43541 - }, - { - "epoch": 3.329089970755204, - "grad_norm": 0.0033472159411758184, - "learning_rate": 0.00019999453339095274, - "loss": 46.0, - "step": 43542 - }, - { - "epoch": 3.3291664277385937, - "grad_norm": 0.004375026095658541, - "learning_rate": 0.00019999453313979855, - "loss": 46.0, - "step": 43543 - }, - { - "epoch": 3.3292428847219835, - "grad_norm": 0.0013529694406315684, - "learning_rate": 0.0001999945328886386, - "loss": 46.0, - "step": 43544 - }, - { - "epoch": 3.3293193417053732, - "grad_norm": 0.0019949353300035, - "learning_rate": 0.00019999453263747286, - "loss": 46.0, - "step": 43545 - }, - { - "epoch": 3.3293957986887626, - "grad_norm": 0.0012880043359473348, - "learning_rate": 0.00019999453238630138, - "loss": 46.0, - "step": 43546 - }, - { - "epoch": 3.3294722556721523, - "grad_norm": 0.002864767564460635, - "learning_rate": 0.0001999945321351241, - "loss": 46.0, - "step": 43547 - }, - { - "epoch": 3.329548712655542, - "grad_norm": 0.0013542111264541745, - "learning_rate": 0.00019999453188394108, - "loss": 46.0, - "step": 43548 - }, - { - "epoch": 3.329625169638932, - "grad_norm": 0.0031665111891925335, - "learning_rate": 0.00019999453163275228, - "loss": 46.0, - "step": 43549 - }, - { - "epoch": 3.3297016266223216, - "grad_norm": 0.0031101955100893974, - "learning_rate": 0.0001999945313815577, - "loss": 46.0, - "step": 43550 - }, - { - "epoch": 3.3297780836057114, - "grad_norm": 0.001858170609921217, - "learning_rate": 0.00019999453113035736, - "loss": 46.0, - "step": 43551 - }, - { - "epoch": 3.329854540589101, - "grad_norm": 0.0019680005498230457, - "learning_rate": 0.00019999453087915126, - "loss": 46.0, - "step": 43552 - }, - { - "epoch": 3.329930997572491, - "grad_norm": 0.0031915337312966585, - "learning_rate": 0.00019999453062793937, - "loss": 46.0, - "step": 43553 - }, - { - "epoch": 3.3300074545558807, - "grad_norm": 0.0036522019654512405, - "learning_rate": 0.0001999945303767217, - "loss": 46.0, - "step": 43554 - }, - { - "epoch": 3.33008391153927, - "grad_norm": 0.002773649524897337, - "learning_rate": 0.00019999453012549832, - "loss": 46.0, - "step": 43555 - }, - { - "epoch": 3.3301603685226597, - "grad_norm": 0.0010565861593931913, - "learning_rate": 0.00019999452987426913, - "loss": 46.0, - "step": 43556 - }, - { - "epoch": 3.3302368255060495, - "grad_norm": 0.0014284523203969002, - "learning_rate": 0.00019999452962303415, - "loss": 46.0, - "step": 43557 - }, - { - "epoch": 3.3303132824894393, - "grad_norm": 0.002501044189557433, - "learning_rate": 0.00019999452937179344, - "loss": 46.0, - "step": 43558 - }, - { - "epoch": 3.330389739472829, - "grad_norm": 0.003685515373945236, - "learning_rate": 0.00019999452912054693, - "loss": 46.0, - "step": 43559 - }, - { - "epoch": 3.3304661964562188, - "grad_norm": 0.000850306882057339, - "learning_rate": 0.00019999452886929468, - "loss": 46.0, - "step": 43560 - }, - { - "epoch": 3.3305426534396085, - "grad_norm": 0.0026118706446141005, - "learning_rate": 0.00019999452861803666, - "loss": 46.0, - "step": 43561 - }, - { - "epoch": 3.3306191104229983, - "grad_norm": 0.0008020975510589778, - "learning_rate": 0.00019999452836677286, - "loss": 46.0, - "step": 43562 - }, - { - "epoch": 3.330695567406388, - "grad_norm": 0.0011191933881491423, - "learning_rate": 0.0001999945281155033, - "loss": 46.0, - "step": 43563 - }, - { - "epoch": 3.330772024389778, - "grad_norm": 0.002534350147470832, - "learning_rate": 0.00019999452786422794, - "loss": 46.0, - "step": 43564 - }, - { - "epoch": 3.3308484813731676, - "grad_norm": 0.0011979220435023308, - "learning_rate": 0.00019999452761294685, - "loss": 46.0, - "step": 43565 - }, - { - "epoch": 3.3309249383565573, - "grad_norm": 0.0009882686426863074, - "learning_rate": 0.00019999452736165995, - "loss": 46.0, - "step": 43566 - }, - { - "epoch": 3.331001395339947, - "grad_norm": 0.0010714128147810698, - "learning_rate": 0.00019999452711036731, - "loss": 46.0, - "step": 43567 - }, - { - "epoch": 3.3310778523233364, - "grad_norm": 0.002566568087786436, - "learning_rate": 0.00019999452685906893, - "loss": 46.0, - "step": 43568 - }, - { - "epoch": 3.331154309306726, - "grad_norm": 0.0020654734689742327, - "learning_rate": 0.00019999452660776472, - "loss": 46.0, - "step": 43569 - }, - { - "epoch": 3.331230766290116, - "grad_norm": 0.0011668906081467867, - "learning_rate": 0.00019999452635645478, - "loss": 46.0, - "step": 43570 - }, - { - "epoch": 3.3313072232735057, - "grad_norm": 0.005298765376210213, - "learning_rate": 0.00019999452610513905, - "loss": 46.0, - "step": 43571 - }, - { - "epoch": 3.3313836802568955, - "grad_norm": 0.003780979895964265, - "learning_rate": 0.00019999452585381754, - "loss": 46.0, - "step": 43572 - }, - { - "epoch": 3.3314601372402852, - "grad_norm": 0.0013556742342188954, - "learning_rate": 0.0001999945256024903, - "loss": 46.0, - "step": 43573 - }, - { - "epoch": 3.331536594223675, - "grad_norm": 0.001188464812003076, - "learning_rate": 0.00019999452535115727, - "loss": 46.0, - "step": 43574 - }, - { - "epoch": 3.3316130512070647, - "grad_norm": 0.005002522375434637, - "learning_rate": 0.00019999452509981847, - "loss": 46.0, - "step": 43575 - }, - { - "epoch": 3.3316895081904545, - "grad_norm": 0.0009228858980350196, - "learning_rate": 0.0001999945248484739, - "loss": 46.0, - "step": 43576 - }, - { - "epoch": 3.331765965173844, - "grad_norm": 0.0015604887157678604, - "learning_rate": 0.00019999452459712355, - "loss": 46.0, - "step": 43577 - }, - { - "epoch": 3.3318424221572336, - "grad_norm": 0.0022732976358383894, - "learning_rate": 0.00019999452434576743, - "loss": 46.0, - "step": 43578 - }, - { - "epoch": 3.3319188791406233, - "grad_norm": 0.0021010686177760363, - "learning_rate": 0.00019999452409440557, - "loss": 46.0, - "step": 43579 - }, - { - "epoch": 3.331995336124013, - "grad_norm": 0.0011510387994349003, - "learning_rate": 0.00019999452384303793, - "loss": 46.0, - "step": 43580 - }, - { - "epoch": 3.332071793107403, - "grad_norm": 0.0020070946775376797, - "learning_rate": 0.00019999452359166454, - "loss": 46.0, - "step": 43581 - }, - { - "epoch": 3.3321482500907926, - "grad_norm": 0.0013966704718768597, - "learning_rate": 0.00019999452334028536, - "loss": 46.0, - "step": 43582 - }, - { - "epoch": 3.3322247070741824, - "grad_norm": 0.0052477153949439526, - "learning_rate": 0.0001999945230889004, - "loss": 46.0, - "step": 43583 - }, - { - "epoch": 3.332301164057572, - "grad_norm": 0.005325807724148035, - "learning_rate": 0.00019999452283750967, - "loss": 46.0, - "step": 43584 - }, - { - "epoch": 3.332377621040962, - "grad_norm": 0.001477902987971902, - "learning_rate": 0.0001999945225861132, - "loss": 46.0, - "step": 43585 - }, - { - "epoch": 3.3324540780243517, - "grad_norm": 0.0020473082549870014, - "learning_rate": 0.0001999945223347109, - "loss": 46.0, - "step": 43586 - }, - { - "epoch": 3.3325305350077414, - "grad_norm": 0.009419417940080166, - "learning_rate": 0.00019999452208330288, - "loss": 46.0, - "step": 43587 - }, - { - "epoch": 3.332606991991131, - "grad_norm": 0.0013555992627516389, - "learning_rate": 0.00019999452183188908, - "loss": 46.0, - "step": 43588 - }, - { - "epoch": 3.332683448974521, - "grad_norm": 0.001202745595946908, - "learning_rate": 0.0001999945215804695, - "loss": 46.0, - "step": 43589 - }, - { - "epoch": 3.3327599059579103, - "grad_norm": 0.0014870216837152839, - "learning_rate": 0.00019999452132904417, - "loss": 46.0, - "step": 43590 - }, - { - "epoch": 3.3328363629413, - "grad_norm": 0.0015464737080037594, - "learning_rate": 0.00019999452107761308, - "loss": 46.0, - "step": 43591 - }, - { - "epoch": 3.33291281992469, - "grad_norm": 0.0023479051887989044, - "learning_rate": 0.0001999945208261762, - "loss": 46.0, - "step": 43592 - }, - { - "epoch": 3.3329892769080796, - "grad_norm": 0.0017061260296031833, - "learning_rate": 0.00019999452057473354, - "loss": 46.0, - "step": 43593 - }, - { - "epoch": 3.3330657338914693, - "grad_norm": 0.002244638977572322, - "learning_rate": 0.00019999452032328513, - "loss": 46.0, - "step": 43594 - }, - { - "epoch": 3.333142190874859, - "grad_norm": 0.005626108963042498, - "learning_rate": 0.00019999452007183095, - "loss": 46.0, - "step": 43595 - }, - { - "epoch": 3.333218647858249, - "grad_norm": 0.002107857493683696, - "learning_rate": 0.00019999451982037102, - "loss": 46.0, - "step": 43596 - }, - { - "epoch": 3.3332951048416386, - "grad_norm": 0.0018534853588789701, - "learning_rate": 0.00019999451956890528, - "loss": 46.0, - "step": 43597 - }, - { - "epoch": 3.3333715618250284, - "grad_norm": 0.004295188467949629, - "learning_rate": 0.0001999945193174338, - "loss": 46.0, - "step": 43598 - }, - { - "epoch": 3.3334480188084177, - "grad_norm": 0.0009946598438546062, - "learning_rate": 0.00019999451906595655, - "loss": 46.0, - "step": 43599 - }, - { - "epoch": 3.3335244757918074, - "grad_norm": 0.0009655312169343233, - "learning_rate": 0.00019999451881447353, - "loss": 46.0, - "step": 43600 - }, - { - "epoch": 3.333600932775197, - "grad_norm": 0.003998894244432449, - "learning_rate": 0.00019999451856298473, - "loss": 46.0, - "step": 43601 - }, - { - "epoch": 3.333677389758587, - "grad_norm": 0.0012280386872589588, - "learning_rate": 0.00019999451831149013, - "loss": 46.0, - "step": 43602 - }, - { - "epoch": 3.3337538467419767, - "grad_norm": 0.0018116086721420288, - "learning_rate": 0.00019999451805998981, - "loss": 46.0, - "step": 43603 - }, - { - "epoch": 3.3338303037253665, - "grad_norm": 0.002628431422635913, - "learning_rate": 0.0001999945178084837, - "loss": 46.0, - "step": 43604 - }, - { - "epoch": 3.3339067607087562, - "grad_norm": 0.0012622167123481631, - "learning_rate": 0.00019999451755697183, - "loss": 46.0, - "step": 43605 - }, - { - "epoch": 3.333983217692146, - "grad_norm": 0.005994723178446293, - "learning_rate": 0.0001999945173054542, - "loss": 46.0, - "step": 43606 - }, - { - "epoch": 3.3340596746755358, - "grad_norm": 0.002169710351154208, - "learning_rate": 0.0001999945170539308, - "loss": 46.0, - "step": 43607 - }, - { - "epoch": 3.3341361316589255, - "grad_norm": 0.0018565854988992214, - "learning_rate": 0.0001999945168024016, - "loss": 46.0, - "step": 43608 - }, - { - "epoch": 3.3342125886423153, - "grad_norm": 0.0028142984956502914, - "learning_rate": 0.00019999451655086667, - "loss": 46.0, - "step": 43609 - }, - { - "epoch": 3.334289045625705, - "grad_norm": 0.005660689901560545, - "learning_rate": 0.00019999451629932593, - "loss": 46.0, - "step": 43610 - }, - { - "epoch": 3.3343655026090944, - "grad_norm": 0.0025527826510369778, - "learning_rate": 0.00019999451604777946, - "loss": 46.0, - "step": 43611 - }, - { - "epoch": 3.334441959592484, - "grad_norm": 0.0024709736462682486, - "learning_rate": 0.0001999945157962272, - "loss": 46.0, - "step": 43612 - }, - { - "epoch": 3.334518416575874, - "grad_norm": 0.0009529362432658672, - "learning_rate": 0.00019999451554466918, - "loss": 46.0, - "step": 43613 - }, - { - "epoch": 3.3345948735592637, - "grad_norm": 0.0010676614474505186, - "learning_rate": 0.00019999451529310538, - "loss": 46.0, - "step": 43614 - }, - { - "epoch": 3.3346713305426534, - "grad_norm": 0.0011848678113892674, - "learning_rate": 0.0001999945150415358, - "loss": 46.0, - "step": 43615 - }, - { - "epoch": 3.334747787526043, - "grad_norm": 0.0018325322307646275, - "learning_rate": 0.00019999451478996047, - "loss": 46.0, - "step": 43616 - }, - { - "epoch": 3.334824244509433, - "grad_norm": 0.008592256344854832, - "learning_rate": 0.00019999451453837938, - "loss": 46.0, - "step": 43617 - }, - { - "epoch": 3.3349007014928227, - "grad_norm": 0.0025443288031965494, - "learning_rate": 0.0001999945142867925, - "loss": 46.0, - "step": 43618 - }, - { - "epoch": 3.3349771584762125, - "grad_norm": 0.004462145734578371, - "learning_rate": 0.00019999451403519985, - "loss": 46.0, - "step": 43619 - }, - { - "epoch": 3.335053615459602, - "grad_norm": 0.004196337424218655, - "learning_rate": 0.00019999451378360146, - "loss": 46.0, - "step": 43620 - }, - { - "epoch": 3.3351300724429915, - "grad_norm": 0.005828474648296833, - "learning_rate": 0.00019999451353199728, - "loss": 46.0, - "step": 43621 - }, - { - "epoch": 3.3352065294263813, - "grad_norm": 0.0027317239437252283, - "learning_rate": 0.00019999451328038732, - "loss": 46.0, - "step": 43622 - }, - { - "epoch": 3.335282986409771, - "grad_norm": 0.0033082624431699514, - "learning_rate": 0.00019999451302877162, - "loss": 46.0, - "step": 43623 - }, - { - "epoch": 3.335359443393161, - "grad_norm": 0.006514125503599644, - "learning_rate": 0.0001999945127771501, - "loss": 46.0, - "step": 43624 - }, - { - "epoch": 3.3354359003765506, - "grad_norm": 0.003531440393999219, - "learning_rate": 0.00019999451252552286, - "loss": 46.0, - "step": 43625 - }, - { - "epoch": 3.3355123573599403, - "grad_norm": 0.0009159431792795658, - "learning_rate": 0.00019999451227388984, - "loss": 46.0, - "step": 43626 - }, - { - "epoch": 3.33558881434333, - "grad_norm": 0.005102984607219696, - "learning_rate": 0.00019999451202225106, - "loss": 46.0, - "step": 43627 - }, - { - "epoch": 3.33566527132672, - "grad_norm": 0.0012662075459957123, - "learning_rate": 0.0001999945117706065, - "loss": 46.0, - "step": 43628 - }, - { - "epoch": 3.3357417283101096, - "grad_norm": 0.0011329468106850982, - "learning_rate": 0.00019999451151895615, - "loss": 46.0, - "step": 43629 - }, - { - "epoch": 3.3358181852934994, - "grad_norm": 0.0008179281721822917, - "learning_rate": 0.00019999451126730003, - "loss": 46.0, - "step": 43630 - }, - { - "epoch": 3.335894642276889, - "grad_norm": 0.005298284348100424, - "learning_rate": 0.00019999451101563817, - "loss": 46.0, - "step": 43631 - }, - { - "epoch": 3.335971099260279, - "grad_norm": 0.0025736666284501553, - "learning_rate": 0.00019999451076397053, - "loss": 46.0, - "step": 43632 - }, - { - "epoch": 3.336047556243668, - "grad_norm": 0.0015905586769804358, - "learning_rate": 0.00019999451051229712, - "loss": 46.0, - "step": 43633 - }, - { - "epoch": 3.336124013227058, - "grad_norm": 0.003255372168496251, - "learning_rate": 0.00019999451026061796, - "loss": 46.0, - "step": 43634 - }, - { - "epoch": 3.3362004702104477, - "grad_norm": 0.0016996952472254634, - "learning_rate": 0.000199994510008933, - "loss": 46.0, - "step": 43635 - }, - { - "epoch": 3.3362769271938375, - "grad_norm": 0.0011706857476383448, - "learning_rate": 0.00019999450975724227, - "loss": 46.0, - "step": 43636 - }, - { - "epoch": 3.3363533841772273, - "grad_norm": 0.00977516919374466, - "learning_rate": 0.0001999945095055458, - "loss": 46.0, - "step": 43637 - }, - { - "epoch": 3.336429841160617, - "grad_norm": 0.0015745279379189014, - "learning_rate": 0.00019999450925384355, - "loss": 46.0, - "step": 43638 - }, - { - "epoch": 3.336506298144007, - "grad_norm": 0.0017607235349714756, - "learning_rate": 0.00019999450900213552, - "loss": 46.0, - "step": 43639 - }, - { - "epoch": 3.3365827551273965, - "grad_norm": 0.0012273314641788602, - "learning_rate": 0.00019999450875042172, - "loss": 46.0, - "step": 43640 - }, - { - "epoch": 3.3366592121107863, - "grad_norm": 0.0018174175638705492, - "learning_rate": 0.00019999450849870215, - "loss": 46.0, - "step": 43641 - }, - { - "epoch": 3.336735669094176, - "grad_norm": 0.0036967771593481302, - "learning_rate": 0.00019999450824697684, - "loss": 46.0, - "step": 43642 - }, - { - "epoch": 3.3368121260775654, - "grad_norm": 0.0022941918577998877, - "learning_rate": 0.00019999450799524572, - "loss": 46.0, - "step": 43643 - }, - { - "epoch": 3.336888583060955, - "grad_norm": 0.0018503410974517465, - "learning_rate": 0.00019999450774350885, - "loss": 46.0, - "step": 43644 - }, - { - "epoch": 3.336965040044345, - "grad_norm": 0.0013353358954191208, - "learning_rate": 0.00019999450749176622, - "loss": 46.0, - "step": 43645 - }, - { - "epoch": 3.3370414970277347, - "grad_norm": 0.002239350927993655, - "learning_rate": 0.00019999450724001783, - "loss": 46.0, - "step": 43646 - }, - { - "epoch": 3.3371179540111244, - "grad_norm": 0.0013675467344000936, - "learning_rate": 0.00019999450698826365, - "loss": 46.0, - "step": 43647 - }, - { - "epoch": 3.337194410994514, - "grad_norm": 0.002925304463133216, - "learning_rate": 0.0001999945067365037, - "loss": 46.0, - "step": 43648 - }, - { - "epoch": 3.337270867977904, - "grad_norm": 0.0032144831493496895, - "learning_rate": 0.000199994506484738, - "loss": 46.0, - "step": 43649 - }, - { - "epoch": 3.3373473249612937, - "grad_norm": 0.0008190250373445451, - "learning_rate": 0.00019999450623296649, - "loss": 46.0, - "step": 43650 - }, - { - "epoch": 3.3374237819446835, - "grad_norm": 0.0014309815596789122, - "learning_rate": 0.00019999450598118924, - "loss": 46.0, - "step": 43651 - }, - { - "epoch": 3.3375002389280732, - "grad_norm": 0.0023848325945436954, - "learning_rate": 0.0001999945057294062, - "loss": 46.0, - "step": 43652 - }, - { - "epoch": 3.337576695911463, - "grad_norm": 0.002579217776656151, - "learning_rate": 0.00019999450547761741, - "loss": 46.0, - "step": 43653 - }, - { - "epoch": 3.3376531528948528, - "grad_norm": 0.0028548510745167732, - "learning_rate": 0.00019999450522582287, - "loss": 46.0, - "step": 43654 - }, - { - "epoch": 3.337729609878242, - "grad_norm": 0.0016805481864139438, - "learning_rate": 0.00019999450497402253, - "loss": 46.0, - "step": 43655 - }, - { - "epoch": 3.337806066861632, - "grad_norm": 0.0016382435569539666, - "learning_rate": 0.00019999450472221644, - "loss": 46.0, - "step": 43656 - }, - { - "epoch": 3.3378825238450216, - "grad_norm": 0.0015484046889469028, - "learning_rate": 0.00019999450447040457, - "loss": 46.0, - "step": 43657 - }, - { - "epoch": 3.3379589808284114, - "grad_norm": 0.011212067678570747, - "learning_rate": 0.00019999450421858694, - "loss": 46.0, - "step": 43658 - }, - { - "epoch": 3.338035437811801, - "grad_norm": 0.003929028753191233, - "learning_rate": 0.00019999450396676353, - "loss": 46.0, - "step": 43659 - }, - { - "epoch": 3.338111894795191, - "grad_norm": 0.0037038493901491165, - "learning_rate": 0.00019999450371493434, - "loss": 46.0, - "step": 43660 - }, - { - "epoch": 3.3381883517785806, - "grad_norm": 0.001650122576393187, - "learning_rate": 0.0001999945034630994, - "loss": 46.0, - "step": 43661 - }, - { - "epoch": 3.3382648087619704, - "grad_norm": 0.0011574962409213185, - "learning_rate": 0.00019999450321125868, - "loss": 46.0, - "step": 43662 - }, - { - "epoch": 3.33834126574536, - "grad_norm": 0.0019906165543943644, - "learning_rate": 0.0001999945029594122, - "loss": 46.0, - "step": 43663 - }, - { - "epoch": 3.33841772272875, - "grad_norm": 0.005197696853429079, - "learning_rate": 0.00019999450270755996, - "loss": 46.0, - "step": 43664 - }, - { - "epoch": 3.3384941797121392, - "grad_norm": 0.001822852180339396, - "learning_rate": 0.00019999450245570193, - "loss": 46.0, - "step": 43665 - }, - { - "epoch": 3.338570636695529, - "grad_norm": 0.0032041617669165134, - "learning_rate": 0.00019999450220383814, - "loss": 46.0, - "step": 43666 - }, - { - "epoch": 3.3386470936789188, - "grad_norm": 0.0026878872886300087, - "learning_rate": 0.0001999945019519686, - "loss": 46.0, - "step": 43667 - }, - { - "epoch": 3.3387235506623085, - "grad_norm": 0.00133521668612957, - "learning_rate": 0.00019999450170009325, - "loss": 46.0, - "step": 43668 - }, - { - "epoch": 3.3388000076456983, - "grad_norm": 0.0030250868294388056, - "learning_rate": 0.00019999450144821216, - "loss": 46.0, - "step": 43669 - }, - { - "epoch": 3.338876464629088, - "grad_norm": 0.003318930044770241, - "learning_rate": 0.0001999945011963253, - "loss": 46.0, - "step": 43670 - }, - { - "epoch": 3.338952921612478, - "grad_norm": 0.0009371746564283967, - "learning_rate": 0.00019999450094443266, - "loss": 46.0, - "step": 43671 - }, - { - "epoch": 3.3390293785958676, - "grad_norm": 0.005983148235827684, - "learning_rate": 0.00019999450069253425, - "loss": 46.0, - "step": 43672 - }, - { - "epoch": 3.3391058355792573, - "grad_norm": 0.001200530561618507, - "learning_rate": 0.00019999450044063007, - "loss": 46.0, - "step": 43673 - }, - { - "epoch": 3.339182292562647, - "grad_norm": 0.0005374782485887408, - "learning_rate": 0.00019999450018872014, - "loss": 46.0, - "step": 43674 - }, - { - "epoch": 3.339258749546037, - "grad_norm": 0.0029157826211303473, - "learning_rate": 0.0001999944999368044, - "loss": 46.0, - "step": 43675 - }, - { - "epoch": 3.3393352065294266, - "grad_norm": 0.0027916766703128815, - "learning_rate": 0.00019999449968488293, - "loss": 46.0, - "step": 43676 - }, - { - "epoch": 3.339411663512816, - "grad_norm": 0.002333183540031314, - "learning_rate": 0.00019999449943295568, - "loss": 46.0, - "step": 43677 - }, - { - "epoch": 3.3394881204962057, - "grad_norm": 0.003528687870129943, - "learning_rate": 0.00019999449918102266, - "loss": 46.0, - "step": 43678 - }, - { - "epoch": 3.3395645774795955, - "grad_norm": 0.0032058802898973227, - "learning_rate": 0.0001999944989290839, - "loss": 46.0, - "step": 43679 - }, - { - "epoch": 3.339641034462985, - "grad_norm": 0.003699591616168618, - "learning_rate": 0.00019999449867713932, - "loss": 46.0, - "step": 43680 - }, - { - "epoch": 3.339717491446375, - "grad_norm": 0.0022140464279800653, - "learning_rate": 0.00019999449842518898, - "loss": 46.0, - "step": 43681 - }, - { - "epoch": 3.3397939484297647, - "grad_norm": 0.002870541298761964, - "learning_rate": 0.00019999449817323291, - "loss": 46.0, - "step": 43682 - }, - { - "epoch": 3.3398704054131545, - "grad_norm": 0.0015898555284366012, - "learning_rate": 0.00019999449792127102, - "loss": 46.0, - "step": 43683 - }, - { - "epoch": 3.3399468623965443, - "grad_norm": 0.004617452621459961, - "learning_rate": 0.0001999944976693034, - "loss": 46.0, - "step": 43684 - }, - { - "epoch": 3.340023319379934, - "grad_norm": 0.004027530085295439, - "learning_rate": 0.00019999449741732998, - "loss": 46.0, - "step": 43685 - }, - { - "epoch": 3.3400997763633233, - "grad_norm": 0.0034801489673554897, - "learning_rate": 0.00019999449716535082, - "loss": 46.0, - "step": 43686 - }, - { - "epoch": 3.340176233346713, - "grad_norm": 0.0025715066585689783, - "learning_rate": 0.00019999449691336587, - "loss": 46.0, - "step": 43687 - }, - { - "epoch": 3.340252690330103, - "grad_norm": 0.0010014110011979938, - "learning_rate": 0.00019999449666137516, - "loss": 46.0, - "step": 43688 - }, - { - "epoch": 3.3403291473134926, - "grad_norm": 0.0014420212246477604, - "learning_rate": 0.0001999944964093787, - "loss": 46.0, - "step": 43689 - }, - { - "epoch": 3.3404056042968824, - "grad_norm": 0.0035400516353547573, - "learning_rate": 0.0001999944961573764, - "loss": 46.0, - "step": 43690 - }, - { - "epoch": 3.340482061280272, - "grad_norm": 0.0019122886005789042, - "learning_rate": 0.00019999449590536842, - "loss": 46.0, - "step": 43691 - }, - { - "epoch": 3.340558518263662, - "grad_norm": 0.0030854246579110622, - "learning_rate": 0.00019999449565335462, - "loss": 46.0, - "step": 43692 - }, - { - "epoch": 3.3406349752470517, - "grad_norm": 0.001542039797641337, - "learning_rate": 0.00019999449540133505, - "loss": 46.0, - "step": 43693 - }, - { - "epoch": 3.3407114322304414, - "grad_norm": 0.004390873480588198, - "learning_rate": 0.00019999449514930973, - "loss": 46.0, - "step": 43694 - }, - { - "epoch": 3.340787889213831, - "grad_norm": 0.0027141906321048737, - "learning_rate": 0.00019999449489727864, - "loss": 46.0, - "step": 43695 - }, - { - "epoch": 3.340864346197221, - "grad_norm": 0.002727772109210491, - "learning_rate": 0.00019999449464524176, - "loss": 46.0, - "step": 43696 - }, - { - "epoch": 3.3409408031806107, - "grad_norm": 0.0028421839233487844, - "learning_rate": 0.00019999449439319915, - "loss": 46.0, - "step": 43697 - }, - { - "epoch": 3.3410172601640005, - "grad_norm": 0.0009473218815401196, - "learning_rate": 0.0001999944941411507, - "loss": 46.0, - "step": 43698 - }, - { - "epoch": 3.34109371714739, - "grad_norm": 0.0021254231687635183, - "learning_rate": 0.00019999449388909658, - "loss": 46.0, - "step": 43699 - }, - { - "epoch": 3.3411701741307795, - "grad_norm": 0.0014690895332023501, - "learning_rate": 0.00019999449363703665, - "loss": 46.0, - "step": 43700 - }, - { - "epoch": 3.3412466311141693, - "grad_norm": 0.002514997962862253, - "learning_rate": 0.00019999449338497092, - "loss": 46.0, - "step": 43701 - }, - { - "epoch": 3.341323088097559, - "grad_norm": 0.0009950983803719282, - "learning_rate": 0.00019999449313289942, - "loss": 46.0, - "step": 43702 - }, - { - "epoch": 3.341399545080949, - "grad_norm": 0.00353820389136672, - "learning_rate": 0.00019999449288082217, - "loss": 46.0, - "step": 43703 - }, - { - "epoch": 3.3414760020643386, - "grad_norm": 0.0015001499559730291, - "learning_rate": 0.00019999449262873918, - "loss": 46.0, - "step": 43704 - }, - { - "epoch": 3.3415524590477284, - "grad_norm": 0.0014312267303466797, - "learning_rate": 0.00019999449237665038, - "loss": 46.0, - "step": 43705 - }, - { - "epoch": 3.341628916031118, - "grad_norm": 0.0028946269303560257, - "learning_rate": 0.00019999449212455584, - "loss": 46.0, - "step": 43706 - }, - { - "epoch": 3.341705373014508, - "grad_norm": 0.005926786921918392, - "learning_rate": 0.0001999944918724555, - "loss": 46.0, - "step": 43707 - }, - { - "epoch": 3.341781829997897, - "grad_norm": 0.0013261823914945126, - "learning_rate": 0.0001999944916203494, - "loss": 46.0, - "step": 43708 - }, - { - "epoch": 3.341858286981287, - "grad_norm": 0.004232048988342285, - "learning_rate": 0.00019999449136823754, - "loss": 46.0, - "step": 43709 - }, - { - "epoch": 3.3419347439646767, - "grad_norm": 0.0010355169652029872, - "learning_rate": 0.0001999944911161199, - "loss": 46.0, - "step": 43710 - }, - { - "epoch": 3.3420112009480665, - "grad_norm": 0.001177627476863563, - "learning_rate": 0.0001999944908639965, - "loss": 46.0, - "step": 43711 - }, - { - "epoch": 3.3420876579314562, - "grad_norm": 0.001688484800979495, - "learning_rate": 0.00019999449061186732, - "loss": 46.0, - "step": 43712 - }, - { - "epoch": 3.342164114914846, - "grad_norm": 0.0017280696192756295, - "learning_rate": 0.0001999944903597324, - "loss": 46.0, - "step": 43713 - }, - { - "epoch": 3.3422405718982358, - "grad_norm": 0.0023232828825712204, - "learning_rate": 0.0001999944901075917, - "loss": 46.0, - "step": 43714 - }, - { - "epoch": 3.3423170288816255, - "grad_norm": 0.0014603016898036003, - "learning_rate": 0.0001999944898554452, - "loss": 46.0, - "step": 43715 - }, - { - "epoch": 3.3423934858650153, - "grad_norm": 0.00415347795933485, - "learning_rate": 0.00019999448960329294, - "loss": 46.0, - "step": 43716 - }, - { - "epoch": 3.342469942848405, - "grad_norm": 0.0038795575965195894, - "learning_rate": 0.00019999448935113494, - "loss": 46.0, - "step": 43717 - }, - { - "epoch": 3.342546399831795, - "grad_norm": 0.0014306865632534027, - "learning_rate": 0.00019999448909897117, - "loss": 46.0, - "step": 43718 - }, - { - "epoch": 3.3426228568151846, - "grad_norm": 0.001141933025792241, - "learning_rate": 0.0001999944888468016, - "loss": 46.0, - "step": 43719 - }, - { - "epoch": 3.3426993137985743, - "grad_norm": 0.008010140620172024, - "learning_rate": 0.0001999944885946263, - "loss": 46.0, - "step": 43720 - }, - { - "epoch": 3.3427757707819636, - "grad_norm": 0.0014825982507318258, - "learning_rate": 0.00019999448834244517, - "loss": 46.0, - "step": 43721 - }, - { - "epoch": 3.3428522277653534, - "grad_norm": 0.00431219395250082, - "learning_rate": 0.0001999944880902583, - "loss": 46.0, - "step": 43722 - }, - { - "epoch": 3.342928684748743, - "grad_norm": 0.001852414570748806, - "learning_rate": 0.0001999944878380657, - "loss": 46.0, - "step": 43723 - }, - { - "epoch": 3.343005141732133, - "grad_norm": 0.001598511473275721, - "learning_rate": 0.0001999944875858673, - "loss": 46.0, - "step": 43724 - }, - { - "epoch": 3.3430815987155227, - "grad_norm": 0.0016969903372228146, - "learning_rate": 0.00019999448733366314, - "loss": 46.0, - "step": 43725 - }, - { - "epoch": 3.3431580556989124, - "grad_norm": 0.005367988720536232, - "learning_rate": 0.00019999448708145318, - "loss": 46.0, - "step": 43726 - }, - { - "epoch": 3.343234512682302, - "grad_norm": 0.001105388393625617, - "learning_rate": 0.00019999448682923748, - "loss": 46.0, - "step": 43727 - }, - { - "epoch": 3.343310969665692, - "grad_norm": 0.005350275430828333, - "learning_rate": 0.000199994486577016, - "loss": 46.0, - "step": 43728 - }, - { - "epoch": 3.3433874266490817, - "grad_norm": 0.0027503862511366606, - "learning_rate": 0.00019999448632478876, - "loss": 46.0, - "step": 43729 - }, - { - "epoch": 3.343463883632471, - "grad_norm": 0.0006434792303480208, - "learning_rate": 0.00019999448607255574, - "loss": 46.0, - "step": 43730 - }, - { - "epoch": 3.343540340615861, - "grad_norm": 0.0011709927348420024, - "learning_rate": 0.00019999448582031697, - "loss": 46.0, - "step": 43731 - }, - { - "epoch": 3.3436167975992506, - "grad_norm": 0.0009464659378863871, - "learning_rate": 0.0001999944855680724, - "loss": 46.0, - "step": 43732 - }, - { - "epoch": 3.3436932545826403, - "grad_norm": 0.002380137564614415, - "learning_rate": 0.00019999448531582209, - "loss": 46.0, - "step": 43733 - }, - { - "epoch": 3.34376971156603, - "grad_norm": 0.0010878652101382613, - "learning_rate": 0.000199994485063566, - "loss": 46.0, - "step": 43734 - }, - { - "epoch": 3.34384616854942, - "grad_norm": 0.0006586374947801232, - "learning_rate": 0.00019999448481130414, - "loss": 46.0, - "step": 43735 - }, - { - "epoch": 3.3439226255328096, - "grad_norm": 0.0023917292710393667, - "learning_rate": 0.0001999944845590365, - "loss": 46.0, - "step": 43736 - }, - { - "epoch": 3.3439990825161994, - "grad_norm": 0.0074102249927818775, - "learning_rate": 0.00019999448430676312, - "loss": 46.0, - "step": 43737 - }, - { - "epoch": 3.344075539499589, - "grad_norm": 0.0030318291392177343, - "learning_rate": 0.00019999448405448394, - "loss": 46.0, - "step": 43738 - }, - { - "epoch": 3.344151996482979, - "grad_norm": 0.002106248401105404, - "learning_rate": 0.00019999448380219898, - "loss": 46.0, - "step": 43739 - }, - { - "epoch": 3.3442284534663687, - "grad_norm": 0.0021125716157257557, - "learning_rate": 0.00019999448354990828, - "loss": 46.0, - "step": 43740 - }, - { - "epoch": 3.3443049104497584, - "grad_norm": 0.003041575662791729, - "learning_rate": 0.0001999944832976118, - "loss": 46.0, - "step": 43741 - }, - { - "epoch": 3.3443813674331477, - "grad_norm": 0.0019100791541859508, - "learning_rate": 0.00019999448304530956, - "loss": 46.0, - "step": 43742 - }, - { - "epoch": 3.3444578244165375, - "grad_norm": 0.0026639197021722794, - "learning_rate": 0.00019999448279300154, - "loss": 46.0, - "step": 43743 - }, - { - "epoch": 3.3445342813999273, - "grad_norm": 0.002550798002630472, - "learning_rate": 0.00019999448254068777, - "loss": 46.0, - "step": 43744 - }, - { - "epoch": 3.344610738383317, - "grad_norm": 0.0016459315083920956, - "learning_rate": 0.0001999944822883682, - "loss": 46.0, - "step": 43745 - }, - { - "epoch": 3.3446871953667068, - "grad_norm": 0.0023307749070227146, - "learning_rate": 0.0001999944820360429, - "loss": 46.0, - "step": 43746 - }, - { - "epoch": 3.3447636523500965, - "grad_norm": 0.004019988235086203, - "learning_rate": 0.00019999448178371177, - "loss": 46.0, - "step": 43747 - }, - { - "epoch": 3.3448401093334863, - "grad_norm": 0.0013858407037332654, - "learning_rate": 0.00019999448153137494, - "loss": 46.0, - "step": 43748 - }, - { - "epoch": 3.344916566316876, - "grad_norm": 0.0029589759651571512, - "learning_rate": 0.0001999944812790323, - "loss": 46.0, - "step": 43749 - }, - { - "epoch": 3.344993023300266, - "grad_norm": 0.0014120849082246423, - "learning_rate": 0.00019999448102668392, - "loss": 46.0, - "step": 43750 - }, - { - "epoch": 3.3450694802836556, - "grad_norm": 0.0009957747533917427, - "learning_rate": 0.00019999448077432974, - "loss": 46.0, - "step": 43751 - }, - { - "epoch": 3.345145937267045, - "grad_norm": 0.0012325698044151068, - "learning_rate": 0.0001999944805219698, - "loss": 46.0, - "step": 43752 - }, - { - "epoch": 3.3452223942504347, - "grad_norm": 0.006688510999083519, - "learning_rate": 0.0001999944802696041, - "loss": 46.0, - "step": 43753 - }, - { - "epoch": 3.3452988512338244, - "grad_norm": 0.002767265308648348, - "learning_rate": 0.00019999448001723264, - "loss": 46.0, - "step": 43754 - }, - { - "epoch": 3.345375308217214, - "grad_norm": 0.007987682707607746, - "learning_rate": 0.0001999944797648554, - "loss": 46.0, - "step": 43755 - }, - { - "epoch": 3.345451765200604, - "grad_norm": 0.0032269826624542475, - "learning_rate": 0.00019999447951247237, - "loss": 46.0, - "step": 43756 - }, - { - "epoch": 3.3455282221839937, - "grad_norm": 0.008732321672141552, - "learning_rate": 0.0001999944792600836, - "loss": 46.0, - "step": 43757 - }, - { - "epoch": 3.3456046791673835, - "grad_norm": 0.004555963911116123, - "learning_rate": 0.00019999447900768903, - "loss": 46.0, - "step": 43758 - }, - { - "epoch": 3.3456811361507732, - "grad_norm": 0.0035217690747231245, - "learning_rate": 0.00019999447875528872, - "loss": 46.0, - "step": 43759 - }, - { - "epoch": 3.345757593134163, - "grad_norm": 0.003565158462151885, - "learning_rate": 0.00019999447850288263, - "loss": 46.0, - "step": 43760 - }, - { - "epoch": 3.3458340501175527, - "grad_norm": 0.00240330770611763, - "learning_rate": 0.00019999447825047077, - "loss": 46.0, - "step": 43761 - }, - { - "epoch": 3.3459105071009425, - "grad_norm": 0.0016876407898962498, - "learning_rate": 0.00019999447799805316, - "loss": 46.0, - "step": 43762 - }, - { - "epoch": 3.3459869640843323, - "grad_norm": 0.0013996004126966, - "learning_rate": 0.00019999447774562973, - "loss": 46.0, - "step": 43763 - }, - { - "epoch": 3.3460634210677216, - "grad_norm": 0.0009210139396600425, - "learning_rate": 0.0001999944774932006, - "loss": 46.0, - "step": 43764 - }, - { - "epoch": 3.3461398780511113, - "grad_norm": 0.0018856822280213237, - "learning_rate": 0.00019999447724076562, - "loss": 46.0, - "step": 43765 - }, - { - "epoch": 3.346216335034501, - "grad_norm": 0.001019989955238998, - "learning_rate": 0.00019999447698832495, - "loss": 46.0, - "step": 43766 - }, - { - "epoch": 3.346292792017891, - "grad_norm": 0.000980242621153593, - "learning_rate": 0.00019999447673587847, - "loss": 46.0, - "step": 43767 - }, - { - "epoch": 3.3463692490012806, - "grad_norm": 0.0018957104766741395, - "learning_rate": 0.00019999447648342625, - "loss": 46.0, - "step": 43768 - }, - { - "epoch": 3.3464457059846704, - "grad_norm": 0.0015449401689693332, - "learning_rate": 0.00019999447623096823, - "loss": 46.0, - "step": 43769 - }, - { - "epoch": 3.34652216296806, - "grad_norm": 0.0014780907658860087, - "learning_rate": 0.00019999447597850444, - "loss": 46.0, - "step": 43770 - }, - { - "epoch": 3.34659861995145, - "grad_norm": 0.001844253041781485, - "learning_rate": 0.0001999944757260349, - "loss": 46.0, - "step": 43771 - }, - { - "epoch": 3.3466750769348397, - "grad_norm": 0.0013287770561873913, - "learning_rate": 0.00019999447547355956, - "loss": 46.0, - "step": 43772 - }, - { - "epoch": 3.3467515339182294, - "grad_norm": 0.003127113450318575, - "learning_rate": 0.00019999447522107847, - "loss": 46.0, - "step": 43773 - }, - { - "epoch": 3.3468279909016188, - "grad_norm": 0.0013063066871836782, - "learning_rate": 0.00019999447496859163, - "loss": 46.0, - "step": 43774 - }, - { - "epoch": 3.3469044478850085, - "grad_norm": 0.0017159474082291126, - "learning_rate": 0.00019999447471609897, - "loss": 46.0, - "step": 43775 - }, - { - "epoch": 3.3469809048683983, - "grad_norm": 0.0024811800103634596, - "learning_rate": 0.0001999944744636006, - "loss": 46.0, - "step": 43776 - }, - { - "epoch": 3.347057361851788, - "grad_norm": 0.007020830176770687, - "learning_rate": 0.00019999447421109644, - "loss": 46.0, - "step": 43777 - }, - { - "epoch": 3.347133818835178, - "grad_norm": 0.0017891379538923502, - "learning_rate": 0.0001999944739585865, - "loss": 46.0, - "step": 43778 - }, - { - "epoch": 3.3472102758185676, - "grad_norm": 0.0018376614898443222, - "learning_rate": 0.00019999447370607079, - "loss": 46.0, - "step": 43779 - }, - { - "epoch": 3.3472867328019573, - "grad_norm": 0.00107807747554034, - "learning_rate": 0.0001999944734535493, - "loss": 46.0, - "step": 43780 - }, - { - "epoch": 3.347363189785347, - "grad_norm": 0.0027003982104361057, - "learning_rate": 0.00019999447320102206, - "loss": 46.0, - "step": 43781 - }, - { - "epoch": 3.347439646768737, - "grad_norm": 0.00218713260255754, - "learning_rate": 0.00019999447294848907, - "loss": 46.0, - "step": 43782 - }, - { - "epoch": 3.3475161037521266, - "grad_norm": 0.0017130491323769093, - "learning_rate": 0.0001999944726959503, - "loss": 46.0, - "step": 43783 - }, - { - "epoch": 3.3475925607355164, - "grad_norm": 0.0017729747341945767, - "learning_rate": 0.00019999447244340574, - "loss": 46.0, - "step": 43784 - }, - { - "epoch": 3.347669017718906, - "grad_norm": 0.0024973598774522543, - "learning_rate": 0.00019999447219085542, - "loss": 46.0, - "step": 43785 - }, - { - "epoch": 3.3477454747022954, - "grad_norm": 0.0009839512640610337, - "learning_rate": 0.00019999447193829934, - "loss": 46.0, - "step": 43786 - }, - { - "epoch": 3.347821931685685, - "grad_norm": 0.0020365859381854534, - "learning_rate": 0.00019999447168573748, - "loss": 46.0, - "step": 43787 - }, - { - "epoch": 3.347898388669075, - "grad_norm": 0.005399767775088549, - "learning_rate": 0.00019999447143316984, - "loss": 46.0, - "step": 43788 - }, - { - "epoch": 3.3479748456524647, - "grad_norm": 0.004327656235545874, - "learning_rate": 0.00019999447118059646, - "loss": 46.0, - "step": 43789 - }, - { - "epoch": 3.3480513026358545, - "grad_norm": 0.0029053015168756247, - "learning_rate": 0.0001999944709280173, - "loss": 46.0, - "step": 43790 - }, - { - "epoch": 3.3481277596192442, - "grad_norm": 0.002035491168498993, - "learning_rate": 0.00019999447067543238, - "loss": 46.0, - "step": 43791 - }, - { - "epoch": 3.348204216602634, - "grad_norm": 0.007189083378762007, - "learning_rate": 0.00019999447042284168, - "loss": 46.0, - "step": 43792 - }, - { - "epoch": 3.3482806735860238, - "grad_norm": 0.001513556926511228, - "learning_rate": 0.0001999944701702452, - "loss": 46.0, - "step": 43793 - }, - { - "epoch": 3.3483571305694135, - "grad_norm": 0.0010239416733384132, - "learning_rate": 0.00019999446991764293, - "loss": 46.0, - "step": 43794 - }, - { - "epoch": 3.3484335875528033, - "grad_norm": 0.0008572787628509104, - "learning_rate": 0.00019999446966503494, - "loss": 46.0, - "step": 43795 - }, - { - "epoch": 3.3485100445361926, - "grad_norm": 0.00042112168739549816, - "learning_rate": 0.00019999446941242117, - "loss": 46.0, - "step": 43796 - }, - { - "epoch": 3.3485865015195824, - "grad_norm": 0.0010782723547890782, - "learning_rate": 0.00019999446915980164, - "loss": 46.0, - "step": 43797 - }, - { - "epoch": 3.348662958502972, - "grad_norm": 0.0007021041237749159, - "learning_rate": 0.0001999944689071763, - "loss": 46.0, - "step": 43798 - }, - { - "epoch": 3.348739415486362, - "grad_norm": 0.002902782754972577, - "learning_rate": 0.00019999446865454524, - "loss": 46.0, - "step": 43799 - }, - { - "epoch": 3.3488158724697517, - "grad_norm": 0.0017065502470359206, - "learning_rate": 0.00019999446840190838, - "loss": 46.0, - "step": 43800 - }, - { - "epoch": 3.3488923294531414, - "grad_norm": 0.001943213865160942, - "learning_rate": 0.00019999446814926571, - "loss": 46.0, - "step": 43801 - }, - { - "epoch": 3.348968786436531, - "grad_norm": 0.001652433187700808, - "learning_rate": 0.00019999446789661734, - "loss": 46.0, - "step": 43802 - }, - { - "epoch": 3.349045243419921, - "grad_norm": 0.00564029673114419, - "learning_rate": 0.00019999446764396315, - "loss": 46.0, - "step": 43803 - }, - { - "epoch": 3.3491217004033107, - "grad_norm": 0.0015387465246021748, - "learning_rate": 0.00019999446739130326, - "loss": 46.0, - "step": 43804 - }, - { - "epoch": 3.3491981573867005, - "grad_norm": 0.004566119518131018, - "learning_rate": 0.00019999446713863756, - "loss": 46.0, - "step": 43805 - }, - { - "epoch": 3.34927461437009, - "grad_norm": 0.0015043530147522688, - "learning_rate": 0.00019999446688596608, - "loss": 46.0, - "step": 43806 - }, - { - "epoch": 3.34935107135348, - "grad_norm": 0.002944496227428317, - "learning_rate": 0.00019999446663328884, - "loss": 46.0, - "step": 43807 - }, - { - "epoch": 3.3494275283368693, - "grad_norm": 0.0024127515498548746, - "learning_rate": 0.00019999446638060584, - "loss": 46.0, - "step": 43808 - }, - { - "epoch": 3.349503985320259, - "grad_norm": 0.0029832408763468266, - "learning_rate": 0.00019999446612791705, - "loss": 46.0, - "step": 43809 - }, - { - "epoch": 3.349580442303649, - "grad_norm": 0.009830344468355179, - "learning_rate": 0.0001999944658752225, - "loss": 46.0, - "step": 43810 - }, - { - "epoch": 3.3496568992870386, - "grad_norm": 0.001837630057707429, - "learning_rate": 0.0001999944656225222, - "loss": 46.0, - "step": 43811 - }, - { - "epoch": 3.3497333562704283, - "grad_norm": 0.0010770781664177775, - "learning_rate": 0.0001999944653698161, - "loss": 46.0, - "step": 43812 - }, - { - "epoch": 3.349809813253818, - "grad_norm": 0.0032199062407016754, - "learning_rate": 0.00019999446511710428, - "loss": 46.0, - "step": 43813 - }, - { - "epoch": 3.349886270237208, - "grad_norm": 0.0016934559680521488, - "learning_rate": 0.00019999446486438665, - "loss": 46.0, - "step": 43814 - }, - { - "epoch": 3.3499627272205976, - "grad_norm": 0.0020858892239630222, - "learning_rate": 0.00019999446461166324, - "loss": 46.0, - "step": 43815 - }, - { - "epoch": 3.3500391842039874, - "grad_norm": 0.000608460046350956, - "learning_rate": 0.0001999944643589341, - "loss": 46.0, - "step": 43816 - }, - { - "epoch": 3.3501156411873767, - "grad_norm": 0.002830859972164035, - "learning_rate": 0.00019999446410619916, - "loss": 46.0, - "step": 43817 - }, - { - "epoch": 3.3501920981707665, - "grad_norm": 0.0023028110153973103, - "learning_rate": 0.00019999446385345846, - "loss": 46.0, - "step": 43818 - }, - { - "epoch": 3.3502685551541562, - "grad_norm": 0.0027507252525538206, - "learning_rate": 0.000199994463600712, - "loss": 46.0, - "step": 43819 - }, - { - "epoch": 3.350345012137546, - "grad_norm": 0.003907621838152409, - "learning_rate": 0.00019999446334795974, - "loss": 46.0, - "step": 43820 - }, - { - "epoch": 3.3504214691209357, - "grad_norm": 0.0026545915752649307, - "learning_rate": 0.00019999446309520175, - "loss": 46.0, - "step": 43821 - }, - { - "epoch": 3.3504979261043255, - "grad_norm": 0.0025462189223617315, - "learning_rate": 0.00019999446284243796, - "loss": 46.0, - "step": 43822 - }, - { - "epoch": 3.3505743830877153, - "grad_norm": 0.0017587310867384076, - "learning_rate": 0.00019999446258966842, - "loss": 46.0, - "step": 43823 - }, - { - "epoch": 3.350650840071105, - "grad_norm": 0.005058556795120239, - "learning_rate": 0.0001999944623368931, - "loss": 46.0, - "step": 43824 - }, - { - "epoch": 3.350727297054495, - "grad_norm": 0.0026162636931985617, - "learning_rate": 0.00019999446208411202, - "loss": 46.0, - "step": 43825 - }, - { - "epoch": 3.3508037540378846, - "grad_norm": 0.0019965535029768944, - "learning_rate": 0.00019999446183132519, - "loss": 46.0, - "step": 43826 - }, - { - "epoch": 3.3508802110212743, - "grad_norm": 0.0020608650520443916, - "learning_rate": 0.00019999446157853255, - "loss": 46.0, - "step": 43827 - }, - { - "epoch": 3.350956668004664, - "grad_norm": 0.0015180882764980197, - "learning_rate": 0.00019999446132573417, - "loss": 46.0, - "step": 43828 - }, - { - "epoch": 3.351033124988054, - "grad_norm": 0.0018931550439447165, - "learning_rate": 0.00019999446107293, - "loss": 46.0, - "step": 43829 - }, - { - "epoch": 3.351109581971443, - "grad_norm": 0.0008813115418888628, - "learning_rate": 0.00019999446082012007, - "loss": 46.0, - "step": 43830 - }, - { - "epoch": 3.351186038954833, - "grad_norm": 0.003362125949934125, - "learning_rate": 0.00019999446056730437, - "loss": 46.0, - "step": 43831 - }, - { - "epoch": 3.3512624959382227, - "grad_norm": 0.0024379382375627756, - "learning_rate": 0.0001999944603144829, - "loss": 46.0, - "step": 43832 - }, - { - "epoch": 3.3513389529216124, - "grad_norm": 0.0006784663419239223, - "learning_rate": 0.00019999446006165568, - "loss": 46.0, - "step": 43833 - }, - { - "epoch": 3.351415409905002, - "grad_norm": 0.0006937530706636608, - "learning_rate": 0.0001999944598088227, - "loss": 46.0, - "step": 43834 - }, - { - "epoch": 3.351491866888392, - "grad_norm": 0.0024105324409902096, - "learning_rate": 0.0001999944595559839, - "loss": 46.0, - "step": 43835 - }, - { - "epoch": 3.3515683238717817, - "grad_norm": 0.001397754531353712, - "learning_rate": 0.00019999445930313936, - "loss": 46.0, - "step": 43836 - }, - { - "epoch": 3.3516447808551715, - "grad_norm": 0.0032718039583414793, - "learning_rate": 0.00019999445905028904, - "loss": 46.0, - "step": 43837 - }, - { - "epoch": 3.3517212378385612, - "grad_norm": 0.0013750699581578374, - "learning_rate": 0.00019999445879743296, - "loss": 46.0, - "step": 43838 - }, - { - "epoch": 3.3517976948219506, - "grad_norm": 0.0029985366854816675, - "learning_rate": 0.0001999944585445711, - "loss": 46.0, - "step": 43839 - }, - { - "epoch": 3.3518741518053403, - "grad_norm": 0.00362880015745759, - "learning_rate": 0.0001999944582917035, - "loss": 46.0, - "step": 43840 - }, - { - "epoch": 3.35195060878873, - "grad_norm": 0.001020593335852027, - "learning_rate": 0.0001999944580388301, - "loss": 46.0, - "step": 43841 - }, - { - "epoch": 3.35202706577212, - "grad_norm": 0.0006231357692740858, - "learning_rate": 0.00019999445778595093, - "loss": 46.0, - "step": 43842 - }, - { - "epoch": 3.3521035227555096, - "grad_norm": 0.003153297584503889, - "learning_rate": 0.000199994457533066, - "loss": 46.0, - "step": 43843 - }, - { - "epoch": 3.3521799797388994, - "grad_norm": 0.001853014575317502, - "learning_rate": 0.00019999445728017528, - "loss": 46.0, - "step": 43844 - }, - { - "epoch": 3.352256436722289, - "grad_norm": 0.0012519273441284895, - "learning_rate": 0.00019999445702727884, - "loss": 46.0, - "step": 43845 - }, - { - "epoch": 3.352332893705679, - "grad_norm": 0.003738974453881383, - "learning_rate": 0.0001999944567743766, - "loss": 46.0, - "step": 43846 - }, - { - "epoch": 3.3524093506890686, - "grad_norm": 0.0023745065554976463, - "learning_rate": 0.0001999944565214686, - "loss": 46.0, - "step": 43847 - }, - { - "epoch": 3.3524858076724584, - "grad_norm": 0.001642899471335113, - "learning_rate": 0.0001999944562685548, - "loss": 46.0, - "step": 43848 - }, - { - "epoch": 3.352562264655848, - "grad_norm": 0.001237194868735969, - "learning_rate": 0.0001999944560156353, - "loss": 46.0, - "step": 43849 - }, - { - "epoch": 3.352638721639238, - "grad_norm": 0.006038373801857233, - "learning_rate": 0.00019999445576270999, - "loss": 46.0, - "step": 43850 - }, - { - "epoch": 3.3527151786226277, - "grad_norm": 0.002091360278427601, - "learning_rate": 0.0001999944555097789, - "loss": 46.0, - "step": 43851 - }, - { - "epoch": 3.352791635606017, - "grad_norm": 0.0009761899709701538, - "learning_rate": 0.00019999445525684207, - "loss": 46.0, - "step": 43852 - }, - { - "epoch": 3.3528680925894068, - "grad_norm": 0.0007871040725149214, - "learning_rate": 0.0001999944550038994, - "loss": 46.0, - "step": 43853 - }, - { - "epoch": 3.3529445495727965, - "grad_norm": 0.002603947650641203, - "learning_rate": 0.00019999445475095103, - "loss": 46.0, - "step": 43854 - }, - { - "epoch": 3.3530210065561863, - "grad_norm": 0.0025980807840824127, - "learning_rate": 0.0001999944544979969, - "loss": 46.0, - "step": 43855 - }, - { - "epoch": 3.353097463539576, - "grad_norm": 0.001019587623886764, - "learning_rate": 0.00019999445424503695, - "loss": 46.0, - "step": 43856 - }, - { - "epoch": 3.353173920522966, - "grad_norm": 0.0018611080013215542, - "learning_rate": 0.00019999445399207125, - "loss": 46.0, - "step": 43857 - }, - { - "epoch": 3.3532503775063556, - "grad_norm": 0.0006320485263131559, - "learning_rate": 0.00019999445373909978, - "loss": 46.0, - "step": 43858 - }, - { - "epoch": 3.3533268344897453, - "grad_norm": 0.002382304985076189, - "learning_rate": 0.00019999445348612256, - "loss": 46.0, - "step": 43859 - }, - { - "epoch": 3.353403291473135, - "grad_norm": 0.0012430732604116201, - "learning_rate": 0.00019999445323313957, - "loss": 46.0, - "step": 43860 - }, - { - "epoch": 3.3534797484565244, - "grad_norm": 0.0009905261686071754, - "learning_rate": 0.00019999445298015078, - "loss": 46.0, - "step": 43861 - }, - { - "epoch": 3.353556205439914, - "grad_norm": 0.005501210689544678, - "learning_rate": 0.00019999445272715624, - "loss": 46.0, - "step": 43862 - }, - { - "epoch": 3.353632662423304, - "grad_norm": 0.005886425264179707, - "learning_rate": 0.00019999445247415593, - "loss": 46.0, - "step": 43863 - }, - { - "epoch": 3.3537091194066937, - "grad_norm": 0.0032316395081579685, - "learning_rate": 0.00019999445222114985, - "loss": 46.0, - "step": 43864 - }, - { - "epoch": 3.3537855763900835, - "grad_norm": 0.0015141512267291546, - "learning_rate": 0.000199994451968138, - "loss": 46.0, - "step": 43865 - }, - { - "epoch": 3.353862033373473, - "grad_norm": 0.0043038311414420605, - "learning_rate": 0.00019999445171512038, - "loss": 46.0, - "step": 43866 - }, - { - "epoch": 3.353938490356863, - "grad_norm": 0.004591199103742838, - "learning_rate": 0.000199994451462097, - "loss": 46.0, - "step": 43867 - }, - { - "epoch": 3.3540149473402527, - "grad_norm": 0.003466640831902623, - "learning_rate": 0.00019999445120906785, - "loss": 46.0, - "step": 43868 - }, - { - "epoch": 3.3540914043236425, - "grad_norm": 0.0028352956287562847, - "learning_rate": 0.00019999445095603293, - "loss": 46.0, - "step": 43869 - }, - { - "epoch": 3.3541678613070323, - "grad_norm": 0.0020261681638658047, - "learning_rate": 0.00019999445070299223, - "loss": 46.0, - "step": 43870 - }, - { - "epoch": 3.354244318290422, - "grad_norm": 0.0011520616244524717, - "learning_rate": 0.00019999445044994576, - "loss": 46.0, - "step": 43871 - }, - { - "epoch": 3.354320775273812, - "grad_norm": 0.0013362925965338945, - "learning_rate": 0.00019999445019689354, - "loss": 46.0, - "step": 43872 - }, - { - "epoch": 3.354397232257201, - "grad_norm": 0.001257666852325201, - "learning_rate": 0.00019999444994383555, - "loss": 46.0, - "step": 43873 - }, - { - "epoch": 3.354473689240591, - "grad_norm": 0.0014234689297154546, - "learning_rate": 0.00019999444969077176, - "loss": 46.0, - "step": 43874 - }, - { - "epoch": 3.3545501462239806, - "grad_norm": 0.0013488612603396177, - "learning_rate": 0.00019999444943770222, - "loss": 46.0, - "step": 43875 - }, - { - "epoch": 3.3546266032073704, - "grad_norm": 0.0041291494853794575, - "learning_rate": 0.0001999944491846269, - "loss": 46.0, - "step": 43876 - }, - { - "epoch": 3.35470306019076, - "grad_norm": 0.002632700838148594, - "learning_rate": 0.00019999444893154582, - "loss": 46.0, - "step": 43877 - }, - { - "epoch": 3.35477951717415, - "grad_norm": 0.0010814070701599121, - "learning_rate": 0.00019999444867845897, - "loss": 46.0, - "step": 43878 - }, - { - "epoch": 3.3548559741575397, - "grad_norm": 0.0016110329888761044, - "learning_rate": 0.00019999444842536636, - "loss": 46.0, - "step": 43879 - }, - { - "epoch": 3.3549324311409294, - "grad_norm": 0.0015688743442296982, - "learning_rate": 0.00019999444817226798, - "loss": 46.0, - "step": 43880 - }, - { - "epoch": 3.355008888124319, - "grad_norm": 0.001690788776613772, - "learning_rate": 0.0001999944479191638, - "loss": 46.0, - "step": 43881 - }, - { - "epoch": 3.355085345107709, - "grad_norm": 0.0015371820190921426, - "learning_rate": 0.0001999944476660539, - "loss": 46.0, - "step": 43882 - }, - { - "epoch": 3.3551618020910983, - "grad_norm": 0.009616333059966564, - "learning_rate": 0.0001999944474129382, - "loss": 46.0, - "step": 43883 - }, - { - "epoch": 3.355238259074488, - "grad_norm": 0.0014407926937565207, - "learning_rate": 0.00019999444715981674, - "loss": 46.0, - "step": 43884 - }, - { - "epoch": 3.355314716057878, - "grad_norm": 0.0026107977610081434, - "learning_rate": 0.00019999444690668952, - "loss": 46.0, - "step": 43885 - }, - { - "epoch": 3.3553911730412675, - "grad_norm": 0.0037337651010602713, - "learning_rate": 0.0001999944466535565, - "loss": 46.0, - "step": 43886 - }, - { - "epoch": 3.3554676300246573, - "grad_norm": 0.001361203845590353, - "learning_rate": 0.00019999444640041777, - "loss": 46.0, - "step": 43887 - }, - { - "epoch": 3.355544087008047, - "grad_norm": 0.0028255737852305174, - "learning_rate": 0.0001999944461472732, - "loss": 46.0, - "step": 43888 - }, - { - "epoch": 3.355620543991437, - "grad_norm": 0.0028665463905781507, - "learning_rate": 0.00019999444589412292, - "loss": 46.0, - "step": 43889 - }, - { - "epoch": 3.3556970009748266, - "grad_norm": 0.001013896893709898, - "learning_rate": 0.0001999944456409668, - "loss": 46.0, - "step": 43890 - }, - { - "epoch": 3.3557734579582164, - "grad_norm": 0.005709273274987936, - "learning_rate": 0.00019999444538780498, - "loss": 46.0, - "step": 43891 - }, - { - "epoch": 3.355849914941606, - "grad_norm": 0.0026123628485947847, - "learning_rate": 0.00019999444513463737, - "loss": 46.0, - "step": 43892 - }, - { - "epoch": 3.355926371924996, - "grad_norm": 0.0017032069154083729, - "learning_rate": 0.000199994444881464, - "loss": 46.0, - "step": 43893 - }, - { - "epoch": 3.3560028289083856, - "grad_norm": 0.0035076544154435396, - "learning_rate": 0.00019999444462828482, - "loss": 46.0, - "step": 43894 - }, - { - "epoch": 3.356079285891775, - "grad_norm": 0.005666936747729778, - "learning_rate": 0.00019999444437509992, - "loss": 46.0, - "step": 43895 - }, - { - "epoch": 3.3561557428751647, - "grad_norm": 0.0021470433566719294, - "learning_rate": 0.00019999444412190922, - "loss": 46.0, - "step": 43896 - }, - { - "epoch": 3.3562321998585545, - "grad_norm": 0.0028956576716154814, - "learning_rate": 0.00019999444386871275, - "loss": 46.0, - "step": 43897 - }, - { - "epoch": 3.3563086568419442, - "grad_norm": 0.0007516075856983662, - "learning_rate": 0.0001999944436155105, - "loss": 46.0, - "step": 43898 - }, - { - "epoch": 3.356385113825334, - "grad_norm": 0.0009589249966666102, - "learning_rate": 0.00019999444336230254, - "loss": 46.0, - "step": 43899 - }, - { - "epoch": 3.3564615708087238, - "grad_norm": 0.000699333380907774, - "learning_rate": 0.00019999444310908878, - "loss": 46.0, - "step": 43900 - }, - { - "epoch": 3.3565380277921135, - "grad_norm": 0.001205030595883727, - "learning_rate": 0.00019999444285586921, - "loss": 46.0, - "step": 43901 - }, - { - "epoch": 3.3566144847755033, - "grad_norm": 0.003453389508649707, - "learning_rate": 0.0001999944426026439, - "loss": 46.0, - "step": 43902 - }, - { - "epoch": 3.356690941758893, - "grad_norm": 0.0017191831720992923, - "learning_rate": 0.00019999444234941285, - "loss": 46.0, - "step": 43903 - }, - { - "epoch": 3.356767398742283, - "grad_norm": 0.006386714521795511, - "learning_rate": 0.000199994442096176, - "loss": 46.0, - "step": 43904 - }, - { - "epoch": 3.356843855725672, - "grad_norm": 0.004084683954715729, - "learning_rate": 0.00019999444184293336, - "loss": 46.0, - "step": 43905 - }, - { - "epoch": 3.356920312709062, - "grad_norm": 0.004364202730357647, - "learning_rate": 0.00019999444158968498, - "loss": 46.0, - "step": 43906 - }, - { - "epoch": 3.3569967696924516, - "grad_norm": 0.002384310821071267, - "learning_rate": 0.00019999444133643083, - "loss": 46.0, - "step": 43907 - }, - { - "epoch": 3.3570732266758414, - "grad_norm": 0.004904799163341522, - "learning_rate": 0.00019999444108317093, - "loss": 46.0, - "step": 43908 - }, - { - "epoch": 3.357149683659231, - "grad_norm": 0.0026783396024256945, - "learning_rate": 0.0001999944408299052, - "loss": 46.0, - "step": 43909 - }, - { - "epoch": 3.357226140642621, - "grad_norm": 0.000880925974342972, - "learning_rate": 0.00019999444057663377, - "loss": 46.0, - "step": 43910 - }, - { - "epoch": 3.3573025976260107, - "grad_norm": 0.0023351088166236877, - "learning_rate": 0.00019999444032335655, - "loss": 46.0, - "step": 43911 - }, - { - "epoch": 3.3573790546094004, - "grad_norm": 0.0018413979560136795, - "learning_rate": 0.00019999444007007356, - "loss": 46.0, - "step": 43912 - }, - { - "epoch": 3.35745551159279, - "grad_norm": 0.006709543988108635, - "learning_rate": 0.0001999944398167848, - "loss": 46.0, - "step": 43913 - }, - { - "epoch": 3.35753196857618, - "grad_norm": 0.004390840418636799, - "learning_rate": 0.00019999443956349023, - "loss": 46.0, - "step": 43914 - }, - { - "epoch": 3.3576084255595697, - "grad_norm": 0.002180737443268299, - "learning_rate": 0.00019999443931018992, - "loss": 46.0, - "step": 43915 - }, - { - "epoch": 3.3576848825429595, - "grad_norm": 0.0042215511202812195, - "learning_rate": 0.00019999443905688386, - "loss": 46.0, - "step": 43916 - }, - { - "epoch": 3.357761339526349, - "grad_norm": 0.001434640376828611, - "learning_rate": 0.00019999443880357203, - "loss": 46.0, - "step": 43917 - }, - { - "epoch": 3.3578377965097386, - "grad_norm": 0.0031608024146407843, - "learning_rate": 0.0001999944385502544, - "loss": 46.0, - "step": 43918 - }, - { - "epoch": 3.3579142534931283, - "grad_norm": 0.0013899975456297398, - "learning_rate": 0.00019999443829693103, - "loss": 46.0, - "step": 43919 - }, - { - "epoch": 3.357990710476518, - "grad_norm": 0.0026645592879503965, - "learning_rate": 0.00019999443804360188, - "loss": 46.0, - "step": 43920 - }, - { - "epoch": 3.358067167459908, - "grad_norm": 0.001703485380858183, - "learning_rate": 0.00019999443779026695, - "loss": 46.0, - "step": 43921 - }, - { - "epoch": 3.3581436244432976, - "grad_norm": 0.005464259535074234, - "learning_rate": 0.00019999443753692626, - "loss": 46.0, - "step": 43922 - }, - { - "epoch": 3.3582200814266874, - "grad_norm": 0.0031179413199424744, - "learning_rate": 0.0001999944372835798, - "loss": 46.0, - "step": 43923 - }, - { - "epoch": 3.358296538410077, - "grad_norm": 0.0014420761726796627, - "learning_rate": 0.00019999443703022757, - "loss": 46.0, - "step": 43924 - }, - { - "epoch": 3.358372995393467, - "grad_norm": 0.0010481172939762473, - "learning_rate": 0.00019999443677686958, - "loss": 46.0, - "step": 43925 - }, - { - "epoch": 3.3584494523768567, - "grad_norm": 0.0006573973223567009, - "learning_rate": 0.0001999944365235058, - "loss": 46.0, - "step": 43926 - }, - { - "epoch": 3.358525909360246, - "grad_norm": 0.00368470954708755, - "learning_rate": 0.00019999443627013628, - "loss": 46.0, - "step": 43927 - }, - { - "epoch": 3.3586023663436357, - "grad_norm": 0.000752764695789665, - "learning_rate": 0.00019999443601676097, - "loss": 46.0, - "step": 43928 - }, - { - "epoch": 3.3586788233270255, - "grad_norm": 0.00128778291400522, - "learning_rate": 0.0001999944357633799, - "loss": 46.0, - "step": 43929 - }, - { - "epoch": 3.3587552803104153, - "grad_norm": 0.005448696669191122, - "learning_rate": 0.00019999443550999306, - "loss": 46.0, - "step": 43930 - }, - { - "epoch": 3.358831737293805, - "grad_norm": 0.001997839193791151, - "learning_rate": 0.00019999443525660045, - "loss": 46.0, - "step": 43931 - }, - { - "epoch": 3.358908194277195, - "grad_norm": 0.0006499068695120513, - "learning_rate": 0.00019999443500320205, - "loss": 46.0, - "step": 43932 - }, - { - "epoch": 3.3589846512605845, - "grad_norm": 0.0018064759206026793, - "learning_rate": 0.0001999944347497979, - "loss": 46.0, - "step": 43933 - }, - { - "epoch": 3.3590611082439743, - "grad_norm": 0.011833742260932922, - "learning_rate": 0.000199994434496388, - "loss": 46.0, - "step": 43934 - }, - { - "epoch": 3.359137565227364, - "grad_norm": 0.0018127337098121643, - "learning_rate": 0.0001999944342429723, - "loss": 46.0, - "step": 43935 - }, - { - "epoch": 3.359214022210754, - "grad_norm": 0.0012243735836818814, - "learning_rate": 0.00019999443398955084, - "loss": 46.0, - "step": 43936 - }, - { - "epoch": 3.3592904791941436, - "grad_norm": 0.001750829047523439, - "learning_rate": 0.00019999443373612362, - "loss": 46.0, - "step": 43937 - }, - { - "epoch": 3.3593669361775333, - "grad_norm": 0.0028012520633637905, - "learning_rate": 0.00019999443348269066, - "loss": 46.0, - "step": 43938 - }, - { - "epoch": 3.3594433931609227, - "grad_norm": 0.0010618646629154682, - "learning_rate": 0.00019999443322925187, - "loss": 46.0, - "step": 43939 - }, - { - "epoch": 3.3595198501443124, - "grad_norm": 0.001202001003548503, - "learning_rate": 0.00019999443297580733, - "loss": 46.0, - "step": 43940 - }, - { - "epoch": 3.359596307127702, - "grad_norm": 0.0034605555702000856, - "learning_rate": 0.00019999443272235702, - "loss": 46.0, - "step": 43941 - }, - { - "epoch": 3.359672764111092, - "grad_norm": 0.0019187383586540818, - "learning_rate": 0.00019999443246890096, - "loss": 46.0, - "step": 43942 - }, - { - "epoch": 3.3597492210944817, - "grad_norm": 0.004395488183945417, - "learning_rate": 0.0001999944322154391, - "loss": 46.0, - "step": 43943 - }, - { - "epoch": 3.3598256780778715, - "grad_norm": 0.01191452331840992, - "learning_rate": 0.0001999944319619715, - "loss": 46.0, - "step": 43944 - }, - { - "epoch": 3.3599021350612612, - "grad_norm": 0.004040820989757776, - "learning_rate": 0.00019999443170849813, - "loss": 46.0, - "step": 43945 - }, - { - "epoch": 3.359978592044651, - "grad_norm": 0.0014816161710768938, - "learning_rate": 0.00019999443145501898, - "loss": 46.0, - "step": 43946 - }, - { - "epoch": 3.3600550490280408, - "grad_norm": 0.005345433950424194, - "learning_rate": 0.00019999443120153406, - "loss": 46.0, - "step": 43947 - }, - { - "epoch": 3.36013150601143, - "grad_norm": 0.0031253669876605272, - "learning_rate": 0.0001999944309480434, - "loss": 46.0, - "step": 43948 - }, - { - "epoch": 3.36020796299482, - "grad_norm": 0.0006608832045458257, - "learning_rate": 0.00019999443069454692, - "loss": 46.0, - "step": 43949 - }, - { - "epoch": 3.3602844199782096, - "grad_norm": 0.0025015934370458126, - "learning_rate": 0.0001999944304410447, - "loss": 46.0, - "step": 43950 - }, - { - "epoch": 3.3603608769615994, - "grad_norm": 0.006387869827449322, - "learning_rate": 0.0001999944301875367, - "loss": 46.0, - "step": 43951 - }, - { - "epoch": 3.360437333944989, - "grad_norm": 0.006368723232299089, - "learning_rate": 0.00019999442993402292, - "loss": 46.0, - "step": 43952 - }, - { - "epoch": 3.360513790928379, - "grad_norm": 0.01100991666316986, - "learning_rate": 0.00019999442968050341, - "loss": 46.0, - "step": 43953 - }, - { - "epoch": 3.3605902479117686, - "grad_norm": 0.0021573035046458244, - "learning_rate": 0.0001999944294269781, - "loss": 46.0, - "step": 43954 - }, - { - "epoch": 3.3606667048951584, - "grad_norm": 0.001826654770411551, - "learning_rate": 0.00019999442917344705, - "loss": 46.0, - "step": 43955 - }, - { - "epoch": 3.360743161878548, - "grad_norm": 0.0032256762497127056, - "learning_rate": 0.0001999944289199102, - "loss": 46.0, - "step": 43956 - }, - { - "epoch": 3.360819618861938, - "grad_norm": 0.0033450177870690823, - "learning_rate": 0.00019999442866636756, - "loss": 46.0, - "step": 43957 - }, - { - "epoch": 3.3608960758453277, - "grad_norm": 0.0007844028295949101, - "learning_rate": 0.00019999442841281921, - "loss": 46.0, - "step": 43958 - }, - { - "epoch": 3.3609725328287174, - "grad_norm": 0.0015928855864331126, - "learning_rate": 0.0001999944281592651, - "loss": 46.0, - "step": 43959 - }, - { - "epoch": 3.361048989812107, - "grad_norm": 0.0015519981971010566, - "learning_rate": 0.00019999442790570514, - "loss": 46.0, - "step": 43960 - }, - { - "epoch": 3.3611254467954965, - "grad_norm": 0.0008716603042557836, - "learning_rate": 0.00019999442765213945, - "loss": 46.0, - "step": 43961 - }, - { - "epoch": 3.3612019037788863, - "grad_norm": 0.006894951220601797, - "learning_rate": 0.00019999442739856803, - "loss": 46.0, - "step": 43962 - }, - { - "epoch": 3.361278360762276, - "grad_norm": 0.0014548986218869686, - "learning_rate": 0.00019999442714499076, - "loss": 46.0, - "step": 43963 - }, - { - "epoch": 3.361354817745666, - "grad_norm": 0.002383999526500702, - "learning_rate": 0.0001999944268914078, - "loss": 46.0, - "step": 43964 - }, - { - "epoch": 3.3614312747290556, - "grad_norm": 0.0036437136586755514, - "learning_rate": 0.00019999442663781904, - "loss": 46.0, - "step": 43965 - }, - { - "epoch": 3.3615077317124453, - "grad_norm": 0.0018820744007825851, - "learning_rate": 0.0001999944263842245, - "loss": 46.0, - "step": 43966 - }, - { - "epoch": 3.361584188695835, - "grad_norm": 0.002455133944749832, - "learning_rate": 0.0001999944261306242, - "loss": 46.0, - "step": 43967 - }, - { - "epoch": 3.361660645679225, - "grad_norm": 0.0037344032898545265, - "learning_rate": 0.00019999442587701814, - "loss": 46.0, - "step": 43968 - }, - { - "epoch": 3.3617371026626146, - "grad_norm": 0.0027202623896300793, - "learning_rate": 0.00019999442562340628, - "loss": 46.0, - "step": 43969 - }, - { - "epoch": 3.361813559646004, - "grad_norm": 0.0007057629991322756, - "learning_rate": 0.0001999944253697887, - "loss": 46.0, - "step": 43970 - }, - { - "epoch": 3.3618900166293937, - "grad_norm": 0.0024618864990770817, - "learning_rate": 0.0001999944251161653, - "loss": 46.0, - "step": 43971 - }, - { - "epoch": 3.3619664736127834, - "grad_norm": 0.0018257752526551485, - "learning_rate": 0.00019999442486253615, - "loss": 46.0, - "step": 43972 - }, - { - "epoch": 3.362042930596173, - "grad_norm": 0.0017023581312969327, - "learning_rate": 0.00019999442460890126, - "loss": 46.0, - "step": 43973 - }, - { - "epoch": 3.362119387579563, - "grad_norm": 0.0012312245089560747, - "learning_rate": 0.00019999442435526054, - "loss": 46.0, - "step": 43974 - }, - { - "epoch": 3.3621958445629527, - "grad_norm": 0.001370420679450035, - "learning_rate": 0.00019999442410161412, - "loss": 46.0, - "step": 43975 - }, - { - "epoch": 3.3622723015463425, - "grad_norm": 0.003550641005858779, - "learning_rate": 0.0001999944238479619, - "loss": 46.0, - "step": 43976 - }, - { - "epoch": 3.3623487585297323, - "grad_norm": 0.0037540870252996683, - "learning_rate": 0.0001999944235943039, - "loss": 46.0, - "step": 43977 - }, - { - "epoch": 3.362425215513122, - "grad_norm": 0.004224210046231747, - "learning_rate": 0.00019999442334064013, - "loss": 46.0, - "step": 43978 - }, - { - "epoch": 3.3625016724965118, - "grad_norm": 0.002612208016216755, - "learning_rate": 0.00019999442308697062, - "loss": 46.0, - "step": 43979 - }, - { - "epoch": 3.3625781294799015, - "grad_norm": 0.000882496009580791, - "learning_rate": 0.0001999944228332953, - "loss": 46.0, - "step": 43980 - }, - { - "epoch": 3.3626545864632913, - "grad_norm": 0.0011672910768538713, - "learning_rate": 0.00019999442257961426, - "loss": 46.0, - "step": 43981 - }, - { - "epoch": 3.362731043446681, - "grad_norm": 0.0037730110343545675, - "learning_rate": 0.0001999944223259274, - "loss": 46.0, - "step": 43982 - }, - { - "epoch": 3.3628075004300704, - "grad_norm": 0.0035125119611620903, - "learning_rate": 0.00019999442207223477, - "loss": 46.0, - "step": 43983 - }, - { - "epoch": 3.36288395741346, - "grad_norm": 0.0015022674342617393, - "learning_rate": 0.00019999442181853642, - "loss": 46.0, - "step": 43984 - }, - { - "epoch": 3.36296041439685, - "grad_norm": 0.0015277877682819963, - "learning_rate": 0.00019999442156483225, - "loss": 46.0, - "step": 43985 - }, - { - "epoch": 3.3630368713802397, - "grad_norm": 0.0017599384300410748, - "learning_rate": 0.00019999442131112235, - "loss": 46.0, - "step": 43986 - }, - { - "epoch": 3.3631133283636294, - "grad_norm": 0.005569704808294773, - "learning_rate": 0.00019999442105740668, - "loss": 46.0, - "step": 43987 - }, - { - "epoch": 3.363189785347019, - "grad_norm": 0.0029773619025945663, - "learning_rate": 0.00019999442080368521, - "loss": 46.0, - "step": 43988 - }, - { - "epoch": 3.363266242330409, - "grad_norm": 0.0017659197328612208, - "learning_rate": 0.000199994420549958, - "loss": 46.0, - "step": 43989 - }, - { - "epoch": 3.3633426993137987, - "grad_norm": 0.0030685404781252146, - "learning_rate": 0.000199994420296225, - "loss": 46.0, - "step": 43990 - }, - { - "epoch": 3.3634191562971885, - "grad_norm": 0.0006195066962391138, - "learning_rate": 0.00019999442004248625, - "loss": 46.0, - "step": 43991 - }, - { - "epoch": 3.363495613280578, - "grad_norm": 0.004994519986212254, - "learning_rate": 0.00019999441978874174, - "loss": 46.0, - "step": 43992 - }, - { - "epoch": 3.3635720702639675, - "grad_norm": 0.0015531317330896854, - "learning_rate": 0.00019999441953499143, - "loss": 46.0, - "step": 43993 - }, - { - "epoch": 3.3636485272473573, - "grad_norm": 0.0031213618349283934, - "learning_rate": 0.00019999441928123535, - "loss": 46.0, - "step": 43994 - }, - { - "epoch": 3.363724984230747, - "grad_norm": 0.001442785025574267, - "learning_rate": 0.00019999441902747352, - "loss": 46.0, - "step": 43995 - }, - { - "epoch": 3.363801441214137, - "grad_norm": 0.0038165568839758635, - "learning_rate": 0.0001999944187737059, - "loss": 46.0, - "step": 43996 - }, - { - "epoch": 3.3638778981975266, - "grad_norm": 0.005286256782710552, - "learning_rate": 0.00019999441851993255, - "loss": 46.0, - "step": 43997 - }, - { - "epoch": 3.3639543551809163, - "grad_norm": 0.0017717641312628984, - "learning_rate": 0.00019999441826615343, - "loss": 46.0, - "step": 43998 - }, - { - "epoch": 3.364030812164306, - "grad_norm": 0.0013908063992857933, - "learning_rate": 0.00019999441801236848, - "loss": 46.0, - "step": 43999 - }, - { - "epoch": 3.364107269147696, - "grad_norm": 0.0025197898503392935, - "learning_rate": 0.0001999944177585778, - "loss": 46.0, - "step": 44000 - }, - { - "epoch": 3.3641837261310856, - "grad_norm": 0.0017711903201416135, - "learning_rate": 0.00019999441750478137, - "loss": 46.0, - "step": 44001 - }, - { - "epoch": 3.3642601831144754, - "grad_norm": 0.002247098134830594, - "learning_rate": 0.00019999441725097913, - "loss": 46.0, - "step": 44002 - }, - { - "epoch": 3.364336640097865, - "grad_norm": 0.0016384284244850278, - "learning_rate": 0.00019999441699717116, - "loss": 46.0, - "step": 44003 - }, - { - "epoch": 3.3644130970812545, - "grad_norm": 0.0013083242811262608, - "learning_rate": 0.0001999944167433574, - "loss": 46.0, - "step": 44004 - }, - { - "epoch": 3.3644895540646442, - "grad_norm": 0.007745744194835424, - "learning_rate": 0.00019999441648953784, - "loss": 46.0, - "step": 44005 - }, - { - "epoch": 3.364566011048034, - "grad_norm": 0.003586008446291089, - "learning_rate": 0.0001999944162357126, - "loss": 46.0, - "step": 44006 - }, - { - "epoch": 3.3646424680314237, - "grad_norm": 0.0019406918436288834, - "learning_rate": 0.00019999441598188148, - "loss": 46.0, - "step": 44007 - }, - { - "epoch": 3.3647189250148135, - "grad_norm": 0.0015440720599144697, - "learning_rate": 0.00019999441572804468, - "loss": 46.0, - "step": 44008 - }, - { - "epoch": 3.3647953819982033, - "grad_norm": 0.0026671465020626783, - "learning_rate": 0.00019999441547420208, - "loss": 46.0, - "step": 44009 - }, - { - "epoch": 3.364871838981593, - "grad_norm": 0.0012204390950500965, - "learning_rate": 0.0001999944152203537, - "loss": 46.0, - "step": 44010 - }, - { - "epoch": 3.364948295964983, - "grad_norm": 0.0016009026439860463, - "learning_rate": 0.00019999441496649955, - "loss": 46.0, - "step": 44011 - }, - { - "epoch": 3.3650247529483726, - "grad_norm": 0.0020493201445788145, - "learning_rate": 0.00019999441471263963, - "loss": 46.0, - "step": 44012 - }, - { - "epoch": 3.3651012099317623, - "grad_norm": 0.007693315856158733, - "learning_rate": 0.00019999441445877396, - "loss": 46.0, - "step": 44013 - }, - { - "epoch": 3.3651776669151516, - "grad_norm": 0.0016419970197603106, - "learning_rate": 0.00019999441420490252, - "loss": 46.0, - "step": 44014 - }, - { - "epoch": 3.3652541238985414, - "grad_norm": 0.0019955032039433718, - "learning_rate": 0.00019999441395102528, - "loss": 46.0, - "step": 44015 - }, - { - "epoch": 3.365330580881931, - "grad_norm": 0.007652727421373129, - "learning_rate": 0.0001999944136971423, - "loss": 46.0, - "step": 44016 - }, - { - "epoch": 3.365407037865321, - "grad_norm": 0.002051898743957281, - "learning_rate": 0.00019999441344325356, - "loss": 46.0, - "step": 44017 - }, - { - "epoch": 3.3654834948487107, - "grad_norm": 0.0009151982958428562, - "learning_rate": 0.000199994413189359, - "loss": 46.0, - "step": 44018 - }, - { - "epoch": 3.3655599518321004, - "grad_norm": 0.0034932363778352737, - "learning_rate": 0.00019999441293545872, - "loss": 46.0, - "step": 44019 - }, - { - "epoch": 3.36563640881549, - "grad_norm": 0.0003169063711538911, - "learning_rate": 0.00019999441268155266, - "loss": 46.0, - "step": 44020 - }, - { - "epoch": 3.36571286579888, - "grad_norm": 0.004807604476809502, - "learning_rate": 0.00019999441242764084, - "loss": 46.0, - "step": 44021 - }, - { - "epoch": 3.3657893227822697, - "grad_norm": 0.002756100380793214, - "learning_rate": 0.0001999944121737232, - "loss": 46.0, - "step": 44022 - }, - { - "epoch": 3.3658657797656595, - "grad_norm": 0.0009285946725867689, - "learning_rate": 0.00019999441191979983, - "loss": 46.0, - "step": 44023 - }, - { - "epoch": 3.3659422367490492, - "grad_norm": 0.0025340397842228413, - "learning_rate": 0.00019999441166587071, - "loss": 46.0, - "step": 44024 - }, - { - "epoch": 3.366018693732439, - "grad_norm": 0.003007646184414625, - "learning_rate": 0.0001999944114119358, - "loss": 46.0, - "step": 44025 - }, - { - "epoch": 3.3660951507158283, - "grad_norm": 0.003365627024322748, - "learning_rate": 0.00019999441115799513, - "loss": 46.0, - "step": 44026 - }, - { - "epoch": 3.366171607699218, - "grad_norm": 0.00034670779132284224, - "learning_rate": 0.00019999441090404866, - "loss": 46.0, - "step": 44027 - }, - { - "epoch": 3.366248064682608, - "grad_norm": 0.0029784946236759424, - "learning_rate": 0.00019999441065009647, - "loss": 46.0, - "step": 44028 - }, - { - "epoch": 3.3663245216659976, - "grad_norm": 0.0007970849983394146, - "learning_rate": 0.00019999441039613846, - "loss": 46.0, - "step": 44029 - }, - { - "epoch": 3.3664009786493874, - "grad_norm": 0.0034228123258799314, - "learning_rate": 0.00019999441014217472, - "loss": 46.0, - "step": 44030 - }, - { - "epoch": 3.366477435632777, - "grad_norm": 0.0014420750085264444, - "learning_rate": 0.0001999944098882052, - "loss": 46.0, - "step": 44031 - }, - { - "epoch": 3.366553892616167, - "grad_norm": 0.003425904316827655, - "learning_rate": 0.00019999440963422988, - "loss": 46.0, - "step": 44032 - }, - { - "epoch": 3.3666303495995566, - "grad_norm": 0.002243151655420661, - "learning_rate": 0.00019999440938024883, - "loss": 46.0, - "step": 44033 - }, - { - "epoch": 3.3667068065829464, - "grad_norm": 0.0006195084424689412, - "learning_rate": 0.000199994409126262, - "loss": 46.0, - "step": 44034 - }, - { - "epoch": 3.366783263566336, - "grad_norm": 0.0022923778742551804, - "learning_rate": 0.0001999944088722694, - "loss": 46.0, - "step": 44035 - }, - { - "epoch": 3.3668597205497255, - "grad_norm": 0.0063108112663030624, - "learning_rate": 0.00019999440861827102, - "loss": 46.0, - "step": 44036 - }, - { - "epoch": 3.3669361775331152, - "grad_norm": 0.002106614410877228, - "learning_rate": 0.0001999944083642669, - "loss": 46.0, - "step": 44037 - }, - { - "epoch": 3.367012634516505, - "grad_norm": 0.003978736698627472, - "learning_rate": 0.00019999440811025698, - "loss": 46.0, - "step": 44038 - }, - { - "epoch": 3.3670890914998948, - "grad_norm": 0.0024155403953045607, - "learning_rate": 0.00019999440785624132, - "loss": 46.0, - "step": 44039 - }, - { - "epoch": 3.3671655484832845, - "grad_norm": 0.0008293219725601375, - "learning_rate": 0.00019999440760221985, - "loss": 46.0, - "step": 44040 - }, - { - "epoch": 3.3672420054666743, - "grad_norm": 0.0007247526082210243, - "learning_rate": 0.00019999440734819266, - "loss": 46.0, - "step": 44041 - }, - { - "epoch": 3.367318462450064, - "grad_norm": 0.003393264254555106, - "learning_rate": 0.00019999440709415968, - "loss": 46.0, - "step": 44042 - }, - { - "epoch": 3.367394919433454, - "grad_norm": 0.0011567908804863691, - "learning_rate": 0.0001999944068401209, - "loss": 46.0, - "step": 44043 - }, - { - "epoch": 3.3674713764168436, - "grad_norm": 0.0015661921352148056, - "learning_rate": 0.00019999440658607638, - "loss": 46.0, - "step": 44044 - }, - { - "epoch": 3.3675478334002333, - "grad_norm": 0.0016815419076010585, - "learning_rate": 0.00019999440633202607, - "loss": 46.0, - "step": 44045 - }, - { - "epoch": 3.367624290383623, - "grad_norm": 0.001174632809124887, - "learning_rate": 0.00019999440607797002, - "loss": 46.0, - "step": 44046 - }, - { - "epoch": 3.367700747367013, - "grad_norm": 0.0007991038146428764, - "learning_rate": 0.0001999944058239082, - "loss": 46.0, - "step": 44047 - }, - { - "epoch": 3.367777204350402, - "grad_norm": 0.0025518157053738832, - "learning_rate": 0.0001999944055698406, - "loss": 46.0, - "step": 44048 - }, - { - "epoch": 3.367853661333792, - "grad_norm": 0.0033297755289822817, - "learning_rate": 0.00019999440531576722, - "loss": 46.0, - "step": 44049 - }, - { - "epoch": 3.3679301183171817, - "grad_norm": 0.002374293515458703, - "learning_rate": 0.0001999944050616881, - "loss": 46.0, - "step": 44050 - }, - { - "epoch": 3.3680065753005715, - "grad_norm": 0.0014175818068906665, - "learning_rate": 0.00019999440480760318, - "loss": 46.0, - "step": 44051 - }, - { - "epoch": 3.368083032283961, - "grad_norm": 0.0011245246278122067, - "learning_rate": 0.0001999944045535125, - "loss": 46.0, - "step": 44052 - }, - { - "epoch": 3.368159489267351, - "grad_norm": 0.002561658388003707, - "learning_rate": 0.00019999440429941605, - "loss": 46.0, - "step": 44053 - }, - { - "epoch": 3.3682359462507407, - "grad_norm": 0.0017387623665854335, - "learning_rate": 0.00019999440404531383, - "loss": 46.0, - "step": 44054 - }, - { - "epoch": 3.3683124032341305, - "grad_norm": 0.0013006882509216666, - "learning_rate": 0.00019999440379120587, - "loss": 46.0, - "step": 44055 - }, - { - "epoch": 3.3683888602175203, - "grad_norm": 0.0020123799331486225, - "learning_rate": 0.0001999944035370921, - "loss": 46.0, - "step": 44056 - }, - { - "epoch": 3.36846531720091, - "grad_norm": 0.0022627043072134256, - "learning_rate": 0.00019999440328297258, - "loss": 46.0, - "step": 44057 - }, - { - "epoch": 3.3685417741842993, - "grad_norm": 0.002851160941645503, - "learning_rate": 0.0001999944030288473, - "loss": 46.0, - "step": 44058 - }, - { - "epoch": 3.368618231167689, - "grad_norm": 0.003905707038938999, - "learning_rate": 0.00019999440277471625, - "loss": 46.0, - "step": 44059 - }, - { - "epoch": 3.368694688151079, - "grad_norm": 0.0008961720741353929, - "learning_rate": 0.00019999440252057942, - "loss": 46.0, - "step": 44060 - }, - { - "epoch": 3.3687711451344686, - "grad_norm": 0.0030855913646519184, - "learning_rate": 0.0001999944022664368, - "loss": 46.0, - "step": 44061 - }, - { - "epoch": 3.3688476021178584, - "grad_norm": 0.003867947030812502, - "learning_rate": 0.00019999440201228845, - "loss": 46.0, - "step": 44062 - }, - { - "epoch": 3.368924059101248, - "grad_norm": 0.021704386919736862, - "learning_rate": 0.0001999944017581343, - "loss": 46.0, - "step": 44063 - }, - { - "epoch": 3.369000516084638, - "grad_norm": 0.001425275462679565, - "learning_rate": 0.0001999944015039744, - "loss": 46.0, - "step": 44064 - }, - { - "epoch": 3.3690769730680277, - "grad_norm": 0.003588259220123291, - "learning_rate": 0.00019999440124980874, - "loss": 46.0, - "step": 44065 - }, - { - "epoch": 3.3691534300514174, - "grad_norm": 0.004983542952686548, - "learning_rate": 0.0001999944009956373, - "loss": 46.0, - "step": 44066 - }, - { - "epoch": 3.369229887034807, - "grad_norm": 0.0011447498109191656, - "learning_rate": 0.00019999440074146009, - "loss": 46.0, - "step": 44067 - }, - { - "epoch": 3.369306344018197, - "grad_norm": 0.013150081038475037, - "learning_rate": 0.0001999944004872771, - "loss": 46.0, - "step": 44068 - }, - { - "epoch": 3.3693828010015867, - "grad_norm": 0.001434782985597849, - "learning_rate": 0.00019999440023308834, - "loss": 46.0, - "step": 44069 - }, - { - "epoch": 3.369459257984976, - "grad_norm": 0.004751997068524361, - "learning_rate": 0.0001999943999788938, - "loss": 46.0, - "step": 44070 - }, - { - "epoch": 3.369535714968366, - "grad_norm": 0.0011649583466351032, - "learning_rate": 0.00019999439972469353, - "loss": 46.0, - "step": 44071 - }, - { - "epoch": 3.3696121719517556, - "grad_norm": 0.001877776114270091, - "learning_rate": 0.00019999439947048745, - "loss": 46.0, - "step": 44072 - }, - { - "epoch": 3.3696886289351453, - "grad_norm": 0.002230021171271801, - "learning_rate": 0.00019999439921627562, - "loss": 46.0, - "step": 44073 - }, - { - "epoch": 3.369765085918535, - "grad_norm": 0.0025933990254998207, - "learning_rate": 0.00019999439896205802, - "loss": 46.0, - "step": 44074 - }, - { - "epoch": 3.369841542901925, - "grad_norm": 0.0028062283527106047, - "learning_rate": 0.00019999439870783468, - "loss": 46.0, - "step": 44075 - }, - { - "epoch": 3.3699179998853146, - "grad_norm": 0.0006448200438171625, - "learning_rate": 0.00019999439845360556, - "loss": 46.0, - "step": 44076 - }, - { - "epoch": 3.3699944568687044, - "grad_norm": 0.007435072213411331, - "learning_rate": 0.00019999439819937064, - "loss": 46.0, - "step": 44077 - }, - { - "epoch": 3.370070913852094, - "grad_norm": 0.003749512368813157, - "learning_rate": 0.00019999439794512994, - "loss": 46.0, - "step": 44078 - }, - { - "epoch": 3.370147370835484, - "grad_norm": 0.002749453764408827, - "learning_rate": 0.00019999439769088353, - "loss": 46.0, - "step": 44079 - }, - { - "epoch": 3.370223827818873, - "grad_norm": 0.010053405538201332, - "learning_rate": 0.00019999439743663132, - "loss": 46.0, - "step": 44080 - }, - { - "epoch": 3.370300284802263, - "grad_norm": 0.0009766056900843978, - "learning_rate": 0.0001999943971823733, - "loss": 46.0, - "step": 44081 - }, - { - "epoch": 3.3703767417856527, - "grad_norm": 0.0015313500771299005, - "learning_rate": 0.00019999439692810957, - "loss": 46.0, - "step": 44082 - }, - { - "epoch": 3.3704531987690425, - "grad_norm": 0.002901981584727764, - "learning_rate": 0.00019999439667384007, - "loss": 46.0, - "step": 44083 - }, - { - "epoch": 3.3705296557524322, - "grad_norm": 0.0029382891952991486, - "learning_rate": 0.00019999439641956476, - "loss": 46.0, - "step": 44084 - }, - { - "epoch": 3.370606112735822, - "grad_norm": 0.003256638767197728, - "learning_rate": 0.0001999943961652837, - "loss": 46.0, - "step": 44085 - }, - { - "epoch": 3.3706825697192118, - "grad_norm": 0.0012262884993106127, - "learning_rate": 0.00019999439591099688, - "loss": 46.0, - "step": 44086 - }, - { - "epoch": 3.3707590267026015, - "grad_norm": 0.002431526780128479, - "learning_rate": 0.00019999439565670428, - "loss": 46.0, - "step": 44087 - }, - { - "epoch": 3.3708354836859913, - "grad_norm": 0.0016523622907698154, - "learning_rate": 0.0001999943954024059, - "loss": 46.0, - "step": 44088 - }, - { - "epoch": 3.370911940669381, - "grad_norm": 0.006879203021526337, - "learning_rate": 0.0001999943951481018, - "loss": 46.0, - "step": 44089 - }, - { - "epoch": 3.370988397652771, - "grad_norm": 0.00565417530015111, - "learning_rate": 0.00019999439489379187, - "loss": 46.0, - "step": 44090 - }, - { - "epoch": 3.3710648546361606, - "grad_norm": 0.0010815723799169064, - "learning_rate": 0.0001999943946394762, - "loss": 46.0, - "step": 44091 - }, - { - "epoch": 3.37114131161955, - "grad_norm": 0.00601259944960475, - "learning_rate": 0.00019999439438515477, - "loss": 46.0, - "step": 44092 - }, - { - "epoch": 3.3712177686029396, - "grad_norm": 0.001999577507376671, - "learning_rate": 0.00019999439413082755, - "loss": 46.0, - "step": 44093 - }, - { - "epoch": 3.3712942255863294, - "grad_norm": 0.002029047580435872, - "learning_rate": 0.0001999943938764946, - "loss": 46.0, - "step": 44094 - }, - { - "epoch": 3.371370682569719, - "grad_norm": 0.000913600146304816, - "learning_rate": 0.0001999943936221558, - "loss": 46.0, - "step": 44095 - }, - { - "epoch": 3.371447139553109, - "grad_norm": 0.0067564258351922035, - "learning_rate": 0.00019999439336781133, - "loss": 46.0, - "step": 44096 - }, - { - "epoch": 3.3715235965364987, - "grad_norm": 0.0032579984981566668, - "learning_rate": 0.00019999439311346102, - "loss": 46.0, - "step": 44097 - }, - { - "epoch": 3.3716000535198885, - "grad_norm": 0.001946762204170227, - "learning_rate": 0.00019999439285910494, - "loss": 46.0, - "step": 44098 - }, - { - "epoch": 3.371676510503278, - "grad_norm": 0.0015006192261353135, - "learning_rate": 0.00019999439260474312, - "loss": 46.0, - "step": 44099 - }, - { - "epoch": 3.371752967486668, - "grad_norm": 0.0017017036443576217, - "learning_rate": 0.00019999439235037555, - "loss": 46.0, - "step": 44100 - }, - { - "epoch": 3.3718294244700573, - "grad_norm": 0.005532213021069765, - "learning_rate": 0.00019999439209600217, - "loss": 46.0, - "step": 44101 - }, - { - "epoch": 3.371905881453447, - "grad_norm": 0.0014013760956004262, - "learning_rate": 0.00019999439184162306, - "loss": 46.0, - "step": 44102 - }, - { - "epoch": 3.371982338436837, - "grad_norm": 0.002641149330884218, - "learning_rate": 0.00019999439158723814, - "loss": 46.0, - "step": 44103 - }, - { - "epoch": 3.3720587954202266, - "grad_norm": 0.0010958026396110654, - "learning_rate": 0.00019999439133284747, - "loss": 46.0, - "step": 44104 - }, - { - "epoch": 3.3721352524036163, - "grad_norm": 0.0013367747887969017, - "learning_rate": 0.00019999439107845103, - "loss": 46.0, - "step": 44105 - }, - { - "epoch": 3.372211709387006, - "grad_norm": 0.0009631678694859147, - "learning_rate": 0.00019999439082404882, - "loss": 46.0, - "step": 44106 - }, - { - "epoch": 3.372288166370396, - "grad_norm": 0.0023285860661417246, - "learning_rate": 0.00019999439056964083, - "loss": 46.0, - "step": 44107 - }, - { - "epoch": 3.3723646233537856, - "grad_norm": 0.0023017036728560925, - "learning_rate": 0.00019999439031522708, - "loss": 46.0, - "step": 44108 - }, - { - "epoch": 3.3724410803371754, - "grad_norm": 0.004402284976094961, - "learning_rate": 0.00019999439006080757, - "loss": 46.0, - "step": 44109 - }, - { - "epoch": 3.372517537320565, - "grad_norm": 0.0014698015293106437, - "learning_rate": 0.0001999943898063823, - "loss": 46.0, - "step": 44110 - }, - { - "epoch": 3.372593994303955, - "grad_norm": 0.001998212421312928, - "learning_rate": 0.00019999438955195124, - "loss": 46.0, - "step": 44111 - }, - { - "epoch": 3.3726704512873447, - "grad_norm": 0.00285261869430542, - "learning_rate": 0.00019999438929751441, - "loss": 46.0, - "step": 44112 - }, - { - "epoch": 3.3727469082707344, - "grad_norm": 0.002480628900229931, - "learning_rate": 0.00019999438904307182, - "loss": 46.0, - "step": 44113 - }, - { - "epoch": 3.3728233652541237, - "grad_norm": 0.002849624725058675, - "learning_rate": 0.00019999438878862344, - "loss": 46.0, - "step": 44114 - }, - { - "epoch": 3.3728998222375135, - "grad_norm": 0.0015133980195969343, - "learning_rate": 0.00019999438853416935, - "loss": 46.0, - "step": 44115 - }, - { - "epoch": 3.3729762792209033, - "grad_norm": 0.0020540626719594, - "learning_rate": 0.00019999438827970943, - "loss": 46.0, - "step": 44116 - }, - { - "epoch": 3.373052736204293, - "grad_norm": 0.0014220286393538117, - "learning_rate": 0.00019999438802524377, - "loss": 46.0, - "step": 44117 - }, - { - "epoch": 3.373129193187683, - "grad_norm": 0.0015165775548666716, - "learning_rate": 0.0001999943877707723, - "loss": 46.0, - "step": 44118 - }, - { - "epoch": 3.3732056501710725, - "grad_norm": 0.0026727435179054737, - "learning_rate": 0.00019999438751629512, - "loss": 46.0, - "step": 44119 - }, - { - "epoch": 3.3732821071544623, - "grad_norm": 0.004713153466582298, - "learning_rate": 0.00019999438726181213, - "loss": 46.0, - "step": 44120 - }, - { - "epoch": 3.373358564137852, - "grad_norm": 0.001825318788178265, - "learning_rate": 0.0001999943870073234, - "loss": 46.0, - "step": 44121 - }, - { - "epoch": 3.373435021121242, - "grad_norm": 0.002042043022811413, - "learning_rate": 0.00019999438675282887, - "loss": 46.0, - "step": 44122 - }, - { - "epoch": 3.373511478104631, - "grad_norm": 0.0016553821042180061, - "learning_rate": 0.0001999943864983286, - "loss": 46.0, - "step": 44123 - }, - { - "epoch": 3.373587935088021, - "grad_norm": 0.0037303168792277575, - "learning_rate": 0.00019999438624382254, - "loss": 46.0, - "step": 44124 - }, - { - "epoch": 3.3736643920714107, - "grad_norm": 0.0015781475231051445, - "learning_rate": 0.00019999438598931071, - "loss": 46.0, - "step": 44125 - }, - { - "epoch": 3.3737408490548004, - "grad_norm": 0.005999335087835789, - "learning_rate": 0.00019999438573479312, - "loss": 46.0, - "step": 44126 - }, - { - "epoch": 3.37381730603819, - "grad_norm": 0.0014905557036399841, - "learning_rate": 0.00019999438548026977, - "loss": 46.0, - "step": 44127 - }, - { - "epoch": 3.37389376302158, - "grad_norm": 0.00216510146856308, - "learning_rate": 0.0001999943852257406, - "loss": 46.0, - "step": 44128 - }, - { - "epoch": 3.3739702200049697, - "grad_norm": 0.00393811846151948, - "learning_rate": 0.00019999438497120573, - "loss": 46.0, - "step": 44129 - }, - { - "epoch": 3.3740466769883595, - "grad_norm": 0.0035043267998844385, - "learning_rate": 0.00019999438471666507, - "loss": 46.0, - "step": 44130 - }, - { - "epoch": 3.3741231339717492, - "grad_norm": 0.0011496666120365262, - "learning_rate": 0.00019999438446211863, - "loss": 46.0, - "step": 44131 - }, - { - "epoch": 3.374199590955139, - "grad_norm": 0.0009796533267945051, - "learning_rate": 0.0001999943842075664, - "loss": 46.0, - "step": 44132 - }, - { - "epoch": 3.3742760479385288, - "grad_norm": 0.0016995292389765382, - "learning_rate": 0.00019999438395300844, - "loss": 46.0, - "step": 44133 - }, - { - "epoch": 3.3743525049219185, - "grad_norm": 0.000945060106460005, - "learning_rate": 0.00019999438369844468, - "loss": 46.0, - "step": 44134 - }, - { - "epoch": 3.374428961905308, - "grad_norm": 0.0027595667634159327, - "learning_rate": 0.00019999438344387517, - "loss": 46.0, - "step": 44135 - }, - { - "epoch": 3.3745054188886976, - "grad_norm": 0.007582082878798246, - "learning_rate": 0.0001999943831892999, - "loss": 46.0, - "step": 44136 - }, - { - "epoch": 3.3745818758720874, - "grad_norm": 0.0011815408943220973, - "learning_rate": 0.00019999438293471884, - "loss": 46.0, - "step": 44137 - }, - { - "epoch": 3.374658332855477, - "grad_norm": 0.005109219346195459, - "learning_rate": 0.00019999438268013202, - "loss": 46.0, - "step": 44138 - }, - { - "epoch": 3.374734789838867, - "grad_norm": 0.002627226524055004, - "learning_rate": 0.00019999438242553942, - "loss": 46.0, - "step": 44139 - }, - { - "epoch": 3.3748112468222566, - "grad_norm": 0.0033121739979833364, - "learning_rate": 0.00019999438217094108, - "loss": 46.0, - "step": 44140 - }, - { - "epoch": 3.3748877038056464, - "grad_norm": 0.0021402142010629177, - "learning_rate": 0.00019999438191633693, - "loss": 46.0, - "step": 44141 - }, - { - "epoch": 3.374964160789036, - "grad_norm": 0.001608529593795538, - "learning_rate": 0.00019999438166172704, - "loss": 46.0, - "step": 44142 - }, - { - "epoch": 3.375040617772426, - "grad_norm": 0.0064996955916285515, - "learning_rate": 0.00019999438140711138, - "loss": 46.0, - "step": 44143 - }, - { - "epoch": 3.3751170747558157, - "grad_norm": 0.0021500091534107924, - "learning_rate": 0.00019999438115248994, - "loss": 46.0, - "step": 44144 - }, - { - "epoch": 3.375193531739205, - "grad_norm": 0.0010710778879001737, - "learning_rate": 0.00019999438089786273, - "loss": 46.0, - "step": 44145 - }, - { - "epoch": 3.3752699887225948, - "grad_norm": 0.0018483932362869382, - "learning_rate": 0.00019999438064322974, - "loss": 46.0, - "step": 44146 - }, - { - "epoch": 3.3753464457059845, - "grad_norm": 0.004057146143168211, - "learning_rate": 0.000199994380388591, - "loss": 46.0, - "step": 44147 - }, - { - "epoch": 3.3754229026893743, - "grad_norm": 0.002004601527005434, - "learning_rate": 0.0001999943801339465, - "loss": 46.0, - "step": 44148 - }, - { - "epoch": 3.375499359672764, - "grad_norm": 0.0009705049451440573, - "learning_rate": 0.0001999943798792962, - "loss": 46.0, - "step": 44149 - }, - { - "epoch": 3.375575816656154, - "grad_norm": 0.0015370274195447564, - "learning_rate": 0.00019999437962464018, - "loss": 46.0, - "step": 44150 - }, - { - "epoch": 3.3756522736395436, - "grad_norm": 0.0008545694872736931, - "learning_rate": 0.00019999437936997833, - "loss": 46.0, - "step": 44151 - }, - { - "epoch": 3.3757287306229333, - "grad_norm": 0.002193163149058819, - "learning_rate": 0.00019999437911531076, - "loss": 46.0, - "step": 44152 - }, - { - "epoch": 3.375805187606323, - "grad_norm": 0.001179728889837861, - "learning_rate": 0.0001999943788606374, - "loss": 46.0, - "step": 44153 - }, - { - "epoch": 3.375881644589713, - "grad_norm": 0.000601144798565656, - "learning_rate": 0.0001999943786059583, - "loss": 46.0, - "step": 44154 - }, - { - "epoch": 3.3759581015731026, - "grad_norm": 0.0009339035023003817, - "learning_rate": 0.0001999943783512734, - "loss": 46.0, - "step": 44155 - }, - { - "epoch": 3.3760345585564924, - "grad_norm": 0.001779919839464128, - "learning_rate": 0.0001999943780965827, - "loss": 46.0, - "step": 44156 - }, - { - "epoch": 3.3761110155398817, - "grad_norm": 0.001878186478279531, - "learning_rate": 0.00019999437784188627, - "loss": 46.0, - "step": 44157 - }, - { - "epoch": 3.3761874725232714, - "grad_norm": 0.002082601422443986, - "learning_rate": 0.00019999437758718406, - "loss": 46.0, - "step": 44158 - }, - { - "epoch": 3.376263929506661, - "grad_norm": 0.0036716728936880827, - "learning_rate": 0.0001999943773324761, - "loss": 46.0, - "step": 44159 - }, - { - "epoch": 3.376340386490051, - "grad_norm": 0.0013276876416057348, - "learning_rate": 0.00019999437707776238, - "loss": 46.0, - "step": 44160 - }, - { - "epoch": 3.3764168434734407, - "grad_norm": 0.004441867116838694, - "learning_rate": 0.00019999437682304282, - "loss": 46.0, - "step": 44161 - }, - { - "epoch": 3.3764933004568305, - "grad_norm": 0.005524048116058111, - "learning_rate": 0.00019999437656831757, - "loss": 46.0, - "step": 44162 - }, - { - "epoch": 3.3765697574402203, - "grad_norm": 0.001608599559403956, - "learning_rate": 0.00019999437631358652, - "loss": 46.0, - "step": 44163 - }, - { - "epoch": 3.37664621442361, - "grad_norm": 0.001950397971086204, - "learning_rate": 0.0001999943760588497, - "loss": 46.0, - "step": 44164 - }, - { - "epoch": 3.3767226714069998, - "grad_norm": 0.002956976182758808, - "learning_rate": 0.0001999943758041071, - "loss": 46.0, - "step": 44165 - }, - { - "epoch": 3.3767991283903895, - "grad_norm": 0.0010052917059510946, - "learning_rate": 0.00019999437554935872, - "loss": 46.0, - "step": 44166 - }, - { - "epoch": 3.376875585373779, - "grad_norm": 0.000769517500884831, - "learning_rate": 0.0001999943752946046, - "loss": 46.0, - "step": 44167 - }, - { - "epoch": 3.3769520423571686, - "grad_norm": 0.002629816997796297, - "learning_rate": 0.00019999437503984472, - "loss": 46.0, - "step": 44168 - }, - { - "epoch": 3.3770284993405584, - "grad_norm": 0.003704741830006242, - "learning_rate": 0.00019999437478507908, - "loss": 46.0, - "step": 44169 - }, - { - "epoch": 3.377104956323948, - "grad_norm": 0.0032428184058517218, - "learning_rate": 0.00019999437453030762, - "loss": 46.0, - "step": 44170 - }, - { - "epoch": 3.377181413307338, - "grad_norm": 0.0021908662747591734, - "learning_rate": 0.00019999437427553043, - "loss": 46.0, - "step": 44171 - }, - { - "epoch": 3.3772578702907277, - "grad_norm": 0.001503440085798502, - "learning_rate": 0.00019999437402074745, - "loss": 46.0, - "step": 44172 - }, - { - "epoch": 3.3773343272741174, - "grad_norm": 0.004630373325198889, - "learning_rate": 0.00019999437376595872, - "loss": 46.0, - "step": 44173 - }, - { - "epoch": 3.377410784257507, - "grad_norm": 0.0032096903305500746, - "learning_rate": 0.0001999943735111642, - "loss": 46.0, - "step": 44174 - }, - { - "epoch": 3.377487241240897, - "grad_norm": 0.0016666464507579803, - "learning_rate": 0.0001999943732563639, - "loss": 46.0, - "step": 44175 - }, - { - "epoch": 3.3775636982242867, - "grad_norm": 0.0028358374256640673, - "learning_rate": 0.0001999943730015579, - "loss": 46.0, - "step": 44176 - }, - { - "epoch": 3.3776401552076765, - "grad_norm": 0.005152968689799309, - "learning_rate": 0.00019999437274674606, - "loss": 46.0, - "step": 44177 - }, - { - "epoch": 3.3777166121910662, - "grad_norm": 0.0027974932454526424, - "learning_rate": 0.00019999437249192847, - "loss": 46.0, - "step": 44178 - }, - { - "epoch": 3.3777930691744555, - "grad_norm": 0.003510811598971486, - "learning_rate": 0.00019999437223710512, - "loss": 46.0, - "step": 44179 - }, - { - "epoch": 3.3778695261578453, - "grad_norm": 0.006456709932535887, - "learning_rate": 0.00019999437198227598, - "loss": 46.0, - "step": 44180 - }, - { - "epoch": 3.377945983141235, - "grad_norm": 0.003602234646677971, - "learning_rate": 0.00019999437172744112, - "loss": 46.0, - "step": 44181 - }, - { - "epoch": 3.378022440124625, - "grad_norm": 0.0005513750948011875, - "learning_rate": 0.00019999437147260042, - "loss": 46.0, - "step": 44182 - }, - { - "epoch": 3.3780988971080146, - "grad_norm": 0.003053493332117796, - "learning_rate": 0.000199994371217754, - "loss": 46.0, - "step": 44183 - }, - { - "epoch": 3.3781753540914043, - "grad_norm": 0.0012695646146312356, - "learning_rate": 0.00019999437096290178, - "loss": 46.0, - "step": 44184 - }, - { - "epoch": 3.378251811074794, - "grad_norm": 0.0021072239615023136, - "learning_rate": 0.00019999437070804382, - "loss": 46.0, - "step": 44185 - }, - { - "epoch": 3.378328268058184, - "grad_norm": 0.002927567111328244, - "learning_rate": 0.0001999943704531801, - "loss": 46.0, - "step": 44186 - }, - { - "epoch": 3.3784047250415736, - "grad_norm": 0.0012319096131250262, - "learning_rate": 0.0001999943701983106, - "loss": 46.0, - "step": 44187 - }, - { - "epoch": 3.3784811820249634, - "grad_norm": 0.0037730620242655277, - "learning_rate": 0.0001999943699434353, - "loss": 46.0, - "step": 44188 - }, - { - "epoch": 3.3785576390083527, - "grad_norm": 0.0012067090719938278, - "learning_rate": 0.00019999436968855426, - "loss": 46.0, - "step": 44189 - }, - { - "epoch": 3.3786340959917425, - "grad_norm": 0.004190836101770401, - "learning_rate": 0.00019999436943366744, - "loss": 46.0, - "step": 44190 - }, - { - "epoch": 3.3787105529751322, - "grad_norm": 0.0013076069299131632, - "learning_rate": 0.00019999436917877484, - "loss": 46.0, - "step": 44191 - }, - { - "epoch": 3.378787009958522, - "grad_norm": 0.0023885949049144983, - "learning_rate": 0.0001999943689238765, - "loss": 46.0, - "step": 44192 - }, - { - "epoch": 3.3788634669419118, - "grad_norm": 0.0016930722631514072, - "learning_rate": 0.00019999436866897238, - "loss": 46.0, - "step": 44193 - }, - { - "epoch": 3.3789399239253015, - "grad_norm": 0.0024841127451509237, - "learning_rate": 0.00019999436841406246, - "loss": 46.0, - "step": 44194 - }, - { - "epoch": 3.3790163809086913, - "grad_norm": 0.003515142248943448, - "learning_rate": 0.00019999436815914683, - "loss": 46.0, - "step": 44195 - }, - { - "epoch": 3.379092837892081, - "grad_norm": 0.0025204226840287447, - "learning_rate": 0.0001999943679042254, - "loss": 46.0, - "step": 44196 - }, - { - "epoch": 3.379169294875471, - "grad_norm": 0.0032615887466818094, - "learning_rate": 0.00019999436764929818, - "loss": 46.0, - "step": 44197 - }, - { - "epoch": 3.3792457518588606, - "grad_norm": 0.0013024966465309262, - "learning_rate": 0.00019999436739436523, - "loss": 46.0, - "step": 44198 - }, - { - "epoch": 3.3793222088422503, - "grad_norm": 0.0023050878662616014, - "learning_rate": 0.00019999436713942647, - "loss": 46.0, - "step": 44199 - }, - { - "epoch": 3.37939866582564, - "grad_norm": 0.004119938239455223, - "learning_rate": 0.00019999436688448197, - "loss": 46.0, - "step": 44200 - }, - { - "epoch": 3.3794751228090294, - "grad_norm": 0.001688433694653213, - "learning_rate": 0.0001999943666295317, - "loss": 46.0, - "step": 44201 - }, - { - "epoch": 3.379551579792419, - "grad_norm": 0.0015831138007342815, - "learning_rate": 0.00019999436637457564, - "loss": 46.0, - "step": 44202 - }, - { - "epoch": 3.379628036775809, - "grad_norm": 0.0009583765640854836, - "learning_rate": 0.00019999436611961384, - "loss": 46.0, - "step": 44203 - }, - { - "epoch": 3.3797044937591987, - "grad_norm": 0.002457007300108671, - "learning_rate": 0.00019999436586464625, - "loss": 46.0, - "step": 44204 - }, - { - "epoch": 3.3797809507425884, - "grad_norm": 0.0030656771268695593, - "learning_rate": 0.0001999943656096729, - "loss": 46.0, - "step": 44205 - }, - { - "epoch": 3.379857407725978, - "grad_norm": 0.005699296947568655, - "learning_rate": 0.00019999436535469376, - "loss": 46.0, - "step": 44206 - }, - { - "epoch": 3.379933864709368, - "grad_norm": 0.001191108487546444, - "learning_rate": 0.00019999436509970887, - "loss": 46.0, - "step": 44207 - }, - { - "epoch": 3.3800103216927577, - "grad_norm": 0.002215739106759429, - "learning_rate": 0.00019999436484471824, - "loss": 46.0, - "step": 44208 - }, - { - "epoch": 3.3800867786761475, - "grad_norm": 0.0012486905325204134, - "learning_rate": 0.0001999943645897218, - "loss": 46.0, - "step": 44209 - }, - { - "epoch": 3.3801632356595372, - "grad_norm": 0.0028072006534785032, - "learning_rate": 0.00019999436433471962, - "loss": 46.0, - "step": 44210 - }, - { - "epoch": 3.3802396926429266, - "grad_norm": 0.0021883114241063595, - "learning_rate": 0.00019999436407971163, - "loss": 46.0, - "step": 44211 - }, - { - "epoch": 3.3803161496263163, - "grad_norm": 0.002390835201367736, - "learning_rate": 0.0001999943638246979, - "loss": 46.0, - "step": 44212 - }, - { - "epoch": 3.380392606609706, - "grad_norm": 0.0011012639151886106, - "learning_rate": 0.00019999436356967837, - "loss": 46.0, - "step": 44213 - }, - { - "epoch": 3.380469063593096, - "grad_norm": 0.002598078455775976, - "learning_rate": 0.0001999943633146531, - "loss": 46.0, - "step": 44214 - }, - { - "epoch": 3.3805455205764856, - "grad_norm": 0.0021298665087670088, - "learning_rate": 0.00019999436305962208, - "loss": 46.0, - "step": 44215 - }, - { - "epoch": 3.3806219775598754, - "grad_norm": 0.002586901420727372, - "learning_rate": 0.00019999436280458523, - "loss": 46.0, - "step": 44216 - }, - { - "epoch": 3.380698434543265, - "grad_norm": 0.0023862814996391535, - "learning_rate": 0.00019999436254954266, - "loss": 46.0, - "step": 44217 - }, - { - "epoch": 3.380774891526655, - "grad_norm": 0.0014274966670200229, - "learning_rate": 0.00019999436229449431, - "loss": 46.0, - "step": 44218 - }, - { - "epoch": 3.3808513485100447, - "grad_norm": 0.002062779851257801, - "learning_rate": 0.00019999436203944017, - "loss": 46.0, - "step": 44219 - }, - { - "epoch": 3.3809278054934344, - "grad_norm": 0.0011792710283771157, - "learning_rate": 0.00019999436178438028, - "loss": 46.0, - "step": 44220 - }, - { - "epoch": 3.381004262476824, - "grad_norm": 0.00436359690502286, - "learning_rate": 0.00019999436152931465, - "loss": 46.0, - "step": 44221 - }, - { - "epoch": 3.381080719460214, - "grad_norm": 0.006435814779251814, - "learning_rate": 0.0001999943612742432, - "loss": 46.0, - "step": 44222 - }, - { - "epoch": 3.3811571764436033, - "grad_norm": 0.002429185900837183, - "learning_rate": 0.00019999436101916603, - "loss": 46.0, - "step": 44223 - }, - { - "epoch": 3.381233633426993, - "grad_norm": 0.0011744301300495863, - "learning_rate": 0.00019999436076408305, - "loss": 46.0, - "step": 44224 - }, - { - "epoch": 3.3813100904103828, - "grad_norm": 0.006143974140286446, - "learning_rate": 0.00019999436050899432, - "loss": 46.0, - "step": 44225 - }, - { - "epoch": 3.3813865473937725, - "grad_norm": 0.002569023985415697, - "learning_rate": 0.0001999943602538998, - "loss": 46.0, - "step": 44226 - }, - { - "epoch": 3.3814630043771623, - "grad_norm": 0.003772683907300234, - "learning_rate": 0.00019999435999879954, - "loss": 46.0, - "step": 44227 - }, - { - "epoch": 3.381539461360552, - "grad_norm": 0.0010371258249506354, - "learning_rate": 0.0001999943597436935, - "loss": 46.0, - "step": 44228 - }, - { - "epoch": 3.381615918343942, - "grad_norm": 0.002710413420572877, - "learning_rate": 0.00019999435948858167, - "loss": 46.0, - "step": 44229 - }, - { - "epoch": 3.3816923753273316, - "grad_norm": 0.0031064720824360847, - "learning_rate": 0.0001999943592334641, - "loss": 46.0, - "step": 44230 - }, - { - "epoch": 3.3817688323107213, - "grad_norm": 0.0036101602017879486, - "learning_rate": 0.00019999435897834076, - "loss": 46.0, - "step": 44231 - }, - { - "epoch": 3.3818452892941107, - "grad_norm": 0.0022557475604116917, - "learning_rate": 0.00019999435872321164, - "loss": 46.0, - "step": 44232 - }, - { - "epoch": 3.3819217462775004, - "grad_norm": 0.0021435606759041548, - "learning_rate": 0.00019999435846807675, - "loss": 46.0, - "step": 44233 - }, - { - "epoch": 3.38199820326089, - "grad_norm": 0.00417352793738246, - "learning_rate": 0.0001999943582129361, - "loss": 46.0, - "step": 44234 - }, - { - "epoch": 3.38207466024428, - "grad_norm": 0.00229679886251688, - "learning_rate": 0.00019999435795778965, - "loss": 46.0, - "step": 44235 - }, - { - "epoch": 3.3821511172276697, - "grad_norm": 0.0024019451811909676, - "learning_rate": 0.00019999435770263744, - "loss": 46.0, - "step": 44236 - }, - { - "epoch": 3.3822275742110595, - "grad_norm": 0.004723278805613518, - "learning_rate": 0.0001999943574474795, - "loss": 46.0, - "step": 44237 - }, - { - "epoch": 3.3823040311944492, - "grad_norm": 0.0025893684942275286, - "learning_rate": 0.00019999435719231573, - "loss": 46.0, - "step": 44238 - }, - { - "epoch": 3.382380488177839, - "grad_norm": 0.0018986817449331284, - "learning_rate": 0.00019999435693714626, - "loss": 46.0, - "step": 44239 - }, - { - "epoch": 3.3824569451612287, - "grad_norm": 0.0012239236384630203, - "learning_rate": 0.00019999435668197098, - "loss": 46.0, - "step": 44240 - }, - { - "epoch": 3.3825334021446185, - "grad_norm": 0.005004290025681257, - "learning_rate": 0.00019999435642678993, - "loss": 46.0, - "step": 44241 - }, - { - "epoch": 3.3826098591280083, - "grad_norm": 0.0021270280703902245, - "learning_rate": 0.0001999943561716031, - "loss": 46.0, - "step": 44242 - }, - { - "epoch": 3.382686316111398, - "grad_norm": 0.004737840034067631, - "learning_rate": 0.00019999435591641054, - "loss": 46.0, - "step": 44243 - }, - { - "epoch": 3.382762773094788, - "grad_norm": 0.001975037157535553, - "learning_rate": 0.00019999435566121217, - "loss": 46.0, - "step": 44244 - }, - { - "epoch": 3.382839230078177, - "grad_norm": 0.0011048391461372375, - "learning_rate": 0.0001999943554060081, - "loss": 46.0, - "step": 44245 - }, - { - "epoch": 3.382915687061567, - "grad_norm": 0.003061659401282668, - "learning_rate": 0.0001999943551507982, - "loss": 46.0, - "step": 44246 - }, - { - "epoch": 3.3829921440449566, - "grad_norm": 0.0010180168319493532, - "learning_rate": 0.00019999435489558254, - "loss": 46.0, - "step": 44247 - }, - { - "epoch": 3.3830686010283464, - "grad_norm": 0.006625974550843239, - "learning_rate": 0.0001999943546403611, - "loss": 46.0, - "step": 44248 - }, - { - "epoch": 3.383145058011736, - "grad_norm": 0.0020825297106057405, - "learning_rate": 0.00019999435438513392, - "loss": 46.0, - "step": 44249 - }, - { - "epoch": 3.383221514995126, - "grad_norm": 0.0030929071363061666, - "learning_rate": 0.00019999435412990096, - "loss": 46.0, - "step": 44250 - }, - { - "epoch": 3.3832979719785157, - "grad_norm": 0.0035906864795833826, - "learning_rate": 0.0001999943538746622, - "loss": 46.0, - "step": 44251 - }, - { - "epoch": 3.3833744289619054, - "grad_norm": 0.009797638282179832, - "learning_rate": 0.0001999943536194177, - "loss": 46.0, - "step": 44252 - }, - { - "epoch": 3.383450885945295, - "grad_norm": 0.0024939454160630703, - "learning_rate": 0.00019999435336416746, - "loss": 46.0, - "step": 44253 - }, - { - "epoch": 3.3835273429286845, - "grad_norm": 0.0010261759161949158, - "learning_rate": 0.0001999943531089114, - "loss": 46.0, - "step": 44254 - }, - { - "epoch": 3.3836037999120743, - "grad_norm": 0.0013130229199305177, - "learning_rate": 0.00019999435285364962, - "loss": 46.0, - "step": 44255 - }, - { - "epoch": 3.383680256895464, - "grad_norm": 0.003383307484909892, - "learning_rate": 0.00019999435259838202, - "loss": 46.0, - "step": 44256 - }, - { - "epoch": 3.383756713878854, - "grad_norm": 0.0030635856091976166, - "learning_rate": 0.00019999435234310865, - "loss": 46.0, - "step": 44257 - }, - { - "epoch": 3.3838331708622436, - "grad_norm": 0.0037393427919596434, - "learning_rate": 0.00019999435208782956, - "loss": 46.0, - "step": 44258 - }, - { - "epoch": 3.3839096278456333, - "grad_norm": 0.0010026482632383704, - "learning_rate": 0.00019999435183254468, - "loss": 46.0, - "step": 44259 - }, - { - "epoch": 3.383986084829023, - "grad_norm": 0.0016988830175250769, - "learning_rate": 0.000199994351577254, - "loss": 46.0, - "step": 44260 - }, - { - "epoch": 3.384062541812413, - "grad_norm": 0.0005278535536490381, - "learning_rate": 0.00019999435132195758, - "loss": 46.0, - "step": 44261 - }, - { - "epoch": 3.3841389987958026, - "grad_norm": 0.0025562585797160864, - "learning_rate": 0.00019999435106665537, - "loss": 46.0, - "step": 44262 - }, - { - "epoch": 3.3842154557791924, - "grad_norm": 0.0008684746571816504, - "learning_rate": 0.00019999435081134744, - "loss": 46.0, - "step": 44263 - }, - { - "epoch": 3.384291912762582, - "grad_norm": 0.0017802761867642403, - "learning_rate": 0.0001999943505560337, - "loss": 46.0, - "step": 44264 - }, - { - "epoch": 3.384368369745972, - "grad_norm": 0.00244636251591146, - "learning_rate": 0.00019999435030071419, - "loss": 46.0, - "step": 44265 - }, - { - "epoch": 3.384444826729361, - "grad_norm": 0.0008984652813524008, - "learning_rate": 0.0001999943500453889, - "loss": 46.0, - "step": 44266 - }, - { - "epoch": 3.384521283712751, - "grad_norm": 0.0004849621909670532, - "learning_rate": 0.0001999943497900579, - "loss": 46.0, - "step": 44267 - }, - { - "epoch": 3.3845977406961407, - "grad_norm": 0.002147234743461013, - "learning_rate": 0.00019999434953472107, - "loss": 46.0, - "step": 44268 - }, - { - "epoch": 3.3846741976795305, - "grad_norm": 0.001699982094578445, - "learning_rate": 0.0001999943492793785, - "loss": 46.0, - "step": 44269 - }, - { - "epoch": 3.3847506546629202, - "grad_norm": 0.0008087189635261893, - "learning_rate": 0.00019999434902403013, - "loss": 46.0, - "step": 44270 - }, - { - "epoch": 3.38482711164631, - "grad_norm": 0.00853345263749361, - "learning_rate": 0.00019999434876867605, - "loss": 46.0, - "step": 44271 - }, - { - "epoch": 3.3849035686296998, - "grad_norm": 0.001939923269674182, - "learning_rate": 0.00019999434851331616, - "loss": 46.0, - "step": 44272 - }, - { - "epoch": 3.3849800256130895, - "grad_norm": 0.003089835634455085, - "learning_rate": 0.0001999943482579505, - "loss": 46.0, - "step": 44273 - }, - { - "epoch": 3.3850564825964793, - "grad_norm": 0.001009583007544279, - "learning_rate": 0.0001999943480025791, - "loss": 46.0, - "step": 44274 - }, - { - "epoch": 3.385132939579869, - "grad_norm": 0.0041471137665212154, - "learning_rate": 0.00019999434774720188, - "loss": 46.0, - "step": 44275 - }, - { - "epoch": 3.3852093965632584, - "grad_norm": 0.0013122382806614041, - "learning_rate": 0.00019999434749181893, - "loss": 46.0, - "step": 44276 - }, - { - "epoch": 3.385285853546648, - "grad_norm": 0.0060357991605997086, - "learning_rate": 0.0001999943472364302, - "loss": 46.0, - "step": 44277 - }, - { - "epoch": 3.385362310530038, - "grad_norm": 0.003791297785937786, - "learning_rate": 0.0001999943469810357, - "loss": 46.0, - "step": 44278 - }, - { - "epoch": 3.3854387675134276, - "grad_norm": 0.00183978455606848, - "learning_rate": 0.00019999434672563542, - "loss": 46.0, - "step": 44279 - }, - { - "epoch": 3.3855152244968174, - "grad_norm": 0.0014265599893406034, - "learning_rate": 0.00019999434647022937, - "loss": 46.0, - "step": 44280 - }, - { - "epoch": 3.385591681480207, - "grad_norm": 0.0011291619157418609, - "learning_rate": 0.00019999434621481758, - "loss": 46.0, - "step": 44281 - }, - { - "epoch": 3.385668138463597, - "grad_norm": 0.0006563774077221751, - "learning_rate": 0.00019999434595939999, - "loss": 46.0, - "step": 44282 - }, - { - "epoch": 3.3857445954469867, - "grad_norm": 0.004237682092934847, - "learning_rate": 0.00019999434570397667, - "loss": 46.0, - "step": 44283 - }, - { - "epoch": 3.3858210524303765, - "grad_norm": 0.003301443299278617, - "learning_rate": 0.00019999434544854753, - "loss": 46.0, - "step": 44284 - }, - { - "epoch": 3.385897509413766, - "grad_norm": 0.004162689205259085, - "learning_rate": 0.00019999434519311264, - "loss": 46.0, - "step": 44285 - }, - { - "epoch": 3.385973966397156, - "grad_norm": 0.0009109798120334744, - "learning_rate": 0.000199994344937672, - "loss": 46.0, - "step": 44286 - }, - { - "epoch": 3.3860504233805457, - "grad_norm": 0.0016060040798038244, - "learning_rate": 0.00019999434468222557, - "loss": 46.0, - "step": 44287 - }, - { - "epoch": 3.386126880363935, - "grad_norm": 0.007468229625374079, - "learning_rate": 0.0001999943444267734, - "loss": 46.0, - "step": 44288 - }, - { - "epoch": 3.386203337347325, - "grad_norm": 0.0007647254387848079, - "learning_rate": 0.0001999943441713154, - "loss": 46.0, - "step": 44289 - }, - { - "epoch": 3.3862797943307146, - "grad_norm": 0.005335691850632429, - "learning_rate": 0.0001999943439158517, - "loss": 46.0, - "step": 44290 - }, - { - "epoch": 3.3863562513141043, - "grad_norm": 0.0016116950428113341, - "learning_rate": 0.0001999943436603822, - "loss": 46.0, - "step": 44291 - }, - { - "epoch": 3.386432708297494, - "grad_norm": 0.002544571179896593, - "learning_rate": 0.00019999434340490694, - "loss": 46.0, - "step": 44292 - }, - { - "epoch": 3.386509165280884, - "grad_norm": 0.004215978551656008, - "learning_rate": 0.00019999434314942592, - "loss": 46.0, - "step": 44293 - }, - { - "epoch": 3.3865856222642736, - "grad_norm": 0.0010518316412344575, - "learning_rate": 0.00019999434289393907, - "loss": 46.0, - "step": 44294 - }, - { - "epoch": 3.3866620792476634, - "grad_norm": 0.0025761134456843138, - "learning_rate": 0.00019999434263844653, - "loss": 46.0, - "step": 44295 - }, - { - "epoch": 3.386738536231053, - "grad_norm": 0.0015401405980810523, - "learning_rate": 0.0001999943423829482, - "loss": 46.0, - "step": 44296 - }, - { - "epoch": 3.386814993214443, - "grad_norm": 0.001467181951738894, - "learning_rate": 0.00019999434212744405, - "loss": 46.0, - "step": 44297 - }, - { - "epoch": 3.386891450197832, - "grad_norm": 0.001316542155109346, - "learning_rate": 0.0001999943418719342, - "loss": 46.0, - "step": 44298 - }, - { - "epoch": 3.386967907181222, - "grad_norm": 0.002022126456722617, - "learning_rate": 0.00019999434161641853, - "loss": 46.0, - "step": 44299 - }, - { - "epoch": 3.3870443641646117, - "grad_norm": 0.002226028358563781, - "learning_rate": 0.00019999434136089712, - "loss": 46.0, - "step": 44300 - }, - { - "epoch": 3.3871208211480015, - "grad_norm": 0.0013663721038028598, - "learning_rate": 0.0001999943411053699, - "loss": 46.0, - "step": 44301 - }, - { - "epoch": 3.3871972781313913, - "grad_norm": 0.0016099871136248112, - "learning_rate": 0.00019999434084983696, - "loss": 46.0, - "step": 44302 - }, - { - "epoch": 3.387273735114781, - "grad_norm": 0.0021600699983537197, - "learning_rate": 0.00019999434059429826, - "loss": 46.0, - "step": 44303 - }, - { - "epoch": 3.387350192098171, - "grad_norm": 0.0010704408632591367, - "learning_rate": 0.00019999434033875373, - "loss": 46.0, - "step": 44304 - }, - { - "epoch": 3.3874266490815605, - "grad_norm": 0.0044770753011107445, - "learning_rate": 0.00019999434008320348, - "loss": 46.0, - "step": 44305 - }, - { - "epoch": 3.3875031060649503, - "grad_norm": 0.006041297223418951, - "learning_rate": 0.00019999433982764744, - "loss": 46.0, - "step": 44306 - }, - { - "epoch": 3.38757956304834, - "grad_norm": 0.0027178835589438677, - "learning_rate": 0.00019999433957208562, - "loss": 46.0, - "step": 44307 - }, - { - "epoch": 3.38765602003173, - "grad_norm": 0.007367952261120081, - "learning_rate": 0.00019999433931651805, - "loss": 46.0, - "step": 44308 - }, - { - "epoch": 3.3877324770151196, - "grad_norm": 0.003046803642064333, - "learning_rate": 0.0001999943390609447, - "loss": 46.0, - "step": 44309 - }, - { - "epoch": 3.387808933998509, - "grad_norm": 0.005679132416844368, - "learning_rate": 0.0001999943388053656, - "loss": 46.0, - "step": 44310 - }, - { - "epoch": 3.3878853909818987, - "grad_norm": 0.0029210590291768312, - "learning_rate": 0.0001999943385497807, - "loss": 46.0, - "step": 44311 - }, - { - "epoch": 3.3879618479652884, - "grad_norm": 0.000879273284226656, - "learning_rate": 0.00019999433829419005, - "loss": 46.0, - "step": 44312 - }, - { - "epoch": 3.388038304948678, - "grad_norm": 0.0011485553113743663, - "learning_rate": 0.00019999433803859367, - "loss": 46.0, - "step": 44313 - }, - { - "epoch": 3.388114761932068, - "grad_norm": 0.005128402728587389, - "learning_rate": 0.00019999433778299146, - "loss": 46.0, - "step": 44314 - }, - { - "epoch": 3.3881912189154577, - "grad_norm": 0.002910564886406064, - "learning_rate": 0.0001999943375273835, - "loss": 46.0, - "step": 44315 - }, - { - "epoch": 3.3882676758988475, - "grad_norm": 0.0020676194690167904, - "learning_rate": 0.00019999433727176978, - "loss": 46.0, - "step": 44316 - }, - { - "epoch": 3.3883441328822372, - "grad_norm": 0.002800045534968376, - "learning_rate": 0.00019999433701615028, - "loss": 46.0, - "step": 44317 - }, - { - "epoch": 3.388420589865627, - "grad_norm": 0.0016800720477476716, - "learning_rate": 0.000199994336760525, - "loss": 46.0, - "step": 44318 - }, - { - "epoch": 3.3884970468490168, - "grad_norm": 0.001002141973003745, - "learning_rate": 0.000199994336504894, - "loss": 46.0, - "step": 44319 - }, - { - "epoch": 3.388573503832406, - "grad_norm": 0.0021147807128727436, - "learning_rate": 0.0001999943362492572, - "loss": 46.0, - "step": 44320 - }, - { - "epoch": 3.388649960815796, - "grad_norm": 0.0007403853232972324, - "learning_rate": 0.0001999943359936146, - "loss": 46.0, - "step": 44321 - }, - { - "epoch": 3.3887264177991856, - "grad_norm": 0.0015886673936620355, - "learning_rate": 0.00019999433573796626, - "loss": 46.0, - "step": 44322 - }, - { - "epoch": 3.3888028747825754, - "grad_norm": 0.0018542190082371235, - "learning_rate": 0.00019999433548231217, - "loss": 46.0, - "step": 44323 - }, - { - "epoch": 3.388879331765965, - "grad_norm": 0.0017437656642869115, - "learning_rate": 0.0001999943352266523, - "loss": 46.0, - "step": 44324 - }, - { - "epoch": 3.388955788749355, - "grad_norm": 0.0026636819820851088, - "learning_rate": 0.00019999433497098663, - "loss": 46.0, - "step": 44325 - }, - { - "epoch": 3.3890322457327446, - "grad_norm": 0.0029814038425683975, - "learning_rate": 0.00019999433471531522, - "loss": 46.0, - "step": 44326 - }, - { - "epoch": 3.3891087027161344, - "grad_norm": 0.0010743628954514861, - "learning_rate": 0.00019999433445963801, - "loss": 46.0, - "step": 44327 - }, - { - "epoch": 3.389185159699524, - "grad_norm": 0.0014858456561341882, - "learning_rate": 0.00019999433420395506, - "loss": 46.0, - "step": 44328 - }, - { - "epoch": 3.389261616682914, - "grad_norm": 0.0015158449532464147, - "learning_rate": 0.00019999433394826633, - "loss": 46.0, - "step": 44329 - }, - { - "epoch": 3.3893380736663037, - "grad_norm": 0.0037937683518975973, - "learning_rate": 0.00019999433369257186, - "loss": 46.0, - "step": 44330 - }, - { - "epoch": 3.3894145306496934, - "grad_norm": 0.003278592135757208, - "learning_rate": 0.0001999943334368716, - "loss": 46.0, - "step": 44331 - }, - { - "epoch": 3.3894909876330828, - "grad_norm": 0.004604941699653864, - "learning_rate": 0.00019999433318116557, - "loss": 46.0, - "step": 44332 - }, - { - "epoch": 3.3895674446164725, - "grad_norm": 0.0012613818980753422, - "learning_rate": 0.00019999433292545375, - "loss": 46.0, - "step": 44333 - }, - { - "epoch": 3.3896439015998623, - "grad_norm": 0.0011180745204910636, - "learning_rate": 0.00019999433266973618, - "loss": 46.0, - "step": 44334 - }, - { - "epoch": 3.389720358583252, - "grad_norm": 0.003944214899092913, - "learning_rate": 0.00019999433241401284, - "loss": 46.0, - "step": 44335 - }, - { - "epoch": 3.389796815566642, - "grad_norm": 0.005096031352877617, - "learning_rate": 0.00019999433215828373, - "loss": 46.0, - "step": 44336 - }, - { - "epoch": 3.3898732725500316, - "grad_norm": 0.0037811503279954195, - "learning_rate": 0.00019999433190254884, - "loss": 46.0, - "step": 44337 - }, - { - "epoch": 3.3899497295334213, - "grad_norm": 0.0013853403506800532, - "learning_rate": 0.0001999943316468082, - "loss": 46.0, - "step": 44338 - }, - { - "epoch": 3.390026186516811, - "grad_norm": 0.0015070205554366112, - "learning_rate": 0.00019999433139106178, - "loss": 46.0, - "step": 44339 - }, - { - "epoch": 3.390102643500201, - "grad_norm": 0.003356938948854804, - "learning_rate": 0.0001999943311353096, - "loss": 46.0, - "step": 44340 - }, - { - "epoch": 3.3901791004835906, - "grad_norm": 0.0009637877228669822, - "learning_rate": 0.00019999433087955165, - "loss": 46.0, - "step": 44341 - }, - { - "epoch": 3.39025555746698, - "grad_norm": 0.0015435336390510201, - "learning_rate": 0.00019999433062378792, - "loss": 46.0, - "step": 44342 - }, - { - "epoch": 3.3903320144503697, - "grad_norm": 0.0023447140119969845, - "learning_rate": 0.00019999433036801842, - "loss": 46.0, - "step": 44343 - }, - { - "epoch": 3.3904084714337595, - "grad_norm": 0.001448609633371234, - "learning_rate": 0.00019999433011224315, - "loss": 46.0, - "step": 44344 - }, - { - "epoch": 3.390484928417149, - "grad_norm": 0.0017516553634777665, - "learning_rate": 0.00019999432985646213, - "loss": 46.0, - "step": 44345 - }, - { - "epoch": 3.390561385400539, - "grad_norm": 0.0009320615790784359, - "learning_rate": 0.00019999432960067534, - "loss": 46.0, - "step": 44346 - }, - { - "epoch": 3.3906378423839287, - "grad_norm": 0.0032951468601822853, - "learning_rate": 0.00019999432934488277, - "loss": 46.0, - "step": 44347 - }, - { - "epoch": 3.3907142993673185, - "grad_norm": 0.0011953997891396284, - "learning_rate": 0.00019999432908908443, - "loss": 46.0, - "step": 44348 - }, - { - "epoch": 3.3907907563507083, - "grad_norm": 0.0036905519664287567, - "learning_rate": 0.00019999432883328032, - "loss": 46.0, - "step": 44349 - }, - { - "epoch": 3.390867213334098, - "grad_norm": 0.0036113422829657793, - "learning_rate": 0.00019999432857747046, - "loss": 46.0, - "step": 44350 - }, - { - "epoch": 3.390943670317488, - "grad_norm": 0.0030079083517193794, - "learning_rate": 0.0001999943283216548, - "loss": 46.0, - "step": 44351 - }, - { - "epoch": 3.3910201273008775, - "grad_norm": 0.001848881016485393, - "learning_rate": 0.0001999943280658334, - "loss": 46.0, - "step": 44352 - }, - { - "epoch": 3.3910965842842673, - "grad_norm": 0.0025121138896793127, - "learning_rate": 0.0001999943278100062, - "loss": 46.0, - "step": 44353 - }, - { - "epoch": 3.3911730412676566, - "grad_norm": 0.004290048498660326, - "learning_rate": 0.00019999432755417323, - "loss": 46.0, - "step": 44354 - }, - { - "epoch": 3.3912494982510464, - "grad_norm": 0.004328650422394276, - "learning_rate": 0.00019999432729833454, - "loss": 46.0, - "step": 44355 - }, - { - "epoch": 3.391325955234436, - "grad_norm": 0.0031624711118638515, - "learning_rate": 0.00019999432704249004, - "loss": 46.0, - "step": 44356 - }, - { - "epoch": 3.391402412217826, - "grad_norm": 0.0014169290661811829, - "learning_rate": 0.00019999432678663976, - "loss": 46.0, - "step": 44357 - }, - { - "epoch": 3.3914788692012157, - "grad_norm": 0.0011730794794857502, - "learning_rate": 0.00019999432653078375, - "loss": 46.0, - "step": 44358 - }, - { - "epoch": 3.3915553261846054, - "grad_norm": 0.0013420027680695057, - "learning_rate": 0.00019999432627492193, - "loss": 46.0, - "step": 44359 - }, - { - "epoch": 3.391631783167995, - "grad_norm": 0.0013395623536780477, - "learning_rate": 0.0001999943260190544, - "loss": 46.0, - "step": 44360 - }, - { - "epoch": 3.391708240151385, - "grad_norm": 0.0031876848079264164, - "learning_rate": 0.00019999432576318102, - "loss": 46.0, - "step": 44361 - }, - { - "epoch": 3.3917846971347747, - "grad_norm": 0.002493435051292181, - "learning_rate": 0.00019999432550730194, - "loss": 46.0, - "step": 44362 - }, - { - "epoch": 3.391861154118164, - "grad_norm": 0.0011749910190701485, - "learning_rate": 0.00019999432525141705, - "loss": 46.0, - "step": 44363 - }, - { - "epoch": 3.391937611101554, - "grad_norm": 0.0023854251485317945, - "learning_rate": 0.00019999432499552642, - "loss": 46.0, - "step": 44364 - }, - { - "epoch": 3.3920140680849435, - "grad_norm": 0.0014242819743230939, - "learning_rate": 0.00019999432473963, - "loss": 46.0, - "step": 44365 - }, - { - "epoch": 3.3920905250683333, - "grad_norm": 0.004025659523904324, - "learning_rate": 0.0001999943244837278, - "loss": 46.0, - "step": 44366 - }, - { - "epoch": 3.392166982051723, - "grad_norm": 0.003747211769223213, - "learning_rate": 0.00019999432422781985, - "loss": 46.0, - "step": 44367 - }, - { - "epoch": 3.392243439035113, - "grad_norm": 0.003146359696984291, - "learning_rate": 0.00019999432397190613, - "loss": 46.0, - "step": 44368 - }, - { - "epoch": 3.3923198960185026, - "grad_norm": 0.0016721318243071437, - "learning_rate": 0.00019999432371598666, - "loss": 46.0, - "step": 44369 - }, - { - "epoch": 3.3923963530018924, - "grad_norm": 0.0021338535007089376, - "learning_rate": 0.00019999432346006138, - "loss": 46.0, - "step": 44370 - }, - { - "epoch": 3.392472809985282, - "grad_norm": 0.002418762305751443, - "learning_rate": 0.00019999432320413037, - "loss": 46.0, - "step": 44371 - }, - { - "epoch": 3.392549266968672, - "grad_norm": 0.0012527513317763805, - "learning_rate": 0.00019999432294819357, - "loss": 46.0, - "step": 44372 - }, - { - "epoch": 3.3926257239520616, - "grad_norm": 0.0023592961952090263, - "learning_rate": 0.000199994322692251, - "loss": 46.0, - "step": 44373 - }, - { - "epoch": 3.3927021809354514, - "grad_norm": 0.003370742779225111, - "learning_rate": 0.00019999432243630264, - "loss": 46.0, - "step": 44374 - }, - { - "epoch": 3.392778637918841, - "grad_norm": 0.002068512374535203, - "learning_rate": 0.00019999432218034856, - "loss": 46.0, - "step": 44375 - }, - { - "epoch": 3.3928550949022305, - "grad_norm": 0.002953542862087488, - "learning_rate": 0.00019999432192438867, - "loss": 46.0, - "step": 44376 - }, - { - "epoch": 3.3929315518856202, - "grad_norm": 0.0018068733625113964, - "learning_rate": 0.00019999432166842304, - "loss": 46.0, - "step": 44377 - }, - { - "epoch": 3.39300800886901, - "grad_norm": 0.002879250328987837, - "learning_rate": 0.00019999432141245164, - "loss": 46.0, - "step": 44378 - }, - { - "epoch": 3.3930844658523998, - "grad_norm": 0.0008370761061087251, - "learning_rate": 0.00019999432115647446, - "loss": 46.0, - "step": 44379 - }, - { - "epoch": 3.3931609228357895, - "grad_norm": 0.0015681401127949357, - "learning_rate": 0.00019999432090049148, - "loss": 46.0, - "step": 44380 - }, - { - "epoch": 3.3932373798191793, - "grad_norm": 0.001608354039490223, - "learning_rate": 0.00019999432064450278, - "loss": 46.0, - "step": 44381 - }, - { - "epoch": 3.393313836802569, - "grad_norm": 0.0021239486522972584, - "learning_rate": 0.00019999432038850828, - "loss": 46.0, - "step": 44382 - }, - { - "epoch": 3.393390293785959, - "grad_norm": 0.0019906209781765938, - "learning_rate": 0.00019999432013250804, - "loss": 46.0, - "step": 44383 - }, - { - "epoch": 3.3934667507693486, - "grad_norm": 0.0009511591633781791, - "learning_rate": 0.00019999431987650202, - "loss": 46.0, - "step": 44384 - }, - { - "epoch": 3.393543207752738, - "grad_norm": 0.0013309771893545985, - "learning_rate": 0.0001999943196204902, - "loss": 46.0, - "step": 44385 - }, - { - "epoch": 3.3936196647361276, - "grad_norm": 0.0034242768306285143, - "learning_rate": 0.00019999431936447264, - "loss": 46.0, - "step": 44386 - }, - { - "epoch": 3.3936961217195174, - "grad_norm": 0.0021421003621071577, - "learning_rate": 0.0001999943191084493, - "loss": 46.0, - "step": 44387 - }, - { - "epoch": 3.393772578702907, - "grad_norm": 0.0021581468172371387, - "learning_rate": 0.0001999943188524202, - "loss": 46.0, - "step": 44388 - }, - { - "epoch": 3.393849035686297, - "grad_norm": 0.0010917327599599957, - "learning_rate": 0.00019999431859638533, - "loss": 46.0, - "step": 44389 - }, - { - "epoch": 3.3939254926696867, - "grad_norm": 0.0006710791494697332, - "learning_rate": 0.00019999431834034467, - "loss": 46.0, - "step": 44390 - }, - { - "epoch": 3.3940019496530764, - "grad_norm": 0.0017409648280590773, - "learning_rate": 0.00019999431808429826, - "loss": 46.0, - "step": 44391 - }, - { - "epoch": 3.394078406636466, - "grad_norm": 0.0023581183049827814, - "learning_rate": 0.00019999431782824609, - "loss": 46.0, - "step": 44392 - }, - { - "epoch": 3.394154863619856, - "grad_norm": 0.003903318429365754, - "learning_rate": 0.00019999431757218816, - "loss": 46.0, - "step": 44393 - }, - { - "epoch": 3.3942313206032457, - "grad_norm": 0.0016468054382130504, - "learning_rate": 0.0001999943173161244, - "loss": 46.0, - "step": 44394 - }, - { - "epoch": 3.3943077775866355, - "grad_norm": 0.0025710039772093296, - "learning_rate": 0.00019999431706005494, - "loss": 46.0, - "step": 44395 - }, - { - "epoch": 3.3943842345700252, - "grad_norm": 0.003464498557150364, - "learning_rate": 0.0001999943168039797, - "loss": 46.0, - "step": 44396 - }, - { - "epoch": 3.3944606915534146, - "grad_norm": 0.0008923442801460624, - "learning_rate": 0.00019999431654789868, - "loss": 46.0, - "step": 44397 - }, - { - "epoch": 3.3945371485368043, - "grad_norm": 0.0006607627728953958, - "learning_rate": 0.00019999431629181186, - "loss": 46.0, - "step": 44398 - }, - { - "epoch": 3.394613605520194, - "grad_norm": 0.0028931759297847748, - "learning_rate": 0.0001999943160357193, - "loss": 46.0, - "step": 44399 - }, - { - "epoch": 3.394690062503584, - "grad_norm": 0.0033882500138133764, - "learning_rate": 0.00019999431577962095, - "loss": 46.0, - "step": 44400 - }, - { - "epoch": 3.3947665194869736, - "grad_norm": 0.0036598783917725086, - "learning_rate": 0.00019999431552351687, - "loss": 46.0, - "step": 44401 - }, - { - "epoch": 3.3948429764703634, - "grad_norm": 0.001742070191539824, - "learning_rate": 0.00019999431526740698, - "loss": 46.0, - "step": 44402 - }, - { - "epoch": 3.394919433453753, - "grad_norm": 0.0016772841336205602, - "learning_rate": 0.00019999431501129135, - "loss": 46.0, - "step": 44403 - }, - { - "epoch": 3.394995890437143, - "grad_norm": 0.0008965773740783334, - "learning_rate": 0.00019999431475516995, - "loss": 46.0, - "step": 44404 - }, - { - "epoch": 3.3950723474205327, - "grad_norm": 0.001574399066157639, - "learning_rate": 0.00019999431449904277, - "loss": 46.0, - "step": 44405 - }, - { - "epoch": 3.3951488044039224, - "grad_norm": 0.0015241751680150628, - "learning_rate": 0.00019999431424290982, - "loss": 46.0, - "step": 44406 - }, - { - "epoch": 3.3952252613873117, - "grad_norm": 0.0018504337640479207, - "learning_rate": 0.0001999943139867711, - "loss": 46.0, - "step": 44407 - }, - { - "epoch": 3.3953017183707015, - "grad_norm": 0.001041129231452942, - "learning_rate": 0.0001999943137306266, - "loss": 46.0, - "step": 44408 - }, - { - "epoch": 3.3953781753540913, - "grad_norm": 0.0041731493547558784, - "learning_rate": 0.00019999431347447635, - "loss": 46.0, - "step": 44409 - }, - { - "epoch": 3.395454632337481, - "grad_norm": 0.002125500701367855, - "learning_rate": 0.0001999943132183203, - "loss": 46.0, - "step": 44410 - }, - { - "epoch": 3.3955310893208708, - "grad_norm": 0.001525670406408608, - "learning_rate": 0.00019999431296215854, - "loss": 46.0, - "step": 44411 - }, - { - "epoch": 3.3956075463042605, - "grad_norm": 0.0006528489757329226, - "learning_rate": 0.00019999431270599098, - "loss": 46.0, - "step": 44412 - }, - { - "epoch": 3.3956840032876503, - "grad_norm": 0.0033198250457644463, - "learning_rate": 0.00019999431244981764, - "loss": 46.0, - "step": 44413 - }, - { - "epoch": 3.39576046027104, - "grad_norm": 0.0033316079061478376, - "learning_rate": 0.00019999431219363856, - "loss": 46.0, - "step": 44414 - }, - { - "epoch": 3.39583691725443, - "grad_norm": 0.002117588883265853, - "learning_rate": 0.00019999431193745365, - "loss": 46.0, - "step": 44415 - }, - { - "epoch": 3.3959133742378196, - "grad_norm": 0.0005719622713513672, - "learning_rate": 0.00019999431168126304, - "loss": 46.0, - "step": 44416 - }, - { - "epoch": 3.3959898312212093, - "grad_norm": 0.00243534822948277, - "learning_rate": 0.00019999431142506664, - "loss": 46.0, - "step": 44417 - }, - { - "epoch": 3.396066288204599, - "grad_norm": 0.0007536790799349546, - "learning_rate": 0.00019999431116886443, - "loss": 46.0, - "step": 44418 - }, - { - "epoch": 3.3961427451879884, - "grad_norm": 0.001244775834493339, - "learning_rate": 0.0001999943109126565, - "loss": 46.0, - "step": 44419 - }, - { - "epoch": 3.396219202171378, - "grad_norm": 0.0032614772208034992, - "learning_rate": 0.0001999943106564428, - "loss": 46.0, - "step": 44420 - }, - { - "epoch": 3.396295659154768, - "grad_norm": 0.0016448217211291194, - "learning_rate": 0.00019999431040022332, - "loss": 46.0, - "step": 44421 - }, - { - "epoch": 3.3963721161381577, - "grad_norm": 0.002260012086480856, - "learning_rate": 0.00019999431014399805, - "loss": 46.0, - "step": 44422 - }, - { - "epoch": 3.3964485731215475, - "grad_norm": 0.0020634829998016357, - "learning_rate": 0.00019999430988776703, - "loss": 46.0, - "step": 44423 - }, - { - "epoch": 3.3965250301049372, - "grad_norm": 0.00206224643625319, - "learning_rate": 0.00019999430963153024, - "loss": 46.0, - "step": 44424 - }, - { - "epoch": 3.396601487088327, - "grad_norm": 0.005386530887335539, - "learning_rate": 0.00019999430937528767, - "loss": 46.0, - "step": 44425 - }, - { - "epoch": 3.3966779440717167, - "grad_norm": 0.0018630780978128314, - "learning_rate": 0.00019999430911903934, - "loss": 46.0, - "step": 44426 - }, - { - "epoch": 3.3967544010551065, - "grad_norm": 0.0026223179884254932, - "learning_rate": 0.00019999430886278522, - "loss": 46.0, - "step": 44427 - }, - { - "epoch": 3.3968308580384963, - "grad_norm": 0.0020208198111504316, - "learning_rate": 0.00019999430860652537, - "loss": 46.0, - "step": 44428 - }, - { - "epoch": 3.3969073150218856, - "grad_norm": 0.0014199266443029046, - "learning_rate": 0.00019999430835025974, - "loss": 46.0, - "step": 44429 - }, - { - "epoch": 3.3969837720052753, - "grad_norm": 0.002945514163002372, - "learning_rate": 0.00019999430809398833, - "loss": 46.0, - "step": 44430 - }, - { - "epoch": 3.397060228988665, - "grad_norm": 0.0024349556770175695, - "learning_rate": 0.00019999430783771116, - "loss": 46.0, - "step": 44431 - }, - { - "epoch": 3.397136685972055, - "grad_norm": 0.0036872962955385447, - "learning_rate": 0.0001999943075814282, - "loss": 46.0, - "step": 44432 - }, - { - "epoch": 3.3972131429554446, - "grad_norm": 0.0009498997824266553, - "learning_rate": 0.00019999430732513948, - "loss": 46.0, - "step": 44433 - }, - { - "epoch": 3.3972895999388344, - "grad_norm": 0.0023024696856737137, - "learning_rate": 0.000199994307068845, - "loss": 46.0, - "step": 44434 - }, - { - "epoch": 3.397366056922224, - "grad_norm": 0.0020671552047133446, - "learning_rate": 0.00019999430681254477, - "loss": 46.0, - "step": 44435 - }, - { - "epoch": 3.397442513905614, - "grad_norm": 0.0015280861407518387, - "learning_rate": 0.00019999430655623872, - "loss": 46.0, - "step": 44436 - }, - { - "epoch": 3.3975189708890037, - "grad_norm": 0.0005792198935523629, - "learning_rate": 0.00019999430629992696, - "loss": 46.0, - "step": 44437 - }, - { - "epoch": 3.3975954278723934, - "grad_norm": 0.0013664521975442767, - "learning_rate": 0.00019999430604360937, - "loss": 46.0, - "step": 44438 - }, - { - "epoch": 3.397671884855783, - "grad_norm": 0.002889463910833001, - "learning_rate": 0.00019999430578728603, - "loss": 46.0, - "step": 44439 - }, - { - "epoch": 3.397748341839173, - "grad_norm": 0.0014578712871298194, - "learning_rate": 0.00019999430553095695, - "loss": 46.0, - "step": 44440 - }, - { - "epoch": 3.3978247988225623, - "grad_norm": 0.0027165664359927177, - "learning_rate": 0.0001999943052746221, - "loss": 46.0, - "step": 44441 - }, - { - "epoch": 3.397901255805952, - "grad_norm": 0.0019710944034159184, - "learning_rate": 0.00019999430501828143, - "loss": 46.0, - "step": 44442 - }, - { - "epoch": 3.397977712789342, - "grad_norm": 0.002875181846320629, - "learning_rate": 0.00019999430476193503, - "loss": 46.0, - "step": 44443 - }, - { - "epoch": 3.3980541697727316, - "grad_norm": 0.0008439186494797468, - "learning_rate": 0.00019999430450558288, - "loss": 46.0, - "step": 44444 - }, - { - "epoch": 3.3981306267561213, - "grad_norm": 0.001341644674539566, - "learning_rate": 0.00019999430424922493, - "loss": 46.0, - "step": 44445 - }, - { - "epoch": 3.398207083739511, - "grad_norm": 0.004887677729129791, - "learning_rate": 0.0001999943039928612, - "loss": 46.0, - "step": 44446 - }, - { - "epoch": 3.398283540722901, - "grad_norm": 0.0017702901968732476, - "learning_rate": 0.00019999430373649174, - "loss": 46.0, - "step": 44447 - }, - { - "epoch": 3.3983599977062906, - "grad_norm": 0.001969631528481841, - "learning_rate": 0.00019999430348011647, - "loss": 46.0, - "step": 44448 - }, - { - "epoch": 3.3984364546896804, - "grad_norm": 0.0017390295397490263, - "learning_rate": 0.00019999430322373548, - "loss": 46.0, - "step": 44449 - }, - { - "epoch": 3.39851291167307, - "grad_norm": 0.0006267852149903774, - "learning_rate": 0.00019999430296734866, - "loss": 46.0, - "step": 44450 - }, - { - "epoch": 3.3985893686564594, - "grad_norm": 0.0030672969296574593, - "learning_rate": 0.00019999430271095612, - "loss": 46.0, - "step": 44451 - }, - { - "epoch": 3.398665825639849, - "grad_norm": 0.005493775475770235, - "learning_rate": 0.00019999430245455776, - "loss": 46.0, - "step": 44452 - }, - { - "epoch": 3.398742282623239, - "grad_norm": 0.002444051904603839, - "learning_rate": 0.0001999943021981537, - "loss": 46.0, - "step": 44453 - }, - { - "epoch": 3.3988187396066287, - "grad_norm": 0.006261073052883148, - "learning_rate": 0.00019999430194174382, - "loss": 46.0, - "step": 44454 - }, - { - "epoch": 3.3988951965900185, - "grad_norm": 0.001168728107586503, - "learning_rate": 0.0001999943016853282, - "loss": 46.0, - "step": 44455 - }, - { - "epoch": 3.3989716535734082, - "grad_norm": 0.0017290415707975626, - "learning_rate": 0.0001999943014289068, - "loss": 46.0, - "step": 44456 - }, - { - "epoch": 3.399048110556798, - "grad_norm": 0.0007060155621729791, - "learning_rate": 0.0001999943011724796, - "loss": 46.0, - "step": 44457 - }, - { - "epoch": 3.3991245675401878, - "grad_norm": 0.00530970236286521, - "learning_rate": 0.00019999430091604666, - "loss": 46.0, - "step": 44458 - }, - { - "epoch": 3.3992010245235775, - "grad_norm": 0.0008209020597860217, - "learning_rate": 0.00019999430065960797, - "loss": 46.0, - "step": 44459 - }, - { - "epoch": 3.3992774815069673, - "grad_norm": 0.0015252567827701569, - "learning_rate": 0.00019999430040316347, - "loss": 46.0, - "step": 44460 - }, - { - "epoch": 3.399353938490357, - "grad_norm": 0.00172558450140059, - "learning_rate": 0.00019999430014671323, - "loss": 46.0, - "step": 44461 - }, - { - "epoch": 3.399430395473747, - "grad_norm": 0.005498908925801516, - "learning_rate": 0.0001999942998902572, - "loss": 46.0, - "step": 44462 - }, - { - "epoch": 3.399506852457136, - "grad_norm": 0.004490367602556944, - "learning_rate": 0.00019999429963379545, - "loss": 46.0, - "step": 44463 - }, - { - "epoch": 3.399583309440526, - "grad_norm": 0.0025596641935408115, - "learning_rate": 0.00019999429937732788, - "loss": 46.0, - "step": 44464 - }, - { - "epoch": 3.3996597664239157, - "grad_norm": 0.003218061989173293, - "learning_rate": 0.00019999429912085455, - "loss": 46.0, - "step": 44465 - }, - { - "epoch": 3.3997362234073054, - "grad_norm": 0.0014005386037752032, - "learning_rate": 0.00019999429886437544, - "loss": 46.0, - "step": 44466 - }, - { - "epoch": 3.399812680390695, - "grad_norm": 0.0019948091357946396, - "learning_rate": 0.00019999429860789055, - "loss": 46.0, - "step": 44467 - }, - { - "epoch": 3.399889137374085, - "grad_norm": 0.00129279016982764, - "learning_rate": 0.00019999429835139995, - "loss": 46.0, - "step": 44468 - }, - { - "epoch": 3.3999655943574747, - "grad_norm": 0.0012246770784258842, - "learning_rate": 0.00019999429809490352, - "loss": 46.0, - "step": 44469 - }, - { - "epoch": 3.4000420513408645, - "grad_norm": 0.0025491847191005945, - "learning_rate": 0.00019999429783840137, - "loss": 46.0, - "step": 44470 - }, - { - "epoch": 3.400118508324254, - "grad_norm": 0.0013725466560572386, - "learning_rate": 0.0001999942975818934, - "loss": 46.0, - "step": 44471 - }, - { - "epoch": 3.400194965307644, - "grad_norm": 0.002798571949824691, - "learning_rate": 0.0001999942973253797, - "loss": 46.0, - "step": 44472 - }, - { - "epoch": 3.4002714222910333, - "grad_norm": 0.005642000585794449, - "learning_rate": 0.00019999429706886023, - "loss": 46.0, - "step": 44473 - }, - { - "epoch": 3.400347879274423, - "grad_norm": 0.003958832006901503, - "learning_rate": 0.000199994296812335, - "loss": 46.0, - "step": 44474 - }, - { - "epoch": 3.400424336257813, - "grad_norm": 0.003236716380342841, - "learning_rate": 0.00019999429655580397, - "loss": 46.0, - "step": 44475 - }, - { - "epoch": 3.4005007932412026, - "grad_norm": 0.0021570269018411636, - "learning_rate": 0.00019999429629926718, - "loss": 46.0, - "step": 44476 - }, - { - "epoch": 3.4005772502245923, - "grad_norm": 0.0016871554544195533, - "learning_rate": 0.00019999429604272462, - "loss": 46.0, - "step": 44477 - }, - { - "epoch": 3.400653707207982, - "grad_norm": 0.0012570195831358433, - "learning_rate": 0.0001999942957861763, - "loss": 46.0, - "step": 44478 - }, - { - "epoch": 3.400730164191372, - "grad_norm": 0.0011895325733348727, - "learning_rate": 0.0001999942955296222, - "loss": 46.0, - "step": 44479 - }, - { - "epoch": 3.4008066211747616, - "grad_norm": 0.0025255184154957533, - "learning_rate": 0.00019999429527306234, - "loss": 46.0, - "step": 44480 - }, - { - "epoch": 3.4008830781581514, - "grad_norm": 0.002386713633313775, - "learning_rate": 0.00019999429501649672, - "loss": 46.0, - "step": 44481 - }, - { - "epoch": 3.400959535141541, - "grad_norm": 0.0018322336254641414, - "learning_rate": 0.0001999942947599253, - "loss": 46.0, - "step": 44482 - }, - { - "epoch": 3.401035992124931, - "grad_norm": 0.001167122507467866, - "learning_rate": 0.0001999942945033481, - "loss": 46.0, - "step": 44483 - }, - { - "epoch": 3.4011124491083207, - "grad_norm": 0.00200465926900506, - "learning_rate": 0.0001999942942467652, - "loss": 46.0, - "step": 44484 - }, - { - "epoch": 3.40118890609171, - "grad_norm": 0.007078466005623341, - "learning_rate": 0.0001999942939901765, - "loss": 46.0, - "step": 44485 - }, - { - "epoch": 3.4012653630750997, - "grad_norm": 0.002868445124477148, - "learning_rate": 0.000199994293733582, - "loss": 46.0, - "step": 44486 - }, - { - "epoch": 3.4013418200584895, - "grad_norm": 0.00226974836550653, - "learning_rate": 0.00019999429347698175, - "loss": 46.0, - "step": 44487 - }, - { - "epoch": 3.4014182770418793, - "grad_norm": 0.009962710551917553, - "learning_rate": 0.00019999429322037574, - "loss": 46.0, - "step": 44488 - }, - { - "epoch": 3.401494734025269, - "grad_norm": 0.00166595121845603, - "learning_rate": 0.00019999429296376398, - "loss": 46.0, - "step": 44489 - }, - { - "epoch": 3.401571191008659, - "grad_norm": 0.0024232363793998957, - "learning_rate": 0.0001999942927071464, - "loss": 46.0, - "step": 44490 - }, - { - "epoch": 3.4016476479920486, - "grad_norm": 0.002095426432788372, - "learning_rate": 0.0001999942924505231, - "loss": 46.0, - "step": 44491 - }, - { - "epoch": 3.4017241049754383, - "grad_norm": 0.001230902038514614, - "learning_rate": 0.000199994292193894, - "loss": 46.0, - "step": 44492 - }, - { - "epoch": 3.401800561958828, - "grad_norm": 0.0013546929694712162, - "learning_rate": 0.00019999429193725914, - "loss": 46.0, - "step": 44493 - }, - { - "epoch": 3.4018770189422174, - "grad_norm": 0.0024091091472655535, - "learning_rate": 0.00019999429168061848, - "loss": 46.0, - "step": 44494 - }, - { - "epoch": 3.401953475925607, - "grad_norm": 0.0035000063944607973, - "learning_rate": 0.0001999942914239721, - "loss": 46.0, - "step": 44495 - }, - { - "epoch": 3.402029932908997, - "grad_norm": 0.003709676442667842, - "learning_rate": 0.00019999429116731993, - "loss": 46.0, - "step": 44496 - }, - { - "epoch": 3.4021063898923867, - "grad_norm": 0.008089705370366573, - "learning_rate": 0.00019999429091066198, - "loss": 46.0, - "step": 44497 - }, - { - "epoch": 3.4021828468757764, - "grad_norm": 0.002435478614643216, - "learning_rate": 0.0001999942906539983, - "loss": 46.0, - "step": 44498 - }, - { - "epoch": 3.402259303859166, - "grad_norm": 0.0005994358216412365, - "learning_rate": 0.00019999429039732882, - "loss": 46.0, - "step": 44499 - }, - { - "epoch": 3.402335760842556, - "grad_norm": 0.005287316627800465, - "learning_rate": 0.00019999429014065355, - "loss": 46.0, - "step": 44500 - }, - { - "epoch": 3.4024122178259457, - "grad_norm": 0.0025307191535830498, - "learning_rate": 0.00019999428988397256, - "loss": 46.0, - "step": 44501 - }, - { - "epoch": 3.4024886748093355, - "grad_norm": 0.00230208202265203, - "learning_rate": 0.00019999428962728577, - "loss": 46.0, - "step": 44502 - }, - { - "epoch": 3.4025651317927252, - "grad_norm": 0.0045244283974170685, - "learning_rate": 0.0001999942893705932, - "loss": 46.0, - "step": 44503 - }, - { - "epoch": 3.402641588776115, - "grad_norm": 0.0036708831321448088, - "learning_rate": 0.00019999428911389488, - "loss": 46.0, - "step": 44504 - }, - { - "epoch": 3.4027180457595048, - "grad_norm": 0.0018802789272740483, - "learning_rate": 0.0001999942888571908, - "loss": 46.0, - "step": 44505 - }, - { - "epoch": 3.4027945027428945, - "grad_norm": 0.002465044381096959, - "learning_rate": 0.00019999428860048094, - "loss": 46.0, - "step": 44506 - }, - { - "epoch": 3.402870959726284, - "grad_norm": 0.004716214258223772, - "learning_rate": 0.0001999942883437653, - "loss": 46.0, - "step": 44507 - }, - { - "epoch": 3.4029474167096736, - "grad_norm": 0.00487924087792635, - "learning_rate": 0.00019999428808704394, - "loss": 46.0, - "step": 44508 - }, - { - "epoch": 3.4030238736930634, - "grad_norm": 0.002176678040996194, - "learning_rate": 0.00019999428783031676, - "loss": 46.0, - "step": 44509 - }, - { - "epoch": 3.403100330676453, - "grad_norm": 0.0014835112961009145, - "learning_rate": 0.0001999942875735838, - "loss": 46.0, - "step": 44510 - }, - { - "epoch": 3.403176787659843, - "grad_norm": 0.0031991840805858374, - "learning_rate": 0.0001999942873168451, - "loss": 46.0, - "step": 44511 - }, - { - "epoch": 3.4032532446432326, - "grad_norm": 0.0015627560205757618, - "learning_rate": 0.00019999428706010065, - "loss": 46.0, - "step": 44512 - }, - { - "epoch": 3.4033297016266224, - "grad_norm": 0.0018399023683741689, - "learning_rate": 0.00019999428680335038, - "loss": 46.0, - "step": 44513 - }, - { - "epoch": 3.403406158610012, - "grad_norm": 0.0047172242775559425, - "learning_rate": 0.0001999942865465944, - "loss": 46.0, - "step": 44514 - }, - { - "epoch": 3.403482615593402, - "grad_norm": 0.0007529751746915281, - "learning_rate": 0.00019999428628983258, - "loss": 46.0, - "step": 44515 - }, - { - "epoch": 3.4035590725767912, - "grad_norm": 0.0013395254500210285, - "learning_rate": 0.00019999428603306501, - "loss": 46.0, - "step": 44516 - }, - { - "epoch": 3.403635529560181, - "grad_norm": 0.0007168152369558811, - "learning_rate": 0.0001999942857762917, - "loss": 46.0, - "step": 44517 - }, - { - "epoch": 3.4037119865435708, - "grad_norm": 0.004291699267923832, - "learning_rate": 0.00019999428551951262, - "loss": 46.0, - "step": 44518 - }, - { - "epoch": 3.4037884435269605, - "grad_norm": 0.0005293144495226443, - "learning_rate": 0.00019999428526272777, - "loss": 46.0, - "step": 44519 - }, - { - "epoch": 3.4038649005103503, - "grad_norm": 0.0014971561031416059, - "learning_rate": 0.00019999428500593714, - "loss": 46.0, - "step": 44520 - }, - { - "epoch": 3.40394135749374, - "grad_norm": 0.0025402531027793884, - "learning_rate": 0.0001999942847491407, - "loss": 46.0, - "step": 44521 - }, - { - "epoch": 3.40401781447713, - "grad_norm": 0.0006218007765710354, - "learning_rate": 0.00019999428449233856, - "loss": 46.0, - "step": 44522 - }, - { - "epoch": 3.4040942714605196, - "grad_norm": 0.005018069874495268, - "learning_rate": 0.00019999428423553064, - "loss": 46.0, - "step": 44523 - }, - { - "epoch": 3.4041707284439093, - "grad_norm": 0.0021142316982150078, - "learning_rate": 0.00019999428397871692, - "loss": 46.0, - "step": 44524 - }, - { - "epoch": 3.404247185427299, - "grad_norm": 0.001458027632907033, - "learning_rate": 0.00019999428372189745, - "loss": 46.0, - "step": 44525 - }, - { - "epoch": 3.404323642410689, - "grad_norm": 0.0017159141134470701, - "learning_rate": 0.0001999942834650722, - "loss": 46.0, - "step": 44526 - }, - { - "epoch": 3.4044000993940786, - "grad_norm": 0.001311560976319015, - "learning_rate": 0.0001999942832082412, - "loss": 46.0, - "step": 44527 - }, - { - "epoch": 3.404476556377468, - "grad_norm": 0.0012697457568719983, - "learning_rate": 0.0001999942829514044, - "loss": 46.0, - "step": 44528 - }, - { - "epoch": 3.4045530133608577, - "grad_norm": 0.002954694442451, - "learning_rate": 0.00019999428269456187, - "loss": 46.0, - "step": 44529 - }, - { - "epoch": 3.4046294703442475, - "grad_norm": 0.001003327895887196, - "learning_rate": 0.00019999428243771354, - "loss": 46.0, - "step": 44530 - }, - { - "epoch": 3.404705927327637, - "grad_norm": 0.005693940445780754, - "learning_rate": 0.00019999428218085946, - "loss": 46.0, - "step": 44531 - }, - { - "epoch": 3.404782384311027, - "grad_norm": 0.007230098359286785, - "learning_rate": 0.0001999942819239996, - "loss": 46.0, - "step": 44532 - }, - { - "epoch": 3.4048588412944167, - "grad_norm": 0.005926085636019707, - "learning_rate": 0.00019999428166713398, - "loss": 46.0, - "step": 44533 - }, - { - "epoch": 3.4049352982778065, - "grad_norm": 0.004895671270787716, - "learning_rate": 0.00019999428141026257, - "loss": 46.0, - "step": 44534 - }, - { - "epoch": 3.4050117552611963, - "grad_norm": 0.0015876397956162691, - "learning_rate": 0.0001999942811533854, - "loss": 46.0, - "step": 44535 - }, - { - "epoch": 3.405088212244586, - "grad_norm": 0.001187377143651247, - "learning_rate": 0.00019999428089650248, - "loss": 46.0, - "step": 44536 - }, - { - "epoch": 3.405164669227976, - "grad_norm": 0.0035681205336004496, - "learning_rate": 0.00019999428063961378, - "loss": 46.0, - "step": 44537 - }, - { - "epoch": 3.405241126211365, - "grad_norm": 0.0021375594660639763, - "learning_rate": 0.0001999942803827193, - "loss": 46.0, - "step": 44538 - }, - { - "epoch": 3.405317583194755, - "grad_norm": 0.0008389716385863721, - "learning_rate": 0.00019999428012581908, - "loss": 46.0, - "step": 44539 - }, - { - "epoch": 3.4053940401781446, - "grad_norm": 0.0010876808082684875, - "learning_rate": 0.00019999427986891303, - "loss": 46.0, - "step": 44540 - }, - { - "epoch": 3.4054704971615344, - "grad_norm": 0.0011203258763998747, - "learning_rate": 0.00019999427961200125, - "loss": 46.0, - "step": 44541 - }, - { - "epoch": 3.405546954144924, - "grad_norm": 0.0012522428296506405, - "learning_rate": 0.0001999942793550837, - "loss": 46.0, - "step": 44542 - }, - { - "epoch": 3.405623411128314, - "grad_norm": 0.0012317071668803692, - "learning_rate": 0.00019999427909816043, - "loss": 46.0, - "step": 44543 - }, - { - "epoch": 3.4056998681117037, - "grad_norm": 0.0025619708467274904, - "learning_rate": 0.00019999427884123132, - "loss": 46.0, - "step": 44544 - }, - { - "epoch": 3.4057763250950934, - "grad_norm": 0.0031380769796669483, - "learning_rate": 0.00019999427858429644, - "loss": 46.0, - "step": 44545 - }, - { - "epoch": 3.405852782078483, - "grad_norm": 0.001825815299525857, - "learning_rate": 0.00019999427832735584, - "loss": 46.0, - "step": 44546 - }, - { - "epoch": 3.405929239061873, - "grad_norm": 0.002268935786560178, - "learning_rate": 0.00019999427807040944, - "loss": 46.0, - "step": 44547 - }, - { - "epoch": 3.4060056960452627, - "grad_norm": 0.005343676544725895, - "learning_rate": 0.00019999427781345727, - "loss": 46.0, - "step": 44548 - }, - { - "epoch": 3.4060821530286525, - "grad_norm": 0.0013021710328757763, - "learning_rate": 0.00019999427755649935, - "loss": 46.0, - "step": 44549 - }, - { - "epoch": 3.406158610012042, - "grad_norm": 0.0022684766445308924, - "learning_rate": 0.00019999427729953565, - "loss": 46.0, - "step": 44550 - }, - { - "epoch": 3.4062350669954315, - "grad_norm": 0.003002998884767294, - "learning_rate": 0.00019999427704256616, - "loss": 46.0, - "step": 44551 - }, - { - "epoch": 3.4063115239788213, - "grad_norm": 0.005568784195929766, - "learning_rate": 0.00019999427678559092, - "loss": 46.0, - "step": 44552 - }, - { - "epoch": 3.406387980962211, - "grad_norm": 0.002286982722580433, - "learning_rate": 0.00019999427652860993, - "loss": 46.0, - "step": 44553 - }, - { - "epoch": 3.406464437945601, - "grad_norm": 0.0036630250979214907, - "learning_rate": 0.00019999427627162314, - "loss": 46.0, - "step": 44554 - }, - { - "epoch": 3.4065408949289906, - "grad_norm": 0.0025698444806039333, - "learning_rate": 0.00019999427601463058, - "loss": 46.0, - "step": 44555 - }, - { - "epoch": 3.4066173519123804, - "grad_norm": 0.0018158338498324156, - "learning_rate": 0.00019999427575763227, - "loss": 46.0, - "step": 44556 - }, - { - "epoch": 3.40669380889577, - "grad_norm": 0.0012441120343282819, - "learning_rate": 0.0001999942755006282, - "loss": 46.0, - "step": 44557 - }, - { - "epoch": 3.40677026587916, - "grad_norm": 0.0058428216725587845, - "learning_rate": 0.00019999427524361834, - "loss": 46.0, - "step": 44558 - }, - { - "epoch": 3.4068467228625496, - "grad_norm": 0.0015200950438156724, - "learning_rate": 0.0001999942749866027, - "loss": 46.0, - "step": 44559 - }, - { - "epoch": 3.406923179845939, - "grad_norm": 0.00158967066090554, - "learning_rate": 0.0001999942747295813, - "loss": 46.0, - "step": 44560 - }, - { - "epoch": 3.4069996368293287, - "grad_norm": 0.0016352664679288864, - "learning_rate": 0.00019999427447255416, - "loss": 46.0, - "step": 44561 - }, - { - "epoch": 3.4070760938127185, - "grad_norm": 0.004216746427118778, - "learning_rate": 0.00019999427421552124, - "loss": 46.0, - "step": 44562 - }, - { - "epoch": 3.4071525507961082, - "grad_norm": 0.002384734572842717, - "learning_rate": 0.00019999427395848252, - "loss": 46.0, - "step": 44563 - }, - { - "epoch": 3.407229007779498, - "grad_norm": 0.0021031221840530634, - "learning_rate": 0.00019999427370143806, - "loss": 46.0, - "step": 44564 - }, - { - "epoch": 3.4073054647628878, - "grad_norm": 0.0012883978197351098, - "learning_rate": 0.00019999427344438782, - "loss": 46.0, - "step": 44565 - }, - { - "epoch": 3.4073819217462775, - "grad_norm": 0.0022906125523149967, - "learning_rate": 0.0001999942731873318, - "loss": 46.0, - "step": 44566 - }, - { - "epoch": 3.4074583787296673, - "grad_norm": 0.00508294440805912, - "learning_rate": 0.00019999427293027004, - "loss": 46.0, - "step": 44567 - }, - { - "epoch": 3.407534835713057, - "grad_norm": 0.001698538544587791, - "learning_rate": 0.00019999427267320248, - "loss": 46.0, - "step": 44568 - }, - { - "epoch": 3.407611292696447, - "grad_norm": 0.0030605129431933165, - "learning_rate": 0.00019999427241612917, - "loss": 46.0, - "step": 44569 - }, - { - "epoch": 3.4076877496798366, - "grad_norm": 0.003079720539972186, - "learning_rate": 0.0001999942721590501, - "loss": 46.0, - "step": 44570 - }, - { - "epoch": 3.4077642066632263, - "grad_norm": 0.0015500913141295314, - "learning_rate": 0.00019999427190196521, - "loss": 46.0, - "step": 44571 - }, - { - "epoch": 3.4078406636466156, - "grad_norm": 0.001608409802429378, - "learning_rate": 0.00019999427164487461, - "loss": 46.0, - "step": 44572 - }, - { - "epoch": 3.4079171206300054, - "grad_norm": 0.0025723797734826803, - "learning_rate": 0.0001999942713877782, - "loss": 46.0, - "step": 44573 - }, - { - "epoch": 3.407993577613395, - "grad_norm": 0.007553036790341139, - "learning_rate": 0.00019999427113067607, - "loss": 46.0, - "step": 44574 - }, - { - "epoch": 3.408070034596785, - "grad_norm": 0.0016935491003096104, - "learning_rate": 0.00019999427087356812, - "loss": 46.0, - "step": 44575 - }, - { - "epoch": 3.4081464915801747, - "grad_norm": 0.0012393731158226728, - "learning_rate": 0.00019999427061645443, - "loss": 46.0, - "step": 44576 - }, - { - "epoch": 3.4082229485635644, - "grad_norm": 0.0003832278889603913, - "learning_rate": 0.00019999427035933496, - "loss": 46.0, - "step": 44577 - }, - { - "epoch": 3.408299405546954, - "grad_norm": 0.0011358532356098294, - "learning_rate": 0.00019999427010220972, - "loss": 46.0, - "step": 44578 - }, - { - "epoch": 3.408375862530344, - "grad_norm": 0.003860140684992075, - "learning_rate": 0.0001999942698450787, - "loss": 46.0, - "step": 44579 - }, - { - "epoch": 3.4084523195137337, - "grad_norm": 0.0018668039701879025, - "learning_rate": 0.00019999426958794195, - "loss": 46.0, - "step": 44580 - }, - { - "epoch": 3.4085287764971235, - "grad_norm": 0.001568567007780075, - "learning_rate": 0.0001999942693307994, - "loss": 46.0, - "step": 44581 - }, - { - "epoch": 3.408605233480513, - "grad_norm": 0.0006676905904896557, - "learning_rate": 0.00019999426907365108, - "loss": 46.0, - "step": 44582 - }, - { - "epoch": 3.4086816904639026, - "grad_norm": 0.0035766656510531902, - "learning_rate": 0.000199994268816497, - "loss": 46.0, - "step": 44583 - }, - { - "epoch": 3.4087581474472923, - "grad_norm": 0.0032155183143913746, - "learning_rate": 0.00019999426855933715, - "loss": 46.0, - "step": 44584 - }, - { - "epoch": 3.408834604430682, - "grad_norm": 0.001143681351095438, - "learning_rate": 0.00019999426830217152, - "loss": 46.0, - "step": 44585 - }, - { - "epoch": 3.408911061414072, - "grad_norm": 0.002032248768955469, - "learning_rate": 0.00019999426804500012, - "loss": 46.0, - "step": 44586 - }, - { - "epoch": 3.4089875183974616, - "grad_norm": 0.005097049288451672, - "learning_rate": 0.00019999426778782297, - "loss": 46.0, - "step": 44587 - }, - { - "epoch": 3.4090639753808514, - "grad_norm": 0.0031921030022203922, - "learning_rate": 0.00019999426753064005, - "loss": 46.0, - "step": 44588 - }, - { - "epoch": 3.409140432364241, - "grad_norm": 0.0031302792485803366, - "learning_rate": 0.00019999426727345133, - "loss": 46.0, - "step": 44589 - }, - { - "epoch": 3.409216889347631, - "grad_norm": 0.0020225311163812876, - "learning_rate": 0.0001999942670162569, - "loss": 46.0, - "step": 44590 - }, - { - "epoch": 3.4092933463310207, - "grad_norm": 0.0010203256970271468, - "learning_rate": 0.00019999426675905663, - "loss": 46.0, - "step": 44591 - }, - { - "epoch": 3.4093698033144104, - "grad_norm": 0.002846898278221488, - "learning_rate": 0.00019999426650185064, - "loss": 46.0, - "step": 44592 - }, - { - "epoch": 3.4094462602978, - "grad_norm": 0.004486053250730038, - "learning_rate": 0.00019999426624463888, - "loss": 46.0, - "step": 44593 - }, - { - "epoch": 3.4095227172811895, - "grad_norm": 0.0012737103970721364, - "learning_rate": 0.00019999426598742132, - "loss": 46.0, - "step": 44594 - }, - { - "epoch": 3.4095991742645793, - "grad_norm": 0.001980091677978635, - "learning_rate": 0.00019999426573019801, - "loss": 46.0, - "step": 44595 - }, - { - "epoch": 3.409675631247969, - "grad_norm": 0.0016301375580951571, - "learning_rate": 0.00019999426547296894, - "loss": 46.0, - "step": 44596 - }, - { - "epoch": 3.409752088231359, - "grad_norm": 0.0013766594929620624, - "learning_rate": 0.00019999426521573408, - "loss": 46.0, - "step": 44597 - }, - { - "epoch": 3.4098285452147485, - "grad_norm": 0.0014653644757345319, - "learning_rate": 0.00019999426495849348, - "loss": 46.0, - "step": 44598 - }, - { - "epoch": 3.4099050021981383, - "grad_norm": 0.003097351174801588, - "learning_rate": 0.00019999426470124706, - "loss": 46.0, - "step": 44599 - }, - { - "epoch": 3.409981459181528, - "grad_norm": 0.0043132854625582695, - "learning_rate": 0.0001999942644439949, - "loss": 46.0, - "step": 44600 - }, - { - "epoch": 3.410057916164918, - "grad_norm": 0.003087574616074562, - "learning_rate": 0.000199994264186737, - "loss": 46.0, - "step": 44601 - }, - { - "epoch": 3.4101343731483076, - "grad_norm": 0.0022097467444837093, - "learning_rate": 0.0001999942639294733, - "loss": 46.0, - "step": 44602 - }, - { - "epoch": 3.4102108301316973, - "grad_norm": 0.002199438400566578, - "learning_rate": 0.00019999426367220383, - "loss": 46.0, - "step": 44603 - }, - { - "epoch": 3.4102872871150867, - "grad_norm": 0.0016657226951792836, - "learning_rate": 0.0001999942634149286, - "loss": 46.0, - "step": 44604 - }, - { - "epoch": 3.4103637440984764, - "grad_norm": 0.0020539460238069296, - "learning_rate": 0.0001999942631576476, - "loss": 46.0, - "step": 44605 - }, - { - "epoch": 3.410440201081866, - "grad_norm": 0.002514677355065942, - "learning_rate": 0.00019999426290036082, - "loss": 46.0, - "step": 44606 - }, - { - "epoch": 3.410516658065256, - "grad_norm": 0.0024192591663450003, - "learning_rate": 0.0001999942626430683, - "loss": 46.0, - "step": 44607 - }, - { - "epoch": 3.4105931150486457, - "grad_norm": 0.0018314456101506948, - "learning_rate": 0.00019999426238576998, - "loss": 46.0, - "step": 44608 - }, - { - "epoch": 3.4106695720320355, - "grad_norm": 0.0014262263430282474, - "learning_rate": 0.00019999426212846588, - "loss": 46.0, - "step": 44609 - }, - { - "epoch": 3.4107460290154252, - "grad_norm": 0.0022243992425501347, - "learning_rate": 0.00019999426187115602, - "loss": 46.0, - "step": 44610 - }, - { - "epoch": 3.410822485998815, - "grad_norm": 0.0008551247301511467, - "learning_rate": 0.0001999942616138404, - "loss": 46.0, - "step": 44611 - }, - { - "epoch": 3.4108989429822048, - "grad_norm": 0.004102067090570927, - "learning_rate": 0.00019999426135651902, - "loss": 46.0, - "step": 44612 - }, - { - "epoch": 3.4109753999655945, - "grad_norm": 0.0015754050109535456, - "learning_rate": 0.00019999426109919188, - "loss": 46.0, - "step": 44613 - }, - { - "epoch": 3.4110518569489843, - "grad_norm": 0.0019353434909135103, - "learning_rate": 0.00019999426084185893, - "loss": 46.0, - "step": 44614 - }, - { - "epoch": 3.411128313932374, - "grad_norm": 0.0019014155259355903, - "learning_rate": 0.00019999426058452024, - "loss": 46.0, - "step": 44615 - }, - { - "epoch": 3.4112047709157634, - "grad_norm": 0.002923661144450307, - "learning_rate": 0.00019999426032717577, - "loss": 46.0, - "step": 44616 - }, - { - "epoch": 3.411281227899153, - "grad_norm": 0.0018945930059999228, - "learning_rate": 0.00019999426006982556, - "loss": 46.0, - "step": 44617 - }, - { - "epoch": 3.411357684882543, - "grad_norm": 0.001301974174566567, - "learning_rate": 0.00019999425981246955, - "loss": 46.0, - "step": 44618 - }, - { - "epoch": 3.4114341418659326, - "grad_norm": 0.00305893924087286, - "learning_rate": 0.0001999942595551078, - "loss": 46.0, - "step": 44619 - }, - { - "epoch": 3.4115105988493224, - "grad_norm": 0.0028179411310702562, - "learning_rate": 0.00019999425929774023, - "loss": 46.0, - "step": 44620 - }, - { - "epoch": 3.411587055832712, - "grad_norm": 0.0014932259218767285, - "learning_rate": 0.00019999425904036693, - "loss": 46.0, - "step": 44621 - }, - { - "epoch": 3.411663512816102, - "grad_norm": 0.0017887079156935215, - "learning_rate": 0.00019999425878298785, - "loss": 46.0, - "step": 44622 - }, - { - "epoch": 3.4117399697994917, - "grad_norm": 0.008045237511396408, - "learning_rate": 0.000199994258525603, - "loss": 46.0, - "step": 44623 - }, - { - "epoch": 3.4118164267828814, - "grad_norm": 0.001971038058400154, - "learning_rate": 0.0001999942582682124, - "loss": 46.0, - "step": 44624 - }, - { - "epoch": 3.4118928837662708, - "grad_norm": 0.004962950479239225, - "learning_rate": 0.000199994258010816, - "loss": 46.0, - "step": 44625 - }, - { - "epoch": 3.4119693407496605, - "grad_norm": 0.002649918431416154, - "learning_rate": 0.00019999425775341382, - "loss": 46.0, - "step": 44626 - }, - { - "epoch": 3.4120457977330503, - "grad_norm": 0.0014484574785456061, - "learning_rate": 0.0001999942574960059, - "loss": 46.0, - "step": 44627 - }, - { - "epoch": 3.41212225471644, - "grad_norm": 0.0009665804682299495, - "learning_rate": 0.0001999942572385922, - "loss": 46.0, - "step": 44628 - }, - { - "epoch": 3.41219871169983, - "grad_norm": 0.0032977371010929346, - "learning_rate": 0.00019999425698117275, - "loss": 46.0, - "step": 44629 - }, - { - "epoch": 3.4122751686832196, - "grad_norm": 0.0018153262790292501, - "learning_rate": 0.0001999942567237475, - "loss": 46.0, - "step": 44630 - }, - { - "epoch": 3.4123516256666093, - "grad_norm": 0.002286125672981143, - "learning_rate": 0.0001999942564663165, - "loss": 46.0, - "step": 44631 - }, - { - "epoch": 3.412428082649999, - "grad_norm": 0.0010668054455891252, - "learning_rate": 0.00019999425620887976, - "loss": 46.0, - "step": 44632 - }, - { - "epoch": 3.412504539633389, - "grad_norm": 0.001997595652937889, - "learning_rate": 0.0001999942559514372, - "loss": 46.0, - "step": 44633 - }, - { - "epoch": 3.4125809966167786, - "grad_norm": 0.001615968649275601, - "learning_rate": 0.0001999942556939889, - "loss": 46.0, - "step": 44634 - }, - { - "epoch": 3.4126574536001684, - "grad_norm": 0.0013958230847492814, - "learning_rate": 0.00019999425543653482, - "loss": 46.0, - "step": 44635 - }, - { - "epoch": 3.412733910583558, - "grad_norm": 0.003213487332686782, - "learning_rate": 0.000199994255179075, - "loss": 46.0, - "step": 44636 - }, - { - "epoch": 3.412810367566948, - "grad_norm": 0.001600019633769989, - "learning_rate": 0.00019999425492160937, - "loss": 46.0, - "step": 44637 - }, - { - "epoch": 3.412886824550337, - "grad_norm": 0.0014443201944231987, - "learning_rate": 0.00019999425466413797, - "loss": 46.0, - "step": 44638 - }, - { - "epoch": 3.412963281533727, - "grad_norm": 0.003951051272451878, - "learning_rate": 0.00019999425440666083, - "loss": 46.0, - "step": 44639 - }, - { - "epoch": 3.4130397385171167, - "grad_norm": 0.00178765959572047, - "learning_rate": 0.00019999425414917788, - "loss": 46.0, - "step": 44640 - }, - { - "epoch": 3.4131161955005065, - "grad_norm": 0.002320082625374198, - "learning_rate": 0.0001999942538916892, - "loss": 46.0, - "step": 44641 - }, - { - "epoch": 3.4131926524838963, - "grad_norm": 0.003323833690956235, - "learning_rate": 0.00019999425363419475, - "loss": 46.0, - "step": 44642 - }, - { - "epoch": 3.413269109467286, - "grad_norm": 0.003406842937693, - "learning_rate": 0.00019999425337669454, - "loss": 46.0, - "step": 44643 - }, - { - "epoch": 3.4133455664506758, - "grad_norm": 0.0019844863563776016, - "learning_rate": 0.0001999942531191885, - "loss": 46.0, - "step": 44644 - }, - { - "epoch": 3.4134220234340655, - "grad_norm": 0.0035805413499474525, - "learning_rate": 0.00019999425286167677, - "loss": 46.0, - "step": 44645 - }, - { - "epoch": 3.4134984804174553, - "grad_norm": 0.0032482771202921867, - "learning_rate": 0.0001999942526041592, - "loss": 46.0, - "step": 44646 - }, - { - "epoch": 3.4135749374008446, - "grad_norm": 0.004943655803799629, - "learning_rate": 0.0001999942523466359, - "loss": 46.0, - "step": 44647 - }, - { - "epoch": 3.4136513943842344, - "grad_norm": 0.003509682137519121, - "learning_rate": 0.00019999425208910686, - "loss": 46.0, - "step": 44648 - }, - { - "epoch": 3.413727851367624, - "grad_norm": 0.0021849924232810736, - "learning_rate": 0.00019999425183157198, - "loss": 46.0, - "step": 44649 - }, - { - "epoch": 3.413804308351014, - "grad_norm": 0.003294270019978285, - "learning_rate": 0.00019999425157403138, - "loss": 46.0, - "step": 44650 - }, - { - "epoch": 3.4138807653344037, - "grad_norm": 0.001982462592422962, - "learning_rate": 0.00019999425131648498, - "loss": 46.0, - "step": 44651 - }, - { - "epoch": 3.4139572223177934, - "grad_norm": 0.003685680218040943, - "learning_rate": 0.00019999425105893284, - "loss": 46.0, - "step": 44652 - }, - { - "epoch": 3.414033679301183, - "grad_norm": 0.004441377241164446, - "learning_rate": 0.00019999425080137492, - "loss": 46.0, - "step": 44653 - }, - { - "epoch": 3.414110136284573, - "grad_norm": 0.0014486340805888176, - "learning_rate": 0.00019999425054381123, - "loss": 46.0, - "step": 44654 - }, - { - "epoch": 3.4141865932679627, - "grad_norm": 0.0049875639379024506, - "learning_rate": 0.00019999425028624176, - "loss": 46.0, - "step": 44655 - }, - { - "epoch": 3.4142630502513525, - "grad_norm": 0.004506751429289579, - "learning_rate": 0.00019999425002866655, - "loss": 46.0, - "step": 44656 - }, - { - "epoch": 3.414339507234742, - "grad_norm": 0.0014895175117999315, - "learning_rate": 0.00019999424977108554, - "loss": 46.0, - "step": 44657 - }, - { - "epoch": 3.414415964218132, - "grad_norm": 0.0008397481869906187, - "learning_rate": 0.00019999424951349875, - "loss": 46.0, - "step": 44658 - }, - { - "epoch": 3.4144924212015217, - "grad_norm": 0.0015095225535333157, - "learning_rate": 0.00019999424925590625, - "loss": 46.0, - "step": 44659 - }, - { - "epoch": 3.414568878184911, - "grad_norm": 0.0024280408397316933, - "learning_rate": 0.00019999424899830794, - "loss": 46.0, - "step": 44660 - }, - { - "epoch": 3.414645335168301, - "grad_norm": 0.0017444886034354568, - "learning_rate": 0.00019999424874070387, - "loss": 46.0, - "step": 44661 - }, - { - "epoch": 3.4147217921516906, - "grad_norm": 0.001526823383755982, - "learning_rate": 0.00019999424848309401, - "loss": 46.0, - "step": 44662 - }, - { - "epoch": 3.4147982491350803, - "grad_norm": 0.003983426373451948, - "learning_rate": 0.00019999424822547842, - "loss": 46.0, - "step": 44663 - }, - { - "epoch": 3.41487470611847, - "grad_norm": 0.001521335681900382, - "learning_rate": 0.00019999424796785702, - "loss": 46.0, - "step": 44664 - }, - { - "epoch": 3.41495116310186, - "grad_norm": 0.0010788280051201582, - "learning_rate": 0.00019999424771022985, - "loss": 46.0, - "step": 44665 - }, - { - "epoch": 3.4150276200852496, - "grad_norm": 0.008481916040182114, - "learning_rate": 0.00019999424745259696, - "loss": 46.0, - "step": 44666 - }, - { - "epoch": 3.4151040770686394, - "grad_norm": 0.0018634909065440297, - "learning_rate": 0.00019999424719495824, - "loss": 46.0, - "step": 44667 - }, - { - "epoch": 3.415180534052029, - "grad_norm": 0.0031818212009966373, - "learning_rate": 0.0001999942469373138, - "loss": 46.0, - "step": 44668 - }, - { - "epoch": 3.4152569910354185, - "grad_norm": 0.0014383854577317834, - "learning_rate": 0.00019999424667966356, - "loss": 46.0, - "step": 44669 - }, - { - "epoch": 3.4153334480188082, - "grad_norm": 0.003227618522942066, - "learning_rate": 0.00019999424642200758, - "loss": 46.0, - "step": 44670 - }, - { - "epoch": 3.415409905002198, - "grad_norm": 0.003179847029969096, - "learning_rate": 0.00019999424616434582, - "loss": 46.0, - "step": 44671 - }, - { - "epoch": 3.4154863619855877, - "grad_norm": 0.0007517783669754863, - "learning_rate": 0.0001999942459066783, - "loss": 46.0, - "step": 44672 - }, - { - "epoch": 3.4155628189689775, - "grad_norm": 0.002710822969675064, - "learning_rate": 0.00019999424564900499, - "loss": 46.0, - "step": 44673 - }, - { - "epoch": 3.4156392759523673, - "grad_norm": 0.0033408503513783216, - "learning_rate": 0.0001999942453913259, - "loss": 46.0, - "step": 44674 - }, - { - "epoch": 3.415715732935757, - "grad_norm": 0.0021094053518027067, - "learning_rate": 0.00019999424513364106, - "loss": 46.0, - "step": 44675 - }, - { - "epoch": 3.415792189919147, - "grad_norm": 0.0006639473140239716, - "learning_rate": 0.00019999424487595043, - "loss": 46.0, - "step": 44676 - }, - { - "epoch": 3.4158686469025366, - "grad_norm": 0.006734954658895731, - "learning_rate": 0.00019999424461825406, - "loss": 46.0, - "step": 44677 - }, - { - "epoch": 3.4159451038859263, - "grad_norm": 0.0013490165583789349, - "learning_rate": 0.0001999942443605519, - "loss": 46.0, - "step": 44678 - }, - { - "epoch": 3.416021560869316, - "grad_norm": 0.0005813298048451543, - "learning_rate": 0.000199994244102844, - "loss": 46.0, - "step": 44679 - }, - { - "epoch": 3.416098017852706, - "grad_norm": 0.002728705294430256, - "learning_rate": 0.0001999942438451303, - "loss": 46.0, - "step": 44680 - }, - { - "epoch": 3.416174474836095, - "grad_norm": 0.00444404873996973, - "learning_rate": 0.00019999424358741084, - "loss": 46.0, - "step": 44681 - }, - { - "epoch": 3.416250931819485, - "grad_norm": 0.008355891332030296, - "learning_rate": 0.0001999942433296856, - "loss": 46.0, - "step": 44682 - }, - { - "epoch": 3.4163273888028747, - "grad_norm": 0.0020598899573087692, - "learning_rate": 0.00019999424307195462, - "loss": 46.0, - "step": 44683 - }, - { - "epoch": 3.4164038457862644, - "grad_norm": 0.0038032501470297575, - "learning_rate": 0.00019999424281421787, - "loss": 46.0, - "step": 44684 - }, - { - "epoch": 3.416480302769654, - "grad_norm": 0.0028091317508369684, - "learning_rate": 0.0001999942425564753, - "loss": 46.0, - "step": 44685 - }, - { - "epoch": 3.416556759753044, - "grad_norm": 0.001817409647628665, - "learning_rate": 0.00019999424229872703, - "loss": 46.0, - "step": 44686 - }, - { - "epoch": 3.4166332167364337, - "grad_norm": 0.0021161246113479137, - "learning_rate": 0.00019999424204097298, - "loss": 46.0, - "step": 44687 - }, - { - "epoch": 3.4167096737198235, - "grad_norm": 0.0016338556306436658, - "learning_rate": 0.0001999942417832131, - "loss": 46.0, - "step": 44688 - }, - { - "epoch": 3.4167861307032132, - "grad_norm": 0.00104104436468333, - "learning_rate": 0.0001999942415254475, - "loss": 46.0, - "step": 44689 - }, - { - "epoch": 3.416862587686603, - "grad_norm": 0.0007239144179038703, - "learning_rate": 0.0001999942412676761, - "loss": 46.0, - "step": 44690 - }, - { - "epoch": 3.4169390446699923, - "grad_norm": 0.002731866203248501, - "learning_rate": 0.000199994241009899, - "loss": 46.0, - "step": 44691 - }, - { - "epoch": 3.417015501653382, - "grad_norm": 0.0031095221638679504, - "learning_rate": 0.00019999424075211605, - "loss": 46.0, - "step": 44692 - }, - { - "epoch": 3.417091958636772, - "grad_norm": 0.0027906717732548714, - "learning_rate": 0.00019999424049432736, - "loss": 46.0, - "step": 44693 - }, - { - "epoch": 3.4171684156201616, - "grad_norm": 0.0021964660845696926, - "learning_rate": 0.0001999942402365329, - "loss": 46.0, - "step": 44694 - }, - { - "epoch": 3.4172448726035514, - "grad_norm": 0.0027778425719588995, - "learning_rate": 0.00019999423997873268, - "loss": 46.0, - "step": 44695 - }, - { - "epoch": 3.417321329586941, - "grad_norm": 0.002277670195326209, - "learning_rate": 0.0001999942397209267, - "loss": 46.0, - "step": 44696 - }, - { - "epoch": 3.417397786570331, - "grad_norm": 0.0007492602453567088, - "learning_rate": 0.00019999423946311494, - "loss": 46.0, - "step": 44697 - }, - { - "epoch": 3.4174742435537206, - "grad_norm": 0.0025637235958129168, - "learning_rate": 0.0001999942392052974, - "loss": 46.0, - "step": 44698 - }, - { - "epoch": 3.4175507005371104, - "grad_norm": 0.0014416198246181011, - "learning_rate": 0.00019999423894747408, - "loss": 46.0, - "step": 44699 - }, - { - "epoch": 3.4176271575205, - "grad_norm": 0.0031861397437751293, - "learning_rate": 0.00019999423868964503, - "loss": 46.0, - "step": 44700 - }, - { - "epoch": 3.41770361450389, - "grad_norm": 0.002186631551012397, - "learning_rate": 0.00019999423843181018, - "loss": 46.0, - "step": 44701 - }, - { - "epoch": 3.4177800714872797, - "grad_norm": 0.0021601759362965822, - "learning_rate": 0.00019999423817396958, - "loss": 46.0, - "step": 44702 - }, - { - "epoch": 3.417856528470669, - "grad_norm": 0.0018157935701310635, - "learning_rate": 0.00019999423791612321, - "loss": 46.0, - "step": 44703 - }, - { - "epoch": 3.4179329854540588, - "grad_norm": 0.002842133166268468, - "learning_rate": 0.00019999423765827104, - "loss": 46.0, - "step": 44704 - }, - { - "epoch": 3.4180094424374485, - "grad_norm": 0.009291475638747215, - "learning_rate": 0.00019999423740041315, - "loss": 46.0, - "step": 44705 - }, - { - "epoch": 3.4180858994208383, - "grad_norm": 0.0016347493510693312, - "learning_rate": 0.00019999423714254946, - "loss": 46.0, - "step": 44706 - }, - { - "epoch": 3.418162356404228, - "grad_norm": 0.0031018806621432304, - "learning_rate": 0.00019999423688468, - "loss": 46.0, - "step": 44707 - }, - { - "epoch": 3.418238813387618, - "grad_norm": 0.0011205618502572179, - "learning_rate": 0.0001999942366268048, - "loss": 46.0, - "step": 44708 - }, - { - "epoch": 3.4183152703710076, - "grad_norm": 0.001417459105141461, - "learning_rate": 0.00019999423636892378, - "loss": 46.0, - "step": 44709 - }, - { - "epoch": 3.4183917273543973, - "grad_norm": 0.0018594611901789904, - "learning_rate": 0.00019999423611103702, - "loss": 46.0, - "step": 44710 - }, - { - "epoch": 3.418468184337787, - "grad_norm": 0.005445950664579868, - "learning_rate": 0.0001999942358531445, - "loss": 46.0, - "step": 44711 - }, - { - "epoch": 3.418544641321177, - "grad_norm": 0.0020416711922734976, - "learning_rate": 0.0001999942355952462, - "loss": 46.0, - "step": 44712 - }, - { - "epoch": 3.418621098304566, - "grad_norm": 0.00345365097746253, - "learning_rate": 0.0001999942353373421, - "loss": 46.0, - "step": 44713 - }, - { - "epoch": 3.418697555287956, - "grad_norm": 0.006086622830480337, - "learning_rate": 0.0001999942350794323, - "loss": 46.0, - "step": 44714 - }, - { - "epoch": 3.4187740122713457, - "grad_norm": 0.0009702304378151894, - "learning_rate": 0.0001999942348215167, - "loss": 46.0, - "step": 44715 - }, - { - "epoch": 3.4188504692547355, - "grad_norm": 0.0025691729970276356, - "learning_rate": 0.00019999423456359532, - "loss": 46.0, - "step": 44716 - }, - { - "epoch": 3.418926926238125, - "grad_norm": 0.0023943802807480097, - "learning_rate": 0.00019999423430566818, - "loss": 46.0, - "step": 44717 - }, - { - "epoch": 3.419003383221515, - "grad_norm": 0.0037359334528446198, - "learning_rate": 0.00019999423404773526, - "loss": 46.0, - "step": 44718 - }, - { - "epoch": 3.4190798402049047, - "grad_norm": 0.003570438362658024, - "learning_rate": 0.00019999423378979655, - "loss": 46.0, - "step": 44719 - }, - { - "epoch": 3.4191562971882945, - "grad_norm": 0.0025169539730995893, - "learning_rate": 0.00019999423353185208, - "loss": 46.0, - "step": 44720 - }, - { - "epoch": 3.4192327541716843, - "grad_norm": 0.00238978723064065, - "learning_rate": 0.0001999942332739019, - "loss": 46.0, - "step": 44721 - }, - { - "epoch": 3.419309211155074, - "grad_norm": 0.004822245799005032, - "learning_rate": 0.0001999942330159459, - "loss": 46.0, - "step": 44722 - }, - { - "epoch": 3.419385668138464, - "grad_norm": 0.001097759697586298, - "learning_rate": 0.00019999423275798413, - "loss": 46.0, - "step": 44723 - }, - { - "epoch": 3.4194621251218535, - "grad_norm": 0.003434795653447509, - "learning_rate": 0.0001999942325000166, - "loss": 46.0, - "step": 44724 - }, - { - "epoch": 3.419538582105243, - "grad_norm": 0.002722394187003374, - "learning_rate": 0.0001999942322420433, - "loss": 46.0, - "step": 44725 - }, - { - "epoch": 3.4196150390886326, - "grad_norm": 0.0025237184017896652, - "learning_rate": 0.00019999423198406425, - "loss": 46.0, - "step": 44726 - }, - { - "epoch": 3.4196914960720224, - "grad_norm": 0.002259512199088931, - "learning_rate": 0.0001999942317260794, - "loss": 46.0, - "step": 44727 - }, - { - "epoch": 3.419767953055412, - "grad_norm": 0.001269186963327229, - "learning_rate": 0.0001999942314680888, - "loss": 46.0, - "step": 44728 - }, - { - "epoch": 3.419844410038802, - "grad_norm": 0.0028657715301960707, - "learning_rate": 0.0001999942312100924, - "loss": 46.0, - "step": 44729 - }, - { - "epoch": 3.4199208670221917, - "grad_norm": 0.005401701666414738, - "learning_rate": 0.00019999423095209027, - "loss": 46.0, - "step": 44730 - }, - { - "epoch": 3.4199973240055814, - "grad_norm": 0.005532996263355017, - "learning_rate": 0.00019999423069408235, - "loss": 46.0, - "step": 44731 - }, - { - "epoch": 3.420073780988971, - "grad_norm": 0.0012326058931648731, - "learning_rate": 0.0001999942304360687, - "loss": 46.0, - "step": 44732 - }, - { - "epoch": 3.420150237972361, - "grad_norm": 0.0010215356014668941, - "learning_rate": 0.00019999423017804922, - "loss": 46.0, - "step": 44733 - }, - { - "epoch": 3.4202266949557507, - "grad_norm": 0.0005585872568190098, - "learning_rate": 0.00019999422992002402, - "loss": 46.0, - "step": 44734 - }, - { - "epoch": 3.42030315193914, - "grad_norm": 0.004630609881132841, - "learning_rate": 0.000199994229661993, - "loss": 46.0, - "step": 44735 - }, - { - "epoch": 3.42037960892253, - "grad_norm": 0.0015549033414572477, - "learning_rate": 0.00019999422940395625, - "loss": 46.0, - "step": 44736 - }, - { - "epoch": 3.4204560659059196, - "grad_norm": 0.000666903390083462, - "learning_rate": 0.00019999422914591372, - "loss": 46.0, - "step": 44737 - }, - { - "epoch": 3.4205325228893093, - "grad_norm": 0.0011284883366897702, - "learning_rate": 0.00019999422888786544, - "loss": 46.0, - "step": 44738 - }, - { - "epoch": 3.420608979872699, - "grad_norm": 0.00099844834767282, - "learning_rate": 0.00019999422862981137, - "loss": 46.0, - "step": 44739 - }, - { - "epoch": 3.420685436856089, - "grad_norm": 0.0007214226061478257, - "learning_rate": 0.00019999422837175152, - "loss": 46.0, - "step": 44740 - }, - { - "epoch": 3.4207618938394786, - "grad_norm": 0.0020479015074670315, - "learning_rate": 0.00019999422811368592, - "loss": 46.0, - "step": 44741 - }, - { - "epoch": 3.4208383508228684, - "grad_norm": 0.002117303665727377, - "learning_rate": 0.00019999422785561458, - "loss": 46.0, - "step": 44742 - }, - { - "epoch": 3.420914807806258, - "grad_norm": 0.003740879939869046, - "learning_rate": 0.0001999942275975374, - "loss": 46.0, - "step": 44743 - }, - { - "epoch": 3.420991264789648, - "grad_norm": 0.0028734600637108088, - "learning_rate": 0.0001999942273394545, - "loss": 46.0, - "step": 44744 - }, - { - "epoch": 3.4210677217730376, - "grad_norm": 0.0013127618003636599, - "learning_rate": 0.0001999942270813658, - "loss": 46.0, - "step": 44745 - }, - { - "epoch": 3.4211441787564274, - "grad_norm": 0.0029794976580888033, - "learning_rate": 0.00019999422682327137, - "loss": 46.0, - "step": 44746 - }, - { - "epoch": 3.4212206357398167, - "grad_norm": 0.0022585249971598387, - "learning_rate": 0.00019999422656517116, - "loss": 46.0, - "step": 44747 - }, - { - "epoch": 3.4212970927232065, - "grad_norm": 0.0015519445296376944, - "learning_rate": 0.00019999422630706518, - "loss": 46.0, - "step": 44748 - }, - { - "epoch": 3.4213735497065962, - "grad_norm": 0.0006429665954783559, - "learning_rate": 0.0001999942260489534, - "loss": 46.0, - "step": 44749 - }, - { - "epoch": 3.421450006689986, - "grad_norm": 0.0015220160130411386, - "learning_rate": 0.00019999422579083587, - "loss": 46.0, - "step": 44750 - }, - { - "epoch": 3.4215264636733758, - "grad_norm": 0.005609241779893637, - "learning_rate": 0.0001999942255327126, - "loss": 46.0, - "step": 44751 - }, - { - "epoch": 3.4216029206567655, - "grad_norm": 0.0028223353438079357, - "learning_rate": 0.00019999422527458354, - "loss": 46.0, - "step": 44752 - }, - { - "epoch": 3.4216793776401553, - "grad_norm": 0.0022195610217750072, - "learning_rate": 0.0001999942250164487, - "loss": 46.0, - "step": 44753 - }, - { - "epoch": 3.421755834623545, - "grad_norm": 0.001166786765679717, - "learning_rate": 0.00019999422475830807, - "loss": 46.0, - "step": 44754 - }, - { - "epoch": 3.421832291606935, - "grad_norm": 0.0013321212027221918, - "learning_rate": 0.00019999422450016173, - "loss": 46.0, - "step": 44755 - }, - { - "epoch": 3.421908748590324, - "grad_norm": 0.0020120050758123398, - "learning_rate": 0.0001999942242420096, - "loss": 46.0, - "step": 44756 - }, - { - "epoch": 3.421985205573714, - "grad_norm": 0.0029778610914945602, - "learning_rate": 0.00019999422398385167, - "loss": 46.0, - "step": 44757 - }, - { - "epoch": 3.4220616625571036, - "grad_norm": 0.0008817972266115248, - "learning_rate": 0.00019999422372568798, - "loss": 46.0, - "step": 44758 - }, - { - "epoch": 3.4221381195404934, - "grad_norm": 0.0013322122395038605, - "learning_rate": 0.00019999422346751855, - "loss": 46.0, - "step": 44759 - }, - { - "epoch": 3.422214576523883, - "grad_norm": 0.001070545520633459, - "learning_rate": 0.0001999942232093433, - "loss": 46.0, - "step": 44760 - }, - { - "epoch": 3.422291033507273, - "grad_norm": 0.0025846741627901793, - "learning_rate": 0.00019999422295116233, - "loss": 46.0, - "step": 44761 - }, - { - "epoch": 3.4223674904906627, - "grad_norm": 0.0013964733807370067, - "learning_rate": 0.00019999422269297557, - "loss": 46.0, - "step": 44762 - }, - { - "epoch": 3.4224439474740525, - "grad_norm": 0.0014243458863347769, - "learning_rate": 0.00019999422243478305, - "loss": 46.0, - "step": 44763 - }, - { - "epoch": 3.422520404457442, - "grad_norm": 0.0011035765055567026, - "learning_rate": 0.00019999422217658474, - "loss": 46.0, - "step": 44764 - }, - { - "epoch": 3.422596861440832, - "grad_norm": 0.00795206893235445, - "learning_rate": 0.0001999942219183807, - "loss": 46.0, - "step": 44765 - }, - { - "epoch": 3.4226733184242217, - "grad_norm": 0.0024507150519639254, - "learning_rate": 0.00019999422166017085, - "loss": 46.0, - "step": 44766 - }, - { - "epoch": 3.4227497754076115, - "grad_norm": 0.0008149688947014511, - "learning_rate": 0.00019999422140195525, - "loss": 46.0, - "step": 44767 - }, - { - "epoch": 3.4228262323910013, - "grad_norm": 0.0021263097878545523, - "learning_rate": 0.00019999422114373388, - "loss": 46.0, - "step": 44768 - }, - { - "epoch": 3.4229026893743906, - "grad_norm": 0.0009419882553629577, - "learning_rate": 0.00019999422088550674, - "loss": 46.0, - "step": 44769 - }, - { - "epoch": 3.4229791463577803, - "grad_norm": 0.001690485281869769, - "learning_rate": 0.00019999422062727385, - "loss": 46.0, - "step": 44770 - }, - { - "epoch": 3.42305560334117, - "grad_norm": 0.001407820382155478, - "learning_rate": 0.00019999422036903516, - "loss": 46.0, - "step": 44771 - }, - { - "epoch": 3.42313206032456, - "grad_norm": 0.0022869319655001163, - "learning_rate": 0.00019999422011079073, - "loss": 46.0, - "step": 44772 - }, - { - "epoch": 3.4232085173079496, - "grad_norm": 0.001174435019493103, - "learning_rate": 0.0001999942198525405, - "loss": 46.0, - "step": 44773 - }, - { - "epoch": 3.4232849742913394, - "grad_norm": 0.002835790626704693, - "learning_rate": 0.0001999942195942845, - "loss": 46.0, - "step": 44774 - }, - { - "epoch": 3.423361431274729, - "grad_norm": 0.0013584523694589734, - "learning_rate": 0.00019999421933602276, - "loss": 46.0, - "step": 44775 - }, - { - "epoch": 3.423437888258119, - "grad_norm": 0.00241549639031291, - "learning_rate": 0.00019999421907775523, - "loss": 46.0, - "step": 44776 - }, - { - "epoch": 3.4235143452415087, - "grad_norm": 0.0012261769734323025, - "learning_rate": 0.00019999421881948193, - "loss": 46.0, - "step": 44777 - }, - { - "epoch": 3.423590802224898, - "grad_norm": 0.0022929199039936066, - "learning_rate": 0.00019999421856120288, - "loss": 46.0, - "step": 44778 - }, - { - "epoch": 3.4236672592082877, - "grad_norm": 0.0013775007100775838, - "learning_rate": 0.00019999421830291806, - "loss": 46.0, - "step": 44779 - }, - { - "epoch": 3.4237437161916775, - "grad_norm": 0.0021777483634650707, - "learning_rate": 0.00019999421804462743, - "loss": 46.0, - "step": 44780 - }, - { - "epoch": 3.4238201731750673, - "grad_norm": 0.0031727158930152655, - "learning_rate": 0.00019999421778633107, - "loss": 46.0, - "step": 44781 - }, - { - "epoch": 3.423896630158457, - "grad_norm": 0.0007171485922299325, - "learning_rate": 0.00019999421752802892, - "loss": 46.0, - "step": 44782 - }, - { - "epoch": 3.423973087141847, - "grad_norm": 0.0013663237914443016, - "learning_rate": 0.00019999421726972104, - "loss": 46.0, - "step": 44783 - }, - { - "epoch": 3.4240495441252365, - "grad_norm": 0.0032893188763409853, - "learning_rate": 0.00019999421701140735, - "loss": 46.0, - "step": 44784 - }, - { - "epoch": 3.4241260011086263, - "grad_norm": 0.001893207780085504, - "learning_rate": 0.0001999942167530879, - "loss": 46.0, - "step": 44785 - }, - { - "epoch": 3.424202458092016, - "grad_norm": 0.00474650738760829, - "learning_rate": 0.0001999942164947627, - "loss": 46.0, - "step": 44786 - }, - { - "epoch": 3.424278915075406, - "grad_norm": 0.001162675442174077, - "learning_rate": 0.0001999942162364317, - "loss": 46.0, - "step": 44787 - }, - { - "epoch": 3.4243553720587956, - "grad_norm": 0.0033233659341931343, - "learning_rate": 0.00019999421597809497, - "loss": 46.0, - "step": 44788 - }, - { - "epoch": 3.4244318290421853, - "grad_norm": 0.0041215987876057625, - "learning_rate": 0.00019999421571975244, - "loss": 46.0, - "step": 44789 - }, - { - "epoch": 3.424508286025575, - "grad_norm": 0.001480935956351459, - "learning_rate": 0.00019999421546140417, - "loss": 46.0, - "step": 44790 - }, - { - "epoch": 3.4245847430089644, - "grad_norm": 0.0006438368582166731, - "learning_rate": 0.0001999942152030501, - "loss": 46.0, - "step": 44791 - }, - { - "epoch": 3.424661199992354, - "grad_norm": 0.008541252464056015, - "learning_rate": 0.00019999421494469024, - "loss": 46.0, - "step": 44792 - }, - { - "epoch": 3.424737656975744, - "grad_norm": 0.002177627757191658, - "learning_rate": 0.00019999421468632465, - "loss": 46.0, - "step": 44793 - }, - { - "epoch": 3.4248141139591337, - "grad_norm": 0.002214984502643347, - "learning_rate": 0.00019999421442795328, - "loss": 46.0, - "step": 44794 - }, - { - "epoch": 3.4248905709425235, - "grad_norm": 0.0017491765320301056, - "learning_rate": 0.00019999421416957617, - "loss": 46.0, - "step": 44795 - }, - { - "epoch": 3.4249670279259132, - "grad_norm": 0.0004604731220752001, - "learning_rate": 0.00019999421391119325, - "loss": 46.0, - "step": 44796 - }, - { - "epoch": 3.425043484909303, - "grad_norm": 0.0019555080216377974, - "learning_rate": 0.00019999421365280456, - "loss": 46.0, - "step": 44797 - }, - { - "epoch": 3.4251199418926928, - "grad_norm": 0.0009428462362848222, - "learning_rate": 0.0001999942133944101, - "loss": 46.0, - "step": 44798 - }, - { - "epoch": 3.4251963988760825, - "grad_norm": 0.005895137321203947, - "learning_rate": 0.00019999421313600992, - "loss": 46.0, - "step": 44799 - }, - { - "epoch": 3.425272855859472, - "grad_norm": 0.005798183847218752, - "learning_rate": 0.00019999421287760391, - "loss": 46.0, - "step": 44800 - }, - { - "epoch": 3.4253493128428616, - "grad_norm": 0.0016766665503382683, - "learning_rate": 0.0001999942126191922, - "loss": 46.0, - "step": 44801 - }, - { - "epoch": 3.4254257698262514, - "grad_norm": 0.0014669087249785662, - "learning_rate": 0.00019999421236077466, - "loss": 46.0, - "step": 44802 - }, - { - "epoch": 3.425502226809641, - "grad_norm": 0.004316451493650675, - "learning_rate": 0.00019999421210235136, - "loss": 46.0, - "step": 44803 - }, - { - "epoch": 3.425578683793031, - "grad_norm": 0.003676827298477292, - "learning_rate": 0.0001999942118439223, - "loss": 46.0, - "step": 44804 - }, - { - "epoch": 3.4256551407764206, - "grad_norm": 0.0019010331016033888, - "learning_rate": 0.0001999942115854875, - "loss": 46.0, - "step": 44805 - }, - { - "epoch": 3.4257315977598104, - "grad_norm": 0.003554445458576083, - "learning_rate": 0.0001999942113270469, - "loss": 46.0, - "step": 44806 - }, - { - "epoch": 3.4258080547432, - "grad_norm": 0.00315917213447392, - "learning_rate": 0.00019999421106860053, - "loss": 46.0, - "step": 44807 - }, - { - "epoch": 3.42588451172659, - "grad_norm": 0.0024693512823432684, - "learning_rate": 0.0001999942108101484, - "loss": 46.0, - "step": 44808 - }, - { - "epoch": 3.4259609687099797, - "grad_norm": 0.002221296541392803, - "learning_rate": 0.00019999421055169047, - "loss": 46.0, - "step": 44809 - }, - { - "epoch": 3.4260374256933694, - "grad_norm": 0.0034316994715481997, - "learning_rate": 0.0001999942102932268, - "loss": 46.0, - "step": 44810 - }, - { - "epoch": 3.426113882676759, - "grad_norm": 0.007961695082485676, - "learning_rate": 0.00019999421003475735, - "loss": 46.0, - "step": 44811 - }, - { - "epoch": 3.4261903396601485, - "grad_norm": 0.0018990971148014069, - "learning_rate": 0.00019999420977628214, - "loss": 46.0, - "step": 44812 - }, - { - "epoch": 3.4262667966435383, - "grad_norm": 0.0010237082606181502, - "learning_rate": 0.00019999420951780116, - "loss": 46.0, - "step": 44813 - }, - { - "epoch": 3.426343253626928, - "grad_norm": 0.0014058739179745317, - "learning_rate": 0.00019999420925931444, - "loss": 46.0, - "step": 44814 - }, - { - "epoch": 3.426419710610318, - "grad_norm": 0.0014993497170507908, - "learning_rate": 0.0001999942090008219, - "loss": 46.0, - "step": 44815 - }, - { - "epoch": 3.4264961675937076, - "grad_norm": 0.0019525452516973019, - "learning_rate": 0.0001999942087423236, - "loss": 46.0, - "step": 44816 - }, - { - "epoch": 3.4265726245770973, - "grad_norm": 0.0034172164741903543, - "learning_rate": 0.00019999420848381956, - "loss": 46.0, - "step": 44817 - }, - { - "epoch": 3.426649081560487, - "grad_norm": 0.0019351268419995904, - "learning_rate": 0.0001999942082253097, - "loss": 46.0, - "step": 44818 - }, - { - "epoch": 3.426725538543877, - "grad_norm": 0.0013878019526600838, - "learning_rate": 0.00019999420796679415, - "loss": 46.0, - "step": 44819 - }, - { - "epoch": 3.4268019955272666, - "grad_norm": 0.0013011184055358171, - "learning_rate": 0.00019999420770827278, - "loss": 46.0, - "step": 44820 - }, - { - "epoch": 3.4268784525106564, - "grad_norm": 0.0020521623082458973, - "learning_rate": 0.0001999942074497456, - "loss": 46.0, - "step": 44821 - }, - { - "epoch": 3.4269549094940457, - "grad_norm": 0.0038942224346101284, - "learning_rate": 0.00019999420719121272, - "loss": 46.0, - "step": 44822 - }, - { - "epoch": 3.4270313664774354, - "grad_norm": 0.0015740522649139166, - "learning_rate": 0.00019999420693267406, - "loss": 46.0, - "step": 44823 - }, - { - "epoch": 3.427107823460825, - "grad_norm": 0.0023642228916287422, - "learning_rate": 0.0001999942066741296, - "loss": 46.0, - "step": 44824 - }, - { - "epoch": 3.427184280444215, - "grad_norm": 0.002113768830895424, - "learning_rate": 0.0001999942064155794, - "loss": 46.0, - "step": 44825 - }, - { - "epoch": 3.4272607374276047, - "grad_norm": 0.004363631829619408, - "learning_rate": 0.00019999420615702341, - "loss": 46.0, - "step": 44826 - }, - { - "epoch": 3.4273371944109945, - "grad_norm": 0.0054193767718970776, - "learning_rate": 0.00019999420589846166, - "loss": 46.0, - "step": 44827 - }, - { - "epoch": 3.4274136513943843, - "grad_norm": 0.0036532417871057987, - "learning_rate": 0.00019999420563989416, - "loss": 46.0, - "step": 44828 - }, - { - "epoch": 3.427490108377774, - "grad_norm": 0.0021726256236433983, - "learning_rate": 0.00019999420538132086, - "loss": 46.0, - "step": 44829 - }, - { - "epoch": 3.4275665653611638, - "grad_norm": 0.0025024053175002337, - "learning_rate": 0.00019999420512274184, - "loss": 46.0, - "step": 44830 - }, - { - "epoch": 3.4276430223445535, - "grad_norm": 0.0018850157503038645, - "learning_rate": 0.000199994204864157, - "loss": 46.0, - "step": 44831 - }, - { - "epoch": 3.4277194793279433, - "grad_norm": 0.00663653202354908, - "learning_rate": 0.0001999942046055664, - "loss": 46.0, - "step": 44832 - }, - { - "epoch": 3.427795936311333, - "grad_norm": 0.0015489644138142467, - "learning_rate": 0.00019999420434697003, - "loss": 46.0, - "step": 44833 - }, - { - "epoch": 3.4278723932947224, - "grad_norm": 0.001355155953206122, - "learning_rate": 0.00019999420408836792, - "loss": 46.0, - "step": 44834 - }, - { - "epoch": 3.427948850278112, - "grad_norm": 0.0014475092757493258, - "learning_rate": 0.00019999420382976, - "loss": 46.0, - "step": 44835 - }, - { - "epoch": 3.428025307261502, - "grad_norm": 0.003343346295878291, - "learning_rate": 0.00019999420357114632, - "loss": 46.0, - "step": 44836 - }, - { - "epoch": 3.4281017642448917, - "grad_norm": 0.0020684038754552603, - "learning_rate": 0.00019999420331252689, - "loss": 46.0, - "step": 44837 - }, - { - "epoch": 3.4281782212282814, - "grad_norm": 0.0009632062283344567, - "learning_rate": 0.00019999420305390165, - "loss": 46.0, - "step": 44838 - }, - { - "epoch": 3.428254678211671, - "grad_norm": 0.001289335428737104, - "learning_rate": 0.0001999942027952707, - "loss": 46.0, - "step": 44839 - }, - { - "epoch": 3.428331135195061, - "grad_norm": 0.004934332799166441, - "learning_rate": 0.00019999420253663397, - "loss": 46.0, - "step": 44840 - }, - { - "epoch": 3.4284075921784507, - "grad_norm": 0.001309933140873909, - "learning_rate": 0.00019999420227799145, - "loss": 46.0, - "step": 44841 - }, - { - "epoch": 3.4284840491618405, - "grad_norm": 0.001965321134775877, - "learning_rate": 0.00019999420201934314, - "loss": 46.0, - "step": 44842 - }, - { - "epoch": 3.4285605061452302, - "grad_norm": 0.0014687597285956144, - "learning_rate": 0.0001999942017606891, - "loss": 46.0, - "step": 44843 - }, - { - "epoch": 3.4286369631286195, - "grad_norm": 0.002811191137880087, - "learning_rate": 0.00019999420150202925, - "loss": 46.0, - "step": 44844 - }, - { - "epoch": 3.4287134201120093, - "grad_norm": 0.0068966723047196865, - "learning_rate": 0.00019999420124336369, - "loss": 46.0, - "step": 44845 - }, - { - "epoch": 3.428789877095399, - "grad_norm": 0.0012438022531569004, - "learning_rate": 0.00019999420098469232, - "loss": 46.0, - "step": 44846 - }, - { - "epoch": 3.428866334078789, - "grad_norm": 0.001511577283963561, - "learning_rate": 0.0001999942007260152, - "loss": 46.0, - "step": 44847 - }, - { - "epoch": 3.4289427910621786, - "grad_norm": 0.0023268430959433317, - "learning_rate": 0.0001999942004673323, - "loss": 46.0, - "step": 44848 - }, - { - "epoch": 3.4290192480455683, - "grad_norm": 0.00422036973759532, - "learning_rate": 0.0001999942002086436, - "loss": 46.0, - "step": 44849 - }, - { - "epoch": 3.429095705028958, - "grad_norm": 0.0008190243388526142, - "learning_rate": 0.0001999941999499492, - "loss": 46.0, - "step": 44850 - }, - { - "epoch": 3.429172162012348, - "grad_norm": 0.003018898656591773, - "learning_rate": 0.00019999419969124897, - "loss": 46.0, - "step": 44851 - }, - { - "epoch": 3.4292486189957376, - "grad_norm": 0.002889312105253339, - "learning_rate": 0.00019999419943254299, - "loss": 46.0, - "step": 44852 - }, - { - "epoch": 3.4293250759791274, - "grad_norm": 0.0014641005545854568, - "learning_rate": 0.00019999419917383123, - "loss": 46.0, - "step": 44853 - }, - { - "epoch": 3.429401532962517, - "grad_norm": 0.0007817661389708519, - "learning_rate": 0.00019999419891511373, - "loss": 46.0, - "step": 44854 - }, - { - "epoch": 3.429477989945907, - "grad_norm": 0.003971664234995842, - "learning_rate": 0.00019999419865639046, - "loss": 46.0, - "step": 44855 - }, - { - "epoch": 3.4295544469292962, - "grad_norm": 0.0030584794003516436, - "learning_rate": 0.0001999941983976614, - "loss": 46.0, - "step": 44856 - }, - { - "epoch": 3.429630903912686, - "grad_norm": 0.00611590500921011, - "learning_rate": 0.00019999419813892657, - "loss": 46.0, - "step": 44857 - }, - { - "epoch": 3.4297073608960758, - "grad_norm": 0.005710372235625982, - "learning_rate": 0.00019999419788018597, - "loss": 46.0, - "step": 44858 - }, - { - "epoch": 3.4297838178794655, - "grad_norm": 0.001975302817299962, - "learning_rate": 0.00019999419762143964, - "loss": 46.0, - "step": 44859 - }, - { - "epoch": 3.4298602748628553, - "grad_norm": 0.0008019822416827083, - "learning_rate": 0.0001999941973626875, - "loss": 46.0, - "step": 44860 - }, - { - "epoch": 3.429936731846245, - "grad_norm": 0.0006031130324117839, - "learning_rate": 0.00019999419710392958, - "loss": 46.0, - "step": 44861 - }, - { - "epoch": 3.430013188829635, - "grad_norm": 0.0056520369835197926, - "learning_rate": 0.00019999419684516592, - "loss": 46.0, - "step": 44862 - }, - { - "epoch": 3.4300896458130246, - "grad_norm": 0.0022835610434412956, - "learning_rate": 0.0001999941965863965, - "loss": 46.0, - "step": 44863 - }, - { - "epoch": 3.4301661027964143, - "grad_norm": 0.0011921918485313654, - "learning_rate": 0.00019999419632762128, - "loss": 46.0, - "step": 44864 - }, - { - "epoch": 3.430242559779804, - "grad_norm": 0.0016698911786079407, - "learning_rate": 0.00019999419606884028, - "loss": 46.0, - "step": 44865 - }, - { - "epoch": 3.4303190167631934, - "grad_norm": 0.001507268287241459, - "learning_rate": 0.00019999419581005355, - "loss": 46.0, - "step": 44866 - }, - { - "epoch": 3.430395473746583, - "grad_norm": 0.005574206355959177, - "learning_rate": 0.00019999419555126105, - "loss": 46.0, - "step": 44867 - }, - { - "epoch": 3.430471930729973, - "grad_norm": 0.0025285673327744007, - "learning_rate": 0.00019999419529246275, - "loss": 46.0, - "step": 44868 - }, - { - "epoch": 3.4305483877133627, - "grad_norm": 0.001006459933705628, - "learning_rate": 0.0001999941950336587, - "loss": 46.0, - "step": 44869 - }, - { - "epoch": 3.4306248446967524, - "grad_norm": 0.0034164192620664835, - "learning_rate": 0.0001999941947748489, - "loss": 46.0, - "step": 44870 - }, - { - "epoch": 3.430701301680142, - "grad_norm": 0.0015245262766256928, - "learning_rate": 0.0001999941945160333, - "loss": 46.0, - "step": 44871 - }, - { - "epoch": 3.430777758663532, - "grad_norm": 0.0018647368997335434, - "learning_rate": 0.00019999419425721193, - "loss": 46.0, - "step": 44872 - }, - { - "epoch": 3.4308542156469217, - "grad_norm": 0.001559597090817988, - "learning_rate": 0.00019999419399838482, - "loss": 46.0, - "step": 44873 - }, - { - "epoch": 3.4309306726303115, - "grad_norm": 0.0021349964663386345, - "learning_rate": 0.00019999419373955193, - "loss": 46.0, - "step": 44874 - }, - { - "epoch": 3.4310071296137012, - "grad_norm": 0.00545212347060442, - "learning_rate": 0.00019999419348071325, - "loss": 46.0, - "step": 44875 - }, - { - "epoch": 3.431083586597091, - "grad_norm": 0.0021754009649157524, - "learning_rate": 0.00019999419322186884, - "loss": 46.0, - "step": 44876 - }, - { - "epoch": 3.4311600435804808, - "grad_norm": 0.003279255935922265, - "learning_rate": 0.0001999941929630186, - "loss": 46.0, - "step": 44877 - }, - { - "epoch": 3.43123650056387, - "grad_norm": 0.0017380501376464963, - "learning_rate": 0.00019999419270416265, - "loss": 46.0, - "step": 44878 - }, - { - "epoch": 3.43131295754726, - "grad_norm": 0.0005144042079336941, - "learning_rate": 0.0001999941924453009, - "loss": 46.0, - "step": 44879 - }, - { - "epoch": 3.4313894145306496, - "grad_norm": 0.0023301737383008003, - "learning_rate": 0.00019999419218643338, - "loss": 46.0, - "step": 44880 - }, - { - "epoch": 3.4314658715140394, - "grad_norm": 0.0019882656633853912, - "learning_rate": 0.0001999941919275601, - "loss": 46.0, - "step": 44881 - }, - { - "epoch": 3.431542328497429, - "grad_norm": 0.0025007580406963825, - "learning_rate": 0.00019999419166868106, - "loss": 46.0, - "step": 44882 - }, - { - "epoch": 3.431618785480819, - "grad_norm": 0.0036483160220086575, - "learning_rate": 0.00019999419140979624, - "loss": 46.0, - "step": 44883 - }, - { - "epoch": 3.4316952424642087, - "grad_norm": 0.0036548005882650614, - "learning_rate": 0.00019999419115090565, - "loss": 46.0, - "step": 44884 - }, - { - "epoch": 3.4317716994475984, - "grad_norm": 0.0011429466539993882, - "learning_rate": 0.00019999419089200928, - "loss": 46.0, - "step": 44885 - }, - { - "epoch": 3.431848156430988, - "grad_norm": 0.0007388408412225544, - "learning_rate": 0.00019999419063310717, - "loss": 46.0, - "step": 44886 - }, - { - "epoch": 3.4319246134143775, - "grad_norm": 0.004041592590510845, - "learning_rate": 0.00019999419037419926, - "loss": 46.0, - "step": 44887 - }, - { - "epoch": 3.4320010703977673, - "grad_norm": 0.0011919206008315086, - "learning_rate": 0.0001999941901152856, - "loss": 46.0, - "step": 44888 - }, - { - "epoch": 3.432077527381157, - "grad_norm": 0.0030115016270428896, - "learning_rate": 0.00019999418985636617, - "loss": 46.0, - "step": 44889 - }, - { - "epoch": 3.4321539843645468, - "grad_norm": 0.007465067785233259, - "learning_rate": 0.00019999418959744096, - "loss": 46.0, - "step": 44890 - }, - { - "epoch": 3.4322304413479365, - "grad_norm": 0.0019680482801049948, - "learning_rate": 0.00019999418933851, - "loss": 46.0, - "step": 44891 - }, - { - "epoch": 3.4323068983313263, - "grad_norm": 0.0009815064258873463, - "learning_rate": 0.00019999418907957326, - "loss": 46.0, - "step": 44892 - }, - { - "epoch": 3.432383355314716, - "grad_norm": 0.0015862572472542524, - "learning_rate": 0.00019999418882063076, - "loss": 46.0, - "step": 44893 - }, - { - "epoch": 3.432459812298106, - "grad_norm": 0.0047988081350922585, - "learning_rate": 0.00019999418856168246, - "loss": 46.0, - "step": 44894 - }, - { - "epoch": 3.4325362692814956, - "grad_norm": 0.0013331896625459194, - "learning_rate": 0.00019999418830272841, - "loss": 46.0, - "step": 44895 - }, - { - "epoch": 3.4326127262648853, - "grad_norm": 0.0014283873606473207, - "learning_rate": 0.0001999941880437686, - "loss": 46.0, - "step": 44896 - }, - { - "epoch": 3.432689183248275, - "grad_norm": 0.0035420330241322517, - "learning_rate": 0.000199994187784803, - "loss": 46.0, - "step": 44897 - }, - { - "epoch": 3.432765640231665, - "grad_norm": 0.01783638633787632, - "learning_rate": 0.00019999418752583164, - "loss": 46.0, - "step": 44898 - }, - { - "epoch": 3.4328420972150546, - "grad_norm": 0.0004914290038868785, - "learning_rate": 0.00019999418726685453, - "loss": 46.0, - "step": 44899 - }, - { - "epoch": 3.432918554198444, - "grad_norm": 0.0025247198063880205, - "learning_rate": 0.00019999418700787161, - "loss": 46.0, - "step": 44900 - }, - { - "epoch": 3.4329950111818337, - "grad_norm": 0.0036685969680547714, - "learning_rate": 0.00019999418674888296, - "loss": 46.0, - "step": 44901 - }, - { - "epoch": 3.4330714681652235, - "grad_norm": 0.0024150358512997627, - "learning_rate": 0.00019999418648988852, - "loss": 46.0, - "step": 44902 - }, - { - "epoch": 3.433147925148613, - "grad_norm": 0.0038582610432058573, - "learning_rate": 0.00019999418623088832, - "loss": 46.0, - "step": 44903 - }, - { - "epoch": 3.433224382132003, - "grad_norm": 0.002038276055827737, - "learning_rate": 0.00019999418597188234, - "loss": 46.0, - "step": 44904 - }, - { - "epoch": 3.4333008391153927, - "grad_norm": 0.0008746012463234365, - "learning_rate": 0.00019999418571287062, - "loss": 46.0, - "step": 44905 - }, - { - "epoch": 3.4333772960987825, - "grad_norm": 0.002799250418320298, - "learning_rate": 0.00019999418545385312, - "loss": 46.0, - "step": 44906 - }, - { - "epoch": 3.4334537530821723, - "grad_norm": 0.0018621382769197226, - "learning_rate": 0.00019999418519482985, - "loss": 46.0, - "step": 44907 - }, - { - "epoch": 3.433530210065562, - "grad_norm": 0.0010429885005578399, - "learning_rate": 0.00019999418493580077, - "loss": 46.0, - "step": 44908 - }, - { - "epoch": 3.4336066670489513, - "grad_norm": 0.0016331266378983855, - "learning_rate": 0.00019999418467676596, - "loss": 46.0, - "step": 44909 - }, - { - "epoch": 3.433683124032341, - "grad_norm": 0.004243065603077412, - "learning_rate": 0.00019999418441772536, - "loss": 46.0, - "step": 44910 - }, - { - "epoch": 3.433759581015731, - "grad_norm": 0.002113878959789872, - "learning_rate": 0.00019999418415867903, - "loss": 46.0, - "step": 44911 - }, - { - "epoch": 3.4338360379991206, - "grad_norm": 0.0014938856475055218, - "learning_rate": 0.0001999941838996269, - "loss": 46.0, - "step": 44912 - }, - { - "epoch": 3.4339124949825104, - "grad_norm": 0.0018710580188781023, - "learning_rate": 0.000199994183640569, - "loss": 46.0, - "step": 44913 - }, - { - "epoch": 3.4339889519659, - "grad_norm": 0.003282850841060281, - "learning_rate": 0.00019999418338150534, - "loss": 46.0, - "step": 44914 - }, - { - "epoch": 3.43406540894929, - "grad_norm": 0.002514865715056658, - "learning_rate": 0.0001999941831224359, - "loss": 46.0, - "step": 44915 - }, - { - "epoch": 3.4341418659326797, - "grad_norm": 0.002084376523271203, - "learning_rate": 0.0001999941828633607, - "loss": 46.0, - "step": 44916 - }, - { - "epoch": 3.4342183229160694, - "grad_norm": 0.002161577809602022, - "learning_rate": 0.00019999418260427976, - "loss": 46.0, - "step": 44917 - }, - { - "epoch": 3.434294779899459, - "grad_norm": 0.0012893271632492542, - "learning_rate": 0.000199994182345193, - "loss": 46.0, - "step": 44918 - }, - { - "epoch": 3.434371236882849, - "grad_norm": 0.0011127229081466794, - "learning_rate": 0.0001999941820861005, - "loss": 46.0, - "step": 44919 - }, - { - "epoch": 3.4344476938662387, - "grad_norm": 0.0017635368276387453, - "learning_rate": 0.00019999418182700224, - "loss": 46.0, - "step": 44920 - }, - { - "epoch": 3.4345241508496285, - "grad_norm": 0.0032388526014983654, - "learning_rate": 0.00019999418156789816, - "loss": 46.0, - "step": 44921 - }, - { - "epoch": 3.434600607833018, - "grad_norm": 0.002855314640328288, - "learning_rate": 0.00019999418130878835, - "loss": 46.0, - "step": 44922 - }, - { - "epoch": 3.4346770648164076, - "grad_norm": 0.0013176175998523831, - "learning_rate": 0.00019999418104967278, - "loss": 46.0, - "step": 44923 - }, - { - "epoch": 3.4347535217997973, - "grad_norm": 0.004822809249162674, - "learning_rate": 0.00019999418079055144, - "loss": 46.0, - "step": 44924 - }, - { - "epoch": 3.434829978783187, - "grad_norm": 0.008785182610154152, - "learning_rate": 0.0001999941805314243, - "loss": 46.0, - "step": 44925 - }, - { - "epoch": 3.434906435766577, - "grad_norm": 0.000978841446340084, - "learning_rate": 0.00019999418027229142, - "loss": 46.0, - "step": 44926 - }, - { - "epoch": 3.4349828927499666, - "grad_norm": 0.0011776972096413374, - "learning_rate": 0.00019999418001315276, - "loss": 46.0, - "step": 44927 - }, - { - "epoch": 3.4350593497333564, - "grad_norm": 0.0028256208170205355, - "learning_rate": 0.0001999941797540083, - "loss": 46.0, - "step": 44928 - }, - { - "epoch": 3.435135806716746, - "grad_norm": 0.004236609674990177, - "learning_rate": 0.00019999417949485813, - "loss": 46.0, - "step": 44929 - }, - { - "epoch": 3.435212263700136, - "grad_norm": 0.001698176609352231, - "learning_rate": 0.00019999417923570215, - "loss": 46.0, - "step": 44930 - }, - { - "epoch": 3.435288720683525, - "grad_norm": 0.0009990448597818613, - "learning_rate": 0.0001999941789765404, - "loss": 46.0, - "step": 44931 - }, - { - "epoch": 3.435365177666915, - "grad_norm": 0.0025956081226468086, - "learning_rate": 0.00019999417871737293, - "loss": 46.0, - "step": 44932 - }, - { - "epoch": 3.4354416346503047, - "grad_norm": 0.0035286343190819025, - "learning_rate": 0.00019999417845819963, - "loss": 46.0, - "step": 44933 - }, - { - "epoch": 3.4355180916336945, - "grad_norm": 0.0036628209054470062, - "learning_rate": 0.00019999417819902059, - "loss": 46.0, - "step": 44934 - }, - { - "epoch": 3.4355945486170842, - "grad_norm": 0.0016416325233876705, - "learning_rate": 0.00019999417793983577, - "loss": 46.0, - "step": 44935 - }, - { - "epoch": 3.435671005600474, - "grad_norm": 0.0026466776616871357, - "learning_rate": 0.00019999417768064518, - "loss": 46.0, - "step": 44936 - }, - { - "epoch": 3.4357474625838638, - "grad_norm": 0.0018463265150785446, - "learning_rate": 0.00019999417742144884, - "loss": 46.0, - "step": 44937 - }, - { - "epoch": 3.4358239195672535, - "grad_norm": 0.0025940712075680494, - "learning_rate": 0.0001999941771622467, - "loss": 46.0, - "step": 44938 - }, - { - "epoch": 3.4359003765506433, - "grad_norm": 0.002723110606893897, - "learning_rate": 0.00019999417690303884, - "loss": 46.0, - "step": 44939 - }, - { - "epoch": 3.435976833534033, - "grad_norm": 0.0006902538589201868, - "learning_rate": 0.0001999941766438252, - "loss": 46.0, - "step": 44940 - }, - { - "epoch": 3.436053290517423, - "grad_norm": 0.0020823150407522917, - "learning_rate": 0.00019999417638460573, - "loss": 46.0, - "step": 44941 - }, - { - "epoch": 3.4361297475008126, - "grad_norm": 0.0024217634927481413, - "learning_rate": 0.00019999417612538055, - "loss": 46.0, - "step": 44942 - }, - { - "epoch": 3.436206204484202, - "grad_norm": 0.0022657637018710375, - "learning_rate": 0.00019999417586614957, - "loss": 46.0, - "step": 44943 - }, - { - "epoch": 3.4362826614675916, - "grad_norm": 0.002159512136131525, - "learning_rate": 0.00019999417560691285, - "loss": 46.0, - "step": 44944 - }, - { - "epoch": 3.4363591184509814, - "grad_norm": 0.0011793377343565226, - "learning_rate": 0.00019999417534767035, - "loss": 46.0, - "step": 44945 - }, - { - "epoch": 3.436435575434371, - "grad_norm": 0.01255052350461483, - "learning_rate": 0.00019999417508842206, - "loss": 46.0, - "step": 44946 - }, - { - "epoch": 3.436512032417761, - "grad_norm": 0.003107881173491478, - "learning_rate": 0.000199994174829168, - "loss": 46.0, - "step": 44947 - }, - { - "epoch": 3.4365884894011507, - "grad_norm": 0.015196037478744984, - "learning_rate": 0.00019999417456990822, - "loss": 46.0, - "step": 44948 - }, - { - "epoch": 3.4366649463845405, - "grad_norm": 0.0007579917437396944, - "learning_rate": 0.00019999417431064263, - "loss": 46.0, - "step": 44949 - }, - { - "epoch": 3.43674140336793, - "grad_norm": 0.002663498045876622, - "learning_rate": 0.0001999941740513713, - "loss": 46.0, - "step": 44950 - }, - { - "epoch": 3.43681786035132, - "grad_norm": 0.0014326348900794983, - "learning_rate": 0.00019999417379209416, - "loss": 46.0, - "step": 44951 - }, - { - "epoch": 3.4368943173347097, - "grad_norm": 0.004433400928974152, - "learning_rate": 0.00019999417353281127, - "loss": 46.0, - "step": 44952 - }, - { - "epoch": 3.436970774318099, - "grad_norm": 0.002390683628618717, - "learning_rate": 0.00019999417327352262, - "loss": 46.0, - "step": 44953 - }, - { - "epoch": 3.437047231301489, - "grad_norm": 0.002861342392861843, - "learning_rate": 0.00019999417301422818, - "loss": 46.0, - "step": 44954 - }, - { - "epoch": 3.4371236882848786, - "grad_norm": 0.004952490329742432, - "learning_rate": 0.00019999417275492798, - "loss": 46.0, - "step": 44955 - }, - { - "epoch": 3.4372001452682683, - "grad_norm": 0.0014386163093149662, - "learning_rate": 0.00019999417249562203, - "loss": 46.0, - "step": 44956 - }, - { - "epoch": 3.437276602251658, - "grad_norm": 0.002678332384675741, - "learning_rate": 0.0001999941722363103, - "loss": 46.0, - "step": 44957 - }, - { - "epoch": 3.437353059235048, - "grad_norm": 0.0026570528279989958, - "learning_rate": 0.00019999417197699278, - "loss": 46.0, - "step": 44958 - }, - { - "epoch": 3.4374295162184376, - "grad_norm": 0.0025048903189599514, - "learning_rate": 0.0001999941717176695, - "loss": 46.0, - "step": 44959 - }, - { - "epoch": 3.4375059732018274, - "grad_norm": 0.0020839301869273186, - "learning_rate": 0.00019999417145834047, - "loss": 46.0, - "step": 44960 - }, - { - "epoch": 3.437582430185217, - "grad_norm": 0.0009288383298553526, - "learning_rate": 0.00019999417119900565, - "loss": 46.0, - "step": 44961 - }, - { - "epoch": 3.437658887168607, - "grad_norm": 0.0017863233806565404, - "learning_rate": 0.0001999941709396651, - "loss": 46.0, - "step": 44962 - }, - { - "epoch": 3.4377353441519967, - "grad_norm": 0.0011920041870325804, - "learning_rate": 0.00019999417068031872, - "loss": 46.0, - "step": 44963 - }, - { - "epoch": 3.4378118011353864, - "grad_norm": 0.0072701722383499146, - "learning_rate": 0.00019999417042096661, - "loss": 46.0, - "step": 44964 - }, - { - "epoch": 3.4378882581187757, - "grad_norm": 0.000920695427339524, - "learning_rate": 0.00019999417016160873, - "loss": 46.0, - "step": 44965 - }, - { - "epoch": 3.4379647151021655, - "grad_norm": 0.0052647716365754604, - "learning_rate": 0.00019999416990224507, - "loss": 46.0, - "step": 44966 - }, - { - "epoch": 3.4380411720855553, - "grad_norm": 0.0010495687602087855, - "learning_rate": 0.00019999416964287564, - "loss": 46.0, - "step": 44967 - }, - { - "epoch": 3.438117629068945, - "grad_norm": 0.0015211127465590835, - "learning_rate": 0.00019999416938350044, - "loss": 46.0, - "step": 44968 - }, - { - "epoch": 3.438194086052335, - "grad_norm": 0.001882574986666441, - "learning_rate": 0.0001999941691241195, - "loss": 46.0, - "step": 44969 - }, - { - "epoch": 3.4382705430357245, - "grad_norm": 0.0014052526094019413, - "learning_rate": 0.00019999416886473274, - "loss": 46.0, - "step": 44970 - }, - { - "epoch": 3.4383470000191143, - "grad_norm": 0.004943019710481167, - "learning_rate": 0.00019999416860534027, - "loss": 46.0, - "step": 44971 - }, - { - "epoch": 3.438423457002504, - "grad_norm": 0.0022616649512201548, - "learning_rate": 0.00019999416834594197, - "loss": 46.0, - "step": 44972 - }, - { - "epoch": 3.438499913985894, - "grad_norm": 0.0026195053942501545, - "learning_rate": 0.00019999416808653796, - "loss": 46.0, - "step": 44973 - }, - { - "epoch": 3.4385763709692836, - "grad_norm": 0.0018388881580904126, - "learning_rate": 0.0001999941678271281, - "loss": 46.0, - "step": 44974 - }, - { - "epoch": 3.438652827952673, - "grad_norm": 0.005501854699105024, - "learning_rate": 0.00019999416756771258, - "loss": 46.0, - "step": 44975 - }, - { - "epoch": 3.4387292849360627, - "grad_norm": 0.001565106213092804, - "learning_rate": 0.00019999416730829121, - "loss": 46.0, - "step": 44976 - }, - { - "epoch": 3.4388057419194524, - "grad_norm": 0.0018467765767127275, - "learning_rate": 0.00019999416704886408, - "loss": 46.0, - "step": 44977 - }, - { - "epoch": 3.438882198902842, - "grad_norm": 0.0017670224187895656, - "learning_rate": 0.00019999416678943122, - "loss": 46.0, - "step": 44978 - }, - { - "epoch": 3.438958655886232, - "grad_norm": 0.0027717140037566423, - "learning_rate": 0.00019999416652999256, - "loss": 46.0, - "step": 44979 - }, - { - "epoch": 3.4390351128696217, - "grad_norm": 0.002137487754225731, - "learning_rate": 0.00019999416627054813, - "loss": 46.0, - "step": 44980 - }, - { - "epoch": 3.4391115698530115, - "grad_norm": 0.010148182511329651, - "learning_rate": 0.00019999416601109796, - "loss": 46.0, - "step": 44981 - }, - { - "epoch": 3.4391880268364012, - "grad_norm": 0.002510828198865056, - "learning_rate": 0.00019999416575164198, - "loss": 46.0, - "step": 44982 - }, - { - "epoch": 3.439264483819791, - "grad_norm": 0.0020173918455839157, - "learning_rate": 0.00019999416549218023, - "loss": 46.0, - "step": 44983 - }, - { - "epoch": 3.4393409408031808, - "grad_norm": 0.0028393741231411695, - "learning_rate": 0.00019999416523271274, - "loss": 46.0, - "step": 44984 - }, - { - "epoch": 3.4394173977865705, - "grad_norm": 0.0011860416270792484, - "learning_rate": 0.0001999941649732395, - "loss": 46.0, - "step": 44985 - }, - { - "epoch": 3.4394938547699603, - "grad_norm": 0.005023598670959473, - "learning_rate": 0.00019999416471376042, - "loss": 46.0, - "step": 44986 - }, - { - "epoch": 3.4395703117533496, - "grad_norm": 0.002815471263602376, - "learning_rate": 0.00019999416445427563, - "loss": 46.0, - "step": 44987 - }, - { - "epoch": 3.4396467687367394, - "grad_norm": 0.0023719302844256163, - "learning_rate": 0.00019999416419478504, - "loss": 46.0, - "step": 44988 - }, - { - "epoch": 3.439723225720129, - "grad_norm": 0.0026318971067667007, - "learning_rate": 0.00019999416393528868, - "loss": 46.0, - "step": 44989 - }, - { - "epoch": 3.439799682703519, - "grad_norm": 0.0036225770600140095, - "learning_rate": 0.00019999416367578657, - "loss": 46.0, - "step": 44990 - }, - { - "epoch": 3.4398761396869086, - "grad_norm": 0.0028483932837843895, - "learning_rate": 0.0001999941634162787, - "loss": 46.0, - "step": 44991 - }, - { - "epoch": 3.4399525966702984, - "grad_norm": 0.0016213920898735523, - "learning_rate": 0.00019999416315676506, - "loss": 46.0, - "step": 44992 - }, - { - "epoch": 3.440029053653688, - "grad_norm": 0.0014060037210583687, - "learning_rate": 0.00019999416289724563, - "loss": 46.0, - "step": 44993 - }, - { - "epoch": 3.440105510637078, - "grad_norm": 0.0018228681292384863, - "learning_rate": 0.00019999416263772043, - "loss": 46.0, - "step": 44994 - }, - { - "epoch": 3.4401819676204677, - "grad_norm": 0.0030857317615300417, - "learning_rate": 0.00019999416237818948, - "loss": 46.0, - "step": 44995 - }, - { - "epoch": 3.4402584246038574, - "grad_norm": 0.002523573115468025, - "learning_rate": 0.00019999416211865273, - "loss": 46.0, - "step": 44996 - }, - { - "epoch": 3.4403348815872468, - "grad_norm": 0.0011439020745456219, - "learning_rate": 0.00019999416185911026, - "loss": 46.0, - "step": 44997 - }, - { - "epoch": 3.4404113385706365, - "grad_norm": 0.0024027186445891857, - "learning_rate": 0.00019999416159956196, - "loss": 46.0, - "step": 44998 - }, - { - "epoch": 3.4404877955540263, - "grad_norm": 0.001416101586073637, - "learning_rate": 0.00019999416134000795, - "loss": 46.0, - "step": 44999 - }, - { - "epoch": 3.440564252537416, - "grad_norm": 0.0036262685898691416, - "learning_rate": 0.00019999416108044816, - "loss": 46.0, - "step": 45000 - }, - { - "epoch": 3.440640709520806, - "grad_norm": 0.0015977498842403293, - "learning_rate": 0.00019999416082088257, - "loss": 46.0, - "step": 45001 - }, - { - "epoch": 3.4407171665041956, - "grad_norm": 0.0026611043140292168, - "learning_rate": 0.0001999941605613112, - "loss": 46.0, - "step": 45002 - }, - { - "epoch": 3.4407936234875853, - "grad_norm": 0.0008026852738112211, - "learning_rate": 0.0001999941603017341, - "loss": 46.0, - "step": 45003 - }, - { - "epoch": 3.440870080470975, - "grad_norm": 0.0011025264393538237, - "learning_rate": 0.00019999416004215124, - "loss": 46.0, - "step": 45004 - }, - { - "epoch": 3.440946537454365, - "grad_norm": 0.0006148290704004467, - "learning_rate": 0.00019999415978256258, - "loss": 46.0, - "step": 45005 - }, - { - "epoch": 3.4410229944377546, - "grad_norm": 0.0031365377362817526, - "learning_rate": 0.00019999415952296813, - "loss": 46.0, - "step": 45006 - }, - { - "epoch": 3.4410994514211444, - "grad_norm": 0.002085742773488164, - "learning_rate": 0.00019999415926336795, - "loss": 46.0, - "step": 45007 - }, - { - "epoch": 3.441175908404534, - "grad_norm": 0.002416794653981924, - "learning_rate": 0.000199994159003762, - "loss": 46.0, - "step": 45008 - }, - { - "epoch": 3.4412523653879235, - "grad_norm": 0.0020822798833251, - "learning_rate": 0.00019999415874415028, - "loss": 46.0, - "step": 45009 - }, - { - "epoch": 3.441328822371313, - "grad_norm": 0.0030430806800723076, - "learning_rate": 0.00019999415848453276, - "loss": 46.0, - "step": 45010 - }, - { - "epoch": 3.441405279354703, - "grad_norm": 0.0010397444711998105, - "learning_rate": 0.0001999941582249095, - "loss": 46.0, - "step": 45011 - }, - { - "epoch": 3.4414817363380927, - "grad_norm": 0.004340566694736481, - "learning_rate": 0.00019999415796528047, - "loss": 46.0, - "step": 45012 - }, - { - "epoch": 3.4415581933214825, - "grad_norm": 0.002555574057623744, - "learning_rate": 0.00019999415770564566, - "loss": 46.0, - "step": 45013 - }, - { - "epoch": 3.4416346503048723, - "grad_norm": 0.001438225619494915, - "learning_rate": 0.0001999941574460051, - "loss": 46.0, - "step": 45014 - }, - { - "epoch": 3.441711107288262, - "grad_norm": 0.0004707752668764442, - "learning_rate": 0.00019999415718635873, - "loss": 46.0, - "step": 45015 - }, - { - "epoch": 3.441787564271652, - "grad_norm": 0.0008685371140018106, - "learning_rate": 0.00019999415692670662, - "loss": 46.0, - "step": 45016 - }, - { - "epoch": 3.4418640212550415, - "grad_norm": 0.000713616085704416, - "learning_rate": 0.00019999415666704874, - "loss": 46.0, - "step": 45017 - }, - { - "epoch": 3.441940478238431, - "grad_norm": 0.0024848226457834244, - "learning_rate": 0.00019999415640738511, - "loss": 46.0, - "step": 45018 - }, - { - "epoch": 3.4420169352218206, - "grad_norm": 0.0032812978606671095, - "learning_rate": 0.00019999415614771568, - "loss": 46.0, - "step": 45019 - }, - { - "epoch": 3.4420933922052104, - "grad_norm": 0.0011629536747932434, - "learning_rate": 0.00019999415588804048, - "loss": 46.0, - "step": 45020 - }, - { - "epoch": 3.4421698491886, - "grad_norm": 0.0016299558337777853, - "learning_rate": 0.00019999415562835953, - "loss": 46.0, - "step": 45021 - }, - { - "epoch": 3.44224630617199, - "grad_norm": 0.0015818693209439516, - "learning_rate": 0.0001999941553686728, - "loss": 46.0, - "step": 45022 - }, - { - "epoch": 3.4423227631553797, - "grad_norm": 0.00176207663025707, - "learning_rate": 0.00019999415510898032, - "loss": 46.0, - "step": 45023 - }, - { - "epoch": 3.4423992201387694, - "grad_norm": 0.0016777977580204606, - "learning_rate": 0.00019999415484928202, - "loss": 46.0, - "step": 45024 - }, - { - "epoch": 3.442475677122159, - "grad_norm": 0.004494350869208574, - "learning_rate": 0.00019999415458957803, - "loss": 46.0, - "step": 45025 - }, - { - "epoch": 3.442552134105549, - "grad_norm": 0.0008568787598051131, - "learning_rate": 0.00019999415432986822, - "loss": 46.0, - "step": 45026 - }, - { - "epoch": 3.4426285910889387, - "grad_norm": 0.0071852789260447025, - "learning_rate": 0.00019999415407015266, - "loss": 46.0, - "step": 45027 - }, - { - "epoch": 3.4427050480723285, - "grad_norm": 0.0021753127221018076, - "learning_rate": 0.0001999941538104313, - "loss": 46.0, - "step": 45028 - }, - { - "epoch": 3.4427815050557182, - "grad_norm": 0.0020551783964037895, - "learning_rate": 0.00019999415355070418, - "loss": 46.0, - "step": 45029 - }, - { - "epoch": 3.442857962039108, - "grad_norm": 0.003154277103021741, - "learning_rate": 0.0001999941532909713, - "loss": 46.0, - "step": 45030 - }, - { - "epoch": 3.4429344190224973, - "grad_norm": 0.001228502718731761, - "learning_rate": 0.00019999415303123265, - "loss": 46.0, - "step": 45031 - }, - { - "epoch": 3.443010876005887, - "grad_norm": 0.0017970663029700518, - "learning_rate": 0.00019999415277148822, - "loss": 46.0, - "step": 45032 - }, - { - "epoch": 3.443087332989277, - "grad_norm": 0.0025412687100470066, - "learning_rate": 0.00019999415251173804, - "loss": 46.0, - "step": 45033 - }, - { - "epoch": 3.4431637899726666, - "grad_norm": 0.0018300460651516914, - "learning_rate": 0.0001999941522519821, - "loss": 46.0, - "step": 45034 - }, - { - "epoch": 3.4432402469560563, - "grad_norm": 0.003766604932025075, - "learning_rate": 0.00019999415199222037, - "loss": 46.0, - "step": 45035 - }, - { - "epoch": 3.443316703939446, - "grad_norm": 0.003766162320971489, - "learning_rate": 0.00019999415173245288, - "loss": 46.0, - "step": 45036 - }, - { - "epoch": 3.443393160922836, - "grad_norm": 0.002715705195441842, - "learning_rate": 0.0001999941514726796, - "loss": 46.0, - "step": 45037 - }, - { - "epoch": 3.4434696179062256, - "grad_norm": 0.002420001197606325, - "learning_rate": 0.00019999415121290057, - "loss": 46.0, - "step": 45038 - }, - { - "epoch": 3.4435460748896154, - "grad_norm": 0.0019235039362683892, - "learning_rate": 0.00019999415095311578, - "loss": 46.0, - "step": 45039 - }, - { - "epoch": 3.4436225318730047, - "grad_norm": 0.0027975374832749367, - "learning_rate": 0.00019999415069332522, - "loss": 46.0, - "step": 45040 - }, - { - "epoch": 3.4436989888563945, - "grad_norm": 0.001645622425712645, - "learning_rate": 0.00019999415043352886, - "loss": 46.0, - "step": 45041 - }, - { - "epoch": 3.4437754458397842, - "grad_norm": 0.0017790887504816055, - "learning_rate": 0.00019999415017372675, - "loss": 46.0, - "step": 45042 - }, - { - "epoch": 3.443851902823174, - "grad_norm": 0.0012978228041902184, - "learning_rate": 0.0001999941499139189, - "loss": 46.0, - "step": 45043 - }, - { - "epoch": 3.4439283598065638, - "grad_norm": 0.0008418190409429371, - "learning_rate": 0.00019999414965410524, - "loss": 46.0, - "step": 45044 - }, - { - "epoch": 3.4440048167899535, - "grad_norm": 0.004874741192907095, - "learning_rate": 0.0001999941493942858, - "loss": 46.0, - "step": 45045 - }, - { - "epoch": 3.4440812737733433, - "grad_norm": 0.00124945433344692, - "learning_rate": 0.0001999941491344606, - "loss": 46.0, - "step": 45046 - }, - { - "epoch": 3.444157730756733, - "grad_norm": 0.0016876948066055775, - "learning_rate": 0.00019999414887462966, - "loss": 46.0, - "step": 45047 - }, - { - "epoch": 3.444234187740123, - "grad_norm": 0.0027740299701690674, - "learning_rate": 0.00019999414861479294, - "loss": 46.0, - "step": 45048 - }, - { - "epoch": 3.4443106447235126, - "grad_norm": 0.0008311325218528509, - "learning_rate": 0.00019999414835495047, - "loss": 46.0, - "step": 45049 - }, - { - "epoch": 3.4443871017069023, - "grad_norm": 0.003162291133776307, - "learning_rate": 0.0001999941480951022, - "loss": 46.0, - "step": 45050 - }, - { - "epoch": 3.444463558690292, - "grad_norm": 0.0013432904379442334, - "learning_rate": 0.00019999414783524816, - "loss": 46.0, - "step": 45051 - }, - { - "epoch": 3.444540015673682, - "grad_norm": 0.001604159246198833, - "learning_rate": 0.00019999414757538838, - "loss": 46.0, - "step": 45052 - }, - { - "epoch": 3.444616472657071, - "grad_norm": 0.0013794914120808244, - "learning_rate": 0.0001999941473155228, - "loss": 46.0, - "step": 45053 - }, - { - "epoch": 3.444692929640461, - "grad_norm": 0.0005842077080160379, - "learning_rate": 0.00019999414705565145, - "loss": 46.0, - "step": 45054 - }, - { - "epoch": 3.4447693866238507, - "grad_norm": 0.0008987685432657599, - "learning_rate": 0.00019999414679577434, - "loss": 46.0, - "step": 45055 - }, - { - "epoch": 3.4448458436072404, - "grad_norm": 0.0025070877745747566, - "learning_rate": 0.0001999941465358915, - "loss": 46.0, - "step": 45056 - }, - { - "epoch": 3.44492230059063, - "grad_norm": 0.0022104973904788494, - "learning_rate": 0.00019999414627600284, - "loss": 46.0, - "step": 45057 - }, - { - "epoch": 3.44499875757402, - "grad_norm": 0.0014750424306839705, - "learning_rate": 0.0001999941460161084, - "loss": 46.0, - "step": 45058 - }, - { - "epoch": 3.4450752145574097, - "grad_norm": 0.002425615908578038, - "learning_rate": 0.00019999414575620823, - "loss": 46.0, - "step": 45059 - }, - { - "epoch": 3.4451516715407995, - "grad_norm": 0.0036413762718439102, - "learning_rate": 0.00019999414549630226, - "loss": 46.0, - "step": 45060 - }, - { - "epoch": 3.4452281285241892, - "grad_norm": 0.0014508320018649101, - "learning_rate": 0.00019999414523639054, - "loss": 46.0, - "step": 45061 - }, - { - "epoch": 3.4453045855075786, - "grad_norm": 0.000787628348916769, - "learning_rate": 0.00019999414497647307, - "loss": 46.0, - "step": 45062 - }, - { - "epoch": 3.4453810424909683, - "grad_norm": 0.0011673078406602144, - "learning_rate": 0.0001999941447165498, - "loss": 46.0, - "step": 45063 - }, - { - "epoch": 3.445457499474358, - "grad_norm": 0.0069952961057424545, - "learning_rate": 0.00019999414445662076, - "loss": 46.0, - "step": 45064 - }, - { - "epoch": 3.445533956457748, - "grad_norm": 0.0029303813353180885, - "learning_rate": 0.00019999414419668597, - "loss": 46.0, - "step": 45065 - }, - { - "epoch": 3.4456104134411376, - "grad_norm": 0.0021008700132369995, - "learning_rate": 0.0001999941439367454, - "loss": 46.0, - "step": 45066 - }, - { - "epoch": 3.4456868704245274, - "grad_norm": 0.0029025902040302753, - "learning_rate": 0.00019999414367679908, - "loss": 46.0, - "step": 45067 - }, - { - "epoch": 3.445763327407917, - "grad_norm": 0.0013081106590107083, - "learning_rate": 0.00019999414341684694, - "loss": 46.0, - "step": 45068 - }, - { - "epoch": 3.445839784391307, - "grad_norm": 0.0017352878348901868, - "learning_rate": 0.0001999941431568891, - "loss": 46.0, - "step": 45069 - }, - { - "epoch": 3.4459162413746967, - "grad_norm": 0.0005688758683390915, - "learning_rate": 0.00019999414289692544, - "loss": 46.0, - "step": 45070 - }, - { - "epoch": 3.4459926983580864, - "grad_norm": 0.001002923701889813, - "learning_rate": 0.00019999414263695604, - "loss": 46.0, - "step": 45071 - }, - { - "epoch": 3.446069155341476, - "grad_norm": 0.0022857871372252703, - "learning_rate": 0.00019999414237698083, - "loss": 46.0, - "step": 45072 - }, - { - "epoch": 3.446145612324866, - "grad_norm": 0.00481349928304553, - "learning_rate": 0.0001999941421169999, - "loss": 46.0, - "step": 45073 - }, - { - "epoch": 3.4462220693082553, - "grad_norm": 0.0040299552492797375, - "learning_rate": 0.00019999414185701317, - "loss": 46.0, - "step": 45074 - }, - { - "epoch": 3.446298526291645, - "grad_norm": 0.0024729405995458364, - "learning_rate": 0.00019999414159702067, - "loss": 46.0, - "step": 45075 - }, - { - "epoch": 3.4463749832750348, - "grad_norm": 0.005155516788363457, - "learning_rate": 0.0001999941413370224, - "loss": 46.0, - "step": 45076 - }, - { - "epoch": 3.4464514402584245, - "grad_norm": 0.0011493430938571692, - "learning_rate": 0.0001999941410770184, - "loss": 46.0, - "step": 45077 - }, - { - "epoch": 3.4465278972418143, - "grad_norm": 0.00148152990732342, - "learning_rate": 0.0001999941408170086, - "loss": 46.0, - "step": 45078 - }, - { - "epoch": 3.446604354225204, - "grad_norm": 0.014683982357382774, - "learning_rate": 0.00019999414055699304, - "loss": 46.0, - "step": 45079 - }, - { - "epoch": 3.446680811208594, - "grad_norm": 0.0012630947167053819, - "learning_rate": 0.0001999941402969717, - "loss": 46.0, - "step": 45080 - }, - { - "epoch": 3.4467572681919836, - "grad_norm": 0.0008803530363366008, - "learning_rate": 0.0001999941400369446, - "loss": 46.0, - "step": 45081 - }, - { - "epoch": 3.4468337251753733, - "grad_norm": 0.003138320753350854, - "learning_rate": 0.0001999941397769117, - "loss": 46.0, - "step": 45082 - }, - { - "epoch": 3.446910182158763, - "grad_norm": 0.003982197493314743, - "learning_rate": 0.0001999941395168731, - "loss": 46.0, - "step": 45083 - }, - { - "epoch": 3.4469866391421524, - "grad_norm": 0.0015992287080734968, - "learning_rate": 0.00019999413925682867, - "loss": 46.0, - "step": 45084 - }, - { - "epoch": 3.447063096125542, - "grad_norm": 0.0026897152420133352, - "learning_rate": 0.00019999413899677847, - "loss": 46.0, - "step": 45085 - }, - { - "epoch": 3.447139553108932, - "grad_norm": 0.0012032792437821627, - "learning_rate": 0.00019999413873672252, - "loss": 46.0, - "step": 45086 - }, - { - "epoch": 3.4472160100923217, - "grad_norm": 0.0053362417966127396, - "learning_rate": 0.00019999413847666083, - "loss": 46.0, - "step": 45087 - }, - { - "epoch": 3.4472924670757115, - "grad_norm": 0.001471990835852921, - "learning_rate": 0.00019999413821659333, - "loss": 46.0, - "step": 45088 - }, - { - "epoch": 3.4473689240591012, - "grad_norm": 0.0010681929998099804, - "learning_rate": 0.00019999413795652006, - "loss": 46.0, - "step": 45089 - }, - { - "epoch": 3.447445381042491, - "grad_norm": 0.0019076806493103504, - "learning_rate": 0.00019999413769644105, - "loss": 46.0, - "step": 45090 - }, - { - "epoch": 3.4475218380258807, - "grad_norm": 0.0026488739531487226, - "learning_rate": 0.00019999413743635624, - "loss": 46.0, - "step": 45091 - }, - { - "epoch": 3.4475982950092705, - "grad_norm": 0.0008688176749274135, - "learning_rate": 0.0001999941371762657, - "loss": 46.0, - "step": 45092 - }, - { - "epoch": 3.4476747519926603, - "grad_norm": 0.0009435151587240398, - "learning_rate": 0.00019999413691616934, - "loss": 46.0, - "step": 45093 - }, - { - "epoch": 3.44775120897605, - "grad_norm": 0.0013512911973521113, - "learning_rate": 0.00019999413665606724, - "loss": 46.0, - "step": 45094 - }, - { - "epoch": 3.44782766595944, - "grad_norm": 0.0019017241429537535, - "learning_rate": 0.00019999413639595936, - "loss": 46.0, - "step": 45095 - }, - { - "epoch": 3.447904122942829, - "grad_norm": 0.0019924594089388847, - "learning_rate": 0.00019999413613584573, - "loss": 46.0, - "step": 45096 - }, - { - "epoch": 3.447980579926219, - "grad_norm": 0.0013535883044824004, - "learning_rate": 0.00019999413587572633, - "loss": 46.0, - "step": 45097 - }, - { - "epoch": 3.4480570369096086, - "grad_norm": 0.002430856693536043, - "learning_rate": 0.00019999413561560113, - "loss": 46.0, - "step": 45098 - }, - { - "epoch": 3.4481334938929984, - "grad_norm": 0.002370377304032445, - "learning_rate": 0.00019999413535547018, - "loss": 46.0, - "step": 45099 - }, - { - "epoch": 3.448209950876388, - "grad_norm": 0.0012367168674245477, - "learning_rate": 0.00019999413509533346, - "loss": 46.0, - "step": 45100 - }, - { - "epoch": 3.448286407859778, - "grad_norm": 0.00196105451323092, - "learning_rate": 0.000199994134835191, - "loss": 46.0, - "step": 45101 - }, - { - "epoch": 3.4483628648431677, - "grad_norm": 0.0034136022441089153, - "learning_rate": 0.00019999413457504273, - "loss": 46.0, - "step": 45102 - }, - { - "epoch": 3.4484393218265574, - "grad_norm": 0.0010997152421623468, - "learning_rate": 0.0001999941343148887, - "loss": 46.0, - "step": 45103 - }, - { - "epoch": 3.448515778809947, - "grad_norm": 0.002138302428647876, - "learning_rate": 0.0001999941340547289, - "loss": 46.0, - "step": 45104 - }, - { - "epoch": 3.448592235793337, - "grad_norm": 0.0025186294224113226, - "learning_rate": 0.00019999413379456334, - "loss": 46.0, - "step": 45105 - }, - { - "epoch": 3.4486686927767263, - "grad_norm": 0.0023291988763958216, - "learning_rate": 0.000199994133534392, - "loss": 46.0, - "step": 45106 - }, - { - "epoch": 3.448745149760116, - "grad_norm": 0.0024258801713585854, - "learning_rate": 0.00019999413327421493, - "loss": 46.0, - "step": 45107 - }, - { - "epoch": 3.448821606743506, - "grad_norm": 0.0016182144172489643, - "learning_rate": 0.00019999413301403202, - "loss": 46.0, - "step": 45108 - }, - { - "epoch": 3.4488980637268956, - "grad_norm": 0.002611712086945772, - "learning_rate": 0.0001999941327538434, - "loss": 46.0, - "step": 45109 - }, - { - "epoch": 3.4489745207102853, - "grad_norm": 0.001078123808838427, - "learning_rate": 0.000199994132493649, - "loss": 46.0, - "step": 45110 - }, - { - "epoch": 3.449050977693675, - "grad_norm": 0.0024431785568594933, - "learning_rate": 0.00019999413223344882, - "loss": 46.0, - "step": 45111 - }, - { - "epoch": 3.449127434677065, - "grad_norm": 0.0022236232180148363, - "learning_rate": 0.00019999413197324285, - "loss": 46.0, - "step": 45112 - }, - { - "epoch": 3.4492038916604546, - "grad_norm": 0.00159554707352072, - "learning_rate": 0.00019999413171303116, - "loss": 46.0, - "step": 45113 - }, - { - "epoch": 3.4492803486438444, - "grad_norm": 0.0010796718997880816, - "learning_rate": 0.00019999413145281366, - "loss": 46.0, - "step": 45114 - }, - { - "epoch": 3.449356805627234, - "grad_norm": 0.0032348039094358683, - "learning_rate": 0.0001999941311925904, - "loss": 46.0, - "step": 45115 - }, - { - "epoch": 3.449433262610624, - "grad_norm": 0.001142166554927826, - "learning_rate": 0.00019999413093236138, - "loss": 46.0, - "step": 45116 - }, - { - "epoch": 3.4495097195940136, - "grad_norm": 0.0012506458442658186, - "learning_rate": 0.0001999941306721266, - "loss": 46.0, - "step": 45117 - }, - { - "epoch": 3.449586176577403, - "grad_norm": 0.0032766899093985558, - "learning_rate": 0.000199994130411886, - "loss": 46.0, - "step": 45118 - }, - { - "epoch": 3.4496626335607927, - "grad_norm": 0.0034516965970396996, - "learning_rate": 0.0001999941301516397, - "loss": 46.0, - "step": 45119 - }, - { - "epoch": 3.4497390905441825, - "grad_norm": 0.005322735290974379, - "learning_rate": 0.0001999941298913876, - "loss": 46.0, - "step": 45120 - }, - { - "epoch": 3.4498155475275722, - "grad_norm": 0.0027151722460985184, - "learning_rate": 0.00019999412963112975, - "loss": 46.0, - "step": 45121 - }, - { - "epoch": 3.449892004510962, - "grad_norm": 0.00263771740719676, - "learning_rate": 0.00019999412937086607, - "loss": 46.0, - "step": 45122 - }, - { - "epoch": 3.4499684614943518, - "grad_norm": 0.0011744481744244695, - "learning_rate": 0.00019999412911059667, - "loss": 46.0, - "step": 45123 - }, - { - "epoch": 3.4500449184777415, - "grad_norm": 0.003663536161184311, - "learning_rate": 0.0001999941288503215, - "loss": 46.0, - "step": 45124 - }, - { - "epoch": 3.4501213754611313, - "grad_norm": 0.0019422996556386352, - "learning_rate": 0.00019999412859004058, - "loss": 46.0, - "step": 45125 - }, - { - "epoch": 3.450197832444521, - "grad_norm": 0.0009640787611715496, - "learning_rate": 0.00019999412832975386, - "loss": 46.0, - "step": 45126 - }, - { - "epoch": 3.450274289427911, - "grad_norm": 0.0014606183394789696, - "learning_rate": 0.00019999412806946136, - "loss": 46.0, - "step": 45127 - }, - { - "epoch": 3.4503507464113, - "grad_norm": 0.020403439179062843, - "learning_rate": 0.00019999412780916312, - "loss": 46.0, - "step": 45128 - }, - { - "epoch": 3.45042720339469, - "grad_norm": 0.0023286258801817894, - "learning_rate": 0.00019999412754885908, - "loss": 46.0, - "step": 45129 - }, - { - "epoch": 3.4505036603780797, - "grad_norm": 0.00200326438061893, - "learning_rate": 0.0001999941272885493, - "loss": 46.0, - "step": 45130 - }, - { - "epoch": 3.4505801173614694, - "grad_norm": 0.002215000567957759, - "learning_rate": 0.0001999941270282337, - "loss": 46.0, - "step": 45131 - }, - { - "epoch": 3.450656574344859, - "grad_norm": 0.0033983380999416113, - "learning_rate": 0.0001999941267679124, - "loss": 46.0, - "step": 45132 - }, - { - "epoch": 3.450733031328249, - "grad_norm": 0.002194545930251479, - "learning_rate": 0.0001999941265075853, - "loss": 46.0, - "step": 45133 - }, - { - "epoch": 3.4508094883116387, - "grad_norm": 0.0012085940688848495, - "learning_rate": 0.00019999412624725245, - "loss": 46.0, - "step": 45134 - }, - { - "epoch": 3.4508859452950285, - "grad_norm": 0.00337292836047709, - "learning_rate": 0.0001999941259869138, - "loss": 46.0, - "step": 45135 - }, - { - "epoch": 3.450962402278418, - "grad_norm": 0.001386150368489325, - "learning_rate": 0.0001999941257265694, - "loss": 46.0, - "step": 45136 - }, - { - "epoch": 3.451038859261808, - "grad_norm": 0.004297069739550352, - "learning_rate": 0.0001999941254662192, - "loss": 46.0, - "step": 45137 - }, - { - "epoch": 3.4511153162451977, - "grad_norm": 0.002161393640562892, - "learning_rate": 0.00019999412520586328, - "loss": 46.0, - "step": 45138 - }, - { - "epoch": 3.4511917732285875, - "grad_norm": 0.0014598227571696043, - "learning_rate": 0.00019999412494550156, - "loss": 46.0, - "step": 45139 - }, - { - "epoch": 3.451268230211977, - "grad_norm": 0.0023323611821979284, - "learning_rate": 0.00019999412468513407, - "loss": 46.0, - "step": 45140 - }, - { - "epoch": 3.4513446871953666, - "grad_norm": 0.00313792796805501, - "learning_rate": 0.00019999412442476083, - "loss": 46.0, - "step": 45141 - }, - { - "epoch": 3.4514211441787563, - "grad_norm": 0.002790069440379739, - "learning_rate": 0.00019999412416438182, - "loss": 46.0, - "step": 45142 - }, - { - "epoch": 3.451497601162146, - "grad_norm": 0.004355615470558405, - "learning_rate": 0.00019999412390399703, - "loss": 46.0, - "step": 45143 - }, - { - "epoch": 3.451574058145536, - "grad_norm": 0.0027639043983072042, - "learning_rate": 0.00019999412364360644, - "loss": 46.0, - "step": 45144 - }, - { - "epoch": 3.4516505151289256, - "grad_norm": 0.0015308116562664509, - "learning_rate": 0.00019999412338321014, - "loss": 46.0, - "step": 45145 - }, - { - "epoch": 3.4517269721123154, - "grad_norm": 0.0014943835558369756, - "learning_rate": 0.00019999412312280803, - "loss": 46.0, - "step": 45146 - }, - { - "epoch": 3.451803429095705, - "grad_norm": 0.0018667022231966257, - "learning_rate": 0.00019999412286240018, - "loss": 46.0, - "step": 45147 - }, - { - "epoch": 3.451879886079095, - "grad_norm": 0.004269406199455261, - "learning_rate": 0.00019999412260198653, - "loss": 46.0, - "step": 45148 - }, - { - "epoch": 3.4519563430624842, - "grad_norm": 0.0013109026476740837, - "learning_rate": 0.0001999941223415671, - "loss": 46.0, - "step": 45149 - }, - { - "epoch": 3.452032800045874, - "grad_norm": 0.002175177214667201, - "learning_rate": 0.00019999412208114196, - "loss": 46.0, - "step": 45150 - }, - { - "epoch": 3.4521092570292637, - "grad_norm": 0.003254286479204893, - "learning_rate": 0.000199994121820711, - "loss": 46.0, - "step": 45151 - }, - { - "epoch": 3.4521857140126535, - "grad_norm": 0.0011905268765985966, - "learning_rate": 0.0001999941215602743, - "loss": 46.0, - "step": 45152 - }, - { - "epoch": 3.4522621709960433, - "grad_norm": 0.0011312214192003012, - "learning_rate": 0.00019999412129983183, - "loss": 46.0, - "step": 45153 - }, - { - "epoch": 3.452338627979433, - "grad_norm": 0.004624643363058567, - "learning_rate": 0.00019999412103938356, - "loss": 46.0, - "step": 45154 - }, - { - "epoch": 3.452415084962823, - "grad_norm": 0.0022143993992358446, - "learning_rate": 0.00019999412077892955, - "loss": 46.0, - "step": 45155 - }, - { - "epoch": 3.4524915419462125, - "grad_norm": 0.0016871481202542782, - "learning_rate": 0.00019999412051846974, - "loss": 46.0, - "step": 45156 - }, - { - "epoch": 3.4525679989296023, - "grad_norm": 0.0007682691211812198, - "learning_rate": 0.00019999412025800418, - "loss": 46.0, - "step": 45157 - }, - { - "epoch": 3.452644455912992, - "grad_norm": 0.0018926331540569663, - "learning_rate": 0.00019999411999753288, - "loss": 46.0, - "step": 45158 - }, - { - "epoch": 3.452720912896382, - "grad_norm": 0.0016012253472581506, - "learning_rate": 0.0001999941197370558, - "loss": 46.0, - "step": 45159 - }, - { - "epoch": 3.4527973698797716, - "grad_norm": 0.0035411352291703224, - "learning_rate": 0.00019999411947657292, - "loss": 46.0, - "step": 45160 - }, - { - "epoch": 3.4528738268631614, - "grad_norm": 0.0017030660528689623, - "learning_rate": 0.0001999941192160843, - "loss": 46.0, - "step": 45161 - }, - { - "epoch": 3.4529502838465507, - "grad_norm": 0.003867089981213212, - "learning_rate": 0.0001999941189555899, - "loss": 46.0, - "step": 45162 - }, - { - "epoch": 3.4530267408299404, - "grad_norm": 0.001563460100442171, - "learning_rate": 0.0001999941186950897, - "loss": 46.0, - "step": 45163 - }, - { - "epoch": 3.45310319781333, - "grad_norm": 0.0018809012835845351, - "learning_rate": 0.00019999411843458375, - "loss": 46.0, - "step": 45164 - }, - { - "epoch": 3.45317965479672, - "grad_norm": 0.0022518320474773645, - "learning_rate": 0.00019999411817407206, - "loss": 46.0, - "step": 45165 - }, - { - "epoch": 3.4532561117801097, - "grad_norm": 0.0012009054189547896, - "learning_rate": 0.00019999411791355457, - "loss": 46.0, - "step": 45166 - }, - { - "epoch": 3.4533325687634995, - "grad_norm": 0.0023504241835325956, - "learning_rate": 0.00019999411765303133, - "loss": 46.0, - "step": 45167 - }, - { - "epoch": 3.4534090257468892, - "grad_norm": 0.008862879127264023, - "learning_rate": 0.00019999411739250232, - "loss": 46.0, - "step": 45168 - }, - { - "epoch": 3.453485482730279, - "grad_norm": 0.002991313813254237, - "learning_rate": 0.0001999941171319675, - "loss": 46.0, - "step": 45169 - }, - { - "epoch": 3.4535619397136688, - "grad_norm": 0.0015789506724104285, - "learning_rate": 0.00019999411687142695, - "loss": 46.0, - "step": 45170 - }, - { - "epoch": 3.453638396697058, - "grad_norm": 0.004376812372356653, - "learning_rate": 0.00019999411661088064, - "loss": 46.0, - "step": 45171 - }, - { - "epoch": 3.453714853680448, - "grad_norm": 0.0005093111540190876, - "learning_rate": 0.00019999411635032854, - "loss": 46.0, - "step": 45172 - }, - { - "epoch": 3.4537913106638376, - "grad_norm": 0.0018366910517215729, - "learning_rate": 0.0001999941160897707, - "loss": 46.0, - "step": 45173 - }, - { - "epoch": 3.4538677676472274, - "grad_norm": 0.0034219748340547085, - "learning_rate": 0.00019999411582920703, - "loss": 46.0, - "step": 45174 - }, - { - "epoch": 3.453944224630617, - "grad_norm": 0.0029362551867961884, - "learning_rate": 0.00019999411556863766, - "loss": 46.0, - "step": 45175 - }, - { - "epoch": 3.454020681614007, - "grad_norm": 0.0019654249772429466, - "learning_rate": 0.0001999941153080625, - "loss": 46.0, - "step": 45176 - }, - { - "epoch": 3.4540971385973966, - "grad_norm": 0.0031968653202056885, - "learning_rate": 0.00019999411504748152, - "loss": 46.0, - "step": 45177 - }, - { - "epoch": 3.4541735955807864, - "grad_norm": 0.004857442807406187, - "learning_rate": 0.00019999411478689483, - "loss": 46.0, - "step": 45178 - }, - { - "epoch": 3.454250052564176, - "grad_norm": 0.003389917779713869, - "learning_rate": 0.00019999411452630236, - "loss": 46.0, - "step": 45179 - }, - { - "epoch": 3.454326509547566, - "grad_norm": 0.0029015298932790756, - "learning_rate": 0.0001999941142657041, - "loss": 46.0, - "step": 45180 - }, - { - "epoch": 3.4544029665309557, - "grad_norm": 0.0019460406620055437, - "learning_rate": 0.0001999941140051001, - "loss": 46.0, - "step": 45181 - }, - { - "epoch": 3.4544794235143454, - "grad_norm": 0.003322836710140109, - "learning_rate": 0.0001999941137444903, - "loss": 46.0, - "step": 45182 - }, - { - "epoch": 3.454555880497735, - "grad_norm": 0.001839798642322421, - "learning_rate": 0.00019999411348387475, - "loss": 46.0, - "step": 45183 - }, - { - "epoch": 3.4546323374811245, - "grad_norm": 0.0019660566467791796, - "learning_rate": 0.00019999411322325341, - "loss": 46.0, - "step": 45184 - }, - { - "epoch": 3.4547087944645143, - "grad_norm": 0.00232500908896327, - "learning_rate": 0.00019999411296262634, - "loss": 46.0, - "step": 45185 - }, - { - "epoch": 3.454785251447904, - "grad_norm": 0.00673377001658082, - "learning_rate": 0.00019999411270199349, - "loss": 46.0, - "step": 45186 - }, - { - "epoch": 3.454861708431294, - "grad_norm": 0.007242770865559578, - "learning_rate": 0.00019999411244135483, - "loss": 46.0, - "step": 45187 - }, - { - "epoch": 3.4549381654146836, - "grad_norm": 0.0025004667695611715, - "learning_rate": 0.00019999411218071044, - "loss": 46.0, - "step": 45188 - }, - { - "epoch": 3.4550146223980733, - "grad_norm": 0.002810709411278367, - "learning_rate": 0.00019999411192006027, - "loss": 46.0, - "step": 45189 - }, - { - "epoch": 3.455091079381463, - "grad_norm": 0.0016391208628192544, - "learning_rate": 0.00019999411165940435, - "loss": 46.0, - "step": 45190 - }, - { - "epoch": 3.455167536364853, - "grad_norm": 0.0008896089857444167, - "learning_rate": 0.00019999411139874263, - "loss": 46.0, - "step": 45191 - }, - { - "epoch": 3.4552439933482426, - "grad_norm": 0.0033274965826421976, - "learning_rate": 0.00019999411113807517, - "loss": 46.0, - "step": 45192 - }, - { - "epoch": 3.455320450331632, - "grad_norm": 0.0012692732270807028, - "learning_rate": 0.0001999941108774019, - "loss": 46.0, - "step": 45193 - }, - { - "epoch": 3.4553969073150217, - "grad_norm": 0.004666381515562534, - "learning_rate": 0.0001999941106167229, - "loss": 46.0, - "step": 45194 - }, - { - "epoch": 3.4554733642984115, - "grad_norm": 0.0026167805772274733, - "learning_rate": 0.0001999941103560381, - "loss": 46.0, - "step": 45195 - }, - { - "epoch": 3.455549821281801, - "grad_norm": 0.004648515023291111, - "learning_rate": 0.00019999411009534758, - "loss": 46.0, - "step": 45196 - }, - { - "epoch": 3.455626278265191, - "grad_norm": 0.0026463312096893787, - "learning_rate": 0.00019999410983465124, - "loss": 46.0, - "step": 45197 - }, - { - "epoch": 3.4557027352485807, - "grad_norm": 0.0025514094159007072, - "learning_rate": 0.00019999410957394914, - "loss": 46.0, - "step": 45198 - }, - { - "epoch": 3.4557791922319705, - "grad_norm": 0.0013882337370887399, - "learning_rate": 0.00019999410931324126, - "loss": 46.0, - "step": 45199 - }, - { - "epoch": 3.4558556492153603, - "grad_norm": 0.0010888864053413272, - "learning_rate": 0.00019999410905252764, - "loss": 46.0, - "step": 45200 - }, - { - "epoch": 3.45593210619875, - "grad_norm": 0.0008232976542785764, - "learning_rate": 0.00019999410879180824, - "loss": 46.0, - "step": 45201 - }, - { - "epoch": 3.45600856318214, - "grad_norm": 0.0022681825794279575, - "learning_rate": 0.00019999410853108307, - "loss": 46.0, - "step": 45202 - }, - { - "epoch": 3.4560850201655295, - "grad_norm": 0.0019660198595374823, - "learning_rate": 0.00019999410827035215, - "loss": 46.0, - "step": 45203 - }, - { - "epoch": 3.4561614771489193, - "grad_norm": 0.0011629970977082849, - "learning_rate": 0.00019999410800961544, - "loss": 46.0, - "step": 45204 - }, - { - "epoch": 3.4562379341323086, - "grad_norm": 0.0024486305192112923, - "learning_rate": 0.00019999410774887294, - "loss": 46.0, - "step": 45205 - }, - { - "epoch": 3.4563143911156984, - "grad_norm": 0.0018469431670382619, - "learning_rate": 0.00019999410748812473, - "loss": 46.0, - "step": 45206 - }, - { - "epoch": 3.456390848099088, - "grad_norm": 0.0025218394584953785, - "learning_rate": 0.0001999941072273707, - "loss": 46.0, - "step": 45207 - }, - { - "epoch": 3.456467305082478, - "grad_norm": 0.0036653392016887665, - "learning_rate": 0.0001999941069666109, - "loss": 46.0, - "step": 45208 - }, - { - "epoch": 3.4565437620658677, - "grad_norm": 0.001371211837977171, - "learning_rate": 0.00019999410670584538, - "loss": 46.0, - "step": 45209 - }, - { - "epoch": 3.4566202190492574, - "grad_norm": 0.011854550801217556, - "learning_rate": 0.00019999410644507405, - "loss": 46.0, - "step": 45210 - }, - { - "epoch": 3.456696676032647, - "grad_norm": 0.001393481157720089, - "learning_rate": 0.00019999410618429697, - "loss": 46.0, - "step": 45211 - }, - { - "epoch": 3.456773133016037, - "grad_norm": 0.0008507466409355402, - "learning_rate": 0.0001999941059235141, - "loss": 46.0, - "step": 45212 - }, - { - "epoch": 3.4568495899994267, - "grad_norm": 0.0015412586508318782, - "learning_rate": 0.00019999410566272547, - "loss": 46.0, - "step": 45213 - }, - { - "epoch": 3.4569260469828165, - "grad_norm": 0.004121486097574234, - "learning_rate": 0.00019999410540193108, - "loss": 46.0, - "step": 45214 - }, - { - "epoch": 3.457002503966206, - "grad_norm": 0.006144287530332804, - "learning_rate": 0.00019999410514113093, - "loss": 46.0, - "step": 45215 - }, - { - "epoch": 3.4570789609495955, - "grad_norm": 0.003407243639230728, - "learning_rate": 0.00019999410488032496, - "loss": 46.0, - "step": 45216 - }, - { - "epoch": 3.4571554179329853, - "grad_norm": 0.002048554364591837, - "learning_rate": 0.0001999941046195133, - "loss": 46.0, - "step": 45217 - }, - { - "epoch": 3.457231874916375, - "grad_norm": 0.004459748975932598, - "learning_rate": 0.00019999410435869578, - "loss": 46.0, - "step": 45218 - }, - { - "epoch": 3.457308331899765, - "grad_norm": 0.002758754650130868, - "learning_rate": 0.00019999410409787257, - "loss": 46.0, - "step": 45219 - }, - { - "epoch": 3.4573847888831546, - "grad_norm": 0.004380478523671627, - "learning_rate": 0.00019999410383704356, - "loss": 46.0, - "step": 45220 - }, - { - "epoch": 3.4574612458665444, - "grad_norm": 0.003762703388929367, - "learning_rate": 0.00019999410357620878, - "loss": 46.0, - "step": 45221 - }, - { - "epoch": 3.457537702849934, - "grad_norm": 0.00159950100351125, - "learning_rate": 0.00019999410331536822, - "loss": 46.0, - "step": 45222 - }, - { - "epoch": 3.457614159833324, - "grad_norm": 0.0015217358013615012, - "learning_rate": 0.00019999410305452192, - "loss": 46.0, - "step": 45223 - }, - { - "epoch": 3.4576906168167136, - "grad_norm": 0.003450914518907666, - "learning_rate": 0.0001999941027936698, - "loss": 46.0, - "step": 45224 - }, - { - "epoch": 3.4577670738001034, - "grad_norm": 0.001059976639226079, - "learning_rate": 0.00019999410253281194, - "loss": 46.0, - "step": 45225 - }, - { - "epoch": 3.457843530783493, - "grad_norm": 0.0013654757058247924, - "learning_rate": 0.0001999941022719483, - "loss": 46.0, - "step": 45226 - }, - { - "epoch": 3.4579199877668825, - "grad_norm": 0.001975161489099264, - "learning_rate": 0.00019999410201107894, - "loss": 46.0, - "step": 45227 - }, - { - "epoch": 3.4579964447502722, - "grad_norm": 0.001725782873108983, - "learning_rate": 0.00019999410175020377, - "loss": 46.0, - "step": 45228 - }, - { - "epoch": 3.458072901733662, - "grad_norm": 0.0036536534316837788, - "learning_rate": 0.00019999410148932283, - "loss": 46.0, - "step": 45229 - }, - { - "epoch": 3.4581493587170518, - "grad_norm": 0.0006217904738150537, - "learning_rate": 0.00019999410122843614, - "loss": 46.0, - "step": 45230 - }, - { - "epoch": 3.4582258157004415, - "grad_norm": 0.0010941765503957868, - "learning_rate": 0.00019999410096754365, - "loss": 46.0, - "step": 45231 - }, - { - "epoch": 3.4583022726838313, - "grad_norm": 0.0036449795588850975, - "learning_rate": 0.0001999941007066454, - "loss": 46.0, - "step": 45232 - }, - { - "epoch": 3.458378729667221, - "grad_norm": 0.0014898300869390368, - "learning_rate": 0.0001999941004457414, - "loss": 46.0, - "step": 45233 - }, - { - "epoch": 3.458455186650611, - "grad_norm": 0.0009278338402509689, - "learning_rate": 0.00019999410018483162, - "loss": 46.0, - "step": 45234 - }, - { - "epoch": 3.4585316436340006, - "grad_norm": 0.002027886686846614, - "learning_rate": 0.0001999940999239161, - "loss": 46.0, - "step": 45235 - }, - { - "epoch": 3.4586081006173903, - "grad_norm": 0.002692680573090911, - "learning_rate": 0.00019999409966299476, - "loss": 46.0, - "step": 45236 - }, - { - "epoch": 3.4586845576007796, - "grad_norm": 0.005479230545461178, - "learning_rate": 0.00019999409940206766, - "loss": 46.0, - "step": 45237 - }, - { - "epoch": 3.4587610145841694, - "grad_norm": 0.0028295456431806087, - "learning_rate": 0.00019999409914113483, - "loss": 46.0, - "step": 45238 - }, - { - "epoch": 3.458837471567559, - "grad_norm": 0.0015259896172210574, - "learning_rate": 0.00019999409888019618, - "loss": 46.0, - "step": 45239 - }, - { - "epoch": 3.458913928550949, - "grad_norm": 0.002326915506273508, - "learning_rate": 0.0001999940986192518, - "loss": 46.0, - "step": 45240 - }, - { - "epoch": 3.4589903855343387, - "grad_norm": 0.00134359672665596, - "learning_rate": 0.00019999409835830164, - "loss": 46.0, - "step": 45241 - }, - { - "epoch": 3.4590668425177284, - "grad_norm": 0.0013130197767168283, - "learning_rate": 0.0001999940980973457, - "loss": 46.0, - "step": 45242 - }, - { - "epoch": 3.459143299501118, - "grad_norm": 0.007666292134672403, - "learning_rate": 0.00019999409783638398, - "loss": 46.0, - "step": 45243 - }, - { - "epoch": 3.459219756484508, - "grad_norm": 0.001448189141228795, - "learning_rate": 0.00019999409757541652, - "loss": 46.0, - "step": 45244 - }, - { - "epoch": 3.4592962134678977, - "grad_norm": 0.002870507538318634, - "learning_rate": 0.00019999409731444328, - "loss": 46.0, - "step": 45245 - }, - { - "epoch": 3.4593726704512875, - "grad_norm": 0.0013945474056527019, - "learning_rate": 0.00019999409705346427, - "loss": 46.0, - "step": 45246 - }, - { - "epoch": 3.4594491274346773, - "grad_norm": 0.0018077908316627145, - "learning_rate": 0.00019999409679247952, - "loss": 46.0, - "step": 45247 - }, - { - "epoch": 3.459525584418067, - "grad_norm": 0.0016248972387984395, - "learning_rate": 0.00019999409653148896, - "loss": 46.0, - "step": 45248 - }, - { - "epoch": 3.4596020414014563, - "grad_norm": 0.00513993576169014, - "learning_rate": 0.00019999409627049266, - "loss": 46.0, - "step": 45249 - }, - { - "epoch": 3.459678498384846, - "grad_norm": 0.0013955950271338224, - "learning_rate": 0.00019999409600949056, - "loss": 46.0, - "step": 45250 - }, - { - "epoch": 3.459754955368236, - "grad_norm": 0.0013859347673133016, - "learning_rate": 0.0001999940957484827, - "loss": 46.0, - "step": 45251 - }, - { - "epoch": 3.4598314123516256, - "grad_norm": 0.0007701015565544367, - "learning_rate": 0.00019999409548746908, - "loss": 46.0, - "step": 45252 - }, - { - "epoch": 3.4599078693350154, - "grad_norm": 0.003868626896291971, - "learning_rate": 0.00019999409522644966, - "loss": 46.0, - "step": 45253 - }, - { - "epoch": 3.459984326318405, - "grad_norm": 0.0006784715224057436, - "learning_rate": 0.00019999409496542452, - "loss": 46.0, - "step": 45254 - }, - { - "epoch": 3.460060783301795, - "grad_norm": 0.002501010661944747, - "learning_rate": 0.00019999409470439358, - "loss": 46.0, - "step": 45255 - }, - { - "epoch": 3.4601372402851847, - "grad_norm": 0.0016842057229951024, - "learning_rate": 0.00019999409444335689, - "loss": 46.0, - "step": 45256 - }, - { - "epoch": 3.4602136972685744, - "grad_norm": 0.0008871675818227232, - "learning_rate": 0.00019999409418231442, - "loss": 46.0, - "step": 45257 - }, - { - "epoch": 3.460290154251964, - "grad_norm": 0.0024440987035632133, - "learning_rate": 0.00019999409392126621, - "loss": 46.0, - "step": 45258 - }, - { - "epoch": 3.4603666112353535, - "grad_norm": 0.0010630963370203972, - "learning_rate": 0.00019999409366021218, - "loss": 46.0, - "step": 45259 - }, - { - "epoch": 3.4604430682187433, - "grad_norm": 0.0008465952123515308, - "learning_rate": 0.0001999940933991524, - "loss": 46.0, - "step": 45260 - }, - { - "epoch": 3.460519525202133, - "grad_norm": 0.0008157572010532022, - "learning_rate": 0.00019999409313808687, - "loss": 46.0, - "step": 45261 - }, - { - "epoch": 3.460595982185523, - "grad_norm": 0.005165203008800745, - "learning_rate": 0.00019999409287701556, - "loss": 46.0, - "step": 45262 - }, - { - "epoch": 3.4606724391689125, - "grad_norm": 0.0008446403662674129, - "learning_rate": 0.00019999409261593846, - "loss": 46.0, - "step": 45263 - }, - { - "epoch": 3.4607488961523023, - "grad_norm": 0.0019177235662937164, - "learning_rate": 0.0001999940923548556, - "loss": 46.0, - "step": 45264 - }, - { - "epoch": 3.460825353135692, - "grad_norm": 0.0043699066154658794, - "learning_rate": 0.000199994092093767, - "loss": 46.0, - "step": 45265 - }, - { - "epoch": 3.460901810119082, - "grad_norm": 0.006610126234591007, - "learning_rate": 0.0001999940918326726, - "loss": 46.0, - "step": 45266 - }, - { - "epoch": 3.4609782671024716, - "grad_norm": 0.0009315164061263204, - "learning_rate": 0.00019999409157157242, - "loss": 46.0, - "step": 45267 - }, - { - "epoch": 3.4610547240858613, - "grad_norm": 0.0020402332302182913, - "learning_rate": 0.00019999409131046648, - "loss": 46.0, - "step": 45268 - }, - { - "epoch": 3.461131181069251, - "grad_norm": 0.0020765983499586582, - "learning_rate": 0.00019999409104935482, - "loss": 46.0, - "step": 45269 - }, - { - "epoch": 3.461207638052641, - "grad_norm": 0.0043333726935088634, - "learning_rate": 0.00019999409078823733, - "loss": 46.0, - "step": 45270 - }, - { - "epoch": 3.46128409503603, - "grad_norm": 0.003468272276222706, - "learning_rate": 0.00019999409052711412, - "loss": 46.0, - "step": 45271 - }, - { - "epoch": 3.46136055201942, - "grad_norm": 0.0020052504260092974, - "learning_rate": 0.0001999940902659851, - "loss": 46.0, - "step": 45272 - }, - { - "epoch": 3.4614370090028097, - "grad_norm": 0.002159254625439644, - "learning_rate": 0.00019999409000485033, - "loss": 46.0, - "step": 45273 - }, - { - "epoch": 3.4615134659861995, - "grad_norm": 0.0027012641075998545, - "learning_rate": 0.00019999408974370977, - "loss": 46.0, - "step": 45274 - }, - { - "epoch": 3.4615899229695892, - "grad_norm": 0.000906950153876096, - "learning_rate": 0.00019999408948256347, - "loss": 46.0, - "step": 45275 - }, - { - "epoch": 3.461666379952979, - "grad_norm": 0.006767289713025093, - "learning_rate": 0.0001999940892214114, - "loss": 46.0, - "step": 45276 - }, - { - "epoch": 3.4617428369363688, - "grad_norm": 0.005225414410233498, - "learning_rate": 0.00019999408896025352, - "loss": 46.0, - "step": 45277 - }, - { - "epoch": 3.4618192939197585, - "grad_norm": 0.0015316924545913935, - "learning_rate": 0.0001999940886990899, - "loss": 46.0, - "step": 45278 - }, - { - "epoch": 3.4618957509031483, - "grad_norm": 0.0021379240788519382, - "learning_rate": 0.0001999940884379205, - "loss": 46.0, - "step": 45279 - }, - { - "epoch": 3.4619722078865376, - "grad_norm": 0.0012888384517282248, - "learning_rate": 0.00019999408817674536, - "loss": 46.0, - "step": 45280 - }, - { - "epoch": 3.4620486648699274, - "grad_norm": 0.0015105732018128037, - "learning_rate": 0.00019999408791556445, - "loss": 46.0, - "step": 45281 - }, - { - "epoch": 3.462125121853317, - "grad_norm": 0.0017428887076675892, - "learning_rate": 0.00019999408765437776, - "loss": 46.0, - "step": 45282 - }, - { - "epoch": 3.462201578836707, - "grad_norm": 0.0018663223600015044, - "learning_rate": 0.00019999408739318527, - "loss": 46.0, - "step": 45283 - }, - { - "epoch": 3.4622780358200966, - "grad_norm": 0.0017414928879588842, - "learning_rate": 0.00019999408713198703, - "loss": 46.0, - "step": 45284 - }, - { - "epoch": 3.4623544928034864, - "grad_norm": 0.0014732241397723556, - "learning_rate": 0.00019999408687078302, - "loss": 46.0, - "step": 45285 - }, - { - "epoch": 3.462430949786876, - "grad_norm": 0.007237066049128771, - "learning_rate": 0.00019999408660957327, - "loss": 46.0, - "step": 45286 - }, - { - "epoch": 3.462507406770266, - "grad_norm": 0.003422307316213846, - "learning_rate": 0.00019999408634835771, - "loss": 46.0, - "step": 45287 - }, - { - "epoch": 3.4625838637536557, - "grad_norm": 0.002165980637073517, - "learning_rate": 0.0001999940860871364, - "loss": 46.0, - "step": 45288 - }, - { - "epoch": 3.4626603207370454, - "grad_norm": 0.0032646714244037867, - "learning_rate": 0.00019999408582590934, - "loss": 46.0, - "step": 45289 - }, - { - "epoch": 3.462736777720435, - "grad_norm": 0.0008388942806050181, - "learning_rate": 0.00019999408556467646, - "loss": 46.0, - "step": 45290 - }, - { - "epoch": 3.462813234703825, - "grad_norm": 0.00189967080950737, - "learning_rate": 0.00019999408530343787, - "loss": 46.0, - "step": 45291 - }, - { - "epoch": 3.4628896916872147, - "grad_norm": 0.001891830237582326, - "learning_rate": 0.00019999408504219347, - "loss": 46.0, - "step": 45292 - }, - { - "epoch": 3.462966148670604, - "grad_norm": 0.0015905596083030105, - "learning_rate": 0.0001999940847809433, - "loss": 46.0, - "step": 45293 - }, - { - "epoch": 3.463042605653994, - "grad_norm": 0.001288939849473536, - "learning_rate": 0.0001999940845196874, - "loss": 46.0, - "step": 45294 - }, - { - "epoch": 3.4631190626373836, - "grad_norm": 0.0018962343456223607, - "learning_rate": 0.0001999940842584257, - "loss": 46.0, - "step": 45295 - }, - { - "epoch": 3.4631955196207733, - "grad_norm": 0.0017523699207231402, - "learning_rate": 0.0001999940839971582, - "loss": 46.0, - "step": 45296 - }, - { - "epoch": 3.463271976604163, - "grad_norm": 0.0019330132054165006, - "learning_rate": 0.000199994083735885, - "loss": 46.0, - "step": 45297 - }, - { - "epoch": 3.463348433587553, - "grad_norm": 0.002740581054240465, - "learning_rate": 0.000199994083474606, - "loss": 46.0, - "step": 45298 - }, - { - "epoch": 3.4634248905709426, - "grad_norm": 0.005950115155428648, - "learning_rate": 0.0001999940832133212, - "loss": 46.0, - "step": 45299 - }, - { - "epoch": 3.4635013475543324, - "grad_norm": 0.003209349699318409, - "learning_rate": 0.00019999408295203068, - "loss": 46.0, - "step": 45300 - }, - { - "epoch": 3.463577804537722, - "grad_norm": 0.0011719181202352047, - "learning_rate": 0.00019999408269073436, - "loss": 46.0, - "step": 45301 - }, - { - "epoch": 3.4636542615211114, - "grad_norm": 0.00136278267018497, - "learning_rate": 0.00019999408242943228, - "loss": 46.0, - "step": 45302 - }, - { - "epoch": 3.463730718504501, - "grad_norm": 0.0019472400890663266, - "learning_rate": 0.00019999408216812446, - "loss": 46.0, - "step": 45303 - }, - { - "epoch": 3.463807175487891, - "grad_norm": 0.006315067410469055, - "learning_rate": 0.0001999940819068108, - "loss": 46.0, - "step": 45304 - }, - { - "epoch": 3.4638836324712807, - "grad_norm": 0.0010663226712495089, - "learning_rate": 0.00019999408164549144, - "loss": 46.0, - "step": 45305 - }, - { - "epoch": 3.4639600894546705, - "grad_norm": 0.002910124370828271, - "learning_rate": 0.00019999408138416628, - "loss": 46.0, - "step": 45306 - }, - { - "epoch": 3.4640365464380602, - "grad_norm": 0.0015780196990817785, - "learning_rate": 0.00019999408112283536, - "loss": 46.0, - "step": 45307 - }, - { - "epoch": 3.46411300342145, - "grad_norm": 0.001325340592302382, - "learning_rate": 0.00019999408086149867, - "loss": 46.0, - "step": 45308 - }, - { - "epoch": 3.4641894604048398, - "grad_norm": 0.0018771840259432793, - "learning_rate": 0.0001999940806001562, - "loss": 46.0, - "step": 45309 - }, - { - "epoch": 3.4642659173882295, - "grad_norm": 0.001647996949031949, - "learning_rate": 0.00019999408033880798, - "loss": 46.0, - "step": 45310 - }, - { - "epoch": 3.4643423743716193, - "grad_norm": 0.00992936547845602, - "learning_rate": 0.00019999408007745397, - "loss": 46.0, - "step": 45311 - }, - { - "epoch": 3.464418831355009, - "grad_norm": 0.0013483690563589334, - "learning_rate": 0.0001999940798160942, - "loss": 46.0, - "step": 45312 - }, - { - "epoch": 3.464495288338399, - "grad_norm": 0.0023338343016803265, - "learning_rate": 0.00019999407955472866, - "loss": 46.0, - "step": 45313 - }, - { - "epoch": 3.4645717453217886, - "grad_norm": 0.005195608362555504, - "learning_rate": 0.00019999407929335736, - "loss": 46.0, - "step": 45314 - }, - { - "epoch": 3.464648202305178, - "grad_norm": 0.002874793251976371, - "learning_rate": 0.00019999407903198026, - "loss": 46.0, - "step": 45315 - }, - { - "epoch": 3.4647246592885677, - "grad_norm": 0.0010338746942579746, - "learning_rate": 0.00019999407877059744, - "loss": 46.0, - "step": 45316 - }, - { - "epoch": 3.4648011162719574, - "grad_norm": 0.0008347270195372403, - "learning_rate": 0.00019999407850920881, - "loss": 46.0, - "step": 45317 - }, - { - "epoch": 3.464877573255347, - "grad_norm": 0.001603742246516049, - "learning_rate": 0.00019999407824781442, - "loss": 46.0, - "step": 45318 - }, - { - "epoch": 3.464954030238737, - "grad_norm": 0.0016313113737851381, - "learning_rate": 0.00019999407798641428, - "loss": 46.0, - "step": 45319 - }, - { - "epoch": 3.4650304872221267, - "grad_norm": 0.0013656540540978312, - "learning_rate": 0.00019999407772500836, - "loss": 46.0, - "step": 45320 - }, - { - "epoch": 3.4651069442055165, - "grad_norm": 0.0034292214550077915, - "learning_rate": 0.00019999407746359665, - "loss": 46.0, - "step": 45321 - }, - { - "epoch": 3.465183401188906, - "grad_norm": 0.001389471348375082, - "learning_rate": 0.0001999940772021792, - "loss": 46.0, - "step": 45322 - }, - { - "epoch": 3.465259858172296, - "grad_norm": 0.0022577354684472084, - "learning_rate": 0.00019999407694075595, - "loss": 46.0, - "step": 45323 - }, - { - "epoch": 3.4653363151556853, - "grad_norm": 0.0020267742220312357, - "learning_rate": 0.00019999407667932697, - "loss": 46.0, - "step": 45324 - }, - { - "epoch": 3.465412772139075, - "grad_norm": 0.002215612679719925, - "learning_rate": 0.00019999407641789222, - "loss": 46.0, - "step": 45325 - }, - { - "epoch": 3.465489229122465, - "grad_norm": 0.001049189013428986, - "learning_rate": 0.00019999407615645167, - "loss": 46.0, - "step": 45326 - }, - { - "epoch": 3.4655656861058546, - "grad_norm": 0.0026559787802398205, - "learning_rate": 0.00019999407589500536, - "loss": 46.0, - "step": 45327 - }, - { - "epoch": 3.4656421430892443, - "grad_norm": 0.0029841770883649588, - "learning_rate": 0.0001999940756335533, - "loss": 46.0, - "step": 45328 - }, - { - "epoch": 3.465718600072634, - "grad_norm": 0.001439036801457405, - "learning_rate": 0.00019999407537209542, - "loss": 46.0, - "step": 45329 - }, - { - "epoch": 3.465795057056024, - "grad_norm": 0.0005394062027335167, - "learning_rate": 0.00019999407511063182, - "loss": 46.0, - "step": 45330 - }, - { - "epoch": 3.4658715140394136, - "grad_norm": 0.0015713658649474382, - "learning_rate": 0.00019999407484916243, - "loss": 46.0, - "step": 45331 - }, - { - "epoch": 3.4659479710228034, - "grad_norm": 0.0019952277652919292, - "learning_rate": 0.0001999940745876873, - "loss": 46.0, - "step": 45332 - }, - { - "epoch": 3.466024428006193, - "grad_norm": 0.0038955719210207462, - "learning_rate": 0.00019999407432620637, - "loss": 46.0, - "step": 45333 - }, - { - "epoch": 3.466100884989583, - "grad_norm": 0.00429118238389492, - "learning_rate": 0.00019999407406471966, - "loss": 46.0, - "step": 45334 - }, - { - "epoch": 3.4661773419729727, - "grad_norm": 0.003267466090619564, - "learning_rate": 0.0001999940738032272, - "loss": 46.0, - "step": 45335 - }, - { - "epoch": 3.466253798956362, - "grad_norm": 0.0022804783657193184, - "learning_rate": 0.000199994073541729, - "loss": 46.0, - "step": 45336 - }, - { - "epoch": 3.4663302559397517, - "grad_norm": 0.001498306985013187, - "learning_rate": 0.00019999407328022498, - "loss": 46.0, - "step": 45337 - }, - { - "epoch": 3.4664067129231415, - "grad_norm": 0.0018274802714586258, - "learning_rate": 0.00019999407301871523, - "loss": 46.0, - "step": 45338 - }, - { - "epoch": 3.4664831699065313, - "grad_norm": 0.0018702266970649362, - "learning_rate": 0.0001999940727571997, - "loss": 46.0, - "step": 45339 - }, - { - "epoch": 3.466559626889921, - "grad_norm": 0.0015760073438286781, - "learning_rate": 0.00019999407249567838, - "loss": 46.0, - "step": 45340 - }, - { - "epoch": 3.466636083873311, - "grad_norm": 0.0009732645121403039, - "learning_rate": 0.0001999940722341513, - "loss": 46.0, - "step": 45341 - }, - { - "epoch": 3.4667125408567006, - "grad_norm": 0.0007463563233613968, - "learning_rate": 0.00019999407197261845, - "loss": 46.0, - "step": 45342 - }, - { - "epoch": 3.4667889978400903, - "grad_norm": 0.0016366144409403205, - "learning_rate": 0.00019999407171107983, - "loss": 46.0, - "step": 45343 - }, - { - "epoch": 3.46686545482348, - "grad_norm": 0.0028873293194919825, - "learning_rate": 0.00019999407144953547, - "loss": 46.0, - "step": 45344 - }, - { - "epoch": 3.46694191180687, - "grad_norm": 0.0012015729444101453, - "learning_rate": 0.00019999407118798533, - "loss": 46.0, - "step": 45345 - }, - { - "epoch": 3.467018368790259, - "grad_norm": 0.0045380378141999245, - "learning_rate": 0.0001999940709264294, - "loss": 46.0, - "step": 45346 - }, - { - "epoch": 3.467094825773649, - "grad_norm": 0.0035851828288286924, - "learning_rate": 0.0001999940706648677, - "loss": 46.0, - "step": 45347 - }, - { - "epoch": 3.4671712827570387, - "grad_norm": 0.0019371291855350137, - "learning_rate": 0.00019999407040330024, - "loss": 46.0, - "step": 45348 - }, - { - "epoch": 3.4672477397404284, - "grad_norm": 0.0019747507758438587, - "learning_rate": 0.00019999407014172703, - "loss": 46.0, - "step": 45349 - }, - { - "epoch": 3.467324196723818, - "grad_norm": 0.00125067587941885, - "learning_rate": 0.00019999406988014805, - "loss": 46.0, - "step": 45350 - }, - { - "epoch": 3.467400653707208, - "grad_norm": 0.0014952163910493255, - "learning_rate": 0.00019999406961856327, - "loss": 46.0, - "step": 45351 - }, - { - "epoch": 3.4674771106905977, - "grad_norm": 0.00152310892008245, - "learning_rate": 0.00019999406935697272, - "loss": 46.0, - "step": 45352 - }, - { - "epoch": 3.4675535676739875, - "grad_norm": 0.0014664194313809276, - "learning_rate": 0.00019999406909537642, - "loss": 46.0, - "step": 45353 - }, - { - "epoch": 3.4676300246573772, - "grad_norm": 0.0016908731777220964, - "learning_rate": 0.00019999406883377434, - "loss": 46.0, - "step": 45354 - }, - { - "epoch": 3.467706481640767, - "grad_norm": 0.000938360346481204, - "learning_rate": 0.0001999940685721665, - "loss": 46.0, - "step": 45355 - }, - { - "epoch": 3.4677829386241568, - "grad_norm": 0.0026080983225256205, - "learning_rate": 0.0001999940683105529, - "loss": 46.0, - "step": 45356 - }, - { - "epoch": 3.4678593956075465, - "grad_norm": 0.0023704352788627148, - "learning_rate": 0.00019999406804893354, - "loss": 46.0, - "step": 45357 - }, - { - "epoch": 3.467935852590936, - "grad_norm": 0.0020570727065205574, - "learning_rate": 0.00019999406778730837, - "loss": 46.0, - "step": 45358 - }, - { - "epoch": 3.4680123095743256, - "grad_norm": 0.001802973565645516, - "learning_rate": 0.00019999406752567746, - "loss": 46.0, - "step": 45359 - }, - { - "epoch": 3.4680887665577154, - "grad_norm": 0.0015169268008321524, - "learning_rate": 0.00019999406726404074, - "loss": 46.0, - "step": 45360 - }, - { - "epoch": 3.468165223541105, - "grad_norm": 0.002393965609371662, - "learning_rate": 0.0001999940670023983, - "loss": 46.0, - "step": 45361 - }, - { - "epoch": 3.468241680524495, - "grad_norm": 0.0034477978479117155, - "learning_rate": 0.0001999940667407501, - "loss": 46.0, - "step": 45362 - }, - { - "epoch": 3.4683181375078846, - "grad_norm": 0.002363716484978795, - "learning_rate": 0.0001999940664790961, - "loss": 46.0, - "step": 45363 - }, - { - "epoch": 3.4683945944912744, - "grad_norm": 0.002004781737923622, - "learning_rate": 0.00019999406621743632, - "loss": 46.0, - "step": 45364 - }, - { - "epoch": 3.468471051474664, - "grad_norm": 0.0067963371984660625, - "learning_rate": 0.0001999940659557708, - "loss": 46.0, - "step": 45365 - }, - { - "epoch": 3.468547508458054, - "grad_norm": 0.0028303293511271477, - "learning_rate": 0.0001999940656940995, - "loss": 46.0, - "step": 45366 - }, - { - "epoch": 3.4686239654414437, - "grad_norm": 0.008921141736209393, - "learning_rate": 0.00019999406543242242, - "loss": 46.0, - "step": 45367 - }, - { - "epoch": 3.468700422424833, - "grad_norm": 0.0016227097949013114, - "learning_rate": 0.00019999406517073957, - "loss": 46.0, - "step": 45368 - }, - { - "epoch": 3.4687768794082228, - "grad_norm": 0.002052989089861512, - "learning_rate": 0.00019999406490905095, - "loss": 46.0, - "step": 45369 - }, - { - "epoch": 3.4688533363916125, - "grad_norm": 0.0020686821080744267, - "learning_rate": 0.00019999406464735658, - "loss": 46.0, - "step": 45370 - }, - { - "epoch": 3.4689297933750023, - "grad_norm": 0.0031085405498743057, - "learning_rate": 0.00019999406438565644, - "loss": 46.0, - "step": 45371 - }, - { - "epoch": 3.469006250358392, - "grad_norm": 0.0006648002308793366, - "learning_rate": 0.00019999406412395053, - "loss": 46.0, - "step": 45372 - }, - { - "epoch": 3.469082707341782, - "grad_norm": 0.001301878015510738, - "learning_rate": 0.00019999406386223884, - "loss": 46.0, - "step": 45373 - }, - { - "epoch": 3.4691591643251716, - "grad_norm": 0.0019161106320098042, - "learning_rate": 0.00019999406360052138, - "loss": 46.0, - "step": 45374 - }, - { - "epoch": 3.4692356213085613, - "grad_norm": 0.0020964015275239944, - "learning_rate": 0.00019999406333879815, - "loss": 46.0, - "step": 45375 - }, - { - "epoch": 3.469312078291951, - "grad_norm": 0.0012005208991467953, - "learning_rate": 0.00019999406307706917, - "loss": 46.0, - "step": 45376 - }, - { - "epoch": 3.469388535275341, - "grad_norm": 0.0017579332925379276, - "learning_rate": 0.00019999406281533442, - "loss": 46.0, - "step": 45377 - }, - { - "epoch": 3.4694649922587306, - "grad_norm": 0.003330113599076867, - "learning_rate": 0.00019999406255359387, - "loss": 46.0, - "step": 45378 - }, - { - "epoch": 3.4695414492421204, - "grad_norm": 0.00260583171620965, - "learning_rate": 0.00019999406229184757, - "loss": 46.0, - "step": 45379 - }, - { - "epoch": 3.4696179062255097, - "grad_norm": 0.0036398381926119328, - "learning_rate": 0.0001999940620300955, - "loss": 46.0, - "step": 45380 - }, - { - "epoch": 3.4696943632088995, - "grad_norm": 0.006889119278639555, - "learning_rate": 0.00019999406176833765, - "loss": 46.0, - "step": 45381 - }, - { - "epoch": 3.469770820192289, - "grad_norm": 0.0025699592661112547, - "learning_rate": 0.00019999406150657405, - "loss": 46.0, - "step": 45382 - }, - { - "epoch": 3.469847277175679, - "grad_norm": 0.0049185906536877155, - "learning_rate": 0.00019999406124480466, - "loss": 46.0, - "step": 45383 - }, - { - "epoch": 3.4699237341590687, - "grad_norm": 0.001075700274668634, - "learning_rate": 0.00019999406098302952, - "loss": 46.0, - "step": 45384 - }, - { - "epoch": 3.4700001911424585, - "grad_norm": 0.0006722345133312047, - "learning_rate": 0.0001999940607212486, - "loss": 46.0, - "step": 45385 - }, - { - "epoch": 3.4700766481258483, - "grad_norm": 0.0028031079564243555, - "learning_rate": 0.00019999406045946195, - "loss": 46.0, - "step": 45386 - }, - { - "epoch": 3.470153105109238, - "grad_norm": 0.004638790152966976, - "learning_rate": 0.0001999940601976695, - "loss": 46.0, - "step": 45387 - }, - { - "epoch": 3.470229562092628, - "grad_norm": 0.003844328224658966, - "learning_rate": 0.00019999405993587126, - "loss": 46.0, - "step": 45388 - }, - { - "epoch": 3.4703060190760175, - "grad_norm": 0.001000547781586647, - "learning_rate": 0.00019999405967406725, - "loss": 46.0, - "step": 45389 - }, - { - "epoch": 3.470382476059407, - "grad_norm": 0.0009820261038839817, - "learning_rate": 0.0001999940594122575, - "loss": 46.0, - "step": 45390 - }, - { - "epoch": 3.4704589330427966, - "grad_norm": 0.00280400482006371, - "learning_rate": 0.00019999405915044197, - "loss": 46.0, - "step": 45391 - }, - { - "epoch": 3.4705353900261864, - "grad_norm": 0.0023543043062090874, - "learning_rate": 0.00019999405888862067, - "loss": 46.0, - "step": 45392 - }, - { - "epoch": 3.470611847009576, - "grad_norm": 0.0017922515980899334, - "learning_rate": 0.0001999940586267936, - "loss": 46.0, - "step": 45393 - }, - { - "epoch": 3.470688303992966, - "grad_norm": 0.0027865006122738123, - "learning_rate": 0.00019999405836496076, - "loss": 46.0, - "step": 45394 - }, - { - "epoch": 3.4707647609763557, - "grad_norm": 0.001274133799597621, - "learning_rate": 0.00019999405810312216, - "loss": 46.0, - "step": 45395 - }, - { - "epoch": 3.4708412179597454, - "grad_norm": 0.003584288526326418, - "learning_rate": 0.00019999405784127777, - "loss": 46.0, - "step": 45396 - }, - { - "epoch": 3.470917674943135, - "grad_norm": 0.0038590379990637302, - "learning_rate": 0.00019999405757942763, - "loss": 46.0, - "step": 45397 - }, - { - "epoch": 3.470994131926525, - "grad_norm": 0.003260708414018154, - "learning_rate": 0.00019999405731757172, - "loss": 46.0, - "step": 45398 - }, - { - "epoch": 3.4710705889099147, - "grad_norm": 0.002681143581867218, - "learning_rate": 0.00019999405705571003, - "loss": 46.0, - "step": 45399 - }, - { - "epoch": 3.4711470458933045, - "grad_norm": 0.0018498157151043415, - "learning_rate": 0.00019999405679384257, - "loss": 46.0, - "step": 45400 - }, - { - "epoch": 3.4712235028766942, - "grad_norm": 0.0012419262202456594, - "learning_rate": 0.00019999405653196937, - "loss": 46.0, - "step": 45401 - }, - { - "epoch": 3.4712999598600836, - "grad_norm": 0.0030020864214748144, - "learning_rate": 0.0001999940562700904, - "loss": 46.0, - "step": 45402 - }, - { - "epoch": 3.4713764168434733, - "grad_norm": 0.0011914694914594293, - "learning_rate": 0.0001999940560082056, - "loss": 46.0, - "step": 45403 - }, - { - "epoch": 3.471452873826863, - "grad_norm": 0.008153322152793407, - "learning_rate": 0.00019999405574631509, - "loss": 46.0, - "step": 45404 - }, - { - "epoch": 3.471529330810253, - "grad_norm": 0.0048467861488461494, - "learning_rate": 0.00019999405548441881, - "loss": 46.0, - "step": 45405 - }, - { - "epoch": 3.4716057877936426, - "grad_norm": 0.0006020099972374737, - "learning_rate": 0.00019999405522251674, - "loss": 46.0, - "step": 45406 - }, - { - "epoch": 3.4716822447770324, - "grad_norm": 0.0022257273085415363, - "learning_rate": 0.00019999405496060887, - "loss": 46.0, - "step": 45407 - }, - { - "epoch": 3.471758701760422, - "grad_norm": 0.0021620949264615774, - "learning_rate": 0.00019999405469869528, - "loss": 46.0, - "step": 45408 - }, - { - "epoch": 3.471835158743812, - "grad_norm": 0.0030188492964953184, - "learning_rate": 0.0001999940544367759, - "loss": 46.0, - "step": 45409 - }, - { - "epoch": 3.4719116157272016, - "grad_norm": 0.0021532301325351, - "learning_rate": 0.00019999405417485077, - "loss": 46.0, - "step": 45410 - }, - { - "epoch": 3.471988072710591, - "grad_norm": 0.003152495017275214, - "learning_rate": 0.00019999405391291986, - "loss": 46.0, - "step": 45411 - }, - { - "epoch": 3.4720645296939807, - "grad_norm": 0.004910083953291178, - "learning_rate": 0.00019999405365098318, - "loss": 46.0, - "step": 45412 - }, - { - "epoch": 3.4721409866773705, - "grad_norm": 0.005014121066778898, - "learning_rate": 0.00019999405338904072, - "loss": 46.0, - "step": 45413 - }, - { - "epoch": 3.4722174436607602, - "grad_norm": 0.001974246231839061, - "learning_rate": 0.0001999940531270925, - "loss": 46.0, - "step": 45414 - }, - { - "epoch": 3.47229390064415, - "grad_norm": 0.0015629720874130726, - "learning_rate": 0.0001999940528651385, - "loss": 46.0, - "step": 45415 - }, - { - "epoch": 3.4723703576275398, - "grad_norm": 0.0005405074334703386, - "learning_rate": 0.00019999405260317875, - "loss": 46.0, - "step": 45416 - }, - { - "epoch": 3.4724468146109295, - "grad_norm": 0.007345457561314106, - "learning_rate": 0.00019999405234121326, - "loss": 46.0, - "step": 45417 - }, - { - "epoch": 3.4725232715943193, - "grad_norm": 0.009950641542673111, - "learning_rate": 0.00019999405207924193, - "loss": 46.0, - "step": 45418 - }, - { - "epoch": 3.472599728577709, - "grad_norm": 0.001955407904461026, - "learning_rate": 0.00019999405181726486, - "loss": 46.0, - "step": 45419 - }, - { - "epoch": 3.472676185561099, - "grad_norm": 0.0026656100526452065, - "learning_rate": 0.00019999405155528204, - "loss": 46.0, - "step": 45420 - }, - { - "epoch": 3.4727526425444886, - "grad_norm": 0.0013629165478050709, - "learning_rate": 0.00019999405129329345, - "loss": 46.0, - "step": 45421 - }, - { - "epoch": 3.4728290995278783, - "grad_norm": 0.003128195647150278, - "learning_rate": 0.00019999405103129906, - "loss": 46.0, - "step": 45422 - }, - { - "epoch": 3.472905556511268, - "grad_norm": 0.002011357806622982, - "learning_rate": 0.00019999405076929892, - "loss": 46.0, - "step": 45423 - }, - { - "epoch": 3.4729820134946574, - "grad_norm": 0.0026485517155379057, - "learning_rate": 0.000199994050507293, - "loss": 46.0, - "step": 45424 - }, - { - "epoch": 3.473058470478047, - "grad_norm": 0.0023168164771050215, - "learning_rate": 0.00019999405024528132, - "loss": 46.0, - "step": 45425 - }, - { - "epoch": 3.473134927461437, - "grad_norm": 0.003357185050845146, - "learning_rate": 0.0001999940499832639, - "loss": 46.0, - "step": 45426 - }, - { - "epoch": 3.4732113844448267, - "grad_norm": 0.00062833062838763, - "learning_rate": 0.00019999404972124066, - "loss": 46.0, - "step": 45427 - }, - { - "epoch": 3.4732878414282164, - "grad_norm": 0.0012073391117155552, - "learning_rate": 0.00019999404945921168, - "loss": 46.0, - "step": 45428 - }, - { - "epoch": 3.473364298411606, - "grad_norm": 0.0024175432045012712, - "learning_rate": 0.00019999404919717693, - "loss": 46.0, - "step": 45429 - }, - { - "epoch": 3.473440755394996, - "grad_norm": 0.0015951709356158972, - "learning_rate": 0.0001999940489351364, - "loss": 46.0, - "step": 45430 - }, - { - "epoch": 3.4735172123783857, - "grad_norm": 0.0013720013666898012, - "learning_rate": 0.0001999940486730901, - "loss": 46.0, - "step": 45431 - }, - { - "epoch": 3.4735936693617755, - "grad_norm": 0.001110560609959066, - "learning_rate": 0.00019999404841103803, - "loss": 46.0, - "step": 45432 - }, - { - "epoch": 3.473670126345165, - "grad_norm": 0.001942623988725245, - "learning_rate": 0.0001999940481489802, - "loss": 46.0, - "step": 45433 - }, - { - "epoch": 3.4737465833285546, - "grad_norm": 0.007223673164844513, - "learning_rate": 0.0001999940478869166, - "loss": 46.0, - "step": 45434 - }, - { - "epoch": 3.4738230403119443, - "grad_norm": 0.00296038668602705, - "learning_rate": 0.00019999404762484723, - "loss": 46.0, - "step": 45435 - }, - { - "epoch": 3.473899497295334, - "grad_norm": 0.0013491497375071049, - "learning_rate": 0.0001999940473627721, - "loss": 46.0, - "step": 45436 - }, - { - "epoch": 3.473975954278724, - "grad_norm": 0.003378593595698476, - "learning_rate": 0.00019999404710069118, - "loss": 46.0, - "step": 45437 - }, - { - "epoch": 3.4740524112621136, - "grad_norm": 0.0005663735792040825, - "learning_rate": 0.00019999404683860452, - "loss": 46.0, - "step": 45438 - }, - { - "epoch": 3.4741288682455034, - "grad_norm": 0.002361743478104472, - "learning_rate": 0.00019999404657651207, - "loss": 46.0, - "step": 45439 - }, - { - "epoch": 3.474205325228893, - "grad_norm": 0.002637530444189906, - "learning_rate": 0.00019999404631441383, - "loss": 46.0, - "step": 45440 - }, - { - "epoch": 3.474281782212283, - "grad_norm": 0.010690860450267792, - "learning_rate": 0.00019999404605230986, - "loss": 46.0, - "step": 45441 - }, - { - "epoch": 3.4743582391956727, - "grad_norm": 0.002201820258051157, - "learning_rate": 0.00019999404579020008, - "loss": 46.0, - "step": 45442 - }, - { - "epoch": 3.4744346961790624, - "grad_norm": 0.00961847510188818, - "learning_rate": 0.00019999404552808455, - "loss": 46.0, - "step": 45443 - }, - { - "epoch": 3.474511153162452, - "grad_norm": 0.0037724098656326532, - "learning_rate": 0.00019999404526596328, - "loss": 46.0, - "step": 45444 - }, - { - "epoch": 3.474587610145842, - "grad_norm": 0.0024975226260721684, - "learning_rate": 0.00019999404500383624, - "loss": 46.0, - "step": 45445 - }, - { - "epoch": 3.4746640671292313, - "grad_norm": 0.0024796074721962214, - "learning_rate": 0.00019999404474170337, - "loss": 46.0, - "step": 45446 - }, - { - "epoch": 3.474740524112621, - "grad_norm": 0.0014058712404221296, - "learning_rate": 0.0001999940444795648, - "loss": 46.0, - "step": 45447 - }, - { - "epoch": 3.474816981096011, - "grad_norm": 0.002800158690661192, - "learning_rate": 0.0001999940442174204, - "loss": 46.0, - "step": 45448 - }, - { - "epoch": 3.4748934380794005, - "grad_norm": 0.0027238046750426292, - "learning_rate": 0.00019999404395527027, - "loss": 46.0, - "step": 45449 - }, - { - "epoch": 3.4749698950627903, - "grad_norm": 0.006922128144651651, - "learning_rate": 0.0001999940436931144, - "loss": 46.0, - "step": 45450 - }, - { - "epoch": 3.47504635204618, - "grad_norm": 0.00367325940169394, - "learning_rate": 0.0001999940434309527, - "loss": 46.0, - "step": 45451 - }, - { - "epoch": 3.47512280902957, - "grad_norm": 0.0013188676675781608, - "learning_rate": 0.00019999404316878525, - "loss": 46.0, - "step": 45452 - }, - { - "epoch": 3.4751992660129596, - "grad_norm": 0.002308727242052555, - "learning_rate": 0.00019999404290661204, - "loss": 46.0, - "step": 45453 - }, - { - "epoch": 3.4752757229963493, - "grad_norm": 0.0035989207681268454, - "learning_rate": 0.00019999404264443307, - "loss": 46.0, - "step": 45454 - }, - { - "epoch": 3.4753521799797387, - "grad_norm": 0.001186361419968307, - "learning_rate": 0.00019999404238224831, - "loss": 46.0, - "step": 45455 - }, - { - "epoch": 3.4754286369631284, - "grad_norm": 0.0010325969196856022, - "learning_rate": 0.00019999404212005776, - "loss": 46.0, - "step": 45456 - }, - { - "epoch": 3.475505093946518, - "grad_norm": 0.002916923025622964, - "learning_rate": 0.0001999940418578615, - "loss": 46.0, - "step": 45457 - }, - { - "epoch": 3.475581550929908, - "grad_norm": 0.0038317441940307617, - "learning_rate": 0.00019999404159565942, - "loss": 46.0, - "step": 45458 - }, - { - "epoch": 3.4756580079132977, - "grad_norm": 0.001993397017940879, - "learning_rate": 0.0001999940413334516, - "loss": 46.0, - "step": 45459 - }, - { - "epoch": 3.4757344648966875, - "grad_norm": 0.004902064334601164, - "learning_rate": 0.00019999404107123801, - "loss": 46.0, - "step": 45460 - }, - { - "epoch": 3.4758109218800772, - "grad_norm": 0.0023605634924024343, - "learning_rate": 0.00019999404080901862, - "loss": 46.0, - "step": 45461 - }, - { - "epoch": 3.475887378863467, - "grad_norm": 0.00530610978603363, - "learning_rate": 0.00019999404054679349, - "loss": 46.0, - "step": 45462 - }, - { - "epoch": 3.4759638358468568, - "grad_norm": 0.002614965196698904, - "learning_rate": 0.0001999940402845626, - "loss": 46.0, - "step": 45463 - }, - { - "epoch": 3.4760402928302465, - "grad_norm": 0.003270120592787862, - "learning_rate": 0.00019999404002232592, - "loss": 46.0, - "step": 45464 - }, - { - "epoch": 3.4761167498136363, - "grad_norm": 0.0026582053396850824, - "learning_rate": 0.00019999403976008346, - "loss": 46.0, - "step": 45465 - }, - { - "epoch": 3.476193206797026, - "grad_norm": 0.003206276800483465, - "learning_rate": 0.00019999403949783526, - "loss": 46.0, - "step": 45466 - }, - { - "epoch": 3.4762696637804154, - "grad_norm": 0.00184364162851125, - "learning_rate": 0.00019999403923558128, - "loss": 46.0, - "step": 45467 - }, - { - "epoch": 3.476346120763805, - "grad_norm": 0.0016548899002373219, - "learning_rate": 0.00019999403897332153, - "loss": 46.0, - "step": 45468 - }, - { - "epoch": 3.476422577747195, - "grad_norm": 0.0034113412257283926, - "learning_rate": 0.000199994038711056, - "loss": 46.0, - "step": 45469 - }, - { - "epoch": 3.4764990347305846, - "grad_norm": 0.002269693650305271, - "learning_rate": 0.0001999940384487847, - "loss": 46.0, - "step": 45470 - }, - { - "epoch": 3.4765754917139744, - "grad_norm": 0.003167959628626704, - "learning_rate": 0.00019999403818650766, - "loss": 46.0, - "step": 45471 - }, - { - "epoch": 3.476651948697364, - "grad_norm": 0.0020699365995824337, - "learning_rate": 0.00019999403792422485, - "loss": 46.0, - "step": 45472 - }, - { - "epoch": 3.476728405680754, - "grad_norm": 0.016294796019792557, - "learning_rate": 0.00019999403766193623, - "loss": 46.0, - "step": 45473 - }, - { - "epoch": 3.4768048626641437, - "grad_norm": 0.0007564066327176988, - "learning_rate": 0.00019999403739964187, - "loss": 46.0, - "step": 45474 - }, - { - "epoch": 3.4768813196475334, - "grad_norm": 0.0017108300235122442, - "learning_rate": 0.00019999403713734173, - "loss": 46.0, - "step": 45475 - }, - { - "epoch": 3.476957776630923, - "grad_norm": 0.004104054067283869, - "learning_rate": 0.00019999403687503584, - "loss": 46.0, - "step": 45476 - }, - { - "epoch": 3.4770342336143125, - "grad_norm": 0.003205989021807909, - "learning_rate": 0.00019999403661272416, - "loss": 46.0, - "step": 45477 - }, - { - "epoch": 3.4771106905977023, - "grad_norm": 0.008060215972363949, - "learning_rate": 0.0001999940363504067, - "loss": 46.0, - "step": 45478 - }, - { - "epoch": 3.477187147581092, - "grad_norm": 0.0021951785311102867, - "learning_rate": 0.0001999940360880835, - "loss": 46.0, - "step": 45479 - }, - { - "epoch": 3.477263604564482, - "grad_norm": 0.0014548609033226967, - "learning_rate": 0.00019999403582575452, - "loss": 46.0, - "step": 45480 - }, - { - "epoch": 3.4773400615478716, - "grad_norm": 0.0011106256861239672, - "learning_rate": 0.00019999403556341977, - "loss": 46.0, - "step": 45481 - }, - { - "epoch": 3.4774165185312613, - "grad_norm": 0.0019740527495741844, - "learning_rate": 0.00019999403530107922, - "loss": 46.0, - "step": 45482 - }, - { - "epoch": 3.477492975514651, - "grad_norm": 0.002642839215695858, - "learning_rate": 0.00019999403503873295, - "loss": 46.0, - "step": 45483 - }, - { - "epoch": 3.477569432498041, - "grad_norm": 0.004174418747425079, - "learning_rate": 0.0001999940347763809, - "loss": 46.0, - "step": 45484 - }, - { - "epoch": 3.4776458894814306, - "grad_norm": 0.0010072627337649465, - "learning_rate": 0.00019999403451402306, - "loss": 46.0, - "step": 45485 - }, - { - "epoch": 3.4777223464648204, - "grad_norm": 0.001531164743937552, - "learning_rate": 0.00019999403425165947, - "loss": 46.0, - "step": 45486 - }, - { - "epoch": 3.47779880344821, - "grad_norm": 0.0037696228828281164, - "learning_rate": 0.0001999940339892901, - "loss": 46.0, - "step": 45487 - }, - { - "epoch": 3.4778752604316, - "grad_norm": 0.007739607710391283, - "learning_rate": 0.00019999403372691497, - "loss": 46.0, - "step": 45488 - }, - { - "epoch": 3.477951717414989, - "grad_norm": 0.0010087728733196855, - "learning_rate": 0.0001999940334645341, - "loss": 46.0, - "step": 45489 - }, - { - "epoch": 3.478028174398379, - "grad_norm": 0.0040344009175896645, - "learning_rate": 0.0001999940332021474, - "loss": 46.0, - "step": 45490 - }, - { - "epoch": 3.4781046313817687, - "grad_norm": 0.003061852650716901, - "learning_rate": 0.00019999403293975495, - "loss": 46.0, - "step": 45491 - }, - { - "epoch": 3.4781810883651585, - "grad_norm": 0.0016354190884158015, - "learning_rate": 0.00019999403267735675, - "loss": 46.0, - "step": 45492 - }, - { - "epoch": 3.4782575453485483, - "grad_norm": 0.003986387047916651, - "learning_rate": 0.00019999403241495277, - "loss": 46.0, - "step": 45493 - }, - { - "epoch": 3.478334002331938, - "grad_norm": 0.0032610264606773853, - "learning_rate": 0.00019999403215254305, - "loss": 46.0, - "step": 45494 - }, - { - "epoch": 3.4784104593153278, - "grad_norm": 0.002538049826398492, - "learning_rate": 0.0001999940318901275, - "loss": 46.0, - "step": 45495 - }, - { - "epoch": 3.4784869162987175, - "grad_norm": 0.0019715926609933376, - "learning_rate": 0.00019999403162770623, - "loss": 46.0, - "step": 45496 - }, - { - "epoch": 3.4785633732821073, - "grad_norm": 0.002381403697654605, - "learning_rate": 0.00019999403136527916, - "loss": 46.0, - "step": 45497 - }, - { - "epoch": 3.478639830265497, - "grad_norm": 0.002238808199763298, - "learning_rate": 0.00019999403110284637, - "loss": 46.0, - "step": 45498 - }, - { - "epoch": 3.4787162872488864, - "grad_norm": 0.0012791279004886746, - "learning_rate": 0.00019999403084040775, - "loss": 46.0, - "step": 45499 - }, - { - "epoch": 3.478792744232276, - "grad_norm": 0.0014512179186567664, - "learning_rate": 0.0001999940305779634, - "loss": 46.0, - "step": 45500 - }, - { - "epoch": 3.478869201215666, - "grad_norm": 0.0026128538884222507, - "learning_rate": 0.00019999403031551328, - "loss": 46.0, - "step": 45501 - }, - { - "epoch": 3.4789456581990557, - "grad_norm": 0.0007514961762353778, - "learning_rate": 0.00019999403005305734, - "loss": 46.0, - "step": 45502 - }, - { - "epoch": 3.4790221151824454, - "grad_norm": 0.002741573378443718, - "learning_rate": 0.0001999940297905957, - "loss": 46.0, - "step": 45503 - }, - { - "epoch": 3.479098572165835, - "grad_norm": 0.0006237424095161259, - "learning_rate": 0.00019999402952812826, - "loss": 46.0, - "step": 45504 - }, - { - "epoch": 3.479175029149225, - "grad_norm": 0.0019310236675664783, - "learning_rate": 0.00019999402926565503, - "loss": 46.0, - "step": 45505 - }, - { - "epoch": 3.4792514861326147, - "grad_norm": 0.004290848504751921, - "learning_rate": 0.00019999402900317605, - "loss": 46.0, - "step": 45506 - }, - { - "epoch": 3.4793279431160045, - "grad_norm": 0.0010485415114089847, - "learning_rate": 0.0001999940287406913, - "loss": 46.0, - "step": 45507 - }, - { - "epoch": 3.4794044000993942, - "grad_norm": 0.0027531886007636786, - "learning_rate": 0.00019999402847820078, - "loss": 46.0, - "step": 45508 - }, - { - "epoch": 3.479480857082784, - "grad_norm": 0.0015785720897838473, - "learning_rate": 0.0001999940282157045, - "loss": 46.0, - "step": 45509 - }, - { - "epoch": 3.4795573140661737, - "grad_norm": 0.0017468953737989068, - "learning_rate": 0.00019999402795320247, - "loss": 46.0, - "step": 45510 - }, - { - "epoch": 3.479633771049563, - "grad_norm": 0.004634004086256027, - "learning_rate": 0.00019999402769069465, - "loss": 46.0, - "step": 45511 - }, - { - "epoch": 3.479710228032953, - "grad_norm": 0.003090350888669491, - "learning_rate": 0.00019999402742818103, - "loss": 46.0, - "step": 45512 - }, - { - "epoch": 3.4797866850163426, - "grad_norm": 0.0029485109262168407, - "learning_rate": 0.0001999940271656617, - "loss": 46.0, - "step": 45513 - }, - { - "epoch": 3.4798631419997323, - "grad_norm": 0.0016075458843261003, - "learning_rate": 0.00019999402690313653, - "loss": 46.0, - "step": 45514 - }, - { - "epoch": 3.479939598983122, - "grad_norm": 0.001572399283759296, - "learning_rate": 0.00019999402664060565, - "loss": 46.0, - "step": 45515 - }, - { - "epoch": 3.480016055966512, - "grad_norm": 0.0007134056650102139, - "learning_rate": 0.000199994026378069, - "loss": 46.0, - "step": 45516 - }, - { - "epoch": 3.4800925129499016, - "grad_norm": 0.004497717134654522, - "learning_rate": 0.00019999402611552657, - "loss": 46.0, - "step": 45517 - }, - { - "epoch": 3.4801689699332914, - "grad_norm": 0.0009015090181492269, - "learning_rate": 0.00019999402585297834, - "loss": 46.0, - "step": 45518 - }, - { - "epoch": 3.480245426916681, - "grad_norm": 0.0017260683234781027, - "learning_rate": 0.00019999402559042436, - "loss": 46.0, - "step": 45519 - }, - { - "epoch": 3.480321883900071, - "grad_norm": 0.0012865082826465368, - "learning_rate": 0.0001999940253278646, - "loss": 46.0, - "step": 45520 - }, - { - "epoch": 3.4803983408834602, - "grad_norm": 0.0013066261308267713, - "learning_rate": 0.0001999940250652991, - "loss": 46.0, - "step": 45521 - }, - { - "epoch": 3.48047479786685, - "grad_norm": 0.001797198667190969, - "learning_rate": 0.00019999402480272782, - "loss": 46.0, - "step": 45522 - }, - { - "epoch": 3.4805512548502398, - "grad_norm": 0.004325026646256447, - "learning_rate": 0.00019999402454015078, - "loss": 46.0, - "step": 45523 - }, - { - "epoch": 3.4806277118336295, - "grad_norm": 0.0018415595404803753, - "learning_rate": 0.00019999402427756794, - "loss": 46.0, - "step": 45524 - }, - { - "epoch": 3.4807041688170193, - "grad_norm": 0.001642862567678094, - "learning_rate": 0.00019999402401497935, - "loss": 46.0, - "step": 45525 - }, - { - "epoch": 3.480780625800409, - "grad_norm": 0.003109134268015623, - "learning_rate": 0.00019999402375238498, - "loss": 46.0, - "step": 45526 - }, - { - "epoch": 3.480857082783799, - "grad_norm": 0.0014782004291191697, - "learning_rate": 0.00019999402348978487, - "loss": 46.0, - "step": 45527 - }, - { - "epoch": 3.4809335397671886, - "grad_norm": 0.0032355119474232197, - "learning_rate": 0.00019999402322717894, - "loss": 46.0, - "step": 45528 - }, - { - "epoch": 3.4810099967505783, - "grad_norm": 0.0010524388635531068, - "learning_rate": 0.00019999402296456728, - "loss": 46.0, - "step": 45529 - }, - { - "epoch": 3.481086453733968, - "grad_norm": 0.0028826151974499226, - "learning_rate": 0.00019999402270194985, - "loss": 46.0, - "step": 45530 - }, - { - "epoch": 3.481162910717358, - "grad_norm": 0.004183790180832148, - "learning_rate": 0.00019999402243932665, - "loss": 46.0, - "step": 45531 - }, - { - "epoch": 3.4812393677007476, - "grad_norm": 0.001213785377331078, - "learning_rate": 0.0001999940221766977, - "loss": 46.0, - "step": 45532 - }, - { - "epoch": 3.481315824684137, - "grad_norm": 0.0037122094072401524, - "learning_rate": 0.00019999402191406293, - "loss": 46.0, - "step": 45533 - }, - { - "epoch": 3.4813922816675267, - "grad_norm": 0.003766071517020464, - "learning_rate": 0.00019999402165142243, - "loss": 46.0, - "step": 45534 - }, - { - "epoch": 3.4814687386509164, - "grad_norm": 0.0015654724556952715, - "learning_rate": 0.00019999402138877614, - "loss": 46.0, - "step": 45535 - }, - { - "epoch": 3.481545195634306, - "grad_norm": 0.0012794616632163525, - "learning_rate": 0.0001999940211261241, - "loss": 46.0, - "step": 45536 - }, - { - "epoch": 3.481621652617696, - "grad_norm": 0.0014855306362733245, - "learning_rate": 0.00019999402086346625, - "loss": 46.0, - "step": 45537 - }, - { - "epoch": 3.4816981096010857, - "grad_norm": 0.002996789524331689, - "learning_rate": 0.0001999940206008027, - "loss": 46.0, - "step": 45538 - }, - { - "epoch": 3.4817745665844755, - "grad_norm": 0.004285655450075865, - "learning_rate": 0.00019999402033813333, - "loss": 46.0, - "step": 45539 - }, - { - "epoch": 3.4818510235678652, - "grad_norm": 0.00040705816354602575, - "learning_rate": 0.0001999940200754582, - "loss": 46.0, - "step": 45540 - }, - { - "epoch": 3.481927480551255, - "grad_norm": 0.007175622973591089, - "learning_rate": 0.0001999940198127773, - "loss": 46.0, - "step": 45541 - }, - { - "epoch": 3.4820039375346443, - "grad_norm": 0.0025255833752453327, - "learning_rate": 0.00019999401955009063, - "loss": 46.0, - "step": 45542 - }, - { - "epoch": 3.482080394518034, - "grad_norm": 0.005446826573461294, - "learning_rate": 0.00019999401928739817, - "loss": 46.0, - "step": 45543 - }, - { - "epoch": 3.482156851501424, - "grad_norm": 0.00160220917314291, - "learning_rate": 0.0001999940190247, - "loss": 46.0, - "step": 45544 - }, - { - "epoch": 3.4822333084848136, - "grad_norm": 0.001040255301631987, - "learning_rate": 0.00019999401876199602, - "loss": 46.0, - "step": 45545 - }, - { - "epoch": 3.4823097654682034, - "grad_norm": 0.0009551150724291801, - "learning_rate": 0.0001999940184992863, - "loss": 46.0, - "step": 45546 - }, - { - "epoch": 3.482386222451593, - "grad_norm": 0.0016583276446908712, - "learning_rate": 0.00019999401823657075, - "loss": 46.0, - "step": 45547 - }, - { - "epoch": 3.482462679434983, - "grad_norm": 0.0017576119862496853, - "learning_rate": 0.00019999401797384948, - "loss": 46.0, - "step": 45548 - }, - { - "epoch": 3.4825391364183726, - "grad_norm": 0.0009687562705948949, - "learning_rate": 0.00019999401771112244, - "loss": 46.0, - "step": 45549 - }, - { - "epoch": 3.4826155934017624, - "grad_norm": 0.0018143875058740377, - "learning_rate": 0.00019999401744838963, - "loss": 46.0, - "step": 45550 - }, - { - "epoch": 3.482692050385152, - "grad_norm": 0.003955061547458172, - "learning_rate": 0.00019999401718565104, - "loss": 46.0, - "step": 45551 - }, - { - "epoch": 3.482768507368542, - "grad_norm": 0.001455305959098041, - "learning_rate": 0.0001999940169229067, - "loss": 46.0, - "step": 45552 - }, - { - "epoch": 3.4828449643519317, - "grad_norm": 0.0017940544057637453, - "learning_rate": 0.00019999401666015657, - "loss": 46.0, - "step": 45553 - }, - { - "epoch": 3.4829214213353215, - "grad_norm": 0.00205013039521873, - "learning_rate": 0.00019999401639740066, - "loss": 46.0, - "step": 45554 - }, - { - "epoch": 3.4829978783187108, - "grad_norm": 0.000726459373254329, - "learning_rate": 0.000199994016134639, - "loss": 46.0, - "step": 45555 - }, - { - "epoch": 3.4830743353021005, - "grad_norm": 0.0011215859558433294, - "learning_rate": 0.00019999401587187158, - "loss": 46.0, - "step": 45556 - }, - { - "epoch": 3.4831507922854903, - "grad_norm": 0.0013050147099420428, - "learning_rate": 0.00019999401560909835, - "loss": 46.0, - "step": 45557 - }, - { - "epoch": 3.48322724926888, - "grad_norm": 0.004671876318752766, - "learning_rate": 0.00019999401534631937, - "loss": 46.0, - "step": 45558 - }, - { - "epoch": 3.48330370625227, - "grad_norm": 0.0018706230912357569, - "learning_rate": 0.00019999401508353465, - "loss": 46.0, - "step": 45559 - }, - { - "epoch": 3.4833801632356596, - "grad_norm": 0.0017690241802483797, - "learning_rate": 0.00019999401482074413, - "loss": 46.0, - "step": 45560 - }, - { - "epoch": 3.4834566202190493, - "grad_norm": 0.0016352059319615364, - "learning_rate": 0.00019999401455794783, - "loss": 46.0, - "step": 45561 - }, - { - "epoch": 3.483533077202439, - "grad_norm": 0.0016982720699161291, - "learning_rate": 0.0001999940142951458, - "loss": 46.0, - "step": 45562 - }, - { - "epoch": 3.483609534185829, - "grad_norm": 0.002577295759692788, - "learning_rate": 0.000199994014032338, - "loss": 46.0, - "step": 45563 - }, - { - "epoch": 3.483685991169218, - "grad_norm": 0.0022148285061120987, - "learning_rate": 0.0001999940137695244, - "loss": 46.0, - "step": 45564 - }, - { - "epoch": 3.483762448152608, - "grad_norm": 0.0012893271632492542, - "learning_rate": 0.00019999401350670506, - "loss": 46.0, - "step": 45565 - }, - { - "epoch": 3.4838389051359977, - "grad_norm": 0.0050181616097688675, - "learning_rate": 0.00019999401324387992, - "loss": 46.0, - "step": 45566 - }, - { - "epoch": 3.4839153621193875, - "grad_norm": 0.003126205410808325, - "learning_rate": 0.00019999401298104904, - "loss": 46.0, - "step": 45567 - }, - { - "epoch": 3.483991819102777, - "grad_norm": 0.0019700892735272646, - "learning_rate": 0.00019999401271821236, - "loss": 46.0, - "step": 45568 - }, - { - "epoch": 3.484068276086167, - "grad_norm": 0.002283578971400857, - "learning_rate": 0.00019999401245536996, - "loss": 46.0, - "step": 45569 - }, - { - "epoch": 3.4841447330695567, - "grad_norm": 0.002191627398133278, - "learning_rate": 0.00019999401219252173, - "loss": 46.0, - "step": 45570 - }, - { - "epoch": 3.4842211900529465, - "grad_norm": 0.0018031207146123052, - "learning_rate": 0.00019999401192966775, - "loss": 46.0, - "step": 45571 - }, - { - "epoch": 3.4842976470363363, - "grad_norm": 0.0018127438379451632, - "learning_rate": 0.00019999401166680803, - "loss": 46.0, - "step": 45572 - }, - { - "epoch": 3.484374104019726, - "grad_norm": 0.002399756805971265, - "learning_rate": 0.0001999940114039425, - "loss": 46.0, - "step": 45573 - }, - { - "epoch": 3.484450561003116, - "grad_norm": 0.004946081433445215, - "learning_rate": 0.00019999401114107124, - "loss": 46.0, - "step": 45574 - }, - { - "epoch": 3.4845270179865055, - "grad_norm": 0.0016603940166532993, - "learning_rate": 0.00019999401087819418, - "loss": 46.0, - "step": 45575 - }, - { - "epoch": 3.4846034749698953, - "grad_norm": 0.0007328182109631598, - "learning_rate": 0.0001999940106153114, - "loss": 46.0, - "step": 45576 - }, - { - "epoch": 3.4846799319532846, - "grad_norm": 0.008194363676011562, - "learning_rate": 0.0001999940103524228, - "loss": 46.0, - "step": 45577 - }, - { - "epoch": 3.4847563889366744, - "grad_norm": 0.002520754700526595, - "learning_rate": 0.00019999401008952844, - "loss": 46.0, - "step": 45578 - }, - { - "epoch": 3.484832845920064, - "grad_norm": 0.0013752371305599809, - "learning_rate": 0.00019999400982662833, - "loss": 46.0, - "step": 45579 - }, - { - "epoch": 3.484909302903454, - "grad_norm": 0.0017104746075347066, - "learning_rate": 0.00019999400956372242, - "loss": 46.0, - "step": 45580 - }, - { - "epoch": 3.4849857598868437, - "grad_norm": 0.0018003035802394152, - "learning_rate": 0.00019999400930081077, - "loss": 46.0, - "step": 45581 - }, - { - "epoch": 3.4850622168702334, - "grad_norm": 0.002289471449330449, - "learning_rate": 0.00019999400903789334, - "loss": 46.0, - "step": 45582 - }, - { - "epoch": 3.485138673853623, - "grad_norm": 0.0019717414397746325, - "learning_rate": 0.00019999400877497014, - "loss": 46.0, - "step": 45583 - }, - { - "epoch": 3.485215130837013, - "grad_norm": 0.0035805185325443745, - "learning_rate": 0.00019999400851204117, - "loss": 46.0, - "step": 45584 - }, - { - "epoch": 3.4852915878204027, - "grad_norm": 0.0014128257753327489, - "learning_rate": 0.00019999400824910645, - "loss": 46.0, - "step": 45585 - }, - { - "epoch": 3.485368044803792, - "grad_norm": 0.0025227379519492388, - "learning_rate": 0.00019999400798616592, - "loss": 46.0, - "step": 45586 - }, - { - "epoch": 3.485444501787182, - "grad_norm": 0.0009539916645735502, - "learning_rate": 0.00019999400772321966, - "loss": 46.0, - "step": 45587 - }, - { - "epoch": 3.4855209587705716, - "grad_norm": 0.006139744073152542, - "learning_rate": 0.00019999400746026762, - "loss": 46.0, - "step": 45588 - }, - { - "epoch": 3.4855974157539613, - "grad_norm": 0.001246851752512157, - "learning_rate": 0.00019999400719730977, - "loss": 46.0, - "step": 45589 - }, - { - "epoch": 3.485673872737351, - "grad_norm": 0.0020244382321834564, - "learning_rate": 0.00019999400693434621, - "loss": 46.0, - "step": 45590 - }, - { - "epoch": 3.485750329720741, - "grad_norm": 0.0023067768197506666, - "learning_rate": 0.00019999400667137685, - "loss": 46.0, - "step": 45591 - }, - { - "epoch": 3.4858267867041306, - "grad_norm": 0.001433484605513513, - "learning_rate": 0.00019999400640840175, - "loss": 46.0, - "step": 45592 - }, - { - "epoch": 3.4859032436875204, - "grad_norm": 0.002376784570515156, - "learning_rate": 0.00019999400614542087, - "loss": 46.0, - "step": 45593 - }, - { - "epoch": 3.48597970067091, - "grad_norm": 0.0032361838966608047, - "learning_rate": 0.0001999940058824342, - "loss": 46.0, - "step": 45594 - }, - { - "epoch": 3.4860561576543, - "grad_norm": 0.0020656774286180735, - "learning_rate": 0.00019999400561944178, - "loss": 46.0, - "step": 45595 - }, - { - "epoch": 3.4861326146376896, - "grad_norm": 0.0014405029360204935, - "learning_rate": 0.00019999400535644356, - "loss": 46.0, - "step": 45596 - }, - { - "epoch": 3.4862090716210794, - "grad_norm": 0.002200776943936944, - "learning_rate": 0.00019999400509343958, - "loss": 46.0, - "step": 45597 - }, - { - "epoch": 3.4862855286044687, - "grad_norm": 0.001321990042924881, - "learning_rate": 0.00019999400483042984, - "loss": 46.0, - "step": 45598 - }, - { - "epoch": 3.4863619855878585, - "grad_norm": 0.0026791421696543694, - "learning_rate": 0.00019999400456741437, - "loss": 46.0, - "step": 45599 - }, - { - "epoch": 3.4864384425712482, - "grad_norm": 0.002942162798717618, - "learning_rate": 0.00019999400430439307, - "loss": 46.0, - "step": 45600 - }, - { - "epoch": 3.486514899554638, - "grad_norm": 0.0013769024517387152, - "learning_rate": 0.00019999400404136603, - "loss": 46.0, - "step": 45601 - }, - { - "epoch": 3.4865913565380278, - "grad_norm": 0.0015389503678306937, - "learning_rate": 0.00019999400377833322, - "loss": 46.0, - "step": 45602 - }, - { - "epoch": 3.4866678135214175, - "grad_norm": 0.0013089972781017423, - "learning_rate": 0.00019999400351529466, - "loss": 46.0, - "step": 45603 - }, - { - "epoch": 3.4867442705048073, - "grad_norm": 0.0017713319975882769, - "learning_rate": 0.0001999940032522503, - "loss": 46.0, - "step": 45604 - }, - { - "epoch": 3.486820727488197, - "grad_norm": 0.0012855998938903213, - "learning_rate": 0.0001999940029892002, - "loss": 46.0, - "step": 45605 - }, - { - "epoch": 3.486897184471587, - "grad_norm": 0.0017893895274028182, - "learning_rate": 0.0001999940027261443, - "loss": 46.0, - "step": 45606 - }, - { - "epoch": 3.4869736414549766, - "grad_norm": 0.003058493137359619, - "learning_rate": 0.00019999400246308263, - "loss": 46.0, - "step": 45607 - }, - { - "epoch": 3.487050098438366, - "grad_norm": 0.0010416876757517457, - "learning_rate": 0.0001999940022000152, - "loss": 46.0, - "step": 45608 - }, - { - "epoch": 3.4871265554217556, - "grad_norm": 0.0020272028632462025, - "learning_rate": 0.000199994001936942, - "loss": 46.0, - "step": 45609 - }, - { - "epoch": 3.4872030124051454, - "grad_norm": 0.0015325198182836175, - "learning_rate": 0.00019999400167386303, - "loss": 46.0, - "step": 45610 - }, - { - "epoch": 3.487279469388535, - "grad_norm": 0.006384000647813082, - "learning_rate": 0.0001999940014107783, - "loss": 46.0, - "step": 45611 - }, - { - "epoch": 3.487355926371925, - "grad_norm": 0.003559096483513713, - "learning_rate": 0.00019999400114768782, - "loss": 46.0, - "step": 45612 - }, - { - "epoch": 3.4874323833553147, - "grad_norm": 0.0029365464579313993, - "learning_rate": 0.00019999400088459152, - "loss": 46.0, - "step": 45613 - }, - { - "epoch": 3.4875088403387045, - "grad_norm": 0.0008497110102325678, - "learning_rate": 0.0001999940006214895, - "loss": 46.0, - "step": 45614 - }, - { - "epoch": 3.487585297322094, - "grad_norm": 0.0021616590674966574, - "learning_rate": 0.0001999940003583817, - "loss": 46.0, - "step": 45615 - }, - { - "epoch": 3.487661754305484, - "grad_norm": 0.0010892166756093502, - "learning_rate": 0.0001999940000952681, - "loss": 46.0, - "step": 45616 - }, - { - "epoch": 3.4877382112888737, - "grad_norm": 0.004651820287108421, - "learning_rate": 0.00019999399983214875, - "loss": 46.0, - "step": 45617 - }, - { - "epoch": 3.4878146682722635, - "grad_norm": 0.0022322628647089005, - "learning_rate": 0.00019999399956902364, - "loss": 46.0, - "step": 45618 - }, - { - "epoch": 3.4878911252556533, - "grad_norm": 0.002929776208475232, - "learning_rate": 0.00019999399930589276, - "loss": 46.0, - "step": 45619 - }, - { - "epoch": 3.4879675822390426, - "grad_norm": 0.005450090393424034, - "learning_rate": 0.00019999399904275608, - "loss": 46.0, - "step": 45620 - }, - { - "epoch": 3.4880440392224323, - "grad_norm": 0.0018909627106040716, - "learning_rate": 0.00019999399877961366, - "loss": 46.0, - "step": 45621 - }, - { - "epoch": 3.488120496205822, - "grad_norm": 0.0013319356366991997, - "learning_rate": 0.00019999399851646548, - "loss": 46.0, - "step": 45622 - }, - { - "epoch": 3.488196953189212, - "grad_norm": 0.003321138909086585, - "learning_rate": 0.0001999939982533115, - "loss": 46.0, - "step": 45623 - }, - { - "epoch": 3.4882734101726016, - "grad_norm": 0.0013424414210021496, - "learning_rate": 0.00019999399799015176, - "loss": 46.0, - "step": 45624 - }, - { - "epoch": 3.4883498671559914, - "grad_norm": 0.0034013569820672274, - "learning_rate": 0.00019999399772698627, - "loss": 46.0, - "step": 45625 - }, - { - "epoch": 3.488426324139381, - "grad_norm": 0.0010715972166508436, - "learning_rate": 0.00019999399746381503, - "loss": 46.0, - "step": 45626 - }, - { - "epoch": 3.488502781122771, - "grad_norm": 0.003295557340607047, - "learning_rate": 0.00019999399720063796, - "loss": 46.0, - "step": 45627 - }, - { - "epoch": 3.4885792381061607, - "grad_norm": 0.002561314729973674, - "learning_rate": 0.00019999399693745515, - "loss": 46.0, - "step": 45628 - }, - { - "epoch": 3.4886556950895504, - "grad_norm": 0.0029351990669965744, - "learning_rate": 0.0001999939966742666, - "loss": 46.0, - "step": 45629 - }, - { - "epoch": 3.4887321520729397, - "grad_norm": 0.004409323912113905, - "learning_rate": 0.00019999399641107223, - "loss": 46.0, - "step": 45630 - }, - { - "epoch": 3.4888086090563295, - "grad_norm": 0.0012277784990146756, - "learning_rate": 0.0001999939961478721, - "loss": 46.0, - "step": 45631 - }, - { - "epoch": 3.4888850660397193, - "grad_norm": 0.002300865249708295, - "learning_rate": 0.00019999399588466622, - "loss": 46.0, - "step": 45632 - }, - { - "epoch": 3.488961523023109, - "grad_norm": 0.0021461935248225927, - "learning_rate": 0.00019999399562145456, - "loss": 46.0, - "step": 45633 - }, - { - "epoch": 3.489037980006499, - "grad_norm": 0.0006051812088117003, - "learning_rate": 0.00019999399535823717, - "loss": 46.0, - "step": 45634 - }, - { - "epoch": 3.4891144369898885, - "grad_norm": 0.0026908665895462036, - "learning_rate": 0.00019999399509501397, - "loss": 46.0, - "step": 45635 - }, - { - "epoch": 3.4891908939732783, - "grad_norm": 0.0023808779660612345, - "learning_rate": 0.000199993994831785, - "loss": 46.0, - "step": 45636 - }, - { - "epoch": 3.489267350956668, - "grad_norm": 0.002033111173659563, - "learning_rate": 0.00019999399456855027, - "loss": 46.0, - "step": 45637 - }, - { - "epoch": 3.489343807940058, - "grad_norm": 0.0018457021797075868, - "learning_rate": 0.00019999399430530975, - "loss": 46.0, - "step": 45638 - }, - { - "epoch": 3.4894202649234476, - "grad_norm": 0.0018687910633161664, - "learning_rate": 0.0001999939940420635, - "loss": 46.0, - "step": 45639 - }, - { - "epoch": 3.4894967219068374, - "grad_norm": 0.001373398001305759, - "learning_rate": 0.00019999399377881147, - "loss": 46.0, - "step": 45640 - }, - { - "epoch": 3.489573178890227, - "grad_norm": 0.0008144521270878613, - "learning_rate": 0.00019999399351555366, - "loss": 46.0, - "step": 45641 - }, - { - "epoch": 3.4896496358736164, - "grad_norm": 0.001465918612666428, - "learning_rate": 0.00019999399325229008, - "loss": 46.0, - "step": 45642 - }, - { - "epoch": 3.489726092857006, - "grad_norm": 0.0013177195796743035, - "learning_rate": 0.00019999399298902072, - "loss": 46.0, - "step": 45643 - }, - { - "epoch": 3.489802549840396, - "grad_norm": 0.000930647598579526, - "learning_rate": 0.0001999939927257456, - "loss": 46.0, - "step": 45644 - }, - { - "epoch": 3.4898790068237857, - "grad_norm": 0.002567956456914544, - "learning_rate": 0.00019999399246246473, - "loss": 46.0, - "step": 45645 - }, - { - "epoch": 3.4899554638071755, - "grad_norm": 0.002925882348790765, - "learning_rate": 0.00019999399219917808, - "loss": 46.0, - "step": 45646 - }, - { - "epoch": 3.4900319207905652, - "grad_norm": 0.0019909306429326534, - "learning_rate": 0.00019999399193588565, - "loss": 46.0, - "step": 45647 - }, - { - "epoch": 3.490108377773955, - "grad_norm": 0.0009055719710886478, - "learning_rate": 0.00019999399167258748, - "loss": 46.0, - "step": 45648 - }, - { - "epoch": 3.4901848347573448, - "grad_norm": 0.0018317571375519037, - "learning_rate": 0.0001999939914092835, - "loss": 46.0, - "step": 45649 - }, - { - "epoch": 3.4902612917407345, - "grad_norm": 0.002224199939519167, - "learning_rate": 0.00019999399114597376, - "loss": 46.0, - "step": 45650 - }, - { - "epoch": 3.4903377487241243, - "grad_norm": 0.0018536419374868274, - "learning_rate": 0.0001999939908826583, - "loss": 46.0, - "step": 45651 - }, - { - "epoch": 3.4904142057075136, - "grad_norm": 0.00274038128554821, - "learning_rate": 0.000199993990619337, - "loss": 46.0, - "step": 45652 - }, - { - "epoch": 3.4904906626909034, - "grad_norm": 0.001384408911690116, - "learning_rate": 0.00019999399035600996, - "loss": 46.0, - "step": 45653 - }, - { - "epoch": 3.490567119674293, - "grad_norm": 0.002141014439985156, - "learning_rate": 0.00019999399009267715, - "loss": 46.0, - "step": 45654 - }, - { - "epoch": 3.490643576657683, - "grad_norm": 0.00203525903634727, - "learning_rate": 0.0001999939898293386, - "loss": 46.0, - "step": 45655 - }, - { - "epoch": 3.4907200336410726, - "grad_norm": 0.004908971954137087, - "learning_rate": 0.00019999398956599423, - "loss": 46.0, - "step": 45656 - }, - { - "epoch": 3.4907964906244624, - "grad_norm": 0.0012880604481324553, - "learning_rate": 0.00019999398930264413, - "loss": 46.0, - "step": 45657 - }, - { - "epoch": 3.490872947607852, - "grad_norm": 0.002419913886114955, - "learning_rate": 0.00019999398903928825, - "loss": 46.0, - "step": 45658 - }, - { - "epoch": 3.490949404591242, - "grad_norm": 0.001576408394612372, - "learning_rate": 0.00019999398877592662, - "loss": 46.0, - "step": 45659 - }, - { - "epoch": 3.4910258615746317, - "grad_norm": 0.0012453236849978566, - "learning_rate": 0.0001999939885125592, - "loss": 46.0, - "step": 45660 - }, - { - "epoch": 3.4911023185580214, - "grad_norm": 0.0011135872919112444, - "learning_rate": 0.000199993988249186, - "loss": 46.0, - "step": 45661 - }, - { - "epoch": 3.491178775541411, - "grad_norm": 0.004250650759786367, - "learning_rate": 0.00019999398798580705, - "loss": 46.0, - "step": 45662 - }, - { - "epoch": 3.491255232524801, - "grad_norm": 0.0008822313393466175, - "learning_rate": 0.0001999939877224223, - "loss": 46.0, - "step": 45663 - }, - { - "epoch": 3.4913316895081903, - "grad_norm": 0.0008011918398551643, - "learning_rate": 0.0001999939874590318, - "loss": 46.0, - "step": 45664 - }, - { - "epoch": 3.49140814649158, - "grad_norm": 0.0026196420658379793, - "learning_rate": 0.00019999398719563555, - "loss": 46.0, - "step": 45665 - }, - { - "epoch": 3.49148460347497, - "grad_norm": 0.002392460824921727, - "learning_rate": 0.0001999939869322335, - "loss": 46.0, - "step": 45666 - }, - { - "epoch": 3.4915610604583596, - "grad_norm": 0.0029384037479758263, - "learning_rate": 0.00019999398666882572, - "loss": 46.0, - "step": 45667 - }, - { - "epoch": 3.4916375174417493, - "grad_norm": 0.003067390527576208, - "learning_rate": 0.0001999939864054121, - "loss": 46.0, - "step": 45668 - }, - { - "epoch": 3.491713974425139, - "grad_norm": 0.0007817524601705372, - "learning_rate": 0.00019999398614199278, - "loss": 46.0, - "step": 45669 - }, - { - "epoch": 3.491790431408529, - "grad_norm": 0.0009064714540727437, - "learning_rate": 0.00019999398587856767, - "loss": 46.0, - "step": 45670 - }, - { - "epoch": 3.4918668883919186, - "grad_norm": 0.002751914318650961, - "learning_rate": 0.0001999939856151368, - "loss": 46.0, - "step": 45671 - }, - { - "epoch": 3.4919433453753084, - "grad_norm": 0.0027233222499489784, - "learning_rate": 0.00019999398535170014, - "loss": 46.0, - "step": 45672 - }, - { - "epoch": 3.4920198023586977, - "grad_norm": 0.004428385756909847, - "learning_rate": 0.00019999398508825772, - "loss": 46.0, - "step": 45673 - }, - { - "epoch": 3.4920962593420874, - "grad_norm": 0.0024542836472392082, - "learning_rate": 0.00019999398482480952, - "loss": 46.0, - "step": 45674 - }, - { - "epoch": 3.492172716325477, - "grad_norm": 0.003983583766967058, - "learning_rate": 0.00019999398456135557, - "loss": 46.0, - "step": 45675 - }, - { - "epoch": 3.492249173308867, - "grad_norm": 0.005079290829598904, - "learning_rate": 0.00019999398429789586, - "loss": 46.0, - "step": 45676 - }, - { - "epoch": 3.4923256302922567, - "grad_norm": 0.0018149176612496376, - "learning_rate": 0.00019999398403443034, - "loss": 46.0, - "step": 45677 - }, - { - "epoch": 3.4924020872756465, - "grad_norm": 0.010486841201782227, - "learning_rate": 0.0001999939837709591, - "loss": 46.0, - "step": 45678 - }, - { - "epoch": 3.4924785442590363, - "grad_norm": 0.004170822910964489, - "learning_rate": 0.00019999398350748206, - "loss": 46.0, - "step": 45679 - }, - { - "epoch": 3.492555001242426, - "grad_norm": 0.003301135031506419, - "learning_rate": 0.00019999398324399925, - "loss": 46.0, - "step": 45680 - }, - { - "epoch": 3.4926314582258158, - "grad_norm": 0.0015523623442277312, - "learning_rate": 0.0001999939829805107, - "loss": 46.0, - "step": 45681 - }, - { - "epoch": 3.4927079152092055, - "grad_norm": 0.0013399841263890266, - "learning_rate": 0.00019999398271701636, - "loss": 46.0, - "step": 45682 - }, - { - "epoch": 3.4927843721925953, - "grad_norm": 0.002554279286414385, - "learning_rate": 0.00019999398245351623, - "loss": 46.0, - "step": 45683 - }, - { - "epoch": 3.492860829175985, - "grad_norm": 0.003012260189279914, - "learning_rate": 0.00019999398219001035, - "loss": 46.0, - "step": 45684 - }, - { - "epoch": 3.492937286159375, - "grad_norm": 0.006762445904314518, - "learning_rate": 0.0001999939819264987, - "loss": 46.0, - "step": 45685 - }, - { - "epoch": 3.493013743142764, - "grad_norm": 0.001363743213005364, - "learning_rate": 0.0001999939816629813, - "loss": 46.0, - "step": 45686 - }, - { - "epoch": 3.493090200126154, - "grad_norm": 0.0015602060593664646, - "learning_rate": 0.0001999939813994581, - "loss": 46.0, - "step": 45687 - }, - { - "epoch": 3.4931666571095437, - "grad_norm": 0.0019296335522085428, - "learning_rate": 0.00019999398113592916, - "loss": 46.0, - "step": 45688 - }, - { - "epoch": 3.4932431140929334, - "grad_norm": 0.002067565219476819, - "learning_rate": 0.00019999398087239444, - "loss": 46.0, - "step": 45689 - }, - { - "epoch": 3.493319571076323, - "grad_norm": 0.006629869341850281, - "learning_rate": 0.00019999398060885395, - "loss": 46.0, - "step": 45690 - }, - { - "epoch": 3.493396028059713, - "grad_norm": 0.0012023057788610458, - "learning_rate": 0.00019999398034530768, - "loss": 46.0, - "step": 45691 - }, - { - "epoch": 3.4934724850431027, - "grad_norm": 0.0030029453337192535, - "learning_rate": 0.00019999398008175562, - "loss": 46.0, - "step": 45692 - }, - { - "epoch": 3.4935489420264925, - "grad_norm": 0.001269257627427578, - "learning_rate": 0.00019999397981819783, - "loss": 46.0, - "step": 45693 - }, - { - "epoch": 3.4936253990098822, - "grad_norm": 0.00343915494158864, - "learning_rate": 0.00019999397955463425, - "loss": 46.0, - "step": 45694 - }, - { - "epoch": 3.4937018559932715, - "grad_norm": 0.003538409247994423, - "learning_rate": 0.00019999397929106491, - "loss": 46.0, - "step": 45695 - }, - { - "epoch": 3.4937783129766613, - "grad_norm": 0.002054263139143586, - "learning_rate": 0.0001999939790274898, - "loss": 46.0, - "step": 45696 - }, - { - "epoch": 3.493854769960051, - "grad_norm": 0.0019038824830204248, - "learning_rate": 0.00019999397876390896, - "loss": 46.0, - "step": 45697 - }, - { - "epoch": 3.493931226943441, - "grad_norm": 0.0010791143868118525, - "learning_rate": 0.00019999397850032228, - "loss": 46.0, - "step": 45698 - }, - { - "epoch": 3.4940076839268306, - "grad_norm": 0.001404003705829382, - "learning_rate": 0.00019999397823672988, - "loss": 46.0, - "step": 45699 - }, - { - "epoch": 3.4940841409102203, - "grad_norm": 0.00282001169398427, - "learning_rate": 0.00019999397797313168, - "loss": 46.0, - "step": 45700 - }, - { - "epoch": 3.49416059789361, - "grad_norm": 0.0023398713674396276, - "learning_rate": 0.00019999397770952774, - "loss": 46.0, - "step": 45701 - }, - { - "epoch": 3.494237054877, - "grad_norm": 0.0038184532895684242, - "learning_rate": 0.00019999397744591802, - "loss": 46.0, - "step": 45702 - }, - { - "epoch": 3.4943135118603896, - "grad_norm": 0.0041579315438866615, - "learning_rate": 0.00019999397718230253, - "loss": 46.0, - "step": 45703 - }, - { - "epoch": 3.4943899688437794, - "grad_norm": 0.0027121994644403458, - "learning_rate": 0.00019999397691868127, - "loss": 46.0, - "step": 45704 - }, - { - "epoch": 3.494466425827169, - "grad_norm": 0.0014264617348089814, - "learning_rate": 0.00019999397665505423, - "loss": 46.0, - "step": 45705 - }, - { - "epoch": 3.494542882810559, - "grad_norm": 0.003969315439462662, - "learning_rate": 0.00019999397639142144, - "loss": 46.0, - "step": 45706 - }, - { - "epoch": 3.4946193397939487, - "grad_norm": 0.0018643800867721438, - "learning_rate": 0.00019999397612778289, - "loss": 46.0, - "step": 45707 - }, - { - "epoch": 3.494695796777338, - "grad_norm": 0.0024837409146130085, - "learning_rate": 0.00019999397586413853, - "loss": 46.0, - "step": 45708 - }, - { - "epoch": 3.4947722537607278, - "grad_norm": 0.0015021396102383733, - "learning_rate": 0.00019999397560048842, - "loss": 46.0, - "step": 45709 - }, - { - "epoch": 3.4948487107441175, - "grad_norm": 0.004136497620493174, - "learning_rate": 0.00019999397533683255, - "loss": 46.0, - "step": 45710 - }, - { - "epoch": 3.4949251677275073, - "grad_norm": 0.0015473722014576197, - "learning_rate": 0.0001999939750731709, - "loss": 46.0, - "step": 45711 - }, - { - "epoch": 3.495001624710897, - "grad_norm": 0.00628721434623003, - "learning_rate": 0.00019999397480950347, - "loss": 46.0, - "step": 45712 - }, - { - "epoch": 3.495078081694287, - "grad_norm": 0.0025035073049366474, - "learning_rate": 0.0001999939745458303, - "loss": 46.0, - "step": 45713 - }, - { - "epoch": 3.4951545386776766, - "grad_norm": 0.0012276730267331004, - "learning_rate": 0.00019999397428215136, - "loss": 46.0, - "step": 45714 - }, - { - "epoch": 3.4952309956610663, - "grad_norm": 0.001920933835208416, - "learning_rate": 0.00019999397401846664, - "loss": 46.0, - "step": 45715 - }, - { - "epoch": 3.495307452644456, - "grad_norm": 0.0023868796415627003, - "learning_rate": 0.00019999397375477615, - "loss": 46.0, - "step": 45716 - }, - { - "epoch": 3.4953839096278454, - "grad_norm": 0.0030802234541624784, - "learning_rate": 0.00019999397349107988, - "loss": 46.0, - "step": 45717 - }, - { - "epoch": 3.495460366611235, - "grad_norm": 0.004375213757157326, - "learning_rate": 0.00019999397322737785, - "loss": 46.0, - "step": 45718 - }, - { - "epoch": 3.495536823594625, - "grad_norm": 0.004127439111471176, - "learning_rate": 0.00019999397296367006, - "loss": 46.0, - "step": 45719 - }, - { - "epoch": 3.4956132805780147, - "grad_norm": 0.003457908285781741, - "learning_rate": 0.0001999939726999565, - "loss": 46.0, - "step": 45720 - }, - { - "epoch": 3.4956897375614044, - "grad_norm": 0.0011295400327071548, - "learning_rate": 0.00019999397243623715, - "loss": 46.0, - "step": 45721 - }, - { - "epoch": 3.495766194544794, - "grad_norm": 0.0027088159695267677, - "learning_rate": 0.00019999397217251207, - "loss": 46.0, - "step": 45722 - }, - { - "epoch": 3.495842651528184, - "grad_norm": 0.0008764645899645984, - "learning_rate": 0.00019999397190878117, - "loss": 46.0, - "step": 45723 - }, - { - "epoch": 3.4959191085115737, - "grad_norm": 0.002637633355334401, - "learning_rate": 0.00019999397164504451, - "loss": 46.0, - "step": 45724 - }, - { - "epoch": 3.4959955654949635, - "grad_norm": 0.0029878916684538126, - "learning_rate": 0.00019999397138130212, - "loss": 46.0, - "step": 45725 - }, - { - "epoch": 3.4960720224783532, - "grad_norm": 0.001272466965019703, - "learning_rate": 0.00019999397111755395, - "loss": 46.0, - "step": 45726 - }, - { - "epoch": 3.496148479461743, - "grad_norm": 0.0015955364797264338, - "learning_rate": 0.00019999397085379998, - "loss": 46.0, - "step": 45727 - }, - { - "epoch": 3.4962249364451328, - "grad_norm": 0.0010294899111613631, - "learning_rate": 0.00019999397059004026, - "loss": 46.0, - "step": 45728 - }, - { - "epoch": 3.496301393428522, - "grad_norm": 0.0020012736786156893, - "learning_rate": 0.0001999939703262748, - "loss": 46.0, - "step": 45729 - }, - { - "epoch": 3.496377850411912, - "grad_norm": 0.0032012115698307753, - "learning_rate": 0.00019999397006250353, - "loss": 46.0, - "step": 45730 - }, - { - "epoch": 3.4964543073953016, - "grad_norm": 0.0018668158445507288, - "learning_rate": 0.0001999939697987265, - "loss": 46.0, - "step": 45731 - }, - { - "epoch": 3.4965307643786914, - "grad_norm": 0.0019320003921166062, - "learning_rate": 0.0001999939695349437, - "loss": 46.0, - "step": 45732 - }, - { - "epoch": 3.496607221362081, - "grad_norm": 0.002725800732150674, - "learning_rate": 0.00019999396927115513, - "loss": 46.0, - "step": 45733 - }, - { - "epoch": 3.496683678345471, - "grad_norm": 0.0011421084636822343, - "learning_rate": 0.0001999939690073608, - "loss": 46.0, - "step": 45734 - }, - { - "epoch": 3.4967601353288607, - "grad_norm": 0.0022823053877800703, - "learning_rate": 0.00019999396874356072, - "loss": 46.0, - "step": 45735 - }, - { - "epoch": 3.4968365923122504, - "grad_norm": 0.0016514388844370842, - "learning_rate": 0.00019999396847975482, - "loss": 46.0, - "step": 45736 - }, - { - "epoch": 3.49691304929564, - "grad_norm": 0.002985776402056217, - "learning_rate": 0.0001999939682159432, - "loss": 46.0, - "step": 45737 - }, - { - "epoch": 3.49698950627903, - "grad_norm": 0.001532039255835116, - "learning_rate": 0.00019999396795212577, - "loss": 46.0, - "step": 45738 - }, - { - "epoch": 3.4970659632624193, - "grad_norm": 0.0018588689854368567, - "learning_rate": 0.0001999939676883026, - "loss": 46.0, - "step": 45739 - }, - { - "epoch": 3.497142420245809, - "grad_norm": 0.002633602824062109, - "learning_rate": 0.00019999396742447365, - "loss": 46.0, - "step": 45740 - }, - { - "epoch": 3.4972188772291988, - "grad_norm": 0.0021706006955355406, - "learning_rate": 0.0001999939671606389, - "loss": 46.0, - "step": 45741 - }, - { - "epoch": 3.4972953342125885, - "grad_norm": 0.002011735225096345, - "learning_rate": 0.00019999396689679845, - "loss": 46.0, - "step": 45742 - }, - { - "epoch": 3.4973717911959783, - "grad_norm": 0.0009335528593510389, - "learning_rate": 0.00019999396663295218, - "loss": 46.0, - "step": 45743 - }, - { - "epoch": 3.497448248179368, - "grad_norm": 0.00796280987560749, - "learning_rate": 0.00019999396636910017, - "loss": 46.0, - "step": 45744 - }, - { - "epoch": 3.497524705162758, - "grad_norm": 0.004227794706821442, - "learning_rate": 0.00019999396610524234, - "loss": 46.0, - "step": 45745 - }, - { - "epoch": 3.4976011621461476, - "grad_norm": 0.004875116981565952, - "learning_rate": 0.0001999939658413788, - "loss": 46.0, - "step": 45746 - }, - { - "epoch": 3.4976776191295373, - "grad_norm": 0.0061309547163546085, - "learning_rate": 0.00019999396557750948, - "loss": 46.0, - "step": 45747 - }, - { - "epoch": 3.497754076112927, - "grad_norm": 0.0017395709874108434, - "learning_rate": 0.00019999396531363437, - "loss": 46.0, - "step": 45748 - }, - { - "epoch": 3.497830533096317, - "grad_norm": 0.0016545196995139122, - "learning_rate": 0.0001999939650497535, - "loss": 46.0, - "step": 45749 - }, - { - "epoch": 3.4979069900797066, - "grad_norm": 0.0017746618250384927, - "learning_rate": 0.00019999396478586685, - "loss": 46.0, - "step": 45750 - }, - { - "epoch": 3.497983447063096, - "grad_norm": 0.0034066583029925823, - "learning_rate": 0.00019999396452197442, - "loss": 46.0, - "step": 45751 - }, - { - "epoch": 3.4980599040464857, - "grad_norm": 0.0030231866985559464, - "learning_rate": 0.00019999396425807628, - "loss": 46.0, - "step": 45752 - }, - { - "epoch": 3.4981363610298755, - "grad_norm": 0.0035011935979127884, - "learning_rate": 0.0001999939639941723, - "loss": 46.0, - "step": 45753 - }, - { - "epoch": 3.4982128180132652, - "grad_norm": 0.0009037017589434981, - "learning_rate": 0.0001999939637302626, - "loss": 46.0, - "step": 45754 - }, - { - "epoch": 3.498289274996655, - "grad_norm": 0.0013911626301705837, - "learning_rate": 0.0001999939634663471, - "loss": 46.0, - "step": 45755 - }, - { - "epoch": 3.4983657319800447, - "grad_norm": 0.003283183556050062, - "learning_rate": 0.00019999396320242587, - "loss": 46.0, - "step": 45756 - }, - { - "epoch": 3.4984421889634345, - "grad_norm": 0.0009282208629883826, - "learning_rate": 0.00019999396293849883, - "loss": 46.0, - "step": 45757 - }, - { - "epoch": 3.4985186459468243, - "grad_norm": 0.0017220533918589354, - "learning_rate": 0.00019999396267456605, - "loss": 46.0, - "step": 45758 - }, - { - "epoch": 3.498595102930214, - "grad_norm": 0.0009883064776659012, - "learning_rate": 0.0001999939624106275, - "loss": 46.0, - "step": 45759 - }, - { - "epoch": 3.498671559913604, - "grad_norm": 0.0026350603438913822, - "learning_rate": 0.00019999396214668316, - "loss": 46.0, - "step": 45760 - }, - { - "epoch": 3.498748016896993, - "grad_norm": 0.0024470877833664417, - "learning_rate": 0.00019999396188273306, - "loss": 46.0, - "step": 45761 - }, - { - "epoch": 3.498824473880383, - "grad_norm": 0.001396100502461195, - "learning_rate": 0.00019999396161877718, - "loss": 46.0, - "step": 45762 - }, - { - "epoch": 3.4989009308637726, - "grad_norm": 0.005022132303565741, - "learning_rate": 0.00019999396135481553, - "loss": 46.0, - "step": 45763 - }, - { - "epoch": 3.4989773878471624, - "grad_norm": 0.003914027940481901, - "learning_rate": 0.00019999396109084814, - "loss": 46.0, - "step": 45764 - }, - { - "epoch": 3.499053844830552, - "grad_norm": 0.0030109924264252186, - "learning_rate": 0.00019999396082687497, - "loss": 46.0, - "step": 45765 - }, - { - "epoch": 3.499130301813942, - "grad_norm": 0.004394937306642532, - "learning_rate": 0.00019999396056289602, - "loss": 46.0, - "step": 45766 - }, - { - "epoch": 3.4992067587973317, - "grad_norm": 0.005031343083828688, - "learning_rate": 0.00019999396029891128, - "loss": 46.0, - "step": 45767 - }, - { - "epoch": 3.4992832157807214, - "grad_norm": 0.0012765949359163642, - "learning_rate": 0.00019999396003492082, - "loss": 46.0, - "step": 45768 - }, - { - "epoch": 3.499359672764111, - "grad_norm": 0.0015599989565089345, - "learning_rate": 0.00019999395977092455, - "loss": 46.0, - "step": 45769 - }, - { - "epoch": 3.499436129747501, - "grad_norm": 0.0024923740420490503, - "learning_rate": 0.00019999395950692255, - "loss": 46.0, - "step": 45770 - }, - { - "epoch": 3.4995125867308907, - "grad_norm": 0.001404183334670961, - "learning_rate": 0.00019999395924291476, - "loss": 46.0, - "step": 45771 - }, - { - "epoch": 3.4995890437142805, - "grad_norm": 0.0009848756017163396, - "learning_rate": 0.00019999395897890118, - "loss": 46.0, - "step": 45772 - }, - { - "epoch": 3.49966550069767, - "grad_norm": 0.00337539822794497, - "learning_rate": 0.00019999395871488185, - "loss": 46.0, - "step": 45773 - }, - { - "epoch": 3.4997419576810596, - "grad_norm": 0.0011520797852426767, - "learning_rate": 0.00019999395845085677, - "loss": 46.0, - "step": 45774 - }, - { - "epoch": 3.4998184146644493, - "grad_norm": 0.003876384347677231, - "learning_rate": 0.0001999939581868259, - "loss": 46.0, - "step": 45775 - }, - { - "epoch": 3.499894871647839, - "grad_norm": 0.0012800614349544048, - "learning_rate": 0.00019999395792278925, - "loss": 46.0, - "step": 45776 - }, - { - "epoch": 3.499971328631229, - "grad_norm": 0.0025990649592131376, - "learning_rate": 0.00019999395765874685, - "loss": 46.0, - "step": 45777 - }, - { - "epoch": 3.5000477856146186, - "grad_norm": 0.0021888643968850374, - "learning_rate": 0.00019999395739469868, - "loss": 46.0, - "step": 45778 - }, - { - "epoch": 3.5001242425980084, - "grad_norm": 0.0012116662692278624, - "learning_rate": 0.00019999395713064474, - "loss": 46.0, - "step": 45779 - }, - { - "epoch": 3.500200699581398, - "grad_norm": 0.0014844882534816861, - "learning_rate": 0.00019999395686658503, - "loss": 46.0, - "step": 45780 - }, - { - "epoch": 3.500277156564788, - "grad_norm": 0.0033130068331956863, - "learning_rate": 0.00019999395660251954, - "loss": 46.0, - "step": 45781 - }, - { - "epoch": 3.500353613548177, - "grad_norm": 0.0022224008571356535, - "learning_rate": 0.00019999395633844827, - "loss": 46.0, - "step": 45782 - }, - { - "epoch": 3.500430070531567, - "grad_norm": 0.002328029600903392, - "learning_rate": 0.0001999939560743713, - "loss": 46.0, - "step": 45783 - }, - { - "epoch": 3.5005065275149567, - "grad_norm": 0.0027920312713831663, - "learning_rate": 0.00019999395581028848, - "loss": 46.0, - "step": 45784 - }, - { - "epoch": 3.5005829844983465, - "grad_norm": 0.002868940820917487, - "learning_rate": 0.0001999939555461999, - "loss": 46.0, - "step": 45785 - }, - { - "epoch": 3.5006594414817362, - "grad_norm": 0.005953640677034855, - "learning_rate": 0.0001999939552821056, - "loss": 46.0, - "step": 45786 - }, - { - "epoch": 3.500735898465126, - "grad_norm": 0.0015957440482452512, - "learning_rate": 0.0001999939550180055, - "loss": 46.0, - "step": 45787 - }, - { - "epoch": 3.5008123554485158, - "grad_norm": 0.0014333180151879787, - "learning_rate": 0.00019999395475389965, - "loss": 46.0, - "step": 45788 - }, - { - "epoch": 3.5008888124319055, - "grad_norm": 0.0007881660130806267, - "learning_rate": 0.000199993954489788, - "loss": 46.0, - "step": 45789 - }, - { - "epoch": 3.5009652694152953, - "grad_norm": 0.002509583020582795, - "learning_rate": 0.00019999395422567057, - "loss": 46.0, - "step": 45790 - }, - { - "epoch": 3.501041726398685, - "grad_norm": 0.0065762801095843315, - "learning_rate": 0.0001999939539615474, - "loss": 46.0, - "step": 45791 - }, - { - "epoch": 3.501118183382075, - "grad_norm": 0.0011103107826784253, - "learning_rate": 0.00019999395369741846, - "loss": 46.0, - "step": 45792 - }, - { - "epoch": 3.5011946403654646, - "grad_norm": 0.0006491128588095307, - "learning_rate": 0.00019999395343328375, - "loss": 46.0, - "step": 45793 - }, - { - "epoch": 3.5012710973488543, - "grad_norm": 0.0012044055620208383, - "learning_rate": 0.00019999395316914326, - "loss": 46.0, - "step": 45794 - }, - { - "epoch": 3.501347554332244, - "grad_norm": 0.0012121006147935987, - "learning_rate": 0.00019999395290499702, - "loss": 46.0, - "step": 45795 - }, - { - "epoch": 3.5014240113156334, - "grad_norm": 0.0018368646269664168, - "learning_rate": 0.00019999395264084501, - "loss": 46.0, - "step": 45796 - }, - { - "epoch": 3.501500468299023, - "grad_norm": 0.0037259759847074747, - "learning_rate": 0.0001999939523766872, - "loss": 46.0, - "step": 45797 - }, - { - "epoch": 3.501576925282413, - "grad_norm": 0.0025700132828205824, - "learning_rate": 0.00019999395211252368, - "loss": 46.0, - "step": 45798 - }, - { - "epoch": 3.5016533822658027, - "grad_norm": 0.001448381575755775, - "learning_rate": 0.00019999395184835435, - "loss": 46.0, - "step": 45799 - }, - { - "epoch": 3.5017298392491925, - "grad_norm": 0.0037338396068662405, - "learning_rate": 0.00019999395158417925, - "loss": 46.0, - "step": 45800 - }, - { - "epoch": 3.501806296232582, - "grad_norm": 0.0012148433597758412, - "learning_rate": 0.0001999939513199984, - "loss": 46.0, - "step": 45801 - }, - { - "epoch": 3.501882753215972, - "grad_norm": 0.0016613543266430497, - "learning_rate": 0.00019999395105581175, - "loss": 46.0, - "step": 45802 - }, - { - "epoch": 3.5019592101993617, - "grad_norm": 0.003074268577620387, - "learning_rate": 0.00019999395079161935, - "loss": 46.0, - "step": 45803 - }, - { - "epoch": 3.502035667182751, - "grad_norm": 0.0013266843743622303, - "learning_rate": 0.00019999395052742119, - "loss": 46.0, - "step": 45804 - }, - { - "epoch": 3.502112124166141, - "grad_norm": 0.0010522467782720923, - "learning_rate": 0.00019999395026321724, - "loss": 46.0, - "step": 45805 - }, - { - "epoch": 3.5021885811495306, - "grad_norm": 0.0018472321098670363, - "learning_rate": 0.0001999939499990075, - "loss": 46.0, - "step": 45806 - }, - { - "epoch": 3.5022650381329203, - "grad_norm": 0.0015084845945239067, - "learning_rate": 0.00019999394973479204, - "loss": 46.0, - "step": 45807 - }, - { - "epoch": 3.50234149511631, - "grad_norm": 0.0016131834127008915, - "learning_rate": 0.0001999939494705708, - "loss": 46.0, - "step": 45808 - }, - { - "epoch": 3.5024179520997, - "grad_norm": 0.002342240186408162, - "learning_rate": 0.00019999394920634377, - "loss": 46.0, - "step": 45809 - }, - { - "epoch": 3.5024944090830896, - "grad_norm": 0.0072229960933327675, - "learning_rate": 0.000199993948942111, - "loss": 46.0, - "step": 45810 - }, - { - "epoch": 3.5025708660664794, - "grad_norm": 0.0009944502962753177, - "learning_rate": 0.00019999394867787243, - "loss": 46.0, - "step": 45811 - }, - { - "epoch": 3.502647323049869, - "grad_norm": 0.0014579308917745948, - "learning_rate": 0.0001999939484136281, - "loss": 46.0, - "step": 45812 - }, - { - "epoch": 3.502723780033259, - "grad_norm": 0.003925422206521034, - "learning_rate": 0.00019999394814937803, - "loss": 46.0, - "step": 45813 - }, - { - "epoch": 3.5028002370166487, - "grad_norm": 0.0035908729769289494, - "learning_rate": 0.00019999394788512215, - "loss": 46.0, - "step": 45814 - }, - { - "epoch": 3.5028766940000384, - "grad_norm": 0.0029187118634581566, - "learning_rate": 0.00019999394762086053, - "loss": 46.0, - "step": 45815 - }, - { - "epoch": 3.502953150983428, - "grad_norm": 0.003749963827431202, - "learning_rate": 0.00019999394735659314, - "loss": 46.0, - "step": 45816 - }, - { - "epoch": 3.5030296079668175, - "grad_norm": 0.0014738740865141153, - "learning_rate": 0.00019999394709231994, - "loss": 46.0, - "step": 45817 - }, - { - "epoch": 3.5031060649502073, - "grad_norm": 0.0015019946731626987, - "learning_rate": 0.000199993946828041, - "loss": 46.0, - "step": 45818 - }, - { - "epoch": 3.503182521933597, - "grad_norm": 0.001335314242169261, - "learning_rate": 0.0001999939465637563, - "loss": 46.0, - "step": 45819 - }, - { - "epoch": 3.503258978916987, - "grad_norm": 0.0014708946691825986, - "learning_rate": 0.00019999394629946582, - "loss": 46.0, - "step": 45820 - }, - { - "epoch": 3.5033354359003765, - "grad_norm": 0.0023479245137423277, - "learning_rate": 0.0001999939460351696, - "loss": 46.0, - "step": 45821 - }, - { - "epoch": 3.5034118928837663, - "grad_norm": 0.002362276893109083, - "learning_rate": 0.00019999394577086755, - "loss": 46.0, - "step": 45822 - }, - { - "epoch": 3.503488349867156, - "grad_norm": 0.0014825870748609304, - "learning_rate": 0.00019999394550655977, - "loss": 46.0, - "step": 45823 - }, - { - "epoch": 3.503564806850546, - "grad_norm": 0.0012609621044248343, - "learning_rate": 0.00019999394524224622, - "loss": 46.0, - "step": 45824 - }, - { - "epoch": 3.5036412638339356, - "grad_norm": 0.0013599558733403683, - "learning_rate": 0.0001999939449779269, - "loss": 46.0, - "step": 45825 - }, - { - "epoch": 3.503717720817325, - "grad_norm": 0.0021276073530316353, - "learning_rate": 0.0001999939447136018, - "loss": 46.0, - "step": 45826 - }, - { - "epoch": 3.5037941778007147, - "grad_norm": 0.0017014875775203109, - "learning_rate": 0.00019999394444927094, - "loss": 46.0, - "step": 45827 - }, - { - "epoch": 3.5038706347841044, - "grad_norm": 0.0024813904892653227, - "learning_rate": 0.00019999394418493435, - "loss": 46.0, - "step": 45828 - }, - { - "epoch": 3.503947091767494, - "grad_norm": 0.0030645569786429405, - "learning_rate": 0.00019999394392059192, - "loss": 46.0, - "step": 45829 - }, - { - "epoch": 3.504023548750884, - "grad_norm": 0.0011163130402565002, - "learning_rate": 0.00019999394365624376, - "loss": 46.0, - "step": 45830 - }, - { - "epoch": 3.5041000057342737, - "grad_norm": 0.002176186302676797, - "learning_rate": 0.00019999394339188982, - "loss": 46.0, - "step": 45831 - }, - { - "epoch": 3.5041764627176635, - "grad_norm": 0.003148752963170409, - "learning_rate": 0.0001999939431275301, - "loss": 46.0, - "step": 45832 - }, - { - "epoch": 3.5042529197010532, - "grad_norm": 0.0027813708875328302, - "learning_rate": 0.0001999939428631646, - "loss": 46.0, - "step": 45833 - }, - { - "epoch": 3.504329376684443, - "grad_norm": 0.0033020740374922752, - "learning_rate": 0.00019999394259879338, - "loss": 46.0, - "step": 45834 - }, - { - "epoch": 3.5044058336678328, - "grad_norm": 0.0042078206315636635, - "learning_rate": 0.00019999394233441637, - "loss": 46.0, - "step": 45835 - }, - { - "epoch": 3.5044822906512225, - "grad_norm": 0.0032947277650237083, - "learning_rate": 0.0001999939420700336, - "loss": 46.0, - "step": 45836 - }, - { - "epoch": 3.5045587476346123, - "grad_norm": 0.006774098612368107, - "learning_rate": 0.00019999394180564504, - "loss": 46.0, - "step": 45837 - }, - { - "epoch": 3.504635204618002, - "grad_norm": 0.0024718046188354492, - "learning_rate": 0.0001999939415412507, - "loss": 46.0, - "step": 45838 - }, - { - "epoch": 3.5047116616013914, - "grad_norm": 0.003453444456681609, - "learning_rate": 0.00019999394127685063, - "loss": 46.0, - "step": 45839 - }, - { - "epoch": 3.504788118584781, - "grad_norm": 0.0018170345574617386, - "learning_rate": 0.00019999394101244479, - "loss": 46.0, - "step": 45840 - }, - { - "epoch": 3.504864575568171, - "grad_norm": 0.0023780278861522675, - "learning_rate": 0.00019999394074803314, - "loss": 46.0, - "step": 45841 - }, - { - "epoch": 3.5049410325515606, - "grad_norm": 0.0033883964642882347, - "learning_rate": 0.00019999394048361574, - "loss": 46.0, - "step": 45842 - }, - { - "epoch": 3.5050174895349504, - "grad_norm": 0.0029428484849631786, - "learning_rate": 0.00019999394021919258, - "loss": 46.0, - "step": 45843 - }, - { - "epoch": 3.50509394651834, - "grad_norm": 0.0027647584211081266, - "learning_rate": 0.00019999393995476364, - "loss": 46.0, - "step": 45844 - }, - { - "epoch": 3.50517040350173, - "grad_norm": 0.0025004716590046883, - "learning_rate": 0.00019999393969032895, - "loss": 46.0, - "step": 45845 - }, - { - "epoch": 3.5052468604851197, - "grad_norm": 0.0015785566065460443, - "learning_rate": 0.00019999393942588846, - "loss": 46.0, - "step": 45846 - }, - { - "epoch": 3.505323317468509, - "grad_norm": 0.0012097267899662256, - "learning_rate": 0.00019999393916144223, - "loss": 46.0, - "step": 45847 - }, - { - "epoch": 3.5053997744518988, - "grad_norm": 0.0017973168287426233, - "learning_rate": 0.00019999393889699022, - "loss": 46.0, - "step": 45848 - }, - { - "epoch": 3.5054762314352885, - "grad_norm": 0.0018133892444893718, - "learning_rate": 0.0001999939386325324, - "loss": 46.0, - "step": 45849 - }, - { - "epoch": 3.5055526884186783, - "grad_norm": 0.001698700594715774, - "learning_rate": 0.00019999393836806888, - "loss": 46.0, - "step": 45850 - }, - { - "epoch": 3.505629145402068, - "grad_norm": 0.0035927521530538797, - "learning_rate": 0.00019999393810359956, - "loss": 46.0, - "step": 45851 - }, - { - "epoch": 3.505705602385458, - "grad_norm": 0.003899917472153902, - "learning_rate": 0.00019999393783912446, - "loss": 46.0, - "step": 45852 - }, - { - "epoch": 3.5057820593688476, - "grad_norm": 0.003743167733773589, - "learning_rate": 0.0001999939375746436, - "loss": 46.0, - "step": 45853 - }, - { - "epoch": 3.5058585163522373, - "grad_norm": 0.006765828467905521, - "learning_rate": 0.00019999393731015696, - "loss": 46.0, - "step": 45854 - }, - { - "epoch": 3.505934973335627, - "grad_norm": 0.0037534558214247227, - "learning_rate": 0.0001999939370456646, - "loss": 46.0, - "step": 45855 - }, - { - "epoch": 3.506011430319017, - "grad_norm": 0.0013452754355967045, - "learning_rate": 0.00019999393678116643, - "loss": 46.0, - "step": 45856 - }, - { - "epoch": 3.5060878873024066, - "grad_norm": 0.0012423753505572677, - "learning_rate": 0.00019999393651666249, - "loss": 46.0, - "step": 45857 - }, - { - "epoch": 3.5061643442857964, - "grad_norm": 0.0015553719131276011, - "learning_rate": 0.0001999939362521528, - "loss": 46.0, - "step": 45858 - }, - { - "epoch": 3.506240801269186, - "grad_norm": 0.005281276069581509, - "learning_rate": 0.0001999939359876373, - "loss": 46.0, - "step": 45859 - }, - { - "epoch": 3.506317258252576, - "grad_norm": 0.0019446517108008265, - "learning_rate": 0.00019999393572311608, - "loss": 46.0, - "step": 45860 - }, - { - "epoch": 3.506393715235965, - "grad_norm": 0.003982921130955219, - "learning_rate": 0.00019999393545858904, - "loss": 46.0, - "step": 45861 - }, - { - "epoch": 3.506470172219355, - "grad_norm": 0.010441708378493786, - "learning_rate": 0.0001999939351940563, - "loss": 46.0, - "step": 45862 - }, - { - "epoch": 3.5065466292027447, - "grad_norm": 0.004086884204298258, - "learning_rate": 0.00019999393492951774, - "loss": 46.0, - "step": 45863 - }, - { - "epoch": 3.5066230861861345, - "grad_norm": 0.0034190313890576363, - "learning_rate": 0.00019999393466497344, - "loss": 46.0, - "step": 45864 - }, - { - "epoch": 3.5066995431695243, - "grad_norm": 0.0013898743782192469, - "learning_rate": 0.0001999939344004233, - "loss": 46.0, - "step": 45865 - }, - { - "epoch": 3.506776000152914, - "grad_norm": 0.0033379308879375458, - "learning_rate": 0.00019999393413586746, - "loss": 46.0, - "step": 45866 - }, - { - "epoch": 3.506852457136304, - "grad_norm": 0.0015834105433896184, - "learning_rate": 0.00019999393387130584, - "loss": 46.0, - "step": 45867 - }, - { - "epoch": 3.5069289141196935, - "grad_norm": 0.001442268374375999, - "learning_rate": 0.00019999393360673845, - "loss": 46.0, - "step": 45868 - }, - { - "epoch": 3.507005371103083, - "grad_norm": 0.002715506823733449, - "learning_rate": 0.0001999939333421653, - "loss": 46.0, - "step": 45869 - }, - { - "epoch": 3.5070818280864726, - "grad_norm": 0.0026769351679831743, - "learning_rate": 0.00019999393307758637, - "loss": 46.0, - "step": 45870 - }, - { - "epoch": 3.5071582850698624, - "grad_norm": 0.0036387809086591005, - "learning_rate": 0.00019999393281300165, - "loss": 46.0, - "step": 45871 - }, - { - "epoch": 3.507234742053252, - "grad_norm": 0.003063938580453396, - "learning_rate": 0.00019999393254841117, - "loss": 46.0, - "step": 45872 - }, - { - "epoch": 3.507311199036642, - "grad_norm": 0.000976090261247009, - "learning_rate": 0.00019999393228381493, - "loss": 46.0, - "step": 45873 - }, - { - "epoch": 3.5073876560200317, - "grad_norm": 0.005339443683624268, - "learning_rate": 0.00019999393201921293, - "loss": 46.0, - "step": 45874 - }, - { - "epoch": 3.5074641130034214, - "grad_norm": 0.0008203244069591165, - "learning_rate": 0.00019999393175460515, - "loss": 46.0, - "step": 45875 - }, - { - "epoch": 3.507540569986811, - "grad_norm": 0.0024762852117419243, - "learning_rate": 0.0001999939314899916, - "loss": 46.0, - "step": 45876 - }, - { - "epoch": 3.507617026970201, - "grad_norm": 0.0018344307318329811, - "learning_rate": 0.0001999939312253723, - "loss": 46.0, - "step": 45877 - }, - { - "epoch": 3.5076934839535907, - "grad_norm": 0.0034210924059152603, - "learning_rate": 0.00019999393096074722, - "loss": 46.0, - "step": 45878 - }, - { - "epoch": 3.5077699409369805, - "grad_norm": 0.0023654100950807333, - "learning_rate": 0.00019999393069611637, - "loss": 46.0, - "step": 45879 - }, - { - "epoch": 3.5078463979203702, - "grad_norm": 0.002415325725451112, - "learning_rate": 0.00019999393043147973, - "loss": 46.0, - "step": 45880 - }, - { - "epoch": 3.50792285490376, - "grad_norm": 0.005408473312854767, - "learning_rate": 0.00019999393016683733, - "loss": 46.0, - "step": 45881 - }, - { - "epoch": 3.5079993118871498, - "grad_norm": 0.0011761934729292989, - "learning_rate": 0.00019999392990218917, - "loss": 46.0, - "step": 45882 - }, - { - "epoch": 3.508075768870539, - "grad_norm": 0.002419077791273594, - "learning_rate": 0.00019999392963753523, - "loss": 46.0, - "step": 45883 - }, - { - "epoch": 3.508152225853929, - "grad_norm": 0.002273550955578685, - "learning_rate": 0.00019999392937287557, - "loss": 46.0, - "step": 45884 - }, - { - "epoch": 3.5082286828373186, - "grad_norm": 0.0017420825315639377, - "learning_rate": 0.00019999392910821005, - "loss": 46.0, - "step": 45885 - }, - { - "epoch": 3.5083051398207084, - "grad_norm": 0.001755853183567524, - "learning_rate": 0.00019999392884353885, - "loss": 46.0, - "step": 45886 - }, - { - "epoch": 3.508381596804098, - "grad_norm": 0.003445603419095278, - "learning_rate": 0.00019999392857886181, - "loss": 46.0, - "step": 45887 - }, - { - "epoch": 3.508458053787488, - "grad_norm": 0.0022320651914924383, - "learning_rate": 0.00019999392831417903, - "loss": 46.0, - "step": 45888 - }, - { - "epoch": 3.5085345107708776, - "grad_norm": 0.004856535699218512, - "learning_rate": 0.0001999939280494905, - "loss": 46.0, - "step": 45889 - }, - { - "epoch": 3.5086109677542674, - "grad_norm": 0.004724472761154175, - "learning_rate": 0.0001999939277847962, - "loss": 46.0, - "step": 45890 - }, - { - "epoch": 3.5086874247376567, - "grad_norm": 0.002014308935031295, - "learning_rate": 0.0001999939275200961, - "loss": 46.0, - "step": 45891 - }, - { - "epoch": 3.5087638817210465, - "grad_norm": 0.0013860141625627875, - "learning_rate": 0.00019999392725539026, - "loss": 46.0, - "step": 45892 - }, - { - "epoch": 3.5088403387044362, - "grad_norm": 0.0009763978305272758, - "learning_rate": 0.00019999392699067862, - "loss": 46.0, - "step": 45893 - }, - { - "epoch": 3.508916795687826, - "grad_norm": 0.002042412292212248, - "learning_rate": 0.00019999392672596122, - "loss": 46.0, - "step": 45894 - }, - { - "epoch": 3.5089932526712158, - "grad_norm": 0.0005378625937737525, - "learning_rate": 0.00019999392646123808, - "loss": 46.0, - "step": 45895 - }, - { - "epoch": 3.5090697096546055, - "grad_norm": 0.0015830768970772624, - "learning_rate": 0.00019999392619650914, - "loss": 46.0, - "step": 45896 - }, - { - "epoch": 3.5091461666379953, - "grad_norm": 0.0017058874946087599, - "learning_rate": 0.00019999392593177446, - "loss": 46.0, - "step": 45897 - }, - { - "epoch": 3.509222623621385, - "grad_norm": 0.0009436761611141264, - "learning_rate": 0.000199993925667034, - "loss": 46.0, - "step": 45898 - }, - { - "epoch": 3.509299080604775, - "grad_norm": 0.0007821686449460685, - "learning_rate": 0.00019999392540228774, - "loss": 46.0, - "step": 45899 - }, - { - "epoch": 3.5093755375881646, - "grad_norm": 0.002428462728857994, - "learning_rate": 0.00019999392513753573, - "loss": 46.0, - "step": 45900 - }, - { - "epoch": 3.5094519945715543, - "grad_norm": 0.0031222105026245117, - "learning_rate": 0.00019999392487277798, - "loss": 46.0, - "step": 45901 - }, - { - "epoch": 3.509528451554944, - "grad_norm": 0.0013568011345341802, - "learning_rate": 0.00019999392460801443, - "loss": 46.0, - "step": 45902 - }, - { - "epoch": 3.509604908538334, - "grad_norm": 0.0010679615661501884, - "learning_rate": 0.0001999939243432451, - "loss": 46.0, - "step": 45903 - }, - { - "epoch": 3.5096813655217236, - "grad_norm": 0.008835924789309502, - "learning_rate": 0.00019999392407847003, - "loss": 46.0, - "step": 45904 - }, - { - "epoch": 3.509757822505113, - "grad_norm": 0.002657622564584017, - "learning_rate": 0.00019999392381368918, - "loss": 46.0, - "step": 45905 - }, - { - "epoch": 3.5098342794885027, - "grad_norm": 0.002233570907264948, - "learning_rate": 0.00019999392354890256, - "loss": 46.0, - "step": 45906 - }, - { - "epoch": 3.5099107364718924, - "grad_norm": 0.002726313192397356, - "learning_rate": 0.00019999392328411014, - "loss": 46.0, - "step": 45907 - }, - { - "epoch": 3.509987193455282, - "grad_norm": 0.00536592211574316, - "learning_rate": 0.000199993923019312, - "loss": 46.0, - "step": 45908 - }, - { - "epoch": 3.510063650438672, - "grad_norm": 0.0012080423766747117, - "learning_rate": 0.00019999392275450807, - "loss": 46.0, - "step": 45909 - }, - { - "epoch": 3.5101401074220617, - "grad_norm": 0.004361757542937994, - "learning_rate": 0.00019999392248969838, - "loss": 46.0, - "step": 45910 - }, - { - "epoch": 3.5102165644054515, - "grad_norm": 0.0010043777292594314, - "learning_rate": 0.00019999392222488292, - "loss": 46.0, - "step": 45911 - }, - { - "epoch": 3.5102930213888413, - "grad_norm": 0.0008478460367769003, - "learning_rate": 0.0001999939219600617, - "loss": 46.0, - "step": 45912 - }, - { - "epoch": 3.5103694783722306, - "grad_norm": 0.00814181100577116, - "learning_rate": 0.00019999392169523468, - "loss": 46.0, - "step": 45913 - }, - { - "epoch": 3.5104459353556203, - "grad_norm": 0.003671047743409872, - "learning_rate": 0.0001999939214304019, - "loss": 46.0, - "step": 45914 - }, - { - "epoch": 3.51052239233901, - "grad_norm": 0.001437688828445971, - "learning_rate": 0.00019999392116556335, - "loss": 46.0, - "step": 45915 - }, - { - "epoch": 3.5105988493224, - "grad_norm": 0.0011340953642502427, - "learning_rate": 0.00019999392090071908, - "loss": 46.0, - "step": 45916 - }, - { - "epoch": 3.5106753063057896, - "grad_norm": 0.000889941118657589, - "learning_rate": 0.00019999392063586898, - "loss": 46.0, - "step": 45917 - }, - { - "epoch": 3.5107517632891794, - "grad_norm": 0.004333685152232647, - "learning_rate": 0.00019999392037101313, - "loss": 46.0, - "step": 45918 - }, - { - "epoch": 3.510828220272569, - "grad_norm": 0.004315197467803955, - "learning_rate": 0.0001999939201061515, - "loss": 46.0, - "step": 45919 - }, - { - "epoch": 3.510904677255959, - "grad_norm": 0.0013370043598115444, - "learning_rate": 0.00019999391984128412, - "loss": 46.0, - "step": 45920 - }, - { - "epoch": 3.5109811342393487, - "grad_norm": 0.0031153459567576647, - "learning_rate": 0.00019999391957641096, - "loss": 46.0, - "step": 45921 - }, - { - "epoch": 3.5110575912227384, - "grad_norm": 0.0011196978157386184, - "learning_rate": 0.00019999391931153202, - "loss": 46.0, - "step": 45922 - }, - { - "epoch": 3.511134048206128, - "grad_norm": 0.004838210064917803, - "learning_rate": 0.00019999391904664733, - "loss": 46.0, - "step": 45923 - }, - { - "epoch": 3.511210505189518, - "grad_norm": 0.0036373466718941927, - "learning_rate": 0.00019999391878175687, - "loss": 46.0, - "step": 45924 - }, - { - "epoch": 3.5112869621729077, - "grad_norm": 0.002062140731140971, - "learning_rate": 0.00019999391851686061, - "loss": 46.0, - "step": 45925 - }, - { - "epoch": 3.5113634191562975, - "grad_norm": 0.004207535646855831, - "learning_rate": 0.00019999391825195864, - "loss": 46.0, - "step": 45926 - }, - { - "epoch": 3.5114398761396868, - "grad_norm": 0.0006255257758311927, - "learning_rate": 0.00019999391798705088, - "loss": 46.0, - "step": 45927 - }, - { - "epoch": 3.5115163331230765, - "grad_norm": 0.0015837616520002484, - "learning_rate": 0.0001999939177221373, - "loss": 46.0, - "step": 45928 - }, - { - "epoch": 3.5115927901064663, - "grad_norm": 0.001973797334358096, - "learning_rate": 0.00019999391745721803, - "loss": 46.0, - "step": 45929 - }, - { - "epoch": 3.511669247089856, - "grad_norm": 0.0008100785780698061, - "learning_rate": 0.00019999391719229293, - "loss": 46.0, - "step": 45930 - }, - { - "epoch": 3.511745704073246, - "grad_norm": 0.0021802224218845367, - "learning_rate": 0.0001999939169273621, - "loss": 46.0, - "step": 45931 - }, - { - "epoch": 3.5118221610566356, - "grad_norm": 0.0025743097066879272, - "learning_rate": 0.0001999939166624255, - "loss": 46.0, - "step": 45932 - }, - { - "epoch": 3.5118986180400253, - "grad_norm": 0.001661102520301938, - "learning_rate": 0.00019999391639748308, - "loss": 46.0, - "step": 45933 - }, - { - "epoch": 3.511975075023415, - "grad_norm": 0.00397537974640727, - "learning_rate": 0.00019999391613253494, - "loss": 46.0, - "step": 45934 - }, - { - "epoch": 3.5120515320068044, - "grad_norm": 0.01090953592211008, - "learning_rate": 0.000199993915867581, - "loss": 46.0, - "step": 45935 - }, - { - "epoch": 3.512127988990194, - "grad_norm": 0.006305725313723087, - "learning_rate": 0.00019999391560262132, - "loss": 46.0, - "step": 45936 - }, - { - "epoch": 3.512204445973584, - "grad_norm": 0.0034787540789693594, - "learning_rate": 0.00019999391533765586, - "loss": 46.0, - "step": 45937 - }, - { - "epoch": 3.5122809029569737, - "grad_norm": 0.0036479176487773657, - "learning_rate": 0.0001999939150726846, - "loss": 46.0, - "step": 45938 - }, - { - "epoch": 3.5123573599403635, - "grad_norm": 0.00420813774690032, - "learning_rate": 0.00019999391480770762, - "loss": 46.0, - "step": 45939 - }, - { - "epoch": 3.5124338169237532, - "grad_norm": 0.002550095319747925, - "learning_rate": 0.00019999391454272484, - "loss": 46.0, - "step": 45940 - }, - { - "epoch": 3.512510273907143, - "grad_norm": 0.009901037439703941, - "learning_rate": 0.00019999391427773632, - "loss": 46.0, - "step": 45941 - }, - { - "epoch": 3.5125867308905327, - "grad_norm": 0.0016465189401060343, - "learning_rate": 0.000199993914012742, - "loss": 46.0, - "step": 45942 - }, - { - "epoch": 3.5126631878739225, - "grad_norm": 0.008149377070367336, - "learning_rate": 0.00019999391374774192, - "loss": 46.0, - "step": 45943 - }, - { - "epoch": 3.5127396448573123, - "grad_norm": 0.0015656622126698494, - "learning_rate": 0.00019999391348273608, - "loss": 46.0, - "step": 45944 - }, - { - "epoch": 3.512816101840702, - "grad_norm": 0.0029504215344786644, - "learning_rate": 0.00019999391321772448, - "loss": 46.0, - "step": 45945 - }, - { - "epoch": 3.512892558824092, - "grad_norm": 0.0022443272173404694, - "learning_rate": 0.0001999939129527071, - "loss": 46.0, - "step": 45946 - }, - { - "epoch": 3.5129690158074816, - "grad_norm": 0.0048622931353747845, - "learning_rate": 0.00019999391268768393, - "loss": 46.0, - "step": 45947 - }, - { - "epoch": 3.513045472790871, - "grad_norm": 0.001592762302607298, - "learning_rate": 0.000199993912422655, - "loss": 46.0, - "step": 45948 - }, - { - "epoch": 3.5131219297742606, - "grad_norm": 0.0022699437104165554, - "learning_rate": 0.0001999939121576203, - "loss": 46.0, - "step": 45949 - }, - { - "epoch": 3.5131983867576504, - "grad_norm": 0.0007706578471697867, - "learning_rate": 0.00019999391189257985, - "loss": 46.0, - "step": 45950 - }, - { - "epoch": 3.51327484374104, - "grad_norm": 0.003420833731070161, - "learning_rate": 0.00019999391162753362, - "loss": 46.0, - "step": 45951 - }, - { - "epoch": 3.51335130072443, - "grad_norm": 0.0015259770443663, - "learning_rate": 0.0001999939113624816, - "loss": 46.0, - "step": 45952 - }, - { - "epoch": 3.5134277577078197, - "grad_norm": 0.0031806055922061205, - "learning_rate": 0.00019999391109742386, - "loss": 46.0, - "step": 45953 - }, - { - "epoch": 3.5135042146912094, - "grad_norm": 0.001705571310594678, - "learning_rate": 0.0001999939108323603, - "loss": 46.0, - "step": 45954 - }, - { - "epoch": 3.513580671674599, - "grad_norm": 0.003992537967860699, - "learning_rate": 0.000199993910567291, - "loss": 46.0, - "step": 45955 - }, - { - "epoch": 3.513657128657989, - "grad_norm": 0.001126436865888536, - "learning_rate": 0.00019999391030221594, - "loss": 46.0, - "step": 45956 - }, - { - "epoch": 3.5137335856413783, - "grad_norm": 0.0025449309032410383, - "learning_rate": 0.00019999391003713507, - "loss": 46.0, - "step": 45957 - }, - { - "epoch": 3.513810042624768, - "grad_norm": 0.0023185438476502895, - "learning_rate": 0.00019999390977204845, - "loss": 46.0, - "step": 45958 - }, - { - "epoch": 3.513886499608158, - "grad_norm": 0.0022981897927820683, - "learning_rate": 0.00019999390950695608, - "loss": 46.0, - "step": 45959 - }, - { - "epoch": 3.5139629565915476, - "grad_norm": 0.001110328477807343, - "learning_rate": 0.00019999390924185795, - "loss": 46.0, - "step": 45960 - }, - { - "epoch": 3.5140394135749373, - "grad_norm": 0.0009469927754253149, - "learning_rate": 0.000199993908976754, - "loss": 46.0, - "step": 45961 - }, - { - "epoch": 3.514115870558327, - "grad_norm": 0.0011637015268206596, - "learning_rate": 0.00019999390871164435, - "loss": 46.0, - "step": 45962 - }, - { - "epoch": 3.514192327541717, - "grad_norm": 0.0009351832559332252, - "learning_rate": 0.00019999390844652887, - "loss": 46.0, - "step": 45963 - }, - { - "epoch": 3.5142687845251066, - "grad_norm": 0.001968756318092346, - "learning_rate": 0.00019999390818140763, - "loss": 46.0, - "step": 45964 - }, - { - "epoch": 3.5143452415084964, - "grad_norm": 0.0007624453282915056, - "learning_rate": 0.00019999390791628066, - "loss": 46.0, - "step": 45965 - }, - { - "epoch": 3.514421698491886, - "grad_norm": 0.0016586828278377652, - "learning_rate": 0.00019999390765114788, - "loss": 46.0, - "step": 45966 - }, - { - "epoch": 3.514498155475276, - "grad_norm": 0.001244717976078391, - "learning_rate": 0.00019999390738600933, - "loss": 46.0, - "step": 45967 - }, - { - "epoch": 3.5145746124586656, - "grad_norm": 0.005238679703325033, - "learning_rate": 0.00019999390712086503, - "loss": 46.0, - "step": 45968 - }, - { - "epoch": 3.5146510694420554, - "grad_norm": 0.0027155191637575626, - "learning_rate": 0.000199993906855715, - "loss": 46.0, - "step": 45969 - }, - { - "epoch": 3.5147275264254447, - "grad_norm": 0.0019125982653349638, - "learning_rate": 0.00019999390659055912, - "loss": 46.0, - "step": 45970 - }, - { - "epoch": 3.5148039834088345, - "grad_norm": 0.002336718374863267, - "learning_rate": 0.0001999939063253975, - "loss": 46.0, - "step": 45971 - }, - { - "epoch": 3.5148804403922242, - "grad_norm": 0.005797531921416521, - "learning_rate": 0.00019999390606023013, - "loss": 46.0, - "step": 45972 - }, - { - "epoch": 3.514956897375614, - "grad_norm": 0.0010556880151852965, - "learning_rate": 0.00019999390579505697, - "loss": 46.0, - "step": 45973 - }, - { - "epoch": 3.5150333543590038, - "grad_norm": 0.0014832635642960668, - "learning_rate": 0.00019999390552987806, - "loss": 46.0, - "step": 45974 - }, - { - "epoch": 3.5151098113423935, - "grad_norm": 0.002287686802446842, - "learning_rate": 0.00019999390526469337, - "loss": 46.0, - "step": 45975 - }, - { - "epoch": 3.5151862683257833, - "grad_norm": 0.0008717738674022257, - "learning_rate": 0.0001999939049995029, - "loss": 46.0, - "step": 45976 - }, - { - "epoch": 3.515262725309173, - "grad_norm": 0.004684879910200834, - "learning_rate": 0.00019999390473430669, - "loss": 46.0, - "step": 45977 - }, - { - "epoch": 3.5153391822925624, - "grad_norm": 0.004966872278600931, - "learning_rate": 0.00019999390446910468, - "loss": 46.0, - "step": 45978 - }, - { - "epoch": 3.515415639275952, - "grad_norm": 0.001488385139964521, - "learning_rate": 0.00019999390420389693, - "loss": 46.0, - "step": 45979 - }, - { - "epoch": 3.515492096259342, - "grad_norm": 0.006349211558699608, - "learning_rate": 0.00019999390393868338, - "loss": 46.0, - "step": 45980 - }, - { - "epoch": 3.5155685532427317, - "grad_norm": 0.0037485247012227774, - "learning_rate": 0.00019999390367346408, - "loss": 46.0, - "step": 45981 - }, - { - "epoch": 3.5156450102261214, - "grad_norm": 0.0031722646672278643, - "learning_rate": 0.000199993903408239, - "loss": 46.0, - "step": 45982 - }, - { - "epoch": 3.515721467209511, - "grad_norm": 0.0023654988035559654, - "learning_rate": 0.00019999390314300817, - "loss": 46.0, - "step": 45983 - }, - { - "epoch": 3.515797924192901, - "grad_norm": 0.002111158799380064, - "learning_rate": 0.00019999390287777158, - "loss": 46.0, - "step": 45984 - }, - { - "epoch": 3.5158743811762907, - "grad_norm": 0.001592441345565021, - "learning_rate": 0.00019999390261252916, - "loss": 46.0, - "step": 45985 - }, - { - "epoch": 3.5159508381596805, - "grad_norm": 0.002611360512673855, - "learning_rate": 0.00019999390234728105, - "loss": 46.0, - "step": 45986 - }, - { - "epoch": 3.51602729514307, - "grad_norm": 0.004567362368106842, - "learning_rate": 0.00019999390208202714, - "loss": 46.0, - "step": 45987 - }, - { - "epoch": 3.51610375212646, - "grad_norm": 0.0021764670964330435, - "learning_rate": 0.00019999390181676743, - "loss": 46.0, - "step": 45988 - }, - { - "epoch": 3.5161802091098497, - "grad_norm": 0.003382662311196327, - "learning_rate": 0.00019999390155150197, - "loss": 46.0, - "step": 45989 - }, - { - "epoch": 3.5162566660932395, - "grad_norm": 0.005656557623296976, - "learning_rate": 0.00019999390128623074, - "loss": 46.0, - "step": 45990 - }, - { - "epoch": 3.5163331230766293, - "grad_norm": 0.0015625564847141504, - "learning_rate": 0.00019999390102095376, - "loss": 46.0, - "step": 45991 - }, - { - "epoch": 3.5164095800600186, - "grad_norm": 0.0025601221714168787, - "learning_rate": 0.000199993900755671, - "loss": 46.0, - "step": 45992 - }, - { - "epoch": 3.5164860370434083, - "grad_norm": 0.0019405633211135864, - "learning_rate": 0.00019999390049038244, - "loss": 46.0, - "step": 45993 - }, - { - "epoch": 3.516562494026798, - "grad_norm": 0.002470424398779869, - "learning_rate": 0.00019999390022508817, - "loss": 46.0, - "step": 45994 - }, - { - "epoch": 3.516638951010188, - "grad_norm": 0.0015981619944795966, - "learning_rate": 0.00019999389995978807, - "loss": 46.0, - "step": 45995 - }, - { - "epoch": 3.5167154079935776, - "grad_norm": 0.0007899231277406216, - "learning_rate": 0.00019999389969448225, - "loss": 46.0, - "step": 45996 - }, - { - "epoch": 3.5167918649769674, - "grad_norm": 0.004193992353975773, - "learning_rate": 0.00019999389942917064, - "loss": 46.0, - "step": 45997 - }, - { - "epoch": 3.516868321960357, - "grad_norm": 0.0011907995212823153, - "learning_rate": 0.00019999389916385325, - "loss": 46.0, - "step": 45998 - }, - { - "epoch": 3.516944778943747, - "grad_norm": 0.003617934649810195, - "learning_rate": 0.0001999938988985301, - "loss": 46.0, - "step": 45999 - }, - { - "epoch": 3.5170212359271362, - "grad_norm": 0.0011587579501792789, - "learning_rate": 0.0001999938986332012, - "loss": 46.0, - "step": 46000 - }, - { - "epoch": 3.517097692910526, - "grad_norm": 0.0018538255244493484, - "learning_rate": 0.00019999389836786651, - "loss": 46.0, - "step": 46001 - }, - { - "epoch": 3.5171741498939157, - "grad_norm": 0.0026684461627155542, - "learning_rate": 0.00019999389810252606, - "loss": 46.0, - "step": 46002 - }, - { - "epoch": 3.5172506068773055, - "grad_norm": 0.0028471164405345917, - "learning_rate": 0.00019999389783717983, - "loss": 46.0, - "step": 46003 - }, - { - "epoch": 3.5173270638606953, - "grad_norm": 0.0048071397468447685, - "learning_rate": 0.00019999389757182785, - "loss": 46.0, - "step": 46004 - }, - { - "epoch": 3.517403520844085, - "grad_norm": 0.0038539008237421513, - "learning_rate": 0.00019999389730647007, - "loss": 46.0, - "step": 46005 - }, - { - "epoch": 3.517479977827475, - "grad_norm": 0.001737555954605341, - "learning_rate": 0.00019999389704110655, - "loss": 46.0, - "step": 46006 - }, - { - "epoch": 3.5175564348108646, - "grad_norm": 0.0022841468453407288, - "learning_rate": 0.00019999389677573725, - "loss": 46.0, - "step": 46007 - }, - { - "epoch": 3.5176328917942543, - "grad_norm": 0.00592148257419467, - "learning_rate": 0.00019999389651036218, - "loss": 46.0, - "step": 46008 - }, - { - "epoch": 3.517709348777644, - "grad_norm": 0.008165507577359676, - "learning_rate": 0.00019999389624498134, - "loss": 46.0, - "step": 46009 - }, - { - "epoch": 3.517785805761034, - "grad_norm": 0.004823136143386364, - "learning_rate": 0.00019999389597959475, - "loss": 46.0, - "step": 46010 - }, - { - "epoch": 3.5178622627444236, - "grad_norm": 0.002332430798560381, - "learning_rate": 0.00019999389571420236, - "loss": 46.0, - "step": 46011 - }, - { - "epoch": 3.5179387197278134, - "grad_norm": 0.0027830160688608885, - "learning_rate": 0.00019999389544880422, - "loss": 46.0, - "step": 46012 - }, - { - "epoch": 3.518015176711203, - "grad_norm": 0.003499938640743494, - "learning_rate": 0.00019999389518340031, - "loss": 46.0, - "step": 46013 - }, - { - "epoch": 3.5180916336945924, - "grad_norm": 0.0026316444855183363, - "learning_rate": 0.0001999938949179906, - "loss": 46.0, - "step": 46014 - }, - { - "epoch": 3.518168090677982, - "grad_norm": 0.0015766448341310024, - "learning_rate": 0.00019999389465257518, - "loss": 46.0, - "step": 46015 - }, - { - "epoch": 3.518244547661372, - "grad_norm": 0.0015216968022286892, - "learning_rate": 0.00019999389438715395, - "loss": 46.0, - "step": 46016 - }, - { - "epoch": 3.5183210046447617, - "grad_norm": 0.004122574348002672, - "learning_rate": 0.00019999389412172694, - "loss": 46.0, - "step": 46017 - }, - { - "epoch": 3.5183974616281515, - "grad_norm": 0.0037685635033994913, - "learning_rate": 0.0001999938938562942, - "loss": 46.0, - "step": 46018 - }, - { - "epoch": 3.5184739186115412, - "grad_norm": 0.0015333796618506312, - "learning_rate": 0.00019999389359085567, - "loss": 46.0, - "step": 46019 - }, - { - "epoch": 3.518550375594931, - "grad_norm": 0.003347333986312151, - "learning_rate": 0.00019999389332541137, - "loss": 46.0, - "step": 46020 - }, - { - "epoch": 3.5186268325783208, - "grad_norm": 0.0016944825183600187, - "learning_rate": 0.0001999938930599613, - "loss": 46.0, - "step": 46021 - }, - { - "epoch": 3.51870328956171, - "grad_norm": 0.0019174270564690232, - "learning_rate": 0.00019999389279450546, - "loss": 46.0, - "step": 46022 - }, - { - "epoch": 3.5187797465451, - "grad_norm": 0.0025299994740635157, - "learning_rate": 0.00019999389252904387, - "loss": 46.0, - "step": 46023 - }, - { - "epoch": 3.5188562035284896, - "grad_norm": 0.0014852676540613174, - "learning_rate": 0.0001999938922635765, - "loss": 46.0, - "step": 46024 - }, - { - "epoch": 3.5189326605118794, - "grad_norm": 0.0025224294513463974, - "learning_rate": 0.00019999389199810335, - "loss": 46.0, - "step": 46025 - }, - { - "epoch": 3.519009117495269, - "grad_norm": 0.002003477420657873, - "learning_rate": 0.0001999938917326244, - "loss": 46.0, - "step": 46026 - }, - { - "epoch": 3.519085574478659, - "grad_norm": 0.012452625669538975, - "learning_rate": 0.00019999389146713975, - "loss": 46.0, - "step": 46027 - }, - { - "epoch": 3.5191620314620486, - "grad_norm": 0.0032979967072606087, - "learning_rate": 0.0001999938912016493, - "loss": 46.0, - "step": 46028 - }, - { - "epoch": 3.5192384884454384, - "grad_norm": 0.0018458785489201546, - "learning_rate": 0.0001999938909361531, - "loss": 46.0, - "step": 46029 - }, - { - "epoch": 3.519314945428828, - "grad_norm": 0.0020835548639297485, - "learning_rate": 0.0001999938906706511, - "loss": 46.0, - "step": 46030 - }, - { - "epoch": 3.519391402412218, - "grad_norm": 0.0014450142625719309, - "learning_rate": 0.00019999389040514331, - "loss": 46.0, - "step": 46031 - }, - { - "epoch": 3.5194678593956077, - "grad_norm": 0.002957655116915703, - "learning_rate": 0.0001999938901396298, - "loss": 46.0, - "step": 46032 - }, - { - "epoch": 3.5195443163789975, - "grad_norm": 0.002331898082047701, - "learning_rate": 0.0001999938898741105, - "loss": 46.0, - "step": 46033 - }, - { - "epoch": 3.519620773362387, - "grad_norm": 0.0017305782530456781, - "learning_rate": 0.00019999388960858543, - "loss": 46.0, - "step": 46034 - }, - { - "epoch": 3.519697230345777, - "grad_norm": 0.0030377323273569345, - "learning_rate": 0.0001999938893430546, - "loss": 46.0, - "step": 46035 - }, - { - "epoch": 3.5197736873291663, - "grad_norm": 0.0025776883121579885, - "learning_rate": 0.00019999388907751802, - "loss": 46.0, - "step": 46036 - }, - { - "epoch": 3.519850144312556, - "grad_norm": 0.0027985882479697466, - "learning_rate": 0.00019999388881197563, - "loss": 46.0, - "step": 46037 - }, - { - "epoch": 3.519926601295946, - "grad_norm": 0.002309117466211319, - "learning_rate": 0.0001999938885464275, - "loss": 46.0, - "step": 46038 - }, - { - "epoch": 3.5200030582793356, - "grad_norm": 0.001496815006248653, - "learning_rate": 0.0001999938882808736, - "loss": 46.0, - "step": 46039 - }, - { - "epoch": 3.5200795152627253, - "grad_norm": 0.0019770245999097824, - "learning_rate": 0.00019999388801531388, - "loss": 46.0, - "step": 46040 - }, - { - "epoch": 3.520155972246115, - "grad_norm": 0.003854860318824649, - "learning_rate": 0.00019999388774974842, - "loss": 46.0, - "step": 46041 - }, - { - "epoch": 3.520232429229505, - "grad_norm": 0.002077318262308836, - "learning_rate": 0.00019999388748417722, - "loss": 46.0, - "step": 46042 - }, - { - "epoch": 3.5203088862128946, - "grad_norm": 0.0014645201154053211, - "learning_rate": 0.00019999388721860024, - "loss": 46.0, - "step": 46043 - }, - { - "epoch": 3.520385343196284, - "grad_norm": 0.002360919024795294, - "learning_rate": 0.0001999938869530175, - "loss": 46.0, - "step": 46044 - }, - { - "epoch": 3.5204618001796737, - "grad_norm": 0.0009254809119738638, - "learning_rate": 0.00019999388668742895, - "loss": 46.0, - "step": 46045 - }, - { - "epoch": 3.5205382571630635, - "grad_norm": 0.001603074953891337, - "learning_rate": 0.00019999388642183465, - "loss": 46.0, - "step": 46046 - }, - { - "epoch": 3.520614714146453, - "grad_norm": 0.0011834410252049565, - "learning_rate": 0.0001999938861562346, - "loss": 46.0, - "step": 46047 - }, - { - "epoch": 3.520691171129843, - "grad_norm": 0.004092540591955185, - "learning_rate": 0.00019999388589062877, - "loss": 46.0, - "step": 46048 - }, - { - "epoch": 3.5207676281132327, - "grad_norm": 0.0015189680270850658, - "learning_rate": 0.00019999388562501715, - "loss": 46.0, - "step": 46049 - }, - { - "epoch": 3.5208440850966225, - "grad_norm": 0.00263799587264657, - "learning_rate": 0.00019999388535939982, - "loss": 46.0, - "step": 46050 - }, - { - "epoch": 3.5209205420800123, - "grad_norm": 0.0025415034033358097, - "learning_rate": 0.00019999388509377665, - "loss": 46.0, - "step": 46051 - }, - { - "epoch": 3.520996999063402, - "grad_norm": 0.0012590382248163223, - "learning_rate": 0.00019999388482814774, - "loss": 46.0, - "step": 46052 - }, - { - "epoch": 3.521073456046792, - "grad_norm": 0.0019087701803073287, - "learning_rate": 0.00019999388456251306, - "loss": 46.0, - "step": 46053 - }, - { - "epoch": 3.5211499130301815, - "grad_norm": 0.0035736518912017345, - "learning_rate": 0.00019999388429687263, - "loss": 46.0, - "step": 46054 - }, - { - "epoch": 3.5212263700135713, - "grad_norm": 0.0021984484046697617, - "learning_rate": 0.0001999938840312264, - "loss": 46.0, - "step": 46055 - }, - { - "epoch": 3.521302826996961, - "grad_norm": 0.002630413044244051, - "learning_rate": 0.00019999388376557443, - "loss": 46.0, - "step": 46056 - }, - { - "epoch": 3.521379283980351, - "grad_norm": 0.007793442811816931, - "learning_rate": 0.00019999388349991665, - "loss": 46.0, - "step": 46057 - }, - { - "epoch": 3.52145574096374, - "grad_norm": 0.0016296850517392159, - "learning_rate": 0.00019999388323425313, - "loss": 46.0, - "step": 46058 - }, - { - "epoch": 3.52153219794713, - "grad_norm": 0.003136121667921543, - "learning_rate": 0.00019999388296858386, - "loss": 46.0, - "step": 46059 - }, - { - "epoch": 3.5216086549305197, - "grad_norm": 0.0033281154464930296, - "learning_rate": 0.00019999388270290877, - "loss": 46.0, - "step": 46060 - }, - { - "epoch": 3.5216851119139094, - "grad_norm": 0.006391796283423901, - "learning_rate": 0.00019999388243722795, - "loss": 46.0, - "step": 46061 - }, - { - "epoch": 3.521761568897299, - "grad_norm": 0.0010172172915190458, - "learning_rate": 0.00019999388217154134, - "loss": 46.0, - "step": 46062 - }, - { - "epoch": 3.521838025880689, - "grad_norm": 0.0014320433838292956, - "learning_rate": 0.00019999388190584897, - "loss": 46.0, - "step": 46063 - }, - { - "epoch": 3.5219144828640787, - "grad_norm": 0.003336955327540636, - "learning_rate": 0.00019999388164015084, - "loss": 46.0, - "step": 46064 - }, - { - "epoch": 3.5219909398474685, - "grad_norm": 0.002149282954633236, - "learning_rate": 0.00019999388137444696, - "loss": 46.0, - "step": 46065 - }, - { - "epoch": 3.522067396830858, - "grad_norm": 0.0011472372571006417, - "learning_rate": 0.00019999388110873728, - "loss": 46.0, - "step": 46066 - }, - { - "epoch": 3.5221438538142475, - "grad_norm": 0.0029665378388017416, - "learning_rate": 0.00019999388084302182, - "loss": 46.0, - "step": 46067 - }, - { - "epoch": 3.5222203107976373, - "grad_norm": 0.004602435976266861, - "learning_rate": 0.0001999938805773006, - "loss": 46.0, - "step": 46068 - }, - { - "epoch": 3.522296767781027, - "grad_norm": 0.0022841079626232386, - "learning_rate": 0.0001999938803115736, - "loss": 46.0, - "step": 46069 - }, - { - "epoch": 3.522373224764417, - "grad_norm": 0.0007618876988999546, - "learning_rate": 0.00019999388004584087, - "loss": 46.0, - "step": 46070 - }, - { - "epoch": 3.5224496817478066, - "grad_norm": 0.001303955214098096, - "learning_rate": 0.00019999387978010232, - "loss": 46.0, - "step": 46071 - }, - { - "epoch": 3.5225261387311964, - "grad_norm": 0.00635666586458683, - "learning_rate": 0.00019999387951435805, - "loss": 46.0, - "step": 46072 - }, - { - "epoch": 3.522602595714586, - "grad_norm": 0.002430203603580594, - "learning_rate": 0.00019999387924860798, - "loss": 46.0, - "step": 46073 - }, - { - "epoch": 3.522679052697976, - "grad_norm": 0.0031734409276396036, - "learning_rate": 0.00019999387898285217, - "loss": 46.0, - "step": 46074 - }, - { - "epoch": 3.5227555096813656, - "grad_norm": 0.004892737604677677, - "learning_rate": 0.00019999387871709055, - "loss": 46.0, - "step": 46075 - }, - { - "epoch": 3.5228319666647554, - "grad_norm": 0.004358571022748947, - "learning_rate": 0.0001999938784513232, - "loss": 46.0, - "step": 46076 - }, - { - "epoch": 3.522908423648145, - "grad_norm": 0.0008713623974472284, - "learning_rate": 0.00019999387818555006, - "loss": 46.0, - "step": 46077 - }, - { - "epoch": 3.522984880631535, - "grad_norm": 0.0027689894195646048, - "learning_rate": 0.00019999387791977115, - "loss": 46.0, - "step": 46078 - }, - { - "epoch": 3.5230613376149242, - "grad_norm": 0.002409819047898054, - "learning_rate": 0.00019999387765398647, - "loss": 46.0, - "step": 46079 - }, - { - "epoch": 3.523137794598314, - "grad_norm": 0.012077780440449715, - "learning_rate": 0.000199993877388196, - "loss": 46.0, - "step": 46080 - }, - { - "epoch": 3.5232142515817038, - "grad_norm": 0.0026512390468269587, - "learning_rate": 0.00019999387712239984, - "loss": 46.0, - "step": 46081 - }, - { - "epoch": 3.5232907085650935, - "grad_norm": 0.0008373678429052234, - "learning_rate": 0.00019999387685659784, - "loss": 46.0, - "step": 46082 - }, - { - "epoch": 3.5233671655484833, - "grad_norm": 0.003648224752396345, - "learning_rate": 0.0001999938765907901, - "loss": 46.0, - "step": 46083 - }, - { - "epoch": 3.523443622531873, - "grad_norm": 0.003660549409687519, - "learning_rate": 0.00019999387632497654, - "loss": 46.0, - "step": 46084 - }, - { - "epoch": 3.523520079515263, - "grad_norm": 0.003959151450544596, - "learning_rate": 0.00019999387605915727, - "loss": 46.0, - "step": 46085 - }, - { - "epoch": 3.5235965364986526, - "grad_norm": 0.004377682227641344, - "learning_rate": 0.00019999387579333223, - "loss": 46.0, - "step": 46086 - }, - { - "epoch": 3.5236729934820423, - "grad_norm": 0.002529323101043701, - "learning_rate": 0.0001999938755275014, - "loss": 46.0, - "step": 46087 - }, - { - "epoch": 3.5237494504654316, - "grad_norm": 0.003742713714018464, - "learning_rate": 0.0001999938752616648, - "loss": 46.0, - "step": 46088 - }, - { - "epoch": 3.5238259074488214, - "grad_norm": 0.002671642228960991, - "learning_rate": 0.00019999387499582241, - "loss": 46.0, - "step": 46089 - }, - { - "epoch": 3.523902364432211, - "grad_norm": 0.0031757294200360775, - "learning_rate": 0.00019999387472997428, - "loss": 46.0, - "step": 46090 - }, - { - "epoch": 3.523978821415601, - "grad_norm": 0.001106171403080225, - "learning_rate": 0.00019999387446412037, - "loss": 46.0, - "step": 46091 - }, - { - "epoch": 3.5240552783989907, - "grad_norm": 0.0018763769185170531, - "learning_rate": 0.00019999387419826072, - "loss": 46.0, - "step": 46092 - }, - { - "epoch": 3.5241317353823804, - "grad_norm": 0.0017623038729652762, - "learning_rate": 0.00019999387393239526, - "loss": 46.0, - "step": 46093 - }, - { - "epoch": 3.52420819236577, - "grad_norm": 0.0032051235903054476, - "learning_rate": 0.00019999387366652406, - "loss": 46.0, - "step": 46094 - }, - { - "epoch": 3.52428464934916, - "grad_norm": 0.00218062661588192, - "learning_rate": 0.0001999938734006471, - "loss": 46.0, - "step": 46095 - }, - { - "epoch": 3.5243611063325497, - "grad_norm": 0.001641497598029673, - "learning_rate": 0.0001999938731347643, - "loss": 46.0, - "step": 46096 - }, - { - "epoch": 3.5244375633159395, - "grad_norm": 0.0018254265887662768, - "learning_rate": 0.00019999387286887582, - "loss": 46.0, - "step": 46097 - }, - { - "epoch": 3.5245140202993293, - "grad_norm": 0.0022950973361730576, - "learning_rate": 0.00019999387260298152, - "loss": 46.0, - "step": 46098 - }, - { - "epoch": 3.524590477282719, - "grad_norm": 0.003461933694779873, - "learning_rate": 0.00019999387233708146, - "loss": 46.0, - "step": 46099 - }, - { - "epoch": 3.5246669342661088, - "grad_norm": 0.0029186801984906197, - "learning_rate": 0.00019999387207117564, - "loss": 46.0, - "step": 46100 - }, - { - "epoch": 3.524743391249498, - "grad_norm": 0.0027371232863515615, - "learning_rate": 0.00019999387180526403, - "loss": 46.0, - "step": 46101 - }, - { - "epoch": 3.524819848232888, - "grad_norm": 0.004526629578322172, - "learning_rate": 0.00019999387153934667, - "loss": 46.0, - "step": 46102 - }, - { - "epoch": 3.5248963052162776, - "grad_norm": 0.0021808675955981016, - "learning_rate": 0.00019999387127342356, - "loss": 46.0, - "step": 46103 - }, - { - "epoch": 3.5249727621996674, - "grad_norm": 0.0051301997154951096, - "learning_rate": 0.00019999387100749463, - "loss": 46.0, - "step": 46104 - }, - { - "epoch": 3.525049219183057, - "grad_norm": 0.0006886975606903434, - "learning_rate": 0.00019999387074155997, - "loss": 46.0, - "step": 46105 - }, - { - "epoch": 3.525125676166447, - "grad_norm": 0.0017932055052369833, - "learning_rate": 0.00019999387047561952, - "loss": 46.0, - "step": 46106 - }, - { - "epoch": 3.5252021331498367, - "grad_norm": 0.00435675261542201, - "learning_rate": 0.00019999387020967332, - "loss": 46.0, - "step": 46107 - }, - { - "epoch": 3.5252785901332264, - "grad_norm": 0.0024882727302610874, - "learning_rate": 0.00019999386994372134, - "loss": 46.0, - "step": 46108 - }, - { - "epoch": 3.5253550471166157, - "grad_norm": 0.007611345499753952, - "learning_rate": 0.0001999938696777636, - "loss": 46.0, - "step": 46109 - }, - { - "epoch": 3.5254315041000055, - "grad_norm": 0.002032520482316613, - "learning_rate": 0.00019999386941180007, - "loss": 46.0, - "step": 46110 - }, - { - "epoch": 3.5255079610833953, - "grad_norm": 0.004340315703302622, - "learning_rate": 0.00019999386914583078, - "loss": 46.0, - "step": 46111 - }, - { - "epoch": 3.525584418066785, - "grad_norm": 0.0039915586821734905, - "learning_rate": 0.0001999938688798557, - "loss": 46.0, - "step": 46112 - }, - { - "epoch": 3.525660875050175, - "grad_norm": 0.0011454009218141437, - "learning_rate": 0.00019999386861387493, - "loss": 46.0, - "step": 46113 - }, - { - "epoch": 3.5257373320335645, - "grad_norm": 0.0010445420630276203, - "learning_rate": 0.0001999938683478883, - "loss": 46.0, - "step": 46114 - }, - { - "epoch": 3.5258137890169543, - "grad_norm": 0.0019860954489558935, - "learning_rate": 0.00019999386808189595, - "loss": 46.0, - "step": 46115 - }, - { - "epoch": 3.525890246000344, - "grad_norm": 0.0031954918522387743, - "learning_rate": 0.00019999386781589782, - "loss": 46.0, - "step": 46116 - }, - { - "epoch": 3.525966702983734, - "grad_norm": 0.001985540147870779, - "learning_rate": 0.00019999386754989394, - "loss": 46.0, - "step": 46117 - }, - { - "epoch": 3.5260431599671236, - "grad_norm": 0.002525629475712776, - "learning_rate": 0.00019999386728388426, - "loss": 46.0, - "step": 46118 - }, - { - "epoch": 3.5261196169505133, - "grad_norm": 0.004820912145078182, - "learning_rate": 0.0001999938670178688, - "loss": 46.0, - "step": 46119 - }, - { - "epoch": 3.526196073933903, - "grad_norm": 0.0014989228220656514, - "learning_rate": 0.0001999938667518476, - "loss": 46.0, - "step": 46120 - }, - { - "epoch": 3.526272530917293, - "grad_norm": 0.0015615710290148854, - "learning_rate": 0.0001999938664858206, - "loss": 46.0, - "step": 46121 - }, - { - "epoch": 3.5263489879006826, - "grad_norm": 0.0013162549585103989, - "learning_rate": 0.00019999386621978785, - "loss": 46.0, - "step": 46122 - }, - { - "epoch": 3.526425444884072, - "grad_norm": 0.0023649130016565323, - "learning_rate": 0.00019999386595374936, - "loss": 46.0, - "step": 46123 - }, - { - "epoch": 3.5265019018674617, - "grad_norm": 0.006667578127235174, - "learning_rate": 0.0001999938656877051, - "loss": 46.0, - "step": 46124 - }, - { - "epoch": 3.5265783588508515, - "grad_norm": 0.006556238513439894, - "learning_rate": 0.000199993865421655, - "loss": 46.0, - "step": 46125 - }, - { - "epoch": 3.5266548158342412, - "grad_norm": 0.0015096586430445313, - "learning_rate": 0.00019999386515559921, - "loss": 46.0, - "step": 46126 - }, - { - "epoch": 3.526731272817631, - "grad_norm": 0.0012077161809429526, - "learning_rate": 0.0001999938648895376, - "loss": 46.0, - "step": 46127 - }, - { - "epoch": 3.5268077298010208, - "grad_norm": 0.0022361089941114187, - "learning_rate": 0.00019999386462347024, - "loss": 46.0, - "step": 46128 - }, - { - "epoch": 3.5268841867844105, - "grad_norm": 0.002737488131970167, - "learning_rate": 0.00019999386435739713, - "loss": 46.0, - "step": 46129 - }, - { - "epoch": 3.5269606437678003, - "grad_norm": 0.0032334872521460056, - "learning_rate": 0.00019999386409131823, - "loss": 46.0, - "step": 46130 - }, - { - "epoch": 3.5270371007511896, - "grad_norm": 0.004387090913951397, - "learning_rate": 0.00019999386382523355, - "loss": 46.0, - "step": 46131 - }, - { - "epoch": 3.5271135577345794, - "grad_norm": 0.0015021500876173377, - "learning_rate": 0.00019999386355914312, - "loss": 46.0, - "step": 46132 - }, - { - "epoch": 3.527190014717969, - "grad_norm": 0.0020126483868807554, - "learning_rate": 0.00019999386329304692, - "loss": 46.0, - "step": 46133 - }, - { - "epoch": 3.527266471701359, - "grad_norm": 0.0019522577058523893, - "learning_rate": 0.00019999386302694495, - "loss": 46.0, - "step": 46134 - }, - { - "epoch": 3.5273429286847486, - "grad_norm": 0.005085465032607317, - "learning_rate": 0.00019999386276083717, - "loss": 46.0, - "step": 46135 - }, - { - "epoch": 3.5274193856681384, - "grad_norm": 0.0020602287258952856, - "learning_rate": 0.00019999386249472368, - "loss": 46.0, - "step": 46136 - }, - { - "epoch": 3.527495842651528, - "grad_norm": 0.0018585174111649394, - "learning_rate": 0.00019999386222860439, - "loss": 46.0, - "step": 46137 - }, - { - "epoch": 3.527572299634918, - "grad_norm": 0.000972264853771776, - "learning_rate": 0.00019999386196247935, - "loss": 46.0, - "step": 46138 - }, - { - "epoch": 3.5276487566183077, - "grad_norm": 0.004523070063441992, - "learning_rate": 0.0001999938616963485, - "loss": 46.0, - "step": 46139 - }, - { - "epoch": 3.5277252136016974, - "grad_norm": 0.006592682562768459, - "learning_rate": 0.00019999386143021192, - "loss": 46.0, - "step": 46140 - }, - { - "epoch": 3.527801670585087, - "grad_norm": 0.001471811207011342, - "learning_rate": 0.0001999938611640696, - "loss": 46.0, - "step": 46141 - }, - { - "epoch": 3.527878127568477, - "grad_norm": 0.0015464500756934285, - "learning_rate": 0.00019999386089792145, - "loss": 46.0, - "step": 46142 - }, - { - "epoch": 3.5279545845518667, - "grad_norm": 0.0017789526609703898, - "learning_rate": 0.00019999386063176755, - "loss": 46.0, - "step": 46143 - }, - { - "epoch": 3.5280310415352565, - "grad_norm": 0.004961306694895029, - "learning_rate": 0.00019999386036560787, - "loss": 46.0, - "step": 46144 - }, - { - "epoch": 3.528107498518646, - "grad_norm": 0.0016997209750115871, - "learning_rate": 0.00019999386009944244, - "loss": 46.0, - "step": 46145 - }, - { - "epoch": 3.5281839555020356, - "grad_norm": 0.0012948306975886226, - "learning_rate": 0.00019999385983327124, - "loss": 46.0, - "step": 46146 - }, - { - "epoch": 3.5282604124854253, - "grad_norm": 0.0029470238368958235, - "learning_rate": 0.00019999385956709427, - "loss": 46.0, - "step": 46147 - }, - { - "epoch": 3.528336869468815, - "grad_norm": 0.002085821470245719, - "learning_rate": 0.00019999385930091152, - "loss": 46.0, - "step": 46148 - }, - { - "epoch": 3.528413326452205, - "grad_norm": 0.0025229991879314184, - "learning_rate": 0.000199993859034723, - "loss": 46.0, - "step": 46149 - }, - { - "epoch": 3.5284897834355946, - "grad_norm": 0.0030042538419365883, - "learning_rate": 0.00019999385876852874, - "loss": 46.0, - "step": 46150 - }, - { - "epoch": 3.5285662404189844, - "grad_norm": 0.0010310504585504532, - "learning_rate": 0.00019999385850232867, - "loss": 46.0, - "step": 46151 - }, - { - "epoch": 3.528642697402374, - "grad_norm": 0.002798303961753845, - "learning_rate": 0.00019999385823612286, - "loss": 46.0, - "step": 46152 - }, - { - "epoch": 3.5287191543857634, - "grad_norm": 0.0010029373224824667, - "learning_rate": 0.00019999385796991127, - "loss": 46.0, - "step": 46153 - }, - { - "epoch": 3.528795611369153, - "grad_norm": 0.0015498147113248706, - "learning_rate": 0.0001999938577036939, - "loss": 46.0, - "step": 46154 - }, - { - "epoch": 3.528872068352543, - "grad_norm": 0.001677525695413351, - "learning_rate": 0.00019999385743747078, - "loss": 46.0, - "step": 46155 - }, - { - "epoch": 3.5289485253359327, - "grad_norm": 0.003191616153344512, - "learning_rate": 0.00019999385717124187, - "loss": 46.0, - "step": 46156 - }, - { - "epoch": 3.5290249823193225, - "grad_norm": 0.0033075122628360987, - "learning_rate": 0.00019999385690500722, - "loss": 46.0, - "step": 46157 - }, - { - "epoch": 3.5291014393027123, - "grad_norm": 0.0010205006692558527, - "learning_rate": 0.00019999385663876677, - "loss": 46.0, - "step": 46158 - }, - { - "epoch": 3.529177896286102, - "grad_norm": 0.0024993717670440674, - "learning_rate": 0.0001999938563725206, - "loss": 46.0, - "step": 46159 - }, - { - "epoch": 3.5292543532694918, - "grad_norm": 0.0024535616394132376, - "learning_rate": 0.00019999385610626862, - "loss": 46.0, - "step": 46160 - }, - { - "epoch": 3.5293308102528815, - "grad_norm": 0.006042394321411848, - "learning_rate": 0.00019999385584001087, - "loss": 46.0, - "step": 46161 - }, - { - "epoch": 3.5294072672362713, - "grad_norm": 0.0017053228802978992, - "learning_rate": 0.00019999385557374736, - "loss": 46.0, - "step": 46162 - }, - { - "epoch": 3.529483724219661, - "grad_norm": 0.004111082758754492, - "learning_rate": 0.0001999938553074781, - "loss": 46.0, - "step": 46163 - }, - { - "epoch": 3.529560181203051, - "grad_norm": 0.004131131339818239, - "learning_rate": 0.00019999385504120305, - "loss": 46.0, - "step": 46164 - }, - { - "epoch": 3.5296366381864406, - "grad_norm": 0.0007864825893193483, - "learning_rate": 0.00019999385477492224, - "loss": 46.0, - "step": 46165 - }, - { - "epoch": 3.5297130951698303, - "grad_norm": 0.004420701414346695, - "learning_rate": 0.00019999385450863562, - "loss": 46.0, - "step": 46166 - }, - { - "epoch": 3.5297895521532197, - "grad_norm": 0.001161379972472787, - "learning_rate": 0.00019999385424234327, - "loss": 46.0, - "step": 46167 - }, - { - "epoch": 3.5298660091366094, - "grad_norm": 0.0022415905259549618, - "learning_rate": 0.00019999385397604516, - "loss": 46.0, - "step": 46168 - }, - { - "epoch": 3.529942466119999, - "grad_norm": 0.0015534928534179926, - "learning_rate": 0.00019999385370974125, - "loss": 46.0, - "step": 46169 - }, - { - "epoch": 3.530018923103389, - "grad_norm": 0.0022089462727308273, - "learning_rate": 0.00019999385344343157, - "loss": 46.0, - "step": 46170 - }, - { - "epoch": 3.5300953800867787, - "grad_norm": 0.0011778940679505467, - "learning_rate": 0.00019999385317711618, - "loss": 46.0, - "step": 46171 - }, - { - "epoch": 3.5301718370701685, - "grad_norm": 0.002905918750911951, - "learning_rate": 0.00019999385291079495, - "loss": 46.0, - "step": 46172 - }, - { - "epoch": 3.5302482940535582, - "grad_norm": 0.002060182858258486, - "learning_rate": 0.00019999385264446798, - "loss": 46.0, - "step": 46173 - }, - { - "epoch": 3.530324751036948, - "grad_norm": 0.0024297856725752354, - "learning_rate": 0.00019999385237813523, - "loss": 46.0, - "step": 46174 - }, - { - "epoch": 3.5304012080203373, - "grad_norm": 0.0013838422019034624, - "learning_rate": 0.00019999385211179674, - "loss": 46.0, - "step": 46175 - }, - { - "epoch": 3.530477665003727, - "grad_norm": 0.0017051702598109841, - "learning_rate": 0.00019999385184545245, - "loss": 46.0, - "step": 46176 - }, - { - "epoch": 3.530554121987117, - "grad_norm": 0.0047286455519497395, - "learning_rate": 0.0001999938515791024, - "loss": 46.0, - "step": 46177 - }, - { - "epoch": 3.5306305789705066, - "grad_norm": 0.004432672169059515, - "learning_rate": 0.0001999938513127466, - "loss": 46.0, - "step": 46178 - }, - { - "epoch": 3.5307070359538963, - "grad_norm": 0.0015016455436125398, - "learning_rate": 0.000199993851046385, - "loss": 46.0, - "step": 46179 - }, - { - "epoch": 3.530783492937286, - "grad_norm": 0.003653473686426878, - "learning_rate": 0.00019999385078001765, - "loss": 46.0, - "step": 46180 - }, - { - "epoch": 3.530859949920676, - "grad_norm": 0.0014620987931266427, - "learning_rate": 0.00019999385051364455, - "loss": 46.0, - "step": 46181 - }, - { - "epoch": 3.5309364069040656, - "grad_norm": 0.0010354912374168634, - "learning_rate": 0.00019999385024726564, - "loss": 46.0, - "step": 46182 - }, - { - "epoch": 3.5310128638874554, - "grad_norm": 0.0009038523421622813, - "learning_rate": 0.000199993849980881, - "loss": 46.0, - "step": 46183 - }, - { - "epoch": 3.531089320870845, - "grad_norm": 0.002375928685069084, - "learning_rate": 0.00019999384971449056, - "loss": 46.0, - "step": 46184 - }, - { - "epoch": 3.531165777854235, - "grad_norm": 0.0009006283944472671, - "learning_rate": 0.00019999384944809436, - "loss": 46.0, - "step": 46185 - }, - { - "epoch": 3.5312422348376247, - "grad_norm": 0.0022582062520086765, - "learning_rate": 0.0001999938491816924, - "loss": 46.0, - "step": 46186 - }, - { - "epoch": 3.5313186918210144, - "grad_norm": 0.0005977017572149634, - "learning_rate": 0.00019999384891528465, - "loss": 46.0, - "step": 46187 - }, - { - "epoch": 3.531395148804404, - "grad_norm": 0.0015886574983596802, - "learning_rate": 0.00019999384864887113, - "loss": 46.0, - "step": 46188 - }, - { - "epoch": 3.5314716057877935, - "grad_norm": 0.0007058461778797209, - "learning_rate": 0.00019999384838245186, - "loss": 46.0, - "step": 46189 - }, - { - "epoch": 3.5315480627711833, - "grad_norm": 0.002441684016957879, - "learning_rate": 0.00019999384811602682, - "loss": 46.0, - "step": 46190 - }, - { - "epoch": 3.531624519754573, - "grad_norm": 0.004254346247762442, - "learning_rate": 0.000199993847849596, - "loss": 46.0, - "step": 46191 - }, - { - "epoch": 3.531700976737963, - "grad_norm": 0.002071988768875599, - "learning_rate": 0.00019999384758315943, - "loss": 46.0, - "step": 46192 - }, - { - "epoch": 3.5317774337213526, - "grad_norm": 0.00489824777469039, - "learning_rate": 0.00019999384731671707, - "loss": 46.0, - "step": 46193 - }, - { - "epoch": 3.5318538907047423, - "grad_norm": 0.0036819670349359512, - "learning_rate": 0.00019999384705026896, - "loss": 46.0, - "step": 46194 - }, - { - "epoch": 3.531930347688132, - "grad_norm": 0.004765456076711416, - "learning_rate": 0.00019999384678381506, - "loss": 46.0, - "step": 46195 - }, - { - "epoch": 3.532006804671522, - "grad_norm": 0.0012841366697102785, - "learning_rate": 0.0001999938465173554, - "loss": 46.0, - "step": 46196 - }, - { - "epoch": 3.532083261654911, - "grad_norm": 0.0026969967875629663, - "learning_rate": 0.00019999384625088998, - "loss": 46.0, - "step": 46197 - }, - { - "epoch": 3.532159718638301, - "grad_norm": 0.0022583266254514456, - "learning_rate": 0.00019999384598441878, - "loss": 46.0, - "step": 46198 - }, - { - "epoch": 3.5322361756216907, - "grad_norm": 0.003013828070834279, - "learning_rate": 0.0001999938457179418, - "loss": 46.0, - "step": 46199 - }, - { - "epoch": 3.5323126326050804, - "grad_norm": 0.0013328802306205034, - "learning_rate": 0.00019999384545145907, - "loss": 46.0, - "step": 46200 - }, - { - "epoch": 3.53238908958847, - "grad_norm": 0.003972713835537434, - "learning_rate": 0.00019999384518497055, - "loss": 46.0, - "step": 46201 - }, - { - "epoch": 3.53246554657186, - "grad_norm": 0.0019800085574388504, - "learning_rate": 0.0001999938449184763, - "loss": 46.0, - "step": 46202 - }, - { - "epoch": 3.5325420035552497, - "grad_norm": 0.006707543972879648, - "learning_rate": 0.00019999384465197624, - "loss": 46.0, - "step": 46203 - }, - { - "epoch": 3.5326184605386395, - "grad_norm": 0.002987027633935213, - "learning_rate": 0.00019999384438547043, - "loss": 46.0, - "step": 46204 - }, - { - "epoch": 3.5326949175220292, - "grad_norm": 0.004226853139698505, - "learning_rate": 0.00019999384411895885, - "loss": 46.0, - "step": 46205 - }, - { - "epoch": 3.532771374505419, - "grad_norm": 0.0023436364717781544, - "learning_rate": 0.0001999938438524415, - "loss": 46.0, - "step": 46206 - }, - { - "epoch": 3.5328478314888088, - "grad_norm": 0.0016728965565562248, - "learning_rate": 0.00019999384358591839, - "loss": 46.0, - "step": 46207 - }, - { - "epoch": 3.5329242884721985, - "grad_norm": 0.0017292009433731437, - "learning_rate": 0.00019999384331938948, - "loss": 46.0, - "step": 46208 - }, - { - "epoch": 3.5330007454555883, - "grad_norm": 0.001986191375181079, - "learning_rate": 0.00019999384305285483, - "loss": 46.0, - "step": 46209 - }, - { - "epoch": 3.5330772024389776, - "grad_norm": 0.004805953707545996, - "learning_rate": 0.00019999384278631438, - "loss": 46.0, - "step": 46210 - }, - { - "epoch": 3.5331536594223674, - "grad_norm": 0.0014367010444402695, - "learning_rate": 0.0001999938425197682, - "loss": 46.0, - "step": 46211 - }, - { - "epoch": 3.533230116405757, - "grad_norm": 0.0031450914684683084, - "learning_rate": 0.00019999384225321623, - "loss": 46.0, - "step": 46212 - }, - { - "epoch": 3.533306573389147, - "grad_norm": 0.002335998695343733, - "learning_rate": 0.00019999384198665852, - "loss": 46.0, - "step": 46213 - }, - { - "epoch": 3.5333830303725366, - "grad_norm": 0.0021044020541012287, - "learning_rate": 0.000199993841720095, - "loss": 46.0, - "step": 46214 - }, - { - "epoch": 3.5334594873559264, - "grad_norm": 0.003720491658896208, - "learning_rate": 0.00019999384145352573, - "loss": 46.0, - "step": 46215 - }, - { - "epoch": 3.533535944339316, - "grad_norm": 0.0017340414924547076, - "learning_rate": 0.00019999384118695067, - "loss": 46.0, - "step": 46216 - }, - { - "epoch": 3.533612401322706, - "grad_norm": 0.0043286606669425964, - "learning_rate": 0.00019999384092036989, - "loss": 46.0, - "step": 46217 - }, - { - "epoch": 3.5336888583060957, - "grad_norm": 0.0050333645194768906, - "learning_rate": 0.0001999938406537833, - "loss": 46.0, - "step": 46218 - }, - { - "epoch": 3.533765315289485, - "grad_norm": 0.002465359168127179, - "learning_rate": 0.00019999384038719094, - "loss": 46.0, - "step": 46219 - }, - { - "epoch": 3.5338417722728748, - "grad_norm": 0.00240574823692441, - "learning_rate": 0.00019999384012059284, - "loss": 46.0, - "step": 46220 - }, - { - "epoch": 3.5339182292562645, - "grad_norm": 0.002476591384038329, - "learning_rate": 0.00019999383985398893, - "loss": 46.0, - "step": 46221 - }, - { - "epoch": 3.5339946862396543, - "grad_norm": 0.0019660114776343107, - "learning_rate": 0.00019999383958737928, - "loss": 46.0, - "step": 46222 - }, - { - "epoch": 3.534071143223044, - "grad_norm": 0.0035554941277951, - "learning_rate": 0.00019999383932076389, - "loss": 46.0, - "step": 46223 - }, - { - "epoch": 3.534147600206434, - "grad_norm": 0.0008523643482476473, - "learning_rate": 0.00019999383905414266, - "loss": 46.0, - "step": 46224 - }, - { - "epoch": 3.5342240571898236, - "grad_norm": 0.003433752106502652, - "learning_rate": 0.00019999383878751572, - "loss": 46.0, - "step": 46225 - }, - { - "epoch": 3.5343005141732133, - "grad_norm": 0.0024735131300985813, - "learning_rate": 0.00019999383852088297, - "loss": 46.0, - "step": 46226 - }, - { - "epoch": 3.534376971156603, - "grad_norm": 0.0022912307176738977, - "learning_rate": 0.00019999383825424448, - "loss": 46.0, - "step": 46227 - }, - { - "epoch": 3.534453428139993, - "grad_norm": 0.0013587875291705132, - "learning_rate": 0.00019999383798760022, - "loss": 46.0, - "step": 46228 - }, - { - "epoch": 3.5345298851233826, - "grad_norm": 0.0017341406783089042, - "learning_rate": 0.00019999383772095018, - "loss": 46.0, - "step": 46229 - }, - { - "epoch": 3.5346063421067724, - "grad_norm": 0.004754199180752039, - "learning_rate": 0.00019999383745429437, - "loss": 46.0, - "step": 46230 - }, - { - "epoch": 3.534682799090162, - "grad_norm": 0.0019492115825414658, - "learning_rate": 0.00019999383718763278, - "loss": 46.0, - "step": 46231 - }, - { - "epoch": 3.5347592560735515, - "grad_norm": 0.002331928815692663, - "learning_rate": 0.00019999383692096543, - "loss": 46.0, - "step": 46232 - }, - { - "epoch": 3.534835713056941, - "grad_norm": 0.008620609529316425, - "learning_rate": 0.0001999938366542923, - "loss": 46.0, - "step": 46233 - }, - { - "epoch": 3.534912170040331, - "grad_norm": 0.0013084078673273325, - "learning_rate": 0.00019999383638761342, - "loss": 46.0, - "step": 46234 - }, - { - "epoch": 3.5349886270237207, - "grad_norm": 0.006739387754350901, - "learning_rate": 0.00019999383612092877, - "loss": 46.0, - "step": 46235 - }, - { - "epoch": 3.5350650840071105, - "grad_norm": 0.0020912489853799343, - "learning_rate": 0.00019999383585423834, - "loss": 46.0, - "step": 46236 - }, - { - "epoch": 3.5351415409905003, - "grad_norm": 0.002359029371291399, - "learning_rate": 0.00019999383558754215, - "loss": 46.0, - "step": 46237 - }, - { - "epoch": 3.53521799797389, - "grad_norm": 0.001721543725579977, - "learning_rate": 0.0001999938353208402, - "loss": 46.0, - "step": 46238 - }, - { - "epoch": 3.53529445495728, - "grad_norm": 0.0011985236778855324, - "learning_rate": 0.00019999383505413248, - "loss": 46.0, - "step": 46239 - }, - { - "epoch": 3.5353709119406695, - "grad_norm": 0.0017227210337296128, - "learning_rate": 0.00019999383478741897, - "loss": 46.0, - "step": 46240 - }, - { - "epoch": 3.535447368924059, - "grad_norm": 0.001360439695417881, - "learning_rate": 0.0001999938345206997, - "loss": 46.0, - "step": 46241 - }, - { - "epoch": 3.5355238259074486, - "grad_norm": 0.004013245925307274, - "learning_rate": 0.00019999383425397467, - "loss": 46.0, - "step": 46242 - }, - { - "epoch": 3.5356002828908384, - "grad_norm": 0.0016308400081470609, - "learning_rate": 0.00019999383398724386, - "loss": 46.0, - "step": 46243 - }, - { - "epoch": 3.535676739874228, - "grad_norm": 0.003431737655773759, - "learning_rate": 0.00019999383372050727, - "loss": 46.0, - "step": 46244 - }, - { - "epoch": 3.535753196857618, - "grad_norm": 0.006416003219783306, - "learning_rate": 0.00019999383345376491, - "loss": 46.0, - "step": 46245 - }, - { - "epoch": 3.5358296538410077, - "grad_norm": 0.003979357425123453, - "learning_rate": 0.0001999938331870168, - "loss": 46.0, - "step": 46246 - }, - { - "epoch": 3.5359061108243974, - "grad_norm": 0.006700616329908371, - "learning_rate": 0.00019999383292026293, - "loss": 46.0, - "step": 46247 - }, - { - "epoch": 3.535982567807787, - "grad_norm": 0.0017793797887861729, - "learning_rate": 0.00019999383265350326, - "loss": 46.0, - "step": 46248 - }, - { - "epoch": 3.536059024791177, - "grad_norm": 0.0019806320779025555, - "learning_rate": 0.00019999383238673786, - "loss": 46.0, - "step": 46249 - }, - { - "epoch": 3.5361354817745667, - "grad_norm": 0.0013911542482674122, - "learning_rate": 0.00019999383211996666, - "loss": 46.0, - "step": 46250 - }, - { - "epoch": 3.5362119387579565, - "grad_norm": 0.000999206560663879, - "learning_rate": 0.0001999938318531897, - "loss": 46.0, - "step": 46251 - }, - { - "epoch": 3.5362883957413462, - "grad_norm": 0.0016948198899626732, - "learning_rate": 0.00019999383158640698, - "loss": 46.0, - "step": 46252 - }, - { - "epoch": 3.536364852724736, - "grad_norm": 0.0024850894697010517, - "learning_rate": 0.00019999383131961848, - "loss": 46.0, - "step": 46253 - }, - { - "epoch": 3.5364413097081253, - "grad_norm": 0.0023038587532937527, - "learning_rate": 0.00019999383105282422, - "loss": 46.0, - "step": 46254 - }, - { - "epoch": 3.536517766691515, - "grad_norm": 0.005350546911358833, - "learning_rate": 0.00019999383078602416, - "loss": 46.0, - "step": 46255 - }, - { - "epoch": 3.536594223674905, - "grad_norm": 0.0021614579018205404, - "learning_rate": 0.00019999383051921837, - "loss": 46.0, - "step": 46256 - }, - { - "epoch": 3.5366706806582946, - "grad_norm": 0.004045534413307905, - "learning_rate": 0.0001999938302524068, - "loss": 46.0, - "step": 46257 - }, - { - "epoch": 3.5367471376416844, - "grad_norm": 0.0017706024227663875, - "learning_rate": 0.00019999382998558943, - "loss": 46.0, - "step": 46258 - }, - { - "epoch": 3.536823594625074, - "grad_norm": 0.004617693368345499, - "learning_rate": 0.00019999382971876633, - "loss": 46.0, - "step": 46259 - }, - { - "epoch": 3.536900051608464, - "grad_norm": 0.0007010715780779719, - "learning_rate": 0.00019999382945193745, - "loss": 46.0, - "step": 46260 - }, - { - "epoch": 3.5369765085918536, - "grad_norm": 0.0045739891938865185, - "learning_rate": 0.0001999938291851028, - "loss": 46.0, - "step": 46261 - }, - { - "epoch": 3.537052965575243, - "grad_norm": 0.00445365346968174, - "learning_rate": 0.0001999938289182624, - "loss": 46.0, - "step": 46262 - }, - { - "epoch": 3.5371294225586327, - "grad_norm": 0.0033870309125632048, - "learning_rate": 0.0001999938286514162, - "loss": 46.0, - "step": 46263 - }, - { - "epoch": 3.5372058795420225, - "grad_norm": 0.00199363986030221, - "learning_rate": 0.00019999382838456424, - "loss": 46.0, - "step": 46264 - }, - { - "epoch": 3.5372823365254122, - "grad_norm": 0.000944305385928601, - "learning_rate": 0.0001999938281177065, - "loss": 46.0, - "step": 46265 - }, - { - "epoch": 3.537358793508802, - "grad_norm": 0.0006186650134623051, - "learning_rate": 0.000199993827850843, - "loss": 46.0, - "step": 46266 - }, - { - "epoch": 3.5374352504921918, - "grad_norm": 0.0011179079301655293, - "learning_rate": 0.00019999382758397374, - "loss": 46.0, - "step": 46267 - }, - { - "epoch": 3.5375117074755815, - "grad_norm": 0.0026072447653859854, - "learning_rate": 0.0001999938273170987, - "loss": 46.0, - "step": 46268 - }, - { - "epoch": 3.5375881644589713, - "grad_norm": 0.001495429198257625, - "learning_rate": 0.0001999938270502179, - "loss": 46.0, - "step": 46269 - }, - { - "epoch": 3.537664621442361, - "grad_norm": 0.001951538142748177, - "learning_rate": 0.0001999938267833313, - "loss": 46.0, - "step": 46270 - }, - { - "epoch": 3.537741078425751, - "grad_norm": 0.004342267289757729, - "learning_rate": 0.00019999382651643898, - "loss": 46.0, - "step": 46271 - }, - { - "epoch": 3.5378175354091406, - "grad_norm": 0.0011411033337935805, - "learning_rate": 0.00019999382624954088, - "loss": 46.0, - "step": 46272 - }, - { - "epoch": 3.5378939923925303, - "grad_norm": 0.0011285870568826795, - "learning_rate": 0.000199993825982637, - "loss": 46.0, - "step": 46273 - }, - { - "epoch": 3.53797044937592, - "grad_norm": 0.0015957941068336368, - "learning_rate": 0.00019999382571572735, - "loss": 46.0, - "step": 46274 - }, - { - "epoch": 3.53804690635931, - "grad_norm": 0.005447502247989178, - "learning_rate": 0.00019999382544881193, - "loss": 46.0, - "step": 46275 - }, - { - "epoch": 3.538123363342699, - "grad_norm": 0.0006389226182363927, - "learning_rate": 0.00019999382518189073, - "loss": 46.0, - "step": 46276 - }, - { - "epoch": 3.538199820326089, - "grad_norm": 0.00413490878418088, - "learning_rate": 0.00019999382491496376, - "loss": 46.0, - "step": 46277 - }, - { - "epoch": 3.5382762773094787, - "grad_norm": 0.003709120210260153, - "learning_rate": 0.00019999382464803108, - "loss": 46.0, - "step": 46278 - }, - { - "epoch": 3.5383527342928685, - "grad_norm": 0.00188072572927922, - "learning_rate": 0.00019999382438109256, - "loss": 46.0, - "step": 46279 - }, - { - "epoch": 3.538429191276258, - "grad_norm": 0.0010070427088066936, - "learning_rate": 0.0001999938241141483, - "loss": 46.0, - "step": 46280 - }, - { - "epoch": 3.538505648259648, - "grad_norm": 0.0017372238216921687, - "learning_rate": 0.00019999382384719826, - "loss": 46.0, - "step": 46281 - }, - { - "epoch": 3.5385821052430377, - "grad_norm": 0.002527270233258605, - "learning_rate": 0.00019999382358024245, - "loss": 46.0, - "step": 46282 - }, - { - "epoch": 3.5386585622264275, - "grad_norm": 0.003549344837665558, - "learning_rate": 0.0001999938233132809, - "loss": 46.0, - "step": 46283 - }, - { - "epoch": 3.538735019209817, - "grad_norm": 0.0042600310407578945, - "learning_rate": 0.00019999382304631357, - "loss": 46.0, - "step": 46284 - }, - { - "epoch": 3.5388114761932066, - "grad_norm": 0.001320547889918089, - "learning_rate": 0.00019999382277934044, - "loss": 46.0, - "step": 46285 - }, - { - "epoch": 3.5388879331765963, - "grad_norm": 0.001385242328979075, - "learning_rate": 0.00019999382251236156, - "loss": 46.0, - "step": 46286 - }, - { - "epoch": 3.538964390159986, - "grad_norm": 0.003106738207861781, - "learning_rate": 0.0001999938222453769, - "loss": 46.0, - "step": 46287 - }, - { - "epoch": 3.539040847143376, - "grad_norm": 0.0009308186126872897, - "learning_rate": 0.0001999938219783865, - "loss": 46.0, - "step": 46288 - }, - { - "epoch": 3.5391173041267656, - "grad_norm": 0.005181416403502226, - "learning_rate": 0.00019999382171139032, - "loss": 46.0, - "step": 46289 - }, - { - "epoch": 3.5391937611101554, - "grad_norm": 0.0012717050267383456, - "learning_rate": 0.00019999382144438835, - "loss": 46.0, - "step": 46290 - }, - { - "epoch": 3.539270218093545, - "grad_norm": 0.006178069394081831, - "learning_rate": 0.00019999382117738063, - "loss": 46.0, - "step": 46291 - }, - { - "epoch": 3.539346675076935, - "grad_norm": 0.0018972111865878105, - "learning_rate": 0.00019999382091036714, - "loss": 46.0, - "step": 46292 - }, - { - "epoch": 3.5394231320603247, - "grad_norm": 0.002399832010269165, - "learning_rate": 0.00019999382064334785, - "loss": 46.0, - "step": 46293 - }, - { - "epoch": 3.5394995890437144, - "grad_norm": 0.002942180959507823, - "learning_rate": 0.00019999382037632284, - "loss": 46.0, - "step": 46294 - }, - { - "epoch": 3.539576046027104, - "grad_norm": 0.003683202899992466, - "learning_rate": 0.00019999382010929204, - "loss": 46.0, - "step": 46295 - }, - { - "epoch": 3.539652503010494, - "grad_norm": 0.002486092271283269, - "learning_rate": 0.00019999381984225548, - "loss": 46.0, - "step": 46296 - }, - { - "epoch": 3.5397289599938837, - "grad_norm": 0.004011643119156361, - "learning_rate": 0.00019999381957521312, - "loss": 46.0, - "step": 46297 - }, - { - "epoch": 3.539805416977273, - "grad_norm": 0.0014857224887236953, - "learning_rate": 0.00019999381930816502, - "loss": 46.0, - "step": 46298 - }, - { - "epoch": 3.539881873960663, - "grad_norm": 0.0021912052761763334, - "learning_rate": 0.00019999381904111117, - "loss": 46.0, - "step": 46299 - }, - { - "epoch": 3.5399583309440525, - "grad_norm": 0.001422680332325399, - "learning_rate": 0.0001999938187740515, - "loss": 46.0, - "step": 46300 - }, - { - "epoch": 3.5400347879274423, - "grad_norm": 0.0029559291433542967, - "learning_rate": 0.0001999938185069861, - "loss": 46.0, - "step": 46301 - }, - { - "epoch": 3.540111244910832, - "grad_norm": 0.0008832392049953341, - "learning_rate": 0.0001999938182399149, - "loss": 46.0, - "step": 46302 - }, - { - "epoch": 3.540187701894222, - "grad_norm": 0.002093641087412834, - "learning_rate": 0.00019999381797283796, - "loss": 46.0, - "step": 46303 - }, - { - "epoch": 3.5402641588776116, - "grad_norm": 0.009877314791083336, - "learning_rate": 0.00019999381770575522, - "loss": 46.0, - "step": 46304 - }, - { - "epoch": 3.5403406158610013, - "grad_norm": 0.0014203209429979324, - "learning_rate": 0.00019999381743866673, - "loss": 46.0, - "step": 46305 - }, - { - "epoch": 3.5404170728443907, - "grad_norm": 0.004516810178756714, - "learning_rate": 0.0001999938171715725, - "loss": 46.0, - "step": 46306 - }, - { - "epoch": 3.5404935298277804, - "grad_norm": 0.0025179197546094656, - "learning_rate": 0.00019999381690447246, - "loss": 46.0, - "step": 46307 - }, - { - "epoch": 3.54056998681117, - "grad_norm": 0.0025336826220154762, - "learning_rate": 0.00019999381663736665, - "loss": 46.0, - "step": 46308 - }, - { - "epoch": 3.54064644379456, - "grad_norm": 0.0015534859849140048, - "learning_rate": 0.0001999938163702551, - "loss": 46.0, - "step": 46309 - }, - { - "epoch": 3.5407229007779497, - "grad_norm": 0.007230198476463556, - "learning_rate": 0.00019999381610313774, - "loss": 46.0, - "step": 46310 - }, - { - "epoch": 3.5407993577613395, - "grad_norm": 0.0021804249845445156, - "learning_rate": 0.00019999381583601464, - "loss": 46.0, - "step": 46311 - }, - { - "epoch": 3.5408758147447292, - "grad_norm": 0.0016025755321606994, - "learning_rate": 0.00019999381556888576, - "loss": 46.0, - "step": 46312 - }, - { - "epoch": 3.540952271728119, - "grad_norm": 0.002811283804476261, - "learning_rate": 0.00019999381530175111, - "loss": 46.0, - "step": 46313 - }, - { - "epoch": 3.5410287287115088, - "grad_norm": 0.004209770355373621, - "learning_rate": 0.0001999938150346107, - "loss": 46.0, - "step": 46314 - }, - { - "epoch": 3.5411051856948985, - "grad_norm": 0.0007851845002733171, - "learning_rate": 0.00019999381476746452, - "loss": 46.0, - "step": 46315 - }, - { - "epoch": 3.5411816426782883, - "grad_norm": 0.001168499351479113, - "learning_rate": 0.00019999381450031258, - "loss": 46.0, - "step": 46316 - }, - { - "epoch": 3.541258099661678, - "grad_norm": 0.002150749322026968, - "learning_rate": 0.00019999381423315487, - "loss": 46.0, - "step": 46317 - }, - { - "epoch": 3.541334556645068, - "grad_norm": 0.0014501098776236176, - "learning_rate": 0.00019999381396599138, - "loss": 46.0, - "step": 46318 - }, - { - "epoch": 3.5414110136284576, - "grad_norm": 0.0032885984983295202, - "learning_rate": 0.00019999381369882212, - "loss": 46.0, - "step": 46319 - }, - { - "epoch": 3.541487470611847, - "grad_norm": 0.0011689552338793874, - "learning_rate": 0.00019999381343164708, - "loss": 46.0, - "step": 46320 - }, - { - "epoch": 3.5415639275952366, - "grad_norm": 0.0020286189392209053, - "learning_rate": 0.00019999381316446627, - "loss": 46.0, - "step": 46321 - }, - { - "epoch": 3.5416403845786264, - "grad_norm": 0.002792837331071496, - "learning_rate": 0.00019999381289727972, - "loss": 46.0, - "step": 46322 - }, - { - "epoch": 3.541716841562016, - "grad_norm": 0.0013269322225823998, - "learning_rate": 0.00019999381263008736, - "loss": 46.0, - "step": 46323 - }, - { - "epoch": 3.541793298545406, - "grad_norm": 0.002234497107565403, - "learning_rate": 0.00019999381236288929, - "loss": 46.0, - "step": 46324 - }, - { - "epoch": 3.5418697555287957, - "grad_norm": 0.002600625157356262, - "learning_rate": 0.0001999938120956854, - "loss": 46.0, - "step": 46325 - }, - { - "epoch": 3.5419462125121854, - "grad_norm": 0.0022841726895421743, - "learning_rate": 0.00019999381182847576, - "loss": 46.0, - "step": 46326 - }, - { - "epoch": 3.542022669495575, - "grad_norm": 0.002187923062592745, - "learning_rate": 0.00019999381156126034, - "loss": 46.0, - "step": 46327 - }, - { - "epoch": 3.5420991264789645, - "grad_norm": 0.003986688796430826, - "learning_rate": 0.00019999381129403915, - "loss": 46.0, - "step": 46328 - }, - { - "epoch": 3.5421755834623543, - "grad_norm": 0.003036154666915536, - "learning_rate": 0.0001999938110268122, - "loss": 46.0, - "step": 46329 - }, - { - "epoch": 3.542252040445744, - "grad_norm": 0.0014259350718930364, - "learning_rate": 0.00019999381075957952, - "loss": 46.0, - "step": 46330 - }, - { - "epoch": 3.542328497429134, - "grad_norm": 0.0014589549973607063, - "learning_rate": 0.000199993810492341, - "loss": 46.0, - "step": 46331 - }, - { - "epoch": 3.5424049544125236, - "grad_norm": 0.0038053635507822037, - "learning_rate": 0.00019999381022509677, - "loss": 46.0, - "step": 46332 - }, - { - "epoch": 3.5424814113959133, - "grad_norm": 0.0016306041507050395, - "learning_rate": 0.0001999938099578467, - "loss": 46.0, - "step": 46333 - }, - { - "epoch": 3.542557868379303, - "grad_norm": 0.003216137643903494, - "learning_rate": 0.00019999380969059095, - "loss": 46.0, - "step": 46334 - }, - { - "epoch": 3.542634325362693, - "grad_norm": 0.0031793902162462473, - "learning_rate": 0.00019999380942332937, - "loss": 46.0, - "step": 46335 - }, - { - "epoch": 3.5427107823460826, - "grad_norm": 0.0018321546958759427, - "learning_rate": 0.00019999380915606204, - "loss": 46.0, - "step": 46336 - }, - { - "epoch": 3.5427872393294724, - "grad_norm": 0.0014027178985998034, - "learning_rate": 0.00019999380888878894, - "loss": 46.0, - "step": 46337 - }, - { - "epoch": 3.542863696312862, - "grad_norm": 0.0014313403517007828, - "learning_rate": 0.00019999380862151006, - "loss": 46.0, - "step": 46338 - }, - { - "epoch": 3.542940153296252, - "grad_norm": 0.0010580315720289946, - "learning_rate": 0.00019999380835422544, - "loss": 46.0, - "step": 46339 - }, - { - "epoch": 3.5430166102796417, - "grad_norm": 0.004340584389865398, - "learning_rate": 0.00019999380808693502, - "loss": 46.0, - "step": 46340 - }, - { - "epoch": 3.543093067263031, - "grad_norm": 0.002075295429676771, - "learning_rate": 0.00019999380781963883, - "loss": 46.0, - "step": 46341 - }, - { - "epoch": 3.5431695242464207, - "grad_norm": 0.003148572286590934, - "learning_rate": 0.00019999380755233688, - "loss": 46.0, - "step": 46342 - }, - { - "epoch": 3.5432459812298105, - "grad_norm": 0.0010792420944198966, - "learning_rate": 0.00019999380728502917, - "loss": 46.0, - "step": 46343 - }, - { - "epoch": 3.5433224382132003, - "grad_norm": 0.0020565842278301716, - "learning_rate": 0.00019999380701771568, - "loss": 46.0, - "step": 46344 - }, - { - "epoch": 3.54339889519659, - "grad_norm": 0.0014495979994535446, - "learning_rate": 0.00019999380675039642, - "loss": 46.0, - "step": 46345 - }, - { - "epoch": 3.5434753521799798, - "grad_norm": 0.003187112044543028, - "learning_rate": 0.00019999380648307139, - "loss": 46.0, - "step": 46346 - }, - { - "epoch": 3.5435518091633695, - "grad_norm": 0.0022386698983609676, - "learning_rate": 0.00019999380621574063, - "loss": 46.0, - "step": 46347 - }, - { - "epoch": 3.5436282661467593, - "grad_norm": 0.0033604076597839594, - "learning_rate": 0.00019999380594840405, - "loss": 46.0, - "step": 46348 - }, - { - "epoch": 3.543704723130149, - "grad_norm": 0.0009237302583642304, - "learning_rate": 0.0001999938056810617, - "loss": 46.0, - "step": 46349 - }, - { - "epoch": 3.5437811801135384, - "grad_norm": 0.005306551698595285, - "learning_rate": 0.0001999938054137136, - "loss": 46.0, - "step": 46350 - }, - { - "epoch": 3.543857637096928, - "grad_norm": 0.0009216921171173453, - "learning_rate": 0.00019999380514635972, - "loss": 46.0, - "step": 46351 - }, - { - "epoch": 3.543934094080318, - "grad_norm": 0.001888197148218751, - "learning_rate": 0.0001999938048790001, - "loss": 46.0, - "step": 46352 - }, - { - "epoch": 3.5440105510637077, - "grad_norm": 0.0014829114079475403, - "learning_rate": 0.0001999938046116347, - "loss": 46.0, - "step": 46353 - }, - { - "epoch": 3.5440870080470974, - "grad_norm": 0.0017742238705977798, - "learning_rate": 0.0001999938043442635, - "loss": 46.0, - "step": 46354 - }, - { - "epoch": 3.544163465030487, - "grad_norm": 0.002818820532411337, - "learning_rate": 0.00019999380407688657, - "loss": 46.0, - "step": 46355 - }, - { - "epoch": 3.544239922013877, - "grad_norm": 0.001856859540566802, - "learning_rate": 0.00019999380380950386, - "loss": 46.0, - "step": 46356 - }, - { - "epoch": 3.5443163789972667, - "grad_norm": 0.0019075623713433743, - "learning_rate": 0.00019999380354211537, - "loss": 46.0, - "step": 46357 - }, - { - "epoch": 3.5443928359806565, - "grad_norm": 0.0010480245109647512, - "learning_rate": 0.0001999938032747211, - "loss": 46.0, - "step": 46358 - }, - { - "epoch": 3.5444692929640462, - "grad_norm": 0.0008112953510135412, - "learning_rate": 0.0001999938030073211, - "loss": 46.0, - "step": 46359 - }, - { - "epoch": 3.544545749947436, - "grad_norm": 0.004541801288723946, - "learning_rate": 0.0001999938027399153, - "loss": 46.0, - "step": 46360 - }, - { - "epoch": 3.5446222069308257, - "grad_norm": 0.007674762979149818, - "learning_rate": 0.00019999380247250374, - "loss": 46.0, - "step": 46361 - }, - { - "epoch": 3.5446986639142155, - "grad_norm": 0.0021105597261339426, - "learning_rate": 0.0001999938022050864, - "loss": 46.0, - "step": 46362 - }, - { - "epoch": 3.544775120897605, - "grad_norm": 0.001374351093545556, - "learning_rate": 0.0001999938019376633, - "loss": 46.0, - "step": 46363 - }, - { - "epoch": 3.5448515778809946, - "grad_norm": 0.007965194061398506, - "learning_rate": 0.00019999380167023444, - "loss": 46.0, - "step": 46364 - }, - { - "epoch": 3.5449280348643843, - "grad_norm": 0.005757701583206654, - "learning_rate": 0.00019999380140279982, - "loss": 46.0, - "step": 46365 - }, - { - "epoch": 3.545004491847774, - "grad_norm": 0.0011785657843574882, - "learning_rate": 0.0001999938011353594, - "loss": 46.0, - "step": 46366 - }, - { - "epoch": 3.545080948831164, - "grad_norm": 0.003242665668949485, - "learning_rate": 0.00019999380086791323, - "loss": 46.0, - "step": 46367 - }, - { - "epoch": 3.5451574058145536, - "grad_norm": 0.0021293293684720993, - "learning_rate": 0.00019999380060046126, - "loss": 46.0, - "step": 46368 - }, - { - "epoch": 3.5452338627979434, - "grad_norm": 0.00043841879232786596, - "learning_rate": 0.00019999380033300357, - "loss": 46.0, - "step": 46369 - }, - { - "epoch": 3.545310319781333, - "grad_norm": 0.001587691600434482, - "learning_rate": 0.00019999380006554009, - "loss": 46.0, - "step": 46370 - }, - { - "epoch": 3.545386776764723, - "grad_norm": 0.007348526269197464, - "learning_rate": 0.00019999379979807085, - "loss": 46.0, - "step": 46371 - }, - { - "epoch": 3.5454632337481122, - "grad_norm": 0.0038146430160850286, - "learning_rate": 0.00019999379953059582, - "loss": 46.0, - "step": 46372 - }, - { - "epoch": 3.545539690731502, - "grad_norm": 0.0018491229275241494, - "learning_rate": 0.00019999379926311504, - "loss": 46.0, - "step": 46373 - }, - { - "epoch": 3.5456161477148918, - "grad_norm": 0.006273452192544937, - "learning_rate": 0.00019999379899562846, - "loss": 46.0, - "step": 46374 - }, - { - "epoch": 3.5456926046982815, - "grad_norm": 0.000767296354752034, - "learning_rate": 0.00019999379872813613, - "loss": 46.0, - "step": 46375 - }, - { - "epoch": 3.5457690616816713, - "grad_norm": 0.0035523734986782074, - "learning_rate": 0.00019999379846063803, - "loss": 46.0, - "step": 46376 - }, - { - "epoch": 3.545845518665061, - "grad_norm": 0.007249224465340376, - "learning_rate": 0.00019999379819313418, - "loss": 46.0, - "step": 46377 - }, - { - "epoch": 3.545921975648451, - "grad_norm": 0.002585752634331584, - "learning_rate": 0.00019999379792562453, - "loss": 46.0, - "step": 46378 - }, - { - "epoch": 3.5459984326318406, - "grad_norm": 0.0026606840547174215, - "learning_rate": 0.00019999379765810914, - "loss": 46.0, - "step": 46379 - }, - { - "epoch": 3.5460748896152303, - "grad_norm": 0.007803153246641159, - "learning_rate": 0.00019999379739058797, - "loss": 46.0, - "step": 46380 - }, - { - "epoch": 3.54615134659862, - "grad_norm": 0.0020089249592274427, - "learning_rate": 0.000199993797123061, - "loss": 46.0, - "step": 46381 - }, - { - "epoch": 3.54622780358201, - "grad_norm": 0.005319799762219191, - "learning_rate": 0.0001999937968555283, - "loss": 46.0, - "step": 46382 - }, - { - "epoch": 3.5463042605653996, - "grad_norm": 0.0013772428501397371, - "learning_rate": 0.00019999379658798983, - "loss": 46.0, - "step": 46383 - }, - { - "epoch": 3.5463807175487894, - "grad_norm": 0.004116037394851446, - "learning_rate": 0.00019999379632044557, - "loss": 46.0, - "step": 46384 - }, - { - "epoch": 3.5464571745321787, - "grad_norm": 0.0018922293093055487, - "learning_rate": 0.00019999379605289557, - "loss": 46.0, - "step": 46385 - }, - { - "epoch": 3.5465336315155684, - "grad_norm": 0.0019334412645548582, - "learning_rate": 0.00019999379578533976, - "loss": 46.0, - "step": 46386 - }, - { - "epoch": 3.546610088498958, - "grad_norm": 0.0027468183543533087, - "learning_rate": 0.0001999937955177782, - "loss": 46.0, - "step": 46387 - }, - { - "epoch": 3.546686545482348, - "grad_norm": 0.00670226477086544, - "learning_rate": 0.00019999379525021088, - "loss": 46.0, - "step": 46388 - }, - { - "epoch": 3.5467630024657377, - "grad_norm": 0.0012792288325726986, - "learning_rate": 0.00019999379498263778, - "loss": 46.0, - "step": 46389 - }, - { - "epoch": 3.5468394594491275, - "grad_norm": 0.0031636471394449472, - "learning_rate": 0.00019999379471505893, - "loss": 46.0, - "step": 46390 - }, - { - "epoch": 3.5469159164325172, - "grad_norm": 0.011945711448788643, - "learning_rate": 0.00019999379444747429, - "loss": 46.0, - "step": 46391 - }, - { - "epoch": 3.546992373415907, - "grad_norm": 0.003536128904670477, - "learning_rate": 0.0001999937941798839, - "loss": 46.0, - "step": 46392 - }, - { - "epoch": 3.5470688303992963, - "grad_norm": 0.002750889863818884, - "learning_rate": 0.0001999937939122877, - "loss": 46.0, - "step": 46393 - }, - { - "epoch": 3.547145287382686, - "grad_norm": 0.0028589051216840744, - "learning_rate": 0.00019999379364468576, - "loss": 46.0, - "step": 46394 - }, - { - "epoch": 3.547221744366076, - "grad_norm": 0.001291858498007059, - "learning_rate": 0.00019999379337707805, - "loss": 46.0, - "step": 46395 - }, - { - "epoch": 3.5472982013494656, - "grad_norm": 0.0016246800078079104, - "learning_rate": 0.00019999379310946459, - "loss": 46.0, - "step": 46396 - }, - { - "epoch": 3.5473746583328554, - "grad_norm": 0.0025478247553110123, - "learning_rate": 0.00019999379284184535, - "loss": 46.0, - "step": 46397 - }, - { - "epoch": 3.547451115316245, - "grad_norm": 0.001480619772337377, - "learning_rate": 0.00019999379257422032, - "loss": 46.0, - "step": 46398 - }, - { - "epoch": 3.547527572299635, - "grad_norm": 0.002828828524798155, - "learning_rate": 0.0001999937923065895, - "loss": 46.0, - "step": 46399 - }, - { - "epoch": 3.5476040292830247, - "grad_norm": 0.002600104780867696, - "learning_rate": 0.00019999379203895296, - "loss": 46.0, - "step": 46400 - }, - { - "epoch": 3.5476804862664144, - "grad_norm": 0.002453841036185622, - "learning_rate": 0.00019999379177131066, - "loss": 46.0, - "step": 46401 - }, - { - "epoch": 3.547756943249804, - "grad_norm": 0.0021061059087514877, - "learning_rate": 0.00019999379150366256, - "loss": 46.0, - "step": 46402 - }, - { - "epoch": 3.547833400233194, - "grad_norm": 0.0022286151070147753, - "learning_rate": 0.0001999937912360087, - "loss": 46.0, - "step": 46403 - }, - { - "epoch": 3.5479098572165837, - "grad_norm": 0.001047341269440949, - "learning_rate": 0.00019999379096834904, - "loss": 46.0, - "step": 46404 - }, - { - "epoch": 3.5479863141999735, - "grad_norm": 0.0012548328377306461, - "learning_rate": 0.00019999379070068365, - "loss": 46.0, - "step": 46405 - }, - { - "epoch": 3.548062771183363, - "grad_norm": 0.0012487383792176843, - "learning_rate": 0.00019999379043301248, - "loss": 46.0, - "step": 46406 - }, - { - "epoch": 3.5481392281667525, - "grad_norm": 0.0012695499463006854, - "learning_rate": 0.00019999379016533554, - "loss": 46.0, - "step": 46407 - }, - { - "epoch": 3.5482156851501423, - "grad_norm": 0.0021747243590652943, - "learning_rate": 0.00019999378989765286, - "loss": 46.0, - "step": 46408 - }, - { - "epoch": 3.548292142133532, - "grad_norm": 0.003429760690778494, - "learning_rate": 0.00019999378962996437, - "loss": 46.0, - "step": 46409 - }, - { - "epoch": 3.548368599116922, - "grad_norm": 0.005224948283284903, - "learning_rate": 0.0001999937893622701, - "loss": 46.0, - "step": 46410 - }, - { - "epoch": 3.5484450561003116, - "grad_norm": 0.002717582043260336, - "learning_rate": 0.0001999937890945701, - "loss": 46.0, - "step": 46411 - }, - { - "epoch": 3.5485215130837013, - "grad_norm": 0.0017967562889680266, - "learning_rate": 0.00019999378882686432, - "loss": 46.0, - "step": 46412 - }, - { - "epoch": 3.548597970067091, - "grad_norm": 0.0024169525131583214, - "learning_rate": 0.00019999378855915277, - "loss": 46.0, - "step": 46413 - }, - { - "epoch": 3.548674427050481, - "grad_norm": 0.0017868360737338662, - "learning_rate": 0.00019999378829143542, - "loss": 46.0, - "step": 46414 - }, - { - "epoch": 3.54875088403387, - "grad_norm": 0.0026598009280860424, - "learning_rate": 0.00019999378802371235, - "loss": 46.0, - "step": 46415 - }, - { - "epoch": 3.54882734101726, - "grad_norm": 0.004194796085357666, - "learning_rate": 0.00019999378775598347, - "loss": 46.0, - "step": 46416 - }, - { - "epoch": 3.5489037980006497, - "grad_norm": 0.001359489280730486, - "learning_rate": 0.00019999378748824885, - "loss": 46.0, - "step": 46417 - }, - { - "epoch": 3.5489802549840395, - "grad_norm": 0.001906198333017528, - "learning_rate": 0.00019999378722050846, - "loss": 46.0, - "step": 46418 - }, - { - "epoch": 3.5490567119674292, - "grad_norm": 0.005178088787943125, - "learning_rate": 0.0001999937869527623, - "loss": 46.0, - "step": 46419 - }, - { - "epoch": 3.549133168950819, - "grad_norm": 0.0015865389723330736, - "learning_rate": 0.00019999378668501036, - "loss": 46.0, - "step": 46420 - }, - { - "epoch": 3.5492096259342087, - "grad_norm": 0.006840957794338465, - "learning_rate": 0.00019999378641725264, - "loss": 46.0, - "step": 46421 - }, - { - "epoch": 3.5492860829175985, - "grad_norm": 0.0012211559806019068, - "learning_rate": 0.00019999378614948916, - "loss": 46.0, - "step": 46422 - }, - { - "epoch": 3.5493625399009883, - "grad_norm": 0.0032057706266641617, - "learning_rate": 0.00019999378588171992, - "loss": 46.0, - "step": 46423 - }, - { - "epoch": 3.549438996884378, - "grad_norm": 0.0012986625079065561, - "learning_rate": 0.00019999378561394492, - "loss": 46.0, - "step": 46424 - }, - { - "epoch": 3.549515453867768, - "grad_norm": 0.0052831959910690784, - "learning_rate": 0.0001999937853461641, - "loss": 46.0, - "step": 46425 - }, - { - "epoch": 3.5495919108511576, - "grad_norm": 0.003423130139708519, - "learning_rate": 0.00019999378507837756, - "loss": 46.0, - "step": 46426 - }, - { - "epoch": 3.5496683678345473, - "grad_norm": 0.000631034083198756, - "learning_rate": 0.00019999378481058523, - "loss": 46.0, - "step": 46427 - }, - { - "epoch": 3.549744824817937, - "grad_norm": 0.001113277510739863, - "learning_rate": 0.00019999378454278716, - "loss": 46.0, - "step": 46428 - }, - { - "epoch": 3.5498212818013264, - "grad_norm": 0.0047985706478357315, - "learning_rate": 0.0001999937842749833, - "loss": 46.0, - "step": 46429 - }, - { - "epoch": 3.549897738784716, - "grad_norm": 0.002293867291882634, - "learning_rate": 0.00019999378400717367, - "loss": 46.0, - "step": 46430 - }, - { - "epoch": 3.549974195768106, - "grad_norm": 0.001945320051163435, - "learning_rate": 0.00019999378373935828, - "loss": 46.0, - "step": 46431 - }, - { - "epoch": 3.5500506527514957, - "grad_norm": 0.0036559845320880413, - "learning_rate": 0.0001999937834715371, - "loss": 46.0, - "step": 46432 - }, - { - "epoch": 3.5501271097348854, - "grad_norm": 0.002458605682477355, - "learning_rate": 0.00019999378320371017, - "loss": 46.0, - "step": 46433 - }, - { - "epoch": 3.550203566718275, - "grad_norm": 0.0018103031907230616, - "learning_rate": 0.00019999378293587746, - "loss": 46.0, - "step": 46434 - }, - { - "epoch": 3.550280023701665, - "grad_norm": 0.0023678455036133528, - "learning_rate": 0.000199993782668039, - "loss": 46.0, - "step": 46435 - }, - { - "epoch": 3.5503564806850547, - "grad_norm": 0.0036589547526091337, - "learning_rate": 0.00019999378240019474, - "loss": 46.0, - "step": 46436 - }, - { - "epoch": 3.550432937668444, - "grad_norm": 0.002018094528466463, - "learning_rate": 0.00019999378213234474, - "loss": 46.0, - "step": 46437 - }, - { - "epoch": 3.550509394651834, - "grad_norm": 0.0018803341081365943, - "learning_rate": 0.00019999378186448896, - "loss": 46.0, - "step": 46438 - }, - { - "epoch": 3.5505858516352236, - "grad_norm": 0.0033969832584261894, - "learning_rate": 0.0001999937815966274, - "loss": 46.0, - "step": 46439 - }, - { - "epoch": 3.5506623086186133, - "grad_norm": 0.0015325553249567747, - "learning_rate": 0.0001999937813287601, - "loss": 46.0, - "step": 46440 - }, - { - "epoch": 3.550738765602003, - "grad_norm": 0.001463297288864851, - "learning_rate": 0.000199993781060887, - "loss": 46.0, - "step": 46441 - }, - { - "epoch": 3.550815222585393, - "grad_norm": 0.0024571786634624004, - "learning_rate": 0.00019999378079300814, - "loss": 46.0, - "step": 46442 - }, - { - "epoch": 3.5508916795687826, - "grad_norm": 0.0010557464556768537, - "learning_rate": 0.00019999378052512352, - "loss": 46.0, - "step": 46443 - }, - { - "epoch": 3.5509681365521724, - "grad_norm": 0.002927768277004361, - "learning_rate": 0.00019999378025723313, - "loss": 46.0, - "step": 46444 - }, - { - "epoch": 3.551044593535562, - "grad_norm": 0.006967509631067514, - "learning_rate": 0.00019999377998933696, - "loss": 46.0, - "step": 46445 - }, - { - "epoch": 3.551121050518952, - "grad_norm": 0.000863505934830755, - "learning_rate": 0.00019999377972143502, - "loss": 46.0, - "step": 46446 - }, - { - "epoch": 3.5511975075023416, - "grad_norm": 0.0027712322771549225, - "learning_rate": 0.00019999377945352734, - "loss": 46.0, - "step": 46447 - }, - { - "epoch": 3.5512739644857314, - "grad_norm": 0.0014371549477800727, - "learning_rate": 0.00019999377918561385, - "loss": 46.0, - "step": 46448 - }, - { - "epoch": 3.551350421469121, - "grad_norm": 0.0005209771334193647, - "learning_rate": 0.00019999377891769462, - "loss": 46.0, - "step": 46449 - }, - { - "epoch": 3.551426878452511, - "grad_norm": 0.001574140740558505, - "learning_rate": 0.00019999377864976962, - "loss": 46.0, - "step": 46450 - }, - { - "epoch": 3.5515033354359002, - "grad_norm": 0.0065953535959124565, - "learning_rate": 0.0001999937783818388, - "loss": 46.0, - "step": 46451 - }, - { - "epoch": 3.55157979241929, - "grad_norm": 0.0005561783909797668, - "learning_rate": 0.00019999377811390226, - "loss": 46.0, - "step": 46452 - }, - { - "epoch": 3.5516562494026798, - "grad_norm": 0.0029747439548373222, - "learning_rate": 0.00019999377784595996, - "loss": 46.0, - "step": 46453 - }, - { - "epoch": 3.5517327063860695, - "grad_norm": 0.000876390200573951, - "learning_rate": 0.00019999377757801186, - "loss": 46.0, - "step": 46454 - }, - { - "epoch": 3.5518091633694593, - "grad_norm": 0.0014996856916695833, - "learning_rate": 0.00019999377731005802, - "loss": 46.0, - "step": 46455 - }, - { - "epoch": 3.551885620352849, - "grad_norm": 0.004231185652315617, - "learning_rate": 0.0001999937770420984, - "loss": 46.0, - "step": 46456 - }, - { - "epoch": 3.551962077336239, - "grad_norm": 0.001975968712940812, - "learning_rate": 0.00019999377677413298, - "loss": 46.0, - "step": 46457 - }, - { - "epoch": 3.5520385343196286, - "grad_norm": 0.002437055576592684, - "learning_rate": 0.00019999377650616181, - "loss": 46.0, - "step": 46458 - }, - { - "epoch": 3.552114991303018, - "grad_norm": 0.0014956494560465217, - "learning_rate": 0.0001999937762381849, - "loss": 46.0, - "step": 46459 - }, - { - "epoch": 3.5521914482864076, - "grad_norm": 0.0008825063123367727, - "learning_rate": 0.0001999937759702022, - "loss": 46.0, - "step": 46460 - }, - { - "epoch": 3.5522679052697974, - "grad_norm": 0.005186131224036217, - "learning_rate": 0.0001999937757022137, - "loss": 46.0, - "step": 46461 - }, - { - "epoch": 3.552344362253187, - "grad_norm": 0.004838583059608936, - "learning_rate": 0.00019999377543421947, - "loss": 46.0, - "step": 46462 - }, - { - "epoch": 3.552420819236577, - "grad_norm": 0.0018970175879076123, - "learning_rate": 0.00019999377516621947, - "loss": 46.0, - "step": 46463 - }, - { - "epoch": 3.5524972762199667, - "grad_norm": 0.0008161778678186238, - "learning_rate": 0.0001999937748982137, - "loss": 46.0, - "step": 46464 - }, - { - "epoch": 3.5525737332033565, - "grad_norm": 0.0006800947012379766, - "learning_rate": 0.00019999377463020214, - "loss": 46.0, - "step": 46465 - }, - { - "epoch": 3.552650190186746, - "grad_norm": 0.0011572532821446657, - "learning_rate": 0.00019999377436218482, - "loss": 46.0, - "step": 46466 - }, - { - "epoch": 3.552726647170136, - "grad_norm": 0.0027272903826087713, - "learning_rate": 0.00019999377409416174, - "loss": 46.0, - "step": 46467 - }, - { - "epoch": 3.5528031041535257, - "grad_norm": 0.0016184666892513633, - "learning_rate": 0.0001999937738261329, - "loss": 46.0, - "step": 46468 - }, - { - "epoch": 3.5528795611369155, - "grad_norm": 0.005173943471163511, - "learning_rate": 0.00019999377355809825, - "loss": 46.0, - "step": 46469 - }, - { - "epoch": 3.5529560181203053, - "grad_norm": 0.004095404874533415, - "learning_rate": 0.00019999377329005786, - "loss": 46.0, - "step": 46470 - }, - { - "epoch": 3.553032475103695, - "grad_norm": 0.005040294025093317, - "learning_rate": 0.0001999937730220117, - "loss": 46.0, - "step": 46471 - }, - { - "epoch": 3.553108932087085, - "grad_norm": 0.002450446831062436, - "learning_rate": 0.0001999937727539598, - "loss": 46.0, - "step": 46472 - }, - { - "epoch": 3.553185389070474, - "grad_norm": 0.003448336385190487, - "learning_rate": 0.0001999937724859021, - "loss": 46.0, - "step": 46473 - }, - { - "epoch": 3.553261846053864, - "grad_norm": 0.00127406045794487, - "learning_rate": 0.00019999377221783862, - "loss": 46.0, - "step": 46474 - }, - { - "epoch": 3.5533383030372536, - "grad_norm": 0.0021221397910267115, - "learning_rate": 0.0001999937719497694, - "loss": 46.0, - "step": 46475 - }, - { - "epoch": 3.5534147600206434, - "grad_norm": 0.0017270802054554224, - "learning_rate": 0.00019999377168169438, - "loss": 46.0, - "step": 46476 - }, - { - "epoch": 3.553491217004033, - "grad_norm": 0.003796135075390339, - "learning_rate": 0.0001999937714136136, - "loss": 46.0, - "step": 46477 - }, - { - "epoch": 3.553567673987423, - "grad_norm": 0.002585887210443616, - "learning_rate": 0.00019999377114552705, - "loss": 46.0, - "step": 46478 - }, - { - "epoch": 3.5536441309708127, - "grad_norm": 0.0027438232209533453, - "learning_rate": 0.00019999377087743473, - "loss": 46.0, - "step": 46479 - }, - { - "epoch": 3.5537205879542024, - "grad_norm": 0.0016532428562641144, - "learning_rate": 0.00019999377060933666, - "loss": 46.0, - "step": 46480 - }, - { - "epoch": 3.5537970449375917, - "grad_norm": 0.0029786117374897003, - "learning_rate": 0.00019999377034123281, - "loss": 46.0, - "step": 46481 - }, - { - "epoch": 3.5538735019209815, - "grad_norm": 0.0011823297245427966, - "learning_rate": 0.0001999937700731232, - "loss": 46.0, - "step": 46482 - }, - { - "epoch": 3.5539499589043713, - "grad_norm": 0.0011401071678847075, - "learning_rate": 0.00019999376980500778, - "loss": 46.0, - "step": 46483 - }, - { - "epoch": 3.554026415887761, - "grad_norm": 0.0013400858733803034, - "learning_rate": 0.00019999376953688661, - "loss": 46.0, - "step": 46484 - }, - { - "epoch": 3.554102872871151, - "grad_norm": 0.0018520744051784277, - "learning_rate": 0.0001999937692687597, - "loss": 46.0, - "step": 46485 - }, - { - "epoch": 3.5541793298545405, - "grad_norm": 0.003796538570895791, - "learning_rate": 0.00019999376900062702, - "loss": 46.0, - "step": 46486 - }, - { - "epoch": 3.5542557868379303, - "grad_norm": 0.0027337847277522087, - "learning_rate": 0.0001999937687324885, - "loss": 46.0, - "step": 46487 - }, - { - "epoch": 3.55433224382132, - "grad_norm": 0.0021763781551271677, - "learning_rate": 0.0001999937684643443, - "loss": 46.0, - "step": 46488 - }, - { - "epoch": 3.55440870080471, - "grad_norm": 0.0014338461915031075, - "learning_rate": 0.0001999937681961943, - "loss": 46.0, - "step": 46489 - }, - { - "epoch": 3.5544851577880996, - "grad_norm": 0.0013220143737271428, - "learning_rate": 0.00019999376792803852, - "loss": 46.0, - "step": 46490 - }, - { - "epoch": 3.5545616147714894, - "grad_norm": 0.004611336160451174, - "learning_rate": 0.00019999376765987697, - "loss": 46.0, - "step": 46491 - }, - { - "epoch": 3.554638071754879, - "grad_norm": 0.001799620222300291, - "learning_rate": 0.00019999376739170965, - "loss": 46.0, - "step": 46492 - }, - { - "epoch": 3.554714528738269, - "grad_norm": 0.0015801709378138185, - "learning_rate": 0.00019999376712353658, - "loss": 46.0, - "step": 46493 - }, - { - "epoch": 3.554790985721658, - "grad_norm": 0.001817018841393292, - "learning_rate": 0.00019999376685535773, - "loss": 46.0, - "step": 46494 - }, - { - "epoch": 3.554867442705048, - "grad_norm": 0.005005191545933485, - "learning_rate": 0.00019999376658717312, - "loss": 46.0, - "step": 46495 - }, - { - "epoch": 3.5549438996884377, - "grad_norm": 0.0038010175339877605, - "learning_rate": 0.00019999376631898272, - "loss": 46.0, - "step": 46496 - }, - { - "epoch": 3.5550203566718275, - "grad_norm": 0.0030369756277650595, - "learning_rate": 0.00019999376605078656, - "loss": 46.0, - "step": 46497 - }, - { - "epoch": 3.5550968136552172, - "grad_norm": 0.0028636339120566845, - "learning_rate": 0.00019999376578258465, - "loss": 46.0, - "step": 46498 - }, - { - "epoch": 3.555173270638607, - "grad_norm": 0.005953289568424225, - "learning_rate": 0.00019999376551437694, - "loss": 46.0, - "step": 46499 - }, - { - "epoch": 3.5552497276219968, - "grad_norm": 0.00319922948256135, - "learning_rate": 0.00019999376524616346, - "loss": 46.0, - "step": 46500 - }, - { - "epoch": 3.5553261846053865, - "grad_norm": 0.0017837381456047297, - "learning_rate": 0.00019999376497794425, - "loss": 46.0, - "step": 46501 - }, - { - "epoch": 3.5554026415887763, - "grad_norm": 0.0018834981601685286, - "learning_rate": 0.00019999376470971925, - "loss": 46.0, - "step": 46502 - }, - { - "epoch": 3.5554790985721656, - "grad_norm": 0.001960393972694874, - "learning_rate": 0.00019999376444148847, - "loss": 46.0, - "step": 46503 - }, - { - "epoch": 3.5555555555555554, - "grad_norm": 0.001821323181502521, - "learning_rate": 0.00019999376417325192, - "loss": 46.0, - "step": 46504 - }, - { - "epoch": 3.555632012538945, - "grad_norm": 0.003700501285493374, - "learning_rate": 0.00019999376390500962, - "loss": 46.0, - "step": 46505 - }, - { - "epoch": 3.555708469522335, - "grad_norm": 0.0017601598519831896, - "learning_rate": 0.00019999376363676153, - "loss": 46.0, - "step": 46506 - }, - { - "epoch": 3.5557849265057246, - "grad_norm": 0.006738262716680765, - "learning_rate": 0.00019999376336850768, - "loss": 46.0, - "step": 46507 - }, - { - "epoch": 3.5558613834891144, - "grad_norm": 0.0055123502388596535, - "learning_rate": 0.00019999376310024807, - "loss": 46.0, - "step": 46508 - }, - { - "epoch": 3.555937840472504, - "grad_norm": 0.003295546630397439, - "learning_rate": 0.00019999376283198267, - "loss": 46.0, - "step": 46509 - }, - { - "epoch": 3.556014297455894, - "grad_norm": 0.0004722021985799074, - "learning_rate": 0.0001999937625637115, - "loss": 46.0, - "step": 46510 - }, - { - "epoch": 3.5560907544392837, - "grad_norm": 0.005691759288311005, - "learning_rate": 0.0001999937622954346, - "loss": 46.0, - "step": 46511 - }, - { - "epoch": 3.5561672114226734, - "grad_norm": 0.0027579213492572308, - "learning_rate": 0.00019999376202715192, - "loss": 46.0, - "step": 46512 - }, - { - "epoch": 3.556243668406063, - "grad_norm": 0.004934835247695446, - "learning_rate": 0.00019999376175886343, - "loss": 46.0, - "step": 46513 - }, - { - "epoch": 3.556320125389453, - "grad_norm": 0.0014789033448323607, - "learning_rate": 0.0001999937614905692, - "loss": 46.0, - "step": 46514 - }, - { - "epoch": 3.5563965823728427, - "grad_norm": 0.0017723443452268839, - "learning_rate": 0.0001999937612222692, - "loss": 46.0, - "step": 46515 - }, - { - "epoch": 3.556473039356232, - "grad_norm": 0.0013103695819154382, - "learning_rate": 0.00019999376095396345, - "loss": 46.0, - "step": 46516 - }, - { - "epoch": 3.556549496339622, - "grad_norm": 0.001224347623065114, - "learning_rate": 0.0001999937606856519, - "loss": 46.0, - "step": 46517 - }, - { - "epoch": 3.5566259533230116, - "grad_norm": 0.0010508011328056455, - "learning_rate": 0.00019999376041733458, - "loss": 46.0, - "step": 46518 - }, - { - "epoch": 3.5567024103064013, - "grad_norm": 0.0036310760769993067, - "learning_rate": 0.00019999376014901153, - "loss": 46.0, - "step": 46519 - }, - { - "epoch": 3.556778867289791, - "grad_norm": 0.005407261196523905, - "learning_rate": 0.00019999375988068266, - "loss": 46.0, - "step": 46520 - }, - { - "epoch": 3.556855324273181, - "grad_norm": 0.003659235080704093, - "learning_rate": 0.00019999375961234805, - "loss": 46.0, - "step": 46521 - }, - { - "epoch": 3.5569317812565706, - "grad_norm": 0.0022064486984163523, - "learning_rate": 0.00019999375934400766, - "loss": 46.0, - "step": 46522 - }, - { - "epoch": 3.5570082382399604, - "grad_norm": 0.002823495538905263, - "learning_rate": 0.0001999937590756615, - "loss": 46.0, - "step": 46523 - }, - { - "epoch": 3.5570846952233497, - "grad_norm": 0.0018522243481129408, - "learning_rate": 0.00019999375880730958, - "loss": 46.0, - "step": 46524 - }, - { - "epoch": 3.5571611522067395, - "grad_norm": 0.0035180626437067986, - "learning_rate": 0.00019999375853895187, - "loss": 46.0, - "step": 46525 - }, - { - "epoch": 3.557237609190129, - "grad_norm": 0.001274880371056497, - "learning_rate": 0.00019999375827058842, - "loss": 46.0, - "step": 46526 - }, - { - "epoch": 3.557314066173519, - "grad_norm": 0.002875514095649123, - "learning_rate": 0.0001999937580022192, - "loss": 46.0, - "step": 46527 - }, - { - "epoch": 3.5573905231569087, - "grad_norm": 0.0007261926075443625, - "learning_rate": 0.0001999937577338442, - "loss": 46.0, - "step": 46528 - }, - { - "epoch": 3.5574669801402985, - "grad_norm": 0.002841297071427107, - "learning_rate": 0.00019999375746546343, - "loss": 46.0, - "step": 46529 - }, - { - "epoch": 3.5575434371236883, - "grad_norm": 0.0014934537466615438, - "learning_rate": 0.00019999375719707688, - "loss": 46.0, - "step": 46530 - }, - { - "epoch": 3.557619894107078, - "grad_norm": 0.0026760082691907883, - "learning_rate": 0.0001999937569286846, - "loss": 46.0, - "step": 46531 - }, - { - "epoch": 3.557696351090468, - "grad_norm": 0.0012086716014891863, - "learning_rate": 0.00019999375666028652, - "loss": 46.0, - "step": 46532 - }, - { - "epoch": 3.5577728080738575, - "grad_norm": 0.0038816449232399464, - "learning_rate": 0.00019999375639188268, - "loss": 46.0, - "step": 46533 - }, - { - "epoch": 3.5578492650572473, - "grad_norm": 0.0015986757352948189, - "learning_rate": 0.00019999375612347303, - "loss": 46.0, - "step": 46534 - }, - { - "epoch": 3.557925722040637, - "grad_norm": 0.0013071264838799834, - "learning_rate": 0.00019999375585505767, - "loss": 46.0, - "step": 46535 - }, - { - "epoch": 3.558002179024027, - "grad_norm": 0.0009613691945560277, - "learning_rate": 0.0001999937555866365, - "loss": 46.0, - "step": 46536 - }, - { - "epoch": 3.5580786360074166, - "grad_norm": 0.0010414548451080918, - "learning_rate": 0.0001999937553182096, - "loss": 46.0, - "step": 46537 - }, - { - "epoch": 3.558155092990806, - "grad_norm": 0.028348220512270927, - "learning_rate": 0.0001999937550497769, - "loss": 46.0, - "step": 46538 - }, - { - "epoch": 3.5582315499741957, - "grad_norm": 0.0008071522461250424, - "learning_rate": 0.00019999375478133843, - "loss": 46.0, - "step": 46539 - }, - { - "epoch": 3.5583080069575854, - "grad_norm": 0.004722594749182463, - "learning_rate": 0.00019999375451289423, - "loss": 46.0, - "step": 46540 - }, - { - "epoch": 3.558384463940975, - "grad_norm": 0.002216981491073966, - "learning_rate": 0.0001999937542444442, - "loss": 46.0, - "step": 46541 - }, - { - "epoch": 3.558460920924365, - "grad_norm": 0.0016356802079826593, - "learning_rate": 0.00019999375397598845, - "loss": 46.0, - "step": 46542 - }, - { - "epoch": 3.5585373779077547, - "grad_norm": 0.0017779436893761158, - "learning_rate": 0.0001999937537075269, - "loss": 46.0, - "step": 46543 - }, - { - "epoch": 3.5586138348911445, - "grad_norm": 0.0033922898583114147, - "learning_rate": 0.0001999937534390596, - "loss": 46.0, - "step": 46544 - }, - { - "epoch": 3.5586902918745342, - "grad_norm": 0.004556615371257067, - "learning_rate": 0.00019999375317058653, - "loss": 46.0, - "step": 46545 - }, - { - "epoch": 3.5587667488579235, - "grad_norm": 0.00324404239654541, - "learning_rate": 0.0001999937529021077, - "loss": 46.0, - "step": 46546 - }, - { - "epoch": 3.5588432058413133, - "grad_norm": 0.0030002316925674677, - "learning_rate": 0.00019999375263362308, - "loss": 46.0, - "step": 46547 - }, - { - "epoch": 3.558919662824703, - "grad_norm": 0.0014597923727706075, - "learning_rate": 0.00019999375236513269, - "loss": 46.0, - "step": 46548 - }, - { - "epoch": 3.558996119808093, - "grad_norm": 0.0022793463431298733, - "learning_rate": 0.00019999375209663652, - "loss": 46.0, - "step": 46549 - }, - { - "epoch": 3.5590725767914826, - "grad_norm": 0.0021507972851395607, - "learning_rate": 0.00019999375182813461, - "loss": 46.0, - "step": 46550 - }, - { - "epoch": 3.5591490337748724, - "grad_norm": 0.004458022769540548, - "learning_rate": 0.00019999375155962693, - "loss": 46.0, - "step": 46551 - }, - { - "epoch": 3.559225490758262, - "grad_norm": 0.0015177782624959946, - "learning_rate": 0.00019999375129111348, - "loss": 46.0, - "step": 46552 - }, - { - "epoch": 3.559301947741652, - "grad_norm": 0.004764895886182785, - "learning_rate": 0.00019999375102259425, - "loss": 46.0, - "step": 46553 - }, - { - "epoch": 3.5593784047250416, - "grad_norm": 0.0014271772233769298, - "learning_rate": 0.00019999375075406924, - "loss": 46.0, - "step": 46554 - }, - { - "epoch": 3.5594548617084314, - "grad_norm": 0.0007983514224179089, - "learning_rate": 0.00019999375048553847, - "loss": 46.0, - "step": 46555 - }, - { - "epoch": 3.559531318691821, - "grad_norm": 0.0009628830011934042, - "learning_rate": 0.00019999375021700195, - "loss": 46.0, - "step": 46556 - }, - { - "epoch": 3.559607775675211, - "grad_norm": 0.0007626317674294114, - "learning_rate": 0.00019999374994845965, - "loss": 46.0, - "step": 46557 - }, - { - "epoch": 3.5596842326586007, - "grad_norm": 0.0015915690455585718, - "learning_rate": 0.00019999374967991158, - "loss": 46.0, - "step": 46558 - }, - { - "epoch": 3.5597606896419904, - "grad_norm": 0.0008220255840569735, - "learning_rate": 0.00019999374941135774, - "loss": 46.0, - "step": 46559 - }, - { - "epoch": 3.5598371466253798, - "grad_norm": 0.0023698657751083374, - "learning_rate": 0.00019999374914279812, - "loss": 46.0, - "step": 46560 - }, - { - "epoch": 3.5599136036087695, - "grad_norm": 0.011392847634851933, - "learning_rate": 0.00019999374887423273, - "loss": 46.0, - "step": 46561 - }, - { - "epoch": 3.5599900605921593, - "grad_norm": 0.0025960872881114483, - "learning_rate": 0.0001999937486056616, - "loss": 46.0, - "step": 46562 - }, - { - "epoch": 3.560066517575549, - "grad_norm": 0.003866950748488307, - "learning_rate": 0.00019999374833708466, - "loss": 46.0, - "step": 46563 - }, - { - "epoch": 3.560142974558939, - "grad_norm": 0.003444395959377289, - "learning_rate": 0.000199993748068502, - "loss": 46.0, - "step": 46564 - }, - { - "epoch": 3.5602194315423286, - "grad_norm": 0.0011637936113402247, - "learning_rate": 0.00019999374779991353, - "loss": 46.0, - "step": 46565 - }, - { - "epoch": 3.5602958885257183, - "grad_norm": 0.0029007100965827703, - "learning_rate": 0.0001999937475313193, - "loss": 46.0, - "step": 46566 - }, - { - "epoch": 3.560372345509108, - "grad_norm": 0.0010651706252247095, - "learning_rate": 0.00019999374726271932, - "loss": 46.0, - "step": 46567 - }, - { - "epoch": 3.5604488024924974, - "grad_norm": 0.002920096041634679, - "learning_rate": 0.00019999374699411355, - "loss": 46.0, - "step": 46568 - }, - { - "epoch": 3.560525259475887, - "grad_norm": 0.0039646257646381855, - "learning_rate": 0.00019999374672550202, - "loss": 46.0, - "step": 46569 - }, - { - "epoch": 3.560601716459277, - "grad_norm": 0.003384967800229788, - "learning_rate": 0.0001999937464568847, - "loss": 46.0, - "step": 46570 - }, - { - "epoch": 3.5606781734426667, - "grad_norm": 0.0016989994328469038, - "learning_rate": 0.00019999374618826163, - "loss": 46.0, - "step": 46571 - }, - { - "epoch": 3.5607546304260564, - "grad_norm": 0.006379690486937761, - "learning_rate": 0.0001999937459196328, - "loss": 46.0, - "step": 46572 - }, - { - "epoch": 3.560831087409446, - "grad_norm": 0.002185478340834379, - "learning_rate": 0.0001999937456509982, - "loss": 46.0, - "step": 46573 - }, - { - "epoch": 3.560907544392836, - "grad_norm": 0.004664817824959755, - "learning_rate": 0.0001999937453823578, - "loss": 46.0, - "step": 46574 - }, - { - "epoch": 3.5609840013762257, - "grad_norm": 0.005241215229034424, - "learning_rate": 0.00019999374511371168, - "loss": 46.0, - "step": 46575 - }, - { - "epoch": 3.5610604583596155, - "grad_norm": 0.0028947058599442244, - "learning_rate": 0.00019999374484505977, - "loss": 46.0, - "step": 46576 - }, - { - "epoch": 3.5611369153430052, - "grad_norm": 0.0010516849579289556, - "learning_rate": 0.00019999374457640206, - "loss": 46.0, - "step": 46577 - }, - { - "epoch": 3.561213372326395, - "grad_norm": 0.003213840536773205, - "learning_rate": 0.0001999937443077386, - "loss": 46.0, - "step": 46578 - }, - { - "epoch": 3.5612898293097848, - "grad_norm": 0.0023612165823578835, - "learning_rate": 0.00019999374403906938, - "loss": 46.0, - "step": 46579 - }, - { - "epoch": 3.5613662862931745, - "grad_norm": 0.005796457175165415, - "learning_rate": 0.0001999937437703944, - "loss": 46.0, - "step": 46580 - }, - { - "epoch": 3.5614427432765643, - "grad_norm": 0.0010385580826550722, - "learning_rate": 0.00019999374350171363, - "loss": 46.0, - "step": 46581 - }, - { - "epoch": 3.5615192002599536, - "grad_norm": 0.0008809170685708523, - "learning_rate": 0.0001999937432330271, - "loss": 46.0, - "step": 46582 - }, - { - "epoch": 3.5615956572433434, - "grad_norm": 0.0015432793879881501, - "learning_rate": 0.0001999937429643348, - "loss": 46.0, - "step": 46583 - }, - { - "epoch": 3.561672114226733, - "grad_norm": 0.0006031371303834021, - "learning_rate": 0.00019999374269563674, - "loss": 46.0, - "step": 46584 - }, - { - "epoch": 3.561748571210123, - "grad_norm": 0.0018374252831563354, - "learning_rate": 0.0001999937424269329, - "loss": 46.0, - "step": 46585 - }, - { - "epoch": 3.5618250281935127, - "grad_norm": 0.001013958710245788, - "learning_rate": 0.00019999374215822329, - "loss": 46.0, - "step": 46586 - }, - { - "epoch": 3.5619014851769024, - "grad_norm": 0.0010941693326458335, - "learning_rate": 0.00019999374188950792, - "loss": 46.0, - "step": 46587 - }, - { - "epoch": 3.561977942160292, - "grad_norm": 0.0035610490012913942, - "learning_rate": 0.00019999374162078676, - "loss": 46.0, - "step": 46588 - }, - { - "epoch": 3.562054399143682, - "grad_norm": 0.004505632445216179, - "learning_rate": 0.00019999374135205985, - "loss": 46.0, - "step": 46589 - }, - { - "epoch": 3.5621308561270713, - "grad_norm": 0.0037538534961640835, - "learning_rate": 0.00019999374108332715, - "loss": 46.0, - "step": 46590 - }, - { - "epoch": 3.562207313110461, - "grad_norm": 0.002848175587132573, - "learning_rate": 0.0001999937408145887, - "loss": 46.0, - "step": 46591 - }, - { - "epoch": 3.5622837700938508, - "grad_norm": 0.0009538735612295568, - "learning_rate": 0.0001999937405458445, - "loss": 46.0, - "step": 46592 - }, - { - "epoch": 3.5623602270772405, - "grad_norm": 0.013432635925710201, - "learning_rate": 0.00019999374027709451, - "loss": 46.0, - "step": 46593 - }, - { - "epoch": 3.5624366840606303, - "grad_norm": 0.002499157562851906, - "learning_rate": 0.00019999374000833874, - "loss": 46.0, - "step": 46594 - }, - { - "epoch": 3.56251314104402, - "grad_norm": 0.0021435744129121304, - "learning_rate": 0.0001999937397395772, - "loss": 46.0, - "step": 46595 - }, - { - "epoch": 3.56258959802741, - "grad_norm": 0.0029948113951832056, - "learning_rate": 0.00019999373947080992, - "loss": 46.0, - "step": 46596 - }, - { - "epoch": 3.5626660550107996, - "grad_norm": 0.005200231913477182, - "learning_rate": 0.00019999373920203686, - "loss": 46.0, - "step": 46597 - }, - { - "epoch": 3.5627425119941893, - "grad_norm": 0.0023839629720896482, - "learning_rate": 0.00019999373893325801, - "loss": 46.0, - "step": 46598 - }, - { - "epoch": 3.562818968977579, - "grad_norm": 0.0025282285641878843, - "learning_rate": 0.00019999373866447343, - "loss": 46.0, - "step": 46599 - }, - { - "epoch": 3.562895425960969, - "grad_norm": 0.0023611115757375956, - "learning_rate": 0.00019999373839568304, - "loss": 46.0, - "step": 46600 - }, - { - "epoch": 3.5629718829443586, - "grad_norm": 0.0015852905344218016, - "learning_rate": 0.0001999937381268869, - "loss": 46.0, - "step": 46601 - }, - { - "epoch": 3.5630483399277484, - "grad_norm": 0.0030706808902323246, - "learning_rate": 0.000199993737858085, - "loss": 46.0, - "step": 46602 - }, - { - "epoch": 3.563124796911138, - "grad_norm": 0.0009118703310377896, - "learning_rate": 0.00019999373758927732, - "loss": 46.0, - "step": 46603 - }, - { - "epoch": 3.5632012538945275, - "grad_norm": 0.003964750096201897, - "learning_rate": 0.00019999373732046386, - "loss": 46.0, - "step": 46604 - }, - { - "epoch": 3.5632777108779172, - "grad_norm": 0.0021664509549736977, - "learning_rate": 0.0001999937370516446, - "loss": 46.0, - "step": 46605 - }, - { - "epoch": 3.563354167861307, - "grad_norm": 0.002441578544676304, - "learning_rate": 0.00019999373678281966, - "loss": 46.0, - "step": 46606 - }, - { - "epoch": 3.5634306248446967, - "grad_norm": 0.004548076540231705, - "learning_rate": 0.00019999373651398888, - "loss": 46.0, - "step": 46607 - }, - { - "epoch": 3.5635070818280865, - "grad_norm": 0.0036300229839980602, - "learning_rate": 0.00019999373624515236, - "loss": 46.0, - "step": 46608 - }, - { - "epoch": 3.5635835388114763, - "grad_norm": 0.001155280857346952, - "learning_rate": 0.00019999373597631007, - "loss": 46.0, - "step": 46609 - }, - { - "epoch": 3.563659995794866, - "grad_norm": 0.0022164478432387114, - "learning_rate": 0.00019999373570746197, - "loss": 46.0, - "step": 46610 - }, - { - "epoch": 3.563736452778256, - "grad_norm": 0.0008368267444893718, - "learning_rate": 0.00019999373543860816, - "loss": 46.0, - "step": 46611 - }, - { - "epoch": 3.563812909761645, - "grad_norm": 0.00203518895432353, - "learning_rate": 0.00019999373516974855, - "loss": 46.0, - "step": 46612 - }, - { - "epoch": 3.563889366745035, - "grad_norm": 0.0026849994901567698, - "learning_rate": 0.00019999373490088318, - "loss": 46.0, - "step": 46613 - }, - { - "epoch": 3.5639658237284246, - "grad_norm": 0.005631974898278713, - "learning_rate": 0.00019999373463201202, - "loss": 46.0, - "step": 46614 - }, - { - "epoch": 3.5640422807118144, - "grad_norm": 0.003811551257967949, - "learning_rate": 0.00019999373436313514, - "loss": 46.0, - "step": 46615 - }, - { - "epoch": 3.564118737695204, - "grad_norm": 0.001514188596047461, - "learning_rate": 0.00019999373409425246, - "loss": 46.0, - "step": 46616 - }, - { - "epoch": 3.564195194678594, - "grad_norm": 0.001378338783979416, - "learning_rate": 0.00019999373382536398, - "loss": 46.0, - "step": 46617 - }, - { - "epoch": 3.5642716516619837, - "grad_norm": 0.002717813942581415, - "learning_rate": 0.00019999373355646978, - "loss": 46.0, - "step": 46618 - }, - { - "epoch": 3.5643481086453734, - "grad_norm": 0.0022252292837947607, - "learning_rate": 0.00019999373328756978, - "loss": 46.0, - "step": 46619 - }, - { - "epoch": 3.564424565628763, - "grad_norm": 0.003563227830454707, - "learning_rate": 0.00019999373301866403, - "loss": 46.0, - "step": 46620 - }, - { - "epoch": 3.564501022612153, - "grad_norm": 0.00731264753267169, - "learning_rate": 0.00019999373274975254, - "loss": 46.0, - "step": 46621 - }, - { - "epoch": 3.5645774795955427, - "grad_norm": 0.003943635616451502, - "learning_rate": 0.00019999373248083522, - "loss": 46.0, - "step": 46622 - }, - { - "epoch": 3.5646539365789325, - "grad_norm": 0.000804942159447819, - "learning_rate": 0.00019999373221191215, - "loss": 46.0, - "step": 46623 - }, - { - "epoch": 3.5647303935623222, - "grad_norm": 0.0016056118765845895, - "learning_rate": 0.0001999937319429833, - "loss": 46.0, - "step": 46624 - }, - { - "epoch": 3.5648068505457116, - "grad_norm": 0.0024353060871362686, - "learning_rate": 0.00019999373167404872, - "loss": 46.0, - "step": 46625 - }, - { - "epoch": 3.5648833075291013, - "grad_norm": 0.0011932393535971642, - "learning_rate": 0.00019999373140510836, - "loss": 46.0, - "step": 46626 - }, - { - "epoch": 3.564959764512491, - "grad_norm": 0.0017103042919188738, - "learning_rate": 0.0001999937311361622, - "loss": 46.0, - "step": 46627 - }, - { - "epoch": 3.565036221495881, - "grad_norm": 0.0029872881714254618, - "learning_rate": 0.0001999937308672103, - "loss": 46.0, - "step": 46628 - }, - { - "epoch": 3.5651126784792706, - "grad_norm": 0.0030780588276684284, - "learning_rate": 0.0001999937305982526, - "loss": 46.0, - "step": 46629 - }, - { - "epoch": 3.5651891354626604, - "grad_norm": 0.0028714691288769245, - "learning_rate": 0.00019999373032928919, - "loss": 46.0, - "step": 46630 - }, - { - "epoch": 3.56526559244605, - "grad_norm": 0.0037491903640329838, - "learning_rate": 0.00019999373006031996, - "loss": 46.0, - "step": 46631 - }, - { - "epoch": 3.56534204942944, - "grad_norm": 0.004280867520719767, - "learning_rate": 0.00019999372979134496, - "loss": 46.0, - "step": 46632 - }, - { - "epoch": 3.5654185064128296, - "grad_norm": 0.0014825878897681832, - "learning_rate": 0.0001999937295223642, - "loss": 46.0, - "step": 46633 - }, - { - "epoch": 3.565494963396219, - "grad_norm": 0.0019989258144050837, - "learning_rate": 0.0001999937292533777, - "loss": 46.0, - "step": 46634 - }, - { - "epoch": 3.5655714203796087, - "grad_norm": 0.0010208177845925093, - "learning_rate": 0.0001999937289843854, - "loss": 46.0, - "step": 46635 - }, - { - "epoch": 3.5656478773629985, - "grad_norm": 0.0031489187385886908, - "learning_rate": 0.00019999372871538733, - "loss": 46.0, - "step": 46636 - }, - { - "epoch": 3.5657243343463882, - "grad_norm": 0.002669918118044734, - "learning_rate": 0.00019999372844638352, - "loss": 46.0, - "step": 46637 - }, - { - "epoch": 3.565800791329778, - "grad_norm": 0.0029144948348402977, - "learning_rate": 0.00019999372817737393, - "loss": 46.0, - "step": 46638 - }, - { - "epoch": 3.5658772483131678, - "grad_norm": 0.0024519572034478188, - "learning_rate": 0.00019999372790835854, - "loss": 46.0, - "step": 46639 - }, - { - "epoch": 3.5659537052965575, - "grad_norm": 0.0015700676012784243, - "learning_rate": 0.0001999937276393374, - "loss": 46.0, - "step": 46640 - }, - { - "epoch": 3.5660301622799473, - "grad_norm": 0.004720539320260286, - "learning_rate": 0.00019999372737031047, - "loss": 46.0, - "step": 46641 - }, - { - "epoch": 3.566106619263337, - "grad_norm": 0.0041053444147109985, - "learning_rate": 0.00019999372710127782, - "loss": 46.0, - "step": 46642 - }, - { - "epoch": 3.566183076246727, - "grad_norm": 0.0029860378708690405, - "learning_rate": 0.00019999372683223937, - "loss": 46.0, - "step": 46643 - }, - { - "epoch": 3.5662595332301166, - "grad_norm": 0.0023234044201672077, - "learning_rate": 0.00019999372656319517, - "loss": 46.0, - "step": 46644 - }, - { - "epoch": 3.5663359902135063, - "grad_norm": 0.006589425727725029, - "learning_rate": 0.00019999372629414517, - "loss": 46.0, - "step": 46645 - }, - { - "epoch": 3.566412447196896, - "grad_norm": 0.001928093028254807, - "learning_rate": 0.00019999372602508942, - "loss": 46.0, - "step": 46646 - }, - { - "epoch": 3.5664889041802854, - "grad_norm": 0.001166792819276452, - "learning_rate": 0.0001999937257560279, - "loss": 46.0, - "step": 46647 - }, - { - "epoch": 3.566565361163675, - "grad_norm": 0.001867242157459259, - "learning_rate": 0.0001999937254869606, - "loss": 46.0, - "step": 46648 - }, - { - "epoch": 3.566641818147065, - "grad_norm": 0.003704528324306011, - "learning_rate": 0.00019999372521788754, - "loss": 46.0, - "step": 46649 - }, - { - "epoch": 3.5667182751304547, - "grad_norm": 0.0034738376270979643, - "learning_rate": 0.00019999372494880873, - "loss": 46.0, - "step": 46650 - }, - { - "epoch": 3.5667947321138445, - "grad_norm": 0.003083515679463744, - "learning_rate": 0.00019999372467972411, - "loss": 46.0, - "step": 46651 - }, - { - "epoch": 3.566871189097234, - "grad_norm": 0.0011889126617461443, - "learning_rate": 0.00019999372441063375, - "loss": 46.0, - "step": 46652 - }, - { - "epoch": 3.566947646080624, - "grad_norm": 0.0027149440720677376, - "learning_rate": 0.00019999372414153762, - "loss": 46.0, - "step": 46653 - }, - { - "epoch": 3.5670241030640137, - "grad_norm": 0.0021190131083130836, - "learning_rate": 0.00019999372387243571, - "loss": 46.0, - "step": 46654 - }, - { - "epoch": 3.567100560047403, - "grad_norm": 0.003262369427829981, - "learning_rate": 0.000199993723603328, - "loss": 46.0, - "step": 46655 - }, - { - "epoch": 3.567177017030793, - "grad_norm": 0.002166547579690814, - "learning_rate": 0.00019999372333421458, - "loss": 46.0, - "step": 46656 - }, - { - "epoch": 3.5672534740141826, - "grad_norm": 0.001741804531775415, - "learning_rate": 0.00019999372306509535, - "loss": 46.0, - "step": 46657 - }, - { - "epoch": 3.5673299309975723, - "grad_norm": 0.005541711580008268, - "learning_rate": 0.00019999372279597038, - "loss": 46.0, - "step": 46658 - }, - { - "epoch": 3.567406387980962, - "grad_norm": 0.0021615559235215187, - "learning_rate": 0.00019999372252683966, - "loss": 46.0, - "step": 46659 - }, - { - "epoch": 3.567482844964352, - "grad_norm": 0.0025355208199471235, - "learning_rate": 0.00019999372225770312, - "loss": 46.0, - "step": 46660 - }, - { - "epoch": 3.5675593019477416, - "grad_norm": 0.002710143104195595, - "learning_rate": 0.00019999372198856082, - "loss": 46.0, - "step": 46661 - }, - { - "epoch": 3.5676357589311314, - "grad_norm": 0.0043915691785514355, - "learning_rate": 0.00019999372171941278, - "loss": 46.0, - "step": 46662 - }, - { - "epoch": 3.567712215914521, - "grad_norm": 0.002100900048390031, - "learning_rate": 0.00019999372145025897, - "loss": 46.0, - "step": 46663 - }, - { - "epoch": 3.567788672897911, - "grad_norm": 0.002226870507001877, - "learning_rate": 0.00019999372118109936, - "loss": 46.0, - "step": 46664 - }, - { - "epoch": 3.5678651298813007, - "grad_norm": 0.0007734823157079518, - "learning_rate": 0.00019999372091193397, - "loss": 46.0, - "step": 46665 - }, - { - "epoch": 3.5679415868646904, - "grad_norm": 0.0012925807386636734, - "learning_rate": 0.00019999372064276284, - "loss": 46.0, - "step": 46666 - }, - { - "epoch": 3.56801804384808, - "grad_norm": 0.0016663826536387205, - "learning_rate": 0.00019999372037358596, - "loss": 46.0, - "step": 46667 - }, - { - "epoch": 3.56809450083147, - "grad_norm": 0.0010066236136481166, - "learning_rate": 0.00019999372010440328, - "loss": 46.0, - "step": 46668 - }, - { - "epoch": 3.5681709578148593, - "grad_norm": 0.0017347463872283697, - "learning_rate": 0.00019999371983521483, - "loss": 46.0, - "step": 46669 - }, - { - "epoch": 3.568247414798249, - "grad_norm": 0.0034977858886122704, - "learning_rate": 0.00019999371956602063, - "loss": 46.0, - "step": 46670 - }, - { - "epoch": 3.568323871781639, - "grad_norm": 0.0008923871791921556, - "learning_rate": 0.00019999371929682065, - "loss": 46.0, - "step": 46671 - }, - { - "epoch": 3.5684003287650286, - "grad_norm": 0.0019429794047027826, - "learning_rate": 0.0001999937190276149, - "loss": 46.0, - "step": 46672 - }, - { - "epoch": 3.5684767857484183, - "grad_norm": 0.0010052897268906236, - "learning_rate": 0.0001999937187584034, - "loss": 46.0, - "step": 46673 - }, - { - "epoch": 3.568553242731808, - "grad_norm": 0.002584023866802454, - "learning_rate": 0.0001999937184891861, - "loss": 46.0, - "step": 46674 - }, - { - "epoch": 3.568629699715198, - "grad_norm": 0.0030269892886281013, - "learning_rate": 0.00019999371821996303, - "loss": 46.0, - "step": 46675 - }, - { - "epoch": 3.5687061566985876, - "grad_norm": 0.0016190746100619435, - "learning_rate": 0.00019999371795073422, - "loss": 46.0, - "step": 46676 - }, - { - "epoch": 3.568782613681977, - "grad_norm": 0.0026490408927202225, - "learning_rate": 0.0001999937176814996, - "loss": 46.0, - "step": 46677 - }, - { - "epoch": 3.5688590706653667, - "grad_norm": 0.0006975713768042624, - "learning_rate": 0.00019999371741225925, - "loss": 46.0, - "step": 46678 - }, - { - "epoch": 3.5689355276487564, - "grad_norm": 0.00419412087649107, - "learning_rate": 0.0001999937171430131, - "loss": 46.0, - "step": 46679 - }, - { - "epoch": 3.569011984632146, - "grad_norm": 0.002260678680613637, - "learning_rate": 0.0001999937168737612, - "loss": 46.0, - "step": 46680 - }, - { - "epoch": 3.569088441615536, - "grad_norm": 0.0012944111367687583, - "learning_rate": 0.00019999371660450355, - "loss": 46.0, - "step": 46681 - }, - { - "epoch": 3.5691648985989257, - "grad_norm": 0.0013006249209865928, - "learning_rate": 0.0001999937163352401, - "loss": 46.0, - "step": 46682 - }, - { - "epoch": 3.5692413555823155, - "grad_norm": 0.001816820353269577, - "learning_rate": 0.00019999371606597088, - "loss": 46.0, - "step": 46683 - }, - { - "epoch": 3.5693178125657052, - "grad_norm": 0.0019528622506186366, - "learning_rate": 0.00019999371579669593, - "loss": 46.0, - "step": 46684 - }, - { - "epoch": 3.569394269549095, - "grad_norm": 0.0036003452260047197, - "learning_rate": 0.00019999371552741516, - "loss": 46.0, - "step": 46685 - }, - { - "epoch": 3.5694707265324848, - "grad_norm": 0.0028399359434843063, - "learning_rate": 0.00019999371525812864, - "loss": 46.0, - "step": 46686 - }, - { - "epoch": 3.5695471835158745, - "grad_norm": 0.002477761823683977, - "learning_rate": 0.00019999371498883637, - "loss": 46.0, - "step": 46687 - }, - { - "epoch": 3.5696236404992643, - "grad_norm": 0.010253152810037136, - "learning_rate": 0.0001999937147195383, - "loss": 46.0, - "step": 46688 - }, - { - "epoch": 3.569700097482654, - "grad_norm": 0.0037822010926902294, - "learning_rate": 0.0001999937144502345, - "loss": 46.0, - "step": 46689 - }, - { - "epoch": 3.569776554466044, - "grad_norm": 0.0006942534237168729, - "learning_rate": 0.00019999371418092488, - "loss": 46.0, - "step": 46690 - }, - { - "epoch": 3.569853011449433, - "grad_norm": 0.0029970204923301935, - "learning_rate": 0.00019999371391160952, - "loss": 46.0, - "step": 46691 - }, - { - "epoch": 3.569929468432823, - "grad_norm": 0.0022718869149684906, - "learning_rate": 0.0001999937136422884, - "loss": 46.0, - "step": 46692 - }, - { - "epoch": 3.5700059254162126, - "grad_norm": 0.0026808385737240314, - "learning_rate": 0.0001999937133729615, - "loss": 46.0, - "step": 46693 - }, - { - "epoch": 3.5700823823996024, - "grad_norm": 0.004271664656698704, - "learning_rate": 0.00019999371310362883, - "loss": 46.0, - "step": 46694 - }, - { - "epoch": 3.570158839382992, - "grad_norm": 0.0015238302294164896, - "learning_rate": 0.0001999937128342904, - "loss": 46.0, - "step": 46695 - }, - { - "epoch": 3.570235296366382, - "grad_norm": 0.0011019359808415174, - "learning_rate": 0.00019999371256494618, - "loss": 46.0, - "step": 46696 - }, - { - "epoch": 3.5703117533497717, - "grad_norm": 0.0017429268918931484, - "learning_rate": 0.0001999937122955962, - "loss": 46.0, - "step": 46697 - }, - { - "epoch": 3.5703882103331614, - "grad_norm": 0.008877106010913849, - "learning_rate": 0.00019999371202624047, - "loss": 46.0, - "step": 46698 - }, - { - "epoch": 3.5704646673165508, - "grad_norm": 0.0010921615175902843, - "learning_rate": 0.00019999371175687895, - "loss": 46.0, - "step": 46699 - }, - { - "epoch": 3.5705411242999405, - "grad_norm": 0.0013169948942959309, - "learning_rate": 0.00019999371148751168, - "loss": 46.0, - "step": 46700 - }, - { - "epoch": 3.5706175812833303, - "grad_norm": 0.005135527346283197, - "learning_rate": 0.00019999371121813862, - "loss": 46.0, - "step": 46701 - }, - { - "epoch": 3.57069403826672, - "grad_norm": 0.003486905712634325, - "learning_rate": 0.00019999371094875978, - "loss": 46.0, - "step": 46702 - }, - { - "epoch": 3.57077049525011, - "grad_norm": 0.0026861154474318027, - "learning_rate": 0.0001999937106793752, - "loss": 46.0, - "step": 46703 - }, - { - "epoch": 3.5708469522334996, - "grad_norm": 0.0018732862081378698, - "learning_rate": 0.00019999371040998483, - "loss": 46.0, - "step": 46704 - }, - { - "epoch": 3.5709234092168893, - "grad_norm": 0.003529096022248268, - "learning_rate": 0.0001999937101405887, - "loss": 46.0, - "step": 46705 - }, - { - "epoch": 3.570999866200279, - "grad_norm": 0.0014549318002536893, - "learning_rate": 0.00019999370987118682, - "loss": 46.0, - "step": 46706 - }, - { - "epoch": 3.571076323183669, - "grad_norm": 0.001802139449864626, - "learning_rate": 0.00019999370960177915, - "loss": 46.0, - "step": 46707 - }, - { - "epoch": 3.5711527801670586, - "grad_norm": 0.0028477897867560387, - "learning_rate": 0.0001999937093323657, - "loss": 46.0, - "step": 46708 - }, - { - "epoch": 3.5712292371504484, - "grad_norm": 0.0017701752949506044, - "learning_rate": 0.0001999937090629465, - "loss": 46.0, - "step": 46709 - }, - { - "epoch": 3.571305694133838, - "grad_norm": 0.0031636760104447603, - "learning_rate": 0.00019999370879352152, - "loss": 46.0, - "step": 46710 - }, - { - "epoch": 3.571382151117228, - "grad_norm": 0.0011496504303067923, - "learning_rate": 0.00019999370852409078, - "loss": 46.0, - "step": 46711 - }, - { - "epoch": 3.5714586081006177, - "grad_norm": 0.0033891985658556223, - "learning_rate": 0.00019999370825465426, - "loss": 46.0, - "step": 46712 - }, - { - "epoch": 3.571535065084007, - "grad_norm": 0.006602306384593248, - "learning_rate": 0.00019999370798521197, - "loss": 46.0, - "step": 46713 - }, - { - "epoch": 3.5716115220673967, - "grad_norm": 0.005738503765314817, - "learning_rate": 0.00019999370771576396, - "loss": 46.0, - "step": 46714 - }, - { - "epoch": 3.5716879790507865, - "grad_norm": 0.002612931188195944, - "learning_rate": 0.00019999370744631012, - "loss": 46.0, - "step": 46715 - }, - { - "epoch": 3.5717644360341763, - "grad_norm": 0.00203883508220315, - "learning_rate": 0.00019999370717685053, - "loss": 46.0, - "step": 46716 - }, - { - "epoch": 3.571840893017566, - "grad_norm": 0.0027113761752843857, - "learning_rate": 0.00019999370690738518, - "loss": 46.0, - "step": 46717 - }, - { - "epoch": 3.571917350000956, - "grad_norm": 0.0010218023089691997, - "learning_rate": 0.00019999370663791407, - "loss": 46.0, - "step": 46718 - }, - { - "epoch": 3.5719938069843455, - "grad_norm": 0.0025397012941539288, - "learning_rate": 0.00019999370636843714, - "loss": 46.0, - "step": 46719 - }, - { - "epoch": 3.5720702639677353, - "grad_norm": 0.0016761726001277566, - "learning_rate": 0.00019999370609895446, - "loss": 46.0, - "step": 46720 - }, - { - "epoch": 3.5721467209511246, - "grad_norm": 0.0021245433017611504, - "learning_rate": 0.00019999370582946604, - "loss": 46.0, - "step": 46721 - }, - { - "epoch": 3.5722231779345144, - "grad_norm": 0.001341967610642314, - "learning_rate": 0.00019999370555997184, - "loss": 46.0, - "step": 46722 - }, - { - "epoch": 3.572299634917904, - "grad_norm": 0.0029063820838928223, - "learning_rate": 0.00019999370529047187, - "loss": 46.0, - "step": 46723 - }, - { - "epoch": 3.572376091901294, - "grad_norm": 0.0010640128748491406, - "learning_rate": 0.00019999370502096612, - "loss": 46.0, - "step": 46724 - }, - { - "epoch": 3.5724525488846837, - "grad_norm": 0.0018383142305538058, - "learning_rate": 0.00019999370475145463, - "loss": 46.0, - "step": 46725 - }, - { - "epoch": 3.5725290058680734, - "grad_norm": 0.003777771256864071, - "learning_rate": 0.00019999370448193734, - "loss": 46.0, - "step": 46726 - }, - { - "epoch": 3.572605462851463, - "grad_norm": 0.0013168500736355782, - "learning_rate": 0.0001999937042124143, - "loss": 46.0, - "step": 46727 - }, - { - "epoch": 3.572681919834853, - "grad_norm": 0.0019330760696902871, - "learning_rate": 0.00019999370394288546, - "loss": 46.0, - "step": 46728 - }, - { - "epoch": 3.5727583768182427, - "grad_norm": 0.0021763385739177465, - "learning_rate": 0.00019999370367335088, - "loss": 46.0, - "step": 46729 - }, - { - "epoch": 3.5728348338016325, - "grad_norm": 0.0011574061354622245, - "learning_rate": 0.00019999370340381055, - "loss": 46.0, - "step": 46730 - }, - { - "epoch": 3.5729112907850222, - "grad_norm": 0.0010531785665079951, - "learning_rate": 0.0001999937031342644, - "loss": 46.0, - "step": 46731 - }, - { - "epoch": 3.572987747768412, - "grad_norm": 0.003681953065097332, - "learning_rate": 0.0001999937028647125, - "loss": 46.0, - "step": 46732 - }, - { - "epoch": 3.5730642047518018, - "grad_norm": 0.00215995404869318, - "learning_rate": 0.00019999370259515483, - "loss": 46.0, - "step": 46733 - }, - { - "epoch": 3.5731406617351915, - "grad_norm": 0.0006553211132995784, - "learning_rate": 0.00019999370232559144, - "loss": 46.0, - "step": 46734 - }, - { - "epoch": 3.573217118718581, - "grad_norm": 0.004397883545607328, - "learning_rate": 0.0001999937020560222, - "loss": 46.0, - "step": 46735 - }, - { - "epoch": 3.5732935757019706, - "grad_norm": 0.0017515133367851377, - "learning_rate": 0.00019999370178644724, - "loss": 46.0, - "step": 46736 - }, - { - "epoch": 3.5733700326853604, - "grad_norm": 0.001964492490515113, - "learning_rate": 0.0001999937015168665, - "loss": 46.0, - "step": 46737 - }, - { - "epoch": 3.57344648966875, - "grad_norm": 0.003423060057684779, - "learning_rate": 0.00019999370124727998, - "loss": 46.0, - "step": 46738 - }, - { - "epoch": 3.57352294665214, - "grad_norm": 0.008906511589884758, - "learning_rate": 0.00019999370097768771, - "loss": 46.0, - "step": 46739 - }, - { - "epoch": 3.5735994036355296, - "grad_norm": 0.0013465622905641794, - "learning_rate": 0.00019999370070808965, - "loss": 46.0, - "step": 46740 - }, - { - "epoch": 3.5736758606189194, - "grad_norm": 0.007654804270714521, - "learning_rate": 0.00019999370043848584, - "loss": 46.0, - "step": 46741 - }, - { - "epoch": 3.573752317602309, - "grad_norm": 0.001357228378765285, - "learning_rate": 0.00019999370016887625, - "loss": 46.0, - "step": 46742 - }, - { - "epoch": 3.5738287745856985, - "grad_norm": 0.0018311721505597234, - "learning_rate": 0.00019999369989926092, - "loss": 46.0, - "step": 46743 - }, - { - "epoch": 3.5739052315690882, - "grad_norm": 0.004204512108117342, - "learning_rate": 0.0001999936996296398, - "loss": 46.0, - "step": 46744 - }, - { - "epoch": 3.573981688552478, - "grad_norm": 0.004237952642142773, - "learning_rate": 0.0001999936993600129, - "loss": 46.0, - "step": 46745 - }, - { - "epoch": 3.5740581455358678, - "grad_norm": 0.0031867539510130882, - "learning_rate": 0.0001999936990903802, - "loss": 46.0, - "step": 46746 - }, - { - "epoch": 3.5741346025192575, - "grad_norm": 0.0018509686924517155, - "learning_rate": 0.0001999936988207418, - "loss": 46.0, - "step": 46747 - }, - { - "epoch": 3.5742110595026473, - "grad_norm": 0.0027046131435781717, - "learning_rate": 0.0001999936985510976, - "loss": 46.0, - "step": 46748 - }, - { - "epoch": 3.574287516486037, - "grad_norm": 0.0024194049183279276, - "learning_rate": 0.00019999369828144762, - "loss": 46.0, - "step": 46749 - }, - { - "epoch": 3.574363973469427, - "grad_norm": 0.0024963058531284332, - "learning_rate": 0.00019999369801179187, - "loss": 46.0, - "step": 46750 - }, - { - "epoch": 3.5744404304528166, - "grad_norm": 0.0014862155076116323, - "learning_rate": 0.00019999369774213038, - "loss": 46.0, - "step": 46751 - }, - { - "epoch": 3.5745168874362063, - "grad_norm": 0.0010933363810181618, - "learning_rate": 0.0001999936974724631, - "loss": 46.0, - "step": 46752 - }, - { - "epoch": 3.574593344419596, - "grad_norm": 0.002765242476016283, - "learning_rate": 0.00019999369720279006, - "loss": 46.0, - "step": 46753 - }, - { - "epoch": 3.574669801402986, - "grad_norm": 0.0025579046923667192, - "learning_rate": 0.00019999369693311122, - "loss": 46.0, - "step": 46754 - }, - { - "epoch": 3.5747462583863756, - "grad_norm": 0.004957238677889109, - "learning_rate": 0.00019999369666342666, - "loss": 46.0, - "step": 46755 - }, - { - "epoch": 3.574822715369765, - "grad_norm": 0.00213652104139328, - "learning_rate": 0.0001999936963937363, - "loss": 46.0, - "step": 46756 - }, - { - "epoch": 3.5748991723531547, - "grad_norm": 0.005884305573999882, - "learning_rate": 0.0001999936961240402, - "loss": 46.0, - "step": 46757 - }, - { - "epoch": 3.5749756293365444, - "grad_norm": 0.0015873105730861425, - "learning_rate": 0.0001999936958543383, - "loss": 46.0, - "step": 46758 - }, - { - "epoch": 3.575052086319934, - "grad_norm": 0.0023645416367799044, - "learning_rate": 0.00019999369558463065, - "loss": 46.0, - "step": 46759 - }, - { - "epoch": 3.575128543303324, - "grad_norm": 0.0020588866900652647, - "learning_rate": 0.0001999936953149172, - "loss": 46.0, - "step": 46760 - }, - { - "epoch": 3.5752050002867137, - "grad_norm": 0.0026461975648999214, - "learning_rate": 0.000199993695045198, - "loss": 46.0, - "step": 46761 - }, - { - "epoch": 3.5752814572701035, - "grad_norm": 0.0023272226098924875, - "learning_rate": 0.00019999369477547303, - "loss": 46.0, - "step": 46762 - }, - { - "epoch": 3.5753579142534933, - "grad_norm": 0.0020812973380088806, - "learning_rate": 0.0001999936945057423, - "loss": 46.0, - "step": 46763 - }, - { - "epoch": 3.575434371236883, - "grad_norm": 0.0014014183543622494, - "learning_rate": 0.0001999936942360058, - "loss": 46.0, - "step": 46764 - }, - { - "epoch": 3.5755108282202723, - "grad_norm": 0.0008459005039185286, - "learning_rate": 0.00019999369396626353, - "loss": 46.0, - "step": 46765 - }, - { - "epoch": 3.575587285203662, - "grad_norm": 0.0014770374400541186, - "learning_rate": 0.0001999936936965155, - "loss": 46.0, - "step": 46766 - }, - { - "epoch": 3.575663742187052, - "grad_norm": 0.005236379336565733, - "learning_rate": 0.00019999369342676166, - "loss": 46.0, - "step": 46767 - }, - { - "epoch": 3.5757401991704416, - "grad_norm": 0.0006408869521692395, - "learning_rate": 0.00019999369315700207, - "loss": 46.0, - "step": 46768 - }, - { - "epoch": 3.5758166561538314, - "grad_norm": 0.006680544465780258, - "learning_rate": 0.00019999369288723674, - "loss": 46.0, - "step": 46769 - }, - { - "epoch": 3.575893113137221, - "grad_norm": 0.00097721500787884, - "learning_rate": 0.0001999936926174656, - "loss": 46.0, - "step": 46770 - }, - { - "epoch": 3.575969570120611, - "grad_norm": 0.0018070901278406382, - "learning_rate": 0.0001999936923476887, - "loss": 46.0, - "step": 46771 - }, - { - "epoch": 3.5760460271040007, - "grad_norm": 0.002206847770139575, - "learning_rate": 0.00019999369207790606, - "loss": 46.0, - "step": 46772 - }, - { - "epoch": 3.5761224840873904, - "grad_norm": 0.001448968192562461, - "learning_rate": 0.00019999369180811764, - "loss": 46.0, - "step": 46773 - }, - { - "epoch": 3.57619894107078, - "grad_norm": 0.0015066650230437517, - "learning_rate": 0.00019999369153832347, - "loss": 46.0, - "step": 46774 - }, - { - "epoch": 3.57627539805417, - "grad_norm": 0.00829948578029871, - "learning_rate": 0.00019999369126852347, - "loss": 46.0, - "step": 46775 - }, - { - "epoch": 3.5763518550375597, - "grad_norm": 0.002177748829126358, - "learning_rate": 0.00019999369099871775, - "loss": 46.0, - "step": 46776 - }, - { - "epoch": 3.5764283120209495, - "grad_norm": 0.0016441484913229942, - "learning_rate": 0.00019999369072890624, - "loss": 46.0, - "step": 46777 - }, - { - "epoch": 3.576504769004339, - "grad_norm": 0.002894565463066101, - "learning_rate": 0.00019999369045908897, - "loss": 46.0, - "step": 46778 - }, - { - "epoch": 3.5765812259877285, - "grad_norm": 0.001928357989527285, - "learning_rate": 0.00019999369018926594, - "loss": 46.0, - "step": 46779 - }, - { - "epoch": 3.5766576829711183, - "grad_norm": 0.0039041186682879925, - "learning_rate": 0.0001999936899194371, - "loss": 46.0, - "step": 46780 - }, - { - "epoch": 3.576734139954508, - "grad_norm": 0.0013528987765312195, - "learning_rate": 0.00019999368964960254, - "loss": 46.0, - "step": 46781 - }, - { - "epoch": 3.576810596937898, - "grad_norm": 0.008186491206288338, - "learning_rate": 0.00019999368937976216, - "loss": 46.0, - "step": 46782 - }, - { - "epoch": 3.5768870539212876, - "grad_norm": 0.004805978387594223, - "learning_rate": 0.00019999368910991608, - "loss": 46.0, - "step": 46783 - }, - { - "epoch": 3.5769635109046773, - "grad_norm": 0.004620899446308613, - "learning_rate": 0.00019999368884006418, - "loss": 46.0, - "step": 46784 - }, - { - "epoch": 3.577039967888067, - "grad_norm": 0.004319780506193638, - "learning_rate": 0.00019999368857020653, - "loss": 46.0, - "step": 46785 - }, - { - "epoch": 3.5771164248714564, - "grad_norm": 0.0009886305779218674, - "learning_rate": 0.00019999368830034308, - "loss": 46.0, - "step": 46786 - }, - { - "epoch": 3.577192881854846, - "grad_norm": 0.0007514212047681212, - "learning_rate": 0.00019999368803047388, - "loss": 46.0, - "step": 46787 - }, - { - "epoch": 3.577269338838236, - "grad_norm": 0.0017487631412222981, - "learning_rate": 0.00019999368776059891, - "loss": 46.0, - "step": 46788 - }, - { - "epoch": 3.5773457958216257, - "grad_norm": 0.0014503885759040713, - "learning_rate": 0.0001999936874907182, - "loss": 46.0, - "step": 46789 - }, - { - "epoch": 3.5774222528050155, - "grad_norm": 0.002369289519265294, - "learning_rate": 0.0001999936872208317, - "loss": 46.0, - "step": 46790 - }, - { - "epoch": 3.5774987097884052, - "grad_norm": 0.0029159081168472767, - "learning_rate": 0.00019999368695093942, - "loss": 46.0, - "step": 46791 - }, - { - "epoch": 3.577575166771795, - "grad_norm": 0.0027819473762065172, - "learning_rate": 0.00019999368668104138, - "loss": 46.0, - "step": 46792 - }, - { - "epoch": 3.5776516237551848, - "grad_norm": 0.0031795527320355177, - "learning_rate": 0.00019999368641113757, - "loss": 46.0, - "step": 46793 - }, - { - "epoch": 3.5777280807385745, - "grad_norm": 0.001487210625782609, - "learning_rate": 0.00019999368614122802, - "loss": 46.0, - "step": 46794 - }, - { - "epoch": 3.5778045377219643, - "grad_norm": 0.0009151780395768583, - "learning_rate": 0.00019999368587131266, - "loss": 46.0, - "step": 46795 - }, - { - "epoch": 3.577880994705354, - "grad_norm": 0.0014048231532797217, - "learning_rate": 0.00019999368560139153, - "loss": 46.0, - "step": 46796 - }, - { - "epoch": 3.577957451688744, - "grad_norm": 0.0009513715631328523, - "learning_rate": 0.00019999368533146465, - "loss": 46.0, - "step": 46797 - }, - { - "epoch": 3.5780339086721336, - "grad_norm": 0.0016650855541229248, - "learning_rate": 0.000199993685061532, - "loss": 46.0, - "step": 46798 - }, - { - "epoch": 3.5781103656555233, - "grad_norm": 0.0034799412824213505, - "learning_rate": 0.00019999368479159358, - "loss": 46.0, - "step": 46799 - }, - { - "epoch": 3.5781868226389126, - "grad_norm": 0.001487893401645124, - "learning_rate": 0.00019999368452164936, - "loss": 46.0, - "step": 46800 - }, - { - "epoch": 3.5782632796223024, - "grad_norm": 0.0018299927469342947, - "learning_rate": 0.00019999368425169942, - "loss": 46.0, - "step": 46801 - }, - { - "epoch": 3.578339736605692, - "grad_norm": 0.0008533790824003518, - "learning_rate": 0.0001999936839817437, - "loss": 46.0, - "step": 46802 - }, - { - "epoch": 3.578416193589082, - "grad_norm": 0.0035015440080314875, - "learning_rate": 0.00019999368371178218, - "loss": 46.0, - "step": 46803 - }, - { - "epoch": 3.5784926505724717, - "grad_norm": 0.0019301389111205935, - "learning_rate": 0.00019999368344181492, - "loss": 46.0, - "step": 46804 - }, - { - "epoch": 3.5785691075558614, - "grad_norm": 0.0011410403531044722, - "learning_rate": 0.00019999368317184189, - "loss": 46.0, - "step": 46805 - }, - { - "epoch": 3.578645564539251, - "grad_norm": 0.002320168074220419, - "learning_rate": 0.00019999368290186308, - "loss": 46.0, - "step": 46806 - }, - { - "epoch": 3.578722021522641, - "grad_norm": 0.004043637774884701, - "learning_rate": 0.0001999936826318785, - "loss": 46.0, - "step": 46807 - }, - { - "epoch": 3.5787984785060303, - "grad_norm": 0.004387584514915943, - "learning_rate": 0.00019999368236188817, - "loss": 46.0, - "step": 46808 - }, - { - "epoch": 3.57887493548942, - "grad_norm": 0.0011635865084826946, - "learning_rate": 0.00019999368209189204, - "loss": 46.0, - "step": 46809 - }, - { - "epoch": 3.57895139247281, - "grad_norm": 0.004327169619500637, - "learning_rate": 0.00019999368182189016, - "loss": 46.0, - "step": 46810 - }, - { - "epoch": 3.5790278494561996, - "grad_norm": 0.003773456672206521, - "learning_rate": 0.00019999368155188248, - "loss": 46.0, - "step": 46811 - }, - { - "epoch": 3.5791043064395893, - "grad_norm": 0.0008915658690966666, - "learning_rate": 0.0001999936812818691, - "loss": 46.0, - "step": 46812 - }, - { - "epoch": 3.579180763422979, - "grad_norm": 0.0006731427274644375, - "learning_rate": 0.0001999936810118499, - "loss": 46.0, - "step": 46813 - }, - { - "epoch": 3.579257220406369, - "grad_norm": 0.0016147417481988668, - "learning_rate": 0.00019999368074182492, - "loss": 46.0, - "step": 46814 - }, - { - "epoch": 3.5793336773897586, - "grad_norm": 0.0017773015424609184, - "learning_rate": 0.0001999936804717942, - "loss": 46.0, - "step": 46815 - }, - { - "epoch": 3.5794101343731484, - "grad_norm": 0.002532925456762314, - "learning_rate": 0.0001999936802017577, - "loss": 46.0, - "step": 46816 - }, - { - "epoch": 3.579486591356538, - "grad_norm": 0.0016357743879780173, - "learning_rate": 0.00019999367993171543, - "loss": 46.0, - "step": 46817 - }, - { - "epoch": 3.579563048339928, - "grad_norm": 0.0014051521429792047, - "learning_rate": 0.0001999936796616674, - "loss": 46.0, - "step": 46818 - }, - { - "epoch": 3.5796395053233176, - "grad_norm": 0.0008412550669163465, - "learning_rate": 0.00019999367939161359, - "loss": 46.0, - "step": 46819 - }, - { - "epoch": 3.5797159623067074, - "grad_norm": 0.0011157477274537086, - "learning_rate": 0.00019999367912155403, - "loss": 46.0, - "step": 46820 - }, - { - "epoch": 3.579792419290097, - "grad_norm": 0.0018081150483340025, - "learning_rate": 0.00019999367885148867, - "loss": 46.0, - "step": 46821 - }, - { - "epoch": 3.5798688762734865, - "grad_norm": 0.00424313684925437, - "learning_rate": 0.00019999367858141755, - "loss": 46.0, - "step": 46822 - }, - { - "epoch": 3.5799453332568762, - "grad_norm": 0.004288270603865385, - "learning_rate": 0.00019999367831134067, - "loss": 46.0, - "step": 46823 - }, - { - "epoch": 3.580021790240266, - "grad_norm": 0.0027884249575436115, - "learning_rate": 0.00019999367804125805, - "loss": 46.0, - "step": 46824 - }, - { - "epoch": 3.5800982472236558, - "grad_norm": 0.006068435963243246, - "learning_rate": 0.0001999936777711696, - "loss": 46.0, - "step": 46825 - }, - { - "epoch": 3.5801747042070455, - "grad_norm": 0.003302141558378935, - "learning_rate": 0.00019999367750107543, - "loss": 46.0, - "step": 46826 - }, - { - "epoch": 3.5802511611904353, - "grad_norm": 0.003297490766271949, - "learning_rate": 0.00019999367723097546, - "loss": 46.0, - "step": 46827 - }, - { - "epoch": 3.580327618173825, - "grad_norm": 0.003877032781019807, - "learning_rate": 0.00019999367696086972, - "loss": 46.0, - "step": 46828 - }, - { - "epoch": 3.580404075157215, - "grad_norm": 0.0012347853044047952, - "learning_rate": 0.00019999367669075823, - "loss": 46.0, - "step": 46829 - }, - { - "epoch": 3.580480532140604, - "grad_norm": 0.0023316224105656147, - "learning_rate": 0.00019999367642064097, - "loss": 46.0, - "step": 46830 - }, - { - "epoch": 3.580556989123994, - "grad_norm": 0.002378355246037245, - "learning_rate": 0.00019999367615051793, - "loss": 46.0, - "step": 46831 - }, - { - "epoch": 3.5806334461073837, - "grad_norm": 0.0013475813902914524, - "learning_rate": 0.00019999367588038913, - "loss": 46.0, - "step": 46832 - }, - { - "epoch": 3.5807099030907734, - "grad_norm": 0.004162842873483896, - "learning_rate": 0.00019999367561025454, - "loss": 46.0, - "step": 46833 - }, - { - "epoch": 3.580786360074163, - "grad_norm": 0.002723915968090296, - "learning_rate": 0.00019999367534011422, - "loss": 46.0, - "step": 46834 - }, - { - "epoch": 3.580862817057553, - "grad_norm": 0.005109093151986599, - "learning_rate": 0.00019999367506996811, - "loss": 46.0, - "step": 46835 - }, - { - "epoch": 3.5809392740409427, - "grad_norm": 0.0019570556469261646, - "learning_rate": 0.00019999367479981624, - "loss": 46.0, - "step": 46836 - }, - { - "epoch": 3.5810157310243325, - "grad_norm": 0.011387544684112072, - "learning_rate": 0.0001999936745296586, - "loss": 46.0, - "step": 46837 - }, - { - "epoch": 3.581092188007722, - "grad_norm": 0.002502802526578307, - "learning_rate": 0.00019999367425949517, - "loss": 46.0, - "step": 46838 - }, - { - "epoch": 3.581168644991112, - "grad_norm": 0.0032902935054153204, - "learning_rate": 0.00019999367398932597, - "loss": 46.0, - "step": 46839 - }, - { - "epoch": 3.5812451019745017, - "grad_norm": 0.001428350922651589, - "learning_rate": 0.000199993673719151, - "loss": 46.0, - "step": 46840 - }, - { - "epoch": 3.5813215589578915, - "grad_norm": 0.0008895163773559034, - "learning_rate": 0.0001999936734489703, - "loss": 46.0, - "step": 46841 - }, - { - "epoch": 3.5813980159412813, - "grad_norm": 0.00203204108402133, - "learning_rate": 0.0001999936731787838, - "loss": 46.0, - "step": 46842 - }, - { - "epoch": 3.581474472924671, - "grad_norm": 0.005746546201407909, - "learning_rate": 0.0001999936729085915, - "loss": 46.0, - "step": 46843 - }, - { - "epoch": 3.5815509299080603, - "grad_norm": 0.00214389874599874, - "learning_rate": 0.0001999936726383935, - "loss": 46.0, - "step": 46844 - }, - { - "epoch": 3.58162738689145, - "grad_norm": 0.0022019350435584784, - "learning_rate": 0.00019999367236818972, - "loss": 46.0, - "step": 46845 - }, - { - "epoch": 3.58170384387484, - "grad_norm": 0.0010690923081710935, - "learning_rate": 0.00019999367209798012, - "loss": 46.0, - "step": 46846 - }, - { - "epoch": 3.5817803008582296, - "grad_norm": 0.0031069230753928423, - "learning_rate": 0.00019999367182776481, - "loss": 46.0, - "step": 46847 - }, - { - "epoch": 3.5818567578416194, - "grad_norm": 0.0011770711280405521, - "learning_rate": 0.00019999367155754366, - "loss": 46.0, - "step": 46848 - }, - { - "epoch": 3.581933214825009, - "grad_norm": 0.00435941107571125, - "learning_rate": 0.0001999936712873168, - "loss": 46.0, - "step": 46849 - }, - { - "epoch": 3.582009671808399, - "grad_norm": 0.010944033041596413, - "learning_rate": 0.00019999367101708416, - "loss": 46.0, - "step": 46850 - }, - { - "epoch": 3.5820861287917887, - "grad_norm": 0.002510933205485344, - "learning_rate": 0.00019999367074684574, - "loss": 46.0, - "step": 46851 - }, - { - "epoch": 3.582162585775178, - "grad_norm": 0.002421025186777115, - "learning_rate": 0.00019999367047660155, - "loss": 46.0, - "step": 46852 - }, - { - "epoch": 3.5822390427585677, - "grad_norm": 0.00198395480401814, - "learning_rate": 0.0001999936702063516, - "loss": 46.0, - "step": 46853 - }, - { - "epoch": 3.5823154997419575, - "grad_norm": 0.002047466579824686, - "learning_rate": 0.00019999366993609586, - "loss": 46.0, - "step": 46854 - }, - { - "epoch": 3.5823919567253473, - "grad_norm": 0.001115086954087019, - "learning_rate": 0.0001999936696658344, - "loss": 46.0, - "step": 46855 - }, - { - "epoch": 3.582468413708737, - "grad_norm": 0.004388218745589256, - "learning_rate": 0.00019999366939556711, - "loss": 46.0, - "step": 46856 - }, - { - "epoch": 3.582544870692127, - "grad_norm": 0.00103157723788172, - "learning_rate": 0.00019999366912529408, - "loss": 46.0, - "step": 46857 - }, - { - "epoch": 3.5826213276755166, - "grad_norm": 0.002670151647180319, - "learning_rate": 0.0001999936688550153, - "loss": 46.0, - "step": 46858 - }, - { - "epoch": 3.5826977846589063, - "grad_norm": 0.002329086884856224, - "learning_rate": 0.00019999366858473072, - "loss": 46.0, - "step": 46859 - }, - { - "epoch": 3.582774241642296, - "grad_norm": 0.0017038799123838544, - "learning_rate": 0.00019999366831444036, - "loss": 46.0, - "step": 46860 - }, - { - "epoch": 3.582850698625686, - "grad_norm": 0.0020282629411667585, - "learning_rate": 0.0001999936680441443, - "loss": 46.0, - "step": 46861 - }, - { - "epoch": 3.5829271556090756, - "grad_norm": 0.0016655820654705167, - "learning_rate": 0.00019999366777384241, - "loss": 46.0, - "step": 46862 - }, - { - "epoch": 3.5830036125924654, - "grad_norm": 0.0009943764889612794, - "learning_rate": 0.00019999366750353477, - "loss": 46.0, - "step": 46863 - }, - { - "epoch": 3.583080069575855, - "grad_norm": 0.006375682540237904, - "learning_rate": 0.00019999366723322135, - "loss": 46.0, - "step": 46864 - }, - { - "epoch": 3.583156526559245, - "grad_norm": 0.0015145654324442148, - "learning_rate": 0.00019999366696290215, - "loss": 46.0, - "step": 46865 - }, - { - "epoch": 3.583232983542634, - "grad_norm": 0.0007737563573755324, - "learning_rate": 0.0001999936666925772, - "loss": 46.0, - "step": 46866 - }, - { - "epoch": 3.583309440526024, - "grad_norm": 0.0019043160136789083, - "learning_rate": 0.0001999936664222465, - "loss": 46.0, - "step": 46867 - }, - { - "epoch": 3.5833858975094137, - "grad_norm": 0.002586058573797345, - "learning_rate": 0.00019999366615191, - "loss": 46.0, - "step": 46868 - }, - { - "epoch": 3.5834623544928035, - "grad_norm": 0.003510398557409644, - "learning_rate": 0.00019999366588156772, - "loss": 46.0, - "step": 46869 - }, - { - "epoch": 3.5835388114761932, - "grad_norm": 0.002712306333705783, - "learning_rate": 0.0001999936656112197, - "loss": 46.0, - "step": 46870 - }, - { - "epoch": 3.583615268459583, - "grad_norm": 0.020472927019000053, - "learning_rate": 0.0001999936653408659, - "loss": 46.0, - "step": 46871 - }, - { - "epoch": 3.5836917254429728, - "grad_norm": 0.0013473426224663854, - "learning_rate": 0.00019999366507050635, - "loss": 46.0, - "step": 46872 - }, - { - "epoch": 3.5837681824263625, - "grad_norm": 0.0022178178187459707, - "learning_rate": 0.000199993664800141, - "loss": 46.0, - "step": 46873 - }, - { - "epoch": 3.583844639409752, - "grad_norm": 0.0014744015643373132, - "learning_rate": 0.0001999936645297699, - "loss": 46.0, - "step": 46874 - }, - { - "epoch": 3.5839210963931416, - "grad_norm": 0.0024269232526421547, - "learning_rate": 0.00019999366425939302, - "loss": 46.0, - "step": 46875 - }, - { - "epoch": 3.5839975533765314, - "grad_norm": 0.0010478358017280698, - "learning_rate": 0.00019999366398901038, - "loss": 46.0, - "step": 46876 - }, - { - "epoch": 3.584074010359921, - "grad_norm": 0.005806457716971636, - "learning_rate": 0.00019999366371862195, - "loss": 46.0, - "step": 46877 - }, - { - "epoch": 3.584150467343311, - "grad_norm": 0.001915874076075852, - "learning_rate": 0.0001999936634482278, - "loss": 46.0, - "step": 46878 - }, - { - "epoch": 3.5842269243267006, - "grad_norm": 0.0013806720962747931, - "learning_rate": 0.00019999366317782782, - "loss": 46.0, - "step": 46879 - }, - { - "epoch": 3.5843033813100904, - "grad_norm": 0.0012003335868939757, - "learning_rate": 0.0001999936629074221, - "loss": 46.0, - "step": 46880 - }, - { - "epoch": 3.58437983829348, - "grad_norm": 0.00372195802628994, - "learning_rate": 0.00019999366263701062, - "loss": 46.0, - "step": 46881 - }, - { - "epoch": 3.58445629527687, - "grad_norm": 0.001525621977634728, - "learning_rate": 0.00019999366236659336, - "loss": 46.0, - "step": 46882 - }, - { - "epoch": 3.5845327522602597, - "grad_norm": 0.0013389830710366368, - "learning_rate": 0.00019999366209617035, - "loss": 46.0, - "step": 46883 - }, - { - "epoch": 3.5846092092436495, - "grad_norm": 0.0019984717946499586, - "learning_rate": 0.00019999366182574154, - "loss": 46.0, - "step": 46884 - }, - { - "epoch": 3.584685666227039, - "grad_norm": 0.0016327903140336275, - "learning_rate": 0.000199993661555307, - "loss": 46.0, - "step": 46885 - }, - { - "epoch": 3.584762123210429, - "grad_norm": 0.00117179611697793, - "learning_rate": 0.00019999366128486666, - "loss": 46.0, - "step": 46886 - }, - { - "epoch": 3.5848385801938183, - "grad_norm": 0.0046615214087069035, - "learning_rate": 0.00019999366101442054, - "loss": 46.0, - "step": 46887 - }, - { - "epoch": 3.584915037177208, - "grad_norm": 0.0030638622120022774, - "learning_rate": 0.00019999366074396866, - "loss": 46.0, - "step": 46888 - }, - { - "epoch": 3.584991494160598, - "grad_norm": 0.0027536507695913315, - "learning_rate": 0.00019999366047351101, - "loss": 46.0, - "step": 46889 - }, - { - "epoch": 3.5850679511439876, - "grad_norm": 0.004430568311363459, - "learning_rate": 0.00019999366020304762, - "loss": 46.0, - "step": 46890 - }, - { - "epoch": 3.5851444081273773, - "grad_norm": 0.0024397619999945164, - "learning_rate": 0.00019999365993257843, - "loss": 46.0, - "step": 46891 - }, - { - "epoch": 3.585220865110767, - "grad_norm": 0.001831038505770266, - "learning_rate": 0.0001999936596621035, - "loss": 46.0, - "step": 46892 - }, - { - "epoch": 3.585297322094157, - "grad_norm": 0.0013811071403324604, - "learning_rate": 0.00019999365939162277, - "loss": 46.0, - "step": 46893 - }, - { - "epoch": 3.5853737790775466, - "grad_norm": 0.0010018610628321767, - "learning_rate": 0.00019999365912113629, - "loss": 46.0, - "step": 46894 - }, - { - "epoch": 3.5854502360609364, - "grad_norm": 0.0022364468313753605, - "learning_rate": 0.00019999365885064403, - "loss": 46.0, - "step": 46895 - }, - { - "epoch": 3.5855266930443257, - "grad_norm": 0.0029993706848472357, - "learning_rate": 0.000199993658580146, - "loss": 46.0, - "step": 46896 - }, - { - "epoch": 3.5856031500277155, - "grad_norm": 0.0018752289470285177, - "learning_rate": 0.0001999936583096422, - "loss": 46.0, - "step": 46897 - }, - { - "epoch": 3.585679607011105, - "grad_norm": 0.002106144791468978, - "learning_rate": 0.00019999365803913263, - "loss": 46.0, - "step": 46898 - }, - { - "epoch": 3.585756063994495, - "grad_norm": 0.002015571342781186, - "learning_rate": 0.0001999936577686173, - "loss": 46.0, - "step": 46899 - }, - { - "epoch": 3.5858325209778847, - "grad_norm": 0.002436260925605893, - "learning_rate": 0.0001999936574980962, - "loss": 46.0, - "step": 46900 - }, - { - "epoch": 3.5859089779612745, - "grad_norm": 0.006660154089331627, - "learning_rate": 0.00019999365722756933, - "loss": 46.0, - "step": 46901 - }, - { - "epoch": 3.5859854349446643, - "grad_norm": 0.0015384058933705091, - "learning_rate": 0.00019999365695703668, - "loss": 46.0, - "step": 46902 - }, - { - "epoch": 3.586061891928054, - "grad_norm": 0.001092072343453765, - "learning_rate": 0.0001999936566864983, - "loss": 46.0, - "step": 46903 - }, - { - "epoch": 3.586138348911444, - "grad_norm": 0.003085655625909567, - "learning_rate": 0.00019999365641595412, - "loss": 46.0, - "step": 46904 - }, - { - "epoch": 3.5862148058948335, - "grad_norm": 0.0013352144742384553, - "learning_rate": 0.00019999365614540416, - "loss": 46.0, - "step": 46905 - }, - { - "epoch": 3.5862912628782233, - "grad_norm": 0.005519264377653599, - "learning_rate": 0.00019999365587484844, - "loss": 46.0, - "step": 46906 - }, - { - "epoch": 3.586367719861613, - "grad_norm": 0.0007681608549319208, - "learning_rate": 0.00019999365560428696, - "loss": 46.0, - "step": 46907 - }, - { - "epoch": 3.586444176845003, - "grad_norm": 0.0011039824457839131, - "learning_rate": 0.0001999936553337197, - "loss": 46.0, - "step": 46908 - }, - { - "epoch": 3.586520633828392, - "grad_norm": 0.002910558832809329, - "learning_rate": 0.00019999365506314666, - "loss": 46.0, - "step": 46909 - }, - { - "epoch": 3.586597090811782, - "grad_norm": 0.0012377003440633416, - "learning_rate": 0.00019999365479256789, - "loss": 46.0, - "step": 46910 - }, - { - "epoch": 3.5866735477951717, - "grad_norm": 0.000693152891471982, - "learning_rate": 0.00019999365452198333, - "loss": 46.0, - "step": 46911 - }, - { - "epoch": 3.5867500047785614, - "grad_norm": 0.003015368478372693, - "learning_rate": 0.00019999365425139298, - "loss": 46.0, - "step": 46912 - }, - { - "epoch": 3.586826461761951, - "grad_norm": 0.0010261302813887596, - "learning_rate": 0.0001999936539807969, - "loss": 46.0, - "step": 46913 - }, - { - "epoch": 3.586902918745341, - "grad_norm": 0.0013995436020195484, - "learning_rate": 0.00019999365371019503, - "loss": 46.0, - "step": 46914 - }, - { - "epoch": 3.5869793757287307, - "grad_norm": 0.00220837933011353, - "learning_rate": 0.00019999365343958739, - "loss": 46.0, - "step": 46915 - }, - { - "epoch": 3.5870558327121205, - "grad_norm": 0.004423636943101883, - "learning_rate": 0.000199993653168974, - "loss": 46.0, - "step": 46916 - }, - { - "epoch": 3.58713228969551, - "grad_norm": 0.0034272857010364532, - "learning_rate": 0.0001999936528983548, - "loss": 46.0, - "step": 46917 - }, - { - "epoch": 3.5872087466788996, - "grad_norm": 0.0012460210127756, - "learning_rate": 0.00019999365262772986, - "loss": 46.0, - "step": 46918 - }, - { - "epoch": 3.5872852036622893, - "grad_norm": 0.002459676470607519, - "learning_rate": 0.00019999365235709915, - "loss": 46.0, - "step": 46919 - }, - { - "epoch": 3.587361660645679, - "grad_norm": 0.0017811853904277086, - "learning_rate": 0.00019999365208646266, - "loss": 46.0, - "step": 46920 - }, - { - "epoch": 3.587438117629069, - "grad_norm": 0.001715226098895073, - "learning_rate": 0.00019999365181582043, - "loss": 46.0, - "step": 46921 - }, - { - "epoch": 3.5875145746124586, - "grad_norm": 0.0006652458105236292, - "learning_rate": 0.0001999936515451724, - "loss": 46.0, - "step": 46922 - }, - { - "epoch": 3.5875910315958484, - "grad_norm": 0.002508498728275299, - "learning_rate": 0.0001999936512745186, - "loss": 46.0, - "step": 46923 - }, - { - "epoch": 3.587667488579238, - "grad_norm": 0.0034038997255265713, - "learning_rate": 0.00019999365100385906, - "loss": 46.0, - "step": 46924 - }, - { - "epoch": 3.587743945562628, - "grad_norm": 0.002733106492087245, - "learning_rate": 0.00019999365073319368, - "loss": 46.0, - "step": 46925 - }, - { - "epoch": 3.5878204025460176, - "grad_norm": 0.0014901467366144061, - "learning_rate": 0.0001999936504625226, - "loss": 46.0, - "step": 46926 - }, - { - "epoch": 3.5878968595294074, - "grad_norm": 0.004385835956782103, - "learning_rate": 0.00019999365019184574, - "loss": 46.0, - "step": 46927 - }, - { - "epoch": 3.587973316512797, - "grad_norm": 0.0018085431074723601, - "learning_rate": 0.00019999364992116312, - "loss": 46.0, - "step": 46928 - }, - { - "epoch": 3.588049773496187, - "grad_norm": 0.0017175941029563546, - "learning_rate": 0.0001999936496504747, - "loss": 46.0, - "step": 46929 - }, - { - "epoch": 3.5881262304795767, - "grad_norm": 0.0020792155992239714, - "learning_rate": 0.00019999364937978053, - "loss": 46.0, - "step": 46930 - }, - { - "epoch": 3.588202687462966, - "grad_norm": 0.0025319543201476336, - "learning_rate": 0.0001999936491090806, - "loss": 46.0, - "step": 46931 - }, - { - "epoch": 3.5882791444463558, - "grad_norm": 0.00318585941568017, - "learning_rate": 0.00019999364883837485, - "loss": 46.0, - "step": 46932 - }, - { - "epoch": 3.5883556014297455, - "grad_norm": 0.0013812975957989693, - "learning_rate": 0.0001999936485676634, - "loss": 46.0, - "step": 46933 - }, - { - "epoch": 3.5884320584131353, - "grad_norm": 0.0023615281097590923, - "learning_rate": 0.00019999364829694614, - "loss": 46.0, - "step": 46934 - }, - { - "epoch": 3.588508515396525, - "grad_norm": 0.0029017331544309855, - "learning_rate": 0.00019999364802622313, - "loss": 46.0, - "step": 46935 - }, - { - "epoch": 3.588584972379915, - "grad_norm": 0.0014864859404042363, - "learning_rate": 0.00019999364775549432, - "loss": 46.0, - "step": 46936 - }, - { - "epoch": 3.5886614293633046, - "grad_norm": 0.0017683276673778892, - "learning_rate": 0.00019999364748475977, - "loss": 46.0, - "step": 46937 - }, - { - "epoch": 3.5887378863466943, - "grad_norm": 0.0008812231244519353, - "learning_rate": 0.00019999364721401945, - "loss": 46.0, - "step": 46938 - }, - { - "epoch": 3.5888143433300836, - "grad_norm": 0.0031536284368485212, - "learning_rate": 0.00019999364694327335, - "loss": 46.0, - "step": 46939 - }, - { - "epoch": 3.5888908003134734, - "grad_norm": 0.0031987479887902737, - "learning_rate": 0.0001999936466725215, - "loss": 46.0, - "step": 46940 - }, - { - "epoch": 3.588967257296863, - "grad_norm": 0.0019306178437545896, - "learning_rate": 0.00019999364640176386, - "loss": 46.0, - "step": 46941 - }, - { - "epoch": 3.589043714280253, - "grad_norm": 0.00660546263679862, - "learning_rate": 0.00019999364613100044, - "loss": 46.0, - "step": 46942 - }, - { - "epoch": 3.5891201712636427, - "grad_norm": 0.000853507372085005, - "learning_rate": 0.00019999364586023127, - "loss": 46.0, - "step": 46943 - }, - { - "epoch": 3.5891966282470325, - "grad_norm": 0.0016048774123191833, - "learning_rate": 0.00019999364558945636, - "loss": 46.0, - "step": 46944 - }, - { - "epoch": 3.589273085230422, - "grad_norm": 0.0017520948313176632, - "learning_rate": 0.00019999364531867562, - "loss": 46.0, - "step": 46945 - }, - { - "epoch": 3.589349542213812, - "grad_norm": 0.0012636297615244985, - "learning_rate": 0.00019999364504788916, - "loss": 46.0, - "step": 46946 - }, - { - "epoch": 3.5894259991972017, - "grad_norm": 0.003234325908124447, - "learning_rate": 0.00019999364477709688, - "loss": 46.0, - "step": 46947 - }, - { - "epoch": 3.5895024561805915, - "grad_norm": 0.0017377709737047553, - "learning_rate": 0.00019999364450629887, - "loss": 46.0, - "step": 46948 - }, - { - "epoch": 3.5895789131639813, - "grad_norm": 0.0017416693735867739, - "learning_rate": 0.0001999936442354951, - "loss": 46.0, - "step": 46949 - }, - { - "epoch": 3.589655370147371, - "grad_norm": 0.0010475734015926719, - "learning_rate": 0.00019999364396468554, - "loss": 46.0, - "step": 46950 - }, - { - "epoch": 3.5897318271307608, - "grad_norm": 0.002614538185298443, - "learning_rate": 0.0001999936436938702, - "loss": 46.0, - "step": 46951 - }, - { - "epoch": 3.5898082841141505, - "grad_norm": 0.0032782768830657005, - "learning_rate": 0.00019999364342304911, - "loss": 46.0, - "step": 46952 - }, - { - "epoch": 3.58988474109754, - "grad_norm": 0.0019131769658997655, - "learning_rate": 0.00019999364315222224, - "loss": 46.0, - "step": 46953 - }, - { - "epoch": 3.5899611980809296, - "grad_norm": 0.0026985181029886007, - "learning_rate": 0.00019999364288138962, - "loss": 46.0, - "step": 46954 - }, - { - "epoch": 3.5900376550643194, - "grad_norm": 0.002289633033797145, - "learning_rate": 0.00019999364261055123, - "loss": 46.0, - "step": 46955 - }, - { - "epoch": 3.590114112047709, - "grad_norm": 0.001089455559849739, - "learning_rate": 0.00019999364233970704, - "loss": 46.0, - "step": 46956 - }, - { - "epoch": 3.590190569031099, - "grad_norm": 0.002395318355411291, - "learning_rate": 0.0001999936420688571, - "loss": 46.0, - "step": 46957 - }, - { - "epoch": 3.5902670260144887, - "grad_norm": 0.002379494486376643, - "learning_rate": 0.0001999936417980014, - "loss": 46.0, - "step": 46958 - }, - { - "epoch": 3.5903434829978784, - "grad_norm": 0.0036207493394613266, - "learning_rate": 0.0001999936415271399, - "loss": 46.0, - "step": 46959 - }, - { - "epoch": 3.590419939981268, - "grad_norm": 0.002129668602719903, - "learning_rate": 0.00019999364125627267, - "loss": 46.0, - "step": 46960 - }, - { - "epoch": 3.5904963969646575, - "grad_norm": 0.0025731255300343037, - "learning_rate": 0.00019999364098539967, - "loss": 46.0, - "step": 46961 - }, - { - "epoch": 3.5905728539480473, - "grad_norm": 0.002844603732228279, - "learning_rate": 0.00019999364071452086, - "loss": 46.0, - "step": 46962 - }, - { - "epoch": 3.590649310931437, - "grad_norm": 0.006743055302649736, - "learning_rate": 0.0001999936404436363, - "loss": 46.0, - "step": 46963 - }, - { - "epoch": 3.590725767914827, - "grad_norm": 0.0013882482890039682, - "learning_rate": 0.00019999364017274598, - "loss": 46.0, - "step": 46964 - }, - { - "epoch": 3.5908022248982165, - "grad_norm": 0.0032037049531936646, - "learning_rate": 0.0001999936399018499, - "loss": 46.0, - "step": 46965 - }, - { - "epoch": 3.5908786818816063, - "grad_norm": 0.003279771190136671, - "learning_rate": 0.00019999363963094804, - "loss": 46.0, - "step": 46966 - }, - { - "epoch": 3.590955138864996, - "grad_norm": 0.002363900886848569, - "learning_rate": 0.0001999936393600404, - "loss": 46.0, - "step": 46967 - }, - { - "epoch": 3.591031595848386, - "grad_norm": 0.005510049872100353, - "learning_rate": 0.000199993639089127, - "loss": 46.0, - "step": 46968 - }, - { - "epoch": 3.5911080528317756, - "grad_norm": 0.0017380713252350688, - "learning_rate": 0.00019999363881820784, - "loss": 46.0, - "step": 46969 - }, - { - "epoch": 3.5911845098151653, - "grad_norm": 0.002312297001481056, - "learning_rate": 0.0001999936385472829, - "loss": 46.0, - "step": 46970 - }, - { - "epoch": 3.591260966798555, - "grad_norm": 0.0026192215736955404, - "learning_rate": 0.0001999936382763522, - "loss": 46.0, - "step": 46971 - }, - { - "epoch": 3.591337423781945, - "grad_norm": 0.002573398407548666, - "learning_rate": 0.0001999936380054157, - "loss": 46.0, - "step": 46972 - }, - { - "epoch": 3.5914138807653346, - "grad_norm": 0.0014432446332648396, - "learning_rate": 0.00019999363773447347, - "loss": 46.0, - "step": 46973 - }, - { - "epoch": 3.5914903377487244, - "grad_norm": 0.0011821654625236988, - "learning_rate": 0.00019999363746352547, - "loss": 46.0, - "step": 46974 - }, - { - "epoch": 3.5915667947321137, - "grad_norm": 0.0016567215789109468, - "learning_rate": 0.0001999936371925717, - "loss": 46.0, - "step": 46975 - }, - { - "epoch": 3.5916432517155035, - "grad_norm": 0.0022444501519203186, - "learning_rate": 0.00019999363692161214, - "loss": 46.0, - "step": 46976 - }, - { - "epoch": 3.5917197086988932, - "grad_norm": 0.01023557037115097, - "learning_rate": 0.0001999936366506468, - "loss": 46.0, - "step": 46977 - }, - { - "epoch": 3.591796165682283, - "grad_norm": 0.005189172923564911, - "learning_rate": 0.00019999363637967572, - "loss": 46.0, - "step": 46978 - }, - { - "epoch": 3.5918726226656728, - "grad_norm": 0.00366743141785264, - "learning_rate": 0.00019999363610869884, - "loss": 46.0, - "step": 46979 - }, - { - "epoch": 3.5919490796490625, - "grad_norm": 0.002978543983772397, - "learning_rate": 0.00019999363583771623, - "loss": 46.0, - "step": 46980 - }, - { - "epoch": 3.5920255366324523, - "grad_norm": 0.002727055922150612, - "learning_rate": 0.00019999363556672784, - "loss": 46.0, - "step": 46981 - }, - { - "epoch": 3.592101993615842, - "grad_norm": 0.0022688647732138634, - "learning_rate": 0.00019999363529573367, - "loss": 46.0, - "step": 46982 - }, - { - "epoch": 3.5921784505992314, - "grad_norm": 0.0020660520531237125, - "learning_rate": 0.00019999363502473373, - "loss": 46.0, - "step": 46983 - }, - { - "epoch": 3.592254907582621, - "grad_norm": 0.0010479899356141686, - "learning_rate": 0.00019999363475372802, - "loss": 46.0, - "step": 46984 - }, - { - "epoch": 3.592331364566011, - "grad_norm": 0.001985242823138833, - "learning_rate": 0.00019999363448271656, - "loss": 46.0, - "step": 46985 - }, - { - "epoch": 3.5924078215494006, - "grad_norm": 0.0007140121888369322, - "learning_rate": 0.0001999936342116993, - "loss": 46.0, - "step": 46986 - }, - { - "epoch": 3.5924842785327904, - "grad_norm": 0.0016278420807793736, - "learning_rate": 0.00019999363394067627, - "loss": 46.0, - "step": 46987 - }, - { - "epoch": 3.59256073551618, - "grad_norm": 0.0057882145047187805, - "learning_rate": 0.00019999363366964752, - "loss": 46.0, - "step": 46988 - }, - { - "epoch": 3.59263719249957, - "grad_norm": 0.0009396523819305003, - "learning_rate": 0.00019999363339861295, - "loss": 46.0, - "step": 46989 - }, - { - "epoch": 3.5927136494829597, - "grad_norm": 0.0010874695144593716, - "learning_rate": 0.00019999363312757265, - "loss": 46.0, - "step": 46990 - }, - { - "epoch": 3.5927901064663494, - "grad_norm": 0.002160031348466873, - "learning_rate": 0.00019999363285652655, - "loss": 46.0, - "step": 46991 - }, - { - "epoch": 3.592866563449739, - "grad_norm": 0.002129316097125411, - "learning_rate": 0.00019999363258547468, - "loss": 46.0, - "step": 46992 - }, - { - "epoch": 3.592943020433129, - "grad_norm": 0.004814533982425928, - "learning_rate": 0.00019999363231441706, - "loss": 46.0, - "step": 46993 - }, - { - "epoch": 3.5930194774165187, - "grad_norm": 0.0008700086618773639, - "learning_rate": 0.00019999363204335367, - "loss": 46.0, - "step": 46994 - }, - { - "epoch": 3.5930959343999085, - "grad_norm": 0.0021089348010718822, - "learning_rate": 0.0001999936317722845, - "loss": 46.0, - "step": 46995 - }, - { - "epoch": 3.5931723913832982, - "grad_norm": 0.0011872408213093877, - "learning_rate": 0.00019999363150120957, - "loss": 46.0, - "step": 46996 - }, - { - "epoch": 3.5932488483666876, - "grad_norm": 0.001255916547961533, - "learning_rate": 0.00019999363123012886, - "loss": 46.0, - "step": 46997 - }, - { - "epoch": 3.5933253053500773, - "grad_norm": 0.002526640659198165, - "learning_rate": 0.0001999936309590424, - "loss": 46.0, - "step": 46998 - }, - { - "epoch": 3.593401762333467, - "grad_norm": 0.001285151462070644, - "learning_rate": 0.00019999363068795014, - "loss": 46.0, - "step": 46999 - }, - { - "epoch": 3.593478219316857, - "grad_norm": 0.002987871877849102, - "learning_rate": 0.00019999363041685214, - "loss": 46.0, - "step": 47000 - }, - { - "epoch": 3.5935546763002466, - "grad_norm": 0.0015190659323707223, - "learning_rate": 0.00019999363014574836, - "loss": 46.0, - "step": 47001 - }, - { - "epoch": 3.5936311332836364, - "grad_norm": 0.0019419463351368904, - "learning_rate": 0.0001999936298746388, - "loss": 46.0, - "step": 47002 - }, - { - "epoch": 3.593707590267026, - "grad_norm": 0.0015551402466371655, - "learning_rate": 0.0001999936296035235, - "loss": 46.0, - "step": 47003 - }, - { - "epoch": 3.593784047250416, - "grad_norm": 0.0018266671104356647, - "learning_rate": 0.00019999362933240242, - "loss": 46.0, - "step": 47004 - }, - { - "epoch": 3.593860504233805, - "grad_norm": 0.0014194740215316415, - "learning_rate": 0.00019999362906127557, - "loss": 46.0, - "step": 47005 - }, - { - "epoch": 3.593936961217195, - "grad_norm": 0.002629917347803712, - "learning_rate": 0.00019999362879014293, - "loss": 46.0, - "step": 47006 - }, - { - "epoch": 3.5940134182005847, - "grad_norm": 0.0026030107401311398, - "learning_rate": 0.00019999362851900454, - "loss": 46.0, - "step": 47007 - }, - { - "epoch": 3.5940898751839745, - "grad_norm": 0.00335359713062644, - "learning_rate": 0.00019999362824786037, - "loss": 46.0, - "step": 47008 - }, - { - "epoch": 3.5941663321673643, - "grad_norm": 0.0018676059553399682, - "learning_rate": 0.00019999362797671043, - "loss": 46.0, - "step": 47009 - }, - { - "epoch": 3.594242789150754, - "grad_norm": 0.003634584601968527, - "learning_rate": 0.00019999362770555475, - "loss": 46.0, - "step": 47010 - }, - { - "epoch": 3.5943192461341438, - "grad_norm": 0.0021456449758261442, - "learning_rate": 0.00019999362743439327, - "loss": 46.0, - "step": 47011 - }, - { - "epoch": 3.5943957031175335, - "grad_norm": 0.0020891083404421806, - "learning_rate": 0.00019999362716322604, - "loss": 46.0, - "step": 47012 - }, - { - "epoch": 3.5944721601009233, - "grad_norm": 0.0031798265408724546, - "learning_rate": 0.00019999362689205303, - "loss": 46.0, - "step": 47013 - }, - { - "epoch": 3.594548617084313, - "grad_norm": 0.003364681266248226, - "learning_rate": 0.00019999362662087423, - "loss": 46.0, - "step": 47014 - }, - { - "epoch": 3.594625074067703, - "grad_norm": 0.0013795975828543305, - "learning_rate": 0.0001999936263496897, - "loss": 46.0, - "step": 47015 - }, - { - "epoch": 3.5947015310510926, - "grad_norm": 0.001853947527706623, - "learning_rate": 0.00019999362607849935, - "loss": 46.0, - "step": 47016 - }, - { - "epoch": 3.5947779880344823, - "grad_norm": 0.001709077856503427, - "learning_rate": 0.00019999362580730328, - "loss": 46.0, - "step": 47017 - }, - { - "epoch": 3.5948544450178717, - "grad_norm": 0.004602494183927774, - "learning_rate": 0.00019999362553610144, - "loss": 46.0, - "step": 47018 - }, - { - "epoch": 3.5949309020012614, - "grad_norm": 0.0013291046489030123, - "learning_rate": 0.00019999362526489382, - "loss": 46.0, - "step": 47019 - }, - { - "epoch": 3.595007358984651, - "grad_norm": 0.005183434579521418, - "learning_rate": 0.0001999936249936804, - "loss": 46.0, - "step": 47020 - }, - { - "epoch": 3.595083815968041, - "grad_norm": 0.002122211502864957, - "learning_rate": 0.00019999362472246127, - "loss": 46.0, - "step": 47021 - }, - { - "epoch": 3.5951602729514307, - "grad_norm": 0.0031986648682504892, - "learning_rate": 0.00019999362445123633, - "loss": 46.0, - "step": 47022 - }, - { - "epoch": 3.5952367299348205, - "grad_norm": 0.0030911096837371588, - "learning_rate": 0.00019999362418000565, - "loss": 46.0, - "step": 47023 - }, - { - "epoch": 3.5953131869182102, - "grad_norm": 0.0030629821121692657, - "learning_rate": 0.00019999362390876917, - "loss": 46.0, - "step": 47024 - }, - { - "epoch": 3.5953896439016, - "grad_norm": 0.0014020544476807117, - "learning_rate": 0.0001999936236375269, - "loss": 46.0, - "step": 47025 - }, - { - "epoch": 3.5954661008849897, - "grad_norm": 0.015725765377283096, - "learning_rate": 0.0001999936233662789, - "loss": 46.0, - "step": 47026 - }, - { - "epoch": 3.595542557868379, - "grad_norm": 0.0037674284540116787, - "learning_rate": 0.00019999362309502513, - "loss": 46.0, - "step": 47027 - }, - { - "epoch": 3.595619014851769, - "grad_norm": 0.0011838682694360614, - "learning_rate": 0.0001999936228237656, - "loss": 46.0, - "step": 47028 - }, - { - "epoch": 3.5956954718351586, - "grad_norm": 0.0010207684244960546, - "learning_rate": 0.00019999362255250028, - "loss": 46.0, - "step": 47029 - }, - { - "epoch": 3.5957719288185483, - "grad_norm": 0.0028875903226435184, - "learning_rate": 0.0001999936222812292, - "loss": 46.0, - "step": 47030 - }, - { - "epoch": 3.595848385801938, - "grad_norm": 0.002142914105206728, - "learning_rate": 0.00019999362200995234, - "loss": 46.0, - "step": 47031 - }, - { - "epoch": 3.595924842785328, - "grad_norm": 0.0011943209683522582, - "learning_rate": 0.00019999362173866972, - "loss": 46.0, - "step": 47032 - }, - { - "epoch": 3.5960012997687176, - "grad_norm": 0.0009661823278293014, - "learning_rate": 0.00019999362146738133, - "loss": 46.0, - "step": 47033 - }, - { - "epoch": 3.5960777567521074, - "grad_norm": 0.002333032200112939, - "learning_rate": 0.00019999362119608717, - "loss": 46.0, - "step": 47034 - }, - { - "epoch": 3.596154213735497, - "grad_norm": 0.002394559793174267, - "learning_rate": 0.00019999362092478723, - "loss": 46.0, - "step": 47035 - }, - { - "epoch": 3.596230670718887, - "grad_norm": 0.0026340153999626637, - "learning_rate": 0.00019999362065348155, - "loss": 46.0, - "step": 47036 - }, - { - "epoch": 3.5963071277022767, - "grad_norm": 0.003347923280671239, - "learning_rate": 0.0001999936203821701, - "loss": 46.0, - "step": 47037 - }, - { - "epoch": 3.5963835846856664, - "grad_norm": 0.002986322855576873, - "learning_rate": 0.00019999362011085287, - "loss": 46.0, - "step": 47038 - }, - { - "epoch": 3.596460041669056, - "grad_norm": 0.0015189097030088305, - "learning_rate": 0.00019999361983952984, - "loss": 46.0, - "step": 47039 - }, - { - "epoch": 3.5965364986524455, - "grad_norm": 0.0010086576221510768, - "learning_rate": 0.00019999361956820106, - "loss": 46.0, - "step": 47040 - }, - { - "epoch": 3.5966129556358353, - "grad_norm": 0.0021347617730498314, - "learning_rate": 0.00019999361929686654, - "loss": 46.0, - "step": 47041 - }, - { - "epoch": 3.596689412619225, - "grad_norm": 0.0006580731715075672, - "learning_rate": 0.0001999936190255262, - "loss": 46.0, - "step": 47042 - }, - { - "epoch": 3.596765869602615, - "grad_norm": 0.002055598422884941, - "learning_rate": 0.00019999361875418012, - "loss": 46.0, - "step": 47043 - }, - { - "epoch": 3.5968423265860046, - "grad_norm": 0.004377503879368305, - "learning_rate": 0.0001999936184828283, - "loss": 46.0, - "step": 47044 - }, - { - "epoch": 3.5969187835693943, - "grad_norm": 0.0024345177225768566, - "learning_rate": 0.00019999361821147066, - "loss": 46.0, - "step": 47045 - }, - { - "epoch": 3.596995240552784, - "grad_norm": 0.0008339352207258344, - "learning_rate": 0.00019999361794010727, - "loss": 46.0, - "step": 47046 - }, - { - "epoch": 3.597071697536174, - "grad_norm": 0.007169728167355061, - "learning_rate": 0.00019999361766873813, - "loss": 46.0, - "step": 47047 - }, - { - "epoch": 3.597148154519563, - "grad_norm": 0.0028401638846844435, - "learning_rate": 0.0001999936173973632, - "loss": 46.0, - "step": 47048 - }, - { - "epoch": 3.597224611502953, - "grad_norm": 0.00604660389944911, - "learning_rate": 0.0001999936171259825, - "loss": 46.0, - "step": 47049 - }, - { - "epoch": 3.5973010684863427, - "grad_norm": 0.0017167206387966871, - "learning_rate": 0.00019999361685459603, - "loss": 46.0, - "step": 47050 - }, - { - "epoch": 3.5973775254697324, - "grad_norm": 0.004143692087382078, - "learning_rate": 0.0001999936165832038, - "loss": 46.0, - "step": 47051 - }, - { - "epoch": 3.597453982453122, - "grad_norm": 0.0026565559674054384, - "learning_rate": 0.0001999936163118058, - "loss": 46.0, - "step": 47052 - }, - { - "epoch": 3.597530439436512, - "grad_norm": 0.0006196071044541895, - "learning_rate": 0.00019999361604040202, - "loss": 46.0, - "step": 47053 - }, - { - "epoch": 3.5976068964199017, - "grad_norm": 0.0013243438443168998, - "learning_rate": 0.00019999361576899247, - "loss": 46.0, - "step": 47054 - }, - { - "epoch": 3.5976833534032915, - "grad_norm": 0.0015893429517745972, - "learning_rate": 0.00019999361549757718, - "loss": 46.0, - "step": 47055 - }, - { - "epoch": 3.5977598103866812, - "grad_norm": 0.0025597589556127787, - "learning_rate": 0.0001999936152261561, - "loss": 46.0, - "step": 47056 - }, - { - "epoch": 3.597836267370071, - "grad_norm": 0.0022576586343348026, - "learning_rate": 0.00019999361495472924, - "loss": 46.0, - "step": 47057 - }, - { - "epoch": 3.5979127243534608, - "grad_norm": 0.0008624986512586474, - "learning_rate": 0.00019999361468329662, - "loss": 46.0, - "step": 47058 - }, - { - "epoch": 3.5979891813368505, - "grad_norm": 0.001514685107395053, - "learning_rate": 0.00019999361441185823, - "loss": 46.0, - "step": 47059 - }, - { - "epoch": 3.5980656383202403, - "grad_norm": 0.0030874779913574457, - "learning_rate": 0.0001999936141404141, - "loss": 46.0, - "step": 47060 - }, - { - "epoch": 3.59814209530363, - "grad_norm": 0.004635296296328306, - "learning_rate": 0.00019999361386896416, - "loss": 46.0, - "step": 47061 - }, - { - "epoch": 3.5982185522870194, - "grad_norm": 0.0020104851573705673, - "learning_rate": 0.00019999361359750845, - "loss": 46.0, - "step": 47062 - }, - { - "epoch": 3.598295009270409, - "grad_norm": 0.0032096034847199917, - "learning_rate": 0.000199993613326047, - "loss": 46.0, - "step": 47063 - }, - { - "epoch": 3.598371466253799, - "grad_norm": 0.0016590581508353353, - "learning_rate": 0.00019999361305457977, - "loss": 46.0, - "step": 47064 - }, - { - "epoch": 3.5984479232371887, - "grad_norm": 0.0018877660622820258, - "learning_rate": 0.0001999936127831068, - "loss": 46.0, - "step": 47065 - }, - { - "epoch": 3.5985243802205784, - "grad_norm": 0.0034017807338386774, - "learning_rate": 0.00019999361251162801, - "loss": 46.0, - "step": 47066 - }, - { - "epoch": 3.598600837203968, - "grad_norm": 0.0011026897700503469, - "learning_rate": 0.0001999936122401435, - "loss": 46.0, - "step": 47067 - }, - { - "epoch": 3.598677294187358, - "grad_norm": 0.0019213076448068023, - "learning_rate": 0.00019999361196865317, - "loss": 46.0, - "step": 47068 - }, - { - "epoch": 3.5987537511707477, - "grad_norm": 0.0013271161587908864, - "learning_rate": 0.00019999361169715707, - "loss": 46.0, - "step": 47069 - }, - { - "epoch": 3.598830208154137, - "grad_norm": 0.001317350659519434, - "learning_rate": 0.00019999361142565523, - "loss": 46.0, - "step": 47070 - }, - { - "epoch": 3.5989066651375268, - "grad_norm": 0.003212464740499854, - "learning_rate": 0.00019999361115414762, - "loss": 46.0, - "step": 47071 - }, - { - "epoch": 3.5989831221209165, - "grad_norm": 0.0014197358395904303, - "learning_rate": 0.00019999361088263423, - "loss": 46.0, - "step": 47072 - }, - { - "epoch": 3.5990595791043063, - "grad_norm": 0.0031482691410928965, - "learning_rate": 0.0001999936106111151, - "loss": 46.0, - "step": 47073 - }, - { - "epoch": 3.599136036087696, - "grad_norm": 0.0017301241168752313, - "learning_rate": 0.00019999361033959016, - "loss": 46.0, - "step": 47074 - }, - { - "epoch": 3.599212493071086, - "grad_norm": 0.0020927705336362123, - "learning_rate": 0.00019999361006805945, - "loss": 46.0, - "step": 47075 - }, - { - "epoch": 3.5992889500544756, - "grad_norm": 0.0015665240352973342, - "learning_rate": 0.00019999360979652302, - "loss": 46.0, - "step": 47076 - }, - { - "epoch": 3.5993654070378653, - "grad_norm": 0.005547032225877047, - "learning_rate": 0.00019999360952498076, - "loss": 46.0, - "step": 47077 - }, - { - "epoch": 3.599441864021255, - "grad_norm": 0.0004632631316781044, - "learning_rate": 0.0001999936092534328, - "loss": 46.0, - "step": 47078 - }, - { - "epoch": 3.599518321004645, - "grad_norm": 0.001670603291131556, - "learning_rate": 0.000199993608981879, - "loss": 46.0, - "step": 47079 - }, - { - "epoch": 3.5995947779880346, - "grad_norm": 0.0020996350795030594, - "learning_rate": 0.00019999360871031946, - "loss": 46.0, - "step": 47080 - }, - { - "epoch": 3.5996712349714244, - "grad_norm": 0.0014863634714856744, - "learning_rate": 0.00019999360843875417, - "loss": 46.0, - "step": 47081 - }, - { - "epoch": 3.599747691954814, - "grad_norm": 0.0017067798180505633, - "learning_rate": 0.0001999936081671831, - "loss": 46.0, - "step": 47082 - }, - { - "epoch": 3.599824148938204, - "grad_norm": 0.0020308925304561853, - "learning_rate": 0.00019999360789560623, - "loss": 46.0, - "step": 47083 - }, - { - "epoch": 3.599900605921593, - "grad_norm": 0.0007679708651266992, - "learning_rate": 0.00019999360762402364, - "loss": 46.0, - "step": 47084 - }, - { - "epoch": 3.599977062904983, - "grad_norm": 0.0025735548697412014, - "learning_rate": 0.00019999360735243525, - "loss": 46.0, - "step": 47085 - }, - { - "epoch": 3.6000535198883727, - "grad_norm": 0.0012532372493296862, - "learning_rate": 0.0001999936070808411, - "loss": 46.0, - "step": 47086 - }, - { - "epoch": 3.6001299768717625, - "grad_norm": 0.0047006201930344105, - "learning_rate": 0.00019999360680924118, - "loss": 46.0, - "step": 47087 - }, - { - "epoch": 3.6002064338551523, - "grad_norm": 0.0008170231594704092, - "learning_rate": 0.00019999360653763547, - "loss": 46.0, - "step": 47088 - }, - { - "epoch": 3.600282890838542, - "grad_norm": 0.0015357394004240632, - "learning_rate": 0.00019999360626602402, - "loss": 46.0, - "step": 47089 - }, - { - "epoch": 3.600359347821932, - "grad_norm": 0.0016586343990638852, - "learning_rate": 0.0001999936059944068, - "loss": 46.0, - "step": 47090 - }, - { - "epoch": 3.6004358048053215, - "grad_norm": 0.0013088248670101166, - "learning_rate": 0.00019999360572278381, - "loss": 46.0, - "step": 47091 - }, - { - "epoch": 3.600512261788711, - "grad_norm": 0.0011919861426576972, - "learning_rate": 0.00019999360545115504, - "loss": 46.0, - "step": 47092 - }, - { - "epoch": 3.6005887187721006, - "grad_norm": 0.00356575520709157, - "learning_rate": 0.00019999360517952052, - "loss": 46.0, - "step": 47093 - }, - { - "epoch": 3.6006651757554904, - "grad_norm": 0.0015427334001287818, - "learning_rate": 0.0001999936049078802, - "loss": 46.0, - "step": 47094 - }, - { - "epoch": 3.60074163273888, - "grad_norm": 0.0019467585952952504, - "learning_rate": 0.0001999936046362341, - "loss": 46.0, - "step": 47095 - }, - { - "epoch": 3.60081808972227, - "grad_norm": 0.0032476249616593122, - "learning_rate": 0.00019999360436458229, - "loss": 46.0, - "step": 47096 - }, - { - "epoch": 3.6008945467056597, - "grad_norm": 0.0006188943516463041, - "learning_rate": 0.00019999360409292467, - "loss": 46.0, - "step": 47097 - }, - { - "epoch": 3.6009710036890494, - "grad_norm": 0.0015529319643974304, - "learning_rate": 0.00019999360382126126, - "loss": 46.0, - "step": 47098 - }, - { - "epoch": 3.601047460672439, - "grad_norm": 0.0014996129320934415, - "learning_rate": 0.00019999360354959215, - "loss": 46.0, - "step": 47099 - }, - { - "epoch": 3.601123917655829, - "grad_norm": 0.002248901640996337, - "learning_rate": 0.0001999936032779172, - "loss": 46.0, - "step": 47100 - }, - { - "epoch": 3.6012003746392187, - "grad_norm": 0.002364114858210087, - "learning_rate": 0.00019999360300623653, - "loss": 46.0, - "step": 47101 - }, - { - "epoch": 3.6012768316226085, - "grad_norm": 0.0019309918861836195, - "learning_rate": 0.00019999360273455005, - "loss": 46.0, - "step": 47102 - }, - { - "epoch": 3.6013532886059982, - "grad_norm": 0.003945961594581604, - "learning_rate": 0.00019999360246285782, - "loss": 46.0, - "step": 47103 - }, - { - "epoch": 3.601429745589388, - "grad_norm": 0.004926863592118025, - "learning_rate": 0.00019999360219115985, - "loss": 46.0, - "step": 47104 - }, - { - "epoch": 3.6015062025727778, - "grad_norm": 0.0006436780677177012, - "learning_rate": 0.0001999936019194561, - "loss": 46.0, - "step": 47105 - }, - { - "epoch": 3.601582659556167, - "grad_norm": 0.0018002215074375272, - "learning_rate": 0.00019999360164774658, - "loss": 46.0, - "step": 47106 - }, - { - "epoch": 3.601659116539557, - "grad_norm": 0.0005858491058461368, - "learning_rate": 0.00019999360137603126, - "loss": 46.0, - "step": 47107 - }, - { - "epoch": 3.6017355735229466, - "grad_norm": 0.0025122312363237143, - "learning_rate": 0.00019999360110431016, - "loss": 46.0, - "step": 47108 - }, - { - "epoch": 3.6018120305063364, - "grad_norm": 0.0017562670400366187, - "learning_rate": 0.00019999360083258335, - "loss": 46.0, - "step": 47109 - }, - { - "epoch": 3.601888487489726, - "grad_norm": 0.0018919516587629914, - "learning_rate": 0.00019999360056085073, - "loss": 46.0, - "step": 47110 - }, - { - "epoch": 3.601964944473116, - "grad_norm": 0.0019179179798811674, - "learning_rate": 0.00019999360028911234, - "loss": 46.0, - "step": 47111 - }, - { - "epoch": 3.6020414014565056, - "grad_norm": 0.002963228849694133, - "learning_rate": 0.0001999936000173682, - "loss": 46.0, - "step": 47112 - }, - { - "epoch": 3.6021178584398954, - "grad_norm": 0.0022020824253559113, - "learning_rate": 0.00019999359974561827, - "loss": 46.0, - "step": 47113 - }, - { - "epoch": 3.6021943154232847, - "grad_norm": 0.004007265437394381, - "learning_rate": 0.0001999935994738626, - "loss": 46.0, - "step": 47114 - }, - { - "epoch": 3.6022707724066745, - "grad_norm": 0.0022375090047717094, - "learning_rate": 0.00019999359920210114, - "loss": 46.0, - "step": 47115 - }, - { - "epoch": 3.6023472293900642, - "grad_norm": 0.0020432686433196068, - "learning_rate": 0.0001999935989303339, - "loss": 46.0, - "step": 47116 - }, - { - "epoch": 3.602423686373454, - "grad_norm": 0.006760394666343927, - "learning_rate": 0.0001999935986585609, - "loss": 46.0, - "step": 47117 - }, - { - "epoch": 3.6025001433568438, - "grad_norm": 0.0006727928994223475, - "learning_rate": 0.00019999359838678216, - "loss": 46.0, - "step": 47118 - }, - { - "epoch": 3.6025766003402335, - "grad_norm": 0.0009790933690965176, - "learning_rate": 0.00019999359811499761, - "loss": 46.0, - "step": 47119 - }, - { - "epoch": 3.6026530573236233, - "grad_norm": 0.001795065589249134, - "learning_rate": 0.00019999359784320732, - "loss": 46.0, - "step": 47120 - }, - { - "epoch": 3.602729514307013, - "grad_norm": 0.002396184951066971, - "learning_rate": 0.00019999359757141125, - "loss": 46.0, - "step": 47121 - }, - { - "epoch": 3.602805971290403, - "grad_norm": 0.004131458234041929, - "learning_rate": 0.00019999359729960944, - "loss": 46.0, - "step": 47122 - }, - { - "epoch": 3.6028824282737926, - "grad_norm": 0.001958385808393359, - "learning_rate": 0.0001999935970278018, - "loss": 46.0, - "step": 47123 - }, - { - "epoch": 3.6029588852571823, - "grad_norm": 0.0031765205785632133, - "learning_rate": 0.00019999359675598844, - "loss": 46.0, - "step": 47124 - }, - { - "epoch": 3.603035342240572, - "grad_norm": 0.0028282124549150467, - "learning_rate": 0.00019999359648416928, - "loss": 46.0, - "step": 47125 - }, - { - "epoch": 3.603111799223962, - "grad_norm": 0.0030555317644029856, - "learning_rate": 0.00019999359621234437, - "loss": 46.0, - "step": 47126 - }, - { - "epoch": 3.6031882562073516, - "grad_norm": 0.0013310738140717149, - "learning_rate": 0.0001999935959405137, - "loss": 46.0, - "step": 47127 - }, - { - "epoch": 3.603264713190741, - "grad_norm": 0.002595169236883521, - "learning_rate": 0.00019999359566867723, - "loss": 46.0, - "step": 47128 - }, - { - "epoch": 3.6033411701741307, - "grad_norm": 0.002087588654831052, - "learning_rate": 0.000199993595396835, - "loss": 46.0, - "step": 47129 - }, - { - "epoch": 3.6034176271575205, - "grad_norm": 0.00139293831307441, - "learning_rate": 0.00019999359512498703, - "loss": 46.0, - "step": 47130 - }, - { - "epoch": 3.60349408414091, - "grad_norm": 0.0032348160166293383, - "learning_rate": 0.00019999359485313326, - "loss": 46.0, - "step": 47131 - }, - { - "epoch": 3.6035705411243, - "grad_norm": 0.001367839053273201, - "learning_rate": 0.00019999359458127374, - "loss": 46.0, - "step": 47132 - }, - { - "epoch": 3.6036469981076897, - "grad_norm": 0.0026313778944313526, - "learning_rate": 0.00019999359430940842, - "loss": 46.0, - "step": 47133 - }, - { - "epoch": 3.6037234550910795, - "grad_norm": 0.0015562853077426553, - "learning_rate": 0.00019999359403753735, - "loss": 46.0, - "step": 47134 - }, - { - "epoch": 3.6037999120744693, - "grad_norm": 0.0026862341910600662, - "learning_rate": 0.0001999935937656605, - "loss": 46.0, - "step": 47135 - }, - { - "epoch": 3.6038763690578586, - "grad_norm": 0.004687380976974964, - "learning_rate": 0.00019999359349377792, - "loss": 46.0, - "step": 47136 - }, - { - "epoch": 3.6039528260412483, - "grad_norm": 0.0017340471968054771, - "learning_rate": 0.00019999359322188953, - "loss": 46.0, - "step": 47137 - }, - { - "epoch": 3.604029283024638, - "grad_norm": 0.0029415760654956102, - "learning_rate": 0.0001999935929499954, - "loss": 46.0, - "step": 47138 - }, - { - "epoch": 3.604105740008028, - "grad_norm": 0.0027057260740548372, - "learning_rate": 0.0001999935926780955, - "loss": 46.0, - "step": 47139 - }, - { - "epoch": 3.6041821969914176, - "grad_norm": 0.0014860716182738543, - "learning_rate": 0.0001999935924061898, - "loss": 46.0, - "step": 47140 - }, - { - "epoch": 3.6042586539748074, - "grad_norm": 0.0011507697636261582, - "learning_rate": 0.00019999359213427836, - "loss": 46.0, - "step": 47141 - }, - { - "epoch": 3.604335110958197, - "grad_norm": 0.0011245335917919874, - "learning_rate": 0.00019999359186236113, - "loss": 46.0, - "step": 47142 - }, - { - "epoch": 3.604411567941587, - "grad_norm": 0.004144533071666956, - "learning_rate": 0.00019999359159043816, - "loss": 46.0, - "step": 47143 - }, - { - "epoch": 3.6044880249249767, - "grad_norm": 0.004018872510641813, - "learning_rate": 0.00019999359131850936, - "loss": 46.0, - "step": 47144 - }, - { - "epoch": 3.6045644819083664, - "grad_norm": 0.004291102290153503, - "learning_rate": 0.00019999359104657484, - "loss": 46.0, - "step": 47145 - }, - { - "epoch": 3.604640938891756, - "grad_norm": 0.006488231476396322, - "learning_rate": 0.00019999359077463454, - "loss": 46.0, - "step": 47146 - }, - { - "epoch": 3.604717395875146, - "grad_norm": 0.0010807195212692022, - "learning_rate": 0.00019999359050268848, - "loss": 46.0, - "step": 47147 - }, - { - "epoch": 3.6047938528585357, - "grad_norm": 0.010882716625928879, - "learning_rate": 0.00019999359023073666, - "loss": 46.0, - "step": 47148 - }, - { - "epoch": 3.604870309841925, - "grad_norm": 0.002118566771969199, - "learning_rate": 0.00019999358995877905, - "loss": 46.0, - "step": 47149 - }, - { - "epoch": 3.604946766825315, - "grad_norm": 0.002121523953974247, - "learning_rate": 0.00019999358968681566, - "loss": 46.0, - "step": 47150 - }, - { - "epoch": 3.6050232238087045, - "grad_norm": 0.000612937321420759, - "learning_rate": 0.00019999358941484653, - "loss": 46.0, - "step": 47151 - }, - { - "epoch": 3.6050996807920943, - "grad_norm": 0.0010346367489546537, - "learning_rate": 0.00019999358914287162, - "loss": 46.0, - "step": 47152 - }, - { - "epoch": 3.605176137775484, - "grad_norm": 0.0013227801537141204, - "learning_rate": 0.00019999358887089094, - "loss": 46.0, - "step": 47153 - }, - { - "epoch": 3.605252594758874, - "grad_norm": 0.00315685011446476, - "learning_rate": 0.0001999935885989045, - "loss": 46.0, - "step": 47154 - }, - { - "epoch": 3.6053290517422636, - "grad_norm": 0.0027074345853179693, - "learning_rate": 0.00019999358832691226, - "loss": 46.0, - "step": 47155 - }, - { - "epoch": 3.6054055087256534, - "grad_norm": 0.002570002106949687, - "learning_rate": 0.0001999935880549143, - "loss": 46.0, - "step": 47156 - }, - { - "epoch": 3.605481965709043, - "grad_norm": 0.0026281271129846573, - "learning_rate": 0.00019999358778291054, - "loss": 46.0, - "step": 47157 - }, - { - "epoch": 3.6055584226924324, - "grad_norm": 0.0032254806719720364, - "learning_rate": 0.000199993587510901, - "loss": 46.0, - "step": 47158 - }, - { - "epoch": 3.605634879675822, - "grad_norm": 0.0015273073222488165, - "learning_rate": 0.0001999935872388857, - "loss": 46.0, - "step": 47159 - }, - { - "epoch": 3.605711336659212, - "grad_norm": 0.0040751914493739605, - "learning_rate": 0.00019999358696686463, - "loss": 46.0, - "step": 47160 - }, - { - "epoch": 3.6057877936426017, - "grad_norm": 0.003368379781022668, - "learning_rate": 0.00019999358669483782, - "loss": 46.0, - "step": 47161 - }, - { - "epoch": 3.6058642506259915, - "grad_norm": 0.003940230701118708, - "learning_rate": 0.0001999935864228052, - "loss": 46.0, - "step": 47162 - }, - { - "epoch": 3.6059407076093812, - "grad_norm": 0.0015386902960017323, - "learning_rate": 0.00019999358615076685, - "loss": 46.0, - "step": 47163 - }, - { - "epoch": 3.606017164592771, - "grad_norm": 0.0012139686150476336, - "learning_rate": 0.0001999935858787227, - "loss": 46.0, - "step": 47164 - }, - { - "epoch": 3.6060936215761608, - "grad_norm": 0.0013903678627684712, - "learning_rate": 0.00019999358560667278, - "loss": 46.0, - "step": 47165 - }, - { - "epoch": 3.6061700785595505, - "grad_norm": 0.001733639044687152, - "learning_rate": 0.0001999935853346171, - "loss": 46.0, - "step": 47166 - }, - { - "epoch": 3.6062465355429403, - "grad_norm": 0.0012850891798734665, - "learning_rate": 0.00019999358506255565, - "loss": 46.0, - "step": 47167 - }, - { - "epoch": 3.60632299252633, - "grad_norm": 0.002491175662726164, - "learning_rate": 0.00019999358479048845, - "loss": 46.0, - "step": 47168 - }, - { - "epoch": 3.60639944950972, - "grad_norm": 0.0014382494846358895, - "learning_rate": 0.00019999358451841545, - "loss": 46.0, - "step": 47169 - }, - { - "epoch": 3.6064759064931096, - "grad_norm": 0.0009760347893461585, - "learning_rate": 0.0001999935842463367, - "loss": 46.0, - "step": 47170 - }, - { - "epoch": 3.606552363476499, - "grad_norm": 0.0008436118368990719, - "learning_rate": 0.00019999358397425218, - "loss": 46.0, - "step": 47171 - }, - { - "epoch": 3.6066288204598886, - "grad_norm": 0.0076487017795443535, - "learning_rate": 0.0001999935837021619, - "loss": 46.0, - "step": 47172 - }, - { - "epoch": 3.6067052774432784, - "grad_norm": 0.002492173109203577, - "learning_rate": 0.00019999358343006582, - "loss": 46.0, - "step": 47173 - }, - { - "epoch": 3.606781734426668, - "grad_norm": 0.004754967056214809, - "learning_rate": 0.00019999358315796398, - "loss": 46.0, - "step": 47174 - }, - { - "epoch": 3.606858191410058, - "grad_norm": 0.0028412227984517813, - "learning_rate": 0.0001999935828858564, - "loss": 46.0, - "step": 47175 - }, - { - "epoch": 3.6069346483934477, - "grad_norm": 0.0014061275869607925, - "learning_rate": 0.00019999358261374304, - "loss": 46.0, - "step": 47176 - }, - { - "epoch": 3.6070111053768374, - "grad_norm": 0.005551229231059551, - "learning_rate": 0.00019999358234162388, - "loss": 46.0, - "step": 47177 - }, - { - "epoch": 3.607087562360227, - "grad_norm": 0.002698456170037389, - "learning_rate": 0.00019999358206949897, - "loss": 46.0, - "step": 47178 - }, - { - "epoch": 3.6071640193436165, - "grad_norm": 0.0009287746506743133, - "learning_rate": 0.0001999935817973683, - "loss": 46.0, - "step": 47179 - }, - { - "epoch": 3.6072404763270063, - "grad_norm": 0.0007593818590976298, - "learning_rate": 0.00019999358152523184, - "loss": 46.0, - "step": 47180 - }, - { - "epoch": 3.607316933310396, - "grad_norm": 0.00284673017449677, - "learning_rate": 0.00019999358125308964, - "loss": 46.0, - "step": 47181 - }, - { - "epoch": 3.607393390293786, - "grad_norm": 0.0020804463420063257, - "learning_rate": 0.00019999358098094164, - "loss": 46.0, - "step": 47182 - }, - { - "epoch": 3.6074698472771756, - "grad_norm": 0.006738160736858845, - "learning_rate": 0.00019999358070878792, - "loss": 46.0, - "step": 47183 - }, - { - "epoch": 3.6075463042605653, - "grad_norm": 0.007507022935897112, - "learning_rate": 0.00019999358043662838, - "loss": 46.0, - "step": 47184 - }, - { - "epoch": 3.607622761243955, - "grad_norm": 0.010416602715849876, - "learning_rate": 0.0001999935801644631, - "loss": 46.0, - "step": 47185 - }, - { - "epoch": 3.607699218227345, - "grad_norm": 0.003749399445950985, - "learning_rate": 0.00019999357989229202, - "loss": 46.0, - "step": 47186 - }, - { - "epoch": 3.6077756752107346, - "grad_norm": 0.0025361282750964165, - "learning_rate": 0.0001999935796201152, - "loss": 46.0, - "step": 47187 - }, - { - "epoch": 3.6078521321941244, - "grad_norm": 0.005283393897116184, - "learning_rate": 0.00019999357934793262, - "loss": 46.0, - "step": 47188 - }, - { - "epoch": 3.607928589177514, - "grad_norm": 0.003208644688129425, - "learning_rate": 0.00019999357907574423, - "loss": 46.0, - "step": 47189 - }, - { - "epoch": 3.608005046160904, - "grad_norm": 0.0030392338521778584, - "learning_rate": 0.0001999935788035501, - "loss": 46.0, - "step": 47190 - }, - { - "epoch": 3.6080815031442937, - "grad_norm": 0.0019993705209344625, - "learning_rate": 0.0001999935785313502, - "loss": 46.0, - "step": 47191 - }, - { - "epoch": 3.6081579601276834, - "grad_norm": 0.0029805595986545086, - "learning_rate": 0.00019999357825914454, - "loss": 46.0, - "step": 47192 - }, - { - "epoch": 3.6082344171110727, - "grad_norm": 0.0007770429365336895, - "learning_rate": 0.0001999935779869331, - "loss": 46.0, - "step": 47193 - }, - { - "epoch": 3.6083108740944625, - "grad_norm": 0.0035184675361961126, - "learning_rate": 0.00019999357771471586, - "loss": 46.0, - "step": 47194 - }, - { - "epoch": 3.6083873310778523, - "grad_norm": 0.0033095621038228273, - "learning_rate": 0.00019999357744249286, - "loss": 46.0, - "step": 47195 - }, - { - "epoch": 3.608463788061242, - "grad_norm": 0.0013891630806028843, - "learning_rate": 0.00019999357717026412, - "loss": 46.0, - "step": 47196 - }, - { - "epoch": 3.6085402450446318, - "grad_norm": 0.0023033798206597567, - "learning_rate": 0.00019999357689802963, - "loss": 46.0, - "step": 47197 - }, - { - "epoch": 3.6086167020280215, - "grad_norm": 0.0018179116304963827, - "learning_rate": 0.0001999935766257893, - "loss": 46.0, - "step": 47198 - }, - { - "epoch": 3.6086931590114113, - "grad_norm": 0.004559769295156002, - "learning_rate": 0.00019999357635354327, - "loss": 46.0, - "step": 47199 - }, - { - "epoch": 3.608769615994801, - "grad_norm": 0.00302033219486475, - "learning_rate": 0.00019999357608129143, - "loss": 46.0, - "step": 47200 - }, - { - "epoch": 3.6088460729781904, - "grad_norm": 0.0030741288792341948, - "learning_rate": 0.00019999357580903384, - "loss": 46.0, - "step": 47201 - }, - { - "epoch": 3.60892252996158, - "grad_norm": 0.003815828822553158, - "learning_rate": 0.00019999357553677048, - "loss": 46.0, - "step": 47202 - }, - { - "epoch": 3.60899898694497, - "grad_norm": 0.0027954436372965574, - "learning_rate": 0.00019999357526450133, - "loss": 46.0, - "step": 47203 - }, - { - "epoch": 3.6090754439283597, - "grad_norm": 0.0015756351640447974, - "learning_rate": 0.00019999357499222642, - "loss": 46.0, - "step": 47204 - }, - { - "epoch": 3.6091519009117494, - "grad_norm": 0.0029789507389068604, - "learning_rate": 0.00019999357471994577, - "loss": 46.0, - "step": 47205 - }, - { - "epoch": 3.609228357895139, - "grad_norm": 0.003662483999505639, - "learning_rate": 0.00019999357444765932, - "loss": 46.0, - "step": 47206 - }, - { - "epoch": 3.609304814878529, - "grad_norm": 0.0007836703443899751, - "learning_rate": 0.00019999357417536712, - "loss": 46.0, - "step": 47207 - }, - { - "epoch": 3.6093812718619187, - "grad_norm": 0.001132678589783609, - "learning_rate": 0.00019999357390306912, - "loss": 46.0, - "step": 47208 - }, - { - "epoch": 3.6094577288453085, - "grad_norm": 0.0031996930483728647, - "learning_rate": 0.00019999357363076537, - "loss": 46.0, - "step": 47209 - }, - { - "epoch": 3.6095341858286982, - "grad_norm": 0.0007871190900914371, - "learning_rate": 0.00019999357335845585, - "loss": 46.0, - "step": 47210 - }, - { - "epoch": 3.609610642812088, - "grad_norm": 0.003387199016287923, - "learning_rate": 0.00019999357308614056, - "loss": 46.0, - "step": 47211 - }, - { - "epoch": 3.6096870997954777, - "grad_norm": 0.002372474642470479, - "learning_rate": 0.00019999357281381952, - "loss": 46.0, - "step": 47212 - }, - { - "epoch": 3.6097635567788675, - "grad_norm": 0.0020967365708202124, - "learning_rate": 0.00019999357254149269, - "loss": 46.0, - "step": 47213 - }, - { - "epoch": 3.6098400137622573, - "grad_norm": 0.004114883486181498, - "learning_rate": 0.0001999935722691601, - "loss": 46.0, - "step": 47214 - }, - { - "epoch": 3.6099164707456466, - "grad_norm": 0.004482800140976906, - "learning_rate": 0.00019999357199682174, - "loss": 46.0, - "step": 47215 - }, - { - "epoch": 3.6099929277290363, - "grad_norm": 0.0024372737389057875, - "learning_rate": 0.00019999357172447758, - "loss": 46.0, - "step": 47216 - }, - { - "epoch": 3.610069384712426, - "grad_norm": 0.0022202099207788706, - "learning_rate": 0.0001999935714521277, - "loss": 46.0, - "step": 47217 - }, - { - "epoch": 3.610145841695816, - "grad_norm": 0.0026044619735330343, - "learning_rate": 0.00019999357117977203, - "loss": 46.0, - "step": 47218 - }, - { - "epoch": 3.6102222986792056, - "grad_norm": 0.001976800849661231, - "learning_rate": 0.0001999935709074106, - "loss": 46.0, - "step": 47219 - }, - { - "epoch": 3.6102987556625954, - "grad_norm": 0.003462227527052164, - "learning_rate": 0.00019999357063504335, - "loss": 46.0, - "step": 47220 - }, - { - "epoch": 3.610375212645985, - "grad_norm": 0.0028351584915071726, - "learning_rate": 0.00019999357036267038, - "loss": 46.0, - "step": 47221 - }, - { - "epoch": 3.610451669629375, - "grad_norm": 0.0008801952935755253, - "learning_rate": 0.00019999357009029166, - "loss": 46.0, - "step": 47222 - }, - { - "epoch": 3.6105281266127642, - "grad_norm": 0.0038246368058025837, - "learning_rate": 0.00019999356981790714, - "loss": 46.0, - "step": 47223 - }, - { - "epoch": 3.610604583596154, - "grad_norm": 0.003297134069725871, - "learning_rate": 0.00019999356954551685, - "loss": 46.0, - "step": 47224 - }, - { - "epoch": 3.6106810405795438, - "grad_norm": 0.009903750382363796, - "learning_rate": 0.00019999356927312078, - "loss": 46.0, - "step": 47225 - }, - { - "epoch": 3.6107574975629335, - "grad_norm": 0.0018430057680234313, - "learning_rate": 0.00019999356900071897, - "loss": 46.0, - "step": 47226 - }, - { - "epoch": 3.6108339545463233, - "grad_norm": 0.005019971169531345, - "learning_rate": 0.0001999935687283114, - "loss": 46.0, - "step": 47227 - }, - { - "epoch": 3.610910411529713, - "grad_norm": 0.0025201118551194668, - "learning_rate": 0.000199993568455898, - "loss": 46.0, - "step": 47228 - }, - { - "epoch": 3.610986868513103, - "grad_norm": 0.002068077214062214, - "learning_rate": 0.0001999935681834789, - "loss": 46.0, - "step": 47229 - }, - { - "epoch": 3.6110633254964926, - "grad_norm": 0.00215020845644176, - "learning_rate": 0.00019999356791105397, - "loss": 46.0, - "step": 47230 - }, - { - "epoch": 3.6111397824798823, - "grad_norm": 0.0011204774491488934, - "learning_rate": 0.00019999356763862331, - "loss": 46.0, - "step": 47231 - }, - { - "epoch": 3.611216239463272, - "grad_norm": 0.0012542122276499867, - "learning_rate": 0.0001999935673661869, - "loss": 46.0, - "step": 47232 - }, - { - "epoch": 3.611292696446662, - "grad_norm": 0.0031704932916909456, - "learning_rate": 0.0001999935670937447, - "loss": 46.0, - "step": 47233 - }, - { - "epoch": 3.6113691534300516, - "grad_norm": 0.003615632886067033, - "learning_rate": 0.0001999935668212967, - "loss": 46.0, - "step": 47234 - }, - { - "epoch": 3.6114456104134414, - "grad_norm": 0.0019761386793106794, - "learning_rate": 0.00019999356654884295, - "loss": 46.0, - "step": 47235 - }, - { - "epoch": 3.611522067396831, - "grad_norm": 0.0020734784193336964, - "learning_rate": 0.00019999356627638343, - "loss": 46.0, - "step": 47236 - }, - { - "epoch": 3.6115985243802204, - "grad_norm": 0.0018823568243533373, - "learning_rate": 0.00019999356600391814, - "loss": 46.0, - "step": 47237 - }, - { - "epoch": 3.61167498136361, - "grad_norm": 0.00300602032802999, - "learning_rate": 0.0001999935657314471, - "loss": 46.0, - "step": 47238 - }, - { - "epoch": 3.611751438347, - "grad_norm": 0.0037502057384699583, - "learning_rate": 0.0001999935654589703, - "loss": 46.0, - "step": 47239 - }, - { - "epoch": 3.6118278953303897, - "grad_norm": 0.004724597092717886, - "learning_rate": 0.00019999356518648768, - "loss": 46.0, - "step": 47240 - }, - { - "epoch": 3.6119043523137795, - "grad_norm": 0.0010078669292852283, - "learning_rate": 0.00019999356491399932, - "loss": 46.0, - "step": 47241 - }, - { - "epoch": 3.6119808092971692, - "grad_norm": 0.005235090386122465, - "learning_rate": 0.00019999356464150522, - "loss": 46.0, - "step": 47242 - }, - { - "epoch": 3.612057266280559, - "grad_norm": 0.0011519150575622916, - "learning_rate": 0.0001999935643690053, - "loss": 46.0, - "step": 47243 - }, - { - "epoch": 3.6121337232639488, - "grad_norm": 0.005454646423459053, - "learning_rate": 0.00019999356409649964, - "loss": 46.0, - "step": 47244 - }, - { - "epoch": 3.612210180247338, - "grad_norm": 0.0009252894087694585, - "learning_rate": 0.00019999356382398818, - "loss": 46.0, - "step": 47245 - }, - { - "epoch": 3.612286637230728, - "grad_norm": 0.006963848136365414, - "learning_rate": 0.00019999356355147099, - "loss": 46.0, - "step": 47246 - }, - { - "epoch": 3.6123630942141176, - "grad_norm": 0.006564173381775618, - "learning_rate": 0.00019999356327894802, - "loss": 46.0, - "step": 47247 - }, - { - "epoch": 3.6124395511975074, - "grad_norm": 0.0027480721473693848, - "learning_rate": 0.00019999356300641927, - "loss": 46.0, - "step": 47248 - }, - { - "epoch": 3.612516008180897, - "grad_norm": 0.0012267193524166942, - "learning_rate": 0.00019999356273388475, - "loss": 46.0, - "step": 47249 - }, - { - "epoch": 3.612592465164287, - "grad_norm": 0.0043920292519032955, - "learning_rate": 0.0001999935624613445, - "loss": 46.0, - "step": 47250 - }, - { - "epoch": 3.6126689221476767, - "grad_norm": 0.0039373235777020454, - "learning_rate": 0.00019999356218879842, - "loss": 46.0, - "step": 47251 - }, - { - "epoch": 3.6127453791310664, - "grad_norm": 0.0036451241467148066, - "learning_rate": 0.00019999356191624661, - "loss": 46.0, - "step": 47252 - }, - { - "epoch": 3.612821836114456, - "grad_norm": 0.0016169092850759625, - "learning_rate": 0.00019999356164368903, - "loss": 46.0, - "step": 47253 - }, - { - "epoch": 3.612898293097846, - "grad_norm": 0.002296799561008811, - "learning_rate": 0.00019999356137112567, - "loss": 46.0, - "step": 47254 - }, - { - "epoch": 3.6129747500812357, - "grad_norm": 0.0046439566649496555, - "learning_rate": 0.00019999356109855654, - "loss": 46.0, - "step": 47255 - }, - { - "epoch": 3.6130512070646255, - "grad_norm": 0.0033165907952934504, - "learning_rate": 0.00019999356082598164, - "loss": 46.0, - "step": 47256 - }, - { - "epoch": 3.613127664048015, - "grad_norm": 0.0026466366834938526, - "learning_rate": 0.00019999356055340096, - "loss": 46.0, - "step": 47257 - }, - { - "epoch": 3.613204121031405, - "grad_norm": 0.0049867830239236355, - "learning_rate": 0.00019999356028081456, - "loss": 46.0, - "step": 47258 - }, - { - "epoch": 3.6132805780147943, - "grad_norm": 0.0011484172428026795, - "learning_rate": 0.00019999356000822234, - "loss": 46.0, - "step": 47259 - }, - { - "epoch": 3.613357034998184, - "grad_norm": 0.0011415337212383747, - "learning_rate": 0.00019999355973562434, - "loss": 46.0, - "step": 47260 - }, - { - "epoch": 3.613433491981574, - "grad_norm": 0.002642180072143674, - "learning_rate": 0.00019999355946302062, - "loss": 46.0, - "step": 47261 - }, - { - "epoch": 3.6135099489649636, - "grad_norm": 0.001251637819223106, - "learning_rate": 0.0001999935591904111, - "loss": 46.0, - "step": 47262 - }, - { - "epoch": 3.6135864059483533, - "grad_norm": 0.002847895724698901, - "learning_rate": 0.00019999355891779584, - "loss": 46.0, - "step": 47263 - }, - { - "epoch": 3.613662862931743, - "grad_norm": 0.005133089609444141, - "learning_rate": 0.0001999935586451748, - "loss": 46.0, - "step": 47264 - }, - { - "epoch": 3.613739319915133, - "grad_norm": 0.00236779497936368, - "learning_rate": 0.00019999355837254797, - "loss": 46.0, - "step": 47265 - }, - { - "epoch": 3.6138157768985226, - "grad_norm": 0.00249236891977489, - "learning_rate": 0.00019999355809991538, - "loss": 46.0, - "step": 47266 - }, - { - "epoch": 3.613892233881912, - "grad_norm": 0.003141065128147602, - "learning_rate": 0.00019999355782727703, - "loss": 46.0, - "step": 47267 - }, - { - "epoch": 3.6139686908653017, - "grad_norm": 0.0022167351562529802, - "learning_rate": 0.0001999935575546329, - "loss": 46.0, - "step": 47268 - }, - { - "epoch": 3.6140451478486915, - "grad_norm": 0.001890449901111424, - "learning_rate": 0.00019999355728198302, - "loss": 46.0, - "step": 47269 - }, - { - "epoch": 3.6141216048320812, - "grad_norm": 0.0017392619047313929, - "learning_rate": 0.00019999355700932734, - "loss": 46.0, - "step": 47270 - }, - { - "epoch": 3.614198061815471, - "grad_norm": 0.0016764000756666064, - "learning_rate": 0.00019999355673666592, - "loss": 46.0, - "step": 47271 - }, - { - "epoch": 3.6142745187988607, - "grad_norm": 0.004622225649654865, - "learning_rate": 0.00019999355646399872, - "loss": 46.0, - "step": 47272 - }, - { - "epoch": 3.6143509757822505, - "grad_norm": 0.0005916609079577029, - "learning_rate": 0.00019999355619132572, - "loss": 46.0, - "step": 47273 - }, - { - "epoch": 3.6144274327656403, - "grad_norm": 0.0012697236379608512, - "learning_rate": 0.00019999355591864698, - "loss": 46.0, - "step": 47274 - }, - { - "epoch": 3.61450388974903, - "grad_norm": 0.0034948044922202826, - "learning_rate": 0.0001999935556459625, - "loss": 46.0, - "step": 47275 - }, - { - "epoch": 3.61458034673242, - "grad_norm": 0.0029641392175108194, - "learning_rate": 0.00019999355537327223, - "loss": 46.0, - "step": 47276 - }, - { - "epoch": 3.6146568037158096, - "grad_norm": 0.0010084653040394187, - "learning_rate": 0.00019999355510057616, - "loss": 46.0, - "step": 47277 - }, - { - "epoch": 3.6147332606991993, - "grad_norm": 0.0007937371265143156, - "learning_rate": 0.00019999355482787435, - "loss": 46.0, - "step": 47278 - }, - { - "epoch": 3.614809717682589, - "grad_norm": 0.0016434750286862254, - "learning_rate": 0.00019999355455516677, - "loss": 46.0, - "step": 47279 - }, - { - "epoch": 3.6148861746659784, - "grad_norm": 0.0033585482742637396, - "learning_rate": 0.0001999935542824534, - "loss": 46.0, - "step": 47280 - }, - { - "epoch": 3.614962631649368, - "grad_norm": 0.005722475238144398, - "learning_rate": 0.00019999355400973428, - "loss": 46.0, - "step": 47281 - }, - { - "epoch": 3.615039088632758, - "grad_norm": 0.0017641838639974594, - "learning_rate": 0.00019999355373700938, - "loss": 46.0, - "step": 47282 - }, - { - "epoch": 3.6151155456161477, - "grad_norm": 0.00762800732627511, - "learning_rate": 0.00019999355346427873, - "loss": 46.0, - "step": 47283 - }, - { - "epoch": 3.6151920025995374, - "grad_norm": 0.0025354577228426933, - "learning_rate": 0.0001999935531915423, - "loss": 46.0, - "step": 47284 - }, - { - "epoch": 3.615268459582927, - "grad_norm": 0.0017457692883908749, - "learning_rate": 0.00019999355291880008, - "loss": 46.0, - "step": 47285 - }, - { - "epoch": 3.615344916566317, - "grad_norm": 0.0011523324064910412, - "learning_rate": 0.00019999355264605214, - "loss": 46.0, - "step": 47286 - }, - { - "epoch": 3.6154213735497067, - "grad_norm": 0.0023378196638077497, - "learning_rate": 0.0001999935523732984, - "loss": 46.0, - "step": 47287 - }, - { - "epoch": 3.6154978305330965, - "grad_norm": 0.0016617362853139639, - "learning_rate": 0.0001999935521005389, - "loss": 46.0, - "step": 47288 - }, - { - "epoch": 3.615574287516486, - "grad_norm": 0.0019301587017253041, - "learning_rate": 0.00019999355182777362, - "loss": 46.0, - "step": 47289 - }, - { - "epoch": 3.6156507444998756, - "grad_norm": 0.005819922313094139, - "learning_rate": 0.00019999355155500258, - "loss": 46.0, - "step": 47290 - }, - { - "epoch": 3.6157272014832653, - "grad_norm": 0.0033543517347425222, - "learning_rate": 0.00019999355128222574, - "loss": 46.0, - "step": 47291 - }, - { - "epoch": 3.615803658466655, - "grad_norm": 0.00135278410743922, - "learning_rate": 0.00019999355100944316, - "loss": 46.0, - "step": 47292 - }, - { - "epoch": 3.615880115450045, - "grad_norm": 0.002793761668726802, - "learning_rate": 0.00019999355073665483, - "loss": 46.0, - "step": 47293 - }, - { - "epoch": 3.6159565724334346, - "grad_norm": 0.0035002746153622866, - "learning_rate": 0.00019999355046386067, - "loss": 46.0, - "step": 47294 - }, - { - "epoch": 3.6160330294168244, - "grad_norm": 0.013129138387739658, - "learning_rate": 0.0001999935501910608, - "loss": 46.0, - "step": 47295 - }, - { - "epoch": 3.616109486400214, - "grad_norm": 0.00368657149374485, - "learning_rate": 0.00019999354991825515, - "loss": 46.0, - "step": 47296 - }, - { - "epoch": 3.616185943383604, - "grad_norm": 0.00111564586404711, - "learning_rate": 0.0001999935496454437, - "loss": 46.0, - "step": 47297 - }, - { - "epoch": 3.6162624003669936, - "grad_norm": 0.001937650260515511, - "learning_rate": 0.0001999935493726265, - "loss": 46.0, - "step": 47298 - }, - { - "epoch": 3.6163388573503834, - "grad_norm": 0.0021980348974466324, - "learning_rate": 0.00019999354909980356, - "loss": 46.0, - "step": 47299 - }, - { - "epoch": 3.616415314333773, - "grad_norm": 0.003814734984189272, - "learning_rate": 0.00019999354882697481, - "loss": 46.0, - "step": 47300 - }, - { - "epoch": 3.616491771317163, - "grad_norm": 0.004749145358800888, - "learning_rate": 0.0001999935485541403, - "loss": 46.0, - "step": 47301 - }, - { - "epoch": 3.6165682283005522, - "grad_norm": 0.005444707814604044, - "learning_rate": 0.00019999354828130003, - "loss": 46.0, - "step": 47302 - }, - { - "epoch": 3.616644685283942, - "grad_norm": 0.0017532651545479894, - "learning_rate": 0.000199993548008454, - "loss": 46.0, - "step": 47303 - }, - { - "epoch": 3.6167211422673318, - "grad_norm": 0.000540543464012444, - "learning_rate": 0.00019999354773560216, - "loss": 46.0, - "step": 47304 - }, - { - "epoch": 3.6167975992507215, - "grad_norm": 0.0018666167743504047, - "learning_rate": 0.0001999935474627446, - "loss": 46.0, - "step": 47305 - }, - { - "epoch": 3.6168740562341113, - "grad_norm": 0.001620797673240304, - "learning_rate": 0.00019999354718988122, - "loss": 46.0, - "step": 47306 - }, - { - "epoch": 3.616950513217501, - "grad_norm": 0.003897682996466756, - "learning_rate": 0.00019999354691701212, - "loss": 46.0, - "step": 47307 - }, - { - "epoch": 3.617026970200891, - "grad_norm": 0.0010353117249906063, - "learning_rate": 0.00019999354664413724, - "loss": 46.0, - "step": 47308 - }, - { - "epoch": 3.6171034271842806, - "grad_norm": 0.0027613837737590075, - "learning_rate": 0.0001999935463712566, - "loss": 46.0, - "step": 47309 - }, - { - "epoch": 3.61717988416767, - "grad_norm": 0.004738753195852041, - "learning_rate": 0.00019999354609837015, - "loss": 46.0, - "step": 47310 - }, - { - "epoch": 3.6172563411510597, - "grad_norm": 0.001971774036064744, - "learning_rate": 0.00019999354582547798, - "loss": 46.0, - "step": 47311 - }, - { - "epoch": 3.6173327981344494, - "grad_norm": 0.0016679092077538371, - "learning_rate": 0.00019999354555258, - "loss": 46.0, - "step": 47312 - }, - { - "epoch": 3.617409255117839, - "grad_norm": 0.0034473752602934837, - "learning_rate": 0.00019999354527967626, - "loss": 46.0, - "step": 47313 - }, - { - "epoch": 3.617485712101229, - "grad_norm": 0.002821042202413082, - "learning_rate": 0.00019999354500676675, - "loss": 46.0, - "step": 47314 - }, - { - "epoch": 3.6175621690846187, - "grad_norm": 0.002369681838899851, - "learning_rate": 0.0001999935447338515, - "loss": 46.0, - "step": 47315 - }, - { - "epoch": 3.6176386260680085, - "grad_norm": 0.0009700052905827761, - "learning_rate": 0.00019999354446093045, - "loss": 46.0, - "step": 47316 - }, - { - "epoch": 3.617715083051398, - "grad_norm": 0.0037375895772129297, - "learning_rate": 0.00019999354418800364, - "loss": 46.0, - "step": 47317 - }, - { - "epoch": 3.617791540034788, - "grad_norm": 0.005681247916072607, - "learning_rate": 0.00019999354391507106, - "loss": 46.0, - "step": 47318 - }, - { - "epoch": 3.6178679970181777, - "grad_norm": 0.0063100517727434635, - "learning_rate": 0.0001999935436421327, - "loss": 46.0, - "step": 47319 - }, - { - "epoch": 3.6179444540015675, - "grad_norm": 0.003021263051778078, - "learning_rate": 0.00019999354336918857, - "loss": 46.0, - "step": 47320 - }, - { - "epoch": 3.6180209109849573, - "grad_norm": 0.0005241144099272788, - "learning_rate": 0.0001999935430962387, - "loss": 46.0, - "step": 47321 - }, - { - "epoch": 3.618097367968347, - "grad_norm": 0.00224726228043437, - "learning_rate": 0.00019999354282328305, - "loss": 46.0, - "step": 47322 - }, - { - "epoch": 3.618173824951737, - "grad_norm": 0.002868939656764269, - "learning_rate": 0.00019999354255032163, - "loss": 46.0, - "step": 47323 - }, - { - "epoch": 3.618250281935126, - "grad_norm": 0.0022335646208375692, - "learning_rate": 0.00019999354227735446, - "loss": 46.0, - "step": 47324 - }, - { - "epoch": 3.618326738918516, - "grad_norm": 0.0011630482040345669, - "learning_rate": 0.00019999354200438146, - "loss": 46.0, - "step": 47325 - }, - { - "epoch": 3.6184031959019056, - "grad_norm": 0.0011882652761414647, - "learning_rate": 0.00019999354173140275, - "loss": 46.0, - "step": 47326 - }, - { - "epoch": 3.6184796528852954, - "grad_norm": 0.003643147414550185, - "learning_rate": 0.00019999354145841823, - "loss": 46.0, - "step": 47327 - }, - { - "epoch": 3.618556109868685, - "grad_norm": 0.0030876793898642063, - "learning_rate": 0.00019999354118542797, - "loss": 46.0, - "step": 47328 - }, - { - "epoch": 3.618632566852075, - "grad_norm": 0.003609174396842718, - "learning_rate": 0.00019999354091243193, - "loss": 46.0, - "step": 47329 - }, - { - "epoch": 3.6187090238354647, - "grad_norm": 0.0019165801350027323, - "learning_rate": 0.00019999354063943012, - "loss": 46.0, - "step": 47330 - }, - { - "epoch": 3.6187854808188544, - "grad_norm": 0.0014679976738989353, - "learning_rate": 0.00019999354036642254, - "loss": 46.0, - "step": 47331 - }, - { - "epoch": 3.6188619378022437, - "grad_norm": 0.0014446813147515059, - "learning_rate": 0.0001999935400934092, - "loss": 46.0, - "step": 47332 - }, - { - "epoch": 3.6189383947856335, - "grad_norm": 0.004242730792611837, - "learning_rate": 0.00019999353982039008, - "loss": 46.0, - "step": 47333 - }, - { - "epoch": 3.6190148517690233, - "grad_norm": 0.006166866514831781, - "learning_rate": 0.00019999353954736518, - "loss": 46.0, - "step": 47334 - }, - { - "epoch": 3.619091308752413, - "grad_norm": 0.00638245465233922, - "learning_rate": 0.00019999353927433453, - "loss": 46.0, - "step": 47335 - }, - { - "epoch": 3.619167765735803, - "grad_norm": 0.00381590542383492, - "learning_rate": 0.00019999353900129814, - "loss": 46.0, - "step": 47336 - }, - { - "epoch": 3.6192442227191925, - "grad_norm": 0.0054197246208786964, - "learning_rate": 0.00019999353872825592, - "loss": 46.0, - "step": 47337 - }, - { - "epoch": 3.6193206797025823, - "grad_norm": 0.0020835886243730783, - "learning_rate": 0.00019999353845520797, - "loss": 46.0, - "step": 47338 - }, - { - "epoch": 3.619397136685972, - "grad_norm": 0.003529014065861702, - "learning_rate": 0.00019999353818215423, - "loss": 46.0, - "step": 47339 - }, - { - "epoch": 3.619473593669362, - "grad_norm": 0.0011554706143215299, - "learning_rate": 0.00019999353790909474, - "loss": 46.0, - "step": 47340 - }, - { - "epoch": 3.6195500506527516, - "grad_norm": 0.0021690039429813623, - "learning_rate": 0.00019999353763602948, - "loss": 46.0, - "step": 47341 - }, - { - "epoch": 3.6196265076361414, - "grad_norm": 0.0030679211486130953, - "learning_rate": 0.00019999353736295845, - "loss": 46.0, - "step": 47342 - }, - { - "epoch": 3.619702964619531, - "grad_norm": 0.004602686036378145, - "learning_rate": 0.00019999353708988164, - "loss": 46.0, - "step": 47343 - }, - { - "epoch": 3.619779421602921, - "grad_norm": 0.0019608382135629654, - "learning_rate": 0.00019999353681679906, - "loss": 46.0, - "step": 47344 - }, - { - "epoch": 3.6198558785863106, - "grad_norm": 0.0027885353192687035, - "learning_rate": 0.0001999935365437107, - "loss": 46.0, - "step": 47345 - }, - { - "epoch": 3.6199323355697, - "grad_norm": 0.006393016781657934, - "learning_rate": 0.0001999935362706166, - "loss": 46.0, - "step": 47346 - }, - { - "epoch": 3.6200087925530897, - "grad_norm": 0.0023105100262910128, - "learning_rate": 0.00019999353599751672, - "loss": 46.0, - "step": 47347 - }, - { - "epoch": 3.6200852495364795, - "grad_norm": 0.003415314480662346, - "learning_rate": 0.00019999353572441108, - "loss": 46.0, - "step": 47348 - }, - { - "epoch": 3.6201617065198692, - "grad_norm": 0.0009169764234684408, - "learning_rate": 0.00019999353545129965, - "loss": 46.0, - "step": 47349 - }, - { - "epoch": 3.620238163503259, - "grad_norm": 0.0005590890068560839, - "learning_rate": 0.00019999353517818246, - "loss": 46.0, - "step": 47350 - }, - { - "epoch": 3.6203146204866488, - "grad_norm": 0.003532255766913295, - "learning_rate": 0.0001999935349050595, - "loss": 46.0, - "step": 47351 - }, - { - "epoch": 3.6203910774700385, - "grad_norm": 0.003182093845680356, - "learning_rate": 0.00019999353463193078, - "loss": 46.0, - "step": 47352 - }, - { - "epoch": 3.6204675344534283, - "grad_norm": 0.0024112078826874495, - "learning_rate": 0.00019999353435879626, - "loss": 46.0, - "step": 47353 - }, - { - "epoch": 3.6205439914368176, - "grad_norm": 0.0017347057582810521, - "learning_rate": 0.000199993534085656, - "loss": 46.0, - "step": 47354 - }, - { - "epoch": 3.6206204484202074, - "grad_norm": 0.0027756644412875175, - "learning_rate": 0.00019999353381251, - "loss": 46.0, - "step": 47355 - }, - { - "epoch": 3.620696905403597, - "grad_norm": 0.0011865616543218493, - "learning_rate": 0.00019999353353935816, - "loss": 46.0, - "step": 47356 - }, - { - "epoch": 3.620773362386987, - "grad_norm": 0.001097788568586111, - "learning_rate": 0.00019999353326620057, - "loss": 46.0, - "step": 47357 - }, - { - "epoch": 3.6208498193703766, - "grad_norm": 0.002234102226793766, - "learning_rate": 0.00019999353299303725, - "loss": 46.0, - "step": 47358 - }, - { - "epoch": 3.6209262763537664, - "grad_norm": 0.010361786931753159, - "learning_rate": 0.00019999353271986814, - "loss": 46.0, - "step": 47359 - }, - { - "epoch": 3.621002733337156, - "grad_norm": 0.000992064829915762, - "learning_rate": 0.00019999353244669324, - "loss": 46.0, - "step": 47360 - }, - { - "epoch": 3.621079190320546, - "grad_norm": 0.001721574692055583, - "learning_rate": 0.0001999935321735126, - "loss": 46.0, - "step": 47361 - }, - { - "epoch": 3.6211556473039357, - "grad_norm": 0.0018225802341476083, - "learning_rate": 0.0001999935319003262, - "loss": 46.0, - "step": 47362 - }, - { - "epoch": 3.6212321042873254, - "grad_norm": 0.00235338625498116, - "learning_rate": 0.000199993531627134, - "loss": 46.0, - "step": 47363 - }, - { - "epoch": 3.621308561270715, - "grad_norm": 0.004489402286708355, - "learning_rate": 0.00019999353135393604, - "loss": 46.0, - "step": 47364 - }, - { - "epoch": 3.621385018254105, - "grad_norm": 0.0009082783362828195, - "learning_rate": 0.00019999353108073232, - "loss": 46.0, - "step": 47365 - }, - { - "epoch": 3.6214614752374947, - "grad_norm": 0.0021544923074543476, - "learning_rate": 0.00019999353080752284, - "loss": 46.0, - "step": 47366 - }, - { - "epoch": 3.6215379322208845, - "grad_norm": 0.0024716483894735575, - "learning_rate": 0.00019999353053430755, - "loss": 46.0, - "step": 47367 - }, - { - "epoch": 3.621614389204274, - "grad_norm": 0.002472597872838378, - "learning_rate": 0.0001999935302610865, - "loss": 46.0, - "step": 47368 - }, - { - "epoch": 3.6216908461876636, - "grad_norm": 0.0011978407856076956, - "learning_rate": 0.0001999935299878597, - "loss": 46.0, - "step": 47369 - }, - { - "epoch": 3.6217673031710533, - "grad_norm": 0.0018157584127038717, - "learning_rate": 0.00019999352971462715, - "loss": 46.0, - "step": 47370 - }, - { - "epoch": 3.621843760154443, - "grad_norm": 0.0026981134433299303, - "learning_rate": 0.0001999935294413888, - "loss": 46.0, - "step": 47371 - }, - { - "epoch": 3.621920217137833, - "grad_norm": 0.006168703082948923, - "learning_rate": 0.0001999935291681447, - "loss": 46.0, - "step": 47372 - }, - { - "epoch": 3.6219966741212226, - "grad_norm": 0.004905191715806723, - "learning_rate": 0.00019999352889489482, - "loss": 46.0, - "step": 47373 - }, - { - "epoch": 3.6220731311046124, - "grad_norm": 0.0008568610646761954, - "learning_rate": 0.00019999352862163917, - "loss": 46.0, - "step": 47374 - }, - { - "epoch": 3.622149588088002, - "grad_norm": 0.0061667305417358875, - "learning_rate": 0.00019999352834837778, - "loss": 46.0, - "step": 47375 - }, - { - "epoch": 3.6222260450713915, - "grad_norm": 0.001092708669602871, - "learning_rate": 0.00019999352807511058, - "loss": 46.0, - "step": 47376 - }, - { - "epoch": 3.622302502054781, - "grad_norm": 0.0033057217951864004, - "learning_rate": 0.00019999352780183762, - "loss": 46.0, - "step": 47377 - }, - { - "epoch": 3.622378959038171, - "grad_norm": 0.0015536271966993809, - "learning_rate": 0.00019999352752855888, - "loss": 46.0, - "step": 47378 - }, - { - "epoch": 3.6224554160215607, - "grad_norm": 0.0029149481561034918, - "learning_rate": 0.0001999935272552744, - "loss": 46.0, - "step": 47379 - }, - { - "epoch": 3.6225318730049505, - "grad_norm": 0.0006690032314509153, - "learning_rate": 0.00019999352698198415, - "loss": 46.0, - "step": 47380 - }, - { - "epoch": 3.6226083299883403, - "grad_norm": 0.0007043190416879952, - "learning_rate": 0.0001999935267086881, - "loss": 46.0, - "step": 47381 - }, - { - "epoch": 3.62268478697173, - "grad_norm": 0.002085176995024085, - "learning_rate": 0.0001999935264353863, - "loss": 46.0, - "step": 47382 - }, - { - "epoch": 3.62276124395512, - "grad_norm": 0.0016610943712294102, - "learning_rate": 0.00019999352616207873, - "loss": 46.0, - "step": 47383 - }, - { - "epoch": 3.6228377009385095, - "grad_norm": 0.00451140571385622, - "learning_rate": 0.00019999352588876538, - "loss": 46.0, - "step": 47384 - }, - { - "epoch": 3.6229141579218993, - "grad_norm": 0.005642641335725784, - "learning_rate": 0.00019999352561544628, - "loss": 46.0, - "step": 47385 - }, - { - "epoch": 3.622990614905289, - "grad_norm": 0.0004088491841685027, - "learning_rate": 0.0001999935253421214, - "loss": 46.0, - "step": 47386 - }, - { - "epoch": 3.623067071888679, - "grad_norm": 0.001353420433588326, - "learning_rate": 0.00019999352506879075, - "loss": 46.0, - "step": 47387 - }, - { - "epoch": 3.6231435288720686, - "grad_norm": 0.0024752598255872726, - "learning_rate": 0.00019999352479545433, - "loss": 46.0, - "step": 47388 - }, - { - "epoch": 3.6232199858554583, - "grad_norm": 0.001023277174681425, - "learning_rate": 0.00019999352452211214, - "loss": 46.0, - "step": 47389 - }, - { - "epoch": 3.6232964428388477, - "grad_norm": 0.00214571924880147, - "learning_rate": 0.0001999935242487642, - "loss": 46.0, - "step": 47390 - }, - { - "epoch": 3.6233728998222374, - "grad_norm": 0.0027552112005650997, - "learning_rate": 0.00019999352397541049, - "loss": 46.0, - "step": 47391 - }, - { - "epoch": 3.623449356805627, - "grad_norm": 0.0009797487873584032, - "learning_rate": 0.000199993523702051, - "loss": 46.0, - "step": 47392 - }, - { - "epoch": 3.623525813789017, - "grad_norm": 0.0008931035408750176, - "learning_rate": 0.00019999352342868574, - "loss": 46.0, - "step": 47393 - }, - { - "epoch": 3.6236022707724067, - "grad_norm": 0.0010882462374866009, - "learning_rate": 0.00019999352315531473, - "loss": 46.0, - "step": 47394 - }, - { - "epoch": 3.6236787277557965, - "grad_norm": 0.00335419038310647, - "learning_rate": 0.00019999352288193792, - "loss": 46.0, - "step": 47395 - }, - { - "epoch": 3.6237551847391862, - "grad_norm": 0.002310057869181037, - "learning_rate": 0.00019999352260855532, - "loss": 46.0, - "step": 47396 - }, - { - "epoch": 3.623831641722576, - "grad_norm": 0.0027128912042826414, - "learning_rate": 0.000199993522335167, - "loss": 46.0, - "step": 47397 - }, - { - "epoch": 3.6239080987059653, - "grad_norm": 0.0008993922383524477, - "learning_rate": 0.0001999935220617729, - "loss": 46.0, - "step": 47398 - }, - { - "epoch": 3.623984555689355, - "grad_norm": 0.0014454993652179837, - "learning_rate": 0.00019999352178837301, - "loss": 46.0, - "step": 47399 - }, - { - "epoch": 3.624061012672745, - "grad_norm": 0.0019481470808386803, - "learning_rate": 0.0001999935215149674, - "loss": 46.0, - "step": 47400 - }, - { - "epoch": 3.6241374696561346, - "grad_norm": 0.0005018050433136523, - "learning_rate": 0.00019999352124155597, - "loss": 46.0, - "step": 47401 - }, - { - "epoch": 3.6242139266395244, - "grad_norm": 0.0015739724040031433, - "learning_rate": 0.00019999352096813878, - "loss": 46.0, - "step": 47402 - }, - { - "epoch": 3.624290383622914, - "grad_norm": 0.001120415166951716, - "learning_rate": 0.00019999352069471584, - "loss": 46.0, - "step": 47403 - }, - { - "epoch": 3.624366840606304, - "grad_norm": 0.0022295115049928427, - "learning_rate": 0.00019999352042128713, - "loss": 46.0, - "step": 47404 - }, - { - "epoch": 3.6244432975896936, - "grad_norm": 0.0010030489647760987, - "learning_rate": 0.0001999935201478526, - "loss": 46.0, - "step": 47405 - }, - { - "epoch": 3.6245197545730834, - "grad_norm": 0.004237939137965441, - "learning_rate": 0.00019999351987441238, - "loss": 46.0, - "step": 47406 - }, - { - "epoch": 3.624596211556473, - "grad_norm": 0.0005940491100773215, - "learning_rate": 0.00019999351960096635, - "loss": 46.0, - "step": 47407 - }, - { - "epoch": 3.624672668539863, - "grad_norm": 0.004383536987006664, - "learning_rate": 0.00019999351932751457, - "loss": 46.0, - "step": 47408 - }, - { - "epoch": 3.6247491255232527, - "grad_norm": 0.0022701299749314785, - "learning_rate": 0.00019999351905405699, - "loss": 46.0, - "step": 47409 - }, - { - "epoch": 3.6248255825066424, - "grad_norm": 0.001550098997540772, - "learning_rate": 0.00019999351878059366, - "loss": 46.0, - "step": 47410 - }, - { - "epoch": 3.6249020394900318, - "grad_norm": 0.0022231393959373236, - "learning_rate": 0.00019999351850712453, - "loss": 46.0, - "step": 47411 - }, - { - "epoch": 3.6249784964734215, - "grad_norm": 0.0017720917239785194, - "learning_rate": 0.00019999351823364966, - "loss": 46.0, - "step": 47412 - }, - { - "epoch": 3.6250549534568113, - "grad_norm": 0.0025821721646934748, - "learning_rate": 0.00019999351796016904, - "loss": 46.0, - "step": 47413 - }, - { - "epoch": 3.625131410440201, - "grad_norm": 0.0030009495094418526, - "learning_rate": 0.00019999351768668262, - "loss": 46.0, - "step": 47414 - }, - { - "epoch": 3.625207867423591, - "grad_norm": 0.0015778911765664816, - "learning_rate": 0.00019999351741319043, - "loss": 46.0, - "step": 47415 - }, - { - "epoch": 3.6252843244069806, - "grad_norm": 0.0024254783056676388, - "learning_rate": 0.00019999351713969249, - "loss": 46.0, - "step": 47416 - }, - { - "epoch": 3.6253607813903703, - "grad_norm": 0.0014715736033394933, - "learning_rate": 0.00019999351686618877, - "loss": 46.0, - "step": 47417 - }, - { - "epoch": 3.62543723837376, - "grad_norm": 0.0008359206258319318, - "learning_rate": 0.00019999351659267926, - "loss": 46.0, - "step": 47418 - }, - { - "epoch": 3.62551369535715, - "grad_norm": 0.0017035825876519084, - "learning_rate": 0.000199993516319164, - "loss": 46.0, - "step": 47419 - }, - { - "epoch": 3.625590152340539, - "grad_norm": 0.0028160554356873035, - "learning_rate": 0.000199993516045643, - "loss": 46.0, - "step": 47420 - }, - { - "epoch": 3.625666609323929, - "grad_norm": 0.004391511902213097, - "learning_rate": 0.0001999935157721162, - "loss": 46.0, - "step": 47421 - }, - { - "epoch": 3.6257430663073187, - "grad_norm": 0.001714915968477726, - "learning_rate": 0.00019999351549858366, - "loss": 46.0, - "step": 47422 - }, - { - "epoch": 3.6258195232907084, - "grad_norm": 0.001265192055143416, - "learning_rate": 0.0001999935152250453, - "loss": 46.0, - "step": 47423 - }, - { - "epoch": 3.625895980274098, - "grad_norm": 0.003409244120121002, - "learning_rate": 0.0001999935149515012, - "loss": 46.0, - "step": 47424 - }, - { - "epoch": 3.625972437257488, - "grad_norm": 0.0012118630111217499, - "learning_rate": 0.00019999351467795133, - "loss": 46.0, - "step": 47425 - }, - { - "epoch": 3.6260488942408777, - "grad_norm": 0.003979481756687164, - "learning_rate": 0.00019999351440439571, - "loss": 46.0, - "step": 47426 - }, - { - "epoch": 3.6261253512242675, - "grad_norm": 0.001014834619127214, - "learning_rate": 0.00019999351413083427, - "loss": 46.0, - "step": 47427 - }, - { - "epoch": 3.6262018082076573, - "grad_norm": 0.0011285807704553008, - "learning_rate": 0.0001999935138572671, - "loss": 46.0, - "step": 47428 - }, - { - "epoch": 3.626278265191047, - "grad_norm": 0.005313312169164419, - "learning_rate": 0.00019999351358369416, - "loss": 46.0, - "step": 47429 - }, - { - "epoch": 3.6263547221744368, - "grad_norm": 0.0035176228266209364, - "learning_rate": 0.00019999351331011545, - "loss": 46.0, - "step": 47430 - }, - { - "epoch": 3.6264311791578265, - "grad_norm": 0.000344529515132308, - "learning_rate": 0.00019999351303653096, - "loss": 46.0, - "step": 47431 - }, - { - "epoch": 3.6265076361412163, - "grad_norm": 0.0022166178096085787, - "learning_rate": 0.0001999935127629407, - "loss": 46.0, - "step": 47432 - }, - { - "epoch": 3.6265840931246056, - "grad_norm": 0.0021415643859654665, - "learning_rate": 0.00019999351248934467, - "loss": 46.0, - "step": 47433 - }, - { - "epoch": 3.6266605501079954, - "grad_norm": 0.002091477857902646, - "learning_rate": 0.0001999935122157429, - "loss": 46.0, - "step": 47434 - }, - { - "epoch": 3.626737007091385, - "grad_norm": 0.001383824273943901, - "learning_rate": 0.0001999935119421353, - "loss": 46.0, - "step": 47435 - }, - { - "epoch": 3.626813464074775, - "grad_norm": 0.0009950530948117375, - "learning_rate": 0.000199993511668522, - "loss": 46.0, - "step": 47436 - }, - { - "epoch": 3.6268899210581647, - "grad_norm": 0.002144841942936182, - "learning_rate": 0.00019999351139490289, - "loss": 46.0, - "step": 47437 - }, - { - "epoch": 3.6269663780415544, - "grad_norm": 0.0005444727721624076, - "learning_rate": 0.00019999351112127801, - "loss": 46.0, - "step": 47438 - }, - { - "epoch": 3.627042835024944, - "grad_norm": 0.0005735830636695027, - "learning_rate": 0.0001999935108476474, - "loss": 46.0, - "step": 47439 - }, - { - "epoch": 3.627119292008334, - "grad_norm": 0.0017893824260681868, - "learning_rate": 0.000199993510574011, - "loss": 46.0, - "step": 47440 - }, - { - "epoch": 3.6271957489917233, - "grad_norm": 0.0019484329968690872, - "learning_rate": 0.0001999935103003688, - "loss": 46.0, - "step": 47441 - }, - { - "epoch": 3.627272205975113, - "grad_norm": 0.0023450450971722603, - "learning_rate": 0.00019999351002672087, - "loss": 46.0, - "step": 47442 - }, - { - "epoch": 3.627348662958503, - "grad_norm": 0.0033868427854031324, - "learning_rate": 0.00019999350975306713, - "loss": 46.0, - "step": 47443 - }, - { - "epoch": 3.6274251199418925, - "grad_norm": 0.002973944880068302, - "learning_rate": 0.00019999350947940764, - "loss": 46.0, - "step": 47444 - }, - { - "epoch": 3.6275015769252823, - "grad_norm": 0.00456279655918479, - "learning_rate": 0.0001999935092057424, - "loss": 46.0, - "step": 47445 - }, - { - "epoch": 3.627578033908672, - "grad_norm": 0.0017020419472828507, - "learning_rate": 0.00019999350893207138, - "loss": 46.0, - "step": 47446 - }, - { - "epoch": 3.627654490892062, - "grad_norm": 0.0032190780621021986, - "learning_rate": 0.0001999935086583946, - "loss": 46.0, - "step": 47447 - }, - { - "epoch": 3.6277309478754516, - "grad_norm": 0.0026783659122884274, - "learning_rate": 0.00019999350838471202, - "loss": 46.0, - "step": 47448 - }, - { - "epoch": 3.6278074048588413, - "grad_norm": 0.0017374379094690084, - "learning_rate": 0.0001999935081110237, - "loss": 46.0, - "step": 47449 - }, - { - "epoch": 3.627883861842231, - "grad_norm": 0.0013571102172136307, - "learning_rate": 0.00019999350783732962, - "loss": 46.0, - "step": 47450 - }, - { - "epoch": 3.627960318825621, - "grad_norm": 0.00201827147975564, - "learning_rate": 0.00019999350756362975, - "loss": 46.0, - "step": 47451 - }, - { - "epoch": 3.6280367758090106, - "grad_norm": 0.0017024754779413342, - "learning_rate": 0.00019999350728992413, - "loss": 46.0, - "step": 47452 - }, - { - "epoch": 3.6281132327924004, - "grad_norm": 0.004810396581888199, - "learning_rate": 0.0001999935070162127, - "loss": 46.0, - "step": 47453 - }, - { - "epoch": 3.62818968977579, - "grad_norm": 0.002099544508382678, - "learning_rate": 0.00019999350674249552, - "loss": 46.0, - "step": 47454 - }, - { - "epoch": 3.6282661467591795, - "grad_norm": 0.002537505468353629, - "learning_rate": 0.00019999350646877258, - "loss": 46.0, - "step": 47455 - }, - { - "epoch": 3.6283426037425692, - "grad_norm": 0.004741470795124769, - "learning_rate": 0.0001999935061950439, - "loss": 46.0, - "step": 47456 - }, - { - "epoch": 3.628419060725959, - "grad_norm": 0.0015688469866290689, - "learning_rate": 0.00019999350592130938, - "loss": 46.0, - "step": 47457 - }, - { - "epoch": 3.6284955177093487, - "grad_norm": 0.002859345404431224, - "learning_rate": 0.00019999350564756915, - "loss": 46.0, - "step": 47458 - }, - { - "epoch": 3.6285719746927385, - "grad_norm": 0.007290893699973822, - "learning_rate": 0.00019999350537382312, - "loss": 46.0, - "step": 47459 - }, - { - "epoch": 3.6286484316761283, - "grad_norm": 0.003457999788224697, - "learning_rate": 0.00019999350510007134, - "loss": 46.0, - "step": 47460 - }, - { - "epoch": 3.628724888659518, - "grad_norm": 0.0011462683323770761, - "learning_rate": 0.0001999935048263138, - "loss": 46.0, - "step": 47461 - }, - { - "epoch": 3.628801345642908, - "grad_norm": 0.001098414184525609, - "learning_rate": 0.00019999350455255046, - "loss": 46.0, - "step": 47462 - }, - { - "epoch": 3.628877802626297, - "grad_norm": 0.0017879842780530453, - "learning_rate": 0.00019999350427878137, - "loss": 46.0, - "step": 47463 - }, - { - "epoch": 3.628954259609687, - "grad_norm": 0.0018215954769402742, - "learning_rate": 0.0001999935040050065, - "loss": 46.0, - "step": 47464 - }, - { - "epoch": 3.6290307165930766, - "grad_norm": 0.0012119802413508296, - "learning_rate": 0.00019999350373122585, - "loss": 46.0, - "step": 47465 - }, - { - "epoch": 3.6291071735764664, - "grad_norm": 0.001823908882215619, - "learning_rate": 0.00019999350345743943, - "loss": 46.0, - "step": 47466 - }, - { - "epoch": 3.629183630559856, - "grad_norm": 0.006939653307199478, - "learning_rate": 0.0001999935031836473, - "loss": 46.0, - "step": 47467 - }, - { - "epoch": 3.629260087543246, - "grad_norm": 0.002816864987835288, - "learning_rate": 0.00019999350290984932, - "loss": 46.0, - "step": 47468 - }, - { - "epoch": 3.6293365445266357, - "grad_norm": 0.002402219455689192, - "learning_rate": 0.00019999350263604564, - "loss": 46.0, - "step": 47469 - }, - { - "epoch": 3.6294130015100254, - "grad_norm": 0.006036390550434589, - "learning_rate": 0.00019999350236223615, - "loss": 46.0, - "step": 47470 - }, - { - "epoch": 3.629489458493415, - "grad_norm": 0.0014716449659317732, - "learning_rate": 0.0001999935020884209, - "loss": 46.0, - "step": 47471 - }, - { - "epoch": 3.629565915476805, - "grad_norm": 0.0023648792412132025, - "learning_rate": 0.0001999935018145999, - "loss": 46.0, - "step": 47472 - }, - { - "epoch": 3.6296423724601947, - "grad_norm": 0.0016436890000477433, - "learning_rate": 0.00019999350154077309, - "loss": 46.0, - "step": 47473 - }, - { - "epoch": 3.6297188294435845, - "grad_norm": 0.0008651686948724091, - "learning_rate": 0.00019999350126694053, - "loss": 46.0, - "step": 47474 - }, - { - "epoch": 3.6297952864269742, - "grad_norm": 0.0029481409583240747, - "learning_rate": 0.0001999935009931022, - "loss": 46.0, - "step": 47475 - }, - { - "epoch": 3.629871743410364, - "grad_norm": 0.003094090148806572, - "learning_rate": 0.0001999935007192581, - "loss": 46.0, - "step": 47476 - }, - { - "epoch": 3.6299482003937533, - "grad_norm": 0.0019815596751868725, - "learning_rate": 0.00019999350044540827, - "loss": 46.0, - "step": 47477 - }, - { - "epoch": 3.630024657377143, - "grad_norm": 0.0021683755330741405, - "learning_rate": 0.00019999350017155262, - "loss": 46.0, - "step": 47478 - }, - { - "epoch": 3.630101114360533, - "grad_norm": 0.0012911329977214336, - "learning_rate": 0.00019999349989769123, - "loss": 46.0, - "step": 47479 - }, - { - "epoch": 3.6301775713439226, - "grad_norm": 0.0007185947615653276, - "learning_rate": 0.00019999349962382406, - "loss": 46.0, - "step": 47480 - }, - { - "epoch": 3.6302540283273124, - "grad_norm": 0.00220209127292037, - "learning_rate": 0.00019999349934995113, - "loss": 46.0, - "step": 47481 - }, - { - "epoch": 3.630330485310702, - "grad_norm": 0.0028558720368891954, - "learning_rate": 0.00019999349907607241, - "loss": 46.0, - "step": 47482 - }, - { - "epoch": 3.630406942294092, - "grad_norm": 0.0050702812150120735, - "learning_rate": 0.00019999349880218793, - "loss": 46.0, - "step": 47483 - }, - { - "epoch": 3.6304833992774816, - "grad_norm": 0.003349294187501073, - "learning_rate": 0.00019999349852829767, - "loss": 46.0, - "step": 47484 - }, - { - "epoch": 3.630559856260871, - "grad_norm": 0.002493863692507148, - "learning_rate": 0.00019999349825440167, - "loss": 46.0, - "step": 47485 - }, - { - "epoch": 3.6306363132442607, - "grad_norm": 0.001305361744016409, - "learning_rate": 0.00019999349798049986, - "loss": 46.0, - "step": 47486 - }, - { - "epoch": 3.6307127702276505, - "grad_norm": 0.004744191188365221, - "learning_rate": 0.00019999349770659234, - "loss": 46.0, - "step": 47487 - }, - { - "epoch": 3.6307892272110402, - "grad_norm": 0.002669707639142871, - "learning_rate": 0.000199993497432679, - "loss": 46.0, - "step": 47488 - }, - { - "epoch": 3.63086568419443, - "grad_norm": 0.0025324500165879726, - "learning_rate": 0.00019999349715875991, - "loss": 46.0, - "step": 47489 - }, - { - "epoch": 3.6309421411778198, - "grad_norm": 0.003164202207699418, - "learning_rate": 0.00019999349688483507, - "loss": 46.0, - "step": 47490 - }, - { - "epoch": 3.6310185981612095, - "grad_norm": 0.0013077250914648175, - "learning_rate": 0.0001999934966109044, - "loss": 46.0, - "step": 47491 - }, - { - "epoch": 3.6310950551445993, - "grad_norm": 0.00341529562138021, - "learning_rate": 0.00019999349633696803, - "loss": 46.0, - "step": 47492 - }, - { - "epoch": 3.631171512127989, - "grad_norm": 0.0007273780065588653, - "learning_rate": 0.00019999349606302587, - "loss": 46.0, - "step": 47493 - }, - { - "epoch": 3.631247969111379, - "grad_norm": 0.0024748428259044886, - "learning_rate": 0.0001999934957890779, - "loss": 46.0, - "step": 47494 - }, - { - "epoch": 3.6313244260947686, - "grad_norm": 0.005123466718941927, - "learning_rate": 0.00019999349551512422, - "loss": 46.0, - "step": 47495 - }, - { - "epoch": 3.6314008830781583, - "grad_norm": 0.0037410948425531387, - "learning_rate": 0.00019999349524116474, - "loss": 46.0, - "step": 47496 - }, - { - "epoch": 3.631477340061548, - "grad_norm": 0.0027024236042052507, - "learning_rate": 0.0001999934949671995, - "loss": 46.0, - "step": 47497 - }, - { - "epoch": 3.631553797044938, - "grad_norm": 0.0018701617373153567, - "learning_rate": 0.00019999349469322847, - "loss": 46.0, - "step": 47498 - }, - { - "epoch": 3.631630254028327, - "grad_norm": 0.0011156498221680522, - "learning_rate": 0.00019999349441925167, - "loss": 46.0, - "step": 47499 - }, - { - "epoch": 3.631706711011717, - "grad_norm": 0.004159875214099884, - "learning_rate": 0.00019999349414526914, - "loss": 46.0, - "step": 47500 - }, - { - "epoch": 3.6317831679951067, - "grad_norm": 0.0006476440466940403, - "learning_rate": 0.00019999349387128082, - "loss": 46.0, - "step": 47501 - }, - { - "epoch": 3.6318596249784965, - "grad_norm": 0.0021001342684030533, - "learning_rate": 0.00019999349359728675, - "loss": 46.0, - "step": 47502 - }, - { - "epoch": 3.631936081961886, - "grad_norm": 0.0014773018192499876, - "learning_rate": 0.00019999349332328688, - "loss": 46.0, - "step": 47503 - }, - { - "epoch": 3.632012538945276, - "grad_norm": 0.0015500617446377873, - "learning_rate": 0.00019999349304928123, - "loss": 46.0, - "step": 47504 - }, - { - "epoch": 3.6320889959286657, - "grad_norm": 0.0035139955580234528, - "learning_rate": 0.00019999349277526984, - "loss": 46.0, - "step": 47505 - }, - { - "epoch": 3.6321654529120555, - "grad_norm": 0.00399654870852828, - "learning_rate": 0.00019999349250125268, - "loss": 46.0, - "step": 47506 - }, - { - "epoch": 3.632241909895445, - "grad_norm": 0.00333567732013762, - "learning_rate": 0.00019999349222722974, - "loss": 46.0, - "step": 47507 - }, - { - "epoch": 3.6323183668788346, - "grad_norm": 0.004286587238311768, - "learning_rate": 0.00019999349195320103, - "loss": 46.0, - "step": 47508 - }, - { - "epoch": 3.6323948238622243, - "grad_norm": 0.002665396546944976, - "learning_rate": 0.00019999349167916657, - "loss": 46.0, - "step": 47509 - }, - { - "epoch": 3.632471280845614, - "grad_norm": 0.002656772034242749, - "learning_rate": 0.00019999349140512634, - "loss": 46.0, - "step": 47510 - }, - { - "epoch": 3.632547737829004, - "grad_norm": 0.0005230242386460304, - "learning_rate": 0.0001999934911310803, - "loss": 46.0, - "step": 47511 - }, - { - "epoch": 3.6326241948123936, - "grad_norm": 0.0030034389346837997, - "learning_rate": 0.00019999349085702853, - "loss": 46.0, - "step": 47512 - }, - { - "epoch": 3.6327006517957834, - "grad_norm": 0.0034105100203305483, - "learning_rate": 0.00019999349058297096, - "loss": 46.0, - "step": 47513 - }, - { - "epoch": 3.632777108779173, - "grad_norm": 0.0009571025730110705, - "learning_rate": 0.00019999349030890766, - "loss": 46.0, - "step": 47514 - }, - { - "epoch": 3.632853565762563, - "grad_norm": 0.0017008426366373897, - "learning_rate": 0.0001999934900348386, - "loss": 46.0, - "step": 47515 - }, - { - "epoch": 3.6329300227459527, - "grad_norm": 0.0026454338803887367, - "learning_rate": 0.00019999348976076372, - "loss": 46.0, - "step": 47516 - }, - { - "epoch": 3.6330064797293424, - "grad_norm": 0.000751490646507591, - "learning_rate": 0.0001999934894866831, - "loss": 46.0, - "step": 47517 - }, - { - "epoch": 3.633082936712732, - "grad_norm": 0.0021533139515668154, - "learning_rate": 0.0001999934892125967, - "loss": 46.0, - "step": 47518 - }, - { - "epoch": 3.633159393696122, - "grad_norm": 0.005315329413861036, - "learning_rate": 0.00019999348893850455, - "loss": 46.0, - "step": 47519 - }, - { - "epoch": 3.6332358506795117, - "grad_norm": 0.0015597057063132524, - "learning_rate": 0.00019999348866440658, - "loss": 46.0, - "step": 47520 - }, - { - "epoch": 3.633312307662901, - "grad_norm": 0.002478837501257658, - "learning_rate": 0.0001999934883903029, - "loss": 46.0, - "step": 47521 - }, - { - "epoch": 3.633388764646291, - "grad_norm": 0.004642905667424202, - "learning_rate": 0.00019999348811619344, - "loss": 46.0, - "step": 47522 - }, - { - "epoch": 3.6334652216296806, - "grad_norm": 0.0024296757765114307, - "learning_rate": 0.00019999348784207818, - "loss": 46.0, - "step": 47523 - }, - { - "epoch": 3.6335416786130703, - "grad_norm": 0.0011710509425029159, - "learning_rate": 0.00019999348756795718, - "loss": 46.0, - "step": 47524 - }, - { - "epoch": 3.63361813559646, - "grad_norm": 0.004194037988781929, - "learning_rate": 0.0001999934872938304, - "loss": 46.0, - "step": 47525 - }, - { - "epoch": 3.63369459257985, - "grad_norm": 0.0045677851885557175, - "learning_rate": 0.00019999348701969785, - "loss": 46.0, - "step": 47526 - }, - { - "epoch": 3.6337710495632396, - "grad_norm": 0.004974625073373318, - "learning_rate": 0.00019999348674555953, - "loss": 46.0, - "step": 47527 - }, - { - "epoch": 3.6338475065466294, - "grad_norm": 0.00275798118673265, - "learning_rate": 0.00019999348647141546, - "loss": 46.0, - "step": 47528 - }, - { - "epoch": 3.6339239635300187, - "grad_norm": 0.0030305704567581415, - "learning_rate": 0.0001999934861972656, - "loss": 46.0, - "step": 47529 - }, - { - "epoch": 3.6340004205134084, - "grad_norm": 0.0034770227503031492, - "learning_rate": 0.00019999348592310997, - "loss": 46.0, - "step": 47530 - }, - { - "epoch": 3.634076877496798, - "grad_norm": 0.0004107113054487854, - "learning_rate": 0.00019999348564894858, - "loss": 46.0, - "step": 47531 - }, - { - "epoch": 3.634153334480188, - "grad_norm": 0.008126107044517994, - "learning_rate": 0.0001999934853747814, - "loss": 46.0, - "step": 47532 - }, - { - "epoch": 3.6342297914635777, - "grad_norm": 0.003024878678843379, - "learning_rate": 0.00019999348510060848, - "loss": 46.0, - "step": 47533 - }, - { - "epoch": 3.6343062484469675, - "grad_norm": 0.0024242375511676073, - "learning_rate": 0.00019999348482642977, - "loss": 46.0, - "step": 47534 - }, - { - "epoch": 3.6343827054303572, - "grad_norm": 0.0027742113452404737, - "learning_rate": 0.00019999348455224531, - "loss": 46.0, - "step": 47535 - }, - { - "epoch": 3.634459162413747, - "grad_norm": 0.0021328695584088564, - "learning_rate": 0.00019999348427805506, - "loss": 46.0, - "step": 47536 - }, - { - "epoch": 3.6345356193971368, - "grad_norm": 0.000886952446307987, - "learning_rate": 0.00019999348400385905, - "loss": 46.0, - "step": 47537 - }, - { - "epoch": 3.6346120763805265, - "grad_norm": 0.0035439415369182825, - "learning_rate": 0.00019999348372965728, - "loss": 46.0, - "step": 47538 - }, - { - "epoch": 3.6346885333639163, - "grad_norm": 0.0007310874061658978, - "learning_rate": 0.00019999348345544973, - "loss": 46.0, - "step": 47539 - }, - { - "epoch": 3.634764990347306, - "grad_norm": 0.0015476186526939273, - "learning_rate": 0.00019999348318123643, - "loss": 46.0, - "step": 47540 - }, - { - "epoch": 3.634841447330696, - "grad_norm": 0.0016074333107098937, - "learning_rate": 0.00019999348290701733, - "loss": 46.0, - "step": 47541 - }, - { - "epoch": 3.634917904314085, - "grad_norm": 0.0038941458333283663, - "learning_rate": 0.0001999934826327925, - "loss": 46.0, - "step": 47542 - }, - { - "epoch": 3.634994361297475, - "grad_norm": 0.0008792912121862173, - "learning_rate": 0.00019999348235856185, - "loss": 46.0, - "step": 47543 - }, - { - "epoch": 3.6350708182808646, - "grad_norm": 0.0038506274577230215, - "learning_rate": 0.00019999348208432548, - "loss": 46.0, - "step": 47544 - }, - { - "epoch": 3.6351472752642544, - "grad_norm": 0.0014361513312906027, - "learning_rate": 0.0001999934818100833, - "loss": 46.0, - "step": 47545 - }, - { - "epoch": 3.635223732247644, - "grad_norm": 0.0056853280402719975, - "learning_rate": 0.00019999348153583538, - "loss": 46.0, - "step": 47546 - }, - { - "epoch": 3.635300189231034, - "grad_norm": 0.0020093037746846676, - "learning_rate": 0.00019999348126158167, - "loss": 46.0, - "step": 47547 - }, - { - "epoch": 3.6353766462144237, - "grad_norm": 0.002617260441184044, - "learning_rate": 0.00019999348098732222, - "loss": 46.0, - "step": 47548 - }, - { - "epoch": 3.6354531031978135, - "grad_norm": 0.0013669480103999376, - "learning_rate": 0.00019999348071305696, - "loss": 46.0, - "step": 47549 - }, - { - "epoch": 3.635529560181203, - "grad_norm": 0.0016319206915795803, - "learning_rate": 0.00019999348043878596, - "loss": 46.0, - "step": 47550 - }, - { - "epoch": 3.6356060171645925, - "grad_norm": 0.0010099452920258045, - "learning_rate": 0.0001999934801645092, - "loss": 46.0, - "step": 47551 - }, - { - "epoch": 3.6356824741479823, - "grad_norm": 0.0034300254192203283, - "learning_rate": 0.00019999347989022666, - "loss": 46.0, - "step": 47552 - }, - { - "epoch": 3.635758931131372, - "grad_norm": 0.003304105019196868, - "learning_rate": 0.00019999347961593836, - "loss": 46.0, - "step": 47553 - }, - { - "epoch": 3.635835388114762, - "grad_norm": 0.002226894721388817, - "learning_rate": 0.00019999347934164424, - "loss": 46.0, - "step": 47554 - }, - { - "epoch": 3.6359118450981516, - "grad_norm": 0.0008436719072051346, - "learning_rate": 0.00019999347906734442, - "loss": 46.0, - "step": 47555 - }, - { - "epoch": 3.6359883020815413, - "grad_norm": 0.0019015672150999308, - "learning_rate": 0.00019999347879303878, - "loss": 46.0, - "step": 47556 - }, - { - "epoch": 3.636064759064931, - "grad_norm": 0.005953916814178228, - "learning_rate": 0.00019999347851872742, - "loss": 46.0, - "step": 47557 - }, - { - "epoch": 3.636141216048321, - "grad_norm": 0.004504180513322353, - "learning_rate": 0.00019999347824441025, - "loss": 46.0, - "step": 47558 - }, - { - "epoch": 3.6362176730317106, - "grad_norm": 0.0038886885158717632, - "learning_rate": 0.00019999347797008732, - "loss": 46.0, - "step": 47559 - }, - { - "epoch": 3.6362941300151004, - "grad_norm": 0.0015144210774451494, - "learning_rate": 0.00019999347769575864, - "loss": 46.0, - "step": 47560 - }, - { - "epoch": 3.63637058699849, - "grad_norm": 0.0010850976686924696, - "learning_rate": 0.00019999347742142415, - "loss": 46.0, - "step": 47561 - }, - { - "epoch": 3.63644704398188, - "grad_norm": 0.00571185490116477, - "learning_rate": 0.00019999347714708392, - "loss": 46.0, - "step": 47562 - }, - { - "epoch": 3.6365235009652697, - "grad_norm": 0.0019794958643615246, - "learning_rate": 0.00019999347687273792, - "loss": 46.0, - "step": 47563 - }, - { - "epoch": 3.636599957948659, - "grad_norm": 0.0018844889709725976, - "learning_rate": 0.00019999347659838614, - "loss": 46.0, - "step": 47564 - }, - { - "epoch": 3.6366764149320487, - "grad_norm": 0.004877413623034954, - "learning_rate": 0.0001999934763240286, - "loss": 46.0, - "step": 47565 - }, - { - "epoch": 3.6367528719154385, - "grad_norm": 0.0014948087045922875, - "learning_rate": 0.0001999934760496653, - "loss": 46.0, - "step": 47566 - }, - { - "epoch": 3.6368293288988283, - "grad_norm": 0.0027713363524526358, - "learning_rate": 0.0001999934757752962, - "loss": 46.0, - "step": 47567 - }, - { - "epoch": 3.636905785882218, - "grad_norm": 0.0034205422271043062, - "learning_rate": 0.00019999347550092136, - "loss": 46.0, - "step": 47568 - }, - { - "epoch": 3.636982242865608, - "grad_norm": 0.0018202033825218678, - "learning_rate": 0.00019999347522654074, - "loss": 46.0, - "step": 47569 - }, - { - "epoch": 3.6370586998489975, - "grad_norm": 0.0011646030470728874, - "learning_rate": 0.00019999347495215438, - "loss": 46.0, - "step": 47570 - }, - { - "epoch": 3.6371351568323873, - "grad_norm": 0.0018189975526183844, - "learning_rate": 0.0001999934746777622, - "loss": 46.0, - "step": 47571 - }, - { - "epoch": 3.6372116138157766, - "grad_norm": 0.0020405708346515894, - "learning_rate": 0.00019999347440336428, - "loss": 46.0, - "step": 47572 - }, - { - "epoch": 3.6372880707991664, - "grad_norm": 0.000633329211268574, - "learning_rate": 0.0001999934741289606, - "loss": 46.0, - "step": 47573 - }, - { - "epoch": 3.637364527782556, - "grad_norm": 0.0032375201117247343, - "learning_rate": 0.00019999347385455112, - "loss": 46.0, - "step": 47574 - }, - { - "epoch": 3.637440984765946, - "grad_norm": 0.0013362066820263863, - "learning_rate": 0.00019999347358013586, - "loss": 46.0, - "step": 47575 - }, - { - "epoch": 3.6375174417493357, - "grad_norm": 0.001816531759686768, - "learning_rate": 0.0001999934733057149, - "loss": 46.0, - "step": 47576 - }, - { - "epoch": 3.6375938987327254, - "grad_norm": 0.004090809728950262, - "learning_rate": 0.0001999934730312881, - "loss": 46.0, - "step": 47577 - }, - { - "epoch": 3.637670355716115, - "grad_norm": 0.0021148703526705503, - "learning_rate": 0.0001999934727568556, - "loss": 46.0, - "step": 47578 - }, - { - "epoch": 3.637746812699505, - "grad_norm": 0.0011162839364260435, - "learning_rate": 0.00019999347248241727, - "loss": 46.0, - "step": 47579 - }, - { - "epoch": 3.6378232696828947, - "grad_norm": 0.0011677221627905965, - "learning_rate": 0.00019999347220797317, - "loss": 46.0, - "step": 47580 - }, - { - "epoch": 3.6378997266662845, - "grad_norm": 0.0006460328586399555, - "learning_rate": 0.00019999347193352333, - "loss": 46.0, - "step": 47581 - }, - { - "epoch": 3.6379761836496742, - "grad_norm": 0.0019672748167067766, - "learning_rate": 0.0001999934716590677, - "loss": 46.0, - "step": 47582 - }, - { - "epoch": 3.638052640633064, - "grad_norm": 0.003686018055304885, - "learning_rate": 0.00019999347138460632, - "loss": 46.0, - "step": 47583 - }, - { - "epoch": 3.6381290976164538, - "grad_norm": 0.0016379008302465081, - "learning_rate": 0.00019999347111013916, - "loss": 46.0, - "step": 47584 - }, - { - "epoch": 3.6382055545998435, - "grad_norm": 0.002490095794200897, - "learning_rate": 0.00019999347083566625, - "loss": 46.0, - "step": 47585 - }, - { - "epoch": 3.638282011583233, - "grad_norm": 0.0026382869109511375, - "learning_rate": 0.00019999347056118757, - "loss": 46.0, - "step": 47586 - }, - { - "epoch": 3.6383584685666226, - "grad_norm": 0.0026858348865062, - "learning_rate": 0.0001999934702867031, - "loss": 46.0, - "step": 47587 - }, - { - "epoch": 3.6384349255500124, - "grad_norm": 0.0014362436486408114, - "learning_rate": 0.00019999347001221286, - "loss": 46.0, - "step": 47588 - }, - { - "epoch": 3.638511382533402, - "grad_norm": 0.0021098421420902014, - "learning_rate": 0.00019999346973771686, - "loss": 46.0, - "step": 47589 - }, - { - "epoch": 3.638587839516792, - "grad_norm": 0.002550907665863633, - "learning_rate": 0.0001999934694632151, - "loss": 46.0, - "step": 47590 - }, - { - "epoch": 3.6386642965001816, - "grad_norm": 0.0012488883221521974, - "learning_rate": 0.00019999346918870756, - "loss": 46.0, - "step": 47591 - }, - { - "epoch": 3.6387407534835714, - "grad_norm": 0.003144358517602086, - "learning_rate": 0.00019999346891419427, - "loss": 46.0, - "step": 47592 - }, - { - "epoch": 3.638817210466961, - "grad_norm": 0.005914170295000076, - "learning_rate": 0.0001999934686396752, - "loss": 46.0, - "step": 47593 - }, - { - "epoch": 3.6388936674503505, - "grad_norm": 0.005192840937525034, - "learning_rate": 0.00019999346836515033, - "loss": 46.0, - "step": 47594 - }, - { - "epoch": 3.6389701244337402, - "grad_norm": 0.002688988344743848, - "learning_rate": 0.0001999934680906197, - "loss": 46.0, - "step": 47595 - }, - { - "epoch": 3.63904658141713, - "grad_norm": 0.007607114966958761, - "learning_rate": 0.00019999346781608335, - "loss": 46.0, - "step": 47596 - }, - { - "epoch": 3.6391230384005198, - "grad_norm": 0.0016015507280826569, - "learning_rate": 0.0001999934675415412, - "loss": 46.0, - "step": 47597 - }, - { - "epoch": 3.6391994953839095, - "grad_norm": 0.0010930881835520267, - "learning_rate": 0.00019999346726699325, - "loss": 46.0, - "step": 47598 - }, - { - "epoch": 3.6392759523672993, - "grad_norm": 0.0019073153380304575, - "learning_rate": 0.00019999346699243957, - "loss": 46.0, - "step": 47599 - }, - { - "epoch": 3.639352409350689, - "grad_norm": 0.0021241530776023865, - "learning_rate": 0.00019999346671788012, - "loss": 46.0, - "step": 47600 - }, - { - "epoch": 3.639428866334079, - "grad_norm": 0.0009620288037694991, - "learning_rate": 0.00019999346644331486, - "loss": 46.0, - "step": 47601 - }, - { - "epoch": 3.6395053233174686, - "grad_norm": 0.0013683775905519724, - "learning_rate": 0.0001999934661687439, - "loss": 46.0, - "step": 47602 - }, - { - "epoch": 3.6395817803008583, - "grad_norm": 0.0023403067607432604, - "learning_rate": 0.0001999934658941671, - "loss": 46.0, - "step": 47603 - }, - { - "epoch": 3.639658237284248, - "grad_norm": 0.0020057130604982376, - "learning_rate": 0.0001999934656195846, - "loss": 46.0, - "step": 47604 - }, - { - "epoch": 3.639734694267638, - "grad_norm": 0.0011659599840641022, - "learning_rate": 0.00019999346534499627, - "loss": 46.0, - "step": 47605 - }, - { - "epoch": 3.6398111512510276, - "grad_norm": 0.006553406361490488, - "learning_rate": 0.0001999934650704022, - "loss": 46.0, - "step": 47606 - }, - { - "epoch": 3.6398876082344174, - "grad_norm": 0.0020889381412416697, - "learning_rate": 0.00019999346479580236, - "loss": 46.0, - "step": 47607 - }, - { - "epoch": 3.6399640652178067, - "grad_norm": 0.0022696072701364756, - "learning_rate": 0.00019999346452119677, - "loss": 46.0, - "step": 47608 - }, - { - "epoch": 3.6400405222011964, - "grad_norm": 0.002234655898064375, - "learning_rate": 0.00019999346424658536, - "loss": 46.0, - "step": 47609 - }, - { - "epoch": 3.640116979184586, - "grad_norm": 0.0009737928630784154, - "learning_rate": 0.00019999346397196822, - "loss": 46.0, - "step": 47610 - }, - { - "epoch": 3.640193436167976, - "grad_norm": 0.0012431824579834938, - "learning_rate": 0.00019999346369734529, - "loss": 46.0, - "step": 47611 - }, - { - "epoch": 3.6402698931513657, - "grad_norm": 0.001995167462155223, - "learning_rate": 0.0001999934634227166, - "loss": 46.0, - "step": 47612 - }, - { - "epoch": 3.6403463501347555, - "grad_norm": 0.002032445976510644, - "learning_rate": 0.00019999346314808215, - "loss": 46.0, - "step": 47613 - }, - { - "epoch": 3.6404228071181453, - "grad_norm": 0.0019962149672210217, - "learning_rate": 0.00019999346287344195, - "loss": 46.0, - "step": 47614 - }, - { - "epoch": 3.640499264101535, - "grad_norm": 0.001421817927621305, - "learning_rate": 0.00019999346259879595, - "loss": 46.0, - "step": 47615 - }, - { - "epoch": 3.6405757210849243, - "grad_norm": 0.0021803828421980143, - "learning_rate": 0.00019999346232414417, - "loss": 46.0, - "step": 47616 - }, - { - "epoch": 3.640652178068314, - "grad_norm": 0.0017514686333015561, - "learning_rate": 0.00019999346204948662, - "loss": 46.0, - "step": 47617 - }, - { - "epoch": 3.640728635051704, - "grad_norm": 0.002726459875702858, - "learning_rate": 0.00019999346177482333, - "loss": 46.0, - "step": 47618 - }, - { - "epoch": 3.6408050920350936, - "grad_norm": 0.001060849754139781, - "learning_rate": 0.00019999346150015426, - "loss": 46.0, - "step": 47619 - }, - { - "epoch": 3.6408815490184834, - "grad_norm": 0.0037169125862419605, - "learning_rate": 0.00019999346122547942, - "loss": 46.0, - "step": 47620 - }, - { - "epoch": 3.640958006001873, - "grad_norm": 0.000881242158357054, - "learning_rate": 0.0001999934609507988, - "loss": 46.0, - "step": 47621 - }, - { - "epoch": 3.641034462985263, - "grad_norm": 0.0014947558520361781, - "learning_rate": 0.00019999346067611242, - "loss": 46.0, - "step": 47622 - }, - { - "epoch": 3.6411109199686527, - "grad_norm": 0.0005922022392041981, - "learning_rate": 0.00019999346040142028, - "loss": 46.0, - "step": 47623 - }, - { - "epoch": 3.6411873769520424, - "grad_norm": 0.0012894232058897614, - "learning_rate": 0.00019999346012672238, - "loss": 46.0, - "step": 47624 - }, - { - "epoch": 3.641263833935432, - "grad_norm": 0.0015524973860010505, - "learning_rate": 0.00019999345985201867, - "loss": 46.0, - "step": 47625 - }, - { - "epoch": 3.641340290918822, - "grad_norm": 0.001231900416314602, - "learning_rate": 0.0001999934595773092, - "loss": 46.0, - "step": 47626 - }, - { - "epoch": 3.6414167479022117, - "grad_norm": 0.0026711379177868366, - "learning_rate": 0.00019999345930259399, - "loss": 46.0, - "step": 47627 - }, - { - "epoch": 3.6414932048856015, - "grad_norm": 0.0018887639744207263, - "learning_rate": 0.000199993459027873, - "loss": 46.0, - "step": 47628 - }, - { - "epoch": 3.6415696618689912, - "grad_norm": 0.0022163440007716417, - "learning_rate": 0.00019999345875314624, - "loss": 46.0, - "step": 47629 - }, - { - "epoch": 3.6416461188523805, - "grad_norm": 0.001590995118021965, - "learning_rate": 0.00019999345847841372, - "loss": 46.0, - "step": 47630 - }, - { - "epoch": 3.6417225758357703, - "grad_norm": 0.0016796269919723272, - "learning_rate": 0.00019999345820367542, - "loss": 46.0, - "step": 47631 - }, - { - "epoch": 3.64179903281916, - "grad_norm": 0.0030253541190177202, - "learning_rate": 0.00019999345792893133, - "loss": 46.0, - "step": 47632 - }, - { - "epoch": 3.64187548980255, - "grad_norm": 0.0029934654012322426, - "learning_rate": 0.0001999934576541815, - "loss": 46.0, - "step": 47633 - }, - { - "epoch": 3.6419519467859396, - "grad_norm": 0.0030152606777846813, - "learning_rate": 0.0001999934573794259, - "loss": 46.0, - "step": 47634 - }, - { - "epoch": 3.6420284037693293, - "grad_norm": 0.0016618386143818498, - "learning_rate": 0.0001999934571046645, - "loss": 46.0, - "step": 47635 - }, - { - "epoch": 3.642104860752719, - "grad_norm": 0.002503752475604415, - "learning_rate": 0.00019999345682989735, - "loss": 46.0, - "step": 47636 - }, - { - "epoch": 3.642181317736109, - "grad_norm": 0.0026403041556477547, - "learning_rate": 0.00019999345655512444, - "loss": 46.0, - "step": 47637 - }, - { - "epoch": 3.642257774719498, - "grad_norm": 0.00218627299182117, - "learning_rate": 0.00019999345628034576, - "loss": 46.0, - "step": 47638 - }, - { - "epoch": 3.642334231702888, - "grad_norm": 0.0033590688835829496, - "learning_rate": 0.0001999934560055613, - "loss": 46.0, - "step": 47639 - }, - { - "epoch": 3.6424106886862777, - "grad_norm": 0.0028649545274674892, - "learning_rate": 0.00019999345573077108, - "loss": 46.0, - "step": 47640 - }, - { - "epoch": 3.6424871456696675, - "grad_norm": 0.001072224578820169, - "learning_rate": 0.0001999934554559751, - "loss": 46.0, - "step": 47641 - }, - { - "epoch": 3.6425636026530572, - "grad_norm": 0.0032022055238485336, - "learning_rate": 0.00019999345518117333, - "loss": 46.0, - "step": 47642 - }, - { - "epoch": 3.642640059636447, - "grad_norm": 0.0015884184977039695, - "learning_rate": 0.0001999934549063658, - "loss": 46.0, - "step": 47643 - }, - { - "epoch": 3.6427165166198368, - "grad_norm": 0.0021473481319844723, - "learning_rate": 0.0001999934546315525, - "loss": 46.0, - "step": 47644 - }, - { - "epoch": 3.6427929736032265, - "grad_norm": 0.005108244251459837, - "learning_rate": 0.00019999345435673345, - "loss": 46.0, - "step": 47645 - }, - { - "epoch": 3.6428694305866163, - "grad_norm": 0.0014162365114316344, - "learning_rate": 0.0001999934540819086, - "loss": 46.0, - "step": 47646 - }, - { - "epoch": 3.642945887570006, - "grad_norm": 0.00281914253719151, - "learning_rate": 0.000199993453807078, - "loss": 46.0, - "step": 47647 - }, - { - "epoch": 3.643022344553396, - "grad_norm": 0.0020775776356458664, - "learning_rate": 0.0001999934535322416, - "loss": 46.0, - "step": 47648 - }, - { - "epoch": 3.6430988015367856, - "grad_norm": 0.0009017596021294594, - "learning_rate": 0.00019999345325739948, - "loss": 46.0, - "step": 47649 - }, - { - "epoch": 3.6431752585201753, - "grad_norm": 0.002380523830652237, - "learning_rate": 0.00019999345298255157, - "loss": 46.0, - "step": 47650 - }, - { - "epoch": 3.643251715503565, - "grad_norm": 0.0029388549737632275, - "learning_rate": 0.00019999345270769786, - "loss": 46.0, - "step": 47651 - }, - { - "epoch": 3.6433281724869544, - "grad_norm": 0.00310456077568233, - "learning_rate": 0.00019999345243283843, - "loss": 46.0, - "step": 47652 - }, - { - "epoch": 3.643404629470344, - "grad_norm": 0.001864472753368318, - "learning_rate": 0.0001999934521579732, - "loss": 46.0, - "step": 47653 - }, - { - "epoch": 3.643481086453734, - "grad_norm": 0.005107543896883726, - "learning_rate": 0.0001999934518831022, - "loss": 46.0, - "step": 47654 - }, - { - "epoch": 3.6435575434371237, - "grad_norm": 0.0014210341032594442, - "learning_rate": 0.00019999345160822543, - "loss": 46.0, - "step": 47655 - }, - { - "epoch": 3.6436340004205134, - "grad_norm": 0.0008182196761481464, - "learning_rate": 0.00019999345133334294, - "loss": 46.0, - "step": 47656 - }, - { - "epoch": 3.643710457403903, - "grad_norm": 0.002491212449967861, - "learning_rate": 0.00019999345105845465, - "loss": 46.0, - "step": 47657 - }, - { - "epoch": 3.643786914387293, - "grad_norm": 0.003380777547135949, - "learning_rate": 0.00019999345078356058, - "loss": 46.0, - "step": 47658 - }, - { - "epoch": 3.6438633713706827, - "grad_norm": 0.0012305903946980834, - "learning_rate": 0.00019999345050866074, - "loss": 46.0, - "step": 47659 - }, - { - "epoch": 3.643939828354072, - "grad_norm": 0.0013567740097641945, - "learning_rate": 0.00019999345023375513, - "loss": 46.0, - "step": 47660 - }, - { - "epoch": 3.644016285337462, - "grad_norm": 0.003648675512522459, - "learning_rate": 0.00019999344995884374, - "loss": 46.0, - "step": 47661 - }, - { - "epoch": 3.6440927423208516, - "grad_norm": 0.0023186441976577044, - "learning_rate": 0.0001999934496839266, - "loss": 46.0, - "step": 47662 - }, - { - "epoch": 3.6441691993042413, - "grad_norm": 0.0007946082623675466, - "learning_rate": 0.0001999934494090037, - "loss": 46.0, - "step": 47663 - }, - { - "epoch": 3.644245656287631, - "grad_norm": 0.0011176541447639465, - "learning_rate": 0.00019999344913407502, - "loss": 46.0, - "step": 47664 - }, - { - "epoch": 3.644322113271021, - "grad_norm": 0.002799355424940586, - "learning_rate": 0.00019999344885914056, - "loss": 46.0, - "step": 47665 - }, - { - "epoch": 3.6443985702544106, - "grad_norm": 0.0014208493521437049, - "learning_rate": 0.00019999344858420034, - "loss": 46.0, - "step": 47666 - }, - { - "epoch": 3.6444750272378004, - "grad_norm": 0.001411773613654077, - "learning_rate": 0.00019999344830925434, - "loss": 46.0, - "step": 47667 - }, - { - "epoch": 3.64455148422119, - "grad_norm": 0.0031203078106045723, - "learning_rate": 0.0001999934480343026, - "loss": 46.0, - "step": 47668 - }, - { - "epoch": 3.64462794120458, - "grad_norm": 0.0026555671356618404, - "learning_rate": 0.00019999344775934507, - "loss": 46.0, - "step": 47669 - }, - { - "epoch": 3.6447043981879697, - "grad_norm": 0.0029595624655485153, - "learning_rate": 0.00019999344748438178, - "loss": 46.0, - "step": 47670 - }, - { - "epoch": 3.6447808551713594, - "grad_norm": 0.0009205969981849194, - "learning_rate": 0.0001999934472094127, - "loss": 46.0, - "step": 47671 - }, - { - "epoch": 3.644857312154749, - "grad_norm": 0.0015713887987658381, - "learning_rate": 0.0001999934469344379, - "loss": 46.0, - "step": 47672 - }, - { - "epoch": 3.6449337691381385, - "grad_norm": 0.004887838382273912, - "learning_rate": 0.00019999344665945729, - "loss": 46.0, - "step": 47673 - }, - { - "epoch": 3.6450102261215283, - "grad_norm": 0.0013900778722018003, - "learning_rate": 0.0001999934463844709, - "loss": 46.0, - "step": 47674 - }, - { - "epoch": 3.645086683104918, - "grad_norm": 0.0010336030973121524, - "learning_rate": 0.0001999934461094788, - "loss": 46.0, - "step": 47675 - }, - { - "epoch": 3.6451631400883078, - "grad_norm": 0.001061112037859857, - "learning_rate": 0.00019999344583448086, - "loss": 46.0, - "step": 47676 - }, - { - "epoch": 3.6452395970716975, - "grad_norm": 0.0016902538482099771, - "learning_rate": 0.00019999344555947718, - "loss": 46.0, - "step": 47677 - }, - { - "epoch": 3.6453160540550873, - "grad_norm": 0.0022786823101341724, - "learning_rate": 0.00019999344528446775, - "loss": 46.0, - "step": 47678 - }, - { - "epoch": 3.645392511038477, - "grad_norm": 0.0006105888751335442, - "learning_rate": 0.00019999344500945253, - "loss": 46.0, - "step": 47679 - }, - { - "epoch": 3.645468968021867, - "grad_norm": 0.002234566491097212, - "learning_rate": 0.00019999344473443153, - "loss": 46.0, - "step": 47680 - }, - { - "epoch": 3.6455454250052566, - "grad_norm": 0.00117902597412467, - "learning_rate": 0.00019999344445940478, - "loss": 46.0, - "step": 47681 - }, - { - "epoch": 3.645621881988646, - "grad_norm": 0.002178714144974947, - "learning_rate": 0.00019999344418437226, - "loss": 46.0, - "step": 47682 - }, - { - "epoch": 3.6456983389720357, - "grad_norm": 0.0022162015084177256, - "learning_rate": 0.00019999344390933397, - "loss": 46.0, - "step": 47683 - }, - { - "epoch": 3.6457747959554254, - "grad_norm": 0.0009124897769652307, - "learning_rate": 0.0001999934436342899, - "loss": 46.0, - "step": 47684 - }, - { - "epoch": 3.645851252938815, - "grad_norm": 0.0014934351202100515, - "learning_rate": 0.0001999934433592401, - "loss": 46.0, - "step": 47685 - }, - { - "epoch": 3.645927709922205, - "grad_norm": 0.002741024596616626, - "learning_rate": 0.00019999344308418445, - "loss": 46.0, - "step": 47686 - }, - { - "epoch": 3.6460041669055947, - "grad_norm": 0.0014128170441836119, - "learning_rate": 0.00019999344280912312, - "loss": 46.0, - "step": 47687 - }, - { - "epoch": 3.6460806238889845, - "grad_norm": 0.002291157841682434, - "learning_rate": 0.00019999344253405596, - "loss": 46.0, - "step": 47688 - }, - { - "epoch": 3.6461570808723742, - "grad_norm": 0.001676909625530243, - "learning_rate": 0.00019999344225898305, - "loss": 46.0, - "step": 47689 - }, - { - "epoch": 3.646233537855764, - "grad_norm": 0.0005470478790812194, - "learning_rate": 0.00019999344198390437, - "loss": 46.0, - "step": 47690 - }, - { - "epoch": 3.6463099948391537, - "grad_norm": 0.0012233055895194411, - "learning_rate": 0.00019999344170881992, - "loss": 46.0, - "step": 47691 - }, - { - "epoch": 3.6463864518225435, - "grad_norm": 0.001965420786291361, - "learning_rate": 0.00019999344143372972, - "loss": 46.0, - "step": 47692 - }, - { - "epoch": 3.6464629088059333, - "grad_norm": 0.0013269162736833096, - "learning_rate": 0.00019999344115863372, - "loss": 46.0, - "step": 47693 - }, - { - "epoch": 3.646539365789323, - "grad_norm": 0.003292139619588852, - "learning_rate": 0.00019999344088353197, - "loss": 46.0, - "step": 47694 - }, - { - "epoch": 3.6466158227727123, - "grad_norm": 0.002590600633993745, - "learning_rate": 0.00019999344060842446, - "loss": 46.0, - "step": 47695 - }, - { - "epoch": 3.646692279756102, - "grad_norm": 0.001371645601466298, - "learning_rate": 0.00019999344033331116, - "loss": 46.0, - "step": 47696 - }, - { - "epoch": 3.646768736739492, - "grad_norm": 0.005140399560332298, - "learning_rate": 0.00019999344005819212, - "loss": 46.0, - "step": 47697 - }, - { - "epoch": 3.6468451937228816, - "grad_norm": 0.0006940077873878181, - "learning_rate": 0.00019999343978306728, - "loss": 46.0, - "step": 47698 - }, - { - "epoch": 3.6469216507062714, - "grad_norm": 0.004302518907934427, - "learning_rate": 0.00019999343950793667, - "loss": 46.0, - "step": 47699 - }, - { - "epoch": 3.646998107689661, - "grad_norm": 0.0025637801736593246, - "learning_rate": 0.0001999934392328003, - "loss": 46.0, - "step": 47700 - }, - { - "epoch": 3.647074564673051, - "grad_norm": 0.003288046456873417, - "learning_rate": 0.00019999343895765818, - "loss": 46.0, - "step": 47701 - }, - { - "epoch": 3.6471510216564407, - "grad_norm": 0.002868850715458393, - "learning_rate": 0.00019999343868251027, - "loss": 46.0, - "step": 47702 - }, - { - "epoch": 3.64722747863983, - "grad_norm": 0.0009547343361191452, - "learning_rate": 0.0001999934384073566, - "loss": 46.0, - "step": 47703 - }, - { - "epoch": 3.6473039356232198, - "grad_norm": 0.004754739347845316, - "learning_rate": 0.00019999343813219714, - "loss": 46.0, - "step": 47704 - }, - { - "epoch": 3.6473803926066095, - "grad_norm": 0.002491709077730775, - "learning_rate": 0.00019999343785703192, - "loss": 46.0, - "step": 47705 - }, - { - "epoch": 3.6474568495899993, - "grad_norm": 0.0023551075719296932, - "learning_rate": 0.00019999343758186097, - "loss": 46.0, - "step": 47706 - }, - { - "epoch": 3.647533306573389, - "grad_norm": 0.004635291639715433, - "learning_rate": 0.0001999934373066842, - "loss": 46.0, - "step": 47707 - }, - { - "epoch": 3.647609763556779, - "grad_norm": 0.000653390830848366, - "learning_rate": 0.00019999343703150168, - "loss": 46.0, - "step": 47708 - }, - { - "epoch": 3.6476862205401686, - "grad_norm": 0.0018211125861853361, - "learning_rate": 0.0001999934367563134, - "loss": 46.0, - "step": 47709 - }, - { - "epoch": 3.6477626775235583, - "grad_norm": 0.0011218676809221506, - "learning_rate": 0.00019999343648111935, - "loss": 46.0, - "step": 47710 - }, - { - "epoch": 3.647839134506948, - "grad_norm": 0.0043051172979176044, - "learning_rate": 0.00019999343620591954, - "loss": 46.0, - "step": 47711 - }, - { - "epoch": 3.647915591490338, - "grad_norm": 0.0017796941101551056, - "learning_rate": 0.00019999343593071392, - "loss": 46.0, - "step": 47712 - }, - { - "epoch": 3.6479920484737276, - "grad_norm": 0.004201495088636875, - "learning_rate": 0.00019999343565550257, - "loss": 46.0, - "step": 47713 - }, - { - "epoch": 3.6480685054571174, - "grad_norm": 0.0018534999107941985, - "learning_rate": 0.00019999343538028543, - "loss": 46.0, - "step": 47714 - }, - { - "epoch": 3.648144962440507, - "grad_norm": 0.002223034854978323, - "learning_rate": 0.00019999343510506253, - "loss": 46.0, - "step": 47715 - }, - { - "epoch": 3.648221419423897, - "grad_norm": 0.0036811749450862408, - "learning_rate": 0.00019999343482983385, - "loss": 46.0, - "step": 47716 - }, - { - "epoch": 3.648297876407286, - "grad_norm": 0.003338853595778346, - "learning_rate": 0.0001999934345545994, - "loss": 46.0, - "step": 47717 - }, - { - "epoch": 3.648374333390676, - "grad_norm": 0.0030563834588974714, - "learning_rate": 0.0001999934342793592, - "loss": 46.0, - "step": 47718 - }, - { - "epoch": 3.6484507903740657, - "grad_norm": 0.005340575706213713, - "learning_rate": 0.0001999934340041132, - "loss": 46.0, - "step": 47719 - }, - { - "epoch": 3.6485272473574555, - "grad_norm": 0.0044814208522439, - "learning_rate": 0.00019999343372886146, - "loss": 46.0, - "step": 47720 - }, - { - "epoch": 3.6486037043408452, - "grad_norm": 0.001382531481795013, - "learning_rate": 0.00019999343345360396, - "loss": 46.0, - "step": 47721 - }, - { - "epoch": 3.648680161324235, - "grad_norm": 0.001145056216046214, - "learning_rate": 0.00019999343317834067, - "loss": 46.0, - "step": 47722 - }, - { - "epoch": 3.6487566183076248, - "grad_norm": 0.003555477363988757, - "learning_rate": 0.0001999934329030716, - "loss": 46.0, - "step": 47723 - }, - { - "epoch": 3.6488330752910145, - "grad_norm": 0.0016626415308564901, - "learning_rate": 0.00019999343262779677, - "loss": 46.0, - "step": 47724 - }, - { - "epoch": 3.648909532274404, - "grad_norm": 0.0016413347329944372, - "learning_rate": 0.0001999934323525162, - "loss": 46.0, - "step": 47725 - }, - { - "epoch": 3.6489859892577936, - "grad_norm": 0.010179664008319378, - "learning_rate": 0.00019999343207722982, - "loss": 46.0, - "step": 47726 - }, - { - "epoch": 3.6490624462411834, - "grad_norm": 0.004473886918276548, - "learning_rate": 0.0001999934318019377, - "loss": 46.0, - "step": 47727 - }, - { - "epoch": 3.649138903224573, - "grad_norm": 0.009979039430618286, - "learning_rate": 0.0001999934315266398, - "loss": 46.0, - "step": 47728 - }, - { - "epoch": 3.649215360207963, - "grad_norm": 0.0033817191142588854, - "learning_rate": 0.0001999934312513361, - "loss": 46.0, - "step": 47729 - }, - { - "epoch": 3.6492918171913526, - "grad_norm": 0.0022713039070367813, - "learning_rate": 0.00019999343097602668, - "loss": 46.0, - "step": 47730 - }, - { - "epoch": 3.6493682741747424, - "grad_norm": 0.002307408954948187, - "learning_rate": 0.00019999343070071146, - "loss": 46.0, - "step": 47731 - }, - { - "epoch": 3.649444731158132, - "grad_norm": 0.0022228495217859745, - "learning_rate": 0.0001999934304253905, - "loss": 46.0, - "step": 47732 - }, - { - "epoch": 3.649521188141522, - "grad_norm": 0.0015586961526423693, - "learning_rate": 0.00019999343015006374, - "loss": 46.0, - "step": 47733 - }, - { - "epoch": 3.6495976451249117, - "grad_norm": 0.0032822387292981148, - "learning_rate": 0.00019999342987473125, - "loss": 46.0, - "step": 47734 - }, - { - "epoch": 3.6496741021083015, - "grad_norm": 0.002411150373518467, - "learning_rate": 0.00019999342959939296, - "loss": 46.0, - "step": 47735 - }, - { - "epoch": 3.649750559091691, - "grad_norm": 0.0018034750828519464, - "learning_rate": 0.0001999934293240489, - "loss": 46.0, - "step": 47736 - }, - { - "epoch": 3.649827016075081, - "grad_norm": 0.003922856412827969, - "learning_rate": 0.00019999342904869906, - "loss": 46.0, - "step": 47737 - }, - { - "epoch": 3.6499034730584707, - "grad_norm": 0.0019520603818818927, - "learning_rate": 0.00019999342877334347, - "loss": 46.0, - "step": 47738 - }, - { - "epoch": 3.64997993004186, - "grad_norm": 0.0019292542710900307, - "learning_rate": 0.00019999342849798214, - "loss": 46.0, - "step": 47739 - }, - { - "epoch": 3.65005638702525, - "grad_norm": 0.001201114384457469, - "learning_rate": 0.00019999342822261498, - "loss": 46.0, - "step": 47740 - }, - { - "epoch": 3.6501328440086396, - "grad_norm": 0.0035335260909050703, - "learning_rate": 0.0001999934279472421, - "loss": 46.0, - "step": 47741 - }, - { - "epoch": 3.6502093009920293, - "grad_norm": 0.0027000131085515022, - "learning_rate": 0.0001999934276718634, - "loss": 46.0, - "step": 47742 - }, - { - "epoch": 3.650285757975419, - "grad_norm": 0.004167946521192789, - "learning_rate": 0.00019999342739647897, - "loss": 46.0, - "step": 47743 - }, - { - "epoch": 3.650362214958809, - "grad_norm": 0.0009979866445064545, - "learning_rate": 0.00019999342712108878, - "loss": 46.0, - "step": 47744 - }, - { - "epoch": 3.6504386719421986, - "grad_norm": 0.0026974081993103027, - "learning_rate": 0.0001999934268456928, - "loss": 46.0, - "step": 47745 - }, - { - "epoch": 3.6505151289255884, - "grad_norm": 0.0009071906097233295, - "learning_rate": 0.00019999342657029106, - "loss": 46.0, - "step": 47746 - }, - { - "epoch": 3.6505915859089777, - "grad_norm": 0.00834709219634533, - "learning_rate": 0.00019999342629488354, - "loss": 46.0, - "step": 47747 - }, - { - "epoch": 3.6506680428923675, - "grad_norm": 0.002150040352717042, - "learning_rate": 0.00019999342601947025, - "loss": 46.0, - "step": 47748 - }, - { - "epoch": 3.650744499875757, - "grad_norm": 0.004443623591214418, - "learning_rate": 0.0001999934257440512, - "loss": 46.0, - "step": 47749 - }, - { - "epoch": 3.650820956859147, - "grad_norm": 0.002520909532904625, - "learning_rate": 0.00019999342546862638, - "loss": 46.0, - "step": 47750 - }, - { - "epoch": 3.6508974138425367, - "grad_norm": 0.004866630304604769, - "learning_rate": 0.0001999934251931958, - "loss": 46.0, - "step": 47751 - }, - { - "epoch": 3.6509738708259265, - "grad_norm": 0.0007708751363679767, - "learning_rate": 0.00019999342491775943, - "loss": 46.0, - "step": 47752 - }, - { - "epoch": 3.6510503278093163, - "grad_norm": 0.0015716284979134798, - "learning_rate": 0.00019999342464231733, - "loss": 46.0, - "step": 47753 - }, - { - "epoch": 3.651126784792706, - "grad_norm": 0.003939527086913586, - "learning_rate": 0.0001999934243668694, - "loss": 46.0, - "step": 47754 - }, - { - "epoch": 3.651203241776096, - "grad_norm": 0.002039143582805991, - "learning_rate": 0.00019999342409141572, - "loss": 46.0, - "step": 47755 - }, - { - "epoch": 3.6512796987594855, - "grad_norm": 0.0017836353508755565, - "learning_rate": 0.0001999934238159563, - "loss": 46.0, - "step": 47756 - }, - { - "epoch": 3.6513561557428753, - "grad_norm": 0.0008404995314776897, - "learning_rate": 0.0001999934235404911, - "loss": 46.0, - "step": 47757 - }, - { - "epoch": 3.651432612726265, - "grad_norm": 0.0032451471779495478, - "learning_rate": 0.00019999342326502013, - "loss": 46.0, - "step": 47758 - }, - { - "epoch": 3.651509069709655, - "grad_norm": 0.0032415452878922224, - "learning_rate": 0.00019999342298954338, - "loss": 46.0, - "step": 47759 - }, - { - "epoch": 3.6515855266930446, - "grad_norm": 0.0021821290720254183, - "learning_rate": 0.00019999342271406087, - "loss": 46.0, - "step": 47760 - }, - { - "epoch": 3.651661983676434, - "grad_norm": 0.0017507910961285233, - "learning_rate": 0.00019999342243857257, - "loss": 46.0, - "step": 47761 - }, - { - "epoch": 3.6517384406598237, - "grad_norm": 0.0016612119507044554, - "learning_rate": 0.00019999342216307854, - "loss": 46.0, - "step": 47762 - }, - { - "epoch": 3.6518148976432134, - "grad_norm": 0.001398536842316389, - "learning_rate": 0.00019999342188757873, - "loss": 46.0, - "step": 47763 - }, - { - "epoch": 3.651891354626603, - "grad_norm": 0.002227661432698369, - "learning_rate": 0.00019999342161207312, - "loss": 46.0, - "step": 47764 - }, - { - "epoch": 3.651967811609993, - "grad_norm": 0.003660982707515359, - "learning_rate": 0.00019999342133656178, - "loss": 46.0, - "step": 47765 - }, - { - "epoch": 3.6520442685933827, - "grad_norm": 0.0019833461847156286, - "learning_rate": 0.00019999342106104465, - "loss": 46.0, - "step": 47766 - }, - { - "epoch": 3.6521207255767725, - "grad_norm": 0.0035065049305558205, - "learning_rate": 0.00019999342078552178, - "loss": 46.0, - "step": 47767 - }, - { - "epoch": 3.6521971825601622, - "grad_norm": 0.001908307196572423, - "learning_rate": 0.0001999934205099931, - "loss": 46.0, - "step": 47768 - }, - { - "epoch": 3.6522736395435516, - "grad_norm": 0.0031432416290044785, - "learning_rate": 0.00019999342023445865, - "loss": 46.0, - "step": 47769 - }, - { - "epoch": 3.6523500965269413, - "grad_norm": 0.002135820686817169, - "learning_rate": 0.00019999341995891845, - "loss": 46.0, - "step": 47770 - }, - { - "epoch": 3.652426553510331, - "grad_norm": 0.00328750885091722, - "learning_rate": 0.00019999341968337248, - "loss": 46.0, - "step": 47771 - }, - { - "epoch": 3.652503010493721, - "grad_norm": 0.008645646274089813, - "learning_rate": 0.00019999341940782074, - "loss": 46.0, - "step": 47772 - }, - { - "epoch": 3.6525794674771106, - "grad_norm": 0.002184393350034952, - "learning_rate": 0.00019999341913226322, - "loss": 46.0, - "step": 47773 - }, - { - "epoch": 3.6526559244605004, - "grad_norm": 0.002336875069886446, - "learning_rate": 0.00019999341885669996, - "loss": 46.0, - "step": 47774 - }, - { - "epoch": 3.65273238144389, - "grad_norm": 0.0015706915874034166, - "learning_rate": 0.00019999341858113092, - "loss": 46.0, - "step": 47775 - }, - { - "epoch": 3.65280883842728, - "grad_norm": 0.0008334586163982749, - "learning_rate": 0.00019999341830555608, - "loss": 46.0, - "step": 47776 - }, - { - "epoch": 3.6528852954106696, - "grad_norm": 0.0023207198828458786, - "learning_rate": 0.0001999934180299755, - "loss": 46.0, - "step": 47777 - }, - { - "epoch": 3.6529617523940594, - "grad_norm": 0.001355098094791174, - "learning_rate": 0.00019999341775438914, - "loss": 46.0, - "step": 47778 - }, - { - "epoch": 3.653038209377449, - "grad_norm": 0.002461744239553809, - "learning_rate": 0.000199993417478797, - "loss": 46.0, - "step": 47779 - }, - { - "epoch": 3.653114666360839, - "grad_norm": 0.002446526661515236, - "learning_rate": 0.00019999341720319913, - "loss": 46.0, - "step": 47780 - }, - { - "epoch": 3.6531911233442287, - "grad_norm": 0.0015633394941687584, - "learning_rate": 0.00019999341692759548, - "loss": 46.0, - "step": 47781 - }, - { - "epoch": 3.6532675803276184, - "grad_norm": 0.0014555988600477576, - "learning_rate": 0.00019999341665198603, - "loss": 46.0, - "step": 47782 - }, - { - "epoch": 3.6533440373110078, - "grad_norm": 0.002532979706302285, - "learning_rate": 0.00019999341637637084, - "loss": 46.0, - "step": 47783 - }, - { - "epoch": 3.6534204942943975, - "grad_norm": 0.0019914903677999973, - "learning_rate": 0.00019999341610074987, - "loss": 46.0, - "step": 47784 - }, - { - "epoch": 3.6534969512777873, - "grad_norm": 0.003524721134454012, - "learning_rate": 0.00019999341582512315, - "loss": 46.0, - "step": 47785 - }, - { - "epoch": 3.653573408261177, - "grad_norm": 0.00334327039308846, - "learning_rate": 0.0001999934155494906, - "loss": 46.0, - "step": 47786 - }, - { - "epoch": 3.653649865244567, - "grad_norm": 0.0009810846531763673, - "learning_rate": 0.00019999341527385234, - "loss": 46.0, - "step": 47787 - }, - { - "epoch": 3.6537263222279566, - "grad_norm": 0.0010697762481868267, - "learning_rate": 0.0001999934149982083, - "loss": 46.0, - "step": 47788 - }, - { - "epoch": 3.6538027792113463, - "grad_norm": 0.001967029646039009, - "learning_rate": 0.00019999341472255847, - "loss": 46.0, - "step": 47789 - }, - { - "epoch": 3.653879236194736, - "grad_norm": 0.0017179121496155858, - "learning_rate": 0.00019999341444690288, - "loss": 46.0, - "step": 47790 - }, - { - "epoch": 3.6539556931781254, - "grad_norm": 0.0009905846090987325, - "learning_rate": 0.00019999341417124153, - "loss": 46.0, - "step": 47791 - }, - { - "epoch": 3.654032150161515, - "grad_norm": 0.0037586409598588943, - "learning_rate": 0.00019999341389557442, - "loss": 46.0, - "step": 47792 - }, - { - "epoch": 3.654108607144905, - "grad_norm": 0.00219179829582572, - "learning_rate": 0.00019999341361990155, - "loss": 46.0, - "step": 47793 - }, - { - "epoch": 3.6541850641282947, - "grad_norm": 0.0025964563246816397, - "learning_rate": 0.00019999341334422287, - "loss": 46.0, - "step": 47794 - }, - { - "epoch": 3.6542615211116845, - "grad_norm": 0.0013601521495729685, - "learning_rate": 0.00019999341306853845, - "loss": 46.0, - "step": 47795 - }, - { - "epoch": 3.654337978095074, - "grad_norm": 0.003588613588362932, - "learning_rate": 0.00019999341279284825, - "loss": 46.0, - "step": 47796 - }, - { - "epoch": 3.654414435078464, - "grad_norm": 0.004260408692061901, - "learning_rate": 0.00019999341251715228, - "loss": 46.0, - "step": 47797 - }, - { - "epoch": 3.6544908920618537, - "grad_norm": 0.0023365281522274017, - "learning_rate": 0.00019999341224145054, - "loss": 46.0, - "step": 47798 - }, - { - "epoch": 3.6545673490452435, - "grad_norm": 0.002064472995698452, - "learning_rate": 0.00019999341196574305, - "loss": 46.0, - "step": 47799 - }, - { - "epoch": 3.6546438060286333, - "grad_norm": 0.001487192464992404, - "learning_rate": 0.00019999341169002976, - "loss": 46.0, - "step": 47800 - }, - { - "epoch": 3.654720263012023, - "grad_norm": 0.004042519256472588, - "learning_rate": 0.00019999341141431072, - "loss": 46.0, - "step": 47801 - }, - { - "epoch": 3.654796719995413, - "grad_norm": 0.001744654611684382, - "learning_rate": 0.00019999341113858588, - "loss": 46.0, - "step": 47802 - }, - { - "epoch": 3.6548731769788025, - "grad_norm": 0.0016182203544303775, - "learning_rate": 0.00019999341086285533, - "loss": 46.0, - "step": 47803 - }, - { - "epoch": 3.654949633962192, - "grad_norm": 0.0014370906865224242, - "learning_rate": 0.00019999341058711897, - "loss": 46.0, - "step": 47804 - }, - { - "epoch": 3.6550260909455816, - "grad_norm": 0.0024650858249515295, - "learning_rate": 0.00019999341031137684, - "loss": 46.0, - "step": 47805 - }, - { - "epoch": 3.6551025479289714, - "grad_norm": 0.0069202883169054985, - "learning_rate": 0.00019999341003562897, - "loss": 46.0, - "step": 47806 - }, - { - "epoch": 3.655179004912361, - "grad_norm": 0.0024855954106897116, - "learning_rate": 0.00019999340975987532, - "loss": 46.0, - "step": 47807 - }, - { - "epoch": 3.655255461895751, - "grad_norm": 0.0023160616401582956, - "learning_rate": 0.00019999340948411587, - "loss": 46.0, - "step": 47808 - }, - { - "epoch": 3.6553319188791407, - "grad_norm": 0.0015739668160676956, - "learning_rate": 0.00019999340920835067, - "loss": 46.0, - "step": 47809 - }, - { - "epoch": 3.6554083758625304, - "grad_norm": 0.0017326497472822666, - "learning_rate": 0.00019999340893257973, - "loss": 46.0, - "step": 47810 - }, - { - "epoch": 3.65548483284592, - "grad_norm": 0.0007422473863698542, - "learning_rate": 0.00019999340865680298, - "loss": 46.0, - "step": 47811 - }, - { - "epoch": 3.65556128982931, - "grad_norm": 0.00801160465925932, - "learning_rate": 0.00019999340838102047, - "loss": 46.0, - "step": 47812 - }, - { - "epoch": 3.6556377468126993, - "grad_norm": 0.003417121944949031, - "learning_rate": 0.0001999934081052322, - "loss": 46.0, - "step": 47813 - }, - { - "epoch": 3.655714203796089, - "grad_norm": 0.00772092305123806, - "learning_rate": 0.00019999340782943817, - "loss": 46.0, - "step": 47814 - }, - { - "epoch": 3.655790660779479, - "grad_norm": 0.001235006027854979, - "learning_rate": 0.00019999340755363836, - "loss": 46.0, - "step": 47815 - }, - { - "epoch": 3.6558671177628685, - "grad_norm": 0.0017201746813952923, - "learning_rate": 0.00019999340727783275, - "loss": 46.0, - "step": 47816 - }, - { - "epoch": 3.6559435747462583, - "grad_norm": 0.0022517256438732147, - "learning_rate": 0.00019999340700202142, - "loss": 46.0, - "step": 47817 - }, - { - "epoch": 3.656020031729648, - "grad_norm": 0.0011795066529884934, - "learning_rate": 0.0001999934067262043, - "loss": 46.0, - "step": 47818 - }, - { - "epoch": 3.656096488713038, - "grad_norm": 0.0009681026567704976, - "learning_rate": 0.00019999340645038142, - "loss": 46.0, - "step": 47819 - }, - { - "epoch": 3.6561729456964276, - "grad_norm": 0.0009234437020495534, - "learning_rate": 0.00019999340617455277, - "loss": 46.0, - "step": 47820 - }, - { - "epoch": 3.6562494026798174, - "grad_norm": 0.0012534464476630092, - "learning_rate": 0.00019999340589871834, - "loss": 46.0, - "step": 47821 - }, - { - "epoch": 3.656325859663207, - "grad_norm": 0.0051234387792646885, - "learning_rate": 0.00019999340562287815, - "loss": 46.0, - "step": 47822 - }, - { - "epoch": 3.656402316646597, - "grad_norm": 0.002360963262617588, - "learning_rate": 0.00019999340534703218, - "loss": 46.0, - "step": 47823 - }, - { - "epoch": 3.6564787736299866, - "grad_norm": 0.0018381357658654451, - "learning_rate": 0.00019999340507118044, - "loss": 46.0, - "step": 47824 - }, - { - "epoch": 3.6565552306133764, - "grad_norm": 0.0019373467657715082, - "learning_rate": 0.00019999340479532295, - "loss": 46.0, - "step": 47825 - }, - { - "epoch": 3.6566316875967657, - "grad_norm": 0.0025482310447841883, - "learning_rate": 0.00019999340451945966, - "loss": 46.0, - "step": 47826 - }, - { - "epoch": 3.6567081445801555, - "grad_norm": 0.0012002121657133102, - "learning_rate": 0.00019999340424359062, - "loss": 46.0, - "step": 47827 - }, - { - "epoch": 3.6567846015635452, - "grad_norm": 0.003962427377700806, - "learning_rate": 0.0001999934039677158, - "loss": 46.0, - "step": 47828 - }, - { - "epoch": 3.656861058546935, - "grad_norm": 0.0018632502760738134, - "learning_rate": 0.00019999340369183526, - "loss": 46.0, - "step": 47829 - }, - { - "epoch": 3.6569375155303248, - "grad_norm": 0.0022463444620370865, - "learning_rate": 0.0001999934034159489, - "loss": 46.0, - "step": 47830 - }, - { - "epoch": 3.6570139725137145, - "grad_norm": 0.0019852742552757263, - "learning_rate": 0.00019999340314005677, - "loss": 46.0, - "step": 47831 - }, - { - "epoch": 3.6570904294971043, - "grad_norm": 0.00528929615393281, - "learning_rate": 0.0001999934028641589, - "loss": 46.0, - "step": 47832 - }, - { - "epoch": 3.657166886480494, - "grad_norm": 0.0008439435623586178, - "learning_rate": 0.00019999340258825522, - "loss": 46.0, - "step": 47833 - }, - { - "epoch": 3.6572433434638834, - "grad_norm": 0.0008243733900599182, - "learning_rate": 0.00019999340231234583, - "loss": 46.0, - "step": 47834 - }, - { - "epoch": 3.657319800447273, - "grad_norm": 0.0029085951391607523, - "learning_rate": 0.00019999340203643063, - "loss": 46.0, - "step": 47835 - }, - { - "epoch": 3.657396257430663, - "grad_norm": 0.0017666706116870046, - "learning_rate": 0.00019999340176050966, - "loss": 46.0, - "step": 47836 - }, - { - "epoch": 3.6574727144140526, - "grad_norm": 0.0033291089348495007, - "learning_rate": 0.00019999340148458292, - "loss": 46.0, - "step": 47837 - }, - { - "epoch": 3.6575491713974424, - "grad_norm": 0.0013852810952812433, - "learning_rate": 0.00019999340120865046, - "loss": 46.0, - "step": 47838 - }, - { - "epoch": 3.657625628380832, - "grad_norm": 0.0021935708355158567, - "learning_rate": 0.00019999340093271214, - "loss": 46.0, - "step": 47839 - }, - { - "epoch": 3.657702085364222, - "grad_norm": 0.002076232573017478, - "learning_rate": 0.00019999340065676813, - "loss": 46.0, - "step": 47840 - }, - { - "epoch": 3.6577785423476117, - "grad_norm": 0.001903512398712337, - "learning_rate": 0.00019999340038081832, - "loss": 46.0, - "step": 47841 - }, - { - "epoch": 3.6578549993310014, - "grad_norm": 0.0020118169486522675, - "learning_rate": 0.00019999340010486277, - "loss": 46.0, - "step": 47842 - }, - { - "epoch": 3.657931456314391, - "grad_norm": 0.001925047137774527, - "learning_rate": 0.00019999339982890139, - "loss": 46.0, - "step": 47843 - }, - { - "epoch": 3.658007913297781, - "grad_norm": 0.0019569192081689835, - "learning_rate": 0.0001999933995529343, - "loss": 46.0, - "step": 47844 - }, - { - "epoch": 3.6580843702811707, - "grad_norm": 0.0012307136785238981, - "learning_rate": 0.0001999933992769614, - "loss": 46.0, - "step": 47845 - }, - { - "epoch": 3.6581608272645605, - "grad_norm": 0.0006263569230213761, - "learning_rate": 0.00019999339900098276, - "loss": 46.0, - "step": 47846 - }, - { - "epoch": 3.6582372842479502, - "grad_norm": 0.004020243417471647, - "learning_rate": 0.0001999933987249983, - "loss": 46.0, - "step": 47847 - }, - { - "epoch": 3.6583137412313396, - "grad_norm": 0.0018410271732136607, - "learning_rate": 0.00019999339844900814, - "loss": 46.0, - "step": 47848 - }, - { - "epoch": 3.6583901982147293, - "grad_norm": 0.0014446190325543284, - "learning_rate": 0.00019999339817301217, - "loss": 46.0, - "step": 47849 - }, - { - "epoch": 3.658466655198119, - "grad_norm": 0.002229816745966673, - "learning_rate": 0.00019999339789701046, - "loss": 46.0, - "step": 47850 - }, - { - "epoch": 3.658543112181509, - "grad_norm": 0.003180217929184437, - "learning_rate": 0.00019999339762100294, - "loss": 46.0, - "step": 47851 - }, - { - "epoch": 3.6586195691648986, - "grad_norm": 0.0046297479420900345, - "learning_rate": 0.00019999339734498968, - "loss": 46.0, - "step": 47852 - }, - { - "epoch": 3.6586960261482884, - "grad_norm": 0.002554561011493206, - "learning_rate": 0.00019999339706897065, - "loss": 46.0, - "step": 47853 - }, - { - "epoch": 3.658772483131678, - "grad_norm": 0.0013105327961966395, - "learning_rate": 0.00019999339679294584, - "loss": 46.0, - "step": 47854 - }, - { - "epoch": 3.658848940115068, - "grad_norm": 0.0016060414491221309, - "learning_rate": 0.00019999339651691526, - "loss": 46.0, - "step": 47855 - }, - { - "epoch": 3.658925397098457, - "grad_norm": 0.003461515996605158, - "learning_rate": 0.00019999339624087893, - "loss": 46.0, - "step": 47856 - }, - { - "epoch": 3.659001854081847, - "grad_norm": 0.00162501388695091, - "learning_rate": 0.00019999339596483683, - "loss": 46.0, - "step": 47857 - }, - { - "epoch": 3.6590783110652367, - "grad_norm": 0.0016237534582614899, - "learning_rate": 0.00019999339568878895, - "loss": 46.0, - "step": 47858 - }, - { - "epoch": 3.6591547680486265, - "grad_norm": 0.0014932382619008422, - "learning_rate": 0.00019999339541273528, - "loss": 46.0, - "step": 47859 - }, - { - "epoch": 3.6592312250320163, - "grad_norm": 0.00456963200122118, - "learning_rate": 0.00019999339513667588, - "loss": 46.0, - "step": 47860 - }, - { - "epoch": 3.659307682015406, - "grad_norm": 0.003629390150308609, - "learning_rate": 0.0001999933948606107, - "loss": 46.0, - "step": 47861 - }, - { - "epoch": 3.6593841389987958, - "grad_norm": 0.003933451604098082, - "learning_rate": 0.00019999339458453972, - "loss": 46.0, - "step": 47862 - }, - { - "epoch": 3.6594605959821855, - "grad_norm": 0.0015699260402470827, - "learning_rate": 0.000199993394308463, - "loss": 46.0, - "step": 47863 - }, - { - "epoch": 3.6595370529655753, - "grad_norm": 0.0016019229078665376, - "learning_rate": 0.00019999339403238052, - "loss": 46.0, - "step": 47864 - }, - { - "epoch": 3.659613509948965, - "grad_norm": 0.0040239556692540646, - "learning_rate": 0.00019999339375629226, - "loss": 46.0, - "step": 47865 - }, - { - "epoch": 3.659689966932355, - "grad_norm": 0.002149728126823902, - "learning_rate": 0.00019999339348019822, - "loss": 46.0, - "step": 47866 - }, - { - "epoch": 3.6597664239157446, - "grad_norm": 0.0012652275618165731, - "learning_rate": 0.0001999933932040984, - "loss": 46.0, - "step": 47867 - }, - { - "epoch": 3.6598428808991343, - "grad_norm": 0.0012707667192444205, - "learning_rate": 0.00019999339292799283, - "loss": 46.0, - "step": 47868 - }, - { - "epoch": 3.659919337882524, - "grad_norm": 0.0008732926798984408, - "learning_rate": 0.00019999339265188148, - "loss": 46.0, - "step": 47869 - }, - { - "epoch": 3.6599957948659134, - "grad_norm": 0.0036312055308371782, - "learning_rate": 0.00019999339237576437, - "loss": 46.0, - "step": 47870 - }, - { - "epoch": 3.660072251849303, - "grad_norm": 0.0024095510598272085, - "learning_rate": 0.0001999933920996415, - "loss": 46.0, - "step": 47871 - }, - { - "epoch": 3.660148708832693, - "grad_norm": 0.0016875972505658865, - "learning_rate": 0.00019999339182351285, - "loss": 46.0, - "step": 47872 - }, - { - "epoch": 3.6602251658160827, - "grad_norm": 0.002364052925258875, - "learning_rate": 0.00019999339154737843, - "loss": 46.0, - "step": 47873 - }, - { - "epoch": 3.6603016227994725, - "grad_norm": 0.0012726550921797752, - "learning_rate": 0.00019999339127123826, - "loss": 46.0, - "step": 47874 - }, - { - "epoch": 3.6603780797828622, - "grad_norm": 0.0022818357683718204, - "learning_rate": 0.0001999933909950923, - "loss": 46.0, - "step": 47875 - }, - { - "epoch": 3.660454536766252, - "grad_norm": 0.0023524975404143333, - "learning_rate": 0.00019999339071894055, - "loss": 46.0, - "step": 47876 - }, - { - "epoch": 3.6605309937496417, - "grad_norm": 0.0013641039840877056, - "learning_rate": 0.0001999933904427831, - "loss": 46.0, - "step": 47877 - }, - { - "epoch": 3.660607450733031, - "grad_norm": 0.0009009288623929024, - "learning_rate": 0.00019999339016661983, - "loss": 46.0, - "step": 47878 - }, - { - "epoch": 3.660683907716421, - "grad_norm": 0.0036308083217591047, - "learning_rate": 0.00019999338989045077, - "loss": 46.0, - "step": 47879 - }, - { - "epoch": 3.6607603646998106, - "grad_norm": 0.0015181595226749778, - "learning_rate": 0.000199993389614276, - "loss": 46.0, - "step": 47880 - }, - { - "epoch": 3.6608368216832003, - "grad_norm": 0.0026797018945217133, - "learning_rate": 0.0001999933893380954, - "loss": 46.0, - "step": 47881 - }, - { - "epoch": 3.66091327866659, - "grad_norm": 0.006255386862903833, - "learning_rate": 0.00019999338906190908, - "loss": 46.0, - "step": 47882 - }, - { - "epoch": 3.66098973564998, - "grad_norm": 0.0024160905741155148, - "learning_rate": 0.00019999338878571695, - "loss": 46.0, - "step": 47883 - }, - { - "epoch": 3.6610661926333696, - "grad_norm": 0.0035971486940979958, - "learning_rate": 0.0001999933885095191, - "loss": 46.0, - "step": 47884 - }, - { - "epoch": 3.6611426496167594, - "grad_norm": 0.002197209745645523, - "learning_rate": 0.00019999338823331543, - "loss": 46.0, - "step": 47885 - }, - { - "epoch": 3.661219106600149, - "grad_norm": 0.0032732898835092783, - "learning_rate": 0.00019999338795710604, - "loss": 46.0, - "step": 47886 - }, - { - "epoch": 3.661295563583539, - "grad_norm": 0.0031460553873330355, - "learning_rate": 0.00019999338768089084, - "loss": 46.0, - "step": 47887 - }, - { - "epoch": 3.6613720205669287, - "grad_norm": 0.005199407692998648, - "learning_rate": 0.00019999338740466987, - "loss": 46.0, - "step": 47888 - }, - { - "epoch": 3.6614484775503184, - "grad_norm": 0.0018900011200457811, - "learning_rate": 0.0001999933871284432, - "loss": 46.0, - "step": 47889 - }, - { - "epoch": 3.661524934533708, - "grad_norm": 0.0018733100732788444, - "learning_rate": 0.00019999338685221067, - "loss": 46.0, - "step": 47890 - }, - { - "epoch": 3.661601391517098, - "grad_norm": 0.0031615784391760826, - "learning_rate": 0.0001999933865759724, - "loss": 46.0, - "step": 47891 - }, - { - "epoch": 3.6616778485004873, - "grad_norm": 0.0007877142634242773, - "learning_rate": 0.00019999338629972838, - "loss": 46.0, - "step": 47892 - }, - { - "epoch": 3.661754305483877, - "grad_norm": 0.002684255363419652, - "learning_rate": 0.0001999933860234786, - "loss": 46.0, - "step": 47893 - }, - { - "epoch": 3.661830762467267, - "grad_norm": 0.0026270037051290274, - "learning_rate": 0.00019999338574722302, - "loss": 46.0, - "step": 47894 - }, - { - "epoch": 3.6619072194506566, - "grad_norm": 0.0006175162270665169, - "learning_rate": 0.0001999933854709617, - "loss": 46.0, - "step": 47895 - }, - { - "epoch": 3.6619836764340463, - "grad_norm": 0.0007937418995425105, - "learning_rate": 0.00019999338519469456, - "loss": 46.0, - "step": 47896 - }, - { - "epoch": 3.662060133417436, - "grad_norm": 0.003095922525972128, - "learning_rate": 0.0001999933849184217, - "loss": 46.0, - "step": 47897 - }, - { - "epoch": 3.662136590400826, - "grad_norm": 0.00045074871741235256, - "learning_rate": 0.00019999338464214307, - "loss": 46.0, - "step": 47898 - }, - { - "epoch": 3.6622130473842156, - "grad_norm": 0.0017013788456097245, - "learning_rate": 0.00019999338436585865, - "loss": 46.0, - "step": 47899 - }, - { - "epoch": 3.662289504367605, - "grad_norm": 0.004155789036303759, - "learning_rate": 0.00019999338408956848, - "loss": 46.0, - "step": 47900 - }, - { - "epoch": 3.6623659613509947, - "grad_norm": 0.0018713424215093255, - "learning_rate": 0.0001999933838132725, - "loss": 46.0, - "step": 47901 - }, - { - "epoch": 3.6624424183343844, - "grad_norm": 0.0023316089063882828, - "learning_rate": 0.0001999933835369708, - "loss": 46.0, - "step": 47902 - }, - { - "epoch": 3.662518875317774, - "grad_norm": 0.00036295020254328847, - "learning_rate": 0.0001999933832606633, - "loss": 46.0, - "step": 47903 - }, - { - "epoch": 3.662595332301164, - "grad_norm": 0.002408041153103113, - "learning_rate": 0.00019999338298435005, - "loss": 46.0, - "step": 47904 - }, - { - "epoch": 3.6626717892845537, - "grad_norm": 0.0035913202445954084, - "learning_rate": 0.00019999338270803102, - "loss": 46.0, - "step": 47905 - }, - { - "epoch": 3.6627482462679435, - "grad_norm": 0.002356767188757658, - "learning_rate": 0.0001999933824317062, - "loss": 46.0, - "step": 47906 - }, - { - "epoch": 3.6628247032513332, - "grad_norm": 0.0008597004343755543, - "learning_rate": 0.00019999338215537566, - "loss": 46.0, - "step": 47907 - }, - { - "epoch": 3.662901160234723, - "grad_norm": 0.002325072418898344, - "learning_rate": 0.00019999338187903933, - "loss": 46.0, - "step": 47908 - }, - { - "epoch": 3.6629776172181128, - "grad_norm": 0.0016015819273889065, - "learning_rate": 0.00019999338160269723, - "loss": 46.0, - "step": 47909 - }, - { - "epoch": 3.6630540742015025, - "grad_norm": 0.0022232159972190857, - "learning_rate": 0.00019999338132634936, - "loss": 46.0, - "step": 47910 - }, - { - "epoch": 3.6631305311848923, - "grad_norm": 0.0005791396833956242, - "learning_rate": 0.0001999933810499957, - "loss": 46.0, - "step": 47911 - }, - { - "epoch": 3.663206988168282, - "grad_norm": 0.00144652696326375, - "learning_rate": 0.00019999338077363626, - "loss": 46.0, - "step": 47912 - }, - { - "epoch": 3.663283445151672, - "grad_norm": 0.003915582317858934, - "learning_rate": 0.00019999338049727112, - "loss": 46.0, - "step": 47913 - }, - { - "epoch": 3.663359902135061, - "grad_norm": 0.0019386752974241972, - "learning_rate": 0.00019999338022090016, - "loss": 46.0, - "step": 47914 - }, - { - "epoch": 3.663436359118451, - "grad_norm": 0.0019776534754782915, - "learning_rate": 0.00019999337994452344, - "loss": 46.0, - "step": 47915 - }, - { - "epoch": 3.6635128161018407, - "grad_norm": 0.00853799656033516, - "learning_rate": 0.00019999337966814096, - "loss": 46.0, - "step": 47916 - }, - { - "epoch": 3.6635892730852304, - "grad_norm": 0.0011911524925380945, - "learning_rate": 0.0001999933793917527, - "loss": 46.0, - "step": 47917 - }, - { - "epoch": 3.66366573006862, - "grad_norm": 0.00240787654183805, - "learning_rate": 0.00019999337911535866, - "loss": 46.0, - "step": 47918 - }, - { - "epoch": 3.66374218705201, - "grad_norm": 0.0019760397262871265, - "learning_rate": 0.00019999337883895888, - "loss": 46.0, - "step": 47919 - }, - { - "epoch": 3.6638186440353997, - "grad_norm": 0.0006879131542518735, - "learning_rate": 0.0001999933785625533, - "loss": 46.0, - "step": 47920 - }, - { - "epoch": 3.6638951010187895, - "grad_norm": 0.0018098466098308563, - "learning_rate": 0.00019999337828614197, - "loss": 46.0, - "step": 47921 - }, - { - "epoch": 3.6639715580021788, - "grad_norm": 0.002037947066128254, - "learning_rate": 0.00019999337800972488, - "loss": 46.0, - "step": 47922 - }, - { - "epoch": 3.6640480149855685, - "grad_norm": 0.0010829000966623425, - "learning_rate": 0.00019999337773330203, - "loss": 46.0, - "step": 47923 - }, - { - "epoch": 3.6641244719689583, - "grad_norm": 0.0010545499389991164, - "learning_rate": 0.00019999337745687336, - "loss": 46.0, - "step": 47924 - }, - { - "epoch": 3.664200928952348, - "grad_norm": 0.0015009538037702441, - "learning_rate": 0.00019999337718043896, - "loss": 46.0, - "step": 47925 - }, - { - "epoch": 3.664277385935738, - "grad_norm": 0.0018316813511773944, - "learning_rate": 0.00019999337690399877, - "loss": 46.0, - "step": 47926 - }, - { - "epoch": 3.6643538429191276, - "grad_norm": 0.003338324837386608, - "learning_rate": 0.00019999337662755283, - "loss": 46.0, - "step": 47927 - }, - { - "epoch": 3.6644302999025173, - "grad_norm": 0.00455842399969697, - "learning_rate": 0.00019999337635110114, - "loss": 46.0, - "step": 47928 - }, - { - "epoch": 3.664506756885907, - "grad_norm": 0.0039513977244496346, - "learning_rate": 0.00019999337607464363, - "loss": 46.0, - "step": 47929 - }, - { - "epoch": 3.664583213869297, - "grad_norm": 0.0030990089289844036, - "learning_rate": 0.00019999337579818037, - "loss": 46.0, - "step": 47930 - }, - { - "epoch": 3.6646596708526866, - "grad_norm": 0.0012320346431806684, - "learning_rate": 0.00019999337552171136, - "loss": 46.0, - "step": 47931 - }, - { - "epoch": 3.6647361278360764, - "grad_norm": 0.0018863289151340723, - "learning_rate": 0.00019999337524523658, - "loss": 46.0, - "step": 47932 - }, - { - "epoch": 3.664812584819466, - "grad_norm": 0.001923258532769978, - "learning_rate": 0.000199993374968756, - "loss": 46.0, - "step": 47933 - }, - { - "epoch": 3.664889041802856, - "grad_norm": 0.0020726253278553486, - "learning_rate": 0.00019999337469226965, - "loss": 46.0, - "step": 47934 - }, - { - "epoch": 3.6649654987862452, - "grad_norm": 0.0023975956719368696, - "learning_rate": 0.00019999337441577758, - "loss": 46.0, - "step": 47935 - }, - { - "epoch": 3.665041955769635, - "grad_norm": 0.0032305391505360603, - "learning_rate": 0.0001999933741392797, - "loss": 46.0, - "step": 47936 - }, - { - "epoch": 3.6651184127530247, - "grad_norm": 0.005718642380088568, - "learning_rate": 0.00019999337386277609, - "loss": 46.0, - "step": 47937 - }, - { - "epoch": 3.6651948697364145, - "grad_norm": 0.003722132882103324, - "learning_rate": 0.00019999337358626667, - "loss": 46.0, - "step": 47938 - }, - { - "epoch": 3.6652713267198043, - "grad_norm": 0.0010933848097920418, - "learning_rate": 0.00019999337330975147, - "loss": 46.0, - "step": 47939 - }, - { - "epoch": 3.665347783703194, - "grad_norm": 0.002146756509318948, - "learning_rate": 0.00019999337303323053, - "loss": 46.0, - "step": 47940 - }, - { - "epoch": 3.665424240686584, - "grad_norm": 0.0013325114268809557, - "learning_rate": 0.00019999337275670382, - "loss": 46.0, - "step": 47941 - }, - { - "epoch": 3.6655006976699736, - "grad_norm": 0.0023758369497954845, - "learning_rate": 0.00019999337248017133, - "loss": 46.0, - "step": 47942 - }, - { - "epoch": 3.6655771546533633, - "grad_norm": 0.001031380146741867, - "learning_rate": 0.00019999337220363308, - "loss": 46.0, - "step": 47943 - }, - { - "epoch": 3.6656536116367526, - "grad_norm": 0.00375778297893703, - "learning_rate": 0.00019999337192708907, - "loss": 46.0, - "step": 47944 - }, - { - "epoch": 3.6657300686201424, - "grad_norm": 0.0013113364111632109, - "learning_rate": 0.0001999933716505393, - "loss": 46.0, - "step": 47945 - }, - { - "epoch": 3.665806525603532, - "grad_norm": 0.003102717688307166, - "learning_rate": 0.0001999933713739837, - "loss": 46.0, - "step": 47946 - }, - { - "epoch": 3.665882982586922, - "grad_norm": 0.002812005812302232, - "learning_rate": 0.0001999933710974224, - "loss": 46.0, - "step": 47947 - }, - { - "epoch": 3.6659594395703117, - "grad_norm": 0.0030215787701308727, - "learning_rate": 0.00019999337082085529, - "loss": 46.0, - "step": 47948 - }, - { - "epoch": 3.6660358965537014, - "grad_norm": 0.0012319745728746057, - "learning_rate": 0.0001999933705442824, - "loss": 46.0, - "step": 47949 - }, - { - "epoch": 3.666112353537091, - "grad_norm": 0.000795715197455138, - "learning_rate": 0.0001999933702677038, - "loss": 46.0, - "step": 47950 - }, - { - "epoch": 3.666188810520481, - "grad_norm": 0.0022924127988517284, - "learning_rate": 0.0001999933699911194, - "loss": 46.0, - "step": 47951 - }, - { - "epoch": 3.6662652675038707, - "grad_norm": 0.0058022141456604, - "learning_rate": 0.0001999933697145292, - "loss": 46.0, - "step": 47952 - }, - { - "epoch": 3.6663417244872605, - "grad_norm": 0.0016654469072818756, - "learning_rate": 0.00019999336943793327, - "loss": 46.0, - "step": 47953 - }, - { - "epoch": 3.6664181814706502, - "grad_norm": 0.00541280722245574, - "learning_rate": 0.00019999336916133156, - "loss": 46.0, - "step": 47954 - }, - { - "epoch": 3.66649463845404, - "grad_norm": 0.001333836349658668, - "learning_rate": 0.00019999336888472407, - "loss": 46.0, - "step": 47955 - }, - { - "epoch": 3.6665710954374298, - "grad_norm": 0.0035815811716020107, - "learning_rate": 0.00019999336860811084, - "loss": 46.0, - "step": 47956 - }, - { - "epoch": 3.666647552420819, - "grad_norm": 0.0011034669587388635, - "learning_rate": 0.0001999933683314918, - "loss": 46.0, - "step": 47957 - }, - { - "epoch": 3.666724009404209, - "grad_norm": 0.0024126798380166292, - "learning_rate": 0.000199993368054867, - "loss": 46.0, - "step": 47958 - }, - { - "epoch": 3.6668004663875986, - "grad_norm": 0.0024243113584816456, - "learning_rate": 0.00019999336777823645, - "loss": 46.0, - "step": 47959 - }, - { - "epoch": 3.6668769233709884, - "grad_norm": 0.0011374404421076179, - "learning_rate": 0.00019999336750160015, - "loss": 46.0, - "step": 47960 - }, - { - "epoch": 3.666953380354378, - "grad_norm": 0.0014976942911744118, - "learning_rate": 0.00019999336722495805, - "loss": 46.0, - "step": 47961 - }, - { - "epoch": 3.667029837337768, - "grad_norm": 0.0009365262230858207, - "learning_rate": 0.00019999336694831018, - "loss": 46.0, - "step": 47962 - }, - { - "epoch": 3.6671062943211576, - "grad_norm": 0.0031736285891383886, - "learning_rate": 0.00019999336667165656, - "loss": 46.0, - "step": 47963 - }, - { - "epoch": 3.6671827513045474, - "grad_norm": 0.0035877984482795, - "learning_rate": 0.00019999336639499714, - "loss": 46.0, - "step": 47964 - }, - { - "epoch": 3.6672592082879367, - "grad_norm": 0.0016949651762843132, - "learning_rate": 0.00019999336611833198, - "loss": 46.0, - "step": 47965 - }, - { - "epoch": 3.6673356652713265, - "grad_norm": 0.0015173788415268064, - "learning_rate": 0.000199993365841661, - "loss": 46.0, - "step": 47966 - }, - { - "epoch": 3.6674121222547162, - "grad_norm": 0.0024319153744727373, - "learning_rate": 0.00019999336556498432, - "loss": 46.0, - "step": 47967 - }, - { - "epoch": 3.667488579238106, - "grad_norm": 0.0016374427359551191, - "learning_rate": 0.00019999336528830184, - "loss": 46.0, - "step": 47968 - }, - { - "epoch": 3.6675650362214958, - "grad_norm": 0.0032984784338623285, - "learning_rate": 0.00019999336501161358, - "loss": 46.0, - "step": 47969 - }, - { - "epoch": 3.6676414932048855, - "grad_norm": 0.0014317476889118552, - "learning_rate": 0.00019999336473491957, - "loss": 46.0, - "step": 47970 - }, - { - "epoch": 3.6677179501882753, - "grad_norm": 0.0006523602642118931, - "learning_rate": 0.0001999933644582198, - "loss": 46.0, - "step": 47971 - }, - { - "epoch": 3.667794407171665, - "grad_norm": 0.0021338271908462048, - "learning_rate": 0.00019999336418151424, - "loss": 46.0, - "step": 47972 - }, - { - "epoch": 3.667870864155055, - "grad_norm": 0.005957825109362602, - "learning_rate": 0.00019999336390480292, - "loss": 46.0, - "step": 47973 - }, - { - "epoch": 3.6679473211384446, - "grad_norm": 0.0013046445092186332, - "learning_rate": 0.00019999336362808582, - "loss": 46.0, - "step": 47974 - }, - { - "epoch": 3.6680237781218343, - "grad_norm": 0.0011419422226026654, - "learning_rate": 0.00019999336335136295, - "loss": 46.0, - "step": 47975 - }, - { - "epoch": 3.668100235105224, - "grad_norm": 0.0027256333269178867, - "learning_rate": 0.00019999336307463433, - "loss": 46.0, - "step": 47976 - }, - { - "epoch": 3.668176692088614, - "grad_norm": 0.0008650197414681315, - "learning_rate": 0.0001999933627978999, - "loss": 46.0, - "step": 47977 - }, - { - "epoch": 3.6682531490720036, - "grad_norm": 0.0008292000275105238, - "learning_rate": 0.00019999336252115975, - "loss": 46.0, - "step": 47978 - }, - { - "epoch": 3.668329606055393, - "grad_norm": 0.001185743254609406, - "learning_rate": 0.0001999933622444138, - "loss": 46.0, - "step": 47979 - }, - { - "epoch": 3.6684060630387827, - "grad_norm": 0.0024928064085543156, - "learning_rate": 0.00019999336196766212, - "loss": 46.0, - "step": 47980 - }, - { - "epoch": 3.6684825200221725, - "grad_norm": 0.000558042898774147, - "learning_rate": 0.0001999933616909046, - "loss": 46.0, - "step": 47981 - }, - { - "epoch": 3.668558977005562, - "grad_norm": 0.0007560640806332231, - "learning_rate": 0.00019999336141414138, - "loss": 46.0, - "step": 47982 - }, - { - "epoch": 3.668635433988952, - "grad_norm": 0.004348853137344122, - "learning_rate": 0.00019999336113737237, - "loss": 46.0, - "step": 47983 - }, - { - "epoch": 3.6687118909723417, - "grad_norm": 0.0022174064069986343, - "learning_rate": 0.0001999933608605976, - "loss": 46.0, - "step": 47984 - }, - { - "epoch": 3.6687883479557315, - "grad_norm": 0.0019662310369312763, - "learning_rate": 0.00019999336058381704, - "loss": 46.0, - "step": 47985 - }, - { - "epoch": 3.6688648049391213, - "grad_norm": 0.0016567886341363192, - "learning_rate": 0.00019999336030703072, - "loss": 46.0, - "step": 47986 - }, - { - "epoch": 3.6689412619225106, - "grad_norm": 0.0020351523999124765, - "learning_rate": 0.00019999336003023862, - "loss": 46.0, - "step": 47987 - }, - { - "epoch": 3.6690177189059003, - "grad_norm": 0.002732141176238656, - "learning_rate": 0.00019999335975344075, - "loss": 46.0, - "step": 47988 - }, - { - "epoch": 3.66909417588929, - "grad_norm": 0.00147348758764565, - "learning_rate": 0.00019999335947663713, - "loss": 46.0, - "step": 47989 - }, - { - "epoch": 3.66917063287268, - "grad_norm": 0.0009496886632405221, - "learning_rate": 0.0001999933591998277, - "loss": 46.0, - "step": 47990 - }, - { - "epoch": 3.6692470898560696, - "grad_norm": 0.0015586194349452853, - "learning_rate": 0.00019999335892301258, - "loss": 46.0, - "step": 47991 - }, - { - "epoch": 3.6693235468394594, - "grad_norm": 0.005772904492914677, - "learning_rate": 0.0001999933586461916, - "loss": 46.0, - "step": 47992 - }, - { - "epoch": 3.669400003822849, - "grad_norm": 0.001957288244739175, - "learning_rate": 0.0001999933583693649, - "loss": 46.0, - "step": 47993 - }, - { - "epoch": 3.669476460806239, - "grad_norm": 0.001681795227341354, - "learning_rate": 0.00019999335809253244, - "loss": 46.0, - "step": 47994 - }, - { - "epoch": 3.6695529177896287, - "grad_norm": 0.0008049028110690415, - "learning_rate": 0.00019999335781569418, - "loss": 46.0, - "step": 47995 - }, - { - "epoch": 3.6696293747730184, - "grad_norm": 0.002104239072650671, - "learning_rate": 0.00019999335753885018, - "loss": 46.0, - "step": 47996 - }, - { - "epoch": 3.669705831756408, - "grad_norm": 0.0034681379329413176, - "learning_rate": 0.0001999933572620004, - "loss": 46.0, - "step": 47997 - }, - { - "epoch": 3.669782288739798, - "grad_norm": 0.0028721061535179615, - "learning_rate": 0.00019999335698514485, - "loss": 46.0, - "step": 47998 - }, - { - "epoch": 3.6698587457231877, - "grad_norm": 0.0016494493465870619, - "learning_rate": 0.00019999335670828355, - "loss": 46.0, - "step": 47999 - }, - { - "epoch": 3.6699352027065775, - "grad_norm": 0.003336281981319189, - "learning_rate": 0.00019999335643141643, - "loss": 46.0, - "step": 48000 - }, - { - "epoch": 3.670011659689967, - "grad_norm": 0.0013646723236888647, - "learning_rate": 0.0001999933561545436, - "loss": 46.0, - "step": 48001 - }, - { - "epoch": 3.6700881166733565, - "grad_norm": 0.0013325696345418692, - "learning_rate": 0.00019999335587766494, - "loss": 46.0, - "step": 48002 - }, - { - "epoch": 3.6701645736567463, - "grad_norm": 0.0005511973286047578, - "learning_rate": 0.00019999335560078057, - "loss": 46.0, - "step": 48003 - }, - { - "epoch": 3.670241030640136, - "grad_norm": 0.0010190497850999236, - "learning_rate": 0.0001999933553238904, - "loss": 46.0, - "step": 48004 - }, - { - "epoch": 3.670317487623526, - "grad_norm": 0.001558575313538313, - "learning_rate": 0.00019999335504699444, - "loss": 46.0, - "step": 48005 - }, - { - "epoch": 3.6703939446069156, - "grad_norm": 0.0013280980056151748, - "learning_rate": 0.00019999335477009276, - "loss": 46.0, - "step": 48006 - }, - { - "epoch": 3.6704704015903054, - "grad_norm": 0.002202605130150914, - "learning_rate": 0.00019999335449318528, - "loss": 46.0, - "step": 48007 - }, - { - "epoch": 3.670546858573695, - "grad_norm": 0.002156279981136322, - "learning_rate": 0.00019999335421627204, - "loss": 46.0, - "step": 48008 - }, - { - "epoch": 3.6706233155570844, - "grad_norm": 0.0024312122259289026, - "learning_rate": 0.00019999335393935304, - "loss": 46.0, - "step": 48009 - }, - { - "epoch": 3.670699772540474, - "grad_norm": 0.0006952566909603775, - "learning_rate": 0.00019999335366242824, - "loss": 46.0, - "step": 48010 - }, - { - "epoch": 3.670776229523864, - "grad_norm": 0.0021471420768648386, - "learning_rate": 0.00019999335338549768, - "loss": 46.0, - "step": 48011 - }, - { - "epoch": 3.6708526865072537, - "grad_norm": 0.001348915626294911, - "learning_rate": 0.0001999933531085614, - "loss": 46.0, - "step": 48012 - }, - { - "epoch": 3.6709291434906435, - "grad_norm": 0.001189972972497344, - "learning_rate": 0.00019999335283161932, - "loss": 46.0, - "step": 48013 - }, - { - "epoch": 3.6710056004740332, - "grad_norm": 0.0005687892553396523, - "learning_rate": 0.00019999335255467145, - "loss": 46.0, - "step": 48014 - }, - { - "epoch": 3.671082057457423, - "grad_norm": 0.002547189127653837, - "learning_rate": 0.0001999933522777178, - "loss": 46.0, - "step": 48015 - }, - { - "epoch": 3.6711585144408128, - "grad_norm": 0.000822422094643116, - "learning_rate": 0.0001999933520007584, - "loss": 46.0, - "step": 48016 - }, - { - "epoch": 3.6712349714242025, - "grad_norm": 0.0013455809094011784, - "learning_rate": 0.00019999335172379325, - "loss": 46.0, - "step": 48017 - }, - { - "epoch": 3.6713114284075923, - "grad_norm": 0.00370883964933455, - "learning_rate": 0.0001999933514468223, - "loss": 46.0, - "step": 48018 - }, - { - "epoch": 3.671387885390982, - "grad_norm": 0.0012432014336809516, - "learning_rate": 0.0001999933511698456, - "loss": 46.0, - "step": 48019 - }, - { - "epoch": 3.671464342374372, - "grad_norm": 0.0033680452033877373, - "learning_rate": 0.00019999335089286314, - "loss": 46.0, - "step": 48020 - }, - { - "epoch": 3.6715407993577616, - "grad_norm": 0.0021840636618435383, - "learning_rate": 0.0001999933506158749, - "loss": 46.0, - "step": 48021 - }, - { - "epoch": 3.6716172563411513, - "grad_norm": 0.002178065711632371, - "learning_rate": 0.00019999335033888088, - "loss": 46.0, - "step": 48022 - }, - { - "epoch": 3.6716937133245406, - "grad_norm": 0.005367441102862358, - "learning_rate": 0.00019999335006188113, - "loss": 46.0, - "step": 48023 - }, - { - "epoch": 3.6717701703079304, - "grad_norm": 0.004774544853717089, - "learning_rate": 0.00019999334978487555, - "loss": 46.0, - "step": 48024 - }, - { - "epoch": 3.67184662729132, - "grad_norm": 0.002754293382167816, - "learning_rate": 0.00019999334950786425, - "loss": 46.0, - "step": 48025 - }, - { - "epoch": 3.67192308427471, - "grad_norm": 0.002541779074817896, - "learning_rate": 0.00019999334923084718, - "loss": 46.0, - "step": 48026 - }, - { - "epoch": 3.6719995412580997, - "grad_norm": 0.0037251540925353765, - "learning_rate": 0.00019999334895382434, - "loss": 46.0, - "step": 48027 - }, - { - "epoch": 3.6720759982414894, - "grad_norm": 0.0019251458579674363, - "learning_rate": 0.00019999334867679572, - "loss": 46.0, - "step": 48028 - }, - { - "epoch": 3.672152455224879, - "grad_norm": 0.0005644903285428882, - "learning_rate": 0.0001999933483997613, - "loss": 46.0, - "step": 48029 - }, - { - "epoch": 3.672228912208269, - "grad_norm": 0.0025510292034596205, - "learning_rate": 0.00019999334812272114, - "loss": 46.0, - "step": 48030 - }, - { - "epoch": 3.6723053691916583, - "grad_norm": 0.0013216182123869658, - "learning_rate": 0.0001999933478456752, - "loss": 46.0, - "step": 48031 - }, - { - "epoch": 3.672381826175048, - "grad_norm": 0.0009647682309150696, - "learning_rate": 0.00019999334756862352, - "loss": 46.0, - "step": 48032 - }, - { - "epoch": 3.672458283158438, - "grad_norm": 0.0012674200115725398, - "learning_rate": 0.00019999334729156604, - "loss": 46.0, - "step": 48033 - }, - { - "epoch": 3.6725347401418276, - "grad_norm": 0.0020439722575247288, - "learning_rate": 0.0001999933470145028, - "loss": 46.0, - "step": 48034 - }, - { - "epoch": 3.6726111971252173, - "grad_norm": 0.00381052796728909, - "learning_rate": 0.0001999933467374338, - "loss": 46.0, - "step": 48035 - }, - { - "epoch": 3.672687654108607, - "grad_norm": 0.00462719239294529, - "learning_rate": 0.00019999334646035903, - "loss": 46.0, - "step": 48036 - }, - { - "epoch": 3.672764111091997, - "grad_norm": 0.002047655638307333, - "learning_rate": 0.0001999933461832785, - "loss": 46.0, - "step": 48037 - }, - { - "epoch": 3.6728405680753866, - "grad_norm": 0.0014202672755345702, - "learning_rate": 0.00019999334590619215, - "loss": 46.0, - "step": 48038 - }, - { - "epoch": 3.6729170250587764, - "grad_norm": 0.006818561814725399, - "learning_rate": 0.00019999334562910008, - "loss": 46.0, - "step": 48039 - }, - { - "epoch": 3.672993482042166, - "grad_norm": 0.003302361350506544, - "learning_rate": 0.00019999334535200224, - "loss": 46.0, - "step": 48040 - }, - { - "epoch": 3.673069939025556, - "grad_norm": 0.001957076834514737, - "learning_rate": 0.00019999334507489862, - "loss": 46.0, - "step": 48041 - }, - { - "epoch": 3.6731463960089457, - "grad_norm": 0.002648213878273964, - "learning_rate": 0.0001999933447977892, - "loss": 46.0, - "step": 48042 - }, - { - "epoch": 3.6732228529923354, - "grad_norm": 0.001613419852219522, - "learning_rate": 0.00019999334452067407, - "loss": 46.0, - "step": 48043 - }, - { - "epoch": 3.673299309975725, - "grad_norm": 0.001669177319854498, - "learning_rate": 0.00019999334424355313, - "loss": 46.0, - "step": 48044 - }, - { - "epoch": 3.6733757669591145, - "grad_norm": 0.004495824687182903, - "learning_rate": 0.00019999334396642645, - "loss": 46.0, - "step": 48045 - }, - { - "epoch": 3.6734522239425043, - "grad_norm": 0.0020188684575259686, - "learning_rate": 0.00019999334368929396, - "loss": 46.0, - "step": 48046 - }, - { - "epoch": 3.673528680925894, - "grad_norm": 0.0017921437975019217, - "learning_rate": 0.00019999334341215573, - "loss": 46.0, - "step": 48047 - }, - { - "epoch": 3.673605137909284, - "grad_norm": 0.003095908323302865, - "learning_rate": 0.00019999334313501176, - "loss": 46.0, - "step": 48048 - }, - { - "epoch": 3.6736815948926735, - "grad_norm": 0.00504260091111064, - "learning_rate": 0.00019999334285786196, - "loss": 46.0, - "step": 48049 - }, - { - "epoch": 3.6737580518760633, - "grad_norm": 0.0016842473996803164, - "learning_rate": 0.0001999933425807064, - "loss": 46.0, - "step": 48050 - }, - { - "epoch": 3.673834508859453, - "grad_norm": 0.001480426057241857, - "learning_rate": 0.0001999933423035451, - "loss": 46.0, - "step": 48051 - }, - { - "epoch": 3.673910965842843, - "grad_norm": 0.0036324074026197195, - "learning_rate": 0.00019999334202637804, - "loss": 46.0, - "step": 48052 - }, - { - "epoch": 3.673987422826232, - "grad_norm": 0.00061061792075634, - "learning_rate": 0.00019999334174920517, - "loss": 46.0, - "step": 48053 - }, - { - "epoch": 3.674063879809622, - "grad_norm": 0.0025726137682795525, - "learning_rate": 0.00019999334147202655, - "loss": 46.0, - "step": 48054 - }, - { - "epoch": 3.6741403367930117, - "grad_norm": 0.004986766260117292, - "learning_rate": 0.00019999334119484216, - "loss": 46.0, - "step": 48055 - }, - { - "epoch": 3.6742167937764014, - "grad_norm": 0.0038000785280019045, - "learning_rate": 0.000199993340917652, - "loss": 46.0, - "step": 48056 - }, - { - "epoch": 3.674293250759791, - "grad_norm": 0.0015942901372909546, - "learning_rate": 0.00019999334064045606, - "loss": 46.0, - "step": 48057 - }, - { - "epoch": 3.674369707743181, - "grad_norm": 0.0011252585100010037, - "learning_rate": 0.00019999334036325438, - "loss": 46.0, - "step": 48058 - }, - { - "epoch": 3.6744461647265707, - "grad_norm": 0.0023074571508914232, - "learning_rate": 0.00019999334008604692, - "loss": 46.0, - "step": 48059 - }, - { - "epoch": 3.6745226217099605, - "grad_norm": 0.002204613061621785, - "learning_rate": 0.0001999933398088337, - "loss": 46.0, - "step": 48060 - }, - { - "epoch": 3.6745990786933502, - "grad_norm": 0.003391342470422387, - "learning_rate": 0.0001999933395316147, - "loss": 46.0, - "step": 48061 - }, - { - "epoch": 3.67467553567674, - "grad_norm": 0.0022395451087504625, - "learning_rate": 0.00019999333925438991, - "loss": 46.0, - "step": 48062 - }, - { - "epoch": 3.6747519926601298, - "grad_norm": 0.0030172611586749554, - "learning_rate": 0.00019999333897715937, - "loss": 46.0, - "step": 48063 - }, - { - "epoch": 3.6748284496435195, - "grad_norm": 0.0020320371259003878, - "learning_rate": 0.00019999333869992304, - "loss": 46.0, - "step": 48064 - }, - { - "epoch": 3.6749049066269093, - "grad_norm": 0.005190548952668905, - "learning_rate": 0.000199993338422681, - "loss": 46.0, - "step": 48065 - }, - { - "epoch": 3.6749813636102986, - "grad_norm": 0.0050984956324100494, - "learning_rate": 0.00019999333814543313, - "loss": 46.0, - "step": 48066 - }, - { - "epoch": 3.6750578205936884, - "grad_norm": 0.00440974673256278, - "learning_rate": 0.00019999333786817954, - "loss": 46.0, - "step": 48067 - }, - { - "epoch": 3.675134277577078, - "grad_norm": 0.002784652402624488, - "learning_rate": 0.00019999333759092012, - "loss": 46.0, - "step": 48068 - }, - { - "epoch": 3.675210734560468, - "grad_norm": 0.0022830702364444733, - "learning_rate": 0.00019999333731365496, - "loss": 46.0, - "step": 48069 - }, - { - "epoch": 3.6752871915438576, - "grad_norm": 0.0015071353409439325, - "learning_rate": 0.00019999333703638405, - "loss": 46.0, - "step": 48070 - }, - { - "epoch": 3.6753636485272474, - "grad_norm": 0.0018765542190521955, - "learning_rate": 0.00019999333675910737, - "loss": 46.0, - "step": 48071 - }, - { - "epoch": 3.675440105510637, - "grad_norm": 0.0014902527909725904, - "learning_rate": 0.0001999933364818249, - "loss": 46.0, - "step": 48072 - }, - { - "epoch": 3.675516562494027, - "grad_norm": 0.002455366076901555, - "learning_rate": 0.00019999333620453666, - "loss": 46.0, - "step": 48073 - }, - { - "epoch": 3.6755930194774167, - "grad_norm": 0.0011709202080965042, - "learning_rate": 0.00019999333592724265, - "loss": 46.0, - "step": 48074 - }, - { - "epoch": 3.675669476460806, - "grad_norm": 0.001296831644140184, - "learning_rate": 0.00019999333564994288, - "loss": 46.0, - "step": 48075 - }, - { - "epoch": 3.6757459334441958, - "grad_norm": 0.001863612444140017, - "learning_rate": 0.00019999333537263736, - "loss": 46.0, - "step": 48076 - }, - { - "epoch": 3.6758223904275855, - "grad_norm": 0.0020350965205579996, - "learning_rate": 0.00019999333509532606, - "loss": 46.0, - "step": 48077 - }, - { - "epoch": 3.6758988474109753, - "grad_norm": 0.0032479208894073963, - "learning_rate": 0.00019999333481800896, - "loss": 46.0, - "step": 48078 - }, - { - "epoch": 3.675975304394365, - "grad_norm": 0.0015698898350819945, - "learning_rate": 0.00019999333454068615, - "loss": 46.0, - "step": 48079 - }, - { - "epoch": 3.676051761377755, - "grad_norm": 0.003312168875709176, - "learning_rate": 0.0001999933342633575, - "loss": 46.0, - "step": 48080 - }, - { - "epoch": 3.6761282183611446, - "grad_norm": 0.0019198836525902152, - "learning_rate": 0.00019999333398602312, - "loss": 46.0, - "step": 48081 - }, - { - "epoch": 3.6762046753445343, - "grad_norm": 0.005966040305793285, - "learning_rate": 0.00019999333370868295, - "loss": 46.0, - "step": 48082 - }, - { - "epoch": 3.676281132327924, - "grad_norm": 0.0035501893144100904, - "learning_rate": 0.00019999333343133705, - "loss": 46.0, - "step": 48083 - }, - { - "epoch": 3.676357589311314, - "grad_norm": 0.005789449438452721, - "learning_rate": 0.00019999333315398536, - "loss": 46.0, - "step": 48084 - }, - { - "epoch": 3.6764340462947036, - "grad_norm": 0.001949035795405507, - "learning_rate": 0.0001999933328766279, - "loss": 46.0, - "step": 48085 - }, - { - "epoch": 3.6765105032780934, - "grad_norm": 0.0008523133583366871, - "learning_rate": 0.00019999333259926468, - "loss": 46.0, - "step": 48086 - }, - { - "epoch": 3.676586960261483, - "grad_norm": 0.0023959651589393616, - "learning_rate": 0.00019999333232189568, - "loss": 46.0, - "step": 48087 - }, - { - "epoch": 3.6766634172448724, - "grad_norm": 0.004174513276666403, - "learning_rate": 0.0001999933320445209, - "loss": 46.0, - "step": 48088 - }, - { - "epoch": 3.676739874228262, - "grad_norm": 0.0015015817480161786, - "learning_rate": 0.00019999333176714038, - "loss": 46.0, - "step": 48089 - }, - { - "epoch": 3.676816331211652, - "grad_norm": 0.001505725085735321, - "learning_rate": 0.00019999333148975406, - "loss": 46.0, - "step": 48090 - }, - { - "epoch": 3.6768927881950417, - "grad_norm": 0.0032581575214862823, - "learning_rate": 0.000199993331212362, - "loss": 46.0, - "step": 48091 - }, - { - "epoch": 3.6769692451784315, - "grad_norm": 0.0026731491088867188, - "learning_rate": 0.00019999333093496415, - "loss": 46.0, - "step": 48092 - }, - { - "epoch": 3.6770457021618213, - "grad_norm": 0.001202498096972704, - "learning_rate": 0.00019999333065756053, - "loss": 46.0, - "step": 48093 - }, - { - "epoch": 3.677122159145211, - "grad_norm": 0.001614939421415329, - "learning_rate": 0.00019999333038015117, - "loss": 46.0, - "step": 48094 - }, - { - "epoch": 3.6771986161286008, - "grad_norm": 0.004757698159664869, - "learning_rate": 0.000199993330102736, - "loss": 46.0, - "step": 48095 - }, - { - "epoch": 3.67727507311199, - "grad_norm": 0.006576675921678543, - "learning_rate": 0.00019999332982531507, - "loss": 46.0, - "step": 48096 - }, - { - "epoch": 3.67735153009538, - "grad_norm": 0.0019824367482215166, - "learning_rate": 0.00019999332954788842, - "loss": 46.0, - "step": 48097 - }, - { - "epoch": 3.6774279870787696, - "grad_norm": 0.0013069227570667863, - "learning_rate": 0.00019999332927045593, - "loss": 46.0, - "step": 48098 - }, - { - "epoch": 3.6775044440621594, - "grad_norm": 0.0018959550652652979, - "learning_rate": 0.0001999933289930177, - "loss": 46.0, - "step": 48099 - }, - { - "epoch": 3.677580901045549, - "grad_norm": 0.0015238405903801322, - "learning_rate": 0.0001999933287155737, - "loss": 46.0, - "step": 48100 - }, - { - "epoch": 3.677657358028939, - "grad_norm": 0.0008242375333793461, - "learning_rate": 0.00019999332843812395, - "loss": 46.0, - "step": 48101 - }, - { - "epoch": 3.6777338150123287, - "grad_norm": 0.0022841491736471653, - "learning_rate": 0.00019999332816066843, - "loss": 46.0, - "step": 48102 - }, - { - "epoch": 3.6778102719957184, - "grad_norm": 0.0029639010317623615, - "learning_rate": 0.0001999933278832071, - "loss": 46.0, - "step": 48103 - }, - { - "epoch": 3.677886728979108, - "grad_norm": 0.0037852886598557234, - "learning_rate": 0.00019999332760574004, - "loss": 46.0, - "step": 48104 - }, - { - "epoch": 3.677963185962498, - "grad_norm": 0.002614909317344427, - "learning_rate": 0.0001999933273282672, - "loss": 46.0, - "step": 48105 - }, - { - "epoch": 3.6780396429458877, - "grad_norm": 0.0031065987423062325, - "learning_rate": 0.00019999332705078859, - "loss": 46.0, - "step": 48106 - }, - { - "epoch": 3.6781160999292775, - "grad_norm": 0.004767583683133125, - "learning_rate": 0.0001999933267733042, - "loss": 46.0, - "step": 48107 - }, - { - "epoch": 3.678192556912667, - "grad_norm": 0.004384160041809082, - "learning_rate": 0.00019999332649581406, - "loss": 46.0, - "step": 48108 - }, - { - "epoch": 3.678269013896057, - "grad_norm": 0.000985337421298027, - "learning_rate": 0.00019999332621831815, - "loss": 46.0, - "step": 48109 - }, - { - "epoch": 3.6783454708794463, - "grad_norm": 0.002141169738024473, - "learning_rate": 0.00019999332594081645, - "loss": 46.0, - "step": 48110 - }, - { - "epoch": 3.678421927862836, - "grad_norm": 0.0021303766407072544, - "learning_rate": 0.000199993325663309, - "loss": 46.0, - "step": 48111 - }, - { - "epoch": 3.678498384846226, - "grad_norm": 0.0024959477595984936, - "learning_rate": 0.0001999933253857958, - "loss": 46.0, - "step": 48112 - }, - { - "epoch": 3.6785748418296156, - "grad_norm": 0.004737451206892729, - "learning_rate": 0.0001999933251082768, - "loss": 46.0, - "step": 48113 - }, - { - "epoch": 3.6786512988130053, - "grad_norm": 0.0021286066621541977, - "learning_rate": 0.000199993324830752, - "loss": 46.0, - "step": 48114 - }, - { - "epoch": 3.678727755796395, - "grad_norm": 0.0007798591395840049, - "learning_rate": 0.0001999933245532215, - "loss": 46.0, - "step": 48115 - }, - { - "epoch": 3.678804212779785, - "grad_norm": 0.000854724959935993, - "learning_rate": 0.00019999332427568517, - "loss": 46.0, - "step": 48116 - }, - { - "epoch": 3.6788806697631746, - "grad_norm": 0.0027591839898377657, - "learning_rate": 0.00019999332399814313, - "loss": 46.0, - "step": 48117 - }, - { - "epoch": 3.678957126746564, - "grad_norm": 0.0008757482282817364, - "learning_rate": 0.00019999332372059526, - "loss": 46.0, - "step": 48118 - }, - { - "epoch": 3.6790335837299537, - "grad_norm": 0.0028068888932466507, - "learning_rate": 0.00019999332344304164, - "loss": 46.0, - "step": 48119 - }, - { - "epoch": 3.6791100407133435, - "grad_norm": 0.0014979691477492452, - "learning_rate": 0.00019999332316548228, - "loss": 46.0, - "step": 48120 - }, - { - "epoch": 3.6791864976967332, - "grad_norm": 0.0021155099384486675, - "learning_rate": 0.00019999332288791712, - "loss": 46.0, - "step": 48121 - }, - { - "epoch": 3.679262954680123, - "grad_norm": 0.0016879817703738809, - "learning_rate": 0.00019999332261034621, - "loss": 46.0, - "step": 48122 - }, - { - "epoch": 3.6793394116635127, - "grad_norm": 0.004583878442645073, - "learning_rate": 0.00019999332233276956, - "loss": 46.0, - "step": 48123 - }, - { - "epoch": 3.6794158686469025, - "grad_norm": 0.001366142649203539, - "learning_rate": 0.00019999332205518708, - "loss": 46.0, - "step": 48124 - }, - { - "epoch": 3.6794923256302923, - "grad_norm": 0.0019854449201375246, - "learning_rate": 0.00019999332177759885, - "loss": 46.0, - "step": 48125 - }, - { - "epoch": 3.679568782613682, - "grad_norm": 0.0019844858907163143, - "learning_rate": 0.00019999332150000485, - "loss": 46.0, - "step": 48126 - }, - { - "epoch": 3.679645239597072, - "grad_norm": 0.0011439098743721843, - "learning_rate": 0.0001999933212224051, - "loss": 46.0, - "step": 48127 - }, - { - "epoch": 3.6797216965804616, - "grad_norm": 0.001849220134317875, - "learning_rate": 0.00019999332094479958, - "loss": 46.0, - "step": 48128 - }, - { - "epoch": 3.6797981535638513, - "grad_norm": 0.004560360684990883, - "learning_rate": 0.00019999332066718829, - "loss": 46.0, - "step": 48129 - }, - { - "epoch": 3.679874610547241, - "grad_norm": 0.001056564156897366, - "learning_rate": 0.0001999933203895712, - "loss": 46.0, - "step": 48130 - }, - { - "epoch": 3.679951067530631, - "grad_norm": 0.001179404091089964, - "learning_rate": 0.00019999332011194838, - "loss": 46.0, - "step": 48131 - }, - { - "epoch": 3.68002752451402, - "grad_norm": 0.004666946828365326, - "learning_rate": 0.00019999331983431974, - "loss": 46.0, - "step": 48132 - }, - { - "epoch": 3.68010398149741, - "grad_norm": 0.0017488595331087708, - "learning_rate": 0.00019999331955668537, - "loss": 46.0, - "step": 48133 - }, - { - "epoch": 3.6801804384807997, - "grad_norm": 0.0025561857037246227, - "learning_rate": 0.00019999331927904527, - "loss": 46.0, - "step": 48134 - }, - { - "epoch": 3.6802568954641894, - "grad_norm": 0.002687942935153842, - "learning_rate": 0.00019999331900139933, - "loss": 46.0, - "step": 48135 - }, - { - "epoch": 3.680333352447579, - "grad_norm": 0.0018847760511562228, - "learning_rate": 0.00019999331872374765, - "loss": 46.0, - "step": 48136 - }, - { - "epoch": 3.680409809430969, - "grad_norm": 0.0008842428214848042, - "learning_rate": 0.0001999933184460902, - "loss": 46.0, - "step": 48137 - }, - { - "epoch": 3.6804862664143587, - "grad_norm": 0.002648785710334778, - "learning_rate": 0.00019999331816842697, - "loss": 46.0, - "step": 48138 - }, - { - "epoch": 3.6805627233977485, - "grad_norm": 0.003374918596819043, - "learning_rate": 0.000199993317890758, - "loss": 46.0, - "step": 48139 - }, - { - "epoch": 3.680639180381138, - "grad_norm": 0.002672750735655427, - "learning_rate": 0.00019999331761308322, - "loss": 46.0, - "step": 48140 - }, - { - "epoch": 3.6807156373645276, - "grad_norm": 0.004739441443234682, - "learning_rate": 0.0001999933173354027, - "loss": 46.0, - "step": 48141 - }, - { - "epoch": 3.6807920943479173, - "grad_norm": 0.0033642337657511234, - "learning_rate": 0.0001999933170577164, - "loss": 46.0, - "step": 48142 - }, - { - "epoch": 3.680868551331307, - "grad_norm": 0.0029913003090769053, - "learning_rate": 0.00019999331678002434, - "loss": 46.0, - "step": 48143 - }, - { - "epoch": 3.680945008314697, - "grad_norm": 0.007018153555691242, - "learning_rate": 0.0001999933165023265, - "loss": 46.0, - "step": 48144 - }, - { - "epoch": 3.6810214652980866, - "grad_norm": 0.002710035303607583, - "learning_rate": 0.00019999331622462288, - "loss": 46.0, - "step": 48145 - }, - { - "epoch": 3.6810979222814764, - "grad_norm": 0.0031039919704198837, - "learning_rate": 0.00019999331594691352, - "loss": 46.0, - "step": 48146 - }, - { - "epoch": 3.681174379264866, - "grad_norm": 0.0030833669006824493, - "learning_rate": 0.0001999933156691984, - "loss": 46.0, - "step": 48147 - }, - { - "epoch": 3.681250836248256, - "grad_norm": 0.002846986288204789, - "learning_rate": 0.00019999331539147748, - "loss": 46.0, - "step": 48148 - }, - { - "epoch": 3.6813272932316456, - "grad_norm": 0.0019875599537044764, - "learning_rate": 0.0001999933151137508, - "loss": 46.0, - "step": 48149 - }, - { - "epoch": 3.6814037502150354, - "grad_norm": 0.005792904645204544, - "learning_rate": 0.00019999331483601832, - "loss": 46.0, - "step": 48150 - }, - { - "epoch": 3.681480207198425, - "grad_norm": 0.004038248211145401, - "learning_rate": 0.00019999331455828012, - "loss": 46.0, - "step": 48151 - }, - { - "epoch": 3.681556664181815, - "grad_norm": 0.002190410392358899, - "learning_rate": 0.00019999331428053615, - "loss": 46.0, - "step": 48152 - }, - { - "epoch": 3.6816331211652047, - "grad_norm": 0.002345144748687744, - "learning_rate": 0.00019999331400278635, - "loss": 46.0, - "step": 48153 - }, - { - "epoch": 3.681709578148594, - "grad_norm": 0.0012302587274461985, - "learning_rate": 0.00019999331372503085, - "loss": 46.0, - "step": 48154 - }, - { - "epoch": 3.6817860351319838, - "grad_norm": 0.0027402357663959265, - "learning_rate": 0.00019999331344726956, - "loss": 46.0, - "step": 48155 - }, - { - "epoch": 3.6818624921153735, - "grad_norm": 0.0029681732412427664, - "learning_rate": 0.00019999331316950246, - "loss": 46.0, - "step": 48156 - }, - { - "epoch": 3.6819389490987633, - "grad_norm": 0.003746664384379983, - "learning_rate": 0.00019999331289172965, - "loss": 46.0, - "step": 48157 - }, - { - "epoch": 3.682015406082153, - "grad_norm": 0.004141835030168295, - "learning_rate": 0.00019999331261395104, - "loss": 46.0, - "step": 48158 - }, - { - "epoch": 3.682091863065543, - "grad_norm": 0.00218791957013309, - "learning_rate": 0.00019999331233616668, - "loss": 46.0, - "step": 48159 - }, - { - "epoch": 3.6821683200489326, - "grad_norm": 0.003090356709435582, - "learning_rate": 0.00019999331205837652, - "loss": 46.0, - "step": 48160 - }, - { - "epoch": 3.6822447770323223, - "grad_norm": 0.0009678081260062754, - "learning_rate": 0.00019999331178058064, - "loss": 46.0, - "step": 48161 - }, - { - "epoch": 3.6823212340157117, - "grad_norm": 0.00293830968439579, - "learning_rate": 0.00019999331150277896, - "loss": 46.0, - "step": 48162 - }, - { - "epoch": 3.6823976909991014, - "grad_norm": 0.002567223273217678, - "learning_rate": 0.0001999933112249715, - "loss": 46.0, - "step": 48163 - }, - { - "epoch": 3.682474147982491, - "grad_norm": 0.0011430465383455157, - "learning_rate": 0.0001999933109471583, - "loss": 46.0, - "step": 48164 - }, - { - "epoch": 3.682550604965881, - "grad_norm": 0.0028940029442310333, - "learning_rate": 0.0001999933106693393, - "loss": 46.0, - "step": 48165 - }, - { - "epoch": 3.6826270619492707, - "grad_norm": 0.002087947679683566, - "learning_rate": 0.00019999331039151455, - "loss": 46.0, - "step": 48166 - }, - { - "epoch": 3.6827035189326605, - "grad_norm": 0.0006349135073833168, - "learning_rate": 0.00019999331011368403, - "loss": 46.0, - "step": 48167 - }, - { - "epoch": 3.68277997591605, - "grad_norm": 0.0006834067753516138, - "learning_rate": 0.00019999330983584774, - "loss": 46.0, - "step": 48168 - }, - { - "epoch": 3.68285643289944, - "grad_norm": 0.0016259908443316817, - "learning_rate": 0.00019999330955800567, - "loss": 46.0, - "step": 48169 - }, - { - "epoch": 3.6829328898828297, - "grad_norm": 0.006330954376608133, - "learning_rate": 0.00019999330928015786, - "loss": 46.0, - "step": 48170 - }, - { - "epoch": 3.6830093468662195, - "grad_norm": 0.002377611119300127, - "learning_rate": 0.00019999330900230425, - "loss": 46.0, - "step": 48171 - }, - { - "epoch": 3.6830858038496093, - "grad_norm": 0.0023775510489940643, - "learning_rate": 0.00019999330872444486, - "loss": 46.0, - "step": 48172 - }, - { - "epoch": 3.683162260832999, - "grad_norm": 0.0020591297652572393, - "learning_rate": 0.00019999330844657973, - "loss": 46.0, - "step": 48173 - }, - { - "epoch": 3.683238717816389, - "grad_norm": 0.0015864745946601033, - "learning_rate": 0.00019999330816870882, - "loss": 46.0, - "step": 48174 - }, - { - "epoch": 3.6833151747997785, - "grad_norm": 0.0033565384801477194, - "learning_rate": 0.00019999330789083214, - "loss": 46.0, - "step": 48175 - }, - { - "epoch": 3.683391631783168, - "grad_norm": 0.004203010816127062, - "learning_rate": 0.0001999933076129497, - "loss": 46.0, - "step": 48176 - }, - { - "epoch": 3.6834680887665576, - "grad_norm": 0.0015852771466597915, - "learning_rate": 0.0001999933073350615, - "loss": 46.0, - "step": 48177 - }, - { - "epoch": 3.6835445457499474, - "grad_norm": 0.003748524934053421, - "learning_rate": 0.0001999933070571675, - "loss": 46.0, - "step": 48178 - }, - { - "epoch": 3.683621002733337, - "grad_norm": 0.003356809262186289, - "learning_rate": 0.00019999330677926774, - "loss": 46.0, - "step": 48179 - }, - { - "epoch": 3.683697459716727, - "grad_norm": 0.0016292484942823648, - "learning_rate": 0.00019999330650136222, - "loss": 46.0, - "step": 48180 - }, - { - "epoch": 3.6837739167001167, - "grad_norm": 0.002851768396794796, - "learning_rate": 0.00019999330622345093, - "loss": 46.0, - "step": 48181 - }, - { - "epoch": 3.6838503736835064, - "grad_norm": 0.004709701519459486, - "learning_rate": 0.0001999933059455339, - "loss": 46.0, - "step": 48182 - }, - { - "epoch": 3.683926830666896, - "grad_norm": 0.0013135176850482821, - "learning_rate": 0.00019999330566761105, - "loss": 46.0, - "step": 48183 - }, - { - "epoch": 3.6840032876502855, - "grad_norm": 0.0021869600750505924, - "learning_rate": 0.00019999330538968244, - "loss": 46.0, - "step": 48184 - }, - { - "epoch": 3.6840797446336753, - "grad_norm": 0.0014609413919970393, - "learning_rate": 0.0001999933051117481, - "loss": 46.0, - "step": 48185 - }, - { - "epoch": 3.684156201617065, - "grad_norm": 0.003742674132809043, - "learning_rate": 0.00019999330483380794, - "loss": 46.0, - "step": 48186 - }, - { - "epoch": 3.684232658600455, - "grad_norm": 0.005365589167922735, - "learning_rate": 0.00019999330455586204, - "loss": 46.0, - "step": 48187 - }, - { - "epoch": 3.6843091155838446, - "grad_norm": 0.0013962156372144818, - "learning_rate": 0.00019999330427791036, - "loss": 46.0, - "step": 48188 - }, - { - "epoch": 3.6843855725672343, - "grad_norm": 0.0007765196496620774, - "learning_rate": 0.0001999933039999529, - "loss": 46.0, - "step": 48189 - }, - { - "epoch": 3.684462029550624, - "grad_norm": 0.001581687363795936, - "learning_rate": 0.0001999933037219897, - "loss": 46.0, - "step": 48190 - }, - { - "epoch": 3.684538486534014, - "grad_norm": 0.003194936318323016, - "learning_rate": 0.00019999330344402073, - "loss": 46.0, - "step": 48191 - }, - { - "epoch": 3.6846149435174036, - "grad_norm": 0.0032320802565664053, - "learning_rate": 0.00019999330316604596, - "loss": 46.0, - "step": 48192 - }, - { - "epoch": 3.6846914005007934, - "grad_norm": 0.0015889855567365885, - "learning_rate": 0.00019999330288806547, - "loss": 46.0, - "step": 48193 - }, - { - "epoch": 3.684767857484183, - "grad_norm": 0.003247536951676011, - "learning_rate": 0.00019999330261007917, - "loss": 46.0, - "step": 48194 - }, - { - "epoch": 3.684844314467573, - "grad_norm": 0.004080689512193203, - "learning_rate": 0.0001999933023320871, - "loss": 46.0, - "step": 48195 - }, - { - "epoch": 3.6849207714509626, - "grad_norm": 0.004314050544053316, - "learning_rate": 0.0001999933020540893, - "loss": 46.0, - "step": 48196 - }, - { - "epoch": 3.684997228434352, - "grad_norm": 0.0018197973258793354, - "learning_rate": 0.00019999330177608568, - "loss": 46.0, - "step": 48197 - }, - { - "epoch": 3.6850736854177417, - "grad_norm": 0.002514402149245143, - "learning_rate": 0.00019999330149807632, - "loss": 46.0, - "step": 48198 - }, - { - "epoch": 3.6851501424011315, - "grad_norm": 0.001138661173172295, - "learning_rate": 0.0001999933012200612, - "loss": 46.0, - "step": 48199 - }, - { - "epoch": 3.6852265993845212, - "grad_norm": 0.0015935691772028804, - "learning_rate": 0.00019999330094204028, - "loss": 46.0, - "step": 48200 - }, - { - "epoch": 3.685303056367911, - "grad_norm": 0.0013981260126456618, - "learning_rate": 0.0001999933006640136, - "loss": 46.0, - "step": 48201 - }, - { - "epoch": 3.6853795133513008, - "grad_norm": 0.001632009749300778, - "learning_rate": 0.00019999330038598115, - "loss": 46.0, - "step": 48202 - }, - { - "epoch": 3.6854559703346905, - "grad_norm": 0.006392085924744606, - "learning_rate": 0.00019999330010794295, - "loss": 46.0, - "step": 48203 - }, - { - "epoch": 3.6855324273180803, - "grad_norm": 0.0006125652580522001, - "learning_rate": 0.00019999329982989898, - "loss": 46.0, - "step": 48204 - }, - { - "epoch": 3.68560888430147, - "grad_norm": 0.0010959943756461143, - "learning_rate": 0.00019999329955184923, - "loss": 46.0, - "step": 48205 - }, - { - "epoch": 3.6856853412848594, - "grad_norm": 0.005319190677255392, - "learning_rate": 0.00019999329927379372, - "loss": 46.0, - "step": 48206 - }, - { - "epoch": 3.685761798268249, - "grad_norm": 0.002845548326149583, - "learning_rate": 0.00019999329899573245, - "loss": 46.0, - "step": 48207 - }, - { - "epoch": 3.685838255251639, - "grad_norm": 0.0011395186884328723, - "learning_rate": 0.00019999329871766536, - "loss": 46.0, - "step": 48208 - }, - { - "epoch": 3.6859147122350286, - "grad_norm": 0.0003700665256474167, - "learning_rate": 0.00019999329843959255, - "loss": 46.0, - "step": 48209 - }, - { - "epoch": 3.6859911692184184, - "grad_norm": 0.0021044376771897078, - "learning_rate": 0.00019999329816151396, - "loss": 46.0, - "step": 48210 - }, - { - "epoch": 3.686067626201808, - "grad_norm": 0.0028955296147614717, - "learning_rate": 0.0001999932978834296, - "loss": 46.0, - "step": 48211 - }, - { - "epoch": 3.686144083185198, - "grad_norm": 0.001400425098836422, - "learning_rate": 0.00019999329760533947, - "loss": 46.0, - "step": 48212 - }, - { - "epoch": 3.6862205401685877, - "grad_norm": 0.001666681026108563, - "learning_rate": 0.00019999329732724356, - "loss": 46.0, - "step": 48213 - }, - { - "epoch": 3.6862969971519775, - "grad_norm": 0.0018024382879957557, - "learning_rate": 0.00019999329704914188, - "loss": 46.0, - "step": 48214 - }, - { - "epoch": 3.686373454135367, - "grad_norm": 0.0017111904453486204, - "learning_rate": 0.00019999329677103446, - "loss": 46.0, - "step": 48215 - }, - { - "epoch": 3.686449911118757, - "grad_norm": 0.0018292288295924664, - "learning_rate": 0.00019999329649292123, - "loss": 46.0, - "step": 48216 - }, - { - "epoch": 3.6865263681021467, - "grad_norm": 0.0035259316209703684, - "learning_rate": 0.00019999329621480226, - "loss": 46.0, - "step": 48217 - }, - { - "epoch": 3.6866028250855365, - "grad_norm": 0.00329809682443738, - "learning_rate": 0.00019999329593667752, - "loss": 46.0, - "step": 48218 - }, - { - "epoch": 3.686679282068926, - "grad_norm": 0.001148804440163076, - "learning_rate": 0.000199993295658547, - "loss": 46.0, - "step": 48219 - }, - { - "epoch": 3.6867557390523156, - "grad_norm": 0.004229803569614887, - "learning_rate": 0.0001999932953804107, - "loss": 46.0, - "step": 48220 - }, - { - "epoch": 3.6868321960357053, - "grad_norm": 0.005939357914030552, - "learning_rate": 0.00019999329510226867, - "loss": 46.0, - "step": 48221 - }, - { - "epoch": 3.686908653019095, - "grad_norm": 0.0011773926671594381, - "learning_rate": 0.00019999329482412085, - "loss": 46.0, - "step": 48222 - }, - { - "epoch": 3.686985110002485, - "grad_norm": 0.0019149387953802943, - "learning_rate": 0.00019999329454596727, - "loss": 46.0, - "step": 48223 - }, - { - "epoch": 3.6870615669858746, - "grad_norm": 0.0015631114365532994, - "learning_rate": 0.0001999932942678079, - "loss": 46.0, - "step": 48224 - }, - { - "epoch": 3.6871380239692644, - "grad_norm": 0.0025576287880539894, - "learning_rate": 0.00019999329398964278, - "loss": 46.0, - "step": 48225 - }, - { - "epoch": 3.687214480952654, - "grad_norm": 0.0012888452038168907, - "learning_rate": 0.00019999329371147187, - "loss": 46.0, - "step": 48226 - }, - { - "epoch": 3.687290937936044, - "grad_norm": 0.001446765149012208, - "learning_rate": 0.0001999932934332952, - "loss": 46.0, - "step": 48227 - }, - { - "epoch": 3.687367394919433, - "grad_norm": 0.007158469408750534, - "learning_rate": 0.00019999329315511277, - "loss": 46.0, - "step": 48228 - }, - { - "epoch": 3.687443851902823, - "grad_norm": 0.006073956377804279, - "learning_rate": 0.00019999329287692454, - "loss": 46.0, - "step": 48229 - }, - { - "epoch": 3.6875203088862127, - "grad_norm": 0.004490464460104704, - "learning_rate": 0.0001999932925987306, - "loss": 46.0, - "step": 48230 - }, - { - "epoch": 3.6875967658696025, - "grad_norm": 0.0023652713280171156, - "learning_rate": 0.00019999329232053083, - "loss": 46.0, - "step": 48231 - }, - { - "epoch": 3.6876732228529923, - "grad_norm": 0.008131634443998337, - "learning_rate": 0.0001999932920423253, - "loss": 46.0, - "step": 48232 - }, - { - "epoch": 3.687749679836382, - "grad_norm": 0.0023487142752856016, - "learning_rate": 0.00019999329176411404, - "loss": 46.0, - "step": 48233 - }, - { - "epoch": 3.687826136819772, - "grad_norm": 0.0012566643999889493, - "learning_rate": 0.00019999329148589698, - "loss": 46.0, - "step": 48234 - }, - { - "epoch": 3.6879025938031615, - "grad_norm": 0.0030417630914598703, - "learning_rate": 0.00019999329120767417, - "loss": 46.0, - "step": 48235 - }, - { - "epoch": 3.6879790507865513, - "grad_norm": 0.0022397369612008333, - "learning_rate": 0.00019999329092944555, - "loss": 46.0, - "step": 48236 - }, - { - "epoch": 3.688055507769941, - "grad_norm": 0.00214776280336082, - "learning_rate": 0.00019999329065121122, - "loss": 46.0, - "step": 48237 - }, - { - "epoch": 3.688131964753331, - "grad_norm": 0.0013408100930973887, - "learning_rate": 0.0001999932903729711, - "loss": 46.0, - "step": 48238 - }, - { - "epoch": 3.6882084217367206, - "grad_norm": 0.0011732698185369372, - "learning_rate": 0.00019999329009472519, - "loss": 46.0, - "step": 48239 - }, - { - "epoch": 3.6882848787201103, - "grad_norm": 0.0015864792512729764, - "learning_rate": 0.00019999328981647353, - "loss": 46.0, - "step": 48240 - }, - { - "epoch": 3.6883613357034997, - "grad_norm": 0.0005314729060046375, - "learning_rate": 0.00019999328953821608, - "loss": 46.0, - "step": 48241 - }, - { - "epoch": 3.6884377926868894, - "grad_norm": 0.0015775756910443306, - "learning_rate": 0.00019999328925995289, - "loss": 46.0, - "step": 48242 - }, - { - "epoch": 3.688514249670279, - "grad_norm": 0.0007871423149481416, - "learning_rate": 0.00019999328898168391, - "loss": 46.0, - "step": 48243 - }, - { - "epoch": 3.688590706653669, - "grad_norm": 0.0027579700108617544, - "learning_rate": 0.0001999932887034092, - "loss": 46.0, - "step": 48244 - }, - { - "epoch": 3.6886671636370587, - "grad_norm": 0.0019248995231464505, - "learning_rate": 0.00019999328842512868, - "loss": 46.0, - "step": 48245 - }, - { - "epoch": 3.6887436206204485, - "grad_norm": 0.005275071132928133, - "learning_rate": 0.00019999328814684239, - "loss": 46.0, - "step": 48246 - }, - { - "epoch": 3.6888200776038382, - "grad_norm": 0.0011181710287928581, - "learning_rate": 0.00019999328786855035, - "loss": 46.0, - "step": 48247 - }, - { - "epoch": 3.688896534587228, - "grad_norm": 0.0019004099303856492, - "learning_rate": 0.0001999932875902525, - "loss": 46.0, - "step": 48248 - }, - { - "epoch": 3.6889729915706173, - "grad_norm": 0.0034082112833857536, - "learning_rate": 0.00019999328731194893, - "loss": 46.0, - "step": 48249 - }, - { - "epoch": 3.689049448554007, - "grad_norm": 0.0014565015444532037, - "learning_rate": 0.00019999328703363957, - "loss": 46.0, - "step": 48250 - }, - { - "epoch": 3.689125905537397, - "grad_norm": 0.0010718003613874316, - "learning_rate": 0.00019999328675532444, - "loss": 46.0, - "step": 48251 - }, - { - "epoch": 3.6892023625207866, - "grad_norm": 0.0006640134961344302, - "learning_rate": 0.00019999328647700356, - "loss": 46.0, - "step": 48252 - }, - { - "epoch": 3.6892788195041764, - "grad_norm": 0.0043806410394608974, - "learning_rate": 0.0001999932861986769, - "loss": 46.0, - "step": 48253 - }, - { - "epoch": 3.689355276487566, - "grad_norm": 0.0016125160036608577, - "learning_rate": 0.00019999328592034446, - "loss": 46.0, - "step": 48254 - }, - { - "epoch": 3.689431733470956, - "grad_norm": 0.002997814677655697, - "learning_rate": 0.00019999328564200626, - "loss": 46.0, - "step": 48255 - }, - { - "epoch": 3.6895081904543456, - "grad_norm": 0.004889064468443394, - "learning_rate": 0.0001999932853636623, - "loss": 46.0, - "step": 48256 - }, - { - "epoch": 3.6895846474377354, - "grad_norm": 0.001033770153298974, - "learning_rate": 0.00019999328508531254, - "loss": 46.0, - "step": 48257 - }, - { - "epoch": 3.689661104421125, - "grad_norm": 0.002096008276566863, - "learning_rate": 0.00019999328480695705, - "loss": 46.0, - "step": 48258 - }, - { - "epoch": 3.689737561404515, - "grad_norm": 0.0011949068866670132, - "learning_rate": 0.00019999328452859576, - "loss": 46.0, - "step": 48259 - }, - { - "epoch": 3.6898140183879047, - "grad_norm": 0.003170607378706336, - "learning_rate": 0.0001999932842502287, - "loss": 46.0, - "step": 48260 - }, - { - "epoch": 3.6898904753712944, - "grad_norm": 0.004153276327997446, - "learning_rate": 0.0001999932839718559, - "loss": 46.0, - "step": 48261 - }, - { - "epoch": 3.689966932354684, - "grad_norm": 0.0038226621691137552, - "learning_rate": 0.0001999932836934773, - "loss": 46.0, - "step": 48262 - }, - { - "epoch": 3.6900433893380735, - "grad_norm": 0.0012407738249748945, - "learning_rate": 0.00019999328341509297, - "loss": 46.0, - "step": 48263 - }, - { - "epoch": 3.6901198463214633, - "grad_norm": 0.0031481976620852947, - "learning_rate": 0.0001999932831367028, - "loss": 46.0, - "step": 48264 - }, - { - "epoch": 3.690196303304853, - "grad_norm": 0.00109721883200109, - "learning_rate": 0.00019999328285830693, - "loss": 46.0, - "step": 48265 - }, - { - "epoch": 3.690272760288243, - "grad_norm": 0.0017670998349785805, - "learning_rate": 0.00019999328257990526, - "loss": 46.0, - "step": 48266 - }, - { - "epoch": 3.6903492172716326, - "grad_norm": 0.0018323304830119014, - "learning_rate": 0.00019999328230149783, - "loss": 46.0, - "step": 48267 - }, - { - "epoch": 3.6904256742550223, - "grad_norm": 0.005305479280650616, - "learning_rate": 0.00019999328202308466, - "loss": 46.0, - "step": 48268 - }, - { - "epoch": 3.690502131238412, - "grad_norm": 0.001006661681458354, - "learning_rate": 0.00019999328174466566, - "loss": 46.0, - "step": 48269 - }, - { - "epoch": 3.690578588221802, - "grad_norm": 0.0013932877918705344, - "learning_rate": 0.00019999328146624095, - "loss": 46.0, - "step": 48270 - }, - { - "epoch": 3.690655045205191, - "grad_norm": 0.0022297517862170935, - "learning_rate": 0.00019999328118781043, - "loss": 46.0, - "step": 48271 - }, - { - "epoch": 3.690731502188581, - "grad_norm": 0.002408611122518778, - "learning_rate": 0.00019999328090937417, - "loss": 46.0, - "step": 48272 - }, - { - "epoch": 3.6908079591719707, - "grad_norm": 0.0015594715951010585, - "learning_rate": 0.0001999932806309321, - "loss": 46.0, - "step": 48273 - }, - { - "epoch": 3.6908844161553604, - "grad_norm": 0.0030948061030358076, - "learning_rate": 0.0001999932803524843, - "loss": 46.0, - "step": 48274 - }, - { - "epoch": 3.69096087313875, - "grad_norm": 0.0014688984956592321, - "learning_rate": 0.0001999932800740307, - "loss": 46.0, - "step": 48275 - }, - { - "epoch": 3.69103733012214, - "grad_norm": 0.0009575940202921629, - "learning_rate": 0.00019999327979557138, - "loss": 46.0, - "step": 48276 - }, - { - "epoch": 3.6911137871055297, - "grad_norm": 0.001533214352093637, - "learning_rate": 0.00019999327951710622, - "loss": 46.0, - "step": 48277 - }, - { - "epoch": 3.6911902440889195, - "grad_norm": 0.0032603831496089697, - "learning_rate": 0.00019999327923863534, - "loss": 46.0, - "step": 48278 - }, - { - "epoch": 3.6912667010723093, - "grad_norm": 0.00631999084725976, - "learning_rate": 0.0001999932789601587, - "loss": 46.0, - "step": 48279 - }, - { - "epoch": 3.691343158055699, - "grad_norm": 0.0015919201541692019, - "learning_rate": 0.00019999327868167627, - "loss": 46.0, - "step": 48280 - }, - { - "epoch": 3.6914196150390888, - "grad_norm": 0.0020129787735641003, - "learning_rate": 0.00019999327840318807, - "loss": 46.0, - "step": 48281 - }, - { - "epoch": 3.6914960720224785, - "grad_norm": 0.0024249712005257607, - "learning_rate": 0.00019999327812469407, - "loss": 46.0, - "step": 48282 - }, - { - "epoch": 3.6915725290058683, - "grad_norm": 0.0055593387223780155, - "learning_rate": 0.00019999327784619438, - "loss": 46.0, - "step": 48283 - }, - { - "epoch": 3.691648985989258, - "grad_norm": 0.0021007873583585024, - "learning_rate": 0.00019999327756768884, - "loss": 46.0, - "step": 48284 - }, - { - "epoch": 3.6917254429726474, - "grad_norm": 0.0005054878420196474, - "learning_rate": 0.00019999327728917758, - "loss": 46.0, - "step": 48285 - }, - { - "epoch": 3.691801899956037, - "grad_norm": 0.005452163517475128, - "learning_rate": 0.00019999327701066054, - "loss": 46.0, - "step": 48286 - }, - { - "epoch": 3.691878356939427, - "grad_norm": 0.004880304913967848, - "learning_rate": 0.00019999327673213773, - "loss": 46.0, - "step": 48287 - }, - { - "epoch": 3.6919548139228167, - "grad_norm": 0.0006186253158375621, - "learning_rate": 0.00019999327645360914, - "loss": 46.0, - "step": 48288 - }, - { - "epoch": 3.6920312709062064, - "grad_norm": 0.0023824337404221296, - "learning_rate": 0.0001999932761750748, - "loss": 46.0, - "step": 48289 - }, - { - "epoch": 3.692107727889596, - "grad_norm": 0.0009436077089048922, - "learning_rate": 0.00019999327589653468, - "loss": 46.0, - "step": 48290 - }, - { - "epoch": 3.692184184872986, - "grad_norm": 0.0010116018820554018, - "learning_rate": 0.0001999932756179888, - "loss": 46.0, - "step": 48291 - }, - { - "epoch": 3.6922606418563757, - "grad_norm": 0.0016154585173353553, - "learning_rate": 0.00019999327533943713, - "loss": 46.0, - "step": 48292 - }, - { - "epoch": 3.692337098839765, - "grad_norm": 0.0021027622278779745, - "learning_rate": 0.0001999932750608797, - "loss": 46.0, - "step": 48293 - }, - { - "epoch": 3.692413555823155, - "grad_norm": 0.008123385719954967, - "learning_rate": 0.0001999932747823165, - "loss": 46.0, - "step": 48294 - }, - { - "epoch": 3.6924900128065445, - "grad_norm": 0.001654416206292808, - "learning_rate": 0.00019999327450374757, - "loss": 46.0, - "step": 48295 - }, - { - "epoch": 3.6925664697899343, - "grad_norm": 0.004944758024066687, - "learning_rate": 0.0001999932742251728, - "loss": 46.0, - "step": 48296 - }, - { - "epoch": 3.692642926773324, - "grad_norm": 0.004164565354585648, - "learning_rate": 0.0001999932739465923, - "loss": 46.0, - "step": 48297 - }, - { - "epoch": 3.692719383756714, - "grad_norm": 0.0007931336294859648, - "learning_rate": 0.00019999327366800604, - "loss": 46.0, - "step": 48298 - }, - { - "epoch": 3.6927958407401036, - "grad_norm": 0.001713545061647892, - "learning_rate": 0.000199993273389414, - "loss": 46.0, - "step": 48299 - }, - { - "epoch": 3.6928722977234933, - "grad_norm": 0.0013544049579650164, - "learning_rate": 0.0001999932731108162, - "loss": 46.0, - "step": 48300 - }, - { - "epoch": 3.692948754706883, - "grad_norm": 0.002430782187730074, - "learning_rate": 0.0001999932728322126, - "loss": 46.0, - "step": 48301 - }, - { - "epoch": 3.693025211690273, - "grad_norm": 0.002690747380256653, - "learning_rate": 0.00019999327255360326, - "loss": 46.0, - "step": 48302 - }, - { - "epoch": 3.6931016686736626, - "grad_norm": 0.001381297712214291, - "learning_rate": 0.00019999327227498815, - "loss": 46.0, - "step": 48303 - }, - { - "epoch": 3.6931781256570524, - "grad_norm": 0.004420554731041193, - "learning_rate": 0.00019999327199636725, - "loss": 46.0, - "step": 48304 - }, - { - "epoch": 3.693254582640442, - "grad_norm": 0.006982902996242046, - "learning_rate": 0.0001999932717177406, - "loss": 46.0, - "step": 48305 - }, - { - "epoch": 3.693331039623832, - "grad_norm": 0.0018282266100868583, - "learning_rate": 0.00019999327143910818, - "loss": 46.0, - "step": 48306 - }, - { - "epoch": 3.6934074966072212, - "grad_norm": 0.0019510556012392044, - "learning_rate": 0.00019999327116047, - "loss": 46.0, - "step": 48307 - }, - { - "epoch": 3.693483953590611, - "grad_norm": 0.003085219766944647, - "learning_rate": 0.000199993270881826, - "loss": 46.0, - "step": 48308 - }, - { - "epoch": 3.6935604105740008, - "grad_norm": 0.005437133833765984, - "learning_rate": 0.0001999932706031763, - "loss": 46.0, - "step": 48309 - }, - { - "epoch": 3.6936368675573905, - "grad_norm": 0.002673087175935507, - "learning_rate": 0.00019999327032452078, - "loss": 46.0, - "step": 48310 - }, - { - "epoch": 3.6937133245407803, - "grad_norm": 0.0025422677863389254, - "learning_rate": 0.00019999327004585952, - "loss": 46.0, - "step": 48311 - }, - { - "epoch": 3.69378978152417, - "grad_norm": 0.005424900446087122, - "learning_rate": 0.00019999326976719248, - "loss": 46.0, - "step": 48312 - }, - { - "epoch": 3.69386623850756, - "grad_norm": 0.002368065994232893, - "learning_rate": 0.0001999932694885197, - "loss": 46.0, - "step": 48313 - }, - { - "epoch": 3.6939426954909496, - "grad_norm": 0.0012580581242218614, - "learning_rate": 0.00019999326920984109, - "loss": 46.0, - "step": 48314 - }, - { - "epoch": 3.694019152474339, - "grad_norm": 0.0007263403967954218, - "learning_rate": 0.00019999326893115676, - "loss": 46.0, - "step": 48315 - }, - { - "epoch": 3.6940956094577286, - "grad_norm": 0.0011105338344350457, - "learning_rate": 0.00019999326865246663, - "loss": 46.0, - "step": 48316 - }, - { - "epoch": 3.6941720664411184, - "grad_norm": 0.001534783747047186, - "learning_rate": 0.00019999326837377078, - "loss": 46.0, - "step": 48317 - }, - { - "epoch": 3.694248523424508, - "grad_norm": 0.0005228347145020962, - "learning_rate": 0.00019999326809506907, - "loss": 46.0, - "step": 48318 - }, - { - "epoch": 3.694324980407898, - "grad_norm": 0.004914476536214352, - "learning_rate": 0.00019999326781636168, - "loss": 46.0, - "step": 48319 - }, - { - "epoch": 3.6944014373912877, - "grad_norm": 0.0022618144284933805, - "learning_rate": 0.00019999326753764848, - "loss": 46.0, - "step": 48320 - }, - { - "epoch": 3.6944778943746774, - "grad_norm": 0.0018209160771220922, - "learning_rate": 0.00019999326725892954, - "loss": 46.0, - "step": 48321 - }, - { - "epoch": 3.694554351358067, - "grad_norm": 0.0017061736434698105, - "learning_rate": 0.0001999932669802048, - "loss": 46.0, - "step": 48322 - }, - { - "epoch": 3.694630808341457, - "grad_norm": 0.0013384344056248665, - "learning_rate": 0.00019999326670147428, - "loss": 46.0, - "step": 48323 - }, - { - "epoch": 3.6947072653248467, - "grad_norm": 0.002426867140457034, - "learning_rate": 0.00019999326642273805, - "loss": 46.0, - "step": 48324 - }, - { - "epoch": 3.6947837223082365, - "grad_norm": 0.005450364667922258, - "learning_rate": 0.000199993266143996, - "loss": 46.0, - "step": 48325 - }, - { - "epoch": 3.6948601792916262, - "grad_norm": 0.0029282495379447937, - "learning_rate": 0.00019999326586524817, - "loss": 46.0, - "step": 48326 - }, - { - "epoch": 3.694936636275016, - "grad_norm": 0.001443423330783844, - "learning_rate": 0.00019999326558649462, - "loss": 46.0, - "step": 48327 - }, - { - "epoch": 3.6950130932584053, - "grad_norm": 0.0007966472185216844, - "learning_rate": 0.0001999932653077353, - "loss": 46.0, - "step": 48328 - }, - { - "epoch": 3.695089550241795, - "grad_norm": 0.005007739178836346, - "learning_rate": 0.00019999326502897016, - "loss": 46.0, - "step": 48329 - }, - { - "epoch": 3.695166007225185, - "grad_norm": 0.0008839049842208624, - "learning_rate": 0.00019999326475019928, - "loss": 46.0, - "step": 48330 - }, - { - "epoch": 3.6952424642085746, - "grad_norm": 0.0018118356820195913, - "learning_rate": 0.00019999326447142263, - "loss": 46.0, - "step": 48331 - }, - { - "epoch": 3.6953189211919644, - "grad_norm": 0.0014934723731130362, - "learning_rate": 0.0001999932641926402, - "loss": 46.0, - "step": 48332 - }, - { - "epoch": 3.695395378175354, - "grad_norm": 0.002866104943677783, - "learning_rate": 0.00019999326391385202, - "loss": 46.0, - "step": 48333 - }, - { - "epoch": 3.695471835158744, - "grad_norm": 0.0013519731583073735, - "learning_rate": 0.00019999326363505805, - "loss": 46.0, - "step": 48334 - }, - { - "epoch": 3.6955482921421337, - "grad_norm": 0.003833555383607745, - "learning_rate": 0.00019999326335625833, - "loss": 46.0, - "step": 48335 - }, - { - "epoch": 3.6956247491255234, - "grad_norm": 0.0011996623361483216, - "learning_rate": 0.00019999326307745284, - "loss": 46.0, - "step": 48336 - }, - { - "epoch": 3.6957012061089127, - "grad_norm": 0.005955439526587725, - "learning_rate": 0.00019999326279864158, - "loss": 46.0, - "step": 48337 - }, - { - "epoch": 3.6957776630923025, - "grad_norm": 0.002025017747655511, - "learning_rate": 0.00019999326251982454, - "loss": 46.0, - "step": 48338 - }, - { - "epoch": 3.6958541200756923, - "grad_norm": 0.0009732857579365373, - "learning_rate": 0.00019999326224100174, - "loss": 46.0, - "step": 48339 - }, - { - "epoch": 3.695930577059082, - "grad_norm": 0.003599429503083229, - "learning_rate": 0.00019999326196217315, - "loss": 46.0, - "step": 48340 - }, - { - "epoch": 3.6960070340424718, - "grad_norm": 0.0014631038065999746, - "learning_rate": 0.00019999326168333882, - "loss": 46.0, - "step": 48341 - }, - { - "epoch": 3.6960834910258615, - "grad_norm": 0.0026974461507052183, - "learning_rate": 0.00019999326140449872, - "loss": 46.0, - "step": 48342 - }, - { - "epoch": 3.6961599480092513, - "grad_norm": 0.0022770550567656755, - "learning_rate": 0.00019999326112565285, - "loss": 46.0, - "step": 48343 - }, - { - "epoch": 3.696236404992641, - "grad_norm": 0.0022206429857760668, - "learning_rate": 0.0001999932608468012, - "loss": 46.0, - "step": 48344 - }, - { - "epoch": 3.696312861976031, - "grad_norm": 0.007957326248288155, - "learning_rate": 0.00019999326056794378, - "loss": 46.0, - "step": 48345 - }, - { - "epoch": 3.6963893189594206, - "grad_norm": 0.0009934828849509358, - "learning_rate": 0.00019999326028908055, - "loss": 46.0, - "step": 48346 - }, - { - "epoch": 3.6964657759428103, - "grad_norm": 0.0006912837270647287, - "learning_rate": 0.00019999326001021164, - "loss": 46.0, - "step": 48347 - }, - { - "epoch": 3.6965422329262, - "grad_norm": 0.0033846464939415455, - "learning_rate": 0.0001999932597313369, - "loss": 46.0, - "step": 48348 - }, - { - "epoch": 3.69661868990959, - "grad_norm": 0.0021086963824927807, - "learning_rate": 0.0001999932594524564, - "loss": 46.0, - "step": 48349 - }, - { - "epoch": 3.696695146892979, - "grad_norm": 0.0011045270366594195, - "learning_rate": 0.00019999325917357012, - "loss": 46.0, - "step": 48350 - }, - { - "epoch": 3.696771603876369, - "grad_norm": 0.0012832568027079105, - "learning_rate": 0.0001999932588946781, - "loss": 46.0, - "step": 48351 - }, - { - "epoch": 3.6968480608597587, - "grad_norm": 0.0069931442849338055, - "learning_rate": 0.0001999932586157803, - "loss": 46.0, - "step": 48352 - }, - { - "epoch": 3.6969245178431485, - "grad_norm": 0.0019340061116963625, - "learning_rate": 0.00019999325833687675, - "loss": 46.0, - "step": 48353 - }, - { - "epoch": 3.6970009748265382, - "grad_norm": 0.00277696643024683, - "learning_rate": 0.0001999932580579674, - "loss": 46.0, - "step": 48354 - }, - { - "epoch": 3.697077431809928, - "grad_norm": 0.0022195095662027597, - "learning_rate": 0.0001999932577790523, - "loss": 46.0, - "step": 48355 - }, - { - "epoch": 3.6971538887933177, - "grad_norm": 0.001210420741699636, - "learning_rate": 0.00019999325750013141, - "loss": 46.0, - "step": 48356 - }, - { - "epoch": 3.6972303457767075, - "grad_norm": 0.0010217016097158194, - "learning_rate": 0.00019999325722120477, - "loss": 46.0, - "step": 48357 - }, - { - "epoch": 3.6973068027600973, - "grad_norm": 0.0015789292519912124, - "learning_rate": 0.00019999325694227234, - "loss": 46.0, - "step": 48358 - }, - { - "epoch": 3.6973832597434866, - "grad_norm": 0.002075741533190012, - "learning_rate": 0.00019999325666333415, - "loss": 46.0, - "step": 48359 - }, - { - "epoch": 3.6974597167268763, - "grad_norm": 0.0024765036068856716, - "learning_rate": 0.0001999932563843902, - "loss": 46.0, - "step": 48360 - }, - { - "epoch": 3.697536173710266, - "grad_norm": 0.0044881487265229225, - "learning_rate": 0.0001999932561054405, - "loss": 46.0, - "step": 48361 - }, - { - "epoch": 3.697612630693656, - "grad_norm": 0.0016344249015673995, - "learning_rate": 0.00019999325582648498, - "loss": 46.0, - "step": 48362 - }, - { - "epoch": 3.6976890876770456, - "grad_norm": 0.001778409699909389, - "learning_rate": 0.00019999325554752372, - "loss": 46.0, - "step": 48363 - }, - { - "epoch": 3.6977655446604354, - "grad_norm": 0.0009742042166180909, - "learning_rate": 0.0001999932552685567, - "loss": 46.0, - "step": 48364 - }, - { - "epoch": 3.697842001643825, - "grad_norm": 0.003260405734181404, - "learning_rate": 0.0001999932549895839, - "loss": 46.0, - "step": 48365 - }, - { - "epoch": 3.697918458627215, - "grad_norm": 0.0016500040655955672, - "learning_rate": 0.00019999325471060532, - "loss": 46.0, - "step": 48366 - }, - { - "epoch": 3.6979949156106047, - "grad_norm": 0.0030370925087481737, - "learning_rate": 0.000199993254431621, - "loss": 46.0, - "step": 48367 - }, - { - "epoch": 3.6980713725939944, - "grad_norm": 0.003414441365748644, - "learning_rate": 0.00019999325415263091, - "loss": 46.0, - "step": 48368 - }, - { - "epoch": 3.698147829577384, - "grad_norm": 0.0017931165639311075, - "learning_rate": 0.000199993253873635, - "loss": 46.0, - "step": 48369 - }, - { - "epoch": 3.698224286560774, - "grad_norm": 0.0012098739389330149, - "learning_rate": 0.0001999932535946334, - "loss": 46.0, - "step": 48370 - }, - { - "epoch": 3.6983007435441637, - "grad_norm": 0.002628604182973504, - "learning_rate": 0.00019999325331562594, - "loss": 46.0, - "step": 48371 - }, - { - "epoch": 3.698377200527553, - "grad_norm": 0.001029858598485589, - "learning_rate": 0.00019999325303661278, - "loss": 46.0, - "step": 48372 - }, - { - "epoch": 3.698453657510943, - "grad_norm": 0.0010376691352576017, - "learning_rate": 0.00019999325275759383, - "loss": 46.0, - "step": 48373 - }, - { - "epoch": 3.6985301144943326, - "grad_norm": 0.0024488382041454315, - "learning_rate": 0.0001999932524785691, - "loss": 46.0, - "step": 48374 - }, - { - "epoch": 3.6986065714777223, - "grad_norm": 0.00046565619413740933, - "learning_rate": 0.0001999932521995386, - "loss": 46.0, - "step": 48375 - }, - { - "epoch": 3.698683028461112, - "grad_norm": 0.003151064272969961, - "learning_rate": 0.00019999325192050234, - "loss": 46.0, - "step": 48376 - }, - { - "epoch": 3.698759485444502, - "grad_norm": 0.0013690125197172165, - "learning_rate": 0.00019999325164146034, - "loss": 46.0, - "step": 48377 - }, - { - "epoch": 3.6988359424278916, - "grad_norm": 0.0011113978689536452, - "learning_rate": 0.00019999325136241253, - "loss": 46.0, - "step": 48378 - }, - { - "epoch": 3.6989123994112814, - "grad_norm": 0.004496335051953793, - "learning_rate": 0.00019999325108335895, - "loss": 46.0, - "step": 48379 - }, - { - "epoch": 3.6989888563946707, - "grad_norm": 0.0024092558305710554, - "learning_rate": 0.00019999325080429965, - "loss": 46.0, - "step": 48380 - }, - { - "epoch": 3.6990653133780604, - "grad_norm": 0.006183784920722246, - "learning_rate": 0.00019999325052523452, - "loss": 46.0, - "step": 48381 - }, - { - "epoch": 3.69914177036145, - "grad_norm": 0.003960580099374056, - "learning_rate": 0.00019999325024616367, - "loss": 46.0, - "step": 48382 - }, - { - "epoch": 3.69921822734484, - "grad_norm": 0.002071754075586796, - "learning_rate": 0.00019999324996708702, - "loss": 46.0, - "step": 48383 - }, - { - "epoch": 3.6992946843282297, - "grad_norm": 0.0034523485228419304, - "learning_rate": 0.0001999932496880046, - "loss": 46.0, - "step": 48384 - }, - { - "epoch": 3.6993711413116195, - "grad_norm": 0.00606326200067997, - "learning_rate": 0.0001999932494089164, - "loss": 46.0, - "step": 48385 - }, - { - "epoch": 3.6994475982950092, - "grad_norm": 0.002248312346637249, - "learning_rate": 0.00019999324912982247, - "loss": 46.0, - "step": 48386 - }, - { - "epoch": 3.699524055278399, - "grad_norm": 0.001950485398992896, - "learning_rate": 0.00019999324885072275, - "loss": 46.0, - "step": 48387 - }, - { - "epoch": 3.6996005122617888, - "grad_norm": 0.000931296672206372, - "learning_rate": 0.00019999324857161726, - "loss": 46.0, - "step": 48388 - }, - { - "epoch": 3.6996769692451785, - "grad_norm": 0.0034372382797300816, - "learning_rate": 0.000199993248292506, - "loss": 46.0, - "step": 48389 - }, - { - "epoch": 3.6997534262285683, - "grad_norm": 0.0016661185072734952, - "learning_rate": 0.000199993248013389, - "loss": 46.0, - "step": 48390 - }, - { - "epoch": 3.699829883211958, - "grad_norm": 0.004137810319662094, - "learning_rate": 0.0001999932477342662, - "loss": 46.0, - "step": 48391 - }, - { - "epoch": 3.699906340195348, - "grad_norm": 0.0008407170535065234, - "learning_rate": 0.00019999324745513763, - "loss": 46.0, - "step": 48392 - }, - { - "epoch": 3.6999827971787376, - "grad_norm": 0.003180784871801734, - "learning_rate": 0.0001999932471760033, - "loss": 46.0, - "step": 48393 - }, - { - "epoch": 3.700059254162127, - "grad_norm": 0.004735833499580622, - "learning_rate": 0.00019999324689686318, - "loss": 46.0, - "step": 48394 - }, - { - "epoch": 3.7001357111455166, - "grad_norm": 0.0018702286761254072, - "learning_rate": 0.00019999324661771733, - "loss": 46.0, - "step": 48395 - }, - { - "epoch": 3.7002121681289064, - "grad_norm": 0.0008011350873857737, - "learning_rate": 0.00019999324633856568, - "loss": 46.0, - "step": 48396 - }, - { - "epoch": 3.700288625112296, - "grad_norm": 0.0027386655565351248, - "learning_rate": 0.0001999932460594083, - "loss": 46.0, - "step": 48397 - }, - { - "epoch": 3.700365082095686, - "grad_norm": 0.00297924573533237, - "learning_rate": 0.0001999932457802451, - "loss": 46.0, - "step": 48398 - }, - { - "epoch": 3.7004415390790757, - "grad_norm": 0.001809094799682498, - "learning_rate": 0.00019999324550107615, - "loss": 46.0, - "step": 48399 - }, - { - "epoch": 3.7005179960624655, - "grad_norm": 0.001520155812613666, - "learning_rate": 0.0001999932452219014, - "loss": 46.0, - "step": 48400 - }, - { - "epoch": 3.700594453045855, - "grad_norm": 0.006784497294574976, - "learning_rate": 0.00019999324494272095, - "loss": 46.0, - "step": 48401 - }, - { - "epoch": 3.7006709100292445, - "grad_norm": 0.001747858477756381, - "learning_rate": 0.0001999932446635347, - "loss": 46.0, - "step": 48402 - }, - { - "epoch": 3.7007473670126343, - "grad_norm": 0.0017368352273479104, - "learning_rate": 0.00019999324438434269, - "loss": 46.0, - "step": 48403 - }, - { - "epoch": 3.700823823996024, - "grad_norm": 0.004228346515446901, - "learning_rate": 0.00019999324410514485, - "loss": 46.0, - "step": 48404 - }, - { - "epoch": 3.700900280979414, - "grad_norm": 0.0018827090971171856, - "learning_rate": 0.00019999324382594132, - "loss": 46.0, - "step": 48405 - }, - { - "epoch": 3.7009767379628036, - "grad_norm": 0.001362600363790989, - "learning_rate": 0.000199993243546732, - "loss": 46.0, - "step": 48406 - }, - { - "epoch": 3.7010531949461933, - "grad_norm": 0.0011410992592573166, - "learning_rate": 0.0001999932432675169, - "loss": 46.0, - "step": 48407 - }, - { - "epoch": 3.701129651929583, - "grad_norm": 0.001566110411658883, - "learning_rate": 0.00019999324298829602, - "loss": 46.0, - "step": 48408 - }, - { - "epoch": 3.701206108912973, - "grad_norm": 0.0015470510115846992, - "learning_rate": 0.00019999324270906938, - "loss": 46.0, - "step": 48409 - }, - { - "epoch": 3.7012825658963626, - "grad_norm": 0.002202283591032028, - "learning_rate": 0.00019999324242983698, - "loss": 46.0, - "step": 48410 - }, - { - "epoch": 3.7013590228797524, - "grad_norm": 0.0022866330109536648, - "learning_rate": 0.0001999932421505988, - "loss": 46.0, - "step": 48411 - }, - { - "epoch": 3.701435479863142, - "grad_norm": 0.0033468049950897694, - "learning_rate": 0.00019999324187135485, - "loss": 46.0, - "step": 48412 - }, - { - "epoch": 3.701511936846532, - "grad_norm": 0.002168313367292285, - "learning_rate": 0.00019999324159210513, - "loss": 46.0, - "step": 48413 - }, - { - "epoch": 3.7015883938299217, - "grad_norm": 0.003805692307651043, - "learning_rate": 0.00019999324131284965, - "loss": 46.0, - "step": 48414 - }, - { - "epoch": 3.7016648508133114, - "grad_norm": 0.010626656003296375, - "learning_rate": 0.0001999932410335884, - "loss": 46.0, - "step": 48415 - }, - { - "epoch": 3.7017413077967007, - "grad_norm": 0.002321209292858839, - "learning_rate": 0.00019999324075432138, - "loss": 46.0, - "step": 48416 - }, - { - "epoch": 3.7018177647800905, - "grad_norm": 0.001496392535045743, - "learning_rate": 0.0001999932404750486, - "loss": 46.0, - "step": 48417 - }, - { - "epoch": 3.7018942217634803, - "grad_norm": 0.004110829904675484, - "learning_rate": 0.00019999324019577005, - "loss": 46.0, - "step": 48418 - }, - { - "epoch": 3.70197067874687, - "grad_norm": 0.0007118233479559422, - "learning_rate": 0.00019999323991648572, - "loss": 46.0, - "step": 48419 - }, - { - "epoch": 3.70204713573026, - "grad_norm": 0.0008011743775568902, - "learning_rate": 0.0001999932396371956, - "loss": 46.0, - "step": 48420 - }, - { - "epoch": 3.7021235927136495, - "grad_norm": 0.003797075478360057, - "learning_rate": 0.00019999323935789972, - "loss": 46.0, - "step": 48421 - }, - { - "epoch": 3.7022000496970393, - "grad_norm": 0.0022682654671370983, - "learning_rate": 0.00019999323907859807, - "loss": 46.0, - "step": 48422 - }, - { - "epoch": 3.702276506680429, - "grad_norm": 0.00231959973461926, - "learning_rate": 0.0001999932387992907, - "loss": 46.0, - "step": 48423 - }, - { - "epoch": 3.7023529636638184, - "grad_norm": 0.0016140383668243885, - "learning_rate": 0.0001999932385199775, - "loss": 46.0, - "step": 48424 - }, - { - "epoch": 3.702429420647208, - "grad_norm": 0.0015152163105085492, - "learning_rate": 0.0001999932382406586, - "loss": 46.0, - "step": 48425 - }, - { - "epoch": 3.702505877630598, - "grad_norm": 0.001525428844615817, - "learning_rate": 0.00019999323796133386, - "loss": 46.0, - "step": 48426 - }, - { - "epoch": 3.7025823346139877, - "grad_norm": 0.0028463888447731733, - "learning_rate": 0.00019999323768200337, - "loss": 46.0, - "step": 48427 - }, - { - "epoch": 3.7026587915973774, - "grad_norm": 0.0017425266560167074, - "learning_rate": 0.00019999323740266714, - "loss": 46.0, - "step": 48428 - }, - { - "epoch": 3.702735248580767, - "grad_norm": 0.004447468090802431, - "learning_rate": 0.0001999932371233251, - "loss": 46.0, - "step": 48429 - }, - { - "epoch": 3.702811705564157, - "grad_norm": 0.0038088408764451742, - "learning_rate": 0.00019999323684397733, - "loss": 46.0, - "step": 48430 - }, - { - "epoch": 3.7028881625475467, - "grad_norm": 0.0014751197304576635, - "learning_rate": 0.00019999323656462377, - "loss": 46.0, - "step": 48431 - }, - { - "epoch": 3.7029646195309365, - "grad_norm": 0.0017119219992309809, - "learning_rate": 0.00019999323628526442, - "loss": 46.0, - "step": 48432 - }, - { - "epoch": 3.7030410765143262, - "grad_norm": 0.0013304936001077294, - "learning_rate": 0.00019999323600589935, - "loss": 46.0, - "step": 48433 - }, - { - "epoch": 3.703117533497716, - "grad_norm": 0.0024929079227149487, - "learning_rate": 0.00019999323572652847, - "loss": 46.0, - "step": 48434 - }, - { - "epoch": 3.7031939904811058, - "grad_norm": 0.0015167458914220333, - "learning_rate": 0.00019999323544715185, - "loss": 46.0, - "step": 48435 - }, - { - "epoch": 3.7032704474644955, - "grad_norm": 0.002394638955593109, - "learning_rate": 0.00019999323516776943, - "loss": 46.0, - "step": 48436 - }, - { - "epoch": 3.7033469044478853, - "grad_norm": 0.0016509632114320993, - "learning_rate": 0.0001999932348883813, - "loss": 46.0, - "step": 48437 - }, - { - "epoch": 3.7034233614312746, - "grad_norm": 0.0016896927263587713, - "learning_rate": 0.00019999323460898733, - "loss": 46.0, - "step": 48438 - }, - { - "epoch": 3.7034998184146644, - "grad_norm": 0.002033929107710719, - "learning_rate": 0.00019999323432958762, - "loss": 46.0, - "step": 48439 - }, - { - "epoch": 3.703576275398054, - "grad_norm": 0.00311658950522542, - "learning_rate": 0.00019999323405018213, - "loss": 46.0, - "step": 48440 - }, - { - "epoch": 3.703652732381444, - "grad_norm": 0.00377217517234385, - "learning_rate": 0.00019999323377077087, - "loss": 46.0, - "step": 48441 - }, - { - "epoch": 3.7037291893648336, - "grad_norm": 0.0018248014384880662, - "learning_rate": 0.0001999932334913539, - "loss": 46.0, - "step": 48442 - }, - { - "epoch": 3.7038056463482234, - "grad_norm": 0.0009557501180097461, - "learning_rate": 0.0001999932332119311, - "loss": 46.0, - "step": 48443 - }, - { - "epoch": 3.703882103331613, - "grad_norm": 0.0014009297592565417, - "learning_rate": 0.00019999323293250253, - "loss": 46.0, - "step": 48444 - }, - { - "epoch": 3.703958560315003, - "grad_norm": 0.0013136379420757294, - "learning_rate": 0.00019999323265306823, - "loss": 46.0, - "step": 48445 - }, - { - "epoch": 3.7040350172983922, - "grad_norm": 0.000936382042709738, - "learning_rate": 0.00019999323237362813, - "loss": 46.0, - "step": 48446 - }, - { - "epoch": 3.704111474281782, - "grad_norm": 0.001213796786032617, - "learning_rate": 0.00019999323209418226, - "loss": 46.0, - "step": 48447 - }, - { - "epoch": 3.7041879312651718, - "grad_norm": 0.0005398842040449381, - "learning_rate": 0.0001999932318147306, - "loss": 46.0, - "step": 48448 - }, - { - "epoch": 3.7042643882485615, - "grad_norm": 0.0032729131635278463, - "learning_rate": 0.00019999323153527322, - "loss": 46.0, - "step": 48449 - }, - { - "epoch": 3.7043408452319513, - "grad_norm": 0.0029777884483337402, - "learning_rate": 0.00019999323125581006, - "loss": 46.0, - "step": 48450 - } - ], - "logging_steps": 1, - "max_steps": 13079250, - "num_input_tokens_seen": 0, - "num_train_epochs": 1001, - "save_steps": 150, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1064633798639616.0, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6c438e2ef01a873df9c327ce7afbfd766face1a016800286a9e6839bd39d53 +size 12022870